← go back

I heart regular expressions

# Verbose-mode regular expression for an SMTP mailbox ("local-part@domain")
# following the grammar of RFC 5321, sections 4.1.2 and 4.1.3.
#
# The pattern is intentionally unanchored: use ``pattern.fullmatch(text)`` to
# validate a whole string.
#
# Capture groups:
#   1 - the local part (dot-string or quoted string, quotes included)
#   2 - the domain (domain name, or address literal including the brackets)
#   3 - only set when a general address literal matched; holds the last
#       character of its content
#
# Known limits of the pattern itself: IPv4 octets are not range-checked
# (0-255), and any "[tag:content]" literal is accepted by the general address
# literal branch, so the tag has to be verified separately.
pattern = re.compile(r"""
# Super Duper Hyper Mega Email Address Regex Pattern
# Only matches valid email "mailboxes"
# as specified in RFC 5321 (section 4.1.2)
(   # Local part of the address.
    # Dotted string: one or more atoms separated by single dots
    [A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+
    (?:\.
        [A-Za-z0-9!#$%&'*+/=?^_`{|}~-]+
    )*
    |
    # Quoted string: any number of plain characters or backslash-escaped pairs
    \"(?:[ -!#-\[\]-~]|\\[ -~])*\"
)
@
(   # Domain of the address.
    # Domain name: every label starts and ends with a letter or digit,
    # hyphens are only allowed in between
    [A-Za-z0-9]
    (?:[A-Za-z0-9-]*[A-Za-z0-9])?
    (?:\.
        [A-Za-z0-9]
        (?:[A-Za-z0-9-]*[A-Za-z0-9])?
    )*
    |
    # Address literals
    \[(?:
        # IPv4 address (ASCII digits only; octet range is not checked)
        [0-9]{1,3}
        (?:\.
            [0-9]{1,3}
        ){3}
        |
        IPv6:(?:
            # In full: exactly 8 groups
            [0-9A-Fa-f]{1,4}
            (?::
                [0-9A-Fa-f]{1,4}
            ){7}
            |
            # Shortened
            # Section 4.1.3 notes that the groups on the left and the right
            # of the double colon must total no more than 6, so the main
            # pattern is repeated once per left-hand group count
            # (since regex doesn't do math)
            (?:
                # 0 on the left, at most 6 on the right
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                    (?::
                        [0-9A-Fa-f]{1,4}
                    ){0,5}
                )?
                |
                # 1 on the left, at most 5 on the right
                [0-9A-Fa-f]{1,4}
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                    (?::
                        [0-9A-Fa-f]{1,4}
                    ){0,4}
                )?
                |
                # 2 on the left, at most 4 on the right
                [0-9A-Fa-f]{1,4}
                (?::
                    [0-9A-Fa-f]{1,4}
                )
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                    (?::
                        [0-9A-Fa-f]{1,4}
                    ){0,3}
                )?
                |
                # 3 on the left, at most 3 on the right
                [0-9A-Fa-f]{1,4}
                (?::
                    [0-9A-Fa-f]{1,4}
                ){2}
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                    (?::
                        [0-9A-Fa-f]{1,4}
                    ){0,2}
                )?
                |
                # 4 on the left, at most 2 on the right
                [0-9A-Fa-f]{1,4}
                (?::
                    [0-9A-Fa-f]{1,4}
                ){3}
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                    (?::
                        [0-9A-Fa-f]{1,4}
                    ){0,1}
                )?
                |
                # 5 on the left, at most 1 on the right
                [0-9A-Fa-f]{1,4}
                (?::
                    [0-9A-Fa-f]{1,4}
                ){4}
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                )?
                |
                # 6 on the left, none on the right
                [0-9A-Fa-f]{1,4}
                (?::
                    [0-9A-Fa-f]{1,4}
                ){5}
                ::
            )
            |
            # In full (with IPv4 address): 6 groups, then a dotted quad
            [0-9A-Fa-f]{1,4}
            (?::
                [0-9A-Fa-f]{1,4}
            ){5}
            :[0-9]{1,3}
            (?:\.
                [0-9]{1,3}
            ){3}
            |
            # Shortened (with IPv4 address)
            # Same repetition as above, but the groups around the double
            # colon must total no more than 4.
            (?:
                # 0 on the left, at most 4 on the right
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                    (?::
                        [0-9A-Fa-f]{1,4}
                    ){0,3}
                    :
                )?
                [0-9]{1,3}
                (?:\.
                    [0-9]{1,3}
                ){3}
                |
                # 1 on the left, at most 3 on the right
                [0-9A-Fa-f]{1,4}
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                    (?::
                        [0-9A-Fa-f]{1,4}
                    ){0,2}
                    :
                )?
                [0-9]{1,3}
                (?:\.
                    [0-9]{1,3}
                ){3}
                |
                # 2 on the left, at most 2 on the right
                [0-9A-Fa-f]{1,4}
                (?::
                    [0-9A-Fa-f]{1,4}
                )
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                    (?::
                        [0-9A-Fa-f]{1,4}
                    ){0,1}
                    :
                )?
                [0-9]{1,3}
                (?:\.
                    [0-9]{1,3}
                ){3}
                |
                # 3 on the left, at most 1 on the right
                [0-9A-Fa-f]{1,4}
                (?::
                    [0-9A-Fa-f]{1,4}
                ){2}
                ::
                (?:
                    [0-9A-Fa-f]{1,4}
                    :
                )?
                [0-9]{1,3}
                (?:\.
                    [0-9]{1,3}
                ){3}
                |
                # 4 on the left, none on the right
                [0-9A-Fa-f]{1,4}
                (?::
                    [0-9A-Fa-f]{1,4}
                ){3}
                ::
                [0-9]{1,3}
                (?:\.
                    [0-9]{1,3}
                ){3}
            )
        )
        |
        # General address: tag, colon, one or more content characters
        # Verifying the tag should be done separately
        [A-Za-z0-9-]*[A-Za-z0-9]
        :([!-Z^-~])+
    )\]
)
""", re.X)
Apr 25, 2025, 5:15 PM
2 0 0

Comments