validate_email.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. from typing import Optional, Union, TYPE_CHECKING
  2. import unicodedata
  3. from .exceptions import EmailSyntaxError
  4. from .types import ValidatedEmail
  5. from .syntax import split_email, validate_email_local_part, validate_email_domain_name, validate_email_domain_literal, validate_email_length
  6. from .rfc_constants import CASE_INSENSITIVE_MAILBOX_NAMES
  7. if TYPE_CHECKING:
  8. import dns.resolver
  9. _Resolver = dns.resolver.Resolver
  10. else:
  11. _Resolver = object
  12. def validate_email(
  13. email: Union[str, bytes],
  14. /, # prior arguments are positional-only
  15. *, # subsequent arguments are keyword-only
  16. allow_smtputf8: Optional[bool] = None,
  17. allow_empty_local: Optional[bool] = None,
  18. allow_quoted_local: Optional[bool] = None,
  19. allow_domain_literal: Optional[bool] = None,
  20. allow_display_name: Optional[bool] = None,
  21. strict: Optional[bool] = None,
  22. check_deliverability: Optional[bool] = None,
  23. test_environment: Optional[bool] = None,
  24. globally_deliverable: Optional[bool] = None,
  25. timeout: Optional[int] = None,
  26. dns_resolver: Optional[_Resolver] = None
  27. ) -> ValidatedEmail:
  28. """
  29. Given an email address, and some options, returns a ValidatedEmail instance
  30. with information about the address if it is valid or, if the address is not
  31. valid, raises an EmailNotValidError. This is the main function of the module.
  32. """
  33. # Fill in default values of arguments.
  34. from . import ALLOW_SMTPUTF8, ALLOW_EMPTY_LOCAL, ALLOW_QUOTED_LOCAL, ALLOW_DOMAIN_LITERAL, ALLOW_DISPLAY_NAME, \
  35. STRICT, GLOBALLY_DELIVERABLE, CHECK_DELIVERABILITY, TEST_ENVIRONMENT, DEFAULT_TIMEOUT
  36. if allow_smtputf8 is None:
  37. allow_smtputf8 = ALLOW_SMTPUTF8
  38. if allow_empty_local is None:
  39. allow_empty_local = ALLOW_EMPTY_LOCAL
  40. if allow_quoted_local is None:
  41. allow_quoted_local = ALLOW_QUOTED_LOCAL
  42. if allow_domain_literal is None:
  43. allow_domain_literal = ALLOW_DOMAIN_LITERAL
  44. if allow_display_name is None:
  45. allow_display_name = ALLOW_DISPLAY_NAME
  46. if strict is None:
  47. strict = STRICT
  48. if check_deliverability is None:
  49. check_deliverability = CHECK_DELIVERABILITY
  50. if test_environment is None:
  51. test_environment = TEST_ENVIRONMENT
  52. if globally_deliverable is None:
  53. globally_deliverable = GLOBALLY_DELIVERABLE
  54. if timeout is None and dns_resolver is None:
  55. timeout = DEFAULT_TIMEOUT
  56. if isinstance(email, str):
  57. pass
  58. elif isinstance(email, bytes):
  59. # Allow email to be a bytes instance as if it is what
  60. # will be transmitted on the wire. But assume SMTPUTF8
  61. # is unavailable, so it must be ASCII.
  62. try:
  63. email = email.decode("ascii")
  64. except ValueError as e:
  65. raise EmailSyntaxError("The email address is not valid ASCII.") from e
  66. else:
  67. raise TypeError("email must be str or bytes")
  68. # Split the address into the display name (or None), the local part
  69. # (before the @-sign), and the domain part (after the @-sign).
  70. # Normally, there is only one @-sign. But the awkward "quoted string"
  71. # local part form (RFC 5321 4.1.2) allows @-signs in the local
  72. # part if the local part is quoted.
  73. display_name, local_part, domain_part, is_quoted_local_part \
  74. = split_email(email)
  75. if display_name:
  76. # UTS #39 3.3 Email Security Profiles for Identifiers requires
  77. # display names (incorrectly called "quoted-string-part" there)
  78. # to be NFC normalized. Since these are not a part of what we
  79. # are really validating, we won't check that the input was NFC
  80. # normalized, but we'll normalize in output.
  81. display_name = unicodedata.normalize("NFC", display_name)
  82. # Collect return values in this instance.
  83. ret = ValidatedEmail()
  84. ret.original = ((local_part if not is_quoted_local_part
  85. else ('"' + local_part + '"'))
  86. + "@" + domain_part) # drop the display name, if any, for email length tests at the end
  87. ret.display_name = display_name
  88. # Validate the email address's local part syntax and get a normalized form.
  89. # If the original address was quoted and the decoded local part is a valid
  90. # unquoted local part, then we'll get back a normalized (unescaped) local
  91. # part.
  92. local_part_info = validate_email_local_part(local_part,
  93. allow_smtputf8=allow_smtputf8,
  94. allow_empty_local=allow_empty_local,
  95. quoted_local_part=is_quoted_local_part,
  96. strict=strict)
  97. ret.local_part = local_part_info["local_part"]
  98. ret.ascii_local_part = local_part_info["ascii_local_part"]
  99. ret.smtputf8 = local_part_info["smtputf8"]
  100. # RFC 6532 section 3.1 says that Unicode NFC normalization should be applied,
  101. # so we'll return the NFC-normalized local part. Since the caller may use that
  102. # string in place of the original string, ensure it is also valid.
  103. #
  104. # UTS #39 3.3 Email Security Profiles for Identifiers requires local parts
  105. # to be NFKC normalized, which loses some information in characters that can
  106. # be decomposed. We might want to consider applying NFKC normalization, but
  107. # we can't make the change easily because it would break database lookups
  108. # for any caller that put a normalized address from a previous version of
  109. # this library. (UTS #39 seems to require that the *input* be NKFC normalized
  110. # and has other requirements that are hard to check without additional Unicode
  111. # data, and I don't know whether the rules really apply in the wild.)
  112. normalized_local_part = unicodedata.normalize("NFC", ret.local_part)
  113. if normalized_local_part != ret.local_part:
  114. try:
  115. validate_email_local_part(normalized_local_part,
  116. allow_smtputf8=allow_smtputf8,
  117. allow_empty_local=allow_empty_local,
  118. quoted_local_part=is_quoted_local_part,
  119. strict=strict)
  120. except EmailSyntaxError as e:
  121. raise EmailSyntaxError("After Unicode normalization: " + str(e)) from e
  122. ret.local_part = normalized_local_part
  123. # If a quoted local part isn't allowed but is present, now raise an exception.
  124. # This is done after any exceptions raised by validate_email_local_part so
  125. # that mandatory checks have highest precedence.
  126. if is_quoted_local_part and not allow_quoted_local:
  127. raise EmailSyntaxError("Quoting the part before the @-sign is not allowed here.")
  128. # Some local parts are required to be case-insensitive, so we should normalize
  129. # to lowercase.
  130. # RFC 2142
  131. if ret.ascii_local_part is not None \
  132. and ret.ascii_local_part.lower() in CASE_INSENSITIVE_MAILBOX_NAMES \
  133. and ret.local_part is not None:
  134. ret.ascii_local_part = ret.ascii_local_part.lower()
  135. ret.local_part = ret.local_part.lower()
  136. # Validate the email address's domain part syntax and get a normalized form.
  137. is_domain_literal = False
  138. if len(domain_part) == 0:
  139. raise EmailSyntaxError("There must be something after the @-sign.")
  140. elif domain_part.startswith("[") and domain_part.endswith("]"):
  141. # Parse the address in the domain literal and get back a normalized domain.
  142. domain_literal_info = validate_email_domain_literal(domain_part[1:-1])
  143. if not allow_domain_literal:
  144. raise EmailSyntaxError("A bracketed IP address after the @-sign is not allowed here.")
  145. ret.domain = domain_literal_info["domain"]
  146. ret.ascii_domain = domain_literal_info["domain"] # Domain literals are always ASCII.
  147. ret.domain_address = domain_literal_info["domain_address"]
  148. is_domain_literal = True # Prevent deliverability checks.
  149. else:
  150. # Check the syntax of the domain and get back a normalized
  151. # internationalized and ASCII form.
  152. domain_name_info = validate_email_domain_name(domain_part, test_environment=test_environment, globally_deliverable=globally_deliverable)
  153. ret.domain = domain_name_info["domain"]
  154. ret.ascii_domain = domain_name_info["ascii_domain"]
  155. # Construct the complete normalized form.
  156. ret.normalized = ret.local_part + "@" + ret.domain
  157. # If the email address has an ASCII form, add it.
  158. if not ret.smtputf8:
  159. if not ret.ascii_domain:
  160. raise Exception("Missing ASCII domain.")
  161. ret.ascii_email = (ret.ascii_local_part or "") + "@" + ret.ascii_domain
  162. else:
  163. ret.ascii_email = None
  164. # Check the length of the address.
  165. validate_email_length(ret)
  166. # Check that a display name is permitted. It's the last syntax check
  167. # because we always check against optional parsing features last.
  168. if display_name is not None and not allow_display_name:
  169. raise EmailSyntaxError("A display name and angle brackets around the email address are not permitted here.")
  170. if check_deliverability and not test_environment:
  171. # Validate the email address's deliverability using DNS
  172. # and update the returned ValidatedEmail object with metadata.
  173. if is_domain_literal:
  174. # There is nothing to check --- skip deliverability checks.
  175. return ret
  176. # Lazy load `deliverability` as it is slow to import (due to dns.resolver)
  177. from .deliverability import validate_email_deliverability
  178. deliverability_info = validate_email_deliverability(
  179. ret.ascii_domain, ret.domain, timeout, dns_resolver
  180. )
  181. mx = deliverability_info.get("mx")
  182. if mx is not None:
  183. ret.mx = mx
  184. ret.mx_fallback_type = deliverability_info.get("mx_fallback_type")
  185. return ret