_hypothesis_plugin.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. """
  2. Register Hypothesis strategies for Pydantic custom types.
  3. This enables fully-automatic generation of test data for most Pydantic classes.
  4. Note that this module has *no* runtime impact on Pydantic itself; instead it
  5. is registered as a setuptools entry point and Hypothesis will import it if
  6. Pydantic is installed. See also:
  7. https://hypothesis.readthedocs.io/en/latest/strategies.html#registering-strategies-via-setuptools-entry-points
  8. https://hypothesis.readthedocs.io/en/latest/data.html#hypothesis.strategies.register_type_strategy
  9. https://hypothesis.readthedocs.io/en/latest/strategies.html#interaction-with-pytest-cov
  10. https://docs.pydantic.dev/usage/types/#pydantic-types
  11. Note that because our motivation is to *improve user experience*, the strategies
  12. are always sound (never generate invalid data) but sacrifice completeness for
  13. maintainability (i.e. may be unable to generate some tricky but valid data).
  14. Finally, this module makes liberal use of `# type: ignore[<code>]` pragmas.
  15. This is because Hypothesis annotates `register_type_strategy()` with
  16. `(T, SearchStrategy[T])`, but in most cases we register e.g. `ConstrainedInt`
  17. to generate instances of the builtin `int` type which match the constraints.
  18. """
  19. import contextlib
  20. import datetime
  21. import ipaddress
  22. import json
  23. import math
  24. from fractions import Fraction
  25. from typing import Callable, Dict, Type, Union, cast, overload
  26. import hypothesis.strategies as st
  27. import pydantic
  28. import pydantic.color
  29. import pydantic.types
  30. from pydantic.v1.utils import lenient_issubclass
  31. # FilePath and DirectoryPath are explicitly unsupported, as we'd have to create
  32. # them on-disk, and that's unsafe in general without being told *where* to do so.
  33. #
  34. # URLs are unsupported because it's easy for users to define their own strategy for
  35. # "normal" URLs, and hard for us to define a general strategy which includes "weird"
  36. # URLs but doesn't also have unpredictable performance problems.
  37. #
  38. # conlist() and conset() are unsupported for now, because the workarounds for
  39. # Cython and Hypothesis to handle parametrized generic types are incompatible.
  40. # We are rethinking Hypothesis compatibility in Pydantic v2.
  41. # Emails
  42. try:
  43. import email_validator
  44. except ImportError: # pragma: no cover
  45. pass
  46. else:
  47. def is_valid_email(s: str) -> bool:
  48. # Hypothesis' st.emails() occasionally generates emails like 0@A0--0.ac
  49. # that are invalid according to email-validator, so we filter those out.
  50. try:
  51. email_validator.validate_email(s, check_deliverability=False)
  52. return True
  53. except email_validator.EmailNotValidError: # pragma: no cover
  54. return False
  55. # Note that these strategies deliberately stay away from any tricky Unicode
  56. # or other encoding issues; we're just trying to generate *something* valid.
  57. st.register_type_strategy(pydantic.EmailStr, st.emails().filter(is_valid_email)) # type: ignore[arg-type]
  58. st.register_type_strategy(
  59. pydantic.NameEmail,
  60. st.builds(
  61. '{} <{}>'.format, # type: ignore[arg-type]
  62. st.from_regex('[A-Za-z0-9_]+( [A-Za-z0-9_]+){0,5}', fullmatch=True),
  63. st.emails().filter(is_valid_email),
  64. ),
  65. )
  66. # PyObject - dotted names, in this case taken from the math module.
  67. st.register_type_strategy(
  68. pydantic.PyObject, # type: ignore[arg-type]
  69. st.sampled_from(
  70. [cast(pydantic.PyObject, f'math.{name}') for name in sorted(vars(math)) if not name.startswith('_')]
  71. ),
  72. )
  73. # CSS3 Colors; as name, hex, rgb(a) tuples or strings, or hsl strings
  74. _color_regexes = (
  75. '|'.join(
  76. (
  77. pydantic.color.r_hex_short,
  78. pydantic.color.r_hex_long,
  79. pydantic.color.r_rgb,
  80. pydantic.color.r_rgba,
  81. pydantic.color.r_hsl,
  82. pydantic.color.r_hsla,
  83. )
  84. )
  85. # Use more precise regex patterns to avoid value-out-of-range errors
  86. .replace(pydantic.color._r_sl, r'(?:(\d\d?(?:\.\d+)?|100(?:\.0+)?)%)')
  87. .replace(pydantic.color._r_alpha, r'(?:(0(?:\.\d+)?|1(?:\.0+)?|\.\d+|\d{1,2}%))')
  88. .replace(pydantic.color._r_255, r'(?:((?:\d|\d\d|[01]\d\d|2[0-4]\d|25[0-4])(?:\.\d+)?|255(?:\.0+)?))')
  89. )
  90. st.register_type_strategy(
  91. pydantic.color.Color,
  92. st.one_of(
  93. st.sampled_from(sorted(pydantic.color.COLORS_BY_NAME)),
  94. st.tuples(
  95. st.integers(0, 255),
  96. st.integers(0, 255),
  97. st.integers(0, 255),
  98. st.none() | st.floats(0, 1) | st.floats(0, 100).map('{}%'.format),
  99. ),
  100. st.from_regex(_color_regexes, fullmatch=True),
  101. ),
  102. )
  103. # Card numbers, valid according to the Luhn algorithm
  104. def add_luhn_digit(card_number: str) -> str:
  105. # See https://en.wikipedia.org/wiki/Luhn_algorithm
  106. for digit in '0123456789':
  107. with contextlib.suppress(Exception):
  108. pydantic.PaymentCardNumber.validate_luhn_check_digit(card_number + digit)
  109. return card_number + digit
  110. raise AssertionError('Unreachable') # pragma: no cover
  111. card_patterns = (
  112. # Note that these patterns omit the Luhn check digit; that's added by the function above
  113. '4[0-9]{14}', # Visa
  114. '5[12345][0-9]{13}', # Mastercard
  115. '3[47][0-9]{12}', # American Express
  116. '[0-26-9][0-9]{10,17}', # other (incomplete to avoid overlap)
  117. )
  118. st.register_type_strategy(
  119. pydantic.PaymentCardNumber,
  120. st.from_regex('|'.join(card_patterns), fullmatch=True).map(add_luhn_digit), # type: ignore[arg-type]
  121. )
  122. # UUIDs
  123. st.register_type_strategy(pydantic.UUID1, st.uuids(version=1))
  124. st.register_type_strategy(pydantic.UUID3, st.uuids(version=3))
  125. st.register_type_strategy(pydantic.UUID4, st.uuids(version=4))
  126. st.register_type_strategy(pydantic.UUID5, st.uuids(version=5))
  127. # Secrets
  128. st.register_type_strategy(pydantic.SecretBytes, st.binary().map(pydantic.SecretBytes))
  129. st.register_type_strategy(pydantic.SecretStr, st.text().map(pydantic.SecretStr))
  130. # IP addresses, networks, and interfaces
  131. st.register_type_strategy(pydantic.IPvAnyAddress, st.ip_addresses()) # type: ignore[arg-type]
  132. st.register_type_strategy(
  133. pydantic.IPvAnyInterface,
  134. st.from_type(ipaddress.IPv4Interface) | st.from_type(ipaddress.IPv6Interface), # type: ignore[arg-type]
  135. )
  136. st.register_type_strategy(
  137. pydantic.IPvAnyNetwork,
  138. st.from_type(ipaddress.IPv4Network) | st.from_type(ipaddress.IPv6Network), # type: ignore[arg-type]
  139. )
  140. # We hook into the con***() functions and the ConstrainedNumberMeta metaclass,
  141. # so here we only have to register subclasses for other constrained types which
  142. # don't go via those mechanisms. Then there are the registration hooks below.
  143. st.register_type_strategy(pydantic.StrictBool, st.booleans())
  144. st.register_type_strategy(pydantic.StrictStr, st.text())
  145. # FutureDate, PastDate
  146. st.register_type_strategy(pydantic.FutureDate, st.dates(min_value=datetime.date.today() + datetime.timedelta(days=1)))
  147. st.register_type_strategy(pydantic.PastDate, st.dates(max_value=datetime.date.today() - datetime.timedelta(days=1)))
  148. # Constrained-type resolver functions
  149. #
  150. # For these ones, we actually want to inspect the type in order to work out a
  151. # satisfying strategy. First up, the machinery for tracking resolver functions:
  152. RESOLVERS: Dict[type, Callable[[type], st.SearchStrategy]] = {} # type: ignore[type-arg]
  153. @overload
  154. def _registered(typ: Type[pydantic.types.T]) -> Type[pydantic.types.T]:
  155. pass
  156. @overload
  157. def _registered(typ: pydantic.types.ConstrainedNumberMeta) -> pydantic.types.ConstrainedNumberMeta:
  158. pass
  159. def _registered(
  160. typ: Union[Type[pydantic.types.T], pydantic.types.ConstrainedNumberMeta]
  161. ) -> Union[Type[pydantic.types.T], pydantic.types.ConstrainedNumberMeta]:
  162. # This function replaces the version in `pydantic.types`, in order to
  163. # effect the registration of new constrained types so that Hypothesis
  164. # can generate valid examples.
  165. pydantic.types._DEFINED_TYPES.add(typ)
  166. for supertype, resolver in RESOLVERS.items():
  167. if issubclass(typ, supertype):
  168. st.register_type_strategy(typ, resolver(typ)) # type: ignore
  169. return typ
  170. raise NotImplementedError(f'Unknown type {typ!r} has no resolver to register') # pragma: no cover
  171. def resolves(
  172. typ: Union[type, pydantic.types.ConstrainedNumberMeta]
  173. ) -> Callable[[Callable[..., st.SearchStrategy]], Callable[..., st.SearchStrategy]]: # type: ignore[type-arg]
  174. def inner(f): # type: ignore
  175. assert f not in RESOLVERS
  176. RESOLVERS[typ] = f
  177. return f
  178. return inner
  179. # Type-to-strategy resolver functions
  180. @resolves(pydantic.JsonWrapper)
  181. def resolve_json(cls): # type: ignore[no-untyped-def]
  182. try:
  183. inner = st.none() if cls.inner_type is None else st.from_type(cls.inner_type)
  184. except Exception: # pragma: no cover
  185. finite = st.floats(allow_infinity=False, allow_nan=False)
  186. inner = st.recursive(
  187. base=st.one_of(st.none(), st.booleans(), st.integers(), finite, st.text()),
  188. extend=lambda x: st.lists(x) | st.dictionaries(st.text(), x), # type: ignore
  189. )
  190. inner_type = getattr(cls, 'inner_type', None)
  191. return st.builds(
  192. cls.inner_type.json if lenient_issubclass(inner_type, pydantic.BaseModel) else json.dumps,
  193. inner,
  194. ensure_ascii=st.booleans(),
  195. indent=st.none() | st.integers(0, 16),
  196. sort_keys=st.booleans(),
  197. )
  198. @resolves(pydantic.ConstrainedBytes)
  199. def resolve_conbytes(cls): # type: ignore[no-untyped-def] # pragma: no cover
  200. min_size = cls.min_length or 0
  201. max_size = cls.max_length
  202. if not cls.strip_whitespace:
  203. return st.binary(min_size=min_size, max_size=max_size)
  204. # Fun with regex to ensure we neither start nor end with whitespace
  205. repeats = '{{{},{}}}'.format(
  206. min_size - 2 if min_size > 2 else 0,
  207. max_size - 2 if (max_size or 0) > 2 else '',
  208. )
  209. if min_size >= 2:
  210. pattern = rf'\W.{repeats}\W'
  211. elif min_size == 1:
  212. pattern = rf'\W(.{repeats}\W)?'
  213. else:
  214. assert min_size == 0
  215. pattern = rf'(\W(.{repeats}\W)?)?'
  216. return st.from_regex(pattern.encode(), fullmatch=True)
  217. @resolves(pydantic.ConstrainedDecimal)
  218. def resolve_condecimal(cls): # type: ignore[no-untyped-def]
  219. min_value = cls.ge
  220. max_value = cls.le
  221. if cls.gt is not None:
  222. assert min_value is None, 'Set `gt` or `ge`, but not both'
  223. min_value = cls.gt
  224. if cls.lt is not None:
  225. assert max_value is None, 'Set `lt` or `le`, but not both'
  226. max_value = cls.lt
  227. s = st.decimals(min_value, max_value, allow_nan=False, places=cls.decimal_places)
  228. if cls.lt is not None:
  229. s = s.filter(lambda d: d < cls.lt)
  230. if cls.gt is not None:
  231. s = s.filter(lambda d: cls.gt < d)
  232. return s
  233. @resolves(pydantic.ConstrainedFloat)
  234. def resolve_confloat(cls): # type: ignore[no-untyped-def]
  235. min_value = cls.ge
  236. max_value = cls.le
  237. exclude_min = False
  238. exclude_max = False
  239. if cls.gt is not None:
  240. assert min_value is None, 'Set `gt` or `ge`, but not both'
  241. min_value = cls.gt
  242. exclude_min = True
  243. if cls.lt is not None:
  244. assert max_value is None, 'Set `lt` or `le`, but not both'
  245. max_value = cls.lt
  246. exclude_max = True
  247. if cls.multiple_of is None:
  248. return st.floats(min_value, max_value, exclude_min=exclude_min, exclude_max=exclude_max, allow_nan=False)
  249. if min_value is not None:
  250. min_value = math.ceil(min_value / cls.multiple_of)
  251. if exclude_min:
  252. min_value = min_value + 1
  253. if max_value is not None:
  254. assert max_value >= cls.multiple_of, 'Cannot build model with max value smaller than multiple of'
  255. max_value = math.floor(max_value / cls.multiple_of)
  256. if exclude_max:
  257. max_value = max_value - 1
  258. return st.integers(min_value, max_value).map(lambda x: x * cls.multiple_of)
  259. @resolves(pydantic.ConstrainedInt)
  260. def resolve_conint(cls): # type: ignore[no-untyped-def]
  261. min_value = cls.ge
  262. max_value = cls.le
  263. if cls.gt is not None:
  264. assert min_value is None, 'Set `gt` or `ge`, but not both'
  265. min_value = cls.gt + 1
  266. if cls.lt is not None:
  267. assert max_value is None, 'Set `lt` or `le`, but not both'
  268. max_value = cls.lt - 1
  269. if cls.multiple_of is None or cls.multiple_of == 1:
  270. return st.integers(min_value, max_value)
  271. # These adjustments and the .map handle integer-valued multiples, while the
  272. # .filter handles trickier cases as for confloat.
  273. if min_value is not None:
  274. min_value = math.ceil(Fraction(min_value) / Fraction(cls.multiple_of))
  275. if max_value is not None:
  276. max_value = math.floor(Fraction(max_value) / Fraction(cls.multiple_of))
  277. return st.integers(min_value, max_value).map(lambda x: x * cls.multiple_of)
  278. @resolves(pydantic.ConstrainedDate)
  279. def resolve_condate(cls): # type: ignore[no-untyped-def]
  280. if cls.ge is not None:
  281. assert cls.gt is None, 'Set `gt` or `ge`, but not both'
  282. min_value = cls.ge
  283. elif cls.gt is not None:
  284. min_value = cls.gt + datetime.timedelta(days=1)
  285. else:
  286. min_value = datetime.date.min
  287. if cls.le is not None:
  288. assert cls.lt is None, 'Set `lt` or `le`, but not both'
  289. max_value = cls.le
  290. elif cls.lt is not None:
  291. max_value = cls.lt - datetime.timedelta(days=1)
  292. else:
  293. max_value = datetime.date.max
  294. return st.dates(min_value, max_value)
  295. @resolves(pydantic.ConstrainedStr)
  296. def resolve_constr(cls): # type: ignore[no-untyped-def] # pragma: no cover
  297. min_size = cls.min_length or 0
  298. max_size = cls.max_length
  299. if cls.regex is None and not cls.strip_whitespace:
  300. return st.text(min_size=min_size, max_size=max_size)
  301. if cls.regex is not None:
  302. strategy = st.from_regex(cls.regex)
  303. if cls.strip_whitespace:
  304. strategy = strategy.filter(lambda s: s == s.strip())
  305. elif cls.strip_whitespace:
  306. repeats = '{{{},{}}}'.format(
  307. min_size - 2 if min_size > 2 else 0,
  308. max_size - 2 if (max_size or 0) > 2 else '',
  309. )
  310. if min_size >= 2:
  311. strategy = st.from_regex(rf'\W.{repeats}\W')
  312. elif min_size == 1:
  313. strategy = st.from_regex(rf'\W(.{repeats}\W)?')
  314. else:
  315. assert min_size == 0
  316. strategy = st.from_regex(rf'(\W(.{repeats}\W)?)?')
  317. if min_size == 0 and max_size is None:
  318. return strategy
  319. elif max_size is None:
  320. return strategy.filter(lambda s: min_size <= len(s))
  321. return strategy.filter(lambda s: min_size <= len(s) <= max_size)
  322. # Finally, register all previously-defined types, and patch in our new function
  323. for typ in list(pydantic.types._DEFINED_TYPES):
  324. _registered(typ)
  325. pydantic.types._registered = _registered
  326. st.register_type_strategy(pydantic.Json, resolve_json)