hashes.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. from __future__ import annotations
  2. import hashlib
  3. from collections.abc import Iterable
  4. from typing import TYPE_CHECKING, BinaryIO, NoReturn
  5. from pip._internal.exceptions import HashMismatch, HashMissing, InstallationError
  6. from pip._internal.utils.misc import read_chunks
  7. if TYPE_CHECKING:
  8. from hashlib import _Hash
  9. # The recommended hash algo of the moment. Change this whenever the state of
  10. # the art changes; it won't hurt backward compatibility.
  11. FAVORITE_HASH = "sha256"
  12. # Names of hashlib algorithms allowed by the --hash option and ``pip hash``
  13. # Currently, those are the ones at least as collision-resistant as sha256.
  14. STRONG_HASHES = ["sha256", "sha384", "sha512"]
  15. class Hashes:
  16. """A wrapper that builds multiple hashes at once and checks them against
  17. known-good values
  18. """
  19. def __init__(self, hashes: dict[str, list[str]] | None = None) -> None:
  20. """
  21. :param hashes: A dict of algorithm names pointing to lists of allowed
  22. hex digests
  23. """
  24. allowed = {}
  25. if hashes is not None:
  26. for alg, keys in hashes.items():
  27. # Make sure values are always sorted (to ease equality checks)
  28. allowed[alg] = [k.lower() for k in sorted(keys)]
  29. self._allowed = allowed
  30. def __and__(self, other: Hashes) -> Hashes:
  31. if not isinstance(other, Hashes):
  32. return NotImplemented
  33. # If either of the Hashes object is entirely empty (i.e. no hash
  34. # specified at all), all hashes from the other object are allowed.
  35. if not other:
  36. return self
  37. if not self:
  38. return other
  39. # Otherwise only hashes that present in both objects are allowed.
  40. new = {}
  41. for alg, values in other._allowed.items():
  42. if alg not in self._allowed:
  43. continue
  44. new[alg] = [v for v in values if v in self._allowed[alg]]
  45. return Hashes(new)
  46. @property
  47. def digest_count(self) -> int:
  48. return sum(len(digests) for digests in self._allowed.values())
  49. def is_hash_allowed(self, hash_name: str, hex_digest: str) -> bool:
  50. """Return whether the given hex digest is allowed."""
  51. return hex_digest in self._allowed.get(hash_name, [])
  52. def check_against_chunks(self, chunks: Iterable[bytes]) -> None:
  53. """Check good hashes against ones built from iterable of chunks of
  54. data.
  55. Raise HashMismatch if none match.
  56. """
  57. gots = {}
  58. for hash_name in self._allowed.keys():
  59. try:
  60. gots[hash_name] = hashlib.new(hash_name)
  61. except (ValueError, TypeError):
  62. raise InstallationError(f"Unknown hash name: {hash_name}")
  63. for chunk in chunks:
  64. for hash in gots.values():
  65. hash.update(chunk)
  66. for hash_name, got in gots.items():
  67. if got.hexdigest() in self._allowed[hash_name]:
  68. return
  69. self._raise(gots)
  70. def _raise(self, gots: dict[str, _Hash]) -> NoReturn:
  71. raise HashMismatch(self._allowed, gots)
  72. def check_against_file(self, file: BinaryIO) -> None:
  73. """Check good hashes against a file-like object
  74. Raise HashMismatch if none match.
  75. """
  76. return self.check_against_chunks(read_chunks(file))
  77. def check_against_path(self, path: str) -> None:
  78. with open(path, "rb") as file:
  79. return self.check_against_file(file)
  80. def has_one_of(self, hashes: dict[str, str]) -> bool:
  81. """Return whether any of the given hashes are allowed."""
  82. for hash_name, hex_digest in hashes.items():
  83. if self.is_hash_allowed(hash_name, hex_digest):
  84. return True
  85. return False
  86. def __bool__(self) -> bool:
  87. """Return whether I know any known-good hashes."""
  88. return bool(self._allowed)
  89. def __eq__(self, other: object) -> bool:
  90. if not isinstance(other, Hashes):
  91. return NotImplemented
  92. return self._allowed == other._allowed
  93. def __hash__(self) -> int:
  94. return hash(
  95. ",".join(
  96. sorted(
  97. ":".join((alg, digest))
  98. for alg, digest_list in self._allowed.items()
  99. for digest in digest_list
  100. )
  101. )
  102. )
  103. class MissingHashes(Hashes):
  104. """A workalike for Hashes used when we're missing a hash for a requirement
  105. It computes the actual hash of the requirement and raises a HashMissing
  106. exception showing it to the user.
  107. """
  108. def __init__(self) -> None:
  109. """Don't offer the ``hashes`` kwarg."""
  110. # Pass our favorite hash in to generate a "gotten hash". With the
  111. # empty list, it will never match, so an error will always raise.
  112. super().__init__(hashes={FAVORITE_HASH: []})
  113. def _raise(self, gots: dict[str, _Hash]) -> NoReturn:
  114. raise HashMissing(gots[FAVORITE_HASH].hexdigest())