utils.py

from typing import Dict, Generator

from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response

from pip._internal.exceptions import NetworkConnectionError

# The following comments and HTTP headers were originally added by
# Donald Stufft in git commit 22c562429a61bb77172039e480873fb239dd8c03.
#
# We use Accept-Encoding: identity here because requests defaults to
# accepting compressed responses. This breaks in a variety of ways
# depending on how the server is configured.
# - Some servers will notice that the file isn't a compressible file
#   and will leave the file alone and with an empty Content-Encoding
# - Some servers will notice that the file is already compressed and
#   will leave the file alone, adding a Content-Encoding: gzip header
# - Some servers won't notice anything at all and will take a file
#   that's already been compressed and compress it again, and set
#   the Content-Encoding: gzip header
#
# By setting this to request only the identity encoding we're hoping
# to eliminate the third case. Hopefully there does not exist a server
# which when given a file will notice it is already compressed and that
# you're not asking for a compressed file and will then decompress it
# before sending because if that's the case I don't think it'll ever be
# possible to make this work.
HEADERS: Dict[str, str] = {"Accept-Encoding": "identity"}
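
# A minimal usage sketch, not part of the original module: HEADERS is meant
# to be sent with the download request itself, so the server returns the
# stored bytes without re-compressing them. The helper name and the direct
# use of the vendored requests API below are assumptions for illustration;
# pip's real callers issue the request through their own session object.
def _example_fetch(url: str) -> Response:
    from pip._vendor import requests

    # stream=True leaves the body unread so it can be consumed in chunks.
    return requests.get(url, headers=HEADERS, stream=True)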
def raise_for_status(resp: Response) -> None:
    http_error_msg = ""
    if isinstance(resp.reason, bytes):
        # We attempt to decode utf-8 first because some servers
        # choose to localize their reason strings. If the string
        # isn't utf-8, we fall back to iso-8859-1 for all other
        # encodings.
        try:
            reason = resp.reason.decode("utf-8")
        except UnicodeDecodeError:
            reason = resp.reason.decode("iso-8859-1")
    else:
        reason = resp.reason

    if 400 <= resp.status_code < 500:
        http_error_msg = (
            f"{resp.status_code} Client Error: {reason} for url: {resp.url}"
        )
    elif 500 <= resp.status_code < 600:
        http_error_msg = (
            f"{resp.status_code} Server Error: {reason} for url: {resp.url}"
        )

    if http_error_msg:
        raise NetworkConnectionError(http_error_msg, response=resp)
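
# Illustrative only, not part of the original module: a caller would check
# the response right after the request, before reading the body, and let
# NetworkConnectionError propagate (or handle it) on a 4xx/5xx status.
# The helper below builds on the hypothetical _example_fetch() above.
def _example_checked_fetch(url: str) -> Response:
    resp = _example_fetch(url)
    raise_for_status(resp)  # raises NetworkConnectionError on 4xx/5xx
    return resp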
def response_chunks(
    response: Response, chunk_size: int = CONTENT_CHUNK_SIZE
) -> Generator[bytes, None, None]:
    """Given a requests Response, provide the data chunks."""
    try:
        # Special case for urllib3.
        for chunk in response.raw.stream(
            chunk_size,
            # We use decode_content=False here because we don't
            # want urllib3 to mess with the raw bytes we get
            # from the server. If we decompress inside of
            # urllib3 then we cannot verify the checksum
            # because the checksum will be of the compressed
            # file. This breakage will only occur if the
            # server adds a Content-Encoding header, which
            # depends on how the server was configured:
            # - Some servers will notice that the file isn't a
            #   compressible file and will leave the file alone
            #   and with an empty Content-Encoding
            # - Some servers will notice that the file is
            #   already compressed and will leave the file
            #   alone and will add a Content-Encoding: gzip
            #   header
            # - Some servers won't notice anything at all and
            #   will take a file that's already been compressed
            #   and compress it again and set the
            #   Content-Encoding: gzip header
            #
            # By setting this not to decode automatically we
            # hope to eliminate problems with the second case.
            decode_content=False,
        ):
            yield chunk
    except AttributeError:
        # Standard file-like object.
        while True:
            chunk = response.raw.read(chunk_size)
            if not chunk:
                break
            yield chunk
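
# A sketch of how the pieces fit together, not part of the original module:
# stream a download to disk while hashing the raw bytes, so the digest can
# be compared against an expected checksum. Because response_chunks() asks
# urllib3 not to decode the content, the hash covers exactly the bytes the
# server stored. The function name, file handling, and choice of SHA-256
# are assumptions made for this example.
def _example_download(url: str, path: str) -> str:
    import hashlib

    resp = _example_checked_fetch(url)
    hasher = hashlib.sha256()
    with open(path, "wb") as fp:
        for chunk in response_chunks(resp):
            hasher.update(chunk)
            fp.write(chunk)
    return hasher.hexdigest()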