appengine.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. """
  2. This module provides a pool manager that uses Google App Engine's
  3. `URLFetch Service <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
Example usage::

    from pip._vendor.urllib3 import PoolManager
    from pip._vendor.urllib3.contrib.appengine import AppEngineManager, is_appengine_sandbox

    if is_appengine_sandbox():
        # AppEngineManager uses AppEngine's URLFetch API behind the scenes
        http = AppEngineManager()
    else:
        # PoolManager uses a socket-level API behind the scenes
        http = PoolManager()

    r = http.request('GET', 'https://google.com/')

  14. There are `limitations <https://cloud.google.com/appengine/docs/python/\
  15. urlfetch/#Python_Quotas_and_limits>`_ to the URLFetch service and it may not be
  16. the best choice for your application. There are three options for using
  17. urllib3 on Google App Engine:
  18. 1. You can use :class:`AppEngineManager` with URLFetch. URLFetch is
  19. cost-effective in many circumstances as long as your usage is within the
  20. limitations.
  21. 2. You can use a normal :class:`~urllib3.PoolManager` by enabling sockets.
  22. Sockets also have `limitations and restrictions
  23. <https://cloud.google.com/appengine/docs/python/sockets/\
  24. #limitations-and-restrictions>`_ and have a lower free quota than URLFetch.
  25. To use sockets, be sure to specify the following in your ``app.yaml``::
  26. env_variables:
  27. GAE_USE_SOCKETS_HTTPLIB : 'true'
  28. 3. If you are using `App Engine Flexible
  29. <https://cloud.google.com/appengine/docs/flexible/>`_, you can use the standard
  30. :class:`PoolManager` without any configuration or special environment variables.
  31. """
  32. from __future__ import absolute_import
  33. import io
  34. import logging
  35. import warnings
  36. from ..exceptions import (
  37. HTTPError,
  38. HTTPWarning,
  39. MaxRetryError,
  40. ProtocolError,
  41. SSLError,
  42. TimeoutError,
  43. )
  44. from ..packages.six.moves.urllib.parse import urljoin
  45. from ..request import RequestMethods
  46. from ..response import HTTPResponse
  47. from ..util.retry import Retry
  48. from ..util.timeout import Timeout
  49. from . import _appengine_environ
  50. try:
  51. from google.appengine.api import urlfetch
  52. except ImportError:
  53. urlfetch = None
# Module-level logger; AppEngineManager.urlopen emits its redirect and retry
# debug messages through it.
log = logging.getLogger(__name__)
  55. class AppEnginePlatformWarning(HTTPWarning):
  56. pass
  57. class AppEnginePlatformError(HTTPError):
  58. pass
  59. class AppEngineManager(RequestMethods):
  60. """
  61. Connection manager for Google App Engine sandbox applications.
  62. This manager uses the URLFetch service directly instead of using the
  63. emulated httplib, and is subject to URLFetch limitations as described in
  64. the App Engine documentation `here
  65. <https://cloud.google.com/appengine/docs/python/urlfetch>`_.
  66. Notably it will raise an :class:`AppEnginePlatformError` if:
  67. * URLFetch is not available.
  68. * If you attempt to use this on App Engine Flexible, as full socket
  69. support is available.
  70. * If a request size is more than 10 megabytes.
  71. * If a response size is more than 32 megabytes.
  72. * If you use an unsupported request method such as OPTIONS.
  73. Beyond those cases, it will raise normal urllib3 errors.
  74. """
  75. def __init__(
  76. self,
  77. headers=None,
  78. retries=None,
  79. validate_certificate=True,
  80. urlfetch_retries=True,
  81. ):
  82. if not urlfetch:
  83. raise AppEnginePlatformError(
  84. "URLFetch is not available in this environment."
  85. )
  86. warnings.warn(
  87. "urllib3 is using URLFetch on Google App Engine sandbox instead "
  88. "of sockets. To use sockets directly instead of URLFetch see "
  89. "https://urllib3.readthedocs.io/en/1.26.x/reference/urllib3.contrib.html.",
  90. AppEnginePlatformWarning,
  91. )
  92. RequestMethods.__init__(self, headers)
  93. self.validate_certificate = validate_certificate
  94. self.urlfetch_retries = urlfetch_retries
  95. self.retries = retries or Retry.DEFAULT
  96. def __enter__(self):
  97. return self
  98. def __exit__(self, exc_type, exc_val, exc_tb):
  99. # Return False to re-raise any potential exceptions
  100. return False
  101. def urlopen(
  102. self,
  103. method,
  104. url,
  105. body=None,
  106. headers=None,
  107. retries=None,
  108. redirect=True,
  109. timeout=Timeout.DEFAULT_TIMEOUT,
  110. **response_kw
  111. ):
  112. retries = self._get_retries(retries, redirect)
  113. try:
  114. follow_redirects = redirect and retries.redirect != 0 and retries.total
  115. response = urlfetch.fetch(
  116. url,
  117. payload=body,
  118. method=method,
  119. headers=headers or {},
  120. allow_truncated=False,
  121. follow_redirects=self.urlfetch_retries and follow_redirects,
  122. deadline=self._get_absolute_timeout(timeout),
  123. validate_certificate=self.validate_certificate,
  124. )
  125. except urlfetch.DeadlineExceededError as e:
  126. raise TimeoutError(self, e)
  127. except urlfetch.InvalidURLError as e:
  128. if "too large" in str(e):
  129. raise AppEnginePlatformError(
  130. "URLFetch request too large, URLFetch only "
  131. "supports requests up to 10mb in size.",
  132. e,
  133. )
  134. raise ProtocolError(e)
  135. except urlfetch.DownloadError as e:
  136. if "Too many redirects" in str(e):
  137. raise MaxRetryError(self, url, reason=e)
  138. raise ProtocolError(e)
  139. except urlfetch.ResponseTooLargeError as e:
  140. raise AppEnginePlatformError(
  141. "URLFetch response too large, URLFetch only supports"
  142. "responses up to 32mb in size.",
  143. e,
  144. )
  145. except urlfetch.SSLCertificateError as e:
  146. raise SSLError(e)
  147. except urlfetch.InvalidMethodError as e:
  148. raise AppEnginePlatformError(
  149. "URLFetch does not support method: %s" % method, e
  150. )
  151. http_response = self._urlfetch_response_to_http_response(
  152. response, retries=retries, **response_kw
  153. )
  154. # Handle redirect?
  155. redirect_location = redirect and http_response.get_redirect_location()
  156. if redirect_location:
  157. # Check for redirect response
  158. if self.urlfetch_retries and retries.raise_on_redirect:
  159. raise MaxRetryError(self, url, "too many redirects")
  160. else:
  161. if http_response.status == 303:
  162. method = "GET"
  163. try:
  164. retries = retries.increment(
  165. method, url, response=http_response, _pool=self
  166. )
  167. except MaxRetryError:
  168. if retries.raise_on_redirect:
  169. raise MaxRetryError(self, url, "too many redirects")
  170. return http_response
  171. retries.sleep_for_retry(http_response)
  172. log.debug("Redirecting %s -> %s", url, redirect_location)
  173. redirect_url = urljoin(url, redirect_location)
  174. return self.urlopen(
  175. method,
  176. redirect_url,
  177. body,
  178. headers,
  179. retries=retries,
  180. redirect=redirect,
  181. timeout=timeout,
  182. **response_kw
  183. )
  184. # Check if we should retry the HTTP response.
  185. has_retry_after = bool(http_response.headers.get("Retry-After"))
  186. if retries.is_retry(method, http_response.status, has_retry_after):
  187. retries = retries.increment(method, url, response=http_response, _pool=self)
  188. log.debug("Retry: %s", url)
  189. retries.sleep(http_response)
  190. return self.urlopen(
  191. method,
  192. url,
  193. body=body,
  194. headers=headers,
  195. retries=retries,
  196. redirect=redirect,
  197. timeout=timeout,
  198. **response_kw
  199. )
  200. return http_response
  201. def _urlfetch_response_to_http_response(self, urlfetch_resp, **response_kw):
  202. if is_prod_appengine():
  203. # Production GAE handles deflate encoding automatically, but does
  204. # not remove the encoding header.
  205. content_encoding = urlfetch_resp.headers.get("content-encoding")
  206. if content_encoding == "deflate":
  207. del urlfetch_resp.headers["content-encoding"]
  208. transfer_encoding = urlfetch_resp.headers.get("transfer-encoding")
  209. # We have a full response's content,
  210. # so let's make sure we don't report ourselves as chunked data.
  211. if transfer_encoding == "chunked":
  212. encodings = transfer_encoding.split(",")
  213. encodings.remove("chunked")
  214. urlfetch_resp.headers["transfer-encoding"] = ",".join(encodings)
  215. original_response = HTTPResponse(
  216. # In order for decoding to work, we must present the content as
  217. # a file-like object.
  218. body=io.BytesIO(urlfetch_resp.content),
  219. msg=urlfetch_resp.header_msg,
  220. headers=urlfetch_resp.headers,
  221. status=urlfetch_resp.status_code,
  222. **response_kw
  223. )
  224. return HTTPResponse(
  225. body=io.BytesIO(urlfetch_resp.content),
  226. headers=urlfetch_resp.headers,
  227. status=urlfetch_resp.status_code,
  228. original_response=original_response,
  229. **response_kw
  230. )
  231. def _get_absolute_timeout(self, timeout):
  232. if timeout is Timeout.DEFAULT_TIMEOUT:
  233. return None # Defer to URLFetch's default.
  234. if isinstance(timeout, Timeout):
  235. if timeout._read is not None or timeout._connect is not None:
  236. warnings.warn(
  237. "URLFetch does not support granular timeout settings, "
  238. "reverting to total or default URLFetch timeout.",
  239. AppEnginePlatformWarning,
  240. )
  241. return timeout.total
  242. return timeout
  243. def _get_retries(self, retries, redirect):
  244. if not isinstance(retries, Retry):
  245. retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
  246. if retries.connect or retries.read or retries.redirect:
  247. warnings.warn(
  248. "URLFetch only supports total retries and does not "
  249. "recognize connect, read, or redirect retry parameters.",
  250. AppEnginePlatformWarning,
  251. )
  252. return retries
# Alias methods from _appengine_environ to maintain public API interface.
# ``is_prod_appengine`` is also called by
# AppEngineManager._urlfetch_response_to_http_response; the name is looked up
# when that method runs, so defining it after the class is fine.
is_appengine = _appengine_environ.is_appengine
is_appengine_sandbox = _appengine_environ.is_appengine_sandbox
is_local_appengine = _appengine_environ.is_local_appengine
is_prod_appengine = _appengine_environ.is_prod_appengine
is_prod_appengine_mvms = _appengine_environ.is_prod_appengine_mvms