# -*- coding: utf-8 -*-
#
# Copyright (C) 2013 Vinay Sajip.
# Licensed to the Python Software Foundation under a contributor agreement.
# See LICENSE.txt and CONTRIBUTORS.txt.
#
  7. import hashlib
  8. import logging
  9. import os
  10. import shutil
  11. import subprocess
  12. import tempfile
  13. try:
  14. from threading import Thread
  15. except ImportError: # pragma: no cover
  16. from dummy_threading import Thread
  17. from . import DistlibException
  18. from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
  19. urlparse, build_opener, string_types)
  20. from .util import zip_dir, ServerProxy
  21. logger = logging.getLogger(__name__)
  22. DEFAULT_INDEX = 'https://pypi.org/pypi'
  23. DEFAULT_REALM = 'pypi'
  24. class PackageIndex(object):
  25. """
  26. This class represents a package index compatible with PyPI, the Python
  27. Package Index.
  28. """
  29. boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
  30. def __init__(self, url=None):
  31. """
  32. Initialise an instance.
  33. :param url: The URL of the index. If not specified, the URL for PyPI is
  34. used.
  35. """
  36. self.url = url or DEFAULT_INDEX
  37. self.read_configuration()
  38. scheme, netloc, path, params, query, frag = urlparse(self.url)
  39. if params or query or frag or scheme not in ('http', 'https'):
  40. raise DistlibException('invalid repository: %s' % self.url)
  41. self.password_handler = None
  42. self.ssl_verifier = None
  43. self.gpg = None
  44. self.gpg_home = None
  45. with open(os.devnull, 'w') as sink:
  46. # Use gpg by default rather than gpg2, as gpg2 insists on
  47. # prompting for passwords
  48. for s in ('gpg', 'gpg2'):
  49. try:
  50. rc = subprocess.check_call([s, '--version'], stdout=sink,
  51. stderr=sink)
  52. if rc == 0:
  53. self.gpg = s
  54. break
  55. except OSError:
  56. pass
  57. def _get_pypirc_command(self):
  58. """
  59. Get the distutils command for interacting with PyPI configurations.
  60. :return: the command.
  61. """
  62. from .util import _get_pypirc_command as cmd
  63. return cmd()
  64. def read_configuration(self):
  65. """
  66. Read the PyPI access configuration as supported by distutils. This populates
  67. ``username``, ``password``, ``realm`` and ``url`` attributes from the
  68. configuration.
  69. """
  70. from .util import _load_pypirc
  71. cfg = _load_pypirc(self)
  72. self.username = cfg.get('username')
  73. self.password = cfg.get('password')
  74. self.realm = cfg.get('realm', 'pypi')
  75. self.url = cfg.get('repository', self.url)
  76. def save_configuration(self):
  77. """
  78. Save the PyPI access configuration. You must have set ``username`` and
  79. ``password`` attributes before calling this method.
  80. """
  81. self.check_credentials()
  82. from .util import _store_pypirc
  83. _store_pypirc(self)
  84. def check_credentials(self):
  85. """
  86. Check that ``username`` and ``password`` have been set, and raise an
  87. exception if not.
  88. """
  89. if self.username is None or self.password is None:
  90. raise DistlibException('username and password must be set')
  91. pm = HTTPPasswordMgr()
  92. _, netloc, _, _, _, _ = urlparse(self.url)
  93. pm.add_password(self.realm, netloc, self.username, self.password)
  94. self.password_handler = HTTPBasicAuthHandler(pm)
  95. def register(self, metadata): # pragma: no cover
  96. """
  97. Register a distribution on PyPI, using the provided metadata.
  98. :param metadata: A :class:`Metadata` instance defining at least a name
  99. and version number for the distribution to be
  100. registered.
  101. :return: The HTTP response received from PyPI upon submission of the
  102. request.
  103. """
  104. self.check_credentials()
  105. metadata.validate()
  106. d = metadata.todict()
  107. d[':action'] = 'verify'
  108. request = self.encode_request(d.items(), [])
  109. response = self.send_request(request)
  110. d[':action'] = 'submit'
  111. request = self.encode_request(d.items(), [])
  112. return self.send_request(request)
  113. def _reader(self, name, stream, outbuf):
  114. """
  115. Thread runner for reading lines of from a subprocess into a buffer.
  116. :param name: The logical name of the stream (used for logging only).
  117. :param stream: The stream to read from. This will typically a pipe
  118. connected to the output stream of a subprocess.
  119. :param outbuf: The list to append the read lines to.
  120. """
  121. while True:
  122. s = stream.readline()
  123. if not s:
  124. break
  125. s = s.decode('utf-8').rstrip()
  126. outbuf.append(s)
  127. logger.debug('%s: %s' % (name, s))
  128. stream.close()
  129. def get_sign_command(self, filename, signer, sign_password, keystore=None): # pragma: no cover
  130. """
  131. Return a suitable command for signing a file.
  132. :param filename: The pathname to the file to be signed.
  133. :param signer: The identifier of the signer of the file.
  134. :param sign_password: The passphrase for the signer's
  135. private key used for signing.
  136. :param keystore: The path to a directory which contains the keys
  137. used in verification. If not specified, the
  138. instance's ``gpg_home`` attribute is used instead.
  139. :return: The signing command as a list suitable to be
  140. passed to :class:`subprocess.Popen`.
  141. """
  142. cmd = [self.gpg, '--status-fd', '2', '--no-tty']
  143. if keystore is None:
  144. keystore = self.gpg_home
  145. if keystore:
  146. cmd.extend(['--homedir', keystore])
  147. if sign_password is not None:
  148. cmd.extend(['--batch', '--passphrase-fd', '0'])
  149. td = tempfile.mkdtemp()
  150. sf = os.path.join(td, os.path.basename(filename) + '.asc')
  151. cmd.extend(['--detach-sign', '--armor', '--local-user',
  152. signer, '--output', sf, filename])
  153. logger.debug('invoking: %s', ' '.join(cmd))
  154. return cmd, sf
  155. def run_command(self, cmd, input_data=None):
  156. """
  157. Run a command in a child process , passing it any input data specified.
  158. :param cmd: The command to run.
  159. :param input_data: If specified, this must be a byte string containing
  160. data to be sent to the child process.
  161. :return: A tuple consisting of the subprocess' exit code, a list of
  162. lines read from the subprocess' ``stdout``, and a list of
  163. lines read from the subprocess' ``stderr``.
  164. """
  165. kwargs = {
  166. 'stdout': subprocess.PIPE,
  167. 'stderr': subprocess.PIPE,
  168. }
  169. if input_data is not None:
  170. kwargs['stdin'] = subprocess.PIPE
  171. stdout = []
  172. stderr = []
  173. p = subprocess.Popen(cmd, **kwargs)
  174. # We don't use communicate() here because we may need to
  175. # get clever with interacting with the command
  176. t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
  177. t1.start()
  178. t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
  179. t2.start()
  180. if input_data is not None:
  181. p.stdin.write(input_data)
  182. p.stdin.close()
  183. p.wait()
  184. t1.join()
  185. t2.join()
  186. return p.returncode, stdout, stderr
  187. def sign_file(self, filename, signer, sign_password, keystore=None): # pragma: no cover
  188. """
  189. Sign a file.
  190. :param filename: The pathname to the file to be signed.
  191. :param signer: The identifier of the signer of the file.
  192. :param sign_password: The passphrase for the signer's
  193. private key used for signing.
  194. :param keystore: The path to a directory which contains the keys
  195. used in signing. If not specified, the instance's
  196. ``gpg_home`` attribute is used instead.
  197. :return: The absolute pathname of the file where the signature is
  198. stored.
  199. """
  200. cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
  201. keystore)
  202. rc, stdout, stderr = self.run_command(cmd,
  203. sign_password.encode('utf-8'))
  204. if rc != 0:
  205. raise DistlibException('sign command failed with error '
  206. 'code %s' % rc)
  207. return sig_file
  208. def upload_file(self, metadata, filename, signer=None, sign_password=None,
  209. filetype='sdist', pyversion='source', keystore=None):
  210. """
  211. Upload a release file to the index.
  212. :param metadata: A :class:`Metadata` instance defining at least a name
  213. and version number for the file to be uploaded.
  214. :param filename: The pathname of the file to be uploaded.
  215. :param signer: The identifier of the signer of the file.
  216. :param sign_password: The passphrase for the signer's
  217. private key used for signing.
  218. :param filetype: The type of the file being uploaded. This is the
  219. distutils command which produced that file, e.g.
  220. ``sdist`` or ``bdist_wheel``.
  221. :param pyversion: The version of Python which the release relates
  222. to. For code compatible with any Python, this would
  223. be ``source``, otherwise it would be e.g. ``3.2``.
  224. :param keystore: The path to a directory which contains the keys
  225. used in signing. If not specified, the instance's
  226. ``gpg_home`` attribute is used instead.
  227. :return: The HTTP response received from PyPI upon submission of the
  228. request.
  229. """
  230. self.check_credentials()
  231. if not os.path.exists(filename):
  232. raise DistlibException('not found: %s' % filename)
  233. metadata.validate()
  234. d = metadata.todict()
  235. sig_file = None
  236. if signer:
  237. if not self.gpg:
  238. logger.warning('no signing program available - not signed')
  239. else:
  240. sig_file = self.sign_file(filename, signer, sign_password,
  241. keystore)
  242. with open(filename, 'rb') as f:
  243. file_data = f.read()
  244. md5_digest = hashlib.md5(file_data).hexdigest()
  245. sha256_digest = hashlib.sha256(file_data).hexdigest()
  246. d.update({
  247. ':action': 'file_upload',
  248. 'protocol_version': '1',
  249. 'filetype': filetype,
  250. 'pyversion': pyversion,
  251. 'md5_digest': md5_digest,
  252. 'sha256_digest': sha256_digest,
  253. })
  254. files = [('content', os.path.basename(filename), file_data)]
  255. if sig_file:
  256. with open(sig_file, 'rb') as f:
  257. sig_data = f.read()
  258. files.append(('gpg_signature', os.path.basename(sig_file),
  259. sig_data))
  260. shutil.rmtree(os.path.dirname(sig_file))
  261. request = self.encode_request(d.items(), files)
  262. return self.send_request(request)
  263. def upload_documentation(self, metadata, doc_dir): # pragma: no cover
  264. """
  265. Upload documentation to the index.
  266. :param metadata: A :class:`Metadata` instance defining at least a name
  267. and version number for the documentation to be
  268. uploaded.
  269. :param doc_dir: The pathname of the directory which contains the
  270. documentation. This should be the directory that
  271. contains the ``index.html`` for the documentation.
  272. :return: The HTTP response received from PyPI upon submission of the
  273. request.
  274. """
  275. self.check_credentials()
  276. if not os.path.isdir(doc_dir):
  277. raise DistlibException('not a directory: %r' % doc_dir)
  278. fn = os.path.join(doc_dir, 'index.html')
  279. if not os.path.exists(fn):
  280. raise DistlibException('not found: %r' % fn)
  281. metadata.validate()
  282. name, version = metadata.name, metadata.version
  283. zip_data = zip_dir(doc_dir).getvalue()
  284. fields = [(':action', 'doc_upload'),
  285. ('name', name), ('version', version)]
  286. files = [('content', name, zip_data)]
  287. request = self.encode_request(fields, files)
  288. return self.send_request(request)
  289. def get_verify_command(self, signature_filename, data_filename,
  290. keystore=None):
  291. """
  292. Return a suitable command for verifying a file.
  293. :param signature_filename: The pathname to the file containing the
  294. signature.
  295. :param data_filename: The pathname to the file containing the
  296. signed data.
  297. :param keystore: The path to a directory which contains the keys
  298. used in verification. If not specified, the
  299. instance's ``gpg_home`` attribute is used instead.
  300. :return: The verifying command as a list suitable to be
  301. passed to :class:`subprocess.Popen`.
  302. """
  303. cmd = [self.gpg, '--status-fd', '2', '--no-tty']
  304. if keystore is None:
  305. keystore = self.gpg_home
  306. if keystore:
  307. cmd.extend(['--homedir', keystore])
  308. cmd.extend(['--verify', signature_filename, data_filename])
  309. logger.debug('invoking: %s', ' '.join(cmd))
  310. return cmd
  311. def verify_signature(self, signature_filename, data_filename,
  312. keystore=None):
  313. """
  314. Verify a signature for a file.
  315. :param signature_filename: The pathname to the file containing the
  316. signature.
  317. :param data_filename: The pathname to the file containing the
  318. signed data.
  319. :param keystore: The path to a directory which contains the keys
  320. used in verification. If not specified, the
  321. instance's ``gpg_home`` attribute is used instead.
  322. :return: True if the signature was verified, else False.
  323. """
  324. if not self.gpg:
  325. raise DistlibException('verification unavailable because gpg '
  326. 'unavailable')
  327. cmd = self.get_verify_command(signature_filename, data_filename,
  328. keystore)
  329. rc, stdout, stderr = self.run_command(cmd)
  330. if rc not in (0, 1):
  331. raise DistlibException('verify command failed with error '
  332. 'code %s' % rc)
  333. return rc == 0
  334. def download_file(self, url, destfile, digest=None, reporthook=None):
  335. """
  336. This is a convenience method for downloading a file from an URL.
  337. Normally, this will be a file from the index, though currently
  338. no check is made for this (i.e. a file can be downloaded from
  339. anywhere).
  340. The method is just like the :func:`urlretrieve` function in the
  341. standard library, except that it allows digest computation to be
  342. done during download and checking that the downloaded data
  343. matched any expected value.
  344. :param url: The URL of the file to be downloaded (assumed to be
  345. available via an HTTP GET request).
  346. :param destfile: The pathname where the downloaded file is to be
  347. saved.
  348. :param digest: If specified, this must be a (hasher, value)
  349. tuple, where hasher is the algorithm used (e.g.
  350. ``'md5'``) and ``value`` is the expected value.
  351. :param reporthook: The same as for :func:`urlretrieve` in the
  352. standard library.
  353. """
  354. if digest is None:
  355. digester = None
  356. logger.debug('No digest specified')
  357. else:
  358. if isinstance(digest, (list, tuple)):
  359. hasher, digest = digest
  360. else:
  361. hasher = 'md5'
  362. digester = getattr(hashlib, hasher)()
  363. logger.debug('Digest specified: %s' % digest)
  364. # The following code is equivalent to urlretrieve.
  365. # We need to do it this way so that we can compute the
  366. # digest of the file as we go.
  367. with open(destfile, 'wb') as dfp:
  368. # addinfourl is not a context manager on 2.x
  369. # so we have to use try/finally
  370. sfp = self.send_request(Request(url))
  371. try:
  372. headers = sfp.info()
  373. blocksize = 8192
  374. size = -1
  375. read = 0
  376. blocknum = 0
  377. if "content-length" in headers:
  378. size = int(headers["Content-Length"])
  379. if reporthook:
  380. reporthook(blocknum, blocksize, size)
  381. while True:
  382. block = sfp.read(blocksize)
  383. if not block:
  384. break
  385. read += len(block)
  386. dfp.write(block)
  387. if digester:
  388. digester.update(block)
  389. blocknum += 1
  390. if reporthook:
  391. reporthook(blocknum, blocksize, size)
  392. finally:
  393. sfp.close()
  394. # check that we got the whole file, if we can
  395. if size >= 0 and read < size:
  396. raise DistlibException(
  397. 'retrieval incomplete: got only %d out of %d bytes'
  398. % (read, size))
  399. # if we have a digest, it must match.
  400. if digester:
  401. actual = digester.hexdigest()
  402. if digest != actual:
  403. raise DistlibException('%s digest mismatch for %s: expected '
  404. '%s, got %s' % (hasher, destfile,
  405. digest, actual))
  406. logger.debug('Digest verified: %s', digest)
  407. def send_request(self, req):
  408. """
  409. Send a standard library :class:`Request` to PyPI and return its
  410. response.
  411. :param req: The request to send.
  412. :return: The HTTP response from PyPI (a standard library HTTPResponse).
  413. """
  414. handlers = []
  415. if self.password_handler:
  416. handlers.append(self.password_handler)
  417. if self.ssl_verifier:
  418. handlers.append(self.ssl_verifier)
  419. opener = build_opener(*handlers)
  420. return opener.open(req)
  421. def encode_request(self, fields, files):
  422. """
  423. Encode fields and files for posting to an HTTP server.
  424. :param fields: The fields to send as a list of (fieldname, value)
  425. tuples.
  426. :param files: The files to send as a list of (fieldname, filename,
  427. file_bytes) tuple.
  428. """
  429. # Adapted from packaging, which in turn was adapted from
  430. # http://code.activestate.com/recipes/146306
  431. parts = []
  432. boundary = self.boundary
  433. for k, values in fields:
  434. if not isinstance(values, (list, tuple)):
  435. values = [values]
  436. for v in values:
  437. parts.extend((
  438. b'--' + boundary,
  439. ('Content-Disposition: form-data; name="%s"' %
  440. k).encode('utf-8'),
  441. b'',
  442. v.encode('utf-8')))
  443. for key, filename, value in files:
  444. parts.extend((
  445. b'--' + boundary,
  446. ('Content-Disposition: form-data; name="%s"; filename="%s"' %
  447. (key, filename)).encode('utf-8'),
  448. b'',
  449. value))
  450. parts.extend((b'--' + boundary + b'--', b''))
  451. body = b'\r\n'.join(parts)
  452. ct = b'multipart/form-data; boundary=' + boundary
  453. headers = {
  454. 'Content-type': ct,
  455. 'Content-length': str(len(body))
  456. }
  457. return Request(self.url, body, headers)
  458. def search(self, terms, operator=None): # pragma: no cover
  459. if isinstance(terms, string_types):
  460. terms = {'name': terms}
  461. rpc_proxy = ServerProxy(self.url, timeout=3.0)
  462. try:
  463. return rpc_proxy.search(terms, operator or 'and')
  464. finally:
  465. rpc_proxy('close')()