formats.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. import logging
  2. import os
  3. import re
  4. import string
  5. import typing
  6. from itertools import chain as _chain
  7. _logger = logging.getLogger(__name__)
  8. # -------------------------------------------------------------------------------------
  9. # PEP 440
  # Verbose-mode (``re.X``) pattern describing a PEP 440 version string.
  # Whitespace and ``#`` comments inside the pattern are ignored by the regex
  # engine, so the layout below is purely for readability.
  VERSION_PATTERN = r"""
      v?
      (?:
          (?:(?P<epoch>[0-9]+)!)?                           # epoch
          (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
          (?P<pre>                                          # pre-release
              [-_\.]?
              (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
              [-_\.]?
              (?P<pre_n>[0-9]+)?
          )?
          (?P<post>                                         # post release
              (?:-(?P<post_n1>[0-9]+))
              |
              (?:
                  [-_\.]?
                  (?P<post_l>post|rev|r)
                  [-_\.]?
                  (?P<post_n2>[0-9]+)?
              )
          )?
          (?P<dev>                                          # dev release
              [-_\.]?
              (?P<dev_l>dev)
              [-_\.]?
              (?P<dev_n>[0-9]+)?
          )?
      )
      (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
  """

  # Anchored, case-insensitive variant that tolerates surrounding whitespace.
  VERSION_REGEX = re.compile(
      r"^\s*" + VERSION_PATTERN + r"\s*$",
      re.VERBOSE | re.IGNORECASE,
  )


  def pep440(version: str) -> bool:
      """Tell whether ``version`` is matched by ``VERSION_REGEX`` (PEP 440).

      Leading/trailing whitespace and letter case are ignored by the pattern.
      """
      found = VERSION_REGEX.match(version)
      return found is not None
  43. # -------------------------------------------------------------------------------------
  44. # PEP 508
  # A name made of letters/digits, optionally with ``.``, ``_`` or ``-`` in the
  # middle (never at either end). Compiled case-insensitively below.
  PEP508_IDENTIFIER_PATTERN = r"([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])"
  PEP508_IDENTIFIER_REGEX = re.compile(f"^{PEP508_IDENTIFIER_PATTERN}$", re.I)


  def pep508_identifier(name: str) -> bool:
      """Return ``True`` if the whole of ``name`` is a valid PEP 508 identifier.

      :param name: candidate identifier (e.g. a project or extra name).
      :return: ``True`` when ``name`` matches ``PEP508_IDENTIFIER_REGEX`` in full.
      """
      # ``$`` also matches right before a trailing newline, so ``match`` would
      # accept values such as "pkg\n". ``fullmatch`` forces the pattern to
      # consume the entire string.
      return PEP508_IDENTIFIER_REGEX.fullmatch(name) is not None
  # Pick a `packaging` implementation: the standalone distribution first, then
  # the copy vendored inside setuptools. ``pep508`` is defined according to
  # whether either import succeeded.
  try:
      try:
          from packaging import requirements as _req
      except ImportError:  # pragma: no cover
          # let's try setuptools vendored version
          from setuptools._vendor.packaging import requirements as _req  # type: ignore
  except ImportError:  # pragma: no cover
      _logger.warning(
          "Could not find an installation of `packaging`. Requirements, dependencies and "
          "versions might not be validated. "
          "To enforce validation, please install `packaging`."
      )

      def pep508(value: str) -> bool:
          """Fallback used when `packaging` is missing: accept every value."""
          return True

  else:

      def pep508(value: str) -> bool:
          """Return ``True`` if ``value`` parses as a ``Requirement``."""
          try:
              _req.Requirement(value)
          except _req.InvalidRequirement:
              return False
          return True
  def pep508_versionspec(value: str) -> bool:
      """Expression that can be used to specify/lock versions (including ranges)"""
      # In PEP 508, conditional markers (";"), extras ("]") and URL specs ("@")
      # are not part of the versionspec, so their presence is rejected upfront.
      for forbidden in (";", "]", "@"):
          if forbidden in value:
              return False

      # Pretend there is a dependency called `requirement` carrying the given
      # version spec, so that the `pep508` validation can be re-used.
      fake_requirement = f"requirement{value}"
      return pep508(fake_requirement)
  79. # -------------------------------------------------------------------------------------
  80. # PEP 517
  def pep517_backend_reference(value: str) -> bool:
      """Check a ``module.path:object.path`` style reference.

      The value is split at the first ``:`` and each side at every ``.``.
      Each piece is stripped of whitespace; empty pieces are ignored and all the
      remaining ones must satisfy ``python_identifier``.
      """
      module, _, obj = value.partition(":")
      for raw_part in _chain(module.split("."), obj.split(".")):
          part = raw_part.strip()
          if part and not python_identifier(part):
              return False
      return True
  85. # -------------------------------------------------------------------------------------
  86. # Classifiers - PEP 301
  def _download_classifiers() -> str:
      """Download the list of classifiers from PyPI and return it as text.

      The body is decoded with the ``charset`` announced in the response's
      ``content-type`` header, falling back to UTF-8 when none is given.

      :return: the decoded response body.
      :raises Exception: errors from the request or the decoding are propagated.
      """
      import ssl
      from email.message import Message
      from urllib.request import urlopen

      url = "https://pypi.org/pypi?:action=list_classifiers"
      context = ssl.create_default_context()
      with urlopen(url, context=context) as response:
          # ``Message`` is used only as a parser for the header parameters.
          # The value has to be stored under "content-type" (with a hyphen):
          # that is the header ``get_param`` inspects by default. Under any
          # other name the lookup always yields the fallback charset.
          headers = Message()
          headers["content-type"] = response.getheader("content-type", "text/plain")
          return response.read().decode(headers.get_param("charset", "utf-8"))
  class _TroveClassifier:
      """The ``trove_classifiers`` package is the official way of validating classifiers,
      however this package might not be always available.
      As a workaround we can still download a list from PyPI.
      We also don't want to be over strict about it, so simply skipping silently is an
      option (classifiers will be validated anyway during the upload to PyPI).
      """

      def __init__(self) -> None:
          # State of the downloaded list:
          #   None  => not cached yet
          #   False => cache not available
          #   set   => classifiers obtained from the download
          # ``Literal`` is quoted so the annotation is never evaluated at runtime.
          self.downloaded: typing.Union[
              None, "typing.Literal[False]", typing.Set[str]
          ] = None
          self._skip_download = False
          self.__name__ = "trove_classifier"  # Emulate a public function

      def _disable_download(self) -> None:
          """Make every later call accept its value without downloading."""
          # This is a private API. Only setuptools has the consent of using it.
          self._skip_download = True

      def __call__(self, value: str) -> bool:
          """Validate ``value`` against the downloaded list of classifiers.

          Returns ``True`` without checking whenever the list is unavailable:
          download disabled, ``NO_NETWORK``/``VALIDATE_PYPROJECT_NO_NETWORK`` set
          in the environment, or the download failed. Values starting with
          ``private ::`` (case-insensitive) are always accepted.
          """
          if self.downloaded is False or self._skip_download is True:
              return True

          if os.getenv("NO_NETWORK") or os.getenv("VALIDATE_PYPROJECT_NO_NETWORK"):
              self.downloaded = False
              msg = (
                  "Install ``trove-classifiers`` to ensure proper validation. "
                  "Skipping download of classifiers list from PyPI (NO_NETWORK)."
              )
              _logger.debug(msg)
              return True

          if self.downloaded is None:
              msg = (
                  "Install ``trove-classifiers`` to ensure proper validation. "
                  "Meanwhile a list of classifiers will be downloaded from PyPI."
              )
              _logger.debug(msg)
              try:
                  self.downloaded = set(_download_classifiers().splitlines())
              except Exception:
                  # Best effort: remember the failure so no further attempt is
                  # made, and let the value through.
                  self.downloaded = False
                  _logger.debug("Problem with download, skipping validation")
                  return True

          return value in self.downloaded or value.lower().startswith("private ::")
  # Prefer the `trove_classifiers` package when it can be imported; otherwise
  # fall back to an instance of `_TroveClassifier`.
  try:
      from trove_classifiers import classifiers as _trove_classifiers
  except ImportError:  # pragma: no cover
      trove_classifier = _TroveClassifier()
  else:

      def trove_classifier(value: str) -> bool:
          """Accept known classifiers and anything starting with ``private ::``."""
          if value in _trove_classifiers:
              return True
          return value.lower().startswith("private ::")
  143. # -------------------------------------------------------------------------------------
  144. # Non-PEP related
  def url(value: str) -> bool:
      """Return ``True`` when ``value`` parses into a scheme and a network location.

      A value without scheme triggers a warning and, unless it starts with ``/``
      or a backslash or contains ``@``, is re-parsed with ``http://`` prepended.
      Any exception raised along the way results in ``False``.
      """
      from urllib.parse import urlparse

      try:
          parsed = urlparse(value)
          if not parsed.scheme:
              _logger.warning(
                  "For maximum compatibility please make sure to include a "
                  "`scheme` prefix in your URL (e.g. 'http://'). "
                  f"Given value: {value}"
              )
              skip_http_guess = value.startswith(("/", "\\")) or "@" in value
              if not skip_http_guess:
                  parsed = urlparse(f"http://{value}")
          verdict = bool(parsed.scheme and parsed.netloc)
      except Exception:
          return False
      return verdict
  160. # https://packaging.python.org/specifications/entry-points/
  # Entry-point name: must not start with "[" or whitespace, contains no "=",
  # and must not end with whitespace.
  ENTRYPOINT_PATTERN = r"[^\[\s=]([^=]*[^\s=])?"
  ENTRYPOINT_REGEX = re.compile("^" + ENTRYPOINT_PATTERN + "$", re.IGNORECASE)

  # Recommended (stricter) shape for names: word characters, "." and "-" only.
  RECOMMEDED_ENTRYPOINT_PATTERN = r"[\w.-]+"
  RECOMMEDED_ENTRYPOINT_REGEX = re.compile(
      "^" + RECOMMEDED_ENTRYPOINT_PATTERN + "$", re.IGNORECASE
  )

  # Entry-point group: dot-separated runs of word characters.
  ENTRYPOINT_GROUP_PATTERN = r"\w+(\.\w+)*"
  ENTRYPOINT_GROUP_REGEX = re.compile(
      "^" + ENTRYPOINT_GROUP_PATTERN + "$", re.IGNORECASE
  )
  def python_identifier(value: str) -> bool:
      """Tell whether ``value`` is an identifier according to ``str.isidentifier``."""
      verdict = value.isidentifier()
      return verdict
  def python_qualified_identifier(value: str) -> bool:
      """Check a dotted name: every ``.``-separated piece must be an identifier.

      Values that begin or end with a dot are rejected straight away.
      """
      dot_at_edge = value.startswith(".") or value.endswith(".")
      if dot_at_edge:
          return False
      for piece in value.split("."):
          if not python_identifier(piece):
              return False
      return True
  def python_module_name(value: str) -> bool:
      """Module names are validated exactly like qualified identifiers."""
      is_qualified = python_qualified_identifier(value)
      return is_qualified
  def python_entrypoint_group(value: str) -> bool:
      """Return ``True`` if the whole of ``value`` matches ``ENTRYPOINT_GROUP_REGEX``.

      :param value: candidate entry-point group name.
      :return: ``True`` for dot-separated runs of word characters.
      """
      # ``$`` in the compiled pattern also matches just before a trailing
      # newline, so ``match`` would accept "group\n"; ``fullmatch`` requires the
      # pattern to span the entire string.
      return ENTRYPOINT_GROUP_REGEX.fullmatch(value) is not None
  def python_entrypoint_name(value: str) -> bool:
      """Return ``True`` if ``value`` is an acceptable entry-point name.

      The value must match ``ENTRYPOINT_REGEX`` in full. Names that are valid but
      do not follow ``RECOMMEDED_ENTRYPOINT_PATTERN`` are accepted with a warning.

      :param value: candidate entry-point name.
      :return: ``False`` only when ``ENTRYPOINT_REGEX`` rejects the value.
      """
      # ``fullmatch`` instead of ``match``: the ``$`` anchor alone would let a
      # trailing newline through (e.g. "name\n"), i.e. trailing whitespace that
      # the pattern is meant to exclude.
      if not ENTRYPOINT_REGEX.fullmatch(value):
          return False
      if not RECOMMEDED_ENTRYPOINT_REGEX.fullmatch(value):
          msg = f"Entry point `{value}` does not follow recommended pattern: "
          msg += RECOMMEDED_ENTRYPOINT_PATTERN
          _logger.warning(msg)
      return True
  def python_entrypoint_reference(value: str) -> bool:
      """Validate an entry-point object reference.

      The expected shape is ``module.path[:object.path[ [extra1, extra2] ]]``.
      Every dotted piece of the module and object parts must satisfy
      ``python_identifier`` and every extra must satisfy ``pep508_identifier``.
      Using extras is accepted but logged as not recommended.

      :param value: the reference to check.
      :return: ``True`` when the reference is well-formed, ``False`` otherwise
          (including when the extras section is not closed by ``]``).
      """
      module, _, rest = value.partition(":")
      if "[" in rest:
          obj, _, extras_ = rest.partition("[")
          # ``endswith`` rather than indexing ``[-1]``: with nothing after the
          # opening bracket (e.g. "pkg:obj[") indexing would raise IndexError
          # instead of reporting the value as invalid.
          if not extras_.strip().endswith("]"):
              return False
          extras = (x.strip() for x in extras_.strip(string.whitespace + "[]").split(","))
          if not all(pep508_identifier(e) for e in extras):
              return False
          _logger.warning(f"`{value}` - using extras for entry points is not recommended")
      else:
          obj = rest

      module_parts = module.split(".")
      # Without anything after ":" only the module part has to be checked.
      identifiers = _chain(module_parts, obj.split(".")) if rest else module_parts
      return all(python_identifier(i.strip()) for i in identifiers)
  197. module_parts = module.split(".")
  198. identifiers = _chain(module_parts, obj.split(".")) if rest else module_parts
  199. return all(python_identifier(i.strip()) for i in identifiers)