_manylinux.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. import collections
  2. import functools
  3. import os
  4. import re
  5. import struct
  6. import sys
  7. import warnings
  8. from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple
  9. # Python does not provide platform information at sufficient granularity to
  10. # identify the architecture of the running executable in some cases, so we
  11. # determine it dynamically by reading the information from the running
  12. # process. This only applies on Linux, which uses the ELF format.
  13. class _ELFFileHeader:
  14. # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header
  15. class _InvalidELFFileHeader(ValueError):
  16. """
  17. An invalid ELF file header was found.
  18. """
  19. ELF_MAGIC_NUMBER = 0x7F454C46
  20. ELFCLASS32 = 1
  21. ELFCLASS64 = 2
  22. ELFDATA2LSB = 1
  23. ELFDATA2MSB = 2
  24. EM_386 = 3
  25. EM_S390 = 22
  26. EM_ARM = 40
  27. EM_X86_64 = 62
  28. EF_ARM_ABIMASK = 0xFF000000
  29. EF_ARM_ABI_VER5 = 0x05000000
  30. EF_ARM_ABI_FLOAT_HARD = 0x00000400
  31. def __init__(self, file: IO[bytes]) -> None:
  32. def unpack(fmt: str) -> int:
  33. try:
  34. data = file.read(struct.calcsize(fmt))
  35. result: Tuple[int, ...] = struct.unpack(fmt, data)
  36. except struct.error:
  37. raise _ELFFileHeader._InvalidELFFileHeader()
  38. return result[0]
  39. self.e_ident_magic = unpack(">I")
  40. if self.e_ident_magic != self.ELF_MAGIC_NUMBER:
  41. raise _ELFFileHeader._InvalidELFFileHeader()
  42. self.e_ident_class = unpack("B")
  43. if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}:
  44. raise _ELFFileHeader._InvalidELFFileHeader()
  45. self.e_ident_data = unpack("B")
  46. if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}:
  47. raise _ELFFileHeader._InvalidELFFileHeader()
  48. self.e_ident_version = unpack("B")
  49. self.e_ident_osabi = unpack("B")
  50. self.e_ident_abiversion = unpack("B")
  51. self.e_ident_pad = file.read(7)
  52. format_h = "<H" if self.e_ident_data == self.ELFDATA2LSB else ">H"
  53. format_i = "<I" if self.e_ident_data == self.ELFDATA2LSB else ">I"
  54. format_q = "<Q" if self.e_ident_data == self.ELFDATA2LSB else ">Q"
  55. format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q
  56. self.e_type = unpack(format_h)
  57. self.e_machine = unpack(format_h)
  58. self.e_version = unpack(format_i)
  59. self.e_entry = unpack(format_p)
  60. self.e_phoff = unpack(format_p)
  61. self.e_shoff = unpack(format_p)
  62. self.e_flags = unpack(format_i)
  63. self.e_ehsize = unpack(format_h)
  64. self.e_phentsize = unpack(format_h)
  65. self.e_phnum = unpack(format_h)
  66. self.e_shentsize = unpack(format_h)
  67. self.e_shnum = unpack(format_h)
  68. self.e_shstrndx = unpack(format_h)
  69. def _get_elf_header() -> Optional[_ELFFileHeader]:
  70. try:
  71. with open(sys.executable, "rb") as f:
  72. elf_header = _ELFFileHeader(f)
  73. except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader):
  74. return None
  75. return elf_header
  76. def _is_linux_armhf() -> bool:
  77. # hard-float ABI can be detected from the ELF header of the running
  78. # process
  79. # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
  80. elf_header = _get_elf_header()
  81. if elf_header is None:
  82. return False
  83. result = elf_header.e_ident_class == elf_header.ELFCLASS32
  84. result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB
  85. result &= elf_header.e_machine == elf_header.EM_ARM
  86. result &= (
  87. elf_header.e_flags & elf_header.EF_ARM_ABIMASK
  88. ) == elf_header.EF_ARM_ABI_VER5
  89. result &= (
  90. elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD
  91. ) == elf_header.EF_ARM_ABI_FLOAT_HARD
  92. return result
  93. def _is_linux_i686() -> bool:
  94. elf_header = _get_elf_header()
  95. if elf_header is None:
  96. return False
  97. result = elf_header.e_ident_class == elf_header.ELFCLASS32
  98. result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB
  99. result &= elf_header.e_machine == elf_header.EM_386
  100. return result
  101. def _have_compatible_abi(arch: str) -> bool:
  102. if arch == "armv7l":
  103. return _is_linux_armhf()
  104. if arch == "i686":
  105. return _is_linux_i686()
  106. return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"}
  107. # If glibc ever changes its major version, we need to know what the last
  108. # minor version was, so we can build the complete list of all versions.
  109. # For now, guess what the highest minor version might be, assume it will
  110. # be 50 for testing. Once this actually happens, update the dictionary
  111. # with the actual value.
  112. _LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50)
  113. class _GLibCVersion(NamedTuple):
  114. major: int
  115. minor: int
  116. def _glibc_version_string_confstr() -> Optional[str]:
  117. """
  118. Primary implementation of glibc_version_string using os.confstr.
  119. """
  120. # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely
  121. # to be broken or missing. This strategy is used in the standard library
  122. # platform module.
  123. # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
  124. try:
  125. # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17".
  126. version_string = os.confstr("CS_GNU_LIBC_VERSION")
  127. assert version_string is not None
  128. _, version = version_string.split()
  129. except (AssertionError, AttributeError, OSError, ValueError):
  130. # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
  131. return None
  132. return version
  133. def _glibc_version_string_ctypes() -> Optional[str]:
  134. """
  135. Fallback implementation of glibc_version_string using ctypes.
  136. """
  137. try:
  138. import ctypes
  139. except ImportError:
  140. return None
  141. # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
  142. # manpage says, "If filename is NULL, then the returned handle is for the
  143. # main program". This way we can let the linker do the work to figure out
  144. # which libc our process is actually using.
  145. #
  146. # We must also handle the special case where the executable is not a
  147. # dynamically linked executable. This can occur when using musl libc,
  148. # for example. In this situation, dlopen() will error, leading to an
  149. # OSError. Interestingly, at least in the case of musl, there is no
  150. # errno set on the OSError. The single string argument used to construct
  151. # OSError comes from libc itself and is therefore not portable to
  152. # hard code here. In any case, failure to call dlopen() means we
  153. # can proceed, so we bail on our attempt.
  154. try:
  155. process_namespace = ctypes.CDLL(None)
  156. except OSError:
  157. return None
  158. try:
  159. gnu_get_libc_version = process_namespace.gnu_get_libc_version
  160. except AttributeError:
  161. # Symbol doesn't exist -> therefore, we are not linked to
  162. # glibc.
  163. return None
  164. # Call gnu_get_libc_version, which returns a string like "2.5"
  165. gnu_get_libc_version.restype = ctypes.c_char_p
  166. version_str: str = gnu_get_libc_version()
  167. # py2 / py3 compatibility:
  168. if not isinstance(version_str, str):
  169. version_str = version_str.decode("ascii")
  170. return version_str
  171. def _glibc_version_string() -> Optional[str]:
  172. """Returns glibc version string, or None if not using glibc."""
  173. return _glibc_version_string_confstr() or _glibc_version_string_ctypes()
  174. def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
  175. """Parse glibc version.
  176. We use a regexp instead of str.split because we want to discard any
  177. random junk that might come after the minor version -- this might happen
  178. in patched/forked versions of glibc (e.g. Linaro's version of glibc
  179. uses version strings like "2.20-2014.11"). See gh-3588.
  180. """
  181. m = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
  182. if not m:
  183. warnings.warn(
  184. "Expected glibc version with 2 components major.minor,"
  185. " got: %s" % version_str,
  186. RuntimeWarning,
  187. )
  188. return -1, -1
  189. return int(m.group("major")), int(m.group("minor"))
  190. @functools.lru_cache()
  191. def _get_glibc_version() -> Tuple[int, int]:
  192. version_str = _glibc_version_string()
  193. if version_str is None:
  194. return (-1, -1)
  195. return _parse_glibc_version(version_str)
  196. # From PEP 513, PEP 600
  197. def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
  198. sys_glibc = _get_glibc_version()
  199. if sys_glibc < version:
  200. return False
  201. # Check for presence of _manylinux module.
  202. try:
  203. import _manylinux # noqa
  204. except ImportError:
  205. return True
  206. if hasattr(_manylinux, "manylinux_compatible"):
  207. result = _manylinux.manylinux_compatible(version[0], version[1], arch)
  208. if result is not None:
  209. return bool(result)
  210. return True
  211. if version == _GLibCVersion(2, 5):
  212. if hasattr(_manylinux, "manylinux1_compatible"):
  213. return bool(_manylinux.manylinux1_compatible)
  214. if version == _GLibCVersion(2, 12):
  215. if hasattr(_manylinux, "manylinux2010_compatible"):
  216. return bool(_manylinux.manylinux2010_compatible)
  217. if version == _GLibCVersion(2, 17):
  218. if hasattr(_manylinux, "manylinux2014_compatible"):
  219. return bool(_manylinux.manylinux2014_compatible)
  220. return True
  221. _LEGACY_MANYLINUX_MAP = {
  222. # CentOS 7 w/ glibc 2.17 (PEP 599)
  223. (2, 17): "manylinux2014",
  224. # CentOS 6 w/ glibc 2.12 (PEP 571)
  225. (2, 12): "manylinux2010",
  226. # CentOS 5 w/ glibc 2.5 (PEP 513)
  227. (2, 5): "manylinux1",
  228. }
  229. def platform_tags(linux: str, arch: str) -> Iterator[str]:
  230. if not _have_compatible_abi(arch):
  231. return
  232. # Oldest glibc to be supported regardless of architecture is (2, 17).
  233. too_old_glibc2 = _GLibCVersion(2, 16)
  234. if arch in {"x86_64", "i686"}:
  235. # On x86/i686 also oldest glibc to be supported is (2, 5).
  236. too_old_glibc2 = _GLibCVersion(2, 4)
  237. current_glibc = _GLibCVersion(*_get_glibc_version())
  238. glibc_max_list = [current_glibc]
  239. # We can assume compatibility across glibc major versions.
  240. # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
  241. #
  242. # Build a list of maximum glibc versions so that we can
  243. # output the canonical list of all glibc from current_glibc
  244. # down to too_old_glibc2, including all intermediary versions.
  245. for glibc_major in range(current_glibc.major - 1, 1, -1):
  246. glibc_minor = _LAST_GLIBC_MINOR[glibc_major]
  247. glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor))
  248. for glibc_max in glibc_max_list:
  249. if glibc_max.major == too_old_glibc2.major:
  250. min_minor = too_old_glibc2.minor
  251. else:
  252. # For other glibc major versions oldest supported is (x, 0).
  253. min_minor = -1
  254. for glibc_minor in range(glibc_max.minor, min_minor, -1):
  255. glibc_version = _GLibCVersion(glibc_max.major, glibc_minor)
  256. tag = "manylinux_{}_{}".format(*glibc_version)
  257. if _is_compatible(tag, arch, glibc_version):
  258. yield linux.replace("linux", tag)
  259. # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
  260. if glibc_version in _LEGACY_MANYLINUX_MAP:
  261. legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version]
  262. if _is_compatible(legacy_tag, arch, glibc_version):
  263. yield linux.replace("linux", legacy_tag)