123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600 |
- """Automatic discovery of Python modules and packages (for inclusion in the
- distribution) and other config values.
- For the purposes of this module, the following nomenclature is used:
- - "src-layout": a directory representing a Python project that contains a "src"
- folder. Everything under the "src" folder is meant to be included in the
- distribution when packaging the project. Example::
- .
- ├── tox.ini
- ├── pyproject.toml
- └── src/
- └── mypkg/
- ├── __init__.py
- ├── mymodule.py
- └── my_data_file.txt
- - "flat-layout": a Python project that does not use "src-layout" but instead
- have a directory under the project root for each package::
- .
- ├── tox.ini
- ├── pyproject.toml
- └── mypkg/
- ├── __init__.py
- ├── mymodule.py
- └── my_data_file.txt
- - "single-module": a project that contains a single Python script direct under
- the project root (no directory used)::
- .
- ├── tox.ini
- ├── pyproject.toml
- └── mymodule.py
- """
- import itertools
- import os
- from fnmatch import fnmatchcase
- from glob import glob
- from pathlib import Path
- from typing import (
- TYPE_CHECKING,
- Callable,
- Dict,
- Iterable,
- Iterator,
- List,
- Mapping,
- Optional,
- Tuple,
- Union
- )
- import _distutils_hack.override # noqa: F401
- from distutils import log
- from distutils.util import convert_path
- _Path = Union[str, os.PathLike]
- _Filter = Callable[[str], bool]
- StrIter = Iterator[str]
- chain_iter = itertools.chain.from_iterable
- if TYPE_CHECKING:
- from setuptools import Distribution # noqa
- def _valid_name(path: _Path) -> bool:
- # Ignore invalid names that cannot be imported directly
- return os.path.basename(path).isidentifier()
- class _Finder:
- """Base class that exposes functionality for module/package finders"""
- ALWAYS_EXCLUDE: Tuple[str, ...] = ()
- DEFAULT_EXCLUDE: Tuple[str, ...] = ()
- @classmethod
- def find(
- cls,
- where: _Path = '.',
- exclude: Iterable[str] = (),
- include: Iterable[str] = ('*',)
- ) -> List[str]:
- """Return a list of all Python items (packages or modules, depending on
- the finder implementation) found within directory 'where'.
- 'where' is the root directory which will be searched.
- It should be supplied as a "cross-platform" (i.e. URL-style) path;
- it will be converted to the appropriate local path syntax.
- 'exclude' is a sequence of names to exclude; '*' can be used
- as a wildcard in the names.
- When finding packages, 'foo.*' will exclude all subpackages of 'foo'
- (but not 'foo' itself).
- 'include' is a sequence of names to include.
- If it's specified, only the named items will be included.
- If it's not specified, all found items will be included.
- 'include' can contain shell style wildcard patterns just like
- 'exclude'.
- """
- exclude = exclude or cls.DEFAULT_EXCLUDE
- return list(
- cls._find_iter(
- convert_path(str(where)),
- cls._build_filter(*cls.ALWAYS_EXCLUDE, *exclude),
- cls._build_filter(*include),
- )
- )
- @classmethod
- def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
- raise NotImplementedError
- @staticmethod
- def _build_filter(*patterns: str) -> _Filter:
- """
- Given a list of patterns, return a callable that will be true only if
- the input matches at least one of the patterns.
- """
- return lambda name: any(fnmatchcase(name, pat) for pat in patterns)
- class PackageFinder(_Finder):
- """
- Generate a list of all Python packages found within a directory
- """
- ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__")
- @classmethod
- def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
- """
- All the packages found in 'where' that pass the 'include' filter, but
- not the 'exclude' filter.
- """
- for root, dirs, files in os.walk(str(where), followlinks=True):
- # Copy dirs to iterate over it, then empty dirs.
- all_dirs = dirs[:]
- dirs[:] = []
- for dir in all_dirs:
- full_path = os.path.join(root, dir)
- rel_path = os.path.relpath(full_path, where)
- package = rel_path.replace(os.path.sep, '.')
- # Skip directory trees that are not valid packages
- if '.' in dir or not cls._looks_like_package(full_path, package):
- continue
- # Should this package be included?
- if include(package) and not exclude(package):
- yield package
- # Keep searching subdirectories, as there may be more packages
- # down there, even if the parent was excluded.
- dirs.append(dir)
- @staticmethod
- def _looks_like_package(path: _Path, _package_name: str) -> bool:
- """Does a directory look like a package?"""
- return os.path.isfile(os.path.join(path, '__init__.py'))
- class PEP420PackageFinder(PackageFinder):
- @staticmethod
- def _looks_like_package(_path: _Path, _package_name: str) -> bool:
- return True
- class ModuleFinder(_Finder):
- """Find isolated Python modules.
- This function will **not** recurse subdirectories.
- """
- @classmethod
- def _find_iter(cls, where: _Path, exclude: _Filter, include: _Filter) -> StrIter:
- for file in glob(os.path.join(where, "*.py")):
- module, _ext = os.path.splitext(os.path.basename(file))
- if not cls._looks_like_module(module):
- continue
- if include(module) and not exclude(module):
- yield module
- _looks_like_module = staticmethod(_valid_name)
- # We have to be extra careful in the case of flat layout to not include files
- # and directories not meant for distribution (e.g. tool-related)
- class FlatLayoutPackageFinder(PEP420PackageFinder):
- _EXCLUDE = (
- "ci",
- "bin",
- "doc",
- "docs",
- "documentation",
- "manpages",
- "news",
- "changelog",
- "test",
- "tests",
- "unit_test",
- "unit_tests",
- "example",
- "examples",
- "scripts",
- "tools",
- "util",
- "utils",
- "python",
- "build",
- "dist",
- "venv",
- "env",
- "requirements",
- # ---- Task runners / Build tools ----
- "tasks", # invoke
- "fabfile", # fabric
- "site_scons", # SCons
- # ---- Other tools ----
- "benchmark",
- "benchmarks",
- "exercise",
- "exercises",
- # ---- Hidden directories/Private packages ----
- "[._]*",
- )
- DEFAULT_EXCLUDE = tuple(chain_iter((p, f"{p}.*") for p in _EXCLUDE))
- """Reserved package names"""
- @staticmethod
- def _looks_like_package(_path: _Path, package_name: str) -> bool:
- names = package_name.split('.')
- # Consider PEP 561
- root_pkg_is_valid = names[0].isidentifier() or names[0].endswith("-stubs")
- return root_pkg_is_valid and all(name.isidentifier() for name in names[1:])
- class FlatLayoutModuleFinder(ModuleFinder):
- DEFAULT_EXCLUDE = (
- "setup",
- "conftest",
- "test",
- "tests",
- "example",
- "examples",
- "build",
- # ---- Task runners ----
- "toxfile",
- "noxfile",
- "pavement",
- "dodo",
- "tasks",
- "fabfile",
- # ---- Other tools ----
- "[Ss][Cc]onstruct", # SCons
- "conanfile", # Connan: C/C++ build tool
- "manage", # Django
- "benchmark",
- "benchmarks",
- "exercise",
- "exercises",
- # ---- Hidden files/Private modules ----
- "[._]*",
- )
- """Reserved top-level module names"""
- def _find_packages_within(root_pkg: str, pkg_dir: _Path) -> List[str]:
- nested = PEP420PackageFinder.find(pkg_dir)
- return [root_pkg] + [".".join((root_pkg, n)) for n in nested]
- class ConfigDiscovery:
- """Fill-in metadata and options that can be automatically derived
- (from other metadata/options, the file system or conventions)
- """
- def __init__(self, distribution: "Distribution"):
- self.dist = distribution
- self._called = False
- self._disabled = False
- self._skip_ext_modules = False
- def _disable(self):
- """Internal API to disable automatic discovery"""
- self._disabled = True
- def _ignore_ext_modules(self):
- """Internal API to disregard ext_modules.
- Normally auto-discovery would not be triggered if ``ext_modules`` are set
- (this is done for backward compatibility with existing packages relying on
- ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function
- to ignore given ``ext_modules`` and proceed with the auto-discovery if
- ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml
- metadata).
- """
- self._skip_ext_modules = True
- @property
- def _root_dir(self) -> _Path:
- # The best is to wait until `src_root` is set in dist, before using _root_dir.
- return self.dist.src_root or os.curdir
- @property
- def _package_dir(self) -> Dict[str, str]:
- if self.dist.package_dir is None:
- return {}
- return self.dist.package_dir
- def __call__(self, force=False, name=True, ignore_ext_modules=False):
- """Automatically discover missing configuration fields
- and modifies the given ``distribution`` object in-place.
- Note that by default this will only have an effect the first time the
- ``ConfigDiscovery`` object is called.
- To repeatedly invoke automatic discovery (e.g. when the project
- directory changes), please use ``force=True`` (or create a new
- ``ConfigDiscovery`` instance).
- """
- if force is False and (self._called or self._disabled):
- # Avoid overhead of multiple calls
- return
- self._analyse_package_layout(ignore_ext_modules)
- if name:
- self.analyse_name() # depends on ``packages`` and ``py_modules``
- self._called = True
- def _explicitly_specified(self, ignore_ext_modules: bool) -> bool:
- """``True`` if the user has specified some form of package/module listing"""
- ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules
- ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules)
- return (
- self.dist.packages is not None
- or self.dist.py_modules is not None
- or ext_modules
- or hasattr(self.dist, "configuration") and self.dist.configuration
- # ^ Some projects use numpy.distutils.misc_util.Configuration
- )
- def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool:
- if self._explicitly_specified(ignore_ext_modules):
- # For backward compatibility, just try to find modules/packages
- # when nothing is given
- return True
- log.debug(
- "No `packages` or `py_modules` configuration, performing "
- "automatic discovery."
- )
- return (
- self._analyse_explicit_layout()
- or self._analyse_src_layout()
- # flat-layout is the trickiest for discovery so it should be last
- or self._analyse_flat_layout()
- )
- def _analyse_explicit_layout(self) -> bool:
- """The user can explicitly give a package layout via ``package_dir``"""
- package_dir = self._package_dir.copy() # don't modify directly
- package_dir.pop("", None) # This falls under the "src-layout" umbrella
- root_dir = self._root_dir
- if not package_dir:
- return False
- log.debug(f"`explicit-layout` detected -- analysing {package_dir}")
- pkgs = chain_iter(
- _find_packages_within(pkg, os.path.join(root_dir, parent_dir))
- for pkg, parent_dir in package_dir.items()
- )
- self.dist.packages = list(pkgs)
- log.debug(f"discovered packages -- {self.dist.packages}")
- return True
- def _analyse_src_layout(self) -> bool:
- """Try to find all packages or modules under the ``src`` directory
- (or anything pointed by ``package_dir[""]``).
- The "src-layout" is relatively safe for automatic discovery.
- We assume that everything within is meant to be included in the
- distribution.
- If ``package_dir[""]`` is not given, but the ``src`` directory exists,
- this function will set ``package_dir[""] = "src"``.
- """
- package_dir = self._package_dir
- src_dir = os.path.join(self._root_dir, package_dir.get("", "src"))
- if not os.path.isdir(src_dir):
- return False
- log.debug(f"`src-layout` detected -- analysing {src_dir}")
- package_dir.setdefault("", os.path.basename(src_dir))
- self.dist.package_dir = package_dir # persist eventual modifications
- self.dist.packages = PEP420PackageFinder.find(src_dir)
- self.dist.py_modules = ModuleFinder.find(src_dir)
- log.debug(f"discovered packages -- {self.dist.packages}")
- log.debug(f"discovered py_modules -- {self.dist.py_modules}")
- return True
- def _analyse_flat_layout(self) -> bool:
- """Try to find all packages and modules under the project root.
- Since the ``flat-layout`` is more dangerous in terms of accidentally including
- extra files/directories, this function is more conservative and will raise an
- error if multiple packages or modules are found.
- This assumes that multi-package dists are uncommon and refuse to support that
- use case in order to be able to prevent unintended errors.
- """
- log.debug(f"`flat-layout` detected -- analysing {self._root_dir}")
- return self._analyse_flat_packages() or self._analyse_flat_modules()
- def _analyse_flat_packages(self) -> bool:
- self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir)
- top_level = remove_nested_packages(remove_stubs(self.dist.packages))
- log.debug(f"discovered packages -- {self.dist.packages}")
- self._ensure_no_accidental_inclusion(top_level, "packages")
- return bool(top_level)
- def _analyse_flat_modules(self) -> bool:
- self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir)
- log.debug(f"discovered py_modules -- {self.dist.py_modules}")
- self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules")
- return bool(self.dist.py_modules)
- def _ensure_no_accidental_inclusion(self, detected: List[str], kind: str):
- if len(detected) > 1:
- from inspect import cleandoc
- from setuptools.errors import PackageDiscoveryError
- msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}.
- To avoid accidental inclusion of unwanted files or directories,
- setuptools will not proceed with this build.
- If you are trying to create a single distribution with multiple {kind}
- on purpose, you should not rely on automatic discovery.
- Instead, consider the following options:
- 1. set up custom discovery (`find` directive with `include` or `exclude`)
- 2. use a `src-layout`
- 3. explicitly set `py_modules` or `packages` with a list of names
- To find more information, look for "package discovery" on setuptools docs.
- """
- raise PackageDiscoveryError(cleandoc(msg))
- def analyse_name(self):
- """The packages/modules are the essential contribution of the author.
- Therefore the name of the distribution can be derived from them.
- """
- if self.dist.metadata.name or self.dist.name:
- # get_name() is not reliable (can return "UNKNOWN")
- return None
- log.debug("No `name` configuration, performing automatic discovery")
- name = (
- self._find_name_single_package_or_module()
- or self._find_name_from_packages()
- )
- if name:
- self.dist.metadata.name = name
- def _find_name_single_package_or_module(self) -> Optional[str]:
- """Exactly one module or package"""
- for field in ('packages', 'py_modules'):
- items = getattr(self.dist, field, None) or []
- if items and len(items) == 1:
- log.debug(f"Single module/package detected, name: {items[0]}")
- return items[0]
- return None
- def _find_name_from_packages(self) -> Optional[str]:
- """Try to find the root package that is not a PEP 420 namespace"""
- if not self.dist.packages:
- return None
- packages = remove_stubs(sorted(self.dist.packages, key=len))
- package_dir = self.dist.package_dir or {}
- parent_pkg = find_parent_package(packages, package_dir, self._root_dir)
- if parent_pkg:
- log.debug(f"Common parent package detected, name: {parent_pkg}")
- return parent_pkg
- log.warn("No parent package detected, impossible to derive `name`")
- return None
- def remove_nested_packages(packages: List[str]) -> List[str]:
- """Remove nested packages from a list of packages.
- >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"])
- ['a']
- >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"])
- ['a', 'b', 'c.d', 'g.h']
- """
- pkgs = sorted(packages, key=len)
- top_level = pkgs[:]
- size = len(pkgs)
- for i, name in enumerate(reversed(pkgs)):
- if any(name.startswith(f"{other}.") for other in top_level):
- top_level.pop(size - i - 1)
- return top_level
- def remove_stubs(packages: List[str]) -> List[str]:
- """Remove type stubs (:pep:`561`) from a list of packages.
- >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"])
- ['a', 'a.b', 'b']
- """
- return [pkg for pkg in packages if not pkg.split(".")[0].endswith("-stubs")]
- def find_parent_package(
- packages: List[str], package_dir: Mapping[str, str], root_dir: _Path
- ) -> Optional[str]:
- """Find the parent package that is not a namespace."""
- packages = sorted(packages, key=len)
- common_ancestors = []
- for i, name in enumerate(packages):
- if not all(n.startswith(f"{name}.") for n in packages[i+1:]):
- # Since packages are sorted by length, this condition is able
- # to find a list of all common ancestors.
- # When there is divergence (e.g. multiple root packages)
- # the list will be empty
- break
- common_ancestors.append(name)
- for name in common_ancestors:
- pkg_path = find_package_path(name, package_dir, root_dir)
- init = os.path.join(pkg_path, "__init__.py")
- if os.path.isfile(init):
- return name
- return None
- def find_package_path(
- name: str, package_dir: Mapping[str, str], root_dir: _Path
- ) -> str:
- """Given a package name, return the path where it should be found on
- disk, considering the ``package_dir`` option.
- >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".")
- >>> path.replace(os.sep, "/")
- './root/is/nested/my/pkg'
- >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".")
- >>> path.replace(os.sep, "/")
- './root/is/nested/pkg'
- >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".")
- >>> path.replace(os.sep, "/")
- './root/is/nested'
- >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".")
- >>> path.replace(os.sep, "/")
- './other/pkg'
- """
- parts = name.split(".")
- for i in range(len(parts), 0, -1):
- # Look backwards, the most specific package_dir first
- partial_name = ".".join(parts[:i])
- if partial_name in package_dir:
- parent = package_dir[partial_name]
- return os.path.join(root_dir, parent, *parts[i:])
- parent = package_dir.get("") or ""
- return os.path.join(root_dir, *parent.split("/"), *parts)
- def construct_package_dir(packages: List[str], package_path: _Path) -> Dict[str, str]:
- parent_pkgs = remove_nested_packages(packages)
- prefix = Path(package_path).parts
- return {pkg: "/".join([*prefix, *pkg.split(".")]) for pkg in parent_pkgs}
|