From 64ff61413baabb1637edd012e52aafe46ce1ec57 Mon Sep 17 00:00:00 2001 From: Richard Si Date: Sun, 20 Apr 2025 18:34:58 -0400 Subject: [PATCH] Trim distlib to remove non scripts code Distlib is our largest dependency. The vast majority of that is the Windows stub executables which we can't get rid of. However, distlib is a general purpose low-level packaging library, of which we only use the scripts module. By line count, trimming the parts of distlib that aren't used for scripts results in a 5% reduction in total (`_internal` and `_vendor`) size, which translates to ~250 KB of uncompressed and 64 KB of compressed (wheel) savings. --- pyproject.toml | 2 + src/pip/_vendor/distlib/database.py | 1329 --------------------------- src/pip/_vendor/distlib/index.py | 508 ---------- src/pip/_vendor/distlib/locators.py | 1295 -------------------------- src/pip/_vendor/distlib/manifest.py | 384 -------- src/pip/_vendor/distlib/markers.py | 162 ---- src/pip/_vendor/distlib/metadata.py | 1031 --------------------- src/pip/_vendor/distlib/version.py | 750 --------------- src/pip/_vendor/distlib/wheel.py | 1100 ---------------------- 9 files changed, 2 insertions(+), 6559 deletions(-) delete mode 100644 src/pip/_vendor/distlib/database.py delete mode 100644 src/pip/_vendor/distlib/index.py delete mode 100644 src/pip/_vendor/distlib/locators.py delete mode 100644 src/pip/_vendor/distlib/manifest.py delete mode 100644 src/pip/_vendor/distlib/markers.py delete mode 100644 src/pip/_vendor/distlib/metadata.py delete mode 100644 src/pip/_vendor/distlib/version.py delete mode 100644 src/pip/_vendor/distlib/wheel.py diff --git a/pyproject.toml b/pyproject.toml index 5d79eae2637..93493926f2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,6 +137,8 @@ drop = [ '^pygments/cmdline\.py$', # trim rich's markdown support "rich/markdown.py", + # trim parts of distlib that aren't related to scripts + '^distlib/(?!__init__|compat|resources|scripts|util).*\.py$', ] [tool.vendoring.typing-stubs] diff --git a/src/pip/_vendor/distlib/database.py b/src/pip/_vendor/distlib/database.py deleted file mode 100644 index c0f896a7d85..00000000000 --- a/src/pip/_vendor/distlib/database.py +++ /dev/null @@ -1,1329 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012-2023 The Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -"""PEP 376 implementation.""" - -from __future__ import unicode_literals - -import base64 -import codecs -import contextlib -import hashlib -import logging -import os -import posixpath -import sys -import zipimport - -from . import DistlibException, resources -from .compat import StringIO -from .version import get_scheme, UnsupportedVersionError -from .metadata import (Metadata, METADATA_FILENAME, WHEEL_METADATA_FILENAME, LEGACY_METADATA_FILENAME) -from .util import (parse_requirement, cached_property, parse_name_and_version, read_exports, write_exports, CSVReader, - CSVWriter) - -__all__ = [ - 'Distribution', 'BaseInstalledDistribution', 'InstalledDistribution', 'EggInfoDistribution', 'DistributionPath' -] - -logger = logging.getLogger(__name__) - -EXPORTS_FILENAME = 'pydist-exports.json' -COMMANDS_FILENAME = 'pydist-commands.json' - -DIST_FILES = ('INSTALLER', METADATA_FILENAME, 'RECORD', 'REQUESTED', 'RESOURCES', EXPORTS_FILENAME, 'SHARED') - -DISTINFO_EXT = '.dist-info' - - -class _Cache(object): - """ - A simple cache mapping names and .dist-info paths to distributions - """ - - def __init__(self): - """ - Initialise an instance. There is normally one for each DistributionPath. - """ - self.name = {} - self.path = {} - self.generated = False - - def clear(self): - """ - Clear the cache, setting it to its initial state. - """ - self.name.clear() - self.path.clear() - self.generated = False - - def add(self, dist): - """ - Add a distribution to the cache. - :param dist: The distribution to add. - """ - if dist.path not in self.path: - self.path[dist.path] = dist - self.name.setdefault(dist.key, []).append(dist) - - -class DistributionPath(object): - """ - Represents a set of distributions installed on a path (typically sys.path). - """ - - def __init__(self, path=None, include_egg=False): - """ - Create an instance from a path, optionally including legacy (distutils/ - setuptools/distribute) distributions. - :param path: The path to use, as a list of directories. If not specified, - sys.path is used. - :param include_egg: If True, this instance will look for and return legacy - distributions as well as those based on PEP 376. - """ - if path is None: - path = sys.path - self.path = path - self._include_dist = True - self._include_egg = include_egg - - self._cache = _Cache() - self._cache_egg = _Cache() - self._cache_enabled = True - self._scheme = get_scheme('default') - - def _get_cache_enabled(self): - return self._cache_enabled - - def _set_cache_enabled(self, value): - self._cache_enabled = value - - cache_enabled = property(_get_cache_enabled, _set_cache_enabled) - - def clear_cache(self): - """ - Clears the internal cache. - """ - self._cache.clear() - self._cache_egg.clear() - - def _yield_distributions(self): - """ - Yield .dist-info and/or .egg(-info) distributions. - """ - # We need to check if we've seen some resources already, because on - # some Linux systems (e.g. some Debian/Ubuntu variants) there are - # symlinks which alias other files in the environment. - seen = set() - for path in self.path: - finder = resources.finder_for_path(path) - if finder is None: - continue - r = finder.find('') - if not r or not r.is_container: - continue - rset = sorted(r.resources) - for entry in rset: - r = finder.find(entry) - if not r or r.path in seen: - continue - try: - if self._include_dist and entry.endswith(DISTINFO_EXT): - possible_filenames = [METADATA_FILENAME, WHEEL_METADATA_FILENAME, LEGACY_METADATA_FILENAME] - for metadata_filename in possible_filenames: - metadata_path = posixpath.join(entry, metadata_filename) - pydist = finder.find(metadata_path) - if pydist: - break - else: - continue - - with contextlib.closing(pydist.as_stream()) as stream: - metadata = Metadata(fileobj=stream, scheme='legacy') - logger.debug('Found %s', r.path) - seen.add(r.path) - yield new_dist_class(r.path, metadata=metadata, env=self) - elif self._include_egg and entry.endswith(('.egg-info', '.egg')): - logger.debug('Found %s', r.path) - seen.add(r.path) - yield old_dist_class(r.path, self) - except Exception as e: - msg = 'Unable to read distribution at %s, perhaps due to bad metadata: %s' - logger.warning(msg, r.path, e) - import warnings - warnings.warn(msg % (r.path, e), stacklevel=2) - - def _generate_cache(self): - """ - Scan the path for distributions and populate the cache with - those that are found. - """ - gen_dist = not self._cache.generated - gen_egg = self._include_egg and not self._cache_egg.generated - if gen_dist or gen_egg: - for dist in self._yield_distributions(): - if isinstance(dist, InstalledDistribution): - self._cache.add(dist) - else: - self._cache_egg.add(dist) - - if gen_dist: - self._cache.generated = True - if gen_egg: - self._cache_egg.generated = True - - @classmethod - def distinfo_dirname(cls, name, version): - """ - The *name* and *version* parameters are converted into their - filename-escaped form, i.e. any ``'-'`` characters are replaced - with ``'_'`` other than the one in ``'dist-info'`` and the one - separating the name from the version number. - - :parameter name: is converted to a standard distribution name by replacing - any runs of non- alphanumeric characters with a single - ``'-'``. - :type name: string - :parameter version: is converted to a standard version string. Spaces - become dots, and all other non-alphanumeric characters - (except dots) become dashes, with runs of multiple - dashes condensed to a single dash. - :type version: string - :returns: directory name - :rtype: string""" - name = name.replace('-', '_') - return '-'.join([name, version]) + DISTINFO_EXT - - def get_distributions(self): - """ - Provides an iterator that looks for distributions and returns - :class:`InstalledDistribution` or - :class:`EggInfoDistribution` instances for each one of them. - - :rtype: iterator of :class:`InstalledDistribution` and - :class:`EggInfoDistribution` instances - """ - if not self._cache_enabled: - for dist in self._yield_distributions(): - yield dist - else: - self._generate_cache() - - for dist in self._cache.path.values(): - yield dist - - if self._include_egg: - for dist in self._cache_egg.path.values(): - yield dist - - def get_distribution(self, name): - """ - Looks for a named distribution on the path. - - This function only returns the first result found, as no more than one - value is expected. If nothing is found, ``None`` is returned. - - :rtype: :class:`InstalledDistribution`, :class:`EggInfoDistribution` - or ``None`` - """ - result = None - name = name.lower() - if not self._cache_enabled: - for dist in self._yield_distributions(): - if dist.key == name: - result = dist - break - else: - self._generate_cache() - - if name in self._cache.name: - result = self._cache.name[name][0] - elif self._include_egg and name in self._cache_egg.name: - result = self._cache_egg.name[name][0] - return result - - def provides_distribution(self, name, version=None): - """ - Iterates over all distributions to find which distributions provide *name*. - If a *version* is provided, it will be used to filter the results. - - This function only returns the first result found, since no more than - one values are expected. If the directory is not found, returns ``None``. - - :parameter version: a version specifier that indicates the version - required, conforming to the format in ``PEP-345`` - - :type name: string - :type version: string - """ - matcher = None - if version is not None: - try: - matcher = self._scheme.matcher('%s (%s)' % (name, version)) - except ValueError: - raise DistlibException('invalid name or version: %r, %r' % (name, version)) - - for dist in self.get_distributions(): - # We hit a problem on Travis where enum34 was installed and doesn't - # have a provides attribute ... - if not hasattr(dist, 'provides'): - logger.debug('No "provides": %s', dist) - else: - provided = dist.provides - - for p in provided: - p_name, p_ver = parse_name_and_version(p) - if matcher is None: - if p_name == name: - yield dist - break - else: - if p_name == name and matcher.match(p_ver): - yield dist - break - - def get_file_path(self, name, relative_path): - """ - Return the path to a resource file. - """ - dist = self.get_distribution(name) - if dist is None: - raise LookupError('no distribution named %r found' % name) - return dist.get_resource_path(relative_path) - - def get_exported_entries(self, category, name=None): - """ - Return all of the exported entries in a particular category. - - :param category: The category to search for entries. - :param name: If specified, only entries with that name are returned. - """ - for dist in self.get_distributions(): - r = dist.exports - if category in r: - d = r[category] - if name is not None: - if name in d: - yield d[name] - else: - for v in d.values(): - yield v - - -class Distribution(object): - """ - A base class for distributions, whether installed or from indexes. - Either way, it must have some metadata, so that's all that's needed - for construction. - """ - - build_time_dependency = False - """ - Set to True if it's known to be only a build-time dependency (i.e. - not needed after installation). - """ - - requested = False - """A boolean that indicates whether the ``REQUESTED`` metadata file is - present (in other words, whether the package was installed by user - request or it was installed as a dependency).""" - - def __init__(self, metadata): - """ - Initialise an instance. - :param metadata: The instance of :class:`Metadata` describing this - distribution. - """ - self.metadata = metadata - self.name = metadata.name - self.key = self.name.lower() # for case-insensitive comparisons - self.version = metadata.version - self.locator = None - self.digest = None - self.extras = None # additional features requested - self.context = None # environment marker overrides - self.download_urls = set() - self.digests = {} - - @property - def source_url(self): - """ - The source archive download URL for this distribution. - """ - return self.metadata.source_url - - download_url = source_url # Backward compatibility - - @property - def name_and_version(self): - """ - A utility property which displays the name and version in parentheses. - """ - return '%s (%s)' % (self.name, self.version) - - @property - def provides(self): - """ - A set of distribution names and versions provided by this distribution. - :return: A set of "name (version)" strings. - """ - plist = self.metadata.provides - s = '%s (%s)' % (self.name, self.version) - if s not in plist: - plist.append(s) - return plist - - def _get_requirements(self, req_attr): - md = self.metadata - reqts = getattr(md, req_attr) - logger.debug('%s: got requirements %r from metadata: %r', self.name, req_attr, reqts) - return set(md.get_requirements(reqts, extras=self.extras, env=self.context)) - - @property - def run_requires(self): - return self._get_requirements('run_requires') - - @property - def meta_requires(self): - return self._get_requirements('meta_requires') - - @property - def build_requires(self): - return self._get_requirements('build_requires') - - @property - def test_requires(self): - return self._get_requirements('test_requires') - - @property - def dev_requires(self): - return self._get_requirements('dev_requires') - - def matches_requirement(self, req): - """ - Say if this instance matches (fulfills) a requirement. - :param req: The requirement to match. - :rtype req: str - :return: True if it matches, else False. - """ - # Requirement may contain extras - parse to lose those - # from what's passed to the matcher - r = parse_requirement(req) - scheme = get_scheme(self.metadata.scheme) - try: - matcher = scheme.matcher(r.requirement) - except UnsupportedVersionError: - # XXX compat-mode if cannot read the version - logger.warning('could not read version %r - using name only', req) - name = req.split()[0] - matcher = scheme.matcher(name) - - name = matcher.key # case-insensitive - - result = False - for p in self.provides: - p_name, p_ver = parse_name_and_version(p) - if p_name != name: - continue - try: - result = matcher.match(p_ver) - break - except UnsupportedVersionError: - pass - return result - - def __repr__(self): - """ - Return a textual representation of this instance, - """ - if self.source_url: - suffix = ' [%s]' % self.source_url - else: - suffix = '' - return '' % (self.name, self.version, suffix) - - def __eq__(self, other): - """ - See if this distribution is the same as another. - :param other: The distribution to compare with. To be equal to one - another. distributions must have the same type, name, - version and source_url. - :return: True if it is the same, else False. - """ - if type(other) is not type(self): - result = False - else: - result = (self.name == other.name and self.version == other.version and self.source_url == other.source_url) - return result - - def __hash__(self): - """ - Compute hash in a way which matches the equality test. - """ - return hash(self.name) + hash(self.version) + hash(self.source_url) - - -class BaseInstalledDistribution(Distribution): - """ - This is the base class for installed distributions (whether PEP 376 or - legacy). - """ - - hasher = None - - def __init__(self, metadata, path, env=None): - """ - Initialise an instance. - :param metadata: An instance of :class:`Metadata` which describes the - distribution. This will normally have been initialised - from a metadata file in the ``path``. - :param path: The path of the ``.dist-info`` or ``.egg-info`` - directory for the distribution. - :param env: This is normally the :class:`DistributionPath` - instance where this distribution was found. - """ - super(BaseInstalledDistribution, self).__init__(metadata) - self.path = path - self.dist_path = env - - def get_hash(self, data, hasher=None): - """ - Get the hash of some data, using a particular hash algorithm, if - specified. - - :param data: The data to be hashed. - :type data: bytes - :param hasher: The name of a hash implementation, supported by hashlib, - or ``None``. Examples of valid values are ``'sha1'``, - ``'sha224'``, ``'sha384'``, '``sha256'``, ``'md5'`` and - ``'sha512'``. If no hasher is specified, the ``hasher`` - attribute of the :class:`InstalledDistribution` instance - is used. If the hasher is determined to be ``None``, MD5 - is used as the hashing algorithm. - :returns: The hash of the data. If a hasher was explicitly specified, - the returned hash will be prefixed with the specified hasher - followed by '='. - :rtype: str - """ - if hasher is None: - hasher = self.hasher - if hasher is None: - hasher = hashlib.md5 - prefix = '' - else: - hasher = getattr(hashlib, hasher) - prefix = '%s=' % self.hasher - digest = hasher(data).digest() - digest = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii') - return '%s%s' % (prefix, digest) - - -class InstalledDistribution(BaseInstalledDistribution): - """ - Created with the *path* of the ``.dist-info`` directory provided to the - constructor. It reads the metadata contained in ``pydist.json`` when it is - instantiated., or uses a passed in Metadata instance (useful for when - dry-run mode is being used). - """ - - hasher = 'sha256' - - def __init__(self, path, metadata=None, env=None): - self.modules = [] - self.finder = finder = resources.finder_for_path(path) - if finder is None: - raise ValueError('finder unavailable for %s' % path) - if env and env._cache_enabled and path in env._cache.path: - metadata = env._cache.path[path].metadata - elif metadata is None: - r = finder.find(METADATA_FILENAME) - # Temporary - for Wheel 0.23 support - if r is None: - r = finder.find(WHEEL_METADATA_FILENAME) - # Temporary - for legacy support - if r is None: - r = finder.find(LEGACY_METADATA_FILENAME) - if r is None: - raise ValueError('no %s found in %s' % (METADATA_FILENAME, path)) - with contextlib.closing(r.as_stream()) as stream: - metadata = Metadata(fileobj=stream, scheme='legacy') - - super(InstalledDistribution, self).__init__(metadata, path, env) - - if env and env._cache_enabled: - env._cache.add(self) - - r = finder.find('REQUESTED') - self.requested = r is not None - p = os.path.join(path, 'top_level.txt') - if os.path.exists(p): - with open(p, 'rb') as f: - data = f.read().decode('utf-8') - self.modules = data.splitlines() - - def __repr__(self): - return '' % (self.name, self.version, self.path) - - def __str__(self): - return "%s %s" % (self.name, self.version) - - def _get_records(self): - """ - Get the list of installed files for the distribution - :return: A list of tuples of path, hash and size. Note that hash and - size might be ``None`` for some entries. The path is exactly - as stored in the file (which is as in PEP 376). - """ - results = [] - r = self.get_distinfo_resource('RECORD') - with contextlib.closing(r.as_stream()) as stream: - with CSVReader(stream=stream) as record_reader: - # Base location is parent dir of .dist-info dir - # base_location = os.path.dirname(self.path) - # base_location = os.path.abspath(base_location) - for row in record_reader: - missing = [None for i in range(len(row), 3)] - path, checksum, size = row + missing - # if not os.path.isabs(path): - # path = path.replace('/', os.sep) - # path = os.path.join(base_location, path) - results.append((path, checksum, size)) - return results - - @cached_property - def exports(self): - """ - Return the information exported by this distribution. - :return: A dictionary of exports, mapping an export category to a dict - of :class:`ExportEntry` instances describing the individual - export entries, and keyed by name. - """ - result = {} - r = self.get_distinfo_resource(EXPORTS_FILENAME) - if r: - result = self.read_exports() - return result - - def read_exports(self): - """ - Read exports data from a file in .ini format. - - :return: A dictionary of exports, mapping an export category to a list - of :class:`ExportEntry` instances describing the individual - export entries. - """ - result = {} - r = self.get_distinfo_resource(EXPORTS_FILENAME) - if r: - with contextlib.closing(r.as_stream()) as stream: - result = read_exports(stream) - return result - - def write_exports(self, exports): - """ - Write a dictionary of exports to a file in .ini format. - :param exports: A dictionary of exports, mapping an export category to - a list of :class:`ExportEntry` instances describing the - individual export entries. - """ - rf = self.get_distinfo_file(EXPORTS_FILENAME) - with open(rf, 'w') as f: - write_exports(exports, f) - - def get_resource_path(self, relative_path): - """ - NOTE: This API may change in the future. - - Return the absolute path to a resource file with the given relative - path. - - :param relative_path: The path, relative to .dist-info, of the resource - of interest. - :return: The absolute path where the resource is to be found. - """ - r = self.get_distinfo_resource('RESOURCES') - with contextlib.closing(r.as_stream()) as stream: - with CSVReader(stream=stream) as resources_reader: - for relative, destination in resources_reader: - if relative == relative_path: - return destination - raise KeyError('no resource file with relative path %r ' - 'is installed' % relative_path) - - def list_installed_files(self): - """ - Iterates over the ``RECORD`` entries and returns a tuple - ``(path, hash, size)`` for each line. - - :returns: iterator of (path, hash, size) - """ - for result in self._get_records(): - yield result - - def write_installed_files(self, paths, prefix, dry_run=False): - """ - Writes the ``RECORD`` file, using the ``paths`` iterable passed in. Any - existing ``RECORD`` file is silently overwritten. - - prefix is used to determine when to write absolute paths. - """ - prefix = os.path.join(prefix, '') - base = os.path.dirname(self.path) - base_under_prefix = base.startswith(prefix) - base = os.path.join(base, '') - record_path = self.get_distinfo_file('RECORD') - logger.info('creating %s', record_path) - if dry_run: - return None - with CSVWriter(record_path) as writer: - for path in paths: - if os.path.isdir(path) or path.endswith(('.pyc', '.pyo')): - # do not put size and hash, as in PEP-376 - hash_value = size = '' - else: - size = '%d' % os.path.getsize(path) - with open(path, 'rb') as fp: - hash_value = self.get_hash(fp.read()) - if path.startswith(base) or (base_under_prefix and path.startswith(prefix)): - path = os.path.relpath(path, base) - writer.writerow((path, hash_value, size)) - - # add the RECORD file itself - if record_path.startswith(base): - record_path = os.path.relpath(record_path, base) - writer.writerow((record_path, '', '')) - return record_path - - def check_installed_files(self): - """ - Checks that the hashes and sizes of the files in ``RECORD`` are - matched by the files themselves. Returns a (possibly empty) list of - mismatches. Each entry in the mismatch list will be a tuple consisting - of the path, 'exists', 'size' or 'hash' according to what didn't match - (existence is checked first, then size, then hash), the expected - value and the actual value. - """ - mismatches = [] - base = os.path.dirname(self.path) - record_path = self.get_distinfo_file('RECORD') - for path, hash_value, size in self.list_installed_files(): - if not os.path.isabs(path): - path = os.path.join(base, path) - if path == record_path: - continue - if not os.path.exists(path): - mismatches.append((path, 'exists', True, False)) - elif os.path.isfile(path): - actual_size = str(os.path.getsize(path)) - if size and actual_size != size: - mismatches.append((path, 'size', size, actual_size)) - elif hash_value: - if '=' in hash_value: - hasher = hash_value.split('=', 1)[0] - else: - hasher = None - - with open(path, 'rb') as f: - actual_hash = self.get_hash(f.read(), hasher) - if actual_hash != hash_value: - mismatches.append((path, 'hash', hash_value, actual_hash)) - return mismatches - - @cached_property - def shared_locations(self): - """ - A dictionary of shared locations whose keys are in the set 'prefix', - 'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'. - The corresponding value is the absolute path of that category for - this distribution, and takes into account any paths selected by the - user at installation time (e.g. via command-line arguments). In the - case of the 'namespace' key, this would be a list of absolute paths - for the roots of namespace packages in this distribution. - - The first time this property is accessed, the relevant information is - read from the SHARED file in the .dist-info directory. - """ - result = {} - shared_path = os.path.join(self.path, 'SHARED') - if os.path.isfile(shared_path): - with codecs.open(shared_path, 'r', encoding='utf-8') as f: - lines = f.read().splitlines() - for line in lines: - key, value = line.split('=', 1) - if key == 'namespace': - result.setdefault(key, []).append(value) - else: - result[key] = value - return result - - def write_shared_locations(self, paths, dry_run=False): - """ - Write shared location information to the SHARED file in .dist-info. - :param paths: A dictionary as described in the documentation for - :meth:`shared_locations`. - :param dry_run: If True, the action is logged but no file is actually - written. - :return: The path of the file written to. - """ - shared_path = os.path.join(self.path, 'SHARED') - logger.info('creating %s', shared_path) - if dry_run: - return None - lines = [] - for key in ('prefix', 'lib', 'headers', 'scripts', 'data'): - path = paths[key] - if os.path.isdir(paths[key]): - lines.append('%s=%s' % (key, path)) - for ns in paths.get('namespace', ()): - lines.append('namespace=%s' % ns) - - with codecs.open(shared_path, 'w', encoding='utf-8') as f: - f.write('\n'.join(lines)) - return shared_path - - def get_distinfo_resource(self, path): - if path not in DIST_FILES: - raise DistlibException('invalid path for a dist-info file: ' - '%r at %r' % (path, self.path)) - finder = resources.finder_for_path(self.path) - if finder is None: - raise DistlibException('Unable to get a finder for %s' % self.path) - return finder.find(path) - - def get_distinfo_file(self, path): - """ - Returns a path located under the ``.dist-info`` directory. Returns a - string representing the path. - - :parameter path: a ``'/'``-separated path relative to the - ``.dist-info`` directory or an absolute path; - If *path* is an absolute path and doesn't start - with the ``.dist-info`` directory path, - a :class:`DistlibException` is raised - :type path: str - :rtype: str - """ - # Check if it is an absolute path # XXX use relpath, add tests - if path.find(os.sep) >= 0: - # it's an absolute path? - distinfo_dirname, path = path.split(os.sep)[-2:] - if distinfo_dirname != self.path.split(os.sep)[-1]: - raise DistlibException('dist-info file %r does not belong to the %r %s ' - 'distribution' % (path, self.name, self.version)) - - # The file must be relative - if path not in DIST_FILES: - raise DistlibException('invalid path for a dist-info file: ' - '%r at %r' % (path, self.path)) - - return os.path.join(self.path, path) - - def list_distinfo_files(self): - """ - Iterates over the ``RECORD`` entries and returns paths for each line if - the path is pointing to a file located in the ``.dist-info`` directory - or one of its subdirectories. - - :returns: iterator of paths - """ - base = os.path.dirname(self.path) - for path, checksum, size in self._get_records(): - # XXX add separator or use real relpath algo - if not os.path.isabs(path): - path = os.path.join(base, path) - if path.startswith(self.path): - yield path - - def __eq__(self, other): - return (isinstance(other, InstalledDistribution) and self.path == other.path) - - # See http://docs.python.org/reference/datamodel#object.__hash__ - __hash__ = object.__hash__ - - -class EggInfoDistribution(BaseInstalledDistribution): - """Created with the *path* of the ``.egg-info`` directory or file provided - to the constructor. It reads the metadata contained in the file itself, or - if the given path happens to be a directory, the metadata is read from the - file ``PKG-INFO`` under that directory.""" - - requested = True # as we have no way of knowing, assume it was - shared_locations = {} - - def __init__(self, path, env=None): - - def set_name_and_version(s, n, v): - s.name = n - s.key = n.lower() # for case-insensitive comparisons - s.version = v - - self.path = path - self.dist_path = env - if env and env._cache_enabled and path in env._cache_egg.path: - metadata = env._cache_egg.path[path].metadata - set_name_and_version(self, metadata.name, metadata.version) - else: - metadata = self._get_metadata(path) - - # Need to be set before caching - set_name_and_version(self, metadata.name, metadata.version) - - if env and env._cache_enabled: - env._cache_egg.add(self) - super(EggInfoDistribution, self).__init__(metadata, path, env) - - def _get_metadata(self, path): - requires = None - - def parse_requires_data(data): - """Create a list of dependencies from a requires.txt file. - - *data*: the contents of a setuptools-produced requires.txt file. - """ - reqs = [] - lines = data.splitlines() - for line in lines: - line = line.strip() - # sectioned files have bare newlines (separating sections) - if not line: # pragma: no cover - continue - if line.startswith('['): # pragma: no cover - logger.warning('Unexpected line: quitting requirement scan: %r', line) - break - r = parse_requirement(line) - if not r: # pragma: no cover - logger.warning('Not recognised as a requirement: %r', line) - continue - if r.extras: # pragma: no cover - logger.warning('extra requirements in requires.txt are ' - 'not supported') - if not r.constraints: - reqs.append(r.name) - else: - cons = ', '.join('%s%s' % c for c in r.constraints) - reqs.append('%s (%s)' % (r.name, cons)) - return reqs - - def parse_requires_path(req_path): - """Create a list of dependencies from a requires.txt file. - - *req_path*: the path to a setuptools-produced requires.txt file. - """ - - reqs = [] - try: - with codecs.open(req_path, 'r', 'utf-8') as fp: - reqs = parse_requires_data(fp.read()) - except IOError: - pass - return reqs - - tl_path = tl_data = None - if path.endswith('.egg'): - if os.path.isdir(path): - p = os.path.join(path, 'EGG-INFO') - meta_path = os.path.join(p, 'PKG-INFO') - metadata = Metadata(path=meta_path, scheme='legacy') - req_path = os.path.join(p, 'requires.txt') - tl_path = os.path.join(p, 'top_level.txt') - requires = parse_requires_path(req_path) - else: - # FIXME handle the case where zipfile is not available - zipf = zipimport.zipimporter(path) - fileobj = StringIO(zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8')) - metadata = Metadata(fileobj=fileobj, scheme='legacy') - try: - data = zipf.get_data('EGG-INFO/requires.txt') - tl_data = zipf.get_data('EGG-INFO/top_level.txt').decode('utf-8') - requires = parse_requires_data(data.decode('utf-8')) - except IOError: - requires = None - elif path.endswith('.egg-info'): - if os.path.isdir(path): - req_path = os.path.join(path, 'requires.txt') - requires = parse_requires_path(req_path) - path = os.path.join(path, 'PKG-INFO') - tl_path = os.path.join(path, 'top_level.txt') - metadata = Metadata(path=path, scheme='legacy') - else: - raise DistlibException('path must end with .egg-info or .egg, ' - 'got %r' % path) - - if requires: - metadata.add_requirements(requires) - # look for top-level modules in top_level.txt, if present - if tl_data is None: - if tl_path is not None and os.path.exists(tl_path): - with open(tl_path, 'rb') as f: - tl_data = f.read().decode('utf-8') - if not tl_data: - tl_data = [] - else: - tl_data = tl_data.splitlines() - self.modules = tl_data - return metadata - - def __repr__(self): - return '' % (self.name, self.version, self.path) - - def __str__(self): - return "%s %s" % (self.name, self.version) - - def check_installed_files(self): - """ - Checks that the hashes and sizes of the files in ``RECORD`` are - matched by the files themselves. Returns a (possibly empty) list of - mismatches. Each entry in the mismatch list will be a tuple consisting - of the path, 'exists', 'size' or 'hash' according to what didn't match - (existence is checked first, then size, then hash), the expected - value and the actual value. - """ - mismatches = [] - record_path = os.path.join(self.path, 'installed-files.txt') - if os.path.exists(record_path): - for path, _, _ in self.list_installed_files(): - if path == record_path: - continue - if not os.path.exists(path): - mismatches.append((path, 'exists', True, False)) - return mismatches - - def list_installed_files(self): - """ - Iterates over the ``installed-files.txt`` entries and returns a tuple - ``(path, hash, size)`` for each line. - - :returns: a list of (path, hash, size) - """ - - def _md5(path): - f = open(path, 'rb') - try: - content = f.read() - finally: - f.close() - return hashlib.md5(content).hexdigest() - - def _size(path): - return os.stat(path).st_size - - record_path = os.path.join(self.path, 'installed-files.txt') - result = [] - if os.path.exists(record_path): - with codecs.open(record_path, 'r', encoding='utf-8') as f: - for line in f: - line = line.strip() - p = os.path.normpath(os.path.join(self.path, line)) - # "./" is present as a marker between installed files - # and installation metadata files - if not os.path.exists(p): - logger.warning('Non-existent file: %s', p) - if p.endswith(('.pyc', '.pyo')): - continue - # otherwise fall through and fail - if not os.path.isdir(p): - result.append((p, _md5(p), _size(p))) - result.append((record_path, None, None)) - return result - - def list_distinfo_files(self, absolute=False): - """ - Iterates over the ``installed-files.txt`` entries and returns paths for - each line if the path is pointing to a file located in the - ``.egg-info`` directory or one of its subdirectories. - - :parameter absolute: If *absolute* is ``True``, each returned path is - transformed into a local absolute path. Otherwise the - raw value from ``installed-files.txt`` is returned. - :type absolute: boolean - :returns: iterator of paths - """ - record_path = os.path.join(self.path, 'installed-files.txt') - if os.path.exists(record_path): - skip = True - with codecs.open(record_path, 'r', encoding='utf-8') as f: - for line in f: - line = line.strip() - if line == './': - skip = False - continue - if not skip: - p = os.path.normpath(os.path.join(self.path, line)) - if p.startswith(self.path): - if absolute: - yield p - else: - yield line - - def __eq__(self, other): - return (isinstance(other, EggInfoDistribution) and self.path == other.path) - - # See http://docs.python.org/reference/datamodel#object.__hash__ - __hash__ = object.__hash__ - - -new_dist_class = InstalledDistribution -old_dist_class = EggInfoDistribution - - -class DependencyGraph(object): - """ - Represents a dependency graph between distributions. - - The dependency relationships are stored in an ``adjacency_list`` that maps - distributions to a list of ``(other, label)`` tuples where ``other`` - is a distribution and the edge is labeled with ``label`` (i.e. the version - specifier, if such was provided). Also, for more efficient traversal, for - every distribution ``x``, a list of predecessors is kept in - ``reverse_list[x]``. An edge from distribution ``a`` to - distribution ``b`` means that ``a`` depends on ``b``. If any missing - dependencies are found, they are stored in ``missing``, which is a - dictionary that maps distributions to a list of requirements that were not - provided by any other distributions. - """ - - def __init__(self): - self.adjacency_list = {} - self.reverse_list = {} - self.missing = {} - - def add_distribution(self, distribution): - """Add the *distribution* to the graph. - - :type distribution: :class:`distutils2.database.InstalledDistribution` - or :class:`distutils2.database.EggInfoDistribution` - """ - self.adjacency_list[distribution] = [] - self.reverse_list[distribution] = [] - # self.missing[distribution] = [] - - def add_edge(self, x, y, label=None): - """Add an edge from distribution *x* to distribution *y* with the given - *label*. - - :type x: :class:`distutils2.database.InstalledDistribution` or - :class:`distutils2.database.EggInfoDistribution` - :type y: :class:`distutils2.database.InstalledDistribution` or - :class:`distutils2.database.EggInfoDistribution` - :type label: ``str`` or ``None`` - """ - self.adjacency_list[x].append((y, label)) - # multiple edges are allowed, so be careful - if x not in self.reverse_list[y]: - self.reverse_list[y].append(x) - - def add_missing(self, distribution, requirement): - """ - Add a missing *requirement* for the given *distribution*. - - :type distribution: :class:`distutils2.database.InstalledDistribution` - or :class:`distutils2.database.EggInfoDistribution` - :type requirement: ``str`` - """ - logger.debug('%s missing %r', distribution, requirement) - self.missing.setdefault(distribution, []).append(requirement) - - def _repr_dist(self, dist): - return '%s %s' % (dist.name, dist.version) - - def repr_node(self, dist, level=1): - """Prints only a subgraph""" - output = [self._repr_dist(dist)] - for other, label in self.adjacency_list[dist]: - dist = self._repr_dist(other) - if label is not None: - dist = '%s [%s]' % (dist, label) - output.append(' ' * level + str(dist)) - suboutput = self.repr_node(other, level + 1) - subs = suboutput.split('\n') - output.extend(subs[1:]) - return '\n'.join(output) - - def to_dot(self, f, skip_disconnected=True): - """Writes a DOT output for the graph to the provided file *f*. - - If *skip_disconnected* is set to ``True``, then all distributions - that are not dependent on any other distribution are skipped. - - :type f: has to support ``file``-like operations - :type skip_disconnected: ``bool`` - """ - disconnected = [] - - f.write("digraph dependencies {\n") - for dist, adjs in self.adjacency_list.items(): - if len(adjs) == 0 and not skip_disconnected: - disconnected.append(dist) - for other, label in adjs: - if label is not None: - f.write('"%s" -> "%s" [label="%s"]\n' % (dist.name, other.name, label)) - else: - f.write('"%s" -> "%s"\n' % (dist.name, other.name)) - if not skip_disconnected and len(disconnected) > 0: - f.write('subgraph disconnected {\n') - f.write('label = "Disconnected"\n') - f.write('bgcolor = red\n') - - for dist in disconnected: - f.write('"%s"' % dist.name) - f.write('\n') - f.write('}\n') - f.write('}\n') - - def topological_sort(self): - """ - Perform a topological sort of the graph. - :return: A tuple, the first element of which is a topologically sorted - list of distributions, and the second element of which is a - list of distributions that cannot be sorted because they have - circular dependencies and so form a cycle. - """ - result = [] - # Make a shallow copy of the adjacency list - alist = {} - for k, v in self.adjacency_list.items(): - alist[k] = v[:] - while True: - # See what we can remove in this run - to_remove = [] - for k, v in list(alist.items())[:]: - if not v: - to_remove.append(k) - del alist[k] - if not to_remove: - # What's left in alist (if anything) is a cycle. - break - # Remove from the adjacency list of others - for k, v in alist.items(): - alist[k] = [(d, r) for d, r in v if d not in to_remove] - logger.debug('Moving to result: %s', ['%s (%s)' % (d.name, d.version) for d in to_remove]) - result.extend(to_remove) - return result, list(alist.keys()) - - def __repr__(self): - """Representation of the graph""" - output = [] - for dist, adjs in self.adjacency_list.items(): - output.append(self.repr_node(dist)) - return '\n'.join(output) - - -def make_graph(dists, scheme='default'): - """Makes a dependency graph from the given distributions. - - :parameter dists: a list of distributions - :type dists: list of :class:`distutils2.database.InstalledDistribution` and - :class:`distutils2.database.EggInfoDistribution` instances - :rtype: a :class:`DependencyGraph` instance - """ - scheme = get_scheme(scheme) - graph = DependencyGraph() - provided = {} # maps names to lists of (version, dist) tuples - - # first, build the graph and find out what's provided - for dist in dists: - graph.add_distribution(dist) - - for p in dist.provides: - name, version = parse_name_and_version(p) - logger.debug('Add to provided: %s, %s, %s', name, version, dist) - provided.setdefault(name, []).append((version, dist)) - - # now make the edges - for dist in dists: - requires = (dist.run_requires | dist.meta_requires | dist.build_requires | dist.dev_requires) - for req in requires: - try: - matcher = scheme.matcher(req) - except UnsupportedVersionError: - # XXX compat-mode if cannot read the version - logger.warning('could not read version %r - using name only', req) - name = req.split()[0] - matcher = scheme.matcher(name) - - name = matcher.key # case-insensitive - - matched = False - if name in provided: - for version, provider in provided[name]: - try: - match = matcher.match(version) - except UnsupportedVersionError: - match = False - - if match: - graph.add_edge(dist, provider, req) - matched = True - break - if not matched: - graph.add_missing(dist, req) - return graph - - -def get_dependent_dists(dists, dist): - """Recursively generate a list of distributions from *dists* that are - dependent on *dist*. - - :param dists: a list of distributions - :param dist: a distribution, member of *dists* for which we are interested - """ - if dist not in dists: - raise DistlibException('given distribution %r is not a member ' - 'of the list' % dist.name) - graph = make_graph(dists) - - dep = [dist] # dependent distributions - todo = graph.reverse_list[dist] # list of nodes we should inspect - - while todo: - d = todo.pop() - dep.append(d) - for succ in graph.reverse_list[d]: - if succ not in dep: - todo.append(succ) - - dep.pop(0) # remove dist from dep, was there to prevent infinite loops - return dep - - -def get_required_dists(dists, dist): - """Recursively generate a list of distributions from *dists* that are - required by *dist*. - - :param dists: a list of distributions - :param dist: a distribution, member of *dists* for which we are interested - in finding the dependencies. - """ - if dist not in dists: - raise DistlibException('given distribution %r is not a member ' - 'of the list' % dist.name) - graph = make_graph(dists) - - req = set() # required distributions - todo = graph.adjacency_list[dist] # list of nodes we should inspect - seen = set(t[0] for t in todo) # already added to todo - - while todo: - d = todo.pop()[0] - req.add(d) - pred_list = graph.adjacency_list[d] - for pred in pred_list: - d = pred[0] - if d not in req and d not in seen: - seen.add(d) - todo.append(pred) - return req - - -def make_dist(name, version, **kwargs): - """ - A convenience method for making a dist given just a name and version. - """ - summary = kwargs.pop('summary', 'Placeholder for summary') - md = Metadata(**kwargs) - md.name = name - md.version = version - md.summary = summary or 'Placeholder for summary' - return Distribution(md) diff --git a/src/pip/_vendor/distlib/index.py b/src/pip/_vendor/distlib/index.py deleted file mode 100644 index 56cd2867145..00000000000 --- a/src/pip/_vendor/distlib/index.py +++ /dev/null @@ -1,508 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2013-2023 Vinay Sajip. -# Licensed to the Python Software Foundation under a contributor agreement. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -import hashlib -import logging -import os -import shutil -import subprocess -import tempfile -try: - from threading import Thread -except ImportError: # pragma: no cover - from dummy_threading import Thread - -from . import DistlibException -from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr, - urlparse, build_opener, string_types) -from .util import zip_dir, ServerProxy - -logger = logging.getLogger(__name__) - -DEFAULT_INDEX = 'https://pypi.org/pypi' -DEFAULT_REALM = 'pypi' - - -class PackageIndex(object): - """ - This class represents a package index compatible with PyPI, the Python - Package Index. - """ - - boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$' - - def __init__(self, url=None): - """ - Initialise an instance. - - :param url: The URL of the index. If not specified, the URL for PyPI is - used. - """ - self.url = url or DEFAULT_INDEX - self.read_configuration() - scheme, netloc, path, params, query, frag = urlparse(self.url) - if params or query or frag or scheme not in ('http', 'https'): - raise DistlibException('invalid repository: %s' % self.url) - self.password_handler = None - self.ssl_verifier = None - self.gpg = None - self.gpg_home = None - with open(os.devnull, 'w') as sink: - # Use gpg by default rather than gpg2, as gpg2 insists on - # prompting for passwords - for s in ('gpg', 'gpg2'): - try: - rc = subprocess.check_call([s, '--version'], stdout=sink, - stderr=sink) - if rc == 0: - self.gpg = s - break - except OSError: - pass - - def _get_pypirc_command(self): - """ - Get the distutils command for interacting with PyPI configurations. - :return: the command. - """ - from .util import _get_pypirc_command as cmd - return cmd() - - def read_configuration(self): - """ - Read the PyPI access configuration as supported by distutils. This populates - ``username``, ``password``, ``realm`` and ``url`` attributes from the - configuration. - """ - from .util import _load_pypirc - cfg = _load_pypirc(self) - self.username = cfg.get('username') - self.password = cfg.get('password') - self.realm = cfg.get('realm', 'pypi') - self.url = cfg.get('repository', self.url) - - def save_configuration(self): - """ - Save the PyPI access configuration. You must have set ``username`` and - ``password`` attributes before calling this method. - """ - self.check_credentials() - from .util import _store_pypirc - _store_pypirc(self) - - def check_credentials(self): - """ - Check that ``username`` and ``password`` have been set, and raise an - exception if not. - """ - if self.username is None or self.password is None: - raise DistlibException('username and password must be set') - pm = HTTPPasswordMgr() - _, netloc, _, _, _, _ = urlparse(self.url) - pm.add_password(self.realm, netloc, self.username, self.password) - self.password_handler = HTTPBasicAuthHandler(pm) - - def register(self, metadata): # pragma: no cover - """ - Register a distribution on PyPI, using the provided metadata. - - :param metadata: A :class:`Metadata` instance defining at least a name - and version number for the distribution to be - registered. - :return: The HTTP response received from PyPI upon submission of the - request. - """ - self.check_credentials() - metadata.validate() - d = metadata.todict() - d[':action'] = 'verify' - request = self.encode_request(d.items(), []) - self.send_request(request) - d[':action'] = 'submit' - request = self.encode_request(d.items(), []) - return self.send_request(request) - - def _reader(self, name, stream, outbuf): - """ - Thread runner for reading lines of from a subprocess into a buffer. - - :param name: The logical name of the stream (used for logging only). - :param stream: The stream to read from. This will typically a pipe - connected to the output stream of a subprocess. - :param outbuf: The list to append the read lines to. - """ - while True: - s = stream.readline() - if not s: - break - s = s.decode('utf-8').rstrip() - outbuf.append(s) - logger.debug('%s: %s' % (name, s)) - stream.close() - - def get_sign_command(self, filename, signer, sign_password, keystore=None): # pragma: no cover - """ - Return a suitable command for signing a file. - - :param filename: The pathname to the file to be signed. - :param signer: The identifier of the signer of the file. - :param sign_password: The passphrase for the signer's - private key used for signing. - :param keystore: The path to a directory which contains the keys - used in verification. If not specified, the - instance's ``gpg_home`` attribute is used instead. - :return: The signing command as a list suitable to be - passed to :class:`subprocess.Popen`. - """ - cmd = [self.gpg, '--status-fd', '2', '--no-tty'] - if keystore is None: - keystore = self.gpg_home - if keystore: - cmd.extend(['--homedir', keystore]) - if sign_password is not None: - cmd.extend(['--batch', '--passphrase-fd', '0']) - td = tempfile.mkdtemp() - sf = os.path.join(td, os.path.basename(filename) + '.asc') - cmd.extend(['--detach-sign', '--armor', '--local-user', - signer, '--output', sf, filename]) - logger.debug('invoking: %s', ' '.join(cmd)) - return cmd, sf - - def run_command(self, cmd, input_data=None): - """ - Run a command in a child process , passing it any input data specified. - - :param cmd: The command to run. - :param input_data: If specified, this must be a byte string containing - data to be sent to the child process. - :return: A tuple consisting of the subprocess' exit code, a list of - lines read from the subprocess' ``stdout``, and a list of - lines read from the subprocess' ``stderr``. - """ - kwargs = { - 'stdout': subprocess.PIPE, - 'stderr': subprocess.PIPE, - } - if input_data is not None: - kwargs['stdin'] = subprocess.PIPE - stdout = [] - stderr = [] - p = subprocess.Popen(cmd, **kwargs) - # We don't use communicate() here because we may need to - # get clever with interacting with the command - t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout)) - t1.start() - t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr)) - t2.start() - if input_data is not None: - p.stdin.write(input_data) - p.stdin.close() - - p.wait() - t1.join() - t2.join() - return p.returncode, stdout, stderr - - def sign_file(self, filename, signer, sign_password, keystore=None): # pragma: no cover - """ - Sign a file. - - :param filename: The pathname to the file to be signed. - :param signer: The identifier of the signer of the file. - :param sign_password: The passphrase for the signer's - private key used for signing. - :param keystore: The path to a directory which contains the keys - used in signing. If not specified, the instance's - ``gpg_home`` attribute is used instead. - :return: The absolute pathname of the file where the signature is - stored. - """ - cmd, sig_file = self.get_sign_command(filename, signer, sign_password, - keystore) - rc, stdout, stderr = self.run_command(cmd, - sign_password.encode('utf-8')) - if rc != 0: - raise DistlibException('sign command failed with error ' - 'code %s' % rc) - return sig_file - - def upload_file(self, metadata, filename, signer=None, sign_password=None, - filetype='sdist', pyversion='source', keystore=None): - """ - Upload a release file to the index. - - :param metadata: A :class:`Metadata` instance defining at least a name - and version number for the file to be uploaded. - :param filename: The pathname of the file to be uploaded. - :param signer: The identifier of the signer of the file. - :param sign_password: The passphrase for the signer's - private key used for signing. - :param filetype: The type of the file being uploaded. This is the - distutils command which produced that file, e.g. - ``sdist`` or ``bdist_wheel``. - :param pyversion: The version of Python which the release relates - to. For code compatible with any Python, this would - be ``source``, otherwise it would be e.g. ``3.2``. - :param keystore: The path to a directory which contains the keys - used in signing. If not specified, the instance's - ``gpg_home`` attribute is used instead. - :return: The HTTP response received from PyPI upon submission of the - request. - """ - self.check_credentials() - if not os.path.exists(filename): - raise DistlibException('not found: %s' % filename) - metadata.validate() - d = metadata.todict() - sig_file = None - if signer: - if not self.gpg: - logger.warning('no signing program available - not signed') - else: - sig_file = self.sign_file(filename, signer, sign_password, - keystore) - with open(filename, 'rb') as f: - file_data = f.read() - md5_digest = hashlib.md5(file_data).hexdigest() - sha256_digest = hashlib.sha256(file_data).hexdigest() - d.update({ - ':action': 'file_upload', - 'protocol_version': '1', - 'filetype': filetype, - 'pyversion': pyversion, - 'md5_digest': md5_digest, - 'sha256_digest': sha256_digest, - }) - files = [('content', os.path.basename(filename), file_data)] - if sig_file: - with open(sig_file, 'rb') as f: - sig_data = f.read() - files.append(('gpg_signature', os.path.basename(sig_file), - sig_data)) - shutil.rmtree(os.path.dirname(sig_file)) - request = self.encode_request(d.items(), files) - return self.send_request(request) - - def upload_documentation(self, metadata, doc_dir): # pragma: no cover - """ - Upload documentation to the index. - - :param metadata: A :class:`Metadata` instance defining at least a name - and version number for the documentation to be - uploaded. - :param doc_dir: The pathname of the directory which contains the - documentation. This should be the directory that - contains the ``index.html`` for the documentation. - :return: The HTTP response received from PyPI upon submission of the - request. - """ - self.check_credentials() - if not os.path.isdir(doc_dir): - raise DistlibException('not a directory: %r' % doc_dir) - fn = os.path.join(doc_dir, 'index.html') - if not os.path.exists(fn): - raise DistlibException('not found: %r' % fn) - metadata.validate() - name, version = metadata.name, metadata.version - zip_data = zip_dir(doc_dir).getvalue() - fields = [(':action', 'doc_upload'), - ('name', name), ('version', version)] - files = [('content', name, zip_data)] - request = self.encode_request(fields, files) - return self.send_request(request) - - def get_verify_command(self, signature_filename, data_filename, - keystore=None): - """ - Return a suitable command for verifying a file. - - :param signature_filename: The pathname to the file containing the - signature. - :param data_filename: The pathname to the file containing the - signed data. - :param keystore: The path to a directory which contains the keys - used in verification. If not specified, the - instance's ``gpg_home`` attribute is used instead. - :return: The verifying command as a list suitable to be - passed to :class:`subprocess.Popen`. - """ - cmd = [self.gpg, '--status-fd', '2', '--no-tty'] - if keystore is None: - keystore = self.gpg_home - if keystore: - cmd.extend(['--homedir', keystore]) - cmd.extend(['--verify', signature_filename, data_filename]) - logger.debug('invoking: %s', ' '.join(cmd)) - return cmd - - def verify_signature(self, signature_filename, data_filename, - keystore=None): - """ - Verify a signature for a file. - - :param signature_filename: The pathname to the file containing the - signature. - :param data_filename: The pathname to the file containing the - signed data. - :param keystore: The path to a directory which contains the keys - used in verification. If not specified, the - instance's ``gpg_home`` attribute is used instead. - :return: True if the signature was verified, else False. - """ - if not self.gpg: - raise DistlibException('verification unavailable because gpg ' - 'unavailable') - cmd = self.get_verify_command(signature_filename, data_filename, - keystore) - rc, stdout, stderr = self.run_command(cmd) - if rc not in (0, 1): - raise DistlibException('verify command failed with error code %s' % rc) - return rc == 0 - - def download_file(self, url, destfile, digest=None, reporthook=None): - """ - This is a convenience method for downloading a file from an URL. - Normally, this will be a file from the index, though currently - no check is made for this (i.e. a file can be downloaded from - anywhere). - - The method is just like the :func:`urlretrieve` function in the - standard library, except that it allows digest computation to be - done during download and checking that the downloaded data - matched any expected value. - - :param url: The URL of the file to be downloaded (assumed to be - available via an HTTP GET request). - :param destfile: The pathname where the downloaded file is to be - saved. - :param digest: If specified, this must be a (hasher, value) - tuple, where hasher is the algorithm used (e.g. - ``'md5'``) and ``value`` is the expected value. - :param reporthook: The same as for :func:`urlretrieve` in the - standard library. - """ - if digest is None: - digester = None - logger.debug('No digest specified') - else: - if isinstance(digest, (list, tuple)): - hasher, digest = digest - else: - hasher = 'md5' - digester = getattr(hashlib, hasher)() - logger.debug('Digest specified: %s' % digest) - # The following code is equivalent to urlretrieve. - # We need to do it this way so that we can compute the - # digest of the file as we go. - with open(destfile, 'wb') as dfp: - # addinfourl is not a context manager on 2.x - # so we have to use try/finally - sfp = self.send_request(Request(url)) - try: - headers = sfp.info() - blocksize = 8192 - size = -1 - read = 0 - blocknum = 0 - if "content-length" in headers: - size = int(headers["Content-Length"]) - if reporthook: - reporthook(blocknum, blocksize, size) - while True: - block = sfp.read(blocksize) - if not block: - break - read += len(block) - dfp.write(block) - if digester: - digester.update(block) - blocknum += 1 - if reporthook: - reporthook(blocknum, blocksize, size) - finally: - sfp.close() - - # check that we got the whole file, if we can - if size >= 0 and read < size: - raise DistlibException( - 'retrieval incomplete: got only %d out of %d bytes' - % (read, size)) - # if we have a digest, it must match. - if digester: - actual = digester.hexdigest() - if digest != actual: - raise DistlibException('%s digest mismatch for %s: expected ' - '%s, got %s' % (hasher, destfile, - digest, actual)) - logger.debug('Digest verified: %s', digest) - - def send_request(self, req): - """ - Send a standard library :class:`Request` to PyPI and return its - response. - - :param req: The request to send. - :return: The HTTP response from PyPI (a standard library HTTPResponse). - """ - handlers = [] - if self.password_handler: - handlers.append(self.password_handler) - if self.ssl_verifier: - handlers.append(self.ssl_verifier) - opener = build_opener(*handlers) - return opener.open(req) - - def encode_request(self, fields, files): - """ - Encode fields and files for posting to an HTTP server. - - :param fields: The fields to send as a list of (fieldname, value) - tuples. - :param files: The files to send as a list of (fieldname, filename, - file_bytes) tuple. - """ - # Adapted from packaging, which in turn was adapted from - # http://code.activestate.com/recipes/146306 - - parts = [] - boundary = self.boundary - for k, values in fields: - if not isinstance(values, (list, tuple)): - values = [values] - - for v in values: - parts.extend(( - b'--' + boundary, - ('Content-Disposition: form-data; name="%s"' % - k).encode('utf-8'), - b'', - v.encode('utf-8'))) - for key, filename, value in files: - parts.extend(( - b'--' + boundary, - ('Content-Disposition: form-data; name="%s"; filename="%s"' % - (key, filename)).encode('utf-8'), - b'', - value)) - - parts.extend((b'--' + boundary + b'--', b'')) - - body = b'\r\n'.join(parts) - ct = b'multipart/form-data; boundary=' + boundary - headers = { - 'Content-type': ct, - 'Content-length': str(len(body)) - } - return Request(self.url, body, headers) - - def search(self, terms, operator=None): # pragma: no cover - if isinstance(terms, string_types): - terms = {'name': terms} - rpc_proxy = ServerProxy(self.url, timeout=3.0) - try: - return rpc_proxy.search(terms, operator or 'and') - finally: - rpc_proxy('close')() diff --git a/src/pip/_vendor/distlib/locators.py b/src/pip/_vendor/distlib/locators.py deleted file mode 100644 index 222c1bf3e90..00000000000 --- a/src/pip/_vendor/distlib/locators.py +++ /dev/null @@ -1,1295 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012-2023 Vinay Sajip. -# Licensed to the Python Software Foundation under a contributor agreement. -# See LICENSE.txt and CONTRIBUTORS.txt. -# - -import gzip -from io import BytesIO -import json -import logging -import os -import posixpath -import re -try: - import threading -except ImportError: # pragma: no cover - import dummy_threading as threading -import zlib - -from . import DistlibException -from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url, queue, quote, unescape, build_opener, - HTTPRedirectHandler as BaseRedirectHandler, text_type, Request, HTTPError, URLError) -from .database import Distribution, DistributionPath, make_dist -from .metadata import Metadata, MetadataInvalidError -from .util import (cached_property, ensure_slash, split_filename, get_project_data, parse_requirement, - parse_name_and_version, ServerProxy, normalize_name) -from .version import get_scheme, UnsupportedVersionError -from .wheel import Wheel, is_compatible - -logger = logging.getLogger(__name__) - -HASHER_HASH = re.compile(r'^(\w+)=([a-f0-9]+)') -CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I) -HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml') -DEFAULT_INDEX = 'https://pypi.org/pypi' - - -def get_all_distribution_names(url=None): - """ - Return all distribution names known by an index. - :param url: The URL of the index. - :return: A list of all known distribution names. - """ - if url is None: - url = DEFAULT_INDEX - client = ServerProxy(url, timeout=3.0) - try: - return client.list_packages() - finally: - client('close')() - - -class RedirectHandler(BaseRedirectHandler): - """ - A class to work around a bug in some Python 3.2.x releases. - """ - - # There's a bug in the base version for some 3.2.x - # (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header - # returns e.g. /abc, it bails because it says the scheme '' - # is bogus, when actually it should use the request's - # URL for the scheme. See Python issue #13696. - def http_error_302(self, req, fp, code, msg, headers): - # Some servers (incorrectly) return multiple Location headers - # (so probably same goes for URI). Use first header. - newurl = None - for key in ('location', 'uri'): - if key in headers: - newurl = headers[key] - break - if newurl is None: # pragma: no cover - return - urlparts = urlparse(newurl) - if urlparts.scheme == '': - newurl = urljoin(req.get_full_url(), newurl) - if hasattr(headers, 'replace_header'): - headers.replace_header(key, newurl) - else: - headers[key] = newurl - return BaseRedirectHandler.http_error_302(self, req, fp, code, msg, headers) - - http_error_301 = http_error_303 = http_error_307 = http_error_302 - - -class Locator(object): - """ - A base class for locators - things that locate distributions. - """ - source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz') - binary_extensions = ('.egg', '.exe', '.whl') - excluded_extensions = ('.pdf', ) - - # A list of tags indicating which wheels you want to match. The default - # value of None matches against the tags compatible with the running - # Python. If you want to match other values, set wheel_tags on a locator - # instance to a list of tuples (pyver, abi, arch) which you want to match. - wheel_tags = None - - downloadable_extensions = source_extensions + ('.whl', ) - - def __init__(self, scheme='default'): - """ - Initialise an instance. - :param scheme: Because locators look for most recent versions, they - need to know the version scheme to use. This specifies - the current PEP-recommended scheme - use ``'legacy'`` - if you need to support existing distributions on PyPI. - """ - self._cache = {} - self.scheme = scheme - # Because of bugs in some of the handlers on some of the platforms, - # we use our own opener rather than just using urlopen. - self.opener = build_opener(RedirectHandler()) - # If get_project() is called from locate(), the matcher instance - # is set from the requirement passed to locate(). See issue #18 for - # why this can be useful to know. - self.matcher = None - self.errors = queue.Queue() - - def get_errors(self): - """ - Return any errors which have occurred. - """ - result = [] - while not self.errors.empty(): # pragma: no cover - try: - e = self.errors.get(False) - result.append(e) - except self.errors.Empty: - continue - self.errors.task_done() - return result - - def clear_errors(self): - """ - Clear any errors which may have been logged. - """ - # Just get the errors and throw them away - self.get_errors() - - def clear_cache(self): - self._cache.clear() - - def _get_scheme(self): - return self._scheme - - def _set_scheme(self, value): - self._scheme = value - - scheme = property(_get_scheme, _set_scheme) - - def _get_project(self, name): - """ - For a given project, get a dictionary mapping available versions to Distribution - instances. - - This should be implemented in subclasses. - - If called from a locate() request, self.matcher will be set to a - matcher for the requirement to satisfy, otherwise it will be None. - """ - raise NotImplementedError('Please implement in the subclass') - - def get_distribution_names(self): - """ - Return all the distribution names known to this locator. - """ - raise NotImplementedError('Please implement in the subclass') - - def get_project(self, name): - """ - For a given project, get a dictionary mapping available versions to Distribution - instances. - - This calls _get_project to do all the work, and just implements a caching layer on top. - """ - if self._cache is None: # pragma: no cover - result = self._get_project(name) - elif name in self._cache: - result = self._cache[name] - else: - self.clear_errors() - result = self._get_project(name) - self._cache[name] = result - return result - - def score_url(self, url): - """ - Give an url a score which can be used to choose preferred URLs - for a given project release. - """ - t = urlparse(url) - basename = posixpath.basename(t.path) - compatible = True - is_wheel = basename.endswith('.whl') - is_downloadable = basename.endswith(self.downloadable_extensions) - if is_wheel: - compatible = is_compatible(Wheel(basename), self.wheel_tags) - return (t.scheme == 'https', 'pypi.org' in t.netloc, is_downloadable, is_wheel, compatible, basename) - - def prefer_url(self, url1, url2): - """ - Choose one of two URLs where both are candidates for distribution - archives for the same version of a distribution (for example, - .tar.gz vs. zip). - - The current implementation favours https:// URLs over http://, archives - from PyPI over those from other locations, wheel compatibility (if a - wheel) and then the archive name. - """ - result = url2 - if url1: - s1 = self.score_url(url1) - s2 = self.score_url(url2) - if s1 > s2: - result = url1 - if result != url2: - logger.debug('Not replacing %r with %r', url1, url2) - else: - logger.debug('Replacing %r with %r', url1, url2) - return result - - def split_filename(self, filename, project_name): - """ - Attempt to split a filename in project name, version and Python version. - """ - return split_filename(filename, project_name) - - def convert_url_to_download_info(self, url, project_name): - """ - See if a URL is a candidate for a download URL for a project (the URL - has typically been scraped from an HTML page). - - If it is, a dictionary is returned with keys "name", "version", - "filename" and "url"; otherwise, None is returned. - """ - - def same_project(name1, name2): - return normalize_name(name1) == normalize_name(name2) - - result = None - scheme, netloc, path, params, query, frag = urlparse(url) - if frag.lower().startswith('egg='): # pragma: no cover - logger.debug('%s: version hint in fragment: %r', project_name, frag) - m = HASHER_HASH.match(frag) - if m: - algo, digest = m.groups() - else: - algo, digest = None, None - origpath = path - if path and path[-1] == '/': # pragma: no cover - path = path[:-1] - if path.endswith('.whl'): - try: - wheel = Wheel(path) - if not is_compatible(wheel, self.wheel_tags): - logger.debug('Wheel not compatible: %s', path) - else: - if project_name is None: - include = True - else: - include = same_project(wheel.name, project_name) - if include: - result = { - 'name': wheel.name, - 'version': wheel.version, - 'filename': wheel.filename, - 'url': urlunparse((scheme, netloc, origpath, params, query, '')), - 'python-version': ', '.join(['.'.join(list(v[2:])) for v in wheel.pyver]), - } - except Exception: # pragma: no cover - logger.warning('invalid path for wheel: %s', path) - elif not path.endswith(self.downloadable_extensions): # pragma: no cover - logger.debug('Not downloadable: %s', path) - else: # downloadable extension - path = filename = posixpath.basename(path) - for ext in self.downloadable_extensions: - if path.endswith(ext): - path = path[:-len(ext)] - t = self.split_filename(path, project_name) - if not t: # pragma: no cover - logger.debug('No match for project/version: %s', path) - else: - name, version, pyver = t - if not project_name or same_project(project_name, name): - result = { - 'name': name, - 'version': version, - 'filename': filename, - 'url': urlunparse((scheme, netloc, origpath, params, query, '')), - } - if pyver: # pragma: no cover - result['python-version'] = pyver - break - if result and algo: - result['%s_digest' % algo] = digest - return result - - def _get_digest(self, info): - """ - Get a digest from a dictionary by looking at a "digests" dictionary - or keys of the form 'algo_digest'. - - Returns a 2-tuple (algo, digest) if found, else None. Currently - looks only for SHA256, then MD5. - """ - result = None - if 'digests' in info: - digests = info['digests'] - for algo in ('sha256', 'md5'): - if algo in digests: - result = (algo, digests[algo]) - break - if not result: - for algo in ('sha256', 'md5'): - key = '%s_digest' % algo - if key in info: - result = (algo, info[key]) - break - return result - - def _update_version_data(self, result, info): - """ - Update a result dictionary (the final result from _get_project) with a - dictionary for a specific version, which typically holds information - gleaned from a filename or URL for an archive for the distribution. - """ - name = info.pop('name') - version = info.pop('version') - if version in result: - dist = result[version] - md = dist.metadata - else: - dist = make_dist(name, version, scheme=self.scheme) - md = dist.metadata - dist.digest = digest = self._get_digest(info) - url = info['url'] - result['digests'][url] = digest - if md.source_url != info['url']: - md.source_url = self.prefer_url(md.source_url, url) - result['urls'].setdefault(version, set()).add(url) - dist.locator = self - result[version] = dist - - def locate(self, requirement, prereleases=False): - """ - Find the most recent distribution which matches the given - requirement. - - :param requirement: A requirement of the form 'foo (1.0)' or perhaps - 'foo (>= 1.0, < 2.0, != 1.3)' - :param prereleases: If ``True``, allow pre-release versions - to be located. Otherwise, pre-release versions - are not returned. - :return: A :class:`Distribution` instance, or ``None`` if no such - distribution could be located. - """ - result = None - r = parse_requirement(requirement) - if r is None: # pragma: no cover - raise DistlibException('Not a valid requirement: %r' % requirement) - scheme = get_scheme(self.scheme) - self.matcher = matcher = scheme.matcher(r.requirement) - logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__) - versions = self.get_project(r.name) - if len(versions) > 2: # urls and digests keys are present - # sometimes, versions are invalid - slist = [] - vcls = matcher.version_class - for k in versions: - if k in ('urls', 'digests'): - continue - try: - if not matcher.match(k): - pass # logger.debug('%s did not match %r', matcher, k) - else: - if prereleases or not vcls(k).is_prerelease: - slist.append(k) - except Exception: # pragma: no cover - logger.warning('error matching %s with %r', matcher, k) - pass # slist.append(k) - if len(slist) > 1: - slist = sorted(slist, key=scheme.key) - if slist: - logger.debug('sorted list: %s', slist) - version = slist[-1] - result = versions[version] - if result: - if r.extras: - result.extras = r.extras - result.download_urls = versions.get('urls', {}).get(version, set()) - d = {} - sd = versions.get('digests', {}) - for url in result.download_urls: - if url in sd: # pragma: no cover - d[url] = sd[url] - result.digests = d - self.matcher = None - return result - - -class PyPIRPCLocator(Locator): - """ - This locator uses XML-RPC to locate distributions. It therefore - cannot be used with simple mirrors (that only mirror file content). - """ - - def __init__(self, url, **kwargs): - """ - Initialise an instance. - - :param url: The URL to use for XML-RPC. - :param kwargs: Passed to the superclass constructor. - """ - super(PyPIRPCLocator, self).__init__(**kwargs) - self.base_url = url - self.client = ServerProxy(url, timeout=3.0) - - def get_distribution_names(self): - """ - Return all the distribution names known to this locator. - """ - return set(self.client.list_packages()) - - def _get_project(self, name): - result = {'urls': {}, 'digests': {}} - versions = self.client.package_releases(name, True) - for v in versions: - urls = self.client.release_urls(name, v) - data = self.client.release_data(name, v) - metadata = Metadata(scheme=self.scheme) - metadata.name = data['name'] - metadata.version = data['version'] - metadata.license = data.get('license') - metadata.keywords = data.get('keywords', []) - metadata.summary = data.get('summary') - dist = Distribution(metadata) - if urls: - info = urls[0] - metadata.source_url = info['url'] - dist.digest = self._get_digest(info) - dist.locator = self - result[v] = dist - for info in urls: - url = info['url'] - digest = self._get_digest(info) - result['urls'].setdefault(v, set()).add(url) - result['digests'][url] = digest - return result - - -class PyPIJSONLocator(Locator): - """ - This locator uses PyPI's JSON interface. It's very limited in functionality - and probably not worth using. - """ - - def __init__(self, url, **kwargs): - super(PyPIJSONLocator, self).__init__(**kwargs) - self.base_url = ensure_slash(url) - - def get_distribution_names(self): - """ - Return all the distribution names known to this locator. - """ - raise NotImplementedError('Not available from this locator') - - def _get_project(self, name): - result = {'urls': {}, 'digests': {}} - url = urljoin(self.base_url, '%s/json' % quote(name)) - try: - resp = self.opener.open(url) - data = resp.read().decode() # for now - d = json.loads(data) - md = Metadata(scheme=self.scheme) - data = d['info'] - md.name = data['name'] - md.version = data['version'] - md.license = data.get('license') - md.keywords = data.get('keywords', []) - md.summary = data.get('summary') - dist = Distribution(md) - dist.locator = self - # urls = d['urls'] - result[md.version] = dist - for info in d['urls']: - url = info['url'] - dist.download_urls.add(url) - dist.digests[url] = self._get_digest(info) - result['urls'].setdefault(md.version, set()).add(url) - result['digests'][url] = self._get_digest(info) - # Now get other releases - for version, infos in d['releases'].items(): - if version == md.version: - continue # already done - omd = Metadata(scheme=self.scheme) - omd.name = md.name - omd.version = version - odist = Distribution(omd) - odist.locator = self - result[version] = odist - for info in infos: - url = info['url'] - odist.download_urls.add(url) - odist.digests[url] = self._get_digest(info) - result['urls'].setdefault(version, set()).add(url) - result['digests'][url] = self._get_digest(info) - - -# for info in urls: -# md.source_url = info['url'] -# dist.digest = self._get_digest(info) -# dist.locator = self -# for info in urls: -# url = info['url'] -# result['urls'].setdefault(md.version, set()).add(url) -# result['digests'][url] = self._get_digest(info) - except Exception as e: - self.errors.put(text_type(e)) - logger.exception('JSON fetch failed: %s', e) - return result - - -class Page(object): - """ - This class represents a scraped HTML page. - """ - # The following slightly hairy-looking regex just looks for the contents of - # an anchor link, which has an attribute "href" either immediately preceded - # or immediately followed by a "rel" attribute. The attribute values can be - # declared with double quotes, single quotes or no quotes - which leads to - # the length of the expression. - _href = re.compile( - """ -(rel\\s*=\\s*(?:"(?P[^"]*)"|'(?P[^']*)'|(?P[^>\\s\n]*))\\s+)? -href\\s*=\\s*(?:"(?P[^"]*)"|'(?P[^']*)'|(?P[^>\\s\n]*)) -(\\s+rel\\s*=\\s*(?:"(?P[^"]*)"|'(?P[^']*)'|(?P[^>\\s\n]*)))? -""", re.I | re.S | re.X) - _base = re.compile(r"""]+)""", re.I | re.S) - - def __init__(self, data, url): - """ - Initialise an instance with the Unicode page contents and the URL they - came from. - """ - self.data = data - self.base_url = self.url = url - m = self._base.search(self.data) - if m: - self.base_url = m.group(1) - - _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I) - - @cached_property - def links(self): - """ - Return the URLs of all the links on a page together with information - about their "rel" attribute, for determining which ones to treat as - downloads and which ones to queue for further scraping. - """ - - def clean(url): - "Tidy up an URL." - scheme, netloc, path, params, query, frag = urlparse(url) - return urlunparse((scheme, netloc, quote(path), params, query, frag)) - - result = set() - for match in self._href.finditer(self.data): - d = match.groupdict('') - rel = (d['rel1'] or d['rel2'] or d['rel3'] or d['rel4'] or d['rel5'] or d['rel6']) - url = d['url1'] or d['url2'] or d['url3'] - url = urljoin(self.base_url, url) - url = unescape(url) - url = self._clean_re.sub(lambda m: '%%%2x' % ord(m.group(0)), url) - result.add((url, rel)) - # We sort the result, hoping to bring the most recent versions - # to the front - result = sorted(result, key=lambda t: t[0], reverse=True) - return result - - -class SimpleScrapingLocator(Locator): - """ - A locator which scrapes HTML pages to locate downloads for a distribution. - This runs multiple threads to do the I/O; performance is at least as good - as pip's PackageFinder, which works in an analogous fashion. - """ - - # These are used to deal with various Content-Encoding schemes. - decoders = { - 'deflate': zlib.decompress, - 'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(b)).read(), - 'none': lambda b: b, - } - - def __init__(self, url, timeout=None, num_workers=10, **kwargs): - """ - Initialise an instance. - :param url: The root URL to use for scraping. - :param timeout: The timeout, in seconds, to be applied to requests. - This defaults to ``None`` (no timeout specified). - :param num_workers: The number of worker threads you want to do I/O, - This defaults to 10. - :param kwargs: Passed to the superclass. - """ - super(SimpleScrapingLocator, self).__init__(**kwargs) - self.base_url = ensure_slash(url) - self.timeout = timeout - self._page_cache = {} - self._seen = set() - self._to_fetch = queue.Queue() - self._bad_hosts = set() - self.skip_externals = False - self.num_workers = num_workers - self._lock = threading.RLock() - # See issue #45: we need to be resilient when the locator is used - # in a thread, e.g. with concurrent.futures. We can't use self._lock - # as it is for coordinating our internal threads - the ones created - # in _prepare_threads. - self._gplock = threading.RLock() - self.platform_check = False # See issue #112 - - def _prepare_threads(self): - """ - Threads are created only when get_project is called, and terminate - before it returns. They are there primarily to parallelise I/O (i.e. - fetching web pages). - """ - self._threads = [] - for i in range(self.num_workers): - t = threading.Thread(target=self._fetch) - t.daemon = True - t.start() - self._threads.append(t) - - def _wait_threads(self): - """ - Tell all the threads to terminate (by sending a sentinel value) and - wait for them to do so. - """ - # Note that you need two loops, since you can't say which - # thread will get each sentinel - for t in self._threads: - self._to_fetch.put(None) # sentinel - for t in self._threads: - t.join() - self._threads = [] - - def _get_project(self, name): - result = {'urls': {}, 'digests': {}} - with self._gplock: - self.result = result - self.project_name = name - url = urljoin(self.base_url, '%s/' % quote(name)) - self._seen.clear() - self._page_cache.clear() - self._prepare_threads() - try: - logger.debug('Queueing %s', url) - self._to_fetch.put(url) - self._to_fetch.join() - finally: - self._wait_threads() - del self.result - return result - - platform_dependent = re.compile(r'\b(linux_(i\d86|x86_64|arm\w+)|' - r'win(32|_amd64)|macosx_?\d+)\b', re.I) - - def _is_platform_dependent(self, url): - """ - Does an URL refer to a platform-specific download? - """ - return self.platform_dependent.search(url) - - def _process_download(self, url): - """ - See if an URL is a suitable download for a project. - - If it is, register information in the result dictionary (for - _get_project) about the specific version it's for. - - Note that the return value isn't actually used other than as a boolean - value. - """ - if self.platform_check and self._is_platform_dependent(url): - info = None - else: - info = self.convert_url_to_download_info(url, self.project_name) - logger.debug('process_download: %s -> %s', url, info) - if info: - with self._lock: # needed because self.result is shared - self._update_version_data(self.result, info) - return info - - def _should_queue(self, link, referrer, rel): - """ - Determine whether a link URL from a referring page and with a - particular "rel" attribute should be queued for scraping. - """ - scheme, netloc, path, _, _, _ = urlparse(link) - if path.endswith(self.source_extensions + self.binary_extensions + self.excluded_extensions): - result = False - elif self.skip_externals and not link.startswith(self.base_url): - result = False - elif not referrer.startswith(self.base_url): - result = False - elif rel not in ('homepage', 'download'): - result = False - elif scheme not in ('http', 'https', 'ftp'): - result = False - elif self._is_platform_dependent(link): - result = False - else: - host = netloc.split(':', 1)[0] - if host.lower() == 'localhost': - result = False - else: - result = True - logger.debug('should_queue: %s (%s) from %s -> %s', link, rel, referrer, result) - return result - - def _fetch(self): - """ - Get a URL to fetch from the work queue, get the HTML page, examine its - links for download candidates and candidates for further scraping. - - This is a handy method to run in a thread. - """ - while True: - url = self._to_fetch.get() - try: - if url: - page = self.get_page(url) - if page is None: # e.g. after an error - continue - for link, rel in page.links: - if link not in self._seen: - try: - self._seen.add(link) - if (not self._process_download(link) and self._should_queue(link, url, rel)): - logger.debug('Queueing %s from %s', link, url) - self._to_fetch.put(link) - except MetadataInvalidError: # e.g. invalid versions - pass - except Exception as e: # pragma: no cover - self.errors.put(text_type(e)) - finally: - # always do this, to avoid hangs :-) - self._to_fetch.task_done() - if not url: - # logger.debug('Sentinel seen, quitting.') - break - - def get_page(self, url): - """ - Get the HTML for an URL, possibly from an in-memory cache. - - XXX TODO Note: this cache is never actually cleared. It's assumed that - the data won't get stale over the lifetime of a locator instance (not - necessarily true for the default_locator). - """ - # http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api - scheme, netloc, path, _, _, _ = urlparse(url) - if scheme == 'file' and os.path.isdir(url2pathname(path)): - url = urljoin(ensure_slash(url), 'index.html') - - if url in self._page_cache: - result = self._page_cache[url] - logger.debug('Returning %s from cache: %s', url, result) - else: - host = netloc.split(':', 1)[0] - result = None - if host in self._bad_hosts: - logger.debug('Skipping %s due to bad host %s', url, host) - else: - req = Request(url, headers={'Accept-encoding': 'identity'}) - try: - logger.debug('Fetching %s', url) - resp = self.opener.open(req, timeout=self.timeout) - logger.debug('Fetched %s', url) - headers = resp.info() - content_type = headers.get('Content-Type', '') - if HTML_CONTENT_TYPE.match(content_type): - final_url = resp.geturl() - data = resp.read() - encoding = headers.get('Content-Encoding') - if encoding: - decoder = self.decoders[encoding] # fail if not found - data = decoder(data) - encoding = 'utf-8' - m = CHARSET.search(content_type) - if m: - encoding = m.group(1) - try: - data = data.decode(encoding) - except UnicodeError: # pragma: no cover - data = data.decode('latin-1') # fallback - result = Page(data, final_url) - self._page_cache[final_url] = result - except HTTPError as e: - if e.code != 404: - logger.exception('Fetch failed: %s: %s', url, e) - except URLError as e: # pragma: no cover - logger.exception('Fetch failed: %s: %s', url, e) - with self._lock: - self._bad_hosts.add(host) - except Exception as e: # pragma: no cover - logger.exception('Fetch failed: %s: %s', url, e) - finally: - self._page_cache[url] = result # even if None (failure) - return result - - _distname_re = re.compile(']*>([^<]+)<') - - def get_distribution_names(self): - """ - Return all the distribution names known to this locator. - """ - result = set() - page = self.get_page(self.base_url) - if not page: - raise DistlibException('Unable to get %s' % self.base_url) - for match in self._distname_re.finditer(page.data): - result.add(match.group(1)) - return result - - -class DirectoryLocator(Locator): - """ - This class locates distributions in a directory tree. - """ - - def __init__(self, path, **kwargs): - """ - Initialise an instance. - :param path: The root of the directory tree to search. - :param kwargs: Passed to the superclass constructor, - except for: - * recursive - if True (the default), subdirectories are - recursed into. If False, only the top-level directory - is searched, - """ - self.recursive = kwargs.pop('recursive', True) - super(DirectoryLocator, self).__init__(**kwargs) - path = os.path.abspath(path) - if not os.path.isdir(path): # pragma: no cover - raise DistlibException('Not a directory: %r' % path) - self.base_dir = path - - def should_include(self, filename, parent): - """ - Should a filename be considered as a candidate for a distribution - archive? As well as the filename, the directory which contains it - is provided, though not used by the current implementation. - """ - return filename.endswith(self.downloadable_extensions) - - def _get_project(self, name): - result = {'urls': {}, 'digests': {}} - for root, dirs, files in os.walk(self.base_dir): - for fn in files: - if self.should_include(fn, root): - fn = os.path.join(root, fn) - url = urlunparse(('file', '', pathname2url(os.path.abspath(fn)), '', '', '')) - info = self.convert_url_to_download_info(url, name) - if info: - self._update_version_data(result, info) - if not self.recursive: - break - return result - - def get_distribution_names(self): - """ - Return all the distribution names known to this locator. - """ - result = set() - for root, dirs, files in os.walk(self.base_dir): - for fn in files: - if self.should_include(fn, root): - fn = os.path.join(root, fn) - url = urlunparse(('file', '', pathname2url(os.path.abspath(fn)), '', '', '')) - info = self.convert_url_to_download_info(url, None) - if info: - result.add(info['name']) - if not self.recursive: - break - return result - - -class JSONLocator(Locator): - """ - This locator uses special extended metadata (not available on PyPI) and is - the basis of performant dependency resolution in distlib. Other locators - require archive downloads before dependencies can be determined! As you - might imagine, that can be slow. - """ - - def get_distribution_names(self): - """ - Return all the distribution names known to this locator. - """ - raise NotImplementedError('Not available from this locator') - - def _get_project(self, name): - result = {'urls': {}, 'digests': {}} - data = get_project_data(name) - if data: - for info in data.get('files', []): - if info['ptype'] != 'sdist' or info['pyversion'] != 'source': - continue - # We don't store summary in project metadata as it makes - # the data bigger for no benefit during dependency - # resolution - dist = make_dist(data['name'], - info['version'], - summary=data.get('summary', 'Placeholder for summary'), - scheme=self.scheme) - md = dist.metadata - md.source_url = info['url'] - # TODO SHA256 digest - if 'digest' in info and info['digest']: - dist.digest = ('md5', info['digest']) - md.dependencies = info.get('requirements', {}) - dist.exports = info.get('exports', {}) - result[dist.version] = dist - result['urls'].setdefault(dist.version, set()).add(info['url']) - return result - - -class DistPathLocator(Locator): - """ - This locator finds installed distributions in a path. It can be useful for - adding to an :class:`AggregatingLocator`. - """ - - def __init__(self, distpath, **kwargs): - """ - Initialise an instance. - - :param distpath: A :class:`DistributionPath` instance to search. - """ - super(DistPathLocator, self).__init__(**kwargs) - assert isinstance(distpath, DistributionPath) - self.distpath = distpath - - def _get_project(self, name): - dist = self.distpath.get_distribution(name) - if dist is None: - result = {'urls': {}, 'digests': {}} - else: - result = { - dist.version: dist, - 'urls': { - dist.version: set([dist.source_url]) - }, - 'digests': { - dist.version: set([None]) - } - } - return result - - -class AggregatingLocator(Locator): - """ - This class allows you to chain and/or merge a list of locators. - """ - - def __init__(self, *locators, **kwargs): - """ - Initialise an instance. - - :param locators: The list of locators to search. - :param kwargs: Passed to the superclass constructor, - except for: - * merge - if False (the default), the first successful - search from any of the locators is returned. If True, - the results from all locators are merged (this can be - slow). - """ - self.merge = kwargs.pop('merge', False) - self.locators = locators - super(AggregatingLocator, self).__init__(**kwargs) - - def clear_cache(self): - super(AggregatingLocator, self).clear_cache() - for locator in self.locators: - locator.clear_cache() - - def _set_scheme(self, value): - self._scheme = value - for locator in self.locators: - locator.scheme = value - - scheme = property(Locator.scheme.fget, _set_scheme) - - def _get_project(self, name): - result = {} - for locator in self.locators: - d = locator.get_project(name) - if d: - if self.merge: - files = result.get('urls', {}) - digests = result.get('digests', {}) - # next line could overwrite result['urls'], result['digests'] - result.update(d) - df = result.get('urls') - if files and df: - for k, v in files.items(): - if k in df: - df[k] |= v - else: - df[k] = v - dd = result.get('digests') - if digests and dd: - dd.update(digests) - else: - # See issue #18. If any dists are found and we're looking - # for specific constraints, we only return something if - # a match is found. For example, if a DirectoryLocator - # returns just foo (1.0) while we're looking for - # foo (>= 2.0), we'll pretend there was nothing there so - # that subsequent locators can be queried. Otherwise we - # would just return foo (1.0) which would then lead to a - # failure to find foo (>= 2.0), because other locators - # weren't searched. Note that this only matters when - # merge=False. - if self.matcher is None: - found = True - else: - found = False - for k in d: - if self.matcher.match(k): - found = True - break - if found: - result = d - break - return result - - def get_distribution_names(self): - """ - Return all the distribution names known to this locator. - """ - result = set() - for locator in self.locators: - try: - result |= locator.get_distribution_names() - except NotImplementedError: - pass - return result - - -# We use a legacy scheme simply because most of the dists on PyPI use legacy -# versions which don't conform to PEP 440. -default_locator = AggregatingLocator( - # JSONLocator(), # don't use as PEP 426 is withdrawn - SimpleScrapingLocator('https://pypi.org/simple/', timeout=3.0), - scheme='legacy') - -locate = default_locator.locate - - -class DependencyFinder(object): - """ - Locate dependencies for distributions. - """ - - def __init__(self, locator=None): - """ - Initialise an instance, using the specified locator - to locate distributions. - """ - self.locator = locator or default_locator - self.scheme = get_scheme(self.locator.scheme) - - def add_distribution(self, dist): - """ - Add a distribution to the finder. This will update internal information - about who provides what. - :param dist: The distribution to add. - """ - logger.debug('adding distribution %s', dist) - name = dist.key - self.dists_by_name[name] = dist - self.dists[(name, dist.version)] = dist - for p in dist.provides: - name, version = parse_name_and_version(p) - logger.debug('Add to provided: %s, %s, %s', name, version, dist) - self.provided.setdefault(name, set()).add((version, dist)) - - def remove_distribution(self, dist): - """ - Remove a distribution from the finder. This will update internal - information about who provides what. - :param dist: The distribution to remove. - """ - logger.debug('removing distribution %s', dist) - name = dist.key - del self.dists_by_name[name] - del self.dists[(name, dist.version)] - for p in dist.provides: - name, version = parse_name_and_version(p) - logger.debug('Remove from provided: %s, %s, %s', name, version, dist) - s = self.provided[name] - s.remove((version, dist)) - if not s: - del self.provided[name] - - def get_matcher(self, reqt): - """ - Get a version matcher for a requirement. - :param reqt: The requirement - :type reqt: str - :return: A version matcher (an instance of - :class:`distlib.version.Matcher`). - """ - try: - matcher = self.scheme.matcher(reqt) - except UnsupportedVersionError: # pragma: no cover - # XXX compat-mode if cannot read the version - name = reqt.split()[0] - matcher = self.scheme.matcher(name) - return matcher - - def find_providers(self, reqt): - """ - Find the distributions which can fulfill a requirement. - - :param reqt: The requirement. - :type reqt: str - :return: A set of distribution which can fulfill the requirement. - """ - matcher = self.get_matcher(reqt) - name = matcher.key # case-insensitive - result = set() - provided = self.provided - if name in provided: - for version, provider in provided[name]: - try: - match = matcher.match(version) - except UnsupportedVersionError: - match = False - - if match: - result.add(provider) - break - return result - - def try_to_replace(self, provider, other, problems): - """ - Attempt to replace one provider with another. This is typically used - when resolving dependencies from multiple sources, e.g. A requires - (B >= 1.0) while C requires (B >= 1.1). - - For successful replacement, ``provider`` must meet all the requirements - which ``other`` fulfills. - - :param provider: The provider we are trying to replace with. - :param other: The provider we're trying to replace. - :param problems: If False is returned, this will contain what - problems prevented replacement. This is currently - a tuple of the literal string 'cantreplace', - ``provider``, ``other`` and the set of requirements - that ``provider`` couldn't fulfill. - :return: True if we can replace ``other`` with ``provider``, else - False. - """ - rlist = self.reqts[other] - unmatched = set() - for s in rlist: - matcher = self.get_matcher(s) - if not matcher.match(provider.version): - unmatched.add(s) - if unmatched: - # can't replace other with provider - problems.add(('cantreplace', provider, other, frozenset(unmatched))) - result = False - else: - # can replace other with provider - self.remove_distribution(other) - del self.reqts[other] - for s in rlist: - self.reqts.setdefault(provider, set()).add(s) - self.add_distribution(provider) - result = True - return result - - def find(self, requirement, meta_extras=None, prereleases=False): - """ - Find a distribution and all distributions it depends on. - - :param requirement: The requirement specifying the distribution to - find, or a Distribution instance. - :param meta_extras: A list of meta extras such as :test:, :build: and - so on. - :param prereleases: If ``True``, allow pre-release versions to be - returned - otherwise, don't return prereleases - unless they're all that's available. - - Return a set of :class:`Distribution` instances and a set of - problems. - - The distributions returned should be such that they have the - :attr:`required` attribute set to ``True`` if they were - from the ``requirement`` passed to ``find()``, and they have the - :attr:`build_time_dependency` attribute set to ``True`` unless they - are post-installation dependencies of the ``requirement``. - - The problems should be a tuple consisting of the string - ``'unsatisfied'`` and the requirement which couldn't be satisfied - by any distribution known to the locator. - """ - - self.provided = {} - self.dists = {} - self.dists_by_name = {} - self.reqts = {} - - meta_extras = set(meta_extras or []) - if ':*:' in meta_extras: - meta_extras.remove(':*:') - # :meta: and :run: are implicitly included - meta_extras |= set([':test:', ':build:', ':dev:']) - - if isinstance(requirement, Distribution): - dist = odist = requirement - logger.debug('passed %s as requirement', odist) - else: - dist = odist = self.locator.locate(requirement, prereleases=prereleases) - if dist is None: - raise DistlibException('Unable to locate %r' % requirement) - logger.debug('located %s', odist) - dist.requested = True - problems = set() - todo = set([dist]) - install_dists = set([odist]) - while todo: - dist = todo.pop() - name = dist.key # case-insensitive - if name not in self.dists_by_name: - self.add_distribution(dist) - else: - # import pdb; pdb.set_trace() - other = self.dists_by_name[name] - if other != dist: - self.try_to_replace(dist, other, problems) - - ireqts = dist.run_requires | dist.meta_requires - sreqts = dist.build_requires - ereqts = set() - if meta_extras and dist in install_dists: - for key in ('test', 'build', 'dev'): - e = ':%s:' % key - if e in meta_extras: - ereqts |= getattr(dist, '%s_requires' % key) - all_reqts = ireqts | sreqts | ereqts - for r in all_reqts: - providers = self.find_providers(r) - if not providers: - logger.debug('No providers found for %r', r) - provider = self.locator.locate(r, prereleases=prereleases) - # If no provider is found and we didn't consider - # prereleases, consider them now. - if provider is None and not prereleases: - provider = self.locator.locate(r, prereleases=True) - if provider is None: - logger.debug('Cannot satisfy %r', r) - problems.add(('unsatisfied', r)) - else: - n, v = provider.key, provider.version - if (n, v) not in self.dists: - todo.add(provider) - providers.add(provider) - if r in ireqts and dist in install_dists: - install_dists.add(provider) - logger.debug('Adding %s to install_dists', provider.name_and_version) - for p in providers: - name = p.key - if name not in self.dists_by_name: - self.reqts.setdefault(p, set()).add(r) - else: - other = self.dists_by_name[name] - if other != p: - # see if other can be replaced by p - self.try_to_replace(p, other, problems) - - dists = set(self.dists.values()) - for dist in dists: - dist.build_time_dependency = dist not in install_dists - if dist.build_time_dependency: - logger.debug('%s is a build-time dependency only.', dist.name_and_version) - logger.debug('find done for %s', odist) - return dists, problems diff --git a/src/pip/_vendor/distlib/manifest.py b/src/pip/_vendor/distlib/manifest.py deleted file mode 100644 index 420dcf12ed2..00000000000 --- a/src/pip/_vendor/distlib/manifest.py +++ /dev/null @@ -1,384 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012-2023 Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -""" -Class representing the list of files in a distribution. - -Equivalent to distutils.filelist, but fixes some problems. -""" -import fnmatch -import logging -import os -import re -import sys - -from . import DistlibException -from .compat import fsdecode -from .util import convert_path - - -__all__ = ['Manifest'] - -logger = logging.getLogger(__name__) - -# a \ followed by some spaces + EOL -_COLLAPSE_PATTERN = re.compile('\\\\w*\n', re.M) -_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S) - -# -# Due to the different results returned by fnmatch.translate, we need -# to do slightly different processing for Python 2.7 and 3.2 ... this needed -# to be brought in for Python 3.6 onwards. -# -_PYTHON_VERSION = sys.version_info[:2] - - -class Manifest(object): - """ - A list of files built by exploring the filesystem and filtered by applying various - patterns to what we find there. - """ - - def __init__(self, base=None): - """ - Initialise an instance. - - :param base: The base directory to explore under. - """ - self.base = os.path.abspath(os.path.normpath(base or os.getcwd())) - self.prefix = self.base + os.sep - self.allfiles = None - self.files = set() - - # - # Public API - # - - def findall(self): - """Find all files under the base and set ``allfiles`` to the absolute - pathnames of files found. - """ - from stat import S_ISREG, S_ISDIR, S_ISLNK - - self.allfiles = allfiles = [] - root = self.base - stack = [root] - pop = stack.pop - push = stack.append - - while stack: - root = pop() - names = os.listdir(root) - - for name in names: - fullname = os.path.join(root, name) - - # Avoid excess stat calls -- just one will do, thank you! - stat = os.stat(fullname) - mode = stat.st_mode - if S_ISREG(mode): - allfiles.append(fsdecode(fullname)) - elif S_ISDIR(mode) and not S_ISLNK(mode): - push(fullname) - - def add(self, item): - """ - Add a file to the manifest. - - :param item: The pathname to add. This can be relative to the base. - """ - if not item.startswith(self.prefix): - item = os.path.join(self.base, item) - self.files.add(os.path.normpath(item)) - - def add_many(self, items): - """ - Add a list of files to the manifest. - - :param items: The pathnames to add. These can be relative to the base. - """ - for item in items: - self.add(item) - - def sorted(self, wantdirs=False): - """ - Return sorted files in directory order - """ - - def add_dir(dirs, d): - dirs.add(d) - logger.debug('add_dir added %s', d) - if d != self.base: - parent, _ = os.path.split(d) - assert parent not in ('', '/') - add_dir(dirs, parent) - - result = set(self.files) # make a copy! - if wantdirs: - dirs = set() - for f in result: - add_dir(dirs, os.path.dirname(f)) - result |= dirs - return [os.path.join(*path_tuple) for path_tuple in - sorted(os.path.split(path) for path in result)] - - def clear(self): - """Clear all collected files.""" - self.files = set() - self.allfiles = [] - - def process_directive(self, directive): - """ - Process a directive which either adds some files from ``allfiles`` to - ``files``, or removes some files from ``files``. - - :param directive: The directive to process. This should be in a format - compatible with distutils ``MANIFEST.in`` files: - - http://docs.python.org/distutils/sourcedist.html#commands - """ - # Parse the line: split it up, make sure the right number of words - # is there, and return the relevant words. 'action' is always - # defined: it's the first word of the line. Which of the other - # three are defined depends on the action; it'll be either - # patterns, (dir and patterns), or (dirpattern). - action, patterns, thedir, dirpattern = self._parse_directive(directive) - - # OK, now we know that the action is valid and we have the - # right number of words on the line for that action -- so we - # can proceed with minimal error-checking. - if action == 'include': - for pattern in patterns: - if not self._include_pattern(pattern, anchor=True): - logger.warning('no files found matching %r', pattern) - - elif action == 'exclude': - for pattern in patterns: - self._exclude_pattern(pattern, anchor=True) - - elif action == 'global-include': - for pattern in patterns: - if not self._include_pattern(pattern, anchor=False): - logger.warning('no files found matching %r ' - 'anywhere in distribution', pattern) - - elif action == 'global-exclude': - for pattern in patterns: - self._exclude_pattern(pattern, anchor=False) - - elif action == 'recursive-include': - for pattern in patterns: - if not self._include_pattern(pattern, prefix=thedir): - logger.warning('no files found matching %r ' - 'under directory %r', pattern, thedir) - - elif action == 'recursive-exclude': - for pattern in patterns: - self._exclude_pattern(pattern, prefix=thedir) - - elif action == 'graft': - if not self._include_pattern(None, prefix=dirpattern): - logger.warning('no directories found matching %r', - dirpattern) - - elif action == 'prune': - if not self._exclude_pattern(None, prefix=dirpattern): - logger.warning('no previously-included directories found ' - 'matching %r', dirpattern) - else: # pragma: no cover - # This should never happen, as it should be caught in - # _parse_template_line - raise DistlibException( - 'invalid action %r' % action) - - # - # Private API - # - - def _parse_directive(self, directive): - """ - Validate a directive. - :param directive: The directive to validate. - :return: A tuple of action, patterns, thedir, dir_patterns - """ - words = directive.split() - if len(words) == 1 and words[0] not in ('include', 'exclude', - 'global-include', - 'global-exclude', - 'recursive-include', - 'recursive-exclude', - 'graft', 'prune'): - # no action given, let's use the default 'include' - words.insert(0, 'include') - - action = words[0] - patterns = thedir = dir_pattern = None - - if action in ('include', 'exclude', - 'global-include', 'global-exclude'): - if len(words) < 2: - raise DistlibException( - '%r expects ...' % action) - - patterns = [convert_path(word) for word in words[1:]] - - elif action in ('recursive-include', 'recursive-exclude'): - if len(words) < 3: - raise DistlibException( - '%r expects ...' % action) - - thedir = convert_path(words[1]) - patterns = [convert_path(word) for word in words[2:]] - - elif action in ('graft', 'prune'): - if len(words) != 2: - raise DistlibException( - '%r expects a single ' % action) - - dir_pattern = convert_path(words[1]) - - else: - raise DistlibException('unknown action %r' % action) - - return action, patterns, thedir, dir_pattern - - def _include_pattern(self, pattern, anchor=True, prefix=None, - is_regex=False): - """Select strings (presumably filenames) from 'self.files' that - match 'pattern', a Unix-style wildcard (glob) pattern. - - Patterns are not quite the same as implemented by the 'fnmatch' - module: '*' and '?' match non-special characters, where "special" - is platform-dependent: slash on Unix; colon, slash, and backslash on - DOS/Windows; and colon on Mac OS. - - If 'anchor' is true (the default), then the pattern match is more - stringent: "*.py" will match "foo.py" but not "foo/bar.py". If - 'anchor' is false, both of these will match. - - If 'prefix' is supplied, then only filenames starting with 'prefix' - (itself a pattern) and ending with 'pattern', with anything in between - them, will match. 'anchor' is ignored in this case. - - If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and - 'pattern' is assumed to be either a string containing a regex or a - regex object -- no translation is done, the regex is just compiled - and used as-is. - - Selected strings will be added to self.files. - - Return True if files are found. - """ - # XXX docstring lying about what the special chars are? - found = False - pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex) - - # delayed loading of allfiles list - if self.allfiles is None: - self.findall() - - for name in self.allfiles: - if pattern_re.search(name): - self.files.add(name) - found = True - return found - - def _exclude_pattern(self, pattern, anchor=True, prefix=None, - is_regex=False): - """Remove strings (presumably filenames) from 'files' that match - 'pattern'. - - Other parameters are the same as for 'include_pattern()', above. - The list 'self.files' is modified in place. Return True if files are - found. - - This API is public to allow e.g. exclusion of SCM subdirs, e.g. when - packaging source distributions - """ - found = False - pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex) - for f in list(self.files): - if pattern_re.search(f): - self.files.remove(f) - found = True - return found - - def _translate_pattern(self, pattern, anchor=True, prefix=None, - is_regex=False): - """Translate a shell-like wildcard pattern to a compiled regular - expression. - - Return the compiled regex. If 'is_regex' true, - then 'pattern' is directly compiled to a regex (if it's a string) - or just returned as-is (assumes it's a regex object). - """ - if is_regex: - if isinstance(pattern, str): - return re.compile(pattern) - else: - return pattern - - if _PYTHON_VERSION > (3, 2): - # ditch start and end characters - start, _, end = self._glob_to_re('_').partition('_') - - if pattern: - pattern_re = self._glob_to_re(pattern) - if _PYTHON_VERSION > (3, 2): - assert pattern_re.startswith(start) and pattern_re.endswith(end) - else: - pattern_re = '' - - base = re.escape(os.path.join(self.base, '')) - if prefix is not None: - # ditch end of pattern character - if _PYTHON_VERSION <= (3, 2): - empty_pattern = self._glob_to_re('') - prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)] - else: - prefix_re = self._glob_to_re(prefix) - assert prefix_re.startswith(start) and prefix_re.endswith(end) - prefix_re = prefix_re[len(start): len(prefix_re) - len(end)] - sep = os.sep - if os.sep == '\\': - sep = r'\\' - if _PYTHON_VERSION <= (3, 2): - pattern_re = '^' + base + sep.join((prefix_re, - '.*' + pattern_re)) - else: - pattern_re = pattern_re[len(start): len(pattern_re) - len(end)] - pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep, - pattern_re, end) - else: # no prefix -- respect anchor flag - if anchor: - if _PYTHON_VERSION <= (3, 2): - pattern_re = '^' + base + pattern_re - else: - pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):]) - - return re.compile(pattern_re) - - def _glob_to_re(self, pattern): - """Translate a shell-like glob pattern to a regular expression. - - Return a string containing the regex. Differs from - 'fnmatch.translate()' in that '*' does not match "special characters" - (which are platform-specific). - """ - pattern_re = fnmatch.translate(pattern) - - # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which - # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix, - # and by extension they shouldn't match such "special characters" under - # any OS. So change all non-escaped dots in the RE to match any - # character except the special characters (currently: just os.sep). - sep = os.sep - if os.sep == '\\': - # we're using a regex to manipulate a regex, so we need - # to escape the backslash twice - sep = r'\\\\' - escaped = r'\1[^%s]' % sep - pattern_re = re.sub(r'((? y, - '!=': lambda x, y: x != y, - '<': lambda x, y: x < y, - '<=': lambda x, y: x == y or x < y, - '>': lambda x, y: x > y, - '>=': lambda x, y: x == y or x > y, - 'and': lambda x, y: x and y, - 'or': lambda x, y: x or y, - 'in': lambda x, y: x in y, - 'not in': lambda x, y: x not in y, - } - - def evaluate(self, expr, context): - """ - Evaluate a marker expression returned by the :func:`parse_requirement` - function in the specified context. - """ - if isinstance(expr, string_types): - if expr[0] in '\'"': - result = expr[1:-1] - else: - if expr not in context: - raise SyntaxError('unknown variable: %s' % expr) - result = context[expr] - else: - assert isinstance(expr, dict) - op = expr['op'] - if op not in self.operations: - raise NotImplementedError('op not implemented: %s' % op) - elhs = expr['lhs'] - erhs = expr['rhs'] - if _is_literal(expr['lhs']) and _is_literal(expr['rhs']): - raise SyntaxError('invalid comparison: %s %s %s' % (elhs, op, erhs)) - - lhs = self.evaluate(elhs, context) - rhs = self.evaluate(erhs, context) - if ((_is_version_marker(elhs) or _is_version_marker(erhs)) and - op in ('<', '<=', '>', '>=', '===', '==', '!=', '~=')): - lhs = LV(lhs) - rhs = LV(rhs) - elif _is_version_marker(elhs) and op in ('in', 'not in'): - lhs = LV(lhs) - rhs = _get_versions(rhs) - result = self.operations[op](lhs, rhs) - return result - - -_DIGITS = re.compile(r'\d+\.\d+') - - -def default_context(): - - def format_full_version(info): - version = '%s.%s.%s' % (info.major, info.minor, info.micro) - kind = info.releaselevel - if kind != 'final': - version += kind[0] + str(info.serial) - return version - - if hasattr(sys, 'implementation'): - implementation_version = format_full_version(sys.implementation.version) - implementation_name = sys.implementation.name - else: - implementation_version = '0' - implementation_name = '' - - ppv = platform.python_version() - m = _DIGITS.match(ppv) - pv = m.group(0) - result = { - 'implementation_name': implementation_name, - 'implementation_version': implementation_version, - 'os_name': os.name, - 'platform_machine': platform.machine(), - 'platform_python_implementation': platform.python_implementation(), - 'platform_release': platform.release(), - 'platform_system': platform.system(), - 'platform_version': platform.version(), - 'platform_in_venv': str(in_venv()), - 'python_full_version': ppv, - 'python_version': pv, - 'sys_platform': sys.platform, - } - return result - - -DEFAULT_CONTEXT = default_context() -del default_context - -evaluator = Evaluator() - - -def interpret(marker, execution_context=None): - """ - Interpret a marker and return a result depending on environment. - - :param marker: The marker to interpret. - :type marker: str - :param execution_context: The context used for name lookup. - :type execution_context: mapping - """ - try: - expr, rest = parse_marker(marker) - except Exception as e: - raise SyntaxError('Unable to interpret marker syntax: %s: %s' % (marker, e)) - if rest and rest[0] != '#': - raise SyntaxError('unexpected trailing data in marker: %s: %s' % (marker, rest)) - context = dict(DEFAULT_CONTEXT) - if execution_context: - context.update(execution_context) - return evaluator.evaluate(expr, context) diff --git a/src/pip/_vendor/distlib/metadata.py b/src/pip/_vendor/distlib/metadata.py deleted file mode 100644 index ce9a34b3e24..00000000000 --- a/src/pip/_vendor/distlib/metadata.py +++ /dev/null @@ -1,1031 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 The Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -"""Implementation of the Metadata for Python packages PEPs. - -Supports all metadata formats (1.0, 1.1, 1.2, 1.3/2.1 and 2.2). -""" -from __future__ import unicode_literals - -import codecs -from email import message_from_file -import json -import logging -import re - -from . import DistlibException, __version__ -from .compat import StringIO, string_types, text_type -from .markers import interpret -from .util import extract_by_key, get_extras -from .version import get_scheme, PEP440_VERSION_RE - -logger = logging.getLogger(__name__) - - -class MetadataMissingError(DistlibException): - """A required metadata is missing""" - - -class MetadataConflictError(DistlibException): - """Attempt to read or write metadata fields that are conflictual.""" - - -class MetadataUnrecognizedVersionError(DistlibException): - """Unknown metadata version number.""" - - -class MetadataInvalidError(DistlibException): - """A metadata value is invalid""" - - -# public API of this module -__all__ = ['Metadata', 'PKG_INFO_ENCODING', 'PKG_INFO_PREFERRED_VERSION'] - -# Encoding used for the PKG-INFO files -PKG_INFO_ENCODING = 'utf-8' - -# preferred version. Hopefully will be changed -# to 1.2 once PEP 345 is supported everywhere -PKG_INFO_PREFERRED_VERSION = '1.1' - -_LINE_PREFIX_1_2 = re.compile('\n \\|') -_LINE_PREFIX_PRE_1_2 = re.compile('\n ') -_241_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', 'Summary', 'Description', 'Keywords', 'Home-page', - 'Author', 'Author-email', 'License') - -_314_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', 'Supported-Platform', 'Summary', 'Description', - 'Keywords', 'Home-page', 'Author', 'Author-email', 'License', 'Classifier', 'Download-URL', 'Obsoletes', - 'Provides', 'Requires') - -_314_MARKERS = ('Obsoletes', 'Provides', 'Requires', 'Classifier', 'Download-URL') - -_345_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', 'Supported-Platform', 'Summary', 'Description', - 'Keywords', 'Home-page', 'Author', 'Author-email', 'Maintainer', 'Maintainer-email', 'License', - 'Classifier', 'Download-URL', 'Obsoletes-Dist', 'Project-URL', 'Provides-Dist', 'Requires-Dist', - 'Requires-Python', 'Requires-External') - -_345_MARKERS = ('Provides-Dist', 'Requires-Dist', 'Requires-Python', 'Obsoletes-Dist', 'Requires-External', - 'Maintainer', 'Maintainer-email', 'Project-URL') - -_426_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform', 'Supported-Platform', 'Summary', 'Description', - 'Keywords', 'Home-page', 'Author', 'Author-email', 'Maintainer', 'Maintainer-email', 'License', - 'Classifier', 'Download-URL', 'Obsoletes-Dist', 'Project-URL', 'Provides-Dist', 'Requires-Dist', - 'Requires-Python', 'Requires-External', 'Private-Version', 'Obsoleted-By', 'Setup-Requires-Dist', - 'Extension', 'Provides-Extra') - -_426_MARKERS = ('Private-Version', 'Provides-Extra', 'Obsoleted-By', 'Setup-Requires-Dist', 'Extension') - -# See issue #106: Sometimes 'Requires' and 'Provides' occur wrongly in -# the metadata. Include them in the tuple literal below to allow them -# (for now). -# Ditto for Obsoletes - see issue #140. -_566_FIELDS = _426_FIELDS + ('Description-Content-Type', 'Requires', 'Provides', 'Obsoletes') - -_566_MARKERS = ('Description-Content-Type', ) - -_643_MARKERS = ('Dynamic', 'License-File') - -_643_FIELDS = _566_FIELDS + _643_MARKERS - -_ALL_FIELDS = set() -_ALL_FIELDS.update(_241_FIELDS) -_ALL_FIELDS.update(_314_FIELDS) -_ALL_FIELDS.update(_345_FIELDS) -_ALL_FIELDS.update(_426_FIELDS) -_ALL_FIELDS.update(_566_FIELDS) -_ALL_FIELDS.update(_643_FIELDS) - -EXTRA_RE = re.compile(r'''extra\s*==\s*("([^"]+)"|'([^']+)')''') - - -def _version2fieldlist(version): - if version == '1.0': - return _241_FIELDS - elif version == '1.1': - return _314_FIELDS - elif version == '1.2': - return _345_FIELDS - elif version in ('1.3', '2.1'): - # avoid adding field names if already there - return _345_FIELDS + tuple(f for f in _566_FIELDS if f not in _345_FIELDS) - elif version == '2.0': - raise ValueError('Metadata 2.0 is withdrawn and not supported') - # return _426_FIELDS - elif version == '2.2': - return _643_FIELDS - raise MetadataUnrecognizedVersionError(version) - - -def _best_version(fields): - """Detect the best version depending on the fields used.""" - - def _has_marker(keys, markers): - return any(marker in keys for marker in markers) - - keys = [key for key, value in fields.items() if value not in ([], 'UNKNOWN', None)] - possible_versions = ['1.0', '1.1', '1.2', '1.3', '2.1', '2.2'] # 2.0 removed - - # first let's try to see if a field is not part of one of the version - for key in keys: - if key not in _241_FIELDS and '1.0' in possible_versions: - possible_versions.remove('1.0') - logger.debug('Removed 1.0 due to %s', key) - if key not in _314_FIELDS and '1.1' in possible_versions: - possible_versions.remove('1.1') - logger.debug('Removed 1.1 due to %s', key) - if key not in _345_FIELDS and '1.2' in possible_versions: - possible_versions.remove('1.2') - logger.debug('Removed 1.2 due to %s', key) - if key not in _566_FIELDS and '1.3' in possible_versions: - possible_versions.remove('1.3') - logger.debug('Removed 1.3 due to %s', key) - if key not in _566_FIELDS and '2.1' in possible_versions: - if key != 'Description': # In 2.1, description allowed after headers - possible_versions.remove('2.1') - logger.debug('Removed 2.1 due to %s', key) - if key not in _643_FIELDS and '2.2' in possible_versions: - possible_versions.remove('2.2') - logger.debug('Removed 2.2 due to %s', key) - # if key not in _426_FIELDS and '2.0' in possible_versions: - # possible_versions.remove('2.0') - # logger.debug('Removed 2.0 due to %s', key) - - # possible_version contains qualified versions - if len(possible_versions) == 1: - return possible_versions[0] # found ! - elif len(possible_versions) == 0: - logger.debug('Out of options - unknown metadata set: %s', fields) - raise MetadataConflictError('Unknown metadata set') - - # let's see if one unique marker is found - is_1_1 = '1.1' in possible_versions and _has_marker(keys, _314_MARKERS) - is_1_2 = '1.2' in possible_versions and _has_marker(keys, _345_MARKERS) - is_2_1 = '2.1' in possible_versions and _has_marker(keys, _566_MARKERS) - # is_2_0 = '2.0' in possible_versions and _has_marker(keys, _426_MARKERS) - is_2_2 = '2.2' in possible_versions and _has_marker(keys, _643_MARKERS) - if int(is_1_1) + int(is_1_2) + int(is_2_1) + int(is_2_2) > 1: - raise MetadataConflictError('You used incompatible 1.1/1.2/2.1/2.2 fields') - - # we have the choice, 1.0, or 1.2, 2.1 or 2.2 - # - 1.0 has a broken Summary field but works with all tools - # - 1.1 is to avoid - # - 1.2 fixes Summary but has little adoption - # - 2.1 adds more features - # - 2.2 is the latest - if not is_1_1 and not is_1_2 and not is_2_1 and not is_2_2: - # we couldn't find any specific marker - if PKG_INFO_PREFERRED_VERSION in possible_versions: - return PKG_INFO_PREFERRED_VERSION - if is_1_1: - return '1.1' - if is_1_2: - return '1.2' - if is_2_1: - return '2.1' - # if is_2_2: - # return '2.2' - - return '2.2' - - -# This follows the rules about transforming keys as described in -# https://www.python.org/dev/peps/pep-0566/#id17 -_ATTR2FIELD = {name.lower().replace("-", "_"): name for name in _ALL_FIELDS} -_FIELD2ATTR = {field: attr for attr, field in _ATTR2FIELD.items()} - -_PREDICATE_FIELDS = ('Requires-Dist', 'Obsoletes-Dist', 'Provides-Dist') -_VERSIONS_FIELDS = ('Requires-Python', ) -_VERSION_FIELDS = ('Version', ) -_LISTFIELDS = ('Platform', 'Classifier', 'Obsoletes', 'Requires', 'Provides', 'Obsoletes-Dist', 'Provides-Dist', - 'Requires-Dist', 'Requires-External', 'Project-URL', 'Supported-Platform', 'Setup-Requires-Dist', - 'Provides-Extra', 'Extension', 'License-File') -_LISTTUPLEFIELDS = ('Project-URL', ) - -_ELEMENTSFIELD = ('Keywords', ) - -_UNICODEFIELDS = ('Author', 'Maintainer', 'Summary', 'Description') - -_MISSING = object() - -_FILESAFE = re.compile('[^A-Za-z0-9.]+') - - -def _get_name_and_version(name, version, for_filename=False): - """Return the distribution name with version. - - If for_filename is true, return a filename-escaped form.""" - if for_filename: - # For both name and version any runs of non-alphanumeric or '.' - # characters are replaced with a single '-'. Additionally any - # spaces in the version string become '.' - name = _FILESAFE.sub('-', name) - version = _FILESAFE.sub('-', version.replace(' ', '.')) - return '%s-%s' % (name, version) - - -class LegacyMetadata(object): - """The legacy metadata of a release. - - Supports versions 1.0, 1.1, 1.2, 2.0 and 1.3/2.1 (auto-detected). You can - instantiate the class with one of these arguments (or none): - - *path*, the path to a metadata file - - *fileobj* give a file-like object with metadata as content - - *mapping* is a dict-like object - - *scheme* is a version scheme name - """ - - # TODO document the mapping API and UNKNOWN default key - - def __init__(self, path=None, fileobj=None, mapping=None, scheme='default'): - if [path, fileobj, mapping].count(None) < 2: - raise TypeError('path, fileobj and mapping are exclusive') - self._fields = {} - self.requires_files = [] - self._dependencies = None - self.scheme = scheme - if path is not None: - self.read(path) - elif fileobj is not None: - self.read_file(fileobj) - elif mapping is not None: - self.update(mapping) - self.set_metadata_version() - - def set_metadata_version(self): - self._fields['Metadata-Version'] = _best_version(self._fields) - - def _write_field(self, fileobj, name, value): - fileobj.write('%s: %s\n' % (name, value)) - - def __getitem__(self, name): - return self.get(name) - - def __setitem__(self, name, value): - return self.set(name, value) - - def __delitem__(self, name): - field_name = self._convert_name(name) - try: - del self._fields[field_name] - except KeyError: - raise KeyError(name) - - def __contains__(self, name): - return (name in self._fields or self._convert_name(name) in self._fields) - - def _convert_name(self, name): - if name in _ALL_FIELDS: - return name - name = name.replace('-', '_').lower() - return _ATTR2FIELD.get(name, name) - - def _default_value(self, name): - if name in _LISTFIELDS or name in _ELEMENTSFIELD: - return [] - return 'UNKNOWN' - - def _remove_line_prefix(self, value): - if self.metadata_version in ('1.0', '1.1'): - return _LINE_PREFIX_PRE_1_2.sub('\n', value) - else: - return _LINE_PREFIX_1_2.sub('\n', value) - - def __getattr__(self, name): - if name in _ATTR2FIELD: - return self[name] - raise AttributeError(name) - - # - # Public API - # - - def get_fullname(self, filesafe=False): - """ - Return the distribution name with version. - - If filesafe is true, return a filename-escaped form. - """ - return _get_name_and_version(self['Name'], self['Version'], filesafe) - - def is_field(self, name): - """return True if name is a valid metadata key""" - name = self._convert_name(name) - return name in _ALL_FIELDS - - def is_multi_field(self, name): - name = self._convert_name(name) - return name in _LISTFIELDS - - def read(self, filepath): - """Read the metadata values from a file path.""" - fp = codecs.open(filepath, 'r', encoding='utf-8') - try: - self.read_file(fp) - finally: - fp.close() - - def read_file(self, fileob): - """Read the metadata values from a file object.""" - msg = message_from_file(fileob) - self._fields['Metadata-Version'] = msg['metadata-version'] - - # When reading, get all the fields we can - for field in _ALL_FIELDS: - if field not in msg: - continue - if field in _LISTFIELDS: - # we can have multiple lines - values = msg.get_all(field) - if field in _LISTTUPLEFIELDS and values is not None: - values = [tuple(value.split(',')) for value in values] - self.set(field, values) - else: - # single line - value = msg[field] - if value is not None and value != 'UNKNOWN': - self.set(field, value) - - # PEP 566 specifies that the body be used for the description, if - # available - body = msg.get_payload() - self["Description"] = body if body else self["Description"] - # logger.debug('Attempting to set metadata for %s', self) - # self.set_metadata_version() - - def write(self, filepath, skip_unknown=False): - """Write the metadata fields to filepath.""" - fp = codecs.open(filepath, 'w', encoding='utf-8') - try: - self.write_file(fp, skip_unknown) - finally: - fp.close() - - def write_file(self, fileobject, skip_unknown=False): - """Write the PKG-INFO format data to a file object.""" - self.set_metadata_version() - - for field in _version2fieldlist(self['Metadata-Version']): - values = self.get(field) - if skip_unknown and values in ('UNKNOWN', [], ['UNKNOWN']): - continue - if field in _ELEMENTSFIELD: - self._write_field(fileobject, field, ','.join(values)) - continue - if field not in _LISTFIELDS: - if field == 'Description': - if self.metadata_version in ('1.0', '1.1'): - values = values.replace('\n', '\n ') - else: - values = values.replace('\n', '\n |') - values = [values] - - if field in _LISTTUPLEFIELDS: - values = [','.join(value) for value in values] - - for value in values: - self._write_field(fileobject, field, value) - - def update(self, other=None, **kwargs): - """Set metadata values from the given iterable `other` and kwargs. - - Behavior is like `dict.update`: If `other` has a ``keys`` method, - they are looped over and ``self[key]`` is assigned ``other[key]``. - Else, ``other`` is an iterable of ``(key, value)`` iterables. - - Keys that don't match a metadata field or that have an empty value are - dropped. - """ - - def _set(key, value): - if key in _ATTR2FIELD and value: - self.set(self._convert_name(key), value) - - if not other: - # other is None or empty container - pass - elif hasattr(other, 'keys'): - for k in other.keys(): - _set(k, other[k]) - else: - for k, v in other: - _set(k, v) - - if kwargs: - for k, v in kwargs.items(): - _set(k, v) - - def set(self, name, value): - """Control then set a metadata field.""" - name = self._convert_name(name) - - if ((name in _ELEMENTSFIELD or name == 'Platform') and not isinstance(value, (list, tuple))): - if isinstance(value, string_types): - value = [v.strip() for v in value.split(',')] - else: - value = [] - elif (name in _LISTFIELDS and not isinstance(value, (list, tuple))): - if isinstance(value, string_types): - value = [value] - else: - value = [] - - if logger.isEnabledFor(logging.WARNING): - project_name = self['Name'] - - scheme = get_scheme(self.scheme) - if name in _PREDICATE_FIELDS and value is not None: - for v in value: - # check that the values are valid - if not scheme.is_valid_matcher(v.split(';')[0]): - logger.warning("'%s': '%s' is not valid (field '%s')", project_name, v, name) - # FIXME this rejects UNKNOWN, is that right? - elif name in _VERSIONS_FIELDS and value is not None: - if not scheme.is_valid_constraint_list(value): - logger.warning("'%s': '%s' is not a valid version (field '%s')", project_name, value, name) - elif name in _VERSION_FIELDS and value is not None: - if not scheme.is_valid_version(value): - logger.warning("'%s': '%s' is not a valid version (field '%s')", project_name, value, name) - - if name in _UNICODEFIELDS: - if name == 'Description': - value = self._remove_line_prefix(value) - - self._fields[name] = value - - def get(self, name, default=_MISSING): - """Get a metadata field.""" - name = self._convert_name(name) - if name not in self._fields: - if default is _MISSING: - default = self._default_value(name) - return default - if name in _UNICODEFIELDS: - value = self._fields[name] - return value - elif name in _LISTFIELDS: - value = self._fields[name] - if value is None: - return [] - res = [] - for val in value: - if name not in _LISTTUPLEFIELDS: - res.append(val) - else: - # That's for Project-URL - res.append((val[0], val[1])) - return res - - elif name in _ELEMENTSFIELD: - value = self._fields[name] - if isinstance(value, string_types): - return value.split(',') - return self._fields[name] - - def check(self, strict=False): - """Check if the metadata is compliant. If strict is True then raise if - no Name or Version are provided""" - self.set_metadata_version() - - # XXX should check the versions (if the file was loaded) - missing, warnings = [], [] - - for attr in ('Name', 'Version'): # required by PEP 345 - if attr not in self: - missing.append(attr) - - if strict and missing != []: - msg = 'missing required metadata: %s' % ', '.join(missing) - raise MetadataMissingError(msg) - - for attr in ('Home-page', 'Author'): - if attr not in self: - missing.append(attr) - - # checking metadata 1.2 (XXX needs to check 1.1, 1.0) - if self['Metadata-Version'] != '1.2': - return missing, warnings - - scheme = get_scheme(self.scheme) - - def are_valid_constraints(value): - for v in value: - if not scheme.is_valid_matcher(v.split(';')[0]): - return False - return True - - for fields, controller in ((_PREDICATE_FIELDS, are_valid_constraints), - (_VERSIONS_FIELDS, scheme.is_valid_constraint_list), (_VERSION_FIELDS, - scheme.is_valid_version)): - for field in fields: - value = self.get(field, None) - if value is not None and not controller(value): - warnings.append("Wrong value for '%s': %s" % (field, value)) - - return missing, warnings - - def todict(self, skip_missing=False): - """Return fields as a dict. - - Field names will be converted to use the underscore-lowercase style - instead of hyphen-mixed case (i.e. home_page instead of Home-page). - This is as per https://www.python.org/dev/peps/pep-0566/#id17. - """ - self.set_metadata_version() - - fields = _version2fieldlist(self['Metadata-Version']) - - data = {} - - for field_name in fields: - if not skip_missing or field_name in self._fields: - key = _FIELD2ATTR[field_name] - if key != 'project_url': - data[key] = self[field_name] - else: - data[key] = [','.join(u) for u in self[field_name]] - - return data - - def add_requirements(self, requirements): - if self['Metadata-Version'] == '1.1': - # we can't have 1.1 metadata *and* Setuptools requires - for field in ('Obsoletes', 'Requires', 'Provides'): - if field in self: - del self[field] - self['Requires-Dist'] += requirements - - # Mapping API - # TODO could add iter* variants - - def keys(self): - return list(_version2fieldlist(self['Metadata-Version'])) - - def __iter__(self): - for key in self.keys(): - yield key - - def values(self): - return [self[key] for key in self.keys()] - - def items(self): - return [(key, self[key]) for key in self.keys()] - - def __repr__(self): - return '<%s %s %s>' % (self.__class__.__name__, self.name, self.version) - - -METADATA_FILENAME = 'pydist.json' -WHEEL_METADATA_FILENAME = 'metadata.json' -LEGACY_METADATA_FILENAME = 'METADATA' - - -class Metadata(object): - """ - The metadata of a release. This implementation uses 2.1 - metadata where possible. If not possible, it wraps a LegacyMetadata - instance which handles the key-value metadata format. - """ - - METADATA_VERSION_MATCHER = re.compile(r'^\d+(\.\d+)*$') - - NAME_MATCHER = re.compile('^[0-9A-Z]([0-9A-Z_.-]*[0-9A-Z])?$', re.I) - - FIELDNAME_MATCHER = re.compile('^[A-Z]([0-9A-Z-]*[0-9A-Z])?$', re.I) - - VERSION_MATCHER = PEP440_VERSION_RE - - SUMMARY_MATCHER = re.compile('.{1,2047}') - - METADATA_VERSION = '2.0' - - GENERATOR = 'distlib (%s)' % __version__ - - MANDATORY_KEYS = { - 'name': (), - 'version': (), - 'summary': ('legacy', ), - } - - INDEX_KEYS = ('name version license summary description author ' - 'author_email keywords platform home_page classifiers ' - 'download_url') - - DEPENDENCY_KEYS = ('extras run_requires test_requires build_requires ' - 'dev_requires provides meta_requires obsoleted_by ' - 'supports_environments') - - SYNTAX_VALIDATORS = { - 'metadata_version': (METADATA_VERSION_MATCHER, ()), - 'name': (NAME_MATCHER, ('legacy', )), - 'version': (VERSION_MATCHER, ('legacy', )), - 'summary': (SUMMARY_MATCHER, ('legacy', )), - 'dynamic': (FIELDNAME_MATCHER, ('legacy', )), - } - - __slots__ = ('_legacy', '_data', 'scheme') - - def __init__(self, path=None, fileobj=None, mapping=None, scheme='default'): - if [path, fileobj, mapping].count(None) < 2: - raise TypeError('path, fileobj and mapping are exclusive') - self._legacy = None - self._data = None - self.scheme = scheme - # import pdb; pdb.set_trace() - if mapping is not None: - try: - self._validate_mapping(mapping, scheme) - self._data = mapping - except MetadataUnrecognizedVersionError: - self._legacy = LegacyMetadata(mapping=mapping, scheme=scheme) - self.validate() - else: - data = None - if path: - with open(path, 'rb') as f: - data = f.read() - elif fileobj: - data = fileobj.read() - if data is None: - # Initialised with no args - to be added - self._data = { - 'metadata_version': self.METADATA_VERSION, - 'generator': self.GENERATOR, - } - else: - if not isinstance(data, text_type): - data = data.decode('utf-8') - try: - self._data = json.loads(data) - self._validate_mapping(self._data, scheme) - except ValueError: - # Note: MetadataUnrecognizedVersionError does not - # inherit from ValueError (it's a DistlibException, - # which should not inherit from ValueError). - # The ValueError comes from the json.load - if that - # succeeds and we get a validation error, we want - # that to propagate - self._legacy = LegacyMetadata(fileobj=StringIO(data), scheme=scheme) - self.validate() - - common_keys = set(('name', 'version', 'license', 'keywords', 'summary')) - - none_list = (None, list) - none_dict = (None, dict) - - mapped_keys = { - 'run_requires': ('Requires-Dist', list), - 'build_requires': ('Setup-Requires-Dist', list), - 'dev_requires': none_list, - 'test_requires': none_list, - 'meta_requires': none_list, - 'extras': ('Provides-Extra', list), - 'modules': none_list, - 'namespaces': none_list, - 'exports': none_dict, - 'commands': none_dict, - 'classifiers': ('Classifier', list), - 'source_url': ('Download-URL', None), - 'metadata_version': ('Metadata-Version', None), - } - - del none_list, none_dict - - def __getattribute__(self, key): - common = object.__getattribute__(self, 'common_keys') - mapped = object.__getattribute__(self, 'mapped_keys') - if key in mapped: - lk, maker = mapped[key] - if self._legacy: - if lk is None: - result = None if maker is None else maker() - else: - result = self._legacy.get(lk) - else: - value = None if maker is None else maker() - if key not in ('commands', 'exports', 'modules', 'namespaces', 'classifiers'): - result = self._data.get(key, value) - else: - # special cases for PEP 459 - sentinel = object() - result = sentinel - d = self._data.get('extensions') - if d: - if key == 'commands': - result = d.get('python.commands', value) - elif key == 'classifiers': - d = d.get('python.details') - if d: - result = d.get(key, value) - else: - d = d.get('python.exports') - if not d: - d = self._data.get('python.exports') - if d: - result = d.get(key, value) - if result is sentinel: - result = value - elif key not in common: - result = object.__getattribute__(self, key) - elif self._legacy: - result = self._legacy.get(key) - else: - result = self._data.get(key) - return result - - def _validate_value(self, key, value, scheme=None): - if key in self.SYNTAX_VALIDATORS: - pattern, exclusions = self.SYNTAX_VALIDATORS[key] - if (scheme or self.scheme) not in exclusions: - m = pattern.match(value) - if not m: - raise MetadataInvalidError("'%s' is an invalid value for " - "the '%s' property" % (value, key)) - - def __setattr__(self, key, value): - self._validate_value(key, value) - common = object.__getattribute__(self, 'common_keys') - mapped = object.__getattribute__(self, 'mapped_keys') - if key in mapped: - lk, _ = mapped[key] - if self._legacy: - if lk is None: - raise NotImplementedError - self._legacy[lk] = value - elif key not in ('commands', 'exports', 'modules', 'namespaces', 'classifiers'): - self._data[key] = value - else: - # special cases for PEP 459 - d = self._data.setdefault('extensions', {}) - if key == 'commands': - d['python.commands'] = value - elif key == 'classifiers': - d = d.setdefault('python.details', {}) - d[key] = value - else: - d = d.setdefault('python.exports', {}) - d[key] = value - elif key not in common: - object.__setattr__(self, key, value) - else: - if key == 'keywords': - if isinstance(value, string_types): - value = value.strip() - if value: - value = value.split() - else: - value = [] - if self._legacy: - self._legacy[key] = value - else: - self._data[key] = value - - @property - def name_and_version(self): - return _get_name_and_version(self.name, self.version, True) - - @property - def provides(self): - if self._legacy: - result = self._legacy['Provides-Dist'] - else: - result = self._data.setdefault('provides', []) - s = '%s (%s)' % (self.name, self.version) - if s not in result: - result.append(s) - return result - - @provides.setter - def provides(self, value): - if self._legacy: - self._legacy['Provides-Dist'] = value - else: - self._data['provides'] = value - - def get_requirements(self, reqts, extras=None, env=None): - """ - Base method to get dependencies, given a set of extras - to satisfy and an optional environment context. - :param reqts: A list of sometimes-wanted dependencies, - perhaps dependent on extras and environment. - :param extras: A list of optional components being requested. - :param env: An optional environment for marker evaluation. - """ - if self._legacy: - result = reqts - else: - result = [] - extras = get_extras(extras or [], self.extras) - for d in reqts: - if 'extra' not in d and 'environment' not in d: - # unconditional - include = True - else: - if 'extra' not in d: - # Not extra-dependent - only environment-dependent - include = True - else: - include = d.get('extra') in extras - if include: - # Not excluded because of extras, check environment - marker = d.get('environment') - if marker: - include = interpret(marker, env) - if include: - result.extend(d['requires']) - for key in ('build', 'dev', 'test'): - e = ':%s:' % key - if e in extras: - extras.remove(e) - # A recursive call, but it should terminate since 'test' - # has been removed from the extras - reqts = self._data.get('%s_requires' % key, []) - result.extend(self.get_requirements(reqts, extras=extras, env=env)) - return result - - @property - def dictionary(self): - if self._legacy: - return self._from_legacy() - return self._data - - @property - def dependencies(self): - if self._legacy: - raise NotImplementedError - else: - return extract_by_key(self._data, self.DEPENDENCY_KEYS) - - @dependencies.setter - def dependencies(self, value): - if self._legacy: - raise NotImplementedError - else: - self._data.update(value) - - def _validate_mapping(self, mapping, scheme): - if mapping.get('metadata_version') != self.METADATA_VERSION: - raise MetadataUnrecognizedVersionError() - missing = [] - for key, exclusions in self.MANDATORY_KEYS.items(): - if key not in mapping: - if scheme not in exclusions: - missing.append(key) - if missing: - msg = 'Missing metadata items: %s' % ', '.join(missing) - raise MetadataMissingError(msg) - for k, v in mapping.items(): - self._validate_value(k, v, scheme) - - def validate(self): - if self._legacy: - missing, warnings = self._legacy.check(True) - if missing or warnings: - logger.warning('Metadata: missing: %s, warnings: %s', missing, warnings) - else: - self._validate_mapping(self._data, self.scheme) - - def todict(self): - if self._legacy: - return self._legacy.todict(True) - else: - result = extract_by_key(self._data, self.INDEX_KEYS) - return result - - def _from_legacy(self): - assert self._legacy and not self._data - result = { - 'metadata_version': self.METADATA_VERSION, - 'generator': self.GENERATOR, - } - lmd = self._legacy.todict(True) # skip missing ones - for k in ('name', 'version', 'license', 'summary', 'description', 'classifier'): - if k in lmd: - if k == 'classifier': - nk = 'classifiers' - else: - nk = k - result[nk] = lmd[k] - kw = lmd.get('Keywords', []) - if kw == ['']: - kw = [] - result['keywords'] = kw - keys = (('requires_dist', 'run_requires'), ('setup_requires_dist', 'build_requires')) - for ok, nk in keys: - if ok in lmd and lmd[ok]: - result[nk] = [{'requires': lmd[ok]}] - result['provides'] = self.provides - # author = {} - # maintainer = {} - return result - - LEGACY_MAPPING = { - 'name': 'Name', - 'version': 'Version', - ('extensions', 'python.details', 'license'): 'License', - 'summary': 'Summary', - 'description': 'Description', - ('extensions', 'python.project', 'project_urls', 'Home'): 'Home-page', - ('extensions', 'python.project', 'contacts', 0, 'name'): 'Author', - ('extensions', 'python.project', 'contacts', 0, 'email'): 'Author-email', - 'source_url': 'Download-URL', - ('extensions', 'python.details', 'classifiers'): 'Classifier', - } - - def _to_legacy(self): - - def process_entries(entries): - reqts = set() - for e in entries: - extra = e.get('extra') - env = e.get('environment') - rlist = e['requires'] - for r in rlist: - if not env and not extra: - reqts.add(r) - else: - marker = '' - if extra: - marker = 'extra == "%s"' % extra - if env: - if marker: - marker = '(%s) and %s' % (env, marker) - else: - marker = env - reqts.add(';'.join((r, marker))) - return reqts - - assert self._data and not self._legacy - result = LegacyMetadata() - nmd = self._data - # import pdb; pdb.set_trace() - for nk, ok in self.LEGACY_MAPPING.items(): - if not isinstance(nk, tuple): - if nk in nmd: - result[ok] = nmd[nk] - else: - d = nmd - found = True - for k in nk: - try: - d = d[k] - except (KeyError, IndexError): - found = False - break - if found: - result[ok] = d - r1 = process_entries(self.run_requires + self.meta_requires) - r2 = process_entries(self.build_requires + self.dev_requires) - if self.extras: - result['Provides-Extra'] = sorted(self.extras) - result['Requires-Dist'] = sorted(r1) - result['Setup-Requires-Dist'] = sorted(r2) - # TODO: any other fields wanted - return result - - def write(self, path=None, fileobj=None, legacy=False, skip_unknown=True): - if [path, fileobj].count(None) != 1: - raise ValueError('Exactly one of path and fileobj is needed') - self.validate() - if legacy: - if self._legacy: - legacy_md = self._legacy - else: - legacy_md = self._to_legacy() - if path: - legacy_md.write(path, skip_unknown=skip_unknown) - else: - legacy_md.write_file(fileobj, skip_unknown=skip_unknown) - else: - if self._legacy: - d = self._from_legacy() - else: - d = self._data - if fileobj: - json.dump(d, fileobj, ensure_ascii=True, indent=2, sort_keys=True) - else: - with codecs.open(path, 'w', 'utf-8') as f: - json.dump(d, f, ensure_ascii=True, indent=2, sort_keys=True) - - def add_requirements(self, requirements): - if self._legacy: - self._legacy.add_requirements(requirements) - else: - run_requires = self._data.setdefault('run_requires', []) - always = None - for entry in run_requires: - if 'environment' not in entry and 'extra' not in entry: - always = entry - break - if always is None: - always = {'requires': requirements} - run_requires.insert(0, always) - else: - rset = set(always['requires']) | set(requirements) - always['requires'] = sorted(rset) - - def __repr__(self): - name = self.name or '(no name)' - version = self.version or 'no version' - return '<%s %s %s (%s)>' % (self.__class__.__name__, self.metadata_version, name, version) diff --git a/src/pip/_vendor/distlib/version.py b/src/pip/_vendor/distlib/version.py deleted file mode 100644 index d70a96ef51e..00000000000 --- a/src/pip/_vendor/distlib/version.py +++ /dev/null @@ -1,750 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012-2023 The Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -""" -Implementation of a flexible versioning scheme providing support for PEP-440, -setuptools-compatible and semantic versioning. -""" - -import logging -import re - -from .compat import string_types -from .util import parse_requirement - -__all__ = ['NormalizedVersion', 'NormalizedMatcher', - 'LegacyVersion', 'LegacyMatcher', - 'SemanticVersion', 'SemanticMatcher', - 'UnsupportedVersionError', 'get_scheme'] - -logger = logging.getLogger(__name__) - - -class UnsupportedVersionError(ValueError): - """This is an unsupported version.""" - pass - - -class Version(object): - def __init__(self, s): - self._string = s = s.strip() - self._parts = parts = self.parse(s) - assert isinstance(parts, tuple) - assert len(parts) > 0 - - def parse(self, s): - raise NotImplementedError('please implement in a subclass') - - def _check_compatible(self, other): - if type(self) != type(other): - raise TypeError('cannot compare %r and %r' % (self, other)) - - def __eq__(self, other): - self._check_compatible(other) - return self._parts == other._parts - - def __ne__(self, other): - return not self.__eq__(other) - - def __lt__(self, other): - self._check_compatible(other) - return self._parts < other._parts - - def __gt__(self, other): - return not (self.__lt__(other) or self.__eq__(other)) - - def __le__(self, other): - return self.__lt__(other) or self.__eq__(other) - - def __ge__(self, other): - return self.__gt__(other) or self.__eq__(other) - - # See http://docs.python.org/reference/datamodel#object.__hash__ - def __hash__(self): - return hash(self._parts) - - def __repr__(self): - return "%s('%s')" % (self.__class__.__name__, self._string) - - def __str__(self): - return self._string - - @property - def is_prerelease(self): - raise NotImplementedError('Please implement in subclasses.') - - -class Matcher(object): - version_class = None - - # value is either a callable or the name of a method - _operators = { - '<': lambda v, c, p: v < c, - '>': lambda v, c, p: v > c, - '<=': lambda v, c, p: v == c or v < c, - '>=': lambda v, c, p: v == c or v > c, - '==': lambda v, c, p: v == c, - '===': lambda v, c, p: v == c, - # by default, compatible => >=. - '~=': lambda v, c, p: v == c or v > c, - '!=': lambda v, c, p: v != c, - } - - # this is a method only to support alternative implementations - # via overriding - def parse_requirement(self, s): - return parse_requirement(s) - - def __init__(self, s): - if self.version_class is None: - raise ValueError('Please specify a version class') - self._string = s = s.strip() - r = self.parse_requirement(s) - if not r: - raise ValueError('Not valid: %r' % s) - self.name = r.name - self.key = self.name.lower() # for case-insensitive comparisons - clist = [] - if r.constraints: - # import pdb; pdb.set_trace() - for op, s in r.constraints: - if s.endswith('.*'): - if op not in ('==', '!='): - raise ValueError('\'.*\' not allowed for ' - '%r constraints' % op) - # Could be a partial version (e.g. for '2.*') which - # won't parse as a version, so keep it as a string - vn, prefix = s[:-2], True - # Just to check that vn is a valid version - self.version_class(vn) - else: - # Should parse as a version, so we can create an - # instance for the comparison - vn, prefix = self.version_class(s), False - clist.append((op, vn, prefix)) - self._parts = tuple(clist) - - def match(self, version): - """ - Check if the provided version matches the constraints. - - :param version: The version to match against this instance. - :type version: String or :class:`Version` instance. - """ - if isinstance(version, string_types): - version = self.version_class(version) - for operator, constraint, prefix in self._parts: - f = self._operators.get(operator) - if isinstance(f, string_types): - f = getattr(self, f) - if not f: - msg = ('%r not implemented ' - 'for %s' % (operator, self.__class__.__name__)) - raise NotImplementedError(msg) - if not f(version, constraint, prefix): - return False - return True - - @property - def exact_version(self): - result = None - if len(self._parts) == 1 and self._parts[0][0] in ('==', '==='): - result = self._parts[0][1] - return result - - def _check_compatible(self, other): - if type(self) != type(other) or self.name != other.name: - raise TypeError('cannot compare %s and %s' % (self, other)) - - def __eq__(self, other): - self._check_compatible(other) - return self.key == other.key and self._parts == other._parts - - def __ne__(self, other): - return not self.__eq__(other) - - # See http://docs.python.org/reference/datamodel#object.__hash__ - def __hash__(self): - return hash(self.key) + hash(self._parts) - - def __repr__(self): - return "%s(%r)" % (self.__class__.__name__, self._string) - - def __str__(self): - return self._string - - -PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|alpha|b|beta|c|rc|pre|preview)(\d+)?)?' - r'(\.(post|r|rev)(\d+)?)?([._-]?(dev)(\d+)?)?' - r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)?))?$', re.I) - - -def _pep_440_key(s): - s = s.strip() - m = PEP440_VERSION_RE.match(s) - if not m: - raise UnsupportedVersionError('Not a valid version: %s' % s) - groups = m.groups() - nums = tuple(int(v) for v in groups[1].split('.')) - while len(nums) > 1 and nums[-1] == 0: - nums = nums[:-1] - - if not groups[0]: - epoch = 0 - else: - epoch = int(groups[0][:-1]) - pre = groups[4:6] - post = groups[7:9] - dev = groups[10:12] - local = groups[13] - if pre == (None, None): - pre = () - else: - if pre[1] is None: - pre = pre[0], 0 - else: - pre = pre[0], int(pre[1]) - if post == (None, None): - post = () - else: - if post[1] is None: - post = post[0], 0 - else: - post = post[0], int(post[1]) - if dev == (None, None): - dev = () - else: - if dev[1] is None: - dev = dev[0], 0 - else: - dev = dev[0], int(dev[1]) - if local is None: - local = () - else: - parts = [] - for part in local.split('.'): - # to ensure that numeric compares as > lexicographic, avoid - # comparing them directly, but encode a tuple which ensures - # correct sorting - if part.isdigit(): - part = (1, int(part)) - else: - part = (0, part) - parts.append(part) - local = tuple(parts) - if not pre: - # either before pre-release, or final release and after - if not post and dev: - # before pre-release - pre = ('a', -1) # to sort before a0 - else: - pre = ('z',) # to sort after all pre-releases - # now look at the state of post and dev. - if not post: - post = ('_',) # sort before 'a' - if not dev: - dev = ('final',) - - return epoch, nums, pre, post, dev, local - - -_normalized_key = _pep_440_key - - -class NormalizedVersion(Version): - """A rational version. - - Good: - 1.2 # equivalent to "1.2.0" - 1.2.0 - 1.2a1 - 1.2.3a2 - 1.2.3b1 - 1.2.3c1 - 1.2.3.4 - TODO: fill this out - - Bad: - 1 # minimum two numbers - 1.2a # release level must have a release serial - 1.2.3b - """ - def parse(self, s): - result = _normalized_key(s) - # _normalized_key loses trailing zeroes in the release - # clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0 - # However, PEP 440 prefix matching needs it: for example, - # (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0). - m = PEP440_VERSION_RE.match(s) # must succeed - groups = m.groups() - self._release_clause = tuple(int(v) for v in groups[1].split('.')) - return result - - PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev']) - - @property - def is_prerelease(self): - return any(t[0] in self.PREREL_TAGS for t in self._parts if t) - - -def _match_prefix(x, y): - x = str(x) - y = str(y) - if x == y: - return True - if not x.startswith(y): - return False - n = len(y) - return x[n] == '.' - - -class NormalizedMatcher(Matcher): - version_class = NormalizedVersion - - # value is either a callable or the name of a method - _operators = { - '~=': '_match_compatible', - '<': '_match_lt', - '>': '_match_gt', - '<=': '_match_le', - '>=': '_match_ge', - '==': '_match_eq', - '===': '_match_arbitrary', - '!=': '_match_ne', - } - - def _adjust_local(self, version, constraint, prefix): - if prefix: - strip_local = '+' not in constraint and version._parts[-1] - else: - # both constraint and version are - # NormalizedVersion instances. - # If constraint does not have a local component, - # ensure the version doesn't, either. - strip_local = not constraint._parts[-1] and version._parts[-1] - if strip_local: - s = version._string.split('+', 1)[0] - version = self.version_class(s) - return version, constraint - - def _match_lt(self, version, constraint, prefix): - version, constraint = self._adjust_local(version, constraint, prefix) - if version >= constraint: - return False - release_clause = constraint._release_clause - pfx = '.'.join([str(i) for i in release_clause]) - return not _match_prefix(version, pfx) - - def _match_gt(self, version, constraint, prefix): - version, constraint = self._adjust_local(version, constraint, prefix) - if version <= constraint: - return False - release_clause = constraint._release_clause - pfx = '.'.join([str(i) for i in release_clause]) - return not _match_prefix(version, pfx) - - def _match_le(self, version, constraint, prefix): - version, constraint = self._adjust_local(version, constraint, prefix) - return version <= constraint - - def _match_ge(self, version, constraint, prefix): - version, constraint = self._adjust_local(version, constraint, prefix) - return version >= constraint - - def _match_eq(self, version, constraint, prefix): - version, constraint = self._adjust_local(version, constraint, prefix) - if not prefix: - result = (version == constraint) - else: - result = _match_prefix(version, constraint) - return result - - def _match_arbitrary(self, version, constraint, prefix): - return str(version) == str(constraint) - - def _match_ne(self, version, constraint, prefix): - version, constraint = self._adjust_local(version, constraint, prefix) - if not prefix: - result = (version != constraint) - else: - result = not _match_prefix(version, constraint) - return result - - def _match_compatible(self, version, constraint, prefix): - version, constraint = self._adjust_local(version, constraint, prefix) - if version == constraint: - return True - if version < constraint: - return False -# if not prefix: -# return True - release_clause = constraint._release_clause - if len(release_clause) > 1: - release_clause = release_clause[:-1] - pfx = '.'.join([str(i) for i in release_clause]) - return _match_prefix(version, pfx) - - -_REPLACEMENTS = ( - (re.compile('[.+-]$'), ''), # remove trailing puncts - (re.compile(r'^[.](\d)'), r'0.\1'), # .N -> 0.N at start - (re.compile('^[.-]'), ''), # remove leading puncts - (re.compile(r'^\((.*)\)$'), r'\1'), # remove parentheses - (re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'), # remove leading v(ersion) - (re.compile(r'^r(ev)?\s*(\d+)'), r'\2'), # remove leading v(ersion) - (re.compile('[.]{2,}'), '.'), # multiple runs of '.' - (re.compile(r'\b(alfa|apha)\b'), 'alpha'), # misspelt alpha - (re.compile(r'\b(pre-alpha|prealpha)\b'), - 'pre.alpha'), # standardise - (re.compile(r'\(beta\)$'), 'beta'), # remove parentheses -) - -_SUFFIX_REPLACEMENTS = ( - (re.compile('^[:~._+-]+'), ''), # remove leading puncts - (re.compile('[,*")([\\]]'), ''), # remove unwanted chars - (re.compile('[~:+_ -]'), '.'), # replace illegal chars - (re.compile('[.]{2,}'), '.'), # multiple runs of '.' - (re.compile(r'\.$'), ''), # trailing '.' -) - -_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)') - - -def _suggest_semantic_version(s): - """ - Try to suggest a semantic form for a version for which - _suggest_normalized_version couldn't come up with anything. - """ - result = s.strip().lower() - for pat, repl in _REPLACEMENTS: - result = pat.sub(repl, result) - if not result: - result = '0.0.0' - - # Now look for numeric prefix, and separate it out from - # the rest. - # import pdb; pdb.set_trace() - m = _NUMERIC_PREFIX.match(result) - if not m: - prefix = '0.0.0' - suffix = result - else: - prefix = m.groups()[0].split('.') - prefix = [int(i) for i in prefix] - while len(prefix) < 3: - prefix.append(0) - if len(prefix) == 3: - suffix = result[m.end():] - else: - suffix = '.'.join([str(i) for i in prefix[3:]]) + result[m.end():] - prefix = prefix[:3] - prefix = '.'.join([str(i) for i in prefix]) - suffix = suffix.strip() - if suffix: - # import pdb; pdb.set_trace() - # massage the suffix. - for pat, repl in _SUFFIX_REPLACEMENTS: - suffix = pat.sub(repl, suffix) - - if not suffix: - result = prefix - else: - sep = '-' if 'dev' in suffix else '+' - result = prefix + sep + suffix - if not is_semver(result): - result = None - return result - - -def _suggest_normalized_version(s): - """Suggest a normalized version close to the given version string. - - If you have a version string that isn't rational (i.e. NormalizedVersion - doesn't like it) then you might be able to get an equivalent (or close) - rational version from this function. - - This does a number of simple normalizations to the given string, based - on observation of versions currently in use on PyPI. Given a dump of - those version during PyCon 2009, 4287 of them: - - 2312 (53.93%) match NormalizedVersion without change - with the automatic suggestion - - 3474 (81.04%) match when using this suggestion method - - @param s {str} An irrational version string. - @returns A rational version string, or None, if couldn't determine one. - """ - try: - _normalized_key(s) - return s # already rational - except UnsupportedVersionError: - pass - - rs = s.lower() - - # part of this could use maketrans - for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'), - ('beta', 'b'), ('rc', 'c'), ('-final', ''), - ('-pre', 'c'), - ('-release', ''), ('.release', ''), ('-stable', ''), - ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''), - ('final', '')): - rs = rs.replace(orig, repl) - - # if something ends with dev or pre, we add a 0 - rs = re.sub(r"pre$", r"pre0", rs) - rs = re.sub(r"dev$", r"dev0", rs) - - # if we have something like "b-2" or "a.2" at the end of the - # version, that is probably beta, alpha, etc - # let's remove the dash or dot - rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs) - - # 1.0-dev-r371 -> 1.0.dev371 - # 0.1-dev-r79 -> 0.1.dev79 - rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs) - - # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1 - rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs) - - # Clean: v0.3, v1.0 - if rs.startswith('v'): - rs = rs[1:] - - # Clean leading '0's on numbers. - # TODO: unintended side-effect on, e.g., "2003.05.09" - # PyPI stats: 77 (~2%) better - rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs) - - # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers - # zero. - # PyPI stats: 245 (7.56%) better - rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs) - - # the 'dev-rNNN' tag is a dev tag - rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs) - - # clean the - when used as a pre delimiter - rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs) - - # a terminal "dev" or "devel" can be changed into ".dev0" - rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs) - - # a terminal "dev" can be changed into ".dev0" - rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs) - - # a terminal "final" or "stable" can be removed - rs = re.sub(r"(final|stable)$", "", rs) - - # The 'r' and the '-' tags are post release tags - # 0.4a1.r10 -> 0.4a1.post10 - # 0.9.33-17222 -> 0.9.33.post17222 - # 0.9.33-r17222 -> 0.9.33.post17222 - rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs) - - # Clean 'r' instead of 'dev' usage: - # 0.9.33+r17222 -> 0.9.33.dev17222 - # 1.0dev123 -> 1.0.dev123 - # 1.0.git123 -> 1.0.dev123 - # 1.0.bzr123 -> 1.0.dev123 - # 0.1a0dev.123 -> 0.1a0.dev123 - # PyPI stats: ~150 (~4%) better - rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs) - - # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage: - # 0.2.pre1 -> 0.2c1 - # 0.2-c1 -> 0.2c1 - # 1.0preview123 -> 1.0c123 - # PyPI stats: ~21 (0.62%) better - rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs) - - # Tcl/Tk uses "px" for their post release markers - rs = re.sub(r"p(\d+)$", r".post\1", rs) - - try: - _normalized_key(rs) - except UnsupportedVersionError: - rs = None - return rs - -# -# Legacy version processing (distribute-compatible) -# - - -_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I) -_VERSION_REPLACE = { - 'pre': 'c', - 'preview': 'c', - '-': 'final-', - 'rc': 'c', - 'dev': '@', - '': None, - '.': None, -} - - -def _legacy_key(s): - def get_parts(s): - result = [] - for p in _VERSION_PART.split(s.lower()): - p = _VERSION_REPLACE.get(p, p) - if p: - if '0' <= p[:1] <= '9': - p = p.zfill(8) - else: - p = '*' + p - result.append(p) - result.append('*final') - return result - - result = [] - for p in get_parts(s): - if p.startswith('*'): - if p < '*final': - while result and result[-1] == '*final-': - result.pop() - while result and result[-1] == '00000000': - result.pop() - result.append(p) - return tuple(result) - - -class LegacyVersion(Version): - def parse(self, s): - return _legacy_key(s) - - @property - def is_prerelease(self): - result = False - for x in self._parts: - if (isinstance(x, string_types) and x.startswith('*') and x < '*final'): - result = True - break - return result - - -class LegacyMatcher(Matcher): - version_class = LegacyVersion - - _operators = dict(Matcher._operators) - _operators['~='] = '_match_compatible' - - numeric_re = re.compile(r'^(\d+(\.\d+)*)') - - def _match_compatible(self, version, constraint, prefix): - if version < constraint: - return False - m = self.numeric_re.match(str(constraint)) - if not m: - logger.warning('Cannot compute compatible match for version %s ' - ' and constraint %s', version, constraint) - return True - s = m.groups()[0] - if '.' in s: - s = s.rsplit('.', 1)[0] - return _match_prefix(version, s) - -# -# Semantic versioning -# - - -_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)' - r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?' - r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I) - - -def is_semver(s): - return _SEMVER_RE.match(s) - - -def _semantic_key(s): - def make_tuple(s, absent): - if s is None: - result = (absent,) - else: - parts = s[1:].split('.') - # We can't compare ints and strings on Python 3, so fudge it - # by zero-filling numeric values so simulate a numeric comparison - result = tuple([p.zfill(8) if p.isdigit() else p for p in parts]) - return result - - m = is_semver(s) - if not m: - raise UnsupportedVersionError(s) - groups = m.groups() - major, minor, patch = [int(i) for i in groups[:3]] - # choose the '|' and '*' so that versions sort correctly - pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*') - return (major, minor, patch), pre, build - - -class SemanticVersion(Version): - def parse(self, s): - return _semantic_key(s) - - @property - def is_prerelease(self): - return self._parts[1][0] != '|' - - -class SemanticMatcher(Matcher): - version_class = SemanticVersion - - -class VersionScheme(object): - def __init__(self, key, matcher, suggester=None): - self.key = key - self.matcher = matcher - self.suggester = suggester - - def is_valid_version(self, s): - try: - self.matcher.version_class(s) - result = True - except UnsupportedVersionError: - result = False - return result - - def is_valid_matcher(self, s): - try: - self.matcher(s) - result = True - except UnsupportedVersionError: - result = False - return result - - def is_valid_constraint_list(self, s): - """ - Used for processing some metadata fields - """ - # See issue #140. Be tolerant of a single trailing comma. - if s.endswith(','): - s = s[:-1] - return self.is_valid_matcher('dummy_name (%s)' % s) - - def suggest(self, s): - if self.suggester is None: - result = None - else: - result = self.suggester(s) - return result - - -_SCHEMES = { - 'normalized': VersionScheme(_normalized_key, NormalizedMatcher, - _suggest_normalized_version), - 'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda self, s: s), - 'semantic': VersionScheme(_semantic_key, SemanticMatcher, - _suggest_semantic_version), -} - -_SCHEMES['default'] = _SCHEMES['normalized'] - - -def get_scheme(name): - if name not in _SCHEMES: - raise ValueError('unknown scheme name: %r' % name) - return _SCHEMES[name] diff --git a/src/pip/_vendor/distlib/wheel.py b/src/pip/_vendor/distlib/wheel.py deleted file mode 100644 index 62ab10fb3ad..00000000000 --- a/src/pip/_vendor/distlib/wheel.py +++ /dev/null @@ -1,1100 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2013-2023 Vinay Sajip. -# Licensed to the Python Software Foundation under a contributor agreement. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -from __future__ import unicode_literals - -import base64 -import codecs -import datetime -from email import message_from_file -import hashlib -import json -import logging -import os -import posixpath -import re -import shutil -import sys -import tempfile -import zipfile - -from . import __version__, DistlibException -from .compat import sysconfig, ZipFile, fsdecode, text_type, filter -from .database import InstalledDistribution -from .metadata import Metadata, WHEEL_METADATA_FILENAME, LEGACY_METADATA_FILENAME -from .util import (FileOperator, convert_path, CSVReader, CSVWriter, Cache, cached_property, get_cache_base, - read_exports, tempdir, get_platform) -from .version import NormalizedVersion, UnsupportedVersionError - -logger = logging.getLogger(__name__) - -cache = None # created when needed - -if hasattr(sys, 'pypy_version_info'): # pragma: no cover - IMP_PREFIX = 'pp' -elif sys.platform.startswith('java'): # pragma: no cover - IMP_PREFIX = 'jy' -elif sys.platform == 'cli': # pragma: no cover - IMP_PREFIX = 'ip' -else: - IMP_PREFIX = 'cp' - -VER_SUFFIX = sysconfig.get_config_var('py_version_nodot') -if not VER_SUFFIX: # pragma: no cover - VER_SUFFIX = '%s%s' % sys.version_info[:2] -PYVER = 'py' + VER_SUFFIX -IMPVER = IMP_PREFIX + VER_SUFFIX - -ARCH = get_platform().replace('-', '_').replace('.', '_') - -ABI = sysconfig.get_config_var('SOABI') -if ABI and ABI.startswith('cpython-'): - ABI = ABI.replace('cpython-', 'cp').split('-')[0] -else: - - def _derive_abi(): - parts = ['cp', VER_SUFFIX] - if sysconfig.get_config_var('Py_DEBUG'): - parts.append('d') - if IMP_PREFIX == 'cp': - vi = sys.version_info[:2] - if vi < (3, 8): - wpm = sysconfig.get_config_var('WITH_PYMALLOC') - if wpm is None: - wpm = True - if wpm: - parts.append('m') - if vi < (3, 3): - us = sysconfig.get_config_var('Py_UNICODE_SIZE') - if us == 4 or (us is None and sys.maxunicode == 0x10FFFF): - parts.append('u') - return ''.join(parts) - - ABI = _derive_abi() - del _derive_abi - -FILENAME_RE = re.compile( - r''' -(?P[^-]+) --(?P\d+[^-]*) -(-(?P\d+[^-]*))? --(?P\w+\d+(\.\w+\d+)*) --(?P\w+) --(?P\w+(\.\w+)*) -\.whl$ -''', re.IGNORECASE | re.VERBOSE) - -NAME_VERSION_RE = re.compile(r''' -(?P[^-]+) --(?P\d+[^-]*) -(-(?P\d+[^-]*))?$ -''', re.IGNORECASE | re.VERBOSE) - -SHEBANG_RE = re.compile(br'\s*#![^\r\n]*') -SHEBANG_DETAIL_RE = re.compile(br'^(\s*#!("[^"]+"|\S+))\s+(.*)$') -SHEBANG_PYTHON = b'#!python' -SHEBANG_PYTHONW = b'#!pythonw' - -if os.sep == '/': - to_posix = lambda o: o -else: - to_posix = lambda o: o.replace(os.sep, '/') - -if sys.version_info[0] < 3: - import imp -else: - imp = None - import importlib.machinery - import importlib.util - - -def _get_suffixes(): - if imp: - return [s[0] for s in imp.get_suffixes()] - else: - return importlib.machinery.EXTENSION_SUFFIXES - - -def _load_dynamic(name, path): - # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly - if imp: - return imp.load_dynamic(name, path) - else: - spec = importlib.util.spec_from_file_location(name, path) - module = importlib.util.module_from_spec(spec) - sys.modules[name] = module - spec.loader.exec_module(module) - return module - - -class Mounter(object): - - def __init__(self): - self.impure_wheels = {} - self.libs = {} - - def add(self, pathname, extensions): - self.impure_wheels[pathname] = extensions - self.libs.update(extensions) - - def remove(self, pathname): - extensions = self.impure_wheels.pop(pathname) - for k, v in extensions: - if k in self.libs: - del self.libs[k] - - def find_module(self, fullname, path=None): - if fullname in self.libs: - result = self - else: - result = None - return result - - def load_module(self, fullname): - if fullname in sys.modules: - result = sys.modules[fullname] - else: - if fullname not in self.libs: - raise ImportError('unable to find extension for %s' % fullname) - result = _load_dynamic(fullname, self.libs[fullname]) - result.__loader__ = self - parts = fullname.rsplit('.', 1) - if len(parts) > 1: - result.__package__ = parts[0] - return result - - -_hook = Mounter() - - -class Wheel(object): - """ - Class to build and install from Wheel files (PEP 427). - """ - - wheel_version = (1, 1) - hash_kind = 'sha256' - - def __init__(self, filename=None, sign=False, verify=False): - """ - Initialise an instance using a (valid) filename. - """ - self.sign = sign - self.should_verify = verify - self.buildver = '' - self.pyver = [PYVER] - self.abi = ['none'] - self.arch = ['any'] - self.dirname = os.getcwd() - if filename is None: - self.name = 'dummy' - self.version = '0.1' - self._filename = self.filename - else: - m = NAME_VERSION_RE.match(filename) - if m: - info = m.groupdict('') - self.name = info['nm'] - # Reinstate the local version separator - self.version = info['vn'].replace('_', '-') - self.buildver = info['bn'] - self._filename = self.filename - else: - dirname, filename = os.path.split(filename) - m = FILENAME_RE.match(filename) - if not m: - raise DistlibException('Invalid name or ' - 'filename: %r' % filename) - if dirname: - self.dirname = os.path.abspath(dirname) - self._filename = filename - info = m.groupdict('') - self.name = info['nm'] - self.version = info['vn'] - self.buildver = info['bn'] - self.pyver = info['py'].split('.') - self.abi = info['bi'].split('.') - self.arch = info['ar'].split('.') - - @property - def filename(self): - """ - Build and return a filename from the various components. - """ - if self.buildver: - buildver = '-' + self.buildver - else: - buildver = '' - pyver = '.'.join(self.pyver) - abi = '.'.join(self.abi) - arch = '.'.join(self.arch) - # replace - with _ as a local version separator - version = self.version.replace('-', '_') - return '%s-%s%s-%s-%s-%s.whl' % (self.name, version, buildver, pyver, abi, arch) - - @property - def exists(self): - path = os.path.join(self.dirname, self.filename) - return os.path.isfile(path) - - @property - def tags(self): - for pyver in self.pyver: - for abi in self.abi: - for arch in self.arch: - yield pyver, abi, arch - - @cached_property - def metadata(self): - pathname = os.path.join(self.dirname, self.filename) - name_ver = '%s-%s' % (self.name, self.version) - info_dir = '%s.dist-info' % name_ver - wrapper = codecs.getreader('utf-8') - with ZipFile(pathname, 'r') as zf: - self.get_wheel_metadata(zf) - # wv = wheel_metadata['Wheel-Version'].split('.', 1) - # file_version = tuple([int(i) for i in wv]) - # if file_version < (1, 1): - # fns = [WHEEL_METADATA_FILENAME, METADATA_FILENAME, - # LEGACY_METADATA_FILENAME] - # else: - # fns = [WHEEL_METADATA_FILENAME, METADATA_FILENAME] - fns = [WHEEL_METADATA_FILENAME, LEGACY_METADATA_FILENAME] - result = None - for fn in fns: - try: - metadata_filename = posixpath.join(info_dir, fn) - with zf.open(metadata_filename) as bf: - wf = wrapper(bf) - result = Metadata(fileobj=wf) - if result: - break - except KeyError: - pass - if not result: - raise ValueError('Invalid wheel, because metadata is ' - 'missing: looked in %s' % ', '.join(fns)) - return result - - def get_wheel_metadata(self, zf): - name_ver = '%s-%s' % (self.name, self.version) - info_dir = '%s.dist-info' % name_ver - metadata_filename = posixpath.join(info_dir, 'WHEEL') - with zf.open(metadata_filename) as bf: - wf = codecs.getreader('utf-8')(bf) - message = message_from_file(wf) - return dict(message) - - @cached_property - def info(self): - pathname = os.path.join(self.dirname, self.filename) - with ZipFile(pathname, 'r') as zf: - result = self.get_wheel_metadata(zf) - return result - - def process_shebang(self, data): - m = SHEBANG_RE.match(data) - if m: - end = m.end() - shebang, data_after_shebang = data[:end], data[end:] - # Preserve any arguments after the interpreter - if b'pythonw' in shebang.lower(): - shebang_python = SHEBANG_PYTHONW - else: - shebang_python = SHEBANG_PYTHON - m = SHEBANG_DETAIL_RE.match(shebang) - if m: - args = b' ' + m.groups()[-1] - else: - args = b'' - shebang = shebang_python + args - data = shebang + data_after_shebang - else: - cr = data.find(b'\r') - lf = data.find(b'\n') - if cr < 0 or cr > lf: - term = b'\n' - else: - if data[cr:cr + 2] == b'\r\n': - term = b'\r\n' - else: - term = b'\r' - data = SHEBANG_PYTHON + term + data - return data - - def get_hash(self, data, hash_kind=None): - if hash_kind is None: - hash_kind = self.hash_kind - try: - hasher = getattr(hashlib, hash_kind) - except AttributeError: - raise DistlibException('Unsupported hash algorithm: %r' % hash_kind) - result = hasher(data).digest() - result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii') - return hash_kind, result - - def write_record(self, records, record_path, archive_record_path): - records = list(records) # make a copy, as mutated - records.append((archive_record_path, '', '')) - with CSVWriter(record_path) as writer: - for row in records: - writer.writerow(row) - - def write_records(self, info, libdir, archive_paths): - records = [] - distinfo, info_dir = info - # hasher = getattr(hashlib, self.hash_kind) - for ap, p in archive_paths: - with open(p, 'rb') as f: - data = f.read() - digest = '%s=%s' % self.get_hash(data) - size = os.path.getsize(p) - records.append((ap, digest, size)) - - p = os.path.join(distinfo, 'RECORD') - ap = to_posix(os.path.join(info_dir, 'RECORD')) - self.write_record(records, p, ap) - archive_paths.append((ap, p)) - - def build_zip(self, pathname, archive_paths): - with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as zf: - for ap, p in archive_paths: - logger.debug('Wrote %s to %s in wheel', p, ap) - zf.write(p, ap) - - def build(self, paths, tags=None, wheel_version=None): - """ - Build a wheel from files in specified paths, and use any specified tags - when determining the name of the wheel. - """ - if tags is None: - tags = {} - - libkey = list(filter(lambda o: o in paths, ('purelib', 'platlib')))[0] - if libkey == 'platlib': - is_pure = 'false' - default_pyver = [IMPVER] - default_abi = [ABI] - default_arch = [ARCH] - else: - is_pure = 'true' - default_pyver = [PYVER] - default_abi = ['none'] - default_arch = ['any'] - - self.pyver = tags.get('pyver', default_pyver) - self.abi = tags.get('abi', default_abi) - self.arch = tags.get('arch', default_arch) - - libdir = paths[libkey] - - name_ver = '%s-%s' % (self.name, self.version) - data_dir = '%s.data' % name_ver - info_dir = '%s.dist-info' % name_ver - - archive_paths = [] - - # First, stuff which is not in site-packages - for key in ('data', 'headers', 'scripts'): - if key not in paths: - continue - path = paths[key] - if os.path.isdir(path): - for root, dirs, files in os.walk(path): - for fn in files: - p = fsdecode(os.path.join(root, fn)) - rp = os.path.relpath(p, path) - ap = to_posix(os.path.join(data_dir, key, rp)) - archive_paths.append((ap, p)) - if key == 'scripts' and not p.endswith('.exe'): - with open(p, 'rb') as f: - data = f.read() - data = self.process_shebang(data) - with open(p, 'wb') as f: - f.write(data) - - # Now, stuff which is in site-packages, other than the - # distinfo stuff. - path = libdir - distinfo = None - for root, dirs, files in os.walk(path): - if root == path: - # At the top level only, save distinfo for later - # and skip it for now - for i, dn in enumerate(dirs): - dn = fsdecode(dn) - if dn.endswith('.dist-info'): - distinfo = os.path.join(root, dn) - del dirs[i] - break - assert distinfo, '.dist-info directory expected, not found' - - for fn in files: - # comment out next suite to leave .pyc files in - if fsdecode(fn).endswith(('.pyc', '.pyo')): - continue - p = os.path.join(root, fn) - rp = to_posix(os.path.relpath(p, path)) - archive_paths.append((rp, p)) - - # Now distinfo. Assumed to be flat, i.e. os.listdir is enough. - files = os.listdir(distinfo) - for fn in files: - if fn not in ('RECORD', 'INSTALLER', 'SHARED', 'WHEEL'): - p = fsdecode(os.path.join(distinfo, fn)) - ap = to_posix(os.path.join(info_dir, fn)) - archive_paths.append((ap, p)) - - wheel_metadata = [ - 'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version), - 'Generator: distlib %s' % __version__, - 'Root-Is-Purelib: %s' % is_pure, - ] - for pyver, abi, arch in self.tags: - wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch)) - p = os.path.join(distinfo, 'WHEEL') - with open(p, 'w') as f: - f.write('\n'.join(wheel_metadata)) - ap = to_posix(os.path.join(info_dir, 'WHEEL')) - archive_paths.append((ap, p)) - - # sort the entries by archive path. Not needed by any spec, but it - # keeps the archive listing and RECORD tidier than they would otherwise - # be. Use the number of path segments to keep directory entries together, - # and keep the dist-info stuff at the end. - def sorter(t): - ap = t[0] - n = ap.count('/') - if '.dist-info' in ap: - n += 10000 - return (n, ap) - - archive_paths = sorted(archive_paths, key=sorter) - - # Now, at last, RECORD. - # Paths in here are archive paths - nothing else makes sense. - self.write_records((distinfo, info_dir), libdir, archive_paths) - # Now, ready to build the zip file - pathname = os.path.join(self.dirname, self.filename) - self.build_zip(pathname, archive_paths) - return pathname - - def skip_entry(self, arcname): - """ - Determine whether an archive entry should be skipped when verifying - or installing. - """ - # The signature file won't be in RECORD, - # and we don't currently don't do anything with it - # We also skip directories, as they won't be in RECORD - # either. See: - # - # https://github.com/pypa/wheel/issues/294 - # https://github.com/pypa/wheel/issues/287 - # https://github.com/pypa/wheel/pull/289 - # - return arcname.endswith(('/', '/RECORD.jws')) - - def install(self, paths, maker, **kwargs): - """ - Install a wheel to the specified paths. If kwarg ``warner`` is - specified, it should be a callable, which will be called with two - tuples indicating the wheel version of this software and the wheel - version in the file, if there is a discrepancy in the versions. - This can be used to issue any warnings to raise any exceptions. - If kwarg ``lib_only`` is True, only the purelib/platlib files are - installed, and the headers, scripts, data and dist-info metadata are - not written. If kwarg ``bytecode_hashed_invalidation`` is True, written - bytecode will try to use file-hash based invalidation (PEP-552) on - supported interpreter versions (CPython 3.7+). - - The return value is a :class:`InstalledDistribution` instance unless - ``options.lib_only`` is True, in which case the return value is ``None``. - """ - - dry_run = maker.dry_run - warner = kwargs.get('warner') - lib_only = kwargs.get('lib_only', False) - bc_hashed_invalidation = kwargs.get('bytecode_hashed_invalidation', False) - - pathname = os.path.join(self.dirname, self.filename) - name_ver = '%s-%s' % (self.name, self.version) - data_dir = '%s.data' % name_ver - info_dir = '%s.dist-info' % name_ver - - metadata_name = posixpath.join(info_dir, LEGACY_METADATA_FILENAME) - wheel_metadata_name = posixpath.join(info_dir, 'WHEEL') - record_name = posixpath.join(info_dir, 'RECORD') - - wrapper = codecs.getreader('utf-8') - - with ZipFile(pathname, 'r') as zf: - with zf.open(wheel_metadata_name) as bwf: - wf = wrapper(bwf) - message = message_from_file(wf) - wv = message['Wheel-Version'].split('.', 1) - file_version = tuple([int(i) for i in wv]) - if (file_version != self.wheel_version) and warner: - warner(self.wheel_version, file_version) - - if message['Root-Is-Purelib'] == 'true': - libdir = paths['purelib'] - else: - libdir = paths['platlib'] - - records = {} - with zf.open(record_name) as bf: - with CSVReader(stream=bf) as reader: - for row in reader: - p = row[0] - records[p] = row - - data_pfx = posixpath.join(data_dir, '') - info_pfx = posixpath.join(info_dir, '') - script_pfx = posixpath.join(data_dir, 'scripts', '') - - # make a new instance rather than a copy of maker's, - # as we mutate it - fileop = FileOperator(dry_run=dry_run) - fileop.record = True # so we can rollback if needed - - bc = not sys.dont_write_bytecode # Double negatives. Lovely! - - outfiles = [] # for RECORD writing - - # for script copying/shebang processing - workdir = tempfile.mkdtemp() - # set target dir later - # we default add_launchers to False, as the - # Python Launcher should be used instead - maker.source_dir = workdir - maker.target_dir = None - try: - for zinfo in zf.infolist(): - arcname = zinfo.filename - if isinstance(arcname, text_type): - u_arcname = arcname - else: - u_arcname = arcname.decode('utf-8') - if self.skip_entry(u_arcname): - continue - row = records[u_arcname] - if row[2] and str(zinfo.file_size) != row[2]: - raise DistlibException('size mismatch for ' - '%s' % u_arcname) - if row[1]: - kind, value = row[1].split('=', 1) - with zf.open(arcname) as bf: - data = bf.read() - _, digest = self.get_hash(data, kind) - if digest != value: - raise DistlibException('digest mismatch for ' - '%s' % arcname) - - if lib_only and u_arcname.startswith((info_pfx, data_pfx)): - logger.debug('lib_only: skipping %s', u_arcname) - continue - is_script = (u_arcname.startswith(script_pfx) and not u_arcname.endswith('.exe')) - - if u_arcname.startswith(data_pfx): - _, where, rp = u_arcname.split('/', 2) - outfile = os.path.join(paths[where], convert_path(rp)) - else: - # meant for site-packages. - if u_arcname in (wheel_metadata_name, record_name): - continue - outfile = os.path.join(libdir, convert_path(u_arcname)) - if not is_script: - with zf.open(arcname) as bf: - fileop.copy_stream(bf, outfile) - # Issue #147: permission bits aren't preserved. Using - # zf.extract(zinfo, libdir) should have worked, but didn't, - # see https://www.thetopsites.net/article/53834422.shtml - # So ... manually preserve permission bits as given in zinfo - if os.name == 'posix': - # just set the normal permission bits - os.chmod(outfile, (zinfo.external_attr >> 16) & 0x1FF) - outfiles.append(outfile) - # Double check the digest of the written file - if not dry_run and row[1]: - with open(outfile, 'rb') as bf: - data = bf.read() - _, newdigest = self.get_hash(data, kind) - if newdigest != digest: - raise DistlibException('digest mismatch ' - 'on write for ' - '%s' % outfile) - if bc and outfile.endswith('.py'): - try: - pyc = fileop.byte_compile(outfile, hashed_invalidation=bc_hashed_invalidation) - outfiles.append(pyc) - except Exception: - # Don't give up if byte-compilation fails, - # but log it and perhaps warn the user - logger.warning('Byte-compilation failed', exc_info=True) - else: - fn = os.path.basename(convert_path(arcname)) - workname = os.path.join(workdir, fn) - with zf.open(arcname) as bf: - fileop.copy_stream(bf, workname) - - dn, fn = os.path.split(outfile) - maker.target_dir = dn - filenames = maker.make(fn) - fileop.set_executable_mode(filenames) - outfiles.extend(filenames) - - if lib_only: - logger.debug('lib_only: returning None') - dist = None - else: - # Generate scripts - - # Try to get pydist.json so we can see if there are - # any commands to generate. If this fails (e.g. because - # of a legacy wheel), log a warning but don't give up. - commands = None - file_version = self.info['Wheel-Version'] - if file_version == '1.0': - # Use legacy info - ep = posixpath.join(info_dir, 'entry_points.txt') - try: - with zf.open(ep) as bwf: - epdata = read_exports(bwf) - commands = {} - for key in ('console', 'gui'): - k = '%s_scripts' % key - if k in epdata: - commands['wrap_%s' % key] = d = {} - for v in epdata[k].values(): - s = '%s:%s' % (v.prefix, v.suffix) - if v.flags: - s += ' [%s]' % ','.join(v.flags) - d[v.name] = s - except Exception: - logger.warning('Unable to read legacy script ' - 'metadata, so cannot generate ' - 'scripts') - else: - try: - with zf.open(metadata_name) as bwf: - wf = wrapper(bwf) - commands = json.load(wf).get('extensions') - if commands: - commands = commands.get('python.commands') - except Exception: - logger.warning('Unable to read JSON metadata, so ' - 'cannot generate scripts') - if commands: - console_scripts = commands.get('wrap_console', {}) - gui_scripts = commands.get('wrap_gui', {}) - if console_scripts or gui_scripts: - script_dir = paths.get('scripts', '') - if not os.path.isdir(script_dir): - raise ValueError('Valid script path not ' - 'specified') - maker.target_dir = script_dir - for k, v in console_scripts.items(): - script = '%s = %s' % (k, v) - filenames = maker.make(script) - fileop.set_executable_mode(filenames) - - if gui_scripts: - options = {'gui': True} - for k, v in gui_scripts.items(): - script = '%s = %s' % (k, v) - filenames = maker.make(script, options) - fileop.set_executable_mode(filenames) - - p = os.path.join(libdir, info_dir) - dist = InstalledDistribution(p) - - # Write SHARED - paths = dict(paths) # don't change passed in dict - del paths['purelib'] - del paths['platlib'] - paths['lib'] = libdir - p = dist.write_shared_locations(paths, dry_run) - if p: - outfiles.append(p) - - # Write RECORD - dist.write_installed_files(outfiles, paths['prefix'], dry_run) - return dist - except Exception: # pragma: no cover - logger.exception('installation failed.') - fileop.rollback() - raise - finally: - shutil.rmtree(workdir) - - def _get_dylib_cache(self): - global cache - if cache is None: - # Use native string to avoid issues on 2.x: see Python #20140. - base = os.path.join(get_cache_base(), str('dylib-cache'), '%s.%s' % sys.version_info[:2]) - cache = Cache(base) - return cache - - def _get_extensions(self): - pathname = os.path.join(self.dirname, self.filename) - name_ver = '%s-%s' % (self.name, self.version) - info_dir = '%s.dist-info' % name_ver - arcname = posixpath.join(info_dir, 'EXTENSIONS') - wrapper = codecs.getreader('utf-8') - result = [] - with ZipFile(pathname, 'r') as zf: - try: - with zf.open(arcname) as bf: - wf = wrapper(bf) - extensions = json.load(wf) - cache = self._get_dylib_cache() - prefix = cache.prefix_to_dir(self.filename, use_abspath=False) - cache_base = os.path.join(cache.base, prefix) - if not os.path.isdir(cache_base): - os.makedirs(cache_base) - for name, relpath in extensions.items(): - dest = os.path.join(cache_base, convert_path(relpath)) - if not os.path.exists(dest): - extract = True - else: - file_time = os.stat(dest).st_mtime - file_time = datetime.datetime.fromtimestamp(file_time) - info = zf.getinfo(relpath) - wheel_time = datetime.datetime(*info.date_time) - extract = wheel_time > file_time - if extract: - zf.extract(relpath, cache_base) - result.append((name, dest)) - except KeyError: - pass - return result - - def is_compatible(self): - """ - Determine if a wheel is compatible with the running system. - """ - return is_compatible(self) - - def is_mountable(self): - """ - Determine if a wheel is asserted as mountable by its metadata. - """ - return True # for now - metadata details TBD - - def mount(self, append=False): - pathname = os.path.abspath(os.path.join(self.dirname, self.filename)) - if not self.is_compatible(): - msg = 'Wheel %s not compatible with this Python.' % pathname - raise DistlibException(msg) - if not self.is_mountable(): - msg = 'Wheel %s is marked as not mountable.' % pathname - raise DistlibException(msg) - if pathname in sys.path: - logger.debug('%s already in path', pathname) - else: - if append: - sys.path.append(pathname) - else: - sys.path.insert(0, pathname) - extensions = self._get_extensions() - if extensions: - if _hook not in sys.meta_path: - sys.meta_path.append(_hook) - _hook.add(pathname, extensions) - - def unmount(self): - pathname = os.path.abspath(os.path.join(self.dirname, self.filename)) - if pathname not in sys.path: - logger.debug('%s not in path', pathname) - else: - sys.path.remove(pathname) - if pathname in _hook.impure_wheels: - _hook.remove(pathname) - if not _hook.impure_wheels: - if _hook in sys.meta_path: - sys.meta_path.remove(_hook) - - def verify(self): - pathname = os.path.join(self.dirname, self.filename) - name_ver = '%s-%s' % (self.name, self.version) - # data_dir = '%s.data' % name_ver - info_dir = '%s.dist-info' % name_ver - - # metadata_name = posixpath.join(info_dir, LEGACY_METADATA_FILENAME) - wheel_metadata_name = posixpath.join(info_dir, 'WHEEL') - record_name = posixpath.join(info_dir, 'RECORD') - - wrapper = codecs.getreader('utf-8') - - with ZipFile(pathname, 'r') as zf: - with zf.open(wheel_metadata_name) as bwf: - wf = wrapper(bwf) - message_from_file(wf) - # wv = message['Wheel-Version'].split('.', 1) - # file_version = tuple([int(i) for i in wv]) - # TODO version verification - - records = {} - with zf.open(record_name) as bf: - with CSVReader(stream=bf) as reader: - for row in reader: - p = row[0] - records[p] = row - - for zinfo in zf.infolist(): - arcname = zinfo.filename - if isinstance(arcname, text_type): - u_arcname = arcname - else: - u_arcname = arcname.decode('utf-8') - # See issue #115: some wheels have .. in their entries, but - # in the filename ... e.g. __main__..py ! So the check is - # updated to look for .. in the directory portions - p = u_arcname.split('/') - if '..' in p: - raise DistlibException('invalid entry in ' - 'wheel: %r' % u_arcname) - - if self.skip_entry(u_arcname): - continue - row = records[u_arcname] - if row[2] and str(zinfo.file_size) != row[2]: - raise DistlibException('size mismatch for ' - '%s' % u_arcname) - if row[1]: - kind, value = row[1].split('=', 1) - with zf.open(arcname) as bf: - data = bf.read() - _, digest = self.get_hash(data, kind) - if digest != value: - raise DistlibException('digest mismatch for ' - '%s' % arcname) - - def update(self, modifier, dest_dir=None, **kwargs): - """ - Update the contents of a wheel in a generic way. The modifier should - be a callable which expects a dictionary argument: its keys are - archive-entry paths, and its values are absolute filesystem paths - where the contents the corresponding archive entries can be found. The - modifier is free to change the contents of the files pointed to, add - new entries and remove entries, before returning. This method will - extract the entire contents of the wheel to a temporary location, call - the modifier, and then use the passed (and possibly updated) - dictionary to write a new wheel. If ``dest_dir`` is specified, the new - wheel is written there -- otherwise, the original wheel is overwritten. - - The modifier should return True if it updated the wheel, else False. - This method returns the same value the modifier returns. - """ - - def get_version(path_map, info_dir): - version = path = None - key = '%s/%s' % (info_dir, LEGACY_METADATA_FILENAME) - if key not in path_map: - key = '%s/PKG-INFO' % info_dir - if key in path_map: - path = path_map[key] - version = Metadata(path=path).version - return version, path - - def update_version(version, path): - updated = None - try: - NormalizedVersion(version) - i = version.find('-') - if i < 0: - updated = '%s+1' % version - else: - parts = [int(s) for s in version[i + 1:].split('.')] - parts[-1] += 1 - updated = '%s+%s' % (version[:i], '.'.join(str(i) for i in parts)) - except UnsupportedVersionError: - logger.debug('Cannot update non-compliant (PEP-440) ' - 'version %r', version) - if updated: - md = Metadata(path=path) - md.version = updated - legacy = path.endswith(LEGACY_METADATA_FILENAME) - md.write(path=path, legacy=legacy) - logger.debug('Version updated from %r to %r', version, updated) - - pathname = os.path.join(self.dirname, self.filename) - name_ver = '%s-%s' % (self.name, self.version) - info_dir = '%s.dist-info' % name_ver - record_name = posixpath.join(info_dir, 'RECORD') - with tempdir() as workdir: - with ZipFile(pathname, 'r') as zf: - path_map = {} - for zinfo in zf.infolist(): - arcname = zinfo.filename - if isinstance(arcname, text_type): - u_arcname = arcname - else: - u_arcname = arcname.decode('utf-8') - if u_arcname == record_name: - continue - if '..' in u_arcname: - raise DistlibException('invalid entry in ' - 'wheel: %r' % u_arcname) - zf.extract(zinfo, workdir) - path = os.path.join(workdir, convert_path(u_arcname)) - path_map[u_arcname] = path - - # Remember the version. - original_version, _ = get_version(path_map, info_dir) - # Files extracted. Call the modifier. - modified = modifier(path_map, **kwargs) - if modified: - # Something changed - need to build a new wheel. - current_version, path = get_version(path_map, info_dir) - if current_version and (current_version == original_version): - # Add or update local version to signify changes. - update_version(current_version, path) - # Decide where the new wheel goes. - if dest_dir is None: - fd, newpath = tempfile.mkstemp(suffix='.whl', prefix='wheel-update-', dir=workdir) - os.close(fd) - else: - if not os.path.isdir(dest_dir): - raise DistlibException('Not a directory: %r' % dest_dir) - newpath = os.path.join(dest_dir, self.filename) - archive_paths = list(path_map.items()) - distinfo = os.path.join(workdir, info_dir) - info = distinfo, info_dir - self.write_records(info, workdir, archive_paths) - self.build_zip(newpath, archive_paths) - if dest_dir is None: - shutil.copyfile(newpath, pathname) - return modified - - -def _get_glibc_version(): - import platform - ver = platform.libc_ver() - result = [] - if ver[0] == 'glibc': - for s in ver[1].split('.'): - result.append(int(s) if s.isdigit() else 0) - result = tuple(result) - return result - - -def compatible_tags(): - """ - Return (pyver, abi, arch) tuples compatible with this Python. - """ - class _Version: - def __init__(self, major, minor): - self.major = major - self.major_minor = (major, minor) - self.string = ''.join((str(major), str(minor))) - - def __str__(self): - return self.string - - - versions = [ - _Version(sys.version_info.major, minor_version) - for minor_version in range(sys.version_info.minor, -1, -1) - ] - abis = [] - for suffix in _get_suffixes(): - if suffix.startswith('.abi'): - abis.append(suffix.split('.', 2)[1]) - abis.sort() - if ABI != 'none': - abis.insert(0, ABI) - abis.append('none') - result = [] - - arches = [ARCH] - if sys.platform == 'darwin': - m = re.match(r'(\w+)_(\d+)_(\d+)_(\w+)$', ARCH) - if m: - name, major, minor, arch = m.groups() - minor = int(minor) - matches = [arch] - if arch in ('i386', 'ppc'): - matches.append('fat') - if arch in ('i386', 'ppc', 'x86_64'): - matches.append('fat3') - if arch in ('ppc64', 'x86_64'): - matches.append('fat64') - if arch in ('i386', 'x86_64'): - matches.append('intel') - if arch in ('i386', 'x86_64', 'intel', 'ppc', 'ppc64'): - matches.append('universal') - while minor >= 0: - for match in matches: - s = '%s_%s_%s_%s' % (name, major, minor, match) - if s != ARCH: # already there - arches.append(s) - minor -= 1 - - # Most specific - our Python version, ABI and arch - for i, version_object in enumerate(versions): - version = str(version_object) - add_abis = [] - - if i == 0: - add_abis = abis - - if IMP_PREFIX == 'cp' and version_object.major_minor >= (3, 2): - limited_api_abi = 'abi' + str(version_object.major) - if limited_api_abi not in add_abis: - add_abis.append(limited_api_abi) - - for abi in add_abis: - for arch in arches: - result.append((''.join((IMP_PREFIX, version)), abi, arch)) - # manylinux - if abi != 'none' and sys.platform.startswith('linux'): - arch = arch.replace('linux_', '') - parts = _get_glibc_version() - if len(parts) == 2: - if parts >= (2, 5): - result.append((''.join((IMP_PREFIX, version)), abi, 'manylinux1_%s' % arch)) - if parts >= (2, 12): - result.append((''.join((IMP_PREFIX, version)), abi, 'manylinux2010_%s' % arch)) - if parts >= (2, 17): - result.append((''.join((IMP_PREFIX, version)), abi, 'manylinux2014_%s' % arch)) - result.append((''.join( - (IMP_PREFIX, version)), abi, 'manylinux_%s_%s_%s' % (parts[0], parts[1], arch))) - - # where no ABI / arch dependency, but IMP_PREFIX dependency - for i, version_object in enumerate(versions): - version = str(version_object) - result.append((''.join((IMP_PREFIX, version)), 'none', 'any')) - if i == 0: - result.append((''.join((IMP_PREFIX, version[0])), 'none', 'any')) - - # no IMP_PREFIX, ABI or arch dependency - for i, version_object in enumerate(versions): - version = str(version_object) - result.append((''.join(('py', version)), 'none', 'any')) - if i == 0: - result.append((''.join(('py', version[0])), 'none', 'any')) - - return set(result) - - -COMPATIBLE_TAGS = compatible_tags() - -del compatible_tags - - -def is_compatible(wheel, tags=None): - if not isinstance(wheel, Wheel): - wheel = Wheel(wheel) # assume it's a filename - result = False - if tags is None: - tags = COMPATIBLE_TAGS - for ver, abi, arch in tags: - if ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch: - result = True - break - return result