diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7af2958..6087008 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -25,6 +25,8 @@
   [\#22](https://github.com/conda-incubator/conda-mirror/issues/22)
 * Support use of conda package version specifiers
   [\#37](https://github.com/conda-incubator/conda-mirror/issues/37)
+* Options to only get n latest versions of matching packages.
+  [\#52](https://github.com/conda-incubator/conda-mirror/issues/52)
 * Added tqdm based progress bars.
   [\#29](https://github.com/conda-incubator/conda-mirror/issues/29)
 * Improve download speed, especially for smaller packages.
diff --git a/conda_mirror/conda_mirror.py b/conda_mirror/conda_mirror.py
index e362167..c6d1967 100644
--- a/conda_mirror/conda_mirror.py
+++ b/conda_mirror/conda_mirror.py
@@ -15,7 +15,7 @@
 import time
 import random
 from pprint import pformat
-from typing import Any, Callable, Dict, Set, Union
+from typing import Any, Callable, Dict, Iterable, Set, Union, List, NamedTuple
 
 import requests
 import yaml
@@ -23,9 +23,9 @@
 from tqdm import tqdm
 
 try:
-    from conda.models.version import BuildNumberMatch, VersionSpec
+    from conda.models.version import BuildNumberMatch, VersionSpec, VersionOrder
 except ImportError:
-    from .versionspec import BuildNumberMatch, VersionSpec
+    from .versionspec import BuildNumberMatch, VersionSpec, VersionOrder
 
 logger = None
 
@@ -289,6 +289,27 @@ def _make_arg_parser():
         action="store_true",
         help=("Include packages matching any dependencies of packages in whitelist."),
     )
+    ap.add_argument(
+        "--latest",
+        metavar="N",
+        type=int,
+        nargs="?",
+        const=1,
+        default=-1,
+        help=(
+            "Only download the N most-recent non-dev instance(s) of each package. "
+            "N defaults to 1; dev instances are skipped unless --latest-dev is given."
+        ),
+    )
+    ap.add_argument(
+        "--latest-dev",
+        metavar="N",
+        type=int,
+        nargs="?",
+        const=1,
+        default=-1,
+        help="Only download the N most-recent dev instance(s) of each package.",
+    )
     ap.add_argument(
         "-v",
         "--verbose",
@@ -487,6 +508,14 @@ def pdb_hook(exctype, value, traceback):
         else:
             url = "{}:{}".format(scheme, url[0])
         proxies = {scheme: url}
+
+    latest_dev = int(args.latest_dev)
+    latest_non_dev = int(args.latest)
+
+    # If --latest is given without --latest-dev, then --latest-dev defaults to zero.
+    if latest_dev < 0 and latest_non_dev >= 0:
+        latest_dev = 0
+
     return {
         "upstream_channel": args.upstream_channel,
         "target_directory": args.target_directory,
@@ -496,6 +525,8 @@ def pdb_hook(exctype, value, traceback):
         "blacklist": blacklist,
         "whitelist": whitelist,
         "include_depends": args.include_depends,
+        "latest_dev": latest_dev,
+        "latest_non_dev": latest_non_dev,
         "dry_run": args.dry_run,
         "no_validate_target": args.no_validate_target,
         "minimum_free_space": args.minimum_free_space,
@@ -890,6 +921,68 @@ def _validate_or_remove_package(args):
     )
 
 
+def _find_non_recent_packages(
+    packages: Dict[str, Dict[str, Any]],
+    *,
+    include: Iterable[str],
+    latest_non_dev: int,
+    latest_dev: int,
+) -> Set[str]:
+    """Computes set of package filenames that are not sufficiently recent
+
+    Parameters
+    ----------
+    packages: packages dictionary from repodata.json
+    include: package filenames to be considered
+    latest_non_dev: number of non-dev packages that are sufficiently recent to be included
+        if negative, then all non-dev packages will be included
+    latest_dev: number of dev packages that are sufficiently recent to be included
+        if negative, then all dev packages will be included
+
+    Returns
+    -------
+    non-recent packages: Set[str]
+        Package filenames that are not sufficiently recent. This will be a subset of `include`
+    """
+
+    non_recent_packages: Set[str] = set()
+
+    if latest_non_dev >= 0 or latest_dev >= 0:
+
+        class PackageAndVersion(NamedTuple):
+            package_file: str
+            version: VersionOrder
+
+        packages_by_name: Dict[str, List[PackageAndVersion]] = {}
+        for key in include:
+            metadata = packages[key]
+            try:
+                packages_by_name.setdefault(metadata["name"], []).append(
+                    PackageAndVersion(key, VersionOrder(metadata["version"]))
+                )
+            except KeyError:
+                pass  # ignore bad entries
+
+        for curpackages in packages_by_name.values():
+            curpackages.sort(
+                key=lambda x: x.version, reverse=True
+            )  # recent versions first
+            dev_versions = [
+                p.package_file for p in curpackages if "DEV" in p.version.version[-1]
+            ]
+            non_dev_versions = [
+                p.package_file
+                for p in curpackages
+                if "DEV" not in p.version.version[-1]
+            ]
+            if latest_dev >= 0:
+                non_recent_packages.update(dev_versions[latest_dev:])
+            if latest_non_dev >= 0:
+                non_recent_packages.update(non_dev_versions[latest_non_dev:])
+
+    return non_recent_packages
+
+
 def main(
     upstream_channel,
     target_directory,
@@ -898,6 +991,8 @@ def main(
     blacklist=None,
     whitelist=None,
     include_depends=False,
+    latest_non_dev: int = -1,
+    latest_dev: int = -1,
     num_threads=1,
     dry_run=False,
     no_validate_target=False,
@@ -939,6 +1034,12 @@ def main(
     include_depends: bool
         If true, then include packages matching dependencies
         of whitelisted packages as well.
+    latest_dev: int
+        If >= zero, then only that number of the most recent development versions of
+        each package in a repo subdir will be downloaded.
+    latest_non_dev: int
+        If >= zero, then only that number of the most recent non-development versions of
+        each package in a repo subdir will be downloaded.
     num_threads : int, optional
         Number of threads to be used for concurrent validation. Defaults to
         `num_threads=1` for non-concurrent mode. To use all available cores,
@@ -1002,6 +1103,7 @@ def main(
      'size': 1960193,
      'version': '8.5.18'}
     """
+    # TODO update these comments. They are no longer totally correct.
     # Steps:
     # 1. figure out blacklisted packages
     # 2. un-blacklist packages that are actually whitelisted
@@ -1077,6 +1179,15 @@ def main(
 
     possible_packages_to_mirror = set(packages.keys()) - excluded_packages
 
+    # 3b remove non-latest packages if so specified.
+    non_recent_packages = _find_non_recent_packages(
+        packages,
+        include=possible_packages_to_mirror,
+        latest_non_dev=latest_non_dev,
+        latest_dev=latest_dev,
+    )
+    possible_packages_to_mirror -= non_recent_packages
+
     # 4. Validate all local packages
     # construct the desired package repodata
     desired_repodata = {