diff --git a/environment.yaml b/environment.yaml index fb1d5e639..1c801084f 100644 --- a/environment.yaml +++ b/environment.yaml @@ -28,3 +28,4 @@ dependencies: - tomli-w - libcblas - beautifulsoup4 + - semver diff --git a/grayskull/__main__.py b/grayskull/__main__.py index e3e225ac9..aee19bdf0 100644 --- a/grayskull/__main__.py +++ b/grayskull/__main__.py @@ -21,10 +21,7 @@ logging.basicConfig(format="%(levelname)s:%(message)s") -def main(args=None): - if not args: - args = sys.argv[1:] or ["--help"] - +def init_parser(): # create the top-level parser parser = argparse.ArgumentParser(description="Grayskull - Conda recipe generator") subparsers = parser.add_subparsers(help="sub-command help") @@ -246,6 +243,14 @@ def main(args=None): help="Exclude folders when searching for licence.", ) + return parser + + +def main(args=None): + if not args: + args = sys.argv[1:] or ["--help"] + + parser = init_parser() args = parser.parse_args(args) if args.version: diff --git a/grayskull/strategy/py_base.py b/grayskull/strategy/py_base.py index 836997a29..76a185535 100644 --- a/grayskull/strategy/py_base.py +++ b/grayskull/strategy/py_base.py @@ -732,6 +732,10 @@ def merge_setup_toml_metadata(setup_metadata: dict, pyproject_metadata: dict) -> setup_metadata.get("install_requires", []), pyproject_metadata["requirements"]["run"], ) + if pyproject_metadata["requirements"]["run_constrained"]: + setup_metadata["requirements_run_constrained"] = pyproject_metadata[ + "requirements" + ]["run_constrained"] return setup_metadata diff --git a/grayskull/strategy/py_toml.py b/grayskull/strategy/py_toml.py index 02f66248b..147fd8adb 100644 --- a/grayskull/strategy/py_toml.py +++ b/grayskull/strategy/py_toml.py @@ -1,47 +1,219 @@ +import re from collections import defaultdict +from functools import singledispatch from pathlib import Path -from typing import Union +from typing import Dict, Optional, Tuple, Union +import semver import tomli from grayskull.utils import nested_dict +VERSION_REGEX 
= re.compile( + r"""[vV]? + (?P<major>0|[1-9]\d*) + (\. + (?P<minor>0|[1-9]\d*) + (\. + (?P<patch>0|[1-9]\d*) + )? + )? + """, + re.VERBOSE, +) + + +class InvalidVersion(BaseException): + pass + + +class InvalidPoetryDependency(BaseException): + pass + + +def parse_version(version: str) -> Dict[str, Optional[str]]: + """ + Parses a version string (not necessarily semver) to a dictionary with keys + "major", "minor", and "patch". "minor" and "patch" are possibly None. + """ + match = VERSION_REGEX.search(version) + if not match: + raise InvalidVersion(f"Could not parse version {version}.") + + return { + key: None if value is None else int(value) + for key, value in match.groupdict().items() + } + + +def vdict_to_vinfo(version_dict: Dict[str, Optional[str]]) -> semver.VersionInfo: + """ + Coerces version dictionary to a semver.VersionInfo object. If minor or patch + numbers are missing, 0 is substituted in their place. + """ + ver = {key: 0 if value is None else value for key, value in version_dict.items()} + return semver.VersionInfo(**ver) + + +def coerce_to_semver(version: str) -> str: + """ + Coerces a version string to a semantic version. + """ + if semver.VersionInfo.isvalid(version): + return version + + return str(vdict_to_vinfo(parse_version(version))) + + +def get_caret_ceiling(target: str) -> str: + """ + Accepts a Poetry caret target and returns the exclusive version ceiling. + + Targets that are invalid semver strings (e.g. "1.2", "0") are handled + according to the Poetry caret requirements specification, which is based on + whether the major version is 0: + + - If the major version is 0, the ceiling is determined by bumping the + rightmost specified digit and then coercing it to semver. + Example: 0 => 1.0.0, 0.1 => 0.2.0, 0.1.2 => 0.1.3 + + - If the major version is not 0, the ceiling is determined by + coercing it to semver and then bumping the major version.
+ Example: 1 => 2.0.0, 1.2 => 2.0.0, 1.2.3 => 2.0.0 + """ + if not semver.VersionInfo.isvalid(target): + target_dict = parse_version(target) + + if target_dict["major"] == 0: + if target_dict["minor"] is None: + target_dict["major"] += 1 + elif target_dict["patch"] is None: + target_dict["minor"] += 1 + else: + target_dict["patch"] += 1 + return str(vdict_to_vinfo(target_dict)) + + vdict_to_vinfo(target_dict) + return str(vdict_to_vinfo(target_dict).bump_major()) + + target_vinfo = semver.VersionInfo.parse(target) + + if target_vinfo.major == 0: + if target_vinfo.minor == 0: + return str(target_vinfo.bump_patch()) + else: + return str(target_vinfo.bump_minor()) + else: + return str(target_vinfo.bump_major()) + + +def get_tilde_ceiling(target: str) -> str: + """ + Accepts a Poetry tilde target and returns the exclusive version ceiling. + """ + target_dict = parse_version(target) + if target_dict["minor"]: + return str(vdict_to_vinfo(target_dict).bump_minor()) + + return str(vdict_to_vinfo(target_dict).bump_major()) + + +def encode_poetry_version(poetry_specifier: str) -> str: + """ + Encodes Poetry version specifier as a Conda version specifier. 
+ + Example: ^1 => >=1.0.0,<2.0.0 + """ + poetry_clauses = poetry_specifier.split(",") + + conda_clauses = [] + for poetry_clause in poetry_clauses: + poetry_clause = poetry_clause.replace(" ", "") + if poetry_clause.startswith("^"): + # handle ^ operator + target = poetry_clause[1:] + floor = coerce_to_semver(target) + ceiling = get_caret_ceiling(target) + conda_clauses.append(">=" + floor) + conda_clauses.append("<" + ceiling) + continue + + if poetry_clause.startswith("~"): + # handle ~ operator + target = poetry_clause[1:] + floor = coerce_to_semver(target) + ceiling = get_tilde_ceiling(target) + conda_clauses.append(">=" + floor) + conda_clauses.append("<" + ceiling) + continue + + # other poetry clauses should be conda-compatible + conda_clauses.append(poetry_clause) + + return ",".join(conda_clauses) + + +@singledispatch +def get_constrained_dep(dep_spec, dep_name): + raise InvalidPoetryDependency( + "Expected Poetry dependency specification to be of type str or dict, " + f"received {type(dep_spec).__name__}" + ) + + +@get_constrained_dep.register +def __get_constrained_dep_dict(dep_spec: dict, dep_name: str): + conda_version = encode_poetry_version(dep_spec["version"]) + return f"{dep_name} {conda_version}" + + +@get_constrained_dep.register +def __get_constrained_dep_str(dep_spec: str, dep_name: str): + conda_version = encode_poetry_version(dep_spec) + return f"{dep_name} {conda_version}" + + +def encode_poetry_deps(poetry_deps: dict) -> Tuple[list, list]: + run = [] + run_constrained = [] + for dep_name, dep_spec in poetry_deps.items(): + constrained_dep = get_constrained_dep(dep_spec, dep_name) + try: + assert dep_spec.get("optional", False) + run_constrained.append(constrained_dep) + except (AttributeError, AssertionError): + run.append(constrained_dep) + return run, run_constrained + def add_poetry_metadata(metadata: dict, toml_metadata: dict) -> dict: if not is_poetry_present(toml_metadata): return metadata - def flat_deps(dict_deps: dict) -> list: - 
result = [] - for pkg_name, version in dict_deps.items(): - if isinstance(version, dict): - version_spec = version["version"].strip() - del version["version"] - version = ( - f"{version_spec}{' ; '.join(f'{k} {v}' for k,v in version.items())}" - ) - version = f"=={version}" if version and version[0].isdigit() else version - result.append(f"{pkg_name} {version}".strip()) - return result - poetry_metadata = toml_metadata["tool"]["poetry"] - if poetry_run := flat_deps(poetry_metadata.get("dependencies", {})): - if not metadata["requirements"]["run"]: - metadata["requirements"]["run"] = [] - metadata["requirements"]["run"].extend(poetry_run) + poetry_deps = poetry_metadata.get("dependencies", {}) + req_run, req_run_constrained = encode_poetry_deps(poetry_deps) + + # add dependencies + metadata["requirements"].setdefault("run", []) + metadata["requirements"]["run"].extend(req_run) + + # add optional dependencies + if len(req_run_constrained): + metadata["requirements"].setdefault("run_constrained", []) + metadata["requirements"]["run_constrained"].extend(req_run_constrained) host_metadata = metadata["requirements"].get("host", []) if "poetry" not in host_metadata and "poetry-core" not in host_metadata: metadata["requirements"]["host"] = host_metadata + ["poetry-core"] - test_metadata = metadata["test"].get("requires", []) or [] - if ( - test_deps := poetry_metadata.get("group", {}) - .get("test", {}) - .get("dependencies", {}) - ): - test_deps = flat_deps(test_deps) - metadata["test"]["requires"] = test_metadata + test_deps + poetry_test_deps = ( + poetry_metadata.get("group", {}).get("test", {}).get("dependencies", {}) + ) + # add required test dependencies and ignore optional test dependencies, as + # there doesn't appear to be a way to specify them in Conda recipe metadata. 
+ test_reqs, _ = encode_poetry_deps(poetry_test_deps) + metadata["test"].get("requires", []).extend(test_reqs) return metadata diff --git a/grayskull/strategy/pypi.py b/grayskull/strategy/pypi.py index 0fd3d82ee..a38dc9012 100644 --- a/grayskull/strategy/pypi.py +++ b/grayskull/strategy/pypi.py @@ -107,6 +107,7 @@ def get_val(key): "extras_require": get_val("extras_require"), "requires_dist": requires_dist, "sdist_path": get_val("sdist_path"), + "requirements_run_constrained": get_val("requirements_run_constrained"), } @@ -571,6 +572,9 @@ def extract_requirements(metadata: dict, config, recipe) -> Dict[str, List[str]] "run": rm_duplicated_deps(sort_reqs(map(lambda x: x.lower(), run_req))), } ) + + if metadata.get("requirements_run_constrained", None): + result.update({"run_constrained": metadata["requirements_run_constrained"]}) update_requirements_with_pin(result) return result diff --git a/pyproject.toml b/pyproject.toml index 9742c0115..d1abeb0c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "ruamel.yaml >=0.16.10", "ruamel.yaml.jinja2", "setuptools >=30.3.0", + "semver~=2.13.0", "stdlib-list", "tomli", "tomli-w", diff --git a/tests/data/poetry/langchain-expected.yaml b/tests/data/poetry/langchain-expected.yaml new file mode 100644 index 000000000..ad76a634a --- /dev/null +++ b/tests/data/poetry/langchain-expected.yaml @@ -0,0 +1,86 @@ +{% set name = "langchain" %} +{% set version = "0.0.119" %} + +package: + name: {{ name|lower }} + version: {{ version }} + +source: + url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/langchain-{{ version }}.tar.gz + sha256: 95a93c966b1a2ff056c43870747aba1c39924c145179f0b8ffa27fef6a525610 + +build: + entry_points: + - langchain-server = langchain.server:main + noarch: python + script: {{ PYTHON }} -m pip install . 
-vv + number: 0 + +requirements: + host: + - python >=3.8,<4.0 + - poetry-core + - pip + run: + - python >=3.8.1,<4.0 + - pydantic >=1.0.0,<2.0.0 + - sqlalchemy >=1.0.0,<2.0.0 + - requests >=2.0.0,<3.0.0 + - pyyaml >=5.4.1 + - numpy >=1.0.0,<2.0.0 + - dataclasses-json >=0.5.7,<0.6.0 + - tenacity >=8.1.0,<9.0.0 + - aiohttp >=3.8.3,<4.0.0 + run_constrained: + - faiss-cpu >=1.0.0,<2.0.0 + - wikipedia >=1.0.0,<2.0.0 + - elasticsearch >=8.0.0,<9.0.0 + - opensearch-py >=2.0.0,<3.0.0 + - redis-py >=4.0.0,<5.0.0 + - manifest-ml >=0.0.1,<0.0.2 + - spacy >=3.0.0,<4.0.0 + - nltk >=3.0.0,<4.0.0 + - transformers >=4.0.0,<5.0.0 + - beautifulsoup4 >=4.0.0,<5.0.0 + - pytorch >=1.0.0,<2.0.0 + - jinja2 >=3.0.0,<4.0.0 + - tiktoken >=0.0.0,<1.0.0 + - pinecone-client >=2.0.0,<3.0.0 + - weaviate-client >=3.0.0,<4.0.0 + - google-api-python-client 2.70.0 + - wolframalpha 5.0.0 + - anthropic >=0.2.2,<0.3.0 + - qdrant-client >=1.0.4,<2.0.0 + - tensorflow-text >=2.11.0,<3.0.0 + - cohere >=3.0.0,<4.0.0 + - openai >=0.0.0,<1.0.0 + - nlpcloud >=1.0.0,<2.0.0 + - nomic >=1.0.43,<2.0.0 + - huggingface_hub >=0.0.0,<1.0.0 + - google-search-results >=2.0.0,<3.0.0 + - sentence-transformers >=2.0.0,<3.0.0 + - pypdf >=3.4.0,<4.0.0 + - networkx >=2.6.3,<3.0.0 + - aleph-alpha-client >=2.15.0,<3.0.0 + - deeplake >=3.2.9,<4.0.0 + - pgvector >=0.1.6,<0.2.0 + - psycopg2-binary >=2.9.5,<3.0.0 + +test: + imports: + - langchain + commands: + - pip check + - langchain-server --help + requires: + - pip + +about: + home: https://www.github.com/hwchase17/langchain + summary: Building applications with LLMs through composability + license: MIT + license_file: LICENSE + +extra: + recipe-maintainers: + - AddYourGitHubIdHere diff --git a/tests/data/pyproject/poetry.toml b/tests/data/poetry/poetry.toml similarity index 100% rename from tests/data/pyproject/poetry.toml rename to tests/data/poetry/poetry.toml diff --git a/tests/data/pyproject/tox.toml b/tests/data/tox/tox.toml similarity index 100% rename from 
tests/data/pyproject/tox.toml rename to tests/data/tox/tox.toml diff --git a/tests/test_poetry.py b/tests/test_poetry.py new file mode 100644 index 000000000..429128d10 --- /dev/null +++ b/tests/test_poetry.py @@ -0,0 +1,157 @@ +"""Unit and integration tests for recipifying Poetry projects.""" + +import filecmp +from pathlib import Path + +import pytest + +from grayskull.__main__ import generate_recipes_from_list, init_parser +from grayskull.strategy.py_toml import ( + InvalidVersion, + add_poetry_metadata, + encode_poetry_version, + get_all_toml_info, + get_caret_ceiling, + get_tilde_ceiling, + parse_version, +) + + +@pytest.mark.parametrize( + "version, major, minor, patch", + [ + ("0", 0, None, None), + ("1", 1, None, None), + ("1.2", 1, 2, None), + ("1.2.3", 1, 2, 3), + ], +) +def test_parse_version_success(version, major, minor, patch): + assert parse_version(version) == {"major": major, "minor": minor, "patch": patch} + + +@pytest.mark.parametrize("invalid_version", ["asdf", "", "."]) +def test_parse_version_failure(invalid_version): + with pytest.raises(InvalidVersion): + parse_version(invalid_version) + + +@pytest.mark.parametrize( + "version, ceiling_version", + [ + ("0", "1.0.0"), + ("0.0", "0.1.0"), + ("0.0.3", "0.0.4"), + ("0.2.3", "0.3.0"), + ("1", "2.0.0"), + ("1.2", "2.0.0"), + ("1.2.3", "2.0.0"), + ], +) +def test_get_caret_ceiling(version, ceiling_version): + # examples from Poetry docs + assert get_caret_ceiling(version) == ceiling_version + + +@pytest.mark.parametrize( + "version, ceiling_version", + [("1", "2.0.0"), ("1.2", "1.3.0"), ("1.2.3", "1.3.0")], +) +def test_get_tilde_ceiling(version, ceiling_version): + # examples from Poetry docs + assert get_tilde_ceiling(version) == ceiling_version + + +@pytest.mark.parametrize( + "version, encoded_version", + [ + # should be unchanged + ("1.*", "1.*"), + (">=1,<2", ">=1,<2"), + ("==1.2.3", "==1.2.3"), + ("!=1.2.3", "!=1.2.3"), + # strip spaces + (">= 1, < 2", ">=1,<2"), + # handle exact version 
specifiers correctly + ("1.2.3", "1.2.3"), + ("==1.2.3", "==1.2.3"), + # handle caret operator correctly + # examples from Poetry docs + ("^0", ">=0.0.0,<1.0.0"), + ("^0.0", ">=0.0.0,<0.1.0"), + ("^0.0.3", ">=0.0.3,<0.0.4"), + ("^0.2.3", ">=0.2.3,<0.3.0"), + ("^1", ">=1.0.0,<2.0.0"), + ("^1.2", ">=1.2.0,<2.0.0"), + ("^1.2.3", ">=1.2.3,<2.0.0"), + # handle tilde operator correctly + # examples from Poetry docs + ("~1", ">=1.0.0,<2.0.0"), + ("~1.2", ">=1.2.0,<1.3.0"), + ("~1.2.3", ">=1.2.3,<1.3.0"), + ], +) +def test_encode_poetry_version(version, encoded_version): + assert encode_poetry_version(version) == encoded_version + + +def test_add_poetry_metadata(): + toml_metadata = { + "tool": { + "poetry": { + "dependencies": {"tomli": ">=1.0.0", "requests": ">=1.0.0"}, + "group": { + "test": {"dependencies": {"tox": ">=1.0.0", "pytest": ">=1.0.0"}} + }, + } + } + } + metadata = { + "requirements": { + "host": ["pkg_host1 >=1.0.0", "pkg_host2"], + "run": ["pkg_run1", "pkg_run2 >=2.0.0"], + }, + "test": {"requires": ["mock", "pkg_test >=1.0.0"]}, + } + assert add_poetry_metadata(metadata, toml_metadata) == { + "requirements": { + "host": ["pkg_host1 >=1.0.0", "pkg_host2", "poetry-core"], + "run": [ + "pkg_run1", + "pkg_run2 >=2.0.0", + "tomli >=1.0.0", + "requests >=1.0.0", + ], + }, + "test": { + "requires": ["mock", "pkg_test >=1.0.0", "tox >=1.0.0", "pytest >=1.0.0"] + }, + } + + +def test_poetry_dependencies(): + toml_path = Path(__file__).parent / "data" / "poetry" / "poetry.toml" + result = get_all_toml_info(toml_path) + + assert result["test"]["requires"] == ["cachy 0.3.0", "deepdiff >=6.2.0,<7.0.0"] + assert result["requirements"]["host"] == ["setuptools>=1.1.0", "poetry-core"] + assert result["requirements"]["run"] == [ + "python >=3.7.0,<4.0.0", + "cleo >=2.0.0,<3.0.0", + "html5lib >=1.0.0,<2.0.0", + "urllib3 >=1.26.0,<2.0.0", + ] + + +def test_poetry_langchain_snapshot(tmpdir): + """Snapshot test that asserts correct recipifying of an example Poetry project.""" 
+ snapshot_path = ( + Path(__file__).parent / "data" / "poetry" / "langchain-expected.yaml" + ) + output_path = tmpdir / "langchain" / "meta.yaml" + + parser = init_parser() + args = parser.parse_args(["pypi", "langchain==0.0.119", "-o", str(tmpdir)]) + + generate_recipes_from_list(args.pypi_packages, args) + assert filecmp.cmp(snapshot_path, output_path, shallow=False) diff --git a/tests/test_pyproject.py b/tests/test_tox.py similarity index 51% rename from tests/test_pyproject.py rename to tests/test_tox.py index 3b0977ed3..e3b68d138 100644 --- a/tests/test_pyproject.py +++ b/tests/test_tox.py @@ -1,24 +1,12 @@ -from pathlib import Path - -from grayskull.strategy.py_toml import add_poetry_metadata, get_all_toml_info - +"""Unit and integration tests for recipifying Tox projects.""" -def test_get_all_toml_info_poetry(): - toml_path = Path(__file__).parent / "data" / "pyproject" / "poetry.toml" - result = get_all_toml_info(toml_path) +from pathlib import Path - assert result["test"]["requires"] == ["cachy ==0.3.0", "deepdiff ^6.2"] - assert result["requirements"]["host"] == ["setuptools>=1.1.0", "poetry-core"] - assert result["requirements"]["run"] == [ - "python ^3.7", - "cleo ^2.0.0", - "html5lib ^1.0", - "urllib3 ^1.26.0", - ] +from grayskull.strategy.py_toml import get_all_toml_info def test_get_all_toml_info(): - toml_path = Path(__file__).parent / "data" / "pyproject" / "tox.toml" + toml_path = Path(__file__).parent / "data" / "tox" / "tox.toml" result = get_all_toml_info(toml_path) assert result["build"]["entry_points"] == ["tox = tox.run:run"] @@ -66,28 +54,3 @@ def test_get_all_toml_info(): 'typing-extensions>=4.4; python_version < "3.8"', "python >=3.7", ] - - -def test_add_poetry_metadata(): - toml_metadata = { - "tool": { - "poetry": { - "dependencies": {"tomli": ">=1.0.0", "requests": ""}, - "group": {"test": {"dependencies": {"tox": ">=1.0.0", "pytest": ""}}}, - } - } - } - metadata = { - "requirements": { - "host": ["pkg_host1 >=1.0.0", "pkg_host2"], 
- "run": ["pkg_run1", "pkg_run2 >=2.0.0"], - }, - "test": {"requires": ["mock", "pkg_test >=1.0.0"]}, - } - assert add_poetry_metadata(metadata, toml_metadata) == { - "requirements": { - "host": ["pkg_host1 >=1.0.0", "pkg_host2", "poetry-core"], - "run": ["pkg_run1", "pkg_run2 >=2.0.0", "tomli >=1.0.0", "requests"], - }, - "test": {"requires": ["mock", "pkg_test >=1.0.0", "tox >=1.0.0", "pytest"]}, - }