From 92434f03d6a226623ae47de59e09274b6c1f7dbb Mon Sep 17 00:00:00 2001 From: Jan Sikorski <132985823+sfc-gh-jsikorski@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:26:00 +0100 Subject: [PATCH 1/2] Fixes for package creation and pip installs (#781) * Added dependencies check to venv * Problem * Problem2 * Fixed problem * Cleanup * Corrected github installs * Corrected lib path * added release notes * fixes * fixes * fixes * fixes * Added native libraries option * Added native libraries option * Snapshot update * Snapshot update * Fixes * After merge fixes * Minor * fixes * Updates * Performance update * Test fix * windows fix * windows fix * windows fix * windows fix * windows fix * windows fix * windows fix * windows fix * fixed tests * fixed tests * Fixes after review * Fixes after review * Fixes * Fixes * Fixes * Fixes * Fixes * Fixes * Fixes * Fixes * Replaced deprecated flags * Replaced deprecated flags * Replaced deprecated flags * Replaced deprecated flags * Fix for encoding * Fix for encoding * Fix for encoding * Fix for encoding * Fix for encoding * Started cleaning * Cleanup --- .pre-commit-config.yaml | 3 +- RELEASE-NOTES.md | 4 + src/snowflake/cli/plugins/snowpark/models.py | 67 ++++- .../cli/plugins/snowpark/package/commands.py | 25 +- .../cli/plugins/snowpark/package/manager.py | 19 +- .../cli/plugins/snowpark/package/utils.py | 3 +- .../cli/plugins/snowpark/package_utils.py | 276 +++++++----------- .../cli/plugins/snowpark/snowpark_shared.py | 5 +- src/snowflake/cli/plugins/snowpark/venv.py | 129 +++++++- tests/__snapshots__/test_help_messages.ambr | 22 +- tests/snowpark/test_models.py | 46 +++ tests/snowpark/test_package.py | 13 +- tests/snowpark/test_snowpark_shared.py | 3 +- tests/test_data/test_data.py | 2 +- tests/test_utils.py | 57 +--- tests_integration/test_package.py | 98 ++++++- tests_integration/test_snowpark.py | 1 + tests_integration/test_streamlit.py | 2 +- .../testing_utils/snowpark_utils.py | 2 +- 19 files changed, 507 insertions(+), 270 deletions(-) create mode 100644 tests/snowpark/test_models.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eda7e65d96..3032f566a8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,8 @@ repos: ^src/snowflake/cli/app/dev/.*$| ^src/snowflake/cli/templates/.*$| ^src/snowflake/cli/api/utils/rendering.py$| - ^src/snowflake/cli/plugins/spcs/common.py$ + ^src/snowflake/cli/plugins/spcs/common.py$| + ^src/snowflake/cli/plugins/snowpark/venv.py$ - id: check-app-imports-in-api language: pygrep name: "No top level cli.app imports in cli.api" diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index dc9d351f31..e7007d4047 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -28,6 +28,9 @@ * Added `create` command for `image-repository`. * Added `status`, `set (property)`, `unset (property)`, `suspend` and `resume` commands for `compute-pool`. * Added `set (property)`, `unset (property)`,`upgrade` and `list-endpoints` commands for `service`. 
+* You can now use github repo link in `snow snowpark package create` to prepare your code for upload +* Added `allow-native-libraries` option to `snow snowpark package create` command +* Added alias `--install-from-pip` for `-y` option in `snow snowpark package create` command * Connections parameters are also supported by generic environment variables: * `SNOWFLAKE_ACCOUNT` * `SNOWFLAKE_USER` @@ -49,6 +52,7 @@ ## Fixes and improvements * Restricted permissions of automatically created files * Fixed bug where `spcs service create` would not throw error if service with specified name already exists. +* Improved package lookup, to avoid unnecessary uploads * Logging into the file by default (INFO level) * Added validation that service, compute pool, and image repository names are unqualified identifiers. * `spcs service` commands now accept qualified names. diff --git a/src/snowflake/cli/plugins/snowpark/models.py b/src/snowflake/cli/plugins/snowpark/models.py index 604e046393..cf25f91c98 100644 --- a/src/snowflake/cli/plugins/snowpark/models.py +++ b/src/snowflake/cli/plugins/snowpark/models.py @@ -1,10 +1,11 @@ from __future__ import annotations +import re from dataclasses import dataclass from enum import Enum from typing import List -from requirements.requirement import Requirement +from requirements import requirement class PypiOption(Enum): @@ -13,6 +14,35 @@ class PypiOption(Enum): ASK = "ask" +class RequirementType(Enum): + FILE = "file" + PACKAGE = "package" + + +class Requirement(requirement.Requirement): + extra_pattern = re.compile("'([^']*)'") + + @classmethod + def parse_line(cls, line: str) -> Requirement: + if len(line_elements := line.split(";")) > 1: + line = line_elements[0] + result = super().parse_line(line) + + if len(line_elements) > 1: + for element in line_elements[1:]: + if "extra" in element and (extras := cls.extra_pattern.search(element)): + result.extras.extend(extras.groups()) + + if result.uri and not result.name: + result.name = get_package_name(result.uri) + + return result + + @classmethod + def _look_for_specifier(cls, specifier: str, line: str): + return re.search(cls.specifier_pattern.format(specifier), line) + + @dataclass class SplitRequirements: """A dataclass to hold the results of parsing requirements files and dividing them into @@ -30,3 +60,38 @@ class RequirementWithFiles: requirement: Requirement files: List[str] + + +@dataclass +class RequirementWithFilesAndDeps(RequirementWithFiles): + dependencies: List[str] + + def to_requirement_with_files(self): + return RequirementWithFiles(self.requirement, self.files) + + +def get_package_name(name: str) -> str: + if name.lower().startswith(("git+", "http")): + pattern = re.compile(r"github\.com\/[^\/]+\/([^\/][^.@$/]+)") + if match := pattern.search(name): + return match.group(1) + else: + return name + + elif name.endswith(".zip"): + return name.replace(".zip", "") + else: + return name + + +pip_failed_msg = ( + "pip failed with return code {}." + "If pip is installed correctly, this may mean you`re trying to install a package" + "that isn't compatible with the host architecture -" + "and generally means it has native libraries." +) +second_chance_msg = ( + "Good news! 
The following package dependencies can be" + "imported directly from Anaconda, and will be excluded from" + "the zip: {}" +) diff --git a/src/snowflake/cli/plugins/snowpark/package/commands.py b/src/snowflake/cli/plugins/snowpark/package/commands.py index 37ac029701..c0eb548203 100644 --- a/src/snowflake/cli/plugins/snowpark/package/commands.py +++ b/src/snowflake/cli/plugins/snowpark/package/commands.py @@ -6,6 +6,7 @@ import typer from snowflake.cli.api.commands.snow_typer import SnowTyper from snowflake.cli.api.output.types import CommandResult, MessageResult +from snowflake.cli.plugins.snowpark.models import PypiOption from snowflake.cli.plugins.snowpark.package.manager import ( cleanup_after_install, create, @@ -17,6 +18,7 @@ NotInAnaconda, RequiresPackages, ) +from snowflake.cli.plugins.snowpark.snowpark_shared import PackageNativeLibrariesOption app = SnowTyper( name="package", @@ -45,6 +47,7 @@ def package_lookup( name: str = typer.Argument(..., help="Name of the package."), install_packages: bool = install_option, deprecated_install_option: bool = deprecated_install_option, + allow_native_libraries: PypiOption = PackageNativeLibrariesOption, **options, ) -> CommandResult: """ @@ -55,7 +58,11 @@ def package_lookup( if deprecated_install_option: install_packages = deprecated_install_option - lookup_result = lookup(name=name, install_packages=install_packages) + lookup_result = lookup( + name=name, + install_packages=install_packages, + allow_native_libraries=allow_native_libraries, + ) return MessageResult(lookup_result.message) @@ -97,6 +104,7 @@ def package_create( ), install_packages: bool = install_option, deprecated_install_option: bool = deprecated_install_option, + allow_native_libraries: PypiOption = PackageNativeLibrariesOption, **options, ) -> CommandResult: """ @@ -105,13 +113,14 @@ def package_create( if deprecated_install_option: install_packages = deprecated_install_option - if ( - type(lookup_result := lookup(name=name, install_packages=install_packages)) - in [ - NotInAnaconda, - RequiresPackages, - ] - and type(creation_result := create(name)) == CreatedSuccessfully + lookup_result = lookup( + name=name, + install_packages=install_packages, + allow_native_libraries=allow_native_libraries, + ) + + if isinstance(lookup_result, (NotInAnaconda, RequiresPackages)) and isinstance( + creation_result := create(name), CreatedSuccessfully ): message = creation_result.message if type(lookup_result) == RequiresPackages: diff --git a/src/snowflake/cli/plugins/snowpark/package/manager.py b/src/snowflake/cli/plugins/snowpark/package/manager.py index e9aa74a747..9837ab0400 100644 --- a/src/snowflake/cli/plugins/snowpark/package/manager.py +++ b/src/snowflake/cli/plugins/snowpark/package/manager.py @@ -5,12 +5,16 @@ from functools import wraps from pathlib import Path -from requirements.requirement import Requirement from snowflake.cli.api.constants import PACKAGES_DIR from snowflake.cli.api.secure_path import SecurePath from snowflake.cli.plugins.object.stage.manager import StageManager from snowflake.cli.plugins.snowpark import package_utils -from snowflake.cli.plugins.snowpark.models import SplitRequirements +from snowflake.cli.plugins.snowpark.models import ( + PypiOption, + Requirement, + SplitRequirements, + get_package_name, +) from snowflake.cli.plugins.snowpark.package.utils import ( CreatedSuccessfully, InAnaconda, @@ -25,7 +29,9 @@ log = logging.getLogger(__name__) -def lookup(name: str, install_packages: bool) -> LookupResult: +def lookup( + name: str, install_packages: 
bool, allow_native_libraries: PypiOption +) -> LookupResult: package_response = package_utils.parse_anaconda_packages([Requirement.parse(name)]) @@ -33,7 +39,10 @@ def lookup(name: str, install_packages: bool) -> LookupResult: return InAnaconda(package_response, name) elif install_packages: status, result = package_utils.install_packages( - perform_anaconda_check=True, package_name=name, file_name=None + perform_anaconda_check=True, + package_name=name, + file_name=None, + allow_native_libraries=allow_native_libraries, ) if status: @@ -65,7 +74,7 @@ def upload(file: Path, stage: str, overwrite: bool): def create(zip_name: str): - file_name = zip_name if zip_name.endswith(".zip") else f"{zip_name}.zip" + file_name = f"{get_package_name(zip_name)}.zip" zip_dir(dest_zip=Path(file_name), source=Path.cwd() / ".packages") if os.path.exists(file_name): diff --git a/src/snowflake/cli/plugins/snowpark/package/utils.py b/src/snowflake/cli/plugins/snowpark/package/utils.py index 29c62dbe6d..3acbb44751 100644 --- a/src/snowflake/cli/plugins/snowpark/package/utils.py +++ b/src/snowflake/cli/plugins/snowpark/package/utils.py @@ -4,9 +4,8 @@ from pathlib import Path from typing import List -from requirements.requirement import Requirement from snowflake.cli.api.secure_path import SecurePath -from snowflake.cli.plugins.snowpark.models import SplitRequirements +from snowflake.cli.plugins.snowpark.models import Requirement, SplitRequirements @dataclass diff --git a/src/snowflake/cli/plugins/snowpark/package_utils.py b/src/snowflake/cli/plugins/snowpark/package_utils.py index 310cb49088..ca344bece3 100644 --- a/src/snowflake/cli/plugins/snowpark/package_utils.py +++ b/src/snowflake/cli/plugins/snowpark/package_utils.py @@ -1,23 +1,25 @@ from __future__ import annotations -import glob import logging import os -import re -from typing import Dict, List +from pathlib import Path +from typing import List import click import requests import requirements import typer from packaging.version import parse -from requirements.requirement import Requirement from snowflake.cli.api.constants import DEFAULT_SIZE_LIMIT_MB from snowflake.cli.api.secure_path import SecurePath from snowflake.cli.plugins.snowpark.models import ( PypiOption, - RequirementWithFiles, + Requirement, + RequirementType, + RequirementWithFilesAndDeps, SplitRequirements, + pip_failed_msg, + second_chance_msg, ) from snowflake.cli.plugins.snowpark.venv import Venv @@ -54,12 +56,14 @@ def parse_requirements( return deduplicate_and_sort_reqs(reqs) -def deduplicate_and_sort_reqs(packages: List[Requirement]) -> List[Requirement]: +def deduplicate_and_sort_reqs( + packages: List[Requirement], +) -> List[Requirement]: """ Deduplicates a list of requirements, keeping the first occurrence of each package. """ seen = set() - deduped: List[Requirement] = [] + deduped: List[RequirementWithFilesAndDeps] = [] for package in packages: if package.name not in seen: deduped.append(package) @@ -75,6 +79,9 @@ def parse_anaconda_packages(packages: List[Requirement]) -> SplitRequirements: Returns a dict with two keys: 'snowflake' and 'other'. Each key contains a list of Requirement object. + As snowflake currently doesn't support extra syntax (ex. `jinja2[diagrams]`), if such + extra is present in the dependency, we mark it as unavailable. 
+ Parameters: packages (List[Requirement]) - list of requirements to be checked @@ -87,8 +94,9 @@ def parse_anaconda_packages(packages: List[Requirement]) -> SplitRequirements: channel_data = _get_anaconda_channel_contents() for package in packages: - # pip package names are case-insensitive, while Anaconda package names are lowercase - if check_if_package_is_avaiable_in_conda(package, channel_data["packages"]): + if package.extras: + result.other.append(package) + elif check_if_package_is_avaiable_in_conda(package, channel_data["packages"]): result.snowflake.append(package) else: log.info("'%s' not found in Snowflake Anaconda channel...", package.name) @@ -105,92 +113,11 @@ def _get_anaconda_channel_contents(): raise typer.Abort() -def get_downloaded_packages() -> Dict[str, RequirementWithFiles]: - """Returns a dict of official package names mapped to the files/folders - that belong to it under the .packages directory. - - Returns: - dict[str:List[str]]: a dict of package folder names to package name - """ - metadata_files = glob.glob(".packages/*dist-info/METADATA") - packages_full_path = os.path.abspath(".packages") - return_dict: Dict[str, RequirementWithFiles] = {} - for metadata_file in metadata_files: - parent_folder = os.path.dirname(metadata_file) - package = get_package_name_from_metadata(metadata_file) - if package is not None: - # since we found a package name, we can now look at the RECORD - # file (a sibling of METADATA) to determine which files and - # folders that belong to it - record_file_path = SecurePath(parent_folder) / "RECORD" - if record_file_path.exists(): - # the RECORD file contains a list of files included in the - # package, get the unique root folder names and delete them - # recursively - with record_file_path.open( - "r", read_file_limit_mb=DEFAULT_SIZE_LIMIT_MB, encoding="utf-8" - ) as record_file: - # we want the part up until the first '/'. - # Sometimes it's a file with a trailing ",sha256=abcd....", - # so we trim that off too - record_entries = list( - { - line.split(",")[0].rsplit("/", 1)[0] - for line in record_file.readlines() - }, - ) - included_record_entries = [] - for record_entry in record_entries: - record_entry_full_path = os.path.abspath( - os.path.join(".packages", record_entry), - ) - # it's possible for the RECORD file to contain relative - # paths to items outside of the packages folder. - # We'll ignore those by asserting that the full - # packages path exists in the full path of each item. - if ( - os.path.exists(record_entry_full_path) - and packages_full_path in record_entry_full_path - ): - included_record_entries.append(record_entry) - return_dict[package.name] = RequirementWithFiles( - requirement=package, files=included_record_entries - ) - return return_dict - - -def get_package_name_from_metadata(metadata_file_path: str) -> Requirement | None: - """Loads a METADATA file from the dist-info directory of an installed - Python package, finds the name of the package. - This is found on a line containing "Name: my_package". - - Args: - metadata_file_path (str): The path to the METADATA file - - Returns: - str: the name of the package. 
- """ - metadata_file_path_spath = SecurePath(metadata_file_path) - with metadata_file_path_spath.open( - "r", read_file_limit_mb=DEFAULT_SIZE_LIMIT_MB, encoding="utf-8" - ) as metadata_file: - contents = metadata_file.read() - results = re.search("^Name: (.*)$", contents, flags=re.MULTILINE) - if results is None: - return None - requirement_line = results.group(1) - results = re.search("^Version: (.*)$", contents, flags=re.MULTILINE) - if results is not None: - version = results.group(1) - requirement_line += f"=={version}" - return Requirement.parse(requirement_line) - - def install_packages( file_name: str | None, perform_anaconda_check: bool = True, - package_native_libraries: PypiOption = PypiOption.ASK, package_name: str | None = None, + allow_native_libraries: PypiOption = PypiOption.ASK, ) -> tuple[bool, SplitRequirements | None]: """ Install packages from a requirements.txt file or a single package name, @@ -203,86 +130,64 @@ def install_packages( which are available on the Snowflake Anaconda channel. These will have been deleted from the local packages folder. """ - second_chance_results = None - with Venv() as v: if file_name is not None: - pip_install_result = v.pip_install(file_name, "file") + pip_install_result = v.pip_install(file_name, RequirementType.FILE) + dependencies = v.get_package_dependencies(file_name, RequirementType.FILE) if package_name is not None: - pip_install_result = v.pip_install(package_name, "package") - - if pip_install_result != 0: - log.info(pip_failed_msg.format(pip_install_result)) - return False, None - - if perform_anaconda_check: - log.info("Checking for dependencies available in Anaconda...") - # it's not over just yet. a non-Anaconda package may have brought in - # a package available on Anaconda. - # use each folder's METADATA file to determine its real name - downloaded_packages_dict = get_downloaded_packages() - log.info("Downloaded packages: %s", downloaded_packages_dict.keys()) - # look for all the downloaded packages on the Anaconda channel - downloaded_package_requirements = [ - r.requirement for r in downloaded_packages_dict.values() - ] - second_chance_results = parse_anaconda_packages( - downloaded_package_requirements, - ) - second_chance_snowflake_packages = second_chance_results.snowflake - if len(second_chance_snowflake_packages) > 0: - log.info(second_chance_msg.format(second_chance_results)) - else: - log.info("None of the package dependencies were found on Anaconda") - second_chance_snowflake_package_names = [ - p.name for p in second_chance_snowflake_packages - ] - downloaded_packages_not_needed = { - k: v - for k, v in downloaded_packages_dict.items() - if k in second_chance_snowflake_package_names - } - _delete_packages(downloaded_packages_not_needed) - - log.info("Checking to see if packages have native libraries...") - # use glob to see if any files in packages have a .so extension - if _check_for_native_libraries(): - continue_installation = ( - click.confirm( - "\n\nWARNING! 
Some packages appear to have native libraries!\n" - "Continue with package installation?", - default=False, + pip_install_result = v.pip_install(package_name, RequirementType.PACKAGE) + dependencies = v.get_package_dependencies( + package_name, RequirementType.PACKAGE + ) + + if pip_install_result != 0: + log.info(pip_failed_msg.format(pip_install_result)) + return False, None + + if perform_anaconda_check: + log.info("Checking for dependencies available in Anaconda...") + dependency_requirements = [dep.requirement for dep in dependencies] + log.info( + "Downloaded packages: %s", + ",".join([d.name for d in dependency_requirements]), + ) + second_chance_results: SplitRequirements = parse_anaconda_packages( + dependency_requirements ) - if package_native_libraries == PypiOption.ASK - else True + + if len(second_chance_results.snowflake) > 0: + log.info(second_chance_msg.format(second_chance_results.snowflake)) + else: + log.info("None of the package dependencies were found on Anaconda") + + dependencies_to_be_packed = _get_dependencies_not_avaiable_in_conda( + dependencies, + second_chance_results.snowflake if second_chance_results else None, ) - if not continue_installation: - SecurePath(".packages").rmdir(recursive=True, missing_ok=True) + + log.info("Checking to see if packages have native libraries...") + + if _perform_native_libraries_check( + dependencies_to_be_packed + ) and not _confirm_native_libraries(allow_native_libraries): return False, second_chance_results - else: - log.info("No non-supported native libraries found in packages (Good news!)...") - return True, second_chance_results - - -def _delete_packages(to_be_deleted: Dict) -> None: - for package, items in to_be_deleted.items(): - log.info("Package %s: deleting %d files", package, len(items.files)) - for item in items.files: - item_path = SecurePath(".packages") / item - if item_path.exists(): - if item_path.path.is_dir(): - item_path.rmdir(recursive=True) - else: - item_path.unlink() - - -def _check_for_native_libraries(): - if glob.glob(".packages/**/*.so"): - for path in glob.glob(".packages/**/*.so"): - log.info("Potential native library: %s", path) - return True - return False + else: + v.copy_files_to_packages_dir( + [Path(file) for dep in dependencies_to_be_packed for file in dep.files] + ) + return True, second_chance_results + + +def _check_for_native_libraries( + dependencies: List[RequirementWithFilesAndDeps], +) -> List[str]: + return [ + dependency.requirement.name + for dependency in dependencies + for file in dependency.files + if file.endswith(".so") + ] def get_snowflake_packages() -> List[str]: @@ -296,6 +201,17 @@ def get_snowflake_packages() -> List[str]: return [] +def _get_dependencies_not_avaiable_in_conda( + dependencies: List[RequirementWithFilesAndDeps], + avaiable_in_conda: List[Requirement], +) -> List[RequirementWithFilesAndDeps]: + return [ + dep + for dep in dependencies + if dep.requirement.name not in [package.name for package in avaiable_in_conda] + ] + + def generate_deploy_stage_name(identifier: str) -> str: return ( identifier.replace("()", "") @@ -328,11 +244,29 @@ def check_if_package_is_avaiable_in_conda(package: Requirement, packages: dict) return True -pip_failed_msg = """pip failed with return code {}. 
- If pip is installed correctly, this may mean you`re trying to install a package - that isn't compatible with the host architecture - - and generally means it has native libraries.""" +def _perform_native_libraries_check(deps: List[RequirementWithFilesAndDeps]): + if native_libraries := _check_for_native_libraries(deps): + _log_native_libraries(native_libraries) + return True + else: + log.info("Unsupported native libraries not found in packages (Good news!)...") + return False + + +def _log_native_libraries( + native_libraries: List[str], +) -> None: + log.error( + "Following dependencies utilise native libraries, not supported by Conda:" + ) + log.error("\n".join(set(native_libraries))) + log.error("You may still try to create your package, but it probably won`t work") + log.error("You may also request adding the package to Snowflake Conda channel") + log.error("at https://support.anaconda.com/") -second_chance_msg = """Good news! The following package dependencies can be - imported directly from Anaconda, and will be excluded from - the zip: {}""" + +def _confirm_native_libraries(allow_native_libraries: PypiOption) -> bool: + if allow_native_libraries == PypiOption.ASK: + return click.confirm("Continue with package installation?", default=False) + else: + return allow_native_libraries == PypiOption.YES diff --git a/src/snowflake/cli/plugins/snowpark/snowpark_shared.py b/src/snowflake/cli/plugins/snowpark/snowpark_shared.py index d347cc8f37..8cbbd14cb3 100644 --- a/src/snowflake/cli/plugins/snowpark/snowpark_shared.py +++ b/src/snowflake/cli/plugins/snowpark/snowpark_shared.py @@ -6,10 +6,9 @@ import click import typer -from requirements.requirement import Requirement from snowflake.cli.api.secure_path import SecurePath from snowflake.cli.plugins.snowpark import package_utils -from snowflake.cli.plugins.snowpark.models import PypiOption +from snowflake.cli.plugins.snowpark.models import PypiOption, Requirement from snowflake.cli.plugins.snowpark.zipper import zip_dir PyPiDownloadOption: PypiOption = typer.Option( @@ -77,7 +76,7 @@ def snowpark_package( should_continue, second_chance_results = package_utils.install_packages( REQUIREMENTS_OTHER, check_anaconda_for_pypi_deps, - package_native_libraries, + allow_native_libraries=package_native_libraries, ) # add the Anaconda packages discovered as dependencies if should_continue and second_chance_results: diff --git a/src/snowflake/cli/plugins/snowpark/venv.py b/src/snowflake/cli/plugins/snowpark/venv.py index 4e60403c29..d3b9a32d6a 100644 --- a/src/snowflake/cli/plugins/snowpark/venv.py +++ b/src/snowflake/cli/plugins/snowpark/venv.py @@ -1,13 +1,30 @@ +import json +import locale import logging +import os +import shutil import subprocess import sys import venv +from enum import Enum from pathlib import Path from tempfile import TemporaryDirectory +from typing import Dict, List + +from snowflake.cli.plugins.snowpark.models import ( + Requirement, + RequirementType, + RequirementWithFilesAndDeps, +) log = logging.getLogger(__name__) +class PackageInfoType(Enum): + FILES = "files" + DEPENDENCIES = "requires" + + class Venv: ERROR_MESSAGE = "Running command {0} caused error {1}" @@ -30,6 +47,7 @@ def run_python(self, args): [self.python_path, *args], capture_output=True, text=True, + encoding=locale.getpreferredencoding(), ) except subprocess.CalledProcessError as e: log.error(self.ERROR_MESSAGE, "python" + " ".join(args), e.stderr) @@ -37,9 +55,9 @@ def run_python(self, args): return process - def pip_install(self, name: str, req_type: str, 
directory: str = ".packages"): - arguments = ["-m", "pip", "install", "-t", directory] - arguments += ["-r", name] if req_type == "file" else [name] + def pip_install(self, name: str, req_type: RequirementType): + arguments = ["-m", "pip", "install"] + arguments += ["-r", name] if req_type == RequirementType.FILE else [name] process = self.run_python(arguments) return process.returncode @@ -48,7 +66,110 @@ def _create_venv(self): venv.create(self.directory.name, self.with_pip) @staticmethod - def _get_python_path(venv_dir: Path): + def _get_python_path(venv_dir: Path) -> Path: if sys.platform == "win32": return venv_dir / "scripts" / "python" return venv_dir / "bin" / "python" + + def _get_library_path(self) -> Path: + return [ + lib for lib in (Path(self.directory.name) / "lib").glob("**/site-packages") + ][0] + + def get_package_dependencies( + self, name: str, req_type: RequirementType + ) -> List[RequirementWithFilesAndDeps]: + installed_packages = self._get_installed_packages_metadata() + dependencies: Dict = {} + + def _get_dependencies(package: Requirement): + if package.name not in dependencies.keys(): + requires = installed_packages.get(package.name, {}).get( + "requires_dist", [] + ) + files = self._parse_file_list( + self._get_library_path(), + self._parse_info(package.name, PackageInfoType.FILES), + ) + + dependencies[package.name] = RequirementWithFilesAndDeps( + requirement=package, files=files, dependencies=requires + ) + + log.debug( + "Checking package %s, with dependencies: %s", package.name, requires + ) + + for package in requires: + _get_dependencies(Requirement.parse_line(package)) + + if req_type == RequirementType.PACKAGE: + _get_dependencies(Requirement.parse_line(name)) + + elif req_type == RequirementType.FILE: + if Path(name).exists(): + with open(name, "r") as req_file: + for line in req_file: + _get_dependencies(Requirement.parse_line(line)) + + return [dep for dep in dependencies.values()] + + def _get_installed_packages_metadata(self): + if inspect := self.get_pip_inspect(): + return { + package.get("metadata", {}).get("name"): package.get("metadata", {}) + for package in inspect.get("installed", {}) + } + else: + return {} + + def get_pip_inspect(self) -> Dict: + result = self.run_python(["-m", "pip", "inspect", "--local"]) + + if result.returncode == 0: + return json.loads(result.stdout) + else: + return {} + + def _parse_info(self, package_name: str, info_type: PackageInfoType) -> List[str]: + from importlib.metadata import PackagePath # noqa + + info = self.run_python( + [ + "-c", + f"from importlib.metadata import {info_type.value}; print({info_type.value}('{package_name}'))", + ] + ) + if info.returncode != 0: + return [] + + result = eval(info.stdout) + + if result: + return result + else: + return [] + + def _parse_file_list(self, base_dir: Path, files: List): + result = [] + + for file in [Path(f) for f in files]: + file_path = base_dir / file + if file_path.exists() and not ".." 
in file_path.parts: + result.append(str(file_path)) + + return result + + def copy_files_to_packages_dir( + self, files_to_be_copied: List[Path], destination: Path = Path(".packages") + ) -> None: + if not destination.exists(): + destination.mkdir() + library_path = self._get_library_path() + + for file in files_to_be_copied: + destination_file = destination / file.relative_to(library_path) + + if not destination_file.parent.exists(): + os.makedirs(destination_file.parent) + shutil.copy(file, destination / file.relative_to(library_path)) diff --git a/tests/__snapshots__/test_help_messages.ambr b/tests/__snapshots__/test_help_messages.ambr index 42f4795802..019ea1efab 100644 --- a/tests/__snapshots__/test_help_messages.ambr +++ b/tests/__snapshots__/test_help_messages.ambr @@ -1620,9 +1620,14 @@ │ [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ - │ --pypi-download Installs packages that are not available on the │ - │ Snowflake Anaconda channel. │ - │ --help -h Show this message and exit. │ + │ --pypi-download Installs packages that are │ + │ not available on the │ + │ Snowflake Anaconda channel. │ + │ --allow-native-libraries [yes|no|ask] Allows native libraries, │ + │ when using packages │ + │ installed through PIP │ + │ [default: no] │ + │ --help -h Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Connection configuration ───────────────────────────────────────────────────╮ │ --connection,--environment -c TEXT Name of the connection, as defined │ @@ -1687,9 +1692,14 @@ │ * name TEXT Name of the package. [default: None] [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ - │ --pypi-download Installs packages that are not available on the │ - │ Snowflake Anaconda channel. │ - │ --help -h Show this message and exit. │ + │ --pypi-download Installs packages that are │ + │ not available on the │ + │ Snowflake Anaconda channel. │ + │ --allow-native-libraries [yes|no|ask] Allows native libraries, │ + │ when using packages │ + │ installed through PIP │ + │ [default: no] │ + │ --help -h Show this message and exit. 
│ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Connection configuration ───────────────────────────────────────────────────╮ │ --connection,--environment -c TEXT Name of the connection, as defined │ diff --git a/tests/snowpark/test_models.py b/tests/snowpark/test_models.py new file mode 100644 index 0000000000..25582d1e31 --- /dev/null +++ b/tests/snowpark/test_models.py @@ -0,0 +1,46 @@ +import pytest +from snowflake.cli.plugins.snowpark.models import Requirement, get_package_name + + +@pytest.mark.parametrize( + "line,name,extras", + [ + ("ipython ; extra == 'docs'", "ipython", ["docs"]), + ("foo", "foo", []), + ("pytest ; extra == 'tests'", "pytest", ["tests"]), + ], +) +def test_requirement_is_parsed_correctly(line, name, extras): + result = Requirement.parse_line(line) + + assert result.name == name + assert result.extras == extras + + +@pytest.mark.parametrize( + "line,name", + [ + ( + "git+https://github.com/sfc-gh-turbaszek/dummy-pkg-for-tests", + "dummy-pkg-for-tests", + ), + ( + "git+https://github.com/sfc-gh-turbaszek/dummy-pkg-for-tests@foo", + "dummy-pkg-for-tests", + ), + ( + "git+https://github.com/sfc-gh-turbaszek/dummy-pkg-for-tests@0123456789abcdef0123456789abcdef01234567", + "dummy-pkg-for-tests", + ), + ("foo.zip", "foo"), + ("package", "package"), + ("package.zip", "package"), + ("git+https://github.com/Snowflake-Labs/snowflake-cli/", "snowflake-cli"), + ( + "git+https://github.com/Snowflake-Labs/snowflake-cli.git/@snow-123456-fix", + "snowflake-cli", + ), + ], +) +def test_get_package_name(line, name): + assert get_package_name(line) == name diff --git a/tests/snowpark/test_package.py b/tests/snowpark/test_package.py index 902ffea9a4..e647a7de82 100644 --- a/tests/snowpark/test_package.py +++ b/tests/snowpark/test_package.py @@ -5,8 +5,11 @@ from zipfile import ZipFile import pytest -from requirements.requirement import Requirement -from snowflake.cli.plugins.snowpark.models import SplitRequirements +from snowflake.cli.plugins.snowpark.models import ( + PypiOption, + Requirement, + SplitRequirements, +) from snowflake.cli.plugins.snowpark.package.utils import NothingFound, NotInAnaconda from tests.test_data import test_data @@ -150,7 +153,11 @@ def test_install_flag(self, mock_lookup, command, flags, expected_value, runner) mock_lookup.return_value = NothingFound result = runner.invoke(["snowpark", "package", "lookup", "foo", *flags]) - mock_lookup.assert_called_with(name="foo", install_packages=expected_value) + mock_lookup.assert_called_with( + name="foo", + install_packages=expected_value, + allow_native_libraries=PypiOption.NO, + ) @staticmethod def mocked_anaconda_response(response: dict): diff --git a/tests/snowpark/test_snowpark_shared.py b/tests/snowpark/test_snowpark_shared.py index 183d9f711e..08b594e6b4 100644 --- a/tests/snowpark/test_snowpark_shared.py +++ b/tests/snowpark/test_snowpark_shared.py @@ -4,8 +4,7 @@ from zipfile import ZipFile import snowflake.cli.plugins.snowpark.snowpark_shared as shared -from requirements.requirement import Requirement -from snowflake.cli.plugins.snowpark.models import SplitRequirements +from snowflake.cli.plugins.snowpark.models import Requirement, SplitRequirements @mock.patch("snowflake.cli.plugins.snowpark.package_utils.parse_anaconda_packages") diff --git a/tests/test_data/test_data.py b/tests/test_data/test_data.py index 891e99767d..29f4d24746 100644 --- a/tests/test_data/test_data.py +++ b/tests/test_data/test_data.py @@ -1,4 +1,4 @@ -from requirements.requirement import 
Requirement +from snowflake.cli.plugins.snowpark.models import Requirement requirements = ["pytest==1.0.0", "Django==3.2.1", "awesome_lib==3.3.3"] diff --git a/tests/test_utils.py b/tests/test_utils.py index e1420a6bce..216404e39b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,19 +1,18 @@ import json import logging import os -from distutils.dir_util import copy_tree from pathlib import Path, PosixPath from unittest import mock from unittest.mock import MagicMock, mock_open, patch import pytest +import snowflake.cli.plugins.snowpark.models import snowflake.cli.plugins.snowpark.package.utils import typer -from requirements.requirement import Requirement from snowflake.cli.api.secure_path import SecurePath from snowflake.cli.api.utils import path_utils from snowflake.cli.plugins.snowpark import package_utils -from snowflake.cli.plugins.snowpark.models import PypiOption +from snowflake.cli.plugins.snowpark.models import PypiOption, Requirement from snowflake.cli.plugins.streamlit import streamlit_utils from tests.test_data import test_data @@ -152,13 +151,6 @@ def test_generate_streamlit_package_wrapper(): result.unlink() -def test_get_package_name_from_metadata_using_correct_data( - correct_metadata_file: str, tmp_path -): - result = package_utils.get_package_name_from_metadata(correct_metadata_file) - assert result == Requirement.parse_line("my-awesome-package==0.0.1") - - @pytest.mark.parametrize( "contents, expected", [ @@ -231,51 +223,6 @@ def test_parse_anaconda_packages(mock_get): assert split_requirements.other[1].specs == [("==", "1.0.1")] -def test_get_downloaded_packages(test_root_path, temp_dir): - # In this test, we parse some real package metadata downloaded by pip - # only the dist-info directories are available, we don't need the actual files - copy_tree( - os.path.join(test_root_path, "test_data", "local_packages"), - temp_dir, - ) - requirements_with_files = package_utils.get_downloaded_packages() - - assert len(requirements_with_files) == 4 - - assert "httplib2" in requirements_with_files - httplib_req = requirements_with_files["httplib2"] - assert httplib_req.requirement.name == "httplib2" - assert httplib_req.requirement.specifier is True - assert httplib_req.requirement.specs == [("==", "0.22.0")] - # there are 19 files listed in the RECORD file, but we only get the - # first part of the path. 
All 19 files fall under these two directories - assert sorted(httplib_req.files) == ["httplib2", "httplib2-0.22.0.dist-info"] - - assert "Zendesk" in requirements_with_files - zendesk_req = requirements_with_files["Zendesk"] - assert zendesk_req.requirement.name == "Zendesk" - assert zendesk_req.requirement.specifier is True - assert zendesk_req.requirement.specs == [("==", "1.1.1")] - assert sorted(zendesk_req.files) == ["Zendesk-1.1.1.dist-info", "zendesk"] - - assert "azure-core" in requirements_with_files - azcore_req = requirements_with_files["azure-core"] - assert azcore_req.requirement.name == "azure-core" - assert azcore_req.requirement.specifier is True - assert azcore_req.requirement.specs == [("==", "1.29.5")] - assert sorted(azcore_req.files) == ["azure/core", "azure_core-1.29.5.dist-info"] - - assert "azure-eventhub" in requirements_with_files - azehub_req = requirements_with_files["azure-eventhub"] - assert azehub_req.requirement.name == "azure-eventhub" - assert azehub_req.requirement.specifier is True - assert azehub_req.requirement.specs == [("==", "5.11.5")] - assert sorted(azehub_req.files) == [ - "azure/eventhub", - "azure_eventhub-5.11.5.dist-info", - ] - - def test_deduplicate_and_sort_reqs(): packages = [ Requirement.parse("d"), diff --git a/tests_integration/test_package.py b/tests_integration/test_package.py index 9b78cfcd32..cdfe9faea3 100644 --- a/tests_integration/test_package.py +++ b/tests_integration/test_package.py @@ -1,4 +1,5 @@ import os +import sys import tempfile from pathlib import Path from typing import List @@ -47,17 +48,23 @@ def test_package_upload(self, runner, snowflake_session, test_database): @pytest.mark.integration def test_package_create_with_non_anaconda_package(self, directory_for_test, runner): result = runner.invoke_with_connection_json( - ["snowpark", "package", "create", "dummy_pkg_for_tests_with_deps", "-y"] + [ + "snowpark", + "package", + "create", + "dummy-pkg-for-tests-with-deps", + "--pypi-download", + ] ) assert result.exit_code == 0 - assert os.path.isfile("dummy_pkg_for_tests_with_deps.zip") + assert Path("dummy-pkg-for-tests-with-deps.zip").is_file() assert "dummy_pkg_for_tests/shrubbery.py" in self._get_filenames_from_zip( - "dummy_pkg_for_tests_with_deps.zip" + "dummy-pkg-for-tests-with-deps.zip" ) assert ( "dummy_pkg_for_tests_with_deps/shrubbery.py" - in self._get_filenames_from_zip("dummy_pkg_for_tests_with_deps.zip") + in self._get_filenames_from_zip("dummy-pkg-for-tests-with-deps.zip") ) @pytest.mark.integration @@ -75,7 +82,13 @@ def test_package_create_with_non_anaconda_package_without_install( @pytest.mark.integration def test_create_package_with_deps(self, directory_for_test, runner): result = runner.invoke_with_connection_json( - ["snowpark", "package", "create", "dummy_pkg_for_tests_with_deps", "-y"] + [ + "snowpark", + "package", + "create", + "dummy_pkg_for_tests_with_deps", + "--pypi-download", + ] ) assert result.exit_code == 0 @@ -85,7 +98,80 @@ def test_create_package_with_deps(self, directory_for_test, runner): ) files = self._get_filenames_from_zip("dummy_pkg_for_tests_with_deps.zip") - assert "dummy_pkg_for_tests/shrubbery.py" in files + assert any(["shrubbery.py" in file for file in files]) + + @pytest.mark.integration + def test_package_with_conda_dependencies( + self, directory_for_test, runner + ): # TODO think how to make this test with packages controlled by us + # test case is: We have a non-conda package, that has a dependency present on conda + # but not in latest version - here the case is 
matplotlib. + result = runner.invoke_with_connection_json( + [ + "snowpark", + "package", + "create", + "july", + "--pypi-download", + "--allow-native-libraries", + "yes", + ] + ) + + assert result.exit_code == 0 + assert Path("july.zip").exists() + + files = self._get_filenames_from_zip("july.zip") + assert any(["colormaps.py" in name for name in files]) + assert not any(["matplotlib" in name for name in files]) + + @pytest.mark.integration + def test_package_from_github(self, directory_for_test, runner): + result = runner.invoke_with_connection_json( + [ + "snowpark", + "package", + "create", + "git+https://github.com/sfc-gh-turbaszek/dummy-pkg-for-tests-with-deps.git", + "--pypi-download", + ] + ) + + assert result.exit_code == 0 + assert Path("dummy-pkg-for-tests-with-deps.zip").exists() + + files = self._get_filenames_from_zip("dummy-pkg-for-tests-with-deps.zip") + + assert any( + ["dummy_pkg_for_tests_with_deps-1.0.dist-info" in file for file in files] + ) + assert any(["dummy_pkg_for_tests-1.0.dist-info" in file for file in files]) + + @pytest.mark.integration + @pytest.mark.skipif( + sys.platform.startswith("win"), + reason="Windows version of PyGame has no native libraries", + ) + def test_package_with_native_libraries(self, directory_for_test, runner): + """ + We use PyGame in this test for two reasons: + 1. We need a package with native libraries to trigger warning + 2. This package should not be avaiable on Snowflake Conda channel. + As it is highly unlikely, that PyGame will be included in the channel, it's probably ok to leave it for now, + but we may reconsider switch to a package controlled by us. + """ + result = runner.invoke_with_connection( + [ + "snowpark", + "package", + "create", + "pygame", + "--pypi-download", + ] + ) + + assert result.exit_code == 0 + assert "at https://support.anaconda.com/" in result.output @pytest.fixture(scope="function") def directory_for_test(self): diff --git a/tests_integration/test_snowpark.py b/tests_integration/test_snowpark.py index c55cc684ba..9b75f9c73d 100644 --- a/tests_integration/test_snowpark.py +++ b/tests_integration/test_snowpark.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os from decimal import Decimal from pathlib import Path from textwrap import dedent diff --git a/tests_integration/test_streamlit.py b/tests_integration/test_streamlit.py index 06ec0f0b63..a6f865fda6 100644 --- a/tests_integration/test_streamlit.py +++ b/tests_integration/test_streamlit.py @@ -150,7 +150,7 @@ def test_streamlit_deploy_experimental_twice( @pytest.mark.integration def test_streamlit_is_visible_in_anaconda_channel(): - from requirements.requirement import Requirement + from snowflake.cli.plugins.snowpark.models import Requirement from snowflake.cli.plugins.snowpark.package_utils import parse_anaconda_packages streamlit = Requirement.parse_line("streamlit") diff --git a/tests_integration/testing_utils/snowpark_utils.py b/tests_integration/testing_utils/snowpark_utils.py index 0c33ede3a5..8ef347c0d5 100644 --- a/tests_integration/testing_utils/snowpark_utils.py +++ b/tests_integration/testing_utils/snowpark_utils.py @@ -222,7 +222,7 @@ def object_drop_should_finish_successfully( def package_should_build_proper_artifact(self, package_name: str, file_name: str): result = self._setup.runner.invoke_with_connection_json( - ["snowpark", "package", "create", package_name, "-y"] + ["snowpark", "package", "create", package_name, "--pypi-download"] ) assert result.exit_code == 0 From dd2358f4579f32738ff8261fd80371b4ece5485c Mon Sep 17 
00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Mar 2024 10:57:02 +0100 Subject: [PATCH 2/2] Bump pytest from 8.0.2 to 8.1.1 (#891) Bumps [pytest](https://github.com/pytest-dev/pytest) from 8.0.2 to 8.1.1. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/8.0.2...8.1.1) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 00af57691e..337ee507b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ classifiers = [ development = [ "coverage==7.4.3", "pre-commit>=3.5.0", - "pytest==8.0.2", + "pytest==8.1.1", "pytest-randomly==3.15.0", "syrupy==4.6.1", ]
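
A minimal sketch of how the new `Requirement.parse_line` and `get_package_name` helpers introduced in src/snowflake/cli/plugins/snowpark/models.py behave, assuming a snowflake-cli checkout with this patch applied is importable; the inputs and expected values mirror the cases added in tests/snowpark/test_models.py, and the CLI entry point they back is `snow snowpark package create <name> --pypi-download --allow-native-libraries yes` as exercised in tests_integration/test_package.py.

    # Sketch: assumes snowflake-cli with this patch applied is on the import path.
    from snowflake.cli.plugins.snowpark.models import Requirement, get_package_name

    # Environment-marker extras (e.g. "; extra == 'docs'") are captured on the
    # parsed requirement instead of being dropped.
    req = Requirement.parse_line("ipython ; extra == 'docs'")
    assert req.name == "ipython" and req.extras == ["docs"]

    # GitHub URLs and .zip file names resolve to a plain package name, so a
    # package created from a repo link gets a sensibly named zip archive.
    assert get_package_name(
        "git+https://github.com/Snowflake-Labs/snowflake-cli.git/@snow-123456-fix"
    ) == "snowflake-cli"
    assert get_package_name("foo.zip") == "foo"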