Skip to content

Commit

Permalink
refactor(pypi): further cleanup of pip.parse code (#2534)
Browse files Browse the repository at this point in the history
Summary:
- Move the `whl_library` creation into a separate function and remove
  the `TODO` note.
- Move the creation of the `get_index_urls` functions into outer
  `parse_modules` function and simplify the reproducible extension
  setting logic.
- Remove the `prefix` parameter from the `*repo_name` functions.
- Add an extra error message, for ensuring that invariants are met.

Work towards #260
  • Loading branch information
aignas authored Dec 27, 2024
1 parent a632044 commit 2136215
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 109 deletions.
213 changes: 116 additions & 97 deletions python/private/pypi/extension.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -67,20 +67,17 @@ def _create_whl_repos(
*,
pip_attr,
whl_overrides,
simpleapi_cache,
evaluate_markers = evaluate_markers,
available_interpreters = INTERPRETER_LABELS,
simpleapi_download = simpleapi_download):
get_index_urls = None):
"""create all of the whl repositories
Args:
module_ctx: {type}`module_ctx`.
pip_attr: {type}`struct` - the struct that comes from the tag class iteration.
whl_overrides: {type}`dict[str, struct]` - per-wheel overrides.
simpleapi_cache: {type}`dict` - an opaque dictionary used for caching the results from calling
SimpleAPI evaluating all of the tag class invocations {bzl:obj}`pip.parse`.
evaluate_markers: the function to use to evaluate markers.
simpleapi_download: Used for testing overrides
get_index_urls: A function used to get the index URLs
available_interpreters: {type}`dict[str, Label]` The dictionary of available
interpreters that have been registered using the `python` bzlmod extension.
The keys are in the form `python_{snake_case_version}_host`. This is to be
Expand All @@ -96,12 +93,9 @@ def _create_whl_repos(
aparent repository names for the hub repo and the values are the
arguments that will be passed to {bzl:obj}`whl_library` repository
rule.
is_reproducible: {type}`bool` set to True if does not make calls to the
internet to evaluate the requirements files.
"""
logger = repo_utils.logger(module_ctx, "pypi:create_whl_repos")
python_interpreter_target = pip_attr.python_interpreter_target
is_reproducible = True

# containers to aggregate outputs from this function
whl_map = {}
Expand Down Expand Up @@ -158,26 +152,6 @@ def _create_whl_repos(
whl_group_mapping = {}
requirement_cycles = {}

# Create a new wheel library for each of the different whls

get_index_urls = None
if pip_attr.experimental_index_url:
get_index_urls = lambda ctx, distributions: simpleapi_download(
ctx,
attr = struct(
index_url = pip_attr.experimental_index_url,
extra_index_urls = pip_attr.experimental_extra_index_urls or [],
index_url_overrides = pip_attr.experimental_index_url_overrides or {},
sources = distributions,
envsubst = pip_attr.envsubst,
# Auth related info
netrc = pip_attr.netrc,
auth_patterns = pip_attr.auth_patterns,
),
cache = simpleapi_cache,
parallel_download = pip_attr.parallel_download,
)

requirements_by_platform = parse_requirements(
module_ctx,
requirements_by_platform = requirements_files_by_platform(
Expand Down Expand Up @@ -258,77 +232,27 @@ def _create_whl_repos(
if v != default
})

# TODO @aignas 2024-05-26: move to a separate function
for requirement in requirements:
dists = requirement.whls
if not pip_attr.download_only and requirement.sdist:
dists = dists + [requirement.sdist]

for distribution in dists:
args = dict(whl_library_args)
if pip_attr.netrc:
args["netrc"] = pip_attr.netrc
if pip_attr.auth_patterns:
args["auth_patterns"] = pip_attr.auth_patterns

if not distribution.filename.endswith(".whl"):
# pip is not used to download wheels and the python
# `whl_library` helpers are only extracting things, however
# for sdists, they will be built by `pip`, so we still
# need to pass the extra args there.
args["extra_pip_args"] = requirement.extra_pip_args

# This is no-op because pip is not used to download the wheel.
args.pop("download_only", None)

repo_name = whl_repo_name(pip_name, distribution.filename, distribution.sha256)
args["requirement"] = requirement.srcs.requirement
args["urls"] = [distribution.url]
args["sha256"] = distribution.sha256
args["filename"] = distribution.filename
args["experimental_target_platforms"] = requirement.target_platforms

# Pure python wheels or sdists may need to have a platform here
target_platforms = None
if distribution.filename.endswith("-any.whl") or not distribution.filename.endswith(".whl"):
if len(requirements) > 1:
target_platforms = requirement.target_platforms
for repo_name, (args, config_setting) in _whl_repos(
requirement = requirement,
whl_library_args = whl_library_args,
download_only = pip_attr.download_only,
netrc = pip_attr.netrc,
auth_patterns = pip_attr.auth_patterns,
python_version = major_minor,
multiple_requirements_for_whl = len(requirements) > 1.,
).items():
repo_name = "{}_{}".format(pip_name, repo_name)
if repo_name in whl_libraries:
fail("Attempting to creating a duplicate library {} for {}".format(
repo_name,
whl_name,
))

whl_libraries[repo_name] = args

whl_map.setdefault(whl_name, {})[whl_config_setting(
version = major_minor,
filename = distribution.filename,
target_platforms = target_platforms,
)] = repo_name

if dists:
is_reproducible = False
continue

# Fallback to a pip-installed wheel
args = dict(whl_library_args) # make a copy
args["requirement"] = requirement.srcs.requirement_line
if requirement.extra_pip_args:
args["extra_pip_args"] = requirement.extra_pip_args

if pip_attr.download_only:
args.setdefault("experimental_target_platforms", requirement.target_platforms)

target_platforms = requirement.target_platforms if len(requirements) > 1 else []
repo_name = pypi_repo_name(
pip_name,
whl_name,
*target_platforms
)
whl_libraries[repo_name] = args
whl_map.setdefault(whl_name, {})[whl_config_setting(
version = major_minor,
target_platforms = target_platforms or None,
)] = repo_name
whl_map.setdefault(whl_name, {})[config_setting] = repo_name

return struct(
is_reproducible = is_reproducible,
whl_map = whl_map,
exposed_packages = {
whl_name: None
Expand All @@ -339,11 +263,88 @@ def _create_whl_repos(
whl_libraries = whl_libraries,
)

def parse_modules(module_ctx, _fail = fail, **kwargs):
def _whl_repos(*, requirement, whl_library_args, download_only, netrc, auth_patterns, multiple_requirements_for_whl = False, python_version):
ret = {}

dists = requirement.whls
if not download_only and requirement.sdist:
dists = dists + [requirement.sdist]

for distribution in dists:
args = dict(whl_library_args)
if netrc:
args["netrc"] = netrc
if auth_patterns:
args["auth_patterns"] = auth_patterns

if not distribution.filename.endswith(".whl"):
# pip is not used to download wheels and the python
# `whl_library` helpers are only extracting things, however
# for sdists, they will be built by `pip`, so we still
# need to pass the extra args there.
args["extra_pip_args"] = requirement.extra_pip_args

# This is no-op because pip is not used to download the wheel.
args.pop("download_only", None)

args["requirement"] = requirement.srcs.requirement
args["urls"] = [distribution.url]
args["sha256"] = distribution.sha256
args["filename"] = distribution.filename
args["experimental_target_platforms"] = requirement.target_platforms

# Pure python wheels or sdists may need to have a platform here
target_platforms = None
if distribution.filename.endswith("-any.whl") or not distribution.filename.endswith(".whl"):
if multiple_requirements_for_whl:
target_platforms = requirement.target_platforms

repo_name = whl_repo_name(
distribution.filename,
distribution.sha256,
)
ret[repo_name] = (
args,
whl_config_setting(
version = python_version,
filename = distribution.filename,
target_platforms = target_platforms,
),
)

if ret:
return ret

# Fallback to a pip-installed wheel
args = dict(whl_library_args) # make a copy
args["requirement"] = requirement.srcs.requirement_line
if requirement.extra_pip_args:
args["extra_pip_args"] = requirement.extra_pip_args

if download_only:
args.setdefault("experimental_target_platforms", requirement.target_platforms)

target_platforms = requirement.target_platforms if multiple_requirements_for_whl else []
repo_name = pypi_repo_name(
normalize_name(requirement.distribution),
*target_platforms
)
ret[repo_name] = (
args,
whl_config_setting(
version = python_version,
target_platforms = target_platforms or None,
),
)

return ret

def parse_modules(module_ctx, _fail = fail, simpleapi_download = simpleapi_download, **kwargs):
"""Implementation of parsing the tag classes for the extension and return a struct for registering repositories.
Args:
module_ctx: {type}`module_ctx` module context.
simpleapi_download: Used for testing overrides
_fail: {type}`function` the failure function, mainly for testing.
**kwargs: Extra arguments passed to the layers below.
Expand Down Expand Up @@ -460,10 +461,29 @@ You cannot use both the additive_build_content and additive_build_content_file a
else:
pip_hub_map[pip_attr.hub_name].python_versions.append(pip_attr.python_version)

get_index_urls = None
if pip_attr.experimental_index_url:
is_reproducible = False
get_index_urls = lambda ctx, distributions: simpleapi_download(
ctx,
attr = struct(
index_url = pip_attr.experimental_index_url,
extra_index_urls = pip_attr.experimental_extra_index_urls or [],
index_url_overrides = pip_attr.experimental_index_url_overrides or {},
sources = distributions,
envsubst = pip_attr.envsubst,
# Auth related info
netrc = pip_attr.netrc,
auth_patterns = pip_attr.auth_patterns,
),
cache = simpleapi_cache,
parallel_download = pip_attr.parallel_download,
)

out = _create_whl_repos(
module_ctx,
pip_attr = pip_attr,
simpleapi_cache = simpleapi_cache,
get_index_urls = get_index_urls,
whl_overrides = whl_overrides,
**kwargs
)
Expand All @@ -476,7 +496,6 @@ You cannot use both the additive_build_content and additive_build_content_file a
extra_aliases[hub_name].setdefault(whl_name, {}).update(aliases)
exposed_packages.setdefault(hub_name, {}).update(out.exposed_packages)
whl_libraries.update(out.whl_libraries)
is_reproducible = is_reproducible and out.is_reproducible

# TODO @aignas 2024-04-05: how do we support different requirement
# cycles for different abis/oses? For now we will need the users to
Expand Down
9 changes: 3 additions & 6 deletions python/private/pypi/whl_repo_name.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@
load("//python/private:normalize_name.bzl", "normalize_name")
load(":parse_whl_name.bzl", "parse_whl_name")

def whl_repo_name(prefix, filename, sha256):
def whl_repo_name(filename, sha256):
"""Return a valid whl_library repo name given a distribution filename.
Args:
prefix: {type}`str` the prefix of the whl_library.
filename: {type}`str` the filename of the distribution.
sha256: {type}`str` the sha256 of the distribution.
Returns:
a string that can be used in {obj}`whl_library`.
"""
parts = [prefix]
parts = []

if not filename.endswith(".whl"):
# Then the filename is basically foo-3.2.1.<ext>
Expand All @@ -51,19 +50,17 @@ def whl_repo_name(prefix, filename, sha256):

return "_".join(parts)

def pypi_repo_name(prefix, whl_name, *target_platforms):
def pypi_repo_name(whl_name, *target_platforms):
"""Return a valid whl_library given a requirement line.
Args:
prefix: {type}`str` the prefix of the whl_library.
whl_name: {type}`str` the whl_name to use.
*target_platforms: {type}`list[str]` the target platforms to use in the name.
Returns:
{type}`str` that can be used in {obj}`whl_library`.
"""
parts = [
prefix,
normalize_name(whl_name),
]
parts.extend([p.partition("_")[-1] for p in target_platforms])
Expand Down
11 changes: 5 additions & 6 deletions tests/pypi/whl_repo_name/whl_repo_name_tests.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,25 @@ load("//python/private/pypi:whl_repo_name.bzl", "whl_repo_name") # buildifier:
_tests = []

def _test_simple(env):
got = whl_repo_name("prefix", "foo-1.2.3-py3-none-any.whl", "deadbeef")
env.expect.that_str(got).equals("prefix_foo_py3_none_any_deadbeef")
got = whl_repo_name("foo-1.2.3-py3-none-any.whl", "deadbeef")
env.expect.that_str(got).equals("foo_py3_none_any_deadbeef")

_tests.append(_test_simple)

def _test_sdist(env):
got = whl_repo_name("prefix", "foo-1.2.3.tar.gz", "deadbeef000deadbeef")
env.expect.that_str(got).equals("prefix_foo_sdist_deadbeef")
got = whl_repo_name("foo-1.2.3.tar.gz", "deadbeef000deadbeef")
env.expect.that_str(got).equals("foo_sdist_deadbeef")

_tests.append(_test_sdist)

def _test_platform_whl(env):
got = whl_repo_name(
"prefix",
"foo-1.2.3-cp39.cp310-abi3-manylinux1_x86_64.manylinux_2_17_x86_64.whl",
"deadbeef000deadbeef",
)

# We only need the first segment of each
env.expect.that_str(got).equals("prefix_foo_cp39_abi3_manylinux_2_5_x86_64_deadbeef")
env.expect.that_str(got).equals("foo_cp39_abi3_manylinux_2_5_x86_64_deadbeef")

_tests.append(_test_platform_whl)

Expand Down

0 comments on commit 2136215

Please sign in to comment.