From f8ba64069ee630243f63403ef30c18f798ec99b9 Mon Sep 17 00:00:00 2001 From: thesayyn Date: Mon, 18 Sep 2023 14:40:39 -0700 Subject: [PATCH] refactor: clean up oci_pull --- oci/private/pull.bzl | 54 +++++++++++++++++++++---------------------- oci/tests/BUILD.bazel | 1 + 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/oci/private/pull.bzl b/oci/private/pull.bzl index 739523e4..4b0a84c2 100644 --- a/oci/private/pull.bzl +++ b/oci/private/pull.bzl @@ -242,7 +242,7 @@ def _download(rctx, state, identifier, output, resource, download_fn = download. if identifier.startswith("sha256:"): sha256 = identifier[len("sha256:"):] else: - util.warning(rctx, """fetching from %s without an integrity hash. The result will not be cached.""" % registry_url) + util.warning(rctx, """Fetching from {} without an integrity hash. The result will not be cached.""".format(registry_url)) return download_fn( rctx, @@ -268,7 +268,7 @@ def _download_manifest(rctx, state, identifier, output): digest = "sha256:{}".format(result.sha256) if manifest["schemaVersion"] == 1: util.warning(rctx, """\ -registry responded with a manifest that has schemaVersion=1. Usually happens when fetching from a registry that requires `Docker-Distribution-API-Version` header to be set. +Registry responded with a manifest that has `schemaVersion=1`. Usually happens when fetching from a registry that requires `Docker-Distribution-API-Version` header to be set. Falling back to using `curl`. See https://github.com/bazelbuild/bazel/issues/17829 for the context.""") fallback_to_curl = True else: @@ -319,7 +319,7 @@ def _create_downloader(rctx): download_manifest = lambda identifier, output: _download_manifest(rctx, state, identifier, output), ) -_build_file = """\ +_BUILD_FILE_TMPL = """\ "Generated by oci_pull" load("@aspect_bazel_lib//lib:copy_to_directory.bzl", "copy_to_directory") @@ -332,7 +332,7 @@ package(default_visibility = ["//visibility:public"]) copy_file( name = "manifest", src = "manifest.json", - out = "{image_digest}" + out = "{digest}" ) copy_to_directory( @@ -341,7 +341,7 @@ copy_to_directory( out = "blobs/sha256", include_external_repositories = ["*"], srcs = {tars} + [ - ":{image_digest}", + ":{digest}", ":{config_file}", ], ) @@ -375,39 +375,38 @@ def _find_platform_manifest(image_mf, platform_wanted): def _oci_pull_impl(rctx): downloader = _create_downloader(rctx) - mf, mf_len, mf_digest = downloader.download_manifest(rctx.attr.identifier, "manifest.json") + manifest, size, digest = downloader.download_manifest(rctx.attr.identifier, "manifest.json") - if mf["mediaType"] in _SUPPORTED_MEDIA_TYPES["manifest"]: + if manifest["mediaType"] in _SUPPORTED_MEDIA_TYPES["manifest"]: if rctx.attr.platform: fail("{}/{} is a single-architecture image, so attribute 'platforms' should not be set.".format(rctx.attr.registry, rctx.attr.repository)) - - image_mf = mf - image_mf_len = mf_len - image_digest = mf_digest - - elif mf["mediaType"] in _SUPPORTED_MEDIA_TYPES["index"]: - # extra download to get the manifest for the selected arch + + elif manifest["mediaType"] in _SUPPORTED_MEDIA_TYPES["index"]: if not rctx.attr.platform: fail("{}/{} is a multi-architecture image, so attribute 'platforms' is required.".format(rctx.attr.registry, rctx.attr.repository)) - matching_mf = _find_platform_manifest(mf, rctx.attr.platform) - if not matching_mf: + + matching_manifest = _find_platform_manifest(manifest, rctx.attr.platform) + if not matching_manifest: fail("No matching manifest found in image {}/{} for platform {}".format(rctx.attr.registry, rctx.attr.repository, rctx.attr.platform)) - image_mf, image_mf_len, image_digest = downloader.download_manifest(matching_mf["digest"], "manifest.json") + # extra download to get the manifest for the target platform + manifest, size, digest = downloader.download_manifest(matching_manifest["digest"], "manifest.json") else: - fail("Unrecognized mediaType {} in manifest file".format(mf["mediaType"])) + fail("Unrecognized mediaType {} in manifest file".format(manifest["mediaType"])) - image_config_file = _trim_hash_algorithm(image_mf["config"]["digest"]) - downloader.download_blob(image_mf["config"]["digest"], image_config_file) + # download the image config + config_file = _trim_hash_algorithm(manifest["config"]["digest"]) + downloader.download_blob(manifest["config"]["digest"], config_file) + # download all layers + # TODO: we should avoid eager-download of the layers ("shallow pull") tars = [] - for layer in image_mf["layers"]: - hash = _trim_hash_algorithm(layer["digest"]) + for layer in manifest["layers"]: + blob_digest = _trim_hash_algorithm(layer["digest"]) + downloader.download_blob(layer["digest"], blob_digest) - # TODO: we should avoid eager-download of the layers ("shallow pull") - downloader.download_blob(layer["digest"], hash) - if hash not in tars: - tars.append(hash) + if blob_digest not in tars: + tars.append(blob_digest) rctx.file("index.json", util.build_manifest_json( media_type = image_mf["mediaType"], @@ -488,8 +487,7 @@ def _oci_alias_impl(rctx): util.warning(rctx, """\ For reproducible builds, a digest is recommended. -Either set 'reproducible = False' to silence this warning, -or run the following command to change {rule} to use a digest: +Either set 'reproducible = False' to silence this warning, or run the following command to change {rule} to use a digest: {warning} buildozer 'set digest "{digest}"' 'remove tag' 'remove platforms' {optional_platforms} {location} diff --git a/oci/tests/BUILD.bazel b/oci/tests/BUILD.bazel index 00e0ba7d..52687677 100644 --- a/oci/tests/BUILD.bazel +++ b/oci/tests/BUILD.bazel @@ -32,6 +32,7 @@ IMAGES_TO_TEST = { tags = ["requires-network"], toolchains = [ "@oci_crane_toolchains//:current_toolchain", + "@jq_toolchains//:resolved_toolchain" ], visibility = ["//visibility:public"], )