From d775bce2671d9799f4d553e2e21597b4fefa97c8 Mon Sep 17 00:00:00 2001
From: Russ Allbery
Date: Wed, 5 Jun 2024 15:22:59 -0700
Subject: [PATCH] Support gs URLs in SignedURLService

gs is the same as s3 but specifies Google Cloud Storage. Support either
URL format when generating signed URLs in safir.gcs.
---
 .../20240605_152335_rra_DM_44606_queue.md |  3 +++
 src/safir/gcs.py                          | 20 +++++++++----------
 tests/gcs_test.py                         |  4 ++++
 3 files changed, 17 insertions(+), 10 deletions(-)
 create mode 100644 changelog.d/20240605_152335_rra_DM_44606_queue.md

diff --git a/changelog.d/20240605_152335_rra_DM_44606_queue.md b/changelog.d/20240605_152335_rra_DM_44606_queue.md
new file mode 100644
index 00000000..737f20ea
--- /dev/null
+++ b/changelog.d/20240605_152335_rra_DM_44606_queue.md
@@ -0,0 +1,3 @@
+### New features
+
+- Add support for `gs` URLs to `safir.gcs.SignedURLService`.
diff --git a/src/safir/gcs.py b/src/safir/gcs.py
index d959f0cc..869edbdf 100644
--- a/src/safir/gcs.py
+++ b/src/safir/gcs.py
@@ -16,13 +16,13 @@ class SignedURLService:
     """Generate signed URLs for Google Cloud Storage blobs.

     Uses default credentials plus credential impersonation to generate signed
-    URLs for Google Cloud Storage blobs.  This is the correct approach when
+    URLs for Google Cloud Storage blobs. This is the correct approach when
     running as a Kubernetes pod using workload identity.

     Parameters
     ----------
     service_account
-        The service account to use to sign the URLs.  The workload identity
+        The service account to use to sign the URLs. The workload identity
         must have access to generate service account tokens for that service
         account.
     lifetime
@@ -32,7 +32,7 @@ class SignedURLService:
     -----
     The workload identity (or other default credentials) under which the
     caller is running must have ``roles/iam.serviceAccountTokenCreator`` on
-    the service account given in the ``service_account`` parameter.  This is
+    the service account given in the ``service_account`` parameter. This is
     how a workload identity can retrieve a key that can be used to create a
     signed URL.

@@ -54,9 +54,9 @@ def signed_url(self, uri: str, mime_type: str | None) -> str:
         Parameters
         ----------
         uri
-            URI for the storage object.  This must start with ``s3://`` and
-            use the S3 URI syntax to specify bucket and blob of a Google
-            Cloud Storage object.
+            URI for the storage object. This must start with ``s3://`` or
+            ``gs://`` and use that URI syntax to specify bucket and blob of a
+            Google Cloud Storage object.
         mime_type
             MIME type of the object, for encoding in the signed URL.

@@ -69,17 +69,17 @@ def signed_url(self, uri: str, mime_type: str | None) -> str:
         Raises
         ------
         ValueError
-            The ``uri`` parameter is not an S3 URI.
+            Raised if the ``uri`` parameter is not an S3 or GCS URI.

         Notes
         -----
         This is inefficient, since it gets new signing credentials each time
-        it generates a signed URL.  Doing better will require figuring out the
+        it generates a signed URL. Doing better will require figuring out the
         lifetime and refreshing the credentials when the lifetime has expired.
         """
         parsed_uri = urlparse(uri)
-        if parsed_uri.scheme != "s3":
-            raise ValueError(f"URI {uri} is not an S3 URI")
+        if parsed_uri.scheme not in ("s3", "gs"):
+            raise ValueError(f"URI {uri} is not an s3 or gs URI")
         bucket = self._gcs.bucket(parsed_uri.netloc)
         blob = bucket.blob(parsed_uri.path[1:])
         signing_credentials = impersonated_credentials.Credentials(
diff --git a/tests/gcs_test.py b/tests/gcs_test.py
index 22efb2d4..0bcda10c 100644
--- a/tests/gcs_test.py
+++ b/tests/gcs_test.py
@@ -15,6 +15,10 @@ def test_signed_url(mock_gcs: MockStorageClient) -> None:
     url = url_service.signed_url("s3://some-bucket/path/to/blob", "text/plain")
     assert url == "https://example.com/path/to/blob"

+    # Test the same with a gs URL.
+    url = url_service.signed_url("gs://some-bucket/path/to/blob", "text/plain")
+    assert url == "https://example.com/path/to/blob"
+
     # Test that the lifetime is passed down to the mock, which will reject it
     # if it's not an hour.
     url_service = SignedURLService("foo", timedelta(minutes=30))