diff --git a/CHANGELOG.md b/CHANGELOG.md index f82e062c8f3..6bd33d93fb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,12 @@ as necessary. Empty sections will not end in the release notes. ### Changes +- The base `location` of a new entity (e.g. tables) created via Iceberg REST is derived from the nearest + parent namespace that has an explicitly set `location` property. (Path separator character is `/`.) +- The `location` property on tables (and view) created via Iceberg REST may be explicitly configured, as + long as it can be resolved against the configured object storage locations. (Path separator character + is `/`.) + ### Deprecations ### Fixes diff --git a/catalog/files/api/src/main/java/org/projectnessie/catalog/files/api/ObjectIO.java b/catalog/files/api/src/main/java/org/projectnessie/catalog/files/api/ObjectIO.java index cf453ce08ea..cb1c27beee4 100644 --- a/catalog/files/api/src/main/java/org/projectnessie/catalog/files/api/ObjectIO.java +++ b/catalog/files/api/src/main/java/org/projectnessie/catalog/files/api/ObjectIO.java @@ -20,6 +20,7 @@ import java.io.OutputStream; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.BiConsumer; import java.util.function.BooleanSupplier; import org.projectnessie.storage.uri.StorageUri; @@ -60,4 +61,11 @@ void trinoSampleConfig( StorageUri warehouse, Map icebergConfig, BiConsumer properties); + + /** + * Checks whether the given storage URI can be resolved. + * + * @return an empty optional, if the URI can be resolved, otherwise an error message. + */ + Optional canResolve(StorageUri uri); } diff --git a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/DelegatingObjectIO.java b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/DelegatingObjectIO.java index 30be0a75a22..d13ac6bbe23 100644 --- a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/DelegatingObjectIO.java +++ b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/DelegatingObjectIO.java @@ -19,6 +19,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.Map; +import java.util.Optional; import java.util.function.BiConsumer; import java.util.function.BooleanSupplier; import org.projectnessie.catalog.files.api.ObjectIO; @@ -43,6 +44,16 @@ public InputStream readObject(StorageUri uri) throws IOException { return resolve(uri).readObject(uri); } + @Override + public Optional canResolve(StorageUri uri) { + try { + ObjectIO resolved = resolve(uri); + return resolved.canResolve(uri); + } catch (IllegalArgumentException e) { + return Optional.of(e.getMessage()); + } + } + @Override public void configureIcebergWarehouse( StorageUri warehouse, diff --git a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsClientSupplier.java b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsClientSupplier.java index 4b352e4b0ac..8f4aeec40b8 100644 --- a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsClientSupplier.java +++ b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsClientSupplier.java @@ -84,10 +84,6 @@ AdlsOptions adlsOptions() { return adlsOptions; } - SecretsProvider secretsProvider() { - return secretsProvider; - } - public DataLakeFileClient fileClientForLocation(StorageUri uri) { AdlsLocation location = adlsLocation(uri); diff --git a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsObjectIO.java b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsObjectIO.java index 9d72334207c..e4aa2311bce 100644 --- a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsObjectIO.java +++ b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/adls/AdlsObjectIO.java @@ -116,6 +116,16 @@ public void deleteObjects(List uris) throws IOException { } } + @Override + public Optional canResolve(StorageUri uri) { + try { + DataLakeFileClient file = clientSupplier.fileClientForLocation(uri); + return file != null ? Optional.empty() : Optional.of("ADLS client could not be constructed"); + } catch (IllegalArgumentException e) { + return Optional.of(e.getMessage()); + } + } + @Override public void configureIcebergWarehouse( StorageUri warehouse, diff --git a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/gcs/GcsObjectIO.java b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/gcs/GcsObjectIO.java index 227cc490bbf..08ce2a1e7c2 100644 --- a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/gcs/GcsObjectIO.java +++ b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/gcs/GcsObjectIO.java @@ -157,6 +157,19 @@ public void deleteObjects(List uris) { } } + @Override + public Optional canResolve(StorageUri uri) { + try { + GcsLocation location = gcsLocation(uri); + GcsBucketOptions bucketOptions = storageSupplier.bucketOptions(location); + @SuppressWarnings("resource") + Storage client = storageSupplier.forLocation(bucketOptions); + return client != null ? Optional.empty() : Optional.of("GCS client could not be constructed"); + } catch (IllegalArgumentException e) { + return Optional.of(e.getMessage()); + } + } + @Override public void configureIcebergWarehouse( StorageUri warehouse, diff --git a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/local/LocalObjectIO.java b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/local/LocalObjectIO.java index 6ac6435f676..f86962d4ef1 100644 --- a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/local/LocalObjectIO.java +++ b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/local/LocalObjectIO.java @@ -25,6 +25,7 @@ import java.nio.file.Paths; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.function.BiConsumer; import java.util.function.BooleanSupplier; import org.projectnessie.catalog.files.api.ObjectIO; @@ -97,6 +98,16 @@ public void trinoSampleConfig( Map icebergConfig, BiConsumer properties) {} + @Override + public Optional canResolve(StorageUri uri) { + Path path = LocalObjectIO.filePath(uri); + // We expect a directory here (or non-existing here), because the URI is meant to store other + // files + return Files.isRegularFile(path) + ? Optional.of(path + " does must not be file") + : Optional.empty(); + } + private static Path filePath(StorageUri uri) { return Paths.get(uri.requiredPath()); } diff --git a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/s3/S3ObjectIO.java b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/s3/S3ObjectIO.java index 6603d3617b5..05b6eb9834a 100644 --- a/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/s3/S3ObjectIO.java +++ b/catalog/files/impl/src/main/java/org/projectnessie/catalog/files/s3/S3ObjectIO.java @@ -135,6 +135,23 @@ public void deleteObjects(List uris) { } } + @Override + public Optional canResolve(StorageUri uri) { + String scheme = uri.scheme(); + if (!isS3scheme(scheme)) { + return Optional.of("Not an S3 URI"); + } + + try { + S3Client s3client = s3clientSupplier.getClient(uri); + return s3client != null + ? Optional.empty() + : Optional.of("S3 client could not be constructed"); + } catch (IllegalArgumentException e) { + return Optional.of(e.getMessage()); + } + } + private static String withoutLeadingSlash(StorageUri uri) { String path = uri.requiredPath(); return path.startsWith("/") ? path.substring(1) : path; diff --git a/catalog/format/iceberg/src/main/java/org/projectnessie/catalog/formats/iceberg/nessie/NessieModelIceberg.java b/catalog/format/iceberg/src/main/java/org/projectnessie/catalog/formats/iceberg/nessie/NessieModelIceberg.java index c5ed952448b..9ba4b9c6617 100644 --- a/catalog/format/iceberg/src/main/java/org/projectnessie/catalog/formats/iceberg/nessie/NessieModelIceberg.java +++ b/catalog/format/iceberg/src/main/java/org/projectnessie/catalog/formats/iceberg/nessie/NessieModelIceberg.java @@ -128,10 +128,8 @@ import org.projectnessie.catalog.model.statistics.NessiePartitionStatisticsFile; import org.projectnessie.catalog.model.statistics.NessieStatisticsFile; import org.projectnessie.model.Content; -import org.projectnessie.model.ContentKey; import org.projectnessie.model.IcebergTable; import org.projectnessie.model.IcebergView; -import org.projectnessie.model.Namespace; public class NessieModelIceberg { private NessieModelIceberg() {} @@ -795,14 +793,8 @@ public static long safeUnbox(Long value, long defaultValue) { * *

Also: we deliberately ignore the TableProperties.WRITE_METADATA_LOCATION property here. */ - public static String icebergBaseLocation(String warehouseLocation, ContentKey key) { + public static String icebergNewEntityBaseLocation(String baseLocation) { // FIXME escape or remove forbidden chars, cf. #8524 - String baseLocation = warehouseLocation; - Namespace ns = key.getNamespace(); - if (!ns.isEmpty()) { - baseLocation = concatLocation(warehouseLocation, ns.toString()); - } - baseLocation = concatLocation(baseLocation, key.getName()); return baseLocation + "_" + randomUUID(); } diff --git a/catalog/format/iceberg/src/main/java/org/projectnessie/catalog/formats/iceberg/rest/IcebergCatalogOperation.java b/catalog/format/iceberg/src/main/java/org/projectnessie/catalog/formats/iceberg/rest/IcebergCatalogOperation.java index 50c92c26ac2..e9d9c6f5381 100644 --- a/catalog/format/iceberg/src/main/java/org/projectnessie/catalog/formats/iceberg/rest/IcebergCatalogOperation.java +++ b/catalog/format/iceberg/src/main/java/org/projectnessie/catalog/formats/iceberg/rest/IcebergCatalogOperation.java @@ -21,6 +21,7 @@ import com.google.errorprone.annotations.CanIgnoreReturnValue; import jakarta.annotation.Nullable; import java.util.List; +import java.util.Optional; import java.util.function.Function; import java.util.stream.Collectors; import org.immutables.value.Value; @@ -73,6 +74,17 @@ public boolean hasUpdate(Class update) { @Value.Derived public T getSingleUpdateValue( Class update, Function mapper) { + return getSingleUpdate(update, mapper) + .orElseThrow( + () -> + new IllegalArgumentException( + "Could not find any updates of type " + update.getSimpleName())); + } + + @JsonIgnore + @Value.Derived + public Optional getSingleUpdate( + Class update, Function mapper) { return updates().stream() .filter(update::isInstance) .map(update::cast) @@ -81,11 +93,7 @@ public T getSingleUpdateValue( (e1, e2) -> { throw new IllegalArgumentException( "Found multiple updates of type " + update.getSimpleName()); - }) - .orElseThrow( - () -> - new IllegalArgumentException( - "Could not find any updates of type " + update.getSimpleName())); + }); } @Value.Check diff --git a/catalog/service/common/src/main/java/org/projectnessie/catalog/service/api/CatalogService.java b/catalog/service/common/src/main/java/org/projectnessie/catalog/service/api/CatalogService.java index 208e455d8f2..eef0bd18637 100644 --- a/catalog/service/common/src/main/java/org/projectnessie/catalog/service/api/CatalogService.java +++ b/catalog/service/common/src/main/java/org/projectnessie/catalog/service/api/CatalogService.java @@ -18,6 +18,8 @@ import jakarta.annotation.Nullable; import java.net.URI; import java.util.List; +import java.util.Map; +import java.util.Optional; import java.util.concurrent.CompletionStage; import java.util.function.Consumer; import java.util.function.Function; @@ -25,6 +27,7 @@ import java.util.stream.Stream; import org.projectnessie.api.v2.params.ParsedReference; import org.projectnessie.catalog.model.snapshot.NessieEntitySnapshot; +import org.projectnessie.catalog.service.config.WarehouseConfig; import org.projectnessie.error.BaseNessieClientServerException; import org.projectnessie.error.NessieNotFoundException; import org.projectnessie.model.CommitMeta; @@ -32,6 +35,7 @@ import org.projectnessie.model.ContentKey; import org.projectnessie.model.Reference; import org.projectnessie.services.authz.ApiContext; +import org.projectnessie.storage.uri.StorageUri; import org.projectnessie.versioned.RequestMeta; public interface CatalogService { @@ -78,4 +82,21 @@ interface CatalogUriResolver { URI icebergSnapshot( Reference effectiveReference, ContentKey key, NessieEntitySnapshot snapshot); } + + Optional validateStorageLocation(String location); + + StorageUri locationForEntity( + WarehouseConfig warehouse, + ContentKey contentKey, + Content.Type contentType, + ApiContext apiContext, + String refName, + String hash) + throws NessieNotFoundException; + + StorageUri locationForEntity( + WarehouseConfig warehouse, + ContentKey contentKey, + List keysInOrder, + Map contentsMap); } diff --git a/catalog/service/impl/src/main/java/org/projectnessie/catalog/service/impl/CatalogServiceImpl.java b/catalog/service/impl/src/main/java/org/projectnessie/catalog/service/impl/CatalogServiceImpl.java index cc44a774c00..4f066702a41 100644 --- a/catalog/service/impl/src/main/java/org/projectnessie/catalog/service/impl/CatalogServiceImpl.java +++ b/catalog/service/impl/src/main/java/org/projectnessie/catalog/service/impl/CatalogServiceImpl.java @@ -23,9 +23,9 @@ import static org.projectnessie.catalog.formats.iceberg.nessie.IcebergConstants.NESSIE_COMMIT_ID; import static org.projectnessie.catalog.formats.iceberg.nessie.IcebergConstants.NESSIE_COMMIT_REF; import static org.projectnessie.catalog.formats.iceberg.nessie.IcebergConstants.NESSIE_CONTENT_ID; -import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.icebergBaseLocation; import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.icebergMetadataJsonLocation; import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.icebergMetadataToContent; +import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.icebergNewEntityBaseLocation; import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.nessieTableSnapshotToIceberg; import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.nessieViewSnapshotToIceberg; import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.newIcebergTableSnapshot; @@ -39,6 +39,8 @@ import static org.projectnessie.error.ReferenceConflicts.referenceConflicts; import static org.projectnessie.model.Conflict.conflict; import static org.projectnessie.model.Content.Type.ICEBERG_TABLE; +import static org.projectnessie.model.Content.Type.NAMESPACE; +import static org.projectnessie.versioned.RequestMeta.API_READ; import jakarta.annotation.Nullable; import jakarta.enterprise.context.RequestScoped; @@ -100,6 +102,7 @@ import org.projectnessie.model.ContentKey; import org.projectnessie.model.ContentResponse; import org.projectnessie.model.GetMultipleContentsResponse; +import org.projectnessie.model.Namespace; import org.projectnessie.model.Reference; import org.projectnessie.nessie.tasks.api.TasksService; import org.projectnessie.services.authz.AccessContext; @@ -157,6 +160,79 @@ private IcebergStuff icebergStuff() { executor); } + @Override + public Optional validateStorageLocation(String location) { + StorageUri uri = StorageUri.of(location); + return objectIO.canResolve(uri); + } + + @Override + public StorageUri locationForEntity( + WarehouseConfig warehouse, + ContentKey contentKey, + Content.Type contentType, + ApiContext apiContext, + String refName, + String hash) { + List keyElements = contentKey.getElements(); + int keyElementCount = keyElements.size(); + + List keysInOrder = new ArrayList<>(keyElementCount); + for (int i = 0; i < keyElementCount; i++) { + ContentKey key = ContentKey.of(keyElements.subList(0, i + 1)); + keysInOrder.add(key); + } + + GetMultipleContentsResponse namespaces; + try { + namespaces = + contentService(apiContext) + .getMultipleContents(refName, hash, keysInOrder, false, API_READ); + } catch (NessieNotFoundException e) { + throw new RuntimeException(e); + } + Map contentsMap = namespaces.toContentsMap(); + + return locationForEntity(warehouse, contentKey, keysInOrder, contentsMap); + } + + @Override + public StorageUri locationForEntity( + WarehouseConfig warehouse, + ContentKey contentKey, + List keysInOrder, + Map contentsMap) { + List keyElements = contentKey.getElements(); + int keyElementCount = keyElements.size(); + StorageUri location = null; + List remainingElements = List.of(); + + // Find the nearest namespace with a 'location' property and start from there + for (int n = keysInOrder.size() - 1; n >= 0; n--) { + Content parent = contentsMap.get(keysInOrder.get(n)); + if (parent != null && parent.getType().equals(NAMESPACE)) { + Namespace parentNamespace = (Namespace) parent; + String parentLocation = parentNamespace.getProperties().get("location"); + if (parentLocation != null) { + location = StorageUri.of(parentLocation).withTrailingSeparator(); + remainingElements = keyElements.subList(n + 1, keyElementCount); + } + } + } + + // No parent namespace has a 'location' property, start from the warehouse + if (location == null) { + location = StorageUri.of(warehouse.location()).withTrailingSeparator(); + remainingElements = keyElements; + } + + for (String element : remainingElements) { + location = location.withTrailingSeparator().resolve(element); + } + + return location; + } + @Override public Stream>> retrieveSnapshots( SnapshotReqParams reqParams, @@ -430,12 +506,12 @@ CompletionStage commit( verifyIcebergOperation(op, reference, content); commitBuilderStage = applyIcebergTableCommitOperation( - target, op, content, multiTableUpdate, commitBuilderStage); + target, op, content, multiTableUpdate, commitBuilderStage, apiContext); } else if (op.getType().equals(Content.Type.ICEBERG_VIEW)) { verifyIcebergOperation(op, reference, content); commitBuilderStage = applyIcebergViewCommitOperation( - target, op, content, multiTableUpdate, commitBuilderStage); + target, op, content, multiTableUpdate, commitBuilderStage, apiContext); } else { throw new IllegalArgumentException("(Yet) unsupported entity type: " + op.getType()); } @@ -561,7 +637,8 @@ private CompletionStage applyIcebergTableCommitOperation( CatalogOperation op, Content content, MultiTableUpdate multiTableUpdate, - CompletionStage commitBuilderStage) { + CompletionStage commitBuilderStage, + ApiContext apiContext) { // TODO serialize the changes as well, so that we can retrieve those later for content-aware // merges and automatic conflict resolution. @@ -594,7 +671,8 @@ private CompletionStage applyIcebergTableCommitOperation( return new IcebergTableMetadataUpdateState( nessieSnapshot, op.getKey(), content != null) .checkRequirements(icebergOp.requirements()) - .applyUpdates(pruneUpdates(icebergOp, content != null)); + .applyUpdates( + pruneUpdates(reference, icebergOp, content != null, apiContext)); // TODO handle the case when nothing changed -> do not update // e.g. when adding a schema/spec/order that already exists }) @@ -630,7 +708,8 @@ private CompletionStage applyIcebergViewCommitOperation( CatalogOperation op, Content content, MultiTableUpdate multiTableUpdate, - CompletionStage commitBuilderStage) { + CompletionStage commitBuilderStage, + ApiContext apiContext) { // TODO serialize the changes as well, so that we can retrieve those later for content-aware // merges and automatic conflict resolution. @@ -663,7 +742,8 @@ private CompletionStage applyIcebergViewCommitOperation( return new IcebergViewMetadataUpdateState( nessieSnapshot, op.getKey(), content != null) .checkRequirements(icebergOp.requirements()) - .applyUpdates(pruneUpdates(icebergOp, content != null)); + .applyUpdates( + pruneUpdates(reference, icebergOp, content != null, apiContext)); // TODO handle the case when nothing changed -> do not update // e.g. when adding a schema/spec/order that already exists }) @@ -693,28 +773,44 @@ private CompletionStage applyIcebergViewCommitOperation( return commitBuilderStage; } - private List pruneUpdates(IcebergCatalogOperation op, boolean update) { + private List pruneUpdates( + Branch reference, IcebergCatalogOperation op, boolean update, ApiContext apiContext) { if (update) { return op.updates(); } List prunedUpdates = new ArrayList<>(op.updates()); - String location = null; + String location = op.getSingleUpdate(SetLocation.class, SetLocation::location).orElse(null); if (op.hasUpdate(SetProperties.class)) { Map properties = op.getSingleUpdateValue(SetProperties.class, SetProperties::updates); if (properties.containsKey(IcebergTableMetadata.STAGED_PROPERTY)) { - String stagedLocation = op.getSingleUpdateValue(SetLocation.class, SetLocation::location); - // TODO verify integrity of staged location prunedUpdates.removeIf(u -> u instanceof SetProperties); properties = new HashMap<>(properties); properties.remove(IcebergTableMetadata.STAGED_PROPERTY); prunedUpdates.add(setProperties(properties)); - location = stagedLocation; } } if (location == null) { WarehouseConfig w = catalogConfig.getWarehouse(op.warehouse()); - location = icebergBaseLocation(w.location(), op.getKey()); + location = + icebergNewEntityBaseLocation( + locationForEntity( + w, + op.getKey(), + op.getType(), + apiContext, + reference.getName(), + reference.getHash()) + .toString()); + } else { + validateStorageLocation(location) + .ifPresent( + msg -> { + throw new IllegalArgumentException( + format( + "Location for %s '%s' cannot be associated with any configured object storage location: %s", + op.getType().name(), op.getKey(), msg)); + }); } prunedUpdates.add(setTrustedLocation(location)); return prunedUpdates; diff --git a/catalog/service/rest/src/main/java/org/projectnessie/catalog/service/rest/IcebergApiV1NamespaceResource.java b/catalog/service/rest/src/main/java/org/projectnessie/catalog/service/rest/IcebergApiV1NamespaceResource.java index 436c0027d0a..8547219e7e1 100644 --- a/catalog/service/rest/src/main/java/org/projectnessie/catalog/service/rest/IcebergApiV1NamespaceResource.java +++ b/catalog/service/rest/src/main/java/org/projectnessie/catalog/service/rest/IcebergApiV1NamespaceResource.java @@ -15,6 +15,7 @@ */ package org.projectnessie.catalog.service.rest; +import static java.lang.String.format; import static org.projectnessie.catalog.formats.iceberg.nessie.CatalogOps.CATALOG_CREATE_ENTITY; import static org.projectnessie.catalog.formats.iceberg.nessie.CatalogOps.CATALOG_DROP_ENTITY; import static org.projectnessie.catalog.formats.iceberg.nessie.CatalogOps.CATALOG_UPDATE_ENTITY; @@ -125,8 +126,6 @@ public IcebergCreateNamespaceResponse createNamespace( throws IOException { ParsedReference ref = decodePrefix(prefix).parsedReference(); - // TODO might want to prevent setting 'location' - Map properties = createNamespaceRequest.properties(); Namespace namespace = @@ -170,8 +169,18 @@ public IcebergCreateNamespaceResponse createNamespace( apiWrite().addKeyAction(key, CATALOG_CREATE_ENTITY.name()); if (!namespace.getProperties().isEmpty()) { requestMeta.addKeyAction(key, META_SET_PROPERTIES.name()); - if (namespace.getProperties().containsKey("location")) { + String location = namespace.getProperties().get("location"); + if (location != null) { requestMeta.addKeyAction(key, META_SET_LOCATION.name()); + catalogService + .validateStorageLocation(location) + .ifPresent( + msg -> { + throw new IllegalArgumentException( + format( + "Location for namespace '%s' cannot be associated with any configured object storage location: %s", + key, msg)); + }); } } @@ -336,43 +345,20 @@ public IcebergGetNamespaceResponse loadNamespaceMetadata( API_READ); Map namespacesMap = namespaces.toContentsMap(); - Content content = namespacesMap.get(nessieNamespace.toContentKey()); + ContentKey contentKey = nessieNamespace.toContentKey(); + Content content = namespacesMap.get(contentKey); if (content == null || !content.getType().equals(NAMESPACE)) { - throw new NessieContentNotFoundException( - nessieNamespace.toContentKey(), namespaceRef.referenceName()); + throw new NessieContentNotFoundException(contentKey, namespaceRef.referenceName()); } nessieNamespace = (Namespace) content; Map properties = new HashMap<>(nessieNamespace.getProperties()); if (!properties.containsKey("location")) { - StorageUri location = null; - List remainingElements = null; - - // Find the nearest namespace with a 'location' property and start from there - for (int n = keysInOrder.size() - 2; n >= 0; n--) { - Content parent = namespacesMap.get(keysInOrder.get(n)); - if (parent != null && parent.getType().equals(NAMESPACE)) { - Namespace parentNamespace = (Namespace) parent; - String parentLocationString = parentNamespace.getProperties().get("location"); - if (parentLocationString != null) { - location = StorageUri.of(parentLocationString); - remainingElements = - nessieNamespace.getElements().subList(n + 1, nessieNamespace.getElementCount()); - } - } - } - - // No parent namespace has a 'location' property, start from the warehouse - if (location == null) { - WarehouseConfig warehouse = catalogConfig.getWarehouse(decoded.warehouse()); - location = StorageUri.of(warehouse.location()).withTrailingSeparator(); - remainingElements = nessieNamespace.getElements(); - } - - for (String element : remainingElements) { - location = location.resolve(element).withTrailingSeparator(); - } - + WarehouseConfig warehouse = catalogConfig.getWarehouse(decoded.warehouse()); + StorageUri location = + catalogService + .locationForEntity(warehouse, contentKey, keysInOrder, namespacesMap) + .withTrailingSeparator(); properties.put("location", location.toString()); } @@ -432,8 +418,18 @@ public IcebergUpdateNamespacePropertiesResponse updateProperties( } if (!updateNamespacePropertiesRequest.updates().isEmpty()) { requestMeta.addKeyAction(key, CatalogOps.META_SET_PROPERTIES.name()); - if (updateNamespacePropertiesRequest.updates().containsKey("location")) { + String location = updateNamespacePropertiesRequest.updates().get("location"); + if (location != null) { requestMeta.addKeyAction(key, CatalogOps.META_SET_LOCATION.name()); + catalogService + .validateStorageLocation(location) + .ifPresent( + msg -> { + throw new IllegalArgumentException( + format( + "Location for namespace '%s' cannot be associated with any configured object storage location: %s", + key, msg)); + }); } } treeService.commitMultipleOperations(ref.getName(), ref.getHash(), ops, requestMeta.build()); diff --git a/catalog/service/rest/src/main/java/org/projectnessie/catalog/service/rest/IcebergApiV1TableResource.java b/catalog/service/rest/src/main/java/org/projectnessie/catalog/service/rest/IcebergApiV1TableResource.java index e1b21cfb584..6dbb32e7a22 100644 --- a/catalog/service/rest/src/main/java/org/projectnessie/catalog/service/rest/IcebergApiV1TableResource.java +++ b/catalog/service/rest/src/main/java/org/projectnessie/catalog/service/rest/IcebergApiV1TableResource.java @@ -26,7 +26,7 @@ import static org.projectnessie.catalog.formats.iceberg.nessie.CatalogOps.CATALOG_CREATE_ENTITY; import static org.projectnessie.catalog.formats.iceberg.nessie.CatalogOps.CATALOG_DROP_ENTITY; import static org.projectnessie.catalog.formats.iceberg.nessie.CatalogOps.CATALOG_UPDATE_ENTITY; -import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.icebergBaseLocation; +import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.icebergNewEntityBaseLocation; import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.nessieTableSnapshotToIceberg; import static org.projectnessie.catalog.formats.iceberg.nessie.NessieModelIceberg.newIcebergTableSnapshot; import static org.projectnessie.catalog.formats.iceberg.rest.IcebergMetadataUpdate.AddPartitionSpec.addPartitionSpec; @@ -41,6 +41,7 @@ import static org.projectnessie.catalog.formats.iceberg.rest.IcebergMetadataUpdate.UpgradeFormatVersion.upgradeFormatVersion; import static org.projectnessie.catalog.service.rest.TableRef.tableRef; import static org.projectnessie.model.Content.Type.ICEBERG_TABLE; +import static org.projectnessie.model.Content.Type.NAMESPACE; import static org.projectnessie.model.Reference.ReferenceType.BRANCH; import static org.projectnessie.versioned.RequestMeta.API_WRITE; import static org.projectnessie.versioned.RequestMeta.apiWrite; @@ -324,11 +325,21 @@ public Uni createTable( WarehouseConfig warehouse = catalogConfig.getWarehouse(tableRef.warehouse()); - if (createTableRequest.stageCreate()) { - - String location = icebergBaseLocation(warehouse.location(), tableRef.contentKey()); - updates.add(setTrustedLocation(location)); + ParsedReference ref = tableRef.reference(); + String location = + icebergNewEntityBaseLocation( + catalogService + .locationForEntity( + warehouse, + tableRef.contentKey(), + NAMESPACE, + ICEBERG_V1, + ref.name(), + ref.hashWithRelativeSpec()) + .toString()); + updates.add(setTrustedLocation(location)); + if (createTableRequest.stageCreate()) { NessieTableSnapshot snapshot = new IcebergTableMetadataUpdateState( newIcebergTableSnapshot(uuid), tableRef.contentKey(), false) @@ -456,6 +467,16 @@ public Uni registerTable( IcebergJson.objectMapper().readValue(metadataInput, IcebergTableMetadata.class); } + catalogService + .validateStorageLocation(tableMetadata.location()) + .ifPresent( + msg -> { + throw new IllegalArgumentException( + format( + "Location for table '%s' to be registered cannot be associated with any configured object storage location: %s", + tableRef.contentKey(), msg)); + }); + ToIntFunction safeUnbox = i -> i != null ? i : 0; Content newContent = @@ -598,6 +619,9 @@ public Uni updateTable( throws IOException { TableRef tableRef = decodeTableRef(prefix, namespace, table); + // TODO allow updating the "location" property, rely on AuthZ + // TODO verify the "location" can be mapped to a configured object-store + return createOrUpdateEntity(tableRef, commitTableRequest, ICEBERG_TABLE, CATALOG_UPDATE_ENTITY) .map( snap -> { diff --git a/servers/quarkus-server/src/test/java/org/projectnessie/server/authz/TestAuthzMeta.java b/servers/quarkus-server/src/test/java/org/projectnessie/server/authz/TestAuthzMeta.java index 55de528f637..8f38d060144 100644 --- a/servers/quarkus-server/src/test/java/org/projectnessie/server/authz/TestAuthzMeta.java +++ b/servers/quarkus-server/src/test/java/org/projectnessie/server/authz/TestAuthzMeta.java @@ -154,6 +154,13 @@ public void icebergApiTable() { check(READ_ENTITY_VALUE, branch, tableKey), check(CREATE_ENTITY, branch, tableKey)), Map.of()), + // 'CatalogServiceImpl.locationForEntity' + authzCheck( + apiContext, + List.of( + canViewReference(branch), + check(READ_ENTITY_VALUE, branch, tableKey.getParent())), + Map.of()), // 'CatalogServiceImpl.commit' authzCheck( apiContext, @@ -226,8 +233,11 @@ public void icebergApiNamespaces() { canCommitChangeAgainstReference(branch)), Map.of())); + Map meta = catalog.loadNamespaceMetadata(myNamespaceIceberg); + String location = meta.get("location"); + var props = new HashMap(); - props.put("location", "my_location"); + props.put("location", location + "/foo_bar/baz"); mockedAuthorizer.reset(); catalog.createNamespace(myNamespaceIcebergInner, props); soft.assertThat(mockedAuthorizer.checksWithoutIdentifiedKey()) diff --git a/servers/quarkus-server/src/testFixtures/java/org/projectnessie/server/catalog/AbstractIcebergCatalogTests.java b/servers/quarkus-server/src/testFixtures/java/org/projectnessie/server/catalog/AbstractIcebergCatalogTests.java index b6c421d7934..8d978809015 100644 --- a/servers/quarkus-server/src/testFixtures/java/org/projectnessie/server/catalog/AbstractIcebergCatalogTests.java +++ b/servers/quarkus-server/src/testFixtures/java/org/projectnessie/server/catalog/AbstractIcebergCatalogTests.java @@ -15,6 +15,7 @@ */ package org.projectnessie.server.catalog; +import static java.lang.String.format; import static java.nio.charset.StandardCharsets.UTF_8; import static java.time.format.DateTimeFormatter.ISO_OFFSET_DATE_TIME; import static java.util.Collections.singletonMap; @@ -82,6 +83,7 @@ import org.projectnessie.model.IcebergTable; import org.projectnessie.model.LogResponse; import org.projectnessie.model.Reference; +import org.projectnessie.storage.uri.StorageUri; public abstract class AbstractIcebergCatalogTests extends CatalogTests { @@ -132,7 +134,7 @@ void cleanup() throws Exception { protected NessieClientBuilder nessieClientBuilder() { int catalogServerPort = Integer.getInteger("quarkus.http.port"); return NessieClientBuilder.createClientBuilderFromSystemSettings() - .withUri(String.format("http://127.0.0.1:%d/api/v2/", catalogServerPort)); + .withUri(format("http://127.0.0.1:%d/api/v2/", catalogServerPort)); } @Override @@ -170,6 +172,47 @@ protected boolean overridesRequestedLocation() { protected abstract String scheme(); + @Test + public void testNewTableLocationFromParentNamespace() { + @SuppressWarnings("resource") + RESTCatalog catalog = catalog(); + + if (this.requiresNamespaceCreate()) { + catalog.createNamespace(NS); + } + + catalog.buildTable(TABLE, SCHEMA).withPartitionSpec(SPEC).withSortOrder(WRITE_ORDER).create(); + + Table pokeTable = catalog.loadTable(TABLE); + + String pokeTableLocation = pokeTable.location(); + + int idx = pokeTableLocation.indexOf(format("/%s/", TABLE.namespace().level(0))); + StorageUri rootIshUri = StorageUri.of(pokeTableLocation.substring(0, idx)); + + String namespaceLocation = rootIshUri + "/some/custom/path"; + Map namespaceProps = new HashMap<>(); + namespaceProps.put("location", namespaceLocation); + Namespace namespace = Namespace.of("new_table_location_from_namespace"); + catalog.createNamespace(namespace, namespaceProps); + + Map namespaceMeta = catalog.loadNamespaceMetadata(namespace); + assertThat(namespaceMeta).containsEntry("location", namespaceLocation); + + Namespace nestedNamespace = Namespace.of("new_table_location_from_namespace", "nested_level"); + catalog.createNamespace(nestedNamespace, namespaceProps); + + Table tableWithNsLoc = + catalog + .buildTable(TableIdentifier.of(nestedNamespace, "table_loc_from_ns"), SCHEMA) + .withPartitionSpec(SPEC) + .withSortOrder(WRITE_ORDER) + .create(); + + String tableLocation = tableWithNsLoc.location(); + assertThat(tableLocation).startsWith(namespaceLocation + "/nested_level/table_loc_from_ns_"); + } + @Test public void testNessieCreateOnDifferentBranch() throws Exception { @SuppressWarnings("resource") @@ -191,7 +234,7 @@ public void testNessieCreateOnDifferentBranch() throws Exception { api.createReference().reference(branch).sourceRefName(defaultBranch.getName()).create(); TableIdentifier onDifferentBranch = - TableIdentifier.of(NS, String.format("`%s@%s`", tableName, branch.getName())); + TableIdentifier.of(NS, format("`%s@%s`", tableName, branch.getName())); // "Create ICEBERG_TABLE" commit Table table = @@ -351,7 +394,7 @@ public void testNessieTimeTravel() throws Exception { CommitMeta commit = commitLog.get(i); IcebergTable c = contents.get(i); - String tableSpec = String.format("`%s@main#%s`", tableName, commit.getHash()); + String tableSpec = format("`%s@main#%s`", tableName, commit.getHash()); table = catalog.loadTable(TableIdentifier.of(NS, tableSpec)); soft.assertThat(table) .describedAs("using branch + commit ID #%d, table '%s'", i, tableSpec) @@ -360,7 +403,7 @@ public void testNessieTimeTravel() throws Exception { t -> t.properties().get("nessie.commit.id")) .containsExactly(c.getSnapshotId(), commit.getHash()); - tableSpec = String.format("`%s@main#%s`", tableName, commit.getCommitTime()); + tableSpec = format("`%s@main#%s`", tableName, commit.getCommitTime()); table = catalog.loadTable(TableIdentifier.of(NS, tableSpec)); soft.assertThat(table) .describedAs("using branch + commit timestamp #%d, table '%s'", i, tableSpec) @@ -369,7 +412,7 @@ public void testNessieTimeTravel() throws Exception { t -> t.properties().get("nessie.commit.id")) .containsExactly(c.getSnapshotId(), commit.getHash()); - tableSpec = String.format("`%s#%s`", tableName, commit.getHash()); + tableSpec = format("`%s#%s`", tableName, commit.getHash()); table = catalog.loadTable(TableIdentifier.of(NS, tableSpec)); soft.assertThat(table) .describedAs("using commit ID #%d, table '%s'", i, tableSpec) @@ -378,7 +421,7 @@ public void testNessieTimeTravel() throws Exception { t -> t.properties().get("nessie.commit.id")) .containsExactly(c.getSnapshotId(), commit.getHash()); - tableSpec = String.format("`%s#%s`", tableName, commit.getCommitTime()); + tableSpec = format("`%s#%s`", tableName, commit.getCommitTime()); table = catalog.loadTable(TableIdentifier.of(NS, tableSpec)); soft.assertThat(table) .describedAs("using commit timestamp #%d, table '%s'", i, tableSpec) @@ -388,7 +431,7 @@ public void testNessieTimeTravel() throws Exception { .containsExactly(c.getSnapshotId(), commit.getHash()); tableSpec = - String.format( + format( "`%s#%s`", tableName, ISO_OFFSET_DATE_TIME.format(