diff --git a/api/src/main/java/org/apache/iceberg/ManifestFile.java b/api/src/main/java/org/apache/iceberg/ManifestFile.java index 8f20697c7780..60372636e14e 100644 --- a/api/src/main/java/org/apache/iceberg/ManifestFile.java +++ b/api/src/main/java/org/apache/iceberg/ManifestFile.java @@ -49,7 +49,7 @@ public interface ManifestFile { Types.LongType.get(), "Lowest sequence number in the manifest"); Types.NestedField SNAPSHOT_ID = - optional( + required( 503, "added_snapshot_id", Types.LongType.get(), "Snapshot ID that added the manifest"); Types.NestedField ADDED_FILES_COUNT = optional(504, "added_files_count", Types.IntegerType.get(), "Added entry count"); diff --git a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java index a3c4fc8738cd..58dec570d1fb 100644 --- a/core/src/main/java/org/apache/iceberg/BaseSnapshot.java +++ b/core/src/main/java/org/apache/iceberg/BaseSnapshot.java @@ -138,7 +138,8 @@ private void cacheManifests(FileIO fileIO) { allManifests = Lists.transform( Arrays.asList(v1ManifestLocations), - location -> new GenericManifestFile(fileIO.newInputFile(location), 0)); + location -> + new GenericManifestFile(fileIO.newInputFile(location), 0, this.snapshotId)); } if (allManifests == null) { diff --git a/core/src/main/java/org/apache/iceberg/GenericManifestFile.java b/core/src/main/java/org/apache/iceberg/GenericManifestFile.java index 7707c57a6905..a079f5fb7bd4 100644 --- a/core/src/main/java/org/apache/iceberg/GenericManifestFile.java +++ b/core/src/main/java/org/apache/iceberg/GenericManifestFile.java @@ -87,6 +87,26 @@ public GenericManifestFile(Schema avroSchema) { this.keyMetadata = null; } + GenericManifestFile(InputFile file, int specId, long snapshotId) { + super(ManifestFile.schema().columns().size()); + this.avroSchema = AVRO_SCHEMA; + this.file = file; + this.manifestPath = file.location(); + this.length = null; // lazily loaded from file + this.specId = specId; + this.sequenceNumber = 0; + this.minSequenceNumber = 0; + this.snapshotId = snapshotId; + this.addedFilesCount = null; + this.addedRowsCount = null; + this.existingFilesCount = null; + this.existingRowsCount = null; + this.deletedFilesCount = null; + this.deletedRowsCount = null; + this.partitions = null; + this.keyMetadata = null; + } + /** Adjust the arg order to avoid conflict with the public constructor below */ GenericManifestFile( String path, @@ -167,7 +187,13 @@ private GenericManifestFile(GenericManifestFile toCopy) { super(toCopy); this.avroSchema = toCopy.avroSchema; this.manifestPath = toCopy.manifestPath; - this.length = toCopy.length; + try { + this.length = toCopy.length(); + } catch (UnsupportedOperationException e) { + // Can be removed when embedded manifests are dropped + // DummyFileIO does not support .length() + this.length = null; + } this.specId = toCopy.specId; this.content = toCopy.content; this.sequenceNumber = toCopy.sequenceNumber; diff --git a/core/src/main/java/org/apache/iceberg/V2Metadata.java b/core/src/main/java/org/apache/iceberg/V2Metadata.java index bb715385610b..18c3b0a40613 100644 --- a/core/src/main/java/org/apache/iceberg/V2Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V2Metadata.java @@ -40,7 +40,7 @@ private V2Metadata() {} ManifestFile.MANIFEST_CONTENT.asRequired(), ManifestFile.SEQUENCE_NUMBER.asRequired(), ManifestFile.MIN_SEQUENCE_NUMBER.asRequired(), - ManifestFile.SNAPSHOT_ID.asRequired(), + ManifestFile.SNAPSHOT_ID, ManifestFile.ADDED_FILES_COUNT.asRequired(), ManifestFile.EXISTING_FILES_COUNT.asRequired(), ManifestFile.DELETED_FILES_COUNT.asRequired(), diff --git a/core/src/main/java/org/apache/iceberg/V3Metadata.java b/core/src/main/java/org/apache/iceberg/V3Metadata.java index 12f4a2058748..fc11a7df03de 100644 --- a/core/src/main/java/org/apache/iceberg/V3Metadata.java +++ b/core/src/main/java/org/apache/iceberg/V3Metadata.java @@ -40,7 +40,7 @@ private V3Metadata() {} ManifestFile.MANIFEST_CONTENT.asRequired(), ManifestFile.SEQUENCE_NUMBER.asRequired(), ManifestFile.MIN_SEQUENCE_NUMBER.asRequired(), - ManifestFile.SNAPSHOT_ID.asRequired(), + ManifestFile.SNAPSHOT_ID, ManifestFile.ADDED_FILES_COUNT.asRequired(), ManifestFile.EXISTING_FILES_COUNT.asRequired(), ManifestFile.DELETED_FILES_COUNT.asRequired(), diff --git a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java index cae19fece4e9..79c3761fa8c3 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java +++ b/core/src/test/java/org/apache/iceberg/TestMetadataUpdateParser.java @@ -1244,8 +1244,8 @@ private String createManifestListWithManifestFiles(long snapshotId, Long parentS List manifests = ImmutableList.of( - new GenericManifestFile(localInput("file:/tmp/manifest1.avro"), 0), - new GenericManifestFile(localInput("file:/tmp/manifest2.avro"), 0)); + new GenericManifestFile(localInput("file:/tmp/manifest1.avro"), 0, snapshotId), + new GenericManifestFile(localInput("file:/tmp/manifest2.avro"), 0, snapshotId)); try (ManifestListWriter writer = ManifestLists.write(1, Files.localOutput(manifestList), snapshotId, parentSnapshotId, 0)) { diff --git a/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java b/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java index 8a067fc4dc44..e4c2ba5ec2df 100644 --- a/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java +++ b/core/src/test/java/org/apache/iceberg/TestSnapshotJson.java @@ -166,8 +166,8 @@ private String createManifestListWithManifestFiles(long snapshotId, Long parentS List manifests = ImmutableList.of( - new GenericManifestFile(localInput("file:/tmp/manifest1.avro"), 0), - new GenericManifestFile(localInput("file:/tmp/manifest2.avro"), 0)); + new GenericManifestFile(localInput("file:/tmp/manifest1.avro"), 0, snapshotId), + new GenericManifestFile(localInput("file:/tmp/manifest2.avro"), 0, snapshotId)); try (ManifestListWriter writer = ManifestLists.write(1, Files.localOutput(manifestList), snapshotId, parentSnapshotId, 0)) { diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 64c410b46427..6d066e8a654c 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -1663,7 +1663,8 @@ private String createManifestListWithManifestFile( try (ManifestListWriter writer = ManifestLists.write(1, Files.localOutput(manifestList), snapshotId, parentSnapshotId, 0)) { writer.addAll( - ImmutableList.of(new GenericManifestFile(localInput(manifestFile), SPEC_5.specId()))); + ImmutableList.of( + new GenericManifestFile(localInput(manifestFile), SPEC_5.specId(), snapshotId))); } return localInput(manifestList).location(); diff --git a/core/src/test/java/org/apache/iceberg/avro/TestReadProjection.java b/core/src/test/java/org/apache/iceberg/avro/TestReadProjection.java index ead17e9f9c42..30de81266efc 100644 --- a/core/src/test/java/org/apache/iceberg/avro/TestReadProjection.java +++ b/core/src/test/java/org/apache/iceberg/avro/TestReadProjection.java @@ -62,6 +62,28 @@ public void testFullProjection() throws Exception { assertThat(cmp).as("Should contain the correct data value").isEqualTo(0); } + @Test + public void testReadOptionalAsRequired() throws Exception { + Schema writeSchema = + new Schema( + Types.NestedField.required(0, "id", Types.LongType.get()), + Types.NestedField.optional(1, "data", Types.StringType.get())); + + Record record = new Record(AvroSchemaUtil.convert(writeSchema, "table")); + record.put("id", 34L); + record.put("data", "test"); + + Schema readSchema = + new Schema( + Types.NestedField.required(0, "id", Types.LongType.get()), + Types.NestedField.required(1, "data", Types.StringType.get())); + + Record projected = writeAndRead("read_optional_as_required", writeSchema, readSchema, record); + + int cmp = Comparators.charSequences().compare("test", (CharSequence) projected.get("data")); + assertThat(cmp).as("Should contain the correct data/renamed value").isEqualTo(0); + } + @Test public void testReorderedFullProjection() throws Exception { Schema schema =