Skip to content

Commit

Permalink
Core: Ignore split offsets when the last split offset is past the fil…
Browse files Browse the repository at this point in the history
…e length
  • Loading branch information
amogh-jahagirdar committed Oct 17, 2023
1 parent 069d930 commit 8f83bdc
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 2 deletions.
10 changes: 10 additions & 0 deletions core/src/main/java/org/apache/iceberg/BaseFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,16 @@ public ByteBuffer keyMetadata() {

@Override
public List<Long> splitOffsets() {
if (splitOffsets == null || splitOffsets.length == 0) {
return null;
}

// If the last split offset is past the file size this means the split offsets are corrupted and
// should not be used
if (splitOffsets[splitOffsets.length - 1] >= fileSizeInBytes) {
return null;
}

return ArrayUtil.toUnmodifiableLongList(splitOffsets);
}

Expand Down
4 changes: 2 additions & 2 deletions core/src/test/java/org/apache/iceberg/TableTestBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ public class TableTestBase {
.withFileSizeInBytes(10)
.withPartitionPath("data_bucket=2") // easy way to set partition data for now
.withRecordCount(1)
.withSplitOffsets(ImmutableList.of(2L, 2_000_000L))
.withSplitOffsets(ImmutableList.of(2L, 8L))
.build();
static final DeleteFile FILE_C2_DELETES =
FileMetadata.deleteFileBuilder(SPEC)
Expand All @@ -129,7 +129,7 @@ public class TableTestBase {
.withFileSizeInBytes(10)
.withPartitionPath("data_bucket=3") // easy way to set partition data for now
.withRecordCount(1)
.withSplitOffsets(ImmutableList.of(3L, 3_000L, 3_000_000L))
.withSplitOffsets(ImmutableList.of(0L, 3L, 6L))
.build();
static final DeleteFile FILE_D2_DELETES =
FileMetadata.deleteFileBuilder(SPEC)
Expand Down
17 changes: 17 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestManifestReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.stream.Collectors;
import org.apache.iceberg.ManifestEntry.Status;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Streams;
Expand Down Expand Up @@ -159,4 +160,20 @@ public void testDeleteFilePositions() throws IOException {
}
}
}

@Test
public void testDataFileSplitOffsetsNullWhenInvalid() throws IOException {
DataFile invalidOffset =
DataFiles.builder(SPEC)
.withPath("/path/to/invalid-offsets.parquet")
.withFileSizeInBytes(10)
.withRecordCount(1)
.withSplitOffsets(ImmutableList.of(2L, 1000L)) // Offset 1000 is out of bounds
.build();
ManifestFile manifest = writeManifest(1000L, invalidOffset);
try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, FILE_IO)) {
DataFile file = Iterables.getOnlyElement(reader);
Assertions.assertThat(file.splitOffsets()).isNull();
}
}
}

0 comments on commit 8f83bdc

Please sign in to comment.