From 249ac10f937c503d35e54a7ee4fc258098a893f6 Mon Sep 17 00:00:00 2001 From: Amogh Jahagirdar Date: Tue, 2 Jan 2024 12:52:59 -0800 Subject: [PATCH] Spark 3.3, 3.4: Fix file clobbering when Spark reuses query IDs (#9255) (#9399) --- .../main/java/org/apache/iceberg/spark/source/SparkWrite.java | 4 ++-- .../main/java/org/apache/iceberg/spark/source/SparkWrite.java | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java b/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java index a080fcead13b..59fecd45cc2c 100644 --- a/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java +++ b/spark/v3.3/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java @@ -643,11 +643,11 @@ public DataWriter createWriter(int partitionId, long taskId, long e Table table = tableBroadcast.value(); PartitionSpec spec = table.specs().get(outputSpecId); FileIO io = table.io(); - + String operationId = queryId + "-" + epochId; OutputFileFactory fileFactory = OutputFileFactory.builderFor(table, partitionId, taskId) .format(format) - .operationId(queryId) + .operationId(operationId) .build(); SparkFileWriterFactory writerFactory = SparkFileWriterFactory.builderFor(table) diff --git a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java index 96432178247d..fb49d0bff5f4 100644 --- a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java +++ b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/source/SparkWrite.java @@ -657,11 +657,11 @@ public DataWriter createWriter(int partitionId, long taskId, long e Table table = tableBroadcast.value(); PartitionSpec spec = table.specs().get(outputSpecId); FileIO io = table.io(); - + String operationId = queryId + "-" + epochId; OutputFileFactory fileFactory = OutputFileFactory.builderFor(table, partitionId, taskId) .format(format) - .operationId(queryId) + .operationId(operationId) .build(); SparkFileWriterFactory writerFactory = SparkFileWriterFactory.builderFor(table)