Skip to content

Commit

Permalink
[AMORO-3228] Remove the format suffix in judgment condition (#3338)
Browse files Browse the repository at this point in the history
* [AMORO-3228] Fix the problem of file name with suffix in judgment condition

* Extract method

* Fix comment
  • Loading branch information
zhongqishang authored Nov 22, 2024
1 parent 55b0acb commit c12c240
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,7 @@ private void flushDeletes() {
return;
}
posDeletes.sort(Comparator.comparingLong(PosRow::pos));
String fileName = TableFileUtil.getFileName(filePath.get().toString());
FileFormat fileFormat = FileFormat.fromFileName(fileName);
if (fileFormat != null) {
fileName = fileName.substring(0, fileName.length() - fileFormat.name().length() - 1);
}
String fileName = TableFileUtil.getFileNameWithoutExt(filePath.get().toString());
String fileDir = TableFileUtil.getFileDir(filePath.get().toString());
String deleteFilePath =
format.addExtension(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import org.apache.amoro.io.AuthenticatedFileIO;
import org.apache.hadoop.fs.Path;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.io.BulkDeletionFailureException;
import org.apache.iceberg.util.Tasks;
import org.slf4j.Logger;
Expand All @@ -36,16 +37,33 @@ public class TableFileUtil {
private static final String POS_DELETE_FILE_IDENTIFIER = "delete";

/**
* Parse file name form file path
* Parse file name from file path.
*
* @param filePath file path
* @return file name parsed from file path
* @return file name parsed from file path, e.g. data-1.parquet.
*/
public static String getFileName(String filePath) {
  // lastIndexOf returns -1 when the path contains no '/', so the start index becomes 0
  // and the whole input is returned as the file name.
  final int nameStart = filePath.lastIndexOf('/') + 1;
  return filePath.substring(nameStart);
}

/**
 * Parse the file name from a file path and strip its file-format extension, if it has one.
 *
 * <p>The format is detected with {@link FileFormat#fromFileName}. When a format is detected and
 * the name ends with {@code "." + extension}, that suffix is removed; in every other case the
 * file name is returned unchanged.
 *
 * @param filePath file path
 * @return file name without ext parsed from file path, e.g. data-1 for /dir/data-1.parquet.
 */
public static String getFileNameWithoutExt(String filePath) {
  String fileName = getFileName(filePath);

  FileFormat fileFormat = FileFormat.fromFileName(fileName);
  if (fileFormat == null) {
    return fileName;
  }

  // The enum constant name is not a reliable stand-in for the extension (METADATA uses
  // "metadata.json"), so measure the real suffix instead of relying on name().length().
  // addExtension("") yields "." + ext because the empty string never ends with an extension.
  String extSuffix = fileFormat.addExtension("");
  if (fileName.endsWith(extSuffix)) {
    return fileName.substring(0, fileName.length() - extSuffix.length());
  }

  // The format matched without a preceding '.', e.g. a file literally named "parquet";
  // there is no extension to strip, and blindly cutting would go out of bounds.
  return fileName;
}

/**
* Parse file directory path from file path
*
Expand Down Expand Up @@ -200,6 +218,8 @@ public static String optimizingPosDeleteFileName(String dataFileName, String suf

/**
 * Check whether a position-delete file name starts with the prefix derived from a data file.
 *
 * <p>The prefix is a name derived from {@code dataFilePath} joined by '-' with
 * POS_DELETE_FILE_IDENTIFIER, and it is compared against the file name of {@code
 * posDeleteFilePath}.
 *
 * <p>NOTE(review): this span shows both forms of the prefix argument, the one built with
 * getFileName(dataFilePath) and the one built with getFileNameWithoutExt(dataFilePath); only one
 * of them belongs in the compiled method - confirm against the repository.
 *
 * @param dataFilePath path of the data file
 * @param posDeleteFilePath path of the position-delete file
 * @return true if the position-delete file name starts with the derived prefix
 */
public static boolean isOptimizingPosDeleteFile(String dataFilePath, String posDeleteFilePath) {
return getFileName(posDeleteFilePath)
.startsWith(String.format("%s-%s", getFileName(dataFilePath), POS_DELETE_FILE_IDENTIFIER));
.startsWith(
String.format(
"%s-%s", getFileNameWithoutExt(dataFilePath), POS_DELETE_FILE_IDENTIFIER));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ public void testWritePosDelete() throws IOException {
dataDir, fileFormat.addExtension("data-1-delete-suffix")))
.toString());
Assert.assertNotNull(deleteFile1);
Assert.assertTrue(
TableFileUtil.isOptimizingPosDeleteFile(dataFile1Path, deleteFile1.path().toString()));
Assert.assertEquals(3, deleteFile1.recordCount());
// Check whether the path-pos pairs are sorted as expected.
Schema pathPosSchema = DeleteSchemaUtil.pathPosSchema();
Expand All @@ -147,6 +149,8 @@ public void testWritePosDelete() throws IOException {
dataDir, fileFormat.addExtension("data-2-delete-suffix")))
.toString());
Assert.assertNotNull(deleteFile2);
Assert.assertTrue(
TableFileUtil.isOptimizingPosDeleteFile(dataFile2Path, deleteFile2.path().toString()));
Assert.assertEquals(
new Path(
TableFileUtil.getNewFilePath(
Expand Down

0 comments on commit c12c240

Please sign in to comment.