Skip to content

Commit

Permalink
Add exponential scaling FNV composite value hash algorithm for remote…
Browse files Browse the repository at this point in the history
… path

Signed-off-by: Ashish Singh <[email protected]>
  • Loading branch information
ashking94 committed Apr 11, 2024
1 parent 645b1f1 commit a6a30c7
Show file tree
Hide file tree
Showing 9 changed files with 134 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,13 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
.get();
assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status());
ensureGreen(restoredIndexName1version2);
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64);

// Create index with cluster setting cluster.remote_store.index.path.prefix.type as hashed_prefix.
indexSettings = getIndexSettings(1, 0).build();
createIndex(indexName2, indexSettings);
ensureGreen(indexName2);
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64);

// Validating that custom data has not changed for indexes which were created before the cluster setting got updated
validatePathType(indexName1, PathType.FIXED);
Expand Down Expand Up @@ -309,7 +309,7 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
ensureGreen(indexName2);

// Validating that custom data has not changed for testindex2 which was created before the cluster setting got updated
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64);
}

private void validatePathType(String index, PathType pathType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import static java.util.Collections.unmodifiableMap;
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA;
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;
import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeUrlBase64AndBinary;
import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64;

/**
* This class contains the different enums related to remote store like data categories and types, path types
Expand Down Expand Up @@ -216,13 +218,26 @@ public static PathType parseString(String pathType) {
@PublicApi(since = "2.14.0")
public enum PathHashAlgorithm {

FNV_1A(0) {
FNV_1A_BASE64(0) {
@Override
String hash(PathInput pathInput) {
String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType()
.getName();
long hash = FNV1a.hash64(input);
return RemoteStoreUtils.longToUrlBase64(hash);
return longToUrlBase64(hash);
}
},
/**
* This hash algorithm will generate a hash value which uses the first 6 bits to create a base64 character and the next 14
* bits to create a binary string.
*/
FNV_1A_COMPOSITE(1) {
@Override
String hash(PathInput pathInput) {
String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType()
.getName();
long hash = FNV1a.hash64(input);
return longToCompositeUrlBase64AndBinary(hash);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public RemoteStorePathStrategy get() {
// Min node version check ensures that we are enabling the new prefix type only when all the nodes understand it.
pathType = Version.CURRENT.compareTo(minNodeVersionSupplier.get()) <= 0 ? type : PathType.FIXED;
// If the path type is fixed, hash algorithm is not applicable.
pathHashAlgorithm = pathType == PathType.FIXED ? null : PathHashAlgorithm.FNV_1A;
pathHashAlgorithm = pathType == PathType.FIXED ? null : PathHashAlgorithm.FNV_1A_BASE64;
return new RemoteStorePathStrategy(pathType, pathHashAlgorithm);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,16 @@
public class RemoteStoreUtils {
public static final int LONG_MAX_LENGTH = String.valueOf(Long.MAX_VALUE).length();

/**
 * URL safe base 64 character set, laid out as a lookup table: the character at index {@code i} is the encoding of
 * the 6-bit value {@code i} (A-Z, a-z, 0-9, then '-' and '_'). This must not be changed as this is used in deriving
 * the base64 equivalent of binary; {@code longToCompositeUrlBase64AndBinary} indexes into it directly, so any
 * reordering would change the strings that method produces.
 */
private static final char[] URL_BASE64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".toCharArray();

/**
* This method subtracts given numbers from Long.MAX_VALUE and returns a string representation of the result.
* The resultant string is guaranteed to be of the same length that of Long.MAX_VALUE. If shorter, we add left padding
* of 0s to the string.
*
* @param num number to get the inverted long string for
* @return String value of Long.MAX_VALUE - num
*/
Expand All @@ -46,6 +52,7 @@ public static String invertLong(long num) {

/**
* This method converts the given string into long and subtracts it from Long.MAX_VALUE
*
* @param str long in string format to be inverted
* @return long value of the invert result
*/
Expand All @@ -59,6 +66,7 @@ public static long invertLong(String str) {

/**
* Extracts the segment name from the provided segment file name
*
* @param filename Segment file name to parse
* @return Name of the segment that the segment file belongs to
*/
Expand All @@ -79,10 +87,9 @@ public static String getSegmentName(String filename) {
}

/**
*
* @param mdFiles List of segment/translog metadata files
* @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name .
* fn returns null if node id is not part of the file name
* @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name .
* fn returns null if node id is not part of the file name
*/
public static void verifyNoMultipleWriters(List<String> mdFiles, Function<String, Tuple<String, String>> fn) {
Map<String, String> nodesByPrimaryTermAndGen = new HashMap<>();
Expand Down Expand Up @@ -116,4 +123,23 @@ static String longToUrlBase64(long value) {
String base64Str = Base64.getUrlEncoder().encodeToString(hashBytes);
return base64Str.substring(0, base64Str.length() - 1);
}

/**
 * Decodes the given URL-safe base64 text and reads a long from the start of the decoded bytes.
 *
 * @param base64Str URL-safe base64 text to decode
 * @return the long read from the first eight decoded bytes
 */
static long urlBase64ToLong(String base64Str) {
    return ByteBuffer.wrap(Base64.getUrlDecoder().decode(base64Str)).getLong();
}

/**
 * Converts an input hash which occupies 64 bits of memory into a composite encoded string. The string will have 2 parts -
 * 1. A single URL safe base 64 character and 2. A binary string. The most significant 6 bits of the hash select the
 * base 64 character, and the next 14 bits are written out as exactly 14 '0'/'1' characters (leading zeros kept).
 * The result is therefore always 15 characters long. For eg - A01000101010001.
 *
 * @param value 64 bit hash value; only its 20 most significant bits contribute to the result
 * @return one base 64 character followed by a 14 character binary string
 */
static String longToCompositeUrlBase64AndBinary(long value) {
    // Unsigned shift keeps the result in [0, 63] even when the sign bit of value is set.
    int base64Index = (int) (value >>> 58);
    // Bits 57..44 of value, i.e. the 14 bits immediately after the 6 used above.
    int binaryBits = (int) ((value >>> 44) & 0x3FFF);

    StringBuilder encoded = new StringBuilder(15);
    encoded.append(URL_BASE64_CHARSET[base64Index]);
    // Emit most significant bit first so the text matches the bit order of the hash.
    for (int bit = 13; bit >= 0; bit--) {
        encoded.append(((binaryBits >>> bit) & 1) == 0 ? '0' : '1');
    }
    return encoded.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1607,7 +1607,7 @@ public void testRemoteCustomData() {
validateRemoteCustomData(
indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY),
PathHashAlgorithm.NAME,
PathHashAlgorithm.FNV_1A.name()
PathHashAlgorithm.FNV_1A_BASE64.name()
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA;
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.LOCK_FILES;
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;
import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A;
import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64;
import static org.opensearch.index.remote.RemoteStoreEnums.PathType.FIXED;
import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_INFIX;
import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_PREFIX;
Expand Down Expand Up @@ -161,10 +161,10 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
BlobPath result = HASHED_PREFIX.path(pathInput, FNV_1A);
BlobPath result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
);

// assert with exact value for known base path
Expand All @@ -178,7 +178,7 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A);
result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertEquals("DgSI70IciXs/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/data/", result.buildAsString());

// Translog Metadata
Expand All @@ -190,10 +190,10 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A);
result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
);

// assert with exact value for known base path
Expand All @@ -204,7 +204,7 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A);
result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertEquals("oKU5SjILiy4/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/metadata/", result.buildAsString());

// Translog Lock files - This is a negative case where the assertion will trip.
Expand Down Expand Up @@ -238,10 +238,10 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A);
result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
);

// assert with exact value for known base path
Expand All @@ -252,7 +252,7 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A);
result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertEquals("AUBRfCIuWdk/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/data/", result.buildAsString());

// Segment Metadata
Expand All @@ -264,10 +264,10 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A);
result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
);

// assert with exact value for known base path
Expand All @@ -278,7 +278,7 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A);
result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertEquals("erwR-G735Uw/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/metadata/", result.buildAsString());

// Segment Lockfiles
Expand All @@ -290,10 +290,10 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A);
result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(String.join(SEPARATOR, FNV_1A_BASE64.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
);

// assert with exact value for known base path
Expand All @@ -304,7 +304,7 @@ public void testGeneratePathForHashedPrefixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A);
result = HASHED_PREFIX.path(pathInput, FNV_1A_BASE64);
assertEquals("KeYDIk0mJXI/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/lock_files/", result.buildAsString());
}

Expand All @@ -330,7 +330,7 @@ public void testGeneratePathForHashedInfixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
BlobPath result = HASHED_INFIX.path(pathInput, FNV_1A);
BlobPath result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
String expected = derivePath(basePath, pathInput);
String actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand All @@ -346,7 +346,7 @@ public void testGeneratePathForHashedInfixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_INFIX.path(pathInput, FNV_1A);
result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/DgSI70IciXs/k2ijhe877d7yuhx7/10/translog/data/";
actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand All @@ -361,7 +361,7 @@ public void testGeneratePathForHashedInfixType() {
.dataType(dataType)
.build();

result = HASHED_INFIX.path(pathInput, FNV_1A);
result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
expected = derivePath(basePath, pathInput);
actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand All @@ -374,7 +374,7 @@ public void testGeneratePathForHashedInfixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_INFIX.path(pathInput, FNV_1A);
result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/oKU5SjILiy4/k2ijhe877d7yuhx7/10/translog/metadata/";
actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand Down Expand Up @@ -410,7 +410,7 @@ public void testGeneratePathForHashedInfixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_INFIX.path(pathInput, FNV_1A);
result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
expected = derivePath(basePath, pathInput);
actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand All @@ -423,7 +423,7 @@ public void testGeneratePathForHashedInfixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_INFIX.path(pathInput, FNV_1A);
result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/AUBRfCIuWdk/k2ijhe877d7yuhx7/10/segments/data/";
actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand All @@ -437,7 +437,7 @@ public void testGeneratePathForHashedInfixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_INFIX.path(pathInput, FNV_1A);
result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
expected = derivePath(basePath, pathInput);
actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand All @@ -450,7 +450,7 @@ public void testGeneratePathForHashedInfixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_INFIX.path(pathInput, FNV_1A);
result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/erwR-G735Uw/k2ijhe877d7yuhx7/10/segments/metadata/";
actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand All @@ -464,7 +464,7 @@ public void testGeneratePathForHashedInfixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_INFIX.path(pathInput, FNV_1A);
result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
expected = derivePath(basePath, pathInput);
actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand All @@ -477,7 +477,7 @@ public void testGeneratePathForHashedInfixType() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_INFIX.path(pathInput, FNV_1A);
result = HASHED_INFIX.path(pathInput, FNV_1A_BASE64);
expected = "xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/KeYDIk0mJXI/k2ijhe877d7yuhx7/10/segments/lock_files/";
actual = result.buildAsString();
assertTrue(new ParameterizedMessage("expected={} actual={}", expected, actual).getFormattedMessage(), actual.startsWith(expected));
Expand All @@ -487,7 +487,7 @@ private String derivePath(String basePath, PathInput pathInput) {
return "".equals(basePath)
? String.join(
SEPARATOR,
FNV_1A.hash(pathInput),
FNV_1A_BASE64.hash(pathInput),
pathInput.indexUUID(),
pathInput.shardId(),
pathInput.dataCategory().getName(),
Expand All @@ -496,7 +496,7 @@ private String derivePath(String basePath, PathInput pathInput) {
: String.join(
SEPARATOR,
basePath,
FNV_1A.hash(pathInput),
FNV_1A_BASE64.hash(pathInput),
pathInput.indexUUID(),
pathInput.shardId(),
pathInput.dataCategory().getName(),
Expand Down
Loading

0 comments on commit a6a30c7

Please sign in to comment.