From cedb90a52b89144295eaf688dbf7a463b68af577 Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Mon, 9 Oct 2023 10:34:14 +0200
Subject: [PATCH 01/15] get filesystem from path

---
 .../linkedin/venice/hadoop/DefaultInputDataInfoProvider.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
index 0d5702f2b7..752f674a69 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
@@ -326,8 +326,8 @@ public Schema extractAvroSubSchema(Schema origin, String fieldName) {
 
   @Override
   public long getInputLastModificationTime(String inputUri) throws IOException {
-    FileSystem fs = FileSystem.get(new Configuration());
     Path srcPath = new Path(inputUri);
+    FileSystem fs = srcPath.getFileSystem(new Configuration());
    try {
       return fs.getFileStatus(srcPath).getModificationTime();
     } catch (FileNotFoundException e) {
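
Note: the one-line swap above is the core of the whole series. FileSystem.get(Configuration) returns the filesystem for the process-wide default scheme (fs.defaultFS, typically HDFS), so a job pointed at an s3a:// or file:// input URI would previously resolve it against the default filesystem and fail. Path.getFileSystem(Configuration) resolves the filesystem from the scheme of the path itself. A minimal sketch of the difference, assuming an s3a connector is on the classpath; the bucket name is hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public final class FsResolutionDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path input = new Path("s3a://example-bucket/venice/input"); // hypothetical input URI

        // Resolved from fs.defaultFS, regardless of the path we actually want to read:
        FileSystem defaultFs = FileSystem.get(conf);
        // Resolved from the path's own scheme, which is what the patch switches to:
        FileSystem inputFs = input.getFileSystem(conf);

        System.out.println(defaultFs.getUri()); // e.g. file:/// or hdfs://namenode:8020
        System.out.println(inputFs.getUri());   // s3a://example-bucket
      }
    }
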
From afb66650e07f298f916c83811d6bb75379abe33d Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Mon, 9 Oct 2023 17:31:48 +0200
Subject: [PATCH 02/15] use overridden hadoop config

---
 .../hadoop/DefaultInputDataInfoProvider.java | 22 ++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
index 752f674a69..3ccb777469 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
@@ -1,7 +1,6 @@
 package com.linkedin.venice.hadoop;
 
-import static com.linkedin.venice.hadoop.VenicePushJob.DEFAULT_KEY_FIELD_PROP;
-import static com.linkedin.venice.hadoop.VenicePushJob.DEFAULT_VALUE_FIELD_PROP;
+import static com.linkedin.venice.hadoop.VenicePushJob.*;
 import static com.linkedin.venice.utils.ByteUtils.BYTES_PER_MB;
 
 import com.github.luben.zstd.ZstdDictTrainer;
@@ -100,8 +99,8 @@ public class DefaultInputDataInfoProvider implements InputDataInfoProvider {
   @Override
   public InputDataInfo validateInputAndGetInfo(String inputUri) throws Exception {
     long inputModificationTime = getInputLastModificationTime(inputUri);
-    FileSystem fs = FileSystem.get(new Configuration());
     Path srcPath = new Path(inputUri);
+    FileSystem fs = srcPath.getFileSystem(getConfiguration());
     FileStatus[] fileStatuses = fs.listStatus(srcPath, PATH_FILTER);
 
     if (fileStatuses == null || fileStatuses.length == 0) {
@@ -324,10 +323,25 @@ public Schema extractAvroSubSchema(Schema origin, String fieldName) {
     return field.schema();
   }
 
+  private Configuration getConfiguration() {
+    Configuration conf = new Configuration();
+    for (String key: props.keySet()) {
+      if (key.startsWith(HADOOP_PREFIX)) {
+        String hadoopKey = key.substring(HADOOP_PREFIX.length());
+        if (conf.get(hadoopKey) != null) {
+          LOGGER.warn("Hadoop configuration {} is overwritten by {}", hadoopKey, key);
+        }
+        conf.set(hadoopKey, props.getString(key));
+      }
+    }
+    LOGGER.info("Hadoop configuration: {} {}", conf.get("fs.s3.awsAccessKeyId"), conf.get("fs.s3.awsSecretAccessKey"));
+    return conf;
+  }
+
   @Override
   public long getInputLastModificationTime(String inputUri) throws IOException {
     Path srcPath = new Path(inputUri);
-    FileSystem fs = srcPath.getFileSystem(new Configuration());
+    FileSystem fs = srcPath.getFileSystem(getConfiguration());
     try {
       return fs.getFileStatus(srcPath).getModificationTime();
     } catch (FileNotFoundException e) {

From e32b1ebd66933d8485b0d1b7b6b0a9cefe701770 Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Mon, 9 Oct 2023 17:49:26 +0200
Subject: [PATCH 03/15] fix-imports

---
 .../linkedin/venice/hadoop/DefaultInputDataInfoProvider.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
index 3ccb777469..834a56ca00 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
@@ -1,6 +1,8 @@
 package com.linkedin.venice.hadoop;
 
-import static com.linkedin.venice.hadoop.VenicePushJob.*;
+import static com.linkedin.venice.hadoop.VenicePushJob.DEFAULT_KEY_FIELD_PROP;
+import static com.linkedin.venice.hadoop.VenicePushJob.DEFAULT_VALUE_FIELD_PROP;
+import static com.linkedin.venice.hadoop.VenicePushJob.HADOOP_PREFIX;
 import static com.linkedin.venice.utils.ByteUtils.BYTES_PER_MB;
 
 import com.github.luben.zstd.ZstdDictTrainer;
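
Note: PATCH 02 routes the hadoop-conf.-prefixed push-job properties into the Configuration used for filesystem resolution (PATCH 03 merely re-expands the wildcard import). Beware that the LOGGER.info line added here prints fs.s3.awsAccessKeyId and fs.s3.awsSecretAccessKey, i.e. credential values, into the job log; it is debugging output and is removed again in PATCH 08. The prefix-stripping idea itself, as a standalone sketch with java.util.Properties standing in for VeniceProperties (the endpoint value is hypothetical):

    import java.util.Properties;
    import org.apache.hadoop.conf.Configuration;

    public final class PrefixOverrideDemo {
      private static final String HADOOP_PREFIX = "hadoop-conf.";

      // Copy every "hadoop-conf."-prefixed property into a fresh Hadoop
      // Configuration, with the prefix stripped off the key.
      static Configuration toHadoopConf(Properties props) {
        Configuration conf = new Configuration();
        for (String key : props.stringPropertyNames()) {
          if (key.startsWith(HADOOP_PREFIX)) {
            conf.set(key.substring(HADOOP_PREFIX.length()), props.getProperty(key));
          }
        }
        return conf;
      }

      public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("hadoop-conf.fs.s3a.endpoint", "http://localhost:9000");
        props.setProperty("unrelated.key", "ignored");
        System.out.println(toHadoopConf(props).get("fs.s3a.endpoint")); // http://localhost:9000
      }
    }
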
From 2aff43c548158bd4c8e170d49cbc76cc9a7c97b1 Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Tue, 10 Oct 2023 16:24:21 +0200
Subject: [PATCH 04/15] Bump hadoop

---
 build.gradle                         | 7 ++++---
 clients/venice-push-job/build.gradle | 6 ++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/build.gradle b/build.gradle
index 05f12338d3..d2d4544839 100644
--- a/build.gradle
+++ b/build.gradle
@@ -65,7 +65,8 @@ ext.libraries = [
   d2: 'com.linkedin.pegasus:d2:' + pegasusVersion,
   failsafe: 'net.jodah:failsafe:2.4.0',
   fastUtil: 'it.unimi.dsi:fastutil:8.3.0',
-  hadoopCommon: 'org.apache.hadoop:hadoop-common:2.3.0',
+  hadoopCommon: 'org.apache.hadoop:hadoop-common:2.6.0',
+  hadoopAws: 'org.apache.hadoop:hadoop-aws:2.6.0',
   helix: 'org.apache.helix:helix-core:1.0.4',
   httpAsyncClient: 'org.apache.httpcomponents:httpasyncclient:4.1.2',
   httpClient5: 'org.apache.httpcomponents.client5:httpclient5:5.2.1',
@@ -88,8 +89,8 @@ ext.libraries = [
   log4j2api: 'org.apache.logging.log4j:log4j-api:' + log4j2Version,
   log4j2core: 'org.apache.logging.log4j:log4j-core:' + log4j2Version,
   mail: 'javax.mail:mail:1.4.4',
-  mapreduceClientCore: 'org.apache.hadoop:hadoop-mapreduce-client-core:2.3.0',
-  mapreduceClientJobClient: 'org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.3.0',
+  mapreduceClientCore: 'org.apache.hadoop:hadoop-mapreduce-client-core:2.6.0',
+  mapreduceClientJobClient: 'org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.6.0',
   mockito: 'org.mockito:mockito-core:3.3.3',
   netty: 'io.netty:netty-all:4.1.52.Final',
   oss: 'org.sonatype.oss:oss-parent:7',
diff --git a/clients/venice-push-job/build.gradle b/clients/venice-push-job/build.gradle
index d0ab11e689..99306b60d4 100644
--- a/clients/venice-push-job/build.gradle
+++ b/clients/venice-push-job/build.gradle
@@ -26,6 +26,12 @@ dependencies {
     exclude group: 'javax.servlet'
   }
 
+  implementation (libraries.hadoopAws){
+    // Exclude transitive dependency
+    exclude group: 'org.apache.avro'
+    exclude group: 'javax.servlet'
+  }
+
   implementation project(':clients:venice-thin-client') // Needed by the Post Bulk-load Analysis Job
   implementation libraries.commonsIo

From 34fa80e13407cd5f55a18118f711e5e28fcf0dca Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Wed, 18 Oct 2023 17:39:10 +0200
Subject: [PATCH 05/15] Bump hadoop only

---
 build.gradle | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/build.gradle b/build.gradle
index f93565cda1..edb784e8af 100644
--- a/build.gradle
+++ b/build.gradle
@@ -49,6 +49,7 @@ def jacksonVersion = '2.13.3'
 def pulsarGroup = 'org.apache.pulsar'
 def pulsarVersion = '2.10.3'
 def alpnAgentVersion = '2.0.10'
+def hadoopVersion = '2.6.0'
 
 ext.libraries = [
   alpnAgent: "org.mortbay.jetty.alpn:jetty-alpn-agent:${alpnAgentVersion}",
@@ -71,7 +72,7 @@ ext.libraries = [
   d2: "com.linkedin.pegasus:d2:${pegasusVersion}",
   failsafe: 'net.jodah:failsafe:2.4.0',
   fastUtil: 'it.unimi.dsi:fastutil:8.3.0',
-  hadoopCommon: 'org.apache.hadoop:hadoop-common:2.3.0',
+  hadoopCommon: "org.apache.hadoop:hadoop-common:${hadoopVersion}",
   helix: 'org.apache.helix:helix-core:1.1.0',
   httpAsyncClient: 'org.apache.httpcomponents:httpasyncclient:4.1.2',
   httpClient5: 'org.apache.httpcomponents.client5:httpclient5:5.2.1',
@@ -94,8 +95,8 @@ ext.libraries = [
   log4j2api: "org.apache.logging.log4j:log4j-api:${log4j2Version}",
   log4j2core: "org.apache.logging.log4j:log4j-core:${log4j2Version}",
   mail: 'javax.mail:mail:1.4.4',
-  mapreduceClientCore: 'org.apache.hadoop:hadoop-mapreduce-client-core:2.3.0',
-  mapreduceClientJobClient: 'org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.3.0',
+  mapreduceClientCore: "org.apache.hadoop:hadoop-mapreduce-client-core:${hadoopVersion}",
+  mapreduceClientJobClient: "org.apache.hadoop:hadoop-mapreduce-client-jobclient:${hadoopVersion}",
   mockito: 'org.mockito:mockito-core:4.11.0',
   netty: 'io.netty:netty-all:4.1.52.Final',
   oss: 'org.sonatype.oss:oss-parent:7',

From e4cb8e6611a2efcdcd2a304671cafb816076888c Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Wed, 18 Oct 2023 17:48:48 +0200
Subject: [PATCH 06/15] Make hadoop-aws runtime deps

---
 clients/venice-push-job/build.gradle | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/clients/venice-push-job/build.gradle b/clients/venice-push-job/build.gradle
index 1fdf9073de..6198a34a85 100644
--- a/clients/venice-push-job/build.gradle
+++ b/clients/venice-push-job/build.gradle
@@ -26,12 +26,6 @@ dependencies {
     exclude group: 'javax.servlet'
   }
 
-  implementation (libraries.hadoopAws){
-    // Exclude transitive dependency
-    exclude group: 'org.apache.avro'
-    exclude group: 'javax.servlet'
-  }
-
   implementation project(':clients:venice-thin-client') // Needed by the Post Bulk-load Analysis Job
   implementation libraries.commonsIo
@@ -52,6 +46,7 @@ dependencies {
   runtimeOnly libraries.commonsLang
   runtimeOnly libraries.httpCore
   runtimeOnly libraries.httpClient
+  runtimeOnly libraries.hadoopAws
 }
 
 apply from: "$rootDir/gradle/helper/publishing.gradle"
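
Note: PATCHES 04-06 wire in org.apache.hadoop:hadoop-aws, which contributes the S3A filesystem, and then narrow it to runtimeOnly: the push-job code never references hadoop-aws classes directly, because Hadoop looks the implementation up by URI scheme at runtime. A hedged way to probe whether the current classpath can serve a scheme (the error wording is the Hadoop 2.x one; the bucket is hypothetical):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public final class SchemeProbe {
      public static void main(String[] args) {
        try {
          // Throws IOException ("No FileSystem for scheme: s3a" on Hadoop 2.x)
          // when hadoop-aws and its AWS SDK dependency are missing at runtime.
          new Path("s3a://example-bucket/").getFileSystem(new Configuration());
          System.out.println("s3a filesystem available");
        } catch (IOException e) {
          System.out.println("s3a not available: " + e.getMessage());
        }
      }
    }
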
From c7a2b1458ae2759f4f2057c9ea34d9550f3538a4 Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Wed, 18 Oct 2023 18:34:35 +0200
Subject: [PATCH 07/15] Bump to 2.7.2

https://stackoverflow.com/questions/36427291/illegalaccesserror-to-guavas-stopwatch-from-org-apache-hadoop-mapreduce-lib-inp
---
 build.gradle | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.gradle b/build.gradle
index 6a82fa866e..b45b2f39a3 100644
--- a/build.gradle
+++ b/build.gradle
@@ -49,7 +49,7 @@ def jacksonVersion = '2.13.3'
 def pulsarGroup = 'org.apache.pulsar'
 def pulsarVersion = '2.10.3'
 def alpnAgentVersion = '2.0.10'
-def hadoopVersion = '2.6.0'
+def hadoopVersion = '2.7.2'
 
 ext.libraries = [
   alpnAgent: "org.mortbay.jetty.alpn:jetty-alpn-agent:${alpnAgentVersion}",
From 406d6ba23defe5cb598482fb74bc62122e34a797 Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Mon, 23 Oct 2023 13:27:07 +0200
Subject: [PATCH 08/15] Cleanup and added tests

---
 .../hadoop/DefaultInputDataInfoProvider.java      | 13 ++-----------
 .../com/linkedin/venice/hadoop/VenicePushJob.java |  7 ++-----
 .../linkedin/venice/hadoop/utils/HadoopUtils.java | 13 +++++++++++++
 .../venice/hadoop/utils/TestHadoopUtils.java      | 14 ++++++++++++++
 4 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
index 834a56ca00..27f9b06fab 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
@@ -2,7 +2,6 @@
 
 import static com.linkedin.venice.hadoop.VenicePushJob.DEFAULT_KEY_FIELD_PROP;
 import static com.linkedin.venice.hadoop.VenicePushJob.DEFAULT_VALUE_FIELD_PROP;
-import static com.linkedin.venice.hadoop.VenicePushJob.HADOOP_PREFIX;
 import static com.linkedin.venice.utils.ByteUtils.BYTES_PER_MB;
 
 import com.github.luben.zstd.ZstdDictTrainer;
@@ -10,6 +9,7 @@
 import com.linkedin.venice.exceptions.VeniceException;
 import com.linkedin.venice.hadoop.exceptions.VeniceInconsistentSchemaException;
 import com.linkedin.venice.hadoop.exceptions.VeniceSchemaFieldNotFoundException;
+import com.linkedin.venice.hadoop.utils.HadoopUtils;
 import com.linkedin.venice.schema.vson.VsonAvroSchemaAdapter;
 import com.linkedin.venice.schema.vson.VsonSchema;
 import com.linkedin.venice.utils.Pair;
@@ -327,16 +327,7 @@ public Schema extractAvroSubSchema(Schema origin, String fieldName) {
 
   private Configuration getConfiguration() {
     Configuration conf = new Configuration();
-    for (String key: props.keySet()) {
-      if (key.startsWith(HADOOP_PREFIX)) {
-        String hadoopKey = key.substring(HADOOP_PREFIX.length());
-        if (conf.get(hadoopKey) != null) {
-          LOGGER.warn("Hadoop configuration {} is overwritten by {}", hadoopKey, key);
-        }
-        conf.set(hadoopKey, props.getString(key));
-      }
-    }
-    LOGGER.info("Hadoop configuration: {} {}", conf.get("fs.s3.awsAccessKeyId"), conf.get("fs.s3.awsSecretAccessKey"));
+    HadoopUtils.setHadoopConfigurationFromProperties(conf, props);
     return conf;
   }
 
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java
index 4ea2e123c8..5dfaccdc19 100755
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java
@@ -347,7 +347,7 @@ public class VenicePushJob implements AutoCloseable {
   public static final String VALUE_SCHEMA_ID_PROP = "value.schema.id";
   public static final String DERIVED_SCHEMA_ID_PROP = "derived.schema.id";
   public static final String TOPIC_PROP = "venice.kafka.topic";
-  protected static final String HADOOP_PREFIX = "hadoop-conf.";
+  public static final String HADOOP_PREFIX = "hadoop-conf.";
   protected static final String HADOOP_VALIDATE_SCHEMA_AND_BUILD_DICT_PREFIX = "hadoop-dict-build-conf.";
   public static final String SSL_PREFIX = "ssl";
@@ -948,6 +948,7 @@ public void run() {
           "The store {} is discovered in Venice cluster {}",
           pushJobSetting.storeName,
           pushJobSetting.clusterName);
+      HadoopUtils.setHadoopConfigurationFromProperties(jobConf, props);
 
       if (pushJobSetting.isSourceKafka) {
         initKIFRepushDetails();
@@ -3079,10 +3080,6 @@ protected void setupDefaultJobConf(
 
     for (String key: props.keySet()) {
       String lowerCase = key.toLowerCase();
-      if (lowerCase.startsWith(HADOOP_PREFIX)) {
-        String overrideKey = key.substring(HADOOP_PREFIX.length());
-        conf.set(overrideKey, props.getString(key));
-      }
       for (String prefix: passThroughPrefixList) {
         if (lowerCase.startsWith(prefix)) {
           conf.set(key, props.getString(key));
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
index 4a35474e60..655fd77e7d 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
@@ -1,5 +1,7 @@
 package com.linkedin.venice.hadoop.utils;
 
+import static com.linkedin.venice.hadoop.VenicePushJob.HADOOP_PREFIX;
+
 import com.linkedin.venice.utils.VeniceProperties;
 import java.io.IOException;
 import java.util.Properties;
@@ -52,6 +54,17 @@ public static void cleanUpHDFSPath(String path, boolean recursive) {
     }
   }
 
+  public static void setHadoopConfigurationFromProperties(Configuration conf, VeniceProperties props) {
+    for (String key: props.keySet()) {
+      String lowerCase = key.toLowerCase();
+      if (lowerCase.startsWith(HADOOP_PREFIX)) {
+        String overrideKey = key.substring(HADOOP_PREFIX.length());
+        conf.set(overrideKey, props.getString(key));
+        LOGGER.info("Hadoop configuration {} is overwritten by {}", overrideKey, key);
+      }
+    }
+  }
+
   private HadoopUtils() {
   }
 }
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java
index ad3cd9c2b1..937f639203 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java
@@ -1,6 +1,8 @@
 package com.linkedin.venice.hadoop.utils;
 
+import com.linkedin.venice.utils.VeniceProperties;
 import java.io.IOException;
+import java.util.Properties;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -40,4 +42,16 @@ public void testCleanUpHDFSPath() throws IOException {
     // validate the path
     Assert.assertFalse(fs.exists(p));
   }
+
+  @Test
+  public void testSetHadoopConfigurationFromProperties() {
+    Configuration conf = new Configuration();
+    Properties innerProps = new Properties();
+    innerProps.setProperty("non.valid.key", "shouldn't exist");
+    innerProps.setProperty("hadoop-conf.fs.s3a.access.key", "s3-key");
+    VeniceProperties props = new VeniceProperties(innerProps);
+    HadoopUtils.setHadoopConfigurationFromProperties(conf, props);
+    Assert.assertEquals(conf.get("fs.s3a.access.key"), "s3-key");
+    Assert.assertNull(conf.get("non.valid.key"));
+  }
 }
From 51e89bc2bffff2d3332aea5e968a3dc83a09e173 Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Mon, 23 Oct 2023 13:35:59 +0200
Subject: [PATCH 09/15] add docs

---
 .../java/com/linkedin/venice/hadoop/utils/HadoopUtils.java | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
index 655fd77e7d..9f6d415e02 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
@@ -54,6 +54,11 @@ public static void cleanUpHDFSPath(String path, boolean recursive) {
     }
   }
 
+  /**
+   * Set Hadoop configuration from Venice properties.
+   * @param conf Configuration to be updated
+   * @param props Venice properties
+   */
   public static void setHadoopConfigurationFromProperties(Configuration conf, VeniceProperties props) {
     for (String key: props.keySet()) {
       String lowerCase = key.toLowerCase();

From c7639fd2d86df7e163149808f8736affc44266ed Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Mon, 23 Oct 2023 14:38:40 +0200
Subject: [PATCH 10/15] remove aws dependency

---
 build.gradle                         | 1 -
 clients/venice-push-job/build.gradle | 1 -
 2 files changed, 2 deletions(-)

diff --git a/build.gradle b/build.gradle
index 6c7455727a..a482e1fce9 100644
--- a/build.gradle
+++ b/build.gradle
@@ -73,7 +73,6 @@ ext.libraries = [
   failsafe: 'net.jodah:failsafe:2.4.0',
   fastUtil: 'it.unimi.dsi:fastutil:8.3.0',
   hadoopCommon: "org.apache.hadoop:hadoop-common:${hadoopVersion}",
-  hadoopAws: "org.apache.hadoop:hadoop-aws:${hadoopVersion}",
   helix: 'org.apache.helix:helix-core:1.1.0',
   httpAsyncClient: 'org.apache.httpcomponents:httpasyncclient:4.1.2',
   httpClient5: 'org.apache.httpcomponents.client5:httpclient5:5.2.1',
diff --git a/clients/venice-push-job/build.gradle b/clients/venice-push-job/build.gradle
index 6198a34a85..67ee119af3 100644
--- a/clients/venice-push-job/build.gradle
+++ b/clients/venice-push-job/build.gradle
@@ -46,7 +46,6 @@ dependencies {
   runtimeOnly libraries.commonsLang
   runtimeOnly libraries.httpCore
   runtimeOnly libraries.httpClient
-  runtimeOnly libraries.hadoopAws
 }
 
 apply from: "$rootDir/gradle/helper/publishing.gradle"

From 4eda8ecede2a47d56ac14f37e4e6a15d13d12e5e Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Wed, 25 Oct 2023 15:53:25 +0200
Subject: [PATCH 11/15] Bump hadoop 2.10.2

---
 build.gradle | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.gradle b/build.gradle
index a482e1fce9..033b23caee 100644
--- a/build.gradle
+++ b/build.gradle
@@ -49,7 +49,7 @@ def jacksonVersion = '2.13.3'
 def pulsarGroup = 'org.apache.pulsar'
 def pulsarVersion = '2.10.3'
 def alpnAgentVersion = '2.0.10'
-def hadoopVersion = '2.7.2'
+def hadoopVersion = '2.10.2'
 
 ext.libraries = [
   alpnAgent: "org.mortbay.jetty.alpn:jetty-alpn-agent:${alpnAgentVersion}",
From e7f1dd6325fa7535124d7a371c1e0abd61016c35 Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Wed, 25 Oct 2023 16:26:00 +0200
Subject: [PATCH 12/15] always getFileSystem from path

---
 .../venice/hadoop/DefaultInputDataInfoProvider.java  | 11 ++---------
 .../hadoop/ValidateSchemaAndBuildDictMapper.java     |  2 +-
 ...alidateSchemaAndBuildDictMapperOutputReader.java  |  6 +++---
 .../ValidateSchemaAndBuildDictOutputFormat.java      |  2 +-
 .../venice/hadoop/VeniceFileInputRecordReader.java   |  2 +-
 .../com/linkedin/venice/hadoop/VenicePushJob.java    |  5 +++--
 .../venice/hadoop/schema/HDFSRmdSchemaSource.java    |  2 +-
 .../linkedin/venice/hadoop/utils/HadoopUtils.java    |  2 +-
 .../venice/hadoop/utils/TestHadoopUtils.java         |  2 +-
 .../com/linkedin/venice/zstd/TestZstdLibrary.java    |  4 ++--
 .../linkedin/venice/hadoop/TestVenicePushJob.java    | 13 +++++++------
 11 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
index 27f9b06fab..a10d073704 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/DefaultInputDataInfoProvider.java
@@ -9,7 +9,6 @@
 import com.linkedin.venice.exceptions.VeniceException;
 import com.linkedin.venice.hadoop.exceptions.VeniceInconsistentSchemaException;
 import com.linkedin.venice.hadoop.exceptions.VeniceSchemaFieldNotFoundException;
-import com.linkedin.venice.hadoop.utils.HadoopUtils;
 import com.linkedin.venice.schema.vson.VsonAvroSchemaAdapter;
 import com.linkedin.venice.schema.vson.VsonSchema;
 import com.linkedin.venice.utils.Pair;
@@ -102,7 +101,7 @@ public class DefaultInputDataInfoProvider implements InputDataInfoProvider {
   public InputDataInfo validateInputAndGetInfo(String inputUri) throws Exception {
     long inputModificationTime = getInputLastModificationTime(inputUri);
     Path srcPath = new Path(inputUri);
-    FileSystem fs = srcPath.getFileSystem(getConfiguration());
+    FileSystem fs = srcPath.getFileSystem(new Configuration());
     FileStatus[] fileStatuses = fs.listStatus(srcPath, PATH_FILTER);
 
     if (fileStatuses == null || fileStatuses.length == 0) {
@@ -325,16 +324,10 @@ public Schema extractAvroSubSchema(Schema origin, String fieldName) {
     return field.schema();
   }
 
-  private Configuration getConfiguration() {
-    Configuration conf = new Configuration();
-    HadoopUtils.setHadoopConfigurationFromProperties(conf, props);
-    return conf;
-  }
-
   @Override
   public long getInputLastModificationTime(String inputUri) throws IOException {
     Path srcPath = new Path(inputUri);
-    FileSystem fs = srcPath.getFileSystem(getConfiguration());
+    FileSystem fs = srcPath.getFileSystem(new Configuration());
     try {
       return fs.getFileStatus(srcPath).getModificationTime();
     } catch (FileNotFoundException e) {
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictMapper.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictMapper.java
index a17abe399b..14ea1b4937 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictMapper.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictMapper.java
@@ -276,8 +276,8 @@ protected void initInputData(JobConf job, VeniceProperties props) throws Excepti
     }
 
     try {
-      fileSystem = FileSystem.get(job);
       Path srcPath = new Path(inputDirectory);
+      fileSystem = srcPath.getFileSystem(job);
       fileStatuses = fileSystem.listStatus(srcPath, PATH_FILTER);
     } catch (IOException e) {
       /** Should not happen as this is already done in driver, unless there has
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictMapperOutputReader.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictMapperOutputReader.java
index f383d3f4f2..0a29a22e2e 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictMapperOutputReader.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictMapperOutputReader.java
@@ -39,17 +39,17 @@ public ValidateSchemaAndBuildDictMapperOutputReader(String outputDir, String fil
         ValidateSchemaAndBuildDictMapper.class.getSimpleName() + " output fileName should not be empty");
 
     this.outputDir = outputDir;
-    String filePath = outputDir + "/" + fileName;
+    Path filePath = new Path(String.format("%s/%s", outputDir, fileName));
 
     LOGGER.info(
         "Reading file {} to retrieve info persisted by {}",
         filePath,
         ValidateSchemaAndBuildDictMapper.class.getSimpleName());
 
     Configuration conf = new Configuration();
-    fs = FileSystem.get(conf);
+    fs = filePath.getFileSystem(conf);
 
     try {
-      inputStream = fs.open(new Path(filePath));
+      inputStream = fs.open(filePath);
       avroDataFileStream =
           new DataFileStream(inputStream, new SpecificDatumReader(ValidateSchemaAndBuildDictMapperOutput.class));
       try {
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictOutputFormat.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictOutputFormat.java
index af5a5dea1f..24a03a104d 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictOutputFormat.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/ValidateSchemaAndBuildDictOutputFormat.java
@@ -73,9 +73,9 @@ private static void createDirectoryWithPermission(FileSystem fs, Path path, Stri
   protected static void setValidateSchemaAndBuildDictionaryOutputDirPath(JobConf job) throws IOException {
     // parent directory: Common directory under which all the different push jobs
     // create their job specific directories.
-    FileSystem fs = FileSystem.get(job);
     String parentOutputDir = job.get(MAPPER_OUTPUT_DIRECTORY);
     Path outputPath = new Path(parentOutputDir);
+    FileSystem fs = outputPath.getFileSystem(job);
     createDirectoryWithPermission(fs, outputPath, "777");
 
     // store+job specific unique directory under parent directory: already derived in VPJ driver
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VeniceFileInputRecordReader.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VeniceFileInputRecordReader.java
index 4637506e5a..29c4960e03 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VeniceFileInputRecordReader.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VeniceFileInputRecordReader.java
@@ -38,8 +38,8 @@ public VeniceFileInputRecordReader(InputSplit split, JobConf job) throws IOExcep
 
   protected int getTotalNumberOfFiles(String inputDirectory, JobConf job) throws IOException {
     Configuration conf = new Configuration();
-    FileSystem fs = FileSystem.get(conf);
     Path srcPath = new Path(inputDirectory);
+    FileSystem fs = srcPath.getFileSystem(conf);
     FileStatus[] fileStatuses = fs.listStatus(srcPath, PATH_FILTER);
     // Path validity and length validity are already checked for the flow to be here, so not checking again
     return fileStatuses.length;
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java
index 5dfaccdc19..4d1f267e36 100755
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java
@@ -1055,7 +1055,7 @@ public void run() {
           pushJobSetting.repushTTLInSeconds = storeSetting.storeRewindTimeInSeconds;
           // make the base directory TEMP_DIR_PREFIX with 777 permissions
           Path baseSchemaDir = new Path(TEMP_DIR_PREFIX);
-          FileSystem fs = FileSystem.get(new Configuration());
+          FileSystem fs = baseSchemaDir.getFileSystem(new Configuration());
           if (!fs.exists(baseSchemaDir)) {
             fs.mkdirs(baseSchemaDir);
             fs.setPermission(baseSchemaDir, new FsPermission("777"));
@@ -2001,8 +2001,9 @@ protected String getInputURI(VeniceProperties props) throws Exception {
       return "";
     }
     Configuration conf = new Configuration();
-    FileSystem fs = FileSystem.get(conf);
     String uri = props.getString(INPUT_PATH_PROP);
+    Path uriPath = new Path(uri);
+    FileSystem fs = uriPath.getFileSystem(conf);
     Path sourcePath = getLatestPathOfInputDirectory(uri, fs);
     return sourcePath.toString();
   }
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/schema/HDFSRmdSchemaSource.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/schema/HDFSRmdSchemaSource.java
index 20d93a648d..0ea3049007 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/schema/HDFSRmdSchemaSource.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/schema/HDFSRmdSchemaSource.java
@@ -42,8 +42,8 @@ public class HDFSRmdSchemaSource implements RmdSchemaSource, AutoCloseable {
 
   public HDFSRmdSchemaSource(final String schemaDir, final String storeName) throws IOException {
     Configuration conf = new Configuration();
-    this.fs = FileSystem.get(conf);
     this.schemaDir = new Path(schemaDir);
+    this.fs = this.schemaDir.getFileSystem(conf);
     if (!fs.exists(this.schemaDir)) {
       fs.mkdirs(this.schemaDir);
     }
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
index 9f6d415e02..29d60e3c9a 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
@@ -45,8 +45,8 @@ public static boolean shouldPathBeIgnored(org.apache.hadoop.fs.Path path) throws
   public static void cleanUpHDFSPath(String path, boolean recursive) {
     Configuration conf = new Configuration();
     try {
-      FileSystem fs = FileSystem.get(conf);
       Path p = new Path(path);
+      FileSystem fs = p.getFileSystem(conf);
       fs.delete(p, recursive);
       fs.close();
     } catch (IOException e) {
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java
index 937f639203..3900b5b0ea 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java
@@ -29,9 +29,9 @@ public void testShouldPathBeIgnored() throws IOException {
   public void testCleanUpHDFSPath() throws IOException {
     String path = "/tmp/venice-test/";
     Configuration conf = new Configuration();
-    FileSystem fs = FileSystem.get(conf);
     // create the path
     Path p = new Path(path);
+    FileSystem fs = p.getFileSystem(conf);
     if (!fs.exists(p)) {
       fs.mkdirs(p);
     }
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/zstd/TestZstdLibrary.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/zstd/TestZstdLibrary.java
index d91ac30ca6..ac1fac71ca 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/zstd/TestZstdLibrary.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/zstd/TestZstdLibrary.java
@@ -33,8 +33,9 @@ public class TestZstdLibrary {
   private void runTest(int numOfFiles, int numOfRecordsPerFile, int dictSizeLimitInKB, int dictSampleSizeLimitInMB)
       throws Exception {
-    FileSystem fs = FileSystem.get(new Configuration());
     File inputDir = Utils.getTempDataDirectory();
+    Path srcPath = new Path(inputDir.getAbsolutePath());
+    FileSystem fs = srcPath.getFileSystem(new Configuration());
     try {
       for (int i = 0; i < numOfFiles; i++) {
         writeSimpleAvroFileWithStringToStringSchema(inputDir, numOfRecordsPerFile, "testInput" + i + ".avro");
@@ -46,7 +47,6 @@ private void runTest(int numOfFiles, int numOfRecordsPerFile, int dictSizeLimitI
 
     PushJobZstdConfig pushJobZstdConfig = new PushJobZstdConfig(vProps, numOfFiles);
 
-    Path srcPath = new Path(inputDir.getAbsolutePath());
     FileStatus[] fileStatuses = fs.listStatus(srcPath, PATH_FILTER);
     LOGGER.info("Collect maximum of {} Bytes from {} files", pushJobZstdConfig.getMaxBytesPerFile(), numOfFiles);
     for (FileStatus fileStatus: fileStatuses) {
diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/hadoop/TestVenicePushJob.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/hadoop/TestVenicePushJob.java
index bbb2b3a118..0d97094043 100644
--- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/hadoop/TestVenicePushJob.java
+++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/hadoop/TestVenicePushJob.java
@@ -58,6 +58,7 @@
 import org.apache.avro.io.DatumWriter;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
@@ -286,26 +287,26 @@ public void testRunJobByPickingUpLatestFolder() throws Exception {
     File inputDir_v2_file = new File(inputDir_v2, "v3.avro"); // Added to ensure lexically greater files do not get
                                                               // resolved
     inputDir_v2_file.createNewFile();
-
-    FileSystem fs = FileSystem.get(new Configuration());
+    String basePath = "file:" + inputDir.getAbsolutePath();
+    FileSystem fs = new Path(basePath).getFileSystem(new Configuration());
 
     Assert.assertEquals(
-        getLatestPathOfInputDirectory("file:" + inputDir.getAbsolutePath() + "/#LATEST", fs).toString(),
+        getLatestPathOfInputDirectory(basePath + "/#LATEST", fs).toString(),
         "file:" + inputDir_v2.getAbsolutePath(),
        "VenicePushJob should parse #LATEST to latest directory when it is in the last level in the input path");
 
     Assert.assertEquals(
-        getLatestPathOfInputDirectory("file:" + inputDir.getAbsolutePath() + "/#LATEST/v1", fs).toString(),
+        getLatestPathOfInputDirectory(basePath + "/#LATEST/v1", fs).toString(),
         "file:" + inputDir_v2_v1.getAbsolutePath(),
         "VenicePushJob should parse #LATEST to latest directory when it is only in an intermediate level in the input path");
 
     Assert.assertEquals(
-        getLatestPathOfInputDirectory("file:" + inputDir.getAbsolutePath() + "/#LATEST/#LATEST", fs).toString(),
+        getLatestPathOfInputDirectory(basePath + "/#LATEST/#LATEST", fs).toString(),
         "file:" + inputDir_v2_v2.getAbsolutePath(),
         "VenicePushJob should parse all occurrences of #LATEST to respective latest directories");
 
     Assert.assertEquals(
-        getLatestPathOfInputDirectory("file:" + inputDir.getAbsolutePath() + "/#LATEST/#LATEST/", fs).toString(),
+        getLatestPathOfInputDirectory(basePath + "/#LATEST/#LATEST/", fs).toString(),
         "file:" + inputDir_v2_v2.getAbsolutePath(),
         "VenicePushJob should parse #LATEST to latest directory to respective latest directories");
   }

From f0e3f4d585bc2a989b4aad279918b0a9353eb2aa Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Wed, 25 Oct 2023 16:26:27 +0200
Subject: [PATCH 13/15] Revert "add docs"

This reverts commit 51e89bc2bffff2d3332aea5e968a3dc83a09e173.
---
 .../java/com/linkedin/venice/hadoop/utils/HadoopUtils.java | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
index 29d60e3c9a..40c3188821 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
@@ -54,11 +54,6 @@ public static void cleanUpHDFSPath(String path, boolean recursive) {
     }
   }
 
-  /**
-   * Set Hadoop configuration from Venice properties.
-   * @param conf Configuration to be updated
-   * @param props Venice properties
-   */
   public static void setHadoopConfigurationFromProperties(Configuration conf, VeniceProperties props) {
     for (String key: props.keySet()) {
       String lowerCase = key.toLowerCase();
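
Note: PATCH 12 generalizes PATCH 01 across the codebase: every filesystem lookup, in the driver, the mappers, and the tests, now resolves from the concrete Path, handing over the JobConf where one is available so cluster-side settings still apply. The practical effect is that the same calling code serves any scheme; a small local-only illustration that runs anywhere:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public final class PerPathResolution {
      public static void main(String[] args) throws Exception {
        // A file:// path resolves to the local filesystem implementation:
        FileSystem local = new Path("file:///tmp/venice-input").getFileSystem(new Configuration());
        System.out.println(local.getClass().getSimpleName()); // LocalFileSystem
        // An hdfs:// or s3a:// path would resolve to DistributedFileSystem or
        // S3AFileSystem here, with no change to the calling code.
      }
    }
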
From 217577c5b1dba63b801718cd5422981e6b5d07d7 Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Wed, 25 Oct 2023 16:28:47 +0200
Subject: [PATCH 14/15] revert unnecessary configs

---
 .../com/linkedin/venice/hadoop/VenicePushJob.java |  7 +++++--
 .../linkedin/venice/hadoop/utils/HadoopUtils.java | 13 -------------
 .../venice/hadoop/utils/TestHadoopUtils.java      | 14 --------------
 3 files changed, 5 insertions(+), 29 deletions(-)

diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java
index 4d1f267e36..25cfd71954 100755
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/VenicePushJob.java
@@ -347,7 +347,7 @@ public class VenicePushJob implements AutoCloseable {
   public static final String VALUE_SCHEMA_ID_PROP = "value.schema.id";
   public static final String DERIVED_SCHEMA_ID_PROP = "derived.schema.id";
   public static final String TOPIC_PROP = "venice.kafka.topic";
-  public static final String HADOOP_PREFIX = "hadoop-conf.";
+  protected static final String HADOOP_PREFIX = "hadoop-conf.";
   protected static final String HADOOP_VALIDATE_SCHEMA_AND_BUILD_DICT_PREFIX = "hadoop-dict-build-conf.";
   public static final String SSL_PREFIX = "ssl";
@@ -948,7 +948,6 @@ public void run() {
           "The store {} is discovered in Venice cluster {}",
           pushJobSetting.storeName,
           pushJobSetting.clusterName);
-      HadoopUtils.setHadoopConfigurationFromProperties(jobConf, props);
 
       if (pushJobSetting.isSourceKafka) {
         initKIFRepushDetails();
@@ -3081,6 +3080,10 @@ protected void setupDefaultJobConf(
 
     for (String key: props.keySet()) {
       String lowerCase = key.toLowerCase();
+      if (lowerCase.startsWith(HADOOP_PREFIX)) {
+        String overrideKey = key.substring(HADOOP_PREFIX.length());
+        conf.set(overrideKey, props.getString(key));
+      }
       for (String prefix: passThroughPrefixList) {
         if (lowerCase.startsWith(prefix)) {
           conf.set(key, props.getString(key));
diff --git a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
index 40c3188821..ce0b3c9772 100644
--- a/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
+++ b/clients/venice-push-job/src/main/java/com/linkedin/venice/hadoop/utils/HadoopUtils.java
@@ -1,7 +1,5 @@
 package com.linkedin.venice.hadoop.utils;
 
-import static com.linkedin.venice.hadoop.VenicePushJob.HADOOP_PREFIX;
-
 import com.linkedin.venice.utils.VeniceProperties;
 import java.io.IOException;
 import java.util.Properties;
@@ -54,17 +52,6 @@ public static void cleanUpHDFSPath(String path, boolean recursive) {
     }
   }
 
-  public static void setHadoopConfigurationFromProperties(Configuration conf, VeniceProperties props) {
-    for (String key: props.keySet()) {
-      String lowerCase = key.toLowerCase();
-      if (lowerCase.startsWith(HADOOP_PREFIX)) {
-        String overrideKey = key.substring(HADOOP_PREFIX.length());
-        conf.set(overrideKey, props.getString(key));
-        LOGGER.info("Hadoop configuration {} is overwritten by {}", overrideKey, key);
-      }
-    }
-  }
-
   private HadoopUtils() {
   }
 }
diff --git a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java
index 3900b5b0ea..d9bdc9f871 100644
--- a/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java
+++ b/clients/venice-push-job/src/test/java/com/linkedin/venice/hadoop/utils/TestHadoopUtils.java
@@ -1,8 +1,6 @@
 package com.linkedin.venice.hadoop.utils;
 
-import com.linkedin.venice.utils.VeniceProperties;
 import java.io.IOException;
-import java.util.Properties;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -40,16 +40,4 @@ public void testCleanUpHDFSPath() throws IOException {
     // validate the path
     Assert.assertFalse(fs.exists(p));
   }
-
-  @Test
-  public void testSetHadoopConfigurationFromProperties() {
-    Configuration conf = new Configuration();
-    Properties innerProps = new Properties();
-    innerProps.setProperty("non.valid.key", "shouldn't exist");
-    innerProps.setProperty("hadoop-conf.fs.s3a.access.key", "s3-key");
-    VeniceProperties props = new VeniceProperties(innerProps);
-    HadoopUtils.setHadoopConfigurationFromProperties(conf, props);
-    Assert.assertEquals(conf.get("fs.s3a.access.key"), "s3-key");
-    Assert.assertNull(conf.get("non.valid.key"));
-  }
 }
From 82461facdf7d317e629e365562713ce72d3d145b Mon Sep 17 00:00:00 2001
From: Moditha Hewasinghage
Date: Wed, 25 Oct 2023 16:38:10 +0200
Subject: [PATCH 15/15] Bump docker image hadoop versions

---
 docker/venice-client/Dockerfile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docker/venice-client/Dockerfile b/docker/venice-client/Dockerfile
index 67393090d2..82549864d8 100644
--- a/docker/venice-client/Dockerfile
+++ b/docker/venice-client/Dockerfile
@@ -6,9 +6,9 @@ RUN apt-get update
 RUN apt-get install netcat tree wget python3 -y
 RUN mkdir -p "${VENICE_DIR}/bin"
 RUN wget -O ${VENICE_DIR}/bin/avro-tools.jar https://repo1.maven.org/maven2/org/apache/avro/avro-tools/1.11.2/avro-tools-1.11.2.jar
-RUN wget -O ${VENICE_DIR}/bin/hadoop-mapreduce-client-core.jar https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-mapreduce-client-core/2.3.0/hadoop-mapreduce-client-core-2.3.0.jar
-RUN wget -O ${VENICE_DIR}/bin/hadoop-mapreduce-client-common.jar https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-mapreduce-client-common/2.3.0/hadoop-mapreduce-client-common-2.3.0.jar
-RUN wget -O ${VENICE_DIR}/bin/hadoop-common.jar https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-common/2.3.0/hadoop-common-2.3.0.jar
+RUN wget -O ${VENICE_DIR}/bin/hadoop-mapreduce-client-core.jar https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-mapreduce-client-core/2.10.2/hadoop-mapreduce-client-core-2.10.2.jar
+RUN wget -O ${VENICE_DIR}/bin/hadoop-mapreduce-client-common.jar https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-mapreduce-client-common/2.10.2/hadoop-mapreduce-client-common-2.10.2.jar
+RUN wget -O ${VENICE_DIR}/bin/hadoop-common.jar https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-common/2.10.2/hadoop-common-2.10.2.jar
 WORKDIR ${VENICE_DIR}
 COPY venice-push-job-all.jar bin/venice-push-job-all.jar
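
Note: after PATCH 14 the division of labor is back to the original one: setupDefaultJobConf copies hadoop-conf.*-prefixed properties into the JobConf, and the per-path resolution from PATCHES 01/12 picks the right FileSystem for each URI. End to end, that should make an S3 input directory configurable purely through push-job properties; a hedged sketch (the bucket and credentials are placeholders, "input.path" is an assumed value for INPUT_PATH_PROP since the constant's value is not shown in this series, and the fs.s3a.* keys mirror the one used by the PATCH 08 test):

    import java.util.Properties;

    public final class S3PushJobProps {
      public static void main(String[] args) {
        Properties props = new Properties();
        // Input on S3 instead of HDFS (hypothetical bucket):
        props.setProperty("input.path", "s3a://example-bucket/venice/input");
        // Forwarded to the JobConf with the "hadoop-conf." prefix stripped:
        props.setProperty("hadoop-conf.fs.s3a.access.key", "<access-key>");
        props.setProperty("hadoop-conf.fs.s3a.secret.key", "<secret-key>");
        // These props would then be handed to the push job; the exact
        // VenicePushJob constructor is not shown in this series.
        System.out.println(props);
      }
    }
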