From 4785d8c8aea637a46474fea73ca44e5d6ca48857 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 20 Aug 2024 20:32:24 +0800 Subject: [PATCH] remove check it's not safe --- .../gluten/backendsapi/clickhouse/CHBackend.scala | 8 -------- .../backendsapi/clickhouse/CHSparkPlanExecApi.scala | 12 ------------ cpp-ch/local-engine/local_engine_jni.cpp | 2 +- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala index ffd9068b166a8..d0dbd98a88c2f 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala @@ -406,12 +406,4 @@ object CHBackendSettings extends BackendSettingsApi with Logging { } } } - - def getBroadcastThreshold: Long = { - val conf = SQLConf.get - conf - .getConf(SQLConf.ADAPTIVE_AUTO_BROADCASTJOIN_THRESHOLD) - .getOrElse(conf.autoBroadcastJoinThreshold) - } - } diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala index cb261e6e416c2..c09a066f0b026 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala @@ -540,18 +540,6 @@ class CHSparkPlanExecApi extends SparkPlanExecApi { CHExecUtil.buildSideRDD(dataSize, newChild).collect val batches = countsAndBytes.map(_._2) - val totalBatchesBytes = batches.map(_.length).sum - // totalBatchesBytes could be larger than the shuffle written bytes, so we double the threshold - // here. - if ( - totalBatchesBytes < 0 || - totalBatchesBytes.toLong > CHBackendSettings.getBroadcastThreshold * 2 - ) { - throw new GlutenException( - s"Cannot broadcast the table ($totalBatchesBytes) that is larger than threshold:" + - s" ${CHBackendSettings.getBroadcastThreshold}. Ensure the shuffle written" + - s"bytes is collected properly.") - } val rawSize = dataSize.value if (rawSize >= BroadcastExchangeExec.MAX_BROADCAST_TABLE_BYTES) { throw new GlutenException( diff --git a/cpp-ch/local-engine/local_engine_jni.cpp b/cpp-ch/local-engine/local_engine_jni.cpp index ce536799d94a3..9727fca1937d8 100644 --- a/cpp-ch/local-engine/local_engine_jni.cpp +++ b/cpp-ch/local-engine/local_engine_jni.cpp @@ -680,7 +680,7 @@ JNIEXPORT jobject Java_org_apache_gluten_vectorized_CHShuffleSplitterJniWrapper_ // AQE has dependency on total_bytes_written, if the data is wrong, it will generate inappropriate plan // add a log here for remining this. if (!result.total_bytes_written) - LOG_WARNING(getLogger("_CHShuffleSplitterJniWrapper"), "total_bytes_written is 0, something may be wrong"); + LOG_WARNING(getLogger("CHShuffleSplitterJniWrapper"), "total_bytes_written is 0, something may be wrong"); jobject split_result = env->NewObject( split_result_class,