From b323c4da714a1c0d6df20f417c319c9249fe3178 Mon Sep 17 00:00:00 2001 From: Chang Chen Date: Mon, 8 Jul 2024 12:19:04 +0800 Subject: [PATCH] Refactor: reduce duplicated codes --- .../gluten/vectorized/BatchIterator.java | 18 +++--------------- .../gluten/vectorized/CHColumnVector.java | 2 +- .../gluten/vectorized/CHNativeBlock.java | 10 ++++------ 3 files changed, 8 insertions(+), 22 deletions(-) diff --git a/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/BatchIterator.java b/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/BatchIterator.java index 1fbb6053a2afc..1809ed26e6929 100644 --- a/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/BatchIterator.java +++ b/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/BatchIterator.java @@ -19,11 +19,8 @@ import org.apache.gluten.metrics.IMetrics; import org.apache.gluten.metrics.NativeMetrics; -import org.apache.spark.sql.execution.utils.CHExecUtil; -import org.apache.spark.sql.vectorized.ColumnVector; import org.apache.spark.sql.vectorized.ColumnarBatch; -import java.io.IOException; import java.util.concurrent.atomic.AtomicBoolean; public class BatchIterator extends GeneralOutIterator { @@ -43,8 +40,6 @@ public String getId() { private native boolean nativeHasNext(long nativeHandle); - private native byte[] nativeNext(long nativeHandle); - private native long nativeCHNext(long nativeHandle); private native void nativeClose(long nativeHandle); @@ -54,22 +49,15 @@ public String getId() { private native String nativeFetchMetrics(long nativeHandle); @Override - public boolean hasNextInternal() throws IOException { + public boolean hasNextInternal() { return nativeHasNext(handle); } @Override - public ColumnarBatch nextInternal() throws IOException { + public ColumnarBatch nextInternal() { long block = nativeCHNext(handle); CHNativeBlock nativeBlock = new CHNativeBlock(block); - int cols = nativeBlock.numColumns(); - ColumnVector[] columnVectors = new ColumnVector[cols]; - for (int i = 0; i < cols; i++) { - columnVectors[i] = - new CHColumnVector( - CHExecUtil.inferSparkDataType(nativeBlock.getTypeByPosition(i)), block, i); - } - return new ColumnarBatch(columnVectors, nativeBlock.numRows()); + return nativeBlock.toColumnarBatch(); } @Override diff --git a/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/CHColumnVector.java b/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/CHColumnVector.java index 9cbb242824a9c..88d6471c3cc13 100644 --- a/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/CHColumnVector.java +++ b/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/CHColumnVector.java @@ -25,7 +25,7 @@ public class CHColumnVector extends ColumnVector { private final int columnPosition; - private long blockAddress; + private final long blockAddress; public CHColumnVector(DataType type, long blockAddress, int columnPosition) { super(type); diff --git a/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/CHNativeBlock.java b/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/CHNativeBlock.java index f84327e7d9eb5..e3c51ae28583a 100644 --- a/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/CHNativeBlock.java +++ b/backends-clickhouse/src/main/java/org/apache/gluten/vectorized/CHNativeBlock.java @@ -90,15 +90,13 @@ public static void closeFromColumnarBatch(ColumnarBatch cb) { } public ColumnarBatch toColumnarBatch() { - ColumnVector[] vectors = new ColumnVector[numColumns()]; - for (int i = 0; i < numColumns(); i++) { + int numRows = numRows(); + int cols = numColumns(); + ColumnVector[] vectors = new ColumnVector[cols]; + for (int i = 0; i < cols; i++) { vectors[i] = new CHColumnVector(CHExecUtil.inferSparkDataType(getTypeByPosition(i)), blockAddress, i); } - int numRows = 0; - if (numColumns() != 0) { - numRows = numRows(); - } return new ColumnarBatch(vectors, numRows); } }