diff --git a/java/vector/src/main/codegen/templates/VariableLengthVectors.java b/java/vector/src/main/codegen/templates/VariableLengthVectors.java index 3934e74f11b2d..4d0342b8db5d1 100644 --- a/java/vector/src/main/codegen/templates/VariableLengthVectors.java +++ b/java/vector/src/main/codegen/templates/VariableLengthVectors.java @@ -18,6 +18,7 @@ import java.lang.Override; +import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.drill.exec.exception.OutOfMemoryException; import org.apache.drill.exec.vector.BaseDataValueVector; import org.apache.drill.exec.vector.BaseValueVector; @@ -301,7 +302,18 @@ public void setInitialCapacity(final int valueCount) { allocationSizeInBytes = (int)size; offsetVector.setInitialCapacity(valueCount + 1); } + + @Override + public void setInitialCapacity(int valueCount, double density) { + long size = (long) (valueCount * density); + if (size > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed allocation size"); + } + allocationSizeInBytes = (int)size; + offsetVector.setInitialCapacity(valueCount + 1); + } + @Override public void allocateNew() { if(!allocateNewSafe()){ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java index 0ff3959d4bf5c..fecbe9879eb77 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseNullableVariableWidthVector.java @@ -186,6 +186,7 @@ public void setInitialCapacity(int valueCount) { * @param valueCount desired number of elements in the vector * @param density average number of bytes per variable width element */ + @Override public void setInitialCapacity(int valueCount, double density) { long size = (long) (valueCount * density); if (size > MAX_ALLOCATION_SIZE) { diff --git 
a/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java b/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java new file mode 100644 index 0000000000000..9544b23cefddb --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +/** + * Vector that supports density-aware initial capacity settings. + */ +public interface DensityAwareVector { + /** + * Set the initial capacity from an element count and a density hint. + * @param valueCount desired number of elements in the vector + * @param density density hint, e.g. average number of bytes per variable width element + */ + void setInitialCapacity(int valueCount, double density); + +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java index 04c00b7c8349c..7182fa8476c9a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java @@ -18,7 +18,7 @@ package org.apache.arrow.vector; -public interface VariableWidthVector extends ValueVector { +public interface VariableWidthVector extends ValueVector, DensityAwareVector { /** * Allocate a new memory space for this vector. Must be called prior to using the ValueVector. 
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java index db0ff86df47a9..c777618fdfbcb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java @@ -20,6 +20,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.types.Types.MinorType; @@ -33,7 +34,7 @@ * * This class implements common functionality of composite vectors. */ -public abstract class AbstractContainerVector implements ValueVector { +public abstract class AbstractContainerVector implements ValueVector, DensityAwareVector { static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class); protected final String name; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index 1cd9b718523ee..c4414871eb814 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -25,6 +25,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.AddOrGetResult; import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; @@ -167,6 +168,7 @@ public void setInitialCapacity(int numRecords) { * This helps in tightly controlling the memory we 
provision * for inner data vector. */ + @Override public void setInitialCapacity(int numRecords, double density) { if ((numRecords * density) >= 2_000_000_000) { throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); @@ -178,7 +180,11 @@ public void setInitialCapacity(int numRecords, double density) { innerValueCapacity = 1; } - vector.setInitialCapacity(innerValueCapacity); + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector)vector).setInitialCapacity(innerValueCapacity, density); + } else { + vector.setInitialCapacity(innerValueCapacity); + } } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 8a46465253a4d..33698ca61a0a1 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -129,6 +129,7 @@ public void setInitialCapacity(int numRecords) { * This helps in tightly controlling the memory we provision * for inner data vector. 
*/ + @Override public void setInitialCapacity(int numRecords, double density) { validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); super.setInitialCapacity(numRecords, density); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java index 6089a67924fe0..52395026939da 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java @@ -28,7 +28,6 @@ import javax.annotation.Nullable; -import com.google.common.base.Preconditions; import com.google.common.collect.Ordering; import com.google.common.primitives.Ints; @@ -101,6 +100,23 @@ public void setInitialCapacity(int numRecords) { } } + /** + * Set the initial capacity of each ValueVector yielded by iterating this vector. + * Vectors implementing {@link DensityAwareVector} also receive the density; + * all other vectors receive only the value count. + * @param valueCount element count passed to every iterated vector + * @param density density hint forwarded to density-aware vectors + */ + @Override + public void setInitialCapacity(int valueCount, double density) { + for (final ValueVector vector : (Iterable<ValueVector>) this) { + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector)vector).setInitialCapacity(valueCount, density); + } else { + vector.setInitialCapacity(valueCount); + } + } + } + @Override public int getBufferSize() { if (valueCount == 0 || size() == 0) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java index 2709ab068c14f..c1880189712cf 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/NullableMapVector.java @@ -21,8 +21,6 @@ import static com.google.common.base.Preconditions.checkNotNull; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; import java.util.List; import com.google.common.collect.ObjectArrays; @@ -348,6 +346,12 @@ public void setInitialCapacity(int numRecords) { super.setInitialCapacity(numRecords); } + @Override + public void 
setInitialCapacity(int numRecords, double density) { + validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords); + super.setInitialCapacity(numRecords, density); + } + @Override public boolean allocateNewSafe() { /* Boolean to keep track if all the memory allocations were successful diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java index 91147c663f248..e107430f0ae36 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java @@ -18,6 +18,7 @@ package org.apache.arrow.vector.complex; +import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.UInt4Vector; import org.apache.arrow.vector.ValueVector; @@ -28,7 +29,7 @@ * Current design maintains data and offsets vectors. Each cell is stored in the data vector. Repeated vector * uses the offset vector to determine the sequence of cells pertaining to an individual value. */ -public interface RepeatedValueVector extends ValueVector { +public interface RepeatedValueVector extends ValueVector, DensityAwareVector { final static int DEFAULT_REPEAT_PER_RECORD = 5;