Skip to content

Commit

Permalink
propagate density awareness throughout the vector tree.
Browse files Browse the repository at this point in the history
  • Loading branch information
jacques-n authored and vkorukanti committed Feb 8, 2018
1 parent 80bc33c commit b46ba11
Show file tree
Hide file tree
Showing 10 changed files with 75 additions and 7 deletions.
12 changes: 12 additions & 0 deletions java/vector/src/main/codegen/templates/VariableLengthVectors.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.lang.Override;

import org.apache.arrow.vector.util.OversizedAllocationException;
import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.vector.BaseDataValueVector;
import org.apache.drill.exec.vector.BaseValueVector;
Expand Down Expand Up @@ -301,7 +302,18 @@ public void setInitialCapacity(final int valueCount) {
allocationSizeInBytes = (int)size;
offsetVector.setInitialCapacity(valueCount + 1);
}


/**
 * Sets the initial capacity from a value count and a density hint.
 * The data buffer size is computed as {@code valueCount * density}, and the
 * offset vector is sized for {@code valueCount + 1} entries.
 *
 * @param valueCount desired number of elements in the vector
 * @param density multiplier applied to valueCount to obtain the data buffer size in bytes
 * @throws OversizedAllocationException if the computed size exceeds MAX_ALLOCATION_SIZE
 */
@Override
public void setInitialCapacity(int valueCount, double density) {
  final long requestedBytes = (long) (valueCount * density);
  if (requestedBytes <= MAX_ALLOCATION_SIZE) {
    // Within the limit: remember the data buffer size and size the offsets.
    allocationSizeInBytes = (int) requestedBytes;
    offsetVector.setInitialCapacity(valueCount + 1);
    return;
  }
  throw new OversizedAllocationException("Requested amount of memory is more than max allowed allocation size");
}

@Override
public void allocateNew() {
if(!allocateNewSafe()){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ public void setInitialCapacity(int valueCount) {
* @param valueCount desired number of elements in the vector
* @param density average number of bytes per variable width element
*/
@Override
public void setInitialCapacity(int valueCount, double density) {
long size = (long) (valueCount * density);
if (size > MAX_ALLOCATION_SIZE) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.vector;

/**
* Contract for vectors whose initial capacity can be set from both a value count
* and a density hint, rather than from a value count alone.
*/
public interface DensityAwareVector {
/**
* Sets the initial capacity of the vector using a value count together with a density hint.
* The meaning of the density is defined by the implementing vector; for example, variable
* width vectors describe it as the average number of bytes per element.
*
* @param valueCount desired number of elements in the vector
* @param density implementation-specific density hint applied together with valueCount
*/
void setInitialCapacity(int valueCount, double density);

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

package org.apache.arrow.vector;

public interface VariableWidthVector extends ValueVector {
public interface VariableWidthVector extends ValueVector, DensityAwareVector {

/**
* Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.OutOfMemoryException;
import org.apache.arrow.vector.DensityAwareVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.types.Types.MinorType;
Expand All @@ -33,7 +34,7 @@
*
* This class implements common functionality of composite vectors.
*/
public abstract class AbstractContainerVector implements ValueVector {
public abstract class AbstractContainerVector implements ValueVector, DensityAwareVector {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class);

protected final String name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.AddOrGetResult;
import org.apache.arrow.vector.BaseValueVector;
import org.apache.arrow.vector.DensityAwareVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.UInt4Vector;
import org.apache.arrow.vector.ValueVector;
Expand Down Expand Up @@ -167,6 +168,7 @@ public void setInitialCapacity(int numRecords) {
* This helps in tightly controlling the memory we provision
* for the inner data vector.
*/
@Override
public void setInitialCapacity(int numRecords, double density) {
if ((numRecords * density) >= 2_000_000_000) {
throw new OversizedAllocationException("Requested amount of memory is more than max allowed");
Expand All @@ -178,7 +180,11 @@ public void setInitialCapacity(int numRecords, double density) {
innerValueCapacity = 1;
}

vector.setInitialCapacity(innerValueCapacity);
if (vector instanceof DensityAwareVector) {
((DensityAwareVector)vector).setInitialCapacity(innerValueCapacity, density);
} else {
vector.setInitialCapacity(innerValueCapacity);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ public void setInitialCapacity(int numRecords) {
* This helps in tightly controlling the memory we provision
* for the inner data vector.
*/
@Override
public void setInitialCapacity(int numRecords, double density) {
validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
super.setInitialCapacity(numRecords, density);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@

import javax.annotation.Nullable;

import com.google.common.base.Preconditions;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Ints;

Expand Down Expand Up @@ -101,6 +100,17 @@ public void setInitialCapacity(int numRecords) {
}
}

/**
 * Applies the capacity hint to every child vector of this container.
 * Children that implement {@link DensityAwareVector} receive both the value
 * count and the density; all other children receive the value count only.
 *
 * @param valueCount desired number of elements passed to each child
 * @param density density hint forwarded to density-aware children
 */
@Override
public void setInitialCapacity(int valueCount, double density) {
  for (final ValueVector child : (Iterable<ValueVector>) this) {
    if (!(child instanceof DensityAwareVector)) {
      // Child has no density-aware overload; fall back to the count-only variant.
      child.setInitialCapacity(valueCount);
      continue;
    }
    final DensityAwareVector densityAwareChild = (DensityAwareVector) child;
    densityAwareChild.setInitialCapacity(valueCount, density);
  }
}

@Override
public int getBufferSize() {
if (valueCount == 0 || size() == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
import static com.google.common.base.Preconditions.checkNotNull;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import com.google.common.collect.ObjectArrays;
Expand Down Expand Up @@ -348,6 +346,12 @@ public void setInitialCapacity(int numRecords) {
super.setInitialCapacity(numRecords);
}

/**
 * Records the validity buffer size computed by
 * {@code BitVectorHelper.getValidityBufferSize(numRecords)} and then
 * delegates the density-aware capacity setting to the superclass.
 *
 * @param numRecords number of records the vector should initially hold
 * @param density density hint passed through to the superclass
 */
@Override
public void setInitialCapacity(int numRecords, double density) {
  // Size the validity buffer first, then let the parent handle the rest.
  this.validityAllocationSizeInBytes =
      BitVectorHelper.getValidityBufferSize(numRecords);
  super.setInitialCapacity(numRecords, density);
}

@Override
public boolean allocateNewSafe() {
/* Boolean to keep track if all the memory allocations were successful
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.arrow.vector.complex;

import org.apache.arrow.vector.DensityAwareVector;
import org.apache.arrow.vector.UInt4Vector;
import org.apache.arrow.vector.ValueVector;

Expand All @@ -28,7 +29,7 @@
* Current design maintains data and offsets vectors. Each cell is stored in the data vector. Repeated vector
* uses the offset vector to determine the sequence of cells pertaining to an individual value.
*/
public interface RepeatedValueVector extends ValueVector {
public interface RepeatedValueVector extends ValueVector, DensityAwareVector {

final static int DEFAULT_REPEAT_PER_RECORD = 5;

Expand Down

0 comments on commit b46ba11

Please sign in to comment.