Skip to content

Commit

Permalink
Optimize RowContainer get fixed column width
Browse files Browse the repository at this point in the history
  • Loading branch information
jinchengchenghh committed Nov 25, 2024
1 parent 78d761b commit a3b54bc
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 15 deletions.
44 changes: 29 additions & 15 deletions velox/exec/RowContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,14 @@ RowContainer::RowContainer(
typeKinds_.push_back(type->kind());
types_.push_back(type);
offsets_.push_back(offset);
offset += typeKindSize(type->kind());
const auto typeSize = typeKindSize(type->kind());
offset += typeSize;
nullOffsets_.push_back(nullOffset);
if (type->isFixedWidth()) {
fixedColumnWidth_.push_back(typeSize);
} else {
fixedColumnWidth_.push_back(std::nullopt);
}
isVariableWidth |= !type->isFixedWidth();
if (nullableKeys_) {
++nullOffset;
Expand Down Expand Up @@ -216,6 +222,11 @@ RowContainer::RowContainer(
for (auto& type : dependentTypes) {
types_.push_back(type);
typeKinds_.push_back(type->kind());
if (type->isFixedWidth()) {
fixedColumnWidth_.push_back(typeKindSize(type->kind()));
} else {
fixedColumnWidth_.push_back(std::nullopt);
}
nullOffsets_.push_back(nullOffset);
++nullOffset;
isVariableWidth |= !type->isFixedWidth();
Expand Down Expand Up @@ -617,7 +628,8 @@ int32_t RowContainer::variableSizeAt(const char* row, column_index_t column)
}

int32_t RowContainer::fixedSizeAt(column_index_t column) const {
return typeKindSize(typeKinds_[column]);
VELOX_DCHECK(fixedColumnWidth_[column].has_value());
return fixedColumnWidth_[column].value();
}

int32_t RowContainer::extractVariableSizeAt(
Expand Down Expand Up @@ -707,9 +719,9 @@ void RowContainer::extractSerializedRows(
size_t fixedWidthRowSize = 0;
bool hasVariableWidth = false;
for (auto i = 0; i < types_.size(); ++i) {
const auto& type = types_[i];
if (type->isFixedWidth()) {
fixedWidthRowSize += typeKindSize(type->kind());
const auto width = fixedColumnWidth_[i];
if (width.has_value()) {
fixedWidthRowSize += width.value();
} else {
hasVariableWidth = true;
}
Expand Down Expand Up @@ -746,11 +758,11 @@ void RowContainer::extractSerializedRows(

// Copy values.
for (auto j = 0; j < types_.size(); ++j) {
const auto& type = types_[j];
if (type->isFixedWidth()) {
const auto size = typeKindSize(type->kind());
::memcpy(rawBuffer + offset, row + rowColumns_[j].offset(), size);
offset += size;
const auto width = fixedColumnWidth_[i];
if (width.has_value()) {
::memcpy(
rawBuffer + offset, row + rowColumns_[j].offset(), width.value());
offset += width.value();
} else {
auto size = extractVariableSizeAt(row, j, rawBuffer + offset);
offset += size;
Expand Down Expand Up @@ -778,11 +790,13 @@ void RowContainer::storeSerializedRow(

RowSizeTracker tracker(row[rowSizeOffset_], *stringAllocator_);
for (auto i = 0; i < types_.size(); ++i) {
const auto& type = types_[i];
if (type->isFixedWidth()) {
const auto size = typeKindSize(type->kind());
::memcpy(row + rowColumns_[i].offset(), serialized.data() + offset, size);
offset += size;
const auto width = fixedColumnWidth_[i];
if (width.has_value()) {
::memcpy(
row + rowColumns_[i].offset(),
serialized.data() + offset,
width.value());
offset += width.value();
} else {
const auto size = storeVariableSizeAt(serialized.data() + offset, row, i);
offset += size;
Expand Down
4 changes: 4 additions & 0 deletions velox/exec/RowContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,8 @@ class RowContainer {
// to 'typeKinds_' and 'rowColumns_'.
std::vector<TypePtr> types_;
std::vector<TypeKind> typeKinds_;
// Width in bytes of fixed-width fields; std::nullopt for variable-width fields.
std::vector<std::optional<int32_t>> fixedColumnWidth_;
int32_t nextOffset_ = 0;
// Indicates if this row container has rows with duplicate keys. This only
// applies if 'nextOffset_' is set.
Expand All @@ -1510,10 +1512,12 @@ class RowContainer {
// Offset and null indicator offset of non-aggregate fields as a single word.
// Corresponds pairwise to 'types_'.
std::vector<RowColumn> rowColumns_;

// Optional aggregated column stats (e.g. min/max size) for non-aggregate
// fields. Index aligns with 'rowColumns_'. Column stats will only be enabled
// if 'collectColumnStats_' is true.
std::vector<RowColumn::Stats> rowColumnsStats_;

// Bit offset of the probed flag for a full or right outer join payload. 0 if
// not applicable.
int32_t probedFlagOffset_ = 0;
Expand Down

0 comments on commit a3b54bc

Please sign in to comment.