Skip to content

Commit

Permalink
improve: reduce the cost of copying fields
Browse files Browse the repository at this point in the history
  • Loading branch information
lgbo-ustc committed Nov 25, 2024
1 parent 4bd2b11 commit b534011
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3187,7 +3187,7 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
)
compareResultsAgainstVanillaSpark(
"""
|select a, b, c, row_number() over (partition by a order by b desc nulls last) as r
|select a, b, c, row_number() over (partition by a order by b desc, c nulls last) as r
|from test_win_top
|""".stripMargin,
true,
Expand Down
26 changes: 22 additions & 4 deletions cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ struct RowNumGroupArraySortedData
const auto & pos = sort_order.pos;
const auto & asc = sort_order.direction;
const auto & nulls_first = sort_order.nulls_direction;
LOG_ERROR(getLogger("GroupLimitFunction"), "xxx pos: {} tuple size: {} {}", pos, rhs.size(), lhs.size());
bool l_is_null = lhs[pos].isNull();
bool r_is_null = rhs[pos].isNull();
if (l_is_null && r_is_null)
Expand Down Expand Up @@ -119,17 +120,25 @@ struct RowNumGroupArraySortedData
values[current_index] = current;
}

ALWAYS_INLINE void addElement(Data data, const SortOrderFields & sort_orders, size_t max_elements)
ALWAYS_INLINE void addElement(const Data & data, const SortOrderFields & sort_orders, size_t max_elements)
{
if (values.size() >= max_elements)
{
LOG_ERROR(
getLogger("GroupLimitFunction"),
"xxxx values size: {}, limit: {}, tuple size: {} {}",
values.size(),
max_elements,
data.size(),
values[0].size());
if (!compare(data, values[0], sort_orders))
return;
values[0] = std::move(data);
values[0] = data;
heapReplaceTop(sort_orders);
return;
}
values.push_back(std::move(data));
values.push_back(data);
LOG_ERROR(getLogger("GroupLimitFunction"), "add new element: {} {}", values.size(), values.back().size());
auto cmp = [&sort_orders](const Data & a, const Data & b) { return compare(a, b, sort_orders); };
std::push_heap(values.begin(), values.end(), cmp);
}
Expand Down Expand Up @@ -203,7 +212,16 @@ class RowNumGroupArraySorted final : public DB::IAggregateFunctionDataHelper<Row
{
auto & data = this->data(place);
DB::Tuple data_tuple = (*columns[0])[row_num].safeGet<DB::Tuple>();
this->data(place).addElement(std::move(data_tuple), sort_order_fields, limit);
// const DB::Tuple & data_tuple = *(static_cast<const DB::Tuple *>(&((*columns[0])[row_num])));
LOG_ERROR(
getLogger("GroupLimitFunction"),
"xxx col len: {}, row num: {}, tuple size: {}, type: {}",
columns[0]->size(),
row_num,
data_tuple.size(),
(*columns[0])[row_num].getType());
;
this->data(place).addElement(data_tuple, sort_order_fields, limit);
}

void merge(DB::AggregateDataPtr __restrict place, DB::ConstAggregateDataPtr rhs, DB::Arena * /*arena*/) const override
Expand Down

0 comments on commit b534011

Please sign in to comment.