From 2a80d3c523abb0b6c4bd79e77fd4d67010e2bfe6 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 21 Nov 2024 09:04:18 +0800 Subject: [PATCH] improve: reduce the cost of coping field --- ...enClickHouseTPCHSaltNullParquetSuite.scala | 2 +- .../GroupLimitFunctions.cpp | 26 ++++++++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala index 1707b9ef6d88..2214c79b20b4 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala @@ -3187,7 +3187,7 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr ) compareResultsAgainstVanillaSpark( """ - |select a, b, c, row_number() over (partition by a order by b desc nulls last) as r + |select a, b, c, row_number() over (partition by a order by b desc, c nulls last) as r |from test_win_top |""".stripMargin, true, diff --git a/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp b/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp index 8ca64b20b701..2884b687e335 100644 --- a/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp +++ b/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp @@ -72,6 +72,7 @@ struct RowNumGroupArraySortedData const auto & pos = sort_order.pos; const auto & asc = sort_order.direction; const auto & nulls_first = sort_order.nulls_direction; + LOG_ERROR(getLogger("GroupLimitFunction"), "xxx pos: {} tuple size: {} {}", pos, rhs.size(), lhs.size()); bool l_is_null = lhs[pos].isNull(); bool r_is_null = rhs[pos].isNull(); if (l_is_null && r_is_null) @@ -119,17 +120,25 @@ struct RowNumGroupArraySortedData values[current_index] = current; } - ALWAYS_INLINE void addElement(Data data, const SortOrderFields & sort_orders, size_t max_elements) + ALWAYS_INLINE void addElement(const Data & data, const SortOrderFields & sort_orders, size_t max_elements) { if (values.size() >= max_elements) { + LOG_ERROR( + getLogger("GroupLimitFunction"), + "xxxx values size: {}, limit: {}, tuple size: {} {}", + values.size(), + max_elements, + data.size(), + values[0].size()); if (!compare(data, values[0], sort_orders)) return; - values[0] = std::move(data); + values[0] = data; heapReplaceTop(sort_orders); return; } - values.push_back(std::move(data)); + values.push_back(data); + LOG_ERROR(getLogger("GroupLimitFunction"), "add new element: {} {}", values.size(), values.back().size()); auto cmp = [&sort_orders](const Data & a, const Data & b) { return compare(a, b, sort_orders); }; std::push_heap(values.begin(), values.end(), cmp); } @@ -203,7 +212,16 @@ class RowNumGroupArraySorted final : public DB::IAggregateFunctionDataHelperdata(place); DB::Tuple data_tuple = (*columns[0])[row_num].safeGet(); - this->data(place).addElement(std::move(data_tuple), sort_order_fields, limit); + // const DB::Tuple & data_tuple = *(static_cast(&((*columns[0])[row_num]))); + LOG_ERROR( + getLogger("GroupLimitFunction"), + "xxx col len: {}, row num: {}, tuple size: {}, type: {}", + columns[0]->size(), + row_num, + data_tuple.size(), + (*columns[0])[row_num].getType()); + ; + this->data(place).addElement(data_tuple, sort_order_fields, limit); } void merge(DB::AggregateDataPtr __restrict place, DB::ConstAggregateDataPtr rhs, DB::Arena * /*arena*/) const override