diff --git a/.gitignore b/.gitignore index 14b860244c22..2f47aad573e9 100644 --- a/.gitignore +++ b/.gitignore @@ -167,3 +167,6 @@ tests/queries/0_stateless/*.expect.history /rust/**/target # It is autogenerated from *.in /rust/**/.cargo/config.toml + +utils/local-engine/tests/testConfig.h +/utils/extern-local-engine diff --git a/CMakeLists.txt b/CMakeLists.txt index ce615c11f2b5..3c2f37f1eca2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -398,6 +398,8 @@ endif () include(cmake/dbms_glob_sources.cmake) +set (CMAKE_POSITION_INDEPENDENT_CODE ON) + add_library(global-group INTERFACE) if (OS_LINUX OR OS_ANDROID) include(cmake/linux/default_libs.cmake) @@ -434,7 +436,6 @@ endif () set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") -set (CMAKE_POSITION_INDEPENDENT_CODE OFF) if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X)) # Slightly more efficient code can be generated # It's disabled for ARM because otherwise ClickHouse cannot run on Android. @@ -497,8 +498,10 @@ macro (clickhouse_add_executable target) if (${type} STREQUAL EXECUTABLE) # disabled for TSAN and gcc since libtsan.a provides overrides too if (TARGET clickhouse_new_delete) - # operator::new/delete for executables (MemoryTracker stuff) - target_link_libraries (${target} PRIVATE clickhouse_new_delete) + if (NOT ${target} STREQUAL Git::Git) + # operator::new/delete for executables (MemoryTracker stuff) + target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES}) + endif() endif() # In case of static jemalloc, because zone_register() is located in zone.c and diff --git a/base/base/IPv4andIPv6.h b/base/base/IPv4andIPv6.h index 0e97d83b07ed..7b745ec7b847 100644 --- a/base/base/IPv4andIPv6.h +++ b/base/base/IPv4andIPv6.h @@ -51,3 +51,15 @@ namespace DB }; } + +namespace std +{ + template <> + struct hash + { + size_t operator()(const DB::IPv6 & x) const + { + return std::hash()(x.toUnderType()); + } + }; +} diff --git a/base/base/strong_typedef.h b/base/base/strong_typedef.h index 2ddea6412f56..b3b8bced688c 100644 --- a/base/base/strong_typedef.h +++ b/base/base/strong_typedef.h @@ -35,7 +35,7 @@ struct StrongTypedef Self & operator=(T && rhs) { t = std::move(rhs); return *this;} // NOLINTBEGIN(google-explicit-constructor) - operator const T & () const { return t; } + constexpr operator const T & () const { return t; } operator T & () { return t; } // NOLINTEND(google-explicit-constructor) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index b52b2eda992d..70dc9aeb5ffa 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54472) SET(VERSION_MAJOR 23) SET(VERSION_MINOR 3) -SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 52bf836e03a6ba7cf2d654eaaf73231701abc3a2) -SET(VERSION_DESCRIBE v23.3.1.2537-testing) -SET(VERSION_STRING 23.3.1.2537) +SET(VERSION_PATCH 3) +SET(VERSION_GITHASH 1b144bcd101ddf23466ba67e4fa0fd27afb9c060) +SET(VERSION_DESCRIBE v23.3.3.1-lts) +SET(VERSION_STRING 23.3.3.1) # end of autochange diff --git a/contrib/cctz b/contrib/cctz index 7c78edd52b4d..5e05432420f9 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 7c78edd52b4d65acc103c2f195818ffcabe6fe0d +Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2 diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index ce5bae2a0317..6cc20ef56476 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -46,10 +46,9 @@ ENV TZ=Etc/UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV DOCKER_CHANNEL stable -RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - -RUN add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}" - -RUN apt-get update \ +RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ + && add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}" \ + && apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ docker-ce \ && rm -rf \ @@ -60,7 +59,7 @@ RUN apt-get update \ RUN dockerd --version; docker --version -RUN python3 -m pip install \ +RUN python3 -m pip install --no-cache-dir \ PyMySQL \ aerospike==4.0.0 \ avro==1.10.2 \ diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index c16b2bf10877..78cb1aeffaeb 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -16,7 +16,9 @@ echo '{ # and on hung you can simply press Ctrl-C and it will spawn a python pdb, # but on SIGINT dockerd will exit, so ignore it to preserve the daemon. trap '' INT -dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & +# Binding to an IP address without --tlsverify is deprecated. Startup is intentionally being slowed +# unless --tls=false or --tlsverify=false is set +dockerd --host=unix:///var/run/docker.sock --tls=false --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & set +e reties=0 diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 34c865e77521..cdecbbcc2e95 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -68,9 +68,9 @@ Result: ## mapFromArrays -Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). +Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Notice that the second argument could also be a [Map](../../sql-reference/data-types/map.md), thus it is casted to an Array when executing. -The function is a more convenient alternative to `CAST((key_array, value_array), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. +The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. **Syntax** @@ -82,11 +82,11 @@ Alias: `MAP_FROM_ARRAYS(keys, values)` **Arguments** - `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md) -- `values` - Given value array to create a map from. +- `values` - Given value array or map to create a map from. **Returned value** -- A map whose keys and values are constructed from the key and value arrays +- A map whose keys and values are constructed from the key array and value array/map. **Example** @@ -94,13 +94,17 @@ Query: ```sql select mapFromArrays(['a', 'b', 'c'], [1, 2, 3]) -``` - -```text + ┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐ │ {'a':1,'b':2,'c':3} │ └───────────────────────────────────────────┘ -``` + +SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3)) + +┌─mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))─┐ +│ {1:('a',1),2:('b',2),3:('c',3)} │ +└───────────────────────────────────────────────────────┘ +``` ## mapAdd diff --git a/programs/server/MetricsTransmitter.cpp b/programs/server/MetricsTransmitter.cpp index 2f28f0a1d162..ae9fa5ecc2c9 100644 --- a/programs/server/MetricsTransmitter.cpp +++ b/programs/server/MetricsTransmitter.cpp @@ -87,7 +87,7 @@ void MetricsTransmitter::transmit(std::vector & prev_count if (send_events) { - for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) + for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { const auto counter = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); const auto counter_increment = counter - prev_counters[i]; @@ -100,7 +100,7 @@ void MetricsTransmitter::transmit(std::vector & prev_count if (send_events_cumulative) { - for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i) + for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { const auto counter = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); std::string key{ProfileEvents::getName(static_cast(i))}; @@ -110,7 +110,7 @@ void MetricsTransmitter::transmit(std::vector & prev_count if (send_metrics) { - for (size_t i = 0, end = CurrentMetrics::end(); i < end; ++i) + for (CurrentMetrics::Metric i = CurrentMetrics::Metric(0), end = CurrentMetrics::end(); i < end; ++i) { const auto value = CurrentMetrics::values[i].load(std::memory_order_relaxed); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 15f500b8bb60..67cfa3f73563 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -121,7 +121,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArraySample( void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory) { - AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; + AggregateFunctionProperties properties = { .returns_default_when_only_null = true, .is_order_dependent = true }; factory.registerFunction("groupArray", { createAggregateFunctionGroupArray, properties }); factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties }); diff --git a/src/AggregateFunctions/Helpers.h b/src/AggregateFunctions/Helpers.h index c97733571a37..19904dd92152 100644 --- a/src/AggregateFunctions/Helpers.h +++ b/src/AggregateFunctions/Helpers.h @@ -130,6 +130,8 @@ static IAggregateFunction * createWithNumericBasedType(const IDataType & argumen if (which.idx == TypeIndex::Date) return new AggregateFunctionTemplate(std::forward(args)...); if (which.idx == TypeIndex::DateTime) return new AggregateFunctionTemplate(std::forward(args)...); if (which.idx == TypeIndex::UUID) return new AggregateFunctionTemplate(std::forward(args)...); + if (which.idx == TypeIndex::IPv4) return new AggregateFunctionTemplate(std::forward(args)...); + if (which.idx == TypeIndex::IPv6) return new AggregateFunctionTemplate(std::forward(args)...); return nullptr; } diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index fd46b38ada85..2605b3a5966c 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -525,6 +525,35 @@ void ColumnAggregateFunction::insertDefault() pushBackAndCreateState(data, arena, func.get()); } +void ColumnAggregateFunction::insertRangeSelective( + const IColumn & from, const IColumn::Selector & selector, size_t selector_start, size_t length) +{ + const ColumnAggregateFunction & from_concrete = static_cast(from); + const auto & from_data = from_concrete.data; + if (!empty() && src.get() != &from_concrete) + { + ensureOwnership(); + Arena & arena = createOrGetArena(); + Arena * arena_ptr = &arena; + data.reserve(size() + length); + for (size_t i = 0; i < length; ++i) + { + pushBackAndCreateState(data, arena, func.get()); + func->merge(data.back(), from_data[selector[selector_start + i]], arena_ptr); + } + return; + } + /// Keep shared ownership of aggregation states. + src = from_concrete.getPtr(); + + size_t old_size = data.size(); + data.resize(old_size + length); + auto * data_start = data.data(); + size_t element_size = sizeof(data[0]); + for (size_t i = 0; i < length; ++i) + memcpy(data_start + old_size + i, &from_concrete.data[selector[selector_start + i]], element_size); +} + StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const { WriteBufferFromArena out(arena, begin); diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index f9ce45708c90..d4e5b785d43d 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -184,6 +184,8 @@ class ColumnAggregateFunction final : public COWHelper(src); + const Offsets & src_offsets = src_concrete.getOffsets(); + const IColumn & src_data = src_concrete.getData(); + IColumn & cur_data = getData(); + Offsets & cur_offsets = getOffsets(); + + size_t old_size = cur_offsets.size(); + size_t cur_size = old_size + length; + cur_data.reserve(cur_size); + cur_offsets.resize(cur_size); + + for (size_t i = 0; i < length; ++i) + { + size_t src_pos = selector[selector_start + i]; + size_t offset = src_offsets[src_pos - 1]; + size_t size = src_offsets[src_pos] - offset; + cur_data.insertRangeFrom(src_data, offset, size); + cur_offsets[old_size + i] = cur_offsets[old_size + i - 1] + size; // PaddedPODArray allows to use -1th element that will have value 0 + } +} + ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const { diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index e60c388831d9..b38e72682043 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -84,6 +84,7 @@ class ColumnArray final : public COWHelper void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insertRangeSelective(const IColumn & src, const Selector & selector, size_t selector_start, size_t length) override; void insert(const Field & x) override; void insertFrom(const IColumn & src_, size_t n) override; void insertDefault() override; @@ -149,6 +150,8 @@ class ColumnArray final : public COWHelper void gather(ColumnGathererStream & gatherer_stream) override; + bool canBeInsideNullable() const override { return true; } + ColumnPtr compress() const override; void forEachSubcolumn(ColumnCallback callback) const override diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index d747f8ef5f40..7d8122cc0369 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -85,6 +85,7 @@ class ColumnCompressed : public COWHelper bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); } void insert(const Field &) override { throwMustBeDecompressed(); } void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); } + void insertRangeSelective(const IColumn &, const Selector &, size_t, size_t) override { throwMustBeDecompressed(); } void insertData(const char *, size_t) override { throwMustBeDecompressed(); } void insertDefault() override { throwMustBeDecompressed(); } void popBack(size_t) override { throwMustBeDecompressed(); } diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index fcdcd2ce2244..30efc6f6545a 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -126,6 +126,11 @@ class ColumnConst final : public COWHelper s += length; } + void insertRangeSelective(const IColumn & /*src*/, const Selector & /*selector*/, size_t /*selector_start*/, size_t length) override + { + s += length; + } + void insert(const Field &) override { ++s; diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index d9fed8c87e56..188e8ec07169 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -261,6 +261,18 @@ void ColumnDecimal::insertRangeFrom(const IColumn & src, size_t start, size_t memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0])); } +template +void ColumnDecimal::insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) +{ + size_t old_size = data.size(); + data.resize(old_size + length); + const auto & src_data = (static_cast(src)).getData(); + for (size_t i = 0; i < length; ++i) + { + data[old_size + i] = src_data[selector[selector_start + i]]; + } +} + template ColumnPtr ColumnDecimal::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const { diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 03e0b9be5588..97d1d5efd771 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -62,6 +62,7 @@ class ColumnDecimal final : public COWHelper()); } void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) override; void popBack(size_t n) override { diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 1b7355d91f59..7f4effd9ab1b 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -202,6 +202,22 @@ void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_ memcpy(chars.data() + old_size, &src_concrete.chars[start * n], length * n); } +void ColumnFixedString::insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) +{ + const ColumnFixedString & src_concrete = static_cast(src); + + size_t old_size = chars.size(); + chars.resize(old_size + length * n); + auto * cur_data_end = chars.data() + old_size; + auto * src_data_start = src_concrete.chars.data(); + + for (size_t i = 0; i < length; ++i) + { + size_t src_pos = selector[selector_start + i]; + memcpySmallAllowReadWriteOverflow15(cur_data_end + i * n, src_data_start+ n * src_pos, n); + } +} + ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const { size_t col_size = size(); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 39497e3403ed..3809b4cd10ba 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -154,6 +154,8 @@ class ColumnFixedString final : public COWHelperinsertRangeSelective(assert_cast(src).getNestedColumn(), selector, selector_start, length); +} + ColumnPtr ColumnMap::filter(const Filter & filt, ssize_t result_size_hint) const { auto filtered = nested->filter(filt, result_size_hint); diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index a11905fcaa05..340fc84c9990 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -65,6 +65,7 @@ class ColumnMap final : public COWHelper void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insertRangeSelective(const IColumn & src, const Selector & selector, size_t selector_start, size_t length) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; @@ -72,6 +73,7 @@ class ColumnMap final : public COWHelper ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; void gather(ColumnGathererStream & gatherer_stream) override; + bool canBeInsideNullable() const override { return true; } int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index f70dac20a2a8..c107e9a750f3 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -184,6 +185,21 @@ void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t l getNestedColumn().insertRangeFrom(*nullable_col.nested_column, start, length); } +void ColumnNullable::insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) +{ + const ColumnNullable & nullable_col = static_cast(src); + getNestedColumn().insertRangeSelective(*nullable_col.nested_column, selector, selector_start, length); + + if (!memoryIsZero(nullable_col.getNullMapData().data(), 0, nullable_col.size())) + { + getNullMapColumn().insertRangeSelective(*nullable_col.null_map, selector, selector_start, length); + } + else + { + getNullMapColumn().insertManyDefaults(length); + } +} + void ColumnNullable::insert(const Field & x) { if (x.isNull()) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 47afd982b1e9..9b84a000f994 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -66,6 +66,7 @@ class ColumnNullable final : public COWHelper const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) override; void insert(const Field & x) override; void insertFrom(const IColumn & src, size_t n) override; @@ -128,6 +129,8 @@ class ColumnNullable final : public COWHelper void gather(ColumnGathererStream & gatherer_stream) override; + bool canBeInsideNullable() const override { return true; } + ColumnPtr compress() const override; void forEachSubcolumn(ColumnCallback callback) const override diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index b00600e17487..667a543f3de0 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -576,5 +576,41 @@ void ColumnString::validate() const "ColumnString validation failed: size mismatch (internal logical error) {} != {}", offsets.back(), chars.size()); } +void ColumnString::insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) +{ + const ColumnString & src_concrete = static_cast(src); + const Offsets & src_offsets = src_concrete.getOffsets(); + auto * src_data_start = src_concrete.chars.data(); + + Offsets & cur_offsets = getOffsets(); + + if (length == 0) + return; + + size_t old_offset_size = cur_offsets.size(); + cur_offsets.resize(old_offset_size + length); + + size_t old_chars_size = chars.size(); + size_t new_chars_size = old_chars_size; + for (size_t i = 0; i < length; ++i) + { + new_chars_size += src_concrete.sizeAt(selector[selector_start + i]); + } + chars.resize(new_chars_size); + + size_t cur_offset = cur_offsets[old_offset_size - 1]; + + auto * cur_chars_start = chars.data(); // realloc memory is not allowed in the following + for (size_t i = 0; i < length; ++i) + { + size_t src_pos = selector[selector_start + i]; + size_t offset = src_offsets[src_pos - 1]; + const size_t size_to_append = src_offsets[src_pos] - offset; /// -1th index is Ok, see PaddedPODArray. + + memcpySmallAllowReadWriteOverflow15(cur_chars_start + cur_offset, src_data_start + offset, size_to_append); + cur_offset += size_to_append; + cur_offsets[old_offset_size + i] = cur_offset; + } +} } diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 08c876a803d8..d25f8c15fead 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -193,6 +193,8 @@ class ColumnString final : public COWHelper void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) override; + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 369842c72819..d66ad9237212 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -233,6 +233,18 @@ void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t leng start, length); } +void ColumnTuple::insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) +{ + const ColumnTuple & src_concrete = static_cast(src); + + const size_t tuple_size = columns.size(); + if (src_concrete.columns.size() != tuple_size) + throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple"); + + for (size_t i = 0; i < tuple_size; ++i) + columns[i]->insertRangeSelective(*src_concrete.columns[i], selector, selector_start, length); +} + ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) const { const size_t tuple_size = columns.size(); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 281fd94d893b..53a8556fe618 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -68,6 +68,7 @@ class ColumnTuple final : public COWHelper void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; + void insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; @@ -75,6 +76,7 @@ class ColumnTuple final : public COWHelper ColumnPtr replicate(const Offsets & offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; void gather(ColumnGathererStream & gatherer_stream) override; + bool canBeInsideNullable() const override { return true; } int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 3ed3ed73328a..b77a05e3a8cc 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -630,6 +630,18 @@ inline void doFilterAligned(const UInt8 *& filt_pos, const UInt8 *& filt_end_ali } ) +template +void ColumnVector::insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) +{ +size_t old_size = data.size(); +data.resize(old_size + length); +const auto & src_data = (static_cast(src)).getData(); +for (size_t i = 0; i < length; ++i) +{ + data[old_size + i] = src_data[selector[selector_start + i]]; +} +} + template ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const { diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index bf790423d1d8..d601998ef296 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -330,6 +330,8 @@ class ColumnVector final : public COWHelper> return this->template scatterImpl(num_columns, selector); } + void insertRangeSelective(const IColumn & src, const IColumn::Selector & selector, size_t selector_start, size_t length) override; + void gather(ColumnGathererStream & gatherer_stream) override; bool canBeInsideNullable() const override { return true; } diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index d777fd7b2406..84dd15432272 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -379,6 +379,18 @@ class IColumn : public COW using Selector = PaddedPODArray; [[nodiscard]] virtual std::vector scatter(ColumnIndex num_columns, const Selector & selector) const = 0; + /// This function will get row index from selector and append the data to this column. + /// This function will handle indexes start from input 'selector_start' and will append 'size' times + /// For example: + /// input selector: [1, 2, 3, 4, 5, 6] + /// selector_start: 2 + /// length: 3 + /// This function will copy the [3, 4, 5] row of src to this column. + virtual void insertRangeSelective(const IColumn & /*src*/, const Selector & /*selector*/, size_t /*selector_start*/, size_t /*length*/) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "insertRangeSelective is not implemented for {}", getName()); + } + /// Insert data from several other columns according to source mask (used in vertical merge). /// For now it is a helper to de-virtualize calls to insert*() functions inside gather loop /// (descendants should call gatherer_stream.gather(*this) to implement this function.) diff --git a/src/Common/Concepts.h b/src/Common/Concepts.h index b1bf591024d1..927f42aa4bea 100644 --- a/src/Common/Concepts.h +++ b/src/Common/Concepts.h @@ -5,6 +5,10 @@ namespace DB { +template +concept is_any_of = (std::same_as || ...); + + template concept OptionalArgument = requires(T &&...) { diff --git a/src/Common/CurrentMemoryTracker.cpp b/src/Common/CurrentMemoryTracker.cpp index 78a6c65f43c0..a8844728f887 100644 --- a/src/Common/CurrentMemoryTracker.cpp +++ b/src/Common/CurrentMemoryTracker.cpp @@ -36,6 +36,9 @@ MemoryTracker * getMemoryTracker() } using DB::current_thread; +thread_local std::function CurrentMemoryTracker::before_alloc = nullptr; + +thread_local std::function CurrentMemoryTracker::before_free = nullptr; void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) { @@ -52,11 +55,12 @@ void CurrentMemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded) if (current_thread) { Int64 will_be = current_thread->untracked_memory + size; - - if (will_be > current_thread->untracked_memory_limit) + if (will_be > current_thread->untracked_memory_limit) { - memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); current_thread->untracked_memory = 0; + if (before_alloc) + before_alloc(will_be, throw_if_memory_exceeded); + memory_tracker->allocImpl(will_be, throw_if_memory_exceeded); } else { @@ -106,6 +110,8 @@ void CurrentMemoryTracker::free(Int64 size) current_thread->untracked_memory -= size; if (current_thread->untracked_memory < -current_thread->untracked_memory_limit) { + if (before_free) + before_free(-current_thread->untracked_memory); memory_tracker->free(-current_thread->untracked_memory); current_thread->untracked_memory = 0; } diff --git a/src/Common/CurrentMemoryTracker.h b/src/Common/CurrentMemoryTracker.h index 2721d89d5645..5392b1e44760 100644 --- a/src/Common/CurrentMemoryTracker.h +++ b/src/Common/CurrentMemoryTracker.h @@ -17,6 +17,8 @@ struct CurrentMemoryTracker /// Throws MEMORY_LIMIT_EXCEEDED (if it's allowed to throw exceptions) static void injectFault(); + static thread_local std::function before_alloc; + static thread_local std::function before_free; private: static void allocImpl(Int64 size, bool throw_if_memory_exceeded); }; diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 542c48148c8b..cfe9f41befeb 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -126,6 +126,8 @@ M(DDLWorkerThreadsActive, "Number of threads in the DDLWORKER thread pool for ON CLUSTER queries running a task.") \ M(StorageDistributedThreads, "Number of threads in the StorageDistributed thread pool.") \ M(StorageDistributedThreadsActive, "Number of threads in the StorageDistributed thread pool running a task.") \ + M(DistributedInsertThreads, "Number of threads used for INSERT into Distributed.") \ + M(DistributedInsertThreadsActive, "Number of threads used for INSERT into Distributed running a task.") \ M(StorageS3Threads, "Number of threads in the StorageS3 thread pool.") \ M(StorageS3ThreadsActive, "Number of threads in the StorageS3 thread pool running a task.") \ M(MergeTreePartsLoaderThreads, "Number of threads in the MergeTree parts loader thread pool.") \ @@ -184,10 +186,10 @@ namespace CurrentMetrics { - #define M(NAME, DOCUMENTATION) extern const Metric NAME = __COUNTER__; + #define M(NAME, DOCUMENTATION) extern const Metric NAME = Metric(__COUNTER__); APPLY_FOR_METRICS(M) #undef M - constexpr Metric END = __COUNTER__; + constexpr Metric END = Metric(__COUNTER__); std::atomic values[END] {}; /// Global variable, initialized by zeros. diff --git a/src/Common/CurrentMetrics.h b/src/Common/CurrentMetrics.h index 0ae16e2d08d7..a1ef254485dd 100644 --- a/src/Common/CurrentMetrics.h +++ b/src/Common/CurrentMetrics.h @@ -6,6 +6,7 @@ #include #include #include +#include /** Allows to count number of simultaneously happening processes or current value of some metric. * - for high-level profiling. @@ -22,7 +23,7 @@ namespace CurrentMetrics { /// Metric identifier (index in array). - using Metric = size_t; + using Metric = StrongTypedef; using Value = DB::Int64; /// Get name of metric by identifier. Returns statically allocated string. diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index efdc43917dac..eb455c5f9e3a 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -344,6 +344,8 @@ DEFINE_HASH(DB::Int256) DEFINE_HASH(DB::Float32) DEFINE_HASH(DB::Float64) DEFINE_HASH(DB::UUID) +DEFINE_HASH(DB::IPv4) +DEFINE_HASH(DB::IPv6) #undef DEFINE_HASH diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 7ddcbc20b22e..8542ddae144e 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -75,7 +75,7 @@ template bool check(const T x) { return x == T{}; } template -void set(T & x) { x = {}; } +void set(T & x) { x = T{}; } } diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 3cee4a8e7181..1d035952f13c 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -497,10 +497,10 @@ The server successfully detected this situation and will download merged part fr namespace ProfileEvents { -#define M(NAME, DOCUMENTATION) extern const Event NAME = __COUNTER__; +#define M(NAME, DOCUMENTATION) extern const Event NAME = Event(__COUNTER__); APPLY_FOR_EVENTS(M) #undef M -constexpr Event END = __COUNTER__; +constexpr Event END = Event(__COUNTER__); /// Global variable, initialized by zeros. Counter global_counters_array[END] {}; @@ -522,7 +522,7 @@ void Counters::resetCounters() { if (counters) { - for (Event i = 0; i < num_counters; ++i) + for (Event i = Event(0); i < num_counters; ++i) counters[i].store(0, std::memory_order_relaxed); } } @@ -540,7 +540,7 @@ Counters::Snapshot::Snapshot() Counters::Snapshot Counters::getPartiallyAtomicSnapshot() const { Snapshot res; - for (Event i = 0; i < num_counters; ++i) + for (Event i = Event(0); i < num_counters; ++i) res.counters_holder[i] = counters[i].load(std::memory_order_relaxed); return res; } @@ -616,7 +616,7 @@ CountersIncrement::CountersIncrement(Counters::Snapshot const & snapshot) CountersIncrement::CountersIncrement(Counters::Snapshot const & after, Counters::Snapshot const & before) { init(); - for (Event i = 0; i < Counters::num_counters; ++i) + for (Event i = Event(0); i < Counters::num_counters; ++i) increment_holder[i] = static_cast(after[i]) - static_cast(before[i]); } diff --git a/src/Common/ProfileEvents.h b/src/Common/ProfileEvents.h index 867b5b551c62..a36e68742cf5 100644 --- a/src/Common/ProfileEvents.h +++ b/src/Common/ProfileEvents.h @@ -1,7 +1,8 @@ #pragma once #include -#include "base/types.h" +#include +#include #include #include #include @@ -14,7 +15,7 @@ namespace ProfileEvents { /// Event identifier (index in array). - using Event = size_t; + using Event = StrongTypedef; using Count = size_t; using Increment = Int64; using Counter = std::atomic; diff --git a/src/Common/StatusInfo.cpp b/src/Common/StatusInfo.cpp index 32afc8330013..1f9ddfaf4b9e 100644 --- a/src/Common/StatusInfo.cpp +++ b/src/Common/StatusInfo.cpp @@ -8,10 +8,10 @@ namespace CurrentStatusInfo { - #define M(NAME, DOCUMENTATION, ENUM) extern const Status NAME = __COUNTER__; + #define M(NAME, DOCUMENTATION, ENUM) extern const Status NAME = Status(__COUNTER__); APPLY_FOR_STATUS(M) #undef M - constexpr Status END = __COUNTER__; + constexpr Status END = Status(__COUNTER__); std::mutex locks[END] {}; std::unordered_map values[END] {}; diff --git a/src/Common/StatusInfo.h b/src/Common/StatusInfo.h index 9aa185cd0c36..91e6d4d3b850 100644 --- a/src/Common/StatusInfo.h +++ b/src/Common/StatusInfo.h @@ -6,13 +6,14 @@ #include #include #include +#include #include #include namespace CurrentStatusInfo { - using Status = size_t; + using Status = StrongTypedef; using Key = std::string; const char * getName(Status event); diff --git a/src/Common/tests/gtest_thread_pool_limit.cpp b/src/Common/tests/gtest_thread_pool_limit.cpp index bc67ffd0bc16..17f79d17894f 100644 --- a/src/Common/tests/gtest_thread_pool_limit.cpp +++ b/src/Common/tests/gtest_thread_pool_limit.cpp @@ -1,16 +1,23 @@ #include #include #include +#include #include +namespace CurrentMetrics +{ + extern const Metric LocalThread; + extern const Metric LocalThreadActive; +} + /// Test for thread self-removal when number of free threads in pool is too large. /// Just checks that nothing weird happens. template int test() { - Pool pool(10, 2, 10); + Pool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, 10, 2, 10); std::atomic counter{0}; for (size_t i = 0; i < 10; ++i) diff --git a/src/Common/typeid_cast.h b/src/Common/typeid_cast.h index 1568d3809389..baee3aaf6327 100644 --- a/src/Common/typeid_cast.h +++ b/src/Common/typeid_cast.h @@ -18,9 +18,6 @@ namespace DB } } -template -concept is_any_of = (std::same_as || ...); - /** Checks type by comparing typeid. * The exact match of the type is checked. That is, cast to the ancestor will be unsuccessful. diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 41a6af542044..6b2696034f19 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -61,16 +61,10 @@ String getSHA1(const String & userdata) return String{digest_id.begin(), digest_id.end()}; } -String generateDigest(const String & userdata) -{ - std::vector user_password; - boost::split(user_password, userdata, [](char character) { return character == ':'; }); - return user_password[0] + ":" + base64Encode(getSHA1(userdata)); -} - bool fixupACL( const std::vector & request_acls, - const std::vector & current_ids, + int64_t session_id, + const KeeperStorage::UncommittedState & uncommitted_state, std::vector & result_acls) { if (request_acls.empty()) @@ -81,14 +75,18 @@ bool fixupACL( { if (request_acl.scheme == "auth") { - for (const auto & current_id : current_ids) - { - valid_found = true; - Coordination::ACL new_acl = request_acl; - new_acl.scheme = current_id.scheme; - new_acl.id = current_id.id; - result_acls.push_back(new_acl); - } + uncommitted_state.forEachAuthInSession( + session_id, + [&](const KeeperStorage::AuthID & auth_id) + { + valid_found = true; + Coordination::ACL new_acl = request_acl; + + new_acl.scheme = auth_id.scheme; + new_acl.id = auth_id.id; + + result_acls.push_back(new_acl); + }); } else if (request_acl.scheme == "world" && request_acl.id == "anyone") { @@ -564,6 +562,32 @@ Coordination::ACLs KeeperStorage::UncommittedState::getACLs(StringRef path) cons return storage.acl_map.convertNumber(node_it->value.acl_id); } +void KeeperStorage::UncommittedState::forEachAuthInSession(int64_t session_id, std::function func) const +{ + const auto call_for_each_auth = [&func](const auto & auth_ids) + { + for (const auto & auth : auth_ids) + { + using TAuth = std::remove_reference_t; + + const AuthID * auth_ptr = nullptr; + if constexpr (std::is_pointer_v) + auth_ptr = auth; + else + auth_ptr = &auth; + + func(*auth_ptr); + } + }; + + // for committed + if (storage.session_and_auth.contains(session_id)) + call_for_each_auth(storage.session_and_auth.at(session_id)); + // for uncommitted + if (session_and_auth.contains(session_id)) + call_for_each_auth(session_and_auth.at(session_id)); +} + namespace { @@ -927,7 +951,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADARGUMENTS}}; Coordination::ACLs node_acls; - if (!fixupACL(request.acls, storage.session_and_auth[session_id], node_acls)) + if (!fixupACL(request.acls, session_id, storage.uncommitted_state, node_acls)) return {KeeperStorage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; if (request.is_ephemeral) @@ -1533,10 +1557,8 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; - auto & session_auth_ids = storage.session_and_auth[session_id]; Coordination::ACLs node_acls; - - if (!fixupACL(request.acls, session_auth_ids, node_acls)) + if (!fixupACL(request.acls, session_id, uncommitted_state, node_acls)) return {KeeperStorage::Delta{zxid, Coordination::Error::ZINVALIDACL}}; std::vector new_deltas @@ -1840,7 +1862,7 @@ struct KeeperStorageAuthRequestProcessor final : public KeeperStorageRequestProc return {KeeperStorage::Delta{zxid, Coordination::Error::ZAUTHFAILED}}; std::vector new_deltas; - auto auth_digest = generateDigest(auth_request.data); + auto auth_digest = KeeperStorage::generateDigest(auth_request.data); if (auth_digest == storage.superdigest) { KeeperStorage::AuthID auth{"super", ""}; @@ -2420,5 +2442,12 @@ void KeeperStorage::recalculateStats() container.recalculateDataSize(); } +String KeeperStorage::generateDigest(const String & userdata) +{ + std::vector user_password; + boost::split(user_password, userdata, [](char character) { return character == ':'; }); + return user_password[0] + ":" + base64Encode(getSHA1(userdata)); +} + } diff --git a/src/Coordination/KeeperStorage.h b/src/Coordination/KeeperStorage.h index be528072df4d..cfacdfc84de5 100644 --- a/src/Coordination/KeeperStorage.h +++ b/src/Coordination/KeeperStorage.h @@ -105,6 +105,8 @@ class KeeperStorage return first.value == second.value; } + static String generateDigest(const String & userdata); + struct RequestForSession { int64_t session_id; @@ -263,6 +265,8 @@ class KeeperStorage return check_auth(auth_it->second); } + void forEachAuthInSession(int64_t session_id, std::function func) const; + std::shared_ptr tryGetNodeFromStorage(StringRef path) const; std::unordered_map> session_and_auth; diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 895d563327ee..b1bea8ddf248 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1579,6 +1579,113 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) } +TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitted) +{ + using namespace Coordination; + using namespace DB; + + ChangelogDirTest snapshots("./snapshots"); + CoordinationSettingsPtr settings = std::make_shared(); + ResponsesQueue queue(std::numeric_limits::max()); + SnapshotsQueue snapshots_queue{1}; + + auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr); + state_machine->init(); + + String user_auth_data = "test_user:test_password"; + String digest = KeeperStorage::generateDigest(user_auth_data); + + std::shared_ptr auth_req = std::make_shared(); + auth_req->scheme = "digest"; + auth_req->data = user_auth_data; + + // Add auth data to the session + auto auth_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), auth_req); + state_machine->pre_commit(1, auth_entry->get_buf()); + + // Create a node with 'auth' scheme for ACL + String node_path = "/hello"; + std::shared_ptr create_req = std::make_shared(); + create_req->path = node_path; + // When 'auth' scheme is used the creator must have been authenticated by the server (for example, using 'digest' scheme) before it can + // create nodes with this ACL. + create_req->acls = {{.permissions = 31, .scheme = "auth", .id = ""}}; + auto create_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), create_req); + state_machine->pre_commit(2, create_entry->get_buf()); + + const auto & uncommitted_state = state_machine->getStorage().uncommitted_state; + ASSERT_TRUE(uncommitted_state.nodes.contains(node_path)); + + // commit log entries + state_machine->commit(1, auth_entry->get_buf()); + state_machine->commit(2, create_entry->get_buf()); + + auto node = uncommitted_state.getNode(node_path); + ASSERT_NE(node, nullptr); + auto acls = uncommitted_state.getACLs(node_path); + ASSERT_EQ(acls.size(), 1); + EXPECT_EQ(acls[0].scheme, "digest"); + EXPECT_EQ(acls[0].id, digest); + EXPECT_EQ(acls[0].permissions, 31); +} + +TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) +{ + using namespace Coordination; + using namespace DB; + + ChangelogDirTest snapshots("./snapshots"); + CoordinationSettingsPtr settings = std::make_shared(); + ResponsesQueue queue(std::numeric_limits::max()); + SnapshotsQueue snapshots_queue{1}; + + auto state_machine = std::make_shared(queue, snapshots_queue, "./snapshots", settings, keeper_context, nullptr); + state_machine->init(); + + String user_auth_data = "test_user:test_password"; + String digest = KeeperStorage::generateDigest(user_auth_data); + + std::shared_ptr auth_req = std::make_shared(); + auth_req->scheme = "digest"; + auth_req->data = user_auth_data; + + // Add auth data to the session + auto auth_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), auth_req); + state_machine->pre_commit(1, auth_entry->get_buf()); + + // Create a node + String node_path = "/hello"; + std::shared_ptr create_req = std::make_shared(); + create_req->path = node_path; + auto create_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), create_req); + state_machine->pre_commit(2, create_entry->get_buf()); + + // Set ACL with 'auth' scheme for ACL + std::shared_ptr set_acl_req = std::make_shared(); + set_acl_req->path = node_path; + // When 'auth' scheme is used the creator must have been authenticated by the server (for example, using 'digest' scheme) before it can + // set this ACL. + set_acl_req->acls = {{.permissions = 31, .scheme = "auth", .id = ""}}; + auto set_acl_entry = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), set_acl_req); + state_machine->pre_commit(3, set_acl_entry->get_buf()); + + // commit all entries + state_machine->commit(1, auth_entry->get_buf()); + state_machine->commit(2, create_entry->get_buf()); + state_machine->commit(3, set_acl_entry->get_buf()); + + const auto & uncommitted_state = state_machine->getStorage().uncommitted_state; + auto node = uncommitted_state.getNode(node_path); + + ASSERT_NE(node, nullptr); + auto acls = uncommitted_state.getACLs(node_path); + ASSERT_EQ(acls.size(), 1); + EXPECT_EQ(acls[0].scheme, "digest"); + EXPECT_EQ(acls[0].id, digest); + EXPECT_EQ(acls[0].permissions, 31); +} + + TEST_P(CoordinationTest, TestRotateIntervalChanges) { using namespace Coordination; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e9db155fb123..6cd9e2c2d00d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -129,7 +129,7 @@ class IColumn; M(Bool, allow_suspicious_fixed_string_types, false, "In CREATE TABLE statement allows creating columns of type FixedString(n) with n > 256. FixedString with length >= 256 is suspicious and most likely indicates misusage", 0) \ M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \ M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \ - M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \ + M(Bool, compile_aggregate_expressions, false, "Compile aggregate functions to native code. This feature has a bug and should not be used.", 0) \ M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \ M(Bool, compile_sort_description, true, "Compile sort description to native code.", 0) \ M(UInt64, min_count_to_compile_sort_description, 3, "The number of identical sort descriptions before they are JIT-compiled", 0) \ diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 033a657c845d..7e1e8ab764ef 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -33,7 +33,7 @@ class DataTypeArray final : public IDataType bool canBeInsideNullable() const override { - return false; + return true; } MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/DataTypeIPv4andIPv6.h b/src/DataTypes/DataTypeIPv4andIPv6.h index 0a0c61d0ad8e..ad70bdae9336 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.h +++ b/src/DataTypes/DataTypeIPv4andIPv6.h @@ -37,6 +37,7 @@ class DataTypeIPv4 : public IDataType bool textCanContainOnlyValidUTF8() const override { return true; } bool isComparable() const override { return true; } bool isValueRepresentedByNumber() const override { return true; } + bool isValueRepresentedByInteger() const override { return true; } bool isValueRepresentedByUnsignedInteger() const override { return true; } bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; } bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; } diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 2ab5c602a259..793866ff9aa1 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -31,7 +31,7 @@ class DataTypeMap final : public IDataType std::string doGetName() const override; const char * getFamilyName() const override { return "Map"; } - bool canBeInsideNullable() const override { return false; } + bool canBeInsideNullable() const override { return true; } MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 06d46fb15ede..cc0901453e6c 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -41,6 +41,7 @@ class DataTypeNullable final : public IDataType bool onlyNull() const override; bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); } bool canBePromoted() const override { return nested_data_type->canBePromoted(); } + bool canBeInsideNullable() const override { return true; } const DataTypePtr & getNestedType() const { return nested_data_type; } private: diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index 152f21015f5a..2dd8307726dc 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -34,7 +34,7 @@ class DataTypeTuple final : public IDataType std::string doGetName() const override; const char * getFamilyName() const override { return "Tuple"; } - bool canBeInsideNullable() const override { return false; } + bool canBeInsideNullable() const override { return true; } bool supportsSparseSerialization() const override { return true; } MutableColumnPtr createColumn() const override; diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 3c33289c3047..9d42d82ce91e 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -88,7 +88,7 @@ DataTypePtr getNumericType(const TypeIndexSet & types) maximize(max_bits_of_unsigned_integer, 8); else if (type == TypeIndex::UInt16) maximize(max_bits_of_unsigned_integer, 16); - else if (type == TypeIndex::UInt32) + else if (type == TypeIndex::UInt32 || type == TypeIndex::IPv4) maximize(max_bits_of_unsigned_integer, 32); else if (type == TypeIndex::UInt64) maximize(max_bits_of_unsigned_integer, 64); diff --git a/src/Databases/DDLLoadingDependencyVisitor.cpp b/src/Databases/DDLLoadingDependencyVisitor.cpp index 3a61f8216291..22dc0b4af7b6 100644 --- a/src/Databases/DDLLoadingDependencyVisitor.cpp +++ b/src/Databases/DDLLoadingDependencyVisitor.cpp @@ -115,10 +115,13 @@ void DDLLoadingDependencyVisitor::visit(const ASTStorage & storage, Data & data) { if (!storage.engine) return; - if (storage.engine->name != "Dictionary") - return; - extractTableNameFromArgument(*storage.engine, data, 0); + if (storage.engine->name == "Distributed") + /// Checks that dict* expression was used as sharding_key and builds dependency between the dictionary and current table. + /// Distributed(logs, default, hits[, sharding_key[, policy_name]]) + extractTableNameFromArgument(*storage.engine, data, 3); + else if (storage.engine->name == "Dictionary") + extractTableNameFromArgument(*storage.engine, data, 0); } @@ -131,7 +134,29 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction QualifiedTableName qualified_name; const auto * arg = function.arguments->as()->children[arg_idx].get(); - if (const auto * literal = arg->as()) + + if (const auto * dict_function = arg->as()) + { + if (!functionIsDictGet(dict_function->name)) + return; + + /// Get the dictionary name from `dict*` function. + const auto * literal_arg = dict_function->arguments->as()->children[0].get(); + const auto * dictionary_name = literal_arg->as(); + + if (!dictionary_name) + return; + + if (dictionary_name->value.getType() != Field::Types::String) + return; + + auto maybe_qualified_name = QualifiedTableName::tryParseFromString(dictionary_name->value.get()); + if (!maybe_qualified_name) + return; + + qualified_name = std::move(*maybe_qualified_name); + } + else if (const auto * literal = arg->as()) { if (literal->value.getType() != Field::Types::String) return; @@ -167,5 +192,4 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction } data.dependencies.emplace(std::move(qualified_name)); } - } diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 89a799349bfb..199bae4fbb41 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -188,7 +188,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String if (auto named_collection = tryGetNamedCollectionWithOverrides(arguments, context)) { - configuration = StorageMySQL::processNamedCollectionResult(*named_collection, *mysql_settings, false); + configuration = StorageMySQL::processNamedCollectionResult(*named_collection, *mysql_settings, context, false); } else { diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 82a2762e61e0..730217f96b79 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "readInvalidateQuery.h" @@ -37,7 +38,7 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } -static const std::unordered_set dictionary_allowed_keys = { +static const ValidateKeysMultiset dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "invalidate_query", "priority", @@ -69,13 +70,42 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) std::shared_ptr pool; MySQLSettings mysql_settings; - StorageMySQL::Configuration configuration; + std::optional dictionary_configuration; auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix) : nullptr; if (named_collection) { - named_collection->remove("name"); - configuration = StorageMySQL::processNamedCollectionResult(*named_collection, mysql_settings); - global_context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port)); + auto allowed_arguments{dictionary_allowed_keys}; + for (const auto & setting : mysql_settings.all()) + allowed_arguments.insert(setting.getName()); + validateNamedCollection>(*named_collection, {}, allowed_arguments); + + StorageMySQL::Configuration::Addresses addresses; + const auto addresses_expr = named_collection->getOrDefault("addresses_expr", ""); + if (addresses_expr.empty()) + { + const auto host = named_collection->getAnyOrDefault({"host", "hostname"}, ""); + const auto port = static_cast(named_collection->get("port")); + addresses = {std::make_pair(host, port)}; + } + else + { + size_t max_addresses = global_context->getSettingsRef().glob_expansion_max_elements; + addresses = parseRemoteDescriptionForExternalDatabase(addresses_expr, max_addresses, 3306); + } + + for (auto & address : addresses) + global_context->getRemoteHostFilter().checkHostAndPort(address.first, toString(address.second)); + + dictionary_configuration.emplace(MySQLDictionarySource::Configuration{ + .db = named_collection->getAnyOrDefault({"database", "db"}, ""), + .table = named_collection->getOrDefault("table", ""), + .query = named_collection->getOrDefault("query", ""), + .where = named_collection->getOrDefault("where", ""), + .invalidate_query = named_collection->getOrDefault("invalidate_query", ""), + .update_field = named_collection->getOrDefault("update_field", ""), + .update_lag = named_collection->getOrDefault("update_lag", 1), + .dont_check_update_time = named_collection->getOrDefault("dont_check_update_time", false), + }); const auto & settings = global_context->getSettingsRef(); if (!mysql_settings.isChanged("connect_timeout")) @@ -83,38 +113,42 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) if (!mysql_settings.isChanged("read_write_timeout")) mysql_settings.read_write_timeout = settings.external_storage_rw_timeout_sec; - pool = std::make_shared(createMySQLPoolWithFailover(configuration, mysql_settings)); - } - else - { - if (created_from_ddl) + for (const auto & setting : mysql_settings.all()) { - for (auto & address : configuration.addresses) - global_context->getRemoteHostFilter().checkHostAndPort(address.first, toString(address.second)); + const auto & setting_name = setting.getName(); + if (named_collection->has(setting_name)) + mysql_settings.set(setting_name, named_collection->get(setting_name)); } - configuration.database = config.getString(settings_config_prefix + ".db", ""); - configuration.table = config.getString(settings_config_prefix + ".table", ""); - pool = std::make_shared(mysqlxx::PoolFactory::instance().get(config, settings_config_prefix)); + pool = std::make_shared( + createMySQLPoolWithFailover( + dictionary_configuration->db, + addresses, + named_collection->getAnyOrDefault({"user", "username"}, ""), + named_collection->getOrDefault("password", ""), + mysql_settings)); + } + else + { + dictionary_configuration.emplace(MySQLDictionarySource::Configuration{ + .db = config.getString(settings_config_prefix + ".db", ""), + .table = config.getString(settings_config_prefix + ".table", ""), + .query = config.getString(settings_config_prefix + ".query", ""), + .where = config.getString(settings_config_prefix + ".where", ""), + .invalidate_query = config.getString(settings_config_prefix + ".invalidate_query", ""), + .update_field = config.getString(settings_config_prefix + ".update_field", ""), + .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1), + .dont_check_update_time = config.getBool(settings_config_prefix + ".dont_check_update_time", false) + }); + + pool = std::make_shared( + mysqlxx::PoolFactory::instance().get(config, settings_config_prefix)); } - auto query = config.getString(settings_config_prefix + ".query", ""); - if (query.empty() && configuration.table.empty()) + if (dictionary_configuration->query.empty() && dictionary_configuration->table.empty()) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL dictionary source configuration must contain table or query field"); - MySQLDictionarySource::Configuration dictionary_configuration - { - .db = configuration.database, - .table = configuration.table, - .query = query, - .where = config.getString(settings_config_prefix + ".where", ""), - .invalidate_query = config.getString(settings_config_prefix + ".invalidate_query", ""), - .update_field = config.getString(settings_config_prefix + ".update_field", ""), - .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1), - .dont_check_update_time = config.getBool(settings_config_prefix + ".dont_check_update_time", false) - }; - - return std::make_unique(dict_struct, dictionary_configuration, std::move(pool), sample_block, mysql_input_stream_settings); + return std::make_unique(dict_struct, *dictionary_configuration, std::move(pool), sample_block, mysql_input_stream_settings); #else throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Dictionary source of type `mysql` is disabled because ClickHouse was built without mysql support."); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 72346787cfbb..5eaee2e3026f 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -1184,7 +1184,7 @@ String CachedOnDiskReadBufferFromFile::getInfoForLog() implementation_buffer_read_range_str = "None"; String current_file_segment_info; - if (current_file_segment_it == file_segments_holder->file_segments.end()) + if (current_file_segment_it != file_segments_holder->file_segments.end()) current_file_segment_info = (*current_file_segment_it)->getInfoForLog(); else current_file_segment_info = "None"; diff --git a/src/Disks/TemporaryFileOnDisk.cpp b/src/Disks/TemporaryFileOnDisk.cpp index d31b09b2185f..6fe6fd5a1c9a 100644 --- a/src/Disks/TemporaryFileOnDisk.cpp +++ b/src/Disks/TemporaryFileOnDisk.cpp @@ -27,7 +27,7 @@ TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_) : TemporaryFileOnDisk(disk_, "") {} -TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Value metric_scope) +TemporaryFileOnDisk::TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope) : TemporaryFileOnDisk(disk_) { sub_metric_increment.emplace(metric_scope); diff --git a/src/Disks/TemporaryFileOnDisk.h b/src/Disks/TemporaryFileOnDisk.h index 9ba59c3eaf0a..4c376383087f 100644 --- a/src/Disks/TemporaryFileOnDisk.h +++ b/src/Disks/TemporaryFileOnDisk.h @@ -17,7 +17,7 @@ class TemporaryFileOnDisk { public: explicit TemporaryFileOnDisk(const DiskPtr & disk_); - explicit TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Value metric_scope); + explicit TemporaryFileOnDisk(const DiskPtr & disk_, CurrentMetrics::Metric metric_scope); explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix); ~TemporaryFileOnDisk(); diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 053b461244b2..529e50921a1e 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -34,6 +34,7 @@ struct FormatSettings bool null_as_default = true; bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; + bool use_lowercase_column_name = false; bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 100; diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index f832bf404a8b..28002d34acc4 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h new file mode 100644 index 000000000000..4727bb892bfe --- /dev/null +++ b/src/Functions/FunctionsJSON.h @@ -0,0 +1,1679 @@ +#pragma once + +#include +#include + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + + +#include "config.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +concept HasIndexOperator = requires (T t) +{ + t[0]; +}; + +/// Functions to parse JSONs and extract values from it. +/// The first argument of all these functions gets a JSON, +/// after that there are any number of arguments specifying path to a desired part from the JSON's root. +/// For example, +/// select JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) = -100 + +class FunctionJSONHelpers +{ +public: + template typename Impl, class JSONParser> + class Executor + { + public: + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) + { + MutableColumnPtr to{result_type->createColumn()}; + to->reserve(input_rows_count); + + if (arguments.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument", String(Name::name)); + + const auto & first_column = arguments[0]; + if (!isString(first_column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The first argument of function {} should be a string containing JSON, illegal type: " + "{}", String(Name::name), first_column.type->getName()); + + const ColumnPtr & arg_json = first_column.column; + const auto * col_json_const = typeid_cast(arg_json.get()); + const auto * col_json_string + = typeid_cast(col_json_const ? col_json_const->getDataColumnPtr().get() : arg_json.get()); + + if (!col_json_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}", arg_json->getName()); + + const ColumnString::Chars & chars = col_json_string->getChars(); + const ColumnString::Offsets & offsets = col_json_string->getOffsets(); + + size_t num_index_arguments = Impl::getNumberOfIndexArguments(arguments); + std::vector moves = prepareMoves(Name::name, arguments, 1, num_index_arguments); + + /// Preallocate memory in parser if necessary. + JSONParser parser; + if constexpr (has_member_function_reserve::value) + { + size_t max_size = calculateMaxSize(offsets); + if (max_size) + parser.reserve(max_size); + } + + Impl impl; + + /// prepare() does Impl-specific preparation before handling each row. + if constexpr (has_member_function_prepare::*)(const char *, const ColumnsWithTypeAndName &, const DataTypePtr &)>::value) + impl.prepare(Name::name, arguments, result_type); + + using Element = typename JSONParser::Element; + + Element document; + bool document_ok = false; + if (col_json_const) + { + std::string_view json{reinterpret_cast(chars.data()), offsets[0] - 1}; + document_ok = parser.parse(json, document); + } + + for (const auto i : collections::range(0, input_rows_count)) + { + if (!col_json_const) + { + std::string_view json{reinterpret_cast(&chars[offsets[i - 1]]), offsets[i] - offsets[i - 1] - 1}; + document_ok = parser.parse(json, document); + } + + bool added_to_column = false; + if (document_ok) + { + /// Perform moves. + Element element; + std::string_view last_key; + bool moves_ok = performMoves(arguments, i, document, moves, element, last_key); + + if (moves_ok) + added_to_column = impl.insertResultToColumn(*to, element, last_key); + } + + /// We add default value (=null or zero) if something goes wrong, we don't throw exceptions in these JSON functions. + if (!added_to_column) + to->insertDefault(); + } + return to; + } + }; + +private: + BOOST_TTI_HAS_MEMBER_FUNCTION(reserve) + BOOST_TTI_HAS_MEMBER_FUNCTION(prepare) + + /// Represents a move of a JSON iterator described by a single argument passed to a JSON function. + /// For example, the call JSONExtractInt('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 1) + /// contains two moves: {MoveType::ConstKey, "b"} and {MoveType::ConstIndex, 1}. + /// Keys and indices can be nonconst, in this case they are calculated for each row. + enum class MoveType + { + Key, + Index, + ConstKey, + ConstIndex, + }; + + struct Move + { + explicit Move(MoveType type_, size_t index_ = 0) : type(type_), index(index_) {} + Move(MoveType type_, const String & key_) : type(type_), key(key_) {} + MoveType type; + size_t index = 0; + String key; + }; + + static std::vector prepareMoves( + const char * function_name, + const ColumnsWithTypeAndName & columns, + size_t first_index_argument, + size_t num_index_arguments) + { + std::vector moves; + moves.reserve(num_index_arguments); + for (const auto i : collections::range(first_index_argument, first_index_argument + num_index_arguments)) + { + const auto & column = columns[i]; + if (!isString(column.type) && !isNativeInteger(column.type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The argument {} of function {} should be a string specifying key " + "or an integer specifying index, illegal type: {}", + std::to_string(i + 1), String(function_name), column.type->getName()); + + if (column.column && isColumnConst(*column.column)) + { + const auto & column_const = assert_cast(*column.column); + if (isString(column.type)) + moves.emplace_back(MoveType::ConstKey, column_const.getValue()); + else + moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0)); + } + else + { + if (isString(column.type)) + moves.emplace_back(MoveType::Key, ""); + else + moves.emplace_back(MoveType::Index, 0); + } + } + return moves; + } + + + /// Performs moves of types MoveType::Index and MoveType::ConstIndex. + template + static bool performMoves(const ColumnsWithTypeAndName & arguments, size_t row, + const typename JSONParser::Element & document, const std::vector & moves, + typename JSONParser::Element & element, std::string_view & last_key) + { + typename JSONParser::Element res_element = document; + std::string_view key; + + for (size_t j = 0; j != moves.size(); ++j) + { + switch (moves[j].type) + { + case MoveType::ConstIndex: + { + if (!moveToElementByIndex(res_element, static_cast(moves[j].index), key)) + return false; + break; + } + case MoveType::ConstKey: + { + key = moves[j].key; + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + case MoveType::Index: + { + Int64 index = (*arguments[j + 1].column)[row].get(); + if (!moveToElementByIndex(res_element, static_cast(index), key)) + return false; + break; + } + case MoveType::Key: + { + key = (*arguments[j + 1].column).getDataAt(row).toView(); + if (!moveToElementByKey(res_element, key)) + return false; + break; + } + } + } + + element = res_element; + last_key = key; + return true; + } + + template + static bool moveToElementByIndex(typename JSONParser::Element & element, int index, std::string_view & out_key) + { + if (element.isArray()) + { + auto array = element.getArray(); + if (index >= 0) + --index; + else + index += array.size(); + + if (static_cast(index) >= array.size()) + return false; + element = array[index]; + out_key = {}; + return true; + } + + if constexpr (HasIndexOperator) + { + if (element.isObject()) + { + auto object = element.getObject(); + if (index >= 0) + --index; + else + index += object.size(); + + if (static_cast(index) >= object.size()) + return false; + std::tie(out_key, element) = object[index]; + return true; + } + } + + return {}; + } + + /// Performs moves of types MoveType::Key and MoveType::ConstKey. + template + static bool moveToElementByKey(typename JSONParser::Element & element, std::string_view key) + { + if (!element.isObject()) + return false; + auto object = element.getObject(); + return object.find(key, element); + } + + static size_t calculateMaxSize(const ColumnString::Offsets & offsets) + { + size_t max_size = 0; + for (const auto i : collections::range(0, offsets.size())) + { + size_t size = offsets[i] - offsets[i - 1]; + if (max_size < size) + max_size = size; + } + if (max_size) + --max_size; + return max_size; + } + +}; + + +template typename Impl> +class ExecutableFunctionJSON : public IExecutableFunction, WithContext +{ + +public: + explicit ExecutableFunctionJSON(const NullPresence & null_presence_, bool allow_simdjson_, const DataTypePtr & json_return_type_) + : null_presence(null_presence_), allow_simdjson(allow_simdjson_), json_return_type(json_return_type_) + { + } + + String getName() const override { return Name::name; } + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + if (null_presence.has_null_constant) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + if (null_presence.has_nullable) + return wrapInNullable(temporary_result, arguments, result_type, input_rows_count); + return temporary_result; + } + +private: + + ColumnPtr + chooseAndRunJSONParser(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { +#if USE_SIMDJSON + if (allow_simdjson) + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); +#endif + +#if USE_RAPIDJSON + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); +#else + return FunctionJSONHelpers::Executor::run(arguments, result_type, input_rows_count); +#endif + } + + NullPresence null_presence; + bool allow_simdjson; + DataTypePtr json_return_type; +}; + + +template typename Impl> +class FunctionBaseFunctionJSON : public IFunctionBase +{ +public: + explicit FunctionBaseFunctionJSON( + const NullPresence & null_presence_, + bool allow_simdjson_, + DataTypes argument_types_, + DataTypePtr return_type_, + DataTypePtr json_return_type_) + : null_presence(null_presence_) + , allow_simdjson(allow_simdjson_) + , argument_types(std::move(argument_types_)) + , return_type(std::move(return_type_)) + , json_return_type(std::move(json_return_type_)) + { + } + + String getName() const override { return Name::name; } + + const DataTypes & getArgumentTypes() const override + { + return argument_types; + } + + const DataTypePtr & getResultType() const override + { + return return_type; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName &) const override + { + return std::make_unique>(null_presence, allow_simdjson, json_return_type); + } + +private: + NullPresence null_presence; + bool allow_simdjson; + DataTypes argument_types; + DataTypePtr return_type; + DataTypePtr json_return_type; +}; + + +/// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. +/// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. +template typename Impl> +class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext +{ +public: + static constexpr auto name = Name::name; + + String getName() const override { return name; } + + static FunctionOverloadResolverPtr create(ContextPtr context_) + { + return std::make_unique(context_); + } + + explicit JSONOverloadResolver(ContextPtr context_) : WithContext(context_) {} + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForNulls() const override { return false; } + + FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override + { + bool has_nothing_argument = false; + for (const auto & arg : arguments) + has_nothing_argument |= isNothing(arg.type); + + DataTypePtr json_return_type = Impl::getReturnType(Name::name, createBlockWithNestedColumns(arguments)); + NullPresence null_presence = getNullPresense(arguments); + DataTypePtr return_type; + if (has_nothing_argument) + return_type = std::make_shared(); + else if (null_presence.has_null_constant) + return_type = makeNullable(std::make_shared()); + else if (null_presence.has_nullable) + return_type = makeNullable(json_return_type); + else + return_type = json_return_type; + + /// Top-level LowCardinality columns are processed outside JSON parser. + json_return_type = removeLowCardinality(json_return_type); + + DataTypes argument_types; + argument_types.reserve(arguments.size()); + for (const auto & argument : arguments) + argument_types.emplace_back(argument.type); + return std::make_unique>( + null_presence, getContext()->getSettingsRef().allow_simdjson, argument_types, return_type, json_return_type); + } +}; + + +struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; +struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; +struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; +struct NameJSONKey { static constexpr auto name{"JSONKey"}; }; +struct NameJSONType { static constexpr auto name{"JSONType"}; }; +struct NameJSONExtractInt { static constexpr auto name{"JSONExtractInt"}; }; +struct NameJSONExtractUInt { static constexpr auto name{"JSONExtractUInt"}; }; +struct NameJSONExtractFloat { static constexpr auto name{"JSONExtractFloat"}; }; +struct NameJSONExtractBool { static constexpr auto name{"JSONExtractBool"}; }; +struct NameJSONExtractString { static constexpr auto name{"JSONExtractString"}; }; +struct NameJSONExtract { static constexpr auto name{"JSONExtract"}; }; +struct NameJSONExtractKeysAndValues { static constexpr auto name{"JSONExtractKeysAndValues"}; }; +struct NameJSONExtractRaw { static constexpr auto name{"JSONExtractRaw"}; }; +struct NameJSONExtractArrayRaw { static constexpr auto name{"JSONExtractArrayRaw"}; }; +struct NameJSONExtractKeysAndValuesRaw { static constexpr auto name{"JSONExtractKeysAndValuesRaw"}; }; +struct NameJSONExtractKeys { static constexpr auto name{"JSONExtractKeys"}; }; + + +template +class JSONHasImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared(); } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) + { + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } +}; + + +template +class IsValidJSONImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() != 1) + { + /// IsValidJSON() shouldn't get parameters other than JSON. + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument", + String(function_name)); + } + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName &) { return 0; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view) + { + /// This function is called only if JSON is valid. + /// If JSON isn't valid then `FunctionJSON::Executor::run()` adds default value (=zero) to `dest` without calling this function. + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(1); + return true; + } +}; + + +template +class JSONLengthImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + size_t size; + if (element.isArray()) + size = element.getArray().size(); + else if (element.isObject()) + size = element.getObject().size(); + else + return false; + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(size); + return true; + } +}; + + +template +class JSONKeyImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element &, std::string_view last_key) + { + if (last_key.empty()) + return false; + ColumnString & col_str = assert_cast(dest); + col_str.insertData(last_key.data(), last_key.size()); + return true; + } +}; + + +template +class JSONTypeImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + static const std::vector> values = { + {"Array", '['}, + {"Object", '{'}, + {"String", '"'}, + {"Int64", 'i'}, + {"UInt64", 'u'}, + {"Double", 'd'}, + {"Bool", 'b'}, + {"Null", 0}, /// the default value for the column. + }; + return std::make_shared>(values); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + UInt8 type; + switch (element.type()) + { + case ElementType::INT64: + type = 'i'; + break; + case ElementType::UINT64: + type = 'u'; + break; + case ElementType::DOUBLE: + type = 'd'; + break; + case ElementType::STRING: + type = '"'; + break; + case ElementType::ARRAY: + type = '['; + break; + case ElementType::OBJECT: + type = '{'; + break; + case ElementType::BOOL: + type = 'b'; + break; + case ElementType::NULL_VALUE: + type = 0; + break; + } + + ColumnVector & col_vec = assert_cast &>(dest); + col_vec.insertValue(type); + return true; + } +}; + + +template +class JSONExtractNumericImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared>(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + NumberType value; + + switch (element.type()) + { + case ElementType::DOUBLE: + if constexpr (std::is_floating_point_v) + { + /// We permit inaccurate conversion of double to float. + /// Example: double 0.1 from JSON is not representable in float. + /// But it will be more convenient for user to perform conversion. + value = static_cast(element.getDouble()); + } + else if (!accurate::convertNumeric(element.getDouble(), value)) + return false; + break; + case ElementType::UINT64: + if (!accurate::convertNumeric(element.getUInt64(), value)) + return false; + break; + case ElementType::INT64: + if (!accurate::convertNumeric(element.getInt64(), value)) + return false; + break; + case ElementType::BOOL: + if constexpr (is_integer && convert_bool_to_integer) + { + value = static_cast(element.getBool()); + break; + } + return false; + case ElementType::STRING: + { + auto rb = ReadBufferFromMemory{element.getString()}; + if constexpr (std::is_floating_point_v) + { + if (!tryReadFloatText(value, rb) || !rb.eof()) + return false; + } + else + { + if (tryReadIntText(value, rb) && rb.eof()) + break; + + /// Try to parse float and convert it to integer. + Float64 tmp_float; + rb.position() = rb.buffer().begin(); + if (!tryReadFloatText(tmp_float, rb) || !rb.eof()) + return false; + + if (!accurate::convertNumeric(tmp_float, value)) + return false; + } + break; + } + default: + return false; + } + + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnLowCardinality & col_low = assert_cast(dest); + col_low.insertData(reinterpret_cast(&value), sizeof(value)); + } + else + { + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(value); + } + return true; + } +}; + + +template +using JSONExtractInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractUInt64Impl = JSONExtractNumericImpl; +template +using JSONExtractFloat64Impl = JSONExtractNumericImpl; + + +template +class JSONExtractBoolImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + bool value; + switch (element.type()) + { + case ElementType::BOOL: + value = element.getBool(); + break; + case ElementType::INT64: + value = element.getInt64() != 0; + break; + case ElementType::UINT64: + value = element.getUInt64() != 0; + break; + default: + return false; + } + + auto & col_vec = assert_cast &>(dest); + col_vec.insertValue(static_cast(value)); + return true; + } +}; + +template +class JSONExtractRawImpl; + +template +class JSONExtractStringImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + if (element.isNull()) + return false; + + if (!element.isString()) + return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); + + auto str = element.getString(); + + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnLowCardinality & col_low = assert_cast(dest); + col_low.insertData(str.data(), str.size()); + } + else + { + ColumnString & col_str = assert_cast(dest); + col_str.insertData(str.data(), str.size()); + } + return true; + } +}; + +/// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. +template +struct JSONExtractTree +{ + using Element = typename JSONParser::Element; + + class Node + { + public: + Node() = default; + virtual ~Node() = default; + virtual bool insertResultToColumn(IColumn &, const Element &) = 0; + }; + + template + class NumericNode : public Node + { + public: + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + return JSONExtractNumericImpl::insertResultToColumn(dest, element, {}); + } + }; + + class LowCardinalityFixedStringNode : public Node + { + public: + explicit LowCardinalityFixedStringNode(const size_t fixed_length_) : fixed_length(fixed_length_) { } + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + // If element is an object we delegate the insertion to JSONExtractRawImpl + if (element.isObject()) + return JSONExtractRawImpl::insertResultToLowCardinalityFixedStringColumn(dest, element, fixed_length); + else if (!element.isString()) + return false; + + auto str = element.getString(); + if (str.size() > fixed_length) + return false; + + // For the non low cardinality case of FixedString, the padding is done in the FixedString Column implementation. + // In order to avoid having to pass the data to a FixedString Column and read it back (which would slow down the execution) + // the data is padded here and written directly to the Low Cardinality Column + if (str.size() == fixed_length) + { + assert_cast(dest).insertData(str.data(), str.size()); + } + else + { + String padded_str(str); + padded_str.resize(fixed_length, '\0'); + + assert_cast(dest).insertData(padded_str.data(), padded_str.size()); + } + return true; + } + + private: + const size_t fixed_length; + }; + + class UUIDNode : public Node + { + public: + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + if (!element.isString()) + return false; + + auto uuid = parseFromString(element.getString()); + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnLowCardinality & col_low = assert_cast(dest); + col_low.insertData(reinterpret_cast(&uuid), sizeof(uuid)); + } + else + { + assert_cast(dest).insert(uuid); + } + return true; + } + }; + + template + class DecimalNode : public Node + { + public: + explicit DecimalNode(DataTypePtr data_type_) : data_type(data_type_) {} + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + const auto * type = assert_cast *>(data_type.get()); + + DecimalType value{}; + + switch (element.type()) + { + case ElementType::DOUBLE: + value = convertToDecimal, DataTypeDecimal>( + element.getDouble(), type->getScale()); + break; + case ElementType::UINT64: + value = convertToDecimal, DataTypeDecimal>( + element.getUInt64(), type->getScale()); + break; + case ElementType::INT64: + value = convertToDecimal, DataTypeDecimal>( + element.getInt64(), type->getScale()); + break; + case ElementType::STRING: { + auto rb = ReadBufferFromMemory{element.getString()}; + if (!SerializationDecimal::tryReadText(value, rb, DecimalUtils::max_precision, type->getScale())) + return false; + break; + } + default: + return false; + } + + assert_cast &>(dest).insertValue(value); + return true; + } + + private: + DataTypePtr data_type; + }; + + class StringNode : public Node + { + public: + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); + } + }; + + class FixedStringNode : public Node + { + public: + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + if (element.isNull()) + return false; + + if (!element.isString()) + return JSONExtractRawImpl::insertResultToFixedStringColumn(dest, element, {}); + + auto str = element.getString(); + auto & col_str = assert_cast(dest); + if (str.size() > col_str.getN()) + return false; + col_str.insertData(str.data(), str.size()); + + return true; + } + }; + + template + class EnumNode : public Node + { + public: + explicit EnumNode(const std::vector> & name_value_pairs_) : name_value_pairs(name_value_pairs_) + { + for (const auto & name_value_pair : name_value_pairs) + { + name_to_value_map.emplace(name_value_pair.first, name_value_pair.second); + only_values.emplace(name_value_pair.second); + } + } + + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + auto & col_vec = assert_cast &>(dest); + + if (element.isInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getInt64(), value) || !only_values.contains(value)) + return false; + col_vec.insertValue(value); + return true; + } + + if (element.isUInt64()) + { + Type value; + if (!accurate::convertNumeric(element.getUInt64(), value) || !only_values.contains(value)) + return false; + col_vec.insertValue(value); + return true; + } + + if (element.isString()) + { + auto value = name_to_value_map.find(element.getString()); + if (value == name_to_value_map.end()) + return false; + col_vec.insertValue(value->second); + return true; + } + + return false; + } + + private: + std::vector> name_value_pairs; + std::unordered_map name_to_value_map; + std::unordered_set only_values; + }; + + class NullableNode : public Node + { + public: + explicit NullableNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} + + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + ColumnNullable & col_null = assert_cast(dest); + if (!nested->insertResultToColumn(col_null.getNestedColumn(), element)) + return false; + col_null.getNullMapColumn().insertValue(0); + return true; + } + + private: + std::unique_ptr nested; + }; + + class ArrayNode : public Node + { + public: + explicit ArrayNode(std::unique_ptr nested_) : nested(std::move(nested_)) {} + + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + if (!element.isArray()) + return false; + + auto array = element.getArray(); + + ColumnArray & col_arr = assert_cast(dest); + auto & data = col_arr.getData(); + size_t old_size = data.size(); + bool were_valid_elements = false; + + for (auto value : array) + { + if (nested->insertResultToColumn(data, value)) + were_valid_elements = true; + else + data.insertDefault(); + } + + if (!were_valid_elements) + { + data.popBack(data.size() - old_size); + return false; + } + + col_arr.getOffsets().push_back(data.size()); + return true; + } + + private: + std::unique_ptr nested; + }; + + class TupleNode : public Node + { + public: + TupleNode(std::vector> nested_, const std::vector & explicit_names_) : nested(std::move(nested_)), explicit_names(explicit_names_) + { + for (size_t i = 0; i != explicit_names.size(); ++i) + name_to_index_map.emplace(explicit_names[i], i); + } + + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + ColumnTuple & tuple = assert_cast(dest); + size_t old_size = dest.size(); + bool were_valid_elements = false; + + auto set_size = [&](size_t size) + { + for (size_t i = 0; i != tuple.tupleSize(); ++i) + { + auto & col = tuple.getColumn(i); + if (col.size() != size) + { + if (col.size() > size) + col.popBack(col.size() - size); + else + while (col.size() < size) + col.insertDefault(); + } + } + }; + + if (element.isArray()) + { + auto array = element.getArray(); + auto it = array.begin(); + + for (size_t index = 0; (index != nested.size()) && (it != array.end()); ++index) + { + if (nested[index]->insertResultToColumn(tuple.getColumn(index), *it++)) + were_valid_elements = true; + else + tuple.getColumn(index).insertDefault(); + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + if (element.isObject()) + { + auto object = element.getObject(); + if (name_to_index_map.empty()) + { + auto it = object.begin(); + for (size_t index = 0; (index != nested.size()) && (it != object.end()); ++index) + { + if (nested[index]->insertResultToColumn(tuple.getColumn(index), (*it++).second)) + were_valid_elements = true; + else + tuple.getColumn(index).insertDefault(); + } + } + else + { + for (const auto & [key, value] : object) + { + auto index = name_to_index_map.find(key); + if (index != name_to_index_map.end()) + { + if (nested[index->second]->insertResultToColumn(tuple.getColumn(index->second), value)) + were_valid_elements = true; + } + } + } + + set_size(old_size + static_cast(were_valid_elements)); + return were_valid_elements; + } + + return false; + } + + private: + std::vector> nested; + std::vector explicit_names; + std::unordered_map name_to_index_map; + }; + + class MapNode : public Node + { + public: + MapNode(std::unique_ptr key_, std::unique_ptr value_) : key(std::move(key_)), value(std::move(value_)) { } + + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + if (!element.isObject()) + return false; + + ColumnMap & map_col = assert_cast(dest); + auto & offsets = map_col.getNestedColumn().getOffsets(); + auto & tuple_col = map_col.getNestedData(); + auto & key_col = tuple_col.getColumn(0); + auto & value_col = tuple_col.getColumn(1); + size_t old_size = tuple_col.size(); + + auto object = element.getObject(); + auto it = object.begin(); + for (; it != object.end(); ++it) + { + auto pair = *it; + + /// Insert key + key_col.insertData(pair.first.data(), pair.first.size()); + + /// Insert value + if (!value->insertResultToColumn(value_col, pair.second)) + value_col.insertDefault(); + } + + offsets.push_back(old_size + object.size()); + return true; + } + + private: + std::unique_ptr key; + std::unique_ptr value; + }; + + static std::unique_ptr build(const char * function_name, const DataTypePtr & type) + { + switch (type->getTypeId()) + { + case TypeIndex::UInt8: return std::make_unique>(); + case TypeIndex::UInt16: return std::make_unique>(); + case TypeIndex::UInt32: return std::make_unique>(); + case TypeIndex::UInt64: return std::make_unique>(); + case TypeIndex::UInt128: return std::make_unique>(); + case TypeIndex::UInt256: return std::make_unique>(); + case TypeIndex::Int8: return std::make_unique>(); + case TypeIndex::Int16: return std::make_unique>(); + case TypeIndex::Int32: return std::make_unique>(); + case TypeIndex::Int64: return std::make_unique>(); + case TypeIndex::Int128: return std::make_unique>(); + case TypeIndex::Int256: return std::make_unique>(); + case TypeIndex::Float32: return std::make_unique>(); + case TypeIndex::Float64: return std::make_unique>(); + case TypeIndex::String: return std::make_unique(); + case TypeIndex::FixedString: return std::make_unique(); + case TypeIndex::UUID: return std::make_unique(); + case TypeIndex::LowCardinality: + { + // The low cardinality case is treated in two different ways: + // For FixedString type, an especial class is implemented for inserting the data in the destination column, + // as the string length must be passed in order to check and pad the incoming data. + // For the rest of low cardinality types, the insertion is done in their corresponding class, adapting the data + // as needed for the insertData function of the ColumnLowCardinality. + auto dictionary_type = typeid_cast(type.get())->getDictionaryType(); + if ((*dictionary_type).getTypeId() == TypeIndex::FixedString) + { + auto fixed_length = typeid_cast(dictionary_type.get())->getN(); + return std::make_unique(fixed_length); + } + return build(function_name, dictionary_type); + } + case TypeIndex::Decimal256: return std::make_unique>(type); + case TypeIndex::Decimal128: return std::make_unique>(type); + case TypeIndex::Decimal64: return std::make_unique>(type); + case TypeIndex::Decimal32: return std::make_unique>(type); + case TypeIndex::Enum8: + return std::make_unique>(static_cast(*type).getValues()); + case TypeIndex::Enum16: + return std::make_unique>(static_cast(*type).getValues()); + case TypeIndex::Nullable: + { + return std::make_unique(build(function_name, static_cast(*type).getNestedType())); + } + case TypeIndex::Array: + { + return std::make_unique(build(function_name, static_cast(*type).getNestedType())); + } + case TypeIndex::Tuple: + { + const auto & tuple = static_cast(*type); + const auto & tuple_elements = tuple.getElements(); + std::vector> elements; + elements.reserve(tuple_elements.size()); + for (const auto & tuple_element : tuple_elements) + elements.emplace_back(build(function_name, tuple_element)); + return std::make_unique(std::move(elements), tuple.haveExplicitNames() ? tuple.getElementNames() : Strings{}); + } + case TypeIndex::Map: + { + const auto & map_type = static_cast(*type); + const auto & key_type = map_type.getKeyType(); + if (!isString(removeLowCardinality(key_type))) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function {} doesn't support the return type schema: {} with key type not String", + String(function_name), + type->getName()); + + const auto & value_type = map_type.getValueType(); + return std::make_unique(build(function_name, key_type), build(function_name, value_type)); + } + default: + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function {} doesn't support the return type schema: {}", + String(function_name), type->getName()); + } + } +}; + + +template +class JSONExtractImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the return data type, illegal value: {}", + String(function_name), col.name); + + return DataTypeFactory::instance().get(col_type_const->getValue()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + extract_tree = JSONExtractTree::build(function_name, result_type); + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + return extract_tree->insertResultToColumn(dest, element); + } + +protected: + std::unique_ptr::Node> extract_tree; +}; + + +template +class JSONExtractKeysAndValuesImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char * function_name, const ColumnsWithTypeAndName & arguments) + { + if (arguments.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least two arguments", String(function_name)); + + const auto & col = arguments.back(); + const auto * col_type_const = typeid_cast(col.column.get()); + if (!col_type_const || !isString(col.type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The last argument of function {} should " + "be a constant string specifying the values' data type, illegal value: {}", + String(function_name), col.name); + + DataTypePtr key_type = std::make_unique(); + DataTypePtr value_type = DataTypeFactory::instance().get(col_type_const->getValue()); + DataTypePtr tuple_type = std::make_unique(DataTypes{key_type, value_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 2; } + + void prepare(const char * function_name, const ColumnsWithTypeAndName &, const DataTypePtr & result_type) + { + const auto tuple_type = typeid_cast(result_type.get())->getNestedType(); + const auto value_type = typeid_cast(tuple_type.get())->getElements()[1]; + extract_tree = JSONExtractTree::build(function_name, value_type); + } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + size_t old_size = col_tuple.size(); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = col_tuple.getColumn(1); + + for (const auto & [key, value] : object) + { + if (extract_tree->insertResultToColumn(col_value, value)) + col_key.insertData(key.data(), key.size()); + } + + if (col_tuple.size() == old_size) + return false; + + col_arr.getOffsets().push_back(col_tuple.size()); + return true; + } + +private: + std::unique_ptr::Node> extract_tree; +}; + + +template +class JSONExtractRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + if (dest.getDataType() == TypeIndex::LowCardinality) + { + ColumnString::Chars chars; + WriteBufferFromVector buf(chars, AppendModeTag()); + traverse(element, buf); + buf.finalize(); + assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); + } + else + { + ColumnString & col_str = assert_cast(dest); + auto & chars = col_str.getChars(); + WriteBufferFromVector buf(chars, AppendModeTag()); + traverse(element, buf); + buf.finalize(); + chars.push_back(0); + col_str.getOffsets().push_back(chars.size()); + } + return true; + } + + // We use insertResultToFixedStringColumn in case we are inserting raw data in a FixedString column + static bool insertResultToFixedStringColumn(IColumn & dest, const Element & element, std::string_view) + { + ColumnFixedString::Chars chars; + WriteBufferFromVector buf(chars, AppendModeTag()); + traverse(element, buf); + buf.finalize(); + + auto & col_str = assert_cast(dest); + + if (chars.size() > col_str.getN()) + return false; + + chars.resize_fill(col_str.getN()); + col_str.insertData(reinterpret_cast(chars.data()), chars.size()); + + + return true; + } + + // We use insertResultToLowCardinalityFixedStringColumn in case we are inserting raw data in a Low Cardinality FixedString column + static bool insertResultToLowCardinalityFixedStringColumn(IColumn & dest, const Element & element, size_t fixed_length) + { + if (element.getObject().size() > fixed_length) + return false; + + ColumnFixedString::Chars chars; + WriteBufferFromVector buf(chars, AppendModeTag()); + traverse(element, buf); + buf.finalize(); + + if (chars.size() > fixed_length) + return false; + chars.resize_fill(fixed_length); + assert_cast(dest).insertData(reinterpret_cast(chars.data()), chars.size()); + + return true; + } + +private: + static void traverse(const Element & element, WriteBuffer & buf) + { + if (element.isInt64()) + { + writeIntText(element.getInt64(), buf); + return; + } + if (element.isUInt64()) + { + writeIntText(element.getUInt64(), buf); + return; + } + if (element.isDouble()) + { + writeFloatText(element.getDouble(), buf); + return; + } + if (element.isBool()) + { + if (element.getBool()) + writeCString("true", buf); + else + writeCString("false", buf); + return; + } + if (element.isString()) + { + writeJSONString(element.getString(), buf, formatSettings()); + return; + } + if (element.isArray()) + { + writeChar('[', buf); + bool need_comma = false; + for (auto value : element.getArray()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + traverse(value, buf); + } + writeChar(']', buf); + return; + } + if (element.isObject()) + { + writeChar('{', buf); + bool need_comma = false; + for (auto [key, value] : element.getObject()) + { + if (std::exchange(need_comma, true)) + writeChar(',', buf); + writeJSONString(key, buf, formatSettings()); + writeChar(':', buf); + traverse(value, buf); + } + writeChar('}', buf); + return; + } + if (element.isNull()) + { + writeCString("null", buf); + return; + } + } + + static const FormatSettings & formatSettings() + { + static const FormatSettings the_instance = [] + { + FormatSettings settings; + settings.json.escape_forward_slashes = false; + return settings; + }(); + return the_instance; + } +}; + + +template +class JSONExtractArrayRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_shared(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + static bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + if (!element.isArray()) + return false; + + auto array = element.getArray(); + ColumnArray & col_res = assert_cast(dest); + + for (auto value : array) + JSONExtractRawImpl::insertResultToColumn(col_res.getData(), value, {}); + + col_res.getOffsets().push_back(col_res.getOffsets().back() + array.size()); + return true; + } +}; + + +template +class JSONExtractKeysAndValuesRawImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + DataTypePtr string_type = std::make_unique(); + DataTypePtr tuple_type = std::make_unique(DataTypes{string_type, string_type}); + return std::make_unique(tuple_type); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + auto & col_arr = assert_cast(dest); + auto & col_tuple = assert_cast(col_arr.getData()); + auto & col_key = assert_cast(col_tuple.getColumn(0)); + auto & col_value = assert_cast(col_tuple.getColumn(1)); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + JSONExtractRawImpl::insertResultToColumn(col_value, value, {}); + } + + col_arr.getOffsets().push_back(col_arr.getOffsets().back() + object.size()); + return true; + } +}; + +template +class JSONExtractKeysImpl +{ +public: + using Element = typename JSONParser::Element; + + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) + { + return std::make_unique(std::make_shared()); + } + + static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } + + bool insertResultToColumn(IColumn & dest, const Element & element, std::string_view) + { + if (!element.isObject()) + return false; + + auto object = element.getObject(); + + ColumnArray & col_res = assert_cast(dest); + auto & col_key = assert_cast(col_res.getData()); + + for (const auto & [key, value] : object) + { + col_key.insertData(key.data(), key.size()); + } + + col_res.getOffsets().push_back(col_res.getOffsets().back() + object.size()); + return true; + } +}; + +} diff --git a/src/Functions/GatherUtils/Algorithms.h b/src/Functions/GatherUtils/Algorithms.h index c9b67dddd0bb..77c820ba75a5 100644 --- a/src/Functions/GatherUtils/Algorithms.h +++ b/src/Functions/GatherUtils/Algorithms.h @@ -381,6 +381,10 @@ static void sliceDynamicOffsetBoundedImpl(Source && src, Sink && sink, const ICo Int64 offset = has_offset ? offset_nested_column->getInt(row_num) : 1; Int64 size = has_length ? length_nested_column->getInt(row_num) : static_cast(src.getElementSize()); + if(offset==0){ + offset=1; + } + if (size < 0) size += offset > 0 ? static_cast(src.getElementSize()) - (offset - 1) : -UInt64(offset); diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 4081850bb86e..a25ef47d60da 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -195,7 +195,15 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls( if (null_presence.has_nullable) { ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args); - auto temporary_result_type = removeNullable(result_type); + + DataTypePtr temporary_result_type; + if (resolver) + { + auto temporary_function_base = resolver->build(temporary_columns); + temporary_result_type = temporary_function_base->getResultType(); + } + else + temporary_result_type = removeNullable(result_type); auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run); return wrapInNullable(res, args, result_type, input_rows_count); diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index b613ddb89873..1e161d4536e5 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -31,6 +31,8 @@ namespace ErrorCodes } class Field; +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; /// The simplest executable object. /// Motivation: @@ -48,6 +50,8 @@ class IExecutableFunction ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const; + void setResolver(const FunctionOverloadResolverPtr & resolver_) { resolver = resolver_; } + protected: virtual ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const = 0; @@ -99,6 +103,8 @@ class IExecutableFunction */ virtual bool canBeExecutedOnDefaultArguments() const { return true; } + FunctionOverloadResolverPtr resolver; + private: ColumnPtr defaultImplementationForConstantArguments( @@ -396,7 +402,6 @@ class IFunctionOverloadResolver DataTypePtr getReturnTypeWithoutLowCardinality(const ColumnsWithTypeAndName & arguments) const; }; -using FunctionOverloadResolverPtr = std::shared_ptr; /// Old function interface. Check documentation in IFunction.h. /// If client do not need stateful properties it can implement this interface. diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index e972a6846dac..c99a6cfd0d1c 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -483,17 +483,25 @@ ColumnPtr FunctionArrayElement::executeNumberConst( } else if (index.getType() == Field::Types::Int64) { - /// Cast to UInt64 before negation allows to avoid undefined behaviour for negation of the most negative number. - /// NOTE: this would be undefined behaviour in C++ sense, but nevertheless, compiler cannot see it on user provided data, - /// and generates the code that we want on supported CPU architectures (overflow in sense of two's complement arithmetic). - /// This is only needed to avoid UBSan report. - - /// Negative array indices work this way: - /// arr[-1] is the element at offset 0 from the last - /// arr[-2] is the element at offset 1 from the last and so on. - - ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), -(static_cast(index.safeGet()) + 1), col_res->getData(), builder); + auto value = index.safeGet(); + if (value >= 0) + { + ArrayElementNumImpl::template vectorConst( + col_nested->getData(), col_array->getOffsets(), static_cast(value) - 1, col_res->getData(), builder); + } + else + { + /// Cast to UInt64 before negation allows to avoid undefined behaviour for negation of the most negative number. + /// NOTE: this would be undefined behaviour in C++ sense, but nevertheless, compiler cannot see it on user provided data, + /// and generates the code that we want on supported CPU architectures (overflow in sense of two's complement arithmetic). + /// This is only needed to avoid UBSan report. + + /// Negative array indices work this way: + /// arr[-1] is the element at offset 0 from the last + /// arr[-2] is the element at offset 1 from the last and so on. + ArrayElementNumImpl::template vectorConst( + col_nested->getData(), col_array->getOffsets(), -(static_cast(index.safeGet()) + 1), col_res->getData(), builder); + } } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index bbb4c3ba5b01..5ec0212c56d5 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -38,22 +39,19 @@ namespace ErrorCodes namespace { +using Pos = const char *; -struct FormatDateTimeTraits +enum class SupportInteger { - enum class SupportInteger - { - Yes, - No - }; - - enum class FormatSyntax - { - MySQL, - Joda - }; + Yes, + No }; +enum class FormatSyntax +{ + MySQL, + Joda +}; template struct InstructionValueTypeMap {}; template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; @@ -85,11 +83,9 @@ constexpr std::string_view weekdaysFull[] = {"Sunday", "Monday", "Tuesday", "Wed constexpr std::string_view weekdaysShort[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; -constexpr std::string_view monthsFull[] - = {"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; +constexpr std::string_view monthsFull[] = {"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; -constexpr std::string_view monthsShort[] - = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; +constexpr std::string_view monthsShort[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; /** formatDateTime(time, 'format') * Performs formatting of time, according to provided format. @@ -115,13 +111,13 @@ constexpr std::string_view monthsShort[] * * Performance on Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz: * - * WITH formatDateTime(now() + number, '%H:%M:%S') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x); + * WITH formatDateTime(now() + number, '%H:%i:%S') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x); * - 97 million rows per second per core; * * WITH formatDateTime(toDateTime('2018-01-01 00:00:00') + number, '%F %T') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x) * - 71 million rows per second per core; * - * select count() from (select formatDateTime(t, '%m/%d/%Y %H:%M:%S') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000))); + * select count() from (select formatDateTime(t, '%m/%d/%Y %H:%i:%S') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000))); * - 53 million rows per second per core; * * select count() from (select formatDateTime(t, 'Hello %Y World') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000))); @@ -129,7 +125,7 @@ constexpr std::string_view monthsShort[] * * PS. We can make this function to return FixedString. Currently it returns String. */ -template +template class FunctionFormatDateTimeImpl : public IFunction { private: @@ -152,26 +148,34 @@ class FunctionFormatDateTimeImpl : public IFunction class Instruction { public: - /// Using std::function will cause performance degradation in MySQL format by 0.45x. - /// But std::function is required for Joda format to capture extra variables. - /// This is the reason why we use raw function pointer in MySQL format and std::function - /// in Joda format. - using Func = std::conditional_t< - format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL, - size_t (*)(char *, Time, UInt64, UInt32, const DateLUTImpl &), - std::function>; + /// Joda format generally requires capturing extra variables (i.e. holding state) which is more convenient with + /// std::function and std::bind. Unfortunately, std::function causes a performance degradation by 0.45x compared to raw function + /// pointers. For MySQL format, we generally prefer raw function pointers. Because of the special case that not all formatters are + /// fixed-width formatters (see mysqlLiteral instruction), we still need to be able to store state. For that reason, we use member + /// function pointers instead of static function pointers. + using FuncMysql = size_t (Instruction