diff --git a/CMake/abseil.cmake b/CMake/Findabseil.cmake similarity index 88% rename from CMake/abseil.cmake rename to CMake/Findabseil.cmake index 8768f24..25fa2c4 100644 --- a/CMake/abseil.cmake +++ b/CMake/Findabseil.cmake @@ -1,15 +1,15 @@ include_guard(GLOBAL) -# TODO: these variables are named VELOX_* because we are piggy-backing on -# Velox's resolve dependency module for now. We should change and have -# our own in the future. +# TODO: these variables are named VELOX_* because we are piggy-backing on +# Velox's resolve dependency module for now. We should change and have our own +# in the future. set(VELOX_ABSEIL_VERSION 20240116.0) set(VELOX_ABSEIL_BUILD_SHA256_CHECKSUM "338420448b140f0dfd1a1ea3c3ce71b3bc172071f24f4d9a57d59b45037da440") set(VELOX_ABSEIL_SOURCE_URL "https://github.com/abseil/abseil-cpp/archive/refs/tags/${VELOX_ABSEIL_VERSION}.tar.gz") -resolve_dependency_url(ABSEIL) +velox_resolve_dependency_url(ABSEIL) message(STATUS "Building abseil from source") diff --git a/CMakeLists.txt b/CMakeLists.txt index aa4d343..52dbe3b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,25 +98,25 @@ include(CTest) # include after project() but before add_subdirectory() # generated .cpp/.h files), but adding this for convenience for now. find_package(FlatBuffers REQUIRED) -set_source(gtest) -resolve_dependency(gtest) +velox_set_source(gtest) +velox_resolve_dependency(gtest) -set_source(glog) -resolve_dependency(glog) +velox_set_source(glog) +velox_resolve_dependency(glog) -set_source(gflags) -resolve_dependency(gflags COMPONENTS shared) +velox_set_source(gflags) +velox_resolve_dependency(gflags COMPONENTS shared) set(BOOST_INCLUDE_LIBRARIES algorithm context filesystem program_options) -set_source(Boost) -resolve_dependency(Boost 1.77.0 COMPONENTS ${BOOST_INCLUDE_LIBRARIES}) +velox_set_source(Boost) +velox_resolve_dependency(Boost 1.77.0 COMPONENTS ${BOOST_INCLUDE_LIBRARIES}) -set_source(folly) -resolve_dependency(folly) +velox_set_source(folly) +velox_resolve_dependency(folly) -set_source(abseil) -resolve_dependency(abseil) +velox_set_source(abseil) +velox_resolve_dependency(abseil) # Use xxhash and xsimd from Velox for now. include_directories(.) diff --git a/dwio/nimble/velox/FieldWriter.cpp b/dwio/nimble/velox/FieldWriter.cpp index 754a7f6..9925e5b 100644 --- a/dwio/nimble/velox/FieldWriter.cpp +++ b/dwio/nimble/velox/FieldWriter.cpp @@ -851,8 +851,14 @@ class FlatMapFieldWriter : public FieldWriter { folly::Executor* executor = nullptr) override { // Check if the vector received is already flattened const auto isFlatMap = vector->type()->kind() == velox::TypeKind::ROW; - isFlatMap ? ingestFlattenedMap(vector, ranges) - : ingestMap(vector, ranges, executor); + if (isFlatMap) { + ingestFlattenedMap( + velox::RowVector::pushDictionaryToRowVectorLeaves( + BaseVector::loadedVectorShared(vector)), + ranges); + } else { + ingestMap(vector, ranges, executor); + } } FlatMapPassthroughValueFieldWriter& createPassthroughValueFieldWriter( diff --git a/dwio/nimble/velox/tests/VeloxWriterTests.cpp b/dwio/nimble/velox/tests/VeloxWriterTests.cpp index c8b41f0..d03e740 100644 --- a/dwio/nimble/velox/tests/VeloxWriterTests.cpp +++ b/dwio/nimble/velox/tests/VeloxWriterTests.cpp @@ -914,6 +914,64 @@ TEST_F(VeloxWriterTests, EncodingLayoutSchemaEvolutionExpandingRow) { // that no captured encoding was used. } +TEST_F(VeloxWriterTests, CombineMultipleLayersOfDictionaries) { + using namespace facebook::velox; + test::VectorMaker vectorMaker{leafPool_.get()}; + auto wrapInDictionary = [&](const std::vector& indices, + const VectorPtr& values) { + auto buf = + AlignedBuffer::allocate(indices.size(), leafPool_.get()); + memcpy( + buf->asMutable(), + indices.data(), + sizeof(vector_size_t) * indices.size()); + return BaseVector::wrapInDictionary(nullptr, buf, indices.size(), values); + }; + auto vector = vectorMaker.rowVector({ + wrapInDictionary( + {0, 0, 1, 1}, + vectorMaker.rowVector({ + wrapInDictionary( + {0, 0}, vectorMaker.arrayVector({{1, 2, 3}})), + })), + }); + nimble::VeloxWriterOptions options; + options.flatMapColumns = {"c0"}; + options.dictionaryArrayColumns = {"c0"}; + std::string file; + auto writeFile = std::make_unique(&file); + nimble::VeloxWriter writer( + *rootPool_, + ROW({"c0"}, {MAP(VARCHAR(), ARRAY(BIGINT()))}), + std::move(writeFile), + std::move(options)); + writer.write(vector); + writer.close(); + InMemoryReadFile readFile(file); + nimble::VeloxReadParams params; + params.readFlatMapFieldAsStruct = {"c0"}; + params.flatMapFeatureSelector["c0"].features = {"c0"}; + nimble::VeloxReader reader(*leafPool_, &readFile, nullptr, std::move(params)); + VectorPtr result; + ASSERT_TRUE(reader.next(4, result)); + ASSERT_EQ(result->size(), 4); + auto* c0 = result->asChecked()->childAt(0)->asChecked(); + auto& dict = c0->childAt(0); + ASSERT_EQ(dict->encoding(), VectorEncoding::Simple::DICTIONARY); + ASSERT_EQ(dict->size(), 4); + auto* indices = dict->wrapInfo()->as(); + for (int i = 0; i < 4; ++i) { + ASSERT_EQ(indices[i], 0); + } + auto* values = dict->valueVector()->asChecked(); + ASSERT_EQ(values->size(), 1); + auto* elements = values->elements()->asChecked>(); + ASSERT_EQ(values->sizeAt(0), 3); + for (int i = 0; i < 3; ++i) { + ASSERT_EQ(elements->valueAt(i + values->offsetAt(0)), 1 + i); + } +} + #define ASSERT_CHUNK_COUNT(count, chunked) \ for (auto __i = 0; __i < count; ++__i) { \ ASSERT_TRUE(chunked.hasNext()); \ diff --git a/velox b/velox index 9e48da6..b44ffc9 160000 --- a/velox +++ b/velox @@ -1 +1 @@ -Subproject commit 9e48da6787b531373b775f6a1c66e891834cd547 +Subproject commit b44ffc9e9e85e3a0d21ec72f2e463ff17be011ee