From 21f1e21e7e120e8f3d838a28525a89ef3f414438 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Fri, 6 Dec 2024 18:18:06 -0800 Subject: [PATCH] fix: Quote string keys for feature selection (#11779) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/11779 For Nimble bulk reader we need to pass down the feature selection as JSON string, and when the map key is string, we forgot to quote them so the JSON was invalid. Reviewed By: kevinwilfong Differential Revision: D66885879 fbshipit-source-id: 924c1b7773cbae0306e001ee6e2f2975a1472c88 --- velox/common/base/Exceptions.h | 4 ++-- .../hive/tests/HiveConnectorTest.cpp | 19 +++++++++++++++++++ velox/dwio/common/ColumnSelector.cpp | 15 ++++++++++++++- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/velox/common/base/Exceptions.h b/velox/common/base/Exceptions.h index 4fb1ec99355c..11fa7ae9ec59 100644 --- a/velox/common/base/Exceptions.h +++ b/velox/common/base/Exceptions.h @@ -190,7 +190,7 @@ std::string errorMessage(fmt::string_view fmt, const Args&... args) { #define _VELOX_THROW(exception, ...) \ _VELOX_THROW_IMPL(exception, "", ##__VA_ARGS__) -DECLARE_CHECK_FAIL_TEMPLATES(::facebook::velox::VeloxRuntimeError); +DECLARE_CHECK_FAIL_TEMPLATES(::facebook::velox::VeloxRuntimeError) #define _VELOX_CHECK_IMPL(expr, exprStr, ...) \ _VELOX_CHECK_AND_THROW_IMPL( \ @@ -367,7 +367,7 @@ DECLARE_CHECK_FAIL_TEMPLATES(::facebook::velox::VeloxRuntimeError); /* isRetriable */ false, \ ##__VA_ARGS__) -DECLARE_CHECK_FAIL_TEMPLATES(::facebook::velox::VeloxUserError); +DECLARE_CHECK_FAIL_TEMPLATES(::facebook::velox::VeloxUserError) // For all below macros, an additional message can be passed using a // format string and arguments, as with `fmt::format`. diff --git a/velox/connectors/hive/tests/HiveConnectorTest.cpp b/velox/connectors/hive/tests/HiveConnectorTest.cpp index 126215b9d312..f4a558513640 100644 --- a/velox/connectors/hive/tests/HiveConnectorTest.cpp +++ b/velox/connectors/hive/tests/HiveConnectorTest.cpp @@ -206,6 +206,25 @@ TEST_F(HiveConnectorTest, makeScanSpec_requiredSubfields_mergeMap) { ASSERT_FALSE(values->childByName("c0c2")->isConstant()); } +TEST_F( + HiveConnectorTest, + makeScanSpec_requiredSubfields_flatMapFeatureSelectionStringKeys) { + auto rowType = ROW({{"c0", MAP(VARCHAR(), BIGINT())}}); + auto scanSpec = makeScanSpec( + rowType, + groupSubfields(makeSubfields({"c0[\"foo\"]"})), + {}, + nullptr, + {}, + {}, + {}, + pool_.get()); + auto* c0 = scanSpec->childByName("c0"); + ASSERT_EQ(c0->flatMapFeatureSelection(), std::vector({"foo"})); + auto cs = dwio::common::ColumnSelector::fromScanSpec(*scanSpec, rowType); + ASSERT_EQ(cs->findNode("c0")->getNode().expression, "[\"foo\"]"); +} + TEST_F(HiveConnectorTest, makeScanSpec_requiredSubfields_allSubscripts) { auto columnType = MAP(BIGINT(), ARRAY(ROW({{"c0c0", BIGINT()}, {"c0c1", BIGINT()}}))); diff --git a/velox/dwio/common/ColumnSelector.cpp b/velox/dwio/common/ColumnSelector.cpp index 51a81a2d425e..158eaa05cfab 100644 --- a/velox/dwio/common/ColumnSelector.cpp +++ b/velox/dwio/common/ColumnSelector.cpp @@ -348,8 +348,21 @@ std::shared_ptr ColumnSelector::fromScanSpec( } std::string name = child->fieldName(); if (!child->flatMapFeatureSelection().empty()) { + const auto& featureIds = child->flatMapFeatureSelection(); + const auto& type = rowType->findChild(name); + VELOX_CHECK(type->isMap()); + const bool isVarchar = type->asMap().keyType()->isVarchar(); name += "#["; - name += folly::join(',', child->flatMapFeatureSelection()); + for (int i = 0; i < featureIds.size(); ++i) { + if (i > 0) { + name += ','; + } + if (isVarchar) { + folly::json::escapeString(featureIds[i], name, {}); + } else { + name += featureIds[i]; + } + } name += ']'; } columnNames.push_back(std::move(name));