From 06a7a6a0cbf5af04a5f975298aa34297a7b79fdb Mon Sep 17 00:00:00 2001 From: Joey Date: Sat, 16 Dec 2023 09:00:40 +0800 Subject: [PATCH] [VL] Explode support Literal array and map (#4019) --- .../backendsapi/velox/ValidatorApiImpl.scala | 19 ++++++--------- .../execution/TestOperator.scala | 24 +++++++++++++++++++ cpp/velox/substrait/SubstraitToVeloxPlan.cc | 17 +++++++++++-- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala index b6943bbbe4e8..06bd2e2ff9bf 100644 --- a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala +++ b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ValidatorApiImpl.scala @@ -23,10 +23,10 @@ import io.glutenproject.substrait.plan.PlanNode import io.glutenproject.validate.NativePlanValidationInfo import io.glutenproject.vectorized.NativePlanEvaluator -import org.apache.spark.sql.catalyst.expressions.{CreateMap, Explode, Expression, Generator, JsonTuple, Literal, PosExplode} +import org.apache.spark.sql.catalyst.expressions.{CreateMap, Explode, Expression, Generator, JsonTuple, PosExplode} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, DataType, DateType, DecimalType, DoubleType, FloatType, IntegerType, LongType, MapType, ShortType, StringType, StructType, TimestampType} +import org.apache.spark.sql.types._ class ValidatorApiImpl extends ValidatorApi { @@ -95,20 +95,15 @@ class ValidatorApiImpl extends ValidatorApi { return ValidationResult.notOk(s"Velox backend does not support outer") } generator match { - case generator: JsonTuple => + case _: JsonTuple => ValidationResult.notOk(s"Velox backend does not support this json_tuple") - case generator: PosExplode => + case _: PosExplode => // TODO(yuan): support posexplode and remove this check ValidationResult.notOk(s"Velox backend does not support this posexplode") - case explode: Explode if (explode.child.isInstanceOf[CreateMap]) => - // explode(MAP(col1, col2)) - ValidationResult.notOk(s"Velox backend does not support MAP datatype") - case explode: Explode if (explode.child.isInstanceOf[Literal]) => - // explode(ARRAY(1, 2, 3)) - ValidationResult.notOk(s"Velox backend does not support literal Array datatype") case explode: Explode => - explode.child.dataType match { - case _: MapType => + explode.child match { + case _: CreateMap => + // explode(MAP(col1, col2)) ValidationResult.notOk(s"Velox backend does not support MAP datatype") case _ => ValidationResult.ok diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/TestOperator.scala b/backends-velox/src/test/scala/io/glutenproject/execution/TestOperator.scala index 5216a32ad3eb..e2e34100d4a2 100644 --- a/backends-velox/src/test/scala/io/glutenproject/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/io/glutenproject/execution/TestOperator.scala @@ -694,4 +694,28 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla } } } + + test("test explode function") { + runQueryAndCompare(""" + |SELECT explode(array(1, 2, 3)); + |""".stripMargin) { + checkOperatorMatch[GenerateExecTransformer] + } + runQueryAndCompare(""" + |SELECT explode(map(1, 'a', 2, 'b')); + |""".stripMargin) { + checkOperatorMatch[GenerateExecTransformer] + } + runQueryAndCompare( + """ + |SELECT explode(array(map(1, 'a', 2, 'b'), map(3, 'c', 4, 'd'), map(5, 'e', 6, 'f'))); + |""".stripMargin) { + checkOperatorMatch[GenerateExecTransformer] + } + runQueryAndCompare(""" + |SELECT explode(map(1, array(1, 2), 2, array(3, 4))); + |""".stripMargin) { + checkOperatorMatch[GenerateExecTransformer] + } + } } diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 4fcefd4930ad..c3832ee22ddd 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -498,8 +498,6 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: std::vector replicated; std::vector unnest; - // TODO(yuan): get from generator output - std::vector unnestNames = {"C0"}; const auto& generator = generateRel.generator(); const auto& requiredChildOutput = generateRel.child_output(); @@ -534,6 +532,21 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait:: unnest.emplace_back(unnestFieldExpr); } + // TODO(yuan): get from generator output + std::vector unnestNames; + int unnestIndex = 0; + for (const auto& variable : unnest) { + if (variable->type()->isArray()) { + unnestNames.emplace_back(fmt::format("C{}", unnestIndex++)); + } else if (variable->type()->isMap()) { + unnestNames.emplace_back(fmt::format("C{}", unnestIndex++)); + unnestNames.emplace_back(fmt::format("C{}", unnestIndex++)); + } else { + VELOX_FAIL( + "Unexpected type of unnest variable. Expected ARRAY or MAP, but got {}.", variable->type()->toString()); + } + } + auto node = std::make_shared( nextPlanNodeId(), replicated, unnest, std::move(unnestNames), std::nullopt, childNode);