From 0d16b3432bcbef0d530e2ace0d8ff72eed072139 Mon Sep 17 00:00:00 2001 From: lixinguo Date: Mon, 30 Dec 2024 20:00:23 +0800 Subject: [PATCH] add test Signed-off-by: lixinguo --- .../src/exec/expression/BinaryRangeExpr.cpp | 1 - .../core/src/exec/expression/NullExpr.cpp | 30 +++-- internal/core/src/exec/expression/NullExpr.h | 2 +- internal/core/unittest/test_array_expr.cpp | 100 +++++++++++++++ internal/core/unittest/test_expr.cpp | 120 +++++++++++++++++- internal/core/unittest/test_string_expr.cpp | 107 ++++++++++++++++ .../core/unittest/test_utils/GenExprProto.h | 11 +- 7 files changed, 353 insertions(+), 18 deletions(-) diff --git a/internal/core/src/exec/expression/BinaryRangeExpr.cpp b/internal/core/src/exec/expression/BinaryRangeExpr.cpp index b71644c33782d..7dd0943794703 100644 --- a/internal/core/src/exec/expression/BinaryRangeExpr.cpp +++ b/internal/core/src/exec/expression/BinaryRangeExpr.cpp @@ -17,7 +17,6 @@ #include "BinaryRangeExpr.h" #include -#include "common/Types.h" #include "query/Utils.h" namespace milvus { diff --git a/internal/core/src/exec/expression/NullExpr.cpp b/internal/core/src/exec/expression/NullExpr.cpp index e1f5f87272abf..ef3e59ca91b81 100644 --- a/internal/core/src/exec/expression/NullExpr.cpp +++ b/internal/core/src/exec/expression/NullExpr.cpp @@ -26,33 +26,34 @@ namespace exec { void PhyNullExpr::Eval(EvalCtx& context, VectorPtr& result) { + auto input = context.get_offset_input(); switch (expr_->column_.data_type_) { case DataType::BOOL: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT8: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT16: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT32: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::INT64: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::FLOAT: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::DOUBLE: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::VARCHAR: { @@ -60,18 +61,18 @@ PhyNullExpr::Eval(EvalCtx& context, VectorPtr& result) { !storage::MmapManager::GetInstance() .GetMmapConfig() .growing_enable_mmap) { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); } else { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); } break; } case DataType::JSON: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } case DataType::ARRAY: { - result = ExecVisitorImpl(); + result = ExecVisitorImpl(input); break; } default: @@ -83,11 +84,14 @@ PhyNullExpr::Eval(EvalCtx& context, VectorPtr& result) { template VectorPtr -PhyNullExpr::ExecVisitorImpl() { +PhyNullExpr::ExecVisitorImpl(OffsetVector* input) { if (auto res = PreCheckNullable()) { return res; } - TargetBitmap valid_res = ProcessChunksForValid(is_index_mode_); + auto valid_res = + (input != nullptr) + ? ProcessChunksForValidByOffsets(is_index_mode_, *input) + : ProcessChunksForValid(is_index_mode_); TargetBitmap res = valid_res.clone(); if (expr_->op_ == proto::plan::NullExpr_NullOp_IsNull) { res.flip(); diff --git a/internal/core/src/exec/expression/NullExpr.h b/internal/core/src/exec/expression/NullExpr.h index 2930b9c079b68..3d88c0ec71411 100644 --- a/internal/core/src/exec/expression/NullExpr.h +++ b/internal/core/src/exec/expression/NullExpr.h @@ -54,7 +54,7 @@ class PhyNullExpr : public SegmentExpr { template VectorPtr - ExecVisitorImpl(); + ExecVisitorImpl(OffsetVector* input); private: std::shared_ptr expr_; diff --git a/internal/core/unittest/test_array_expr.cpp b/internal/core/unittest/test_array_expr.cpp index 4133045a3b200..1be5522082432 100644 --- a/internal/core/unittest/test_array_expr.cpp +++ b/internal/core/unittest/test_array_expr.cpp @@ -782,6 +782,106 @@ TEST(Expr, TestArrayEqual) { } } +TEST(Expr, TestArrayNullExpr) { + std::vector>> testcases = + { + {R"(null_expr: < + column_info: < + field_id: 102 + data_type: Array + element_type:Int64 + nullable: true + > + op:IsNull + >)", + [](bool v) { return !v; }}, + }; + + std::string raw_plan_tmp = R"(vector_anns: < + field_id: 100 + predicates: < + @@@@ + > + query_info: < + topk: 10 + round_decimal: 3 + metric_type: "L2" + search_params: "{\"nprobe\": 10}" + > + placeholder_tag: "$0" + >)"; + auto schema = std::make_shared(); + auto vec_fid = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2); + auto i64_fid = schema->AddDebugField("id", DataType::INT64); + auto long_array_fid = schema->AddDebugField( + "long_array", DataType::ARRAY, DataType::INT64, true); + schema->set_primary_field_id(i64_fid); + + auto seg = CreateGrowingSegment(schema, empty_index_meta); + int N = 1000; + std::vector long_array_col; + int num_iters = 1; + FixedVector valid_data; + + for (int iter = 0; iter < num_iters; ++iter) { + auto raw_data = DataGen(schema, N, iter, 0, 1, 3); + auto new_long_array_col = raw_data.get_col(long_array_fid); + long_array_col.insert(long_array_col.end(), + new_long_array_col.begin(), + new_long_array_col.end()); + auto new_valid_col = raw_data.get_col_valid(long_array_fid); + valid_data.insert( + valid_data.end(), new_valid_col.begin(), new_valid_col.end()); + seg->PreInsert(N); + seg->Insert(iter * N, + N, + raw_data.row_ids_.data(), + raw_data.timestamps_.data(), + raw_data.raw_); + } + + auto seg_promote = dynamic_cast(seg.get()); + for (auto [clause, ref_func] : testcases) { + auto loc = raw_plan_tmp.find("@@@@"); + auto raw_plan = raw_plan_tmp; + raw_plan.replace(loc, 4, clause); + auto plan_str = translate_text_plan_to_binary_plan(raw_plan.c_str()); + auto plan = + CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size()); + BitsetType final; + final = ExecuteQueryExpr( + plan->plan_node_->plannodes_->sources()[0]->sources()[0], + seg_promote, + N * num_iters, + MAX_TIMESTAMP); + EXPECT_EQ(final.size(), N * num_iters); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { + auto ans = final[i]; + auto valid = valid_data[i]; + auto ref = ref_func(valid); + ASSERT_EQ(ans, ref); + } + } +} + TEST(Expr, PraseArrayContainsExpr) { std::vector raw_plans{ R"(vector_anns:< diff --git a/internal/core/unittest/test_expr.cpp b/internal/core/unittest/test_expr.cpp index 49aca1b80b3cc..4a31ecbd1466a 100644 --- a/internal/core/unittest/test_expr.cpp +++ b/internal/core/unittest/test_expr.cpp @@ -5140,6 +5140,8 @@ TEST_P(ExprTest, TestBinaryArithOpEvalRangeBenchExpr) { TEST(Expr, TestExprNull) { auto schema = std::make_shared(); + auto bool_fid = schema->AddDebugField("bool", DataType::BOOL, true); + auto bool_1_fid = schema->AddDebugField("bool1", DataType::BOOL); auto int8_fid = schema->AddDebugField("int8", DataType::INT8, true); auto int8_1_fid = schema->AddDebugField("int81", DataType::INT8); auto int16_fid = schema->AddDebugField("int16", DataType::INT16, true); @@ -5156,7 +5158,8 @@ TEST(Expr, TestExprNull) { auto double_1_fid = schema->AddDebugField("double1", DataType::DOUBLE); schema->set_primary_field_id(str1_fid); - std::map fids = {{DataType::INT8, int8_fid}, + std::map fids = {{DataType::BOOL, bool_fid}, + {DataType::INT8, int8_fid}, {DataType::INT16, int16_fid}, {DataType::INT32, int32_fid}, {DataType::INT64, int64_fid}, @@ -5165,6 +5168,7 @@ TEST(Expr, TestExprNull) { {DataType::DOUBLE, double_fid}}; std::map fids_not_nullable = { + {DataType::BOOL, bool_1_fid}, {DataType::INT8, int8_1_fid}, {DataType::INT16, int16_1_fid}, {DataType::INT32, int32_1_fid}, @@ -5174,6 +5178,7 @@ TEST(Expr, TestExprNull) { {DataType::DOUBLE, double_1_fid}}; auto seg = CreateSealedSegment(schema); + FixedVector valid_data_bool; FixedVector valid_data_i8; FixedVector valid_data_i16; FixedVector valid_data_i32; @@ -5184,6 +5189,7 @@ TEST(Expr, TestExprNull) { int N = 1000; auto raw_data = DataGen(schema, N); + valid_data_bool = raw_data.get_col_valid(bool_fid); valid_data_i8 = raw_data.get_col_valid(int8_fid); valid_data_i16 = raw_data.get_col_valid(int16_fid); valid_data_i32 = raw_data.get_col_valid(int32_fid); @@ -5248,8 +5254,11 @@ TEST(Expr, TestExprNull) { } }; - auto expr = build_nullable_expr(DataType::INT8, + auto expr = build_nullable_expr(DataType::BOOL, proto::plan::NullExpr_NullOp_IsNull); + test_is_null_ans(expr, valid_data_bool); + expr = build_nullable_expr(DataType::INT8, + proto::plan::NullExpr_NullOp_IsNull); test_is_null_ans(expr, valid_data_i8); expr = build_nullable_expr(DataType::INT16, proto::plan::NullExpr_NullOp_IsNull); @@ -5272,6 +5281,9 @@ TEST(Expr, TestExprNull) { expr = build_nullable_expr(DataType::DOUBLE, proto::plan::NullExpr_NullOp_IsNull); test_is_null_ans(expr, valid_data_double); + expr = build_nullable_expr(DataType::BOOL, + proto::plan::NullExpr_NullOp_IsNotNull); + test_is_not_null_ans(expr, valid_data_bool); expr = build_nullable_expr(DataType::INT8, proto::plan::NullExpr_NullOp_IsNotNull); test_is_not_null_ans(expr, valid_data_i8); @@ -5297,6 +5309,9 @@ TEST(Expr, TestExprNull) { proto::plan::NullExpr_NullOp_IsNotNull); test_is_not_null_ans(expr, valid_data_double); //not nullable expr + expr = build_not_nullable_expr(DataType::BOOL, + proto::plan::NullExpr_NullOp_IsNull); + test_is_null_ans(expr, valid_data_all_true); expr = build_not_nullable_expr(DataType::INT8, proto::plan::NullExpr_NullOp_IsNull); test_is_null_ans(expr, valid_data_all_true); @@ -5321,6 +5336,9 @@ TEST(Expr, TestExprNull) { expr = build_not_nullable_expr(DataType::DOUBLE, proto::plan::NullExpr_NullOp_IsNull); test_is_null_ans(expr, valid_data_all_true); + expr = build_not_nullable_expr(DataType::BOOL, + proto::plan::NullExpr_NullOp_IsNotNull); + test_is_not_null_ans(expr, valid_data_all_true); expr = build_not_nullable_expr(DataType::INT8, proto::plan::NullExpr_NullOp_IsNotNull); test_is_not_null_ans(expr, valid_data_all_true); @@ -11664,6 +11682,104 @@ TEST_P(ExprTest, TestUnaryRangeWithJSONNullable) { } } +TEST_P(ExprTest, TestNullExprWithJSON) { + std::vector>> testcases = + { + {R"(null_expr: < + column_info: < + field_id: 102 + data_type:JSON + nullable: true + > + op:IsNull + >)", + [](bool v) { return !v; }}, + }; + + std::string raw_plan_tmp = R"(vector_anns: < + field_id: 100 + predicates: < + @@@@ + > + query_info: < + topk: 10 + round_decimal: 3 + metric_type: "L2" + search_params: "{\"nprobe\": 10}" + > + placeholder_tag: "$0" + >)"; + auto schema = std::make_shared(); + auto vec_fid = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2); + auto i64_fid = schema->AddDebugField("id", DataType::INT64); + auto json_fid = schema->AddDebugField("json", DataType::JSON, true); + schema->set_primary_field_id(i64_fid); + + auto seg = CreateGrowingSegment(schema, empty_index_meta); + int N = 1000; + int num_iters = 1; + FixedVector valid_data; + std::vector json_col; + + for (int iter = 0; iter < num_iters; ++iter) { + auto raw_data = DataGen(schema, N, iter, 0, 1, 3); + auto new_json_col = raw_data.get_col(json_fid); + + json_col.insert( + json_col.end(), new_json_col.begin(), new_json_col.end()); + auto new_valid_col = raw_data.get_col_valid(json_fid); + valid_data.insert( + valid_data.end(), new_valid_col.begin(), new_valid_col.end()); + seg->PreInsert(N); + seg->Insert(iter * N, + N, + raw_data.row_ids_.data(), + raw_data.timestamps_.data(), + raw_data.raw_); + } + + auto seg_promote = dynamic_cast(seg.get()); + for (auto [clause, ref_func] : testcases) { + auto loc = raw_plan_tmp.find("@@@@"); + auto raw_plan = raw_plan_tmp; + raw_plan.replace(loc, 4, clause); + auto plan_str = translate_text_plan_to_binary_plan(raw_plan.c_str()); + auto plan = + CreateSearchPlanByExpr(*schema, plan_str.data(), plan_str.size()); + BitsetType final; + final = ExecuteQueryExpr( + plan->plan_node_->plannodes_->sources()[0]->sources()[0], + seg_promote, + N * num_iters, + MAX_TIMESTAMP); + EXPECT_EQ(final.size(), N * num_iters); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + for (int i = 0; i < N * num_iters; ++i) { + auto ans = final[i]; + auto valid = valid_data[i]; + auto ref = ref_func(valid); + ASSERT_EQ(ans, ref); + } + } +} + TEST_P(ExprTest, TestTermWithJSON) { std::vector< std::tuple(); + schema->AddDebugField("str", DataType::VARCHAR, true); + schema->AddDebugField("another_str", DataType::VARCHAR); + schema->AddDebugField( + "fvec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2); + auto pk = schema->AddDebugField("int64", DataType::INT64); + schema->set_primary_field_id(pk); + const auto& fvec_meta = schema->operator[](FieldName("fvec")); + const auto& str_meta = schema->operator[](FieldName("str")); + + auto gen_plan = + [&, fvec_meta, str_meta]( + NullExprType op) -> std::unique_ptr { + auto column_info = test::GenColumnInfo(str_meta.get_id().get(), + proto::schema::DataType::VarChar, + false, + false, + proto::schema::DataType::None, + true); + auto null_expr = test::GenNullExpr(op); + null_expr->set_allocated_column_info(column_info); + + auto expr = test::GenExpr().release(); + expr->set_allocated_null_expr(null_expr); + + proto::plan::VectorType vector_type; + if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT) { + vector_type = proto::plan::VectorType::FloatVector; + } else if (fvec_meta.get_data_type() == DataType::VECTOR_BINARY) { + vector_type = proto::plan::VectorType::BinaryVector; + } else if (fvec_meta.get_data_type() == DataType::VECTOR_FLOAT16) { + vector_type = proto::plan::VectorType::Float16Vector; + } + auto anns = GenAnns(expr, vector_type, fvec_meta.get_id().get(), "$0"); + + auto plan_node = std::make_unique(); + plan_node->set_allocated_vector_anns(anns); + return plan_node; + }; + + auto seg = CreateGrowingSegment(schema, empty_index_meta); + int N = 1000; + std::vector str_col; + FixedVector valid_data; + int num_iters = 100; + for (int iter = 0; iter < num_iters; ++iter) { + auto raw_data = DataGen(schema, N, iter); + auto new_str_col = raw_data.get_col(str_meta.get_id()); + auto begin = FIELD_DATA(new_str_col, string).begin(); + auto end = FIELD_DATA(new_str_col, string).end(); + str_col.insert(str_col.end(), begin, end); + auto new_str_valid_col = raw_data.get_col_valid(str_meta.get_id()); + valid_data.insert(valid_data.end(), + new_str_valid_col.begin(), + new_str_valid_col.end()); + seg->PreInsert(N); + seg->Insert(iter * N, + N, + raw_data.row_ids_.data(), + raw_data.timestamps_.data(), + raw_data.raw_); + } + std::vector ops{NullExprType::NullExpr_NullOp_IsNull, + NullExprType::NullExpr_NullOp_IsNotNull}; + + auto seg_promote = dynamic_cast(seg.get()); + // is_null + for (const auto op : ops) { + auto plan_proto = gen_plan(op); + auto plan = ProtoParser(*schema).CreatePlan(*plan_proto); + BitsetType final; + final = ExecuteQueryExpr( + plan->plan_node_->plannodes_->sources()[0]->sources()[0], + seg_promote, + N * num_iters, + MAX_TIMESTAMP); + EXPECT_EQ(final.size(), N * num_iters); + + // specify some offsets and do scalar filtering on these offsets + milvus::exec::OffsetVector offsets; + offsets.reserve(N * num_iters / 2); + for (auto i = 0; i < N * num_iters; ++i) { + if (i % 2 == 0) { + offsets.emplace_back(i); + } + } + auto col_vec = milvus::test::gen_filter_res( + plan->plan_node_->plannodes_->sources()[0]->sources()[0].get(), + seg_promote, + N * num_iters, + MAX_TIMESTAMP, + &offsets); + BitsetTypeView view(col_vec->GetRawData(), col_vec->size()); + EXPECT_EQ(view.size(), N * num_iters / 2); + + for (int i = 0; i < N * num_iters; ++i) { + auto ans = final[i]; + if (op == NullExprType::NullExpr_NullOp_IsNull) { + ASSERT_EQ(ans, !valid_data[i]); + } else { + ASSERT_EQ(ans, valid_data[i]); + } + } + } +} + TEST(StringExpr, BinaryRange) { auto schema = GenTestSchema(); const auto& fvec_meta = schema->operator[](FieldName("fvec")); diff --git a/internal/core/unittest/test_utils/GenExprProto.h b/internal/core/unittest/test_utils/GenExprProto.h index 372e07e356f24..78b58ee261918 100644 --- a/internal/core/unittest/test_utils/GenExprProto.h +++ b/internal/core/unittest/test_utils/GenExprProto.h @@ -27,13 +27,15 @@ GenColumnInfo( proto::schema::DataType field_type, bool auto_id, bool is_pk, - proto::schema::DataType element_type = proto::schema::DataType::None) { + proto::schema::DataType element_type = proto::schema::DataType::None, + bool nullable = false) { auto column_info = new proto::plan::ColumnInfo(); column_info->set_field_id(field_id); column_info->set_data_type(field_type); column_info->set_is_autoid(auto_id); column_info->set_is_primary_key(is_pk); column_info->set_element_type(element_type); + column_info->set_nullable(nullable); return column_info; } @@ -65,6 +67,13 @@ GenUnaryRangeExpr(proto::plan::OpType op, T& value) { return unary_range_expr; } +inline auto +GenNullExpr(NullExprType op) { + auto null_expr = new proto::plan::NullExpr(); + null_expr->set_op(op); + return null_expr; +} + inline auto GenExpr() { return std::make_unique();