Skip to content

Commit

Permalink
enhance: support null expr (#38772)
Browse files Browse the repository at this point in the history
#31728

---------

Signed-off-by: lixinguo <[email protected]>
Co-authored-by: lixinguo <[email protected]>
  • Loading branch information
smellthemoon and lixinguo authored Jan 2, 2025
1 parent 19a5c31 commit 907fc24
Show file tree
Hide file tree
Showing 27 changed files with 1,756 additions and 660 deletions.
1 change: 1 addition & 0 deletions internal/core/src/common/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ using GroupByValueType = std::variant<std::monostate,
bool,
std::string>;
using ContainsType = proto::plan::JSONContainsExpr_JSONOp;
using NullExprType = proto::plan::NullExpr_NullOp;

inline bool
IsPrimaryKeyDataType(DataType data_type) {
Expand Down
1 change: 1 addition & 0 deletions internal/core/src/exec/expression/BinaryRangeExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ PhyBinaryRangeFilterExpr::PreCheckOverflow(HighPrecisionType& val1,
(input != nullptr)
? ProcessChunksForValidByOffsets<T>(is_index_mode_, *input)
: ProcessChunksForValid<T>(is_index_mode_);

auto res_vec = std::make_shared<ColumnVector>(TargetBitmap(batch_size),
std::move(valid_res));
return res_vec;
Expand Down
12 changes: 12 additions & 0 deletions internal/core/src/exec/expression/Expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@
#include "exec/expression/JsonContainsExpr.h"
#include "exec/expression/LogicalBinaryExpr.h"
#include "exec/expression/LogicalUnaryExpr.h"
#include "exec/expression/NullExpr.h"
#include "exec/expression/TermExpr.h"
#include "exec/expression/UnaryExpr.h"
#include "exec/expression/ValueExpr.h"
#include "expr/ITypeExpr.h"

#include <memory>

Expand Down Expand Up @@ -285,6 +287,16 @@ CompileExpression(const expr::TypedExprPtr& expr,
context->get_segment(),
context->get_active_count(),
context->query_config()->get_expr_batch_size());
} else if (auto column_expr =
std::dynamic_pointer_cast<const milvus::expr::NullExpr>(
expr)) {
result = std::make_shared<PhyNullExpr>(
compiled_inputs,
column_expr,
"PhyNullExpr",
context->get_segment(),
context->get_active_count(),
context->query_config()->get_expr_batch_size());
} else {
PanicInfo(ExprInvalid, "unsupport expr: ", expr->ToString());
}
Expand Down
150 changes: 150 additions & 0 deletions internal/core/src/exec/expression/NullExpr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "NullExpr.h"
#include <memory>
#include <utility>
#include "common/Array.h"
#include "common/Types.h"
#include "log/Log.h"
#include "query/Utils.h"
namespace milvus {
namespace exec {

void
PhyNullExpr::Eval(EvalCtx& context, VectorPtr& result) {
auto input = context.get_offset_input();
switch (expr_->column_.data_type_) {
case DataType::BOOL: {
result = ExecVisitorImpl<bool>(input);
break;
}
case DataType::INT8: {
result = ExecVisitorImpl<int8_t>(input);
break;
}
case DataType::INT16: {
result = ExecVisitorImpl<int16_t>(input);
break;
}
case DataType::INT32: {
result = ExecVisitorImpl<int32_t>(input);
break;
}
case DataType::INT64: {
result = ExecVisitorImpl<int64_t>(input);
break;
}
case DataType::FLOAT: {
result = ExecVisitorImpl<float>(input);
break;
}
case DataType::DOUBLE: {
result = ExecVisitorImpl<double>(input);
break;
}
case DataType::VARCHAR: {
if (segment_->type() == SegmentType::Growing &&
!storage::MmapManager::GetInstance()
.GetMmapConfig()
.growing_enable_mmap) {
result = ExecVisitorImpl<std::string>(input);
} else {
result = ExecVisitorImpl<std::string_view>(input);
}
break;
}
case DataType::JSON: {
result = ExecVisitorImpl<Json>(input);
break;
}
case DataType::ARRAY: {
result = ExecVisitorImpl<ArrayView>(input);
break;
}
default:
PanicInfo(DataTypeInvalid,
"unsupported data type: {}",
expr_->column_.data_type_);
}
}

template <typename T>
VectorPtr
PhyNullExpr::ExecVisitorImpl(OffsetVector* input) {
if (auto res = PreCheckNullable(input)) {
return res;
}
auto valid_res =
(input != nullptr)
? ProcessChunksForValidByOffsets<T>(is_index_mode_, *input)
: ProcessChunksForValid<T>(is_index_mode_);
TargetBitmap res = valid_res.clone();
if (expr_->op_ == proto::plan::NullExpr_NullOp_IsNull) {
res.flip();
}
auto res_vec =
std::make_shared<ColumnVector>(std::move(res), std::move(valid_res));
return res_vec;
}

// if nullable is false, no need to process chunks
// res is all false when is null, and is all true when is not null
ColumnVectorPtr
PhyNullExpr::PreCheckNullable(OffsetVector* input) {
if (expr_->column_.nullable_) {
return nullptr;
}

int64_t batch_size;
if (input != nullptr) {
batch_size = input->size();
} else {
batch_size = precheck_pos_ + batch_size_ >= active_count_
? active_count_ - precheck_pos_
: batch_size_;
precheck_pos_ += batch_size;
}
if (cached_precheck_res_ != nullptr &&
cached_precheck_res_->size() == batch_size) {
return cached_precheck_res_;
}

auto res_vec = std::make_shared<ColumnVector>(TargetBitmap(batch_size),
TargetBitmap(batch_size));
TargetBitmapView res(res_vec->GetRawData(), batch_size);
TargetBitmapView valid_res(res_vec->GetValidRawData(), batch_size);
valid_res.set();
switch (expr_->op_) {
case proto::plan::NullExpr_NullOp_IsNull: {
res.reset();
break;
}
case proto::plan::NullExpr_NullOp_IsNotNull: {
res.set();
break;
}
default:
PanicInfo(ExprInvalid,
"unsupported null expr type {}",
proto::plan::NullExpr_NullOp_Name(expr_->op_));
}
cached_precheck_res_ = res_vec;
return cached_precheck_res_;
}

} //namespace exec
} // namespace milvus
65 changes: 65 additions & 0 deletions internal/core/src/exec/expression/NullExpr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <fmt/core.h>

#include "common/EasyAssert.h"
#include "common/Types.h"
#include "common/Vector.h"
#include "exec/expression/Expr.h"
#include "expr/ITypeExpr.h"
#include "segcore/SegmentInterface.h"

namespace milvus {
namespace exec {

class PhyNullExpr : public SegmentExpr {
public:
PhyNullExpr(const std::vector<std::shared_ptr<Expr>>& input,
const std::shared_ptr<const milvus::expr::NullExpr>& expr,
const std::string& name,
const segcore::SegmentInternalInterface* segment,
int64_t active_count,
int64_t batch_size)
: SegmentExpr(std::move(input),
name,
segment,
expr->column_.field_id_,
active_count,
batch_size),
expr_(expr) {
}

void
Eval(EvalCtx& context, VectorPtr& result) override;

private:
ColumnVectorPtr
PreCheckNullable(OffsetVector* input);

template <typename T>
VectorPtr
ExecVisitorImpl(OffsetVector* input);

private:
std::shared_ptr<const milvus::expr::NullExpr> expr_;
ColumnVectorPtr cached_precheck_res_{nullptr};
int64_t precheck_pos_{0};
};
} //namespace exec
} // namespace milvus
8 changes: 4 additions & 4 deletions internal/core/src/exec/expression/UnaryExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -905,10 +905,10 @@ PhyUnaryRangeFilterExpr::PreCheckOverflow(OffsetVector* input) {
: batch_size_;
overflow_check_pos_ += batch_size;
}
auto valid = (input != nullptr)
? ProcessChunksForValidByOffsets<T>(
CanUseIndex<T>(), *input)
: ProcessChunksForValid<T>(CanUseIndex<T>());
auto valid =
(input != nullptr)
? ProcessChunksForValidByOffsets<T>(is_index_mode_, *input)
: ProcessChunksForValid<T>(is_index_mode_);
auto res_vec = std::make_shared<ColumnVector>(
TargetBitmap(batch_size), std::move(valid));
TargetBitmapView res(res_vec->GetRawData(), batch_size);
Expand Down
28 changes: 25 additions & 3 deletions internal/core/src/expr/ITypeExpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,22 +116,26 @@ struct ColumnInfo {
DataType data_type_;
DataType element_type_;
std::vector<std::string> nested_path_;
bool nullable_;

ColumnInfo(const proto::plan::ColumnInfo& column_info)
: field_id_(column_info.field_id()),
data_type_(static_cast<DataType>(column_info.data_type())),
element_type_(static_cast<DataType>(column_info.element_type())),
nested_path_(column_info.nested_path().begin(),
column_info.nested_path().end()) {
column_info.nested_path().end()),
nullable_(column_info.nullable()) {
}

ColumnInfo(FieldId field_id,
DataType data_type,
std::vector<std::string> nested_path = {})
std::vector<std::string> nested_path = {},
bool nullable = false)
: field_id_(field_id),
data_type_(data_type),
element_type_(DataType::NONE),
nested_path_(std::move(nested_path)) {
nested_path_(std::move(nested_path)),
nullable_(nullable) {
}

bool
Expand Down Expand Up @@ -627,6 +631,24 @@ class BinaryArithOpEvalRangeExpr : public ITypeFilterExpr {
const proto::plan::GenericValue value_;
};

class NullExpr : public ITypeFilterExpr {
public:
explicit NullExpr(const ColumnInfo& column, NullExprType op)
: ITypeFilterExpr(), column_(column), op_(op) {
}

std::string
ToString() const override {
return fmt::format("NullExpr:[Column: {}, Operator: {} ",
column_.ToString(),
NullExpr_NullOp_Name(op_));
}

public:
const ColumnInfo column_;
NullExprType op_;
};

class CallExpr : public ITypeFilterExpr {
public:
CallExpr(const std::string fun_name,
Expand Down
14 changes: 14 additions & 0 deletions internal/core/src/query/PlanProto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,16 @@ ProtoParser::ParseUnaryRangeExprs(const proto::plan::UnaryRangeExpr& expr_pb) {
expr::ColumnInfo(column_info), expr_pb.op(), expr_pb.value());
}

expr::TypedExprPtr
ProtoParser::ParseNullExprs(const proto::plan::NullExpr& expr_pb) {
auto& column_info = expr_pb.column_info();
auto field_id = FieldId(column_info.field_id());
auto data_type = schema[field_id].get_data_type();
Assert(data_type == static_cast<DataType>(column_info.data_type()));
return std::make_shared<milvus::expr::NullExpr>(
expr::ColumnInfo(column_info), expr_pb.op());
}

expr::TypedExprPtr
ProtoParser::ParseBinaryRangeExprs(
const proto::plan::BinaryRangeExpr& expr_pb) {
Expand Down Expand Up @@ -521,6 +531,10 @@ ProtoParser::ParseExprs(const proto::plan::Expr& expr_pb,
result = ParseValueExprs(expr_pb.value_expr());
break;
}
case ppe::kNullExpr: {
result = ParseNullExprs(expr_pb.null_expr());
break;
}
default: {
std::string s;
google::protobuf::TextFormat::PrintToString(expr_pb, &s);
Expand Down
3 changes: 3 additions & 0 deletions internal/core/src/query/PlanProto.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ class ProtoParser {
expr::TypedExprPtr
ParseExistExprs(const proto::plan::ExistsExpr& expr_pb);

expr::TypedExprPtr
ParseNullExprs(const proto::plan::NullExpr& expr_pb);

expr::TypedExprPtr
ParseJsonContainsExprs(const proto::plan::JSONContainsExpr& expr_pb);

Expand Down
Loading

0 comments on commit 907fc24

Please sign in to comment.