From 4fbe3c9142ccbfab8b6e515821815a508c886cad Mon Sep 17 00:00:00 2001 From: cqy123456 <39671710+cqy123456@users.noreply.github.com> Date: Wed, 1 Nov 2023 02:20:15 +0800 Subject: [PATCH] replace loaded binlog with binlog index for search performance (#27673) Signed-off-by: cqy123456 --- configs/milvus.yaml | 6 +- internal/core/src/segcore/FieldIndexing.cpp | 6 +- internal/core/src/segcore/FieldIndexing.h | 2 +- .../core/src/segcore/IndexConfigGenerator.cpp | 6 +- .../core/src/segcore/IndexConfigGenerator.h | 9 +- internal/core/src/segcore/SegcoreConfig.h | 16 +- .../core/src/segcore/SegmentGrowingImpl.cpp | 4 +- .../core/src/segcore/SegmentSealedImpl.cpp | 151 +++++++- internal/core/src/segcore/SegmentSealedImpl.h | 25 +- internal/core/src/segcore/segcore_init_c.cpp | 4 +- internal/core/src/segcore/segcore_init_c.h | 2 +- internal/core/src/segcore/segment_c.cpp | 4 +- internal/core/unittest/CMakeLists.txt | 1 + internal/core/unittest/bench/bench_search.cpp | 2 +- internal/core/unittest/test_binlog_index.cpp | 326 ++++++++++++++++++ internal/core/unittest/test_float16.cpp | 2 +- internal/core/unittest/test_growing.cpp | 2 +- internal/core/unittest/test_growing_index.cpp | 4 +- internal/proto/planpb/plan.pb.go | 3 - internal/proto/proxypb/proxy.pb.go | 5 +- internal/proto/rootcoordpb/root_coord.pb.go | 40 +-- internal/querynodev2/server.go | 8 +- pkg/util/paramtable/component_param.go | 38 +- pkg/util/paramtable/component_param_test.go | 18 +- tests/python_client/testcases/test_search.py | 4 +- 25 files changed, 575 insertions(+), 113 deletions(-) create mode 100644 internal/core/unittest/test_binlog_index.cpp diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 618e3bceea41b..838b4c00a28e2 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -265,10 +265,10 @@ queryNode: # This parameter is only useful when enable-disk = true. # And this value should be a number greater than 1 and less than 32. chunkRows: 1024 # The number of vectors in a chunk. - growing: # growing a vector index for growing segment to accelerate search + interimIndex: # build a vector temperate index for growing segment or binlog to accelerate search enableIndex: true - nlist: 128 # growing segment index nlist - nprobe: 16 # nprobe to search growing segment, based on your accuracy requirement, must smaller than nlist + nlist: 128 # segment index nlist + nprobe: 16 # nprobe to search segment, based on your accuracy requirement, must smaller than nlist loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments enableDisk: false # enable querynode load disk index, and search on disk index maxDiskUsagePercentage: 95 diff --git a/internal/core/src/segcore/FieldIndexing.cpp b/internal/core/src/segcore/FieldIndexing.cpp index f86245f2e57f2..7cf9c0a7f45e8 100644 --- a/internal/core/src/segcore/FieldIndexing.cpp +++ b/internal/core/src/segcore/FieldIndexing.cpp @@ -31,8 +31,10 @@ VectorFieldIndexing::VectorFieldIndexing(const FieldMeta& field_meta, : FieldIndexing(field_meta, segcore_config), build(false), sync_with_index(false), - config_(std::make_unique( - segment_max_row_count, field_index_meta, segcore_config)) { + config_(std::make_unique(segment_max_row_count, + field_index_meta, + segcore_config, + SegmentType::Growing)) { index_ = std::make_unique( config_->GetIndexType(), config_->GetMetricType(), diff --git a/internal/core/src/segcore/FieldIndexing.h b/internal/core/src/segcore/FieldIndexing.h index cee451fe99258..bd54ad08b54c0 100644 --- a/internal/core/src/segcore/FieldIndexing.h +++ b/internal/core/src/segcore/FieldIndexing.h @@ -243,7 +243,7 @@ class IndexingRecord { for (auto& [field_id, field_meta] : schema_.get_fields()) { ++offset_id; if (field_meta.is_vector() && - segcore_config_.get_enable_growing_segment_index()) { + segcore_config_.get_enable_interim_segment_index()) { // TODO: skip binary small index now, reenable after config.yaml is ready if (field_meta.get_data_type() == DataType::VECTOR_BINARY) { continue; diff --git a/internal/core/src/segcore/IndexConfigGenerator.cpp b/internal/core/src/segcore/IndexConfigGenerator.cpp index 37f6f9563bdba..f40317d8be113 100644 --- a/internal/core/src/segcore/IndexConfigGenerator.cpp +++ b/internal/core/src/segcore/IndexConfigGenerator.cpp @@ -13,15 +13,15 @@ #include "log/Log.h" namespace milvus::segcore { - VecIndexConfig::VecIndexConfig(const int64_t max_index_row_cout, const FieldIndexMeta& index_meta_, - const SegcoreConfig& config) + const SegcoreConfig& config, + const SegmentType& segment_type) : max_index_row_count_(max_index_row_cout), config_(config) { origin_index_type_ = index_meta_.GetIndexType(); metric_type_ = index_meta_.GeMetricType(); - index_type_ = support_index_types[0]; + index_type_ = support_index_types.at(segment_type); build_params_[knowhere::meta::METRIC_TYPE] = metric_type_; build_params_[knowhere::indexparam::NLIST] = std::to_string(config_.get_nlist()); diff --git a/internal/core/src/segcore/IndexConfigGenerator.h b/internal/core/src/segcore/IndexConfigGenerator.h index cf31fc63a5e4a..a26853df51ed8 100644 --- a/internal/core/src/segcore/IndexConfigGenerator.h +++ b/internal/core/src/segcore/IndexConfigGenerator.h @@ -16,6 +16,7 @@ #include "knowhere/config.h" #include "SegcoreConfig.h" #include "common/QueryInfo.h" +#include "common/type_c.h" namespace milvus::segcore { @@ -27,8 +28,9 @@ enum class IndexConfigLevel { }; class VecIndexConfig { - inline static const std::vector support_index_types = { - knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC}; + inline static const std::map support_index_types = + {{SegmentType::Growing, knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC}, + {SegmentType::Sealed, knowhere::IndexEnum::INDEX_FAISS_IVFFLAT}}; inline static const std::map index_build_ratio = { {knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, 0.1}}; @@ -39,7 +41,8 @@ class VecIndexConfig { public: VecIndexConfig(const int64_t max_index_row_count, const FieldIndexMeta& index_meta_, - const SegcoreConfig& config); + const SegcoreConfig& config, + const SegmentType& segment_type); int64_t GetBuildThreshold() const noexcept; diff --git a/internal/core/src/segcore/SegcoreConfig.h b/internal/core/src/segcore/SegcoreConfig.h index c7f3175119018..51a828125045b 100644 --- a/internal/core/src/segcore/SegcoreConfig.h +++ b/internal/core/src/segcore/SegcoreConfig.h @@ -64,20 +64,20 @@ class SegcoreConfig { } void - set_enable_growing_segment_index(bool enable_growing_segment_index) { - enable_growing_segment_index_ = enable_growing_segment_index; + set_enable_interim_segment_index(bool enable_interim_segment_index) { + this->enable_interim_segment_index_ = enable_interim_segment_index; } bool - get_enable_growing_segment_index() const { - return enable_growing_segment_index_; + get_enable_interim_segment_index() const { + return enable_interim_segment_index_; } private: - bool enable_growing_segment_index_ = false; - int64_t chunk_rows_ = 32 * 1024; - int64_t nlist_ = 100; - int64_t nprobe_ = 4; + inline static bool enable_interim_segment_index_ = false; + inline static int64_t chunk_rows_ = 32 * 1024; + inline static int64_t nlist_ = 100; + inline static int64_t nprobe_ = 4; }; } // namespace milvus::segcore diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 69703b99529a9..3dcfb53fdc597 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -112,7 +112,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset, field_meta); } //insert vector data into index - if (segcore_config_.get_enable_growing_segment_index()) { + if (segcore_config_.get_enable_interim_segment_index()) { indexing_record_.AppendingIndex( reserved_offset, num_rows, @@ -195,7 +195,7 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) { insert_record_.get_field_data_base(field_id)->set_data_raw( reserved_offset, field_data); } - if (segcore_config_.get_enable_growing_segment_index()) { + if (segcore_config_.get_enable_interim_segment_index()) { auto offset = reserved_offset; for (auto& data : field_data) { auto row_count = data->get_num_rows(); diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index e24a290d1f081..5bfc44421bbc1 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -41,6 +41,7 @@ #include "storage/ChunkCacheSingleton.h" #include "common/File.h" #include "common/Tracer.h" +#include "index/VectorMemIndex.h" namespace milvus::segcore { @@ -98,16 +99,19 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) { std::to_string(num_rows_.value()) + ")"); } AssertInfo(!vector_indexings_.is_ready(field_id), "vec index is not ready"); + if (get_bit(field_data_ready_bitset_, field_id)) { + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + } else if (get_bit(binlog_index_bitset_, field_id)) { + set_bit(binlog_index_bitset_, field_id, false); + vector_indexings_.drop_field_indexing(field_id); + } + update_row_count(row_count); vector_indexings_.append_field_indexing( field_id, metric_type, std::move(const_cast(info).index)); - set_bit(index_ready_bitset_, field_id, true); - update_row_count(row_count); - // release field column - fields_.erase(field_id); - set_bit(field_data_ready_bitset_, field_id, false); } void @@ -366,11 +370,29 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { insert_record_.seal_pks(); } + bool use_temp_index = false; + { + // update num_rows to build temperate binlog index + std::unique_lock lck(mutex_); + update_row_count(num_rows); + } + + if (generate_binlog_index(field_id)) { + std::unique_lock lck(mutex_); + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + use_temp_index = true; + } + + if (!use_temp_index) { + std::unique_lock lck(mutex_); + set_bit(field_data_ready_bitset_, field_id, true); + } + } + { std::unique_lock lck(mutex_); - set_bit(field_data_ready_bitset_, field_id, true); + update_row_count(num_rows); } - std::unique_lock lck(mutex_); - update_row_count(num_rows); } void @@ -609,7 +631,26 @@ SegmentSealedImpl::vector_search(SearchInfo& search_info, AssertInfo(field_meta.is_vector(), "The meta type of vector field is not vector type"); - if (get_bit(index_ready_bitset_, field_id)) { + if (get_bit(binlog_index_bitset_, field_id)) { + AssertInfo( + vec_binlog_config_.find(field_id) != vec_binlog_config_.end(), + "The binlog params is not generate."); + auto binlog_search_info = + vec_binlog_config_.at(field_id)->GetSearchConf(search_info); + + AssertInfo(vector_indexings_.is_ready(field_id), + "vector indexes isn't ready for field " + + std::to_string(field_id.get())); + query::SearchOnSealedIndex(*schema_, + vector_indexings_, + binlog_search_info, + query_data, + query_count, + bitset, + output); + milvus::tracer::AddEvent( + "finish_searching_vector_temperate_binlog_index"); + } else if (get_bit(index_ready_bitset_, field_id)) { AssertInfo(vector_indexings_.is_ready(field_id), "vector indexes isn't ready for field " + std::to_string(field_id.get())); @@ -676,7 +717,8 @@ SegmentSealedImpl::get_vector(FieldId field_id, auto& field_meta = schema_->operator[](field_id); AssertInfo(field_meta.is_vector(), "vector field is not vector type"); - if (!get_bit(index_ready_bitset_, field_id)) { + if (!get_bit(index_ready_bitset_, field_id) && + !get_bit(binlog_index_bitset_, field_id)) { return fill_with_empty(field_id, count); } @@ -771,8 +813,14 @@ SegmentSealedImpl::DropFieldData(const FieldId field_id) { } else { auto& field_meta = schema_->operator[](field_id); std::unique_lock lck(mutex_); - set_bit(field_data_ready_bitset_, field_id, false); - insert_record_.drop_field_data(field_id); + if (get_bit(field_data_ready_bitset_, field_id)) { + set_bit(field_data_ready_bitset_, field_id, false); + insert_record_.drop_field_data(field_id); + } + if (get_bit(binlog_index_bitset_, field_id)) { + set_bit(binlog_index_bitset_, field_id, false); + vector_indexings_.drop_field_indexing(field_id); + } lck.unlock(); } } @@ -807,7 +855,8 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const { } auto& request_fields = plan->extra_info_opt_.value().involved_fields_; - auto field_ready_bitset = field_data_ready_bitset_ | index_ready_bitset_; + auto field_ready_bitset = + field_data_ready_bitset_ | index_ready_bitset_ | binlog_index_bitset_; AssertInfo(request_fields.size() == field_ready_bitset.size(), "Request fields size not equal to field ready bitset size when " "check search"); @@ -823,13 +872,19 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const { } } -SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema, int64_t segment_id) - : field_data_ready_bitset_(schema->size()), +SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema, + IndexMetaPtr index_meta, + const SegcoreConfig& segcore_config, + int64_t segment_id) + : segcore_config_(segcore_config), + field_data_ready_bitset_(schema->size()), index_ready_bitset_(schema->size()), + binlog_index_bitset_(schema->size()), scalar_indexings_(schema->size()), insert_record_(*schema, MAX_ROW_COUNT), schema_(schema), - id_(segment_id) { + id_(segment_id), + col_index_meta_(index_meta) { } SegmentSealedImpl::~SegmentSealedImpl() { @@ -1081,7 +1136,8 @@ SegmentSealedImpl::bulk_subscript(FieldId field_id, bool SegmentSealedImpl::HasIndex(FieldId field_id) const { std::shared_lock lck(mutex_); - return get_bit(index_ready_bitset_, field_id); + return get_bit(index_ready_bitset_, field_id) | + get_bit(binlog_index_bitset_, field_id); } bool @@ -1100,7 +1156,8 @@ SegmentSealedImpl::HasRawData(int64_t field_id) const { auto fieldID = FieldId(field_id); const auto& field_meta = schema_->operator[](fieldID); if (datatype_is_vector(field_meta.get_data_type())) { - if (get_bit(index_ready_bitset_, fieldID)) { + if (get_bit(index_ready_bitset_, fieldID) | + get_bit(binlog_index_bitset_, fieldID)) { AssertInfo(vector_indexings_.is_ready(fieldID), "vector index is not ready"); auto field_indexing = vector_indexings_.get_field_indexing(fieldID); @@ -1249,4 +1306,62 @@ SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk, bitset_chunk |= mask; } +bool +SegmentSealedImpl::generate_binlog_index(const FieldId field_id) { + if (col_index_meta_ == nullptr) + return false; + auto& field_meta = schema_->operator[](field_id); + + if (field_meta.is_vector() && + field_meta.get_data_type() == DataType::VECTOR_FLOAT && + segcore_config_.get_enable_interim_segment_index()) { + try { + auto& field_index_meta = + col_index_meta_->GetFieldIndexMeta(field_id); + auto& index_params = field_index_meta.GetIndexParams(); + if (index_params.find(knowhere::meta::INDEX_TYPE) == + index_params.end() || + index_params.at(knowhere::meta::INDEX_TYPE) == + knowhere::IndexEnum::INDEX_FAISS_IDMAP) { + return false; + } + // get binlog data and meta + auto row_count = num_rows_.value(); + auto dim = field_meta.get_dim(); + auto vec_data = fields_.at(field_id); + auto dataset = + knowhere::GenDataSet(row_count, dim, (void*)vec_data->Data()); + dataset->SetIsOwner(false); + // generate index params + auto field_binlog_config = std::unique_ptr( + new VecIndexConfig(row_count, + field_index_meta, + segcore_config_, + SegmentType::Sealed)); + auto build_config = field_binlog_config->GetBuildBaseParams(); + build_config[knowhere::meta::DIM] = std::to_string(dim); + build_config[knowhere::meta::NUM_BUILD_THREAD] = std::to_string(1); + auto index_metric = field_binlog_config->GetMetricType(); + + index::IndexBasePtr vec_index = + std::make_unique( + field_binlog_config->GetIndexType(), + index_metric, + knowhere::Version::GetCurrentVersion().VersionNumber()); + vec_index->BuildWithDataset(dataset, build_config); + vector_indexings_.append_field_indexing( + field_id, index_metric, std::move(vec_index)); + + vec_binlog_config_[field_id] = std::move(field_binlog_config); + set_bit(binlog_index_bitset_, field_id, true); + + return true; + } catch (std::exception& e) { + return false; + } + } else { + return false; + } +} + } // namespace milvus::segcore diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 7a53699af99dc..6c849200abbbe 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -33,12 +33,16 @@ #include "index/ScalarIndex.h" #include "sys/mman.h" #include "common/Types.h" +#include "common/IndexMeta.h" namespace milvus::segcore { class SegmentSealedImpl : public SegmentSealed { public: - explicit SegmentSealedImpl(SchemaPtr schema, int64_t segment_id); + explicit SegmentSealedImpl(SchemaPtr schema, + IndexMetaPtr index_meta, + const SegcoreConfig& segcore_config, + int64_t segment_id); ~SegmentSealedImpl() override; void LoadIndex(const LoadIndexInfo& info) override; @@ -240,10 +244,14 @@ class SegmentSealedImpl : public SegmentSealed { void LoadScalarIndex(const LoadIndexInfo& info); + bool + generate_binlog_index(const FieldId field_id); + private: // segment loading state BitsetType field_data_ready_bitset_; BitsetType index_ready_bitset_; + BitsetType binlog_index_bitset_; std::atomic system_ready_count_ = 0; // segment data @@ -266,11 +274,22 @@ class SegmentSealedImpl : public SegmentSealed { SchemaPtr schema_; int64_t id_; std::unordered_map> fields_; + + // only useful in binlog + IndexMetaPtr col_index_meta_; + SegcoreConfig segcore_config_; + std::unordered_map> + vec_binlog_config_; }; inline SegmentSealedPtr -CreateSealedSegment(SchemaPtr schema, int64_t segment_id = -1) { - return std::make_unique(schema, segment_id); +CreateSealedSegment( + SchemaPtr schema, + IndexMetaPtr index_meta = nullptr, + int64_t segment_id = -1, + const SegcoreConfig& segcore_config = SegcoreConfig::default_config()) { + return std::make_unique( + schema, index_meta, segcore_config, segment_id); } } // namespace milvus::segcore diff --git a/internal/core/src/segcore/segcore_init_c.cpp b/internal/core/src/segcore/segcore_init_c.cpp index 5ebf53f651f01..85f1a1996d79e 100644 --- a/internal/core/src/segcore/segcore_init_c.cpp +++ b/internal/core/src/segcore/segcore_init_c.cpp @@ -32,10 +32,10 @@ SegcoreSetChunkRows(const int64_t value) { } extern "C" void -SegcoreSetEnableGrowingSegmentIndex(const bool value) { +SegcoreSetEnableTempSegmentIndex(const bool value) { milvus::segcore::SegcoreConfig& config = milvus::segcore::SegcoreConfig::default_config(); - config.set_enable_growing_segment_index(value); + config.set_enable_interim_segment_index(value); } extern "C" void diff --git a/internal/core/src/segcore/segcore_init_c.h b/internal/core/src/segcore/segcore_init_c.h index ace8e9e723f75..a0293c7234a06 100644 --- a/internal/core/src/segcore/segcore_init_c.h +++ b/internal/core/src/segcore/segcore_init_c.h @@ -22,7 +22,7 @@ void SegcoreSetChunkRows(const int64_t); void -SegcoreSetEnableGrowingSegmentIndex(const bool); +SegcoreSetEnableTempSegmentIndex(const bool); void SegcoreSetNlist(const int64_t); diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index a9b933547db02..51e589b26f929 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -41,8 +41,8 @@ NewSegment(CCollection collection, SegmentType seg_type, int64_t segment_id) { } case Sealed: case Indexing: - segment = milvus::segcore::CreateSealedSegment(col->get_schema(), - segment_id); + segment = milvus::segcore::CreateSealedSegment( + col->get_schema(), col->GetIndexMeta(), segment_id); break; default: LOG_SEGCORE_ERROR_ << "invalid segment type " diff --git a/internal/core/unittest/CMakeLists.txt b/internal/core/unittest/CMakeLists.txt index 4d5971b9b9f4a..544b135dee9ab 100644 --- a/internal/core/unittest/CMakeLists.txt +++ b/internal/core/unittest/CMakeLists.txt @@ -57,6 +57,7 @@ set(MILVUS_TEST_FILES test_always_true_expr.cpp test_plan_proto.cpp test_chunk_cache.cpp + test_binlog_index.cpp ) if ( BUILD_DISK_ANN STREQUAL "ON" ) diff --git a/internal/core/unittest/bench/bench_search.cpp b/internal/core/unittest/bench/bench_search.cpp index d1ff6a3813fe6..9f63d61ed61ca 100644 --- a/internal/core/unittest/bench/bench_search.cpp +++ b/internal/core/unittest/bench/bench_search.cpp @@ -75,7 +75,7 @@ Search_GrowingIndex(benchmark::State& state) { FieldIndexMeta fieldIndexMeta(schema->get_field_id(FieldName("fakevec")), std::move(index_params), std::move(type_params)); - segconf.set_enable_growing_segment_index(true); + segconf.set_enable_interim_segment_index(true); std::map filedMap = { {schema->get_field_id(FieldName("fakevec")), fieldIndexMeta}}; IndexMetaPtr metaPtr = diff --git a/internal/core/unittest/test_binlog_index.cpp b/internal/core/unittest/test_binlog_index.cpp new file mode 100644 index 0000000000000..d96b78776ef89 --- /dev/null +++ b/internal/core/unittest/test_binlog_index.cpp @@ -0,0 +1,326 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#include +#include +#include + +#include "pb/plan.pb.h" +#include "segcore/segcore_init_c.h" +#include "segcore/SegmentSealed.h" +#include "segcore/SegmentSealedImpl.h" +#include "pb/schema.pb.h" +#include "test_utils/DataGen.h" +#include "index/IndexFactory.h" +#include "query/Plan.h" +#include "knowhere/comp/brute_force.h" + +using namespace milvus::segcore; +using namespace milvus; +namespace pb = milvus::proto; + +std::shared_ptr +GenRandomFloatVecData(int rows, int dim, int seed = 42) { + std::shared_ptr vecs = + std::shared_ptr(new float[rows * dim]); + std::mt19937 rng(seed); + std::uniform_int_distribution<> distrib(0.0, 100.0); + for (int i = 0; i < rows * dim; ++i) vecs[i] = (float)distrib(rng); + return std::move(vecs); +} + +inline float +GetKnnSearchRecall( + size_t nq, int64_t* gt_ids, size_t gt_k, int64_t* res_ids, size_t res_k) { + uint32_t matched_num = 0; + for (auto i = 0; i < nq; ++i) { + std::vector ids_0(gt_ids + i * gt_k, + gt_ids + i * gt_k + res_k); + std::vector ids_1(res_ids + i * res_k, + res_ids + i * res_k + res_k); + + std::sort(ids_0.begin(), ids_0.end()); + std::sort(ids_1.begin(), ids_1.end()); + + std::vector v(std::max(ids_0.size(), ids_1.size())); + std::vector::iterator it; + it = std::set_intersection( + ids_0.begin(), ids_0.end(), ids_1.begin(), ids_1.end(), v.begin()); + v.resize(it - v.begin()); + matched_num += v.size(); + } + return ((float)matched_num) / ((float)nq * res_k); +} + +using Param = const char*; +class BinlogIndexTest : public ::testing::TestWithParam { + void + SetUp() override { + auto param = GetParam(); + metricType = param; + + schema = std::make_shared(); + + auto metric_type = metricType; + vec_field_id = schema->AddDebugField( + "fakevec", DataType::VECTOR_FLOAT, data_d, metric_type); + auto i64_fid = schema->AddDebugField("counter", DataType::INT64); + schema->set_primary_field_id(i64_fid); + + // generate vector field data + vec_data = GenRandomFloatVecData(data_n, data_d); + + vec_field_data = + storage::CreateFieldData(DataType::VECTOR_FLOAT, data_d); + vec_field_data->FillFieldData(vec_data.get(), data_n); + } + + public: + IndexMetaPtr + GetCollectionIndexMeta(std::string index_type) { + std::map index_params = { + {"index_type", index_type}, + {"metric_type", metricType}, + {"nlist", "1024"}}; + std::map type_params = {{"dim", "128"}}; + FieldIndexMeta fieldIndexMeta( + vec_field_id, std::move(index_params), std::move(type_params)); + auto& config = SegcoreConfig::default_config(); + config.set_chunk_rows(1024); + config.set_enable_interim_segment_index(true); + std::map filedMap = { + {vec_field_id, fieldIndexMeta}}; + IndexMetaPtr metaPtr = + std::make_shared(226985, std::move(filedMap)); + return std::move(metaPtr); + } + + void + LoadOtherFields() { + auto dataset = DataGen(schema, data_n); + // load id + LoadFieldDataInfo row_id_info; + FieldMeta row_id_field_meta( + FieldName("RowID"), RowFieldID, DataType::INT64); + auto field_data = std::make_shared>( + DataType::INT64); + field_data->FillFieldData(dataset.row_ids_.data(), data_n); + auto field_data_info = + FieldDataInfo{RowFieldID.get(), + data_n, + std::vector{field_data}}; + segment->LoadFieldData(RowFieldID, field_data_info); + // load ts + LoadFieldDataInfo ts_info; + FieldMeta ts_field_meta( + FieldName("Timestamp"), TimestampFieldID, DataType::INT64); + field_data = std::make_shared>( + DataType::INT64); + field_data->FillFieldData(dataset.timestamps_.data(), data_n); + field_data_info = + FieldDataInfo{TimestampFieldID.get(), + data_n, + std::vector{field_data}}; + segment->LoadFieldData(TimestampFieldID, field_data_info); + } + + protected: + milvus::SchemaPtr schema; + const char* metricType; + size_t data_n = 10000; + size_t data_d = 128; + size_t topk = 10; + milvus::storage::FieldDataPtr vec_field_data = nullptr; + milvus::segcore::SegmentSealedPtr segment = nullptr; + milvus::FieldId vec_field_id; + std::shared_ptr vec_data; +}; + +INSTANTIATE_TEST_CASE_P(MetricTypeParameters, + BinlogIndexTest, + ::testing::Values(knowhere::metric::L2)); + +TEST_P(BinlogIndexTest, Accuracy) { + IndexMetaPtr collection_index_meta = + GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT); + + segment = CreateSealedSegment(schema, collection_index_meta); + LoadOtherFields(); + + auto& segcore_config = milvus::segcore::SegcoreConfig::default_config(); + segcore_config.set_enable_interim_segment_index(true); + segcore_config.set_nprobe(32); + // 1. load field data, and build binlog index for binlog data + auto field_data_info = + FieldDataInfo{vec_field_id.get(), + data_n, + std::vector{vec_field_data}}; + segment->LoadFieldData(vec_field_id, field_data_info); + //assert segment has been built binlog index + EXPECT_TRUE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_FALSE(segment->HasFieldData(vec_field_id)); + + // 2. search binlog index + auto num_queries = 10; + auto query_ptr = GenRandomFloatVecData(num_queries, data_d); + + milvus::proto::plan::PlanNode plan_node; + auto vector_anns = plan_node.mutable_vector_anns(); + vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector); + vector_anns->set_placeholder_tag("$0"); + vector_anns->set_field_id(vec_field_id.get()); + auto query_info = vector_anns->mutable_query_info(); + query_info->set_topk(topk); + query_info->set_round_decimal(3); + query_info->set_metric_type(metricType); + query_info->set_search_params(R"({"nprobe": 1024})"); + auto plan_str = plan_node.SerializeAsString(); + + auto ph_group_raw = + CreatePlaceholderGroupFromBlob(num_queries, data_d, query_ptr.get()); + + auto plan = milvus::query::CreateSearchPlanByExpr( + *schema, plan_str.data(), plan_str.size()); + auto ph_group = + ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString()); + + std::vector ph_group_arr = { + ph_group.get()}; + auto nlist = segcore_config.get_nlist(); + auto binlog_index_sr = segment->Search(plan.get(), ph_group.get()); + ASSERT_EQ(binlog_index_sr->total_nq_, num_queries); + EXPECT_EQ(binlog_index_sr->unity_topK_, topk); + EXPECT_EQ(binlog_index_sr->distances_.size(), num_queries * topk); + EXPECT_EQ(binlog_index_sr->seg_offsets_.size(), num_queries * topk); + + // 3. update vector index + { + milvus::index::CreateIndexInfo create_index_info; + create_index_info.field_type = DataType::VECTOR_FLOAT; + create_index_info.metric_type = metricType; + create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT; + create_index_info.index_engine_version = + knowhere::Version::GetCurrentVersion().VersionNumber(); + auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex( + create_index_info, milvus::storage::FileManagerContext()); + + auto build_conf = + knowhere::Json{{knowhere::meta::METRIC_TYPE, metricType}, + {knowhere::meta::DIM, std::to_string(data_d)}, + {knowhere::indexparam::NLIST, "1024"}}; + + auto database = knowhere::GenDataSet(data_n, data_d, vec_data.get()); + indexing->BuildWithDataset(database, build_conf); + + LoadIndexInfo load_info; + load_info.field_id = vec_field_id.get(); + + load_info.index = std::move(indexing); + load_info.index_params["metric_type"] = metricType; + segment->DropFieldData(vec_field_id); + ASSERT_NO_THROW(segment->LoadIndex(load_info)); + EXPECT_TRUE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_FALSE(segment->HasFieldData(vec_field_id)); + auto ivf_sr = segment->Search(plan.get(), ph_group.get()); + auto similary = GetKnnSearchRecall(num_queries, + binlog_index_sr->seg_offsets_.data(), + topk, + ivf_sr->seg_offsets_.data(), + topk); + ASSERT_GT(similary, 0.45); + } +} + +TEST_P(BinlogIndexTest, DisableInterimIndex) { + IndexMetaPtr collection_index_meta = + GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT); + + segment = CreateSealedSegment(schema, collection_index_meta); + LoadOtherFields(); + SegcoreSetEnableTempSegmentIndex(false); + + auto field_data_info = + FieldDataInfo{vec_field_id.get(), + data_n, + std::vector{vec_field_data}}; + segment->LoadFieldData(vec_field_id, field_data_info); + + EXPECT_FALSE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_TRUE(segment->HasFieldData(vec_field_id)); + // load vector index + milvus::index::CreateIndexInfo create_index_info; + create_index_info.field_type = DataType::VECTOR_FLOAT; + create_index_info.metric_type = metricType; + create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT; + create_index_info.index_engine_version = + knowhere::Version::GetCurrentVersion().VersionNumber(); + auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex( + create_index_info, milvus::storage::FileManagerContext()); + + auto build_conf = + knowhere::Json{{knowhere::meta::METRIC_TYPE, metricType}, + {knowhere::meta::DIM, std::to_string(data_d)}, + {knowhere::indexparam::NLIST, "1024"}}; + + auto database = knowhere::GenDataSet(data_n, data_d, vec_data.get()); + indexing->BuildWithDataset(database, build_conf); + + LoadIndexInfo load_info; + load_info.field_id = vec_field_id.get(); + + load_info.index = std::move(indexing); + load_info.index_params["metric_type"] = metricType; + + segment->DropFieldData(vec_field_id); + ASSERT_NO_THROW(segment->LoadIndex(load_info)); + EXPECT_TRUE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_FALSE(segment->HasFieldData(vec_field_id)); +} + +TEST_P(BinlogIndexTest, LoadBingLogWihIDMAP) { + IndexMetaPtr collection_index_meta = + GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IDMAP); + + segment = CreateSealedSegment(schema, collection_index_meta); + LoadOtherFields(); + + auto field_data_info = + FieldDataInfo{vec_field_id.get(), + data_n, + std::vector{vec_field_data}}; + segment->LoadFieldData(vec_field_id, field_data_info); + + EXPECT_FALSE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_TRUE(segment->HasFieldData(vec_field_id)); +} + +TEST_P(BinlogIndexTest, LoadBinlogWithoutIndexMeta) { + IndexMetaPtr collection_index_meta = + GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IDMAP); + + segment = CreateSealedSegment(schema, collection_index_meta); + SegcoreSetEnableTempSegmentIndex(true); + + auto field_data_info = + FieldDataInfo{vec_field_id.get(), + data_n, + std::vector{vec_field_data}}; + segment->LoadFieldData(vec_field_id, field_data_info); + + EXPECT_FALSE(segment->HasIndex(vec_field_id)); + EXPECT_EQ(segment->get_row_count(), data_n); + EXPECT_TRUE(segment->HasFieldData(vec_field_id)); +} \ No newline at end of file diff --git a/internal/core/unittest/test_float16.cpp b/internal/core/unittest/test_float16.cpp index d83c433bb3cbb..349cc9813e7f0 100644 --- a/internal/core/unittest/test_float16.cpp +++ b/internal/core/unittest/test_float16.cpp @@ -177,7 +177,7 @@ TEST(Float16, GetVector) { vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); - config.set_enable_growing_segment_index(true); + config.set_enable_interim_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(100000, std::move(filedMap)); diff --git a/internal/core/unittest/test_growing.cpp b/internal/core/unittest/test_growing.cpp index 34272c72f948f..bb6b84612b7ff 100644 --- a/internal/core/unittest/test_growing.cpp +++ b/internal/core/unittest/test_growing.cpp @@ -133,7 +133,7 @@ TEST(Growing, FillData) { vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); - config.set_enable_growing_segment_index(true); + config.set_enable_interim_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(100000, std::move(filedMap)); diff --git a/internal/core/unittest/test_growing_index.cpp b/internal/core/unittest/test_growing_index.cpp index fa612fc31c4be..428c5a4bcc596 100644 --- a/internal/core/unittest/test_growing_index.cpp +++ b/internal/core/unittest/test_growing_index.cpp @@ -37,7 +37,7 @@ TEST(GrowingIndex, Correctness) { vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); - config.set_enable_growing_segment_index(true); + config.set_enable_interim_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(226985, std::move(filedMap)); @@ -161,7 +161,7 @@ TEST_P(GrowingIndexGetVectorTest, GetVector) { vec, std::move(index_params), std::move(type_params)); auto& config = SegcoreConfig::default_config(); config.set_chunk_rows(1024); - config.set_enable_growing_segment_index(true); + config.set_enable_interim_segment_index(true); std::map filedMap = {{vec, fieldIndexMeta}}; IndexMetaPtr metaPtr = std::make_shared(100000, std::move(filedMap)); diff --git a/internal/proto/planpb/plan.pb.go b/internal/proto/planpb/plan.pb.go index 2edfb4c92706d..06de0040e91da 100644 --- a/internal/proto/planpb/plan.pb.go +++ b/internal/proto/planpb/plan.pb.go @@ -237,7 +237,6 @@ func (BinaryExpr_BinaryOp) EnumDescriptor() ([]byte, []int) { type GenericValue struct { // Types that are valid to be assigned to Val: - // // *GenericValue_BoolVal // *GenericValue_Int64Val // *GenericValue_FloatVal @@ -1298,7 +1297,6 @@ var xxx_messageInfo_AlwaysTrueExpr proto.InternalMessageInfo type Expr struct { // Types that are valid to be assigned to Expr: - // // *Expr_TermExpr // *Expr_UnaryExpr // *Expr_BinaryExpr @@ -1670,7 +1668,6 @@ func (m *QueryPlanNode) GetLimit() int64 { type PlanNode struct { // Types that are valid to be assigned to Node: - // // *PlanNode_VectorAnns // *PlanNode_Predicates // *PlanNode_Query diff --git a/internal/proto/proxypb/proxy.pb.go b/internal/proto/proxypb/proxy.pb.go index c5cf24fd78b7e..60ba4e6a11df1 100644 --- a/internal/proto/proxypb/proxy.pb.go +++ b/internal/proto/proxypb/proxy.pb.go @@ -29,9 +29,8 @@ const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package type InvalidateCollMetaCacheRequest struct { // MsgType: - // - // DropCollection -> {meta cache, dml channels} - // Other -> {meta cache} + // DropCollection -> {meta cache, dml channels} + // Other -> {meta cache} Base *commonpb.MsgBase `protobuf:"bytes,1,opt,name=base,proto3" json:"base,omitempty"` DbName string `protobuf:"bytes,2,opt,name=db_name,json=dbName,proto3" json:"db_name,omitempty"` CollectionName string `protobuf:"bytes,3,opt,name=collection_name,json=collectionName,proto3" json:"collection_name,omitempty"` diff --git a/internal/proto/rootcoordpb/root_coord.pb.go b/internal/proto/rootcoordpb/root_coord.pb.go index ff74594ed2d2e..7d16956e9bd7f 100644 --- a/internal/proto/rootcoordpb/root_coord.pb.go +++ b/internal/proto/rootcoordpb/root_coord.pb.go @@ -793,28 +793,28 @@ type RootCoordClient interface { GetComponentStates(ctx context.Context, in *milvuspb.GetComponentStatesRequest, opts ...grpc.CallOption) (*milvuspb.ComponentStates, error) GetTimeTickChannel(ctx context.Context, in *internalpb.GetTimeTickChannelRequest, opts ...grpc.CallOption) (*milvuspb.StringResponse, error) GetStatisticsChannel(ctx context.Context, in *internalpb.GetStatisticsChannelRequest, opts ...grpc.CallOption) (*milvuspb.StringResponse, error) - // * + //* // @brief This method is used to create collection // // @param CreateCollectionRequest, use to provide collection information to be created. // // @return Status CreateCollection(ctx context.Context, in *milvuspb.CreateCollectionRequest, opts ...grpc.CallOption) (*commonpb.Status, error) - // * + //* // @brief This method is used to delete collection. // // @param DropCollectionRequest, collection name is going to be deleted. // // @return Status DropCollection(ctx context.Context, in *milvuspb.DropCollectionRequest, opts ...grpc.CallOption) (*commonpb.Status, error) - // * + //* // @brief This method is used to test collection existence. // // @param HasCollectionRequest, collection name is going to be tested. // // @return BoolResponse HasCollection(ctx context.Context, in *milvuspb.HasCollectionRequest, opts ...grpc.CallOption) (*milvuspb.BoolResponse, error) - // * + //* // @brief This method is used to get collection schema. // // @param DescribeCollectionRequest, target collection name. @@ -825,28 +825,28 @@ type RootCoordClient interface { CreateAlias(ctx context.Context, in *milvuspb.CreateAliasRequest, opts ...grpc.CallOption) (*commonpb.Status, error) DropAlias(ctx context.Context, in *milvuspb.DropAliasRequest, opts ...grpc.CallOption) (*commonpb.Status, error) AlterAlias(ctx context.Context, in *milvuspb.AlterAliasRequest, opts ...grpc.CallOption) (*commonpb.Status, error) - // * + //* // @brief This method is used to list all collections. // // @return StringListResponse, collection name list ShowCollections(ctx context.Context, in *milvuspb.ShowCollectionsRequest, opts ...grpc.CallOption) (*milvuspb.ShowCollectionsResponse, error) AlterCollection(ctx context.Context, in *milvuspb.AlterCollectionRequest, opts ...grpc.CallOption) (*commonpb.Status, error) - // * + //* // @brief This method is used to create partition // // @return Status CreatePartition(ctx context.Context, in *milvuspb.CreatePartitionRequest, opts ...grpc.CallOption) (*commonpb.Status, error) - // * + //* // @brief This method is used to drop partition // // @return Status DropPartition(ctx context.Context, in *milvuspb.DropPartitionRequest, opts ...grpc.CallOption) (*commonpb.Status, error) - // * + //* // @brief This method is used to test partition existence. // // @return BoolResponse HasPartition(ctx context.Context, in *milvuspb.HasPartitionRequest, opts ...grpc.CallOption) (*milvuspb.BoolResponse, error) - // * + //* // @brief This method is used to show partition information // // @param ShowPartitionRequest, target collection name. @@ -854,7 +854,7 @@ type RootCoordClient interface { // @return StringListResponse ShowPartitions(ctx context.Context, in *milvuspb.ShowPartitionsRequest, opts ...grpc.CallOption) (*milvuspb.ShowPartitionsResponse, error) ShowPartitionsInternal(ctx context.Context, in *milvuspb.ShowPartitionsRequest, opts ...grpc.CallOption) (*milvuspb.ShowPartitionsResponse, error) - // rpc DescribeSegment(milvus.DescribeSegmentRequest) returns (milvus.DescribeSegmentResponse) {} + // rpc DescribeSegment(milvus.DescribeSegmentRequest) returns (milvus.DescribeSegmentResponse) {} ShowSegments(ctx context.Context, in *milvuspb.ShowSegmentsRequest, opts ...grpc.CallOption) (*milvuspb.ShowSegmentsResponse, error) AllocTimestamp(ctx context.Context, in *AllocTimestampRequest, opts ...grpc.CallOption) (*AllocTimestampResponse, error) AllocID(ctx context.Context, in *AllocIDRequest, opts ...grpc.CallOption) (*AllocIDResponse, error) @@ -1327,28 +1327,28 @@ type RootCoordServer interface { GetComponentStates(context.Context, *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error) GetTimeTickChannel(context.Context, *internalpb.GetTimeTickChannelRequest) (*milvuspb.StringResponse, error) GetStatisticsChannel(context.Context, *internalpb.GetStatisticsChannelRequest) (*milvuspb.StringResponse, error) - // * + //* // @brief This method is used to create collection // // @param CreateCollectionRequest, use to provide collection information to be created. // // @return Status CreateCollection(context.Context, *milvuspb.CreateCollectionRequest) (*commonpb.Status, error) - // * + //* // @brief This method is used to delete collection. // // @param DropCollectionRequest, collection name is going to be deleted. // // @return Status DropCollection(context.Context, *milvuspb.DropCollectionRequest) (*commonpb.Status, error) - // * + //* // @brief This method is used to test collection existence. // // @param HasCollectionRequest, collection name is going to be tested. // // @return BoolResponse HasCollection(context.Context, *milvuspb.HasCollectionRequest) (*milvuspb.BoolResponse, error) - // * + //* // @brief This method is used to get collection schema. // // @param DescribeCollectionRequest, target collection name. @@ -1359,28 +1359,28 @@ type RootCoordServer interface { CreateAlias(context.Context, *milvuspb.CreateAliasRequest) (*commonpb.Status, error) DropAlias(context.Context, *milvuspb.DropAliasRequest) (*commonpb.Status, error) AlterAlias(context.Context, *milvuspb.AlterAliasRequest) (*commonpb.Status, error) - // * + //* // @brief This method is used to list all collections. // // @return StringListResponse, collection name list ShowCollections(context.Context, *milvuspb.ShowCollectionsRequest) (*milvuspb.ShowCollectionsResponse, error) AlterCollection(context.Context, *milvuspb.AlterCollectionRequest) (*commonpb.Status, error) - // * + //* // @brief This method is used to create partition // // @return Status CreatePartition(context.Context, *milvuspb.CreatePartitionRequest) (*commonpb.Status, error) - // * + //* // @brief This method is used to drop partition // // @return Status DropPartition(context.Context, *milvuspb.DropPartitionRequest) (*commonpb.Status, error) - // * + //* // @brief This method is used to test partition existence. // // @return BoolResponse HasPartition(context.Context, *milvuspb.HasPartitionRequest) (*milvuspb.BoolResponse, error) - // * + //* // @brief This method is used to show partition information // // @param ShowPartitionRequest, target collection name. @@ -1388,7 +1388,7 @@ type RootCoordServer interface { // @return StringListResponse ShowPartitions(context.Context, *milvuspb.ShowPartitionsRequest) (*milvuspb.ShowPartitionsResponse, error) ShowPartitionsInternal(context.Context, *milvuspb.ShowPartitionsRequest) (*milvuspb.ShowPartitionsResponse, error) - // rpc DescribeSegment(milvus.DescribeSegmentRequest) returns (milvus.DescribeSegmentResponse) {} + // rpc DescribeSegment(milvus.DescribeSegmentRequest) returns (milvus.DescribeSegmentResponse) {} ShowSegments(context.Context, *milvuspb.ShowSegmentsRequest) (*milvuspb.ShowSegmentsResponse, error) AllocTimestamp(context.Context, *AllocTimestampRequest) (*AllocTimestampResponse, error) AllocID(context.Context, *AllocIDRequest) (*AllocIDResponse, error) diff --git a/internal/querynodev2/server.go b/internal/querynodev2/server.go index f99397243aacc..a382a01c413f5 100644 --- a/internal/querynodev2/server.go +++ b/internal/querynodev2/server.go @@ -193,13 +193,13 @@ func (node *QueryNode) InitSegcore() error { cKnowhereThreadPoolSize := C.uint32_t(paramtable.Get().QueryNodeCfg.KnowhereThreadPoolSize.GetAsUint32()) C.SegcoreSetKnowhereSearchThreadPoolNum(cKnowhereThreadPoolSize) - enableGrowingIndex := C.bool(paramtable.Get().QueryNodeCfg.EnableGrowingSegmentIndex.GetAsBool()) - C.SegcoreSetEnableGrowingSegmentIndex(enableGrowingIndex) + enableGrowingIndex := C.bool(paramtable.Get().QueryNodeCfg.EnableTempSegmentIndex.GetAsBool()) + C.SegcoreSetEnableTempSegmentIndex(enableGrowingIndex) - nlist := C.int64_t(paramtable.Get().QueryNodeCfg.GrowingIndexNlist.GetAsInt64()) + nlist := C.int64_t(paramtable.Get().QueryNodeCfg.InterimIndexNlist.GetAsInt64()) C.SegcoreSetNlist(nlist) - nprobe := C.int64_t(paramtable.Get().QueryNodeCfg.GrowingIndexNProbe.GetAsInt64()) + nprobe := C.int64_t(paramtable.Get().QueryNodeCfg.InterimIndexNProbe.GetAsInt64()) C.SegcoreSetNprobe(nprobe) // override segcore SIMD type diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 6a669ceff2952..11c9dd50e7c4b 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -1541,11 +1541,11 @@ type queryNodeConfig struct { StatsPublishInterval ParamItem `refreshable:"true"` // segcore - KnowhereThreadPoolSize ParamItem `refreshable:"false"` - ChunkRows ParamItem `refreshable:"false"` - EnableGrowingSegmentIndex ParamItem `refreshable:"false"` - GrowingIndexNlist ParamItem `refreshable:"false"` - GrowingIndexNProbe ParamItem `refreshable:"false"` + KnowhereThreadPoolSize ParamItem `refreshable:"false"` + ChunkRows ParamItem `refreshable:"false"` + EnableTempSegmentIndex ParamItem `refreshable:"false"` + InterimIndexNlist ParamItem `refreshable:"false"` + InterimIndexNProbe ParamItem `refreshable:"false"` // memory limit LoadMemoryUsageFactor ParamItem `refreshable:"true"` @@ -1666,42 +1666,42 @@ func (p *queryNodeConfig) init(base *BaseTable) { } p.ChunkRows.Init(base.mgr) - p.EnableGrowingSegmentIndex = ParamItem{ - Key: "queryNode.segcore.growing.enableIndex", + p.EnableTempSegmentIndex = ParamItem{ + Key: "queryNode.segcore.interimIndex.enableIndex", Version: "2.0.0", DefaultValue: "false", - Doc: "Enable segment growing with index to accelerate vector search.", + Doc: "Enable segment build with index to accelerate vector search when segment is in growing or binlog.", Export: true, } - p.EnableGrowingSegmentIndex.Init(base.mgr) + p.EnableTempSegmentIndex.Init(base.mgr) - p.GrowingIndexNlist = ParamItem{ - Key: "queryNode.segcore.growing.nlist", + p.InterimIndexNlist = ParamItem{ + Key: "queryNode.segcore.interimIndex.nlist", Version: "2.0.0", DefaultValue: "128", - Doc: "growing index nlist, recommend to set sqrt(chunkRows), must smaller than chunkRows/8", + Doc: "temp index nlist, recommend to set sqrt(chunkRows), must smaller than chunkRows/8", Export: true, } - p.GrowingIndexNlist.Init(base.mgr) + p.InterimIndexNlist.Init(base.mgr) - p.GrowingIndexNProbe = ParamItem{ - Key: "queryNode.segcore.growing.nprobe", + p.InterimIndexNProbe = ParamItem{ + Key: "queryNode.segcore.interimIndex.nprobe", Version: "2.0.0", Formatter: func(v string) string { - defaultNprobe := p.GrowingIndexNlist.GetAsInt64() / 8 + defaultNprobe := p.InterimIndexNlist.GetAsInt64() / 8 nprobe := getAsInt64(v) if nprobe == 0 { nprobe = defaultNprobe } - if nprobe > p.GrowingIndexNlist.GetAsInt64() { - return p.GrowingIndexNlist.GetValue() + if nprobe > p.InterimIndexNlist.GetAsInt64() { + return p.InterimIndexNlist.GetValue() } return strconv.FormatInt(nprobe, 10) }, Doc: "nprobe to search small index, based on your accuracy requirement, must smaller than nlist", Export: true, } - p.GrowingIndexNProbe.Init(base.mgr) + p.InterimIndexNProbe.Init(base.mgr) p.LoadMemoryUsageFactor = ParamItem{ Key: "queryNode.loadMemoryUsageFactor", diff --git a/pkg/util/paramtable/component_param_test.go b/pkg/util/paramtable/component_param_test.go index 583edf97edee4..6c7b572552332 100644 --- a/pkg/util/paramtable/component_param_test.go +++ b/pkg/util/paramtable/component_param_test.go @@ -297,10 +297,10 @@ func TestComponentParam(t *testing.T) { chunkRows := Params.ChunkRows.GetAsInt64() assert.Equal(t, int64(1024), chunkRows) - nlist := Params.GrowingIndexNlist.GetAsInt64() + nlist := Params.InterimIndexNlist.GetAsInt64() assert.Equal(t, int64(128), nlist) - nprobe := Params.GrowingIndexNProbe.GetAsInt64() + nprobe := Params.InterimIndexNProbe.GetAsInt64() assert.Equal(t, int64(16), nprobe) assert.Equal(t, true, Params.GroupEnabled.GetAsBool()) @@ -319,17 +319,17 @@ func TestComponentParam(t *testing.T) { chunkRows = Params.ChunkRows.GetAsInt64() assert.Equal(t, int64(8192), chunkRows) - enableGrowingIndex := Params.EnableGrowingSegmentIndex.GetAsBool() - assert.Equal(t, true, enableGrowingIndex) + enableInterimIndex := Params.EnableTempSegmentIndex.GetAsBool() + assert.Equal(t, true, enableInterimIndex) - params.Save("queryNode.segcore.growing.enableIndex", "true") - enableGrowingIndex = Params.EnableGrowingSegmentIndex.GetAsBool() - assert.Equal(t, true, enableGrowingIndex) + params.Save("queryNode.segcore.interimIndex.enableIndex", "true") + enableInterimIndex = Params.EnableTempSegmentIndex.GetAsBool() + assert.Equal(t, true, enableInterimIndex) - nlist = Params.GrowingIndexNlist.GetAsInt64() + nlist = Params.InterimIndexNlist.GetAsInt64() assert.Equal(t, int64(128), nlist) - nprobe = Params.GrowingIndexNProbe.GetAsInt64() + nprobe = Params.InterimIndexNProbe.GetAsInt64() assert.Equal(t, int64(16), nprobe) params.Remove("queryNode.segcore.growing.nlist") diff --git a/tests/python_client/testcases/test_search.py b/tests/python_client/testcases/test_search.py index c157ff5f8bf2a..5ca5d5fd84ebb 100644 --- a/tests/python_client/testcases/test_search.py +++ b/tests/python_client/testcases/test_search.py @@ -2991,7 +2991,7 @@ def test_search_with_expression(self, dim, expression, _async, enable_dynamic_fi filter_ids.append(_id) # 2. create index - index_param = {"index_type": "IVF_FLAT", "metric_type": "COSINE", "params": {"nlist": 100}} + index_param = {"index_type": "FLAT", "metric_type": "COSINE", "params": {}} collection_w.create_index("float_vector", index_param) collection_w.load() @@ -7381,7 +7381,7 @@ def test_range_search_with_expression(self, dim, expression, _async, enable_dyna filter_ids.append(_id) # 2. create index - index_param = {"index_type": "IVF_FLAT", + index_param = {"index_type": "FLAT", "metric_type": "L2", "params": {"nlist": 100}} collection_w.create_index("float_vector", index_param) collection_w.load()