From 9f86cd66814447381e0de21931fe45603378496c Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Tue, 10 May 2022 09:45:50 +0800 Subject: [PATCH] Add interface to access Dataset (#172) Signed-off-by: yudong.cai --- knowhere/common/Dataset.h | 18 +- knowhere/index/vector_index/ConfAdapter.cpp | 7 +- knowhere/index/vector_index/IndexAnnoy.cpp | 5 +- .../index/vector_index/IndexBinaryIDMAP.cpp | 12 +- .../index/vector_index/IndexBinaryIVF.cpp | 12 +- knowhere/index/vector_index/IndexHNSW.cpp | 5 +- knowhere/index/vector_index/IndexIDMAP.cpp | 12 +- knowhere/index/vector_index/IndexIVF.cpp | 12 +- knowhere/index/vector_index/IndexNGT.cpp | 5 +- knowhere/index/vector_index/IndexRHNSW.cpp | 5 +- knowhere/index/vector_index/IndexSPTAG.cpp | 2 +- .../vector_index/adapter/SptagAdapter.cpp | 7 +- .../vector_index/adapter/VectorAdapter.cpp | 23 +- .../vector_index/adapter/VectorAdapter.h | 50 +++- .../vector_index/helpers/IndexParameter.h | 88 +++---- .../index/vector_offset_index/IndexIVF_NM.cpp | 12 +- .../index/vector_offset_index/IndexNSG_NM.cpp | 7 +- python/knowhere/knowhere.i | 24 +- .../benchmark/benchmark_knowhere_test.cpp | 8 +- unittest/test_annoy.cpp | 248 +++--------------- unittest/test_binaryidmap.cpp | 6 +- unittest/test_binaryivf.cpp | 4 +- unittest/test_hnsw.cpp | 164 +----------- unittest/test_idmap.cpp | 6 +- unittest/test_ivf.cpp | 8 +- unittest/test_ivf_nm.cpp | 4 +- unittest/test_nsg.cpp | 40 +-- unittest/test_rhnsw_flat.cpp | 143 +++++----- unittest/test_rhnsw_pq.cpp | 21 -- unittest/test_rhnsw_sq8.cpp | 21 -- unittest/test_sptag.cpp | 4 +- unittest/utils.cpp | 27 +- unittest/utils.h | 3 - 33 files changed, 306 insertions(+), 707 deletions(-) diff --git a/knowhere/common/Dataset.h b/knowhere/common/Dataset.h index 684a52c74..0f853a320 100644 --- a/knowhere/common/Dataset.h +++ b/knowhere/common/Dataset.h @@ -31,19 +31,19 @@ class Dataset { ~Dataset() { for (auto const& d : data_) { if (d.first == meta::IDS) { - auto ids = Get(meta::IDS); + auto ids = 
Get(meta::IDS); // the space of ids must be allocated through malloc - free(ids); + free((void*)ids); } if (d.first == meta::DISTANCE) { - auto distances = Get(meta::DISTANCE); + auto distances = Get(meta::DISTANCE); // the space of distance must be allocated through malloc - free(distances); + free((void*)distances); } if (d.first == meta::LIMS) { - auto lims = Get(meta::LIMS); + auto lims = Get(meta::LIMS); // the space of lims must be allocated through malloc - free(lims); + free((void*)lims); } } } @@ -58,11 +58,7 @@ class Dataset { T Get(const std::string_view& k) { std::lock_guard lk(mutex_); - try { - return std::any_cast(*(data_.at(std::string(k)))); - } catch (...) { - throw std::logic_error("Can't find this key"); - } + return std::any_cast(*(data_.at(std::string(k)))); } const std::map& diff --git a/knowhere/index/vector_index/ConfAdapter.cpp b/knowhere/index/vector_index/ConfAdapter.cpp index e0123cdf7..121436754 100644 --- a/knowhere/index/vector_index/ConfAdapter.cpp +++ b/knowhere/index/vector_index/ConfAdapter.cpp @@ -50,8 +50,11 @@ static const std::vector default_binary_metric_array{metric::HAMMING template inline bool CheckValueInRange(const Config& cfg, const std::string_view& key, T min, T max) { - T value = GetValueFromConfig(cfg, std::string(key)); - return (value >= min && value <= max); + if (cfg.contains(std::string(key))) { + T value = GetValueFromConfig(cfg, key); + return (value >= min && value <= max); + } + return false; } inline bool diff --git a/knowhere/index/vector_index/IndexAnnoy.cpp b/knowhere/index/vector_index/IndexAnnoy.cpp index eba2b1e46..5795820d6 100644 --- a/knowhere/index/vector_index/IndexAnnoy.cpp +++ b/knowhere/index/vector_index/IndexAnnoy.cpp @@ -139,10 +139,7 @@ IndexAnnoy::Query(const DatasetPtr& dataset_ptr, const Config& config, const fai } } - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } 
int64_t diff --git a/knowhere/index/vector_index/IndexBinaryIDMAP.cpp b/knowhere/index/vector_index/IndexBinaryIDMAP.cpp index 5ed913728..bd28d05b6 100644 --- a/knowhere/index/vector_index/IndexBinaryIDMAP.cpp +++ b/knowhere/index/vector_index/IndexBinaryIDMAP.cpp @@ -64,10 +64,7 @@ BinaryIDMAP::Query(const DatasetPtr& dataset_ptr, const Config& config, const fa QueryImpl(rows, reinterpret_cast(p_data), k, p_dist, p_id, config, bitset); - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); @@ -106,12 +103,7 @@ BinaryIDMAP::QueryByRange(const DatasetPtr& dataset, try { QueryByRangeImpl(rows, reinterpret_cast(p_data), radius, p_dist, p_id, p_lims, config, bitset); - - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - ret_ds->Set(meta::LIMS, p_lims); - return ret_ds; + return GenResultDataset(p_id, p_dist, p_lims); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); diff --git a/knowhere/index/vector_index/IndexBinaryIVF.cpp b/knowhere/index/vector_index/IndexBinaryIVF.cpp index c38f29e97..9f6fbf0cd 100644 --- a/knowhere/index/vector_index/IndexBinaryIVF.cpp +++ b/knowhere/index/vector_index/IndexBinaryIVF.cpp @@ -77,10 +77,7 @@ BinaryIVF::Query(const DatasetPtr& dataset_ptr, const Config& config, const fais QueryImpl(rows, reinterpret_cast(p_data), k, p_dist, p_id, config, bitset); - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); @@ -119,12 +116,7 @@ BinaryIVF::QueryByRange(const DatasetPtr& dataset, try { QueryByRangeImpl(rows, reinterpret_cast(p_data), radius, 
p_dist, p_id, p_lims, config, bitset); - - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - ret_ds->Set(meta::LIMS, p_lims); - return ret_ds; + return GenResultDataset(p_id, p_dist, p_lims); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); diff --git a/knowhere/index/vector_index/IndexHNSW.cpp b/knowhere/index/vector_index/IndexHNSW.cpp index 7c802aa73..a10f5b3e5 100644 --- a/knowhere/index/vector_index/IndexHNSW.cpp +++ b/knowhere/index/vector_index/IndexHNSW.cpp @@ -215,10 +215,7 @@ IndexHNSW::Query(const DatasetPtr& dataset_ptr, const Config& config, const fais // LOG_KNOWHERE_DEBUG_ << "IndexHNSW::Query finished, show statistics:"; // LOG_KNOWHERE_DEBUG_ << GetStatistics()->ToString(); - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } int64_t diff --git a/knowhere/index/vector_index/IndexIDMAP.cpp b/knowhere/index/vector_index/IndexIDMAP.cpp index 4c47d3f1d..f3cd7933b 100644 --- a/knowhere/index/vector_index/IndexIDMAP.cpp +++ b/knowhere/index/vector_index/IndexIDMAP.cpp @@ -97,10 +97,7 @@ IDMAP::Query(const DatasetPtr& dataset_ptr, const Config& config, const faiss::B QueryImpl(rows, reinterpret_cast(p_data), k, p_dist, p_id, config, bitset); - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); @@ -139,12 +136,7 @@ IDMAP::QueryByRange(const DatasetPtr& dataset, try { QueryByRangeImpl(rows, reinterpret_cast(p_data), radius, p_dist, p_id, p_lims, config, bitset); - - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - ret_ds->Set(meta::LIMS, p_lims); - return ret_ds; + return 
GenResultDataset(p_id, p_dist, p_lims); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); diff --git a/knowhere/index/vector_index/IndexIVF.cpp b/knowhere/index/vector_index/IndexIVF.cpp index 2990a1a2f..1a7ced47b 100644 --- a/knowhere/index/vector_index/IndexIVF.cpp +++ b/knowhere/index/vector_index/IndexIVF.cpp @@ -120,10 +120,7 @@ IVF::Query(const DatasetPtr& dataset_ptr, const Config& config, const faiss::Bit QueryImpl(rows, reinterpret_cast(p_data), k, p_dist, p_id, config, bitset); - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); @@ -162,12 +159,7 @@ IVF::QueryByRange(const DatasetPtr& dataset, try { QueryByRangeImpl(rows, reinterpret_cast(p_data), radius, p_dist, p_id, p_lims, config, bitset); - - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - ret_ds->Set(meta::LIMS, p_lims); - return ret_ds; + return GenResultDataset(p_id, p_dist, p_lims); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); diff --git a/knowhere/index/vector_index/IndexNGT.cpp b/knowhere/index/vector_index/IndexNGT.cpp index c86affb2d..5b915aa7c 100644 --- a/knowhere/index/vector_index/IndexNGT.cpp +++ b/knowhere/index/vector_index/IndexNGT.cpp @@ -186,10 +186,7 @@ IndexNGT::Query(const DatasetPtr& dataset_ptr, const Config& config, const faiss index_->deleteObject(object); } - auto res_ds = std::make_shared(); - res_ds->Set(meta::IDS, p_id); - res_ds->Set(meta::DISTANCE, p_dist); - return res_ds; + return GenResultDataset(p_id, p_dist); } int64_t diff --git a/knowhere/index/vector_index/IndexRHNSW.cpp b/knowhere/index/vector_index/IndexRHNSW.cpp index ef13f2bcc..0cbd49c04 100644 --- a/knowhere/index/vector_index/IndexRHNSW.cpp +++ 
b/knowhere/index/vector_index/IndexRHNSW.cpp @@ -139,10 +139,7 @@ IndexRHNSW::Query(const DatasetPtr& dataset_ptr, const Config& config, const fai // LOG_KNOWHERE_DEBUG_ << "IndexRHNSW::Load finished, show statistics:"; // LOG_KNOWHERE_DEBUG_ << GetStatistics()->ToString(); - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } int64_t diff --git a/knowhere/index/vector_index/IndexSPTAG.cpp b/knowhere/index/vector_index/IndexSPTAG.cpp index 5a8ff9c43..5372c49fb 100644 --- a/knowhere/index/vector_index/IndexSPTAG.cpp +++ b/knowhere/index/vector_index/IndexSPTAG.cpp @@ -180,7 +180,7 @@ DatasetPtr CPUSPTAGRNG::Query(const DatasetPtr& dataset_ptr, const Config& config, const faiss::BitsetView bitset) { SetParameters(config); - float* p_data = (float*)dataset_ptr->Get(meta::TENSOR); + float* p_data = (float*)GetDatasetTensor(dataset_ptr); for (auto i = 0; i < 10; ++i) { for (auto j = 0; j < 10; ++j) { std::cout << p_data[i * 10 + j] << " "; diff --git a/knowhere/index/vector_index/adapter/SptagAdapter.cpp b/knowhere/index/vector_index/adapter/SptagAdapter.cpp index 10134cc07..d2d091f85 100644 --- a/knowhere/index/vector_index/adapter/SptagAdapter.cpp +++ b/knowhere/index/vector_index/adapter/SptagAdapter.cpp @@ -16,7 +16,7 @@ namespace knowhere { std::shared_ptr ConvertToMetadataSet(const DatasetPtr& dataset_ptr) { - auto elems = dataset_ptr->Get(meta::ROWS); + auto elems = GetDatasetRows(dataset_ptr); auto p_id = new int64_t[elems]; for (int64_t i = 0; i < elems; ++i) p_id[i] = i; @@ -81,10 +81,7 @@ ConvertToDataset(std::vector query_results, std::shared_ptr< } } - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } } // namespace knowhere diff --git a/knowhere/index/vector_index/adapter/VectorAdapter.cpp 
b/knowhere/index/vector_index/adapter/VectorAdapter.cpp index 1bcfc34f7..c8860fe09 100644 --- a/knowhere/index/vector_index/adapter/VectorAdapter.cpp +++ b/knowhere/index/vector_index/adapter/VectorAdapter.cpp @@ -20,9 +20,26 @@ namespace knowhere { DatasetPtr GenDataset(const int64_t nb, const int64_t dim, const void* xb) { auto ret_ds = std::make_shared(); - ret_ds->Set(meta::ROWS, nb); - ret_ds->Set(meta::DIM, dim); - ret_ds->Set(meta::TENSOR, xb); + SetDatasetRows(ret_ds, nb); + SetDatasetDim(ret_ds, dim); + SetDatasetTensor(ret_ds, xb); + return ret_ds; +} + +DatasetPtr +GenResultDataset(const int64_t* ids, const float* distance) { + auto ret_ds = std::make_shared(); + SetDatasetIDs(ret_ds, ids); + SetDatasetDistance(ret_ds, distance); + return ret_ds; +} + +DatasetPtr +GenResultDataset(const int64_t* ids, const float* distance, const size_t* lims) { + auto ret_ds = std::make_shared(); + SetDatasetIDs(ret_ds, ids); + SetDatasetDistance(ret_ds, distance); + SetDatasetLims(ret_ds, lims); return ret_ds; } diff --git a/knowhere/index/vector_index/adapter/VectorAdapter.h b/knowhere/index/vector_index/adapter/VectorAdapter.h index 50464cf6a..3509b5dc1 100644 --- a/knowhere/index/vector_index/adapter/VectorAdapter.h +++ b/knowhere/index/vector_index/adapter/VectorAdapter.h @@ -18,15 +18,53 @@ namespace knowhere { -#define GET_TENSOR_DATA(dataset_ptr) \ - int64_t rows = dataset_ptr->Get(meta::ROWS); \ - const void* p_data = dataset_ptr->Get(meta::TENSOR); +#define DEFINE_DATASET_GETTER(func_name, key, T) \ +inline T func_name(const DatasetPtr& ds_ptr) { \ + return ds_ptr->Get(key); \ +} -#define GET_TENSOR_DATA_DIM(dataset_ptr) \ - GET_TENSOR_DATA(dataset_ptr) \ - int64_t dim = dataset_ptr->Get(meta::DIM); +#define DEFINE_DATASET_SETTER(func_name, key, T) \ +inline void func_name(DatasetPtr& ds_ptr, T value) { \ + ds_ptr->Set(key, value); \ +} + +/////////////////////////////////////////////////////////////////////////////// + +DEFINE_DATASET_GETTER(GetDatasetDim, 
meta::DIM, int64_t); +DEFINE_DATASET_SETTER(SetDatasetDim, meta::DIM, int64_t); + +DEFINE_DATASET_GETTER(GetDatasetTensor, meta::TENSOR, const void*); +DEFINE_DATASET_SETTER(SetDatasetTensor, meta::TENSOR, const void*); + +DEFINE_DATASET_GETTER(GetDatasetRows, meta::ROWS, int64_t); +DEFINE_DATASET_SETTER(SetDatasetRows, meta::ROWS, int64_t); + +DEFINE_DATASET_GETTER(GetDatasetIDs, meta::IDS, const int64_t*); +DEFINE_DATASET_SETTER(SetDatasetIDs, meta::IDS, const int64_t*); + +DEFINE_DATASET_GETTER(GetDatasetDistance, meta::DISTANCE, const float*); +DEFINE_DATASET_SETTER(SetDatasetDistance, meta::DISTANCE, const float*); + +DEFINE_DATASET_GETTER(GetDatasetLims, meta::LIMS, const size_t*); +DEFINE_DATASET_SETTER(SetDatasetLims, meta::LIMS, const size_t*); + +/////////////////////////////////////////////////////////////////////////////// + +#define GET_TENSOR_DATA(ds_ptr) \ + auto rows = GetDatasetRows(ds_ptr); \ + auto p_data = GetDatasetTensor(ds_ptr); + +#define GET_TENSOR_DATA_DIM(ds_ptr) \ + GET_TENSOR_DATA(ds_ptr) \ + auto dim = GetDatasetDim(ds_ptr); extern DatasetPtr GenDataset(const int64_t nb, const int64_t dim, const void* xb); +extern DatasetPtr +GenResultDataset(const int64_t* ids, const float* distance); + +extern DatasetPtr +GenResultDataset(const int64_t* ids, const float* distance, const size_t* lims); + } // namespace knowhere diff --git a/knowhere/index/vector_index/helpers/IndexParameter.h b/knowhere/index/vector_index/helpers/IndexParameter.h index 6d4f24e94..8f8062214 100644 --- a/knowhere/index/vector_index/helpers/IndexParameter.h +++ b/knowhere/index/vector_index/helpers/IndexParameter.h @@ -84,85 +84,85 @@ constexpr MetricType SUPERSTRUCTURE = "SUPERSTRUCTURE"; } // namespace metric /////////////////////////////////////////////////////////////////////////////// -template +template T -GetValueFromConfig(const Config& cfg, const std::string& key) { - return cfg.at(key).get(); +GetValueFromConfig(const Config& cfg, const std::string_view& key) { 
+ return cfg.at(std::string(key)).get(); } -template +template void -SetValueToConfig(Config& cfg, const std::string& key, const T value) { - cfg[key] = value; +SetValueToConfig(Config& cfg, const std::string_view& key, const T value) { + cfg[std::string(key)] = value; } -#define DEFINE_GETTER(func_name, key, T) \ -inline T func_name(const Config& cfg) { \ - return GetValueFromConfig(cfg, std::string(key)); \ +#define DEFINE_CONFIG_GETTER(func_name, key, T) \ +inline T func_name(const Config& cfg) { \ + return GetValueFromConfig(cfg, key); \ } -#define DEFINE_SETTER(func_name, key, T1, T2) \ -inline void func_name(Config& cfg, T1 value) { \ - SetValueToConfig(cfg, std::string(key), T2(value)); \ +#define DEFINE_CONFIG_SETTER(func_name, key, T1, T2) \ +inline void func_name(Config& cfg, T1 value) { \ + SetValueToConfig(cfg, key, (T2)(value)); \ } /////////////////////////////////////////////////////////////////////////////// // APIs to access meta -DEFINE_GETTER(GetMetaMetricType, meta::METRIC_TYPE, std::string) -DEFINE_SETTER(SetMetaMetricType, meta::METRIC_TYPE, MetricType , std::string) +DEFINE_CONFIG_GETTER(GetMetaMetricType, meta::METRIC_TYPE, std::string) +DEFINE_CONFIG_SETTER(SetMetaMetricType, meta::METRIC_TYPE, MetricType, std::string) -DEFINE_GETTER(GetMetaRows, meta::ROWS, int64_t) -DEFINE_SETTER(SetMetaRows, meta::ROWS, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetMetaRows, meta::ROWS, int64_t) +DEFINE_CONFIG_SETTER(SetMetaRows, meta::ROWS, int64_t, int64_t) -DEFINE_GETTER(GetMetaDim, meta::DIM, int64_t) -DEFINE_SETTER(SetMetaDim, meta::DIM, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetMetaDim, meta::DIM, int64_t) +DEFINE_CONFIG_SETTER(SetMetaDim, meta::DIM, int64_t, int64_t) -DEFINE_GETTER(GetMetaTopk, meta::TOPK, int64_t) -DEFINE_SETTER(SetMetaTopk, meta::TOPK, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetMetaTopk, meta::TOPK, int64_t) +DEFINE_CONFIG_SETTER(SetMetaTopk, meta::TOPK, int64_t, int64_t) -DEFINE_GETTER(GetMetaRadius, meta::RADIUS, float) 
-DEFINE_SETTER(SetMetaRadius, meta::RADIUS, float, float) +DEFINE_CONFIG_GETTER(GetMetaRadius, meta::RADIUS, float) +DEFINE_CONFIG_SETTER(SetMetaRadius, meta::RADIUS, float, float) -DEFINE_GETTER(GetMetaDeviceID, meta::DEVICE_ID, int64_t) -DEFINE_SETTER(SetMetaDeviceID, meta::DEVICE_ID, int64_t , int64_t) +DEFINE_CONFIG_GETTER(GetMetaDeviceID, meta::DEVICE_ID, int64_t) +DEFINE_CONFIG_SETTER(SetMetaDeviceID, meta::DEVICE_ID, int64_t , int64_t) /////////////////////////////////////////////////////////////////////////////// // APIs to access indexparam -DEFINE_GETTER(GetIndexParamNprobe, indexparam::NPROBE, int64_t) -DEFINE_SETTER(SetIndexParamNprobe, indexparam::NPROBE, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamNprobe, indexparam::NPROBE, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamNprobe, indexparam::NPROBE, int64_t, int64_t) -DEFINE_GETTER(GetIndexParamNlist, indexparam::NLIST, int64_t) -DEFINE_SETTER(SetIndexParamNlist, indexparam::NLIST, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamNlist, indexparam::NLIST, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamNlist, indexparam::NLIST, int64_t, int64_t) -DEFINE_GETTER(GetIndexParamNbits, indexparam::NBITS, int64_t) -DEFINE_SETTER(SetIndexParamNbits, indexparam::NBITS, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamNbits, indexparam::NBITS, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamNbits, indexparam::NBITS, int64_t, int64_t) // PQ param for IVFPQ -DEFINE_GETTER(GetIndexParamM, indexparam::M, int64_t) -DEFINE_SETTER(SetIndexParamM, indexparam::M, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamM, indexparam::M, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamM, indexparam::M, int64_t, int64_t) // PQ param for RHNSWPQ -DEFINE_GETTER(GetIndexParamPQM, indexparam::PQ_M, int64_t) -DEFINE_SETTER(SetIndexParamPQM, indexparam::PQ_M, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamPQM, indexparam::PQ_M, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamPQM, indexparam::PQ_M, int64_t, int64_t) // HNSW 
Params -DEFINE_GETTER(GetIndexParamEfConstruction, indexparam::EFCONSTRUCTION, int64_t) -DEFINE_SETTER(SetIndexParamEfConstruction, indexparam::EFCONSTRUCTION, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamEfConstruction, indexparam::EFCONSTRUCTION, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamEfConstruction, indexparam::EFCONSTRUCTION, int64_t, int64_t) -DEFINE_GETTER(GetIndexParamHNSWM, indexparam::HNSW_M, int64_t) -DEFINE_SETTER(SetIndexParamHNSWM, indexparam::HNSW_M, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamHNSWM, indexparam::HNSW_M, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamHNSWM, indexparam::HNSW_M, int64_t, int64_t) -DEFINE_GETTER(GetIndexParamEf, indexparam::EF, int64_t) -DEFINE_SETTER(SetIndexParamEf, indexparam::EF, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamEf, indexparam::EF, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamEf, indexparam::EF, int64_t, int64_t) // Annoy Params -DEFINE_GETTER(GetIndexParamNtrees, indexparam::N_TREES, int64_t) -DEFINE_SETTER(SetIndexParamNtrees, indexparam::N_TREES, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamNtrees, indexparam::N_TREES, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamNtrees, indexparam::N_TREES, int64_t, int64_t) -DEFINE_GETTER(GetIndexParamSearchK, indexparam::SEARCH_K, int64_t) -DEFINE_SETTER(SetIndexParamSearchK, indexparam::SEARCH_K, int64_t, int64_t) +DEFINE_CONFIG_GETTER(GetIndexParamSearchK, indexparam::SEARCH_K, int64_t) +DEFINE_CONFIG_SETTER(SetIndexParamSearchK, indexparam::SEARCH_K, int64_t, int64_t) /////////////////////////////////////////////////////////////////////////////// // other diff --git a/knowhere/index/vector_offset_index/IndexIVF_NM.cpp b/knowhere/index/vector_offset_index/IndexIVF_NM.cpp index f334c1701..820810757 100644 --- a/knowhere/index/vector_offset_index/IndexIVF_NM.cpp +++ b/knowhere/index/vector_offset_index/IndexIVF_NM.cpp @@ -163,10 +163,7 @@ IVF_NM::Query(const DatasetPtr& dataset_ptr, const Config& config, const faiss:: QueryImpl(rows, 
reinterpret_cast(p_data), k, p_dist, p_id, config, bitset); - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); @@ -202,12 +199,7 @@ IVF_NM::QueryByRange(const DatasetPtr& dataset_ptr, const Config& config, const try { QueryByRangeImpl(rows, reinterpret_cast(p_data), radius, p_dist, p_id, p_lims, config, bitset); - - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - ret_ds->Set(meta::LIMS, p_lims); - return ret_ds; + return GenResultDataset(p_id, p_dist, p_lims); } catch (faiss::FaissException& e) { release_when_exception(); KNOWHERE_THROW_MSG(e.what()); diff --git a/knowhere/index/vector_offset_index/IndexNSG_NM.cpp b/knowhere/index/vector_offset_index/IndexNSG_NM.cpp index ab339e1ea..16ad9a0c3 100644 --- a/knowhere/index/vector_offset_index/IndexNSG_NM.cpp +++ b/knowhere/index/vector_offset_index/IndexNSG_NM.cpp @@ -90,12 +90,7 @@ NSG_NM::Query(const DatasetPtr& dataset_ptr, const Config& config, const faiss:: s_params.k = GetMetaTopk(config); index_->Search(reinterpret_cast(p_data), reinterpret_cast(data_.get()), rows, dim, topK, p_dist, p_id, s_params, bitset); - MapOffsetToUid(p_id, static_cast(elems)); - - auto ret_ds = std::make_shared(); - ret_ds->Set(meta::IDS, p_id); - ret_ds->Set(meta::DISTANCE, p_dist); - return ret_ds; + return GenResultDataset(p_id, p_dist); } catch (std::exception& e) { KNOWHERE_THROW_MSG(e.what()); } diff --git a/python/knowhere/knowhere.i b/python/knowhere/knowhere.i index 9ab0cf798..6d7d22647 100644 --- a/python/knowhere/knowhere.i +++ b/python/knowhere/knowhere.i @@ -82,19 +82,19 @@ DOWNCAST ( IndexAnnoy ) knowhere::DatasetPtr ArrayToDataSet( float* xb,int nb, int dim){ - return knowhere::GenDataset(nb, dim, xb); + return knowhere::GenDataset(nb, dim, xb); }; void 
DumpResultDataSet(knowhere::DatasetPtr result, float *dis, int nq_1, int k_1, - int *ids,int nq_2, int k_2){ - auto ids_ = result->Get(knowhere::meta::IDS); - auto dist_ = result->Get(knowhere::meta::DISTANCE); - assert(nq_1==nq_2); - assert(k_1==k_2); - for (int i = 0; i < nq_1; i++) { - for (int j = 0; j < k_1; ++j) { - *(ids+i*k_1+j) = *((int64_t*)(ids_) + i * k_1 + j); - *(dis+i*k_1+j) = *((float*)(dist_) + i * k_1 + j); + int *ids,int nq_2, int k_2){ + auto ids_ = knowhere::GetDatasetIDs(result); + auto dist_ = knowhere::GetDatasetDistance(result); + assert(nq_1==nq_2); + assert(k_1==k_2); + for (int i = 0; i < nq_1; i++) { + for (int j = 0; j < k_1; ++j) { + *(ids+i*k_1+j) = *((int64_t*)(ids_) + i * k_1 + j); + *(dis+i*k_1+j) = *((float*)(dist_) + i * k_1 + j); } } } @@ -104,11 +104,11 @@ knowhere::Config CreateConfig(std::string str){ } faiss::BitsetView EmptyBitSetView(){ -return faiss::BitsetView(nullptr); + return faiss::BitsetView(nullptr); }; faiss::BitsetView ArrayToBitsetView(uint8_t *block, int size){ -return faiss::BitsetView(block, size); + return faiss::BitsetView(block, size); } %} diff --git a/unittest/benchmark/benchmark_knowhere_test.cpp b/unittest/benchmark/benchmark_knowhere_test.cpp index 7486548e0..7fe16a0bf 100644 --- a/unittest/benchmark/benchmark_knowhere_test.cpp +++ b/unittest/benchmark/benchmark_knowhere_test.cpp @@ -374,7 +374,7 @@ class Benchmark_knowhere : public ::testing::Test { auto result = index_->Query(ds_ptr, conf, nullptr); t_end = elapsed(); - auto ids = result->Get(knowhere::meta::IDS); + auto ids = knowhere::GetDatasetIDs(result); int32_t hit = CalcRecall(ids, nq, k); printf(" nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nq, k, (t_end - t_start), (hit / float(nq * std::min(gt_k_, k)))); @@ -406,7 +406,7 @@ class Benchmark_knowhere : public ::testing::Test { auto result = index_->Query(ds_ptr, conf, nullptr); t_end = elapsed(); - auto ids = result->Get(knowhere::meta::IDS); + auto ids = 
knowhere::GetDatasetIDs(result); int32_t hit = CalcRecall(ids, nq, k); printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nprobe, nq, k, (t_end - t_start), (hit / float(nq * std::min(gt_k_, k)))); @@ -440,7 +440,7 @@ class Benchmark_knowhere : public ::testing::Test { auto result = index_->Query(ds_ptr, conf, nullptr); t_end = elapsed(); - auto ids = result->Get(knowhere::meta::IDS); + auto ids = knowhere::GetDatasetIDs(result); int32_t hit = CalcRecall(ids, nq, k); printf(" ef = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", ef, nq, k, (t_end - t_start), (hit / float(nq * std::min(gt_k_, k)))); @@ -473,7 +473,7 @@ class Benchmark_knowhere : public ::testing::Test { auto result = index_->Query(ds_ptr, conf, nullptr); t_end = elapsed(); - auto ids = result->Get(knowhere::meta::IDS); + auto ids = knowhere::GetDatasetIDs(result); int32_t hit = CalcRecall(ids, nq, k); printf(" search_k = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", sk, nq, k, (t_end - t_start), (hit / float(nq * std::min(gt_k_, k)))); diff --git a/unittest/test_annoy.cpp b/unittest/test_annoy.cpp index d1e25f43d..50ec154ae 100644 --- a/unittest/test_annoy.cpp +++ b/unittest/test_annoy.cpp @@ -67,29 +67,6 @@ TEST_P(AnnoyTest, annoy_basic) { auto result = index_->Query(query_dataset, conf, nullptr); AssertAnns(result, nq, k); - - /* - * output result to check by eyes - { - auto ids = result->Get(knowhere::meta::IDS); - auto dist = result->Get(knowhere::meta::DISTANCE); - - std::stringstream ss_id; - std::stringstream ss_dist; - for (auto i = 0; i < nq; i++) { - for (auto j = 0; j < k; ++j) { - // ss_id << *ids->data()->GetValues(1, i * k + j) << " "; - // ss_dist << *dists->data()->GetValues(1, i * k + j) << " "; - ss_id << *((int64_t*)(ids) + i * k + j) << " "; - ss_dist << *((float*)(dist) + i * k + j) << " "; - } - ss_id << std::endl; - ss_dist << std::endl; - } - std::cout << "id\n" << ss_id.str() << std::endl; - std::cout << "dist\n" << ss_dist.str() << 
std::endl; - } - */ } TEST_P(AnnoyTest, annoy_delete) { @@ -104,49 +81,6 @@ TEST_P(AnnoyTest, annoy_delete) { auto result2 = index_->Query(query_dataset, conf, *bitset); AssertAnns(result2, nq, k, CheckMode::CHECK_NOT_EQUAL); - - /* - * delete result checked by eyes - auto ids1 = result1->Get(knowhere::meta::IDS); - auto ids2 = result2->Get(knowhere::meta::IDS); - std::cout << std::endl; - for (int i = 0; i < nq; ++ i) { - std::cout << "ids1: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids1 + i * k + j) << " "; - } - std::cout << " ids2: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids2 + i * k + j) << " "; - } - std::cout << std::endl; - for (int j = 0; j < std::min(5, k>>1); ++ j) { - ASSERT_EQ(*(ids1 + i * k + j + 1), *(ids2 + i * k + j)); - } - } - */ - /* - * output result to check by eyes - { - auto ids = result->Get(knowhere::meta::IDS); - auto dist = result->Get(knowhere::meta::DISTANCE); - - std::stringstream ss_id; - std::stringstream ss_dist; - for (auto i = 0; i < nq; i++) { - for (auto j = 0; j < k; ++j) { - // ss_id << *ids->data()->GetValues(1, i * k + j) << " "; - // ss_dist << *dists->data()->GetValues(1, i * k + j) << " "; - ss_id << *((int64_t*)(ids) + i * k + j) << " "; - ss_dist << *((float*)(dist) + i * k + j) << " "; - } - ss_id << std::endl; - ss_dist << std::endl; - } - std::cout << "id\n" << ss_id.str() << std::endl; - std::cout << "dist\n" << ss_dist.str() << std::endl; - } - */ } TEST_P(AnnoyTest, annoy_serialize) { @@ -161,161 +95,49 @@ TEST_P(AnnoyTest, annoy_serialize) { reader(ret, bin->size); }; - { - // serialize index - index_->BuildAll(base_dataset, conf); - auto binaryset = index_->Serialize(knowhere::Config()); + // serialize index + index_->BuildAll(base_dataset, conf); + auto binaryset = index_->Serialize(knowhere::Config()); - auto bin_data = binaryset.GetByName("annoy_index_data"); - std::string filename1 = temp_path("/tmp/annoy_test_data_serialize.bin"); - auto load_data1 = new uint8_t[bin_data->size]; - 
serialize(filename1, bin_data, load_data1); + auto bin_data = binaryset.GetByName("annoy_index_data"); + std::string filename1 = temp_path("/tmp/annoy_test_data_serialize.bin"); + auto load_data1 = new uint8_t[bin_data->size]; + serialize(filename1, bin_data, load_data1); - auto bin_metric_type = binaryset.GetByName("annoy_metric_type"); - std::string filename2 = temp_path("/tmp/annoy_test_metric_type_serialize.bin"); - auto load_data2 = new uint8_t[bin_metric_type->size]; - serialize(filename2, bin_metric_type, load_data2); + auto bin_metric_type = binaryset.GetByName("annoy_metric_type"); + std::string filename2 = temp_path("/tmp/annoy_test_metric_type_serialize.bin"); + auto load_data2 = new uint8_t[bin_metric_type->size]; + serialize(filename2, bin_metric_type, load_data2); - auto bin_dim = binaryset.GetByName("annoy_dim"); - std::string filename3 = temp_path("/tmp/annoy_test_dim_serialize.bin"); - auto load_data3 = new uint8_t[bin_dim->size]; - serialize(filename3, bin_dim, load_data3); + auto bin_dim = binaryset.GetByName("annoy_dim"); + std::string filename3 = temp_path("/tmp/annoy_test_dim_serialize.bin"); + auto load_data3 = new uint8_t[bin_dim->size]; + serialize(filename3, bin_dim, load_data3); - binaryset.clear(); - std::shared_ptr index_data(load_data1); - binaryset.Append("annoy_index_data", index_data, bin_data->size); + binaryset.clear(); + std::shared_ptr index_data(load_data1); + binaryset.Append("annoy_index_data", index_data, bin_data->size); - std::shared_ptr metric_data(load_data2); - binaryset.Append("annoy_metric_type", metric_data, bin_metric_type->size); + std::shared_ptr metric_data(load_data2); + binaryset.Append("annoy_metric_type", metric_data, bin_metric_type->size); - std::shared_ptr dim_data(load_data3); - binaryset.Append("annoy_dim", dim_data, bin_dim->size); + std::shared_ptr dim_data(load_data3); + binaryset.Append("annoy_dim", dim_data, bin_dim->size); - index_->Load(binaryset); - ASSERT_EQ(index_->Count(), nb); - 
ASSERT_EQ(index_->Dim(), dim); - auto result = index_->Query(query_dataset, conf, nullptr); - AssertAnns(result, nq, knowhere::GetMetaTopk(conf)); - } + index_->Load(binaryset); + ASSERT_EQ(index_->Count(), nb); + ASSERT_EQ(index_->Dim(), dim); + auto result = index_->Query(query_dataset, conf, nullptr); + AssertAnns(result, nq, knowhere::GetMetaTopk(conf)); } TEST_P(AnnoyTest, annoy_slice) { - { - // serialize index - index_->BuildAll(base_dataset, conf); - auto binaryset = index_->Serialize(knowhere::Config()); - index_->Load(binaryset); - ASSERT_EQ(index_->Count(), nb); - ASSERT_EQ(index_->Dim(), dim); - auto result = index_->Query(query_dataset, conf, nullptr); - AssertAnns(result, nq, knowhere::GetMetaTopk(conf)); - } -} - -/* - * faiss style test - * keep it -int -main() { - int64_t d = 64; // dimension - int64_t nb = 10000; // database size - int64_t nq = 10; // 10000; // nb of queries - faiss::ConcurrentBitsetPtr bitset = std::make_shared(nb); - - int64_t* ids = new int64_t[nb]; - float* xb = new float[d * nb]; - float* xq = new float[d * nq]; - - for (int i = 0; i < nb; i++) { - for (int j = 0; j < d; j++) xb[d * i + j] = (float)drand48(); - xb[d * i] += i / 1000.; - ids[i] = i; - } - printf("gen xb and ids done! 
\n"); - - // srand((unsigned)time(nullptr)); - auto random_seed = (unsigned)time(nullptr); - printf("delete ids: \n"); - for (int i = 0; i < nq; i++) { - auto tmp = rand_r(&random_seed) % nb; - printf("%d\n", tmp); - // std::cout << "before delete, test result: " << bitset->test(tmp) << std::endl; - bitset->set(tmp); - // std::cout << "after delete, test result: " << bitset->test(tmp) << std::endl; - for (int j = 0; j < d; j++) xq[d * i + j] = xb[d * tmp + j]; - // xq[d * i] += i / 1000.; - } - printf("\n"); - - int k = 4; - int n_trees = 5; - int search_k = 100; - knowhere::IndexAnnoy index; - knowhere::DatasetPtr base_dataset = generate_dataset(nb, d, (const void*)xb, ids); - - knowhere::Config base_conf{ - {knowhere::meta::METRIC_TYPE, knowhere::metric::L2}, - {knowhere::meta::DIM, d}, - {knowhere::meta::TOPK, k}, - {knowhere::indexparam::N_TREES, n_trees}, - }; - knowhere::DatasetPtr query_dataset = generate_query_dataset(nq, d, (const void*)xq); - knowhere::Config query_conf{ - {knowhere::meta::DIM, d}, - {knowhere::meta::TOPK, k}, - {knowhere::indexparam::SEARCH_K, search_k}, - }; - - index.BuildAll(base_dataset, base_conf); - - printf("------------sanity check----------------\n"); - { // sanity check - auto res = index.Query(query_dataset, query_conf); - printf("Query done!\n"); - const int64_t* I = res->Get(knowhere::meta::IDS); - float* D = res->Get(knowhere::meta::DISTANCE); - - printf("I=\n"); - for (int i = 0; i < 5; i++) { - for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]); - printf("\n"); - } - - printf("D=\n"); - for (int i = 0; i < 5; i++) { - for (int j = 0; j < k; j++) printf("%7g ", D[i * k + j]); - printf("\n"); - } - } - - printf("---------------search xq-------------\n"); - { // search xq - auto res = index.Query(query_dataset, query_conf); - const int64_t* I = res->Get(knowhere::meta::IDS); - - printf("I=\n"); - for (int i = 0; i < nq; i++) { - for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]); - printf("\n"); - } - } - - 
printf("----------------search xq with delete------------\n"); - { // search xq with delete - auto res = index.Query(query_dataset, query_conf, bitset); - auto I = res->Get(knowhere::meta::IDS); - - printf("I=\n"); - for (int i = 0; i < nq; i++) { - for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]); - printf("\n"); - } - } - - delete[] xb; - delete[] xq; - delete[] ids; - - return 0; + // serialize index + index_->BuildAll(base_dataset, conf); + auto binaryset = index_->Serialize(knowhere::Config()); + index_->Load(binaryset); + ASSERT_EQ(index_->Count(), nb); + ASSERT_EQ(index_->Dim(), dim); + auto result = index_->Query(query_dataset, conf, nullptr); + AssertAnns(result, nq, knowhere::GetMetaTopk(conf)); } -*/ diff --git a/unittest/test_binaryidmap.cpp b/unittest/test_binaryidmap.cpp index 549445695..ea283998e 100644 --- a/unittest/test_binaryidmap.cpp +++ b/unittest/test_binaryidmap.cpp @@ -78,9 +78,9 @@ class BinaryIDMAPTest : public DataGen, const std::vector& golden_distances, const std::vector& golden_lims) { - auto lims = result->Get(knowhere::meta::LIMS); - auto ids = result->Get(knowhere::meta::IDS); - auto distances = result->Get(knowhere::meta::DISTANCE); + auto lims = knowhere::GetDatasetLims(result); + auto ids = knowhere::GetDatasetIDs(result); + auto distances = knowhere::GetDatasetDistance(result); for (int64_t i = 0; i < nq; i++) { ASSERT_EQ(golden_lims[i+1], lims[i+1]); diff --git a/unittest/test_binaryivf.cpp b/unittest/test_binaryivf.cpp index d6e9cc4e7..3d11f4e05 100644 --- a/unittest/test_binaryivf.cpp +++ b/unittest/test_binaryivf.cpp @@ -53,8 +53,8 @@ class BinaryIVFTest : public DataGen, const knowhere::DatasetPtr& result, const float radius) { - auto lims = result->Get(knowhere::meta::LIMS); - auto distances = result->Get(knowhere::meta::DISTANCE); + auto lims = knowhere::GetDatasetLims(result); + auto distances = knowhere::GetDatasetDistance(result); for (int64_t i = 0; i < lims[nq]; i++) { ASSERT_TRUE(C::cmp(distances[i], 
radius)); diff --git a/unittest/test_hnsw.cpp b/unittest/test_hnsw.cpp index 95b9a96e6..27130cc73 100644 --- a/unittest/test_hnsw.cpp +++ b/unittest/test_hnsw.cpp @@ -10,12 +10,14 @@ // or implied. See the License for the specific language governing permissions and limitations under the License. #include -#include "knowhere/common/Config.h" -#include "knowhere/index/vector_index/IndexHNSW.h" -#include "knowhere/index/vector_index/helpers/IndexParameter.h" #include #include + +#include "knowhere/common/Config.h" #include "knowhere/common/Exception.h" +#include "knowhere/index/vector_index/IndexHNSW.h" +#include "knowhere/index/vector_index/adapter/VectorAdapter.h" +#include "knowhere/index/vector_index/helpers/IndexParameter.h" #include "unittest/utils.h" using ::testing::Combine; @@ -67,9 +69,9 @@ TEST_P(HNSWTest, HNSW_basic) { // Serialize and Load before Query knowhere::BinarySet bs = index_->Serialize(conf); - int64_t dim = base_dataset->Get(knowhere::meta::DIM); - int64_t rows = base_dataset->Get(knowhere::meta::ROWS); - auto raw_data = base_dataset->Get(knowhere::meta::TENSOR); + int64_t dim = knowhere::GetDatasetDim(base_dataset); + int64_t rows = knowhere::GetDatasetRows(base_dataset); + auto raw_data = knowhere::GetDatasetTensor(base_dataset); knowhere::BinaryPtr bptr = std::make_shared(); bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); bptr->size = dim * rows * sizeof(float); @@ -82,11 +84,11 @@ TEST_P(HNSWTest, HNSW_basic) { // case: k > nb const int64_t new_rows = 6; - base_dataset->Set(knowhere::meta::ROWS, new_rows); + knowhere::SetDatasetRows(base_dataset, new_rows); index_->Train(base_dataset, conf); index_->AddWithoutIds(base_dataset, conf); auto result2 = index_->Query(query_dataset, conf, nullptr); - auto res_ids = result2->Get(knowhere::meta::IDS); + auto res_ids = knowhere::GetDatasetIDs(result2); for (int64_t i = 0; i < nq; i++) { for (int64_t j = new_rows; j < k; j++) { ASSERT_EQ(res_ids[i * k + j], -1); @@ -105,9 +107,9 @@ 
TEST_P(HNSWTest, HNSW_delete) { // Serialize and Load before Query knowhere::BinarySet bs = index_->Serialize(conf); - int64_t dim = base_dataset->Get(knowhere::meta::DIM); - int64_t rows = base_dataset->Get(knowhere::meta::ROWS); - auto raw_data = base_dataset->Get(knowhere::meta::TENSOR); + int64_t dim = knowhere::GetDatasetDim(base_dataset); + int64_t rows = knowhere::GetDatasetRows(base_dataset); + auto raw_data = knowhere::GetDatasetTensor(base_dataset); knowhere::BinaryPtr bptr = std::make_shared(); bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); bptr->size = dim * rows * sizeof(float); @@ -120,27 +122,6 @@ TEST_P(HNSWTest, HNSW_delete) { auto result2 = index_->Query(query_dataset, conf, *bitset); AssertAnns(result2, nq, k, CheckMode::CHECK_NOT_EQUAL); - - /* - * delete result checked by eyes - auto ids1 = result1->Get(knowhere::meta::IDS); - auto ids2 = result2->Get(knowhere::meta::IDS); - std::cout << std::endl; - for (int i = 0; i < nq; ++ i) { - std::cout << "ids1: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids1 + i * k + j) << " "; - } - std::cout << "ids2: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids2 + i * k + j) << " "; - } - std::cout << std::endl; - for (int j = 0; j < std::min(5, k>>1); ++ j) { - ASSERT_EQ(*(ids1 + i * k + j + 1), *(ids2 + i * k + j)); - } - } - */ } /* @@ -176,122 +157,3 @@ TEST_P(HNSWTest, HNSW_serialize) { AssertAnns(result, nq, conf[knowhere::meta::TOPK]); } }*/ - -/* - * faiss style test - * keep it -int -main() { - int64_t d = 64; // dimension - int64_t nb = 10000; // database size - int64_t nq = 10; // 10000; // nb of queries - faiss::ConcurrentBitsetPtr bitset = std::make_shared(nb); - - int64_t* ids = new int64_t[nb]; - float* xb = new float[d * nb]; - float* xq = new float[d * nq]; - // int64_t *ids = (int64_t*)malloc(nb * sizeof(int64_t)); - // float* xb = (float*)malloc(d * nb * sizeof(float)); - // float* xq = (float*)malloc(d * nq * sizeof(float)); - - for (int i = 0; i < nb; 
i++) { - for (int j = 0; j < d; j++) xb[d * i + j] = drand48(); - xb[d * i] += i / 1000.; - ids[i] = i; - } -// printf("gen xb and ids done! \n"); - - // srand((unsigned)time(nullptr)); - auto random_seed = (unsigned)time(nullptr); -// printf("delete ids: \n"); - for (int i = 0; i < nq; i++) { - auto tmp = rand_r(&random_seed) % nb; -// printf("%ld\n", tmp); - // std::cout << "before delete, test result: " << bitset->test(tmp) << std::endl; - bitset->set(tmp); - // std::cout << "after delete, test result: " << bitset->test(tmp) << std::endl; - for (int j = 0; j < d; j++) xq[d * i + j] = xb[d * tmp + j]; - // xq[d * i] += i / 1000.; - } -// printf("\n"); - - int k = 4; - int m = 16; - int ef = 200; - knowhere::IndexHNSW_NM index; - knowhere::DatasetPtr base_dataset = generate_dataset(nb, d, (const void*)xb, ids); -// base_dataset->Set(knowhere::meta::ROWS, nb); -// base_dataset->Set(knowhere::meta::DIM, d); -// base_dataset->Set(knowhere::meta::TENSOR, (const void*)xb); -// base_dataset->Set(knowhere::meta::IDS, (const int64_t*)ids); - - knowhere::Config base_conf{ - {knowhere::meta::DIM, d}, - {knowhere::meta::TOPK, k}, - {knowhere::indexparam::M, m}, - {knowhere::indexparam::EFCONSTRUCTION, ef}, - {knowhere::Metric::TYPE, knowhere::Metric::L2}, - }; - knowhere::DatasetPtr query_dataset = generate_query_dataset(nq, d, (const void*)xq); - knowhere::Config query_conf{ - {knowhere::meta::DIM, d}, - {knowhere::meta::TOPK, k}, - {knowhere::indexparam::M, m}, - {knowhere::indexparam::EF, ef}, - {knowhere::Metric::TYPE, knowhere::Metric::L2}, - }; - - index.Train(base_dataset, base_conf); - index.Add(base_dataset, base_conf); - -// printf("------------sanity check----------------\n"); - { // sanity check - auto res = index.Query(query_dataset, query_conf); -// printf("Query done!\n"); - const int64_t* I = res->Get(knowhere::meta::IDS); -// float* D = res->Get(knowhere::meta::DISTANCE); - -// printf("I=\n"); -// for (int i = 0; i < 5; i++) { -// for (int j = 0; j < k; j++) 
printf("%5ld ", I[i * k + j]); -// printf("\n"); -// } - -// printf("D=\n"); -// for (int i = 0; i < 5; i++) { -// for (int j = 0; j < k; j++) printf("%7g ", D[i * k + j]); -// printf("\n"); -// } - } - -// printf("---------------search xq-------------\n"); - { // search xq - auto res = index.Query(query_dataset, query_conf); - const int64_t* I = res->Get(knowhere::meta::IDS); - - printf("I=\n"); - for (int i = 0; i < nq; i++) { - for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]); - printf("\n"); - } - } - - printf("----------------search xq with delete------------\n"); - { // search xq with delete - auto res = index.Query(query_dataset, query_conf, bitset); - auto I = res->Get(knowhere::meta::IDS); - - printf("I=\n"); - for (int i = 0; i < nq; i++) { - for (int j = 0; j < k; j++) printf("%5ld ", I[i * k + j]); - printf("\n"); - } - } - - delete[] xb; - delete[] xq; - delete[] ids; - - return 0; -} -*/ diff --git a/unittest/test_idmap.cpp b/unittest/test_idmap.cpp index 440ffb0e9..e44ba886d 100644 --- a/unittest/test_idmap.cpp +++ b/unittest/test_idmap.cpp @@ -117,9 +117,9 @@ class IDMAPTest : public DataGen, public TestWithParam { const std::vector& golden_distances, const std::vector& golden_lims) { - auto lims = result->Get(knowhere::meta::LIMS); - auto ids = result->Get(knowhere::meta::IDS); - auto distances = result->Get(knowhere::meta::DISTANCE); + auto lims = knowhere::GetDatasetLims(result); + auto ids = knowhere::GetDatasetIDs(result); + auto distances = knowhere::GetDatasetDistance(result); for (int64_t i = 0; i < nq; i++) { if (golden_lims[i+1] != lims[i+1]) { diff --git a/unittest/test_ivf.cpp b/unittest/test_ivf.cpp index fb1c858d2..d1162fbe3 100644 --- a/unittest/test_ivf.cpp +++ b/unittest/test_ivf.cpp @@ -65,8 +65,8 @@ class IVFTest : public DataGen, const knowhere::DatasetPtr& result, const float radius) { - auto lims = result->Get(knowhere::meta::LIMS); - auto distances = result->Get(knowhere::meta::DISTANCE); + auto lims = 
knowhere::GetDatasetLims(result); + auto distances = knowhere::GetDatasetDistance(result); for (auto i = 0; i < lims[nq]; ++i) { ASSERT_TRUE(C::cmp(distances[i], radius)); @@ -218,8 +218,8 @@ TEST_P(IVFTest, clone_test) { // PrintResult(result, nq, k); auto AssertEqual = [&](knowhere::DatasetPtr p1, knowhere::DatasetPtr p2) { - auto ids_p1 = p1->Get(knowhere::meta::IDS); - auto ids_p2 = p2->Get(knowhere::meta::IDS); + auto ids_p1 = knowhere::GetDatasetIDs(p1); + auto ids_p2 = knowhere::GetDatasetIDs(p2); for (int i = 0; i < nq * k; ++i) { EXPECT_EQ(*((int64_t*)(ids_p2) + i), *((int64_t*)(ids_p1) + i)); diff --git a/unittest/test_ivf_nm.cpp b/unittest/test_ivf_nm.cpp index 5e1e2adc9..d8ab6cdbe 100644 --- a/unittest/test_ivf_nm.cpp +++ b/unittest/test_ivf_nm.cpp @@ -62,8 +62,8 @@ class IVFNMTest : public DataGen, const knowhere::DatasetPtr& result, const float radius) { - auto lims = result->Get(knowhere::meta::LIMS); - auto distances = result->Get(knowhere::meta::DISTANCE); + auto lims = knowhere::GetDatasetLims(result); + auto distances = knowhere::GetDatasetDistance(result); for (auto i = 0; i < lims[nq]; ++i) { ASSERT_TRUE(C::cmp(distances[i], radius)); diff --git a/unittest/test_nsg.cpp b/unittest/test_nsg.cpp index 7feecb46a..5e56a6dbd 100644 --- a/unittest/test_nsg.cpp +++ b/unittest/test_nsg.cpp @@ -90,9 +90,9 @@ TEST_F(NSGInterfaceTest, basic_test) { // Serialize and Load before Query knowhere::BinarySet bs = index_->Serialize(search_conf); - int64_t dim = base_dataset->Get(knowhere::meta::DIM); - int64_t rows = base_dataset->Get(knowhere::meta::ROWS); - auto raw_data = base_dataset->Get(knowhere::meta::TENSOR); + int64_t dim = knowhere::GetDatasetDim(base_dataset); + int64_t rows = knowhere::GetDatasetRows(base_dataset); + auto raw_data = knowhere::GetDatasetTensor(base_dataset); knowhere::BinaryPtr bptr = std::make_shared(); bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); bptr->size = dim * rows * sizeof(float); @@ -111,9 +111,9 @@ 
TEST_F(NSGInterfaceTest, basic_test) { // Serialize and Load before Query bs = new_index->Serialize(search_conf); - dim = base_dataset->Get(knowhere::meta::DIM); - rows = base_dataset->Get(knowhere::meta::ROWS); - raw_data = base_dataset->Get(knowhere::meta::TENSOR); + dim = knowhere::GetDatasetDim(base_dataset); + rows = knowhere::GetDatasetRows(base_dataset); + raw_data = knowhere::GetDatasetTensor(base_dataset); bptr = std::make_shared(); bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); bptr->size = dim * rows * sizeof(float); @@ -152,9 +152,9 @@ TEST_F(NSGInterfaceTest, delete_test) { // Serialize and Load before Query knowhere::BinarySet bs = index_->Serialize(search_conf); - int64_t dim = base_dataset->Get(knowhere::meta::DIM); - int64_t rows = base_dataset->Get(knowhere::meta::ROWS); - auto raw_data = base_dataset->Get(knowhere::meta::TENSOR); + int64_t dim = knowhere::GetDatasetDim(base_dataset); + int64_t rows = knowhere::GetDatasetRows(base_dataset); + auto raw_data = knowhere::GetDatasetTensor(base_dataset); knowhere::BinaryPtr bptr = std::make_shared(); bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); bptr->size = dim * rows * sizeof(float); @@ -164,7 +164,7 @@ TEST_F(NSGInterfaceTest, delete_test) { auto result = index_->Query(query_dataset, search_conf, nullptr); AssertAnns(result, nq, k); - auto I_before = result->Get(knowhere::meta::IDS); + auto I_before = GetDatasetIDs(result); ASSERT_EQ(index_->Count(), nb); ASSERT_EQ(index_->Dim(), dim); @@ -172,9 +172,9 @@ TEST_F(NSGInterfaceTest, delete_test) { // Serialize and Load before Query bs = index_->Serialize(search_conf); - dim = base_dataset->Get(knowhere::meta::DIM); - rows = base_dataset->Get(knowhere::meta::ROWS); - raw_data = base_dataset->Get(knowhere::meta::TENSOR); + dim = knowhere::GetDatasetDim(base_dataset); + rows = knowhere::GetDatasetRows(base_dataset); + raw_data = knowhere::GetDatasetTensor(base_dataset); bptr = std::make_shared(); bptr->data = 
std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); bptr->size = dim * rows * sizeof(float); @@ -186,7 +186,7 @@ TEST_F(NSGInterfaceTest, delete_test) { auto result_after = index_->Query(query_dataset, search_conf, *bitset); AssertAnns(result_after, nq, k, CheckMode::CHECK_NOT_EQUAL); - auto I_after = result_after->Get(knowhere::meta::IDS); + auto I_after = GetDatasetIDs(result_after); // First vector deleted for (int i = 0; i < nq; i++) { @@ -209,9 +209,9 @@ TEST_F(NSGInterfaceTest, slice_test) { // Serialize and Load before Query knowhere::BinarySet bs = index_->Serialize(search_conf); - int64_t dim = base_dataset->Get(knowhere::meta::DIM); - int64_t rows = base_dataset->Get(knowhere::meta::ROWS); - auto raw_data = base_dataset->Get(knowhere::meta::TENSOR); + int64_t dim = knowhere::GetDatasetDim(base_dataset); + int64_t rows = knowhere::GetDatasetRows(base_dataset); + auto raw_data = knowhere::GetDatasetTensor(base_dataset); knowhere::BinaryPtr bptr = std::make_shared(); bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); bptr->size = dim * rows * sizeof(float); @@ -230,9 +230,9 @@ TEST_F(NSGInterfaceTest, slice_test) { // Serialize and Load before Query bs = new_index_1->Serialize(search_conf); - dim = base_dataset->Get(knowhere::meta::DIM); - rows = base_dataset->Get(knowhere::meta::ROWS); - raw_data = base_dataset->Get(knowhere::meta::TENSOR); + dim = knowhere::GetDatasetDim(base_dataset); + rows = knowhere::GetDatasetRows(base_dataset); + raw_data = knowhere::GetDatasetTensor(base_dataset); bptr = std::make_shared(); bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); bptr->size = dim * rows * sizeof(float); diff --git a/unittest/test_rhnsw_flat.cpp b/unittest/test_rhnsw_flat.cpp index 45ef61662..092dce278 100644 --- a/unittest/test_rhnsw_flat.cpp +++ b/unittest/test_rhnsw_flat.cpp @@ -62,9 +62,9 @@ TEST_P(RHNSWFlatTest, HNSW_basic) { // Serialize and Load before Query knowhere::BinarySet bs = index_->Serialize(conf); - 
int64_t dim = base_dataset->Get(knowhere::meta::DIM); - int64_t rows = base_dataset->Get(knowhere::meta::ROWS); - auto raw_data = base_dataset->Get(knowhere::meta::TENSOR); + int64_t dim = knowhere::GetDatasetDim(base_dataset); + int64_t rows = knowhere::GetDatasetRows(base_dataset); + auto raw_data = knowhere::GetDatasetTensor(base_dataset); knowhere::BinaryPtr bptr = std::make_shared(); bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); bptr->size = dim * rows * sizeof(float); @@ -90,27 +90,6 @@ TEST_P(RHNSWFlatTest, HNSW_delete) { auto result2 = index_->Query(query_dataset, conf, *bitset); // AssertAnns(result2, nq, k, CheckMode::CHECK_NOT_EQUAL); - - /* - * delete result checked by eyes - auto ids1 = result1->Get(knowhere::meta::IDS); - auto ids2 = result2->Get(knowhere::meta::IDS); - std::cout << std::endl; - for (int i = 0; i < nq; ++ i) { - std::cout << "ids1: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids1 + i * k + j) << " "; - } - std::cout << "ids2: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids2 + i * k + j) << " "; - } - std::cout << std::endl; - for (int j = 0; j < std::min(5, k>>1); ++ j) { - ASSERT_EQ(*(ids1 + i * k + j + 1), *(ids2 + i * k + j)); - } - } - */ } TEST_P(RHNSWFlatTest, HNSW_serialize) { @@ -124,68 +103,64 @@ TEST_P(RHNSWFlatTest, HNSW_serialize) { reader(ret, bin->size); }; - { - index_->Train(base_dataset, conf); - index_->AddWithoutIds(base_dataset, conf); - auto binaryset = index_->Serialize(conf); - knowhere::IndexType index_type = index_->index_type(); - std::string idx_name = std::string(index_type) + "_Index"; - std::string met_name = "META"; - if (binaryset.binary_map_.find(idx_name) == binaryset.binary_map_.end()) { - std::cout << "no idx!" << std::endl; - } - if (binaryset.binary_map_.find(met_name) == binaryset.binary_map_.end()) { - std::cout << "no met!" 
<< std::endl; - } - auto bin_idx = binaryset.GetByName(idx_name); - auto bin_met = binaryset.GetByName(met_name); - - std::string filename_idx = temp_path("/tmp/RHNSWFlat_test_serialize_idx.bin"); - std::string filename_met = temp_path("/tmp/RHNSWFlat_test_serialize_met.bin"); - auto load_idx = new uint8_t[bin_idx->size]; - auto load_met = new uint8_t[bin_met->size]; - serialize(filename_idx, bin_idx, load_idx); - serialize(filename_met, bin_met, load_met); - - binaryset.clear(); - auto new_idx = std::make_shared(); - std::shared_ptr met(load_met); - std::shared_ptr idx(load_idx); - binaryset.Append(std::string(new_idx->index_type()) + "_Index", idx, bin_idx->size); - binaryset.Append("META", met, bin_met->size); - - int64_t dim = base_dataset->Get(knowhere::meta::DIM); - int64_t rows = base_dataset->Get(knowhere::meta::ROWS); - auto raw_data = base_dataset->Get(knowhere::meta::TENSOR); - knowhere::BinaryPtr bptr = std::make_shared(); - bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); - bptr->size = dim * rows * sizeof(float); - binaryset.Append(RAW_DATA, bptr); - new_idx->Load(binaryset); - EXPECT_EQ(new_idx->Count(), nb); - EXPECT_EQ(new_idx->Dim(), dim); - auto result = new_idx->Query(query_dataset, conf, nullptr); - // AssertAnns(result, nq, conf[knowhere::meta::TOPK]); + index_->Train(base_dataset, conf); + index_->AddWithoutIds(base_dataset, conf); + auto binaryset = index_->Serialize(conf); + knowhere::IndexType index_type = index_->index_type(); + std::string idx_name = std::string(index_type) + "_Index"; + std::string met_name = "META"; + if (binaryset.binary_map_.find(idx_name) == binaryset.binary_map_.end()) { + std::cout << "no idx!" << std::endl; + } + if (binaryset.binary_map_.find(met_name) == binaryset.binary_map_.end()) { + std::cout << "no met!" 
<< std::endl; } + auto bin_idx = binaryset.GetByName(idx_name); + auto bin_met = binaryset.GetByName(met_name); + + std::string filename_idx = temp_path("/tmp/RHNSWFlat_test_serialize_idx.bin"); + std::string filename_met = temp_path("/tmp/RHNSWFlat_test_serialize_met.bin"); + auto load_idx = new uint8_t[bin_idx->size]; + auto load_met = new uint8_t[bin_met->size]; + serialize(filename_idx, bin_idx, load_idx); + serialize(filename_met, bin_met, load_met); + + binaryset.clear(); + auto new_idx = std::make_shared(); + std::shared_ptr met(load_met); + std::shared_ptr idx(load_idx); + binaryset.Append(std::string(new_idx->index_type()) + "_Index", idx, bin_idx->size); + binaryset.Append("META", met, bin_met->size); + + int64_t dim = knowhere::GetDatasetDim(base_dataset); + int64_t rows = knowhere::GetDatasetRows(base_dataset); + auto raw_data = knowhere::GetDatasetTensor(base_dataset); + knowhere::BinaryPtr bptr = std::make_shared(); + bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); + bptr->size = dim * rows * sizeof(float); + binaryset.Append(RAW_DATA, bptr); + new_idx->Load(binaryset); + EXPECT_EQ(new_idx->Count(), nb); + EXPECT_EQ(new_idx->Dim(), dim); + auto result = new_idx->Query(query_dataset, conf, nullptr); + // AssertAnns(result, nq, conf[knowhere::meta::TOPK]); } TEST_P(RHNSWFlatTest, HNSW_slice) { - { - index_->Train(base_dataset, conf); - index_->AddWithoutIds(base_dataset, conf); - auto binaryset = index_->Serialize(conf); - auto new_idx = std::make_shared(); - int64_t dim = base_dataset->Get(knowhere::meta::DIM); - int64_t rows = base_dataset->Get(knowhere::meta::ROWS); - auto raw_data = base_dataset->Get(knowhere::meta::TENSOR); - knowhere::BinaryPtr bptr = std::make_shared(); - bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); - bptr->size = dim * rows * sizeof(float); - binaryset.Append(RAW_DATA, bptr); - new_idx->Load(binaryset); - EXPECT_EQ(new_idx->Count(), nb); - EXPECT_EQ(new_idx->Dim(), dim); - auto result = 
new_idx->Query(query_dataset, conf, nullptr); - // AssertAnns(result, nq, conf[knowhere::meta::TOPK]); - } + index_->Train(base_dataset, conf); + index_->AddWithoutIds(base_dataset, conf); + auto binaryset = index_->Serialize(conf); + auto new_idx = std::make_shared(); + int64_t dim = knowhere::GetDatasetDim(base_dataset); + int64_t rows = knowhere::GetDatasetRows(base_dataset); + auto raw_data = knowhere::GetDatasetTensor(base_dataset); + knowhere::BinaryPtr bptr = std::make_shared(); + bptr->data = std::shared_ptr((uint8_t*)raw_data, [&](uint8_t*) {}); + bptr->size = dim * rows * sizeof(float); + binaryset.Append(RAW_DATA, bptr); + new_idx->Load(binaryset); + EXPECT_EQ(new_idx->Count(), nb); + EXPECT_EQ(new_idx->Dim(), dim); + auto result = new_idx->Query(query_dataset, conf, nullptr); + // AssertAnns(result, nq, conf[knowhere::meta::TOPK]); } diff --git a/unittest/test_rhnsw_pq.cpp b/unittest/test_rhnsw_pq.cpp index 7020a5a19..f6dbdae49 100644 --- a/unittest/test_rhnsw_pq.cpp +++ b/unittest/test_rhnsw_pq.cpp @@ -83,27 +83,6 @@ TEST_P(RHNSWPQTest, HNSW_delete) { auto result2 = index_->Query(query_dataset, conf, *bitset); // AssertAnns(result2, nq, k, CheckMode::CHECK_NOT_EQUAL); - - /* - * delete result checked by eyes - auto ids1 = result1->Get(knowhere::meta::IDS); - auto ids2 = result2->Get(knowhere::meta::IDS); - std::cout << std::endl; - for (int i = 0; i < nq; ++ i) { - std::cout << "ids1: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids1 + i * k + j) << " "; - } - std::cout << "ids2: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids2 + i * k + j) << " "; - } - std::cout << std::endl; - for (int j = 0; j < std::min(5, k>>1); ++ j) { - ASSERT_EQ(*(ids1 + i * k + j + 1), *(ids2 + i * k + j)); - } - } - */ } TEST_P(RHNSWPQTest, HNSW_serialize) { diff --git a/unittest/test_rhnsw_sq8.cpp b/unittest/test_rhnsw_sq8.cpp index 86061c5b1..e0b707999 100644 --- a/unittest/test_rhnsw_sq8.cpp +++ b/unittest/test_rhnsw_sq8.cpp @@ -83,27 +83,6 @@ 
TEST_P(RHNSWSQ8Test, HNSW_delete) { auto result2 = index_->Query(query_dataset, conf, *bitset); // AssertAnns(result2, nq, k, CheckMode::CHECK_NOT_EQUAL); - - /* - * delete result checked by eyes - auto ids1 = result1->Get(knowhere::meta::IDS); - auto ids2 = result2->Get(knowhere::meta::IDS); - std::cout << std::endl; - for (int i = 0; i < nq; ++ i) { - std::cout << "ids1: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids1 + i * k + j) << " "; - } - std::cout << "ids2: "; - for (int j = 0; j < k; ++ j) { - std::cout << *(ids2 + i * k + j) << " "; - } - std::cout << std::endl; - for (int j = 0; j < std::min(5, k>>1); ++ j) { - ASSERT_EQ(*(ids1 + i * k + j + 1), *(ids2 + i * k + j)); - } - } - */ } TEST_P(RHNSWSQ8Test, HNSW_serialize) { diff --git a/unittest/test_sptag.cpp b/unittest/test_sptag.cpp index f0e9cb99a..b1451d60f 100644 --- a/unittest/test_sptag.cpp +++ b/unittest/test_sptag.cpp @@ -71,8 +71,8 @@ TEST_P(SPTAGTest, sptag_basic) { AssertAnns(result, nq, k); { - auto ids = result->Get(knowhere::meta::IDS); - auto dist = result->Get(knowhere::meta::DISTANCE); + auto ids = knowhere::GetDatasetIDs(result); + auto dist = knowhere::GetDatasetDistance(result); std::stringstream ss_id; std::stringstream ss_dist; diff --git a/unittest/utils.cpp b/unittest/utils.cpp index 2e1b62b89..9c27a8a16 100644 --- a/unittest/utils.cpp +++ b/unittest/utils.cpp @@ -129,7 +129,7 @@ GenBase(const int64_t dim, void AssertAnns(const knowhere::DatasetPtr& result, const int nq, const int k, const CheckMode check_mode) { - auto ids = result->Get(knowhere::meta::IDS); + auto ids = knowhere::GetDatasetIDs(result); for (auto i = 0; i < nq; i++) { switch (check_mode) { case CheckMode::CHECK_EQUAL: @@ -149,9 +149,9 @@ AssertAnns(const knowhere::DatasetPtr& result, const int nq, const int k, const void AssertVec(const knowhere::DatasetPtr& result, const knowhere::DatasetPtr& base_dataset, const knowhere::DatasetPtr& id_dataset, const int n, const int dim, const CheckMode check_mode) { 
- float* base = (float*)base_dataset->Get(knowhere::meta::TENSOR); - auto ids = id_dataset->Get(knowhere::meta::IDS); - auto x = result->Get(knowhere::meta::TENSOR); + float* base = (float*)knowhere::GetDatasetTensor(base_dataset); + auto ids = knowhere::GetDatasetIDs(id_dataset); + auto x = (float*)knowhere::GetDatasetTensor(result); for (auto i = 0; i < n; i++) { auto id = ids[i]; for (auto j = 0; j < dim; j++) { @@ -181,9 +181,9 @@ AssertVec(const knowhere::DatasetPtr& result, const knowhere::DatasetPtr& base_d void AssertBinVec(const knowhere::DatasetPtr& result, const knowhere::DatasetPtr& base_dataset, const knowhere::DatasetPtr& id_dataset, const int n, const int dim, const CheckMode check_mode) { - auto base = (uint8_t*)base_dataset->Get(knowhere::meta::TENSOR); - auto ids = id_dataset->Get(knowhere::meta::IDS); - auto x = result->Get(knowhere::meta::TENSOR); + auto base = (uint8_t*)knowhere::GetDatasetTensor(base_dataset); + auto ids = knowhere::GetDatasetIDs(id_dataset); + auto x = (float*)knowhere::GetDatasetTensor(result); for (auto i = 0; i < 1; i++) { auto id = ids[i]; for (auto j = 0; j < dim; j++) { @@ -195,8 +195,8 @@ AssertBinVec(const knowhere::DatasetPtr& result, const knowhere::DatasetPtr& bas void PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) { - auto ids = result->Get(knowhere::meta::IDS); - auto dist = result->Get(knowhere::meta::DISTANCE); + auto ids = knowhere::GetDatasetIDs(result); + auto dist = knowhere::GetDatasetDistance(result); std::stringstream ss_id; std::stringstream ss_dist; @@ -214,15 +214,6 @@ PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k) { std::cout << "dist\n" << ss_dist.str() << std::endl; } -void -ReleaseQueryResult(const knowhere::DatasetPtr& result) { - float* res_dist = result->Get(knowhere::meta::DISTANCE); - free(res_dist); - - int64_t* res_ids = result->Get(knowhere::meta::IDS); - free(res_ids); -} - // not used #if 0 void diff --git a/unittest/utils.h
b/unittest/utils.h index e03e3bf5a..8163ecc89 100644 --- a/unittest/utils.h +++ b/unittest/utils.h @@ -115,9 +115,6 @@ AssertBinVec(const knowhere::DatasetPtr& result, void PrintResult(const knowhere::DatasetPtr& result, const int& nq, const int& k); -void -ReleaseQueryResult(const knowhere::DatasetPtr& result); - struct FileIOWriter { std::fstream fs; std::string name;