diff --git a/unittest/benchmark/CMakeLists.txt b/unittest/benchmark/CMakeLists.txt index f8ffb2e73..4225365cc 100644 --- a/unittest/benchmark/CMakeLists.txt +++ b/unittest/benchmark/CMakeLists.txt @@ -34,31 +34,14 @@ set( UTIL_SRCS ) #============================================================================== -set( BENCHMARK_FAISS_SRCS - ${UTIL_SRCS} - benchmark_faiss_test.cpp - ) - -add_executable(benchmark_faiss ${BENCHMARK_FAISS_SRCS}) -target_link_libraries(benchmark_faiss ${depend_libs} ${unittest_libs} ${basic_libs}) -install(TARGETS benchmark_faiss DESTINATION unittest) - -#============================================================================== -set( BENCHMARK_KNOWHERE_SRCS - ${UTIL_SRCS} - benchmark_knowhere_test.cpp - ) - -add_executable(benchmark_knowhere ${BENCHMARK_KNOWHERE_SRCS}) -target_link_libraries(benchmark_knowhere ${depend_libs} ${unittest_libs} ${basic_libs}) -install(TARGETS benchmark_knowhere DESTINATION unittest) - -#============================================================================== -set( BENCHMARK_KNOWHERE_PERF_SRCS - ${UTIL_SRCS} - benchmark_knowhere_perf.cpp - ) - -add_executable(benchmark_knowhere_perf ${BENCHMARK_KNOWHERE_PERF_SRCS}) -target_link_libraries(benchmark_knowhere_perf ${depend_libs} ${unittest_libs} ${basic_libs}) -install(TARGETS benchmark_knowhere_perf DESTINATION unittest) +macro(benchmark_test target file) + set(FILE_SRCS ${UTIL_SRCS} ${file}) + add_executable(${target} ${FILE_SRCS}) + target_link_libraries(${target} ${depend_libs} ${unittest_libs} ${basic_libs}) + install(TARGETS ${target} DESTINATION unittest) +endmacro() + +benchmark_test(benchmark_faiss benchmark_faiss.cpp) +benchmark_test(benchmark_knowhere_binary benchmark_knowhere_binary.cpp) +benchmark_test(benchmark_knowhere_float benchmark_knowhere_float.cpp) +benchmark_test(benchmark_knowhere_perf benchmark_knowhere_perf.cpp) diff --git a/unittest/benchmark/benchmark_faiss_test.cpp b/unittest/benchmark/benchmark_faiss.cpp similarity index 92% rename from unittest/benchmark/benchmark_faiss_test.cpp rename to unittest/benchmark/benchmark_faiss.cpp index 4d0bd9bf9..80c78a93e 100644 --- a/unittest/benchmark/benchmark_faiss_test.cpp +++ b/unittest/benchmark/benchmark_faiss.cpp @@ -24,6 +24,9 @@ X; \ double t_diff = elapsed() - t_start; +using idx_t = int64_t; +using distance_t = float; + class Benchmark_faiss : public Benchmark_sift { public: void @@ -46,10 +49,10 @@ class Benchmark_faiss : public Benchmark_sift { index_ = faiss::index_factory(dim_, index_key_.c_str(), metric_type_); printf("[%.3f s] Training on %d vectors\n", get_time_diff(), nb_); - index_->train(nb_, xb_); + index_->train(nb_, (const float*)xb_); printf("[%.3f s] Indexing on %d vectors\n", get_time_diff(), nb_); - index_->add(nb_, xb_); + index_->add(nb_, (const float*)xb_); printf("[%.3f s] Writing index file: %s\n", get_time_diff(), index_file_name.c_str()); write_index(index_file_name); @@ -65,7 +68,7 @@ class Benchmark_faiss : public Benchmark_sift { printf("================================================================================\n"); for (auto nq : NQs_) { for (auto k : TOPKs_) { - CALC_TIME_SPAN(index_->search(nq, xq_, k, D, I)); + CALC_TIME_SPAN(index_->search(nq, (const float*)xq_, k, D, I)); float recall = CalcRecall(I, nq, k); printf(" nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nq, k, t_diff, recall); } @@ -90,7 +93,7 @@ class Benchmark_faiss : public Benchmark_sift { params.set_index_parameters(index_, nprobe_str.c_str()); for (auto nq : NQs_) { for (auto k : TOPKs_) { - CALC_TIME_SPAN(index_->search(nq, xq_, k, D, I)); + CALC_TIME_SPAN(index_->search(nq, (const float*)xq_, k, D, I)); float recall = CalcRecall(I, nq, k); printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nprobe, nq, k, t_diff, recall); @@ -115,7 +118,7 @@ class Benchmark_faiss : public Benchmark_sift { for (auto ef : EFs_) { for (auto nq : NQs_) { for (auto k : TOPKs_) { - CALC_TIME_SPAN(index_->search(nq_, xq_, k, D, I)); + CALC_TIME_SPAN(index_->search(nq_, (const float*)xq_, k, D, I)); float recall = CalcRecall(I, nq, k); printf(" ef = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", ef, nq, k, t_diff, recall); } @@ -131,8 +134,12 @@ class Benchmark_faiss : public Benchmark_sift { protected: void SetUp() override { + T0_ = elapsed(); set_ann_test_name("sift-128-euclidean"); - Benchmark_sift::SetUp(); + parse_ann_test_name(); + load_hdf5_data(); + + assert(metric_str_ == METRIC_IP_STR || metric_str_ == METRIC_L2_STR); metric_type_ = (metric_str_ == METRIC_IP_STR) ? faiss::METRIC_INNER_PRODUCT : faiss::METRIC_L2; knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AUTO); } diff --git a/unittest/benchmark/benchmark_knowhere_binary.cpp b/unittest/benchmark/benchmark_knowhere_binary.cpp new file mode 100644 index 000000000..12321dfd8 --- /dev/null +++ b/unittest/benchmark/benchmark_knowhere_binary.cpp @@ -0,0 +1,258 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License. + +#include + +#include + +#include "knowhere/index/IndexType.h" +#include "knowhere/index/VecIndexFactory.h" +#include "knowhere/index/vector_index/adapter/VectorAdapter.h" +#include "unittest/benchmark/benchmark_sift.h" +#include "unittest/utils.h" + +#define CALC_TIME_SPAN(X) \ + double t_start = elapsed(); \ + X; \ + double t_diff = elapsed() - t_start; + +class Benchmark_knowhere_binary : public Benchmark_sift { + public: + void + write_index(const std::string& filename, const knowhere::Config& conf) { + binary_set_.clear(); + + FileIOWriter writer(filename); + binary_set_ = index_->Serialize(conf); + + const auto& m = binary_set_.binary_map_; + for (auto it = m.begin(); it != m.end(); ++it) { + const std::string& name = it->first; + size_t name_size = name.length(); + const knowhere::BinaryPtr data = it->second; + size_t data_size = data->size; + + writer(&name_size, sizeof(size_t)); + writer(&data->size, sizeof(data->size)); + writer((void*)name.c_str(), name_size); + writer(data->data.get(), data->size); + } + } + + void + read_index(const std::string& filename) { + binary_set_.clear(); + + FileIOReader reader(filename); + int64_t file_size = reader.size(); + if (file_size < 0) { + throw knowhere::KnowhereException(filename + " not exist"); + } + + int64_t offset = 0; + while (offset < file_size) { + size_t name_size, data_size; + reader(&name_size, sizeof(size_t)); + offset += sizeof(size_t); + reader(&data_size, sizeof(size_t)); + offset += sizeof(size_t); + + std::string name; + name.resize(name_size); + reader(name.data(), name_size); + offset += name_size; + auto data = new uint8_t[data_size]; + reader(data, data_size); + offset += data_size; + + std::shared_ptr data_ptr(data); + binary_set_.Append(name, data_ptr, data_size); + } + } + + std::string + get_index_name(const std::vector& params) { + std::string params_str = ""; + for (size_t i = 0; i < params.size(); i++) { + params_str += "_" + std::to_string(params[i]); + } + return ann_test_name_ + "_" + std::string(index_type_) + params_str + ".index"; + } + + void + create_cpu_index(const std::string& index_file_name, const knowhere::Config& conf) { + printf("[%.3f s] Creating CPU index \"%s\"\n", get_time_diff(), std::string(index_type_).c_str()); + auto& factory = knowhere::VecIndexFactory::GetInstance(); + index_ = factory.CreateVecIndex(index_type_); + + try { + printf("[%.3f s] Reading index file: %s\n", get_time_diff(), index_file_name.c_str()); + read_index(index_file_name); + } catch (...) { + printf("[%.3f s] Building all on %d vectors\n", get_time_diff(), nb_); + knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(nb_, dim_, xb_); + index_->BuildAll(ds_ptr, conf); + + printf("[%.3f s] Writing index file: %s\n", get_time_diff(), index_file_name.c_str()); + write_index(index_file_name, conf); + } + } + + void + test_binary_idmap(const knowhere::Config& cfg) { + auto conf = cfg; + + printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), std::string(index_type_).c_str()); + printf("================================================================================\n"); + for (auto nq : NQs_) { + knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(nq, dim_, xq_); + for (auto k : TOPKs_) { + knowhere::SetMetaTopk(conf, k); + CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); + auto ids = knowhere::GetDatasetIDs(result); + float recall = CalcRecall(ids, nq, k); + printf(" nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nq, k, t_diff, recall); + } + } + printf("================================================================================\n"); + printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), + std::string(index_type_).c_str()); + } + + void + test_binary_ivf(const knowhere::Config& cfg) { + auto conf = cfg; + auto nlist = knowhere::GetIndexParamNlist(conf); + + printf("\n[%0.3f s] %s | %s | nlist=%ld\n", get_time_diff(), ann_test_name_.c_str(), + std::string(index_type_).c_str(), nlist); + printf("================================================================================\n"); + for (auto nprobe : NPROBEs_) { + knowhere::SetIndexParamNprobe(conf, nprobe); + for (auto nq : NQs_) { + knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(nq, dim_, xq_); + for (auto k : TOPKs_) { + knowhere::SetMetaTopk(conf, k); + CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); + auto ids = knowhere::GetDatasetIDs(result); + float recall = CalcRecall(ids, nq, k); + printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nprobe, nq, k, t_diff, + recall); + } + } + } + printf("================================================================================\n"); + printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), + std::string(index_type_).c_str()); + } + + protected: + void + SetUp() override { + T0_ = elapsed(); + // set_ann_test_name("sift-128-euclidean"); + set_ann_test_name("sift-4096-hamming"); + parse_ann_test_name(); + load_hdf5_data(); + + assert(metric_str_ == METRIC_HAM_STR || metric_str_ == METRIC_JAC_STR || metric_str_ == METRIC_TAN_STR); + metric_type_ = (metric_str_ == METRIC_HAM_STR) ? knowhere::metric::HAMMING + : (metric_str_ == METRIC_JAC_STR) ? knowhere::metric::JACCARD + : knowhere::metric::TANIMOTO; + knowhere::SetMetaMetricType(cfg_, metric_type_); + knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AUTO); + } + + void + TearDown() override { + free_all(); + } + + protected: + knowhere::MetricType metric_type_; + knowhere::BinarySet binary_set_; + knowhere::IndexType index_type_; + knowhere::VecIndexPtr index_ = nullptr; + knowhere::Config cfg_; + + const std::vector NQs_ = {10000}; + const std::vector TOPKs_ = {10}; + + // IVF index params + const std::vector NLISTs_ = {1024}; + const std::vector NPROBEs_ = {1, 2, 4, 8, 16, 32, 64, 128, 256}; +}; + +// This testcase can be used to generate binary sift1m HDF5 file +// Following these steps: +// 1. set_ann_test_name("sift-128-euclidean") +// 2. use load_hdf5_data(); +// 3. change metric type to expected value (hamming/jaccard/tanimoto) manually +// 4. specify the hdf5 file name to generate +// 5. run this testcase +#if 0 +TEST_F(Benchmark_knowhere_binary, TEST_CREATE_BINARY_HDF5) { + index_type_ = knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP; + + knowhere::Config conf = cfg_; + std::string index_file_name = get_index_name({}); + + // use sift1m data as binary data + dim_ *= 32; + metric_type_ = knowhere::metric::HAMMING; + knowhere::SetMetaMetricType(conf, metric_type_); + + create_cpu_index(index_file_name, conf); + index_->Load(binary_set_); + + knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(nq_, dim_, xq_); + knowhere::SetMetaTopk(conf, gt_k_); + auto result = index_->Query(ds_ptr, conf, nullptr); + + auto gt_ids = knowhere::GetDatasetIDs(result); + auto gt_dist = knowhere::GetDatasetDistance(result); + + auto gt_ids_int = new int32_t[gt_k_ * nq_]; + for (int32_t i = 0; i < gt_k_ * nq_; i++) { + gt_ids_int[i] = gt_ids[i]; + } + + assert(dim_ == 4096); + assert(nq_ == 10000); + assert(gt_k_ == 100); + hdf5_write("sift-4096-hamming.hdf5", dim_/32, gt_k_, xb_, nb_, xq_, nq_, gt_ids_int, gt_dist); + + delete[] gt_ids_int; +} +#endif + +TEST_F(Benchmark_knowhere_binary, TEST_BINARY_IDMAP) { + index_type_ = knowhere::IndexEnum::INDEX_FAISS_BIN_IDMAP; + + knowhere::Config conf = cfg_; + std::string index_file_name = get_index_name({}); + create_cpu_index(index_file_name, conf); + index_->Load(binary_set_); + test_binary_idmap(conf); +} + +TEST_F(Benchmark_knowhere_binary, TEST_BINARY_IVFFLAT) { + index_type_ = knowhere::IndexEnum::INDEX_FAISS_BIN_IVFFLAT; + + knowhere::Config conf = cfg_; + for (auto nlist : NLISTs_) { + std::string index_file_name = get_index_name({nlist}); + knowhere::SetIndexParamNlist(conf, nlist); + create_cpu_index(index_file_name, conf); + index_->Load(binary_set_); + test_binary_ivf(conf); + } +} diff --git a/unittest/benchmark/benchmark_knowhere_test.cpp b/unittest/benchmark/benchmark_knowhere_float.cpp similarity index 95% rename from unittest/benchmark/benchmark_knowhere_test.cpp rename to unittest/benchmark/benchmark_knowhere_float.cpp index 97681950f..3a816b866 100644 --- a/unittest/benchmark/benchmark_knowhere_test.cpp +++ b/unittest/benchmark/benchmark_knowhere_float.cpp @@ -24,7 +24,7 @@ X; \ double t_diff = elapsed() - t_start; -class Benchmark_knowhere : public Benchmark_sift { +class Benchmark_knowhere_float : public Benchmark_sift { public: void write_index(const std::string& filename, const knowhere::Config& conf) { @@ -211,13 +211,22 @@ class Benchmark_knowhere : public Benchmark_sift { protected: void SetUp() override { + T0_ = elapsed(); set_ann_test_name("sift-128-euclidean"); - Benchmark_sift::SetUp(); + parse_ann_test_name(); + load_hdf5_data(); + + assert(metric_str_ == METRIC_IP_STR || metric_str_ == METRIC_L2_STR); metric_type_ = (metric_str_ == METRIC_IP_STR) ? knowhere::metric::IP : knowhere::metric::L2; knowhere::SetMetaMetricType(cfg_, metric_type_); knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AUTO); } + void + TearDown() override { + free_all(); + } + protected: knowhere::MetricType metric_type_; knowhere::BinarySet binary_set_; @@ -242,7 +251,7 @@ class Benchmark_knowhere : public Benchmark_sift { const std::vector SEARCH_Ks_ = {16, 32, 64, 128, 256}; }; -TEST_F(Benchmark_knowhere, TEST_IDMAP) { +TEST_F(Benchmark_knowhere_float, TEST_IDMAP) { index_type_ = knowhere::IndexEnum::INDEX_FAISS_IDMAP; knowhere::Config conf = cfg_; @@ -252,7 +261,7 @@ TEST_F(Benchmark_knowhere, TEST_IDMAP) { test_idmap(conf); } -TEST_F(Benchmark_knowhere, TEST_IVFFLAT_NM) { +TEST_F(Benchmark_knowhere_float, TEST_IVFFLAT_NM) { index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT; knowhere::Config conf = cfg_; @@ -272,7 +281,7 @@ TEST_F(Benchmark_knowhere, TEST_IVFFLAT_NM) { } } -TEST_F(Benchmark_knowhere, TEST_IVFSQ8) { +TEST_F(Benchmark_knowhere_float, TEST_IVFSQ8) { index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8; knowhere::Config conf = cfg_; @@ -285,7 +294,7 @@ TEST_F(Benchmark_knowhere, TEST_IVFSQ8) { } } -TEST_F(Benchmark_knowhere, TEST_HNSW) { +TEST_F(Benchmark_knowhere_float, TEST_HNSW) { index_type_ = knowhere::IndexEnum::INDEX_HNSW; knowhere::Config conf = cfg_; @@ -301,7 +310,7 @@ TEST_F(Benchmark_knowhere, TEST_HNSW) { } } -TEST_F(Benchmark_knowhere, TEST_ANNOY) { +TEST_F(Benchmark_knowhere_float, TEST_ANNOY) { index_type_ = knowhere::IndexEnum::INDEX_ANNOY; knowhere::Config conf = cfg_; diff --git a/unittest/benchmark/benchmark_knowhere_perf.cpp b/unittest/benchmark/benchmark_knowhere_perf.cpp index 99282186b..7d88b597f 100644 --- a/unittest/benchmark/benchmark_knowhere_perf.cpp +++ b/unittest/benchmark/benchmark_knowhere_perf.cpp @@ -114,7 +114,7 @@ class Benchmark_knowhere_perf : public Benchmark_sift { printf("\n[%0.3f s] %s | %s \n", get_time_diff(), ann_test_name_.c_str(), std::string(index_type_).c_str()); printf("================================================================================\n"); for (int32_t i = 0; i + NQ_STEP_ <= GT_NQ_; i = (i + NQ_STEP_) % GT_NQ_) { - knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(NQ_STEP_, dim_, xq_ + (i * dim_)); + knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(NQ_STEP_, dim_, (const float*)xq_ + (i * dim_)); for (auto k : TOPKs_) { knowhere::SetMetaTopk(conf, k); CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr)); @@ -139,7 +139,7 @@ class Benchmark_knowhere_perf : public Benchmark_sift { std::string(index_type_).c_str(), nlist); printf("================================================================================\n"); for (int32_t i = 0; i + NQ_STEP_ <= GT_NQ_; i = (i + NQ_STEP_) % GT_NQ_) { - knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(NQ_STEP_, dim_, xq_ + (i * dim_)); + knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(NQ_STEP_, dim_, (const float*)xq_ + (i * dim_)); for (auto nprobe : NPROBEs_) { knowhere::SetIndexParamNprobe(conf, nprobe); for (auto k : TOPKs_) { @@ -168,7 +168,7 @@ class Benchmark_knowhere_perf : public Benchmark_sift { std::string(index_type_).c_str(), M, efConstruction); printf("================================================================================\n"); for (int32_t i = 0; i + NQ_STEP_ <= GT_NQ_; i = (i + NQ_STEP_) % GT_NQ_) { - knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(NQ_STEP_, dim_, xq_ + (i * dim_)); + knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(NQ_STEP_, dim_, (const float*)xq_ + (i * dim_)); for (auto ef : EFs_) { knowhere::SetIndexParamEf(conf, ef); for (auto k : TOPKs_) { @@ -196,7 +196,7 @@ class Benchmark_knowhere_perf : public Benchmark_sift { std::string(index_type_).c_str(), n_trees); printf("================================================================================\n"); for (int32_t i = 0; i + NQ_STEP_ <= GT_NQ_; i = (i + NQ_STEP_) % GT_NQ_) { - knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(NQ_STEP_, dim_, xq_ + (i * dim_)); + knowhere::DatasetPtr ds_ptr = knowhere::GenDataset(NQ_STEP_, dim_, (const float*)xq_ + (i * dim_)); for (auto sk : SEARCH_Ks_) { knowhere::SetIndexParamSearchK(conf, sk); for (auto k : TOPKs_) { @@ -217,13 +217,22 @@ class Benchmark_knowhere_perf : public Benchmark_sift { protected: void SetUp() override { + T0_ = elapsed(); set_ann_test_name("sift-128-euclidean"); - Benchmark_sift::SetUp(); + parse_ann_test_name(); + load_hdf5_data(); + + assert(metric_str_ == METRIC_IP_STR || metric_str_ == METRIC_L2_STR); metric_type_ = (metric_str_ == METRIC_IP_STR) ? knowhere::metric::IP : knowhere::metric::L2; knowhere::SetMetaMetricType(cfg_, metric_type_); knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType::AUTO); } + void + TearDown() override { + free_all(); + } + protected: knowhere::MetricType metric_type_; knowhere::BinarySet binary_set_; diff --git a/unittest/benchmark/benchmark_sift.h b/unittest/benchmark/benchmark_sift.h index dfefe3e94..ea7d7e8bd 100644 --- a/unittest/benchmark/benchmark_sift.h +++ b/unittest/benchmark/benchmark_sift.h @@ -35,6 +35,9 @@ static const char* HDF5_DATASET_DISTANCES = "distances"; static const char* METRIC_IP_STR = "angular"; static const char* METRIC_L2_STR = "euclidean"; +static const char* METRIC_HAM_STR = "hamming"; +static const char* METRIC_JAC_STR = "jaccard"; +static const char* METRIC_TAN_STR = "tanimoto"; /************************************************************************************ * https://github.com/erikbern/ann-benchmarks @@ -49,8 +52,6 @@ static const char* METRIC_L2_STR = "euclidean"; * NYTimes 256 290,000 10,000 100 Angular HDF5 (301MB) * SIFT 128 1,000,000 10,000 100 Euclidean HDF5 (501MB) *************************************************************************************/ -using idx_t = int64_t; -using distance_t = float; class Benchmark_sift : public ::testing::Test { public: @@ -87,13 +88,13 @@ class Benchmark_sift : public ::testing::Test { } float - CalcRecall(const idx_t* ids, int32_t nq, int32_t k) { + CalcRecall(const int64_t* ids, int32_t nq, int32_t k) { int32_t min_k = std::min(gt_k_, k); int32_t hit = 0; for (int32_t i = 0; i < nq; i++) { - std::unordered_set ground(gt_ids_ + i * gt_k_, gt_ids_ + i * gt_k_ + min_k); + std::unordered_set ground(gt_ids_ + i * gt_k_, gt_ids_ + i * gt_k_ + min_k); for (int32_t j = 0; j < min_k; j++) { - idx_t id = ids[i * k + j]; + auto id = ids[i * k + j]; if (ground.count(id) > 0) { hit++; } @@ -103,15 +104,15 @@ class Benchmark_sift : public ::testing::Test { } float - CalcRecall(const idx_t* ids, int32_t nq_start, int32_t step, int32_t k) { + CalcRecall(const int64_t* ids, int32_t nq_start, int32_t step, int32_t k) { assert(nq_start + step <= 10000); int32_t min_k = std::min(gt_k_, k); int32_t hit = 0; for (int32_t i = 0; i < step; i++) { - std::unordered_set ground(gt_ids_ + (i + nq_start) * gt_k_, - gt_ids_ + (i + nq_start) * gt_k_ + min_k); + std::unordered_set ground(gt_ids_ + (i + nq_start) * gt_k_, + gt_ids_ + (i + nq_start) * gt_k_ + min_k); for (int32_t j = 0; j < min_k; j++) { - idx_t id = ids[i * k + j]; + auto id = ids[i * k + j]; if (ground.count(id) > 0) { hit++; } @@ -133,60 +134,80 @@ class Benchmark_sift : public ::testing::Test { dim_ = std::stoi(ann_test_name_.substr(pos1 + 1, pos2 - pos1 - 1)); metric_str_ = ann_test_name_.substr(pos2 + 1); - assert(metric_str_ == METRIC_IP_STR || metric_str_ == METRIC_L2_STR); } + template void - load_base_data() { + load_hdf5_data() { const std::string ann_file_name = ann_test_name_ + HDF5_POSTFIX; - int32_t dim; + printf("[%.3f s] Loading HDF5 file: %s\n", get_time_diff(), ann_file_name.c_str()); - xb_ = (float*)hdf5_read(ann_file_name, HDF5_DATASET_TRAIN, H5T_FLOAT, dim, nb_); - assert(dim == dim_ || !"dataset does not have correct dimension"); - if (metric_str_ == METRIC_IP_STR) { - printf("[%.3f s] Normalizing base data set \n", get_time_diff()); - normalize(xb_, nb_, dim_); + /* load train data */ + printf("[%.3f s] Loading train data\n", get_time_diff()); + if (!is_binary) { + xb_ = hdf5_read(ann_file_name, HDF5_DATASET_TRAIN, H5T_FLOAT, dim, nb_); + assert(dim == dim_ || !"train dataset has incorrect dimension"); + } else { + xb_ = hdf5_read(ann_file_name, HDF5_DATASET_TRAIN, H5T_INTEGER, dim, nb_); + assert(dim * 32 == dim_ || !"train dataset has incorrect dimension"); } - } - void - load_query_data() { - const std::string ann_file_name = ann_test_name_ + HDF5_POSTFIX; + if (metric_str_ == METRIC_IP_STR) { + printf("[%.3f s] Normalizing train dataset \n", get_time_diff()); + normalize((float*)xb_, nb_, dim_); + } - int32_t dim; - xq_ = (float*)hdf5_read(ann_file_name, HDF5_DATASET_TEST, H5T_FLOAT, dim, nq_); - assert(dim == dim_ || !"query does not have same dimension as train set"); + /* load test data */ + printf("[%.3f s] Loading test data\n", get_time_diff()); + if (!is_binary) { + xq_ = hdf5_read(ann_file_name, HDF5_DATASET_TEST, H5T_FLOAT, dim, nq_); + assert(dim == dim_ || !"test dataset has incorrect dimension"); + } else { + xq_ = hdf5_read(ann_file_name, HDF5_DATASET_TEST, H5T_INTEGER, dim, nq_); + assert(dim * 32 == dim_ || !"test dataset has incorrect dimension"); + } if (metric_str_ == METRIC_IP_STR) { - printf("[%.3f s] Normalizing query data \n", get_time_diff()); - normalize(xq_, nq_, dim_); + printf("[%.3f s] Normalizing test dataset \n", get_time_diff()); + normalize((float*)xq_, nq_, dim_); } - } - - void - load_ground_truth() { - const std::string ann_file_name = ann_test_name_ + HDF5_POSTFIX; - // load ground-truth and convert int to long + /* load ground-truth data */ int32_t gt_nq; - int* gt_int = (int*)hdf5_read(ann_file_name, HDF5_DATASET_NEIGHBORS, H5T_INTEGER, gt_k_, gt_nq); - assert(gt_nq == nq_ || !"incorrect nb of ground truth index"); - - gt_ids_ = new idx_t[gt_k_ * nq_]; - for (int32_t i = 0; i < gt_k_ * nq_; i++) { - gt_ids_[i] = gt_int[i]; + printf("[%.3f s] Loading ground truth data\n", get_time_diff()); + gt_ids_ = (int32_t*)hdf5_read(ann_file_name, HDF5_DATASET_NEIGHBORS, H5T_INTEGER, gt_k_, gt_nq); + assert(gt_nq == nq_ || !"incorrect nq of ground truth labels"); + +#if 0 + if (!is_binary) { + gt_dist_ = hdf5_read(ann_file_name, HDF5_DATASET_DISTANCES, H5T_FLOAT, gt_k_, gt_nq); + assert(gt_nq == nq_ || !"incorrect nq of ground truth distance"); + } else { + gt_dist_ = hdf5_read(ann_file_name, HDF5_DATASET_DISTANCES, H5T_INTEGER, gt_k_, gt_nq); + assert(gt_nq == nq_ || !"incorrect nq of ground truth distance"); } - delete[] gt_int; - -#if DEBUG_VERBOSE - distance_t* gt_dist = (float*)hdf5_read(ann_file_name, HDF5_DATASET_DISTANCES, H5T_FLOAT, k, nq2); - assert(nq2 == nq || !"incorrect nb of ground truth distance"); #endif } - private: + void + free_all() { + if (xb_ != nullptr) { + delete[](float*) xb_; + } + if (xq_ != nullptr) { + delete[](float*) xq_; + } + if (gt_ids_ != nullptr) { + delete[] gt_ids_; + } + if (gt_dist_ != nullptr) { + delete[](float*) gt_dist_; + } + } + + protected: void* hdf5_read(const std::string& file_name, const std::string& dataset_name, H5T_class_t dataset_class, int32_t& d_out, int32_t& n_out) { @@ -237,8 +258,8 @@ class Benchmark_sift : public ::testing::Test { /* Read data from hyperslab in the file into the hyperslab in memory and display. */ switch (t_class) { case H5T_INTEGER: - data_out = new int[dims_out[0] * dims_out[1]]; - H5Dread(dataset, H5T_NATIVE_INT, memspace, dataspace, H5P_DEFAULT, data_out); + data_out = new int32_t[dims_out[0] * dims_out[1]]; + H5Dread(dataset, H5T_NATIVE_INT32, memspace, dataspace, H5P_DEFAULT, data_out); break; case H5T_FLOAT: data_out = new float[dims_out[0] * dims_out[1]]; @@ -259,28 +280,49 @@ class Benchmark_sift : public ::testing::Test { return data_out; } - protected: + // For binary vector, dim should be divided by 32, since we use int32 to store binary vector data */ + template void - SetUp() override { - T0_ = elapsed(); - - parse_ann_test_name(); + hdf5_write(const char* file_name, const int32_t dim, const int32_t k, const void* xb, const int32_t nb, + const void* xq, const int32_t nq, const void* g_ids, const void* g_dist) { + /* Open the file and the dataset. */ + hid_t file = H5Fcreate(file_name, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + + auto write_hdf5_dataset = [](hid_t file, const char* dataset_name, hid_t type_id, int32_t rows, int32_t cols, + const void* data) { + hsize_t dims[2]; + dims[0] = rows; + dims[1] = cols; + auto dataspace = H5Screate_simple(2, dims, NULL); + auto dataset = H5Dcreate2(file, dataset_name, type_id, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + auto err = H5Dwrite(dataset, H5T_NATIVE_INT32, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + assert(err == 0); + H5Dclose(dataset); + H5Sclose(dataspace); + }; + + /* write train dataset */ + if (!is_binary) { + write_hdf5_dataset(file, HDF5_DATASET_TRAIN, H5T_NATIVE_FLOAT, nb, dim, xb); + } else { + write_hdf5_dataset(file, HDF5_DATASET_TRAIN, H5T_NATIVE_INT32, nb, dim, xb); + } - printf("[%.3f s] Loading base data\n", get_time_diff()); - load_base_data(); + /* write test dataset */ + if (!is_binary) { + write_hdf5_dataset(file, HDF5_DATASET_TEST, H5T_NATIVE_FLOAT, nq, dim, xq); + } else { + write_hdf5_dataset(file, HDF5_DATASET_TEST, H5T_NATIVE_INT32, nq, dim, xq); + } - printf("[%.3f s] Loading queries\n", get_time_diff()); - load_query_data(); + /* write ground-truth labels dataset */ + write_hdf5_dataset(file, HDF5_DATASET_NEIGHBORS, H5T_NATIVE_INT32, nq, k, g_ids); - printf("[%.3f s] Loading ground truth\n", get_time_diff()); - load_ground_truth(); - } + /* write ground-truth distance dataset */ + write_hdf5_dataset(file, HDF5_DATASET_DISTANCES, H5T_NATIVE_FLOAT, nq, k, g_dist); - void - TearDown() override { - delete[] xb_; - delete[] xq_; - delete[] gt_ids_; + /* Close/release resources. */ + H5Fclose(file); } protected: @@ -288,10 +330,11 @@ class Benchmark_sift : public ::testing::Test { std::string ann_test_name_ = ""; std::string metric_str_; int32_t dim_; - distance_t* xb_; - distance_t* xq_; + void* xb_ = nullptr; + void* xq_ = nullptr; int32_t nb_; int32_t nq_; + int32_t* gt_ids_ = nullptr; // ground-truth labels + void* gt_dist_ = nullptr; // ground-truth distances int32_t gt_k_; - idx_t* gt_ids_; // ground-truth index }; diff --git a/unittest/benchmark/ref_log/benchmark_faiss_ref.log b/unittest/benchmark/ref_log/benchmark_faiss.log similarity index 100% rename from unittest/benchmark/ref_log/benchmark_faiss_ref.log rename to unittest/benchmark/ref_log/benchmark_faiss.log diff --git a/unittest/benchmark/ref_log/benchmark_knowhere_ref.log b/unittest/benchmark/ref_log/benchmark_knowhere_float.log similarity index 100% rename from unittest/benchmark/ref_log/benchmark_knowhere_ref.log rename to unittest/benchmark/ref_log/benchmark_knowhere_float.log diff --git a/unittest/benchmark/ref_log/benchmark_knowhere_hamming.log b/unittest/benchmark/ref_log/benchmark_knowhere_hamming.log new file mode 100644 index 000000000..9922179b6 --- /dev/null +++ b/unittest/benchmark/ref_log/benchmark_knowhere_hamming.log @@ -0,0 +1,55 @@ +Running main() from /home/caiyd/vec/knowhere/cmake_build/thirdparty/gtest/googletest-src/googletest/src/gtest_main.cc +[==========] Running 2 tests from 1 test case. +[----------] Global test environment set-up. +[----------] 2 tests from Benchmark_knowhere_binary +[ RUN ] Benchmark_knowhere_binary.TEST_BINARY_IDMAP +[0.000 s] Loading HDF5 file: sift-4096-hamming.hdf5 +[0.000 s] Loading train data +[0.202 s] Loading test data +[0.204 s] Loading ground truth data +2022-05-25 19:09:21,277 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO +2022-05-25 19:09:21,277 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2 +[0.206 s] Creating CPU index "BIN_FLAT" +[0.206 s] Reading index file: sift-4096-hamming_BIN_FLAT.index +[0.206 s] Building all on 1000000 vectors +[0.399 s] Writing index file: sift-4096-hamming_BIN_FLAT.index + +[0.967 s] sift-4096-hamming | BIN_FLAT +================================================================================ + nq = 10000, k = 10, elapse = 437.8341s, R@ = 0.9662 +================================================================================ +[438.840 s] Test 'sift-4096-hamming/BIN_FLAT' done + +[ OK ] Benchmark_knowhere_binary.TEST_BINARY_IDMAP (438871 ms) +[ RUN ] Benchmark_knowhere_binary.TEST_BINARY_IVFFLAT +[0.000 s] Loading HDF5 file: sift-4096-hamming.hdf5 +[0.000 s] Loading train data +[0.149 s] Loading test data +[0.151 s] Loading ground truth data +2022-05-25 19:16:40,094 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO +2022-05-25 19:16:40,094 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2 +[0.152 s] Creating CPU index "BIN_IVF_FLAT" +[0.153 s] Reading index file: sift-4096-hamming_BIN_IVF_FLAT_1024.index +[0.153 s] Building all on 1000000 vectors +[153.966 s] Writing index file: sift-4096-hamming_BIN_IVF_FLAT_1024.index + +[154.891 s] sift-4096-hamming | BIN_IVF_FLAT | nlist=1024 +================================================================================ + nprobe = 1, nq = 10000, k = 10, elapse = 0.6325s, R@ = 0.1394 + nprobe = 2, nq = 10000, k = 10, elapse = 0.9452s, R@ = 0.2221 + nprobe = 4, nq = 10000, k = 10, elapse = 1.4388s, R@ = 0.3248 + nprobe = 8, nq = 10000, k = 10, elapse = 2.4233s, R@ = 0.4531 + nprobe = 16, nq = 10000, k = 10, elapse = 4.2960s, R@ = 0.5899 + nprobe = 32, nq = 10000, k = 10, elapse = 8.0693s, R@ = 0.7237 + nprobe = 64, nq = 10000, k = 10, elapse = 15.0587s, R@ = 0.8381 + nprobe = 128, nq = 10000, k = 10, elapse = 28.8887s, R@ = 0.9205 + nprobe = 256, nq = 10000, k = 10, elapse = 54.8444s, R@ = 0.9609 +================================================================================ +[271.828 s] Test 'sift-4096-hamming/BIN_IVF_FLAT' done + +[ OK ] Benchmark_knowhere_binary.TEST_BINARY_IVFFLAT (271865 ms) +[----------] 2 tests from Benchmark_knowhere_binary (710736 ms total) + +[----------] Global test environment tear-down +[==========] 2 tests from 1 test case ran. (710736 ms total) +[ PASSED ] 2 tests. diff --git a/unittest/benchmark/ref_log/benchmark_knowhere_jaccard.log b/unittest/benchmark/ref_log/benchmark_knowhere_jaccard.log new file mode 100644 index 000000000..32253c02c --- /dev/null +++ b/unittest/benchmark/ref_log/benchmark_knowhere_jaccard.log @@ -0,0 +1,55 @@ +Running main() from /home/caiyd/vec/knowhere/cmake_build/thirdparty/gtest/googletest-src/googletest/src/gtest_main.cc +[==========] Running 2 tests from 1 test case. +[----------] Global test environment set-up. +[----------] 2 tests from Benchmark_knowhere_binary +[ RUN ] Benchmark_knowhere_binary.TEST_BINARY_IDMAP +[0.000 s] Loading HDF5 file: sift-4096-jaccard.hdf5 +[0.000 s] Loading train data +[0.205 s] Loading test data +[0.207 s] Loading ground truth data +2022-05-25 19:32:57,550 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO +2022-05-25 19:32:57,550 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2 +[0.209 s] Creating CPU index "BIN_FLAT" +[0.209 s] Reading index file: sift-4096-jaccard_BIN_FLAT.index +[0.209 s] Building all on 1000000 vectors +[0.408 s] Writing index file: sift-4096-jaccard_BIN_FLAT.index + +[0.984 s] sift-4096-jaccard | BIN_FLAT +================================================================================ + nq = 10000, k = 10, elapse = 869.3156s, R@ = 0.9983 +================================================================================ +[870.339 s] Test 'sift-4096-jaccard/BIN_FLAT' done + +[ OK ] Benchmark_knowhere_binary.TEST_BINARY_IDMAP (870370 ms) +[ RUN ] Benchmark_knowhere_binary.TEST_BINARY_IVFFLAT +[0.000 s] Loading HDF5 file: sift-4096-jaccard.hdf5 +[0.000 s] Loading train data +[0.146 s] Loading test data +[0.147 s] Loading ground truth data +2022-05-25 19:47:27,861 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO +2022-05-25 19:47:27,861 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2 +[0.149 s] Creating CPU index "BIN_IVF_FLAT" +[0.149 s] Reading index file: sift-4096-jaccard_BIN_IVF_FLAT_1024.index +[0.149 s] Building all on 1000000 vectors +[193.351 s] Writing index file: sift-4096-jaccard_BIN_IVF_FLAT_1024.index + +[194.132 s] sift-4096-jaccard | BIN_IVF_FLAT | nlist=1024 +================================================================================ + nprobe = 1, nq = 10000, k = 10, elapse = 1.5469s, R@ = 0.1085 + nprobe = 2, nq = 10000, k = 10, elapse = 2.2739s, R@ = 0.1775 + nprobe = 4, nq = 10000, k = 10, elapse = 3.2757s, R@ = 0.2743 + nprobe = 8, nq = 10000, k = 10, elapse = 5.3013s, R@ = 0.3977 + nprobe = 16, nq = 10000, k = 10, elapse = 9.1345s, R@ = 0.5458 + nprobe = 32, nq = 10000, k = 10, elapse = 15.9183s, R@ = 0.7075 + nprobe = 64, nq = 10000, k = 10, elapse = 26.8309s, R@ = 0.8548 + nprobe = 128, nq = 10000, k = 10, elapse = 44.8333s, R@ = 0.9544 + nprobe = 256, nq = 10000, k = 10, elapse = 71.7345s, R@ = 0.9934 +================================================================================ +[375.319 s] Test 'sift-4096-jaccard/BIN_IVF_FLAT' done + +[ OK ] Benchmark_knowhere_binary.TEST_BINARY_IVFFLAT (375356 ms) +[----------] 2 tests from Benchmark_knowhere_binary (1245726 ms total) + +[----------] Global test environment tear-down +[==========] 2 tests from 1 test case ran. (1245726 ms total) +[ PASSED ] 2 tests. diff --git a/unittest/benchmark/ref_log/benchmark_knowhere_tanimoto.log b/unittest/benchmark/ref_log/benchmark_knowhere_tanimoto.log new file mode 100644 index 000000000..36c890f57 --- /dev/null +++ b/unittest/benchmark/ref_log/benchmark_knowhere_tanimoto.log @@ -0,0 +1,55 @@ +Running main() from /home/caiyd/vec/knowhere/cmake_build/thirdparty/gtest/googletest-src/googletest/src/gtest_main.cc +[==========] Running 2 tests from 1 test case. +[----------] Global test environment set-up. +[----------] 2 tests from Benchmark_knowhere_binary +[ RUN ] Benchmark_knowhere_binary.TEST_BINARY_IDMAP +[0.000 s] Loading HDF5 file: sift-4096-tanimoto.hdf5 +[0.000 s] Loading train data +[0.182 s] Loading test data +[0.184 s] Loading ground truth data +2022-05-25 19:56:19,549 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO +2022-05-25 19:56:19,549 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2 +[0.186 s] Creating CPU index "BIN_FLAT" +[0.186 s] Reading index file: sift-4096-tanimoto_BIN_FLAT.index +[0.186 s] Building all on 1000000 vectors +[0.380 s] Writing index file: sift-4096-tanimoto_BIN_FLAT.index + +[0.950 s] sift-4096-tanimoto | BIN_FLAT +================================================================================ + nq = 10000, k = 10, elapse = 884.7137s, R@ = 0.9983 +================================================================================ +[885.701 s] Test 'sift-4096-tanimoto/BIN_FLAT' done + +[ OK ] Benchmark_knowhere_binary.TEST_BINARY_IDMAP (885731 ms) +[ RUN ] Benchmark_knowhere_binary.TEST_BINARY_IVFFLAT +[0.000 s] Loading HDF5 file: sift-4096-tanimoto.hdf5 +[0.000 s] Loading train data +[0.148 s] Loading test data +[0.150 s] Loading ground truth data +2022-05-25 20:11:05,245 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO +2022-05-25 20:11:05,245 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2 +[0.151 s] Creating CPU index "BIN_IVF_FLAT" +[0.151 s] Reading index file: sift-4096-tanimoto_BIN_IVF_FLAT_1024.index +[0.151 s] Building all on 1000000 vectors +[192.371 s] Writing index file: sift-4096-tanimoto_BIN_IVF_FLAT_1024.index + +[193.125 s] sift-4096-tanimoto | BIN_IVF_FLAT | nlist=1024 +================================================================================ + nprobe = 1, nq = 10000, k = 10, elapse = 1.4864s, R@ = 0.1085 + nprobe = 2, nq = 10000, k = 10, elapse = 2.2937s, R@ = 0.1775 + nprobe = 4, nq = 10000, k = 10, elapse = 3.3036s, R@ = 0.2743 + nprobe = 8, nq = 10000, k = 10, elapse = 5.3492s, R@ = 0.3977 + nprobe = 16, nq = 10000, k = 10, elapse = 9.1166s, R@ = 0.5458 + nprobe = 32, nq = 10000, k = 10, elapse = 15.4500s, R@ = 0.7075 + nprobe = 64, nq = 10000, k = 10, elapse = 26.4878s, R@ = 0.8548 + nprobe = 128, nq = 10000, k = 10, elapse = 45.5960s, R@ = 0.9544 + nprobe = 256, nq = 10000, k = 10, elapse = 73.4119s, R@ = 0.9934 +================================================================================ +[375.953 s] Test 'sift-4096-tanimoto/BIN_IVF_FLAT' done + +[ OK ] Benchmark_knowhere_binary.TEST_BINARY_IVFFLAT (375995 ms) +[----------] 2 tests from Benchmark_knowhere_binary (1261726 ms total) + +[----------] Global test environment tear-down +[==========] 2 tests from 1 test case ran. (1261726 ms total) +[ PASSED ] 2 tests.