Skip to content
This repository has been archived by the owner on Aug 16, 2023. It is now read-only.

Commit

Permalink
Fix hnsw range search error (#214)
Browse files: browse the repository at this point in the history.
Signed-off-by: yudong.cai <[email protected]>
  • Loading branch information
cydrain authored Jun 13, 2022
1 parent 7f4143e commit 63f3997
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 33 deletions.
4 changes: 4 additions & 0 deletions knowhere/index/vector_index/IndexHNSW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,10 @@ IndexHNSW::QueryByRange(const DatasetPtr& dataset,
index_->setEf(GetIndexParamEf(config));
bool is_IP = (index_->metric_type_ == 1); // InnerProduct: 1

if (!is_IP) {
radius *= radius;
}

std::vector<std::vector<int64_t>> result_id_array(rows);
std::vector<std::vector<float>> result_dist_array(rows);
std::vector<size_t> result_lims(rows + 1, 0);
Expand Down
4 changes: 2 additions & 2 deletions unittest/benchmark/benchmark_knowhere_binary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Benchmark_knowhere_binary : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, nq, k);
printf(" nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nq, k, t_diff, recall);
printf(" nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nq, k, t_diff, recall);
}
}
printf("================================================================================\n");
Expand All @@ -54,7 +54,7 @@ class Benchmark_knowhere_binary : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, nq, k);
printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nprobe, nq, k, t_diff,
printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nprobe, nq, k, t_diff,
recall);
}
}
Expand Down
4 changes: 2 additions & 2 deletions unittest/benchmark/benchmark_knowhere_binary_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Benchmark_knowhere_binary_range : public Benchmark_knowhere {
auto lims = knowhere::GetDatasetLims(result);
float recall = CalcRecall(ids, lims, nq);
float accuracy = CalcAccuracy(ids, lims, nq);
printf(" nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n", nq, t_diff, recall, accuracy);
printf(" nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", nq, t_diff, recall, accuracy);
}
printf("================================================================================\n");
printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(),
Expand All @@ -53,7 +53,7 @@ class Benchmark_knowhere_binary_range : public Benchmark_knowhere {
auto lims = knowhere::GetDatasetLims(result);
float recall = CalcRecall(ids, lims, nq);
float accuracy = CalcAccuracy(ids, lims, nq);
printf(" nprobe = %4d, nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n",
printf(" nprobe = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n",
nprobe, nq, t_diff, recall, accuracy);
}
}
Expand Down
8 changes: 4 additions & 4 deletions unittest/benchmark/benchmark_knowhere_float.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Benchmark_knowhere_float : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, nq, k);
printf(" nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nq, k, t_diff, recall);
printf(" nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nq, k, t_diff, recall);
}
}
printf("================================================================================\n");
Expand All @@ -54,7 +54,7 @@ class Benchmark_knowhere_float : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, nq, k);
printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", nprobe, nq, k, t_diff,
printf(" nprobe = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", nprobe, nq, k, t_diff,
recall);
}
}
Expand Down Expand Up @@ -82,7 +82,7 @@ class Benchmark_knowhere_float : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, nq, k);
printf(" ef = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", ef, nq, k, t_diff, recall);
printf(" ef = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", ef, nq, k, t_diff, recall);
}
}
}
Expand All @@ -108,7 +108,7 @@ class Benchmark_knowhere_float : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, nq, k);
printf(" search_k = %4d, nq = %4d, k = %4d, elapse = %.4fs, R@ = %.4f\n", sk, nq, k, t_diff,
printf(" search_k = %4d, nq = %4d, k = %4d, elapse = %6.3fs, R@ = %.4f\n", sk, nq, k, t_diff,
recall);
}
}
Expand Down
6 changes: 3 additions & 3 deletions unittest/benchmark/benchmark_knowhere_float_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class Benchmark_knowhere_float_range : public Benchmark_knowhere {
auto lims = knowhere::GetDatasetLims(result);
float recall = CalcRecall(ids, lims, nq);
float accuracy = CalcAccuracy(ids, lims, nq);
printf(" nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n", nq, t_diff, recall, accuracy);
printf(" nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n", nq, t_diff, recall, accuracy);
}
printf("================================================================================\n");
printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(),
Expand All @@ -53,7 +53,7 @@ class Benchmark_knowhere_float_range : public Benchmark_knowhere {
auto lims = knowhere::GetDatasetLims(result);
float recall = CalcRecall(ids, lims, nq);
float accuracy = CalcAccuracy(ids, lims, nq);
printf(" nprobe = %4d, nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n",
printf(" nprobe = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n",
nprobe, nq, t_diff, recall, accuracy);
}
}
Expand All @@ -80,7 +80,7 @@ class Benchmark_knowhere_float_range : public Benchmark_knowhere {
auto lims = knowhere::GetDatasetLims(result);
float recall = CalcRecall(ids, lims, nq);
float accuracy = CalcAccuracy(ids, lims, nq);
printf(" ef = %4d, nq = %4d, elapse = %.4fs, R@ = %.4f, A@ = %.4f\n",
printf(" ef = %4d, nq = %4d, elapse = %6.3fs, R@ = %.4f, A@ = %.4f\n",
ef, nq, t_diff, recall, accuracy);
}
}
Expand Down
8 changes: 4 additions & 4 deletions unittest/benchmark/benchmark_knowhere_perf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Benchmark_knowhere_perf : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, i, NQ_STEP_, k);
printf(" No.%4d: nq = [%4d, %4d), k = %4d, elapse = %.4fs, R@ = %.4f\n", no++, i, i + NQ_STEP_, k,
printf(" No.%4d: nq = [%4d, %4d), k = %4d, elapse = %6.3fs, R@ = %.4f\n", no++, i, i + NQ_STEP_, k,
t_diff, recall);
}
}
Expand All @@ -57,7 +57,7 @@ class Benchmark_knowhere_perf : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, i, NQ_STEP_, k);
printf(" No.%4d: nprobe = %4d, nq = [%4d, %4d), k = %4d, elapse = %.4fs, R@ = %.4f\n", no++,
printf(" No.%4d: nprobe = %4d, nq = [%4d, %4d), k = %4d, elapse = %6.3fs, R@ = %.4f\n", no++,
nprobe, i, i + NQ_STEP_, k, t_diff, recall);
}
}
Expand Down Expand Up @@ -86,7 +86,7 @@ class Benchmark_knowhere_perf : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, i, NQ_STEP_, k);
printf(" No.%4d: ef = %4d, nq = [%4d, %4d), k = %4d, elapse = %.4fs, R@ = %.4f\n", no++, ef, i,
printf(" No.%4d: ef = %4d, nq = [%4d, %4d), k = %4d, elapse = %6.3fs, R@ = %.4f\n", no++, ef, i,
i + NQ_STEP_, k, t_diff, recall);
}
}
Expand Down Expand Up @@ -114,7 +114,7 @@ class Benchmark_knowhere_perf : public Benchmark_knowhere {
CALC_TIME_SPAN(auto result = index_->Query(ds_ptr, conf, nullptr));
auto ids = knowhere::GetDatasetIDs(result);
float recall = CalcRecall(ids, i, NQ_STEP_, k);
printf(" No.%4d: search_k = %4d, nq = [%4d, %4d), k = %4d, elapse = %.4fs, R@ = %.4f\n", no++, sk,
printf(" No.%4d: search_k = %4d, nq = [%4d, %4d), k = %4d, elapse = %6.3fs, R@ = %.4f\n", no++, sk,
i, i + NQ_STEP_, k, t_diff, recall);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,28 @@ Note: Google Test filter = Benchmark_knowhere_float_range.TEST_HNSW
[ RUN ] Benchmark_knowhere_float_range.TEST_HNSW
[0.000 s] Loading HDF5 file: sift-128-euclidean-range.hdf5
[0.000 s] Loading train data
[0.186 s] Loading test data
[0.188 s] Loading ground truth data
2022-06-08 20:38:53,635 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO
2022-06-08 20:38:53,635 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2
[0.190 s] Creating CPU index "HNSW"
[0.190 s] Reading index file: sift-128-euclidean-range_HNSW_16_100_20.index
[0.190 s] Building all on 1000000 vectors
[50.130 s] Writing index file: sift-128-euclidean-range_HNSW_16_100_20.index
[0.195 s] Loading test data
[0.197 s] Loading ground truth data
2022-06-13 16:58:24,478 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:39] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS expect simdType::AUTO
2022-06-13 16:58:24,479 DEBUG [default] [caiyd@unknown-host] [static std::__cxx11::string knowhere::KnowhereConfig::SetSimdType(knowhere::KnowhereConfig::SimdType)] [/home/caiyd/vec/knowhere/knowhere/archive/KnowhereConfig.cpp:64] [KNOWHERE][SetSimdType][benchmark_knowh] FAISS hook AVX2
[0.199 s] Creating CPU index "HNSW"
[0.199 s] Reading index file: sift-128-euclidean-range_HNSW_16_100_20.index
[0.199 s] Building all on 1000000 vectors
[50.409 s] Writing index file: sift-128-euclidean-range_HNSW_16_100_20.index

[50.895 s] sift-128-euclidean-range | HNSW | M=16 | efConstruction=100
[51.170 s] sift-128-euclidean-range | HNSW | M=16 | efConstruction=100
================================================================================
ef = 16, nq = 10000, elapse = 0.7227s, R@ = 0.0000, A@ = -nan
ef = 32, nq = 10000, elapse = 1.0283s, R@ = 0.0000, A@ = -nan
ef = 64, nq = 10000, elapse = 1.7962s, R@ = 0.0000, A@ = -nan
ef = 128, nq = 10000, elapse = 3.2314s, R@ = 0.0000, A@ = -nan
ef = 256, nq = 10000, elapse = 5.7384s, R@ = 0.0000, A@ = -nan
ef = 16, nq = 10000, elapse = 1.462s, R@ = 0.9962, A@ = 1.0000
ef = 32, nq = 10000, elapse = 1.756s, R@ = 0.9969, A@ = 1.0000
ef = 64, nq = 10000, elapse = 2.496s, R@ = 0.9975, A@ = 1.0000
ef = 128, nq = 10000, elapse = 3.871s, R@ = 0.9977, A@ = 1.0000
ef = 256, nq = 10000, elapse = 6.338s, R@ = 0.9978, A@ = 1.0000
================================================================================
[63.768 s] Test 'sift-128-euclidean-range/HNSW' done
[68.047 s] Test 'sift-128-euclidean-range/HNSW' done

[ OK ] Benchmark_knowhere_float_range.TEST_HNSW (63816 ms)
[----------] 1 test from Benchmark_knowhere_float_range (63816 ms total)
[ OK ] Benchmark_knowhere_float_range.TEST_HNSW (68088 ms)
[----------] 1 test from Benchmark_knowhere_float_range (68088 ms total)

[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (63816 ms total)
[==========] 1 test from 1 test case ran. (68088 ms total)
[ PASSED ] 1 test.

0 comments on commit 63f3997

Please sign in to comment.