From 3fbc579eaee46bbcc1d33cf90861b584c7b153ae Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 10 Jul 2023 23:26:56 +0200 Subject: [PATCH 01/10] Complete CAGRA support, use raft 23.06 Signed-off-by: Mickael Ide --- CMakeLists.txt | 2 +- cmake/libs/libraft.cmake | 2 +- cmake/utils/fetch_rapids.cmake | 2 +- include/knowhere/comp/index_param.h | 4 + src/index/cagra/cagra.cu | 236 ++++----------------------- src/index/cagra/cagra.cuh | 241 ++++++++++++++++++++++++++++ src/index/cagra/cagra_config.h | 6 + src/index/ivf_raft/ivf_raft.cuh | 5 +- tests/ut/test_gpu_search.cc | 11 ++ 9 files changed, 298 insertions(+), 211 deletions(-) create mode 100644 src/index/cagra/cagra.cuh diff --git a/CMakeLists.txt b/CMakeLists.txt index b243efeab..ebcf682bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,7 +110,7 @@ else() endif() knowhere_file_glob(GLOB_RECURSE KNOWHERE_GPU_SRCS src/index/gpu/flat_gpu/*.cc - src/index/gpu/ivf_gpu/*.cc src/index/cagra/*.cu) + src/index/gpu/ivf_gpu/*.cc) list(REMOVE_ITEM KNOWHERE_SRCS ${KNOWHERE_GPU_SRCS}) if(NOT WITH_RAFT) diff --git a/cmake/libs/libraft.cmake b/cmake/libs/libraft.cmake index 3de1cfaec..608e8eedd 100644 --- a/cmake/libs/libraft.cmake +++ b/cmake/libs/libraft.cmake @@ -26,7 +26,7 @@ rapids_cpm_init() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") -set(RAPIDS_VERSION 23.04) +set(RAPIDS_VERSION 23.06) set(RAFT_VERSION "${RAPIDS_VERSION}") set(RAFT_FORK "rapidsai") set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}") diff --git a/cmake/utils/fetch_rapids.cmake b/cmake/utils/fetch_rapids.cmake index 56899f2c5..84691b55a 100644 --- a/cmake/utils/fetch_rapids.cmake +++ b/cmake/utils/fetch_rapids.cmake @@ -13,7 +13,7 @@ # License for the specific language governing permissions and limitations under # the License. -set(RAPIDS_VERSION "23.04") +set(RAPIDS_VERSION "23.06") if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) file( diff --git a/include/knowhere/comp/index_param.h b/include/knowhere/comp/index_param.h index a90a69bce..5ab457d05 100644 --- a/include/knowhere/comp/index_param.h +++ b/include/knowhere/comp/index_param.h @@ -75,6 +75,10 @@ constexpr const char* M = "m"; // PQ param for IVFPQ constexpr const char* SSIZE = "ssize"; constexpr const char* REORDER_K = "reorder_k"; +// CAGRA Params +constexpr const char* INTERMEDIATE_GRAPH_DEGREE = "intermediate_graph_degree"; +constexpr const char* GRAPH_DEGREE = "graph_degree"; +constexpr const char* ITOPK_SIZE = "itopk_size"; // HNSW Params constexpr const char* EFCONSTRUCTION = "efConstruction"; constexpr const char* HNSW_M = "M"; diff --git a/src/index/cagra/cagra.cu b/src/index/cagra/cagra.cu index 43107783b..f09d1ea5d 100644 --- a/src/index/cagra/cagra.cu +++ b/src/index/cagra/cagra.cu @@ -1,210 +1,34 @@ -#include "cagra_config.h" -#include "common/raft/raft_utils.h" -#include "common/raft_metric.h" -#include "knowhere/index_node.h" -#include "knowhere/log.h" -#include "raft/neighbors/cagra.cuh" -#include "raft/neighbors/cagra_serialize.cuh" -namespace knowhere { - -using idx_type = uint32_t; - -using cagra_index = raft::neighbors::experimental::cagra::index; - -class CagraIndexNode : public IndexNode { - public: - CagraIndexNode(const Object& object) : devs_{}, gpu_index_{} { - } - - Status - Train(const DataSet& dataset, const Config& cfg) override { - auto cagra_cfg = static_cast(cfg); - if (gpu_index_) { - LOG_KNOWHERE_WARNING_ << "index is already trained"; - return Status::index_already_trained; - } - if (cagra_cfg.gpu_ids.value().size() != 1) { - LOG_KNOWHERE_WARNING_ << "Cagra implementation is single-GPU only" << std::endl; - return Status::raft_inner_error; - } - auto metric = Str2RaftMetricType(cagra_cfg.metric_type.value()); - if (!metric.has_value()) { - LOG_KNOWHERE_WARNING_ << "please check metric value: " << cagra_cfg.metric_type.value(); - return metric.error(); - } - if (metric.value() != raft::distance::DistanceType::L2Expanded) { - LOG_KNOWHERE_WARNING_ << "only support L2Expanded metric type"; - return Status::invalid_metric_type; - } - devs_.insert(devs_.begin(), cagra_cfg.gpu_ids.value().begin(), cagra_cfg.gpu_ids.value().end()); - auto scoped_device = raft_utils::device_setter{*cagra_cfg.gpu_ids.begin()}; - auto build_params = raft::neighbors::experimental::cagra::index_params{}; - build_params.intermediate_graph_degree = cagra_cfg.intermediate_graph_degree.value(); - build_params.graph_degree = cagra_cfg.graph_degree.value(); - build_params.metric = metric.value(); - auto& res = raft_utils::get_raft_resources(); - auto rows = dataset.GetRows(); - auto dim = dataset.GetDim(); - auto* data = reinterpret_cast(dataset.GetTensor()); - auto data_gpu = raft::make_device_matrix(res, rows, dim); - RAFT_CUDA_TRY(cudaMemcpyAsync(data_gpu.data_handle(), data, data_gpu.size() * sizeof(float), cudaMemcpyDefault, - res.get_stream().value())); - gpu_index_ = raft::neighbors::experimental::cagra::build( - res, build_params, - raft::make_device_matrix_view((const float*)data_gpu.data_handle(), rows, dim)); - this->dim_ = dim; - this->counts_ = rows; - res.sync_stream(); - } - - Status - Add(const DataSet& dataset, const Config& cfg) override { - return Status::success; - } - - expected - Search(const DataSet& dataset, const Config& cfg, const BitsetView& bitset) const override { - auto cagra_cfg = static_cast(cfg); - auto rows = dataset.GetRows(); - auto dim = dataset.GetDim(); - auto* data = reinterpret_cast(dataset.GetTensor()); - auto output_size = rows * cagra_cfg.k.value(); - auto ids = std::unique_ptr(new idx_type[output_size]); - auto dis = std::unique_ptr(new float[output_size]); - try { - auto scoped_device = raft_utils::device_setter{devs_[0]}; - auto& res_ = raft_utils::get_raft_resources(); - - auto data_gpu = raft::make_device_matrix(res_, rows, dim); - raft::copy(data_gpu.data_handle(), data, data_gpu.size(), res_.get_stream()); - - auto search_params = raft::neighbors::experimental::cagra::search_params{}; - search_params.max_queries = cagra_cfg.max_queries.value(); - auto ids_dev = raft::make_device_matrix(*res_, rows, cagra_cfg.k.value()); - auto dis_dev = raft::make_device_matrix(*res_, rows, cagra_cfg.k.value()); - raft::neighbors::experimental::cagra::search(res_, search_params, *gpu_index_, - raft::make_const_mdspan(data_gpu.view()), ids_dev.view(), - dis_dev.view()); - - raft::copy(ids.get(), ids_dev.data_handle(), output_size, res_.get_stream()); - raft::copy(dis.get(), dis_dev.data_handle(), output_size, res_.get_stream()); - res_.sync_stream(); - - } catch (std::exception& e) { - LOG_KNOWHERE_WARNING_ << "RAFT inner error, " << e.what(); - return Status::raft_inner_error; - } - return Status::not_implemented; - // return GenResultDataSet(rows, cagra_cfg.k, ids.release(), dis.release()); - } - - expected - RangeSearch(const DataSet& dataset, const Config& cfg, const BitsetView& bitset) const override { - return Status::not_implemented; - } - - expected - GetVectorByIds(const DataSet& dataset) const override { - return Status::not_implemented; - } - - bool - HasRawData(const std::string& metric_type) const override { - return false; - } - - expected - GetIndexMeta(const Config& cfg) const override { - return Status::not_implemented; - } - - Status - Serialize(BinarySet& binset) const override { - if (!gpu_index_.has_value()) { - LOG_KNOWHERE_ERROR_ << "Can not serialize empty RaftCagraIndex."; - return Status::empty_index; - } - std::stringbuf buf; - std::ostream os(&buf); - os.write((char*)(&this->dim_), sizeof(this->dim_)); - os.write((char*)(&this->counts_), sizeof(this->counts_)); - os.write((char*)(&this->devs_[0]), sizeof(this->devs_[0])); +/** + * SPDX-FileCopyrightText: Copyright (c) 2023,NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "cagra.cuh" +#include "knowhere/factory.h" +#include "knowhere/index_node_thread_pool_wrapper.h" + +static constexpr uint32_t cuda_concurrent_size = 16; - auto scoped_device = raft_utils::device_setter{devs_[0]}; - rmm::mr::cuda_memory_resource mr; - rmm::cuda_stream stm; - raft::device_resources res(stm.view(), nullptr, &mr); - - raft::neighbors::experimental::cagra::serialize(res, os, *gpu_index_); - - os.flush(); - std::shared_ptr index_binary(new (std::nothrow) uint8_t[buf.str().size()]); - - memcpy(index_binary.get(), buf.str().c_str(), buf.str().size()); - binset.Append(this->Type(), index_binary, buf.str().size()); - return Status::success; - } - - Status - Deserialize(const BinarySet& binset, const Config& config) override { - std::stringbuf buf; - auto binary = binset.GetByName(this->Type()); - if (binary == nullptr) { - LOG_KNOWHERE_ERROR_ << "Invalid binary set."; - return Status::invalid_binary_set; - } - buf.sputn((char*)binary->data.get(), binary->size); - std::istream is(&buf); - - is.read((char*)(&this->dim_), sizeof(this->dim_)); - is.read((char*)(&this->counts_), sizeof(this->counts_)); - this->devs_.resize(1); - is.read((char*)(&this->devs_[0]), sizeof(this->devs_[0])); - auto scoped_device = raft_utils::device_setter{devs_[0]}; - - auto& res = raft_utils::get_raft_resources(); - - cagra_index index_ = raft::neighbors::experimental::cagra::deserialize(res, is); - is.sync(); - gpu_index_ = cagra_index(std::move(index_)); - - return Status::success; - } - - Status - DeserializeFromFile(const std::string& filename, const Config& config) override { - } - - std::unique_ptr - CreateConfig() const override { - return std::make_unique(); - } - - int64_t - Dim() const override { - return dim_; - } - - int64_t - Size() const override { - return 0; - } - - int64_t - Count() const override { - return counts_; - } - - std::string - Type() const override { - return knowhere::IndexEnum::INDEX_RAFT_IVFFLAT; - } +namespace knowhere { - private: - std::vector devs_; - int64_t dim_ = 0; - int64_t counts_ = 0; - std::optional gpu_index_; -}; +static std::shared_ptr +GlobalThreadPoolRaft() { + static std::shared_ptr pool = std::make_shared(cuda_concurrent_size); + return pool; +} +KNOWHERE_REGISTER_GLOBAL(GPU_RAFT_CAGRA, [](const Object& object) { + return Index::Create(std::make_unique(object), GlobalThreadPoolRaft()); +}); } // namespace knowhere diff --git a/src/index/cagra/cagra.cuh b/src/index/cagra/cagra.cuh new file mode 100644 index 000000000..01c7a9148 --- /dev/null +++ b/src/index/cagra/cagra.cuh @@ -0,0 +1,241 @@ +#include "cagra_config.h" +#include "common/raft/raft_utils.h" +#include "common/raft_metric.h" +#include "knowhere/dataset.h" +#include "knowhere/index_node.h" +#include "knowhere/log.h" +#include "raft/neighbors/cagra.cuh" +#include "raft/neighbors/cagra_serialize.cuh" + +namespace knowhere { + +namespace detail { +struct device_setter { + device_setter(int new_device) + : prev_device_{[]() { + auto result = int{}; + RAFT_CUDA_TRY(cudaGetDevice(&result)); + return result; + }()} { + RAFT_CUDA_TRY(cudaSetDevice(new_device)); + } + + ~device_setter() { + RAFT_CUDA_TRY_NO_THROW(cudaSetDevice(prev_device_)); + } + + private: + int prev_device_; +}; +} // namespace detail + +class CagraIndexNode : public IndexNode { + using idx_type = std::int64_t; + using cagra_index = raft::neighbors::experimental::cagra::index; + + public: + CagraIndexNode(const Object& object) : devs_{}, gpu_index_{} { + } + + Status + Train(const DataSet& dataset, const Config& cfg) override { + auto cagra_cfg = static_cast(cfg); + if (gpu_index_) { + LOG_KNOWHERE_WARNING_ << "index is already trained"; + return Status::index_already_trained; + } + if (cagra_cfg.gpu_ids.value().size() != 1) { + LOG_KNOWHERE_WARNING_ << "Cagra implementation is single-GPU only" << std::endl; + return Status::raft_inner_error; + } + auto metric = Str2RaftMetricType(cagra_cfg.metric_type.value()); + if (!metric.has_value()) { + LOG_KNOWHERE_WARNING_ << "please check metric value: " << cagra_cfg.metric_type.value(); + return metric.error(); + } + if (metric.value() != raft::distance::DistanceType::L2Expanded) { + LOG_KNOWHERE_WARNING_ << "only support L2Expanded metric type"; + return Status::invalid_metric_type; + } + try { + devs_.insert(devs_.begin(), cagra_cfg.gpu_ids.value().begin(), cagra_cfg.gpu_ids.value().end()); + auto scoped_device = raft_utils::device_setter{*cagra_cfg.gpu_ids.value().begin()}; + raft_utils::init_gpu_resources(); + + auto build_params = raft::neighbors::experimental::cagra::index_params{}; + build_params.intermediate_graph_degree = cagra_cfg.intermediate_graph_degree.value(); + build_params.graph_degree = cagra_cfg.graph_degree.value(); + build_params.metric = metric.value(); + auto& res = raft_utils::get_raft_resources(); + auto rows = dataset.GetRows(); + auto dim = dataset.GetDim(); + auto* data = reinterpret_cast(dataset.GetTensor()); + auto data_gpu = raft::make_device_matrix(res, rows, dim); + RAFT_CUDA_TRY(cudaMemcpyAsync(data_gpu.data_handle(), data, data_gpu.size() * sizeof(float), + cudaMemcpyDefault, res.get_stream().value())); + gpu_index_ = raft::neighbors::experimental::cagra::build( + res, build_params, raft::make_const_mdspan(data_gpu.view())); + this->dim_ = dim; + this->counts_ = rows; + res.sync_stream(); + } catch (std::exception& e) { + LOG_KNOWHERE_WARNING_ << "RAFT inner error, " << e.what(); + return Status::raft_inner_error; + } + return Status::success; + } + + Status + Add(const DataSet& dataset, const Config& cfg) override { + return Status::success; + } + + expected + Search(const DataSet& dataset, const Config& cfg, const BitsetView& bitset) const override { + auto cagra_cfg = static_cast(cfg); + auto rows = dataset.GetRows(); + auto dim = dataset.GetDim(); + if (cagra_cfg.k.value() >= cagra_cfg.itopk_size.value()) { + LOG_KNOWHERE_WARNING_ << "topk should be smaller than itopk_size parameter" << std::endl; + return Status::raft_inner_error; + } + auto* data = reinterpret_cast(dataset.GetTensor()); + auto output_size = rows * cagra_cfg.k.value(); + auto ids = std::unique_ptr(new idx_type[output_size]); + auto dis = std::unique_ptr(new float[output_size]); + try { + auto scoped_device = raft_utils::device_setter{devs_[0]}; + auto& res = raft_utils::get_raft_resources(); + + auto data_gpu = raft::make_device_matrix(res, rows, dim); + raft::copy(data_gpu.data_handle(), data, data_gpu.size(), res.get_stream()); + + auto search_params = raft::neighbors::experimental::cagra::search_params{}; + search_params.max_queries = cagra_cfg.max_queries.value(); + search_params.itopk_size = cagra_cfg.itopk_size.value(); + + auto ids_dev = raft::make_device_matrix(res, rows, cagra_cfg.k.value()); + auto dis_dev = raft::make_device_matrix(res, rows, cagra_cfg.k.value()); + raft::neighbors::experimental::cagra::search(res, search_params, *gpu_index_, + raft::make_const_mdspan(data_gpu.view()), ids_dev.view(), + dis_dev.view()); + + raft::copy(ids.get(), ids_dev.data_handle(), output_size, res.get_stream()); + raft::copy(dis.get(), dis_dev.data_handle(), output_size, res.get_stream()); + res.sync_stream(); + + } catch (std::exception& e) { + LOG_KNOWHERE_WARNING_ << "RAFT inner error, " << e.what(); + return Status::raft_inner_error; + } + return GenResultDataSet(rows, cagra_cfg.k.value(), ids.release(), dis.release()); + } + + expected + RangeSearch(const DataSet& dataset, const Config& cfg, const BitsetView& bitset) const override { + return Status::not_implemented; + } + + expected + GetVectorByIds(const DataSet& dataset) const override { + return Status::not_implemented; + } + + bool + HasRawData(const std::string& metric_type) const override { + return false; + } + + expected + GetIndexMeta(const Config& cfg) const override { + return Status::not_implemented; + } + + Status + Serialize(BinarySet& binset) const override { + if (!gpu_index_.has_value()) { + LOG_KNOWHERE_ERROR_ << "Can not serialize empty RaftCagraIndex."; + return Status::empty_index; + } + std::stringbuf buf; + std::ostream os(&buf); + os.write((char*)(&this->dim_), sizeof(this->dim_)); + os.write((char*)(&this->counts_), sizeof(this->counts_)); + os.write((char*)(&this->devs_[0]), sizeof(this->devs_[0])); + + auto scoped_device = raft_utils::device_setter{devs_[0]}; + auto& res = raft_utils::get_raft_resources(); + + raft::neighbors::experimental::cagra::serialize(res, os, *gpu_index_); + + os.flush(); + std::shared_ptr index_binary(new (std::nothrow) uint8_t[buf.str().size()]); + + memcpy(index_binary.get(), buf.str().c_str(), buf.str().size()); + binset.Append(this->Type(), index_binary, buf.str().size()); + return Status::success; + } + + Status + Deserialize(const BinarySet& binset, const Config& config) override { + std::stringbuf buf; + auto binary = binset.GetByName(this->Type()); + if (binary == nullptr) { + LOG_KNOWHERE_ERROR_ << "Invalid binary set."; + return Status::invalid_binary_set; + } + buf.sputn((char*)binary->data.get(), binary->size); + std::istream is(&buf); + + is.read((char*)(&this->dim_), sizeof(this->dim_)); + is.read((char*)(&this->counts_), sizeof(this->counts_)); + this->devs_.resize(1); + is.read((char*)(&this->devs_[0]), sizeof(this->devs_[0])); + auto scoped_device = raft_utils::device_setter{devs_[0]}; + + auto& res = raft_utils::get_raft_resources(); + + auto index_ = raft::neighbors::experimental::cagra::deserialize(res, is); + is.sync(); + gpu_index_ = cagra_index(std::move(index_)); + + return Status::success; + } + + Status + DeserializeFromFile(const std::string& filename, const Config& config) override { + } + + std::unique_ptr + CreateConfig() const override { + return std::make_unique(); + } + + int64_t + Dim() const override { + return dim_; + } + + int64_t + Size() const override { + return 0; + } + + int64_t + Count() const override { + return counts_; + } + + std::string + Type() const override { + return knowhere::IndexEnum::INDEX_RAFT_CAGRA; + } + + private: + std::vector devs_; + int64_t dim_ = 0; + int64_t counts_ = 0; + std::optional gpu_index_; +}; + +} // namespace knowhere diff --git a/src/index/cagra/cagra_config.h b/src/index/cagra/cagra_config.h index 5c3d790c8..797a720c5 100644 --- a/src/index/cagra/cagra_config.h +++ b/src/index/cagra/cagra_config.h @@ -21,6 +21,7 @@ class CagraConfig : public BaseConfig { public: CFG_INT intermediate_graph_degree; CFG_INT graph_degree; + CFG_INT itopk_size; CFG_LIST gpu_ids; CFG_INT max_queries; KNOHWERE_DECLARE_CONFIG(CagraConfig) { @@ -34,6 +35,11 @@ class CagraConfig : public BaseConfig { .description("degree of output graph.") .for_search() .set_range(1, 65536); + KNOWHERE_CONFIG_DECLARE_FIELD(itopk_size) + .set_default(64) + .description("number of intermediate search results retained during the search.") + .for_search() + .set_range(1, 65536); KNOWHERE_CONFIG_DECLARE_FIELD(gpu_ids) .description("gpu device ids") .set_default({ diff --git a/src/index/ivf_raft/ivf_raft.cuh b/src/index/ivf_raft/ivf_raft.cuh index 729165f6e..192cfc151 100644 --- a/src/index/ivf_raft/ivf_raft.cuh +++ b/src/index/ivf_raft/ivf_raft.cuh @@ -23,7 +23,8 @@ #include #include #include -#include +#include +#include #include "common/raft/raft_utils.h" #include "common/raft_metric.h" @@ -344,7 +345,7 @@ class RaftIvfIndexNode : public IndexNode { raft::neighbors::ivf_pq::extend( res, raft::make_const_mdspan(data_gpu.view()), std::make_optional( - raft::make_device_matrix_view(indices.data(), rows, 1)), + raft::make_device_vector_view(indices.data(), rows)), gpu_index_.value()); } else { static_assert(std::is_same_v); diff --git a/tests/ut/test_gpu_search.cc b/tests/ut/test_gpu_search.cc index 9e7b68fcc..70bc18b43 100644 --- a/tests/ut/test_gpu_search.cc +++ b/tests/ut/test_gpu_search.cc @@ -57,6 +57,14 @@ TEST_CASE("Test All GPU Index", "[search]") { return json; }; + auto cagra_gen = [&base_gen]() { + knowhere::Json json = base_gen(); + json[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE] = 128; + json[knowhere::indexparam::GRAPH_DEGREE] = 64; + json[knowhere::indexparam::ITOPK_SIZE] = 128; + return json; + }; + SECTION("Test Gpu Index Search") { using std::make_tuple; auto [name, gen] = GENERATE_REF(table>({ @@ -68,6 +76,7 @@ TEST_CASE("Test All GPU Index", "[search]") { // make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_RAFT_IVFFLAT, ivfflat_gen), make_tuple(knowhere::IndexEnum::INDEX_RAFT_IVFPQ, ivfpq_gen), + make_tuple(knowhere::IndexEnum::INDEX_RAFT_CAGRA, cagra_gen), })); auto idx = knowhere::IndexFactory::Instance().Create(name); auto cfg_json = gen().dump(); @@ -139,6 +148,7 @@ TEST_CASE("Test All GPU Index", "[search]") { // make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_RAFT_IVFFLAT, ivfflat_gen), make_tuple(knowhere::IndexEnum::INDEX_RAFT_IVFPQ, ivfpq_gen), + make_tuple(knowhere::IndexEnum::INDEX_RAFT_CAGRA, cagra_gen), })); auto idx = knowhere::IndexFactory::Instance().Create(name); auto cfg_json = gen().dump(); @@ -171,6 +181,7 @@ TEST_CASE("Test All GPU Index", "[search]") { // make_tuple(knowhere::IndexEnum::INDEX_FAISS_GPU_IVFSQ8, ivfsq_gen), make_tuple(knowhere::IndexEnum::INDEX_RAFT_IVFFLAT, ivfflat_gen), make_tuple(knowhere::IndexEnum::INDEX_RAFT_IVFPQ, ivfpq_gen), + make_tuple(knowhere::IndexEnum::INDEX_RAFT_CAGRA, cagra_gen), })); auto idx = knowhere::IndexFactory::Instance().Create(name); From 763d2ae39cdd0cf7d806d7ec40fb873942e6e4d8 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Thu, 13 Jul 2023 02:41:58 +0200 Subject: [PATCH 02/10] Add benchmark for CAGRA Signed-off-by: Mickael Ide --- benchmark/hdf5/benchmark_float_qps.cpp | 70 ++++++++++++++++++++++++++ cmake/libs/libraft.cmake | 7 +-- src/index/cagra/cagra.cuh | 7 +++ src/index/cagra/cagra_config.h | 7 ++- 4 files changed, 85 insertions(+), 6 deletions(-) diff --git a/benchmark/hdf5/benchmark_float_qps.cpp b/benchmark/hdf5/benchmark_float_qps.cpp index dabc059cd..6bb6ac70b 100644 --- a/benchmark/hdf5/benchmark_float_qps.cpp +++ b/benchmark/hdf5/benchmark_float_qps.cpp @@ -73,6 +73,57 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { } } + void test_cagra(const knowhere::Json& cfg) { + auto conf = cfg; + auto igd = conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE].get(); + auto gd = conf[knowhere::indexparam::GRAPH_DEGREE].get(); + + + auto find_smallest_itopk = [&](float expected_recall) -> int32_t { + conf[knowhere::meta::TOPK] = topk_; + auto ds_ptr = knowhere::GenDataSet(nq_, dim_, xq_); + + int32_t left = topk_, right = 256, itopk; + float recall; + while (left <= right) { + itopk = left + (right - left) / 2; + conf[knowhere::indexparam::ITOPK_SIZE] = itopk; + + auto result = index_.Search(*ds_ptr, conf, nullptr); + recall = CalcRecall(result.value()->GetIds(), nq_, topk_); + printf("[%0.3f s] iterate CAGRA param for recall %.4f: gd=%d, igd=%d, itopk=%4d, k=%d, R@=%.4f\n", + get_time_diff(), expected_recall, gd, igd, itopk, topk_, recall); + std::fflush(stdout); + if (std::abs(recall - expected_recall) <= 0.0001) { + return itopk; + } + if (recall < expected_recall) { + left = itopk + 1; + } else { + right = itopk - 1; + } + } + return left; + }; + + for (auto expected_recall : EXPECTED_RECALLs_) { + auto itopk = find_smallest_itopk(expected_recall); + conf[knowhere::indexparam::ITOPK_SIZE] = itopk; + conf[knowhere::meta::TOPK] = topk_; + + printf("\n[%0.3f s] %s | %s | intermediate_graph_degree=%d, graph_degree=%d, k=%d, R@=%.4f\n", get_time_diff(), + ann_test_name_.c_str(), index_type_.c_str(), igd, gd, topk_, expected_recall); + printf("================================================================================\n"); + for (auto thread_num : THREAD_NUMs_) { + CALC_TIME_SPAN(task(conf, thread_num, nq_)); + printf(" thread_num = %2d, elapse = %6.3fs, VPS = %.3f\n", thread_num, t_diff, nq_ / t_diff); + std::fflush(stdout); + } + printf("================================================================================\n"); + printf("[%.3f s] Test '%s/%s' done\n\n", get_time_diff(), ann_test_name_.c_str(), index_type_.c_str()); + } + } + void test_hnsw(const knowhere::Json& cfg) { auto conf = cfg; @@ -191,6 +242,10 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { // HNSW index params const std::vector HNSW_Ms_ = {16}; const std::vector EFCONs_ = {100}; + + // CAGRA index params + const std::vector GRAPH_DEGREE_ = {16, 32, 64}; + const std::vector INTERMEDIATE_GRAPH_DEGREE_ = {16, 32, 64, 128}; }; TEST_F(Benchmark_float_qps, TEST_IVF_FLAT) { @@ -263,3 +318,18 @@ TEST_F(Benchmark_float_qps, TEST_HNSW) { } } } + +TEST_F(Benchmark_float_qps, TEST_CAGRA) { + index_type_ = knowhere::IndexEnum::INDEX_RAFT_CAGRA; + knowhere::Json conf = cfg_; + for (auto gd : GRAPH_DEGREE_) { + conf[knowhere::indexparam::GRAPH_DEGREE] = gd; + for (auto igd : INTERMEDIATE_GRAPH_DEGREE_) { + if (igd <= gd) continue; + conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE] = igd; + std::string index_file_name = get_index_name({igd, gd}); + create_index(index_file_name, conf); + test_cagra(conf); + } + } +} diff --git a/cmake/libs/libraft.cmake b/cmake/libs/libraft.cmake index 608e8eedd..a401c698b 100644 --- a/cmake/libs/libraft.cmake +++ b/cmake/libs/libraft.cmake @@ -15,11 +15,8 @@ add_definitions(-DKNOWHERE_WITH_RAFT) include(cmake/utils/fetch_rapids.cmake) -include(rapids-cmake) -include(rapids-cpm) -include(rapids-cuda) -include(rapids-export) -include(rapids-find) +include(rapids-cpm) # Dependency tracking +include(rapids-cuda) # Common CMake CUDA logic rapids_cpm_init() diff --git a/src/index/cagra/cagra.cuh b/src/index/cagra/cagra.cuh index 01c7a9148..9f2c7964b 100644 --- a/src/index/cagra/cagra.cuh +++ b/src/index/cagra/cagra.cuh @@ -57,6 +57,10 @@ class CagraIndexNode : public IndexNode { LOG_KNOWHERE_WARNING_ << "only support L2Expanded metric type"; return Status::invalid_metric_type; } + if (cagra_cfg.intermediate_graph_degree.value() < cagra_cfg.graph_degree.value()) { + LOG_KNOWHERE_WARNING_ << "Intermediate graph degree must be bigger than graph degree" << std::endl; + return Status::raft_inner_error; + } try { devs_.insert(devs_.begin(), cagra_cfg.gpu_ids.value().begin(), cagra_cfg.gpu_ids.value().end()); auto scoped_device = raft_utils::device_setter{*cagra_cfg.gpu_ids.value().begin()}; @@ -193,6 +197,7 @@ class CagraIndexNode : public IndexNode { is.read((char*)(&this->devs_[0]), sizeof(this->devs_[0])); auto scoped_device = raft_utils::device_setter{devs_[0]}; + raft_utils::init_gpu_resources(); auto& res = raft_utils::get_raft_resources(); auto index_ = raft::neighbors::experimental::cagra::deserialize(res, is); @@ -204,6 +209,8 @@ class CagraIndexNode : public IndexNode { Status DeserializeFromFile(const std::string& filename, const Config& config) override { + LOG_KNOWHERE_ERROR_ << "CAGRA doesn't support Deserialization from file."; + return Status::not_implemented; } std::unique_ptr diff --git a/src/index/cagra/cagra_config.h b/src/index/cagra/cagra_config.h index 797a720c5..9f0e90dc8 100644 --- a/src/index/cagra/cagra_config.h +++ b/src/index/cagra/cagra_config.h @@ -25,6 +25,11 @@ class CagraConfig : public BaseConfig { CFG_LIST gpu_ids; CFG_INT max_queries; KNOHWERE_DECLARE_CONFIG(CagraConfig) { + KNOWHERE_CONFIG_DECLARE_FIELD(k) + .set_default(10) + .description("search for top k similar vector.") + .set_range(1, 1024) + .for_search(); KNOWHERE_CONFIG_DECLARE_FIELD(intermediate_graph_degree) .set_default(128) .description("degree of input graph for pruning.") @@ -33,7 +38,7 @@ class CagraConfig : public BaseConfig { KNOWHERE_CONFIG_DECLARE_FIELD(graph_degree) .set_default(64) .description("degree of output graph.") - .for_search() + .for_train() .set_range(1, 65536); KNOWHERE_CONFIG_DECLARE_FIELD(itopk_size) .set_default(64) From 272be1be0d253d56d9f9125a52d8d4ea3d3a8df6 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Thu, 13 Jul 2023 02:50:56 +0200 Subject: [PATCH 03/10] Fix conditions Signed-off-by: Mickael Ide --- benchmark/hdf5/benchmark_float_qps.cpp | 2 +- src/index/cagra/cagra.cuh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/hdf5/benchmark_float_qps.cpp b/benchmark/hdf5/benchmark_float_qps.cpp index 6bb6ac70b..aafc73f0a 100644 --- a/benchmark/hdf5/benchmark_float_qps.cpp +++ b/benchmark/hdf5/benchmark_float_qps.cpp @@ -325,7 +325,7 @@ TEST_F(Benchmark_float_qps, TEST_CAGRA) { for (auto gd : GRAPH_DEGREE_) { conf[knowhere::indexparam::GRAPH_DEGREE] = gd; for (auto igd : INTERMEDIATE_GRAPH_DEGREE_) { - if (igd <= gd) continue; + if (igd < gd) continue; conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE] = igd; std::string index_file_name = get_index_name({igd, gd}); create_index(index_file_name, conf); diff --git a/src/index/cagra/cagra.cuh b/src/index/cagra/cagra.cuh index 9f2c7964b..4753bf323 100644 --- a/src/index/cagra/cagra.cuh +++ b/src/index/cagra/cagra.cuh @@ -99,8 +99,8 @@ class CagraIndexNode : public IndexNode { auto cagra_cfg = static_cast(cfg); auto rows = dataset.GetRows(); auto dim = dataset.GetDim(); - if (cagra_cfg.k.value() >= cagra_cfg.itopk_size.value()) { - LOG_KNOWHERE_WARNING_ << "topk should be smaller than itopk_size parameter" << std::endl; + if (cagra_cfg.k.value() > cagra_cfg.itopk_size.value()) { + LOG_KNOWHERE_WARNING_ << "topk must be smaller than itopk_size parameter" << std::endl; return Status::raft_inner_error; } auto* data = reinterpret_cast(dataset.GetTensor()); From 3961d4b2de8d6fd2d80037384c11db2b39bfe994 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 24 Jul 2023 17:49:31 +0200 Subject: [PATCH 04/10] Fix style Signed-off-by: Mickael Ide --- benchmark/hdf5/benchmark_float_qps.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/benchmark/hdf5/benchmark_float_qps.cpp b/benchmark/hdf5/benchmark_float_qps.cpp index aafc73f0a..a45361ea9 100644 --- a/benchmark/hdf5/benchmark_float_qps.cpp +++ b/benchmark/hdf5/benchmark_float_qps.cpp @@ -73,7 +73,8 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { } } - void test_cagra(const knowhere::Json& cfg) { + void + test_cagra(const knowhere::Json& cfg) { auto conf = cfg; auto igd = conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE].get(); auto gd = conf[knowhere::indexparam::GRAPH_DEGREE].get(); @@ -111,8 +112,8 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { conf[knowhere::indexparam::ITOPK_SIZE] = itopk; conf[knowhere::meta::TOPK] = topk_; - printf("\n[%0.3f s] %s | %s | intermediate_graph_degree=%d, graph_degree=%d, k=%d, R@=%.4f\n", get_time_diff(), - ann_test_name_.c_str(), index_type_.c_str(), igd, gd, topk_, expected_recall); + printf("\n[%0.3f s] %s | %s | intermediate_graph_degree=%d, graph_degree=%d, itopk=%d, k=%d, R@=%.4f\n", + get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), igd, gd, itopk, topk_, expected_recall); printf("================================================================================\n"); for (auto thread_num : THREAD_NUMs_) { CALC_TIME_SPAN(task(conf, thread_num, nq_)); @@ -325,7 +326,8 @@ TEST_F(Benchmark_float_qps, TEST_CAGRA) { for (auto gd : GRAPH_DEGREE_) { conf[knowhere::indexparam::GRAPH_DEGREE] = gd; for (auto igd : INTERMEDIATE_GRAPH_DEGREE_) { - if (igd < gd) continue; + if (igd < gd) + continue; conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE] = igd; std::string index_file_name = get_index_name({igd, gd}); create_index(index_file_name, conf); From 347c6a8dcfb6ed7671e595b65096ad056af5903c Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Mon, 24 Jul 2023 18:05:22 +0200 Subject: [PATCH 05/10] Fix style Signed-off-by: Mickael Ide --- benchmark/hdf5/benchmark_float_qps.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/hdf5/benchmark_float_qps.cpp b/benchmark/hdf5/benchmark_float_qps.cpp index a45361ea9..7fabc8ce0 100644 --- a/benchmark/hdf5/benchmark_float_qps.cpp +++ b/benchmark/hdf5/benchmark_float_qps.cpp @@ -79,7 +79,6 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { auto igd = conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE].get(); auto gd = conf[knowhere::indexparam::GRAPH_DEGREE].get(); - auto find_smallest_itopk = [&](float expected_recall) -> int32_t { conf[knowhere::meta::TOPK] = topk_; auto ds_ptr = knowhere::GenDataSet(nq_, dim_, xq_); @@ -113,7 +112,8 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { conf[knowhere::meta::TOPK] = topk_; printf("\n[%0.3f s] %s | %s | intermediate_graph_degree=%d, graph_degree=%d, itopk=%d, k=%d, R@=%.4f\n", - get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), igd, gd, itopk, topk_, expected_recall); + get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), igd, gd, itopk, topk_, + expected_recall); printf("================================================================================\n"); for (auto thread_num : THREAD_NUMs_) { CALC_TIME_SPAN(task(conf, thread_num, nq_)); From 37deb21cf24e6420fc589a76395bc1c0370875d9 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Sat, 5 Aug 2023 02:03:26 +0200 Subject: [PATCH 06/10] Add cagra parameters Signed-off-by: Mickael Ide --- src/index/cagra/cagra.cuh | 53 +++++++++++++++++++++++++++++++- src/index/cagra/cagra_config.h | 55 ++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/src/index/cagra/cagra.cuh b/src/index/cagra/cagra.cuh index 4753bf323..0db89a87e 100644 --- a/src/index/cagra/cagra.cuh +++ b/src/index/cagra/cagra.cuh @@ -9,6 +9,48 @@ namespace knowhere { +namespace cagra_search_algo { +auto static constexpr const SINGLE_CTA = "SINGLE_CTA"; +auto static constexpr const MULTI_CTA = "MULTI_CTA"; +auto static constexpr const MULTI_KERNEL = "MULTI_KERNEL"; +auto static constexpr const AUTO = "AUTO"; +} // namespace cagra_search_algo + +namespace cagra_hash_mode { +auto static constexpr const HASH = "HASH"; +auto static constexpr const SMALL = "SMALL"; +auto static constexpr const AUTO = "AUTO"; +} // namespace cagra_hash_mode + +inline expected +str_to_search_algo(std::string const& str) { + static const std::unordered_map name_map = { + {cagra_search_algo::SINGLE_CTA, raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA}, + {cagra_search_algo::MULTI_CTA, raft::neighbors::experimental::cagra::search_algo::MULTI_CTA}, + {cagra_search_algo::MULTI_KERNEL, raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL}, + {cagra_search_algo::AUTO, raft::neighbors::experimental::cagra::search_algo::AUTO}, + }; + + auto it = name_map.find(str); + if (it == name_map.end()) + return Status::invalid_args; + return it->second; +} + +inline expected +str_to_hashmap_mode(std::string const& str) { + static const std::unordered_map name_map = { + {cagra_hash_mode::SMALL, raft::neighbors::experimental::cagra::hash_mode::SMALL}, + {cagra_hash_mode::HASH, raft::neighbors::experimental::cagra::hash_mode::HASH}, + {cagra_hash_mode::AUTO, raft::neighbors::experimental::cagra::hash_mode::AUTO}, + }; + + auto it = name_map.find(str); + if (it == name_map.end()) + return Status::invalid_args; + return it->second; +} + namespace detail { struct device_setter { device_setter(int new_device) @@ -117,7 +159,16 @@ class CagraIndexNode : public IndexNode { auto search_params = raft::neighbors::experimental::cagra::search_params{}; search_params.max_queries = cagra_cfg.max_queries.value(); search_params.itopk_size = cagra_cfg.itopk_size.value(); - + search_params.team_size = cagra_cfg.team_size.value(); + search_params.algo = str_to_search_algo(cagra_cfg.algo.value()).value(); + search_params.num_parents = cagra_cfg.search_width.value(); + search_params.min_iterations = cagra_cfg.min_iterations.value(); + search_params.max_iterations = cagra_cfg.max_iterations.value(); + search_params.load_bit_length = cagra_cfg.load_bit_length.value(); + search_params.thread_block_size = cagra_cfg.thread_block_size.value(); + search_params.hashmap_mode = str_to_hashmap_mode(cagra_cfg.hashmap_mode.value()).value(); + search_params.hashmap_min_bitlen = cagra_cfg.hashmap_min_bitlen.value(); + search_params.hashmap_max_fill_rate = cagra_cfg.hashmap_max_fill_rate.value(); auto ids_dev = raft::make_device_matrix(res, rows, cagra_cfg.k.value()); auto dis_dev = raft::make_device_matrix(res, rows, cagra_cfg.k.value()); raft::neighbors::experimental::cagra::search(res, search_params, *gpu_index_, diff --git a/src/index/cagra/cagra_config.h b/src/index/cagra/cagra_config.h index 9f0e90dc8..793fe4408 100644 --- a/src/index/cagra/cagra_config.h +++ b/src/index/cagra/cagra_config.h @@ -24,6 +24,17 @@ class CagraConfig : public BaseConfig { CFG_INT itopk_size; CFG_LIST gpu_ids; CFG_INT max_queries; + CFG_STRING algo; + CFG_INT team_size; + CFG_INT search_width; + CFG_INT min_iterations; + CFG_INT max_iterations; + CFG_INT load_bit_length; + CFG_INT thread_block_size; + CFG_STRING hashmap_mode; + CFG_INT hashmap_min_bitlen; + CFG_FLOAT hashmap_max_fill_rate; + KNOHWERE_DECLARE_CONFIG(CagraConfig) { KNOWHERE_CONFIG_DECLARE_FIELD(k) .set_default(10) @@ -52,6 +63,50 @@ class CagraConfig : public BaseConfig { }) .for_train(); KNOWHERE_CONFIG_DECLARE_FIELD(max_queries).description("query batch size.").set_default(1).for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(algo) + .set_default("AUTO") + .description("Which search implementation to use.") + .for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(search_width) + .description("Number of graph nodes to select as the starting point in each search iteration.") + .set_default(1) + .for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(min_iterations) + .description("Lower limit of search iterations.") + .set_default(0) + .for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(max_iterations) + .description("Upper limit of search iterations. Auto select when 0.") + .set_default(0) + .for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(team_size) + .description("Number of threads used to calculate a single distance. 4, 8, 16, or 32.") + .set_default(0) + .set_range(0, 32) + .for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(load_bit_length) + .description("Bit length for reading the dataset vectors. 0, 64 or 128. Auto selection when 0.") + .set_default(0) + .set_range(0, 28) + .for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(thread_block_size) + .description("Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0.") + .set_default(0) + .set_range(0, 1024) + .for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(hashmap_mode) + .description("Hashmap type. Auto selection when AUTO.") + .set_default("AUTO") + .for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(hashmap_min_bitlen) + .description("Lower limit of hashmap bit length. More than 8..") + .set_default(0) + .for_search(); + KNOWHERE_CONFIG_DECLARE_FIELD(hashmap_max_fill_rate) + .description("Upper limit of hashmap fill rate. More than 0.1, less than 0.9.") + .set_default(0.5) + .set_range(0.1, 0.9) + .for_search(); } }; From d4202805d7c3aaefb05b09f8db7f1d3e8766756e Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Thu, 10 Aug 2023 18:26:54 +0200 Subject: [PATCH 07/10] Add hyper parameters selection in benchmarks Signed-off-by: Mickael Ide --- benchmark/hdf5/benchmark_float_qps.cpp | 27 ++++++++++++---- include/knowhere/comp/index_param.h | 11 +++++++ src/common/raft/raft_utils.h | 3 +- src/index/cagra/cagra.cuh | 44 ++++++++++++-------------- src/index/cagra/cagra_config.h | 6 ---- 5 files changed, 55 insertions(+), 36 deletions(-) diff --git a/benchmark/hdf5/benchmark_float_qps.cpp b/benchmark/hdf5/benchmark_float_qps.cpp index 7fabc8ce0..a61ec7020 100644 --- a/benchmark/hdf5/benchmark_float_qps.cpp +++ b/benchmark/hdf5/benchmark_float_qps.cpp @@ -78,6 +78,8 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { auto conf = cfg; auto igd = conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE].get(); auto gd = conf[knowhere::indexparam::GRAPH_DEGREE].get(); + auto max_iterations = conf[knowhere::indexparam::MAX_ITERATIONS].get(); + auto search_width = conf[knowhere::indexparam::SEARCH_WIDTH].get(); auto find_smallest_itopk = [&](float expected_recall) -> int32_t { conf[knowhere::meta::TOPK] = topk_; @@ -91,8 +93,10 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { auto result = index_.Search(*ds_ptr, conf, nullptr); recall = CalcRecall(result.value()->GetIds(), nq_, topk_); - printf("[%0.3f s] iterate CAGRA param for recall %.4f: gd=%d, igd=%d, itopk=%4d, k=%d, R@=%.4f\n", - get_time_diff(), expected_recall, gd, igd, itopk, topk_, recall); + printf( + "[%0.3f s] iterate CAGRA param for recall %.4f: gd=%d, igd=%d, itopk=%d, k=%d, max_it=%d, sw=%d, " + "R@=%.4f\n", + get_time_diff(), expected_recall, gd, igd, itopk, topk_, max_iterations, search_width, recall); std::fflush(stdout); if (std::abs(recall - expected_recall) <= 0.0001) { return itopk; @@ -111,9 +115,11 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { conf[knowhere::indexparam::ITOPK_SIZE] = itopk; conf[knowhere::meta::TOPK] = topk_; - printf("\n[%0.3f s] %s | %s | intermediate_graph_degree=%d, graph_degree=%d, itopk=%d, k=%d, R@=%.4f\n", - get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), igd, gd, itopk, topk_, - expected_recall); + printf( + "\n[%0.3f s] %s | %s | intermediate_graph_degree=%d, graph_degree=%d, itopk=%d, k=%d, max_it=%d, " + "sw=%d, R@=%.4f\n", + get_time_diff(), ann_test_name_.c_str(), index_type_.c_str(), igd, gd, itopk, topk_, max_iterations, + search_width, expected_recall); printf("================================================================================\n"); for (auto thread_num : THREAD_NUMs_) { CALC_TIME_SPAN(task(conf, thread_num, nq_)); @@ -247,6 +253,9 @@ class Benchmark_float_qps : public Benchmark_knowhere, public ::testing::Test { // CAGRA index params const std::vector GRAPH_DEGREE_ = {16, 32, 64}; const std::vector INTERMEDIATE_GRAPH_DEGREE_ = {16, 32, 64, 128}; + // CAGRA search params + const std::vector CAGRA_SEARCH_WIDTH_ = {1, 2, 4}; + const std::vector CAGRA_MAX_ITERATIONS_ = {0, 16, 32, 64}; }; TEST_F(Benchmark_float_qps, TEST_IVF_FLAT) { @@ -331,7 +340,13 @@ TEST_F(Benchmark_float_qps, TEST_CAGRA) { conf[knowhere::indexparam::INTERMEDIATE_GRAPH_DEGREE] = igd; std::string index_file_name = get_index_name({igd, gd}); create_index(index_file_name, conf); - test_cagra(conf); + for (auto max_iterations : CAGRA_MAX_ITERATIONS_) { + for (auto search_width : CAGRA_SEARCH_WIDTH_) { + conf[knowhere::indexparam::MAX_ITERATIONS] = max_iterations; + conf[knowhere::indexparam::SEARCH_WIDTH] = search_width; + test_cagra(conf); + } + } } } } diff --git a/include/knowhere/comp/index_param.h b/include/knowhere/comp/index_param.h index 5ab457d05..b3de434bc 100644 --- a/include/knowhere/comp/index_param.h +++ b/include/knowhere/comp/index_param.h @@ -79,6 +79,17 @@ constexpr const char* REORDER_K = "reorder_k"; constexpr const char* INTERMEDIATE_GRAPH_DEGREE = "intermediate_graph_degree"; constexpr const char* GRAPH_DEGREE = "graph_degree"; constexpr const char* ITOPK_SIZE = "itopk_size"; +constexpr const char* MAX_QUERIES = "max_queries"; +constexpr const char* ALGO = "algo"; +constexpr const char* TEAM_SIZE = "team_size"; +constexpr const char* SEARCH_WIDTH = "search_width"; +constexpr const char* MIN_ITERATIONS = "min_iterations"; +constexpr const char* MAX_ITERATIONS = "max_iterations"; +constexpr const char* THREAD_BLOCK_SIZE = "thread_block_size"; +constexpr const char* HASHMAP_MODE = "hashmap_mode"; +constexpr const char* HASHMAP_MIN_BITLEN = "hashmap_min_bitlen"; +constexpr const char* HASHMAP_MAX_FILL_RATE = "hashmap_max_fill_rate"; + // HNSW Params constexpr const char* EFCONSTRUCTION = "efConstruction"; constexpr const char* HNSW_M = "M"; diff --git a/src/common/raft/raft_utils.h b/src/common/raft/raft_utils.h index 9e4ab9006..ec6c924c8 100644 --- a/src/common/raft/raft_utils.h +++ b/src/common/raft/raft_utils.h @@ -156,7 +156,8 @@ get_raft_resources(int device_id = get_current_device()) { if (iter == all_resources.end()) { auto scoped_device = device_setter{device_id}; all_resources[device_id] = std::make_unique( - get_gpu_resources().get_stream_view(), nullptr, rmm::mr::get_current_device_resource()); + get_gpu_resources().get_stream_view(), nullptr, + std::shared_ptr(rmm::mr::get_current_device_resource())); } return *all_resources[device_id]; } diff --git a/src/index/cagra/cagra.cuh b/src/index/cagra/cagra.cuh index 0db89a87e..862a97587 100644 --- a/src/index/cagra/cagra.cuh +++ b/src/index/cagra/cagra.cuh @@ -22,13 +22,13 @@ auto static constexpr const SMALL = "SMALL"; auto static constexpr const AUTO = "AUTO"; } // namespace cagra_hash_mode -inline expected +inline expected str_to_search_algo(std::string const& str) { - static const std::unordered_map name_map = { - {cagra_search_algo::SINGLE_CTA, raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA}, - {cagra_search_algo::MULTI_CTA, raft::neighbors::experimental::cagra::search_algo::MULTI_CTA}, - {cagra_search_algo::MULTI_KERNEL, raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL}, - {cagra_search_algo::AUTO, raft::neighbors::experimental::cagra::search_algo::AUTO}, + static const std::unordered_map name_map = { + {cagra_search_algo::SINGLE_CTA, raft::neighbors::cagra::search_algo::SINGLE_CTA}, + {cagra_search_algo::MULTI_CTA, raft::neighbors::cagra::search_algo::MULTI_CTA}, + {cagra_search_algo::MULTI_KERNEL, raft::neighbors::cagra::search_algo::MULTI_KERNEL}, + {cagra_search_algo::AUTO, raft::neighbors::cagra::search_algo::AUTO}, }; auto it = name_map.find(str); @@ -37,12 +37,12 @@ str_to_search_algo(std::string const& str) { return it->second; } -inline expected +inline expected str_to_hashmap_mode(std::string const& str) { - static const std::unordered_map name_map = { - {cagra_hash_mode::SMALL, raft::neighbors::experimental::cagra::hash_mode::SMALL}, - {cagra_hash_mode::HASH, raft::neighbors::experimental::cagra::hash_mode::HASH}, - {cagra_hash_mode::AUTO, raft::neighbors::experimental::cagra::hash_mode::AUTO}, + static const std::unordered_map name_map = { + {cagra_hash_mode::SMALL, raft::neighbors::cagra::hash_mode::SMALL}, + {cagra_hash_mode::HASH, raft::neighbors::cagra::hash_mode::HASH}, + {cagra_hash_mode::AUTO, raft::neighbors::cagra::hash_mode::AUTO}, }; auto it = name_map.find(str); @@ -73,7 +73,7 @@ struct device_setter { class CagraIndexNode : public IndexNode { using idx_type = std::int64_t; - using cagra_index = raft::neighbors::experimental::cagra::index; + using cagra_index = raft::neighbors::cagra::index; public: CagraIndexNode(const Object& object) : devs_{}, gpu_index_{} { @@ -108,7 +108,7 @@ class CagraIndexNode : public IndexNode { auto scoped_device = raft_utils::device_setter{*cagra_cfg.gpu_ids.value().begin()}; raft_utils::init_gpu_resources(); - auto build_params = raft::neighbors::experimental::cagra::index_params{}; + auto build_params = raft::neighbors::cagra::index_params{}; build_params.intermediate_graph_degree = cagra_cfg.intermediate_graph_degree.value(); build_params.graph_degree = cagra_cfg.graph_degree.value(); build_params.metric = metric.value(); @@ -119,8 +119,8 @@ class CagraIndexNode : public IndexNode { auto data_gpu = raft::make_device_matrix(res, rows, dim); RAFT_CUDA_TRY(cudaMemcpyAsync(data_gpu.data_handle(), data, data_gpu.size() * sizeof(float), cudaMemcpyDefault, res.get_stream().value())); - gpu_index_ = raft::neighbors::experimental::cagra::build( - res, build_params, raft::make_const_mdspan(data_gpu.view())); + gpu_index_ = raft::neighbors::cagra::build(res, build_params, + raft::make_const_mdspan(data_gpu.view())); this->dim_ = dim; this->counts_ = rows; res.sync_stream(); @@ -156,24 +156,22 @@ class CagraIndexNode : public IndexNode { auto data_gpu = raft::make_device_matrix(res, rows, dim); raft::copy(data_gpu.data_handle(), data, data_gpu.size(), res.get_stream()); - auto search_params = raft::neighbors::experimental::cagra::search_params{}; + auto search_params = raft::neighbors::cagra::search_params{}; search_params.max_queries = cagra_cfg.max_queries.value(); search_params.itopk_size = cagra_cfg.itopk_size.value(); search_params.team_size = cagra_cfg.team_size.value(); search_params.algo = str_to_search_algo(cagra_cfg.algo.value()).value(); - search_params.num_parents = cagra_cfg.search_width.value(); + search_params.search_width = cagra_cfg.search_width.value(); search_params.min_iterations = cagra_cfg.min_iterations.value(); search_params.max_iterations = cagra_cfg.max_iterations.value(); - search_params.load_bit_length = cagra_cfg.load_bit_length.value(); search_params.thread_block_size = cagra_cfg.thread_block_size.value(); search_params.hashmap_mode = str_to_hashmap_mode(cagra_cfg.hashmap_mode.value()).value(); search_params.hashmap_min_bitlen = cagra_cfg.hashmap_min_bitlen.value(); search_params.hashmap_max_fill_rate = cagra_cfg.hashmap_max_fill_rate.value(); auto ids_dev = raft::make_device_matrix(res, rows, cagra_cfg.k.value()); auto dis_dev = raft::make_device_matrix(res, rows, cagra_cfg.k.value()); - raft::neighbors::experimental::cagra::search(res, search_params, *gpu_index_, - raft::make_const_mdspan(data_gpu.view()), ids_dev.view(), - dis_dev.view()); + raft::neighbors::cagra::search(res, search_params, *gpu_index_, raft::make_const_mdspan(data_gpu.view()), + ids_dev.view(), dis_dev.view()); raft::copy(ids.get(), ids_dev.data_handle(), output_size, res.get_stream()); raft::copy(dis.get(), dis_dev.data_handle(), output_size, res.get_stream()); @@ -221,7 +219,7 @@ class CagraIndexNode : public IndexNode { auto scoped_device = raft_utils::device_setter{devs_[0]}; auto& res = raft_utils::get_raft_resources(); - raft::neighbors::experimental::cagra::serialize(res, os, *gpu_index_); + raft::neighbors::cagra::serialize(res, os, *gpu_index_); os.flush(); std::shared_ptr index_binary(new (std::nothrow) uint8_t[buf.str().size()]); @@ -251,7 +249,7 @@ class CagraIndexNode : public IndexNode { raft_utils::init_gpu_resources(); auto& res = raft_utils::get_raft_resources(); - auto index_ = raft::neighbors::experimental::cagra::deserialize(res, is); + auto index_ = raft::neighbors::cagra::deserialize(res, is); is.sync(); gpu_index_ = cagra_index(std::move(index_)); diff --git a/src/index/cagra/cagra_config.h b/src/index/cagra/cagra_config.h index 793fe4408..e05992628 100644 --- a/src/index/cagra/cagra_config.h +++ b/src/index/cagra/cagra_config.h @@ -29,7 +29,6 @@ class CagraConfig : public BaseConfig { CFG_INT search_width; CFG_INT min_iterations; CFG_INT max_iterations; - CFG_INT load_bit_length; CFG_INT thread_block_size; CFG_STRING hashmap_mode; CFG_INT hashmap_min_bitlen; @@ -84,11 +83,6 @@ class CagraConfig : public BaseConfig { .set_default(0) .set_range(0, 32) .for_search(); - KNOWHERE_CONFIG_DECLARE_FIELD(load_bit_length) - .description("Bit length for reading the dataset vectors. 0, 64 or 128. Auto selection when 0.") - .set_default(0) - .set_range(0, 28) - .for_search(); KNOWHERE_CONFIG_DECLARE_FIELD(thread_block_size) .description("Thread block size. 0, 64, 128, 256, 512, 1024. Auto selection when 0.") .set_default(0) From a3f1d69f6bc881125bc5f05a5cc9938ca2a8f556 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Sat, 12 Aug 2023 02:35:10 +0200 Subject: [PATCH 08/10] Use raft 23.08 Signed-off-by: Mickael Ide --- cmake/libs/libraft.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/libs/libraft.cmake b/cmake/libs/libraft.cmake index a401c698b..cf2357936 100644 --- a/cmake/libs/libraft.cmake +++ b/cmake/libs/libraft.cmake @@ -23,7 +23,7 @@ rapids_cpm_init() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") -set(RAPIDS_VERSION 23.06) +set(RAPIDS_VERSION 23.08) set(RAFT_VERSION "${RAPIDS_VERSION}") set(RAFT_FORK "rapidsai") set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}") From 7d98bd060fa3d38d5d10e3015cd2620bd4ca4223 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Sat, 12 Aug 2023 02:44:39 +0200 Subject: [PATCH 09/10] Add fetch rapids to libraft.cmake Signed-off-by: Mickael Ide --- cmake/libs/libraft.cmake | 20 ++++++++++++++------ cmake/utils/fetch_rapids.cmake | 24 ------------------------ 2 files changed, 14 insertions(+), 30 deletions(-) delete mode 100644 cmake/utils/fetch_rapids.cmake diff --git a/cmake/libs/libraft.cmake b/cmake/libs/libraft.cmake index cf2357936..04179b0f1 100644 --- a/cmake/libs/libraft.cmake +++ b/cmake/libs/libraft.cmake @@ -13,8 +13,21 @@ # License for the specific language governing permissions and limitations under # the License. +set(RAPIDS_VERSION 23.08) +set(RAFT_VERSION "${RAPIDS_VERSION}") +set(RAFT_FORK "rapidsai") +set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}") + add_definitions(-DKNOWHERE_WITH_RAFT) -include(cmake/utils/fetch_rapids.cmake) + +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) + file( + DOWNLOAD + https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake + ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) +endif() +include(${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) + include(rapids-cpm) # Dependency tracking include(rapids-cuda) # Common CMake CUDA logic @@ -23,11 +36,6 @@ rapids_cpm_init() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") -set(RAPIDS_VERSION 23.08) -set(RAFT_VERSION "${RAPIDS_VERSION}") -set(RAFT_FORK "rapidsai") -set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}") - function(find_and_configure_raft) set(oneValueArgs VERSION FORK PINNED_TAG) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" diff --git a/cmake/utils/fetch_rapids.cmake b/cmake/utils/fetch_rapids.cmake deleted file mode 100644 index 84691b55a..000000000 --- a/cmake/utils/fetch_rapids.cmake +++ /dev/null @@ -1,24 +0,0 @@ -# ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not -# use this file except in compliance with the License. You may obtain a copy of -# the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -set(RAPIDS_VERSION "23.06") - -if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) - file( - DOWNLOAD - https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake - ${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) -endif() -include(${CMAKE_CURRENT_BINARY_DIR}/RAPIDS.cmake) From 34cb6fb6396c4d2a09f23d6af96e5d4fb2b97c2a Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Sat, 12 Aug 2023 03:27:18 +0200 Subject: [PATCH 10/10] Apply Threadpool changes Signed-off-by: Mickael Ide --- src/index/cagra/cagra.cu | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/index/cagra/cagra.cu b/src/index/cagra/cagra.cu index f09d1ea5d..bf81e9ea1 100644 --- a/src/index/cagra/cagra.cu +++ b/src/index/cagra/cagra.cu @@ -22,13 +22,7 @@ static constexpr uint32_t cuda_concurrent_size = 16; namespace knowhere { -static std::shared_ptr -GlobalThreadPoolRaft() { - static std::shared_ptr pool = std::make_shared(cuda_concurrent_size); - return pool; -} - KNOWHERE_REGISTER_GLOBAL(GPU_RAFT_CAGRA, [](const Object& object) { - return Index::Create(std::make_unique(object), GlobalThreadPoolRaft()); + return Index::Create(std::make_unique(object), cuda_concurrent_size); }); } // namespace knowhere