Skip to content
This repository has been archived by the owner on Aug 16, 2023. It is now read-only.

Commit

Permalink
Knowhere GPU support (#191)
Browse files Browse the repository at this point in the history
  • Loading branch information
Presburger authored May 26, 2022
1 parent 837ca20 commit 477cff5
Show file tree
Hide file tree
Showing 33 changed files with 316 additions and 161 deletions.
1 change: 1 addition & 0 deletions knowhere/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

include_directories(${KNOWHERE_SOURCE_DIR}/knowhere)
include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty)
include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty/faiss)
include_directories(${KNOWHERE_SOURCE_DIR}/knowhere/utils)

set(KNOWHERE_THIRDPARTY_SRC ${KNOWHERE_SOURCE_DIR}/thirdparty)
Expand Down
95 changes: 95 additions & 0 deletions knowhere/common/BlockingQueue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

#pragma once

#include <assert.h>
#include <condition_variable>
#include <iostream>
#include <queue>
#include <vector>

namespace knowhere {

template <typename T>
class BlockingQueue {
 public:
    BlockingQueue() : mtx(), full_(), empty_() {
    }

    virtual ~BlockingQueue() {
    }

    // Non-copyable: the queue owns a mutex and condition variables.
    BlockingQueue(const BlockingQueue& rhs) = delete;

    BlockingQueue&
    operator=(const BlockingQueue& rhs) = delete;

    // Enqueue `task`, blocking while the queue is at capacity.
    void
    Put(const T& task) {
        std::unique_lock<std::mutex> lock(mtx);
        full_.wait(lock, [this] { return (queue_.size() < capacity_); });
        queue_.push(task);
        empty_.notify_all();
    }

    // Dequeue and return the oldest element, blocking while the queue is
    // empty.
    T
    Take() {
        std::unique_lock<std::mutex> lock(mtx);
        empty_.wait(lock, [this] { return !queue_.empty(); });
        // Move instead of copy: the element is popped immediately after,
        // so stealing its state is safe and avoids a deep copy for heavy T.
        T front(std::move(queue_.front()));
        queue_.pop();
        full_.notify_all();
        return front;
    }

    // Return a copy of the oldest element without removing it, blocking
    // while the queue is empty.
    T
    Front() {
        std::unique_lock<std::mutex> lock(mtx);
        empty_.wait(lock, [this] { return !queue_.empty(); });
        T front(queue_.front());
        return front;
    }

    // Return a copy of the newest element without removing it, blocking
    // while the queue is empty.
    T
    Back() {
        std::unique_lock<std::mutex> lock(mtx);
        empty_.wait(lock, [this] { return !queue_.empty(); });
        T back(queue_.back());
        return back;
    }

    // Number of queued elements at the moment of the call.
    size_t
    Size() const {
        std::lock_guard<std::mutex> lock(mtx);
        return queue_.size();
    }

    bool
    Empty() const {
        // lock_guard is sufficient: no condition-variable wait here.
        std::lock_guard<std::mutex> lock(mtx);
        return queue_.empty();
    }

    // Set the maximum number of queued elements; values <= 0 are ignored.
    void
    SetCapacity(const size_t capacity) {
        // Take the lock: Put() reads capacity_ under mtx, so an unlocked
        // write here would be a data race.  Wake waiting producers in case
        // the capacity grew.
        std::lock_guard<std::mutex> lock(mtx);
        if (capacity > 0) {
            capacity_ = capacity;
            full_.notify_all();
        }
    }

 protected:
    mutable std::mutex mtx;
    std::condition_variable full_;   // signaled when space becomes available
    std::condition_variable empty_;  // signaled when an element is enqueued
    std::queue<T> queue_;
    size_t capacity_ = 32;           // default bound on queued elements
};

} // namespace knowhere
4 changes: 2 additions & 2 deletions knowhere/index/VecIndexFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
}
#ifdef KNOWHERE_GPU_VERSION
case IndexMode::MODE_GPU: {
auto gpu_device = -1; // TODO: remove hardcode here, get from invoker
auto gpu_device = 0; // TODO: remove hardcode here, get from invoker
if (type == IndexEnum::INDEX_FAISS_BIN_IDMAP) {
return std::make_shared<knowhere::BinaryIDMAP>();
} else if (type == IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
Expand All @@ -116,7 +116,7 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
} else if (type == IndexEnum::INDEX_FAISS_IVFSQ8H) {
return std::make_shared<knowhere::IVFSQHybrid>(gpu_device);
} else {
KNOWHERE_THROW_FORMAT("Invalid index type %s", type.c_str());
KNOWHERE_THROW_FORMAT("Invalid index type %s", std::string(type).c_str());
}
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion knowhere/index/vector_index/gpu/GPUIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#pragma once

#include "knowhere/index/vector_index/VecIndex.h"
#include "knowhere/index/VecIndex.h"
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"

namespace knowhere {
Expand Down
4 changes: 4 additions & 0 deletions knowhere/index/vector_index/gpu/IndexGPUIDMAP.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ using Graph = std::vector<std::vector<int64_t>>;

class GPUIDMAP : public IDMAP, public GPUIndex {
public:
explicit GPUIDMAP(const int& device_id) : IDMAP(), GPUIndex(device_id) {
index_mode_ = IndexMode::MODE_GPU;
}

explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t device_id, ResPtr& res)
: IDMAP(std::move(index)), GPUIndex(device_id, res) {
index_mode_ = IndexMode::MODE_GPU;
Expand Down
4 changes: 2 additions & 2 deletions knowhere/index/vector_index/gpu/IndexGPUIVF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ GPUIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFFlatConfig idx_config;
idx_config.device = static_cast<int32_t>(gpu_id_);
int32_t nlist = config[IndexParams::nlist];
int32_t nlist = GetIndexParamNlist(config);
faiss::MetricType metric_type = GetMetricType(config);
index_ = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(gpu_res->faiss_res.get(), dim, nlist, metric_type,
idx_config);
Expand Down Expand Up @@ -151,7 +151,7 @@ GPUIVF::QueryImpl(int64_t n,
const faiss::BitsetView bitset) {
auto device_index = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_);
if (device_index) {
device_index->nprobe = std::min(static_cast<int>(config[IndexParams::nprobe]), device_index->nlist);
device_index->nprobe = std::min(static_cast<int>(GetIndexParamNprobe(config)), device_index->nlist);
ResScope rs(res_, gpu_id_);

// if query size > 2048 we search by blocks to avoid malloc issue
Expand Down
10 changes: 5 additions & 5 deletions knowhere/index/vector_index/gpu/IndexGPUIVFPQ.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ GPUIVFPQ::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFPQConfig idx_config;
idx_config.device = static_cast<int32_t>(gpu_id_);
int32_t nlist = config[IndexParams::nlist];
int32_t m = config[IndexParams::m];
int32_t nbits = config[IndexParams::nbits];
int32_t nlist = GetIndexParamNlist(config);
int32_t m = GetIndexParamM(config);
int32_t nbits = GetIndexParamNbits(config);
faiss::MetricType metric_type = GetMetricType(config);
index_ = std::make_shared<faiss::gpu::GpuIndexIVFPQ>(gpu_res->faiss_res.get(), dim, nlist, m, nbits,
metric_type, idx_config);
device_index->train(rows, reinterpret_cast<const float*>(p_data));
index_->train(rows, reinterpret_cast<const float*>(p_data));
res_ = gpu_res;
} else {
KNOWHERE_THROW_MSG("Build IVFPQ can't get gpu resource");
Expand All @@ -59,7 +59,7 @@ GPUIVFPQ::CopyGpuToCpu(const Config& config) {
std::shared_ptr<faiss::IVFSearchParameters>
GPUIVFPQ::GenParams(const Config& config) {
auto params = std::make_shared<faiss::IVFPQSearchParameters>();
params->nprobe = config[IndexParams::nprobe];
params->nprobe = GetIndexParamNprobe(config);
// params->scan_table_threshold = config["scan_table_threhold"]
// params->polysemous_ht = config["polysemous_ht"]
// params->max_codes = config["max_codes"]
Expand Down
2 changes: 1 addition & 1 deletion knowhere/index/vector_index/gpu/IndexGPUIVFSQ.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ GPUIVFSQ::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFScalarQuantizerConfig idx_config;
idx_config.device = static_cast<int32_t>(gpu_id_);
int32_t nlist = config[IndexParams::nlist];
int32_t nlist = GetIndexParamNlist(config);
faiss::MetricType metric_type = GetMetricType(config);
index_ = std::make_shared<faiss::gpu::GpuIndexIVFScalarQuantizer>(
gpu_res->faiss_res.get(), dim, nlist, faiss::QuantizerType::QT_8bit, metric_type, true, idx_config);
Expand Down
12 changes: 5 additions & 7 deletions knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

#include <faiss/IndexSQHybrid.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuIndexIVF.h>
#include <faiss/index_factory.h>
#include <string>
#include <utility>

#include <faiss/IndexSQHybrid.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuIndexIVFSQHybrid.h>

#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h"
Expand All @@ -24,7 +24,6 @@

namespace knowhere {

#ifdef KNOWHERE_GPU_VERSION

void
IVFSQHybrid::Train(const DatasetPtr& dataset_ptr, const Config& config) {
Expand All @@ -36,7 +35,7 @@ IVFSQHybrid::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFSQHybridConfig idx_config;
idx_config.device = static_cast<int32_t>(gpu_id_);
int32_t nlist = config[IndexParams::nlist];
int32_t nlist = GetIndexParamNlist(config);
faiss::MetricType metric_type = GetMetricType(config);
index_ = std::make_shared<faiss::gpu::GpuIndexIVFSQHybrid>(
gpu_res->faiss_res.get(), dim, nlist, faiss::QuantizerType::QT_8bit, metric_type, true, idx_config);
Expand Down Expand Up @@ -275,6 +274,5 @@ FaissIVFQuantizer::~FaissIVFQuantizer() {
}
}

#endif

} // namespace knowhere
2 changes: 1 addition & 1 deletion knowhere/index/vector_index/helpers/Cloner.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#pragma once

#include "knowhere/index/vector_index/VecIndex.h"
#include "knowhere/index/VecIndex.h"

namespace knowhere::cloner {

Expand Down
3 changes: 1 addition & 2 deletions knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
#include <utility>

#include <faiss/gpu/StandardGpuResources.h>

#include "utils/BlockingQueue.h"
#include "knowhere/common/BlockingQueue.h"

namespace knowhere {

Expand Down
6 changes: 5 additions & 1 deletion knowhere/index/vector_offset_index/IndexIVF_NM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ IVF_NM::Load(const BinarySet& binary_set) {
auto invlists = ivf_index->invlists;
auto d = ivf_index->d;
prefix_sum_ = std::shared_ptr<size_t[]>(new size_t[invlists->nlist]);
size_t curr_index = 0;

#if 0
if (STATISTICS_LEVEL >= 3) {
Expand All @@ -76,7 +77,6 @@ IVF_NM::Load(const BinarySet& binary_set) {
auto ails = dynamic_cast<faiss::ArrayInvertedLists*>(invlists);
size_t nb = binary->size / invlists->code_size;
auto arranged_data = new float[d * nb];
size_t curr_index = 0;
for (size_t i = 0; i < invlists->nlist; i++) {
auto list_size = ails->ids[i].size();
for (size_t j = 0; j < list_size; j++) {
Expand Down Expand Up @@ -353,7 +353,11 @@ IVF_NM::QueryByRangeImpl(int64_t n,
radius *= radius;
}

#ifndef KNOWHERE_GPU_VERSION
auto arranged_data = data_.get();
#else
auto arranged_data = static_cast<uint8_t*>(ro_codes_->data);
#endif

faiss::RangeSearchResult res(n);
ivf_index->range_search_without_codes(n, xq, arranged_data, prefix_sum_.get(), radius, &res, bitset);
Expand Down
32 changes: 25 additions & 7 deletions knowhere/index/vector_offset_index/gpu/IndexGPUIVF_NM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ GPUIVF_NM::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFFlatConfig idx_config;
idx_config.device = gpu_id_;
int32_t nlist = config[IndexParams::nlist];
int32_t nlist = GetIndexParamNlist(config);
faiss::MetricType metric_type = GetMetricType(config);
auto device_index =
new faiss::gpu::GpuIndexIVFFlat(gpu_res->faiss_res.get(), dim, nlist, metric_type, idx_config);
Expand All @@ -61,11 +61,6 @@ GPUIVF_NM::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
}
}

void
GPUIVF_NM::Load(const BinarySet& binary_set) {
// not supported
}

VecIndexPtr
GPUIVF_NM::CopyGpuToCpu(const Config& config) {
auto device_idx = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_);
Expand Down Expand Up @@ -125,6 +120,29 @@ GPUIVF_NM::SerializeImpl(const IndexType& type) {
}
}

void
GPUIVF_NM::LoadImpl(const BinarySet& binary_set, const IndexType& type) {
    // Deserialize the "IVF" blob into a CPU-side faiss index, then clone it
    // onto the configured GPU device.  `type` is unused: the serialized blob
    // already identifies the concrete index kind.
    auto binary = binary_set.GetByName("IVF");
    MemoryIOReader reader;
    reader.total = binary->size;
    reader.data_ = binary->data.get();

    faiss::Index* index = faiss::read_index(&reader);

    auto temp_res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_);
    if (!temp_res) {
        // Free the CPU-side index before throwing; the original code skipped
        // `delete index` on this path and leaked it.
        delete index;
        KNOWHERE_THROW_MSG("Load error, can't get gpu resource");
    }

    ResScope rs(temp_res, gpu_id_, false);
    auto device_index = faiss::gpu::index_cpu_to_gpu(temp_res->faiss_res.get(), gpu_id_, index);
    index_.reset(device_index);
    res_ = temp_res;

    // The GPU clone owns its own data; the CPU original is no longer needed.
    delete index;
}

void
GPUIVF_NM::QueryImpl(int64_t n,
const float* data,
Expand All @@ -135,7 +153,7 @@ GPUIVF_NM::QueryImpl(int64_t n,
const faiss::BitsetView bitset) {
auto device_index = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_);
if (device_index) {
device_index->nprobe = config[IndexParams::nprobe];
device_index->nprobe = GetIndexParamNprobe(config);
ResScope rs(res_, gpu_id_);

// if query size > 2048 we search by blocks to avoid malloc issue
Expand Down
22 changes: 21 additions & 1 deletion python/knowhere/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,24 @@ def CreateIndex(index_name):
return IVFSQ()
if index_name == "hnsw":
return IndexHNSW()
raise ValueError("index name only support 'annoy' 'ivf' 'ivfsq' 'hnsw'.")
if index_name == "gpu_ivf":
return GPUIVF(-1)
if index_name == "gpu_ivfpq":
return GPUIVFPQ(-1)
if index_name == "gpu_ivfsq":
return GPUIVFSQ(-1)
raise ValueError(
""" index name only support
'annoy' 'ivf' 'ivfsq' 'hnsw'
'gpu_ivf', 'gpu_ivfsq', 'gpu_ivfpq'."""
)


class GpuContext:
    # Lifetime wrapper for the knowhere GPU resource pool: acquires GPU
    # resources on construction and releases them when the object is
    # garbage-collected.  InitGpuResource / ReleaseGpuResource are
    # presumably the SWIG bindings exported by this module -- TODO confirm.
    def __init__(
        self, dev_id=0, pin_mem=200 * 1024 * 1024, temp_mem=300 * 1024 * 1024, res_num=2
    ):
        # dev_id: GPU device ordinal.
        # pin_mem / temp_mem: pinned and temporary memory budgets in bytes
        #   (defaults: 200 MiB pinned, 300 MiB temporary).
        # res_num: number of GPU resource slots to create.
        InitGpuResource(dev_id, pin_mem, temp_mem, res_num)

    def __del__(self):
        # NOTE(review): __del__ can run during interpreter shutdown when
        # module globals may already be cleared; verify ReleaseGpuResource
        # is still reachable at that point.
        ReleaseGpuResource()
Loading

0 comments on commit 477cff5

Please sign in to comment.