Skip to content
This repository has been archived by the owner on Aug 16, 2023. It is now read-only.

Commit

Permalink
Knowhere GPU support (#191)
Browse files Browse the repository at this point in the history
  • Loading branch information
Presburger authored May 26, 2022
1 parent 837ca20 commit 477cff5
Show file tree
Hide file tree
Showing 33 changed files with 316 additions and 161 deletions.
1 change: 1 addition & 0 deletions knowhere/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

include_directories(${KNOWHERE_SOURCE_DIR}/knowhere)
include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty)
include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty/faiss)
include_directories(${KNOWHERE_SOURCE_DIR}/knowhere/utils)

set(KNOWHERE_THIRDPARTY_SRC ${KNOWHERE_SOURCE_DIR}/thirdparty)
Expand Down
95 changes: 95 additions & 0 deletions knowhere/common/BlockingQueue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

#pragma once

#include <assert.h>
#include <condition_variable>
#include <iostream>
#include <queue>
#include <vector>

namespace knowhere {

template <typename T>
class BlockingQueue {
 public:
    BlockingQueue() : mtx(), full_(), empty_() {
    }

    virtual ~BlockingQueue() {
    }

    // Non-copyable: the queue owns a mutex and condition variables.
    BlockingQueue(const BlockingQueue& rhs) = delete;

    BlockingQueue&
    operator=(const BlockingQueue& rhs) = delete;

    // Enqueue `task`, blocking while the queue is at capacity.
    void
    Put(const T& task) {
        std::unique_lock<std::mutex> lock(mtx);
        full_.wait(lock, [this] { return (queue_.size() < capacity_); });
        queue_.push(task);
        empty_.notify_all();
    }

    // Dequeue and return the oldest element, blocking while the queue is
    // empty.
    T
    Take() {
        std::unique_lock<std::mutex> lock(mtx);
        empty_.wait(lock, [this] { return !queue_.empty(); });
        // Move instead of copy: the element is popped immediately after,
        // so stealing its state is safe and avoids a deep copy for heavy T.
        T front(std::move(queue_.front()));
        queue_.pop();
        full_.notify_all();
        return front;
    }

    // Return a copy of the oldest element without removing it, blocking
    // while the queue is empty.
    T
    Front() {
        std::unique_lock<std::mutex> lock(mtx);
        empty_.wait(lock, [this] { return !queue_.empty(); });
        T front(queue_.front());
        return front;
    }

    // Return a copy of the newest element without removing it, blocking
    // while the queue is empty.
    T
    Back() {
        std::unique_lock<std::mutex> lock(mtx);
        empty_.wait(lock, [this] { return !queue_.empty(); });
        T back(queue_.back());
        return back;
    }

    // Number of queued elements at the moment of the call.
    size_t
    Size() const {
        std::lock_guard<std::mutex> lock(mtx);
        return queue_.size();
    }

    bool
    Empty() const {
        // lock_guard is sufficient: no condition-variable wait here.
        std::lock_guard<std::mutex> lock(mtx);
        return queue_.empty();
    }

    // Set the maximum number of queued elements; values <= 0 are ignored.
    void
    SetCapacity(const size_t capacity) {
        // Take the lock: Put() reads capacity_ under mtx, so an unlocked
        // write here would be a data race.  Wake waiting producers in case
        // the capacity grew.
        std::lock_guard<std::mutex> lock(mtx);
        if (capacity > 0) {
            capacity_ = capacity;
            full_.notify_all();
        }
    }

 protected:
    mutable std::mutex mtx;
    std::condition_variable full_;   // signaled when space becomes available
    std::condition_variable empty_;  // signaled when an element is enqueued
    std::queue<T> queue_;
    size_t capacity_ = 32;           // default bound on queued elements
};

} // namespace knowhere
4 changes: 2 additions & 2 deletions knowhere/index/VecIndexFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
}
#ifdef KNOWHERE_GPU_VERSION
case IndexMode::MODE_GPU: {
auto gpu_device = -1; // TODO: remove hardcode here, get from invoker
auto gpu_device = 0; // TODO: remove hardcode here, get from invoker
if (type == IndexEnum::INDEX_FAISS_BIN_IDMAP) {
return std::make_shared<knowhere::BinaryIDMAP>();
} else if (type == IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
Expand All @@ -116,7 +116,7 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
} else if (type == IndexEnum::INDEX_FAISS_IVFSQ8H) {
return std::make_shared<knowhere::IVFSQHybrid>(gpu_device);
} else {
KNOWHERE_THROW_FORMAT("Invalid index type %s", type.c_str());
KNOWHERE_THROW_FORMAT("Invalid index type %s", std::string(type).c_str());
}
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion knowhere/index/vector_index/gpu/GPUIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#pragma once

#include "knowhere/index/vector_index/VecIndex.h"
#include "knowhere/index/VecIndex.h"
#include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"

namespace knowhere {
Expand Down
4 changes: 4 additions & 0 deletions knowhere/index/vector_index/gpu/IndexGPUIDMAP.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ using Graph = std::vector<std::vector<int64_t>>;

class GPUIDMAP : public IDMAP, public GPUIndex {
public:
explicit GPUIDMAP(const int& device_id) : IDMAP(), GPUIndex(device_id) {
index_mode_ = IndexMode::MODE_GPU;
}

explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t device_id, ResPtr& res)
: IDMAP(std::move(index)), GPUIndex(device_id, res) {
index_mode_ = IndexMode::MODE_GPU;
Expand Down
4 changes: 2 additions & 2 deletions knowhere/index/vector_index/gpu/IndexGPUIVF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ GPUIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFFlatConfig idx_config;
idx_config.device = static_cast<int32_t>(gpu_id_);
int32_t nlist = config[IndexParams::nlist];
int32_t nlist = GetIndexParamNlist(config);
faiss::MetricType metric_type = GetMetricType(config);
index_ = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(gpu_res->faiss_res.get(), dim, nlist, metric_type,
idx_config);
Expand Down Expand Up @@ -151,7 +151,7 @@ GPUIVF::QueryImpl(int64_t n,
const faiss::BitsetView bitset) {
auto device_index = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_);
if (device_index) {
device_index->nprobe = std::min(static_cast<int>(config[IndexParams::nprobe]), device_index->nlist);
device_index->nprobe = std::min(static_cast<int>(GetIndexParamNprobe(config)), device_index->nlist);
ResScope rs(res_, gpu_id_);

// if query size > 2048 we search by blocks to avoid malloc issue
Expand Down
10 changes: 5 additions & 5 deletions knowhere/index/vector_index/gpu/IndexGPUIVFPQ.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ GPUIVFPQ::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFPQConfig idx_config;
idx_config.device = static_cast<int32_t>(gpu_id_);
int32_t nlist = config[IndexParams::nlist];
int32_t m = config[IndexParams::m];
int32_t nbits = config[IndexParams::nbits];
int32_t nlist = GetIndexParamNlist(config);
int32_t m = GetIndexParamM(config);
int32_t nbits = GetIndexParamNbits(config);
faiss::MetricType metric_type = GetMetricType(config);
index_ = std::make_shared<faiss::gpu::GpuIndexIVFPQ>(gpu_res->faiss_res.get(), dim, nlist, m, nbits,
metric_type, idx_config);
device_index->train(rows, reinterpret_cast<const float*>(p_data));
index_->train(rows, reinterpret_cast<const float*>(p_data));
res_ = gpu_res;
} else {
KNOWHERE_THROW_MSG("Build IVFPQ can't get gpu resource");
Expand All @@ -59,7 +59,7 @@ GPUIVFPQ::CopyGpuToCpu(const Config& config) {
std::shared_ptr<faiss::IVFSearchParameters>
GPUIVFPQ::GenParams(const Config& config) {
auto params = std::make_shared<faiss::IVFPQSearchParameters>();
params->nprobe = config[IndexParams::nprobe];
params->nprobe = GetIndexParamNprobe(config);
// params->scan_table_threshold = config["scan_table_threhold"]
// params->polysemous_ht = config["polysemous_ht"]
// params->max_codes = config["max_codes"]
Expand Down
2 changes: 1 addition & 1 deletion knowhere/index/vector_index/gpu/IndexGPUIVFSQ.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ GPUIVFSQ::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFScalarQuantizerConfig idx_config;
idx_config.device = static_cast<int32_t>(gpu_id_);
int32_t nlist = config[IndexParams::nlist];
int32_t nlist = GetIndexParamNlist(config);
faiss::MetricType metric_type = GetMetricType(config);
index_ = std::make_shared<faiss::gpu::GpuIndexIVFScalarQuantizer>(
gpu_res->faiss_res.get(), dim, nlist, faiss::QuantizerType::QT_8bit, metric_type, true, idx_config);
Expand Down
12 changes: 5 additions & 7 deletions knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

#include <faiss/IndexSQHybrid.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuIndexIVF.h>
#include <faiss/index_factory.h>
#include <string>
#include <utility>

#include <faiss/IndexSQHybrid.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuIndexIVFSQHybrid.h>

#include "knowhere/common/Exception.h"
#include "knowhere/index/vector_index/adapter/VectorAdapter.h"
#include "knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h"
Expand All @@ -24,7 +24,6 @@

namespace knowhere {

#ifdef KNOWHERE_GPU_VERSION

void
IVFSQHybrid::Train(const DatasetPtr& dataset_ptr, const Config& config) {
Expand All @@ -36,7 +35,7 @@ IVFSQHybrid::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFSQHybridConfig idx_config;
idx_config.device = static_cast<int32_t>(gpu_id_);
int32_t nlist = config[IndexParams::nlist];
int32_t nlist = GetIndexParamNlist(config);
faiss::MetricType metric_type = GetMetricType(config);
index_ = std::make_shared<faiss::gpu::GpuIndexIVFSQHybrid>(
gpu_res->faiss_res.get(), dim, nlist, faiss::QuantizerType::QT_8bit, metric_type, true, idx_config);
Expand Down Expand Up @@ -275,6 +274,5 @@ FaissIVFQuantizer::~FaissIVFQuantizer() {
}
}

#endif

} // namespace knowhere
2 changes: 1 addition & 1 deletion knowhere/index/vector_index/helpers/Cloner.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#pragma once

#include "knowhere/index/vector_index/VecIndex.h"
#include "knowhere/index/VecIndex.h"

namespace knowhere::cloner {

Expand Down
3 changes: 1 addition & 2 deletions knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
#include <utility>

#include <faiss/gpu/StandardGpuResources.h>

#include "utils/BlockingQueue.h"
#include "knowhere/common/BlockingQueue.h"

namespace knowhere {

Expand Down
6 changes: 5 additions & 1 deletion knowhere/index/vector_offset_index/IndexIVF_NM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ IVF_NM::Load(const BinarySet& binary_set) {
auto invlists = ivf_index->invlists;
auto d = ivf_index->d;
prefix_sum_ = std::shared_ptr<size_t[]>(new size_t[invlists->nlist]);
size_t curr_index = 0;

#if 0
if (STATISTICS_LEVEL >= 3) {
Expand All @@ -76,7 +77,6 @@ IVF_NM::Load(const BinarySet& binary_set) {
auto ails = dynamic_cast<faiss::ArrayInvertedLists*>(invlists);
size_t nb = binary->size / invlists->code_size;
auto arranged_data = new float[d * nb];
size_t curr_index = 0;
for (size_t i = 0; i < invlists->nlist; i++) {
auto list_size = ails->ids[i].size();
for (size_t j = 0; j < list_size; j++) {
Expand Down Expand Up @@ -353,7 +353,11 @@ IVF_NM::QueryByRangeImpl(int64_t n,
radius *= radius;
}

#ifndef KNOWHERE_GPU_VERSION
auto arranged_data = data_.get();
#else
auto arranged_data = static_cast<uint8_t*>(ro_codes_->data);
#endif

faiss::RangeSearchResult res(n);
ivf_index->range_search_without_codes(n, xq, arranged_data, prefix_sum_.get(), radius, &res, bitset);
Expand Down
32 changes: 25 additions & 7 deletions knowhere/index/vector_offset_index/gpu/IndexGPUIVF_NM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ GPUIVF_NM::Train(const DatasetPtr& dataset_ptr, const Config& config) {
ResScope rs(gpu_res, gpu_id_, true);
faiss::gpu::GpuIndexIVFFlatConfig idx_config;
idx_config.device = gpu_id_;
int32_t nlist = config[IndexParams::nlist];
int32_t nlist = GetIndexParamNlist(config);
faiss::MetricType metric_type = GetMetricType(config);
auto device_index =
new faiss::gpu::GpuIndexIVFFlat(gpu_res->faiss_res.get(), dim, nlist, metric_type, idx_config);
Expand All @@ -61,11 +61,6 @@ GPUIVF_NM::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
}
}

void
GPUIVF_NM::Load(const BinarySet& binary_set) {
// not supported
}

VecIndexPtr
GPUIVF_NM::CopyGpuToCpu(const Config& config) {
auto device_idx = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_);
Expand Down Expand Up @@ -125,6 +120,29 @@ GPUIVF_NM::SerializeImpl(const IndexType& type) {
}
}

void
GPUIVF_NM::LoadImpl(const BinarySet& binary_set, const IndexType& type) {
    // Deserialize the "IVF" blob into a CPU-side faiss index, then clone it
    // onto the configured GPU device.  `type` is unused: the serialized blob
    // already identifies the concrete index kind.
    auto binary = binary_set.GetByName("IVF");
    MemoryIOReader reader;
    reader.total = binary->size;
    reader.data_ = binary->data.get();

    faiss::Index* index = faiss::read_index(&reader);

    auto temp_res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_);
    if (!temp_res) {
        // Free the CPU-side index before throwing; the original code skipped
        // `delete index` on this path and leaked it.
        delete index;
        KNOWHERE_THROW_MSG("Load error, can't get gpu resource");
    }

    ResScope rs(temp_res, gpu_id_, false);
    auto device_index = faiss::gpu::index_cpu_to_gpu(temp_res->faiss_res.get(), gpu_id_, index);
    index_.reset(device_index);
    res_ = temp_res;

    // The GPU clone owns its own data; the CPU original is no longer needed.
    delete index;
}

void
GPUIVF_NM::QueryImpl(int64_t n,
const float* data,
Expand All @@ -135,7 +153,7 @@ GPUIVF_NM::QueryImpl(int64_t n,
const faiss::BitsetView bitset) {
auto device_index = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_);
if (device_index) {
device_index->nprobe = config[IndexParams::nprobe];
device_index->nprobe = GetIndexParamNprobe(config);
ResScope rs(res_, gpu_id_);

// if query size > 2048 we search by blocks to avoid malloc issue
Expand Down
22 changes: 21 additions & 1 deletion python/knowhere/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,24 @@ def CreateIndex(index_name):
return IVFSQ()
if index_name == "hnsw":
return IndexHNSW()
raise ValueError("index name only support 'annoy' 'ivf' 'ivfsq' 'hnsw'.")
if index_name == "gpu_ivf":
return GPUIVF(-1)
if index_name == "gpu_ivfpq":
return GPUIVFPQ(-1)
if index_name == "gpu_ivfsq":
return GPUIVFSQ(-1)
raise ValueError(
""" index name only support
'annoy' 'ivf' 'ivfsq' 'hnsw'
'gpu_ivf', 'gpu_ivfsq', 'gpu_ivfpq'."""
)


class GpuContext:
    # Lifetime wrapper for the knowhere GPU resource pool: acquires GPU
    # resources on construction and releases them when the object is
    # garbage-collected.  InitGpuResource / ReleaseGpuResource are
    # presumably the SWIG bindings exported by this module -- TODO confirm.
    def __init__(
        self, dev_id=0, pin_mem=200 * 1024 * 1024, temp_mem=300 * 1024 * 1024, res_num=2
    ):
        # dev_id: GPU device ordinal.
        # pin_mem / temp_mem: pinned and temporary memory budgets in bytes
        #   (defaults: 200 MiB pinned, 300 MiB temporary).
        # res_num: number of GPU resource slots to create.
        InitGpuResource(dev_id, pin_mem, temp_mem, res_num)

    def __del__(self):
        # NOTE(review): __del__ can run during interpreter shutdown when
        # module globals may already be cleared; verify ReleaseGpuResource
        # is still reachable at that point.
        ReleaseGpuResource()
Loading

0 comments on commit 477cff5

Please sign in to comment.