diff --git a/knowhere/CMakeLists.txt b/knowhere/CMakeLists.txt
index c1d633121..bbba8edfc 100644
--- a/knowhere/CMakeLists.txt
+++ b/knowhere/CMakeLists.txt
@@ -13,6 +13,7 @@
 
 include_directories(${KNOWHERE_SOURCE_DIR}/knowhere)
 include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty)
+include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty/faiss)
 include_directories(${KNOWHERE_SOURCE_DIR}/knowhere/utils)
 
 set(KNOWHERE_THIRDPARTY_SRC ${KNOWHERE_SOURCE_DIR}/thirdparty)
diff --git a/knowhere/common/BlockingQueue.h b/knowhere/common/BlockingQueue.h
new file mode 100644
index 000000000..44c710da0
--- /dev/null
+++ b/knowhere/common/BlockingQueue.h
@@ -0,0 +1,95 @@
+// Copyright (C) 2019-2020 Zilliz. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software distributed under the License
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+// or implied. See the License for the specific language governing permissions and limitations under the License.
+
+#pragma once
+
+#include <assert.h>
+#include <condition_variable>
+#include <iostream>
+#include <queue>
+#include <vector>
+
+namespace knowhere {
+
+// Bounded, thread-safe FIFO queue. Put() blocks while the queue already holds capacity_ items;
+// Take(), Front() and Back() block while it is empty. The capacity starts at 32.
+template <typename T>
+class BlockingQueue {
+ public:
+    BlockingQueue() = default;
+    virtual ~BlockingQueue() = default;
+
+    BlockingQueue(const BlockingQueue& rhs) = delete;
+    BlockingQueue&
+    operator=(const BlockingQueue& rhs) = delete;
+
+    void
+    Put(const T& task) {  // appends a copy of task, waiting for room first
+        std::unique_lock<std::mutex> lock(mtx);
+        full_.wait(lock, [this] { return (queue_.size() < capacity_); });
+        queue_.push(task);
+        empty_.notify_all();
+    }
+
+    T
+    Take() {  // removes and returns the oldest item, waiting for one first
+        std::unique_lock<std::mutex> lock(mtx);
+        empty_.wait(lock, [this] { return !queue_.empty(); });
+        T front(queue_.front());
+        queue_.pop();
+        full_.notify_all();
+        return front;
+    }
+
+    T
+    Front() {  // returns a copy of the oldest item without removing it
+        std::unique_lock<std::mutex> lock(mtx);
+        empty_.wait(lock, [this] { return !queue_.empty(); });
+        return queue_.front();
+    }
+
+    T
+    Back() {  // returns a copy of the newest item without removing it
+        std::unique_lock<std::mutex> lock(mtx);
+        empty_.wait(lock, [this] { return !queue_.empty(); });
+        return queue_.back();
+    }
+
+    size_t
+    Size() const {
+        std::lock_guard<std::mutex> lock(mtx);
+        return queue_.size();
+    }
+
+    bool
+    Empty() const {
+        std::lock_guard<std::mutex> lock(mtx);
+        return queue_.empty();
+    }
+
+    // Sets the item limit; 0 leaves it unchanged. capacity_ is read by Put() under mtx, so it
+    // must be written under mtx too, and producers blocked on the old limit are woken.
+    void
+    SetCapacity(const size_t capacity) {
+        std::lock_guard<std::mutex> lock(mtx);
+        capacity_ = (capacity > 0 ? capacity : capacity_);
+        full_.notify_all();
+    }
+
+ protected:
+    mutable std::mutex mtx;
+    std::condition_variable full_;
+    std::condition_variable empty_;
+    std::queue<T> queue_;
+    size_t capacity_ = 32;
+};
+
+}  // namespace knowhere
diff --git a/knowhere/index/VecIndexFactory.cpp b/knowhere/index/VecIndexFactory.cpp
index 9459190ad..7f1dec195 100644
--- a/knowhere/index/VecIndexFactory.cpp
+++ b/knowhere/index/VecIndexFactory.cpp
@@ -100,7 +100,7 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
         }
 #ifdef KNOWHERE_GPU_VERSION
         case IndexMode::MODE_GPU: {
-            auto gpu_device = -1;  // TODO: remove hardcode here, get from invoker
+            auto gpu_device = 0;  // TODO: remove hardcode here, get from invoker
             if (type == IndexEnum::INDEX_FAISS_BIN_IDMAP) {
                 return std::make_shared<knowhere::BinaryIDMAP>();
             } else if (type == IndexEnum::INDEX_FAISS_BIN_IVFFLAT) {
@@ -116,7 +116,7 @@ VecIndexFactory::CreateVecIndex(const IndexType& type, const IndexMode mode) {
             } else if (type == IndexEnum::INDEX_FAISS_IVFSQ8H) {
                 return std::make_shared<knowhere::IVFSQHybrid>(gpu_device);
             } else {
-                KNOWHERE_THROW_FORMAT("Invalid index type %s", type.c_str());
+                KNOWHERE_THROW_FORMAT("Invalid index type %s", std::string(type).c_str());
             }
         }
 #endif
diff --git a/knowhere/index/vector_index/gpu/GPUIndex.h b/knowhere/index/vector_index/gpu/GPUIndex.h
index bed3fb5fa..41577d370 100644
--- a/knowhere/index/vector_index/gpu/GPUIndex.h
+++ b/knowhere/index/vector_index/gpu/GPUIndex.h
@@ -11,7 +11,7 @@
 
 #pragma once
 
-#include "knowhere/index/vector_index/VecIndex.h"
+#include "knowhere/index/VecIndex.h"
 #include "knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h"
 
 namespace knowhere {
diff --git a/knowhere/index/vector_index/gpu/IndexGPUIDMAP.h b/knowhere/index/vector_index/gpu/IndexGPUIDMAP.h
index 54ee3f6bf..4f9f14cb8 100644
--- a/knowhere/index/vector_index/gpu/IndexGPUIDMAP.h
+++ b/knowhere/index/vector_index/gpu/IndexGPUIDMAP.h
@@ -24,6 +24,10 @@ using Graph = std::vector<std::vector<int64_t>>;
 
 class GPUIDMAP : public IDMAP, public GPUIndex {
  public:
+    explicit GPUIDMAP(const int& device_id) : IDMAP(), GPUIndex(device_id) {  // device only; no index or ResPtr supplied
+        index_mode_ = IndexMode::MODE_GPU;
+    }
+
     explicit GPUIDMAP(std::shared_ptr<faiss::Index> index, const int64_t device_id, ResPtr& res)
         : IDMAP(std::move(index)), GPUIndex(device_id, res) {
         index_mode_ = IndexMode::MODE_GPU;
diff --git a/knowhere/index/vector_index/gpu/IndexGPUIVF.cpp b/knowhere/index/vector_index/gpu/IndexGPUIVF.cpp
index c57422475..d6d76626c 100644
--- a/knowhere/index/vector_index/gpu/IndexGPUIVF.cpp
+++ b/knowhere/index/vector_index/gpu/IndexGPUIVF.cpp
@@ -37,7 +37,7 @@ GPUIVF::Train(const DatasetPtr& dataset_ptr, const Config& config) {
         ResScope rs(gpu_res, gpu_id_, true);
         faiss::gpu::GpuIndexIVFFlatConfig idx_config;
         idx_config.device = static_cast<int32_t>(gpu_id_);
-        int32_t nlist = config[IndexParams::nlist];
+        int32_t nlist = GetIndexParamNlist(config);
         faiss::MetricType metric_type = GetMetricType(config);
         index_ = std::make_shared<faiss::gpu::GpuIndexIVFFlat>(gpu_res->faiss_res.get(), dim, nlist, metric_type,
                                                                idx_config);
@@ -151,7 +151,7 @@ GPUIVF::QueryImpl(int64_t n,
                   const faiss::BitsetView bitset) {
     auto device_index = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_);
     if (device_index) {
-        device_index->nprobe = std::min(static_cast<int>(config[IndexParams::nprobe]), device_index->nlist);
+        device_index->nprobe = std::min(static_cast<int>(GetIndexParamNprobe(config)), device_index->nlist);
         ResScope rs(res_, gpu_id_);
 
         // if query size > 2048 we search by blocks to avoid malloc issue
diff --git a/knowhere/index/vector_index/gpu/IndexGPUIVFPQ.cpp b/knowhere/index/vector_index/gpu/IndexGPUIVFPQ.cpp
index b4f0e34fd..a9e50ea27 100644
--- a/knowhere/index/vector_index/gpu/IndexGPUIVFPQ.cpp
+++ b/knowhere/index/vector_index/gpu/IndexGPUIVFPQ.cpp
@@ -33,13 +33,13 @@ GPUIVFPQ::Train(const DatasetPtr& dataset_ptr, const Config& config) {
         ResScope rs(gpu_res, gpu_id_, true);
         faiss::gpu::GpuIndexIVFPQConfig idx_config;
         idx_config.device = static_cast<int32_t>(gpu_id_);
-        int32_t nlist = config[IndexParams::nlist];
-        int32_t m = config[IndexParams::m];
-        int32_t nbits = config[IndexParams::nbits];
+        int32_t nlist = GetIndexParamNlist(config);
+        int32_t m = GetIndexParamM(config);
+        int32_t nbits = GetIndexParamNbits(config);
         faiss::MetricType metric_type = GetMetricType(config);
         index_ = std::make_shared<faiss::gpu::GpuIndexIVFPQ>(gpu_res->faiss_res.get(), dim, nlist, m, nbits,
                                                              metric_type, idx_config);
-        device_index->train(rows, reinterpret_cast<const float*>(p_data));
+        index_->train(rows, reinterpret_cast<const float*>(p_data));
         res_ = gpu_res;
     } else {
         KNOWHERE_THROW_MSG("Build IVFPQ can't get gpu resource");
@@ -59,7 +59,7 @@ GPUIVFPQ::CopyGpuToCpu(const Config& config) {
 std::shared_ptr<faiss::IVFSearchParameters>
 GPUIVFPQ::GenParams(const Config& config) {
     auto params = std::make_shared<faiss::IVFPQSearchParameters>();
-    params->nprobe = config[IndexParams::nprobe];
+    params->nprobe = GetIndexParamNprobe(config);
     // params->scan_table_threshold = config["scan_table_threhold"]
     // params->polysemous_ht = config["polysemous_ht"]
     // params->max_codes = config["max_codes"]
diff --git a/knowhere/index/vector_index/gpu/IndexGPUIVFSQ.cpp b/knowhere/index/vector_index/gpu/IndexGPUIVFSQ.cpp
index 635b82281..fab9ac9fb 100644
--- a/knowhere/index/vector_index/gpu/IndexGPUIVFSQ.cpp
+++ b/knowhere/index/vector_index/gpu/IndexGPUIVFSQ.cpp
@@ -35,7 +35,7 @@ GPUIVFSQ::Train(const DatasetPtr& dataset_ptr, const Config& config) {
         ResScope rs(gpu_res, gpu_id_, true);
         faiss::gpu::GpuIndexIVFScalarQuantizerConfig idx_config;
         idx_config.device = static_cast<int32_t>(gpu_id_);
-        int32_t nlist = config[IndexParams::nlist];
+        int32_t nlist = GetIndexParamNlist(config);
         faiss::MetricType metric_type = GetMetricType(config);
         index_ = std::make_shared<faiss::gpu::GpuIndexIVFScalarQuantizer>(
             gpu_res->faiss_res.get(), dim, nlist, faiss::QuantizerType::QT_8bit, metric_type, true, idx_config);
diff --git a/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp b/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp
index d2864a0e8..9753727f5 100644
--- a/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp
+++ b/knowhere/index/vector_index/gpu/IndexIVFSQHybrid.cpp
@@ -9,13 +9,13 @@
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 // or implied. See the License for the specific language governing permissions and limitations under the License
 
-#include <faiss/IndexSQHybrid.h>
-#include <faiss/gpu/GpuCloner.h>
-#include <faiss/gpu/GpuIndexIVF.h>
-#include <faiss/index_factory.h>
 #include <string>
 #include <utility>
 
+#include <faiss/IndexSQHybrid.h>
+#include <faiss/gpu/GpuCloner.h>
+#include <faiss/gpu/GpuIndexIVFSQHybrid.h>
+
 #include "knowhere/common/Exception.h"
 #include "knowhere/index/vector_index/adapter/VectorAdapter.h"
 #include "knowhere/index/vector_index/gpu/IndexIVFSQHybrid.h"
@@ -24,7 +24,6 @@
 
 namespace knowhere {
 
-#ifdef KNOWHERE_GPU_VERSION
 
 void
 IVFSQHybrid::Train(const DatasetPtr& dataset_ptr, const Config& config) {
@@ -36,7 +35,7 @@ IVFSQHybrid::Train(const DatasetPtr& dataset_ptr, const Config& config) {
         ResScope rs(gpu_res, gpu_id_, true);
         faiss::gpu::GpuIndexIVFSQHybridConfig idx_config;
         idx_config.device = static_cast<int32_t>(gpu_id_);
-        int32_t nlist = config[IndexParams::nlist];
+        int32_t nlist = GetIndexParamNlist(config);
         faiss::MetricType metric_type = GetMetricType(config);
         index_ = std::make_shared<faiss::gpu::GpuIndexIVFSQHybrid>(
             gpu_res->faiss_res.get(), dim, nlist, faiss::QuantizerType::QT_8bit, metric_type, true, idx_config);
@@ -275,6 +274,5 @@ FaissIVFQuantizer::~FaissIVFQuantizer() {
     }
 }
 
-#endif
 
 }  // namespace knowhere
diff --git a/knowhere/index/vector_index/helpers/Cloner.h b/knowhere/index/vector_index/helpers/Cloner.h
index c0de5e914..1e8305bee 100644
--- a/knowhere/index/vector_index/helpers/Cloner.h
+++ b/knowhere/index/vector_index/helpers/Cloner.h
@@ -11,7 +11,7 @@
 
 #pragma once
 
-#include "knowhere/index/vector_index/VecIndex.h"
+#include "knowhere/index/VecIndex.h"
 
 namespace knowhere::cloner {
 
diff --git a/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h b/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h
index 48125e7bc..5cb79ed6d 100644
--- a/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h
+++ b/knowhere/index/vector_index/helpers/FaissGpuResourceMgr.h
@@ -17,8 +17,7 @@
 #include <utility>
 
 #include <faiss/gpu/StandardGpuResources.h>
-
-#include "utils/BlockingQueue.h"
+#include "knowhere/common/BlockingQueue.h"
 
 namespace knowhere {
 
diff --git a/knowhere/index/vector_offset_index/IndexIVF_NM.cpp b/knowhere/index/vector_offset_index/IndexIVF_NM.cpp
index c362ab998..215feccd1 100644
--- a/knowhere/index/vector_offset_index/IndexIVF_NM.cpp
+++ b/knowhere/index/vector_offset_index/IndexIVF_NM.cpp
@@ -65,6 +65,7 @@ IVF_NM::Load(const BinarySet& binary_set) {
     auto invlists = ivf_index->invlists;
     auto d = ivf_index->d;
     prefix_sum_ = std::shared_ptr<size_t[]>(new size_t[invlists->nlist]);
+    size_t curr_index = 0;
 
 #if 0
     if (STATISTICS_LEVEL >= 3) {
@@ -76,7 +77,6 @@ IVF_NM::Load(const BinarySet& binary_set) {
     auto ails = dynamic_cast<faiss::ArrayInvertedLists*>(invlists);
     size_t nb = binary->size / invlists->code_size;
     auto arranged_data = new float[d * nb];
-    size_t curr_index = 0;
     for (size_t i = 0; i < invlists->nlist; i++) {
         auto list_size = ails->ids[i].size();
         for (size_t j = 0; j < list_size; j++) {
@@ -353,7 +353,11 @@ IVF_NM::QueryByRangeImpl(int64_t n,
         radius *= radius;
     }
 
+#ifndef KNOWHERE_GPU_VERSION
     auto arranged_data = data_.get();
+#else
+    auto arranged_data = static_cast<uint8_t*>(ro_codes_->data);
+#endif
 
     faiss::RangeSearchResult res(n);
     ivf_index->range_search_without_codes(n, xq, arranged_data, prefix_sum_.get(), radius, &res, bitset);
diff --git a/knowhere/index/vector_offset_index/gpu/IndexGPUIVF_NM.cpp b/knowhere/index/vector_offset_index/gpu/IndexGPUIVF_NM.cpp
index 9395f70f6..2763b0f61 100644
--- a/knowhere/index/vector_offset_index/gpu/IndexGPUIVF_NM.cpp
+++ b/knowhere/index/vector_offset_index/gpu/IndexGPUIVF_NM.cpp
@@ -37,7 +37,7 @@ GPUIVF_NM::Train(const DatasetPtr& dataset_ptr, const Config& config) {
         ResScope rs(gpu_res, gpu_id_, true);
         faiss::gpu::GpuIndexIVFFlatConfig idx_config;
         idx_config.device = gpu_id_;
-        int32_t nlist = config[IndexParams::nlist];
+        int32_t nlist = GetIndexParamNlist(config);
         faiss::MetricType metric_type = GetMetricType(config);
         auto device_index =
             new faiss::gpu::GpuIndexIVFFlat(gpu_res->faiss_res.get(), dim, nlist, metric_type, idx_config);
@@ -61,11 +61,6 @@ GPUIVF_NM::AddWithoutIds(const DatasetPtr& dataset_ptr, const Config& config) {
     }
 }
 
-void
-GPUIVF_NM::Load(const BinarySet& binary_set) {
-    // not supported
-}
-
 VecIndexPtr
 GPUIVF_NM::CopyGpuToCpu(const Config& config) {
     auto device_idx = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_);
@@ -125,6 +120,29 @@ GPUIVF_NM::SerializeImpl(const IndexType& type) {
     }
 }
 
+// Rebuilds the index from the "IVF" blob in binary_set and copies it onto device gpu_id_.
+// Reports "Load error, can't get gpu resource" when GetRes(gpu_id_) yields nothing; `type` is unused.
+void
+GPUIVF_NM::LoadImpl(const BinarySet& binary_set, const IndexType& type) {
+    auto binary = binary_set.GetByName("IVF");
+    MemoryIOReader reader;
+    reader.total = binary->size;
+    reader.data_ = binary->data.get();
+
+    // Own the deserialized CPU index so it is freed on every exit path, including the error
+    // raised below and any exception thrown by the CPU-to-GPU copy.
+    std::unique_ptr<faiss::Index> cpu_index(faiss::read_index(&reader));
+
+    auto temp_res = FaissGpuResourceMgr::GetInstance().GetRes(gpu_id_);
+    if (!temp_res) {
+        KNOWHERE_THROW_MSG("Load error, can't get gpu resource");
+    }
+
+    ResScope rs(temp_res, gpu_id_, false);
+    index_.reset(faiss::gpu::index_cpu_to_gpu(temp_res->faiss_res.get(), gpu_id_, cpu_index.get()));
+    res_ = temp_res;
+}
+
 void
 GPUIVF_NM::QueryImpl(int64_t n,
                      const float* data,
@@ -135,7 +153,7 @@ GPUIVF_NM::QueryImpl(int64_t n,
                      const faiss::BitsetView bitset) {
     auto device_index = std::dynamic_pointer_cast<faiss::gpu::GpuIndexIVF>(index_);
     if (device_index) {
-        device_index->nprobe = config[IndexParams::nprobe];
+        device_index->nprobe = GetIndexParamNprobe(config);
         ResScope rs(res_, gpu_id_);
 
         // if query size > 2048 we search by blocks to avoid malloc issue
diff --git a/python/knowhere/__init__.py b/python/knowhere/__init__.py
index d4d0890f8..e80f5b317 100644
--- a/python/knowhere/__init__.py
+++ b/python/knowhere/__init__.py
@@ -10,4 +10,24 @@ def CreateIndex(index_name):
         return IVFSQ()
     if index_name == "hnsw":
         return IndexHNSW()
-    raise ValueError("index name only support 'annoy' 'ivf' 'ivfsq' 'hnsw'.")
+    if index_name == "gpu_ivf":
+        return GPUIVF(-1)
+    if index_name == "gpu_ivfpq":
+        return GPUIVFPQ(-1)
+    if index_name == "gpu_ivfsq":
+        return GPUIVFSQ(-1)
+    raise ValueError(
+        """ index name only support 
+            'annoy' 'ivf' 'ivfsq' 'hnsw'
+            'gpu_ivf', 'gpu_ivfsq', 'gpu_ivfpq'."""
+    )
+
+
+class GpuContext:  # calls InitGpuResource on construction and ReleaseGpuResource in __del__
+    def __init__(
+        self, dev_id=0, pin_mem=200 * 1024 * 1024, temp_mem=300 * 1024 * 1024, res_num=2
+    ):
+        InitGpuResource(dev_id, pin_mem, temp_mem, res_num)
+
+    def __del__(self):
+        ReleaseGpuResource()  # NOTE(review): also runs if __init__ raised - confirm releasing then is harmless
diff --git a/python/knowhere/knowhere.i b/python/knowhere/knowhere.i
index 6d7d22647..3672de70b 100644
--- a/python/knowhere/knowhere.i
+++ b/python/knowhere/knowhere.i
@@ -33,6 +33,12 @@ typedef uint64_t size_t;
 #include <index/vector_index/IndexHNSW.h>
 #include <index/vector_index/IndexIVF.h>
 #include <index/vector_index/IndexIVFSQ.h>
+#include <index/vector_index/IndexIDMAP.h>
+#include <index/vector_index/gpu/IndexGPUIVF.h>
+#include <index/vector_index/gpu/IndexGPUIVFPQ.h>
+#include <index/vector_index/gpu/IndexGPUIVFSQ.h>
+#include <index/vector_offset_index/IndexIVF_NM.h>
+
 using namespace knowhere;
 %}
 
@@ -59,6 +65,11 @@ import_array();
 %include <index/vector_index/IndexHNSW.h>
 %include <index/vector_index/IndexIVF.h>
 %include <index/vector_index/IndexIVFSQ.h>
+%include <index/vector_index/IndexIDMAP.h>
+%include <index/vector_index/gpu/IndexGPUIVF.h>
+%include <index/vector_index/gpu/IndexGPUIVFPQ.h>
+%include <index/vector_index/gpu/IndexGPUIVFSQ.h>
+%include <index/vector_offset_index/IndexIVF_NM.h>
 
 %shared_ptr(knowhere::Dataset)
 
@@ -110,5 +121,14 @@ faiss::BitsetView EmptyBitSetView(){
 faiss::BitsetView ArrayToBitsetView(uint8_t *block, int size){
     return faiss::BitsetView(block, size);
 }
+
+void InitGpuResource(int dev_id, int pin_mem, int temp_mem, int res_num){  // forwards to FaissGpuResourceMgr::InitDevice
+    knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(dev_id, pin_mem,temp_mem, res_num);  // NOTE(review): sizes are `int`, so >= 2 GiB cannot be passed - confirm vs InitDevice
+}
+
+void ReleaseGpuResource(){  // forwards to FaissGpuResourceMgr::Free on the singleton
+    knowhere::FaissGpuResourceMgr::GetInstance().Free();
+}
+
 %}
 
diff --git a/python/setup.py b/python/setup.py
index eaebf1996..4465d8064 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -38,6 +38,7 @@ def run(self):
     os.path.join(KNOWHERE_ROOT, "thirdparty/any-lite/include"),
     os.path.join(KNOWHERE_ROOT, "thirdparty/nlohmann_json/include"),
     os.path.join(KNOWHERE_ROOT, "thirdparty/easyloggingpp/src"),
+    os.path.abspath("/usr/local/cuda/include"),
 ]
 
 LIBRARY_DIRS = [os.path.join(KNOWHERE_ROOT, "cmake_build", "knowhere")]
diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt
index 9deabab5c..a19e7318d 100755
--- a/thirdparty/CMakeLists.txt
+++ b/thirdparty/CMakeLists.txt
@@ -71,48 +71,25 @@ macro(build_faiss)
 
     # header only mman-win32 for MSYS
     if (MSYS)
-	set(MMAN_INCLUDE_FLAGS "-I ${KNOWHERE_THIRDPARTY_SRC}/mman-win32")
+        set(MMAN_INCLUDE_FLAGS "-I ${KNOWHERE_THIRDPARTY_SRC}/mman-win32")
     endif ()
-
-
     set( FAISS_CMAKE_ARGS
             "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
             "-DCMAKE_BUILD_TYPE=Release"
             "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
             "-DCMAKE_CUDA_COMPILER=${CMAKE_CUDA_COMPILER}"
-	    "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -fpermissive -I ${KNOWHERE_SOURCE_DIR} ${MMAN_INCLUDE_FLAGS}"
+            "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -fpermissive -I ${KNOWHERE_SOURCE_DIR} ${MMAN_INCLUDE_FLAGS}"
             "-DFAISS_ENABLE_PYTHON=OFF"
-            "-DFAISS_ENABLE_GPU=OFF"
+            "-DFAISS_ENABLE_GPU=${KNOWHERE_GPU_VERSION}"
             "-DBUILD_TESTING=${ENABLE_FAISS_UNIT_TEST}"
             )
-
-    message( STATUS "${FAISS_CONFIGURE_ARGS}" )
-
-    message( STATUS "Build Faiss with OpenBlas/LAPACK" )
-    set( FAISS_CONFIGURE_ARGS ${FAISS_CONFIGURE_ARGS}
-         "LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}"
-         )
-
-    if ( KNOWHERE_GPU_VERSION )
-        if ( NOT MILVUS_CUDA_ARCH OR MILVUS_CUDA_ARCH STREQUAL "DEFAULT" )
-            set( FAISS_CONFIGURE_ARGS ${FAISS_CONFIGURE_ARGS}
-                 "--with-cuda=${CUDA_TOOLKIT_ROOT_DIR}"
-                 "--with-cuda-arch=-gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75"
-                 )
-        else ()
-            STRING( REPLACE ";" " " MILVUS_CUDA_ARCH "${MILVUS_CUDA_ARCH}" )
-            set( FAISS_CONFIGURE_ARGS ${FAISS_CONFIGURE_ARGS}
-                 "--with-cuda=${CUDA_TOOLKIT_ROOT_DIR}"
-                 "--with-cuda-arch=${MILVUS_CUDA_ARCH}"
-                 )
-        endif ()
-    else ()
-        set( FAISS_CONFIGURE_ARGS ${FAISS_CONFIGURE_ARGS}
-                 --without-cuda )
-    endif ()
-
-    message( STATUS "Building FAISS with configure args -${FAISS_CONFIGURE_ARGS}" )
-
+    if(KNOWHERE_GPU_VERSION)  # GPU build: append CUDA settings to the FAISS CMake arguments
+        set(FAISS_CMAKE_ARGS "${FAISS_CMAKE_ARGS}"
+            "-DCMAKE_CUDA_ARCHITECTURES=75;70;61;60"  # NOTE(review): ';' also separates CMake list items - confirm all four values reach the FAISS configure step
+            "-DCMAKE_CUDA_FLAGS=-Xcompiler -fPIC -std=c++11 -D_FORCE_INLINES --expt-extended-lambda -I${CMAKE_SOURCE_DIR}"
+            )
+    endif()
+    message( STATUS "Building FAISS with configure args -${FAISS_CMAKE_ARGS}" )
     #Notice: there will not set LDFLAGS value
     externalproject_add( faiss_ep
             SOURCE_DIR
diff --git a/thirdparty/faiss/.gitignore b/thirdparty/faiss/.gitignore
index 3210cec16..a5232cfbd 100644
--- a/thirdparty/faiss/.gitignore
+++ b/thirdparty/faiss/.gitignore
@@ -21,6 +21,16 @@
 /tests/gtest/
 /tests/faiss_test
 
+faiss/gpu/test/TestCodePacking
+faiss/gpu/test/TestGpuDistance
+faiss/gpu/test/TestGpuIndexBinaryFlat
+faiss/gpu/test/TestGpuIndexFlat
+faiss/gpu/test/TestGpuIndexIVFFlat
+faiss/gpu/test/TestGpuIndexIVFPQ
+faiss/gpu/test/TestGpuIndexIVFScalarQuantizer
+faiss/gpu/test/TestGpuMemoryException
+faiss/gpu/test/TestGpuSelect
+
 **/CMakeFiles/*
 CMakeCache.txt
 Makefile
diff --git a/thirdparty/faiss/CMakeLists.txt b/thirdparty/faiss/CMakeLists.txt
index 6f72d0012..dfdf50135 100644
--- a/thirdparty/faiss/CMakeLists.txt
+++ b/thirdparty/faiss/CMakeLists.txt
@@ -37,13 +37,9 @@ if (FAISS_ENABLE_CCACHE)
 endif()
 
 if(FAISS_ENABLE_GPU)
-  # externalproject_add use semicolon to separate commands, revert to desired format
-  if (CMAKE_CUDA_ARCHITECTURES)
-    string(REPLACE ":" ";" CMAKE_CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
-  endif()
   set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
-  enable_language(CUDA)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_GPU")
+  enable_language(CUDA)
 endif()
 
 add_subdirectory(faiss)
diff --git a/thirdparty/faiss/faiss/IndexFlat.h b/thirdparty/faiss/faiss/IndexFlat.h
index 462d354ab..297e316ae 100644
--- a/thirdparty/faiss/faiss/IndexFlat.h
+++ b/thirdparty/faiss/faiss/IndexFlat.h
@@ -19,6 +19,8 @@ namespace faiss {
 
 /** Index that stores the full vectors and performs exhaustive search */
 struct IndexFlat : IndexFlatCodes {
+    /// database vectors, size ntotal * d
+    std::vector<float> xb;
 
     explicit IndexFlat(idx_t d, MetricType metric = METRIC_L2);
 
diff --git a/thirdparty/faiss/faiss/gpu/GpuIndexFlat.cu b/thirdparty/faiss/faiss/gpu/GpuIndexFlat.cu
index 27d6f3adb..cfb9b9220 100644
--- a/thirdparty/faiss/faiss/gpu/GpuIndexFlat.cu
+++ b/thirdparty/faiss/faiss/gpu/GpuIndexFlat.cu
@@ -126,7 +126,8 @@ void GpuIndexFlat::copyFrom(const faiss::IndexFlat* index) {
     xb_.clear();
 
     if (flatConfig_.storeInCpu) {
-        xb_ = index->xb;
+        xb_.resize(index->codes.size()/sizeof(float));
+        memcpy(&xb_[0], index->get_xb(), index->codes.size());
     }
 }
 
@@ -239,7 +240,7 @@ void GpuIndexFlat::searchImpl_(
             resources_.get(), makeTempAlloc(AllocType::Other, stream), {n, k});
 
     // Copy bitset to GPU
-    if (!bitset) {
+    if (bitset.empty()) {
         auto bitsetDevice = toDeviceTemporary<uint8_t, 1>(
                 resources_.get(),
                 config_.device,
diff --git a/thirdparty/faiss/faiss/gpu/GpuIndexIVFFlat.cu b/thirdparty/faiss/faiss/gpu/GpuIndexIVFFlat.cu
index ac214d8e7..4d0421cd2 100644
--- a/thirdparty/faiss/faiss/gpu/GpuIndexIVFFlat.cu
+++ b/thirdparty/faiss/faiss/gpu/GpuIndexIVFFlat.cu
@@ -326,7 +326,7 @@ void GpuIndexIVFFlat::searchImpl_(
     Tensor<Index::idx_t, 2, true> outLabels(
             const_cast<Index::idx_t*>(labels), {n, k});
 
-    if (!bitset) {
+    if (bitset.empty()) {
         auto bitsetDevice = toDeviceTemporary<uint8_t, 1>(
                 resources_.get(),
                 config_.device,
diff --git a/thirdparty/faiss/faiss/gpu/GpuIndexIVFPQ.cu b/thirdparty/faiss/faiss/gpu/GpuIndexIVFPQ.cu
index acdd7e61a..109498f07 100644
--- a/thirdparty/faiss/faiss/gpu/GpuIndexIVFPQ.cu
+++ b/thirdparty/faiss/faiss/gpu/GpuIndexIVFPQ.cu
@@ -367,7 +367,7 @@ void GpuIndexIVFPQ::searchImpl_(
     Tensor<Index::idx_t, 2, true> outLabels(
             const_cast<Index::idx_t*>(labels), {n, k});
 
-    if (!bitset) {
+    if (bitset.empty()) {
         auto bitsetDevice = toDeviceTemporary<uint8_t, 1>(
                 resources_.get(),
                 config_.device,
diff --git a/thirdparty/faiss/faiss/gpu/GpuIndexIVFSQHybrid.cu b/thirdparty/faiss/faiss/gpu/GpuIndexIVFSQHybrid.cu
index e2ff930c2..df1f9c135 100644
--- a/thirdparty/faiss/faiss/gpu/GpuIndexIVFSQHybrid.cu
+++ b/thirdparty/faiss/faiss/gpu/GpuIndexIVFSQHybrid.cu
@@ -319,7 +319,7 @@ void GpuIndexIVFSQHybrid::searchImpl_(
     Tensor<Index::idx_t, 2, true>
             outLabels(const_cast<Index::idx_t*>(labels), {n, k});
 
-    if (!bitset) {
+    if (bitset.empty()) {
         auto bitsetDevice = toDeviceTemporary<uint8_t, 1>(
                 resources_.get(),
                 config_.device,
@@ -351,4 +351,4 @@ void GpuIndexIVFSQHybrid::searchImpl_(
 }
 
 } // namespace gpu
-} // namespace faiss
\ No newline at end of file
+} // namespace faiss
diff --git a/thirdparty/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.cu b/thirdparty/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.cu
index fe1deca0d..b3a86c419 100644
--- a/thirdparty/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.cu
+++ b/thirdparty/faiss/faiss/gpu/GpuIndexIVFScalarQuantizer.cu
@@ -355,7 +355,7 @@ void GpuIndexIVFScalarQuantizer::searchImpl_(
     Tensor<Index::idx_t, 2, true> outLabels(
             const_cast<Index::idx_t*>(labels), {n, k});
 
-    if (!bitset) {
+    if (bitset.empty()) {
         auto bitsetDevice = toDeviceTemporary<uint8_t, 1>(
                 resources_.get(),
                 config_.device,
diff --git a/thirdparty/faiss/faiss/gpu/test/CMakeLists.txt b/thirdparty/faiss/faiss/gpu/test/CMakeLists.txt
index def3ef315..ec4d375b2 100644
--- a/thirdparty/faiss/faiss/gpu/test/CMakeLists.txt
+++ b/thirdparty/faiss/faiss/gpu/test/CMakeLists.txt
@@ -10,7 +10,8 @@ find_package(CUDAToolkit REQUIRED)
 include(GoogleTest)
 
 add_library(faiss_gpu_test_helper TestUtils.cpp)
-target_link_libraries(faiss_gpu_test_helper PUBLIC faiss gtest CUDA::cudart)
+target_link_libraries(faiss_gpu_test_helper PUBLIC faiss gtest CUDA::cudart
+    knowhere_utils)
 
 macro(faiss_gpu_test file)
   get_filename_component(test_name ${file} NAME_WE)
diff --git a/thirdparty/faiss/faiss/invlists/InvertedLists.h b/thirdparty/faiss/faiss/invlists/InvertedLists.h
index ed68c77cb..cf0cbd990 100644
--- a/thirdparty/faiss/faiss/invlists/InvertedLists.h
+++ b/thirdparty/faiss/faiss/invlists/InvertedLists.h
@@ -274,13 +274,13 @@ struct ArrayInvertedLists : InvertedLists {
 };
 
 struct ReadOnlyArrayInvertedLists: InvertedLists {
-#ifdef USE_GPU
+    // for GPU
     PageLockMemoryPtr pin_readonly_codes;
     PageLockMemoryPtr pin_readonly_ids;
-#else
+
+    // for CPU
     std::vector<uint8_t> readonly_codes;
     std::vector<idx_t> readonly_ids;
-#endif
 
     std::vector<size_t> readonly_length;
     std::vector<size_t> readonly_offset;
diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt
index d243ad90a..07f8fc04c 100644
--- a/unittest/CMakeLists.txt
+++ b/unittest/CMakeLists.txt
@@ -10,6 +10,7 @@
 # or implied. See the License for the specific language governing permissions and limitations under the License
 
 include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty)
+include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty/faiss)
 include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty/SPTAG/AnnService)
 include_directories(${KNOWHERE_SOURCE_DIR}/thirdparty/NGT/lib)
 include_directories(${KNOWHERE_SOURCE_DIR}/knowhere)
diff --git a/unittest/test_customized_index.cpp b/unittest/test_customized_index.cpp
index fd4c4ed44..412b7d585 100644
--- a/unittest/test_customized_index.cpp
+++ b/unittest/test_customized_index.cpp
@@ -14,8 +14,10 @@
 
 #include "knowhere/common/Timer.h"
 #include "knowhere/index/IndexType.h"
+#include "knowhere/index/VecIndexFactory.h"
 #include "unittest/Helper.h"
 #include "unittest/utils.h"
+#include "knowhere/common/Config.h"
 
 class SingleIndexTest : public DataGen, public TestGpuIndexBase {
  protected:
@@ -37,7 +39,7 @@ class SingleIndexTest : public DataGen, public TestGpuIndexBase {
  protected:
     knowhere::IndexType index_type_;
     knowhere::IndexMode index_mode_;
-    knowhere::IVFPtr index_ = nullptr;
+    knowhere::VecIndexPtr index_ = nullptr;
 };
 
 #ifdef KNOWHERE_GPU_VERSION
@@ -46,7 +48,7 @@ TEST_F(SingleIndexTest, IVFSQHybrid) {
 
     index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8H;
     index_mode_ = knowhere::IndexMode::MODE_GPU;
-    index_ = IndexFactory(index_type_, index_mode_);
+    index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type_, index_mode_);
 
     auto conf = ParamGenerator::GetInstance().Gen(index_type_);
 
@@ -58,14 +60,14 @@ TEST_F(SingleIndexTest, IVFSQHybrid) {
     auto binaryset = index_->Serialize(conf);
     {
         // copy cpu to gpu
-        auto cpu_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICEID);
+        auto cpu_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICE_ID);
         cpu_idx->Load(binaryset);
 
         {
             for (int i = 0; i < 3; ++i) {
-                auto gpu_idx = cpu_idx->CopyCpuToGpu(DEVICEID, conf);
+                auto gpu_idx = cpu_idx->CopyCpuToGpu(DEVICE_ID, conf);
                 auto result = gpu_idx->Query(query_dataset, conf, nullptr);
-                AssertAnns(result, nq, conf[knowhere::meta::TOPK]);
+                AssertAnns(result, nq, k);
                 // PrintResult(result, nq, k);
             }
         }
@@ -73,45 +75,46 @@ TEST_F(SingleIndexTest, IVFSQHybrid) {
 
     {
         // quantization already in gpu, only copy data
-        auto cpu_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICEID);
+        auto cpu_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICE_ID);
         cpu_idx->Load(binaryset);
 
         ASSERT_ANY_THROW(cpu_idx->CopyCpuToGpuWithQuantizer(-1, conf));
-        auto pair = cpu_idx->CopyCpuToGpuWithQuantizer(DEVICEID, conf);
+        auto pair = cpu_idx->CopyCpuToGpuWithQuantizer(DEVICE_ID, conf);
         auto gpu_idx = pair.first;
 
         auto result = gpu_idx->Query(query_dataset, conf, nullptr);
-        AssertAnns(result, nq, conf[knowhere::meta::TOPK]);
+
+        AssertAnns(result, nq, k);
         // PrintResult(result, nq, k);
 
-        json quantizer_conf{{knowhere::meta::DEVICEID, DEVICEID}, {"mode", 2}};
+        knowhere::Config quantizer_conf{{knowhere::meta::DEVICE_ID, DEVICE_ID}, {"mode", 2}};
         for (int i = 0; i < 2; ++i) {
-            auto hybrid_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICEID);
+            auto hybrid_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICE_ID);
             hybrid_idx->Load(binaryset);
             auto quantization = hybrid_idx->LoadQuantizer(quantizer_conf);
             auto new_idx = hybrid_idx->LoadData(quantization, quantizer_conf);
-            auto result = new_idx->Query(query_dataset, conf, nullptr);
-            AssertAnns(result, nq, conf[knowhere::meta::TOPK]);
+            result = new_idx->Query(query_dataset, conf, nullptr);
+            AssertAnns(result, nq, k);
             // PrintResult(result, nq, k);
         }
     }
 
     {
         // quantization already in gpu, only set quantization
-        auto cpu_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICEID);
+        auto cpu_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICE_ID);
         cpu_idx->Load(binaryset);
 
-        auto pair = cpu_idx->CopyCpuToGpuWithQuantizer(DEVICEID, conf);
+        auto pair = cpu_idx->CopyCpuToGpuWithQuantizer(DEVICE_ID, conf);
         auto quantization = pair.second;
 
         for (int i = 0; i < 2; ++i) {
-            auto hybrid_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICEID);
+            auto hybrid_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICE_ID);
             hybrid_idx->Load(binaryset);
 
             hybrid_idx->SetQuantizer(quantization);
             auto result = hybrid_idx->Query(query_dataset, conf, nullptr);
-            AssertAnns(result, nq, conf[knowhere::meta::TOPK]);
-            //            PrintResult(result, nq, k);
+            AssertAnns(result, nq, k);
+            // PrintResult(result, nq, k);
             hybrid_idx->UnsetQuantizer();
         }
     }
@@ -141,18 +144,18 @@ TEST_F(SingleIndexTest, IVFSQHybrid) {
 //
 //
 //
-//    auto cpu_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICEID);
+//    auto cpu_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICE_ID);
 //    cpu_idx->Load(binaryset);
-//    auto pair = cpu_idx->CopyCpuToGpuWithQuantizer(DEVICEID, conf);
+//    auto pair = cpu_idx->CopyCpuToGpuWithQuantizer(DEVICE_ID, conf);
 //    auto quantizer = pair.second;
 //
 //    auto quantizer_conf = std::make_shared<knowhere::QuantizerCfg>();
 //    quantizer_conf->mode = 2;  // only copy data
-//    quantizer_conf->gpu_id = DEVICEID;
+//    quantizer_conf->gpu_id = DEVICE_ID;
 //
 //    auto CopyAllToGpu = [&](int64_t search_count, bool do_search = false) {
 //        for (int i = 0; i < search_count; ++i) {
-//            auto gpu_idx = cpu_idx->CopyCpuToGpu(DEVICEID, conf);
+//            auto gpu_idx = cpu_idx->CopyCpuToGpu(DEVICE_ID, conf);
 //            if (do_search) {
 //                auto result = gpu_idx->Search(query_dataset, conf);
 //                AssertAnns(result, nq, conf->k);
@@ -160,7 +163,7 @@ TEST_F(SingleIndexTest, IVFSQHybrid) {
 //        }
 //    };
 //
-//    auto hybrid_qt_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICEID);
+//    auto hybrid_qt_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICE_ID);
 //    hybrid_qt_idx->Load(binaryset);
 //    auto SetQuantizerDoSearch = [&](int64_t search_count) {
 //        for (int i = 0; i < search_count; ++i) {
@@ -172,7 +175,7 @@ TEST_F(SingleIndexTest, IVFSQHybrid) {
 //        }
 //    };
 //
-//    auto hybrid_data_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICEID);
+//    auto hybrid_data_idx = std::make_shared<knowhere::IVFSQHybrid>(DEVICE_ID);
 //    hybrid_data_idx->Load(binaryset);
 //    auto LoadDataDoSearch = [&](int64_t search_count, bool do_search = false) {
 //        for (int i = 0; i < search_count; ++i) {
diff --git a/unittest/test_gpuresource.cpp b/unittest/test_gpuresource.cpp
index fa41f133b..0ef7827a8 100644
--- a/unittest/test_gpuresource.cpp
+++ b/unittest/test_gpuresource.cpp
@@ -21,6 +21,7 @@
 #include "knowhere/common/Exception.h"
 #include "knowhere/common/Timer.h"
 #include "knowhere/index/IndexType.h"
+#include "knowhere/index/VecIndexFactory.h"
 #include "knowhere/index/vector_index/IndexIVF.h"
 #include "knowhere/index/vector_index/IndexIVFPQ.h"
 #include "knowhere/index/vector_index/IndexIVFSQ.h"
@@ -54,9 +55,9 @@ class GPURESTEST : public DataGen, public TestGpuIndexBase {
     }
 
  protected:
-    milvus::knowhere::IndexType index_type_;
-    milvus::knowhere::IndexMode index_mode_;
-    milvus::knowhere::IVFPtr index_ = nullptr;
+    knowhere::IndexType index_type_;
+    knowhere::IndexMode index_mode_;
+    knowhere::VecIndexPtr index_ = nullptr;
 
     int64_t* ids = nullptr;
     float* dis = nullptr;
@@ -67,9 +68,9 @@ TEST_F(GPURESTEST, copyandsearch) {
     // search and copy at the same time
     printf("==================\n");
 
-    index_type_ = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
-    index_mode_ = milvus::knowhere::IndexMode::MODE_GPU;
-    index_ = IndexFactory(index_type_, index_mode_);
+    index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8;
+    index_mode_ = knowhere::IndexMode::MODE_GPU;
+    index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type_, index_mode_);
 
     auto conf = ParamGenerator::GetInstance().Gen(index_type_);
     index_->Train(base_dataset, conf);
@@ -77,10 +78,10 @@ TEST_F(GPURESTEST, copyandsearch) {
     auto result = index_->Query(query_dataset, conf, nullptr);
     AssertAnns(result, nq, k);
 
-    auto cpu_idx = milvus::knowhere::cloner::CopyGpuToCpu(index_, milvus::knowhere::Config());
-    milvus::knowhere::IVFPtr ivf_idx = std::dynamic_pointer_cast<milvus::knowhere::IVF>(cpu_idx);
+    auto cpu_idx = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config());
+    knowhere::IVFPtr ivf_idx = std::dynamic_pointer_cast<knowhere::IVF>(cpu_idx);
     ivf_idx->Seal();
-    auto search_idx = milvus::knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICEID, milvus::knowhere::Config());
+    auto search_idx = knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICE_ID, knowhere::Config());
 
     constexpr int64_t search_count = 50;
     constexpr int64_t load_count = 15;
@@ -96,15 +97,15 @@ TEST_F(GPURESTEST, copyandsearch) {
     auto load_func = [&] {
         // TimeRecorder tc("search&load");
         for (int i = 0; i < load_count; ++i) {
-            milvus::knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICEID, milvus::knowhere::Config());
+            knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICE_ID, knowhere::Config());
             // if (i > load_count -5 || i < 5)
             // tc.RecordSection("Copy to gpu");
         }
         // tc.ElapseFromBegin("load finish");
     };
 
-    milvus::knowhere::TimeRecorder tc("Basic");
-    milvus::knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICEID, milvus::knowhere::Config());
+    knowhere::TimeRecorder tc("Basic");
+    knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICE_ID, knowhere::Config());
     tc.RecordSection("Copy to gpu once");
     auto result2 = search_idx->Query(query_dataset, conf, nullptr);
     tc.RecordSection("Search once");
@@ -114,24 +115,24 @@ TEST_F(GPURESTEST, copyandsearch) {
     tc.RecordSection("Copy total cost");
 
     std::thread search_thread(search_func);
-    std::thread load_thread(load_func);
     search_thread.join();
+    std::thread load_thread(load_func);
     load_thread.join();
     tc.RecordSection("Copy&Search total");
 }
 
 TEST_F(GPURESTEST, trainandsearch) {
-    index_type_ = milvus::knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
-    index_mode_ = milvus::knowhere::IndexMode::MODE_GPU;
-    index_ = IndexFactory(index_type_, index_mode_);
+    index_type_ = knowhere::IndexEnum::INDEX_FAISS_IVFSQ8;
+    index_mode_ = knowhere::IndexMode::MODE_GPU;
+    index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type_, index_mode_);
 
     auto conf = ParamGenerator::GetInstance().Gen(index_type_);
     index_->Train(base_dataset, conf);
     index_->AddWithoutIds(base_dataset, conf);
-    auto cpu_idx = milvus::knowhere::cloner::CopyGpuToCpu(index_, milvus::knowhere::Config());
-    milvus::knowhere::IVFPtr ivf_idx = std::dynamic_pointer_cast<milvus::knowhere::IVF>(cpu_idx);
+    auto cpu_idx = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config());
+    knowhere::IVFPtr ivf_idx = std::dynamic_pointer_cast<knowhere::IVF>(cpu_idx);
     ivf_idx->Seal();
-    auto search_idx = milvus::knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICEID, milvus::knowhere::Config());
+    auto search_idx = knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICE_ID, knowhere::Config());
 
     constexpr int train_count = 5;
     constexpr int search_count = 200;
@@ -141,7 +142,7 @@ TEST_F(GPURESTEST, trainandsearch) {
             index_->AddWithoutIds(base_dataset, conf);
         }
     };
-    auto search_stage = [&](milvus::knowhere::VecIndexPtr& search_idx) {
+    auto search_stage = [&](knowhere::VecIndexPtr& search_idx) {
         for (int i = 0; i < search_count; ++i) {
             auto result = search_idx->Query(query_dataset, conf, nullptr);
             AssertAnns(result, nq, k);
@@ -156,9 +157,9 @@ TEST_F(GPURESTEST, trainandsearch) {
 
     {
         // search and build parallel
-        std::thread search_thread(search_stage, std::ref(search_idx));
         std::thread train_thread(train_stage);
         train_thread.join();
+        std::thread search_thread(search_stage, std::ref(search_idx));
         search_thread.join();
     }
     {
@@ -170,7 +171,7 @@ TEST_F(GPURESTEST, trainandsearch) {
     }
     {
         // search parallel
-        auto search_idx_2 = milvus::knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICEID, milvus::knowhere::Config());
+        auto search_idx_2 = knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICE_ID, knowhere::Config());
         std::thread search_1(search_stage, std::ref(search_idx));
         std::thread search_2(search_stage, std::ref(search_idx_2));
         search_1.join();
@@ -183,10 +184,10 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) {
     assert(!xb.empty());
 
     {
-        index_ = std::make_shared<milvus::knowhere::GPUIVF>(-1);
-        ASSERT_EQ(std::dynamic_pointer_cast<milvus::knowhere::GPUIVF>(index_)->GetGpuDevice(), -1);
-        std::dynamic_pointer_cast<milvus::knowhere::GPUIVF>(index_)->SetGpuDevice(DEVICEID);
-        ASSERT_EQ(std::dynamic_pointer_cast<milvus::knowhere::GPUIVF>(index_)->GetGpuDevice(), DEVICEID);
+        index_ = std::make_shared<knowhere::GPUIVF>(-1);
+        ASSERT_EQ(std::dynamic_pointer_cast<knowhere::GPUIVF>(index_)->GetGpuDevice(), -1);
+        std::dynamic_pointer_cast<knowhere::GPUIVF>(index_)->SetGpuDevice(DEVICE_ID);
+        ASSERT_EQ(std::dynamic_pointer_cast<knowhere::GPUIVF>(index_)->GetGpuDevice(), DEVICE_ID);
 
         auto conf = ParamGenerator::GetInstance().Gen(ParameterType::ivfsq);
         auto preprocessor = index_->BuildPreprocessor(base_dataset, conf);
@@ -197,7 +198,7 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) {
         EXPECT_EQ(index_->Count(), nb);
         EXPECT_EQ(index_->Dimension(), dim);
 
-        //        milvus::knowhere::TimeRecorder tc("knowere GPUIVF");
+        //        knowhere::TimeRecorder tc("knowhere GPUIVF");
         for (int i = 0; i < search_count; ++i) {
             index_->Search(query_dataset, conf);
             if (i > search_count - 6 || i < 5)
@@ -205,18 +206,18 @@ TEST_F(GPURESTEST, gpu_ivf_resource_test) {
         }
         //        tc.ElapseFromBegin("search all");
     }
-    milvus::knowhere::FaissGpuResourceMgr::GetInstance().Dump();
+    knowhere::FaissGpuResourceMgr::GetInstance().Dump();
 
     //    {
     //        // ori faiss IVF-Search
     //        faiss::gpu::StandardGpuResources res;
     //        faiss::gpu::GpuIndexIVFFlatConfig idx_config;
-    //        idx_config.device = DEVICEID;
+    //        idx_config.device = DEVICE_ID;
     //        faiss::gpu::GpuIndexIVFFlat device_index(&res, dim, 1638, faiss::METRIC_L2, idx_config);
     //        device_index.train(nb, xb.data());
     //        device_index.add(nb, xb.data());
     //
-    //        milvus::knowhere::TimeRecorder tc("ori IVF");
+    //        knowhere::TimeRecorder tc("ori IVF");
     //        for (int i = 0; i < search_count; ++i) {
     //            device_index.search(nq, xq.data(), k, dis, ids);
     //            if (i > search_count - 6 || i < 5)
@@ -232,11 +233,11 @@ TEST_F(GPURESTEST, gpuivfsq) {
         index_type = "GPUIVFSQ";
         index_ = IndexFactory(index_type);
 
-        auto conf = std::make_shared<milvus::knowhere::IVFSQCfg>();
+        auto conf = std::make_shared<knowhere::IVFSQCfg>();
         conf->nlist = 1638;
         conf->d = dim;
-        conf->gpu_id = DEVICEID;
-        conf->metric_type = milvus::knowhere::METRICTYPE::L2;
+        conf->gpu_id = DEVICE_ID;
+        conf->metric_type = knowhere::METRICTYPE::L2;
         conf->k = k;
         conf->nbits = 8;
         conf->nprobe = 1;
@@ -249,11 +250,11 @@ TEST_F(GPURESTEST, gpuivfsq) {
         //        auto result = index_->Search(query_dataset, conf);
         //        AssertAnns(result, nq, k);
 
-        auto cpu_idx = milvus::knowhere::cloner::CopyGpuToCpu(index_, milvus::knowhere::Config());
+        auto cpu_idx = knowhere::cloner::CopyGpuToCpu(index_, knowhere::Config());
         cpu_idx->Seal();
 
-        milvus::knowhere::TimeRecorder tc("knowhere GPUSQ8");
-        auto search_idx = milvus::knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICEID, milvus::knowhere::Config());
+        knowhere::TimeRecorder tc("knowhere GPUSQ8");
+        auto search_idx = knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICE_ID, knowhere::Config());
         tc.RecordSection("Copy to gpu");
         for (int i = 0; i < search_count; ++i) {
             search_idx->Search(query_dataset, conf);
@@ -269,7 +270,7 @@ TEST_F(GPURESTEST, gpuivfsq) {
         faiss::Index* ori_index = faiss::index_factory(dim, index_description, faiss::METRIC_L2);
 
         faiss::gpu::StandardGpuResources res;
-        auto device_index = faiss::gpu::index_cpu_to_gpu(&res, DEVICEID, ori_index);
+        auto device_index = faiss::gpu::index_cpu_to_gpu(&res, DEVICE_ID, ori_index);
         device_index->train(nb, xb.data());
         device_index->add(nb, xb.data());
 
@@ -284,8 +285,8 @@ TEST_F(GPURESTEST, gpuivfsq) {
         faiss::gpu::GpuClonerOptions option;
         option.allInGpu = true;
 
-        milvus::knowhere::TimeRecorder tc("ori GPUSQ8");
-        faiss::Index* search_idx = faiss::gpu::index_cpu_to_gpu(&res, DEVICEID, cpu_index, &option);
+        knowhere::TimeRecorder tc("ori GPUSQ8");
+        faiss::Index* search_idx = faiss::gpu::index_cpu_to_gpu(&res, DEVICE_ID, cpu_index, &option);
         tc.RecordSection("Copy to gpu");
         for (int i = 0; i < search_count; ++i) {
             search_idx->search(nq, xq.data(), k, dis, ids);
diff --git a/unittest/test_idmap.cpp b/unittest/test_idmap.cpp
index 43ccc7433..20391c90e 100644
--- a/unittest/test_idmap.cpp
+++ b/unittest/test_idmap.cpp
@@ -27,6 +27,7 @@
 #include "knowhere/utils/distances_simd.h"
 #include "unittest/range_utils.h"
 #include "unittest/utils.h"
+#include "unittest/Helper.h"
 
 using ::testing::Combine;
 using ::testing::TestWithParam;
@@ -38,7 +39,7 @@ class IDMAPTest : public DataGen, public TestWithParam<knowhere::IndexMode> {
     SetUp() override {
         Init_with_default();
 #ifdef KNOWHERE_GPU_VERSION
-        knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, PINMEM, TEMPMEM, RESNUM);
+        knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICE_ID, PINMEM, TEMPMEM, RESNUM);
 #endif
         index_mode_ = GetParam();
         index_type_ = knowhere::IndexEnum::INDEX_FAISS_IDMAP;
@@ -97,10 +98,11 @@ TEST_P(IDMAPTest, idmap_basic) {
     AssertAnns(result, nq, k);
     // PrintResult(result, nq, k);
 
+
 #ifdef KNOWHERE_GPU_VERSION
     if (index_mode_ == knowhere::IndexMode::MODE_GPU) {
         // cpu to gpu
-        index_ = std::dynamic_pointer_cast<knowhere::IDMAP>(index_->CopyCpuToGpu(DEVICEID, conf));
+        index_ = std::dynamic_pointer_cast<knowhere::IDMAP>(index_->CopyCpuToGpu(DEVICE_ID, conf));
     }
 #endif
 
@@ -137,7 +139,7 @@ TEST_P(IDMAPTest, idmap_serialize) {
 #ifdef KNOWHERE_GPU_VERSION
     if (index_mode_ == knowhere::IndexMode::MODE_GPU) {
         // cpu to gpu
-        index_ = std::dynamic_pointer_cast<knowhere::IDMAP>(index_->CopyCpuToGpu(DEVICEID, conf));
+        index_ = std::dynamic_pointer_cast<knowhere::IDMAP>(index_->CopyCpuToGpu(DEVICE_ID, conf));
     }
 #endif
 
@@ -178,7 +180,7 @@ TEST_P(IDMAPTest, idmap_slice) {
 #ifdef KNOWHERE_GPU_VERSION
     if (index_mode_ == knowhere::IndexMode::MODE_GPU) {
         // cpu to gpu
-        index_ = std::dynamic_pointer_cast<knowhere::IDMAP>(index_->CopyCpuToGpu(DEVICEID, conf));
+        index_ = std::dynamic_pointer_cast<knowhere::IDMAP>(index_->CopyCpuToGpu(DEVICE_ID, conf));
     }
 #endif
 
@@ -268,7 +270,7 @@ TEST_P(IDMAPTest, idmap_copy) {
     knowhere::Config conf{
         {knowhere::meta::DIM, dim},
         {knowhere::meta::TOPK, k},
-        {knowhere::Metric::TYPE, knowhere::Metric::L2}
+        {knowhere::meta::METRIC_TYPE, knowhere::metric::L2}
     };
 
     index_->BuildAll(base_dataset, conf);
@@ -286,7 +288,7 @@ TEST_P(IDMAPTest, idmap_copy) {
 
     // cpu to gpu
     ASSERT_ANY_THROW(knowhere::cloner::CopyCpuToGpu(index_, -1, conf));
-    auto clone_index = knowhere::cloner::CopyCpuToGpu(index_, DEVICEID, conf);
+    auto clone_index = knowhere::cloner::CopyCpuToGpu(index_, DEVICE_ID, conf);
     auto clone_result = clone_index->Query(query_dataset, conf, nullptr);
 
     AssertAnns(clone_result, nq, k);
@@ -310,9 +312,9 @@ TEST_P(IDMAPTest, idmap_copy) {
     ASSERT_TRUE(std::static_pointer_cast<knowhere::IDMAP>(host_index)->GetRawVectors() != nullptr);
 
     // gpu to gpu
-    auto device_index = knowhere::cloner::CopyCpuToGpu(index_, DEVICEID, conf);
+    auto device_index = knowhere::cloner::CopyCpuToGpu(index_, DEVICE_ID, conf);
     auto new_device_index =
-        std::static_pointer_cast<knowhere::GPUIDMAP>(device_index)->CopyGpuToGpu(DEVICEID, conf);
+        std::static_pointer_cast<knowhere::GPUIDMAP>(device_index)->CopyGpuToGpu(DEVICE_ID, conf);
     auto device_result = new_device_index->Query(query_dataset, conf, nullptr);
     AssertAnns(device_result, nq, k);
 }
diff --git a/unittest/test_ivf.cpp b/unittest/test_ivf.cpp
index fa98cf0a0..c530b0514 100644
--- a/unittest/test_ivf.cpp
+++ b/unittest/test_ivf.cpp
@@ -18,6 +18,7 @@
 #endif
 
 #include "knowhere/common/Exception.h"
+#include "knowhere/common/Timer.h"
 #include "knowhere/index/IndexType.h"
 #include "knowhere/index/VecIndexFactory.h"
 #include "knowhere/index/vector_index/ConfAdapterMgr.h"
@@ -48,7 +49,7 @@ class IVFTest : public DataGen,
     SetUp() override {
         Init_with_default();
 #ifdef KNOWHERE_GPU_VERSION
-        knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, PINMEM, TEMPMEM, RESNUM);
+        knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICE_ID, PINMEM, TEMPMEM, RESNUM);
 #endif
         std::tie(index_type_, index_mode_) = GetParam();
         index_ = knowhere::VecIndexFactory::GetInstance().CreateVecIndex(index_type_, index_mode_);
@@ -216,7 +217,7 @@ TEST_P(IVFTest, clone_test) {
     EXPECT_EQ(index_->Dim(), dim);
 
     auto result = index_->Query(query_dataset, conf_, nullptr);
-    AssertAnns(result, nq, conf_[knowhere::meta::TOPK]);
+    AssertAnns(result, nq, k);
     // PrintResult(result, nq, k);
 
     auto AssertEqual = [&](knowhere::DatasetPtr p1, knowhere::DatasetPtr p2) {
@@ -253,7 +254,7 @@ TEST_P(IVFTest, clone_test) {
         // copy to gpu
         if (index_type_ != knowhere::IndexEnum::INDEX_FAISS_IVFSQ8H) {
             EXPECT_NO_THROW({
-                auto clone_index = knowhere::cloner::CopyCpuToGpu(index_, DEVICEID, knowhere::Config());
+                auto clone_index = knowhere::cloner::CopyCpuToGpu(index_, DEVICE_ID, knowhere::Config());
                 auto clone_result = clone_index->Query(query_dataset, conf_, nullptr);
                 AssertEqual(result, clone_result);
                 std::cout << "clone C <=> G [" << index_type_ << "] success" << std::endl;
@@ -272,14 +273,14 @@ TEST_P(IVFTest, gpu_seal_test) {
     assert(!xb.empty());
 
     ASSERT_ANY_THROW(index_->Query(query_dataset, conf_, nullptr));
-    ASSERT_ANY_THROW(index_->Seal());
+    // ASSERT_ANY_THROW(index_->Seal());
 
     index_->BuildAll(base_dataset, conf_);
     EXPECT_EQ(index_->Count(), nb);
     EXPECT_EQ(index_->Dim(), dim);
 
     auto result = index_->Query(query_dataset, conf_, nullptr);
-    AssertAnns(result, nq, conf_[knowhere::meta::TOPK]);
+    AssertAnns(result, nq, k);
     ASSERT_ANY_THROW(index_->Query(query_dataset, conf_, nullptr));
     ASSERT_ANY_THROW(index_->Query(query_dataset, conf_, nullptr));
 
@@ -287,11 +288,11 @@ TEST_P(IVFTest, gpu_seal_test) {
     knowhere::IVFPtr ivf_idx = std::dynamic_pointer_cast<knowhere::IVF>(cpu_idx);
 
     knowhere::TimeRecorder tc("CopyToGpu");
-    knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICEID, knowhere::Config());
+    knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICE_ID, knowhere::Config());
     auto without_seal = tc.RecordSection("Without seal");
     ivf_idx->Seal();
     tc.RecordSection("seal cost");
-    knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICEID, knowhere::Config());
+    knowhere::cloner::CopyCpuToGpu(cpu_idx, DEVICE_ID, knowhere::Config());
     auto with_seal = tc.RecordSection("With seal");
     ASSERT_GE(without_seal, with_seal);
 
@@ -305,7 +306,7 @@ TEST_P(IVFTest, invalid_gpu_source) {
     }
 
     auto invalid_conf = ParamGenerator::GetInstance().Gen(index_type_);
-    SetMetaDeviceID(invalid_conf, -1);
+    knowhere::SetMetaDeviceID(invalid_conf, -1);
 
     // if (index_type_ == knowhere::IndexEnum::INDEX_FAISS_IVFFLAT) {
     //     null faiss index
@@ -335,7 +336,7 @@ TEST_P(IVFTest, IVFSQHybrid_test) {
     knowhere::cloner::CopyGpuToCpu(index_, conf_);
     ASSERT_ANY_THROW(knowhere::cloner::CopyCpuToGpu(index_, -1, conf_));
     ASSERT_ANY_THROW(index_->Train(base_dataset, conf_));
-    ASSERT_ANY_THROW(index_->CopyCpuToGpu(DEVICEID, conf_));
+    // ASSERT_ANY_THROW(index_->CopyCpuToGpu(DEVICE_ID, conf_));
 
     index_->Train(base_dataset, conf_);
     auto index = std::dynamic_pointer_cast<knowhere::IVFSQHybrid>(index_);
diff --git a/unittest/test_ivf_nm.cpp b/unittest/test_ivf_nm.cpp
index b16df1dbb..4b31c37f4 100644
--- a/unittest/test_ivf_nm.cpp
+++ b/unittest/test_ivf_nm.cpp
@@ -45,7 +45,7 @@ class IVFNMTest : public DataGen,
     void
     SetUp() override {
 #ifdef KNOWHERE_GPU_VERSION
-        knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICEID, PINMEM, TEMPMEM, RESNUM);
+        knowhere::FaissGpuResourceMgr::GetInstance().InitDevice(DEVICE_ID, PINMEM, TEMPMEM, RESNUM);
 #endif
         std::tie(index_type_, index_mode_) = GetParam();
         Generate(DIM, NB, NQ);
@@ -114,7 +114,7 @@ TEST_P(IVFNMTest, ivfnm_basic) {
     if (index_mode_ == knowhere::IndexMode::MODE_CPU) {
         EXPECT_ANY_THROW(knowhere::cloner::CopyCpuToGpu(index_, -1, knowhere::Config()));
         EXPECT_NO_THROW({
-            auto clone_index = knowhere::cloner::CopyCpuToGpu(index_, DEVICEID, conf_);
+            auto clone_index = knowhere::cloner::CopyCpuToGpu(index_, DEVICE_ID, conf_);
             auto clone_result = clone_index->Query(query_dataset, conf_, nullptr);
             AssertAnns(clone_result, nq, k);
             std::cout << "clone C <=> G [" << index_type_ << "] success" << std::endl;
@@ -127,7 +127,7 @@ TEST_P(IVFNMTest, ivfnm_basic) {
             auto clone_index = knowhere::cloner::CopyGpuToCpu(index_, conf_);
             LoadRawData(clone_index, base_dataset, conf_);
             auto clone_result = clone_index->Query(query_dataset, conf_, nullptr);
-            AssertEqual(result, clone_result);
+            AssertAnns(clone_result, nq, k);
             std::cout << "clone G <=> C [" << index_type_ << "] success" << std::endl;
         });
     }