Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Design of external storage + RocksDB interface #81

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
path = tests/googletest
url = https://github.com/google/googletest.git
ignore = dirty
[submodule "third_party/rocksdb"]
path = third_party/rocksdb
url = https://github.com/facebook/rocksdb.git
33 changes: 30 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,35 @@ endforeach()

message(CMAKE_CUDA_FLAGS="${CMAKE_CUDA_FLAGS}")

# Sub projects.
add_subdirectory(tests/googletest)

# Adds the RocksDB sources under third_party/rocksdb as a sub project with a
# fixed set of its build switches turned OFF. The work is wrapped in a function
# so the variables set here live in the function's scope, which the
# add_subdirectory() call below inherits.
function(add_subdirectory_rocksdb)
  # Policy CMP0077 (NEW) makes option() in the sub project respect the normal
  # variables assigned below.
  set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

  # Every switch listed here is forced OFF before RocksDB is configured.
  foreach(_rocksdb_switch IN ITEMS
      WITH_SNAPPY
      WITH_LZ4
      WITH_ZLIB
      WITH_ZSTD
      WITH_GFLAGS
      ROCKSDB_BUILD_SHARED
      WITH_JNI
      WITH_TESTS
      WITH_BENCHMARK_TOOLS
      WITH_CORE_TOOLS
      WITH_TOOLS
      WITH_ALL_TESTS
      WITH_EXAMPLES
      WITH_BENCHMARK)
    set(${_rocksdb_switch} OFF)
  endforeach()

  add_subdirectory(third_party/rocksdb)
endfunction()
add_subdirectory_rocksdb()

# Directory-wide header search paths: the project's own headers, the RocksDB
# headers from the third_party checkout, and the GoogleTest headers.
include_directories(
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/third_party/rocksdb/include
${PROJECT_SOURCE_DIR}/tests/googletest/googletest/include
)

# NOTE(review): tests/googletest is also added by the add_subdirectory() call
# under "# Sub projects." earlier in this file. Adding the same source
# directory twice is rejected by CMake, so one of the two calls presumably has
# to go - confirm which one is intended to stay.
ADD_SUBDIRECTORY(tests/googletest)

# No additional library search directories are configured.
link_directories(
)

Expand Down Expand Up @@ -134,4 +156,9 @@ TARGET_LINK_LIBRARIES(group_lock_test gtest_main)
# Test executable built from tests/find_or_insert_ptr_test.cc.cu, compiled with
# at least C++14 and linked against gtest_main.
add_executable(find_or_insert_ptr_test tests/find_or_insert_ptr_test.cc.cu)
target_compile_features(find_or_insert_ptr_test PUBLIC cxx_std_14)
# CUDA_ARCHITECTURES is switched OFF for this target; presumably the
# architecture flags come from CMAKE_CUDA_FLAGS instead - TODO confirm.
set_target_properties(find_or_insert_ptr_test PROPERTIES CUDA_ARCHITECTURES OFF)
# NOTE(review): the link line below appears twice in a row; the repetition is
# redundant and looks unintentional - confirm and drop one.
TARGET_LINK_LIBRARIES(find_or_insert_ptr_test gtest_main)
TARGET_LINK_LIBRARIES(find_or_insert_ptr_test gtest_main)

# Test executable built from tests/ext_storage_rocksdb_test.cc.cu, configured
# like the target above but additionally linked against the rocksdb target.
add_executable(ext_storage_rocksdb_test tests/ext_storage_rocksdb_test.cc.cu)
target_compile_features(ext_storage_rocksdb_test PUBLIC cxx_std_14)
set_target_properties(ext_storage_rocksdb_test PROPERTIES CUDA_ARCHITECTURES OFF)
TARGET_LINK_LIBRARIES(ext_storage_rocksdb_test gtest_main rocksdb)
102 changes: 102 additions & 0 deletions include/merlin/external_storage.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cstdint>
#include <type_traits>

namespace nv {
namespace merlin {

/**
 * @brief Abstract interface for an external key/value storage backend.
 *
 * Implementations derive from this class and override the pure virtual
 * `insert_or_assign`, `find` and `erase` member functions.
 *
 * NOTE(review): the member functions take a `cudaStream_t`, but this header
 * does not include a CUDA header itself. The including translation unit must
 * make `cudaStream_t` visible before this header is included.
 *
 * @tparam Key Type of the keys.
 * @tparam Value Type of the value elements.
 */
template <class Key, class Value>
class ExternalStorage {
 public:
  using size_type = size_t;
  using key_type = Key;
  using value_type = Value;

  /**
   * @brief Virtual destructor, so that an implementation can safely be
   * destroyed through a pointer or reference to `ExternalStorage`.
   */
  virtual ~ExternalStorage() = default;

  /**
   * @brief Inserts key/value pairs into the external storage. If a key/value
   * pair already exists, overwrites the current value.
   *
   * @param n Number of key/value slots provided in other arguments.
   * @param d_masked_keys Device pointer to an (n)-sized array of keys.
   * Key-Value slots that should be ignored have the key set to `EMPTY_KEY`.
   * @param d_values Device pointer to an (n)-sized array containing pointers
   * to respectively a memory location where the current values for a key are
   * stored. Each pointer points to a vector of length `value_dims`. Pointers
   * *can* be set to `nullptr` for slots where the corresponding key equated
   * to the `EMPTY_KEY`. The memory locations can be device or host memory
   * (see also `hkvs_is_pure_hbm`).
   * NOTE(review): this text describes an array of pointers, while the
   * declared type is `const value_type*` - confirm which one is intended.
   * @param value_dims Length of the value vector associated with each key
   * (presumably counted in elements of `value_type` - confirm).
   * @param stream Stream that MUST be used for queuing asynchronous CUDA
   * operations. If only the input arguments or resources obtained from
   * respectively `dev_mem_pool` and `host_mem_pool` are used for such
   * operations, it is not necessary to synchronize the stream prior to
   * returning from the function.
   */
  virtual void insert_or_assign(size_type n,
                                const key_type* d_masked_keys,  // (n)
                                const value_type* d_values,     // (n)
                                size_type value_dims, cudaStream_t stream) = 0;

  /**
   * @brief Attempts to find the supplied `d_keys` if the corresponding
   * `d_founds`-flag is `false` and fills the stored values into the supplied
   * memory locations (i.e. in `d_values`).
   *
   * @param n Number of key/value slots provided in other arguments.
   * @param d_keys Device pointer to an (n)-sized array of keys.
   * @param d_values Device pointer to an (n * value_dims)-sized array to
   * store the retrieved values. For slots where the corresponding
   * `d_founds`-flag is not `false`, the value may already have been assigned
   * and, thus, MUST not be altered.
   * @param value_dims Length of the value vector associated with each key
   * (presumably counted in elements of `value_type` - confirm).
   * @param d_founds Device pointer to an (n)-sized array which indicates
   * whether the corresponding `d_values` slot is already filled or not. So,
   * if and only if `d_founds` is still false, the implementation shall
   * attempt to retrieve and fill in the value for the corresponding key. If a
   * key/value was retrieved successfully from external storage, the
   * implementation MUST also set `d_founds` to `true`.
   * @param stream Stream that MUST be used for queuing asynchronous CUDA
   * operations. If only the input arguments or resources obtained from
   * respectively `dev_mem_pool` and `host_mem_pool` are used for such
   * operations, it is not necessary to synchronize the stream prior to
   * returning from the function.
   *
   * @return NOTE(review): the meaning of the returned `size_type` is not
   * specified here; presumably the number of keys retrieved from external
   * storage - confirm.
   */
  virtual size_type find(size_type n,
                         const key_type* d_keys,  // (n)
                         value_type* d_values,    // (n * value_dims)
                         size_type value_dims,
                         bool* d_founds,  // (n)
                         cudaStream_t stream) const = 0;

  /**
   * @brief Attempts to erase the entries associated with the supplied
   * `d_keys`. For keys that do not exist, nothing happens. It is permissible
   * for this function to be implemented asynchronously (i.e., to return
   * before the actual deletion has happened).
   *
   * @param n Number of keys provided in the `d_keys` argument.
   * @param d_keys Device pointer to an (n)-sized array of keys. This pointer
   * is only guaranteed to be valid for the duration of the call. If erasure
   * is implemented asynchronously, you must make a copy and manage its
   * lifetime yourself.
   * @param stream CUDA stream handed to the implementation (presumably to be
   * used for queuing asynchronous CUDA operations, as in the other member
   * functions - confirm).
   */
  virtual void erase(size_type n, const key_type* d_keys,
                     cudaStream_t stream) = 0;
};

} // namespace merlin
} // namespace nv
Loading