Skip to content

Commit

Permalink
External storage interface + rocksdb implementation.
Browse files Browse the repository at this point in the history
  • Loading branch information
bashimao committed Jun 9, 2023
1 parent 460db25 commit 7be9c65
Show file tree
Hide file tree
Showing 6 changed files with 640 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@
path = tests/googletest
url = https://github.com/google/googletest.git
ignore = dirty
[submodule "third_party/rocksdb"]
path = third_party/rocksdb
url = https://github.com/facebook/rocksdb.git
33 changes: 30 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,35 @@ endforeach()

message(CMAKE_CUDA_FLAGS="${CMAKE_CUDA_FLAGS}")

# Sub projects.
add_subdirectory(tests/googletest)

# Pulls the RocksDB sources under third_party/rocksdb into the build with a
# fixed set of its build switches turned OFF. Everything is done inside a
# function so that the variables set here are scoped to the function (and the
# sub project added from it) instead of the calling scope.
function(add_subdirectory_rocksdb)
  # Default CMP0077 to NEW for the sub project so that its option() calls
  # respect the normal variables assigned below.
  set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

  # Build switches forced to OFF before RocksDB's CMakeLists.txt is processed.
  foreach(rocksdb_build_switch IN ITEMS
      WITH_SNAPPY
      WITH_LZ4
      WITH_ZLIB
      WITH_ZSTD
      WITH_GFLAGS
      ROCKSDB_BUILD_SHARED
      WITH_JNI
      WITH_TESTS
      WITH_BENCHMARK_TOOLS
      WITH_CORE_TOOLS
      WITH_TOOLS
      WITH_ALL_TESTS
      WITH_EXAMPLES
      WITH_BENCHMARK)
    set(${rocksdb_build_switch} OFF)
  endforeach()

  add_subdirectory(third_party/rocksdb)
endfunction()
add_subdirectory_rocksdb()

include_directories(
${PROJECT_SOURCE_DIR}/include
${PROJECT_SOURCE_DIR}/third_party/rocksdb/include
${PROJECT_SOURCE_DIR}/tests/googletest/googletest/include
)

ADD_SUBDIRECTORY(tests/googletest)

link_directories(
)

Expand Down Expand Up @@ -134,4 +156,9 @@ TARGET_LINK_LIBRARIES(group_lock_test gtest_main)
add_executable(find_or_insert_ptr_test tests/find_or_insert_ptr_test.cc.cu)
target_compile_features(find_or_insert_ptr_test PUBLIC cxx_std_14)
set_target_properties(find_or_insert_ptr_test PROPERTIES CUDA_ARCHITECTURES OFF)
TARGET_LINK_LIBRARIES(find_or_insert_ptr_test gtest_main)
TARGET_LINK_LIBRARIES(find_or_insert_ptr_test gtest_main)

# Test executable built from tests/ext_storage_rocksdb_test.cc.cu.
add_executable(ext_storage_rocksdb_test tests/ext_storage_rocksdb_test.cc.cu)
# Require at least C++14 for this target.
target_compile_features(ext_storage_rocksdb_test PUBLIC cxx_std_14)
# CUDA_ARCHITECTURES OFF: CMake does not add architecture flags for this
# target itself (presumably they are supplied via CMAKE_CUDA_FLAGS - confirm).
set_target_properties(ext_storage_rocksdb_test PROPERTIES CUDA_ARCHITECTURES OFF)
# Links against gtest_main and, unlike the other test target above, the
# rocksdb target.
TARGET_LINK_LIBRARIES(ext_storage_rocksdb_test gtest_main rocksdb)
102 changes: 102 additions & 0 deletions include/merlin/external_storage.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cstdint>
#include <type_traits>

namespace nv {
namespace merlin {

/**
 * @brief Abstract interface for an external key/value storage backend.
 *
 * Every operation is a pure virtual member function; a concrete backend
 * derives from this class and implements all of them. The destructor is
 * virtual so that a backend can be destroyed safely through a pointer or
 * reference to this interface.
 *
 * NOTE(review): the signatures below use `cudaStream_t`, but this header only
 * includes <cstdint> and <type_traits>. The including translation unit must
 * already have the CUDA runtime declarations in scope.
 *
 * @tparam Key Type of the keys (exposed as `key_type`).
 * @tparam Value Element type of the values (exposed as `value_type`).
 */
template <class Key, class Value>
class ExternalStorage {
 public:
  using size_type = size_t;
  using key_type = Key;
  using value_type = Value;

  /**
   * @brief Virtual destructor. Without it, deleting a derived storage object
   * through an `ExternalStorage` pointer would be undefined behavior.
   */
  virtual ~ExternalStorage() = default;

  /**
   * @brief Inserts key/value pairs into the external storage. If a key/value
   * pair already exists, overwrites the current value.
   *
   * @param n Number of key/value slots provided in other arguments.
   * @param d_masked_keys Device pointer to an (n)-sized array of keys.
   * Key-Value slots that should be ignored have the key set to `EMPTY_KEY`.
   * @param d_values Device pointer to an (n)-sized array containing pointers
   * to respectively a memory location where the current values for a key are
   * stored. Each pointer points to a vector of length `value_dims`. Pointers
   * *can* be set to `nullptr` for slots where the corresponding key equated
   * to the `EMPTY_KEY`. The memory locations can be device or host memory
   * (see also `hkvs_is_pure_hbm`).
   * NOTE(review): the signature takes a flat `const value_type*`, which does
   * not match the "array of pointers" wording above - confirm the intended
   * layout.
   * @param value_dims Length of the value vector associated with each key.
   * @param stream Stream that MUST be used for queuing asynchronous CUDA
   * operations. If only the input arguments or resources obtained from
   * respectively `dev_mem_pool` and `host_mem_pool` are used for such
   * operations, it is not necessary to synchronize the stream prior to
   * returning from the function.
   */
  virtual void insert_or_assign(size_type n,
                                const key_type* d_masked_keys,  // (n)
                                const value_type* d_values,     // (n)
                                size_type value_dims,
                                cudaStream_t stream) = 0;

  /**
   * @brief Attempts to find the supplied `d_keys` if the corresponding
   * `d_founds`-flag is `false`, and fills the stored values into the supplied
   * memory locations (i.e. in `d_values`).
   *
   * @param n Number of key/value slots provided in other arguments.
   * @param d_keys Device pointer to an (n)-sized array of keys.
   * @param d_values Device pointer to an (n * value_dims)-sized array to
   * store the retrieved values. For slots where the corresponding
   * `d_founds`-flag is not `false`, the value may already have been assigned
   * and, thus, MUST not be altered.
   * @param value_dims Length of the value vector associated with each key.
   * @param d_founds Device pointer to an (n)-sized array which indicates
   * whether the corresponding `d_values` slot is already filled or not. So,
   * if and only if `d_founds` is still false, the implementation shall
   * attempt to retrieve and fill in the value for the corresponding key. If a
   * key/value was retrieved successfully from external storage, the
   * implementation MUST also set `d_founds` to `true`.
   * @param stream Stream that MUST be used for queuing asynchronous CUDA
   * operations. If only the input arguments or resources obtained from
   * respectively `dev_mem_pool` and `host_mem_pool` are used for such
   * operations, it is not necessary to synchronize the stream prior to
   * returning from the function.
   *
   * @return A `size_type` count. NOTE(review): the meaning is not specified
   * by this interface; presumably the number of keys that were retrieved from
   * external storage - confirm against the implementations.
   */
  virtual size_type find(size_type n,
                         const key_type* d_keys,  // (n)
                         value_type* d_values,    // (n * value_dims)
                         size_type value_dims,
                         bool* d_founds,  // (n)
                         cudaStream_t stream) const = 0;

  /**
   * @brief Attempts to erase the entries associated with the supplied
   * `d_keys`. For keys that do not exist, nothing happens. It is permissible
   * for this function to be implemented asynchronously (i.e., to return
   * before the actual deletion has happened).
   *
   * @param n Number of keys provided in the `d_keys` argument.
   * @param d_keys Device pointer to an (n)-sized array of keys. This pointer
   * is only guaranteed to be valid for the duration of the call. If erasure
   * is implemented asynchronously, you must make a copy and manage its
   * lifetime yourself.
   * @param stream CUDA stream supplied by the caller. NOTE(review):
   * presumably to be used for queuing asynchronous CUDA operations, as in the
   * other member functions - confirm.
   */
  virtual void erase(size_type n, const key_type* d_keys,
                     cudaStream_t stream) = 0;
};

} // namespace merlin
} // namespace nv
Loading

0 comments on commit 7be9c65

Please sign in to comment.