diff --git a/src/python/library/CMakeLists.txt b/src/python/library/CMakeLists.txt
index 87bfd4050..dd9becb48 100644
--- a/src/python/library/CMakeLists.txt
+++ b/src/python/library/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -38,9 +38,7 @@ if(${TRITON_ENABLE_PYTHON_HTTP})
   file(COPY tritonhttpclient DESTINATION .)
 endif() # TRITON_ENABLE_PYTHON_HTTP
 file(COPY tritonclientutils DESTINATION .)
-if (NOT WIN32)
-  file(COPY tritonshmutils DESTINATION .)
-endif() # NOT WIN32
+file(COPY tritonshmutils DESTINATION .)

 ####################################
 file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/TRITON_VERSION ${TRITON_VERSION})
@@ -91,9 +89,37 @@ add_custom_target(
 )

 #
-# Linux specific Wheel file. Compatible with x86, x64 and aarch64
+# Windows-specific Wheel file.
 #
-if (NOT WIN32)
+if(WIN32)
+  set(WINDOWS_WHEEL_DEPENDS
+        cshm
+        ${WHEEL_DEPENDS}
+  )
+  if (${TRITON_ENABLE_PERF_ANALYZER})
+    set(perf_analyzer_arg --perf-analyzer ${CMAKE_INSTALL_PREFIX}/bin/perf_analyzer)
+  endif()
+  set(windows_wheel_stamp_file "windows_stamp.whl")
+  add_custom_command(
+    OUTPUT "${windows_wheel_stamp_file}"
+    COMMAND python3
+    ARGS
+      "${CMAKE_CURRENT_SOURCE_DIR}/build_wheel.py"
+      --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/windows"
+      --windows
+      ${perf_analyzer_arg}
+    DEPENDS ${WINDOWS_WHEEL_DEPENDS}
+  )
+
+  add_custom_target(
+    windows-client-wheel ALL
+    DEPENDS
+      "${windows_wheel_stamp_file}"
+  )
+else()
+  #
+  # Linux specific Wheel file. Compatible with x86, x64 and aarch64
+  #
   # Can generate linux specific wheel file on linux systems only.
set(LINUX_WHEEL_DEPENDS cshm @@ -102,7 +128,10 @@ if (NOT WIN32) if (${TRITON_ENABLE_PERF_ANALYZER}) set(perf_analyzer_arg --perf-analyzer ${CMAKE_INSTALL_PREFIX}/bin/perf_analyzer) - endif() + endif() # TRITON_ENABLE_PERF_ANALYZER + if (${TRITON_ENABLE_GPU}) + set(gpu_arg --include-gpu-libs) + endif() # TRITON_ENABLE_GPU set(linux_wheel_stamp_file "linux_stamp.whl") add_custom_command( OUTPUT "${linux_wheel_stamp_file}" @@ -112,6 +141,7 @@ if (NOT WIN32) --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/linux" --linux ${perf_analyzer_arg} + ${gpu_arg} DEPENDS ${LINUX_WHEEL_DEPENDS} ) @@ -120,7 +150,7 @@ if (NOT WIN32) DEPENDS "${linux_wheel_stamp_file}" ) -endif() # NOT WIN32 +endif() # WIN32 if(${TRITON_ENABLE_PYTHON_GRPC}) add_dependencies( @@ -128,12 +158,17 @@ if(${TRITON_ENABLE_PYTHON_GRPC}) grpc-service-py-library proto-py-library ) - if (NOT WIN32) + if (WIN32) + add_dependencies( + windows-client-wheel + grpc-service-py-library proto-py-library + ) + else() add_dependencies( linux-client-wheel grpc-service-py-library proto-py-library - ) - endif() # NOT WIN32 + ) + endif() # WIN32 file( GLOB generated-py @@ -147,14 +182,22 @@ if(${TRITON_ENABLE_PYTHON_GRPC}) ) endif() # TRITON_ENABLE_PYTHON_GRPC +# Generic Wheel +set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/generic") install( - CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/generic/triton*.whl\")" + CODE "file(GLOB _Wheel \"${WHEEL_DIR}/triton*.whl\")" + CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" +) + +# Platform-specific wheels +if(WIN32) + set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/windows") +else() + set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/linux") +endif() # WIN32 + +install( + CODE "file(GLOB _Wheel \"${WHEEL_DIR}/triton*.whl\")" CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")" ) -if (NOT WIN32) - install( - CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/linux/triton*.whl\")" - CODE "file(INSTALL \${_Wheel} DESTINATION 
\"${CMAKE_INSTALL_PREFIX}/python\")" - ) -endif() # NOT WIN32 diff --git a/src/python/library/build_wheel.py b/src/python/library/build_wheel.py index d32e7732a..b9b768a91 100755 --- a/src/python/library/build_wheel.py +++ b/src/python/library/build_wheel.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -28,6 +28,7 @@ import argparse import os import pathlib +import platform import re import shutil import subprocess @@ -77,12 +78,25 @@ def sed(pattern, replace, source, dest=None): parser.add_argument( "--dest-dir", type=str, required=True, help="Destination directory." ) - parser.add_argument( + platform_group = parser.add_mutually_exclusive_group() + platform_group.add_argument( "--linux", action="store_true", required=False, help="Include linux specific artifacts.", ) + platform_group.add_argument( + "--windows", + action="store_true", + required=False, + help="Include windows specific artifacts.", + ) + parser.add_argument( + "--include-gpu-libs", + action="store_true", + required=False, + help="Include gpu specific libraries", + ) parser.add_argument( "--perf-analyzer", type=str, @@ -118,7 +132,7 @@ def sed(pattern, replace, source, dest=None): cpdir("tritonhttpclient", os.path.join(FLAGS.whl_dir, "tritonhttpclient")) if os.path.isdir("tritongrpcclient"): cpdir("tritongrpcclient", os.path.join(FLAGS.whl_dir, "tritongrpcclient")) - if FLAGS.linux: + if FLAGS.linux or FLAGS.windows: if os.path.isdir("tritonshmutils"): cpdir("tritonshmutils", os.path.join(FLAGS.whl_dir, "tritonshmutils")) @@ -178,10 +192,11 @@ def sed(pattern, replace, source, dest=None): "tritonclient/utils/libcshm.so", os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/libcshm.so"), ) - cpdir( - 
"tritonclient/utils/cuda_shared_memory", - os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"), - ) + if FLAGS.include_gpu_libs: + cpdir( + "tritonclient/utils/cuda_shared_memory", + os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"), + ) # Copy the pre-compiled perf_analyzer binary if FLAGS.perf_analyzer is not None: @@ -194,6 +209,22 @@ def sed(pattern, replace, source, dest=None): if not os.path.exists(os.path.join(FLAGS.whl_dir, "perf_client")): os.symlink("perf_analyzer", os.path.join(FLAGS.whl_dir, "perf_client")) + if FLAGS.windows: + cpdir( + "tritonclient/utils/shared_memory", + os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory"), + ) + shutil.copyfile( + "tritonclient/utils/Release/cshm.dll", + os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/cshm.dll"), + ) + # FIXME: Enable when Windows supports GPU tensors DLIS-4169 + # if FLAGS.include_gpu_libs: + # cpdir( + # "tritonclient/utils/cuda_shared_memory", + # os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"), + # ) + shutil.copyfile("LICENSE.txt", os.path.join(FLAGS.whl_dir, "LICENSE.txt")) shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py")) cpdir("requirements", os.path.join(FLAGS.whl_dir, "requirements")) @@ -208,6 +239,9 @@ def sed(pattern, replace, source, dest=None): else: platform_name = "manylinux1_x86_64" args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] + elif FLAGS.windows and platform.uname().machine == "AMD64": + platform_name = "win_amd64" + args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name] else: args = ["python3", "setup.py", "bdist_wheel"] diff --git a/src/python/library/setup.py b/src/python/library/setup.py index 58cddbecf..63f5dc41c 100755 --- a/src/python/library/setup.py +++ b/src/python/library/setup.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -76,8 +76,10 @@ def req_file(filename, folder="requirements"):
 extras_require["all"] = list(chain(extras_require.values()))

 platform_package_data = []
-if PLATFORM_FLAG != "any":
+if "linux" in PLATFORM_FLAG:
     platform_package_data += ["libcshm.so"]
+elif PLATFORM_FLAG == "win_amd64":
+    platform_package_data += ["cshm.dll"]

 data_files = [
     ("", ["LICENSE.txt"]),
diff --git a/src/python/library/tritonclient/utils/CMakeLists.txt b/src/python/library/tritonclient/utils/CMakeLists.txt
index 7de1acf96..5e2d96225 100644
--- a/src/python/library/tritonclient/utils/CMakeLists.txt
+++ b/src/python/library/tritonclient/utils/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -24,26 +24,34 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+# FIXME: Windows client currently does not support GPU tensors.
+# For simplicity, we will override this option here.
+if(WIN32 AND TRITON_ENABLE_GPU)
+  message(WARNING "GPU shared memory is not currently supported by the Windows python client.")
+  set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE)
+endif()
+
 configure_file(__init__.py __init__.py COPYONLY)
 configure_file(_dlpack.py _dlpack.py COPYONLY)
 configure_file(_shared_memory_tensor.py _shared_memory_tensor.py COPYONLY)

-if(NOT WIN32)
-  file(COPY shared_memory DESTINATION .)
+file(COPY shared_memory DESTINATION .)
+#
+# libcshm.so / cshm.dll
+#
+add_library(cshm SHARED shared_memory/shared_memory.cc)
+if(${TRITON_ENABLE_GPU})
+  target_compile_definitions(cshm PUBLIC TRITON_ENABLE_GPU=1)
+  target_link_libraries(cshm PUBLIC CUDA::cudart)
+endif() # TRITON_ENABLE_GPU

-  #
-  # libcshm.so
-  #
-  add_library(cshm SHARED shared_memory/shared_memory.cc)
-  if(${TRITON_ENABLE_GPU})
-    target_compile_definitions(cshm PUBLIC TRITON_ENABLE_GPU=1)
-    target_link_libraries(cshm PUBLIC CUDA::cudart)
-  endif() # TRITON_ENABLE_GPU
+if(NOT WIN32)
   target_link_libraries(cshm PRIVATE rt)
-endif() # WIN32
+endif() # NOT WIN32

-if(NOT WIN32)
-  configure_file(shared_memory/__init__.py shared_memory/__init__.py COPYONLY)
+configure_file(shared_memory/__init__.py shared_memory/__init__.py COPYONLY)
+
+if(${TRITON_ENABLE_GPU})
   configure_file(cuda_shared_memory/__init__.py cuda_shared_memory/__init__.py COPYONLY)
   configure_file(cuda_shared_memory/_utils.py cuda_shared_memory/_utils.py COPYONLY)
-endif() # NOT WIN32
+endif() # TRITON_ENABLE_GPU
diff --git a/src/python/library/tritonclient/utils/shared_memory/__init__.py b/src/python/library/tritonclient/utils/shared_memory/__init__.py
index 4fb245ea0..719b96819 100755
--- a/src/python/library/tritonclient/utils/shared_memory/__init__.py
+++ b/src/python/library/tritonclient/utils/shared_memory/__init__.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3

-# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -28,6 +28,7 @@ import os import struct +import sys from ctypes import * import numpy as np @@ -45,6 +46,16 @@ def from_param(cls, value): return value.encode("utf8") +class ShmFile(Structure): + if sys.platform == "win32": + _fields_ = [ + ("backing_file_handle_", c_void_p), + ("shm_mapping_handle_", c_void_p), + ] + else: + _fields_ = [("shm_fd_", c_int)] + + _cshm_lib = "cshm" if os.name == "nt" else "libcshm.so" _cshm_path = pkg_resources.resource_filename( "tritonclient.utils.shared_memory", _cshm_lib @@ -63,7 +74,7 @@ def from_param(cls, value): c_void_p, POINTER(c_char_p), POINTER(c_char_p), - POINTER(c_int), + POINTER(ShmFile), POINTER(c_uint64), POINTER(c_uint64), ] @@ -205,7 +216,7 @@ def get_contents_as_numpy(shm_handle, datatype, shape, offset=0): The numpy array generated using the contents of the specified shared memory region. """ - shm_fd = c_int() + shm_file = ShmFile() region_offset = c_uint64() byte_size = c_uint64() shm_addr = c_char_p() @@ -216,7 +227,7 @@ def get_contents_as_numpy(shm_handle, datatype, shape, offset=0): shm_handle, byref(shm_addr), byref(shm_key), - byref(shm_fd), + byref(shm_file), byref(region_offset), byref(byte_size), ) @@ -284,10 +295,7 @@ def destroy_shared_memory_region(shm_handle): SharedMemoryException If unable to unlink the shared memory region. 
""" - - _raise_if_error(c_int(_cshm_shared_memory_region_destroy(shm_handle))) - - shm_fd = c_int() + shm_file = ShmFile() offset = c_uint64() byte_size = c_uint64() shm_addr = c_char_p() @@ -298,13 +306,16 @@ def destroy_shared_memory_region(shm_handle): shm_handle, byref(shm_addr), byref(shm_key), - byref(shm_fd), + byref(shm_file), byref(offset), byref(byte_size), ) ) ) - mapped_shm_regions.remove(shm_key.value.decode("utf-8")) + shm_key_copy = bytes(shm_key.value) + _raise_if_error(c_int(_cshm_shared_memory_region_destroy(shm_handle))) + + mapped_shm_regions.remove(shm_key_copy.decode("utf-8")) return @@ -326,6 +337,9 @@ def __init__(self, err): -4: "unable to read/mmap the shared memory region", -5: "unable to unlink the shared memory region", -6: "unable to munmap the shared memory region", + -7: "unable to create shm directory or backing file", + -8: "unable to create file mapping", + -9: "unable to delete backing file", } self._msg = None if type(err) == str: diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc index 2ccebb9d1..5242c007d 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -23,60 +23,165 @@ // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-
-#include "shared_memory.h"
+#ifdef _WIN32
+#include
+#else
+#include
+#include
+#endif

 #include
 #include
-#include
-#include

 #include
 #include

+#include "shared_memory.h"
 #include "shared_memory_handle.h"

+#define TRITON_SHM_FILE_ROOT "C:\\triton_shm\\"
+
 //==============================================================================
 // SharedMemoryControlContext
-
 namespace {

 void*
 SharedMemoryHandleCreate(
     std::string triton_shm_name, void* shm_addr, std::string shm_key,
-    int shm_fd, size_t offset, size_t byte_size)
+    std::unique_ptr<ShmFile>&& shm_file, size_t offset, size_t byte_size)
 {
   SharedMemoryHandle* handle = new SharedMemoryHandle();
   handle->triton_shm_name_ = triton_shm_name;
   handle->base_addr_ = shm_addr;
   handle->shm_key_ = shm_key;
-  handle->shm_fd_ = shm_fd;
+  handle->platform_handle_ = std::move(shm_file);
   handle->offset_ = offset;
   handle->byte_size_ = byte_size;
-  return reinterpret_cast<void*>(handle);
+  return static_cast<void*>(handle);
 }

 int
 SharedMemoryRegionMap(
-    int shm_fd, size_t offset, size_t byte_size, void** shm_addr)
+    ShmFile* shm_file, size_t offset, size_t byte_size, void** shm_addr)
 {
+#ifdef _WIN32
+  // The MapViewOfFile function takes a high-order and low-order DWORD (4 bytes
+  // each) for offset. 'size_t' can either be 4 or 8 bytes depending on the
+  // operating system. To handle both cases agnostically, we cast 'offset' to
+  // uint64 to ensure we have a known size and enough space to perform our
+  // logical operations.
+  uint64_t upperbound_offset = (uint64_t)offset;
+  DWORD high_order_offset = (upperbound_offset >> 32) & 0xFFFFFFFF;
+  DWORD low_order_offset = upperbound_offset & 0xFFFFFFFF;
+  // map shared memory to process address space
+  *shm_addr = MapViewOfFile(
+      shm_file->shm_mapping_handle_,  // handle to map object
+      FILE_MAP_ALL_ACCESS,            // read/write permission
+      high_order_offset,              // offset (high-order DWORD)
+      low_order_offset,               // offset (low-order DWORD)
+      byte_size);
+
+  if (*shm_addr == NULL) {
+    CloseHandle(shm_file->shm_mapping_handle_);
+    return -1;
+  }
+  // For Windows, we cannot close the shared memory handle here. When all
+  // handles are closed, the system will free the section of the paging
+  // file the shared memory object uses. Instead, we close on error or when
+  // we are destroying the shared memory object.
+  return 0;
+#else
   // map shared memory to process address space
-  *shm_addr = mmap(NULL, byte_size, PROT_WRITE, MAP_SHARED, shm_fd, offset);
+  *shm_addr =
+      mmap(NULL, byte_size, PROT_WRITE, MAP_SHARED, shm_file->shm_fd_, offset);
   if (*shm_addr == MAP_FAILED) {
     return -1;
   }

-  // close shared memory descriptor, return 0 if success else return -1
-  return close(shm_fd);
+  return 0;
+#endif
 }

-}  // namespace
+#ifdef _WIN32
+int
+SharedMemoryCreateBackingFile(const char* shm_key, HANDLE* backing_file_handle)
+{
+  LPCSTR backing_file_directory(TRITON_SHM_FILE_ROOT);
+  bool success = CreateDirectory(backing_file_directory, NULL);
+  if (!success && GetLastError() != ERROR_ALREADY_EXISTS) {
+    return -1;
+  }
+  const std::string backing_file_path =
+      std::string(TRITON_SHM_FILE_ROOT) + std::string(shm_key);
+  *backing_file_handle = CreateFile(
+      backing_file_path.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ,
+      NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+  if (*backing_file_handle == INVALID_HANDLE_VALUE) {
+    return -1;
+  }
+  return 0;
+}

 int
+SharedMemoryDeleteBackingFile(const char* key, HANDLE backing_file_handle)
+{
+  CloseHandle(backing_file_handle);
+  // Own the path in a std::string: c_str() of a temporary would dangle.
+  const std::string backing_file_path =
+      std::string(TRITON_SHM_FILE_ROOT) + std::string(key);
+  // Return on every path; callers compare the result against -1.
+  return DeleteFile(backing_file_path.c_str()) ? 0 : -1;
+}
+#endif
+
+}  // namespace
+
+TRITONCLIENT_DECLSPEC int
 SharedMemoryRegionCreate(
     const char* triton_shm_name, const char* shm_key, size_t byte_size,
     void** shm_handle)
 {
+#ifdef _WIN32
+  HANDLE backing_file_handle;
+  int err = SharedMemoryCreateBackingFile(shm_key, &backing_file_handle);
+  if (err == -1) {
+    return -7;
+  }
+  // The CreateFileMapping function takes a high-order and low-order DWORD (4
+  // bytes each) for size. 'size_t' can either be 4 or 8 bytes depending on the
+  // operating system. To handle both cases agnostically, we cast 'byte_size' to
+  // uint64 to ensure we have a known size and enough space to perform our
+  // logical operations.
+  uint64_t upperbound_size = (uint64_t)byte_size;
+  DWORD high_order_size = (upperbound_size >> 32) & 0xFFFFFFFF;
+  DWORD low_order_size = upperbound_size & 0xFFFFFFFF;
+
+  HANDLE win_handle = CreateFileMapping(
+      backing_file_handle,  // use backing file
+      NULL,                 // default security
+      PAGE_READWRITE,       // read/write access
+      high_order_size,      // maximum object size (high-order DWORD)
+      low_order_size,       // maximum object size (low-order DWORD)
+      shm_key);             // name of mapping object
+
+  if (win_handle == NULL) {
+    // Cleanup backing file on failure. The helper derives the file path from
+    // 'shm_key' on its own, so this branch only hands over the key and the
+    // still-open backing file handle.
+    SharedMemoryDeleteBackingFile(shm_key, backing_file_handle);
+    return -8;
+  }
+
+  std::unique_ptr<ShmFile> shm_file =
+      std::make_unique<ShmFile>(backing_file_handle, win_handle);
+  // get base address of shared memory region
+  void* shm_addr = nullptr;
+  err = SharedMemoryRegionMap(shm_file.get(), 0, byte_size, &shm_addr);
+  if (err == -1) {
+    SharedMemoryDeleteBackingFile(shm_key, backing_file_handle);
+    return -4;
+  }
+#else
   // get shared memory region descriptor
   int shm_fd = shm_open(shm_key, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
   if (shm_fd == -1) {
@@ -89,52 +194,69 @@ SharedMemoryRegionCreate(
     return -3;
   }

+  std::unique_ptr<ShmFile> shm_file = std::make_unique<ShmFile>(shm_fd);
   // get base address of shared memory region
   void* shm_addr = nullptr;
-  int err = SharedMemoryRegionMap(shm_fd, 0, byte_size, &shm_addr);
+  int err = SharedMemoryRegionMap(shm_file.get(), 0, byte_size, &shm_addr);
   if (err == -1) {
     return -4;
   }
-
+#endif
   // create a handle for the shared memory region
   *shm_handle = SharedMemoryHandleCreate(
-      std::string(triton_shm_name), shm_addr, std::string(shm_key), shm_fd, 0,
-      byte_size);
+      std::string(triton_shm_name), shm_addr, std::string(shm_key),
+      std::move(shm_file), 0, byte_size);
   return 0;
 }

-int
+TRITONCLIENT_DECLSPEC int
 SharedMemoryRegionSet(
     void* shm_handle, size_t offset, size_t byte_size, const void* data)
 {
-  void* shm_addr =
-      reinterpret_cast<SharedMemoryHandle*>(shm_handle)->base_addr_;
-  char* shm_addr_offset = reinterpret_cast<char*>(shm_addr);
+  void* shm_addr = static_cast<SharedMemoryHandle*>(shm_handle)->base_addr_;
+  char* shm_addr_offset = static_cast<char*>(shm_addr);
   std::memcpy(shm_addr_offset + offset, data, byte_size);
   return 0;
 }

-int
+TRITONCLIENT_DECLSPEC int
 GetSharedMemoryHandleInfo(
-    void* shm_handle, char** shm_addr, const char** shm_key, int* shm_fd,
+    void* shm_handle, char** shm_addr, const char** shm_key, void* shm_file,
     size_t* offset, size_t* byte_size)
 {
-  SharedMemoryHandle* handle =
-      reinterpret_cast<SharedMemoryHandle*>(shm_handle);
-  *shm_addr = reinterpret_cast<char*>(handle->base_addr_);
+  SharedMemoryHandle* handle = static_cast<SharedMemoryHandle*>(shm_handle);
+  ShmFile* file = static_cast<ShmFile*>(shm_file);
+  *shm_addr = static_cast<char*>(handle->base_addr_);
   *shm_key = handle->shm_key_.c_str();
-  *shm_fd = handle->shm_fd_;
   *offset = handle->offset_;
   *byte_size = handle->byte_size_;
+#ifdef _WIN32
+  file->backing_file_handle_ = handle->platform_handle_->backing_file_handle_;
+  file->shm_mapping_handle_ = handle->platform_handle_->shm_mapping_handle_;
+#else
+  file->shm_fd_ = handle->platform_handle_->shm_fd_;
+#endif
   return 0;
 }

-int
+TRITONCLIENT_DECLSPEC int
 SharedMemoryRegionDestroy(void* shm_handle)
 {
-  SharedMemoryHandle* handle =
-      reinterpret_cast<SharedMemoryHandle*>(shm_handle);
-  void* shm_addr = reinterpret_cast<void*>(handle->base_addr_);
+  SharedMemoryHandle* handle = static_cast<SharedMemoryHandle*>(shm_handle);
+  void* shm_addr = static_cast<void*>(handle->base_addr_);
+
+#ifdef _WIN32
+  bool success = UnmapViewOfFile(shm_addr);
+  if (!success) {
+    return -6;
+  }
+  CloseHandle(handle->platform_handle_->shm_mapping_handle_);
+  int err = SharedMemoryDeleteBackingFile(
+      handle->shm_key_.c_str(), handle->platform_handle_->backing_file_handle_);
+  if (err == -1) {
+    return -9;
+  }
+#else
   int status = munmap(shm_addr, handle->byte_size_);
   if (status == -1) {
     return -6;
@@ -144,8 +266,13 @@ SharedMemoryRegionDestroy(void* shm_handle)
   if (shm_fd == -1) {
     return -5;
   }
+  close(handle->platform_handle_->shm_fd_);
+#endif  // _WIN32
+
+  // FIXME: Investigate use of smart pointers for this
+  // allocation instead
+  delete handle;

   return 0;
 }
-
 //==============================================================================
diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h
index 9d3e9519e..98f0037c0 100644
--- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h
+++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -25,26 +25,27 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once -#include -#include -#include -#include - #ifdef __cplusplus extern "C" { #endif +#ifdef _WIN32 +#define TRITONCLIENT_DECLSPEC __declspec(dllexport) +#else +#define TRITONCLIENT_DECLSPEC +#endif + //============================================================================== // SharedMemoryControlContext -int SharedMemoryRegionCreate( +TRITONCLIENT_DECLSPEC int SharedMemoryRegionCreate( const char* triton_shm_name, const char* shm_key, size_t byte_size, void** shm_handle); -int SharedMemoryRegionSet( +TRITONCLIENT_DECLSPEC int SharedMemoryRegionSet( void* shm_handle, size_t offset, size_t byte_size, const void* data); -int GetSharedMemoryHandleInfo( - void* shm_handle, char** shm_addr, const char** shm_key, int* shm_fd, +TRITONCLIENT_DECLSPEC int GetSharedMemoryHandleInfo( + void* shm_handle, char** shm_addr, const char** shm_key, void* shm_file, size_t* offset, size_t* byte_size); -int SharedMemoryRegionDestroy(void* shm_handle); +TRITONCLIENT_DECLSPEC int SharedMemoryRegionDestroy(void* shm_handle); //============================================================================== diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h index b929ed305..bd264546a 100644 --- a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h +++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -30,15 +30,29 @@ #include #endif // TRITON_ENABLE_GPU +#ifdef _WIN32 +#include +#endif // _WIN32 +#include + +struct ShmFile { +#ifdef _WIN32 + HANDLE backing_file_handle_; + HANDLE shm_mapping_handle_; + ShmFile(HANDLE backing_file_handle, HANDLE shm_mapping_handle) + : backing_file_handle_(backing_file_handle), + shm_mapping_handle_(shm_mapping_handle){}; +#else + int shm_fd_; + ShmFile(int shm_fd) : shm_fd_(shm_fd){}; +#endif // _WIN32 +}; + struct SharedMemoryHandle { std::string triton_shm_name_; std::string shm_key_; -#ifdef TRITON_ENABLE_GPU - cudaIpcMemHandle_t cuda_shm_handle_; - int device_id_; -#endif // TRITON_ENABLE_GPU void* base_addr_; - int shm_fd_; + std::unique_ptr platform_handle_; size_t offset_; size_t byte_size_; };