triton-inference-server · fpetrini15 · Mar 15, 2024 · Mar 16, 2024 · Mar 20, 2024 · Mar 20, 2024
diff --git a/src/python/library/CMakeLists.txt b/src/python/library/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -38,9 +38,7 @@ if(${TRITON_ENABLE_PYTHON_HTTP})
   file(COPY tritonhttpclient DESTINATION .)
 endif() # TRITON_ENABLE_PYTHON_HTTP
 file(COPY tritonclientutils DESTINATION .)
-if (NOT WIN32)
-  file(COPY tritonshmutils DESTINATION .)
-endif() # NOT WIN32
+file(COPY tritonshmutils DESTINATION .)
 ####################################
 
 file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/TRITON_VERSION ${TRITON_VERSION})
@@ -91,9 +89,37 @@ add_custom_target(
 )
 
 #
-# Linux specific Wheel file. Compatible with x86, x64 and aarch64
+# Windows-specific Wheel file.
 #
-if (NOT WIN32)
+if(WIN32)
+  set(WINDOWS_WHEEL_DEPENDS
+        cshm
+        ${WHEEL_DEPENDS}
+  )
+  if (${TRITON_ENABLE_PERF_ANALYZER})
+    set(perf_analyzer_arg --perf-analyzer ${CMAKE_INSTALL_PREFIX}/bin/perf_analyzer)
+  endif()
+  set(windows_wheel_stamp_file "windows_stamp.whl")
+  add_custom_command(
+    OUTPUT "${windows_wheel_stamp_file}"
+    COMMAND python3
+    ARGS
+      "${CMAKE_CURRENT_SOURCE_DIR}/build_wheel.py"
+      --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/windows"
+      --windows
+      ${perf_analyzer_arg}
+    DEPENDS ${WINDOWS_WHEEL_DEPENDS}
+  )
+
+  add_custom_target(
+    windows-client-wheel ALL
+    DEPENDS
+      "${windows_wheel_stamp_file}"
+  )
+else()
+  #
+  # Linux specific Wheel file. Compatible with x86, x64 and aarch64
+  #
   # Can generate linux specific wheel file on linux systems only.
   set(LINUX_WHEEL_DEPENDS
         cshm
@@ -102,7 +128,10 @@ if (NOT WIN32)
 
   if (${TRITON_ENABLE_PERF_ANALYZER})
     set(perf_analyzer_arg --perf-analyzer ${CMAKE_INSTALL_PREFIX}/bin/perf_analyzer)
-  endif()
+  endif() # TRITON_ENABLE_PERF_ANALYZER
+  if (${TRITON_ENABLE_GPU})
+    set(gpu_arg --include-gpu-libs)
+  endif() # TRITON_ENABLE_GPU
   set(linux_wheel_stamp_file "linux_stamp.whl")
   add_custom_command(
     OUTPUT "${linux_wheel_stamp_file}"
@@ -112,6 +141,7 @@ if (NOT WIN32)
       --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/linux"
       --linux
       ${perf_analyzer_arg}
+      ${gpu_arg}
     DEPENDS ${LINUX_WHEEL_DEPENDS}
   )
 
@@ -120,20 +150,25 @@ if (NOT WIN32)
     DEPENDS
       "${linux_wheel_stamp_file}"
   )
-endif() # NOT WIN32
+endif() # WIN32
 
 if(${TRITON_ENABLE_PYTHON_GRPC})
   add_dependencies(
     generic-client-wheel
     grpc-service-py-library proto-py-library
   )
 
-  if (NOT WIN32)
+  if (WIN32)
+    add_dependencies(
+      windows-client-wheel
+      grpc-service-py-library proto-py-library
+    )
+  else()
     add_dependencies(
       linux-client-wheel
       grpc-service-py-library proto-py-library
-  )
-  endif() # NOT WIN32
+    )
+  endif() # WIN32
 
   file(
     GLOB generated-py
@@ -147,14 +182,22 @@ if(${TRITON_ENABLE_PYTHON_GRPC})
   )
 endif() # TRITON_ENABLE_PYTHON_GRPC
 
+# Generic Wheel
+set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/generic")
 install(
-  CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/generic/triton*.whl\")"
+  CODE "file(GLOB _Wheel \"${WHEEL_DIR}/triton*.whl\")"
+  CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")"
+)
+
+# Platform-specific wheels
+if(WIN32)
+  set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/windows")
+else()
+  set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/linux")
+endif() # WIN32
+
+install(
+  CODE "file(GLOB _Wheel \"${WHEEL_DIR}/triton*.whl\")"
   CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")"
 )
 
-if (NOT WIN32)
-  install(
-    CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/linux/triton*.whl\")"
-    CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")"
-  )
-endif() # NOT WIN32
diff --git a/src/python/library/build_wheel.py b/src/python/library/build_wheel.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -28,6 +28,7 @@
 import argparse
 import os
 import pathlib
+import platform
 import re
 import shutil
 import subprocess
@@ -77,12 +78,25 @@
     parser.add_argument(
         "--dest-dir", type=str, required=True, help="Destination directory."
     )
-    parser.add_argument(
+    platform_group = parser.add_mutually_exclusive_group()
+    platform_group.add_argument(
         "--linux",
         action="store_true",
         required=False,
         help="Include linux specific artifacts.",
     )
+    platform_group.add_argument(
+        "--windows",
+        action="store_true",
+        required=False,
+        help="Include windows specific artifacts.",
+    )
+    parser.add_argument(
+        "--include-gpu-libs",
+        action="store_true",
+        required=False,
+        help="Include gpu specific libraries",
+    )
     parser.add_argument(
         "--perf-analyzer",
         type=str,
@@ -118,7 +132,7 @@
         cpdir("tritonhttpclient", os.path.join(FLAGS.whl_dir, "tritonhttpclient"))
     if os.path.isdir("tritongrpcclient"):
         cpdir("tritongrpcclient", os.path.join(FLAGS.whl_dir, "tritongrpcclient"))
-    if FLAGS.linux:
+    if FLAGS.linux or FLAGS.windows:
         if os.path.isdir("tritonshmutils"):
             cpdir("tritonshmutils", os.path.join(FLAGS.whl_dir, "tritonshmutils"))
 
@@ -178,10 +192,11 @@
             "tritonclient/utils/libcshm.so",
             os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/libcshm.so"),
         )
-        cpdir(
-            "tritonclient/utils/cuda_shared_memory",
-            os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"),
-        )
+        if FLAGS.include_gpu_libs:
+            cpdir(
+                "tritonclient/utils/cuda_shared_memory",
+                os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"),
+            )
 
         # Copy the pre-compiled perf_analyzer binary
         if FLAGS.perf_analyzer is not None:
@@ -194,6 +209,22 @@
             if not os.path.exists(os.path.join(FLAGS.whl_dir, "perf_client")):
                 os.symlink("perf_analyzer", os.path.join(FLAGS.whl_dir, "perf_client"))
 
+    if FLAGS.windows:
+        cpdir(
+            "tritonclient/utils/shared_memory",
+            os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory"),
+        )
+        shutil.copyfile(
+            "tritonclient/utils/Release/cshm.dll",
+            os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/cshm.dll"),
+        )
+        # FIXME: Enable when Windows supports GPU tensors DLIS-4169
+        # if FLAGS.include_gpu_libs:
+        #     cpdir(
+        #         "tritonclient/utils/cuda_shared_memory",
+        #         os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"),
+        #     )
+
     shutil.copyfile("LICENSE.txt", os.path.join(FLAGS.whl_dir, "LICENSE.txt"))
     shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py"))
     cpdir("requirements", os.path.join(FLAGS.whl_dir, "requirements"))
@@ -208,6 +239,9 @@
         else:
             platform_name = "manylinux1_x86_64"
         args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name]
+    elif FLAGS.windows and platform.uname().machine == "AMD64":
+        platform_name = "win_amd64"
+        args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name]
     else:
         args = ["python3", "setup.py", "bdist_wheel"]
 

diff --git a/src/python/library/setup.py b/src/python/library/setup.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -76,8 +76,10 @@ def req_file(filename, folder="requirements"):
 extras_require["all"] = list(chain(extras_require.values()))
 
 platform_package_data = []
-if PLATFORM_FLAG != "any":
+if "linux" in PLATFORM_FLAG:
     platform_package_data += ["libcshm.so"]
+elif PLATFORM_FLAG == "win_amd64":
+    platform_package_data += ["cshm.dll"]
 
 data_files = [
     ("", ["LICENSE.txt"]),

diff --git a/src/python/library/tritonclient/utils/CMakeLists.txt b/src/python/library/tritonclient/utils/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -24,26 +24,34 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+# FIXME: Windows client currently does not support GPU tensors.
+# Stop at configure time instead of producing a wheel without GPU support.
+if(WIN32 AND TRITON_ENABLE_GPU)
+  message(FATAL_ERROR
+    "GPU shared memory is not currently supported by the Windows python client.")
+endif()
+
 configure_file(__init__.py __init__.py COPYONLY)
 configure_file(_dlpack.py _dlpack.py COPYONLY)
 configure_file(_shared_memory_tensor.py _shared_memory_tensor.py COPYONLY)
 
-if(NOT WIN32)
-  file(COPY shared_memory DESTINATION .)
+file(COPY shared_memory DESTINATION .)
+#
+# libcshm.so / cshm.dll
+#
+add_library(cshm SHARED shared_memory/shared_memory.cc)
+if(${TRITON_ENABLE_GPU})
+  target_compile_definitions(cshm PUBLIC TRITON_ENABLE_GPU=1)
+  target_link_libraries(cshm PUBLIC CUDA::cudart)
+endif() # TRITON_ENABLE_GPU
 
-  #
-  # libcshm.so
-  #
-  add_library(cshm SHARED shared_memory/shared_memory.cc)
-  if(${TRITON_ENABLE_GPU})
-    target_compile_definitions(cshm PUBLIC TRITON_ENABLE_GPU=1)
-    target_link_libraries(cshm PUBLIC CUDA::cudart)
-  endif() # TRITON_ENABLE_GPU
+if(NOT WIN32)
   target_link_libraries(cshm PRIVATE rt)
-endif() # WIN32
+endif() # NOT WIN32
 
-if(NOT WIN32)
-  configure_file(shared_memory/__init__.py shared_memory/__init__.py COPYONLY)
+configure_file(shared_memory/__init__.py shared_memory/__init__.py COPYONLY)
+
+if(${TRITON_ENABLE_GPU})
   configure_file(cuda_shared_memory/__init__.py cuda_shared_memory/__init__.py COPYONLY)
   configure_file(cuda_shared_memory/_utils.py cuda_shared_memory/_utils.py COPYONLY)
-endif() # NOT WIN32
+endif() # TRITON_ENABLE_GPU