diff --git a/src/python/library/CMakeLists.txt b/src/python/library/CMakeLists.txt
index 87bfd4050..dd9becb48 100644
--- a/src/python/library/CMakeLists.txt
+++ b/src/python/library/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -38,9 +38,7 @@ if(${TRITON_ENABLE_PYTHON_HTTP})
   file(COPY tritonhttpclient DESTINATION .)
 endif() # TRITON_ENABLE_PYTHON_HTTP
 file(COPY tritonclientutils DESTINATION .)
-if (NOT WIN32)
-  file(COPY tritonshmutils DESTINATION .)
-endif() # NOT WIN32
+file(COPY tritonshmutils DESTINATION .)
 ####################################
 
 file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/TRITON_VERSION ${TRITON_VERSION})
@@ -91,9 +89,37 @@ add_custom_target(
 )
 
 #
-# Linux specific Wheel file. Compatible with x86, x64 and aarch64
+# Windows-specific Wheel file.
 #
-if (NOT WIN32)
+if(WIN32)
+  # The wheel bundles cshm.dll, so the cshm target must be built first.
+  set(WINDOWS_WHEEL_DEPENDS cshm ${WHEEL_DEPENDS})
+  if (${TRITON_ENABLE_PERF_ANALYZER})
+    set(perf_analyzer_arg --perf-analyzer ${CMAKE_INSTALL_PREFIX}/bin/perf_analyzer)
+  endif()
+  set(windows_wheel_stamp_file "windows_stamp.whl")
+  # VERBATIM keeps argument escaping platform-independent (e.g. paths that
+  # contain spaces); the legacy ARGS keyword is ignored by CMake and dropped.
+  add_custom_command(
+    OUTPUT "${windows_wheel_stamp_file}"
+    COMMAND python3
+      "${CMAKE_CURRENT_SOURCE_DIR}/build_wheel.py"
+      --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/windows"
+      --windows
+      ${perf_analyzer_arg}
+    DEPENDS ${WINDOWS_WHEEL_DEPENDS}
+    VERBATIM
+  )
+
+  add_custom_target(
+    windows-client-wheel ALL
+    DEPENDS
+      "${windows_wheel_stamp_file}"
+  )
+else()
+  #
+  # Linux specific Wheel file. Compatible with x86, x64 and aarch64
+  #
   # Can generate linux specific wheel file on linux systems only.
   set(LINUX_WHEEL_DEPENDS
         cshm
@@ -102,7 +128,10 @@ if (NOT WIN32)
 
   if (${TRITON_ENABLE_PERF_ANALYZER})
     set(perf_analyzer_arg --perf-analyzer ${CMAKE_INSTALL_PREFIX}/bin/perf_analyzer)
-  endif()
+  endif() # TRITON_ENABLE_PERF_ANALYZER
+  if (${TRITON_ENABLE_GPU})
+    set(gpu_arg --include-gpu-libs)
+  endif() # TRITON_ENABLE_GPU
   set(linux_wheel_stamp_file "linux_stamp.whl")
   add_custom_command(
     OUTPUT "${linux_wheel_stamp_file}"
@@ -112,6 +141,7 @@ if (NOT WIN32)
       --dest-dir "${CMAKE_CURRENT_BINARY_DIR}/linux"
       --linux
       ${perf_analyzer_arg}
+      ${gpu_arg}
     DEPENDS ${LINUX_WHEEL_DEPENDS}
   )
 
@@ -120,7 +150,7 @@ if (NOT WIN32)
     DEPENDS
       "${linux_wheel_stamp_file}"
   )
-endif() # NOT WIN32
+endif() # WIN32
 
 if(${TRITON_ENABLE_PYTHON_GRPC})
   add_dependencies(
@@ -128,12 +158,17 @@ if(${TRITON_ENABLE_PYTHON_GRPC})
     grpc-service-py-library proto-py-library
   )
 
-  if (NOT WIN32)
+  if (WIN32)
+    add_dependencies(
+      windows-client-wheel
+      grpc-service-py-library proto-py-library
+    )
+  else()
     add_dependencies(
       linux-client-wheel
       grpc-service-py-library proto-py-library
-  )
-  endif() # NOT WIN32
+    )
+  endif() # WIN32
 
   file(
     GLOB generated-py
@@ -147,14 +182,22 @@ if(${TRITON_ENABLE_PYTHON_GRPC})
   )
 endif() # TRITON_ENABLE_PYTHON_GRPC
 
+# Generic wheel. ${WHEEL_DIR} is expanded now (configure time) into the install script; \${_Wheel} is resolved at install time.
+set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/generic")
 install(
-  CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/generic/triton*.whl\")"
+  CODE "file(GLOB _Wheel \"${WHEEL_DIR}/triton*.whl\")"
+  CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")"
+)
+
+# Platform-specific wheel. Re-pointing WHEEL_DIR is safe: the rule above already captured the previous value.
+if(WIN32)
+  set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/windows")
+else()
+  set(WHEEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/linux")
+endif() # WIN32
+
+install(
+  CODE "file(GLOB _Wheel \"${WHEEL_DIR}/triton*.whl\")"
   CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")"
 )
 
-if (NOT WIN32)
-  install(
-    CODE "file(GLOB _Wheel \"${CMAKE_CURRENT_BINARY_DIR}/linux/triton*.whl\")"
-    CODE "file(INSTALL \${_Wheel} DESTINATION \"${CMAKE_INSTALL_PREFIX}/python\")"
-  )
-endif() # NOT WIN32
diff --git a/src/python/library/build_wheel.py b/src/python/library/build_wheel.py
index d32e7732a..b9b768a91 100755
--- a/src/python/library/build_wheel.py
+++ b/src/python/library/build_wheel.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -28,6 +28,7 @@
 import argparse
 import os
 import pathlib
+import platform
 import re
 import shutil
 import subprocess
@@ -77,12 +78,25 @@ def sed(pattern, replace, source, dest=None):
     parser.add_argument(
         "--dest-dir", type=str, required=True, help="Destination directory."
     )
-    parser.add_argument(
+    platform_group = parser.add_mutually_exclusive_group()
+    platform_group.add_argument(
         "--linux",
         action="store_true",
         required=False,
         help="Include linux specific artifacts.",
     )
+    platform_group.add_argument(
+        "--windows",
+        action="store_true",
+        required=False,
+        help="Include windows specific artifacts.",
+    )
+    parser.add_argument(
+        "--include-gpu-libs",
+        action="store_true",
+        required=False,
+        help="Include gpu specific libraries",
+    )
     parser.add_argument(
         "--perf-analyzer",
         type=str,
@@ -118,7 +132,7 @@ def sed(pattern, replace, source, dest=None):
         cpdir("tritonhttpclient", os.path.join(FLAGS.whl_dir, "tritonhttpclient"))
     if os.path.isdir("tritongrpcclient"):
         cpdir("tritongrpcclient", os.path.join(FLAGS.whl_dir, "tritongrpcclient"))
-    if FLAGS.linux:
+    if FLAGS.linux or FLAGS.windows:
         if os.path.isdir("tritonshmutils"):
             cpdir("tritonshmutils", os.path.join(FLAGS.whl_dir, "tritonshmutils"))
 
@@ -178,10 +192,11 @@ def sed(pattern, replace, source, dest=None):
             "tritonclient/utils/libcshm.so",
             os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/libcshm.so"),
         )
-        cpdir(
-            "tritonclient/utils/cuda_shared_memory",
-            os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"),
-        )
+        if FLAGS.include_gpu_libs:
+            cpdir(
+                "tritonclient/utils/cuda_shared_memory",
+                os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"),
+            )
 
         # Copy the pre-compiled perf_analyzer binary
         if FLAGS.perf_analyzer is not None:
@@ -194,6 +209,22 @@ def sed(pattern, replace, source, dest=None):
             if not os.path.exists(os.path.join(FLAGS.whl_dir, "perf_client")):
                 os.symlink("perf_analyzer", os.path.join(FLAGS.whl_dir, "perf_client"))
 
+    if FLAGS.windows:
+        cpdir(
+            "tritonclient/utils/shared_memory",
+            os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory"),
+        )
+        shutil.copyfile(
+            "tritonclient/utils/Release/cshm.dll",
+            os.path.join(FLAGS.whl_dir, "tritonclient/utils/shared_memory/cshm.dll"),
+        )
+        # FIXME: Enable when Windows supports GPU tensors DLIS-4169
+        # if FLAGS.include_gpu_libs:
+        #     cpdir(
+        #         "tritonclient/utils/cuda_shared_memory",
+        #         os.path.join(FLAGS.whl_dir, "tritonclient/utils/cuda_shared_memory"),
+        #     )
+
     shutil.copyfile("LICENSE.txt", os.path.join(FLAGS.whl_dir, "LICENSE.txt"))
     shutil.copyfile("setup.py", os.path.join(FLAGS.whl_dir, "setup.py"))
     cpdir("requirements", os.path.join(FLAGS.whl_dir, "requirements"))
@@ -208,6 +239,9 @@ def sed(pattern, replace, source, dest=None):
         else:
             platform_name = "manylinux1_x86_64"
         args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name]
+    elif FLAGS.windows and platform.uname().machine == "AMD64":
+        platform_name = "win_amd64"
+        args = ["python3", "setup.py", "bdist_wheel", "--plat-name", platform_name]
     else:
         args = ["python3", "setup.py", "bdist_wheel"]
 
diff --git a/src/python/library/setup.py b/src/python/library/setup.py
index 58cddbecf..63f5dc41c 100755
--- a/src/python/library/setup.py
+++ b/src/python/library/setup.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -76,8 +76,10 @@ def req_file(filename, folder="requirements"):
 extras_require["all"] = list(chain(extras_require.values()))
 
 platform_package_data = []
-if PLATFORM_FLAG != "any":
+if "linux" in PLATFORM_FLAG:
     platform_package_data += ["libcshm.so"]
+elif PLATFORM_FLAG == "win_amd64":
+    platform_package_data += ["cshm.dll"]
 
 data_files = [
     ("", ["LICENSE.txt"]),
diff --git a/src/python/library/tritonclient/utils/CMakeLists.txt b/src/python/library/tritonclient/utils/CMakeLists.txt
index 7de1acf96..5e2d96225 100644
--- a/src/python/library/tritonclient/utils/CMakeLists.txt
+++ b/src/python/library/tritonclient/utils/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -24,26 +24,34 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+# FIXME: Windows client currently does not support GPU tensors.
+# Warn (do not abort) and force the option OFF so the override below actually runs.
+if(WIN32 AND TRITON_ENABLE_GPU)
+  message(WARNING "GPU shared memory is not currently supported by the Windows python client; forcing TRITON_ENABLE_GPU=OFF.")
+  set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE)
+endif()
+
 configure_file(__init__.py __init__.py COPYONLY)
 configure_file(_dlpack.py _dlpack.py COPYONLY)
 configure_file(_shared_memory_tensor.py _shared_memory_tensor.py COPYONLY)
 
-if(NOT WIN32)
-  file(COPY shared_memory DESTINATION .)
+file(COPY shared_memory DESTINATION .)
+#
+# libcshm.so / cshm.dll
+#
+add_library(cshm SHARED shared_memory/shared_memory.cc)
+if(${TRITON_ENABLE_GPU})
+  target_compile_definitions(cshm PUBLIC TRITON_ENABLE_GPU=1)
+  target_link_libraries(cshm PUBLIC CUDA::cudart)
+endif() # TRITON_ENABLE_GPU
 
-  #
-  # libcshm.so
-  #
-  add_library(cshm SHARED shared_memory/shared_memory.cc)
-  if(${TRITON_ENABLE_GPU})
-    target_compile_definitions(cshm PUBLIC TRITON_ENABLE_GPU=1)
-    target_link_libraries(cshm PUBLIC CUDA::cudart)
-  endif() # TRITON_ENABLE_GPU
+if(NOT WIN32)
   target_link_libraries(cshm PRIVATE rt)
-endif() # WIN32
+endif() # NOT WIN32
 
-if(NOT WIN32)
-  configure_file(shared_memory/__init__.py shared_memory/__init__.py COPYONLY)
+configure_file(shared_memory/__init__.py shared_memory/__init__.py COPYONLY)
+
+if(${TRITON_ENABLE_GPU})
   configure_file(cuda_shared_memory/__init__.py cuda_shared_memory/__init__.py COPYONLY)
   configure_file(cuda_shared_memory/_utils.py cuda_shared_memory/_utils.py COPYONLY)
-endif() # NOT WIN32
+endif() # TRITON_ENABLE_GPU
diff --git a/src/python/library/tritonclient/utils/shared_memory/__init__.py b/src/python/library/tritonclient/utils/shared_memory/__init__.py
index 4fb245ea0..719b96819 100755
--- a/src/python/library/tritonclient/utils/shared_memory/__init__.py
+++ b/src/python/library/tritonclient/utils/shared_memory/__init__.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -28,6 +28,7 @@
 
 import os
 import struct
+import sys
 from ctypes import *
 
 import numpy as np
@@ -45,6 +46,16 @@ def from_param(cls, value):
             return value.encode("utf8")
 
 
+class ShmFile(Structure):
+    # Mirror of the native ShmFile struct: two OS handles on Windows,
+    # a single POSIX file descriptor everywhere else.
+    _fields_ = (
+        [("backing_file_handle_", c_void_p), ("shm_mapping_handle_", c_void_p)]
+        if sys.platform == "win32"
+        else [("shm_fd_", c_int)]
+    )
+
+
 _cshm_lib = "cshm" if os.name == "nt" else "libcshm.so"
 _cshm_path = pkg_resources.resource_filename(
     "tritonclient.utils.shared_memory", _cshm_lib
@@ -63,7 +74,7 @@ def from_param(cls, value):
     c_void_p,
     POINTER(c_char_p),
     POINTER(c_char_p),
-    POINTER(c_int),
+    POINTER(ShmFile),
     POINTER(c_uint64),
     POINTER(c_uint64),
 ]
@@ -205,7 +216,7 @@ def get_contents_as_numpy(shm_handle, datatype, shape, offset=0):
         The numpy array generated using the contents of the specified shared
         memory region.
     """
-    shm_fd = c_int()
+    shm_file = ShmFile()
     region_offset = c_uint64()
     byte_size = c_uint64()
     shm_addr = c_char_p()
@@ -216,7 +227,7 @@ def get_contents_as_numpy(shm_handle, datatype, shape, offset=0):
                 shm_handle,
                 byref(shm_addr),
                 byref(shm_key),
-                byref(shm_fd),
+                byref(shm_file),
                 byref(region_offset),
                 byref(byte_size),
             )
@@ -284,10 +295,7 @@ def destroy_shared_memory_region(shm_handle):
     SharedMemoryException
         If unable to unlink the shared memory region.
     """
-
-    _raise_if_error(c_int(_cshm_shared_memory_region_destroy(shm_handle)))
-
-    shm_fd = c_int()
+    shm_file = ShmFile()
     offset = c_uint64()
     byte_size = c_uint64()
     shm_addr = c_char_p()
@@ -298,13 +306,16 @@ def destroy_shared_memory_region(shm_handle):
                 shm_handle,
                 byref(shm_addr),
                 byref(shm_key),
-                byref(shm_fd),
+                byref(shm_file),
                 byref(offset),
                 byref(byte_size),
             )
         )
     )
-    mapped_shm_regions.remove(shm_key.value.decode("utf-8"))
+    shm_key_copy = bytes(shm_key.value)
+    _raise_if_error(c_int(_cshm_shared_memory_region_destroy(shm_handle)))
+
+    mapped_shm_regions.remove(shm_key_copy.decode("utf-8"))
 
     return
 
@@ -326,6 +337,9 @@ def __init__(self, err):
             -4: "unable to read/mmap the shared memory region",
             -5: "unable to unlink the shared memory region",
             -6: "unable to munmap the shared memory region",
+            -7: "unable to create shm directory or backing file",
+            -8: "unable to create file mapping",
+            -9: "unable to delete backing file",
         }
         self._msg = None
         if type(err) == str:
diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc
index 2ccebb9d1..5242c007d 100644
--- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc
+++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -23,60 +23,165 @@
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include "shared_memory.h"
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <sys/mman.h>
+#include <unistd.h>
+#endif
 
 #include <errno.h>
 #include <fcntl.h>
-#include <sys/mman.h>
-#include <unistd.h>
 
 #include <cstring>
 #include <iostream>
 
+#include "shared_memory.h"
 #include "shared_memory_handle.h"
 
+#define TRITON_SHM_FILE_ROOT "C:\\triton_shm\\"
+
 //==============================================================================
 // SharedMemoryControlContext
-
 namespace {
 
 void*
 SharedMemoryHandleCreate(
     std::string triton_shm_name, void* shm_addr, std::string shm_key,
-    int shm_fd, size_t offset, size_t byte_size)
+    std::unique_ptr<ShmFile>&& shm_file, size_t offset, size_t byte_size)
 {
   SharedMemoryHandle* handle = new SharedMemoryHandle();
   handle->triton_shm_name_ = triton_shm_name;
   handle->base_addr_ = shm_addr;
   handle->shm_key_ = shm_key;
-  handle->shm_fd_ = shm_fd;
+  handle->platform_handle_ = std::move(shm_file);
   handle->offset_ = offset;
   handle->byte_size_ = byte_size;
-  return reinterpret_cast<void*>(handle);
+  return static_cast<void*>(handle);
 }
 
 int
 SharedMemoryRegionMap(
-    int shm_fd, size_t offset, size_t byte_size, void** shm_addr)
+    ShmFile* shm_file, size_t offset, size_t byte_size, void** shm_addr)
 {
+#ifdef _WIN32
+  // The MapViewOfFile function takes a high-order and low-order DWORD (4 bytes
+  // each) for offset. 'size_t' can either be 4 or 8 bytes depending on the
+  // operating system. To handle both cases agnostically, we cast 'offset' to
+  // uint64 to ensure we have a known size and enough space to perform our
+  // logical operations.
+  uint64_t upperbound_offset = (uint64_t)offset;
+  DWORD high_order_offset = (upperbound_offset >> 32) & 0xFFFFFFFF;
+  DWORD low_order_offset = upperbound_offset & 0xFFFFFFFF;
+  // map shared memory to process address space
+  *shm_addr = MapViewOfFile(
+      shm_file->shm_mapping_handle_,  // handle to map object
+      FILE_MAP_ALL_ACCESS,            // read/write permission
+      high_order_offset,              // offset (high-order DWORD)
+      low_order_offset,               // offset (low-order DWORD)
+      byte_size);
+
+  if (*shm_addr == NULL) {
+    CloseHandle(shm_file->shm_mapping_handle_);
+    return -1;
+  }
+  // For Windows, we cannot close the shared memory handle here. When all
+  // handles are closed, the system will free the section of the paging
+  // file the shared memory object uses. Instead, we close on error or when
+  // we are destroying the shared memory object.
+  return 0;
+#else
   // map shared memory to process address space
-  *shm_addr = mmap(NULL, byte_size, PROT_WRITE, MAP_SHARED, shm_fd, offset);
+  *shm_addr =
+      mmap(NULL, byte_size, PROT_WRITE, MAP_SHARED, shm_file->shm_fd_, offset);
   if (*shm_addr == MAP_FAILED) {
     return -1;
   }
 
-  // close shared memory descriptor, return 0 if success else return -1
-  return close(shm_fd);
+  return 0;
+#endif
 }
 
-}  // namespace
+#ifdef _WIN32
+// Opens (creating if needed) the file TRITON_SHM_FILE_ROOT + shm_key and
+// stores its handle in 'backing_file_handle'. Returns 0, or -1 on failure.
+int
+SharedMemoryCreateBackingFile(const char* shm_key, HANDLE* backing_file_handle)
+{
+  if (!CreateDirectoryA(TRITON_SHM_FILE_ROOT, NULL) &&
+      (GetLastError() != ERROR_ALREADY_EXISTS)) {
+    return -1;
+  }
+  // Keep the path in a named std::string: c_str() of a temporary dangles as
+  // soon as the full expression ends, i.e. before CreateFile would read it.
+  const std::string backing_file_path =
+      std::string(TRITON_SHM_FILE_ROOT) + shm_key;
+  *backing_file_handle = CreateFileA(
+      backing_file_path.c_str(), GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ,
+      NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+  return (*backing_file_handle == INVALID_HANDLE_VALUE) ? -1 : 0;
+}
 
 int
+SharedMemoryDeleteBackingFile(const char* key, HANDLE backing_file_handle)
+{
+  // Release our handle before removing the file from disk.
+  CloseHandle(backing_file_handle);
+  // Named std::string: c_str() of a temporary would dangle before use.
+  const std::string backing_file_path = std::string(TRITON_SHM_FILE_ROOT) + key;
+  // Every path returns a value: 0 on success, -1 if the delete failed.
+  return DeleteFileA(backing_file_path.c_str()) ? 0 : -1;
+}
+#endif
+
+}  // namespace
+
+TRITONCLIENT_DECLSPEC int
 SharedMemoryRegionCreate(
     const char* triton_shm_name, const char* shm_key, size_t byte_size,
     void** shm_handle)
 {
+#ifdef _WIN32
+  HANDLE backing_file_handle;
+  int err = SharedMemoryCreateBackingFile(shm_key, &backing_file_handle);
+  if (err == -1) {
+    return -7;
+  }
+  // The CreateFileMapping function takes a high-order and low-order DWORD (4
+  // bytes each) for size. 'size_t' can either be 4 or 8 bytes depending on the
+  // operating system. To handle both cases agnostically, we cast 'byte_size' to
+  // uint64 to ensure we have a known size and enough space to perform our
+  // logical operations.
+  uint64_t upperbound_size = (uint64_t)byte_size;
+  DWORD high_order_size = (upperbound_size >> 32) & 0xFFFFFFFF;
+  DWORD low_order_size = upperbound_size & 0xFFFFFFFF;
+
+  HANDLE win_handle = CreateFileMapping(
+      backing_file_handle,  // use backing file
+      NULL,                 // default security
+      PAGE_READWRITE,       // read/write access
+      high_order_size,      // maximum object size (high-order DWORD)
+      low_order_size,       // maximum object size (low-order DWORD)
+      shm_key);             // name of mapping object
+
+  if (win_handle == NULL) {
+    // Cleanup on failure: the helper closes the backing-file handle and
+    // deletes the file, deriving the path from 'shm_key' itself, so no
+    // local copy of the path is needed here.
+    SharedMemoryDeleteBackingFile(shm_key, backing_file_handle);
+    return -8;
+  }
+
+  std::unique_ptr<ShmFile> shm_file =
+      std::make_unique<ShmFile>(backing_file_handle, win_handle);
+  // get base address of shared memory region
+  void* shm_addr = nullptr;
+  err = SharedMemoryRegionMap(shm_file.get(), 0, byte_size, &shm_addr);
+  if (err == -1) {
+    SharedMemoryDeleteBackingFile(shm_key, backing_file_handle);
+    return -4;
+  }
+#else
   // get shared memory region descriptor
   int shm_fd = shm_open(shm_key, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
   if (shm_fd == -1) {
@@ -89,52 +194,69 @@ SharedMemoryRegionCreate(
     return -3;
   }
 
+  std::unique_ptr<ShmFile> shm_file = std::make_unique<ShmFile>(shm_fd);
   // get base address of shared memory region
   void* shm_addr = nullptr;
-  int err = SharedMemoryRegionMap(shm_fd, 0, byte_size, &shm_addr);
+  int err = SharedMemoryRegionMap(shm_file.get(), 0, byte_size, &shm_addr);
   if (err == -1) {
     return -4;
   }
-
+#endif
   // create a handle for the shared memory region
   *shm_handle = SharedMemoryHandleCreate(
-      std::string(triton_shm_name), shm_addr, std::string(shm_key), shm_fd, 0,
-      byte_size);
+      std::string(triton_shm_name), shm_addr, std::string(shm_key),
+      std::move(shm_file), 0, byte_size);
   return 0;
 }
 
-int
+TRITONCLIENT_DECLSPEC int
 SharedMemoryRegionSet(
     void* shm_handle, size_t offset, size_t byte_size, const void* data)
 {
-  void* shm_addr =
-      reinterpret_cast<SharedMemoryHandle*>(shm_handle)->base_addr_;
-  char* shm_addr_offset = reinterpret_cast<char*>(shm_addr);
+  void* shm_addr = static_cast<SharedMemoryHandle*>(shm_handle)->base_addr_;
+  char* shm_addr_offset = static_cast<char*>(shm_addr);
   std::memcpy(shm_addr_offset + offset, data, byte_size);
   return 0;
 }
 
-int
+TRITONCLIENT_DECLSPEC int
 GetSharedMemoryHandleInfo(
-    void* shm_handle, char** shm_addr, const char** shm_key, int* shm_fd,
+    void* shm_handle, char** shm_addr, const char** shm_key, void* shm_file,
     size_t* offset, size_t* byte_size)
 {
-  SharedMemoryHandle* handle =
-      reinterpret_cast<SharedMemoryHandle*>(shm_handle);
-  *shm_addr = reinterpret_cast<char*>(handle->base_addr_);
+  SharedMemoryHandle* handle = static_cast<SharedMemoryHandle*>(shm_handle);
+  ShmFile* file = static_cast<ShmFile*>(shm_file);  // caller-owned out struct
+  *shm_addr = static_cast<char*>(handle->base_addr_);
   *shm_key = handle->shm_key_.c_str();
-  *shm_fd = handle->shm_fd_;
   *offset = handle->offset_;
   *byte_size = handle->byte_size_;
+#ifdef _WIN32
+  file->backing_file_handle_ = handle->platform_handle_->backing_file_handle_;
+  file->shm_mapping_handle_ = handle->platform_handle_->shm_mapping_handle_;
+#else
+  file->shm_fd_ = handle->platform_handle_->shm_fd_;
+#endif  // _WIN32
   return 0;
 }
 
-int
+TRITONCLIENT_DECLSPEC int
 SharedMemoryRegionDestroy(void* shm_handle)
 {
-  SharedMemoryHandle* handle =
-      reinterpret_cast<SharedMemoryHandle*>(shm_handle);
-  void* shm_addr = reinterpret_cast<char*>(handle->base_addr_);
+  SharedMemoryHandle* handle = static_cast<SharedMemoryHandle*>(shm_handle);
+  void* shm_addr = static_cast<char*>(handle->base_addr_);
+
+#ifdef _WIN32
+  bool success = UnmapViewOfFile(shm_addr);
+  if (!success) {
+    return -6;
+  }
+  CloseHandle(handle->platform_handle_->shm_mapping_handle_);
+  int err = SharedMemoryDeleteBackingFile(
+      handle->shm_key_.c_str(), handle->platform_handle_->backing_file_handle_);
+  if (err == -1) {
+    return -9;
+  }
+#else
   int status = munmap(shm_addr, handle->byte_size_);
   if (status == -1) {
     return -6;
@@ -144,8 +266,13 @@ SharedMemoryRegionDestroy(void* shm_handle)
   if (shm_fd == -1) {
     return -5;
   }
+  close(handle->platform_handle_->shm_fd_);
+#endif  // _WIN32
+
+  // FIXME: Investigate use of smart pointers for this
+  // allocation instead
+  delete handle;
 
   return 0;
 }
-
 //==============================================================================
diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h
index 9d3e9519e..98f0037c0 100644
--- a/src/python/library/tritonclient/utils/shared_memory/shared_memory.h
+++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -25,26 +25,29 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #pragma once
 
-#include <fcntl.h>
 #include <stddef.h>
-#include <sys/mman.h>
-#include <unistd.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#ifdef _WIN32
+#define TRITONCLIENT_DECLSPEC __declspec(dllexport)
+#else
+#define TRITONCLIENT_DECLSPEC
+#endif
+
 //==============================================================================
 // SharedMemoryControlContext
-int SharedMemoryRegionCreate(
+TRITONCLIENT_DECLSPEC int SharedMemoryRegionCreate(
     const char* triton_shm_name, const char* shm_key, size_t byte_size,
     void** shm_handle);
-int SharedMemoryRegionSet(
+TRITONCLIENT_DECLSPEC int SharedMemoryRegionSet(
     void* shm_handle, size_t offset, size_t byte_size, const void* data);
-int GetSharedMemoryHandleInfo(
-    void* shm_handle, char** shm_addr, const char** shm_key, int* shm_fd,
+TRITONCLIENT_DECLSPEC int GetSharedMemoryHandleInfo(
+    void* shm_handle, char** shm_addr, const char** shm_key, void* shm_file,
     size_t* offset, size_t* byte_size);
-int SharedMemoryRegionDestroy(void* shm_handle);
+TRITONCLIENT_DECLSPEC int SharedMemoryRegionDestroy(void* shm_handle);
 
 //==============================================================================
 
diff --git a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h
index b929ed305..bd264546a 100644
--- a/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h
+++ b/src/python/library/tritonclient/utils/shared_memory/shared_memory_handle.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -30,15 +30,29 @@
 #include <cuda_runtime_api.h>
 #endif  // TRITON_ENABLE_GPU
 
+#ifdef _WIN32
+#include <windows.h>
+#endif  // _WIN32
+#include <memory>
+
+struct ShmFile {  // Platform-specific OS handles; must match ctypes ShmFile.
+#ifdef _WIN32
+  HANDLE backing_file_handle_;  // file on disk backing the mapping
+  HANDLE shm_mapping_handle_;   // file-mapping object
+  ShmFile(HANDLE backing_file_handle, HANDLE shm_mapping_handle)
+      : backing_file_handle_(backing_file_handle),
+        shm_mapping_handle_(shm_mapping_handle) {}
+#else
+  int shm_fd_;  // descriptor returned by shm_open
+  explicit ShmFile(int shm_fd) : shm_fd_(shm_fd) {}
+#endif  // _WIN32
+};
+
 struct SharedMemoryHandle {
   std::string triton_shm_name_;
   std::string shm_key_;
-#ifdef TRITON_ENABLE_GPU
-  cudaIpcMemHandle_t cuda_shm_handle_;
-  int device_id_;
-#endif  // TRITON_ENABLE_GPU
   void* base_addr_;
-  int shm_fd_;
+  std::unique_ptr<ShmFile> platform_handle_;
   size_t offset_;
   size_t byte_size_;
 };