[libtorch] resurrect the port with 2.1.0

* https://github.com/pytorch/pytorch/releases/tag/v2.1.0
luncliff · Oct 8, 2023 · 63fa0a2 · 63fa0a2
1 parent e4113a2
commit 63fa0a2
Show file tree

Hide file tree

Showing 5 changed files with 901 additions and 0 deletions.
diff --git a/ports/libtorch/fix-build.patch b/ports/libtorch/fix-build.patch
@@ -0,0 +1,196 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 3a48eaf..ab58f31 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -21,6 +21,7 @@ cmake_policy(SET CMP0092 NEW)
+
+ # ---[ Project and semantic versioning.
+ project(Torch CXX C)
++include(GNUInstallDirs)
+
+ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
+   set(LINUX TRUE)
+@@ -484,6 +485,12 @@ if(LINUX)
+ endif()
+
+ if(MSVC)
++  add_compile_options(
++    /wd4251 # ... needs to have dll-interface to be used by clients of struct ...
++    /wd4275 # non dll-interface class ... used as base for dll-interface class ...
++    /wd4305 # truncation from 'type1' to 'type2' (narrowing initialization)
++    /wd4309 # truncation of constant value
++  )
+   set(CMAKE_NINJA_CMCLDEPS_RC OFF)
+   foreach(flag_var
+       CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
+@@ -686,7 +693,7 @@ if(NOT CMAKE_BUILD_TYPE)
+ endif()
+
+ # The below means we are cross compiling for arm64 or x86_64 on MacOSX
+-if(NOT IOS AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$")
++if(false)
+   set(CROSS_COMPILING_MACOSX TRUE)
+   # We need to compile a universal protoc to not fail protobuf build
+   # We set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY (vs executable) to succeed the cmake compiler check for cross-compiling
+@@ -704,6 +711,7 @@ if(NOT IOS AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_OSX_ARCHITECTURES M
+   set(PROTOBUF_PROTOC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc")
+   set(CAFFE2_CUSTOM_PROTOC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc")
+ endif()
++include(cmake/ProtoBuf.cmake)
+
+ # ---[ Misc checks to cope with various compiler modes
+ include(cmake/MiscCheck.cmake)
+@@ -719,7 +727,7 @@ endif()
+
+ set(BUILD_ONEDNN_GRAPH OFF)
+
+-include(cmake/Dependencies.cmake)
++include(cmake/vcpkg-dependencies.cmake) # see portfile.cmake
+
+ # Moved this cmake set option down here because CMAKE_CUDA_COMPILER_VERSION is not avaialble until now
+ cmake_dependent_option(
+@@ -1044,13 +1052,8 @@ include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/)
+ include_directories(BEFORE ${PROJECT_BINARY_DIR}/aten/src/)
+
+ if(USE_MIMALLOC)
+-  set(MI_OVERRIDE OFF)
+-  set(MI_BUILD_SHARED OFF)
+-  set(MI_BUILD_OBJECT OFF)
+-  set(MI_BUILD_TESTS OFF)
+-  add_definitions(-DUSE_MIMALLOC)
+-  add_subdirectory(third_party/mimalloc)
+-  include_directories(third_party/mimalloc/include)
++  add_compile_definitions(USE_MIMALLOC)
++  find_package(mimalloc CONFIG REQUIRED) # mimalloc
+ endif()
+
+ # ---[ Main build
+diff --git a/build_variables.bzl b/build_variables.bzl
+index bf9cf2b..48140aa 100644
+--- a/build_variables.bzl
++++ b/build_variables.bzl
+@@ -132,16 +132,16 @@ libtorch_sources_common = sorted(core_sources_common + torch_unpickler_common)
+ # The profilers are not needed in the lite interpreter build.
+ libtorch_profiler_sources = [
+     "torch/csrc/autograd/profiler_legacy.cpp",
+-    "torch/csrc/autograd/profiler_kineto.cpp",
+-    "torch/csrc/profiler/collection.cpp",
+-    "torch/csrc/profiler/data_flow.cpp",
+-    "torch/csrc/profiler/kineto_shim.cpp",
+-    "torch/csrc/profiler/kineto_client_interface.cpp",
++    # "torch/csrc/autograd/profiler_kineto.cpp",
++    # "torch/csrc/profiler/collection.cpp",
++    # "torch/csrc/profiler/data_flow.cpp",
++    # "torch/csrc/profiler/kineto_shim.cpp",
++    # "torch/csrc/profiler/kineto_client_interface.cpp",
+     "torch/csrc/profiler/orchestration/observer.cpp",
+-    "torch/csrc/profiler/orchestration/python_tracer.cpp",
++    # "torch/csrc/profiler/orchestration/python_tracer.cpp",
+     "torch/csrc/profiler/standalone/execution_trace_observer.cpp",
+-    "torch/csrc/profiler/standalone/itt_observer.cpp",
+-    "torch/csrc/profiler/standalone/nvtx_observer.cpp",
++    # "torch/csrc/profiler/standalone/itt_observer.cpp",
++    # "torch/csrc/profiler/standalone/nvtx_observer.cpp",
+     "torch/csrc/profiler/stubs/base.cpp",
+     "torch/csrc/profiler/orchestration/vulkan.cpp",
+     "torch/csrc/profiler/perf.cpp",
+diff --git a/c10/CMakeLists.txt b/c10/CMakeLists.txt
+index feebad7..b5b8cab 100644
+--- a/c10/CMakeLists.txt
++++ b/c10/CMakeLists.txt
+@@ -109,8 +109,8 @@ else()
+ endif()
+
+ if(USE_MIMALLOC)
+-  target_link_libraries(c10 PRIVATE "mimalloc-static")
+-  add_dependencies(c10 mimalloc-static)
++  find_package(mimalloc CONFIG REQUIRED) # vcpkg exports mimalloc or mimalloc-static depending on linkage
++  target_link_libraries(c10 PRIVATE $<IF:$<TARGET_EXISTS:mimalloc-static>,mimalloc-static,mimalloc>)
+ endif()
+
+ if(ANDROID)
+@@ -140,7 +140,11 @@ endif()
+ # Note: for now, we will put all export path into one single Caffe2Targets group
+ # to deal with the cmake deployment need. Inside the Caffe2Targets set, the
+ # individual libraries like libc10.so and libcaffe2.so are still self-contained.
+-install(TARGETS c10 EXPORT Caffe2Targets DESTINATION lib)
++install(TARGETS c10 EXPORT Caffe2Targets
++  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
++  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
++  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
++)
+ install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
+         DESTINATION include
+         FILES_MATCHING PATTERN "*.h")
+@@ -148,5 +152,5 @@ install(FILES ${CMAKE_BINARY_DIR}/c10/macros/cmake_macros.h
+         DESTINATION include/c10/macros)
+
+ if(MSVC AND C10_BUILD_SHARED_LIBS)
+-  install(FILES $<TARGET_PDB_FILE:c10> DESTINATION lib OPTIONAL)
++  install(FILES $<TARGET_PDB_FILE:c10> DESTINATION ${CMAKE_INSTALL_BINDIR} OPTIONAL)
+ endif()
+diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
+index 74d0d55..6928192 100644
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -107,15 +107,6 @@ endif()
+ # Note: the folders that are being commented out have not been properly
+ # addressed yet.
+
+-if(NOT MSVC AND USE_XNNPACK)
+-  if(NOT TARGET fxdiv)
+-    set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
+-    set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
+-    add_subdirectory(
+-      "${FXDIV_SOURCE_DIR}"
+-      "${CMAKE_BINARY_DIR}/FXdiv")
+-  endif()
+-endif()
+
+ add_subdirectory(core)
+ add_subdirectory(serialize)
+@@ -1022,9 +1013,6 @@ elseif(USE_CUDA)
+   endif()
+ endif()
+
+-if(NOT MSVC AND USE_XNNPACK)
+-  TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
+-endif()
+
+ # ==========================================================
+ # formerly-libtorch flags
+@@ -1475,7 +1463,11 @@ endif()
+
+ caffe2_interface_library(torch torch_library)
+
+-install(TARGETS torch_cpu torch_cpu_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
++install(TARGETS torch_cpu torch_cpu_library EXPORT Caffe2Targets
++  RUNTIME DESTINATION "${TORCH_INSTALL_BIN_DIR}"
++  LIBRARY DESTINATION "${TORCH_INSTALL_LIB_DIR}"
++  ARCHIVE DESTINATION "${TORCH_INSTALL_LIB_DIR}"
++)
+
+ if(USE_CUDA)
+   install(TARGETS torch_cuda torch_cuda_library EXPORT Caffe2Targets DESTINATION "${TORCH_INSTALL_LIB_DIR}")
+@@ -1554,7 +1546,7 @@ target_link_libraries(torch_cpu PRIVATE flatbuffers)
+ # namespaces, so libtorch is loaded with all its dependencies in a local scope.
+ # That usually leads to missing symbol errors at run-time, so to avoid a situation like
+ # this we have to preload those libs in a global namespace.
+-if(BUILD_SHARED_LIBS)
++if(BUILD_SHARED_LIBS AND (NOT WIN32))
+   add_library(torch_global_deps SHARED ${TORCH_SRC_DIR}/csrc/empty.c)
+   if(HAVE_SOVERSION)
+     set_target_properties(torch_global_deps PROPERTIES
+@@ -1577,7 +1569,11 @@ if(BUILD_SHARED_LIBS)
+     target_link_libraries(torch_global_deps TBB::tbb)
+   endif()
+
+-  install(TARGETS torch_global_deps DESTINATION "${TORCH_INSTALL_LIB_DIR}")
++  install(TARGETS torch_global_deps
++    RUNTIME DESTINATION "${TORCH_INSTALL_BIN_DIR}"
++    LIBRARY DESTINATION "${TORCH_INSTALL_LIB_DIR}"
++    ARCHIVE DESTINATION "${TORCH_INSTALL_LIB_DIR}"
++  )
+ endif()
+
+ # ---[ Caffe2 HIP sources.
diff --git a/ports/libtorch/fix-windows.patch b/ports/libtorch/fix-windows.patch
@@ -0,0 +1,111 @@
+diff --git a/aten/src/ATen/Parallel.h b/aten/src/ATen/Parallel.h
+index ff14f56..90ba2f6 100644
+--- a/aten/src/ATen/Parallel.h
++++ b/aten/src/ATen/Parallel.h
+@@ -29,13 +29,7 @@ TORCH_API bool in_parallel_region();
+ namespace internal {
+
+ // Initialise num_threads lazily at first parallel call
+-inline void lazy_init_num_threads() {
+-  thread_local bool init = false;
+-  if (C10_UNLIKELY(!init)) {
+-    at::init_num_threads();
+-    init = true;
+-  }
+-}
++TORCH_API void lazy_init_num_threads();
+
+ TORCH_API void set_thread_num(int);
+
+diff --git a/aten/src/ATen/ParallelNative.cpp b/aten/src/ATen/ParallelNative.cpp
+index 3b9ea48..f0d7329 100644
+--- a/aten/src/ATen/ParallelNative.cpp
++++ b/aten/src/ATen/ParallelNative.cpp
+@@ -123,7 +123,13 @@ struct ParallelRegionGuard {
+ } // namespace
+
+ namespace internal {
+-
++void lazy_init_num_threads() {
++  thread_local bool init = false;
++  if (C10_UNLIKELY(!init)) {
++    at::init_num_threads();
++    init = true;
++  }
++}
+ inline std::tuple<size_t, size_t> calc_num_tasks_and_chunk_size(
+     int64_t begin, int64_t end, int64_t grain_size) {
+   if ((end - begin) < grain_size) {
+diff --git a/c10/util/Logging.cpp b/c10/util/Logging.cpp
+index ff8e1d6..ed371ac 100644
+--- a/c10/util/Logging.cpp
++++ b/c10/util/Logging.cpp
+@@ -1,6 +1,7 @@
+ #include <c10/util/Backtrace.h>
+ #include <c10/util/Flags.h>
+ #include <c10/util/Logging.h>
++#include <glog/logging.h>
+ #ifdef FBCODE_CAFFE2
+ #include <folly/synchronization/SanitizeThread.h>
+ #endif
+@@ -180,17 +181,6 @@ void setLogLevelFlagFromEnv();
+ } // namespace detail
+ } // namespace c10
+
+-#if defined(C10_USE_GFLAGS) && defined(C10_USE_GLOG)
+-// When GLOG depends on GFLAGS, these variables are being defined in GLOG
+-// directly via the GFLAGS definition, so we will use DECLARE_* to declare
+-// them, and use them in Caffe2.
+-// GLOG's minloglevel
+-DECLARE_int32(minloglevel);
+-// GLOG's verbose log value.
+-DECLARE_int32(v);
+-// GLOG's logtostderr value
+-DECLARE_bool(logtostderr);
+-#endif // defined(C10_USE_GFLAGS) && defined(C10_USE_GLOG)
+
+ #if !defined(C10_USE_GLOG)
+ // This backward compatibility flags are in order to deal with cases where
+@@ -218,23 +208,13 @@ C10_DEFINE_int(
+     google::GLOG_WARNING,
+     "The minimum log level that caffe2 will output.");
+
+-// Google glog's api does not have an external function that allows one to check
+-// if glog is initialized or not. It does have an internal function - so we are
+-// declaring it here. This is a hack but has been used by a bunch of others too
+-// (e.g. Torch).
+-namespace google {
+-namespace glog_internal_namespace_ {
+-bool IsGoogleLoggingInitialized();
+-} // namespace glog_internal_namespace_
+-} // namespace google
+-
+ namespace c10 {
+ namespace {
+
+ void initGoogleLogging(char const* name) {
+ #if !defined(_MSC_VER)
+   // This trick can only be used on UNIX platforms
+-  if (!::google::glog_internal_namespace_::IsGoogleLoggingInitialized())
++  if (!::google::IsGoogleLoggingInitialized())
+ #endif
+   {
+     ::google::InitGoogleLogging(name);
+diff --git a/torch/csrc/profiler/util.cpp b/torch/csrc/profiler/util.cpp
+index 180555f..3c271ee 100644
+--- a/torch/csrc/profiler/util.cpp
++++ b/torch/csrc/profiler/util.cpp
+@@ -1,5 +1,4 @@
+ #include <torch/csrc/autograd/function.h>
+-#include <torch/csrc/profiler/kineto_shim.h>
+ #include <torch/csrc/profiler/util.h>
+
+ #include <c10/util/ArrayRef.h>
+@@ -7,6 +6,7 @@
+ #include <fmt/format.h>
+
+ #ifdef USE_KINETO
++#include <torch/csrc/profiler/kineto_shim.h>
+ #include <libkineto.h>
+ #endif
+