From 21309542e69e1821ff8e905fa60d8852ac12a73f Mon Sep 17 00:00:00 2001 From: Marcin Junczys-Dowmunt Date: Tue, 2 Oct 2018 16:58:47 -0700 Subject: [PATCH 1/8] add . as include directory --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ca45178b..04a74ccf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,7 +79,7 @@ if (NOT MSVC) install(FILES "${CMAKE_BINARY_DIR}/sentencepiece.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) endif() -include_directories(${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR}) +include_directories("." ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR}) if (SPM_BUILD_TEST) enable_testing() From 828d58140a756a769023e1e29575d4f819e50f0e Mon Sep 17 00:00:00 2001 From: Anthony Aue Date: Wed, 14 Nov 2018 11:09:25 -0800 Subject: [PATCH 2/8] Messy first attempt at getting a usable windows dll. Cleanup to follow. --- src/CMakeLists.txt | 20 ++++++++++++++++---- src/freelist.h | 2 +- src/sentencepiece_processor.h | 5 +++-- src/unigram_model.h | 4 +++- test.bat | 9 ++++++--- 5 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ebfcaa64..45042aaf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,7 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.! - +include(GenerateExportHeader) +set(CMAKE_SYSTEM_VERSION 8.1) find_package(Protobuf REQUIRED) include_directories(${Protobuf_INCLUDE_DIRS}) protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto) @@ -130,6 +131,13 @@ if (SPM_ENABLE_TCMALLOC) endif() endif() +#build shared library with minimal exports, suitable for +#linking on Windows (i.e. 
do not export all of the stuff in +#the standard library) +if (SPM_ENABLE_SHARED_MINEXPORT) + add_library(sentencepiece_minexport SHARED ${SPM_SRCS}) +endif() + if (SPM_ENABLE_SHARED) add_library(sentencepiece SHARED ${SPM_SRCS}) add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS}) @@ -146,21 +154,25 @@ if (SPM_ENABLE_SHARED) target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece) set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static) set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0) - set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) - set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) +# set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) +# set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) if (MSVC) set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX "_import.lib") set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX "_import.lib") + set(CMAKE_CXX_FLAGS "/wd4251 ${CMAKE_CXX_FLAGS}") elseif (MINGW) set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX ".dll.a") set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX ".dll.a") endif() + add_definitions(-DPROTOBUF_USE_DLLS) else() add_library(sentencepiece ALIAS sentencepiece-static) add_library(sentencepiece_train ALIAS sentencepiece_train-static) set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static) endif() +generate_export_header(sentencepiece) + set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece") set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train") @@ -211,7 +223,7 @@ install(TARGETS ${SPM_INSTALLTARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) 
-install(FILES sentencepiece_trainer.h sentencepiece_processor.h +install(FILES sentencepiece_trainer.h sentencepiece_processor.h sentencepiece_exports.h DESTINATION ${CMAKE_INSTALL_INCDIR}) file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir) diff --git a/src/freelist.h b/src/freelist.h index e39c3381..b3e977a6 100644 --- a/src/freelist.h +++ b/src/freelist.h @@ -33,7 +33,7 @@ class FreeList { // `Free` doesn't free the object but reuse the allocated memory chunks. void Free() { - const int size = std::min(chunk_index_ + 1, freelist_.size()); + const int size = std::min(chunk_index_ + 1, freelist_.size()); for (int i = 0; i < size; ++i) { T* chunk = freelist_[i]; memset(chunk, 0, sizeof(*chunk) * chunk_size_); diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h index 61da691d..864ebbea 100644 --- a/src/sentencepiece_processor.h +++ b/src/sentencepiece_processor.h @@ -20,6 +20,7 @@ #include #include #include +#include "sentencepiece_export.h" namespace absl { class string_view; @@ -117,7 +118,7 @@ enum Code { }; } // namespace error -class Status { +class SENTENCEPIECE_EXPORT Status { public: Status(); ~Status(); @@ -160,7 +161,7 @@ class min_string_view { }; } // namespace util -class SentencePieceProcessor { +class SENTENCEPIECE_EXPORT SentencePieceProcessor { public: SentencePieceProcessor(); virtual ~SentencePieceProcessor(); diff --git a/src/unigram_model.h b/src/unigram_model.h index 466a1c24..b9d0aba8 100644 --- a/src/unigram_model.h +++ b/src/unigram_model.h @@ -14,6 +14,7 @@ #ifndef UNIGRAM_MODEL_H_ #define UNIGRAM_MODEL_H_ +#define NOMINMAX #include #include @@ -25,6 +26,7 @@ #include "model_interface.h" #include "sentencepiece_model.pb.h" #include "third_party/darts_clone/darts.h" +#include "sentencepiece_export.h" namespace sentencepiece { namespace unigram { @@ -113,7 +115,7 @@ class Lattice { model::FreeList node_allocator_; }; -class Model : public ModelInterface { +class SENTENCEPIECE_EXPORT Model : public ModelInterface 
{ public: explicit Model(const ModelProto &model_proto); Model() {} diff --git a/test.bat b/test.bat index 5b425080..9cb25a3d 100644 --- a/test.bat +++ b/test.bat @@ -9,16 +9,19 @@ set CURRENT_PATH=%~dp0 set LIBRARY_PATH=%CURRENT_PATH%build\root mkdir build +copy protobuf-cpp-%PROTOBUF_VERSION%.zip build cd build -curl -O -L https://github.com/google/protobuf/releases/download/v%PROTOBUF_VERSION%/protobuf-cpp-%PROTOBUF_VERSION%.zip +rem curl -O -L https://github.com/google/protobuf/releases/download/v%PROTOBUF_VERSION%/protobuf-cpp-%PROTOBUF_VERSION%.zip unzip protobuf-cpp-%PROTOBUF_VERSION%.zip cd protobuf-%PROTOBUF_VERSION%\cmake -cmake . -A %PLATFORM% -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% || goto :error +cmake . -A %PLATFORM% -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% -DBUILD_SHARED_LIBS=true -Dprotobuf_MSVC_STATIC_RUNTIME=OFF -DCMAKE_SYSTEM_VERSION=8.1 || goto :error +rem cmake . -A %PLATFORM% -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% || goto :error cmake --build . --config Release --target install || goto :error cd ..\.. -cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% +cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=ON -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% -DBUILD_SHARED_LIBS=true -DSPM_ENABLE_SHARED=true -DCMAKE_SYSTEM_VERSION=8.1 +rem cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% cmake --build . --config Release --target install || goto :error ctest -C Release || goto :error cpack || goto :error From e95aca4d88ef9970319b222031fc6cd82f99146a Mon Sep 17 00:00:00 2001 From: Anthony Aue Date: Thu, 15 Nov 2018 14:34:12 -0800 Subject: [PATCH 3/8] Beginning of cleanup. Separating changes required for min_export into separate files since they don't play nice with the existing bits. 
--- CMakeLists.txt | 11 ++-- CMakeLists_minexport.txt | 102 ++++++++++++++++++++++++++++++++++ src/CMakeLists.txt | 24 +++++--- src/sentencepiece_processor.h | 10 +++- src/unigram_model.h | 4 +- test.bat | 2 +- test_minexport.bat | 47 ++++++++++++++++ 7 files changed, 182 insertions(+), 18 deletions(-) create mode 100644 CMakeLists_minexport.txt create mode 100644 test_minexport.bat diff --git a/CMakeLists.txt b/CMakeLists.txt index 04a74ccf..61fc8e8a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,7 @@ project(sentencepiece VERSION ${SPM_VERSION} LANGUAGES CXX) option(SPM_ENABLE_NFKC_COMPILE "Enables NFKC compile" OFF) option(SPM_ENABLE_SHARED "Builds shared libaries in addition to static libraries." ON) +option(SPM_ENABLE_SHARED_MINEXPORT "Builds dll lib with minimal exports for windows." OFF) option(SPM_BUILD_TEST "Builds test binaries." OFF) option(SPM_COVERAGE "Runs gcov to test coverage." OFF) option(SPM_ENABLE_TENSORFLOW_SHARED "Makes a tensorflow compatible shared file." 
OFF) @@ -37,10 +38,12 @@ set(includedir "\${prefix}/include") set(GNUCXX_STD_SUPPORT_VERSION "4.3") if(MSVC) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) + #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}) + #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_FLAGS_REALEASE} /MD") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_FLAGS_REALEASE} /MD") add_definitions("/wd4267 /wd4244 /wd4305 /Zc:strictStrings /utf-8") endif(MSVC) diff --git a/CMakeLists_minexport.txt b/CMakeLists_minexport.txt new file mode 100644 index 00000000..61fc8e8a --- /dev/null +++ b/CMakeLists_minexport.txt @@ -0,0 +1,102 @@ +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.! 
+ +cmake_minimum_required(VERSION 3.1 FATAL_ERROR) +file(STRINGS "VERSION" SPM_VERSION) +message(STATUS "VERSION: ${SPM_VERSION}") +project(sentencepiece VERSION ${SPM_VERSION} LANGUAGES CXX) + +option(SPM_ENABLE_NFKC_COMPILE "Enables NFKC compile" OFF) +option(SPM_ENABLE_SHARED "Builds shared libaries in addition to static libraries." ON) +option(SPM_ENABLE_SHARED_MINEXPORT "Builds dll lib with minimal exports for windows." OFF) +option(SPM_BUILD_TEST "Builds test binaries." OFF) +option(SPM_COVERAGE "Runs gcov to test coverage." OFF) +option(SPM_ENABLE_TENSORFLOW_SHARED "Makes a tensorflow compatible shared file." OFF) +option(SPM_ENABLE_TCMALLOC "Enable TCMalloc if available." ON) +option(SPM_TCMALLOC_STATIC "Link static library of TCMALLOC." OFF) +option(SPM_NO_THREADLOCAL "Disable thread_local operator" OFF) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + + +set(prefix ${CMAKE_INSTALL_PREFIX}) +set(exec_prefix "\${prefix}") +set(libdir "\${exec_prefix}/lib") +set(includedir "\${prefix}/include") +set(GNUCXX_STD_SUPPORT_VERSION "4.3") + +if(MSVC) + #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}) + #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_FLAGS_REALEASE} /MD") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_FLAGS_REALEASE} /MD") + add_definitions("/wd4267 /wd4244 /wd4305 /Zc:strictStrings /utf-8") +endif(MSVC) + +if (APPLE) + set(CMAKE_MACOSX_RPATH ON) + set(CMAKE_SKIP_BUILD_RPATH FALSE) + set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) + if ("${isSystemDir}" STREQUAL "-1") + 
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + endif() +endif() + +if (NOT DEFINED CMAKE_INSTALL_BINDIR) + set(CMAKE_INSTALL_BINDIR bin) +endif() + +if (NOT DEFINED CMAKE_INSTALL_LIBDIR) + set(CMAKE_INSTALL_LIBDIR lib) +endif() + +if (NOT DEFINED CMAKE_INSTALL_LIBDIR) + set(CMAKE_INSTALL_LIBDIR lib) +endif() + +if (NOT DEFINED CMAKE_INSTALL_INCDIR) + set(CMAKE_INSTALL_INCDIR include) +endif() + +configure_file("${PROJECT_SOURCE_DIR}/config.h.in" "config.h") +configure_file("${PROJECT_SOURCE_DIR}/sentencepiece.pc.in" "sentencepiece.pc" @ONLY) + +if (NOT MSVC) + install(FILES "${CMAKE_BINARY_DIR}/sentencepiece.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +endif() + +include_directories("." ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR}) + +if (SPM_BUILD_TEST) + enable_testing() +endif() + +add_subdirectory(src) + +set(CPACK_SOURCE_GENERATOR "TXZ") +set(CPACK_GENERATOR "7Z") +set(CPACK_PACKAGE_VERSION "${SPM_VERSION}") +set(CPACK_STRIP_FILES TRUE) +set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE") +set(CPACK_RESOURCE_FILE_README "${PROJECT_SOURCE_DIR}/README.md") +set(CPACK_PACKAGE_CONTACT "taku@google.com") +set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Taku Kudo") +set(CPACK_SOURCE_IGNORE_FILES "/build/;/.git/;/dist/;/sdist/;~$;${CPACK_SOURCE_IGNORE_FILES}") +include(CPack) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 45042aaf..3deb7e3f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License.! 
include(GenerateExportHeader) -set(CMAKE_SYSTEM_VERSION 8.1) find_package(Protobuf REQUIRED) include_directories(${Protobuf_INCLUDE_DIRS}) protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto) @@ -136,6 +135,13 @@ endif() #the standard library) if (SPM_ENABLE_SHARED_MINEXPORT) add_library(sentencepiece_minexport SHARED ${SPM_SRCS}) + target_compile_definitions(sentencepiece_minexport PUBLIC PROTOBUF_USE_DLLS MINEXPORT) + generate_export_header(sentencepiece_minexport) + target_link_libraries(sentencepiece_minexport ${SPM_LIBS}) + if(MSVC) + target_compile_options(sentencepiece_minexport PUBLIC /wd4251) + #set(CMAKE_CXX_FLAGS "/wd4251 ${CMAKE_CXX_FLAGS}") + endif() endif() if (SPM_ENABLE_SHARED) @@ -154,24 +160,22 @@ if (SPM_ENABLE_SHARED) target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece) set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static) set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0) -# set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) -# set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) + set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) + set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) if (MSVC) set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX "_import.lib") set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX "_import.lib") - set(CMAKE_CXX_FLAGS "/wd4251 ${CMAKE_CXX_FLAGS}") + target_compile_definitions(sentencepiece PUBLIC PROTOBUF_USE_DLLS) elseif (MINGW) set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX ".dll.a") set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX ".dll.a") endif() - add_definitions(-DPROTOBUF_USE_DLLS) else() add_library(sentencepiece ALIAS sentencepiece-static) add_library(sentencepiece_train ALIAS 
sentencepiece_train-static) set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static) endif() -generate_export_header(sentencepiece) set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece") set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train") @@ -223,7 +227,13 @@ install(TARGETS ${SPM_INSTALLTARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -install(FILES sentencepiece_trainer.h sentencepiece_processor.h sentencepiece_exports.h +install(FILES sentencepiece_trainer.h sentencepiece_processor.h + DESTINATION ${CMAKE_INSTALL_INCDIR}) +if (SPM_ENABLE_SHARED_MINEXPORT) +install(FILES sentencepiece_minexport_export.h + DESTINATION ${CMAKE_INSTALL_INCDIR}) +endif() +install(FILES sentencepiece_trainer.h sentencepiece_processor.h DESTINATION ${CMAKE_INSTALL_INCDIR}) file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir) diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h index 864ebbea..16e20998 100644 --- a/src/sentencepiece_processor.h +++ b/src/sentencepiece_processor.h @@ -20,7 +20,11 @@ #include #include #include -#include "sentencepiece_export.h" +#ifdef MINEXPORT +#include "sentencepiece_minexport_export.h" +#else +#define SENTENCEPIECE_MINEXPORT_EXPORT +#endif namespace absl { class string_view; @@ -118,7 +122,7 @@ enum Code { }; } // namespace error -class SENTENCEPIECE_EXPORT Status { +class SENTENCEPIECE_MINEXPORT_EXPORT Status { public: Status(); ~Status(); @@ -161,7 +165,7 @@ class min_string_view { }; } // namespace util -class SENTENCEPIECE_EXPORT SentencePieceProcessor { +class SENTENCEPIECE_MINEXPORT_EXPORT SentencePieceProcessor { public: SentencePieceProcessor(); virtual ~SentencePieceProcessor(); diff --git a/src/unigram_model.h b/src/unigram_model.h index b9d0aba8..466a1c24 100644 --- a/src/unigram_model.h +++ b/src/unigram_model.h @@ -14,7 +14,6 @@ 
#ifndef UNIGRAM_MODEL_H_ #define UNIGRAM_MODEL_H_ -#define NOMINMAX #include #include @@ -26,7 +25,6 @@ #include "model_interface.h" #include "sentencepiece_model.pb.h" #include "third_party/darts_clone/darts.h" -#include "sentencepiece_export.h" namespace sentencepiece { namespace unigram { @@ -115,7 +113,7 @@ class Lattice { model::FreeList node_allocator_; }; -class SENTENCEPIECE_EXPORT Model : public ModelInterface { +class Model : public ModelInterface { public: explicit Model(const ModelProto &model_proto); Model() {} diff --git a/test.bat b/test.bat index 9cb25a3d..7fc4ad9f 100644 --- a/test.bat +++ b/test.bat @@ -20,7 +20,7 @@ rem cmake . -A %PLATFORM% -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% || goto :error cmake --build . --config Release --target install || goto :error cd ..\.. -cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=ON -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% -DBUILD_SHARED_LIBS=true -DSPM_ENABLE_SHARED=true -DCMAKE_SYSTEM_VERSION=8.1 +cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=ON -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% -DBUILD_SHARED_LIBS=true -DSPM_ENABLE_SHARED=true -DSPM_ENABLE_SHARED_MINEXPORT=ON rem cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% cmake --build . 
--config Release --target install || goto :error ctest -C Release || goto :error diff --git a/test_minexport.bat b/test_minexport.bat new file mode 100644 index 00000000..7fc4ad9f --- /dev/null +++ b/test_minexport.bat @@ -0,0 +1,47 @@ +set PROTOBUF_VERSION=3.6.1 +set PLATFORM=%1 +if "%PLATFORM%"=="" set PLATFORM=x64 +set PLATFORM_PREFIX= +if "%PLATFORM%"=="x64" set PLATFORM_PREFIX=-x64 +set _CL_=/utf-8 +set PATH=c:\Program Files\Git\usr\bin;c:\MinGW\bin;%PATH% +set CURRENT_PATH=%~dp0 +set LIBRARY_PATH=%CURRENT_PATH%build\root + +mkdir build +copy protobuf-cpp-%PROTOBUF_VERSION%.zip build +cd build + +rem curl -O -L https://github.com/google/protobuf/releases/download/v%PROTOBUF_VERSION%/protobuf-cpp-%PROTOBUF_VERSION%.zip +unzip protobuf-cpp-%PROTOBUF_VERSION%.zip +cd protobuf-%PROTOBUF_VERSION%\cmake +cmake . -A %PLATFORM% -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% -DBUILD_SHARED_LIBS=true -Dprotobuf_MSVC_STATIC_RUNTIME=OFF -DCMAKE_SYSTEM_VERSION=8.1 || goto :error +rem cmake . -A %PLATFORM% -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% || goto :error +cmake --build . --config Release --target install || goto :error + +cd ..\.. +cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=ON -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% -DBUILD_SHARED_LIBS=true -DSPM_ENABLE_SHARED=true -DSPM_ENABLE_SHARED_MINEXPORT=ON +rem cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% +cmake --build . 
--config Release --target install || goto :error +ctest -C Release || goto :error +cpack || goto :error + +cd ..\python +rem call :BuildPython C:\Python27%PLATFORM_PREFIX% +call :BuildPython C:\Python35%PLATFORM_PREFIX% +call :BuildPython C:\Python36%PLATFORM_PREFIX% +call :BuildPython C:\Python37%PLATFORM_PREFIX% +c:\Python37%PLATFORM_PREFIX%\python setup.py sdist || goto :error +exit + +:BuildPython +%1\python -m pip install wheel || goto :error +%1\python setup.py build || goto :error +%1\python setup.py bdist_wheel || goto :error +%1\python setup.py test || goto :error +rmdir /Q /S build +del /S *.pyd +exit /b + +:error +exit /b %errorlevel% From 9cb6ebbe51f3d501bbcd0b5541af01ad61f7b9e3 Mon Sep 17 00:00:00 2001 From: Anthony Aue Date: Thu, 15 Nov 2018 14:41:30 -0800 Subject: [PATCH 4/8] Revert files to original --- CMakeLists.txt | 11 +- src/CMakeLists.txt | 303 ++++++++--------------------------- src/CMakeLists_minexport.txt | 268 +++++++++++++++++++++++++++++++ test.bat | 9 +- 4 files changed, 342 insertions(+), 249 deletions(-) create mode 100644 src/CMakeLists_minexport.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 61fc8e8a..04a74ccf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,6 @@ project(sentencepiece VERSION ${SPM_VERSION} LANGUAGES CXX) option(SPM_ENABLE_NFKC_COMPILE "Enables NFKC compile" OFF) option(SPM_ENABLE_SHARED "Builds shared libaries in addition to static libraries." ON) -option(SPM_ENABLE_SHARED_MINEXPORT "Builds dll lib with minimal exports for windows." OFF) option(SPM_BUILD_TEST "Builds test binaries." OFF) option(SPM_COVERAGE "Runs gcov to test coverage." OFF) option(SPM_ENABLE_TENSORFLOW_SHARED "Makes a tensorflow compatible shared file." 
OFF) @@ -38,12 +37,10 @@ set(includedir "\${prefix}/include") set(GNUCXX_STD_SUPPORT_VERSION "4.3") if(MSVC) - #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) - #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}) - #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) - #string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_FLAGS_REALEASE} /MD") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_FLAGS_REALEASE} /MD") + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) add_definitions("/wd4267 /wd4244 /wd4305 /Zc:strictStrings /utf-8") endif(MSVC) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3deb7e3f..04a74ccf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,258 +11,89 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.! 
-include(GenerateExportHeader) -find_package(Protobuf REQUIRED) -include_directories(${Protobuf_INCLUDE_DIRS}) -protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto) -protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto) -include_directories(${CMAKE_CURRENT_BINARY_DIR}) -include_directories(${PROTOBUF_INCLUDE_DIR}) - -set(SPM_SRCS - ${SPM_PROTO_HDRS} - ${SPM_PROTO_SRCS} - ${SPM_MODEL_PROTO_HDRS} - ${SPM_MODEL_PROTO_SRCS} - bpe_model.h - common.h - normalizer.h - util.h - freelist.h - filesystem.h - flags.h - sentencepiece_processor.h - word_model.h - model_factory.h - char_model.h - model_interface.h - testharness.h - unigram_model.h - bpe_model.cc - char_model.cc - error.cc - filesystem.cc - flags.cc - model_factory.cc - model_interface.cc - normalizer.cc - sentencepiece_processor.cc - unigram_model.cc - util.cc - word_model.cc - ../third_party/absl/strings/string_view.cc) - -set(SPM_TRAIN_SRCS - ${SPM_PROTO_HDRS} - ${SPM_MODEL_PROTO_HDRS} - builder.h - normalization_rule.h - unicode_script.h - unicode_script_map.h - trainer_factory.h - trainer_interface.h - unigram_model_trainer.h - word_model_trainer.h - char_model_trainer.h - bpe_model_trainer.h - sentencepiece_trainer.h - builder.cc - unicode_script.cc - trainer_factory.cc - trainer_interface.cc - unigram_model_trainer.cc - word_model_trainer.cc - char_model_trainer.cc - bpe_model_trainer.cc - sentencepiece_trainer.cc) - -set(SPM_TEST_SRCS - ${SPM_PROTO_HDRS} - ${SPM_MODEL_PROTO_HDRS} - testharness.h - bpe_model_test.cc - bpe_model_trainer_test.cc - builder_test.cc - char_model_test.cc - char_model_trainer_test.cc - filesystem_test.cc - flags_test.cc - model_factory_test.cc - model_interface_test.cc - normalizer_test.cc - sentencepiece_processor_test.cc - sentencepiece_trainer_test.cc - test_main.cc - testharness.cc - trainer_factory_test.cc - trainer_interface_test.cc - unicode_script_test.cc - unigram_model_test.cc - unigram_model_trainer_test.cc - 
util_test.cc - word_model_test.cc - word_model_trainer_test.cc) - -find_package(Threads REQUIRED) - -set(SPM_LIBS ${PROTOBUF_LIBRARY} Threads::Threads) - -if (SPM_ENABLE_NFKC_COMPILE) - find_package(ICU 4.4 COMPONENTS i18n data uc REQUIRED) - include_directories(${ICU_INCLUDE_DIRS}) - add_definitions(-DENABLE_NFKC_COMPILE) - list(APPEND SPM_LIBS ICU::i18n ICU::data ICU::uc) -endif() - -if (SPM_ENABLE_TCMALLOC) - if (SPM_TCMALLOC_STATIC) - find_library(TCMALLOC_LIB NAMES libtcmalloc_minimal.a) - else() - find_library(TCMALLOC_LIB NAMES tcmalloc_minimal) - endif() - if (TCMALLOC_LIB) - message(STATUS "Found TCMalloc: ${TCMALLOC_LIB}") - list(APPEND SPM_LIBS ${TCMALLOC_LIB}) - add_definitions(-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free) - else() - message(STATUS "Not Found TCMalloc: ${TCMALLOC_LIB}") +cmake_minimum_required(VERSION 3.1 FATAL_ERROR) +file(STRINGS "VERSION" SPM_VERSION) +message(STATUS "VERSION: ${SPM_VERSION}") +project(sentencepiece VERSION ${SPM_VERSION} LANGUAGES CXX) + +option(SPM_ENABLE_NFKC_COMPILE "Enables NFKC compile" OFF) +option(SPM_ENABLE_SHARED "Builds shared libaries in addition to static libraries." ON) +option(SPM_BUILD_TEST "Builds test binaries." OFF) +option(SPM_COVERAGE "Runs gcov to test coverage." OFF) +option(SPM_ENABLE_TENSORFLOW_SHARED "Makes a tensorflow compatible shared file." OFF) +option(SPM_ENABLE_TCMALLOC "Enable TCMalloc if available." ON) +option(SPM_TCMALLOC_STATIC "Link static library of TCMALLOC." 
OFF) +option(SPM_NO_THREADLOCAL "Disable thread_local operator" OFF) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + + +set(prefix ${CMAKE_INSTALL_PREFIX}) +set(exec_prefix "\${prefix}") +set(libdir "\${exec_prefix}/lib") +set(includedir "\${prefix}/include") +set(GNUCXX_STD_SUPPORT_VERSION "4.3") + +if(MSVC) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) + add_definitions("/wd4267 /wd4244 /wd4305 /Zc:strictStrings /utf-8") +endif(MSVC) + +if (APPLE) + set(CMAKE_MACOSX_RPATH ON) + set(CMAKE_SKIP_BUILD_RPATH FALSE) + set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) + if ("${isSystemDir}" STREQUAL "-1") + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") endif() endif() -#build shared library with minimal exports, suitable for -#linking on Windows (i.e. 
do not export all of the stuff in -#the standard library) -if (SPM_ENABLE_SHARED_MINEXPORT) - add_library(sentencepiece_minexport SHARED ${SPM_SRCS}) - target_compile_definitions(sentencepiece_minexport PUBLIC PROTOBUF_USE_DLLS MINEXPORT) - generate_export_header(sentencepiece_minexport) - target_link_libraries(sentencepiece_minexport ${SPM_LIBS}) - if(MSVC) - target_compile_options(sentencepiece_minexport PUBLIC /wd4251) - #set(CMAKE_CXX_FLAGS "/wd4251 ${CMAKE_CXX_FLAGS}") - endif() +if (NOT DEFINED CMAKE_INSTALL_BINDIR) + set(CMAKE_INSTALL_BINDIR bin) endif() -if (SPM_ENABLE_SHARED) - add_library(sentencepiece SHARED ${SPM_SRCS}) - add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS}) +if (NOT DEFINED CMAKE_INSTALL_LIBDIR) + set(CMAKE_INSTALL_LIBDIR lib) endif() -add_library(sentencepiece-static STATIC ${SPM_SRCS}) -add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS}) - -target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS}) -target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS}) - -if (SPM_ENABLE_SHARED) - target_link_libraries(sentencepiece ${SPM_LIBS}) - target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece) - set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static) - set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0) - set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) - set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) - if (MSVC) - set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX "_import.lib") - set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX "_import.lib") - target_compile_definitions(sentencepiece PUBLIC PROTOBUF_USE_DLLS) - elseif (MINGW) - set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX ".dll.a") - set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX ".dll.a") - 
endif() -else() - add_library(sentencepiece ALIAS sentencepiece-static) - add_library(sentencepiece_train ALIAS sentencepiece_train-static) - set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static) +if (NOT DEFINED CMAKE_INSTALL_LIBDIR) + set(CMAKE_INSTALL_LIBDIR lib) endif() - -set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece") -set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train") - -if (NOT MSVC) - if (SPM_COVERAGE) - set(CMAKE_CXX_FLAGS "-O0 -Wall -fPIC -coverage ${CMAKE_CXX_FLAGS}") - else() - set(CMAKE_CXX_FLAGS "-O3 -Wall -fPIC ${CMAKE_CXX_FLAGS}") - endif() - if (SPM_ENABLE_TENSORFLOW_SHARED) - add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) - endif() - if (SPM_NO_THREADLOCAL) - add_definitions(-DSPM_NO_THREADLOCAL=1) - endif() - set_source_files_properties( - sentencepiece.pb.cc sentencepiece_model.pb.cc - PROPERTIES COMPILE_FLAGS "-Wno-misleading-indentation") - set_source_files_properties(${SPM_TEST_SRCS} - PROPERTIES COMPILE_FLAGS "-Wno-sign-compare") - if (SPM_ENABLE_SHARED) - set_property(TARGET sentencepiece APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC") - set_property(TARGET sentencepiece_train APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC") - endif() -endif() - -add_executable(spm_encode spm_encode_main.cc) -add_executable(spm_decode spm_decode_main.cc) -add_executable(spm_normalize spm_normalize_main.cc) -add_executable(spm_train spm_train_main.cc) -add_executable(spm_export_vocab spm_export_vocab_main.cc) - -target_link_libraries(spm_encode sentencepiece) -target_link_libraries(spm_decode sentencepiece) -target_link_libraries(spm_normalize sentencepiece sentencepiece_train) -target_link_libraries(spm_train sentencepiece sentencepiece_train) -target_link_libraries(spm_export_vocab sentencepiece) - -if (SPM_ENABLE_NFKC_COMPILE) - add_executable(compile_charsmap compile_charsmap_main.cc) - target_link_libraries(compile_charsmap sentencepiece sentencepiece_train) +if 
(NOT DEFINED CMAKE_INSTALL_INCDIR) + set(CMAKE_INSTALL_INCDIR include) endif() -list(APPEND SPM_INSTALLTARGETS - spm_encode spm_decode spm_normalize spm_train spm_export_vocab) +configure_file("${PROJECT_SOURCE_DIR}/config.h.in" "config.h") +configure_file("${PROJECT_SOURCE_DIR}/sentencepiece.pc.in" "sentencepiece.pc" @ONLY) -install(TARGETS ${SPM_INSTALLTARGETS} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -install(FILES sentencepiece_trainer.h sentencepiece_processor.h - DESTINATION ${CMAKE_INSTALL_INCDIR}) -if (SPM_ENABLE_SHARED_MINEXPORT) -install(FILES sentencepiece_minexport_export.h - DESTINATION ${CMAKE_INSTALL_INCDIR}) +if (NOT MSVC) + install(FILES "${CMAKE_BINARY_DIR}/sentencepiece.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) endif() -install(FILES sentencepiece_trainer.h sentencepiece_processor.h - DESTINATION ${CMAKE_INSTALL_INCDIR}) -file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir) +include_directories("." ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR}) -if (SPM_BUILD_TEST OR SPM_COVERAGE) +if (SPM_BUILD_TEST) enable_testing() - add_executable(spm_test test_main.cc ${SPM_TEST_SRCS}) - - if (SPM_COVERAGE) - target_link_libraries(spm_test sentencepiece sentencepiece_train "-lgcov") - else() - target_link_libraries(spm_test sentencepiece sentencepiece_train) - endif() - - set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --show-leak-kinds=definite,possible --error-exitcode=1") - find_program(CTEST_MEMORYCHECK_COMMAND NAMES valgrind) - include(Dart) - - add_test(NAME sentencepiece_test - COMMAND $ --data_dir=${data_dir}) endif() -if (SPM_COVERAGE) - add_custom_target(coverage - COMMAND mkdir -p coverage - COMMAND $ --data_dir=${data_dir} - COMMAND lcov -c -d . 
-o coverage.info - COMMAND lcov --remove coverage.info "include*" "/c++" "_test*" "testharness*" "third_party*" ".pb.*" -o coverage.info - COMMAND mkdir -p lcov_html - COMMAND genhtml -o lcov_html coverage.info) - add_dependencies(coverage spm_test) -endif() +add_subdirectory(src) + +set(CPACK_SOURCE_GENERATOR "TXZ") +set(CPACK_GENERATOR "7Z") +set(CPACK_PACKAGE_VERSION "${SPM_VERSION}") +set(CPACK_STRIP_FILES TRUE) +set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE") +set(CPACK_RESOURCE_FILE_README "${PROJECT_SOURCE_DIR}/README.md") +set(CPACK_PACKAGE_CONTACT "taku@google.com") +set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Taku Kudo") +set(CPACK_SOURCE_IGNORE_FILES "/build/;/.git/;/dist/;/sdist/;~$;${CPACK_SOURCE_IGNORE_FILES}") +include(CPack) diff --git a/src/CMakeLists_minexport.txt b/src/CMakeLists_minexport.txt new file mode 100644 index 00000000..3deb7e3f --- /dev/null +++ b/src/CMakeLists_minexport.txt @@ -0,0 +1,268 @@ +# Copyright 2018 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.! 
+include(GenerateExportHeader) +find_package(Protobuf REQUIRED) +include_directories(${Protobuf_INCLUDE_DIRS}) +protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto) +protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto) + +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${PROTOBUF_INCLUDE_DIR}) + +set(SPM_SRCS + ${SPM_PROTO_HDRS} + ${SPM_PROTO_SRCS} + ${SPM_MODEL_PROTO_HDRS} + ${SPM_MODEL_PROTO_SRCS} + bpe_model.h + common.h + normalizer.h + util.h + freelist.h + filesystem.h + flags.h + sentencepiece_processor.h + word_model.h + model_factory.h + char_model.h + model_interface.h + testharness.h + unigram_model.h + bpe_model.cc + char_model.cc + error.cc + filesystem.cc + flags.cc + model_factory.cc + model_interface.cc + normalizer.cc + sentencepiece_processor.cc + unigram_model.cc + util.cc + word_model.cc + ../third_party/absl/strings/string_view.cc) + +set(SPM_TRAIN_SRCS + ${SPM_PROTO_HDRS} + ${SPM_MODEL_PROTO_HDRS} + builder.h + normalization_rule.h + unicode_script.h + unicode_script_map.h + trainer_factory.h + trainer_interface.h + unigram_model_trainer.h + word_model_trainer.h + char_model_trainer.h + bpe_model_trainer.h + sentencepiece_trainer.h + builder.cc + unicode_script.cc + trainer_factory.cc + trainer_interface.cc + unigram_model_trainer.cc + word_model_trainer.cc + char_model_trainer.cc + bpe_model_trainer.cc + sentencepiece_trainer.cc) + +set(SPM_TEST_SRCS + ${SPM_PROTO_HDRS} + ${SPM_MODEL_PROTO_HDRS} + testharness.h + bpe_model_test.cc + bpe_model_trainer_test.cc + builder_test.cc + char_model_test.cc + char_model_trainer_test.cc + filesystem_test.cc + flags_test.cc + model_factory_test.cc + model_interface_test.cc + normalizer_test.cc + sentencepiece_processor_test.cc + sentencepiece_trainer_test.cc + test_main.cc + testharness.cc + trainer_factory_test.cc + trainer_interface_test.cc + unicode_script_test.cc + unigram_model_test.cc + unigram_model_trainer_test.cc + 
util_test.cc + word_model_test.cc + word_model_trainer_test.cc) + +find_package(Threads REQUIRED) + +set(SPM_LIBS ${PROTOBUF_LIBRARY} Threads::Threads) + +if (SPM_ENABLE_NFKC_COMPILE) + find_package(ICU 4.4 COMPONENTS i18n data uc REQUIRED) + include_directories(${ICU_INCLUDE_DIRS}) + add_definitions(-DENABLE_NFKC_COMPILE) + list(APPEND SPM_LIBS ICU::i18n ICU::data ICU::uc) +endif() + +if (SPM_ENABLE_TCMALLOC) + if (SPM_TCMALLOC_STATIC) + find_library(TCMALLOC_LIB NAMES libtcmalloc_minimal.a) + else() + find_library(TCMALLOC_LIB NAMES tcmalloc_minimal) + endif() + if (TCMALLOC_LIB) + message(STATUS "Found TCMalloc: ${TCMALLOC_LIB}") + list(APPEND SPM_LIBS ${TCMALLOC_LIB}) + add_definitions(-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free) + else() + message(STATUS "Not Found TCMalloc: ${TCMALLOC_LIB}") + endif() +endif() + +#build shared library with minimal exports, suitable for +#linking on Windows (i.e. do not export all of the stuff in +#the standard library) +if (SPM_ENABLE_SHARED_MINEXPORT) + add_library(sentencepiece_minexport SHARED ${SPM_SRCS}) + target_compile_definitions(sentencepiece_minexport PUBLIC PROTOBUF_USE_DLLS MINEXPORT) + generate_export_header(sentencepiece_minexport) + target_link_libraries(sentencepiece_minexport ${SPM_LIBS}) + if(MSVC) + target_compile_options(sentencepiece_minexport PUBLIC /wd4251) + #set(CMAKE_CXX_FLAGS "/wd4251 ${CMAKE_CXX_FLAGS}") + endif() +endif() + +if (SPM_ENABLE_SHARED) + add_library(sentencepiece SHARED ${SPM_SRCS}) + add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS}) +endif() + +add_library(sentencepiece-static STATIC ${SPM_SRCS}) +add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS}) + +target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS}) +target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS}) + +if (SPM_ENABLE_SHARED) + target_link_libraries(sentencepiece ${SPM_LIBS}) + target_link_libraries(sentencepiece_train 
${SPM_LIBS} sentencepiece) + set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static) + set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0) + set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) + set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) + if (MSVC) + set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX "_import.lib") + set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX "_import.lib") + target_compile_definitions(sentencepiece PUBLIC PROTOBUF_USE_DLLS) + elseif (MINGW) + set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX ".dll.a") + set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX ".dll.a") + endif() +else() + add_library(sentencepiece ALIAS sentencepiece-static) + add_library(sentencepiece_train ALIAS sentencepiece_train-static) + set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static) +endif() + + +set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece") +set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train") + +if (NOT MSVC) + if (SPM_COVERAGE) + set(CMAKE_CXX_FLAGS "-O0 -Wall -fPIC -coverage ${CMAKE_CXX_FLAGS}") + else() + set(CMAKE_CXX_FLAGS "-O3 -Wall -fPIC ${CMAKE_CXX_FLAGS}") + endif() + if (SPM_ENABLE_TENSORFLOW_SHARED) + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) + endif() + if (SPM_NO_THREADLOCAL) + add_definitions(-DSPM_NO_THREADLOCAL=1) + endif() + set_source_files_properties( + sentencepiece.pb.cc sentencepiece_model.pb.cc + PROPERTIES COMPILE_FLAGS "-Wno-misleading-indentation") + set_source_files_properties(${SPM_TEST_SRCS} + PROPERTIES COMPILE_FLAGS "-Wno-sign-compare") + if (SPM_ENABLE_SHARED) + set_property(TARGET sentencepiece APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC") + set_property(TARGET sentencepiece_train APPEND_STRING PROPERTY COMPILE_FLAGS " 
-DPIC") + endif() +endif() + +add_executable(spm_encode spm_encode_main.cc) +add_executable(spm_decode spm_decode_main.cc) +add_executable(spm_normalize spm_normalize_main.cc) +add_executable(spm_train spm_train_main.cc) +add_executable(spm_export_vocab spm_export_vocab_main.cc) + +target_link_libraries(spm_encode sentencepiece) +target_link_libraries(spm_decode sentencepiece) +target_link_libraries(spm_normalize sentencepiece sentencepiece_train) +target_link_libraries(spm_train sentencepiece sentencepiece_train) +target_link_libraries(spm_export_vocab sentencepiece) + +if (SPM_ENABLE_NFKC_COMPILE) + add_executable(compile_charsmap compile_charsmap_main.cc) + target_link_libraries(compile_charsmap sentencepiece sentencepiece_train) +endif() + +list(APPEND SPM_INSTALLTARGETS + spm_encode spm_decode spm_normalize spm_train spm_export_vocab) + +install(TARGETS ${SPM_INSTALLTARGETS} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) +install(FILES sentencepiece_trainer.h sentencepiece_processor.h + DESTINATION ${CMAKE_INSTALL_INCDIR}) +if (SPM_ENABLE_SHARED_MINEXPORT) +install(FILES sentencepiece_minexport_export.h + DESTINATION ${CMAKE_INSTALL_INCDIR}) +endif() +install(FILES sentencepiece_trainer.h sentencepiece_processor.h + DESTINATION ${CMAKE_INSTALL_INCDIR}) + +file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir) + +if (SPM_BUILD_TEST OR SPM_COVERAGE) + enable_testing() + add_executable(spm_test test_main.cc ${SPM_TEST_SRCS}) + + if (SPM_COVERAGE) + target_link_libraries(spm_test sentencepiece sentencepiece_train "-lgcov") + else() + target_link_libraries(spm_test sentencepiece sentencepiece_train) + endif() + + set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --show-leak-kinds=definite,possible --error-exitcode=1") + find_program(CTEST_MEMORYCHECK_COMMAND NAMES valgrind) + include(Dart) + + add_test(NAME sentencepiece_test + COMMAND $ --data_dir=${data_dir}) +endif() 
+ +if (SPM_COVERAGE) + add_custom_target(coverage + COMMAND mkdir -p coverage + COMMAND $ --data_dir=${data_dir} + COMMAND lcov -c -d . -o coverage.info + COMMAND lcov --remove coverage.info "include*" "/c++" "_test*" "testharness*" "third_party*" ".pb.*" -o coverage.info + COMMAND mkdir -p lcov_html + COMMAND genhtml -o lcov_html coverage.info) + add_dependencies(coverage spm_test) +endif() diff --git a/test.bat b/test.bat index 7fc4ad9f..5b425080 100644 --- a/test.bat +++ b/test.bat @@ -9,19 +9,16 @@ set CURRENT_PATH=%~dp0 set LIBRARY_PATH=%CURRENT_PATH%build\root mkdir build -copy protobuf-cpp-%PROTOBUF_VERSION%.zip build cd build -rem curl -O -L https://github.com/google/protobuf/releases/download/v%PROTOBUF_VERSION%/protobuf-cpp-%PROTOBUF_VERSION%.zip +curl -O -L https://github.com/google/protobuf/releases/download/v%PROTOBUF_VERSION%/protobuf-cpp-%PROTOBUF_VERSION%.zip unzip protobuf-cpp-%PROTOBUF_VERSION%.zip cd protobuf-%PROTOBUF_VERSION%\cmake -cmake . -A %PLATFORM% -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% -DBUILD_SHARED_LIBS=true -Dprotobuf_MSVC_STATIC_RUNTIME=OFF -DCMAKE_SYSTEM_VERSION=8.1 || goto :error -rem cmake . -A %PLATFORM% -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% || goto :error +cmake . -A %PLATFORM% -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% || goto :error cmake --build . --config Release --target install || goto :error cd ..\.. -cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=ON -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% -DBUILD_SHARED_LIBS=true -DSPM_ENABLE_SHARED=true -DSPM_ENABLE_SHARED_MINEXPORT=ON -rem cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% +cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% cmake --build . 
--config Release --target install || goto :error ctest -C Release || goto :error cpack || goto :error From b0831263fac5426a84dbc4cb9c9b58174731ecd7 Mon Sep 17 00:00:00 2001 From: Anthony Aue Date: Thu, 15 Nov 2018 15:25:55 -0800 Subject: [PATCH 5/8] Fix bad revert --- src/CMakeLists.txt | 283 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 215 insertions(+), 68 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 04a74ccf..ebfcaa64 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,88 +12,235 @@ # See the License for the specific language governing permissions and # limitations under the License.! -cmake_minimum_required(VERSION 3.1 FATAL_ERROR) -file(STRINGS "VERSION" SPM_VERSION) -message(STATUS "VERSION: ${SPM_VERSION}") -project(sentencepiece VERSION ${SPM_VERSION} LANGUAGES CXX) - -option(SPM_ENABLE_NFKC_COMPILE "Enables NFKC compile" OFF) -option(SPM_ENABLE_SHARED "Builds shared libaries in addition to static libraries." ON) -option(SPM_BUILD_TEST "Builds test binaries." OFF) -option(SPM_COVERAGE "Runs gcov to test coverage." OFF) -option(SPM_ENABLE_TENSORFLOW_SHARED "Makes a tensorflow compatible shared file." OFF) -option(SPM_ENABLE_TCMALLOC "Enable TCMalloc if available." ON) -option(SPM_TCMALLOC_STATIC "Link static library of TCMALLOC." 
OFF) -option(SPM_NO_THREADLOCAL "Disable thread_local operator" OFF) - -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - - -set(prefix ${CMAKE_INSTALL_PREFIX}) -set(exec_prefix "\${prefix}") -set(libdir "\${exec_prefix}/lib") -set(includedir "\${prefix}/include") -set(GNUCXX_STD_SUPPORT_VERSION "4.3") - -if(MSVC) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) - add_definitions("/wd4267 /wd4244 /wd4305 /Zc:strictStrings /utf-8") -endif(MSVC) - -if (APPLE) - set(CMAKE_MACOSX_RPATH ON) - set(CMAKE_SKIP_BUILD_RPATH FALSE) - set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) - set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") - set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) - list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) - if ("${isSystemDir}" STREQUAL "-1") - set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") +find_package(Protobuf REQUIRED) +include_directories(${Protobuf_INCLUDE_DIRS}) +protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto) +protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto) + +include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${PROTOBUF_INCLUDE_DIR}) + +set(SPM_SRCS + ${SPM_PROTO_HDRS} + ${SPM_PROTO_SRCS} + ${SPM_MODEL_PROTO_HDRS} + ${SPM_MODEL_PROTO_SRCS} + bpe_model.h + common.h + normalizer.h + util.h + freelist.h + filesystem.h + flags.h + sentencepiece_processor.h + word_model.h + model_factory.h + char_model.h + model_interface.h + testharness.h + unigram_model.h + bpe_model.cc + char_model.cc + error.cc + filesystem.cc + flags.cc + model_factory.cc + model_interface.cc + normalizer.cc + sentencepiece_processor.cc + 
unigram_model.cc + util.cc + word_model.cc + ../third_party/absl/strings/string_view.cc) + +set(SPM_TRAIN_SRCS + ${SPM_PROTO_HDRS} + ${SPM_MODEL_PROTO_HDRS} + builder.h + normalization_rule.h + unicode_script.h + unicode_script_map.h + trainer_factory.h + trainer_interface.h + unigram_model_trainer.h + word_model_trainer.h + char_model_trainer.h + bpe_model_trainer.h + sentencepiece_trainer.h + builder.cc + unicode_script.cc + trainer_factory.cc + trainer_interface.cc + unigram_model_trainer.cc + word_model_trainer.cc + char_model_trainer.cc + bpe_model_trainer.cc + sentencepiece_trainer.cc) + +set(SPM_TEST_SRCS + ${SPM_PROTO_HDRS} + ${SPM_MODEL_PROTO_HDRS} + testharness.h + bpe_model_test.cc + bpe_model_trainer_test.cc + builder_test.cc + char_model_test.cc + char_model_trainer_test.cc + filesystem_test.cc + flags_test.cc + model_factory_test.cc + model_interface_test.cc + normalizer_test.cc + sentencepiece_processor_test.cc + sentencepiece_trainer_test.cc + test_main.cc + testharness.cc + trainer_factory_test.cc + trainer_interface_test.cc + unicode_script_test.cc + unigram_model_test.cc + unigram_model_trainer_test.cc + util_test.cc + word_model_test.cc + word_model_trainer_test.cc) + +find_package(Threads REQUIRED) + +set(SPM_LIBS ${PROTOBUF_LIBRARY} Threads::Threads) + +if (SPM_ENABLE_NFKC_COMPILE) + find_package(ICU 4.4 COMPONENTS i18n data uc REQUIRED) + include_directories(${ICU_INCLUDE_DIRS}) + add_definitions(-DENABLE_NFKC_COMPILE) + list(APPEND SPM_LIBS ICU::i18n ICU::data ICU::uc) +endif() + +if (SPM_ENABLE_TCMALLOC) + if (SPM_TCMALLOC_STATIC) + find_library(TCMALLOC_LIB NAMES libtcmalloc_minimal.a) + else() + find_library(TCMALLOC_LIB NAMES tcmalloc_minimal) + endif() + if (TCMALLOC_LIB) + message(STATUS "Found TCMalloc: ${TCMALLOC_LIB}") + list(APPEND SPM_LIBS ${TCMALLOC_LIB}) + add_definitions(-fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free) + else() + message(STATUS "Not Found TCMalloc: ${TCMALLOC_LIB}") endif() 
endif() -if (NOT DEFINED CMAKE_INSTALL_BINDIR) - set(CMAKE_INSTALL_BINDIR bin) +if (SPM_ENABLE_SHARED) + add_library(sentencepiece SHARED ${SPM_SRCS}) + add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS}) endif() -if (NOT DEFINED CMAKE_INSTALL_LIBDIR) - set(CMAKE_INSTALL_LIBDIR lib) +add_library(sentencepiece-static STATIC ${SPM_SRCS}) +add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS}) + +target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS}) +target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS}) + +if (SPM_ENABLE_SHARED) + target_link_libraries(sentencepiece ${SPM_LIBS}) + target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece) + set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static) + set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0) + set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) + set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) + if (MSVC) + set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX "_import.lib") + set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX "_import.lib") + elseif (MINGW) + set_target_properties(sentencepiece PROPERTIES IMPORT_SUFFIX ".dll.a") + set_target_properties(sentencepiece_train PROPERTIES IMPORT_SUFFIX ".dll.a") + endif() +else() + add_library(sentencepiece ALIAS sentencepiece-static) + add_library(sentencepiece_train ALIAS sentencepiece_train-static) + set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static) endif() -if (NOT DEFINED CMAKE_INSTALL_LIBDIR) - set(CMAKE_INSTALL_LIBDIR lib) +set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece") +set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train") + +if (NOT MSVC) + if (SPM_COVERAGE) + set(CMAKE_CXX_FLAGS "-O0 -Wall -fPIC -coverage 
${CMAKE_CXX_FLAGS}") + else() + set(CMAKE_CXX_FLAGS "-O3 -Wall -fPIC ${CMAKE_CXX_FLAGS}") + endif() + if (SPM_ENABLE_TENSORFLOW_SHARED) + add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) + endif() + if (SPM_NO_THREADLOCAL) + add_definitions(-DSPM_NO_THREADLOCAL=1) + endif() + set_source_files_properties( + sentencepiece.pb.cc sentencepiece_model.pb.cc + PROPERTIES COMPILE_FLAGS "-Wno-misleading-indentation") + set_source_files_properties(${SPM_TEST_SRCS} + PROPERTIES COMPILE_FLAGS "-Wno-sign-compare") + if (SPM_ENABLE_SHARED) + set_property(TARGET sentencepiece APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC") + set_property(TARGET sentencepiece_train APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC") + endif() endif() -if (NOT DEFINED CMAKE_INSTALL_INCDIR) - set(CMAKE_INSTALL_INCDIR include) +add_executable(spm_encode spm_encode_main.cc) +add_executable(spm_decode spm_decode_main.cc) +add_executable(spm_normalize spm_normalize_main.cc) +add_executable(spm_train spm_train_main.cc) +add_executable(spm_export_vocab spm_export_vocab_main.cc) + +target_link_libraries(spm_encode sentencepiece) +target_link_libraries(spm_decode sentencepiece) +target_link_libraries(spm_normalize sentencepiece sentencepiece_train) +target_link_libraries(spm_train sentencepiece sentencepiece_train) +target_link_libraries(spm_export_vocab sentencepiece) + +if (SPM_ENABLE_NFKC_COMPILE) + add_executable(compile_charsmap compile_charsmap_main.cc) + target_link_libraries(compile_charsmap sentencepiece sentencepiece_train) endif() -configure_file("${PROJECT_SOURCE_DIR}/config.h.in" "config.h") -configure_file("${PROJECT_SOURCE_DIR}/sentencepiece.pc.in" "sentencepiece.pc" @ONLY) +list(APPEND SPM_INSTALLTARGETS + spm_encode spm_decode spm_normalize spm_train spm_export_vocab) -if (NOT MSVC) - install(FILES "${CMAKE_BINARY_DIR}/sentencepiece.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) -endif() +install(TARGETS ${SPM_INSTALLTARGETS} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION 
${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) +install(FILES sentencepiece_trainer.h sentencepiece_processor.h + DESTINATION ${CMAKE_INSTALL_INCDIR}) -include_directories("." ${CMAKE_SOURCE_DIR} ${PROJECT_BINARY_DIR}) +file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir) -if (SPM_BUILD_TEST) +if (SPM_BUILD_TEST OR SPM_COVERAGE) enable_testing() + add_executable(spm_test test_main.cc ${SPM_TEST_SRCS}) + + if (SPM_COVERAGE) + target_link_libraries(spm_test sentencepiece sentencepiece_train "-lgcov") + else() + target_link_libraries(spm_test sentencepiece sentencepiece_train) + endif() + + set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --show-leak-kinds=definite,possible --error-exitcode=1") + find_program(CTEST_MEMORYCHECK_COMMAND NAMES valgrind) + include(Dart) + + add_test(NAME sentencepiece_test + COMMAND $ --data_dir=${data_dir}) endif() -add_subdirectory(src) - -set(CPACK_SOURCE_GENERATOR "TXZ") -set(CPACK_GENERATOR "7Z") -set(CPACK_PACKAGE_VERSION "${SPM_VERSION}") -set(CPACK_STRIP_FILES TRUE) -set(CPACK_RESOURCE_FILE_LICENSE "${PROJECT_SOURCE_DIR}/LICENSE") -set(CPACK_RESOURCE_FILE_README "${PROJECT_SOURCE_DIR}/README.md") -set(CPACK_PACKAGE_CONTACT "taku@google.com") -set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Taku Kudo") -set(CPACK_SOURCE_IGNORE_FILES "/build/;/.git/;/dist/;/sdist/;~$;${CPACK_SOURCE_IGNORE_FILES}") -include(CPack) +if (SPM_COVERAGE) + add_custom_target(coverage + COMMAND mkdir -p coverage + COMMAND $ --data_dir=${data_dir} + COMMAND lcov -c -d . 
-o coverage.info + COMMAND lcov --remove coverage.info "include*" "/c++" "_test*" "testharness*" "third_party*" ".pb.*" -o coverage.info + COMMAND mkdir -p lcov_html + COMMAND genhtml -o lcov_html coverage.info) + add_dependencies(coverage spm_test) +endif() From dab002065a53a7622e2bfc2e972c76244b653ac3 Mon Sep 17 00:00:00 2001 From: Anthony Aue Date: Thu, 15 Nov 2018 21:46:39 -0800 Subject: [PATCH 6/8] Add some documentation to the new parallel make files --- CMakeLists_minexport.txt | 10 ++++++++++ src/CMakeLists_minexport.txt | 11 +++++++++++ test_minexport.bat | 8 ++++++++ 3 files changed, 29 insertions(+) diff --git a/CMakeLists_minexport.txt b/CMakeLists_minexport.txt index 61fc8e8a..bd1fecfd 100644 --- a/CMakeLists_minexport.txt +++ b/CMakeLists_minexport.txt @@ -12,6 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License.! +# This can be used as a replacement for the existing CMakeLists.txt for +# the purposes of building a shared sentencepiece dll with MSVC machines. +# The default shared library build for MSVC exports all of the standard +# library symbols, making it unlinkable for most purposes in practice. + +# To build on Windows with MSVC, rename CMakeLists_minexport.txt in +# both the root dir and the src dir to CMakeLists.txt and run test_minexport.bat. +# Note that this only builds the new target, sentencepiece_minexport.dll|lib +# correctly. I have not tested the other build targets. + cmake_minimum_required(VERSION 3.1 FATAL_ERROR) file(STRINGS "VERSION" SPM_VERSION) message(STATUS "VERSION: ${SPM_VERSION}") diff --git a/src/CMakeLists_minexport.txt b/src/CMakeLists_minexport.txt index 3deb7e3f..7ca5d91a 100644 --- a/src/CMakeLists_minexport.txt +++ b/src/CMakeLists_minexport.txt @@ -11,6 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.!
+ +# This can be used as a replacement for the existing CMakeLists.txt for +# the purposes of building a shared sentencepiece dll with MSVC machines. +# The default shared library build for MSVC exports all of the standard +# library symbols, making it unlinkable for most purposes in practice. + +# To build on Windows with MSVC, rename CMakeLists_minexport.txt in +# both the root dir and the src dir to CMakeLists.txt and run test_minexport.bat. +# Note that this only builds the new target, sentencepiece_minexport.dll|lib +# correctly. I have not tested the other build targets. + include(GenerateExportHeader) find_package(Protobuf REQUIRED) include_directories(${Protobuf_INCLUDE_DIRS}) diff --git a/test_minexport.bat b/test_minexport.bat index 7fc4ad9f..d007d08e 100644 --- a/test_minexport.bat +++ b/test_minexport.bat @@ -1,3 +1,11 @@ +rem This is a version of the Windows build script (test.bat) that builds a minimal +rem export library of sentencepiece in a shared dll that is linkable to other projects +rem on Windows machines. I tried for way too long to make my changes play nicely with +rem the existing cmake files, but was unsuccessful, so I opted to make the changes in parallel +rem files instead. This file and the two CMakeLists_minexport.txt files in the root +rem directory and the src directory allow the creation of a shared Windows library +rem that does not export all of the symbols in the standard library.
+ set PROTOBUF_VERSION=3.6.1 set PLATFORM=%1 if "%PLATFORM%"=="" set PLATFORM=x64 From 9a94870db127216321680bb33360c1fbc83b67ae Mon Sep 17 00:00:00 2001 From: Marcin Junczys-Dowmunt Date: Fri, 30 Nov 2018 10:54:15 -0800 Subject: [PATCH 7/8] do not warn about unused vars in generated code --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ebfcaa64..0afc0bd5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -178,7 +178,7 @@ if (NOT MSVC) endif() set_source_files_properties( sentencepiece.pb.cc sentencepiece_model.pb.cc - PROPERTIES COMPILE_FLAGS "-Wno-misleading-indentation") + PROPERTIES COMPILE_FLAGS "-Wno-misleading-indentation -Wno-unused-variable") set_source_files_properties(${SPM_TEST_SRCS} PROPERTIES COMPILE_FLAGS "-Wno-sign-compare") if (SPM_ENABLE_SHARED) From 399bad125ae50cd85783e618509e994658209a82 Mon Sep 17 00:00:00 2001 From: Anthony Aue Date: Tue, 11 Dec 2018 12:09:37 -0800 Subject: [PATCH 8/8] Document minexport build --- readme_minexport_windows.txt | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 readme_minexport_windows.txt diff --git a/readme_minexport_windows.txt b/readme_minexport_windows.txt new file mode 100644 index 00000000..f4081d75 --- /dev/null +++ b/readme_minexport_windows.txt @@ -0,0 +1,32 @@ +To recreate these bits: + +- git clone https://github.com/marian-nmt/sentencepiece.git +- cd sentencepiece +- git checkout anthonyaue/min_export_dll (or wait for this to go into master +branch and just pull that). +- run test.bat. This will build the statically-linked sentencepiece binaries. 
+- copy build\src\Release\*.exe \usr\tmp\sentencepiece_pack\bin +- copy build\src\Release\*.lib \usr\tmp\sentencepiece_pack\lib\amd64 +- copy src\*.h \usr\tmp\sentencepiece_pack\include +- rmdir /s build +- rename CMakeLists.txt CMakeLists.txt.stock +- rename src\CMakeLists.txt src\CMakeLists.txt.stock +- copy CMakeLists_minexport.txt CMakeLists.txt +- copy src\CMakeLists_minexport.txt src\CMakeLists.txt +- run test_minexport.bat. This will build the dynamically-linked minimum + export library (needed for the managed wrapper). It fails to build a bunch + of other targets, but will build the parts you need. I spent way too much + time trying to get this to work, but I suck at CMake and CMake sucks at + Visual Studio / Windows so we're left with this inelegant hack. Sorry. + When prompted about unzipping protobuf, choose 'N' because you've already + done this. +- copy build\src\Release\sentencepiece_minexport.dll to + \usr\tmp\sentencepiece_pack\bin +- copy build\src\Release\sentencepiece_minexport.lib to + \usr\tmp\sentencepiece_pack\lib +- copy protobuf-3.6.1\cmake\Release\libprotobuf.dll to + \usr\tmp\sentencepiece_pack\bin +- Create your package (cd \usr\tmp; packagecreate SentencePiece + \usr\tmp\sentencepiece_pack VER) +- Update corext.config as appropriate. +