From 902cc770ed9b3416841494cdcc67833150cc4b91 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 20 Feb 2024 16:18:10 +1100 Subject: [PATCH 01/10] sentencepiece 0.2.0 --- recipe/meta.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 1cfefc00..96a0ccff 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "0.1.99" %} +{% set version = "0.2.0" %} package: name: sentencepiece-split @@ -6,7 +6,7 @@ package: source: url: https://github.com/google/sentencepiece/archive/refs/tags/v{{ version }}.tar.gz - sha256: 63617eaf56c7a3857597dcd8780461f57dd21381b56a27716ef7d7e02e14ced4 + sha256: 9970f0a0afee1648890293321665e5b2efa04eaec9f1671fcf8048f456f5bb86 patches: # trying to build both static & shared build seems to break on OSX - patches/0001-do-not-mix-static-shared-builds.patch @@ -27,7 +27,7 @@ source: - patches/0008-create-and-install-CMake-metadata.patch build: - number: 6 + number: 0 requirements: build: From a0abb83598c93ea8617d01133392dc159794df3f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 20 Feb 2024 16:37:59 +1100 Subject: [PATCH 02/10] rebase patches * drop abseil workarounds to use new upstream option SPM_ABSL_PROVIDER * drop patches messing with CMAKE_INSTALL_* * link libprotobuf-lite instead of libprotobuf as intended by upstream --- recipe/meta.yaml | 13 +- ...0001-do-not-mix-static-shared-builds.patch | 16 +- ...ild-vendored-abseil-libprotobuf-lite.patch | 51 +- ...-absolute-paths-for-CMAKE_INSTALL_-D.patch | 46 - ...ibs-headers-for-windows-in-setup.py.patch} | 12 +- .../0004-ACTUALLY-use-external-absl.patch | 1108 ---------- ...install-pkg-config-files-on-windows.patch} | 8 +- ...5-create-and-install-CMake-metadata.patch} | 20 +- ...sp-glue-code-belongs-in-third_party-.patch | 1774 ----------------- 9 files changed, 38 insertions(+), 3010 deletions(-) delete mode 100644 recipe/patches/0003-consistently-use-absolute-paths-for-CMAKE_INSTALL_-D.patch rename recipe/patches/{0006-point-to-our-libs-headers-for-windows-in-setup.py.patch => 0003-point-to-our-libs-headers-for-windows-in-setup.py.patch} (88%) delete mode 100644 recipe/patches/0004-ACTUALLY-use-external-absl.patch rename recipe/patches/{0007-also-install-pkg-config-files-on-windows.patch => 0004-also-install-pkg-config-files-on-windows.patch} (76%) rename recipe/patches/{0008-create-and-install-CMake-metadata.patch => 0005-create-and-install-CMake-metadata.patch} (88%) delete mode 100644 recipe/patches/0005-stop-pretending-sp-glue-code-belongs-in-third_party-.patch diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 96a0ccff..8bff7675 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -12,19 +12,12 @@ source: - patches/0001-do-not-mix-static-shared-builds.patch # unvendor abseil & protobuf-lite - patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch - # avoid installing into $PREFIX/lib64; make sure pkg-config template is filled correctly - - patches/0003-consistently-use-absolute-paths-for-CMAKE_INSTALL_-D.patch - # upstream sources still directly include third_party/absl... - - patches/0004-ACTUALLY-use-external-absl.patch - # upstream carries its own glue code under third_party/absl/flags; - # move it to the sentencepiece sources - - patches/0005-stop-pretending-sp-glue-code-belongs-in-third_party-.patch # ensure python bindings link to correct libs on windows - - patches/0006-point-to-our-libs-headers-for-windows-in-setup.py.patch + - patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch # install pkg-config metadata also on windows - - patches/0007-also-install-pkg-config-files-on-windows.patch + - patches/0004-also-install-pkg-config-files-on-windows.patch # install CMake metadata - - patches/0008-create-and-install-CMake-metadata.patch + - patches/0005-create-and-install-CMake-metadata.patch build: number: 0 diff --git a/recipe/patches/0001-do-not-mix-static-shared-builds.patch b/recipe/patches/0001-do-not-mix-static-shared-builds.patch index c0e46543..79b94a30 100644 --- a/recipe/patches/0001-do-not-mix-static-shared-builds.patch +++ b/recipe/patches/0001-do-not-mix-static-shared-builds.patch @@ -1,20 +1,20 @@ -From df7925b506acf44eb3f06ce94e812c8a23f83699 Mon Sep 17 00:00:00 2001 +From 2fe3e37744c810590e631c01fb57133080fc5f46 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 2 Dec 2021 08:39:53 +1100 -Subject: [PATCH 1/8] do not mix static & shared builds +Subject: [PATCH 1/5] do not mix static & shared builds --- src/CMakeLists.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt -index 1c7726e..39b0ef2 100644 +index 8d4a34f..fbdf238 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt -@@ -220,18 +220,18 @@ endif() - if (SPM_ENABLE_SHARED) - add_library(sentencepiece SHARED ${SPM_SRCS}) - add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS}) +@@ -228,18 +228,18 @@ if (SPM_ENABLE_SHARED) + target_link_libraries(sentencepiece log) + target_link_libraries(sentencepiece_train log) + endif() -endif() - -add_library(sentencepiece-static STATIC ${SPM_SRCS}) @@ -37,7 +37,7 @@ index 1c7726e..39b0ef2 100644 set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0) set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) -@@ -246,10 +246,10 @@ else() +@@ -254,10 +254,10 @@ else() add_library(sentencepiece ALIAS sentencepiece-static) add_library(sentencepiece_train ALIAS sentencepiece_train-static) set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static) diff --git a/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch b/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch index 673b37b5..a77643c7 100644 --- a/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch +++ b/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch @@ -1,58 +1,21 @@ -From 075ffeaf1de3319784ba18f2b516bb3daf9f8d16 Mon Sep 17 00:00:00 2001 +From ab5c20be4a987d1cd6d2e472634f5e1b9c11211f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 2 Dec 2021 10:05:12 +1100 -Subject: [PATCH 2/8] do not build vendored abseil & libprotobuf-lite +Subject: [PATCH 2/5] do not build vendored abseil & libprotobuf-lite +ensure we can use shared builds of libprotobuf also on windows --- - CMakeLists.txt | 6 +++--- - sentencepiece.pc.in | 2 +- src/CMakeLists.txt | 5 +++++ third_party/CMakeLists.txt | 5 +---- - 4 files changed, 10 insertions(+), 8 deletions(-) + 2 files changed, 6 insertions(+), 4 deletions(-) -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 1b3af04..7e40f5c 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -68,9 +68,9 @@ add_definitions(-D_FREEBSD) - endif() - - if (SPM_USE_BUILTIN_PROTOBUF) -- set(libprotobuf_lite "") -+ set(libprotobuf "") - else() -- set(libprotobuf_lite "-lprotobuf-lite") -+ set(libprotobuf "-lprotobuf") - endif() - - if (MSVC) -@@ -147,7 +147,7 @@ if (SPM_BUILD_TEST) - endif() - - if (SPM_USE_EXTERNAL_ABSL) -- add_subdirectory(third_party/abseil-cpp) -+ find_package(absl REQUIRED) - endif() - - add_subdirectory(src) -diff --git a/sentencepiece.pc.in b/sentencepiece.pc.in -index 6a5ba56..1108973 100644 ---- a/sentencepiece.pc.in -+++ b/sentencepiece.pc.in -@@ -6,5 +6,5 @@ includedir=@includedir_for_pc_file@ - Name: @PROJECT_NAME@ - Description: Unsupervised text tokenizer and detokenizer for Neural Network-based text generation. - Version: @PROJECT_VERSION@ --Libs: -L${libdir} -lsentencepiece -lsentencepiece_train @libprotobuf_lite@ @pkgconfiglibs@ -+Libs: -L${libdir} -lsentencepiece -lsentencepiece_train @libprotobuf@ @pkgconfiglibs@ - Cflags: -I${includedir} @pkgconfigcflags@ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt -index 39b0ef2..6dfd76d 100644 +index fbdf238..2b8aefa 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt -@@ -69,6 +69,11 @@ if (SPM_USE_BUILTIN_PROTOBUF) +@@ -71,6 +71,11 @@ if (SPM_PROTOBUF_PROVIDER STREQUAL "internal") include_directories(builtin_pb) - else() + elseif (SPM_PROTOBUF_PROVIDER STREQUAL "package") find_package(Protobuf REQUIRED) + if (MSVC) + add_definitions("/DPROTOBUF_USE_DLLS") diff --git a/recipe/patches/0003-consistently-use-absolute-paths-for-CMAKE_INSTALL_-D.patch b/recipe/patches/0003-consistently-use-absolute-paths-for-CMAKE_INSTALL_-D.patch deleted file mode 100644 index 66e3de4b..00000000 --- a/recipe/patches/0003-consistently-use-absolute-paths-for-CMAKE_INSTALL_-D.patch +++ /dev/null @@ -1,46 +0,0 @@ -From bb0bda21c70a52c388353876a64560b5f6243c7b Mon Sep 17 00:00:00 2001 -From: "H. Vetinari" -Date: Mon, 6 Dec 2021 21:18:54 +1100 -Subject: [PATCH 3/8] consistently use absolute paths for CMAKE_INSTALL_*DIR - ---- - CMakeLists.txt | 13 +++++-------- - 1 file changed, 5 insertions(+), 8 deletions(-) - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 7e40f5c..434530a 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -51,11 +51,8 @@ endif() - - if (UNIX) - include(GNUInstallDirs) -- set(prefix ${CMAKE_INSTALL_PREFIX}) -- set(exec_prefix "\${prefix}") -- set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}") -- set(includedir "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}") --else() -+endif() -+if (TRUE) - set(prefix ${CMAKE_INSTALL_PREFIX}) - set(exec_prefix "\${prefix}") - set(libdir "\${exec_prefix}/lib") -@@ -96,15 +93,15 @@ if (APPLE) - endif() - - if (NOT DEFINED CMAKE_INSTALL_BINDIR) -- set(CMAKE_INSTALL_BINDIR bin) -+ set(CMAKE_INSTALL_BINDIR "${CMAKE_INSTALL_PREFIX}/bin") - endif() - - if (NOT DEFINED CMAKE_INSTALL_LIBDIR) -- set(CMAKE_INSTALL_LIBDIR lib) -+ set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_PREFIX}/lib") - endif() - - if (NOT DEFINED CMAKE_INSTALL_INCDIR) -- set(CMAKE_INSTALL_INCDIR include) -+ set(CMAKE_INSTALL_INCDIR "${CMAKE_INSTALL_PREFIX}/include") - endif() - - # SPDX-License-Identifier: (MIT OR CC0-1.0) diff --git a/recipe/patches/0006-point-to-our-libs-headers-for-windows-in-setup.py.patch b/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch similarity index 88% rename from recipe/patches/0006-point-to-our-libs-headers-for-windows-in-setup.py.patch rename to recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch index b6993842..7cf807a6 100644 --- a/recipe/patches/0006-point-to-our-libs-headers-for-windows-in-setup.py.patch +++ b/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch @@ -1,7 +1,7 @@ -From e884436083b6d5c2beee6bc341f0a55958715f01 Mon Sep 17 00:00:00 2001 +From 428ac3758e24b1aeedec8e0568d128e2097d1646 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 11 Dec 2022 01:09:03 +1100 -Subject: [PATCH 6/8] point to our libs / headers for windows in setup.py +Subject: [PATCH 3/5] point to our libs / headers for windows in setup.py also do not risk building against bundled libs, nor setting /MT for the MSVC static runtime libs @@ -10,7 +10,7 @@ setting /MT for the MSVC static runtime libs 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/python/setup.py b/python/setup.py -index 5411231..d8f0b5e 100755 +index d600321..fb301ac 100755 --- a/python/setup.py +++ b/python/setup.py @@ -77,10 +77,11 @@ class build_ext(_build_ext): @@ -28,7 +28,7 @@ index 5411231..d8f0b5e 100755 cflags = cflags + run_pkg_config('cflags') libs = run_pkg_config('libs') else: -@@ -106,17 +107,21 @@ if os.name == 'nt': +@@ -108,17 +109,21 @@ if os.name == 'nt': arch = 'win32' if sys.maxsize > 2**32: arch = 'amd64' @@ -46,12 +46,12 @@ index 5411231..d8f0b5e 100755 libs = [ - '..\\build\\root\\lib\\sentencepiece.lib', - '..\\build\\root\\lib\\sentencepiece_train.lib', -+ # equivalent of -L$PREFIX/lib -lsentencepiece -lsentencepiece_train -lprotobuf ++ # equivalent of -L$PREFIX/lib -lsentencepiece -lsentencepiece_train -lprotobuf-lite + os.environ["LIBRARY_LIB"] + f"\\{x}.lib" + # protobuf actually has the lib-prefix in the name also on windows; + # since libsentencepiece is static on windows, we also need _its_ + # host dependencies for the link interface, i.e. also abseil -+ for x in ["sentencepiece", "sentencepiece_train", "libprotobuf", "abseil_dll"] ++ for x in ["sentencepiece", "sentencepiece_train", "libprotobuf-lite", "abseil_dll"] ] else: # build library locally with cmake and vc++. diff --git a/recipe/patches/0004-ACTUALLY-use-external-absl.patch b/recipe/patches/0004-ACTUALLY-use-external-absl.patch deleted file mode 100644 index 3e1784fc..00000000 --- a/recipe/patches/0004-ACTUALLY-use-external-absl.patch +++ /dev/null @@ -1,1108 +0,0 @@ -From 25b3524ec1fd000bab823d4e67cb25179f71aedd Mon Sep 17 00:00:00 2001 -From: "H. Vetinari" -Date: Mon, 21 Feb 2022 10:05:39 +1100 -Subject: [PATCH 4/8] ACTUALLY use external absl - -make include order consistent: -- own header -- std facilities -- external code -- internal code ---- - src/CMakeLists.txt | 6 ------ - src/bpe_model.cc | 3 ++- - src/bpe_model_trainer.cc | 7 ++++--- - src/bpe_model_trainer.h | 3 ++- - src/bpe_model_trainer_test.cc | 5 +++-- - src/builder.cc | 9 +++++---- - src/builder.h | 3 ++- - src/builder_test.cc | 3 ++- - src/char_model_trainer_test.cc | 5 +++-- - src/common.h | 3 ++- - src/compile_charsmap_main.cc | 5 +++-- - src/error.cc | 4 ++-- - src/filesystem.cc | 3 ++- - src/filesystem.h | 3 ++- - src/filesystem_test.cc | 3 ++- - src/init.h | 5 +++-- - src/model_factory.cc | 3 ++- - src/model_interface.cc | 5 +++-- - src/model_interface.h | 5 +++-- - src/model_interface_test.cc | 3 ++- - src/normalizer.cc | 9 +++++---- - src/normalizer.h | 3 ++- - src/pretokenizer_for_training.cc | 3 ++- - src/pretokenizer_for_training.h | 3 ++- - src/pretokenizer_for_training_test.cc | 8 +++++--- - src/sentencepiece_processor.cc | 17 +++++++++-------- - src/sentencepiece_processor.h | 9 ++------- - src/sentencepiece_processor_test.cc | 9 +++++---- - src/sentencepiece_trainer.cc | 13 +++++++------ - src/sentencepiece_trainer_test.cc | 3 ++- - src/spec_parser.h | 5 +++-- - src/spm_decode_main.cc | 5 +++-- - src/spm_encode_main.cc | 9 +++++---- - src/spm_export_vocab_main.cc | 3 ++- - src/spm_normalize_main.cc | 3 ++- - src/spm_train_main.cc | 9 +++++---- - src/testharness.cc | 3 ++- - src/testharness.h | 7 ++++--- - src/trainer_factory.cc | 3 ++- - src/trainer_interface.cc | 19 ++++++++++--------- - src/trainer_interface.h | 3 ++- - src/trainer_interface_test.cc | 5 +++-- - src/unicode_script.cc | 3 ++- - src/unicode_script_map.h | 4 +++- - src/unicode_script_test.cc | 3 ++- - src/unigram_model.cc | 9 +++++---- - src/unigram_model_test.cc | 5 +++-- - src/unigram_model_trainer.cc | 9 +++++---- - src/unigram_model_trainer.h | 3 ++- - src/unigram_model_trainer_test.cc | 5 +++-- - src/util.h | 3 ++- - src/util_test.cc | 3 ++- - src/word_model_trainer.cc | 5 +++-- - src/word_model_trainer_test.cc | 5 +++-- - 54 files changed, 168 insertions(+), 126 deletions(-) - -diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt -index 6dfd76d..d30a4c2 100644 ---- a/src/CMakeLists.txt -+++ b/src/CMakeLists.txt -@@ -89,12 +89,6 @@ endif() - include_directories(${CMAKE_CURRENT_BINARY_DIR}) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../third_party) - --if (MSVC) -- add_definitions("/D_USE_INTERNAL_STRING_VIEW") --else() -- add_definitions("-D_USE_INTERNAL_STRING_VIEW") --endif() -- - set(SPM_SRCS - ${PROTOBUF_LITE_SRCS} - ${SPM_PROTO_HDRS} -diff --git a/src/bpe_model.cc b/src/bpe_model.cc -index bc7ada1..6cb5ab4 100644 ---- a/src/bpe_model.cc -+++ b/src/bpe_model.cc -@@ -21,8 +21,9 @@ - #include - #include - -+#include "absl/container/flat_hash_map.h" -+ - #include "freelist.h" --#include "third_party/absl/container/flat_hash_map.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/bpe_model_trainer.cc b/src/bpe_model_trainer.cc -index de86f14..42d737a 100644 ---- a/src/bpe_model_trainer.cc -+++ b/src/bpe_model_trainer.cc -@@ -19,10 +19,11 @@ - #include - #include - -+#include "absl/container/flat_hash_set.h" -+#include "absl/strings/str_join.h" -+#include "absl/strings/str_replace.h" -+ - #include "pretokenizer_for_training.h" --#include "third_party/absl/container/flat_hash_set.h" --#include "third_party/absl/strings/str_join.h" --#include "third_party/absl/strings/str_replace.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h -index 2fdfb9c..980a42c 100644 ---- a/src/bpe_model_trainer.h -+++ b/src/bpe_model_trainer.h -@@ -21,8 +21,9 @@ - #include - #include - -+#include "absl/container/flat_hash_map.h" -+ - #include "sentencepiece_model.pb.h" --#include "third_party/absl/container/flat_hash_map.h" - #include "trainer_interface.h" - - namespace sentencepiece { -diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc -index 173eb9c..f977fd0 100644 ---- a/src/bpe_model_trainer_test.cc -+++ b/src/bpe_model_trainer_test.cc -@@ -15,13 +15,14 @@ - #include - #include - -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_join.h" -+ - #include "bpe_model_trainer.h" - #include "filesystem.h" - #include "sentencepiece_processor.h" - #include "sentencepiece_trainer.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_join.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/builder.cc b/src/builder.cc -index 822f6fc..f804e2f 100644 ---- a/src/builder.cc -+++ b/src/builder.cc -@@ -18,11 +18,12 @@ - #include - #include - -+#include "absl/strings/str_join.h" -+#include "absl/strings/str_replace.h" -+#include "absl/strings/str_split.h" -+#include "absl/strings/strip.h" -+ - #include "filesystem.h" --#include "third_party/absl/strings/str_join.h" --#include "third_party/absl/strings/str_replace.h" --#include "third_party/absl/strings/str_split.h" --#include "third_party/absl/strings/strip.h" - - #ifdef ENABLE_NFKC_COMPILE - #include -diff --git a/src/builder.h b/src/builder.h -index 094da72..8cd4d96 100644 ---- a/src/builder.h -+++ b/src/builder.h -@@ -19,10 +19,11 @@ - #include - #include - -+#include "absl/strings/string_view.h" -+ - #include "common.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/strings/string_view.h" - - namespace sentencepiece { - namespace normalizer { -diff --git a/src/builder_test.cc b/src/builder_test.cc -index 4acb7b3..f586fba 100644 ---- a/src/builder_test.cc -+++ b/src/builder_test.cc -@@ -12,13 +12,14 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - -+#include "absl/strings/str_cat.h" -+ - #include "builder.h" - #include "common.h" - #include "filesystem.h" - #include "normalizer.h" - #include "sentencepiece_trainer.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/char_model_trainer_test.cc b/src/char_model_trainer_test.cc -index 8c2e4b7..b49cea1 100644 ---- a/src/char_model_trainer_test.cc -+++ b/src/char_model_trainer_test.cc -@@ -15,12 +15,13 @@ - #include - #include - -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_join.h" -+ - #include "char_model_trainer.h" - #include "filesystem.h" - #include "sentencepiece_processor.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_join.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/common.h b/src/common.h -index ef5546d..5245004 100644 ---- a/src/common.h -+++ b/src/common.h -@@ -25,8 +25,9 @@ - #include - #include - -+#include "absl/strings/string_view.h" -+ - #include "config.h" --#include "third_party/absl/strings/string_view.h" - - #if defined(_WIN32) && !defined(__CYGWIN__) - #define OS_WIN -diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc -index da15328..23e5ef8 100644 ---- a/src/compile_charsmap_main.cc -+++ b/src/compile_charsmap_main.cc -@@ -18,12 +18,13 @@ - #include - #include - -+#include "absl/flags/flag.h" -+#include "absl/strings/string_view.h" -+ - #include "builder.h" - #include "filesystem.h" - #include "init.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/flags/flag.h" --#include "third_party/absl/strings/string_view.h" - - using sentencepiece::normalizer::Builder; - -diff --git a/src/error.cc b/src/error.cc -index d3792dc..19ef6f3 100644 ---- a/src/error.cc -+++ b/src/error.cc -@@ -21,8 +21,8 @@ - #ifdef _USE_EXTERNAL_ABSL - // Naive workaround to define minloglevel on external absl package. - // We want to define them in other cc file. --#include "third_party/absl/flags/flag.h" --#include "third_party/absl/flags/parse.h" -+#include "absl/flags/flag.h" -+#include "absl/flags/parse.h" - ABSL_FLAG(int32, minloglevel, 0, - "Messages logged at a lower level than this don't actually."); - #endif -diff --git a/src/filesystem.cc b/src/filesystem.cc -index 833c8f7..baf8ad9 100644 ---- a/src/filesystem.cc -+++ b/src/filesystem.cc -@@ -14,8 +14,9 @@ - - #include - -+#include "absl/memory/memory.h" -+ - #include "filesystem.h" --#include "third_party/absl/memory/memory.h" - #include "util.h" - - #if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE) -diff --git a/src/filesystem.h b/src/filesystem.h -index e572b4b..50e9fa8 100644 ---- a/src/filesystem.h -+++ b/src/filesystem.h -@@ -21,9 +21,10 @@ - #include - #include - -+#include "absl/strings/string_view.h" -+ - #include "common.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/strings/string_view.h" - - namespace sentencepiece { - namespace filesystem { -diff --git a/src/filesystem_test.cc b/src/filesystem_test.cc -index 790e756..3c9fbdc 100644 ---- a/src/filesystem_test.cc -+++ b/src/filesystem_test.cc -@@ -12,9 +12,10 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - -+#include "absl/strings/str_cat.h" -+ - #include "filesystem.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/init.h b/src/init.h -index 6ae047e..1f4c292 100644 ---- a/src/init.h -+++ b/src/init.h -@@ -15,9 +15,10 @@ - #ifndef INIT_H_ - #define INIT_H_ - -+#include "absl/flags/flag.h" -+#include "absl/flags/parse.h" -+ - #include "common.h" --#include "third_party/absl/flags/flag.h" --#include "third_party/absl/flags/parse.h" - - #ifdef _USE_EXTERNAL_PROTOBUF - #include "google/protobuf/message_lite.h" -diff --git a/src/model_factory.cc b/src/model_factory.cc -index be99501..a7ccf0f 100644 ---- a/src/model_factory.cc -+++ b/src/model_factory.cc -@@ -12,10 +12,11 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - -+#include "absl/memory/memory.h" -+ - #include "bpe_model.h" - #include "char_model.h" - #include "model_factory.h" --#include "third_party/absl/memory/memory.h" - #include "unigram_model.h" - #include "word_model.h" - -diff --git a/src/model_interface.cc b/src/model_interface.cc -index c49be1e..3ab6a35 100644 ---- a/src/model_interface.cc -+++ b/src/model_interface.cc -@@ -14,10 +14,11 @@ - - #include - -+#include "absl/memory/memory.h" -+#include "absl/strings/str_format.h" -+ - #include "model_interface.h" - #include "sentencepiece_model.pb.h" --#include "third_party/absl/memory/memory.h" --#include "third_party/absl/strings/str_format.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/model_interface.h b/src/model_interface.h -index 06e9243..b22b671 100644 ---- a/src/model_interface.h -+++ b/src/model_interface.h -@@ -21,12 +21,13 @@ - #include - #include - -+#include "absl/container/flat_hash_map.h" -+#include "absl/strings/string_view.h" -+ - #include "common.h" - #include "normalizer.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/strings/string_view.h" - #include "third_party/darts_clone/darts.h" - #include "util.h" - -diff --git a/src/model_interface_test.cc b/src/model_interface_test.cc -index 09e41d3..865e427 100644 ---- a/src/model_interface_test.cc -+++ b/src/model_interface_test.cc -@@ -14,9 +14,10 @@ - - #include "model_interface.h" - -+#include "absl/container/flat_hash_map.h" -+ - #include "model_factory.h" - #include "testharness.h" --#include "third_party/absl/container/flat_hash_map.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/normalizer.cc b/src/normalizer.cc -index 2ab8084..e0af25b 100644 ---- a/src/normalizer.cc -+++ b/src/normalizer.cc -@@ -17,11 +17,12 @@ - #include - #include - -+#include "absl/memory/memory.h" -+#include "absl/strings/match.h" -+#include "absl/strings/string_view.h" -+#include "absl/strings/strip.h" -+ - #include "common.h" --#include "third_party/absl/memory/memory.h" --#include "third_party/absl/strings/match.h" --#include "third_party/absl/strings/string_view.h" --#include "third_party/absl/strings/strip.h" - #include "third_party/darts_clone/darts.h" - #include "util.h" - -diff --git a/src/normalizer.h b/src/normalizer.h -index c79813c..50b5108 100644 ---- a/src/normalizer.h -+++ b/src/normalizer.h -@@ -21,10 +21,11 @@ - #include - #include - -+#include "absl/strings/string_view.h" -+ - #include "common.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/strings/string_view.h" - #include "third_party/darts_clone/darts.h" - - namespace sentencepiece { -diff --git a/src/pretokenizer_for_training.cc b/src/pretokenizer_for_training.cc -index d4f492c..03df377 100644 ---- a/src/pretokenizer_for_training.cc -+++ b/src/pretokenizer_for_training.cc -@@ -11,11 +11,12 @@ - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License.! -+ - #include "pretokenizer_for_training.h" - - #include - --#include "third_party/absl/strings/str_replace.h" -+#include "absl/strings/str_replace.h" - - namespace sentencepiece { - namespace pretokenizer { -diff --git a/src/pretokenizer_for_training.h b/src/pretokenizer_for_training.h -index fa54f95..1ec5dfd 100644 ---- a/src/pretokenizer_for_training.h -+++ b/src/pretokenizer_for_training.h -@@ -18,10 +18,11 @@ - #include - #include - -+#include "absl/strings/string_view.h" -+ - #include "common.h" - #include "sentencepiece.pb.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/strings/string_view.h" - - namespace sentencepiece { - namespace pretokenizer { -diff --git a/src/pretokenizer_for_training_test.cc b/src/pretokenizer_for_training_test.cc -index 99db0c5..a968b57 100644 ---- a/src/pretokenizer_for_training_test.cc -+++ b/src/pretokenizer_for_training_test.cc -@@ -11,12 +11,14 @@ - // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - // See the License for the specific language governing permissions and - // limitations under the License.! -+ - #include "pretokenizer_for_training.h" - -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_join.h" -+#include "absl/strings/str_split.h" -+ - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_join.h" --#include "third_party/absl/strings/str_split.h" - #include "trainer_interface.h" - - namespace sentencepiece { -diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc -index f0df2f6..5d80844 100644 ---- a/src/sentencepiece_processor.cc -+++ b/src/sentencepiece_processor.cc -@@ -18,20 +18,21 @@ - #include - #include - -+#include "absl/memory/memory.h" -+#include "absl/strings/numbers.h" -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_join.h" -+#include "absl/strings/str_replace.h" -+#include "absl/strings/str_split.h" -+#include "absl/strings/string_view.h" -+#include "absl/strings/strip.h" -+ - #include "common.h" - #include "filesystem.h" - #include "model_factory.h" - #include "model_interface.h" - #include "normalizer.h" - #include "sentencepiece.pb.h" --#include "third_party/absl/memory/memory.h" --#include "third_party/absl/strings/numbers.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_join.h" --#include "third_party/absl/strings/str_replace.h" --#include "third_party/absl/strings/str_split.h" --#include "third_party/absl/strings/string_view.h" --#include "third_party/absl/strings/strip.h" - #include "unigram_model.h" - #include "util.h" - -diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h -index 14b1e8c..b81cff5 100644 ---- a/src/sentencepiece_processor.h -+++ b/src/sentencepiece_processor.h -@@ -15,19 +15,14 @@ - #ifndef SENTENCEPIECE_PROCESSOR_H_ - #define SENTENCEPIECE_PROCESSOR_H_ - -+#include "absl/strings/string_view.h" -+ - #include - #include - #include --#include - #include - #include - --#ifndef SWIG --namespace absl { --using std::string_view; --} // namespace absl --#endif // SWIG -- - namespace sentencepiece { - namespace util { - -diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc -index f05dc5d..4077c65 100644 ---- a/src/sentencepiece_processor_test.cc -+++ b/src/sentencepiece_processor_test.cc -@@ -16,6 +16,11 @@ - - #include - -+#include "absl/container/flat_hash_map.h" -+#include "absl/memory/memory.h" -+#include "absl/strings/str_cat.h" -+#include "absl/strings/string_view.h" -+ - #include "builder.h" - #include "filesystem.h" - #include "model_interface.h" -@@ -24,10 +29,6 @@ - #include "sentencepiece_model.pb.h" - #include "sentencepiece_trainer.h" - #include "testharness.h" --#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/memory/memory.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/string_view.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc -index b9fe64f..ae8a7af 100644 ---- a/src/sentencepiece_trainer.cc -+++ b/src/sentencepiece_trainer.cc -@@ -15,6 +15,13 @@ - #include - #include - -+#include "absl/flags/flag.h" -+#include "absl/strings/numbers.h" -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_split.h" -+#include "absl/strings/string_view.h" -+#include "absl/strings/strip.h" -+ - #include "builder.h" - #include "common.h" - #include "normalizer.h" -@@ -22,12 +29,6 @@ - #include "sentencepiece_model.pb.h" - #include "sentencepiece_trainer.h" - #include "spec_parser.h" --#include "third_party/absl/flags/flag.h" --#include "third_party/absl/strings/numbers.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_split.h" --#include "third_party/absl/strings/string_view.h" --#include "third_party/absl/strings/strip.h" - #include "trainer_factory.h" - #include "util.h" - -diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc -index e44e66b..0bb5aab 100644 ---- a/src/sentencepiece_trainer_test.cc -+++ b/src/sentencepiece_trainer_test.cc -@@ -12,11 +12,12 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - -+#include "absl/strings/str_cat.h" -+ - #include "filesystem.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_trainer.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/spec_parser.h b/src/spec_parser.h -index c5f0582..3871621 100644 ---- a/src/spec_parser.h -+++ b/src/spec_parser.h -@@ -18,9 +18,10 @@ - #include - #include - -+#include "absl/strings/ascii.h" -+#include "absl/strings/str_split.h" -+ - #include "sentencepiece_processor.h" --#include "third_party/absl/strings/ascii.h" --#include "third_party/absl/strings/str_split.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/spm_decode_main.cc b/src/spm_decode_main.cc -index bc49bd3..32fbb76 100644 ---- a/src/spm_decode_main.cc -+++ b/src/spm_decode_main.cc -@@ -16,13 +16,14 @@ - #include - #include - -+#include "absl/flags/flag.h" -+#include "absl/strings/str_split.h" -+ - #include "common.h" - #include "filesystem.h" - #include "init.h" - #include "sentencepiece.pb.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/flags/flag.h" --#include "third_party/absl/strings/str_split.h" - #include "util.h" - - ABSL_FLAG(std::string, model, "", "model file name"); -diff --git a/src/spm_encode_main.cc b/src/spm_encode_main.cc -index 2fbb850..02def40 100644 ---- a/src/spm_encode_main.cc -+++ b/src/spm_encode_main.cc -@@ -16,15 +16,16 @@ - #include - #include - -+#include "absl/container/flat_hash_map.h" -+#include "absl/flags/flag.h" -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_join.h" -+ - #include "common.h" - #include "filesystem.h" - #include "init.h" - #include "sentencepiece.pb.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/flags/flag.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_join.h" - #include "trainer_interface.h" - - ABSL_FLAG(std::string, model, "", "model file name"); -diff --git a/src/spm_export_vocab_main.cc b/src/spm_export_vocab_main.cc -index e5b97df..d0aea7d 100644 ---- a/src/spm_export_vocab_main.cc -+++ b/src/spm_export_vocab_main.cc -@@ -14,12 +14,13 @@ - - #include - -+#include "absl/flags/flag.h" -+ - #include "common.h" - #include "filesystem.h" - #include "init.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/flags/flag.h" - - ABSL_FLAG(std::string, output, "", "Output filename"); - ABSL_FLAG(std::string, model, "", "input model file name"); -diff --git a/src/spm_normalize_main.cc b/src/spm_normalize_main.cc -index 39f3ef9..7352f63 100644 ---- a/src/spm_normalize_main.cc -+++ b/src/spm_normalize_main.cc -@@ -12,6 +12,8 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - -+#include "absl/flags/flag.h" -+ - #include "builder.h" - #include "common.h" - #include "filesystem.h" -@@ -21,7 +23,6 @@ - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" - #include "sentencepiece_trainer.h" --#include "third_party/absl/flags/flag.h" - - ABSL_FLAG(std::string, model, "", "Model file name"); - ABSL_FLAG(bool, use_internal_normalization, false, -diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc -index 34369cd..08c519f 100644 ---- a/src/spm_train_main.cc -+++ b/src/spm_train_main.cc -@@ -14,14 +14,15 @@ - - #include - -+#include "absl/flags/flag.h" -+#include "absl/strings/ascii.h" -+#include "absl/strings/str_join.h" -+#include "absl/strings/str_split.h" -+ - #include "filesystem.h" - #include "init.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_trainer.h" --#include "third_party/absl/flags/flag.h" --#include "third_party/absl/strings/ascii.h" --#include "third_party/absl/strings/str_join.h" --#include "third_party/absl/strings/str_split.h" - #include "util.h" - - using sentencepiece::NormalizerSpec; -diff --git a/src/testharness.cc b/src/testharness.cc -index f6b1efe..6769cd8 100644 ---- a/src/testharness.cc -+++ b/src/testharness.cc -@@ -25,8 +25,9 @@ - #include - #include - -+#include "absl/strings/str_cat.h" -+ - #include "common.h" --#include "third_party/absl/strings/str_cat.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/testharness.h b/src/testharness.h -index 9879b06..8de9c5c 100644 ---- a/src/testharness.h -+++ b/src/testharness.h -@@ -20,10 +20,11 @@ - #include - #include - -+#include "absl/flags/flag.h" -+#include "absl/flags/parse.h" -+#include "absl/strings/string_view.h" -+ - #include "common.h" --#include "third_party/absl/flags/flag.h" --#include "third_party/absl/flags/parse.h" --#include "third_party/absl/strings/string_view.h" - - ABSL_DECLARE_FLAG(std::string, test_tmpdir); - ABSL_DECLARE_FLAG(std::string, test_srcdir); -diff --git a/src/trainer_factory.cc b/src/trainer_factory.cc -index d1d2541..34abe3f 100644 ---- a/src/trainer_factory.cc -+++ b/src/trainer_factory.cc -@@ -12,9 +12,10 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - -+#include "absl/memory/memory.h" -+ - #include "bpe_model_trainer.h" - #include "char_model_trainer.h" --#include "third_party/absl/memory/memory.h" - #include "trainer_factory.h" - #include "unigram_model_trainer.h" - #include "word_model_trainer.h" -diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc -index 968f7b9..27cec4f 100644 ---- a/src/trainer_interface.cc -+++ b/src/trainer_interface.cc -@@ -22,21 +22,22 @@ - #include - #include - -+#include "absl/container/flat_hash_map.h" -+#include "absl/memory/memory.h" -+#include "absl/random/distributions.h" -+#include "absl/random/random.h" -+#include "absl/strings/numbers.h" -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_format.h" -+#include "absl/strings/str_join.h" -+#include "absl/strings/str_split.h" -+ - #include "filesystem.h" - #include "model_factory.h" - #include "model_interface.h" - #include "normalizer.h" - #include "sentencepiece_processor.h" - #include "sentencepiece_trainer.h" --#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/memory/memory.h" --#include "third_party/absl/random/distributions.h" --#include "third_party/absl/random/random.h" --#include "third_party/absl/strings/numbers.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_format.h" --#include "third_party/absl/strings/str_join.h" --#include "third_party/absl/strings/str_split.h" - #include "unicode_script.h" - #include "util.h" - -diff --git a/src/trainer_interface.h b/src/trainer_interface.h -index 8d625a9..58425c5 100644 ---- a/src/trainer_interface.h -+++ b/src/trainer_interface.h -@@ -22,12 +22,13 @@ - #include - #include - -+#include "absl/container/flat_hash_map.h" -+ - #include "common.h" - #include "filesystem.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" - #include "sentencepiece_trainer.h" --#include "third_party/absl/container/flat_hash_map.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/trainer_interface_test.cc b/src/trainer_interface_test.cc -index feb970f..75e9f54 100644 ---- a/src/trainer_interface_test.cc -+++ b/src/trainer_interface_test.cc -@@ -16,10 +16,11 @@ - - #include - -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_format.h" -+ - #include "filesystem.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_format.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/unicode_script.cc b/src/unicode_script.cc -index 583dc30..5790566 100644 ---- a/src/unicode_script.cc -+++ b/src/unicode_script.cc -@@ -14,7 +14,8 @@ - - #include - --#include "third_party/absl/container/flat_hash_map.h" -+#include "absl/container/flat_hash_map.h" -+ - #include "unicode_script.h" - #include "unicode_script_map.h" - #include "util.h" -diff --git a/src/unicode_script_map.h b/src/unicode_script_map.h -index f2e67e9..6edfaa8 100644 ---- a/src/unicode_script_map.h -+++ b/src/unicode_script_map.h -@@ -14,7 +14,9 @@ - - #ifndef UNICODE_SCRIPT_DATA_H_ - #define UNICODE_SCRIPT_DATA_H_ --#include "third_party/absl/container/flat_hash_map.h" -+ -+#include "absl/container/flat_hash_map.h" -+ - namespace sentencepiece { - namespace unicode_script { - namespace { -diff --git a/src/unicode_script_test.cc b/src/unicode_script_test.cc -index ab33565..76f2aa0 100644 ---- a/src/unicode_script_test.cc -+++ b/src/unicode_script_test.cc -@@ -12,9 +12,10 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - -+#include "absl/strings/string_view.h" -+ - #include "common.h" - #include "testharness.h" --#include "third_party/absl/strings/string_view.h" - #include "unicode_script.h" - #include "util.h" - -diff --git a/src/unigram_model.cc b/src/unigram_model.cc -index d9f1ce9..ee085ec 100644 ---- a/src/unigram_model.cc -+++ b/src/unigram_model.cc -@@ -24,10 +24,11 @@ - #include - #include - --#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/memory/memory.h" --#include "third_party/absl/strings/str_split.h" --#include "third_party/absl/strings/string_view.h" -+#include "absl/container/flat_hash_map.h" -+#include "absl/memory/memory.h" -+#include "absl/strings/str_split.h" -+#include "absl/strings/string_view.h" -+ - #include "util.h" - - namespace sentencepiece { -diff --git a/src/unigram_model_test.cc b/src/unigram_model_test.cc -index bf22da3..567166a 100644 ---- a/src/unigram_model_test.cc -+++ b/src/unigram_model_test.cc -@@ -19,11 +19,12 @@ - #include - #include - -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_join.h" -+ - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_join.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc -index d58c408..baed626 100644 ---- a/src/unigram_model_trainer.cc -+++ b/src/unigram_model_trainer.cc -@@ -24,13 +24,14 @@ - #include - #include - -+#include "absl/container/flat_hash_map.h" -+#include "absl/memory/memory.h" -+#include "absl/strings/str_replace.h" -+#include "absl/strings/str_split.h" -+ - #include "normalizer.h" - #include "pretokenizer_for_training.h" - #include "sentencepiece_trainer.h" --#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/memory/memory.h" --#include "third_party/absl/strings/str_replace.h" --#include "third_party/absl/strings/str_split.h" - #include "third_party/esaxx/esa.hxx" // Suffix array library. - #include "unicode_script.h" - #include "util.h" -diff --git a/src/unigram_model_trainer.h b/src/unigram_model_trainer.h -index c6562e6..0a3f640 100644 ---- a/src/unigram_model_trainer.h -+++ b/src/unigram_model_trainer.h -@@ -20,8 +20,9 @@ - #include - #include - -+#include "absl/strings/string_view.h" -+ - #include "sentencepiece_model.pb.h" --#include "third_party/absl/strings/string_view.h" - #include "trainer_interface.h" - #include "unigram_model.h" - #include "util.h" -diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc -index 9d2c526..ab887b6 100644 ---- a/src/unigram_model_trainer_test.cc -+++ b/src/unigram_model_trainer_test.cc -@@ -17,13 +17,14 @@ - #include - #include - -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_join.h" -+ - #include "filesystem.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" - #include "sentencepiece_trainer.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_join.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/util.h b/src/util.h -index 5110291..ece08e9 100644 ---- a/src/util.h -+++ b/src/util.h -@@ -28,9 +28,10 @@ - #include - #include - -+#include "absl/strings/string_view.h" -+ - #include "common.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/strings/string_view.h" - - #ifdef SPM_NO_THREADLOCAL - #include -diff --git a/src/util_test.cc b/src/util_test.cc -index 71d006f..e264081 100644 ---- a/src/util_test.cc -+++ b/src/util_test.cc -@@ -14,9 +14,10 @@ - - #include - -+#include "absl/strings/str_cat.h" -+ - #include "filesystem.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" - #include "util.h" - - namespace sentencepiece { -diff --git a/src/word_model_trainer.cc b/src/word_model_trainer.cc -index 0b8b062..3f702fd 100644 ---- a/src/word_model_trainer.cc -+++ b/src/word_model_trainer.cc -@@ -15,8 +15,9 @@ - #include - #include - --#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/strings/string_view.h" -+#include "absl/container/flat_hash_map.h" -+#include "absl/strings/string_view.h" -+ - #include "util.h" - #include "word_model.h" - #include "word_model_trainer.h" -diff --git a/src/word_model_trainer_test.cc b/src/word_model_trainer_test.cc -index c4a8bc6..8288027 100644 ---- a/src/word_model_trainer_test.cc -+++ b/src/word_model_trainer_test.cc -@@ -15,11 +15,12 @@ - #include - #include - -+#include "absl/strings/str_cat.h" -+#include "absl/strings/str_join.h" -+ - #include "filesystem.h" - #include "sentencepiece_processor.h" - #include "testharness.h" --#include "third_party/absl/strings/str_cat.h" --#include "third_party/absl/strings/str_join.h" - #include "util.h" - #include "word_model_trainer.h" - diff --git a/recipe/patches/0007-also-install-pkg-config-files-on-windows.patch b/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch similarity index 76% rename from recipe/patches/0007-also-install-pkg-config-files-on-windows.patch rename to recipe/patches/0004-also-install-pkg-config-files-on-windows.patch index 06dba0eb..f8e3f6d0 100644 --- a/recipe/patches/0007-also-install-pkg-config-files-on-windows.patch +++ b/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch @@ -1,17 +1,17 @@ -From 8b8c5277ec74053eac438b6e4d006afa0138abed Mon Sep 17 00:00:00 2001 +From f6ec12fe46a666940007cb205e715ee6f3916e97 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 12 Dec 2022 14:36:45 +1100 -Subject: [PATCH 7/8] also install pkg-config files on windows +Subject: [PATCH 4/5] also install pkg-config files on windows --- CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt -index 434530a..a741455 100644 +index 9fa37fa..deb3043 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -131,9 +131,7 @@ join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") +@@ -142,9 +142,7 @@ join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") configure_file("${PROJECT_SOURCE_DIR}/config.h.in" "config.h") configure_file("${PROJECT_SOURCE_DIR}/sentencepiece.pc.in" "sentencepiece.pc" @ONLY) diff --git a/recipe/patches/0008-create-and-install-CMake-metadata.patch b/recipe/patches/0005-create-and-install-CMake-metadata.patch similarity index 88% rename from recipe/patches/0008-create-and-install-CMake-metadata.patch rename to recipe/patches/0005-create-and-install-CMake-metadata.patch index 368374dc..a708d988 100644 --- a/recipe/patches/0008-create-and-install-CMake-metadata.patch +++ b/recipe/patches/0005-create-and-install-CMake-metadata.patch @@ -1,7 +1,7 @@ -From 858ec6bd17ad9cbd6d3bbdd08faa22c70d764433 Mon Sep 17 00:00:00 2001 +From 366a1080b048a43452bdfb43968c5f2a44acdcf4 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 18 Jan 2023 19:44:15 +1100 -Subject: [PATCH 8/8] create and install CMake metadata +Subject: [PATCH 5/5] create and install CMake metadata --- CMakeLists.txt | 10 ++++++++++ @@ -11,10 +11,10 @@ Subject: [PATCH 8/8] create and install CMake metadata create mode 100644 sentencepieceConfig.cmake.in diff --git a/CMakeLists.txt b/CMakeLists.txt -index a741455..372f78f 100644 +index deb3043..56830cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -135,6 +135,16 @@ if (TRUE) +@@ -146,6 +146,16 @@ if (TRUE) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/sentencepiece.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) endif() @@ -46,10 +46,10 @@ index 0000000..b4c0474 +# Targets +include(${CMAKE_CURRENT_LIST_DIR}/sentencepieceTargets.cmake) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt -index c130e8b..2ea219e 100644 +index 2b8aefa..eed204f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt -@@ -232,7 +232,7 @@ endif() +@@ -244,7 +244,7 @@ endif() if (SPM_ENABLE_SHARED) target_link_libraries(sentencepiece ${SPM_LIBS}) target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece) @@ -58,7 +58,7 @@ index c130e8b..2ea219e 100644 set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0) set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) -@@ -246,7 +246,7 @@ if (SPM_ENABLE_SHARED) +@@ -258,7 +258,7 @@ if (SPM_ENABLE_SHARED) else() add_library(sentencepiece ALIAS sentencepiece-static) add_library(sentencepiece_train ALIAS sentencepiece_train-static) @@ -67,7 +67,7 @@ index c130e8b..2ea219e 100644 set_target_properties(sentencepiece-static PROPERTIES OUTPUT_NAME "sentencepiece") set_target_properties(sentencepiece_train-static PROPERTIES OUTPUT_NAME "sentencepiece_train") -@@ -293,7 +293,7 @@ if (SPM_ENABLE_NFKC_COMPILE) +@@ -305,7 +305,7 @@ if (SPM_ENABLE_NFKC_COMPILE) target_link_libraries(compile_charsmap sentencepiece sentencepiece_train) endif() @@ -76,7 +76,7 @@ index c130e8b..2ea219e 100644 spm_encode spm_decode spm_normalize spm_train spm_export_vocab) if (CMAKE_SYSTEM_NAME STREQUAL "iOS") -@@ -303,12 +303,21 @@ if (CMAKE_SYSTEM_NAME STREQUAL "iOS") +@@ -315,12 +315,21 @@ if (CMAKE_SYSTEM_NAME STREQUAL "iOS") LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) else() @@ -98,4 +98,4 @@ index c130e8b..2ea219e 100644 + install(FILES sentencepiece_trainer.h sentencepiece_processor.h DESTINATION ${CMAKE_INSTALL_INCDIR}) - if (NOT SPM_USE_BUILTIN_PROTOBUF) + if (NOT SPM_PROTOBUF_PROVIDER STREQUAL "internal") diff --git a/recipe/patches/0005-stop-pretending-sp-glue-code-belongs-in-third_party-.patch b/recipe/patches/0005-stop-pretending-sp-glue-code-belongs-in-third_party-.patch deleted file mode 100644 index 77b407f1..00000000 --- a/recipe/patches/0005-stop-pretending-sp-glue-code-belongs-in-third_party-.patch +++ /dev/null @@ -1,1774 +0,0 @@ -From 60017d890e3f1211cea3b00120e2498f4755d01d Mon Sep 17 00:00:00 2001 -From: "H. Vetinari" -Date: Mon, 21 Feb 2022 12:50:47 +1100 -Subject: [PATCH 5/8] stop pretending sp glue code belongs in third_party/absl - ---- - src/CMakeLists.txt | 4 +- - src/bpe_model_trainer_test.cc | 11 +- - src/builder_test.cc | 15 +- - src/char_model_trainer_test.cc | 5 +- - src/compile_charsmap_main.cc | 6 +- - src/error.cc | 9 -- - src/filesystem_test.cc | 5 +- - {third_party/absl => src/glue}/flags/flag.cc | 22 +-- - {third_party/absl => src/glue}/flags/flag.h | 18 +-- - {third_party/absl => src/glue}/flags/parse.h | 10 +- - .../absl => src/glue}/random/distributions.h | 4 +- - .../absl => src/glue}/random/random.h | 6 +- - src/init.h | 15 +- - src/init_test.cc | 61 ++++---- - src/sentencepiece_processor_test.cc | 9 +- - src/sentencepiece_trainer_test.cc | 27 ++-- - src/spm_decode_main.cc | 43 +++--- - src/spm_encode_main.cc | 74 +++++----- - src/spm_export_vocab_main.cc | 19 ++- - src/spm_normalize_main.cc | 45 +++--- - src/spm_train_main.cc | 138 +++++++++--------- - src/test_main.cc | 7 +- - src/testharness.cc | 5 +- - src/testharness.h | 8 +- - src/trainer_interface.cc | 10 +- - src/trainer_interface_test.cc | 7 +- - src/unigram_model_trainer_test.cc | 11 +- - src/util_test.cc | 5 +- - src/word_model_trainer_test.cc | 5 +- - 29 files changed, 304 insertions(+), 300 deletions(-) - rename {third_party/absl => src/glue}/flags/flag.cc (94%) - rename {third_party/absl => src/glue}/flags/flag.h (77%) - rename {third_party/absl => src/glue}/flags/parse.h (79%) - rename {third_party/absl => src/glue}/random/distributions.h (94%) - rename {third_party/absl => src/glue}/random/random.h (91%) - -diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt -index d30a4c2..c130e8b 100644 ---- a/src/CMakeLists.txt -+++ b/src/CMakeLists.txt -@@ -13,7 +13,9 @@ - # limitations under the License.! - - if (SPM_USE_EXTERNAL_ABSL) -- set(ABSL_FLAGS_SRCS "") -+ # originally part of third_party/absl, but actually -+ # only relevant for sentencepiece; now moved. -+ set(ABSL_FLAGS_SRCS "glue/flags/flag.cc") - set(ABSL_STRINGS_SRCS "") - list(APPEND SPM_LIBS absl::strings) - list(APPEND SPM_LIBS absl::flags) -diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc -index f977fd0..67b5b2d 100644 ---- a/src/bpe_model_trainer_test.cc -+++ b/src/bpe_model_trainer_test.cc -@@ -20,6 +20,7 @@ - - #include "bpe_model_trainer.h" - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "sentencepiece_processor.h" - #include "sentencepiece_trainer.h" - #include "testharness.h" -@@ -36,9 +37,9 @@ std::string RunTrainer( - const std::vector &input, int size, - const std::vector &user_defined_symbols = {}) { - const std::string input_file = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "input"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "input"); - const std::string model_prefix = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "model"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "model"); - { - auto output = filesystem::NewWritableFile(input_file); - for (const auto &line : input) { -@@ -94,13 +95,13 @@ static constexpr char kTestInputData[] = "wagahaiwa_nekodearu.txt"; - - TEST(BPETrainerTest, EndToEndTest) { - const std::string input = -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kTestInputData); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kTestInputData); - - ASSERT_TRUE( - SentencePieceTrainer::Train( - absl::StrCat( - "--model_prefix=", -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "tmp_model"), -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "tmp_model"), - " --input=", input, - " --vocab_size=8000 --normalization_rule_name=identity" - " --model_type=bpe --control_symbols= " -@@ -109,7 +110,7 @@ TEST(BPETrainerTest, EndToEndTest) { - - SentencePieceProcessor sp; - ASSERT_TRUE(sp.Load(std::string(util::JoinPath( -- absl::GetFlag(FLAGS_test_tmpdir), "tmp_model.model"))) -+ sentencepiece::GetFlag(FLAGS_test_tmpdir), "tmp_model.model"))) - .ok()); - EXPECT_EQ(8000, sp.GetPieceSize()); - -diff --git a/src/builder_test.cc b/src/builder_test.cc -index f586fba..c6e6344 100644 ---- a/src/builder_test.cc -+++ b/src/builder_test.cc -@@ -17,6 +17,7 @@ - #include "builder.h" - #include "common.h" - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "normalizer.h" - #include "sentencepiece_trainer.h" - #include "testharness.h" -@@ -144,7 +145,7 @@ TEST(BuilderTest, LoadCharsMapTest) { - Builder::CharsMap chars_map; - ASSERT_TRUE( - Builder::LoadCharsMap( -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kTestInputData), -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kTestInputData), - &chars_map) - .ok()); - -@@ -159,14 +160,14 @@ TEST(BuilderTest, LoadCharsMapTest) { - - ASSERT_TRUE( - Builder::SaveCharsMap( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "output.tsv"), -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "output.tsv"), - chars_map) - .ok()); - - Builder::CharsMap saved_chars_map; - ASSERT_TRUE( - Builder::LoadCharsMap( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "output.tsv"), -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "output.tsv"), - &saved_chars_map) - .ok()); - EXPECT_EQ(chars_map, saved_chars_map); -@@ -181,7 +182,7 @@ TEST(BuilderTest, LoadCharsMapTest) { - TEST(BuilderTest, LoadCharsMapWithEmptyeTest) { - { - auto output = filesystem::NewWritableFile( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "test.tsv")); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "test.tsv")); - output->WriteLine("0061\t0041"); - output->WriteLine("0062"); - output->WriteLine("0063\t\t#foo=>bar"); -@@ -189,7 +190,7 @@ TEST(BuilderTest, LoadCharsMapWithEmptyeTest) { - - Builder::CharsMap chars_map; - EXPECT_TRUE(Builder::LoadCharsMap( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "test.tsv"), -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "test.tsv"), - &chars_map) - .ok()); - -@@ -200,14 +201,14 @@ TEST(BuilderTest, LoadCharsMapWithEmptyeTest) { - - EXPECT_TRUE( - Builder::SaveCharsMap( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "test_out.tsv"), -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "test_out.tsv"), - chars_map) - .ok()); - - Builder::CharsMap new_chars_map; - EXPECT_TRUE( - Builder::LoadCharsMap( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "test_out.tsv"), -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "test_out.tsv"), - &new_chars_map) - .ok()); - EXPECT_EQ(chars_map, new_chars_map); -diff --git a/src/char_model_trainer_test.cc b/src/char_model_trainer_test.cc -index b49cea1..7192b68 100644 ---- a/src/char_model_trainer_test.cc -+++ b/src/char_model_trainer_test.cc -@@ -20,6 +20,7 @@ - - #include "char_model_trainer.h" - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "sentencepiece_processor.h" - #include "testharness.h" - #include "util.h" -@@ -33,9 +34,9 @@ namespace { - - std::string RunTrainer(const std::vector &input, int size) { - const std::string input_file = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "input"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "input"); - const std::string model_prefix = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "model"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "model"); - { - auto output = filesystem::NewWritableFile(input_file); - for (const auto &line : input) { -diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc -index 23e5ef8..88c217a 100644 ---- a/src/compile_charsmap_main.cc -+++ b/src/compile_charsmap_main.cc -@@ -18,17 +18,17 @@ - #include - #include - --#include "absl/flags/flag.h" - #include "absl/strings/string_view.h" - - #include "builder.h" - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "init.h" - #include "sentencepiece_processor.h" - - using sentencepiece::normalizer::Builder; - --ABSL_FLAG(bool, output_precompiled_header, false, -+STPC_FLAG(bool, output_precompiled_header, false, - "make normalization_rule.h file"); - - namespace sentencepiece { -@@ -187,7 +187,7 @@ int main(int argc, char **argv) { - data.emplace_back(p.first, index); - } - -- if (absl::GetFlag(FLAGS_output_precompiled_header)) { -+ if (sentencepiece::GetFlag(FLAGS_output_precompiled_header)) { - constexpr char kPrecompiledHeaderFileName[] = "normalization_rule.h"; - auto output = - sentencepiece::filesystem::NewWritableFile(kPrecompiledHeaderFileName); -diff --git a/src/error.cc b/src/error.cc -index 19ef6f3..e30f914 100644 ---- a/src/error.cc -+++ b/src/error.cc -@@ -18,15 +18,6 @@ - #include "init.h" - #include "sentencepiece_processor.h" - --#ifdef _USE_EXTERNAL_ABSL --// Naive workaround to define minloglevel on external absl package. --// We want to define them in other cc file. --#include "absl/flags/flag.h" --#include "absl/flags/parse.h" --ABSL_FLAG(int32, minloglevel, 0, -- "Messages logged at a lower level than this don't actually."); --#endif -- - namespace sentencepiece { - namespace error { - int gTestCounter = 0; -diff --git a/src/filesystem_test.cc b/src/filesystem_test.cc -index 3c9fbdc..305cd87 100644 ---- a/src/filesystem_test.cc -+++ b/src/filesystem_test.cc -@@ -15,6 +15,7 @@ - #include "absl/strings/str_cat.h" - - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "testharness.h" - #include "util.h" - -@@ -29,7 +30,7 @@ TEST(UtilTest, FilesystemTest) { - - { - auto output = filesystem::NewWritableFile( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "test_file")); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "test_file")); - for (size_t i = 0; i < kData.size(); ++i) { - output->WriteLine(kData[i]); - } -@@ -37,7 +38,7 @@ TEST(UtilTest, FilesystemTest) { - - { - auto input = filesystem::NewReadableFile( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "test_file")); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "test_file")); - std::string line; - for (size_t i = 0; i < kData.size(); ++i) { - EXPECT_TRUE(input->ReadLine(&line)); -diff --git a/third_party/absl/flags/flag.cc b/src/glue/flags/flag.cc -similarity index 94% -rename from third_party/absl/flags/flag.cc -rename to src/glue/flags/flag.cc -index 5d6642a..06aeb7e 100644 ---- a/third_party/absl/flags/flag.cc -+++ b/src/glue/flags/flag.cc -@@ -12,25 +12,25 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - --#include "third_party/absl/flags/flag.h" -- - #include - #include - #include - #include - #include - -+#include "flag.h" -+ - #include "config.h" --#include "src/common.h" --#include "src/util.h" -+#include "../../common.h" -+#include "../../util.h" - --ABSL_FLAG(bool, help, false, "show help"); --ABSL_FLAG(bool, version, false, "show version"); --ABSL_FLAG(int, minloglevel, 0, -+STPC_FLAG(bool, help, false, "show help"); -+STPC_FLAG(bool, version, false, "show version"); -+STPC_FLAG(int, minloglevel, 0, - "Messages logged at a lower level than this don't actually get " - "logged anywhere"); - --namespace absl { -+namespace sentencepiece { - namespace internal { - namespace { - template -@@ -209,10 +209,10 @@ std::vector ParseCommandLine(int argc, char *argv[]) { - } - } - -- if (absl::GetFlag(FLAGS_help)) { -+ if (sentencepiece::GetFlag(FLAGS_help)) { - std::cout << internal::PrintHelp(argv[0]); - sentencepiece::error::Exit(0); -- } else if (absl::GetFlag(FLAGS_version)) { -+ } else if (sentencepiece::GetFlag(FLAGS_version)) { - std::cout << PACKAGE_STRING << " " << VERSION << std::endl; - sentencepiece::error::Exit(0); - } -@@ -229,4 +229,4 @@ void CleanupFlags() { - } - } - --} // namespace absl -+} // namespace sentencepiece -diff --git a/third_party/absl/flags/flag.h b/src/glue/flags/flag.h -similarity index 77% -rename from third_party/absl/flags/flag.h -rename to src/glue/flags/flag.h -index c522358..ba439fe 100644 ---- a/third_party/absl/flags/flag.h -+++ b/src/glue/flags/flag.h -@@ -12,15 +12,15 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - --#ifndef ABSL_FLAGS_FLAG_H_ --#define ABSL_FLAGS_FLAG_H_ -+#ifndef SENTENCEPIECE_FLAG_H_ -+#define SENTENCEPIECE_FLAG_H_ - - #include - #include - #include - #include - --namespace absl { -+namespace sentencepiece { - namespace internal { - struct FlagFunc; - -@@ -54,15 +54,15 @@ void SetFlag(Flag *flag, const V &v) { - flag->set_value(value); - } - --#define HAS_ABSL_CLEANUP_FLAGS -+#define HAS_STPC_CLEANUP_FLAGS - - void CleanupFlags(); - --} // namespace absl -+} // namespace sentencepiece - --#define ABSL_FLAG(Type, name, defautl_value, help) \ -- absl::Flag FLAGS_##name(#name, #Type, help, defautl_value); -+#define STPC_FLAG(Type, name, default_value, help) \ -+ sentencepiece::Flag FLAGS_##name(#name, #Type, help, default_value); - --#define ABSL_DECLARE_FLAG(Type, name) extern absl::Flag FLAGS_##name; -+#define STPC_DECLARE_FLAG(Type, name) extern sentencepiece::Flag FLAGS_##name; - --#endif // ABSL_FLAGS_FLAG_H_ -+#endif // SENTENCEPIECE_FLAG_H_ -diff --git a/third_party/absl/flags/parse.h b/src/glue/flags/parse.h -similarity index 79% -rename from third_party/absl/flags/parse.h -rename to src/glue/flags/parse.h -index 6a06e63..705eadf 100644 ---- a/third_party/absl/flags/parse.h -+++ b/src/glue/flags/parse.h -@@ -12,14 +12,14 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - --#ifndef ABSL_FLAGS_PARSE_H_ --#define ABSL_FLAGS_PARSE_H_ -+#ifndef SENTENCEPIECE_FLAGS_PARSE_H_ -+#define SENTENCEPIECE_FLAGS_PARSE_H_ - - #include - --namespace absl { -+namespace sentencepiece { - - std::vector ParseCommandLine(int argc, char *argv[]); --} // namespace absl -+} // namespace sentencepiece - --#endif // ABSL_FLAGS_PARSE_H_ -+#endif // SENTENCEPIECE_FLAGS_PARSE_H_ -diff --git a/third_party/absl/random/distributions.h b/src/glue/random/distributions.h -similarity index 94% -rename from third_party/absl/random/distributions.h -rename to src/glue/random/distributions.h -index 246ecb2..09bb838 100644 ---- a/third_party/absl/random/distributions.h -+++ b/src/glue/random/distributions.h -@@ -19,13 +19,13 @@ - - #include "random.h" - --namespace absl { -+namespace sentencepiece { - - template - T Gaussian(SharedBitGen &generator, T mean, T stddev) { - std::normal_distribution<> dist(mean, stddev); - return dist(*generator.engine()); - } --} // namespace absl -+} // namespace sentencepiece - - #endif // ABSL_CONTAINER_DISTRIBUTIONS_H_ -diff --git a/third_party/absl/random/random.h b/src/glue/random/random.h -similarity index 91% -rename from third_party/absl/random/random.h -rename to src/glue/random/random.h -index 3c3a21e..ac20d33 100644 ---- a/third_party/absl/random/random.h -+++ b/src/glue/random/random.h -@@ -17,17 +17,17 @@ - - #include - --#include "../../../src/util.h" -+#include "../../util.h" - - using sentencepiece::random::GetRandomGenerator; - --namespace absl { -+namespace sentencepiece { - - class SharedBitGen { - public: - std::mt19937 *engine() { return GetRandomGenerator(); } - }; - --} // namespace absl -+} // namespace sentencepiece - - #endif // ABSL_CONTAINER_RANDOM_H_ -diff --git a/src/init.h b/src/init.h -index 1f4c292..ec4e4be 100644 ---- a/src/init.h -+++ b/src/init.h -@@ -15,10 +15,9 @@ - #ifndef INIT_H_ - #define INIT_H_ - --#include "absl/flags/flag.h" --#include "absl/flags/parse.h" -- - #include "common.h" -+#include "glue/flags/flag.h" -+#include "glue/flags/parse.h" - - #ifdef _USE_EXTERNAL_PROTOBUF - #include "google/protobuf/message_lite.h" -@@ -26,12 +25,12 @@ - #include "third_party/protobuf-lite/google/protobuf/message_lite.h" - #endif - --ABSL_DECLARE_FLAG(int32, minloglevel); -+STPC_DECLARE_FLAG(int32, minloglevel); - - namespace sentencepiece { - inline void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, - bool remove_arg = true) { -- const auto unused_args = absl::ParseCommandLine(*argc, *argv); -+ const auto unused_args = sentencepiece::ParseCommandLine(*argc, *argv); - - if (remove_arg) { - char **argv_val = *argv; -@@ -40,13 +39,13 @@ inline void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, - *argc = static_cast(unused_args.size()); - } - -- logging::SetMinLogLevel(absl::GetFlag(FLAGS_minloglevel)); -+ logging::SetMinLogLevel(sentencepiece::GetFlag(FLAGS_minloglevel)); - } - - inline void ShutdownLibrary() { - google::protobuf::ShutdownProtobufLibrary(); --#ifdef HAS_ABSL_CLEANUP_FLAGS -- absl::CleanupFlags(); -+#ifdef HAS_STPC_CLEANUP_FLAGS -+ sentencepiece::CleanupFlags(); - #endif - } - -diff --git a/src/init_test.cc b/src/init_test.cc -index e5cd2e4..82381c2 100644 ---- a/src/init_test.cc -+++ b/src/init_test.cc -@@ -15,28 +15,29 @@ - #include "init.h" - - #include "common.h" -+#include "glue/flags/flag.h" - #include "testharness.h" - --ABSL_FLAG(int32, int32_f, 10, "int32_flags"); --ABSL_FLAG(bool, bool_f, false, "bool_flags"); --ABSL_FLAG(int64, int64_f, 9223372036854775807LL, "int64_flags"); --ABSL_FLAG(uint64, uint64_f, 18446744073709551615ULL, "uint64_flags"); --ABSL_FLAG(double, double_f, 40.0, "double_flags"); --ABSL_FLAG(std::string, string_f, "str", "string_flags"); -+STPC_FLAG(int32, int32_f, 10, "int32_flags"); -+STPC_FLAG(bool, bool_f, false, "bool_flags"); -+STPC_FLAG(int64, int64_f, 9223372036854775807LL, "int64_flags"); -+STPC_FLAG(uint64, uint64_f, 18446744073709551615ULL, "uint64_flags"); -+STPC_FLAG(double, double_f, 40.0, "double_flags"); -+STPC_FLAG(std::string, string_f, "str", "string_flags"); - --ABSL_DECLARE_FLAG(bool, help); --ABSL_DECLARE_FLAG(bool, version); -+STPC_DECLARE_FLAG(bool, help); -+STPC_DECLARE_FLAG(bool, version); - - using sentencepiece::ParseCommandLineFlags; - - namespace absl { - TEST(FlagsTest, DefaultValueTest) { -- EXPECT_EQ(10, absl::GetFlag(FLAGS_int32_f)); -- EXPECT_EQ(false, absl::GetFlag(FLAGS_bool_f)); -- EXPECT_EQ(9223372036854775807LL, absl::GetFlag(FLAGS_int64_f)); -- EXPECT_EQ(18446744073709551615ULL, absl::GetFlag(FLAGS_uint64_f)); -- EXPECT_EQ(40.0, absl::GetFlag(FLAGS_double_f)); -- EXPECT_EQ("str", absl::GetFlag(FLAGS_string_f)); -+ EXPECT_EQ(10, sentencepiece::GetFlag(FLAGS_int32_f)); -+ EXPECT_EQ(false, sentencepiece::GetFlag(FLAGS_bool_f)); -+ EXPECT_EQ(9223372036854775807LL, sentencepiece::GetFlag(FLAGS_int64_f)); -+ EXPECT_EQ(18446744073709551615ULL, sentencepiece::GetFlag(FLAGS_uint64_f)); -+ EXPECT_EQ(40.0, sentencepiece::GetFlag(FLAGS_double_f)); -+ EXPECT_EQ("str", sentencepiece::GetFlag(FLAGS_string_f)); - } - - TEST(FlagsTest, ParseCommandLineFlagsTest) { -@@ -48,12 +49,12 @@ TEST(FlagsTest, ParseCommandLineFlagsTest) { - char **argv = const_cast(kFlags); - ParseCommandLineFlags(kFlags[0], &argc, &argv); - -- EXPECT_EQ(100, absl::GetFlag(FLAGS_int32_f)); -- EXPECT_EQ(true, absl::GetFlag(FLAGS_bool_f)); -- EXPECT_EQ(200, absl::GetFlag(FLAGS_int64_f)); -- EXPECT_EQ(300, absl::GetFlag(FLAGS_uint64_f)); -- EXPECT_EQ(400.0, absl::GetFlag(FLAGS_double_f)); -- EXPECT_EQ("foo", absl::GetFlag(FLAGS_string_f)); -+ EXPECT_EQ(100, sentencepiece::GetFlag(FLAGS_int32_f)); -+ EXPECT_EQ(true, sentencepiece::GetFlag(FLAGS_bool_f)); -+ EXPECT_EQ(200, sentencepiece::GetFlag(FLAGS_int64_f)); -+ EXPECT_EQ(300, sentencepiece::GetFlag(FLAGS_uint64_f)); -+ EXPECT_EQ(400.0, sentencepiece::GetFlag(FLAGS_double_f)); -+ EXPECT_EQ("foo", sentencepiece::GetFlag(FLAGS_string_f)); - EXPECT_EQ(4, argc); - EXPECT_EQ("program", std::string(argv[0])); - EXPECT_EQ("other1", std::string(argv[1])); -@@ -69,10 +70,10 @@ TEST(FlagsTest, ParseCommandLineFlagsTest2) { - char **argv = const_cast(kFlags); - ParseCommandLineFlags(kFlags[0], &argc, &argv); - -- EXPECT_EQ(500, absl::GetFlag(FLAGS_int32_f)); -- EXPECT_EQ(600, absl::GetFlag(FLAGS_int64_f)); -- EXPECT_EQ(700, absl::GetFlag(FLAGS_uint64_f)); -- EXPECT_FALSE(absl::GetFlag(FLAGS_bool_f)); -+ EXPECT_EQ(500, sentencepiece::GetFlag(FLAGS_int32_f)); -+ EXPECT_EQ(600, sentencepiece::GetFlag(FLAGS_int64_f)); -+ EXPECT_EQ(700, sentencepiece::GetFlag(FLAGS_uint64_f)); -+ EXPECT_FALSE(sentencepiece::GetFlag(FLAGS_bool_f)); - EXPECT_EQ(1, argc); - } - -@@ -82,8 +83,8 @@ TEST(FlagsTest, ParseCommandLineFlagsTest3) { - int argc = arraysize(kFlags); - char **argv = const_cast(kFlags); - ParseCommandLineFlags(kFlags[0], &argc, &argv); -- EXPECT_TRUE(absl::GetFlag(FLAGS_bool_f)); -- EXPECT_EQ(800, absl::GetFlag(FLAGS_int32_f)); -+ EXPECT_TRUE(sentencepiece::GetFlag(FLAGS_bool_f)); -+ EXPECT_EQ(800, sentencepiece::GetFlag(FLAGS_int32_f)); - EXPECT_EQ(1, argc); - } - -@@ -94,7 +95,7 @@ TEST(FlagsTest, ParseCommandLineFlagsHelpTest) { - int argc = arraysize(kFlags); - char **argv = const_cast(kFlags); - EXPECT_DEATH(ParseCommandLineFlags(kFlags[0], &argc, &argv), ""); -- absl::SetFlag(&FLAGS_help, false); -+ sentencepiece::SetFlag(&FLAGS_help, false); - } - - TEST(FlagsTest, ParseCommandLineFlagsVersionTest) { -@@ -102,7 +103,7 @@ TEST(FlagsTest, ParseCommandLineFlagsVersionTest) { - int argc = arraysize(kFlags); - char **argv = const_cast(kFlags); - EXPECT_DEATH(ParseCommandLineFlags(kFlags[0], &argc, &argv), ""); -- absl::SetFlag(&FLAGS_version, false); -+ sentencepiece::SetFlag(&FLAGS_version, false); - } - - TEST(FlagsTest, ParseCommandLineFlagsUnknownTest) { -@@ -125,7 +126,7 @@ TEST(FlagsTest, ParseCommandLineFlagsEmptyStringArgs) { - char **argv = const_cast(kFlags); - ParseCommandLineFlags(kFlags[0], &argc, &argv); - EXPECT_EQ(1, argc); -- EXPECT_EQ("", absl::GetFlag(FLAGS_string_f)); -+ EXPECT_EQ("", sentencepiece::GetFlag(FLAGS_string_f)); - } - - TEST(FlagsTest, ParseCommandLineFlagsEmptyBoolArgs) { -@@ -134,7 +135,7 @@ TEST(FlagsTest, ParseCommandLineFlagsEmptyBoolArgs) { - char **argv = const_cast(kFlags); - ParseCommandLineFlags(kFlags[0], &argc, &argv); - EXPECT_EQ(1, argc); -- EXPECT_TRUE(absl::GetFlag(FLAGS_bool_f)); -+ EXPECT_TRUE(sentencepiece::GetFlag(FLAGS_bool_f)); - } - - TEST(FlagsTest, ParseCommandLineFlagsEmptyIntArgs) { -diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc -index 4077c65..208b92f 100644 ---- a/src/sentencepiece_processor_test.cc -+++ b/src/sentencepiece_processor_test.cc -@@ -23,6 +23,7 @@ - - #include "builder.h" - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "model_interface.h" - #include "normalizer.h" - #include "sentencepiece.pb.h" -@@ -996,13 +997,13 @@ TEST(SentencePieceProcessorTest, EndToEndTest) { - - { - auto output = filesystem::NewWritableFile( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "model"), true); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "model"), true); - output->Write(model_proto.SerializeAsString()); - } - - SentencePieceProcessor sp; - EXPECT_TRUE( -- sp.Load(util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "model")).ok()); -+ sp.Load(util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "model")).ok()); - - EXPECT_EQ(model_proto.SerializeAsString(), - sp.model_proto().SerializeAsString()); -@@ -1469,10 +1470,10 @@ TEST(SentencePieceProcessorTest, VocabularyTest) { - auto GetInlineFilename = [](const std::string content) { - { - auto out = filesystem::NewWritableFile( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "vocab.txt")); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "vocab.txt")); - out->Write(content); - } -- return util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "vocab.txt"); -+ return util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "vocab.txt"); - }; - - sp1->set_type(ModelProto::SentencePiece::UNKNOWN); -diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc -index 0bb5aab..1a241b0 100644 ---- a/src/sentencepiece_trainer_test.cc -+++ b/src/sentencepiece_trainer_test.cc -@@ -15,6 +15,7 @@ - #include "absl/strings/str_cat.h" - - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_trainer.h" - #include "testharness.h" -@@ -51,9 +52,9 @@ void CheckNormalizer(absl::string_view filename, bool expected_has_normalizer, - - TEST(SentencePieceTrainerTest, TrainFromArgsTest) { - const std::string input = -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kTestData); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kTestData); - const std::string model = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "m"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "m"); - - ASSERT_TRUE(SentencePieceTrainer::Train( - absl::StrCat("--input=", input, " --model_prefix=", model, -@@ -118,9 +119,9 @@ TEST(SentencePieceTrainerTest, TrainFromIterator) { - }; - - const std::string input = -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kTestData); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kTestData); - const std::string model = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "m"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "m"); - - std::vector sentences; - { -@@ -141,11 +142,11 @@ TEST(SentencePieceTrainerTest, TrainFromIterator) { - - TEST(SentencePieceTrainerTest, TrainWithCustomNormalizationRule) { - std::string input = -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kTestData); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kTestData); - std::string rule = -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kNfkcTestData); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kNfkcTestData); - const std::string model = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "m"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "m"); - - EXPECT_TRUE(SentencePieceTrainer::Train( - absl::StrCat("--input=", input, " --model_prefix=", model, -@@ -157,13 +158,13 @@ TEST(SentencePieceTrainerTest, TrainWithCustomNormalizationRule) { - - TEST(SentencePieceTrainerTest, TrainWithCustomDenormalizationRule) { - const std::string input_file = -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kTestDataJa); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kTestDataJa); - const std::string model = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "m"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "m"); - const std::string norm_rule_tsv = -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kIdsNormTsv); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kIdsNormTsv); - const std::string denorm_rule_tsv = -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kIdsDenormTsv); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kIdsDenormTsv); - EXPECT_TRUE( - SentencePieceTrainer::Train( - absl::StrCat("--input=", input_file, " --model_prefix=", model, -@@ -186,9 +187,9 @@ TEST(SentencePieceTrainerTest, TrainErrorTest) { - TEST(SentencePieceTrainerTest, TrainTest) { - TrainerSpec trainer_spec; - trainer_spec.add_input( -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kTestData)); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kTestData)); - trainer_spec.set_model_prefix( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "m")); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "m")); - trainer_spec.set_vocab_size(1000); - NormalizerSpec normalizer_spec; - ASSERT_TRUE(SentencePieceTrainer::Train(trainer_spec, normalizer_spec).ok()); -diff --git a/src/spm_decode_main.cc b/src/spm_decode_main.cc -index 32fbb76..a045823 100644 ---- a/src/spm_decode_main.cc -+++ b/src/spm_decode_main.cc -@@ -21,17 +21,18 @@ - - #include "common.h" - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "init.h" - #include "sentencepiece.pb.h" - #include "sentencepiece_processor.h" - #include "util.h" - --ABSL_FLAG(std::string, model, "", "model file name"); --ABSL_FLAG(std::string, input, "", "input filename"); --ABSL_FLAG(std::string, output, "", "output filename"); --ABSL_FLAG(std::string, input_format, "piece", "choose from piece or id"); --ABSL_FLAG(std::string, output_format, "string", "choose from string or proto"); --ABSL_FLAG(std::string, extra_options, "", -+STPC_FLAG(std::string, model, "", "model file name"); -+STPC_FLAG(std::string, input, "", "input filename"); -+STPC_FLAG(std::string, output, "", "output filename"); -+STPC_FLAG(std::string, input_format, "piece", "choose from piece or id"); -+STPC_FLAG(std::string, output_format, "string", "choose from string or proto"); -+STPC_FLAG(std::string, extra_options, "", - "':' separated encoder extra options, e.g., \"reverse:bos:eos\""); - - int main(int argc, char *argv[]) { -@@ -39,25 +40,25 @@ int main(int argc, char *argv[]) { - sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); - std::vector rest_args; - -- if (absl::GetFlag(FLAGS_input).empty()) { -+ if (sentencepiece::GetFlag(FLAGS_input).empty()) { - for (int i = 1; i < argc; ++i) { - rest_args.push_back(std::string(argv[i])); - } - } else { -- rest_args.push_back(absl::GetFlag(FLAGS_input)); -+ rest_args.push_back(sentencepiece::GetFlag(FLAGS_input)); - } - - if (rest_args.empty()) - rest_args.push_back(""); // empty means that reading from stdin. - -- CHECK(!absl::GetFlag(FLAGS_model).empty()); -+ CHECK(!sentencepiece::GetFlag(FLAGS_model).empty()); - - sentencepiece::SentencePieceProcessor sp; -- CHECK_OK(sp.Load(absl::GetFlag(FLAGS_model))); -- CHECK_OK(sp.SetDecodeExtraOptions(absl::GetFlag(FLAGS_extra_options))); -+ CHECK_OK(sp.Load(sentencepiece::GetFlag(FLAGS_model))); -+ CHECK_OK(sp.SetDecodeExtraOptions(sentencepiece::GetFlag(FLAGS_extra_options))); - - auto output = -- sentencepiece::filesystem::NewWritableFile(absl::GetFlag(FLAGS_output)); -+ sentencepiece::filesystem::NewWritableFile(sentencepiece::GetFlag(FLAGS_output)); - CHECK_OK(output->status()); - - std::string detok, line; -@@ -73,36 +74,36 @@ int main(int argc, char *argv[]) { - return ids; - }; - -- if (absl::GetFlag(FLAGS_input_format) == "piece") { -- if (absl::GetFlag(FLAGS_output_format) == "string") { -+ if (sentencepiece::GetFlag(FLAGS_input_format) == "piece") { -+ if (sentencepiece::GetFlag(FLAGS_output_format) == "string") { - process = [&](const std::vector &pieces) { - CHECK_OK(sp.Decode(pieces, &detok)); - output->WriteLine(detok); - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "proto") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "proto") { - process = [&](const std::vector &pieces) { - CHECK_OK(sp.Decode(pieces, &spt)); - }; - } else { - LOG(FATAL) << "Unknown output format: " -- << absl::GetFlag(FLAGS_output_format); -+ << sentencepiece::GetFlag(FLAGS_output_format); - } -- } else if (absl::GetFlag(FLAGS_input_format) == "id") { -- if (absl::GetFlag(FLAGS_output_format) == "string") { -+ } else if (sentencepiece::GetFlag(FLAGS_input_format) == "id") { -+ if (sentencepiece::GetFlag(FLAGS_output_format) == "string") { - process = [&](const std::vector &pieces) { - CHECK_OK(sp.Decode(ToIds(pieces), &detok)); - output->WriteLine(detok); - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "proto") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "proto") { - process = [&](const std::vector &pieces) { - CHECK_OK(sp.Decode(ToIds(pieces), &spt)); - }; - } else { - LOG(FATAL) << "Unknown output format: " -- << absl::GetFlag(FLAGS_output_format); -+ << sentencepiece::GetFlag(FLAGS_output_format); - } - } else { -- LOG(FATAL) << "Unknown input format: " << absl::GetFlag(FLAGS_input_format); -+ LOG(FATAL) << "Unknown input format: " << sentencepiece::GetFlag(FLAGS_input_format); - } - - for (const auto &filename : rest_args) { -diff --git a/src/spm_encode_main.cc b/src/spm_encode_main.cc -index 02def40..b22cc7a 100644 ---- a/src/spm_encode_main.cc -+++ b/src/spm_encode_main.cc -@@ -17,38 +17,38 @@ - #include - - #include "absl/container/flat_hash_map.h" --#include "absl/flags/flag.h" - #include "absl/strings/str_cat.h" - #include "absl/strings/str_join.h" - - #include "common.h" - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "init.h" - #include "sentencepiece.pb.h" - #include "sentencepiece_processor.h" - #include "trainer_interface.h" - --ABSL_FLAG(std::string, model, "", "model file name"); --ABSL_FLAG( -+STPC_FLAG(std::string, model, "", "model file name"); -+STPC_FLAG( - std::string, output_format, "piece", - "choose from piece, id, proto, nbest_piece, nbest_id, or nbest_proto"); --ABSL_FLAG(std::string, input, "", "input filename"); --ABSL_FLAG(std::string, output, "", "output filename"); --ABSL_FLAG(std::string, extra_options, "", -+STPC_FLAG(std::string, input, "", "input filename"); -+STPC_FLAG(std::string, output, "", "output filename"); -+STPC_FLAG(std::string, extra_options, "", - "':' separated encoder extra options, e.g., \"reverse:bos:eos\""); --ABSL_FLAG(int32, nbest_size, 10, "NBest size"); --ABSL_FLAG(double, alpha, 0.5, "Smoothing parameter for sampling mode."); --ABSL_FLAG(uint32, random_seed, static_cast(-1), -+STPC_FLAG(int32, nbest_size, 10, "NBest size"); -+STPC_FLAG(double, alpha, 0.5, "Smoothing parameter for sampling mode."); -+STPC_FLAG(uint32, random_seed, static_cast(-1), - "Seed value for random generator."); - - // Piece restriction with vocabulary file. - // https://github.com/rsennrich/subword-nmt#best-practice-advice-for-byte-pair-encoding-in-nmt --ABSL_FLAG(std::string, vocabulary, "", -+STPC_FLAG(std::string, vocabulary, "", - "Restrict the vocabulary. The encoder only emits the " - "tokens in \"vocabulary\" file"); --ABSL_FLAG(int32, vocabulary_threshold, 0, -+STPC_FLAG(int32, vocabulary_threshold, 0, - "Words with frequency < threshold will be treated as OOV"); --ABSL_FLAG(bool, generate_vocabulary, false, -+STPC_FLAG(bool, generate_vocabulary, false, - "Generates vocabulary file instead of segmentation"); - - int main(int argc, char *argv[]) { -@@ -56,34 +56,34 @@ int main(int argc, char *argv[]) { - sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); - std::vector rest_args; - -- if (absl::GetFlag(FLAGS_input).empty()) { -+ if (sentencepiece::GetFlag(FLAGS_input).empty()) { - for (int i = 1; i < argc; ++i) { - rest_args.push_back(std::string(argv[i])); - } - } else { -- rest_args.push_back(absl::GetFlag(FLAGS_input)); -+ rest_args.push_back(sentencepiece::GetFlag(FLAGS_input)); - } - -- if (absl::GetFlag(FLAGS_random_seed) != -1) { -- sentencepiece::SetRandomGeneratorSeed(absl::GetFlag(FLAGS_random_seed)); -+ if (sentencepiece::GetFlag(FLAGS_random_seed) != -1) { -+ sentencepiece::SetRandomGeneratorSeed(sentencepiece::GetFlag(FLAGS_random_seed)); - } - - if (rest_args.empty()) - rest_args.push_back(""); // empty means that reading from stdin. - -- CHECK(!absl::GetFlag(FLAGS_model).empty()); -+ CHECK(!sentencepiece::GetFlag(FLAGS_model).empty()); - - sentencepiece::SentencePieceProcessor sp; -- CHECK_OK(sp.Load(absl::GetFlag(FLAGS_model))); -- CHECK_OK(sp.SetEncodeExtraOptions(absl::GetFlag(FLAGS_extra_options))); -+ CHECK_OK(sp.Load(sentencepiece::GetFlag(FLAGS_model))); -+ CHECK_OK(sp.SetEncodeExtraOptions(sentencepiece::GetFlag(FLAGS_extra_options))); - -- if (!absl::GetFlag(FLAGS_vocabulary).empty()) { -- CHECK_OK(sp.LoadVocabulary(absl::GetFlag(FLAGS_vocabulary), -- absl::GetFlag(FLAGS_vocabulary_threshold))); -+ if (!sentencepiece::GetFlag(FLAGS_vocabulary).empty()) { -+ CHECK_OK(sp.LoadVocabulary(sentencepiece::GetFlag(FLAGS_vocabulary), -+ sentencepiece::GetFlag(FLAGS_vocabulary_threshold))); - } - - auto output = -- sentencepiece::filesystem::NewWritableFile(absl::GetFlag(FLAGS_output)); -+ sentencepiece::filesystem::NewWritableFile(sentencepiece::GetFlag(FLAGS_output)); - CHECK_OK(output->status()); - - std::string line; -@@ -96,10 +96,10 @@ int main(int argc, char *argv[]) { - sentencepiece::NBestSentencePieceText nbest_spt; - std::function process; - -- const int nbest_size = absl::GetFlag(FLAGS_nbest_size); -- const float alpha = absl::GetFlag(FLAGS_alpha); -+ const int nbest_size = sentencepiece::GetFlag(FLAGS_nbest_size); -+ const float alpha = sentencepiece::GetFlag(FLAGS_alpha); - -- if (absl::GetFlag(FLAGS_generate_vocabulary)) { -+ if (sentencepiece::GetFlag(FLAGS_generate_vocabulary)) { - process = [&](absl::string_view line) { - CHECK_OK(sp.Encode(line, &spt)); - for (const auto &piece : spt.pieces()) { -@@ -107,53 +107,53 @@ int main(int argc, char *argv[]) { - vocab[piece.piece()]++; - } - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "piece") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "piece") { - process = [&](absl::string_view line) { - CHECK_OK(sp.Encode(line, &sps)); - output->WriteLine(absl::StrJoin(sps, " ")); - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "id") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "id") { - process = [&](absl::string_view line) { - CHECK_OK(sp.Encode(line, &ids)); - output->WriteLine(absl::StrJoin(ids, " ")); - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "proto") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "proto") { - process = [&](absl::string_view line) { CHECK_OK(sp.Encode(line, &spt)); }; -- } else if (absl::GetFlag(FLAGS_output_format) == "sample_piece") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "sample_piece") { - process = [&](absl::string_view line) { - CHECK_OK(sp.SampleEncode(line, nbest_size, alpha, &sps)); - output->WriteLine(absl::StrJoin(sps, " ")); - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "sample_id") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "sample_id") { - process = [&](absl::string_view line) { - CHECK_OK(sp.SampleEncode(line, nbest_size, alpha, &ids)); - output->WriteLine(absl::StrJoin(ids, " ")); - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "sample_proto") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "sample_proto") { - process = [&](absl::string_view line) { - CHECK_OK(sp.SampleEncode(line, nbest_size, alpha, &spt)); - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "nbest_piece") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "nbest_piece") { - process = [&](absl::string_view line) { - CHECK_OK(sp.NBestEncode(line, nbest_size, &nbest_sps)); - for (const auto &result : nbest_sps) { - output->WriteLine(absl::StrJoin(result, " ")); - } - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "nbest_id") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "nbest_id") { - process = [&](absl::string_view line) { - CHECK_OK(sp.NBestEncode(line, nbest_size, &nbest_ids)); - for (const auto &result : nbest_ids) { - output->WriteLine(absl::StrJoin(result, " ")); - } - }; -- } else if (absl::GetFlag(FLAGS_output_format) == "nbest_proto") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "nbest_proto") { - process = [&](absl::string_view line) { - CHECK_OK(sp.NBestEncode(line, nbest_size, &nbest_spt)); - }; - } else { - LOG(FATAL) << "Unknown output format: " -- << absl::GetFlag(FLAGS_output_format); -+ << sentencepiece::GetFlag(FLAGS_output_format); - } - - for (const auto &filename : rest_args) { -@@ -164,7 +164,7 @@ int main(int argc, char *argv[]) { - } - } - -- if (absl::GetFlag(FLAGS_generate_vocabulary)) { -+ if (sentencepiece::GetFlag(FLAGS_generate_vocabulary)) { - for (const auto &it : sentencepiece::Sorted(vocab)) { - output->WriteLine(it.first + "\t" + - sentencepiece::string_util::SimpleItoa(it.second)); -diff --git a/src/spm_export_vocab_main.cc b/src/spm_export_vocab_main.cc -index d0aea7d..c161662 100644 ---- a/src/spm_export_vocab_main.cc -+++ b/src/spm_export_vocab_main.cc -@@ -14,17 +14,16 @@ - - #include - --#include "absl/flags/flag.h" -- - #include "common.h" - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "init.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" - --ABSL_FLAG(std::string, output, "", "Output filename"); --ABSL_FLAG(std::string, model, "", "input model file name"); --ABSL_FLAG(std::string, output_format, "vocab", -+STPC_FLAG(std::string, output, "", "Output filename"); -+STPC_FLAG(std::string, model, "", "input model file name"); -+STPC_FLAG(std::string, output_format, "vocab", - "output format. choose from vocab or syms. vocab outputs pieces " - "and scores, syms outputs pieces and indices."); - -@@ -33,19 +32,19 @@ int main(int argc, char *argv[]) { - sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); - - sentencepiece::SentencePieceProcessor sp; -- CHECK_OK(sp.Load(absl::GetFlag(FLAGS_model))); -+ CHECK_OK(sp.Load(sentencepiece::GetFlag(FLAGS_model))); - - auto output = -- sentencepiece::filesystem::NewWritableFile(absl::GetFlag(FLAGS_output)); -+ sentencepiece::filesystem::NewWritableFile(sentencepiece::GetFlag(FLAGS_output)); - CHECK_OK(output->status()); - -- if (absl::GetFlag(FLAGS_output_format) == "vocab") { -+ if (sentencepiece::GetFlag(FLAGS_output_format) == "vocab") { - for (const auto &piece : sp.model_proto().pieces()) { - std::ostringstream os; - os << piece.piece() << "\t" << piece.score(); - output->WriteLine(os.str()); - } -- } else if (absl::GetFlag(FLAGS_output_format) == "syms") { -+ } else if (sentencepiece::GetFlag(FLAGS_output_format) == "syms") { - for (int i = 0; i < sp.model_proto().pieces_size(); i++) { - std::ostringstream os; - os << sp.model_proto().pieces(i).piece() << "\t" << i; -@@ -53,7 +52,7 @@ int main(int argc, char *argv[]) { - } - } else { - LOG(FATAL) << "Unsupported output format: " -- << absl::GetFlag(FLAGS_output_format); -+ << sentencepiece::GetFlag(FLAGS_output_format); - } - - return 0; -diff --git a/src/spm_normalize_main.cc b/src/spm_normalize_main.cc -index 7352f63..f6fe88a 100644 ---- a/src/spm_normalize_main.cc -+++ b/src/spm_normalize_main.cc -@@ -12,11 +12,10 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - --#include "absl/flags/flag.h" -- - #include "builder.h" - #include "common.h" - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "init.h" - #include "normalizer.h" - #include "sentencepiece.pb.h" -@@ -24,20 +23,20 @@ - #include "sentencepiece_processor.h" - #include "sentencepiece_trainer.h" - --ABSL_FLAG(std::string, model, "", "Model file name"); --ABSL_FLAG(bool, use_internal_normalization, false, -+STPC_FLAG(std::string, model, "", "Model file name"); -+STPC_FLAG(bool, use_internal_normalization, false, - "Use NormalizerSpec \"as-is\" to run the normalizer " - "for SentencePiece segmentation"); --ABSL_FLAG(std::string, normalization_rule_name, "", -+STPC_FLAG(std::string, normalization_rule_name, "", - "Normalization rule name. " - "Choose from nfkc or identity"); --ABSL_FLAG(std::string, normalization_rule_tsv, "", -+STPC_FLAG(std::string, normalization_rule_tsv, "", - "Normalization rule TSV file. "); --ABSL_FLAG(bool, remove_extra_whitespaces, true, "Remove extra whitespaces"); --ABSL_FLAG(bool, decompile, false, -+STPC_FLAG(bool, remove_extra_whitespaces, true, "Remove extra whitespaces"); -+STPC_FLAG(bool, decompile, false, - "Decompile compiled charamap and output it as TSV."); --ABSL_FLAG(std::string, input, "", "Input filename"); --ABSL_FLAG(std::string, output, "", "Output filename"); -+STPC_FLAG(std::string, input, "", "Input filename"); -+STPC_FLAG(std::string, output, "", "Output filename"); - - using sentencepiece::ModelProto; - using sentencepiece::NormalizerSpec; -@@ -51,27 +50,27 @@ int main(int argc, char *argv[]) { - sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); - std::vector rest_args; - -- if (absl::GetFlag(FLAGS_input).empty()) { -+ if (sentencepiece::GetFlag(FLAGS_input).empty()) { - for (int i = 1; i < argc; ++i) { - rest_args.push_back(std::string(argv[i])); - } - } else { -- rest_args.push_back(absl::GetFlag(FLAGS_input)); -+ rest_args.push_back(sentencepiece::GetFlag(FLAGS_input)); - } - - NormalizerSpec spec; - -- if (!absl::GetFlag(FLAGS_model).empty()) { -+ if (!sentencepiece::GetFlag(FLAGS_model).empty()) { - ModelProto model_proto; - SentencePieceProcessor sp; -- CHECK_OK(sp.Load(absl::GetFlag(FLAGS_model))); -+ CHECK_OK(sp.Load(sentencepiece::GetFlag(FLAGS_model))); - spec = sp.model_proto().normalizer_spec(); -- } else if (!absl::GetFlag(FLAGS_normalization_rule_tsv).empty()) { -+ } else if (!sentencepiece::GetFlag(FLAGS_normalization_rule_tsv).empty()) { - spec.set_normalization_rule_tsv( -- absl::GetFlag(FLAGS_normalization_rule_tsv)); -+ sentencepiece::GetFlag(FLAGS_normalization_rule_tsv)); - CHECK_OK(SentencePieceTrainer::PopulateNormalizerSpec(&spec)); -- } else if (!absl::GetFlag(FLAGS_normalization_rule_name).empty()) { -- spec.set_name(absl::GetFlag(FLAGS_normalization_rule_name)); -+ } else if (!sentencepiece::GetFlag(FLAGS_normalization_rule_name).empty()) { -+ spec.set_name(sentencepiece::GetFlag(FLAGS_normalization_rule_name)); - CHECK_OK(SentencePieceTrainer::PopulateNormalizerSpec(&spec)); - } else { - LOG(FATAL) << "Sets --model, normalization_rule_tsv, or " -@@ -79,22 +78,22 @@ int main(int argc, char *argv[]) { - } - - // Uses the normalizer spec encoded in the model_pb. -- if (!absl::GetFlag(FLAGS_use_internal_normalization)) { -+ if (!sentencepiece::GetFlag(FLAGS_use_internal_normalization)) { - spec.set_add_dummy_prefix(false); // do not add dummy prefix. - spec.set_escape_whitespaces(false); // do not output meta symbol. - spec.set_remove_extra_whitespaces( -- absl::GetFlag(FLAGS_remove_extra_whitespaces)); -+ sentencepiece::GetFlag(FLAGS_remove_extra_whitespaces)); - } - -- if (absl::GetFlag(FLAGS_decompile)) { -+ if (sentencepiece::GetFlag(FLAGS_decompile)) { - Builder::CharsMap chars_map; - CHECK_OK( - Builder::DecompileCharsMap(spec.precompiled_charsmap(), &chars_map)); -- CHECK_OK(Builder::SaveCharsMap(absl::GetFlag(FLAGS_output), chars_map)); -+ CHECK_OK(Builder::SaveCharsMap(sentencepiece::GetFlag(FLAGS_output), chars_map)); - } else { - const Normalizer normalizer(spec); - auto output = -- sentencepiece::filesystem::NewWritableFile(absl::GetFlag(FLAGS_output)); -+ sentencepiece::filesystem::NewWritableFile(sentencepiece::GetFlag(FLAGS_output)); - CHECK_OK(output->status()); - - if (rest_args.empty()) { -diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc -index 08c519f..1e6d8a9 100644 ---- a/src/spm_train_main.cc -+++ b/src/spm_train_main.cc -@@ -14,12 +14,12 @@ - - #include - --#include "absl/flags/flag.h" - #include "absl/strings/ascii.h" - #include "absl/strings/str_join.h" - #include "absl/strings/str_split.h" - - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "init.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_trainer.h" -@@ -33,130 +33,130 @@ static sentencepiece::TrainerSpec kDefaultTrainerSpec; - static sentencepiece::NormalizerSpec kDefaultNormalizerSpec; - } // namespace - --ABSL_FLAG(std::string, input, "", "comma separated list of input sentences"); --ABSL_FLAG(std::string, input_format, kDefaultTrainerSpec.input_format(), -+STPC_FLAG(std::string, input, "", "comma separated list of input sentences"); -+STPC_FLAG(std::string, input_format, kDefaultTrainerSpec.input_format(), - "Input format. Supported format is `text` or `tsv`."); --ABSL_FLAG(std::string, model_prefix, "", "output model prefix"); --ABSL_FLAG(std::string, model_type, "unigram", -+STPC_FLAG(std::string, model_prefix, "", "output model prefix"); -+STPC_FLAG(std::string, model_type, "unigram", - "model algorithm: unigram, bpe, word or char"); --ABSL_FLAG(int32, vocab_size, kDefaultTrainerSpec.vocab_size(), -+STPC_FLAG(int32, vocab_size, kDefaultTrainerSpec.vocab_size(), - "vocabulary size"); --ABSL_FLAG(std::string, accept_language, "", -+STPC_FLAG(std::string, accept_language, "", - "comma-separated list of languages this model can accept"); --ABSL_FLAG(int32, self_test_sample_size, -+STPC_FLAG(int32, self_test_sample_size, - kDefaultTrainerSpec.self_test_sample_size(), - "the size of self test samples"); --ABSL_FLAG(double, character_coverage, kDefaultTrainerSpec.character_coverage(), -+STPC_FLAG(double, character_coverage, kDefaultTrainerSpec.character_coverage(), - "character coverage to determine the minimum symbols"); --ABSL_FLAG(std::uint64_t, input_sentence_size, -+STPC_FLAG(std::uint64_t, input_sentence_size, - kDefaultTrainerSpec.input_sentence_size(), - "maximum size of sentences the trainer loads"); --ABSL_FLAG(bool, shuffle_input_sentence, -+STPC_FLAG(bool, shuffle_input_sentence, - kDefaultTrainerSpec.shuffle_input_sentence(), - "Randomly sample input sentences in advance. Valid when " - "--input_sentence_size > 0"); --ABSL_FLAG(int32, seed_sentencepiece_size, -+STPC_FLAG(int32, seed_sentencepiece_size, - kDefaultTrainerSpec.seed_sentencepiece_size(), - "the size of seed sentencepieces"); --ABSL_FLAG(double, shrinking_factor, kDefaultTrainerSpec.shrinking_factor(), -+STPC_FLAG(double, shrinking_factor, kDefaultTrainerSpec.shrinking_factor(), - "Keeps top shrinking_factor pieces with respect to the loss"); --ABSL_FLAG(int32, num_threads, kDefaultTrainerSpec.num_threads(), -+STPC_FLAG(int32, num_threads, kDefaultTrainerSpec.num_threads(), - "number of threads for training"); --ABSL_FLAG(int32, num_sub_iterations, kDefaultTrainerSpec.num_sub_iterations(), -+STPC_FLAG(int32, num_sub_iterations, kDefaultTrainerSpec.num_sub_iterations(), - "number of EM sub-iterations"); --ABSL_FLAG(int32, max_sentencepiece_length, -+STPC_FLAG(int32, max_sentencepiece_length, - kDefaultTrainerSpec.max_sentencepiece_length(), - "maximum length of sentence piece"); --ABSL_FLAG(int32, max_sentence_length, kDefaultTrainerSpec.max_sentence_length(), -+STPC_FLAG(int32, max_sentence_length, kDefaultTrainerSpec.max_sentence_length(), - "maximum length of sentence in byte"); --ABSL_FLAG(bool, split_by_unicode_script, -+STPC_FLAG(bool, split_by_unicode_script, - kDefaultTrainerSpec.split_by_unicode_script(), - "use Unicode script to split sentence pieces"); --ABSL_FLAG(bool, split_by_number, kDefaultTrainerSpec.split_by_number(), -+STPC_FLAG(bool, split_by_number, kDefaultTrainerSpec.split_by_number(), - "split tokens by numbers (0-9)"); --ABSL_FLAG(bool, split_by_whitespace, kDefaultTrainerSpec.split_by_whitespace(), -+STPC_FLAG(bool, split_by_whitespace, kDefaultTrainerSpec.split_by_whitespace(), - "use a white space to split sentence pieces"); --ABSL_FLAG(bool, split_digits, kDefaultTrainerSpec.split_digits(), -+STPC_FLAG(bool, split_digits, kDefaultTrainerSpec.split_digits(), - "split all digits (0-9) into separate pieces"); --ABSL_FLAG(std::string, pretokenization_delimiter, -+STPC_FLAG(std::string, pretokenization_delimiter, - kDefaultTrainerSpec.pretokenization_delimiter(), - "specifies the delimiter of pre-tokenization"); --ABSL_FLAG(bool, treat_whitespace_as_suffix, -+STPC_FLAG(bool, treat_whitespace_as_suffix, - kDefaultTrainerSpec.treat_whitespace_as_suffix(), - "treat whitespace marker as suffix instead of prefix."); --ABSL_FLAG(bool, allow_whitespace_only_pieces, -+STPC_FLAG(bool, allow_whitespace_only_pieces, - kDefaultTrainerSpec.allow_whitespace_only_pieces(), - "allow pieces that only contain (consecutive) whitespace tokens"); --ABSL_FLAG(std::string, control_symbols, "", -+STPC_FLAG(std::string, control_symbols, "", - "comma separated list of control symbols"); --ABSL_FLAG(std::string, control_symbols_file, "", -+STPC_FLAG(std::string, control_symbols_file, "", - "load control_symbols from file."); --ABSL_FLAG(std::string, user_defined_symbols, "", -+STPC_FLAG(std::string, user_defined_symbols, "", - "comma separated list of user defined symbols"); --ABSL_FLAG(std::string, user_defined_symbols_file, "", -+STPC_FLAG(std::string, user_defined_symbols_file, "", - "load user_defined_symbols from file."); --ABSL_FLAG(std::string, required_chars, "", -+STPC_FLAG(std::string, required_chars, "", - "UTF8 characters in this flag are always used in the character " - "set regardless of --character_coverage"); --ABSL_FLAG(std::string, required_chars_file, "", -+STPC_FLAG(std::string, required_chars_file, "", - "load required_chars from file."); --ABSL_FLAG(bool, byte_fallback, kDefaultTrainerSpec.byte_fallback(), -+STPC_FLAG(bool, byte_fallback, kDefaultTrainerSpec.byte_fallback(), - "decompose unknown pieces into UTF-8 byte pieces"); --ABSL_FLAG(bool, vocabulary_output_piece_score, -+STPC_FLAG(bool, vocabulary_output_piece_score, - kDefaultTrainerSpec.vocabulary_output_piece_score(), - "Define score in vocab file"); --ABSL_FLAG(std::string, normalization_rule_name, "nmt_nfkc", -+STPC_FLAG(std::string, normalization_rule_name, "nmt_nfkc", - "Normalization rule name. " - "Choose from nfkc or identity"); --ABSL_FLAG(std::string, normalization_rule_tsv, "", -+STPC_FLAG(std::string, normalization_rule_tsv, "", - "Normalization rule TSV file. "); --ABSL_FLAG(std::string, denormalization_rule_tsv, "", -+STPC_FLAG(std::string, denormalization_rule_tsv, "", - "Denormalization rule TSV file."); --ABSL_FLAG(bool, add_dummy_prefix, kDefaultNormalizerSpec.add_dummy_prefix(), -+STPC_FLAG(bool, add_dummy_prefix, kDefaultNormalizerSpec.add_dummy_prefix(), - "Add dummy whitespace at the beginning of text"); --ABSL_FLAG(bool, remove_extra_whitespaces, -+STPC_FLAG(bool, remove_extra_whitespaces, - kDefaultNormalizerSpec.remove_extra_whitespaces(), - "Removes leading, trailing, and " - "duplicate internal whitespace"); --ABSL_FLAG(bool, hard_vocab_limit, kDefaultTrainerSpec.hard_vocab_limit(), -+STPC_FLAG(bool, hard_vocab_limit, kDefaultTrainerSpec.hard_vocab_limit(), - "If set to false, --vocab_size is considered as a soft limit."); --ABSL_FLAG(bool, use_all_vocab, kDefaultTrainerSpec.use_all_vocab(), -+STPC_FLAG(bool, use_all_vocab, kDefaultTrainerSpec.use_all_vocab(), - "If set to true, use all tokens as vocab. " - "Valid for word/char models."); --ABSL_FLAG(int32, unk_id, kDefaultTrainerSpec.unk_id(), -+STPC_FLAG(int32, unk_id, kDefaultTrainerSpec.unk_id(), - "Override UNK () id."); --ABSL_FLAG(int32, bos_id, kDefaultTrainerSpec.bos_id(), -+STPC_FLAG(int32, bos_id, kDefaultTrainerSpec.bos_id(), - "Override BOS () id. Set -1 to disable BOS."); --ABSL_FLAG(int32, eos_id, kDefaultTrainerSpec.eos_id(), -+STPC_FLAG(int32, eos_id, kDefaultTrainerSpec.eos_id(), - "Override EOS () id. Set -1 to disable EOS."); --ABSL_FLAG(int32, pad_id, kDefaultTrainerSpec.pad_id(), -+STPC_FLAG(int32, pad_id, kDefaultTrainerSpec.pad_id(), - "Override PAD () id. Set -1 to disable PAD."); --ABSL_FLAG(std::string, unk_piece, kDefaultTrainerSpec.unk_piece(), -+STPC_FLAG(std::string, unk_piece, kDefaultTrainerSpec.unk_piece(), - "Override UNK () piece."); --ABSL_FLAG(std::string, bos_piece, kDefaultTrainerSpec.bos_piece(), -+STPC_FLAG(std::string, bos_piece, kDefaultTrainerSpec.bos_piece(), - "Override BOS () piece."); --ABSL_FLAG(std::string, eos_piece, kDefaultTrainerSpec.eos_piece(), -+STPC_FLAG(std::string, eos_piece, kDefaultTrainerSpec.eos_piece(), - "Override EOS () piece."); --ABSL_FLAG(std::string, pad_piece, kDefaultTrainerSpec.pad_piece(), -+STPC_FLAG(std::string, pad_piece, kDefaultTrainerSpec.pad_piece(), - "Override PAD () piece."); --ABSL_FLAG(std::string, unk_surface, kDefaultTrainerSpec.unk_surface(), -+STPC_FLAG(std::string, unk_surface, kDefaultTrainerSpec.unk_surface(), - "Dummy surface string for . In decoding is decoded to " - "`unk_surface`."); --ABSL_FLAG(bool, train_extremely_large_corpus, -+STPC_FLAG(bool, train_extremely_large_corpus, - kDefaultTrainerSpec.train_extremely_large_corpus(), - "Increase bit depth for unigram tokenization."); --ABSL_FLAG(uint32, random_seed, static_cast(-1), -+STPC_FLAG(uint32, random_seed, static_cast(-1), - "Seed value for random generator."); - - // DP related. --ABSL_FLAG(bool, enable_differential_privacy, false, -+STPC_FLAG(bool, enable_differential_privacy, false, - "Whether to add DP while training. Currently supported only by " - "UNIGRAM model."); - --ABSL_FLAG(float, differential_privacy_noise_level, 0.0f, -+STPC_FLAG(float, differential_privacy_noise_level, 0.0f, - "Amount of noise to add for" - " DP"); --ABSL_FLAG(std::uint64_t, differential_privacy_clipping_threshold, 0, -+STPC_FLAG(std::uint64_t, differential_privacy_clipping_threshold, 0, - "Threshold for" - " clipping the counts for DP"); - -@@ -168,11 +168,11 @@ int main(int argc, char *argv[]) { - sentencepiece::NormalizerSpec normalizer_spec; - NormalizerSpec denormalizer_spec; - -- CHECK(!absl::GetFlag(FLAGS_input).empty()); -- CHECK(!absl::GetFlag(FLAGS_model_prefix).empty()); -+ CHECK(!sentencepiece::GetFlag(FLAGS_input).empty()); -+ CHECK(!sentencepiece::GetFlag(FLAGS_model_prefix).empty()); - -- if (absl::GetFlag(FLAGS_random_seed) != -1) { -- sentencepiece::SetRandomGeneratorSeed(absl::GetFlag(FLAGS_random_seed)); -+ if (sentencepiece::GetFlag(FLAGS_random_seed) != -1) { -+ sentencepiece::SetRandomGeneratorSeed(sentencepiece::GetFlag(FLAGS_random_seed)); - } - - auto load_lines = [](absl::string_view filename) { -@@ -186,28 +186,28 @@ int main(int argc, char *argv[]) { - - // Populates the value from flags to spec. - #define SetTrainerSpecFromFlag(name) \ -- trainer_spec.set_##name(absl::GetFlag(FLAGS_##name)); -+ trainer_spec.set_##name(sentencepiece::GetFlag(FLAGS_##name)); - - #define SetNormalizerSpecFromFlag(name) \ -- normalizer_spec.set_##name(absl::GetFlag(FLAGS_##name)); -+ normalizer_spec.set_##name(sentencepiece::GetFlag(FLAGS_##name)); - - #define SetTrainerSpecFromFile(name) \ -- if (!absl::GetFlag(FLAGS_##name##_file).empty()) { \ -- const auto lines = load_lines(absl::GetFlag(FLAGS_##name##_file)); \ -+ if (!sentencepiece::GetFlag(FLAGS_##name##_file).empty()) { \ -+ const auto lines = load_lines(sentencepiece::GetFlag(FLAGS_##name##_file)); \ - trainer_spec.set_##name(absl::StrJoin(lines, "")); \ - } - - #define SetRepeatedTrainerSpecFromFlag(name) \ -- if (!absl::GetFlag(FLAGS_##name).empty()) { \ -+ if (!sentencepiece::GetFlag(FLAGS_##name).empty()) { \ - for (const auto &v : \ -- sentencepiece::util::StrSplitAsCSV(absl::GetFlag(FLAGS_##name))) { \ -+ sentencepiece::util::StrSplitAsCSV(sentencepiece::GetFlag(FLAGS_##name))) { \ - trainer_spec.add_##name(v); \ - } \ - } - - #define SetRepeatedTrainerSpecFromFile(name) \ -- if (!absl::GetFlag(FLAGS_##name##_file).empty()) { \ -- for (const auto &v : load_lines(absl::GetFlag(FLAGS_##name##_file))) { \ -+ if (!sentencepiece::GetFlag(FLAGS_##name##_file).empty()) { \ -+ for (const auto &v : load_lines(sentencepiece::GetFlag(FLAGS_##name##_file))) { \ - trainer_spec.add_##name(v); \ - } \ - } -@@ -261,21 +261,21 @@ int main(int argc, char *argv[]) { - SetRepeatedTrainerSpecFromFile(control_symbols); - SetRepeatedTrainerSpecFromFile(user_defined_symbols); - -- normalizer_spec.set_name(absl::GetFlag(FLAGS_normalization_rule_name)); -+ normalizer_spec.set_name(sentencepiece::GetFlag(FLAGS_normalization_rule_name)); - SetNormalizerSpecFromFlag(normalization_rule_tsv); - SetNormalizerSpecFromFlag(add_dummy_prefix); - SetNormalizerSpecFromFlag(remove_extra_whitespaces); - -- if (!absl::GetFlag(FLAGS_denormalization_rule_tsv).empty()) { -+ if (!sentencepiece::GetFlag(FLAGS_denormalization_rule_tsv).empty()) { - denormalizer_spec.set_normalization_rule_tsv( -- absl::GetFlag(FLAGS_denormalization_rule_tsv)); -+ sentencepiece::GetFlag(FLAGS_denormalization_rule_tsv)); - denormalizer_spec.set_add_dummy_prefix(false); - denormalizer_spec.set_remove_extra_whitespaces(false); - denormalizer_spec.set_escape_whitespaces(false); - } - - CHECK_OK(sentencepiece::SentencePieceTrainer::PopulateModelTypeFromString( -- absl::GetFlag(FLAGS_model_type), &trainer_spec)); -+ sentencepiece::GetFlag(FLAGS_model_type), &trainer_spec)); - - CHECK_OK(sentencepiece::SentencePieceTrainer::Train( - trainer_spec, normalizer_spec, denormalizer_spec)); -diff --git a/src/test_main.cc b/src/test_main.cc -index 38c978d..f9cf694 100644 ---- a/src/test_main.cc -+++ b/src/test_main.cc -@@ -12,16 +12,17 @@ - // See the License for the specific language governing permissions and - // limitations under the License.! - -+#include "glue/flags/flag.h" - #include "init.h" - #include "testharness.h" - - #ifdef OS_WIN --ABSL_FLAG(std::string, test_srcdir, "..\\data", "Data directory."); -+STPC_FLAG(std::string, test_srcdir, "..\\data", "Data directory."); - #else --ABSL_FLAG(std::string, test_srcdir, "../data", "Data directory."); -+STPC_FLAG(std::string, test_srcdir, "../data", "Data directory."); - #endif - --ABSL_FLAG(std::string, test_tmpdir, "test_tmp", "Temporary directory."); -+STPC_FLAG(std::string, test_tmpdir, "test_tmp", "Temporary directory."); - - int main(int argc, char **argv) { - sentencepiece::ScopedResourceDestructor cleaner; -diff --git a/src/testharness.cc b/src/testharness.cc -index 6769cd8..d9a84d2 100644 ---- a/src/testharness.cc -+++ b/src/testharness.cc -@@ -28,6 +28,7 @@ - #include "absl/strings/str_cat.h" - - #include "common.h" -+#include "glue/flags/flag.h" - #include "util.h" - - namespace sentencepiece { -@@ -57,9 +58,9 @@ bool RegisterTest(const char *base, const char *name, void (*func)()) { - int RunAllTests() { - int num = 0; - #ifdef OS_WIN -- _mkdir(absl::GetFlag(FLAGS_test_tmpdir).c_str()); -+ _mkdir(sentencepiece::GetFlag(FLAGS_test_tmpdir).c_str()); - #else -- mkdir(absl::GetFlag(FLAGS_test_tmpdir).c_str(), S_IRUSR | S_IWUSR | S_IXUSR); -+ mkdir(sentencepiece::GetFlag(FLAGS_test_tmpdir).c_str(), S_IRUSR | S_IWUSR | S_IXUSR); - #endif - - if (tests == nullptr) { -diff --git a/src/testharness.h b/src/testharness.h -index 8de9c5c..022325d 100644 ---- a/src/testharness.h -+++ b/src/testharness.h -@@ -20,14 +20,14 @@ - #include - #include - --#include "absl/flags/flag.h" --#include "absl/flags/parse.h" - #include "absl/strings/string_view.h" - - #include "common.h" -+#include "glue/flags/flag.h" -+#include "glue/flags/parse.h" - --ABSL_DECLARE_FLAG(std::string, test_tmpdir); --ABSL_DECLARE_FLAG(std::string, test_srcdir); -+STPC_DECLARE_FLAG(std::string, test_tmpdir); -+STPC_DECLARE_FLAG(std::string, test_srcdir); - - namespace sentencepiece { - namespace test { -diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc -index 27cec4f..e9e1cd2 100644 ---- a/src/trainer_interface.cc -+++ b/src/trainer_interface.cc -@@ -24,8 +24,6 @@ - - #include "absl/container/flat_hash_map.h" - #include "absl/memory/memory.h" --#include "absl/random/distributions.h" --#include "absl/random/random.h" - #include "absl/strings/numbers.h" - #include "absl/strings/str_cat.h" - #include "absl/strings/str_format.h" -@@ -33,6 +31,8 @@ - #include "absl/strings/str_split.h" - - #include "filesystem.h" -+#include "glue/random/distributions.h" -+#include "glue/random/random.h" - #include "model_factory.h" - #include "model_interface.h" - #include "normalizer.h" -@@ -304,10 +304,10 @@ bool TrainerInterface::IsValidSentencePiece( - } - - template --void AddDPNoise(const TrainerSpec &trainer_spec, absl::SharedBitGen &generator, -+void AddDPNoise(const TrainerSpec &trainer_spec, sentencepiece::SharedBitGen &generator, - T *to_update) { - if (trainer_spec.differential_privacy_noise_level() > 0) { -- float random_num = absl::Gaussian( -+ float random_num = sentencepiece::Gaussian( - generator, 0, trainer_spec.differential_privacy_noise_level()); - - *to_update = -@@ -481,7 +481,7 @@ END: - for (int n = 0; n < num_workers; ++n) { - pool->Schedule([&, n]() { - // One per thread generator. -- absl::SharedBitGen generator; -+ sentencepiece::SharedBitGen generator; - for (size_t i = n; i < sentences_.size(); i += num_workers) { - AddDPNoise(trainer_spec_, generator, - &(sentences_[i].second)); -diff --git a/src/trainer_interface_test.cc b/src/trainer_interface_test.cc -index 75e9f54..ca4d4e7 100644 ---- a/src/trainer_interface_test.cc -+++ b/src/trainer_interface_test.cc -@@ -20,6 +20,7 @@ - #include "absl/strings/str_format.h" - - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "testharness.h" - #include "util.h" - -@@ -494,7 +495,7 @@ TEST(TrainerInterfaceTest, SerializeTest) { - - TEST(TrainerInterfaceTest, CharactersTest) { - const std::string input_file = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "input"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "input"); - { - auto output = filesystem::NewWritableFile(input_file); - // Make a single line with 50 "a", 49 "あ", and 1 "b". -@@ -560,7 +561,7 @@ TEST(TrainerInterfaceTest, MultiFileSentenceIteratorTest) { - std::vector files; - std::vector expected; - for (int i = 0; i < 10; ++i) { -- const std::string file = util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), -+ const std::string file = util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), - absl::StrCat("input", i)); - auto output = filesystem::NewWritableFile(file); - int num_line = (rand() % 100) + 1; -@@ -582,7 +583,7 @@ TEST(TrainerInterfaceTest, MultiFileSentenceIteratorTest) { - TEST(TrainerInterfaceTest, MultiFileSentenceIteratorErrorTest) { - std::vector files; - for (int i = 0; i < 10; ++i) { -- const std::string file = util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), -+ const std::string file = util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), - absl::StrCat("input_not_exist", i)); - files.push_back(file); - } -diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc -index ab887b6..f1c465b 100644 ---- a/src/unigram_model_trainer_test.cc -+++ b/src/unigram_model_trainer_test.cc -@@ -21,6 +21,7 @@ - #include "absl/strings/str_join.h" - - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "sentencepiece_model.pb.h" - #include "sentencepiece_processor.h" - #include "sentencepiece_trainer.h" -@@ -49,9 +50,9 @@ TrainerResult RunTrainer(const std::vector& input, int size, - const bool use_dp = false, const float dp_noise = 0.0, - const uint32 dp_clip = 0) { - const std::string input_file = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "input"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "input"); - const std::string model_prefix = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "model"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "model"); - { - auto output = filesystem::NewWritableFile(input_file); - for (const auto& line : input) { -@@ -156,13 +157,13 @@ static constexpr char kTestInputData[] = "wagahaiwa_nekodearu.txt"; - - TEST(UnigramTrainerTest, EndToEndTest) { - const std::string input = -- util::JoinPath(absl::GetFlag(FLAGS_test_srcdir), kTestInputData); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_srcdir), kTestInputData); - - ASSERT_TRUE( - SentencePieceTrainer::Train( - absl::StrCat( - "--model_prefix=", -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "tmp_model"), -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "tmp_model"), - " --input=", input, - " --vocab_size=8000 --normalization_rule_name=identity", - " --model_type=unigram --user_defined_symbols=", -@@ -170,7 +171,7 @@ TEST(UnigramTrainerTest, EndToEndTest) { - .ok()); - - SentencePieceProcessor sp; -- EXPECT_TRUE(sp.Load(util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), -+ EXPECT_TRUE(sp.Load(util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), - "tmp_model.model")) - .ok()); - EXPECT_EQ(8000, sp.GetPieceSize()); -diff --git a/src/util_test.cc b/src/util_test.cc -index e264081..837cca4 100644 ---- a/src/util_test.cc -+++ b/src/util_test.cc -@@ -17,6 +17,7 @@ - #include "absl/strings/str_cat.h" - - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "testharness.h" - #include "util.h" - -@@ -333,7 +334,7 @@ TEST(UtilTest, InputOutputBufferTest) { - - { - auto output = filesystem::NewWritableFile( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "test_file")); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "test_file")); - for (size_t i = 0; i < kData.size(); ++i) { - output->WriteLine(kData[i]); - } -@@ -341,7 +342,7 @@ TEST(UtilTest, InputOutputBufferTest) { - - { - auto input = filesystem::NewReadableFile( -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "test_file")); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "test_file")); - std::string line; - for (size_t i = 0; i < kData.size(); ++i) { - EXPECT_TRUE(input->ReadLine(&line)); -diff --git a/src/word_model_trainer_test.cc b/src/word_model_trainer_test.cc -index 8288027..ab55030 100644 ---- a/src/word_model_trainer_test.cc -+++ b/src/word_model_trainer_test.cc -@@ -19,6 +19,7 @@ - #include "absl/strings/str_join.h" - - #include "filesystem.h" -+#include "glue/flags/flag.h" - #include "sentencepiece_processor.h" - #include "testharness.h" - #include "util.h" -@@ -33,9 +34,9 @@ namespace { - - std::string RunTrainer(const std::vector &input, int size) { - const std::string input_file = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "input"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "input"); - const std::string model_prefix = -- util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "model"); -+ util::JoinPath(sentencepiece::GetFlag(FLAGS_test_tmpdir), "model"); - { - auto output = filesystem::NewWritableFile(input_file); - for (const auto &line : input) { From 2962edf9221fa1cedea73dbb0e61abf82871472f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 20 Feb 2024 16:44:47 +1100 Subject: [PATCH 03/10] update CMake options for library build --- recipe/build-lib.bat | 6 ++++-- recipe/build-lib.sh | 7 ++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/recipe/build-lib.bat b/recipe/build-lib.bat index c0e2be0b..72841934 100644 --- a/recipe/build-lib.bat +++ b/recipe/build-lib.bat @@ -13,10 +13,12 @@ cmake -G "Ninja" ^ -DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX%;%LIBRARY_BIN%;%LIBRARY_LIB% ^ -DCMAKE_INCLUDE_PATH=%LIBRARY_INC% ^ -DCMAKE_INSTALL_PREFIX=%LIBRARY_PREFIX% ^ + -DCMAKE_INSTALL_LIBDIR="lib" ^ + -DCMAKE_INSTALL_INCLUDEDIR="include" ^ -Dprotobuf_BUILD_SHARED_LIBS=OFF ^ -DSPM_ENABLE_SHARED=OFF ^ - -DSPM_USE_BUILTIN_PROTOBUF=OFF ^ - -DSPM_USE_EXTERNAL_ABSL=ON ^ + -DSPM_ABSL_PROVIDER="package" ^ + -DSPM_PROTOBUF_PROVIDER="package" ^ .. IF %ERRORLEVEL% NEQ 0 exit 1 diff --git a/recipe/build-lib.sh b/recipe/build-lib.sh index 3b071d22..6f879ab7 100644 --- a/recipe/build-lib.sh +++ b/recipe/build-lib.sh @@ -16,12 +16,13 @@ fi cmake -G "Ninja" \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$PREFIX \ - -DCMAKE_INSTALL_LIBDIR=$PREFIX/lib \ + -DCMAKE_INSTALL_LIBDIR="lib" \ + -DCMAKE_INSTALL_INCLUDEDIR="include" \ -DCMAKE_AR="${AR}" \ -DSPM_ENABLE_SHARED=ON \ -DSPM_ENABLE_TCMALLOC=OFF \ - -DSPM_USE_EXTERNAL_ABSL=ON \ - -DSPM_USE_BUILTIN_PROTOBUF=OFF \ + -DSPM_ABSL_PROVIDER="package" \ + -DSPM_PROTOBUF_PROVIDER="package" \ ${CMAKE_ARGS} \ .. From 2bd05adb50c1519268680076e0b3c54fc83c4bae Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 20 Feb 2024 17:01:39 +1100 Subject: [PATCH 04/10] remove defensive deletes --- recipe/build-lib.bat | 5 ----- recipe/build-lib.sh | 5 ----- 2 files changed, 10 deletions(-) diff --git a/recipe/build-lib.bat b/recipe/build-lib.bat index 72841934..02e3d6ad 100644 --- a/recipe/build-lib.bat +++ b/recipe/build-lib.bat @@ -1,10 +1,5 @@ @echo on -:: we're trying to avoid the third_party sources, and not building them; -:: to avoid weird errors if those sources got picked up nevertheless, delete them -rmdir /S /Q third_party\absl -rmdir /S /Q third_party\protobuf-lite - mkdir build cd build diff --git a/recipe/build-lib.sh b/recipe/build-lib.sh index 6f879ab7..4beeaead 100644 --- a/recipe/build-lib.sh +++ b/recipe/build-lib.sh @@ -1,11 +1,6 @@ #!/bin/bash set -ex -# we're trying to avoid the third_party sources, and not building them; -# to avoid weird errors if those sources got picked up nevertheless, delete them -rm -rf third_party/absl -rm -rf third_party/protobuf-lite - mkdir build cd build From 9a21672356869b318fb62ef7cbcfa74c8d5798bf Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 20 Feb 2024 17:46:51 +1100 Subject: [PATCH 05/10] fix abseil setup on windows --- recipe/meta.yaml | 2 ++ ...0001-do-not-mix-static-shared-builds.patch | 2 +- ...ild-vendored-abseil-libprotobuf-lite.patch | 2 +- ...libs-headers-for-windows-in-setup.py.patch | 2 +- ...-install-pkg-config-files-on-windows.patch | 2 +- ...05-create-and-install-CMake-metadata.patch | 2 +- ...ink-to-static-absl_flags_-on-windows.patch | 36 +++++++++++++++++++ 7 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 8bff7675..a17df226 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -18,6 +18,8 @@ source: - patches/0004-also-install-pkg-config-files-on-windows.patch # install CMake metadata - patches/0005-create-and-install-CMake-metadata.patch + # fix abseil setup on windows + - patches/0006-also-link-to-static-absl_flags_-on-windows.patch build: number: 0 diff --git a/recipe/patches/0001-do-not-mix-static-shared-builds.patch b/recipe/patches/0001-do-not-mix-static-shared-builds.patch index 79b94a30..0185765f 100644 --- a/recipe/patches/0001-do-not-mix-static-shared-builds.patch +++ b/recipe/patches/0001-do-not-mix-static-shared-builds.patch @@ -1,7 +1,7 @@ From 2fe3e37744c810590e631c01fb57133080fc5f46 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 2 Dec 2021 08:39:53 +1100 -Subject: [PATCH 1/5] do not mix static & shared builds +Subject: [PATCH 1/6] do not mix static & shared builds --- src/CMakeLists.txt | 20 ++++++++++---------- diff --git a/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch b/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch index a77643c7..d921105a 100644 --- a/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch +++ b/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch @@ -1,7 +1,7 @@ From ab5c20be4a987d1cd6d2e472634f5e1b9c11211f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 2 Dec 2021 10:05:12 +1100 -Subject: [PATCH 2/5] do not build vendored abseil & libprotobuf-lite +Subject: [PATCH 2/6] do not build vendored abseil & libprotobuf-lite ensure we can use shared builds of libprotobuf also on windows --- diff --git a/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch b/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch index 7cf807a6..82499c7b 100644 --- a/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch +++ b/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch @@ -1,7 +1,7 @@ From 428ac3758e24b1aeedec8e0568d128e2097d1646 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 11 Dec 2022 01:09:03 +1100 -Subject: [PATCH 3/5] point to our libs / headers for windows in setup.py +Subject: [PATCH 3/6] point to our libs / headers for windows in setup.py also do not risk building against bundled libs, nor setting /MT for the MSVC static runtime libs diff --git a/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch b/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch index f8e3f6d0..af08f2c8 100644 --- a/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch +++ b/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch @@ -1,7 +1,7 @@ From f6ec12fe46a666940007cb205e715ee6f3916e97 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 12 Dec 2022 14:36:45 +1100 -Subject: [PATCH 4/5] also install pkg-config files on windows +Subject: [PATCH 4/6] also install pkg-config files on windows --- CMakeLists.txt | 4 +--- diff --git a/recipe/patches/0005-create-and-install-CMake-metadata.patch b/recipe/patches/0005-create-and-install-CMake-metadata.patch index a708d988..e357c364 100644 --- a/recipe/patches/0005-create-and-install-CMake-metadata.patch +++ b/recipe/patches/0005-create-and-install-CMake-metadata.patch @@ -1,7 +1,7 @@ From 366a1080b048a43452bdfb43968c5f2a44acdcf4 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 18 Jan 2023 19:44:15 +1100 -Subject: [PATCH 5/5] create and install CMake metadata +Subject: [PATCH 5/6] create and install CMake metadata --- CMakeLists.txt | 10 ++++++++++ diff --git a/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch b/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch new file mode 100644 index 00000000..c7617bfb --- /dev/null +++ b/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch @@ -0,0 +1,36 @@ +From 64f7608bd0757387cee221df7014c9bdbe78585b Mon Sep 17 00:00:00 2001 +From: "H. Vetinari" +Date: Tue, 20 Feb 2024 17:43:23 +1100 +Subject: [PATCH 6/6] also link to static absl_flags_* on windows + +--- + python/setup.py | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/python/setup.py b/python/setup.py +index fb301ac..4366d1a 100755 +--- a/python/setup.py ++++ b/python/setup.py +@@ -117,13 +117,21 @@ if os.name == 'nt': + ] + elif True: + cflags = ['/std:c++17', '/MD', '/I' + os.environ["LIBRARY_INC"]] ++ # most of abseil can be built as shared, which ends up in one giant library ++ # called abseil_dll; the absl_flags_* libraries always stay static ++ absl_libs = [ ++ "abseil_dll", "absl_flags", "absl_flags_commandlineflag", "absl_flags_commandlineflag_internal", ++ "absl_flags_config", "absl_flags_internal", "absl_flags_marshalling", "absl_flags_parse", ++ "absl_flags_private_handle_accessor", "absl_flags_program_name", "absl_flags_reflection", ++ "absl_flags_usage", "absl_flags_usage_internal", ++ ] + libs = [ + # equivalent of -L$PREFIX/lib -lsentencepiece -lsentencepiece_train -lprotobuf-lite + os.environ["LIBRARY_LIB"] + f"\\{x}.lib" + # protobuf actually has the lib-prefix in the name also on windows; + # since libsentencepiece is static on windows, we also need _its_ + # host dependencies for the link interface, i.e. also abseil +- for x in ["sentencepiece", "sentencepiece_train", "libprotobuf-lite", "abseil_dll"] ++ for x in ["sentencepiece", "sentencepiece_train", "libprotobuf-lite"] + absl_libs + ] + else: + # build library locally with cmake and vc++. From 67b630eed9de7ecc8c23a41d836aafbf544e4a72 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 20 Feb 2024 18:45:53 +1100 Subject: [PATCH 06/10] drop unvendoring hunk; order definition before use While modern protobuf already sets PROTOBUF_USE_DLLS in the library interface, this does not seem to work as intended yet --- recipe/build-lib.bat | 2 - recipe/build-lib.sh | 2 - recipe/meta.yaml | 7 +- ...0001-do-not-mix-static-shared-builds.patch | 2 +- ...TOBUF_USE_DLLS-when-using-our-own-p.patch} | 21 ++---- ...libs-headers-for-windows-in-setup.py.patch | 4 +- ...-install-pkg-config-files-on-windows.patch | 4 +- ...05-create-and-install-CMake-metadata.patch | 4 +- ...ink-to-static-absl_flags_-on-windows.patch | 4 +- ...default-CMAKE_INSTALL_-BIN-INCLUDE-L.patch | 72 +++++++++++++++++++ 10 files changed, 91 insertions(+), 31 deletions(-) rename recipe/patches/{0002-do-not-build-vendored-abseil-libprotobuf-lite.patch => 0002-ensure-we-set-PROTOBUF_USE_DLLS-when-using-our-own-p.patch} (54%) create mode 100644 recipe/patches/0007-move-setting-of-default-CMAKE_INSTALL_-BIN-INCLUDE-L.patch diff --git a/recipe/build-lib.bat b/recipe/build-lib.bat index 02e3d6ad..67e20e8a 100644 --- a/recipe/build-lib.bat +++ b/recipe/build-lib.bat @@ -8,8 +8,6 @@ cmake -G "Ninja" ^ -DCMAKE_PREFIX_PATH=%LIBRARY_PREFIX%;%LIBRARY_BIN%;%LIBRARY_LIB% ^ -DCMAKE_INCLUDE_PATH=%LIBRARY_INC% ^ -DCMAKE_INSTALL_PREFIX=%LIBRARY_PREFIX% ^ - -DCMAKE_INSTALL_LIBDIR="lib" ^ - -DCMAKE_INSTALL_INCLUDEDIR="include" ^ -Dprotobuf_BUILD_SHARED_LIBS=OFF ^ -DSPM_ENABLE_SHARED=OFF ^ -DSPM_ABSL_PROVIDER="package" ^ diff --git a/recipe/build-lib.sh b/recipe/build-lib.sh index 4beeaead..de788b20 100644 --- a/recipe/build-lib.sh +++ b/recipe/build-lib.sh @@ -11,8 +11,6 @@ fi cmake -G "Ninja" \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=$PREFIX \ - -DCMAKE_INSTALL_LIBDIR="lib" \ - -DCMAKE_INSTALL_INCLUDEDIR="include" \ -DCMAKE_AR="${AR}" \ -DSPM_ENABLE_SHARED=ON \ -DSPM_ENABLE_TCMALLOC=OFF \ diff --git a/recipe/meta.yaml b/recipe/meta.yaml index a17df226..dd1f9b8b 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -10,8 +10,8 @@ source: patches: # trying to build both static & shared build seems to break on OSX - patches/0001-do-not-mix-static-shared-builds.patch - # unvendor abseil & protobuf-lite - - patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch + # set PROTOBUF_USE_DLLS + - patches/0002-ensure-we-set-PROTOBUF_USE_DLLS-when-using-our-own-p.patch # ensure python bindings link to correct libs on windows - patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch # install pkg-config metadata also on windows @@ -20,6 +20,9 @@ source: - patches/0005-create-and-install-CMake-metadata.patch # fix abseil setup on windows - patches/0006-also-link-to-static-absl_flags_-on-windows.patch + # backport of https://github.com/google/sentencepiece/pull/979: + # avoid having to specify CMAKE_INSTALL_{LIB,INCLUDE}DIR due to wrong order + - patches/0007-move-setting-of-default-CMAKE_INSTALL_-BIN-INCLUDE-L.patch build: number: 0 diff --git a/recipe/patches/0001-do-not-mix-static-shared-builds.patch b/recipe/patches/0001-do-not-mix-static-shared-builds.patch index 0185765f..6d63d7b7 100644 --- a/recipe/patches/0001-do-not-mix-static-shared-builds.patch +++ b/recipe/patches/0001-do-not-mix-static-shared-builds.patch @@ -1,7 +1,7 @@ From 2fe3e37744c810590e631c01fb57133080fc5f46 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 2 Dec 2021 08:39:53 +1100 -Subject: [PATCH 1/6] do not mix static & shared builds +Subject: [PATCH 1/7] do not mix static & shared builds --- src/CMakeLists.txt | 20 ++++++++++---------- diff --git a/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch b/recipe/patches/0002-ensure-we-set-PROTOBUF_USE_DLLS-when-using-our-own-p.patch similarity index 54% rename from recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch rename to recipe/patches/0002-ensure-we-set-PROTOBUF_USE_DLLS-when-using-our-own-p.patch index d921105a..b8e93a02 100644 --- a/recipe/patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch +++ b/recipe/patches/0002-ensure-we-set-PROTOBUF_USE_DLLS-when-using-our-own-p.patch @@ -1,13 +1,12 @@ -From ab5c20be4a987d1cd6d2e472634f5e1b9c11211f Mon Sep 17 00:00:00 2001 +From 185e8cd8603d188cccdb6f170a60d2984211b70c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 2 Dec 2021 10:05:12 +1100 -Subject: [PATCH 2/6] do not build vendored abseil & libprotobuf-lite +Subject: [PATCH 2/7] ensure we set PROTOBUF_USE_DLLS when using our own + protobuf -ensure we can use shared builds of libprotobuf also on windows --- - src/CMakeLists.txt | 5 +++++ - third_party/CMakeLists.txt | 5 +---- - 2 files changed, 6 insertions(+), 4 deletions(-) + src/CMakeLists.txt | 5 +++++ + 1 file changed, 5 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fbdf238..2b8aefa 100644 @@ -25,13 +24,3 @@ index fbdf238..2b8aefa 100644 include_directories(${Protobuf_INCLUDE_DIRS}) protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto) protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto) -diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt -index d00ecba..3096702 100644 ---- a/third_party/CMakeLists.txt -+++ b/third_party/CMakeLists.txt -@@ -1,4 +1 @@ --include_directories(absl/strings darts_clone esaxx protobuf-lite) -- -- -- -+include_directories(darts_clone esaxx) diff --git a/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch b/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch index 82499c7b..ba1f4a3c 100644 --- a/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch +++ b/recipe/patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch @@ -1,7 +1,7 @@ -From 428ac3758e24b1aeedec8e0568d128e2097d1646 Mon Sep 17 00:00:00 2001 +From a285dbb0bb469256fb43f483e398cc0f028cd2c8 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 11 Dec 2022 01:09:03 +1100 -Subject: [PATCH 3/6] point to our libs / headers for windows in setup.py +Subject: [PATCH 3/7] point to our libs / headers for windows in setup.py also do not risk building against bundled libs, nor setting /MT for the MSVC static runtime libs diff --git a/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch b/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch index af08f2c8..6a17a23c 100644 --- a/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch +++ b/recipe/patches/0004-also-install-pkg-config-files-on-windows.patch @@ -1,7 +1,7 @@ -From f6ec12fe46a666940007cb205e715ee6f3916e97 Mon Sep 17 00:00:00 2001 +From d1afa1e1983080ce57443aca9afac359199f115b Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 12 Dec 2022 14:36:45 +1100 -Subject: [PATCH 4/6] also install pkg-config files on windows +Subject: [PATCH 4/7] also install pkg-config files on windows --- CMakeLists.txt | 4 +--- diff --git a/recipe/patches/0005-create-and-install-CMake-metadata.patch b/recipe/patches/0005-create-and-install-CMake-metadata.patch index e357c364..bc2a5d07 100644 --- a/recipe/patches/0005-create-and-install-CMake-metadata.patch +++ b/recipe/patches/0005-create-and-install-CMake-metadata.patch @@ -1,7 +1,7 @@ -From 366a1080b048a43452bdfb43968c5f2a44acdcf4 Mon Sep 17 00:00:00 2001 +From c963db4cb65affe315ae197b482557715908c3da Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 18 Jan 2023 19:44:15 +1100 -Subject: [PATCH 5/6] create and install CMake metadata +Subject: [PATCH 5/7] create and install CMake metadata --- CMakeLists.txt | 10 ++++++++++ diff --git a/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch b/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch index c7617bfb..58c129b2 100644 --- a/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch +++ b/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch @@ -1,7 +1,7 @@ -From 64f7608bd0757387cee221df7014c9bdbe78585b Mon Sep 17 00:00:00 2001 +From 2a09741805ff7c3fad70b41e2c07b0520d9f77a9 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 20 Feb 2024 17:43:23 +1100 -Subject: [PATCH 6/6] also link to static absl_flags_* on windows +Subject: [PATCH 6/7] also link to static absl_flags_* on windows --- python/setup.py | 10 +++++++++- diff --git a/recipe/patches/0007-move-setting-of-default-CMAKE_INSTALL_-BIN-INCLUDE-L.patch b/recipe/patches/0007-move-setting-of-default-CMAKE_INSTALL_-BIN-INCLUDE-L.patch new file mode 100644 index 00000000..5e0328a9 --- /dev/null +++ b/recipe/patches/0007-move-setting-of-default-CMAKE_INSTALL_-BIN-INCLUDE-L.patch @@ -0,0 +1,72 @@ +From aa7701e2fc3bc5997cc07e44c647dfe3261c9e49 Mon Sep 17 00:00:00 2001 +From: "H. Vetinari" +Date: Tue, 20 Feb 2024 18:43:25 +1100 +Subject: [PATCH 7/7] move setting of default + CMAKE_INSTALL_{BIN,INCLUDE,LIB}DIR before first use + +also unify spelling of CMAKE_INSTALL_INCLUDEDIR following GNUInstallDirs +defaults, see also CMake docs: +https://cmake.org/cmake/help/latest/command/install.html +--- + CMakeLists.txt | 24 ++++++++++++------------ + src/CMakeLists.txt | 4 ++-- + 2 files changed, 14 insertions(+), 14 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 56830cf..03f1589 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -57,6 +57,18 @@ if((CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + string(APPEND CMAKE_CXX_FLAGS " -fmacro-prefix-map=${CMAKE_SOURCE_DIR}/=''") + endif() + ++if (NOT DEFINED CMAKE_INSTALL_BINDIR) ++ set(CMAKE_INSTALL_BINDIR bin) ++endif() ++ ++if (NOT DEFINED CMAKE_INSTALL_LIBDIR) ++ set(CMAKE_INSTALL_LIBDIR lib) ++endif() ++ ++if (NOT DEFINED CMAKE_INSTALL_INCLUDEDIR) ++ set(CMAKE_INSTALL_INCLUDEDIR include) ++endif() ++ + if (UNIX) + include(GNUInstallDirs) + set(prefix ${CMAKE_INSTALL_PREFIX}) +@@ -103,18 +115,6 @@ if (APPLE) + endif() + endif() + +-if (NOT DEFINED CMAKE_INSTALL_BINDIR) +- set(CMAKE_INSTALL_BINDIR bin) +-endif() +- +-if (NOT DEFINED CMAKE_INSTALL_LIBDIR) +- set(CMAKE_INSTALL_LIBDIR lib) +-endif() +- +-if (NOT DEFINED CMAKE_INSTALL_INCDIR) +- set(CMAKE_INSTALL_INCDIR include) +-endif() +- + # SPDX-License-Identifier: (MIT OR CC0-1.0) + # Copyright 2020 Jan Tojnar + # https://github.com/jtojnar/cmake-snips +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index eed204f..a612357 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -331,9 +331,9 @@ install(EXPORT sentencepieceTargets + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/sentencepiece") + + install(FILES sentencepiece_trainer.h sentencepiece_processor.h +- DESTINATION ${CMAKE_INSTALL_INCDIR}) ++ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + if (NOT SPM_PROTOBUF_PROVIDER STREQUAL "internal") +- install(FILES ${SPM_PROTO_HDRS} DESTINATION ${CMAKE_INSTALL_INCDIR}) ++ install(FILES ${SPM_PROTO_HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + endif() + + file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir) From 0dcf90180d3361fc3696d2fb63c53d1b65e6faf2 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 15 Mar 2024 18:07:28 +1100 Subject: [PATCH 07/10] expect exit code 1 for spm_* --- recipe/meta.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index dd1f9b8b..15f4aa60 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -141,7 +141,9 @@ outputs: commands: # binaries {% for each_bin in ["decode", "encode", "export_vocab", "normalize", "train"] %} - - spm_{{ each_bin }} --help + # expect exit code 1, see https://github.com/google/sentencepiece/issues/978 + - spm_{{ each_bin }} --help >/dev/null || [[ $? == 1 ]] # [unix] + - spm_{{ each_bin }} --help & if %ERRORLEVEL% NEQ 1 (exit 0) else (exit 1) # [win] {% endfor %} - name: sentencepiece-python From ece7e3208081d71ba134458032d29795cd7e29da Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 15 Mar 2024 18:13:45 +1100 Subject: [PATCH 08/10] link extra abseil libs in cmake test on win --- recipe/cmake_test/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/recipe/cmake_test/CMakeLists.txt b/recipe/cmake_test/CMakeLists.txt index d031809f..1834b5ee 100644 --- a/recipe/cmake_test/CMakeLists.txt +++ b/recipe/cmake_test/CMakeLists.txt @@ -8,4 +8,9 @@ if(MSVC) # static libsentencepiece leaks its host dependencies; # need to link them as well target_link_libraries(test_me libprotobuf abseil_dll) + # absl_flag* are always static on windows + target_link_libraries(test_me absl_log_flags absl_flags_commandlineflag + absl_flags_commandlineflag_internal absl_flags_config absl_flags_internal + absl_flags_marshalling absl_flags_parse absl_flags_private_handle_accessor + absl_flags_program_name absl_flags_reflection absl_flags_usage absl_flags_usage_internal) endif() From 0f967659765f9a370212b2bf30bf71b28426c94c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 15 Mar 2024 18:15:14 +1100 Subject: [PATCH 09/10] update changed library name in abseil 20240116 --- .../0006-also-link-to-static-absl_flags_-on-windows.patch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch b/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch index 58c129b2..8fa58106 100644 --- a/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch +++ b/recipe/patches/0006-also-link-to-static-absl_flags_-on-windows.patch @@ -18,7 +18,7 @@ index fb301ac..4366d1a 100755 + # most of abseil can be built as shared, which ends up in one giant library + # called abseil_dll; the absl_flags_* libraries always stay static + absl_libs = [ -+ "abseil_dll", "absl_flags", "absl_flags_commandlineflag", "absl_flags_commandlineflag_internal", ++ "abseil_dll", "absl_log_flags", "absl_flags_commandlineflag", "absl_flags_commandlineflag_internal", + "absl_flags_config", "absl_flags_internal", "absl_flags_marshalling", "absl_flags_parse", + "absl_flags_private_handle_accessor", "absl_flags_program_name", "absl_flags_reflection", + "absl_flags_usage", "absl_flags_usage_internal", From 4314defe766ea474c21616b57a930520d0d2dda7 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 15 Mar 2024 18:16:17 +1100 Subject: [PATCH 10/10] move away from deprecated option in conda-forge.yml --- conda-forge.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda-forge.yml b/conda-forge.yml index 092b9fdf..2c603b9e 100644 --- a/conda-forge.yml +++ b/conda-forge.yml @@ -11,4 +11,4 @@ github: provider: linux_aarch64: default linux_ppc64le: default -test_on_native_only: true +test: native_and_emulated