Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sentencepiece 0.2.0 #48

Merged
merged 11 commits into from
Mar 15, 2024
2 changes: 1 addition & 1 deletion conda-forge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ github:
provider:
linux_aarch64: default
linux_ppc64le: default
test_on_native_only: true
test: native_and_emulated
9 changes: 2 additions & 7 deletions recipe/build-lib.bat
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
@echo on

:: we're trying to avoid the third_party sources, and not building them;
:: to avoid weird errors if those sources got picked up nevertheless, delete them
rmdir /S /Q third_party\absl
rmdir /S /Q third_party\protobuf-lite

mkdir build
cd build

Expand All @@ -15,8 +10,8 @@ cmake -G "Ninja" ^
-DCMAKE_INSTALL_PREFIX=%LIBRARY_PREFIX% ^
-Dprotobuf_BUILD_SHARED_LIBS=OFF ^
-DSPM_ENABLE_SHARED=OFF ^
-DSPM_USE_BUILTIN_PROTOBUF=OFF ^
-DSPM_USE_EXTERNAL_ABSL=ON ^
-DSPM_ABSL_PROVIDER="package" ^
-DSPM_PROTOBUF_PROVIDER="package" ^
..
IF %ERRORLEVEL% NEQ 0 exit 1

Expand Down
10 changes: 2 additions & 8 deletions recipe/build-lib.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
#!/bin/bash
set -ex

# we're trying to avoid the third_party sources, and not building them;
# to avoid weird errors if those sources got picked up nevertheless, delete them
rm -rf third_party/absl
rm -rf third_party/protobuf-lite

mkdir build
cd build

Expand All @@ -16,12 +11,11 @@ fi
cmake -G "Ninja" \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_INSTALL_LIBDIR=$PREFIX/lib \
-DCMAKE_AR="${AR}" \
-DSPM_ENABLE_SHARED=ON \
-DSPM_ENABLE_TCMALLOC=OFF \
-DSPM_USE_EXTERNAL_ABSL=ON \
-DSPM_USE_BUILTIN_PROTOBUF=OFF \
-DSPM_ABSL_PROVIDER="package" \
-DSPM_PROTOBUF_PROVIDER="package" \
${CMAKE_ARGS} \
..

Expand Down
5 changes: 5 additions & 0 deletions recipe/cmake_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,9 @@ if(MSVC)
# static libsentencepiece leaks its host dependencies;
# need to link them as well
target_link_libraries(test_me libprotobuf abseil_dll)
# the absl_flags_* libraries are always static on windows, so link them explicitly
target_link_libraries(test_me absl_log_flags absl_flags_commandlineflag
absl_flags_commandlineflag_internal absl_flags_config absl_flags_internal
absl_flags_marshalling absl_flags_parse absl_flags_private_handle_accessor
absl_flags_program_name absl_flags_reflection absl_flags_usage absl_flags_usage_internal)
endif()
32 changes: 16 additions & 16 deletions recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,33 +1,31 @@
{% set version = "0.1.99" %}
{% set version = "0.2.0" %}

package:
name: sentencepiece-split
version: {{ version }}

source:
url: https://github.com/google/sentencepiece/archive/refs/tags/v{{ version }}.tar.gz
sha256: 63617eaf56c7a3857597dcd8780461f57dd21381b56a27716ef7d7e02e14ced4
sha256: 9970f0a0afee1648890293321665e5b2efa04eaec9f1671fcf8048f456f5bb86
patches:
# trying to build both static & shared libraries in the same build seems to break on OSX
- patches/0001-do-not-mix-static-shared-builds.patch
# unvendor abseil & protobuf-lite
- patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch
# avoid installing into $PREFIX/lib64; make sure pkg-config template is filled correctly
- patches/0003-consistently-use-absolute-paths-for-CMAKE_INSTALL_-D.patch
# upstream sources still directly include third_party/absl...
- patches/0004-ACTUALLY-use-external-absl.patch
# upstream carries its own glue code under third_party/absl/flags;
# move it to the sentencepiece sources
- patches/0005-stop-pretending-sp-glue-code-belongs-in-third_party-.patch
# set PROTOBUF_USE_DLLS
- patches/0002-ensure-we-set-PROTOBUF_USE_DLLS-when-using-our-own-p.patch
# ensure python bindings link to correct libs on windows
- patches/0006-point-to-our-libs-headers-for-windows-in-setup.py.patch
- patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch
# install pkg-config metadata also on windows
- patches/0007-also-install-pkg-config-files-on-windows.patch
- patches/0004-also-install-pkg-config-files-on-windows.patch
# install CMake metadata
- patches/0008-create-and-install-CMake-metadata.patch
- patches/0005-create-and-install-CMake-metadata.patch
# fix abseil setup on windows
- patches/0006-also-link-to-static-absl_flags_-on-windows.patch
# backport of https://github.com/google/sentencepiece/pull/979:
# avoid having to specify CMAKE_INSTALL_{LIB,INCLUDE}DIR manually (upstream sets their defaults in the wrong order)
- patches/0007-move-setting-of-default-CMAKE_INSTALL_-BIN-INCLUDE-L.patch

build:
number: 8
number: 0

requirements:
build:
Expand Down Expand Up @@ -143,7 +141,9 @@ outputs:
commands:
# binaries
{% for each_bin in ["decode", "encode", "export_vocab", "normalize", "train"] %}
- spm_{{ each_bin }} --help
# expect exit code 1, see https://github.com/google/sentencepiece/issues/978
- spm_{{ each_bin }} --help >/dev/null || [[ $? == 1 ]] # [unix]
- spm_{{ each_bin }} --help & if %ERRORLEVEL% NEQ 1 (exit 0) else (exit 1) # [win]
{% endfor %}

- name: sentencepiece-python
Expand Down
16 changes: 8 additions & 8 deletions recipe/patches/0001-do-not-mix-static-shared-builds.patch
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
From df7925b506acf44eb3f06ce94e812c8a23f83699 Mon Sep 17 00:00:00 2001
From 2fe3e37744c810590e631c01fb57133080fc5f46 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Thu, 2 Dec 2021 08:39:53 +1100
Subject: [PATCH 1/8] do not mix static & shared builds
Subject: [PATCH 1/7] do not mix static & shared builds

---
src/CMakeLists.txt | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1c7726e..39b0ef2 100644
index 8d4a34f..fbdf238 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -220,18 +220,18 @@ endif()
if (SPM_ENABLE_SHARED)
add_library(sentencepiece SHARED ${SPM_SRCS})
add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
@@ -228,18 +228,18 @@ if (SPM_ENABLE_SHARED)
target_link_libraries(sentencepiece log)
target_link_libraries(sentencepiece_train log)
endif()
-endif()
-
-add_library(sentencepiece-static STATIC ${SPM_SRCS})
Expand All @@ -37,7 +37,7 @@ index 1c7726e..39b0ef2 100644
set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0)
set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES)
set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES)
@@ -246,10 +246,10 @@ else()
@@ -254,10 +254,10 @@ else()
add_library(sentencepiece ALIAS sentencepiece-static)
add_library(sentencepiece_train ALIAS sentencepiece_train-static)
set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static)
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
From 185e8cd8603d188cccdb6f170a60d2984211b70c Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Thu, 2 Dec 2021 10:05:12 +1100
Subject: [PATCH 2/7] ensure we set PROTOBUF_USE_DLLS when using our own
protobuf

---
src/CMakeLists.txt | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fbdf238..2b8aefa 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -71,6 +71,11 @@ if (SPM_PROTOBUF_PROVIDER STREQUAL "internal")
include_directories(builtin_pb)
elseif (SPM_PROTOBUF_PROVIDER STREQUAL "package")
find_package(Protobuf REQUIRED)
+ if (MSVC)
+ add_definitions("/DPROTOBUF_USE_DLLS")
+ else()
+ add_definitions("-DPROTOBUF_USE_DLLS")
+ endif()
include_directories(${Protobuf_INCLUDE_DIRS})
protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto)
protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto)

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From e884436083b6d5c2beee6bc341f0a55958715f01 Mon Sep 17 00:00:00 2001
From a285dbb0bb469256fb43f483e398cc0f028cd2c8 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Sun, 11 Dec 2022 01:09:03 +1100
Subject: [PATCH 6/8] point to our libs / headers for windows in setup.py
Subject: [PATCH 3/7] point to our libs / headers for windows in setup.py

also do not risk building against bundled libs, nor
setting /MT for the MSVC static runtime libs
Expand All @@ -10,7 +10,7 @@ setting /MT for the MSVC static runtime libs
1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/python/setup.py b/python/setup.py
index 5411231..d8f0b5e 100755
index d600321..fb301ac 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -77,10 +77,11 @@ class build_ext(_build_ext):
Expand All @@ -28,7 +28,7 @@ index 5411231..d8f0b5e 100755
cflags = cflags + run_pkg_config('cflags')
libs = run_pkg_config('libs')
else:
@@ -106,17 +107,21 @@ if os.name == 'nt':
@@ -108,17 +109,21 @@ if os.name == 'nt':
arch = 'win32'
if sys.maxsize > 2**32:
arch = 'amd64'
Expand All @@ -46,12 +46,12 @@ index 5411231..d8f0b5e 100755
libs = [
- '..\\build\\root\\lib\\sentencepiece.lib',
- '..\\build\\root\\lib\\sentencepiece_train.lib',
+ # equivalent of -L$PREFIX/lib -lsentencepiece -lsentencepiece_train -lprotobuf
+ # equivalent of -L$PREFIX/lib -lsentencepiece -lsentencepiece_train -lprotobuf-lite
+ os.environ["LIBRARY_LIB"] + f"\\{x}.lib"
+ # protobuf actually has the lib-prefix in the name also on windows;
+ # since libsentencepiece is static on windows, we also need _its_
+ # host dependencies for the link interface, i.e. also abseil
+ for x in ["sentencepiece", "sentencepiece_train", "libprotobuf", "abseil_dll"]
+ for x in ["sentencepiece", "sentencepiece_train", "libprotobuf-lite", "abseil_dll"]
]
else:
# build library locally with cmake and vc++.
Loading