Skip to content

Commit

Permalink
sentencepiece 0.2.0 (#48)
Browse files Browse the repository at this point in the history
automerged PR by conda-forge/automerge-action
  • Loading branch information
github-actions[bot] authored Mar 15, 2024
2 parents aa2a030 + 4314def commit 94d9131
Show file tree
Hide file tree
Showing 16 changed files with 188 additions and 3,062 deletions.
2 changes: 1 addition & 1 deletion conda-forge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ github:
provider:
linux_aarch64: default
linux_ppc64le: default
test_on_native_only: true
test: native_and_emulated
9 changes: 2 additions & 7 deletions recipe/build-lib.bat
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
@echo on

:: we're trying to avoid the third_party sources, and not building them;
:: to avoid weird errors if those sources got picked up nevertheless, delete them
rmdir /S /Q third_party\absl
rmdir /S /Q third_party\protobuf-lite

mkdir build
cd build

Expand All @@ -15,8 +10,8 @@ cmake -G "Ninja" ^
-DCMAKE_INSTALL_PREFIX=%LIBRARY_PREFIX% ^
-Dprotobuf_BUILD_SHARED_LIBS=OFF ^
-DSPM_ENABLE_SHARED=OFF ^
-DSPM_USE_BUILTIN_PROTOBUF=OFF ^
-DSPM_USE_EXTERNAL_ABSL=ON ^
-DSPM_ABSL_PROVIDER="package" ^
-DSPM_PROTOBUF_PROVIDER="package" ^
..
IF %ERRORLEVEL% NEQ 0 exit 1

Expand Down
10 changes: 2 additions & 8 deletions recipe/build-lib.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
#!/bin/bash
set -ex

# we're trying to avoid the third_party sources, and not building them;
# to avoid weird errors if those sources got picked up nevertheless, delete them
rm -rf third_party/absl
rm -rf third_party/protobuf-lite

mkdir build
cd build

Expand All @@ -16,12 +11,11 @@ fi
cmake -G "Ninja" \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$PREFIX \
-DCMAKE_INSTALL_LIBDIR=$PREFIX/lib \
-DCMAKE_AR="${AR}" \
-DSPM_ENABLE_SHARED=ON \
-DSPM_ENABLE_TCMALLOC=OFF \
-DSPM_USE_EXTERNAL_ABSL=ON \
-DSPM_USE_BUILTIN_PROTOBUF=OFF \
-DSPM_ABSL_PROVIDER="package" \
-DSPM_PROTOBUF_PROVIDER="package" \
${CMAKE_ARGS} \
..

Expand Down
5 changes: 5 additions & 0 deletions recipe/cmake_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,9 @@ if(MSVC)
# static libsentencepiece leaks its host dependencies;
# need to link them as well
target_link_libraries(test_me libprotobuf abseil_dll)
# absl_flag* are always static on windows
target_link_libraries(test_me absl_log_flags absl_flags_commandlineflag
absl_flags_commandlineflag_internal absl_flags_config absl_flags_internal
absl_flags_marshalling absl_flags_parse absl_flags_private_handle_accessor
absl_flags_program_name absl_flags_reflection absl_flags_usage absl_flags_usage_internal)
endif()
32 changes: 16 additions & 16 deletions recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,33 +1,31 @@
{% set version = "0.1.99" %}
{% set version = "0.2.0" %}

package:
name: sentencepiece-split
version: {{ version }}

source:
url: https://github.com/google/sentencepiece/archive/refs/tags/v{{ version }}.tar.gz
sha256: 63617eaf56c7a3857597dcd8780461f57dd21381b56a27716ef7d7e02e14ced4
sha256: 9970f0a0afee1648890293321665e5b2efa04eaec9f1671fcf8048f456f5bb86
patches:
# trying to build both static & shared build seems to break on OSX
- patches/0001-do-not-mix-static-shared-builds.patch
# unvendor abseil & protobuf-lite
- patches/0002-do-not-build-vendored-abseil-libprotobuf-lite.patch
# avoid installing into $PREFIX/lib64; make sure pkg-config template is filled correctly
- patches/0003-consistently-use-absolute-paths-for-CMAKE_INSTALL_-D.patch
# upstream sources still directly include third_party/absl...
- patches/0004-ACTUALLY-use-external-absl.patch
# upstream carries its own glue code under third_party/absl/flags;
# move it to the sentencepiece sources
- patches/0005-stop-pretending-sp-glue-code-belongs-in-third_party-.patch
# set PROTOBUF_USE_DLLS
- patches/0002-ensure-we-set-PROTOBUF_USE_DLLS-when-using-our-own-p.patch
# ensure python bindings link to correct libs on windows
- patches/0006-point-to-our-libs-headers-for-windows-in-setup.py.patch
- patches/0003-point-to-our-libs-headers-for-windows-in-setup.py.patch
# install pkg-config metadata also on windows
- patches/0007-also-install-pkg-config-files-on-windows.patch
- patches/0004-also-install-pkg-config-files-on-windows.patch
# install CMake metadata
- patches/0008-create-and-install-CMake-metadata.patch
- patches/0005-create-and-install-CMake-metadata.patch
# fix abseil setup on windows
- patches/0006-also-link-to-static-absl_flags_-on-windows.patch
# backport of https://github.com/google/sentencepiece/pull/979:
# avoid having to specify CMAKE_INSTALL_{LIB,INCLUDE}DIR due to wrong order
- patches/0007-move-setting-of-default-CMAKE_INSTALL_-BIN-INCLUDE-L.patch

build:
number: 8
number: 0

requirements:
build:
Expand Down Expand Up @@ -143,7 +141,9 @@ outputs:
commands:
# binaries
{% for each_bin in ["decode", "encode", "export_vocab", "normalize", "train"] %}
- spm_{{ each_bin }} --help
# expect exit code 1, see https://github.com/google/sentencepiece/issues/978
- spm_{{ each_bin }} --help >/dev/null || [[ $? == 1 ]] # [unix]
- spm_{{ each_bin }} --help & if %ERRORLEVEL% NEQ 1 (exit 0) else (exit 1) # [win]
{% endfor %}

- name: sentencepiece-python
Expand Down
16 changes: 8 additions & 8 deletions recipe/patches/0001-do-not-mix-static-shared-builds.patch
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
From df7925b506acf44eb3f06ce94e812c8a23f83699 Mon Sep 17 00:00:00 2001
From 2fe3e37744c810590e631c01fb57133080fc5f46 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Thu, 2 Dec 2021 08:39:53 +1100
Subject: [PATCH 1/8] do not mix static & shared builds
Subject: [PATCH 1/7] do not mix static & shared builds

---
src/CMakeLists.txt | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 1c7726e..39b0ef2 100644
index 8d4a34f..fbdf238 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -220,18 +220,18 @@ endif()
if (SPM_ENABLE_SHARED)
add_library(sentencepiece SHARED ${SPM_SRCS})
add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
@@ -228,18 +228,18 @@ if (SPM_ENABLE_SHARED)
target_link_libraries(sentencepiece log)
target_link_libraries(sentencepiece_train log)
endif()
-endif()
-
-add_library(sentencepiece-static STATIC ${SPM_SRCS})
Expand All @@ -37,7 +37,7 @@ index 1c7726e..39b0ef2 100644
set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0)
set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES)
set_target_properties(sentencepiece_train PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES)
@@ -246,10 +246,10 @@ else()
@@ -254,10 +254,10 @@ else()
add_library(sentencepiece ALIAS sentencepiece-static)
add_library(sentencepiece_train ALIAS sentencepiece_train-static)
set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static)
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
From 185e8cd8603d188cccdb6f170a60d2984211b70c Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Thu, 2 Dec 2021 10:05:12 +1100
Subject: [PATCH 2/7] ensure we set PROTOBUF_USE_DLLS when using our own
protobuf

---
src/CMakeLists.txt | 5 +++++
1 file changed, 5 insertions(+)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index fbdf238..2b8aefa 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -71,6 +71,11 @@ if (SPM_PROTOBUF_PROVIDER STREQUAL "internal")
include_directories(builtin_pb)
elseif (SPM_PROTOBUF_PROVIDER STREQUAL "package")
find_package(Protobuf REQUIRED)
+ if (MSVC)
+ add_definitions("/DPROTOBUF_USE_DLLS")
+ else()
+ add_definitions("-DPROTOBUF_USE_DLLS")
+ endif()
include_directories(${Protobuf_INCLUDE_DIRS})
protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto)
protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto)

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From e884436083b6d5c2beee6bc341f0a55958715f01 Mon Sep 17 00:00:00 2001
From a285dbb0bb469256fb43f483e398cc0f028cd2c8 Mon Sep 17 00:00:00 2001
From: "H. Vetinari" <[email protected]>
Date: Sun, 11 Dec 2022 01:09:03 +1100
Subject: [PATCH 6/8] point to our libs / headers for windows in setup.py
Subject: [PATCH 3/7] point to our libs / headers for windows in setup.py

also do not risk building against bundled libs, nor
setting /MT for the MSVC static runtime libs
Expand All @@ -10,7 +10,7 @@ setting /MT for the MSVC static runtime libs
1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/python/setup.py b/python/setup.py
index 5411231..d8f0b5e 100755
index d600321..fb301ac 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -77,10 +77,11 @@ class build_ext(_build_ext):
Expand All @@ -28,7 +28,7 @@ index 5411231..d8f0b5e 100755
cflags = cflags + run_pkg_config('cflags')
libs = run_pkg_config('libs')
else:
@@ -106,17 +107,21 @@ if os.name == 'nt':
@@ -108,17 +109,21 @@ if os.name == 'nt':
arch = 'win32'
if sys.maxsize > 2**32:
arch = 'amd64'
Expand All @@ -46,12 +46,12 @@ index 5411231..d8f0b5e 100755
libs = [
- '..\\build\\root\\lib\\sentencepiece.lib',
- '..\\build\\root\\lib\\sentencepiece_train.lib',
+ # equivalent of -L$PREFIX/lib -lsentencepiece -lsentencepiece_train -lprotobuf
+ # equivalent of -L$PREFIX/lib -lsentencepiece -lsentencepiece_train -lprotobuf-lite
+ os.environ["LIBRARY_LIB"] + f"\\{x}.lib"
+ # protobuf actually has the lib-prefix in the name also on windows;
+ # since libsentencepiece is static on windows, we also need _its_
+ # host dependencies for the link interface, i.e. also abseil
+ for x in ["sentencepiece", "sentencepiece_train", "libprotobuf", "abseil_dll"]
+ for x in ["sentencepiece", "sentencepiece_train", "libprotobuf-lite", "abseil_dll"]
]
else:
# build library locally with cmake and vc++.
Loading

0 comments on commit 94d9131

Please sign in to comment.