From eac2ea984f9138cc9329acd1cdf63222bdf34fc9 Mon Sep 17 00:00:00 2001
From: walterbm-cohere
Date: Thu, 19 Sep 2024 18:49:20 -0400
Subject: [PATCH] expect libtokenizers.a in tokenizer src directory

---
Reviewer notes (this area is ignored by git am; not part of the commit):

  * tokenizer.go: the cgo LDFLAGS entry changes from `-ltokenizers` to
    `${SRCDIR}/libtokenizers.a`, so the archive is referenced by path
    next to the Go source instead of through a -L search directory.
  * Makefile: `test` drops -ldflags="-extldflags '-L./'"; `release-linux-%`
    drops the DOCKER_TARGETPLATFORM build-arg and the `ls` step, and
    copies the archive from /workspace/tokenizers/libtokenizers.a.
  * release/Dockerfile: the archive is copied to ./tokenizers/ and
    `go run .` no longer passes -extldflags.
  * NOTE(review): .gitignore, Makefile, example/Dockerfile and
    release/Dockerfile all end up without a trailing newline
    ("\ No newline at end of file") - confirm this is intended.
  * NOTE(review): example/Dockerfile moves the archive into the
    github.com/daulet/tokenizers module directory while the hunk's
    context still shows a download from cohere-ai/tokenizers - confirm
    the module path matches example/go.mod.
  * NOTE(review): the From: header carries no <email>, and the
    `#include <stdlib.h>` context line in tokenizer.go was reconstructed
    from a bare `#include`; leading whitespace of context lines was also
    reconstructed - verify against the tree (or apply with
    --ignore-whitespace) before use.

 .gitignore         | 3 +++
 Makefile           | 9 ++++-----
 example/Dockerfile | 4 ++--
 release/Dockerfile | 6 +++---
 tokenizer.go       | 2 +-
 5 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/.gitignore b/.gitignore
index 06518061..85557daa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ target
 
 # Python
 .env
+
+# macos
+.DS_Store
\ No newline at end of file
diff --git a/Makefile b/Makefile
index cd92031a..3564061a 100644
--- a/Makefile
+++ b/Makefile
@@ -16,10 +16,9 @@ release-darwin-%: test
 	cp artifacts/darwin-$*/libtokenizers.darwin-$*.tar.gz artifacts/all/libtokenizers.darwin-$*.tar.gz
 
 release-linux-%: test
-	docker buildx build --platform linux/$* --build-arg="DOCKER_TARGETPLATFORM=linux/$*" -f release/Dockerfile . -t tokenizers.linux-$*
+	docker buildx build --platform linux/$* -f release/Dockerfile . -t tokenizers.linux-$*
 	mkdir -p artifacts/linux-$*
-	docker run -v $(PWD)/artifacts/linux-$*:/mnt --entrypoint ls tokenizers.linux-$* /workspace/tokenizers/lib/linux
-	docker run -v $(PWD)/artifacts/linux-$*:/mnt --entrypoint cp tokenizers.linux-$* /workspace/tokenizers/lib/linux/$*/libtokenizers.a /mnt/libtokenizers.a
+	docker run -v $(PWD)/artifacts/linux-$*:/mnt --entrypoint cp tokenizers.linux-$* /workspace/tokenizers/libtokenizers.a /mnt/libtokenizers.a
 	cd artifacts/linux-$* && \
 		tar -czf libtokenizers.linux-$*.tar.gz libtokenizers.a
 	mkdir -p artifacts/all
@@ -31,10 +30,10 @@ release: release-darwin-aarch64 release-darwin-x86_64 release-linux-arm64 releas
 	cp artifacts/all/libtokenizers.linux-x86_64.tar.gz artifacts/all/libtokenizers.linux-amd64.tar.gz
 
 test: build
-	@go test -ldflags="-extldflags '-L./'" -v ./... -count=1
+	@go test -v ./... -count=1
 
 clean:
 	rm -rf libtokenizers.a target
 
 bazel-sync:
-	CARGO_BAZEL_REPIN=1 bazel sync --only=crate_index
+	CARGO_BAZEL_REPIN=1 bazel sync --only=crate_index
\ No newline at end of file
diff --git a/example/Dockerfile b/example/Dockerfile
index 9d0723c3..9a31cb3b 100644
--- a/example/Dockerfile
+++ b/example/Dockerfile
@@ -8,6 +8,6 @@ RUN curl -fsSL https://github.com/cohere-ai/tokenizers/releases/download/${VERSI
 COPY ./example .
 COPY ./test/data ./test/data
 RUN go mod download
-RUN mv ./libtokenizers.a /go/pkg/mod/github.com/cohere-ai/tokenizers@${VERSION}/lib/$(echo ${TARGETPLATFORM} | tr / -)/libtokenizers.a
+RUN mv ./libtokenizers.a /go/pkg/mod/github.com/daulet/tokenizers@${VERSION}/libtokenizers.a
 # mounting Go cache won't work since we mutate it above
-RUN go run main.go
+RUN go run main.go
\ No newline at end of file
diff --git a/release/Dockerfile b/release/Dockerfile
index fda89358..6321a666 100644
--- a/release/Dockerfile
+++ b/release/Dockerfile
@@ -10,7 +10,7 @@ COPY ./Cargo.lock ./Cargo.lock
 RUN cargo build --release
 
 FROM golang:1.21 as builder-go
-ARG DOCKER_TARGETPLATFORM
+ARG TARGETPLATFORM
 WORKDIR /workspace
 COPY ./release/go.mod .
 COPY ./release/main.go .
@@ -19,6 +19,6 @@ COPY tokenizer.go ./tokenizers/
 COPY tokenizers.h ./tokenizers/
 COPY --from=builder-rust \
     /workspace/target/release/libtokenizers.a \
-    ./tokenizers/lib/${DOCKER_TARGETPLATFORM}/
+    ./tokenizers/
 COPY ./test/data ./test/data
-RUN go run -ldflags="-extldflags '-L./tokenizers/lib/${DOCKER_TARGETPLATFORM}'" .
+RUN go run .
\ No newline at end of file
diff --git a/tokenizer.go b/tokenizer.go
index 80c0a17c..1cd36f9f 100644
--- a/tokenizer.go
+++ b/tokenizer.go
@@ -3,7 +3,7 @@ package tokenizers
 // TODO packaging: how do we build the rust lib for distribution?
 
 /*
-#cgo LDFLAGS: -ltokenizers -ldl -lm -lstdc++
+#cgo LDFLAGS: ${SRCDIR}/libtokenizers.a -ldl -lm -lstdc++
 #include <stdlib.h>
 #include "tokenizers.h"
 */