From 171c6c77ed5577fb91bbbd96aa1ef96a5b519ffe Mon Sep 17 00:00:00 2001 From: walterbm-cohere Date: Thu, 19 Sep 2024 14:59:18 -0400 Subject: [PATCH] daulet -> cohere-ai --- BUILD.bazel | 4 ++-- CONTRIBUTING.md | 2 +- README.md | 14 +++++++------- example/BUILD.bazel | 2 +- example/Dockerfile | 4 ++-- example/README.md | 2 +- example/go.mod | 4 ++-- example/go.sum | 4 ++-- example/main.go | 2 +- go.mod | 4 ++-- release/go.mod | 4 ++-- release/main.go | 2 +- tokenizer_test.go | 2 +- 13 files changed, 25 insertions(+), 25 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index ad9ddea1..08efff47 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -3,7 +3,7 @@ load("@crate_index//:defs.bzl", "aliases", "all_crate_deps") load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") load("@rules_rust//rust:defs.bzl", "rust_static_library") -# gazelle:prefix github.com/daulet/tokenizers +# gazelle:prefix github.com/cohere-ai/tokenizers gazelle( name = "gazelle", ) @@ -53,6 +53,6 @@ go_library( ":tokenizers_rs", ], cgo = True, - importpath = "github.com/daulet/tokenizers", + importpath = "github.com/cohere-ai/tokenizers", visibility = ["//visibility:public"], ) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 40a03c10..81a61a23 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,7 +17,7 @@ It should look something like this: ``` goos: darwin goarch: arm64 -pkg: github.com/daulet/tokenizers +pkg: github.com/cohere-ai/tokenizers │ benchmarks/786da4095f5ca3d598db1236c46401b63874f640.txt │ benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt │ │ sec/op │ sec/op vs base │ EncodeNTimes-10 13.26µ ± 4% 13.11µ ± 1% -1.09% (p=0.041 n=6) diff --git a/README.md b/README.md index 119606ac..e473864a 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,11 @@ Go bindings for the [HuggingFace Tokenizers](https://github.com/huggingface/toke ### Using pre-built binaries -If you don't want to install Rust toolchain, build it in docker: `docker build --platform=linux/amd64 -f release/Dockerfile .` or use prebuilt binaries from the [releases](https://github.com/daulet/tokenizers/releases) page. Prebuilt libraries are available for: +If you don't want to install Rust toolchain, build it in docker: `docker build --platform=linux/amd64 -f release/Dockerfile .` or use prebuilt binaries from the [releases](https://github.com/cohere-ai/tokenizers/releases) page. Prebuilt libraries are available for: -* [darwin-arm64](https://github.com/daulet/tokenizers/releases/latest/download/libtokenizers.darwin-arm64.tar.gz) -* [linux-arm64](https://github.com/daulet/tokenizers/releases/latest/download/libtokenizers.linux-arm64.tar.gz) -* [linux-amd64](https://github.com/daulet/tokenizers/releases/latest/download/libtokenizers.linux-amd64.tar.gz) +* [darwin-arm64](https://github.com/cohere-ai/tokenizers/releases/latest/download/libtokenizers.darwin-arm64.tar.gz) +* [linux-arm64](https://github.com/cohere-ai/tokenizers/releases/latest/download/libtokenizers.linux-arm64.tar.gz) +* [linux-amd64](https://github.com/cohere-ai/tokenizers/releases/latest/download/libtokenizers.linux-amd64.tar.gz) ## Getting started @@ -21,7 +21,7 @@ TLDR: [working example](example/main.go). Load a tokenizer from a JSON config: ```go -import "github.com/daulet/tokenizers" +import "github.com/cohere-ai/tokenizers" tk, err := tokenizers.FromFile("./data/bert-base-uncased.json") if err != nil { @@ -55,13 +55,13 @@ go test . -bench=. -benchmem -benchtime=10s goos: darwin goarch: arm64 -pkg: github.com/daulet/tokenizers +pkg: github.com/cohere-ai/tokenizers BenchmarkEncodeNTimes-10 959494 12622 ns/op 232 B/op 12 allocs/op BenchmarkEncodeNChars-10 1000000000 2.046 ns/op 0 B/op 0 allocs/op BenchmarkDecodeNTimes-10 2758072 4345 ns/op 96 B/op 3 allocs/op BenchmarkDecodeNTokens-10 18689725 648.5 ns/op 7 B/op 0 allocs/op PASS -ok github.com/daulet/tokenizers 126.681s +ok github.com/cohere-ai/tokenizers 126.681s ``` Run equivalent Rust tests with `cargo bench`. diff --git a/example/BUILD.bazel b/example/BUILD.bazel index e2e43997..b4cc12a8 100644 --- a/example/BUILD.bazel +++ b/example/BUILD.bazel @@ -3,7 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library") go_library( name = "example_lib", srcs = ["main.go"], - importpath = "github.com/daulet/tokenizers/example", + importpath = "github.com/cohere-ai/tokenizers/example", visibility = ["//visibility:private"], deps = ["//:tokenizers"], ) diff --git a/example/Dockerfile b/example/Dockerfile index a2734186..9d0723c3 100644 --- a/example/Dockerfile +++ b/example/Dockerfile @@ -4,10 +4,10 @@ FROM golang:1.21 as builder-go ARG TARGETPLATFORM ARG VERSION=v0.6.0 WORKDIR /workspace -RUN curl -fsSL https://github.com/daulet/tokenizers/releases/download/${VERSION}/libtokenizers.$(echo ${TARGETPLATFORM} | tr / -).tar.gz | tar xvz +RUN curl -fsSL https://github.com/cohere-ai/tokenizers/releases/download/${VERSION}/libtokenizers.$(echo ${TARGETPLATFORM} | tr / -).tar.gz | tar xvz COPY ./example . COPY ./test/data ./test/data RUN go mod download -RUN mv ./libtokenizers.a /go/pkg/mod/github.com/daulet/tokenizers@${VERSION}/lib/$(echo ${TARGETPLATFORM} | tr / -)/libtokenizers.a +RUN mv ./libtokenizers.a /go/pkg/mod/github.com/cohere-ai/tokenizers@${VERSION}/lib/$(echo ${TARGETPLATFORM} | tr / -)/libtokenizers.a # mounting Go cache won't work since we mutate it above RUN go run main.go diff --git a/example/README.md b/example/README.md index 8dfec5a4..15bb6e22 100644 --- a/example/README.md +++ b/example/README.md @@ -2,7 +2,7 @@ ``` # Keep this version in sync with go module release -curl -fsSL https://github.com/daulet/tokenizers/releases/download/v0.9.0/libtokenizers.darwin-aarch64.tar.gz | tar xvz +curl -fsSL https://github.com/cohere-ai/tokenizers/releases/download/v0.9.0/libtokenizers.darwin-aarch64.tar.gz | tar xvz # change -L argument to where you've placed the library download above go run -ldflags="-extldflags '-L$(pwd)'" main.go ``` \ No newline at end of file diff --git a/example/go.mod b/example/go.mod index 7023aa34..edf285f4 100644 --- a/example/go.mod +++ b/example/go.mod @@ -1,5 +1,5 @@ -module github.com/daulet/tokenizers/example +module github.com/cohere-ai/tokenizers/example go 1.22 -require github.com/daulet/tokenizers v0.9.0 +require github.com/cohere-ai/tokenizers v0.9.0 \ No newline at end of file diff --git a/example/go.sum b/example/go.sum index 81993142..dbbc7a26 100644 --- a/example/go.sum +++ b/example/go.sum @@ -1,5 +1,5 @@ -github.com/daulet/tokenizers v0.9.0 h1:PSjFUGeuhqb3C0GKP9hdvtHvJ6L1AZceV+0nYGACtCk= -github.com/daulet/tokenizers v0.9.0/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs= +github.com/cohere-ai/tokenizers v0.9.0 h1:PSjFUGeuhqb3C0GKP9hdvtHvJ6L1AZceV+0nYGACtCk= +github.com/cohere-ai/tokenizers v0.9.0/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/example/main.go b/example/main.go index 43d07830..eedd3162 100644 --- a/example/main.go +++ b/example/main.go @@ -3,7 +3,7 @@ package main import ( "fmt" - "github.com/daulet/tokenizers" + "github.com/cohere-ai/tokenizers" ) func main() { diff --git a/go.mod b/go.mod index 5a7a8f03..9809168a 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/daulet/tokenizers +module github.com/cohere-ai/tokenizers go 1.18 @@ -8,4 +8,4 @@ require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect -) +) \ No newline at end of file diff --git a/release/go.mod b/release/go.mod index 1f275626..fc0ee562 100644 --- a/release/go.mod +++ b/release/go.mod @@ -1,3 +1,3 @@ -module github.com/daulet/tokenizers/release +module github.com/cohere-ai/tokenizers/release -go 1.21.5 +go 1.21.5 \ No newline at end of file diff --git a/release/main.go b/release/main.go index 013e7f9a..bb250d01 100644 --- a/release/main.go +++ b/release/main.go @@ -3,7 +3,7 @@ package main import ( "fmt" - "github.com/daulet/tokenizers/release/tokenizers" + "github.com/cohere-ai/tokenizers/release/tokenizers" ) func main() { diff --git a/tokenizer_test.go b/tokenizer_test.go index da2c1fda..66b9cc82 100644 --- a/tokenizer_test.go +++ b/tokenizer_test.go @@ -5,7 +5,7 @@ import ( "math/rand" "testing" - "github.com/daulet/tokenizers" + "github.com/cohere-ai/tokenizers" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require"