Skip to content

Commit

Permalink
daulet -> cohere-ai
Browse files Browse the repository at this point in the history
  • Loading branch information
walterbm-cohere committed Sep 19, 2024
1 parent eafe2ce commit 171c6c7
Show file tree
Hide file tree
Showing 13 changed files with 25 additions and 25 deletions.
4 changes: 2 additions & 2 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ load("@crate_index//:defs.bzl", "aliases", "all_crate_deps")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
load("@rules_rust//rust:defs.bzl", "rust_static_library")

# gazelle:prefix github.com/daulet/tokenizers
# gazelle:prefix github.com/cohere-ai/tokenizers
gazelle(
name = "gazelle",
)
Expand Down Expand Up @@ -53,6 +53,6 @@ go_library(
":tokenizers_rs",
],
cgo = True,
importpath = "github.com/daulet/tokenizers",
importpath = "github.com/cohere-ai/tokenizers",
visibility = ["//visibility:public"],
)
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ It should look something like this:
```
goos: darwin
goarch: arm64
pkg: github.com/daulet/tokenizers
pkg: github.com/cohere-ai/tokenizers
│ benchmarks/786da4095f5ca3d598db1236c46401b63874f640.txt │ benchmarks/38a9a14c1c56b113461b0c7350c72de949e23cc2.txt │
│ sec/op │ sec/op vs base │
EncodeNTimes-10 13.26µ ± 4% 13.11µ ± 1% -1.09% (p=0.041 n=6)
Expand Down
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ Go bindings for the [HuggingFace Tokenizers](https://github.com/huggingface/toke

### Using pre-built binaries

If you don't want to install the Rust toolchain, build it in Docker: `docker build --platform=linux/amd64 -f release/Dockerfile .` or use prebuilt binaries from the [releases](https://github.com/daulet/tokenizers/releases) page. Prebuilt libraries are available for:
If you don't want to install the Rust toolchain, build it in Docker: `docker build --platform=linux/amd64 -f release/Dockerfile .` or use prebuilt binaries from the [releases](https://github.com/cohere-ai/tokenizers/releases) page. Prebuilt libraries are available for:

* [darwin-arm64](https://github.com/daulet/tokenizers/releases/latest/download/libtokenizers.darwin-arm64.tar.gz)
* [linux-arm64](https://github.com/daulet/tokenizers/releases/latest/download/libtokenizers.linux-arm64.tar.gz)
* [linux-amd64](https://github.com/daulet/tokenizers/releases/latest/download/libtokenizers.linux-amd64.tar.gz)
* [darwin-arm64](https://github.com/cohere-ai/tokenizers/releases/latest/download/libtokenizers.darwin-arm64.tar.gz)
* [linux-arm64](https://github.com/cohere-ai/tokenizers/releases/latest/download/libtokenizers.linux-arm64.tar.gz)
* [linux-amd64](https://github.com/cohere-ai/tokenizers/releases/latest/download/libtokenizers.linux-amd64.tar.gz)

## Getting started

Expand All @@ -21,7 +21,7 @@ TLDR: [working example](example/main.go).
Load a tokenizer from a JSON config:

```go
import "github.com/daulet/tokenizers"
import "github.com/cohere-ai/tokenizers"

tk, err := tokenizers.FromFile("./data/bert-base-uncased.json")
if err != nil {
Expand Down Expand Up @@ -55,13 +55,13 @@ go test . -bench=. -benchmem -benchtime=10s

goos: darwin
goarch: arm64
pkg: github.com/daulet/tokenizers
pkg: github.com/cohere-ai/tokenizers
BenchmarkEncodeNTimes-10 959494 12622 ns/op 232 B/op 12 allocs/op
BenchmarkEncodeNChars-10 1000000000 2.046 ns/op 0 B/op 0 allocs/op
BenchmarkDecodeNTimes-10 2758072 4345 ns/op 96 B/op 3 allocs/op
BenchmarkDecodeNTokens-10 18689725 648.5 ns/op 7 B/op 0 allocs/op
PASS
ok github.com/daulet/tokenizers 126.681s
ok github.com/cohere-ai/tokenizers 126.681s
```

Run equivalent Rust tests with `cargo bench`.
Expand Down
2 changes: 1 addition & 1 deletion example/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
go_library(
name = "example_lib",
srcs = ["main.go"],
importpath = "github.com/daulet/tokenizers/example",
importpath = "github.com/cohere-ai/tokenizers/example",
visibility = ["//visibility:private"],
deps = ["//:tokenizers"],
)
Expand Down
4 changes: 2 additions & 2 deletions example/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ FROM golang:1.21 as builder-go
ARG TARGETPLATFORM
ARG VERSION=v0.6.0
WORKDIR /workspace
RUN curl -fsSL https://github.com/daulet/tokenizers/releases/download/${VERSION}/libtokenizers.$(echo ${TARGETPLATFORM} | tr / -).tar.gz | tar xvz
RUN curl -fsSL https://github.com/cohere-ai/tokenizers/releases/download/${VERSION}/libtokenizers.$(echo ${TARGETPLATFORM} | tr / -).tar.gz | tar xvz
COPY ./example .
COPY ./test/data ./test/data
RUN go mod download
RUN mv ./libtokenizers.a /go/pkg/mod/github.com/daulet/tokenizers@${VERSION}/lib/$(echo ${TARGETPLATFORM} | tr / -)/libtokenizers.a
RUN mv ./libtokenizers.a /go/pkg/mod/github.com/cohere-ai/tokenizers@${VERSION}/lib/$(echo ${TARGETPLATFORM} | tr / -)/libtokenizers.a
# mounting Go cache won't work since we mutate it above
RUN go run main.go
2 changes: 1 addition & 1 deletion example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

```
# Keep this version in sync with the Go module release
curl -fsSL https://github.com/daulet/tokenizers/releases/download/v0.9.0/libtokenizers.darwin-aarch64.tar.gz | tar xvz
curl -fsSL https://github.com/cohere-ai/tokenizers/releases/download/v0.9.0/libtokenizers.darwin-aarch64.tar.gz | tar xvz
# change -L argument to where you've placed the library download above
go run -ldflags="-extldflags '-L$(pwd)'" main.go
```
4 changes: 2 additions & 2 deletions example/go.mod
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module github.com/daulet/tokenizers/example
module github.com/cohere-ai/tokenizers/example

go 1.22

require github.com/daulet/tokenizers v0.9.0
require github.com/cohere-ai/tokenizers v0.9.0
4 changes: 2 additions & 2 deletions example/go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
github.com/daulet/tokenizers v0.9.0 h1:PSjFUGeuhqb3C0GKP9hdvtHvJ6L1AZceV+0nYGACtCk=
github.com/daulet/tokenizers v0.9.0/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs=
github.com/cohere-ai/tokenizers v0.9.0 h1:PSjFUGeuhqb3C0GKP9hdvtHvJ6L1AZceV+0nYGACtCk=
github.com/cohere-ai/tokenizers v0.9.0/go.mod h1:tGnMdZthXdcWY6DGD07IygpwJqiPvG85FQUnhs/wSCs=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
Expand Down
2 changes: 1 addition & 1 deletion example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package main
import (
"fmt"

"github.com/daulet/tokenizers"
"github.com/cohere-ai/tokenizers"
)

func main() {
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module github.com/daulet/tokenizers
module github.com/cohere-ai/tokenizers

go 1.18

Expand All @@ -8,4 +8,4 @@ require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
)
4 changes: 2 additions & 2 deletions release/go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module github.com/daulet/tokenizers/release
module github.com/cohere-ai/tokenizers/release

go 1.21.5
go 1.21.5
2 changes: 1 addition & 1 deletion release/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package main
import (
"fmt"

"github.com/daulet/tokenizers/release/tokenizers"
"github.com/cohere-ai/tokenizers/release/tokenizers"
)

func main() {
Expand Down
2 changes: 1 addition & 1 deletion tokenizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"math/rand"
"testing"

"github.com/daulet/tokenizers"
"github.com/cohere-ai/tokenizers"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down

0 comments on commit 171c6c7

Please sign in to comment.