From 3188ded27885d1002698a0e25f0b32306c430e88 Mon Sep 17 00:00:00 2001 From: Daulet Zhanguzin Date: Fri, 31 Mar 2023 22:16:20 -0700 Subject: [PATCH] remove redundant allocs --- README.md | 12 ++++++------ tokenizer.go | 4 ---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 0b08364f..3ee7bd48 100644 --- a/README.md +++ b/README.md @@ -40,11 +40,11 @@ go test . -bench=. -benchmem -benchtime=10s goos: darwin goarch: arm64 -pkg: github.com/daulet/tokenizer -BenchmarkEncodeNTimes-10 985678 12023 ns/op 132 B/op 7 allocs/op -BenchmarkEncodeNChars-10 1000000000 2.442 ns/op 0 B/op 0 allocs/op -BenchmarkDecodeNTimes-10 6762982 1767 ns/op 128 B/op 5 allocs/op -BenchmarkDecodeNTokens-10 65058678 219.8 ns/op 7 B/op 0 allocs/op +pkg: github.com/daulet/tokenizers +BenchmarkEncodeNTimes-10 996556 11851 ns/op 116 B/op 6 allocs/op +BenchmarkEncodeNChars-10 1000000000 2.446 ns/op 0 B/op 0 allocs/op +BenchmarkDecodeNTimes-10 7286056 1657 ns/op 112 B/op 4 allocs/op +BenchmarkDecodeNTokens-10 65191378 211.0 ns/op 7 B/op 0 allocs/op PASS -ok github.com/daulet/tokenizer 69.993s +ok github.com/daulet/tokenizers 126.681s ``` diff --git a/tokenizer.go b/tokenizer.go index 3312ce87..8b096c80 100644 --- a/tokenizer.go +++ b/tokenizer.go @@ -38,8 +38,6 @@ func (t *Tokenizer) Close() error { } func (t *Tokenizer) Encode(str string, addSpecialTokens bool) []uint32 { - config := C.CString("./lib/tokenizer/data/bert-base-uncased.json") - defer C.free(unsafe.Pointer(config)) cStr := C.CString(str) defer C.free(unsafe.Pointer(cStr)) var len C.uint @@ -58,8 +56,6 @@ func (t *Tokenizer) Encode(str string, addSpecialTokens bool) []uint32 { } func (t *Tokenizer) Decode(tokenIDs []uint32, skipSpecialTokens bool) string { - config := C.CString("./lib/tokenizer/data/bert-base-uncased.json") - defer C.free(unsafe.Pointer(config)) len := C.uint(len(tokenIDs)) res := C.decode(t.tokenizer, (*C.uint)(unsafe.Pointer(&tokenIDs[0])), len, C.bool(skipSpecialTokens)) defer C.free(unsafe.Pointer(res))