Skip to content

Commit

Permalink
handle empty encode/decode params
Browse files: browse the repository at this point in the history
  • Loading branch information
Daulet Zhanguzin committed May 7, 2023
1 parent 02705a2 commit f3f1a0e
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 0 deletions.
3 changes: 3 additions & 0 deletions tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ func (t *Tokenizer) Encode(str string, addSpecialTokens bool) []uint32 {
}

func (t *Tokenizer) Decode(tokenIDs []uint32, skipSpecialTokens bool) string {
if len(tokenIDs) == 0 {
return ""
}
len := C.uint(len(tokenIDs))
res := C.decode(t.tokenizer, (*C.uint)(unsafe.Pointer(&tokenIDs[0])), len, C.bool(skipSpecialTokens))
defer C.free(unsafe.Pointer(res))
Expand Down
18 changes: 18 additions & 0 deletions tokenizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@ func TestEncode(t *testing.T) {
addSpecial: true,
want: []uint32{101, 2829, 4419, 14523, 2058, 1996, 13971, 3899, 102},
},
{
name: "empty string",
str: "",
addSpecial: false,
want: []uint32{},
},
{
name: "empty string with special tokens",
str: "",
addSpecial: false,
want: []uint32{},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down Expand Up @@ -174,6 +186,12 @@ func TestDecode(t *testing.T) {
skipSpecial: false,
want: "[CLS] brown fox jumps over the lazy dog [SEP]",
},
{
name: "no tokens",
tokens: []uint32{},
skipSpecial: false,
want: "",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit f3f1a0e

Please sign in to comment.