diff --git a/bindings/python/py_src/tokenizers/implementations/sentencepiece_bpe.py b/bindings/python/py_src/tokenizers/implementations/sentencepiece_bpe.py
index f9e424964..1a64213cc 100644
--- a/bindings/python/py_src/tokenizers/implementations/sentencepiece_bpe.py
+++ b/bindings/python/py_src/tokenizers/implementations/sentencepiece_bpe.py
@@ -20,9 +20,12 @@ def __init__(
         replacement: str = "▁",
         add_prefix_space: bool = True,
         dropout: Optional[float] = None,
+        fuse_unk: Optional[bool] = False,
     ):
         if vocab is not None and merges is not None:
-            tokenizer = Tokenizer(BPE(vocab, merges, dropout=dropout, unk_token=unk_token))
+            tokenizer = Tokenizer(
+                BPE(vocab, merges, dropout=dropout, unk_token=unk_token, fuse_unk=fuse_unk)
+            )
         else:
             tokenizer = Tokenizer(BPE())
 