From cb5e8a514258ed54df80930f3029ed978a82b4e0 Mon Sep 17 00:00:00 2001
From: epwalsh
Date: Wed, 9 Nov 2022 13:36:38 -0800
Subject: [PATCH 1/3] Add `into_tokens()` method

---
 tokenizers/src/tokenizer/encoding.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tokenizers/src/tokenizer/encoding.rs b/tokenizers/src/tokenizer/encoding.rs
index c6274c2f2..c49ed7a39 100644
--- a/tokenizers/src/tokenizer/encoding.rs
+++ b/tokenizers/src/tokenizer/encoding.rs
@@ -126,6 +126,10 @@ impl Encoding {
         &self.tokens[..]
     }
 
+    pub fn into_tokens(self) -> Vec<String> {
+        self.tokens
+    }
+
     pub fn get_word_ids(&self) -> &[Option<u32>] {
         &self.words
     }

From eb3d37f347427e7b550f1a4e53b635ef992241db Mon Sep 17 00:00:00 2001
From: epwalsh
Date: Wed, 9 Nov 2022 16:22:35 -0800
Subject: [PATCH 2/3] derive clone

---
 tokenizers/src/tokenizer/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tokenizers/src/tokenizer/mod.rs b/tokenizers/src/tokenizer/mod.rs
index a1075e1e0..a88306f3a 100644
--- a/tokenizers/src/tokenizer/mod.rs
+++ b/tokenizers/src/tokenizer/mod.rs
@@ -399,7 +399,7 @@ where
     }
 }
 
-#[derive(Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug, Clone)]
 pub struct Tokenizer(
     TokenizerImpl<
         ModelWrapper,

From 6a3660173d96920d51d1e962c95212a4d2286643 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Mon, 20 Nov 2023 09:40:05 +0100
Subject: [PATCH 3/3] Update tokenizers/src/tokenizer/encoding.rs

---
 tokenizers/src/tokenizer/encoding.rs | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tokenizers/src/tokenizer/encoding.rs b/tokenizers/src/tokenizer/encoding.rs
index c49ed7a39..c6274c2f2 100644
--- a/tokenizers/src/tokenizer/encoding.rs
+++ b/tokenizers/src/tokenizer/encoding.rs
@@ -126,10 +126,6 @@ impl Encoding {
         &self.tokens[..]
     }
 
-    pub fn into_tokens(self) -> Vec<String> {
-        self.tokens
-    }
-
     pub fn get_word_ids(&self) -> &[Option<u32>] {
         &self.words
     }