diff --git a/tokenizers/src/models/bpe/model.rs b/tokenizers/src/models/bpe/model.rs
index 39760d411..618f42b47 100644
--- a/tokenizers/src/models/bpe/model.rs
+++ b/tokenizers/src/models/bpe/model.rs
@@ -461,24 +461,17 @@ impl BPE {
     fn tokenize_with_cache(&self, sequence: &str) -> Result<Vec<Token>> {
         if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) {
-            Ok(self.word_to_tokens(hit).collect())
-        } else if let Some(id) = self.vocab.get(sequence) {
-            if self.ignore_merges {
-                Ok(vec![Token::new(*id, sequence.to_string().clone(), (0, 0))])
-            } else {
-                let word = self.merge_word(sequence)?;
-                let ret = self.word_to_tokens(&word).collect();
-                if let Some(ref cache) = self.cache {
-                    cache.set(sequence.to_owned(), word);
-                }
-                Ok(ret)
-            }
-        } else {
-            let word = self.merge_word(sequence)?;
-            let ret = self.word_to_tokens(&word).collect();
-            if let Some(ref cache) = self.cache {
-                cache.set(sequence.to_owned(), word);
+            return Ok(self.word_to_tokens(hit).collect());
+        }
+        if self.ignore_merges {
+            if let Some(id) = self.vocab.get(sequence) {
+                return Ok(vec![Token::new(*id, sequence.to_string(), (0, 0))]);
             }
-            Ok(ret)
         }
+        let word = self.merge_word(sequence)?;
+        let ret = self.word_to_tokens(&word).collect();
+        if let Some(ref cache) = self.cache {
+            cache.set(sequence.to_owned(), word);
+        }
+        Ok(ret)
     }
 }