diff --git a/tokenizers/src/models/bpe/model.rs b/tokenizers/src/models/bpe/model.rs
index 8d22ab52d..1585da761 100644
--- a/tokenizers/src/models/bpe/model.rs
+++ b/tokenizers/src/models/bpe/model.rs
@@ -460,14 +460,14 @@ impl BPE {
     }
 
     fn tokenize_with_cache(&self, sequence: &str) -> Result<Vec<Token>> {
-        if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) {
-            return Ok(self.word_to_tokens(hit).collect());
-        }
         if self.ignore_merges {
             if let Some(id) = self.vocab.get(sequence) {
                 return Ok(vec![Token::new(*id, sequence.to_string().clone(), (0, 0))]);
             }
         }
+        if let Some(ref hit) = self.cache.as_ref().and_then(|c| c.get(sequence)) {
+            return Ok(self.word_to_tokens(hit).collect());
+        }
         let word = self.merge_word(sequence)?;
         let ret = self.word_to_tokens(&word).collect();
         if let Some(ref cache) = self.cache {