diff --git a/bindings/python/src/tokenizer.rs b/bindings/python/src/tokenizer.rs index 6408502b5..a4ec7b26f 100644 --- a/bindings/python/src/tokenizer.rs +++ b/bindings/python/src/tokenizer.rs @@ -1092,25 +1092,24 @@ impl PyTokenizer { fn encode_batch_fast( &self, py: Python<'_>, - input: Bound<'_, PyList>, + input: Bound<'_, PySequence>, is_pretokenized: bool, add_special_tokens: bool, ) -> PyResult> { - let input: Vec = input - .into_iter() - .map(|o| { - let input: tk::EncodeInput = if is_pretokenized { - o.extract::()?.into() - } else { - o.extract::()?.into() - }; - Ok(input) - }) - .collect::>>()?; + let mut items = Vec::::new(); + for i in 0..input.len()? { + let item = input.get_item(i)?; + let item: tk::EncodeInput = if is_pretokenized { + item.extract::()?.into() + } else { + item.extract::()?.into() + }; + items.push(item); + } py.allow_threads(|| { ToPyResult( self.tokenizer - .encode_batch_fast(input, add_special_tokens) + .encode_batch_fast(items, add_special_tokens) .map(|encodings| encodings.into_iter().map(|e| e.into()).collect()), ) .into()