diff --git a/bindings/python/src/tokenizer.rs b/bindings/python/src/tokenizer.rs
index 7fd03ae89..d1f6866d7 100644
--- a/bindings/python/src/tokenizer.rs
+++ b/bindings/python/src/tokenizer.rs
@@ -1030,25 +1030,24 @@ impl PyTokenizer {
     fn encode_batch(
         &self,
         py: Python<'_>,
-        input: Bound<'_, PyList>,
+        input: Bound<'_, PySequence>,
         is_pretokenized: bool,
         add_special_tokens: bool,
     ) -> PyResult<Vec<PyEncoding>> {
-        let input: Vec<tk::EncodeInput> = input
-            .into_iter()
-            .map(|o| {
-                let input: tk::EncodeInput = if is_pretokenized {
-                    o.extract::<PreTokenizedEncodeInput>()?.into()
-                } else {
-                    o.extract::<TextEncodeInput>()?.into()
-                };
-                Ok(input)
-            })
-            .collect::<PyResult<Vec<tk::EncodeInput>>>()?;
+        let mut items = Vec::<tk::EncodeInput>::new();
+        for i in 0..input.len()? {
+            let item = input.get_item(i)?;
+            let item: tk::EncodeInput = if is_pretokenized {
+                item.extract::<PreTokenizedEncodeInput>()?.into()
+            } else {
+                item.extract::<TextEncodeInput>()?.into()
+            };
+            items.push(item);
+        }
         py.allow_threads(|| {
             ToPyResult(
                 self.tokenizer
-                    .encode_batch_char_offsets(input, add_special_tokens)
+                    .encode_batch_char_offsets(items, add_special_tokens)
                     .map(|encodings| encodings.into_iter().map(|e| e.into()).collect()),
             )
             .into()
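
For context, a minimal Python-side sketch of what this change enables, assuming the `tokenizers` package is installed; the model name below is only an example. With the parameter typed as `PySequence` rather than `PyList`, `encode_batch` accepts any Python sequence, not just a list.

    # Sketch of the effect of this change (assumes `tokenizers` is installed;
    # "bert-base-uncased" is used purely as an example identifier).
    from tokenizers import Tokenizer

    tokenizer = Tokenizer.from_pretrained("bert-base-uncased")

    # A tuple (or any other sequence) now works as batch input, not only a list.
    encodings = tokenizer.encode_batch(("Hello world", "How are you?"))
    print([enc.tokens for enc in encodings])

Iterating with `len()` / `get_item()` over the `PySequence` keeps the per-item extraction logic unchanged while loosening the accepted input type.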