diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
index 8a81ac3d2..b494e4085 100644
--- a/bindings/python/Cargo.toml
+++ b/bindings/python/Cargo.toml
@@ -18,7 +18,6 @@ pyo3 = { version = "0.21" }
 numpy = "0.21"
 ndarray = "0.15"
 itertools = "0.12"
-serde_pyo3 = { git = "https://github.com/Narsil/serde_pyo3" }
 
 [dependencies.tokenizers]
 path = "../../tokenizers"
diff --git a/bindings/python/src/decoders.rs b/bindings/python/src/decoders.rs
index 4a4af94dd..ed21f3469 100644
--- a/bindings/python/src/decoders.rs
+++ b/bindings/python/src/decoders.rs
@@ -29,8 +29,8 @@ use super::error::ToPyResult;
 /// a Decoder will return an instance of this class when instantiated.
 #[pyclass(dict, module = "tokenizers.decoders", name = "Decoder", subclass)]
 #[derive(Clone, Deserialize, Serialize)]
-#[serde(transparent)]
 pub struct PyDecoder {
+    #[serde(flatten)]
     pub(crate) decoder: PyDecoderWrapper,
 }
 
diff --git a/bindings/python/src/models.rs b/bindings/python/src/models.rs
index 2bfaafd34..bffa1bc21 100644
--- a/bindings/python/src/models.rs
+++ b/bindings/python/src/models.rs
@@ -26,8 +26,8 @@ use super::error::{deprecation_warning, ToPyResult};
 /// This class cannot be constructed directly. Please use one of the concrete models.
 #[pyclass(module = "tokenizers.models", name = "Model", subclass)]
 #[derive(Clone, Serialize, Deserialize)]
-#[serde(transparent)]
 pub struct PyModel {
+    #[serde(flatten)]
     pub model: Arc<RwLock<ModelWrapper>>,
 }
 
diff --git a/bindings/python/src/normalizers.rs b/bindings/python/src/normalizers.rs
index 724e79b85..864947e39 100644
--- a/bindings/python/src/normalizers.rs
+++ b/bindings/python/src/normalizers.rs
@@ -44,8 +44,8 @@ impl PyNormalizedStringMut<'_> {
 /// Normalizer will return an instance of this class when instantiated.
 #[pyclass(dict, module = "tokenizers.normalizers", name = "Normalizer", subclass)]
 #[derive(Clone, Serialize, Deserialize)]
-#[serde(transparent)]
 pub struct PyNormalizer {
+    #[serde(flatten)]
     pub(crate) normalizer: PyNormalizerTypeWrapper,
 }
 
diff --git a/bindings/python/src/pre_tokenizers.rs b/bindings/python/src/pre_tokenizers.rs
index a9060ec3b..a2bd9b39c 100644
--- a/bindings/python/src/pre_tokenizers.rs
+++ b/bindings/python/src/pre_tokenizers.rs
@@ -35,8 +35,8 @@ use super::utils::*;
     subclass
 )]
 #[derive(Clone, Serialize, Deserialize)]
-#[serde(transparent)]
 pub struct PyPreTokenizer {
+    #[serde(flatten)]
     pub(crate) pretok: PyPreTokenizerTypeWrapper,
 }
 
diff --git a/bindings/python/src/processors.rs b/bindings/python/src/processors.rs
index aceb1d446..c46d8ea49 100644
--- a/bindings/python/src/processors.rs
+++ b/bindings/python/src/processors.rs
@@ -28,8 +28,8 @@ use tokenizers as tk;
     subclass
 )]
 #[derive(Clone, Deserialize, Serialize)]
-#[serde(transparent)]
 pub struct PyPostProcessor {
+    #[serde(flatten)]
     pub processor: Arc<PostProcessorWrapper>,
 }
 
diff --git a/bindings/python/src/tokenizer.rs b/bindings/python/src/tokenizer.rs
index 5bc57f777..1c6bc9cc1 100644
--- a/bindings/python/src/tokenizer.rs
+++ b/bindings/python/src/tokenizer.rs
@@ -1,4 +1,3 @@
-use serde::Serialize;
 use std::collections::{hash_map::DefaultHasher, HashMap};
 use std::hash::{Hash, Hasher};
 
@@ -463,8 +462,7 @@ type Tokenizer = TokenizerImpl<PyModel, PyNormalizer, PyPreTokenizer, PyPostPro
 
-    fn __str__(&self) -> PyResult<String> {
-        serde_pyo3::to_string(self).map_err(|e| exceptions::PyException::new_err(e.to_string()))
-    }
-
     /// Return the number of special tokens that would be added for single/pair sentences.
     /// :param is_pair: Boolean indicating if the input would be a single sentence or a pair
     /// :return:
@@ -1441,16 +1434,4 @@ mod test {
 
         Tokenizer::from_file(&tmp).unwrap();
     }
-
-    #[test]
-    fn serde_pyo3() {
-        let mut tokenizer = Tokenizer::new(PyModel::from(BPE::default()));
-        tokenizer.with_normalizer(PyNormalizer::new(PyNormalizerTypeWrapper::Sequence(vec![
-            Arc::new(RwLock::new(NFKC.into())),
-            Arc::new(RwLock::new(Lowercase.into())),
-        ])));
-
-        let output = serde_pyo3::to_string(&tokenizer).unwrap();
-        assert_eq!(output, "");
-    }
 }
diff --git a/bindings/python/src/trainers.rs b/bindings/python/src/trainers.rs
index cbce2aef9..716e4cfeb 100644
--- a/bindings/python/src/trainers.rs
+++ b/bindings/python/src/trainers.rs
@@ -16,8 +16,8 @@ use tokenizers as tk;
 /// Trainer will return an instance of this class when instantiated.
 #[pyclass(module = "tokenizers.trainers", name = "Trainer", subclass)]
 #[derive(Clone, Deserialize, Serialize)]
-#[serde(transparent)]
 pub struct PyTrainer {
+    #[serde(flatten)]
     pub trainer: Arc<RwLock<TrainerWrapper>>,
 }
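
A note on the serde attribute swap applied to every wrapper above: `#[serde(transparent)]` makes the wrapper (de)serialize exactly as its single field, whereas a field marked `#[serde(flatten)]` has its keys collected and merged into the wrapper's own map. The sketch below shows the mechanics in isolation; it is a minimal illustration, not code from this PR, and `Inner`, `Transparent`, and `Flattened` are made-up names (assumes only `serde` with the `derive` feature plus `serde_json`):

use serde::Serialize;

#[derive(Serialize)]
struct Inner {
    r#type: String, // serializes under the key "type"
    dropout: f32,
}

// `transparent`: the wrapper delegates directly to Inner's serializer.
#[derive(Serialize)]
#[serde(transparent)]
struct Transparent {
    inner: Inner,
}

// `flatten`: Inner's keys are buffered, then replayed into the wrapper's map.
#[derive(Serialize)]
struct Flattened {
    #[serde(flatten)]
    inner: Inner,
}

fn main() {
    let mk = || Inner { r#type: "BPE".to_string(), dropout: 0.1 };
    // Both lines print {"type":"BPE","dropout":0.1}: for a single-field
    // wrapper the JSON coincides, but the two attributes take different
    // serde code paths, which is what a custom serializer observes.
    println!("{}", serde_json::to_string(&Transparent { inner: mk() }).unwrap());
    println!("{}", serde_json::to_string(&Flattened { inner: mk() }).unwrap());
}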