From f9740c8f2e05d1bab49eb3f1cbb7ced0161e6b56 Mon Sep 17 00:00:00 2001 From: Arthur Zucker Date: Thu, 6 Jun 2024 09:55:42 +0200 Subject: [PATCH] __repr__ should use Debug? --- bindings/python/src/decoders.rs | 6 ++-- bindings/python/src/models.rs | 2 +- bindings/python/src/normalizers.rs | 50 ++++++++++++++++----------- bindings/python/src/pre_tokenizers.rs | 15 ++++---- bindings/python/src/processors.rs | 2 +- bindings/python/src/tokenizer.rs | 2 +- 6 files changed, 42 insertions(+), 35 deletions(-) diff --git a/bindings/python/src/decoders.rs b/bindings/python/src/decoders.rs index 3d88c15d1..e2a61c571 100644 --- a/bindings/python/src/decoders.rs +++ b/bindings/python/src/decoders.rs @@ -29,7 +29,7 @@ use super::error::ToPyResult; /// This class is not supposed to be instantiated directly. Instead, any implementation of /// a Decoder will return an instance of this class when instantiated. #[pyclass(dict, module = "tokenizers.decoders", name = "Decoder", subclass)] -#[derive(Clone, Deserialize, Serialize, Display)] +#[derive(Clone, Deserialize, Serialize, Display, Debug)] #[display(fmt = "{}", decoder)] pub struct PyDecoder { #[serde(flatten)] @@ -488,7 +488,7 @@ impl PySequenceDecoder { } } -#[derive(Clone, Display)] +#[derive(Clone, Display, Debug)] pub(crate) struct CustomDecoder { pub inner: PyObject, } @@ -541,7 +541,7 @@ impl<'de> Deserialize<'de> for CustomDecoder { } } -#[derive(Clone, Deserialize, Serialize, Display)] +#[derive(Clone, Deserialize, Serialize, Display, Debug)] #[serde(untagged)] pub(crate) enum PyDecoderWrapper { #[display(fmt = "{}", "_0.as_ref().read().unwrap().inner")] diff --git a/bindings/python/src/models.rs b/bindings/python/src/models.rs index 4c3a9cdd4..ece1ed4e4 100644 --- a/bindings/python/src/models.rs +++ b/bindings/python/src/models.rs @@ -25,7 +25,7 @@ use tokenizers as tk; /// /// This class cannot be constructed directly. Please use one of the concrete models. #[pyclass(module = "tokenizers.models", name = "Model", subclass)] -#[derive(Clone, Serialize, Deserialize, Display)] +#[derive(Clone, Serialize, Deserialize, Display, Debug)] #[display(fmt = "{}", "model.as_ref().read().unwrap()")] pub struct PyModel { #[serde(flatten)] diff --git a/bindings/python/src/normalizers.rs b/bindings/python/src/normalizers.rs index 762e60f3b..56252aad3 100644 --- a/bindings/python/src/normalizers.rs +++ b/bindings/python/src/normalizers.rs @@ -90,7 +90,6 @@ impl PyNormalizer { }, }) } - } impl Normalizer for PyNormalizer { @@ -571,33 +570,42 @@ impl Serialize for PyNormalizerWrapper { } } -#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone, Deserialize, Display)] #[serde(untagged)] pub(crate) enum PyNormalizerTypeWrapper { + #[display(fmt = "Normalizer.Sequence([{}])", "_0.iter() + .map(|d| d.as_ref().read().unwrap().to_string()) + .fold(String::new(), |mut acc, s| { + if !acc.is_empty() { + acc.push_str(\", \"); + } + acc.push_str(&s); + acc + })")] Sequence(Vec>>), + #[display(fmt ="Normalizer.{}", "_0.as_ref().read().unwrap()")] Single(Arc>), } -// Implement the Display trait for PyNormalizerTypeWrapper -impl std::fmt::Display for PyNormalizerTypeWrapper { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - PyNormalizerTypeWrapper::Sequence(ref decoders) => { - for decoder in decoders { - let decoder = decoder.read().unwrap(); - writeln!(f, "{}", decoder)?; +// // Implement the Display trait for PyNormalizerTypeWrapper +// impl std::fmt::Display for PyNormalizerTypeWrapper { +// fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { +// match self { +// PyNormalizerTypeWrapper::Sequence(ref decoders) => { +// for decoder in decoders { +// let decoder = decoder.read().unwrap(); +// writeln!(f, "{}", decoder)?; - } - writeln!(f, "?????")?; - Ok(()) - } - PyNormalizerTypeWrapper::Single(ref decoder) => { - let decoder = decoder.read().unwrap(); - write!(f, "{}", decoder) - } - } - } -} +// } +// Ok(()) +// } +// PyNormalizerTypeWrapper::Single(ref decoder) => { +// let decoder = decoder.read().unwrap(); +// write!(f, "{}", decoder) +// } +// } +// } +// } impl Serialize for PyNormalizerTypeWrapper { fn serialize(&self, serializer: S) -> Result diff --git a/bindings/python/src/pre_tokenizers.rs b/bindings/python/src/pre_tokenizers.rs index 4334fc029..b2d1ca51b 100644 --- a/bindings/python/src/pre_tokenizers.rs +++ b/bindings/python/src/pre_tokenizers.rs @@ -34,8 +34,7 @@ use derive_more::Display; name = "PreTokenizer", subclass )] -#[derive(Clone, Serialize, Deserialize, Display)] -#[display(fmt = "PreTokenizer(pretok={})", pretok)] +#[derive(Clone, Serialize, Deserialize, Display, Debug)] pub struct PyPreTokenizer { #[serde(flatten)] pub(crate) pretok: PyPreTokenizerTypeWrapper, @@ -596,7 +595,7 @@ impl PyUnicodeScripts { } } -#[derive(Clone, Display)] +#[derive(Clone, Display, Debug)] pub(crate) struct CustomPreTokenizer { inner: PyObject, } @@ -640,7 +639,7 @@ impl<'de> Deserialize<'de> for CustomPreTokenizer { } } -#[derive(Clone, Deserialize, Display)] +#[derive(Clone, Deserialize, Display, Debug)] #[display(fmt="{}")] #[serde(untagged)] pub(crate) enum PyPreTokenizerWrapper { @@ -660,10 +659,10 @@ impl Serialize for PyPreTokenizerWrapper { } } -#[derive(Clone, Deserialize, Display)] +#[derive(Clone, Deserialize, Display, Debug)] #[serde(untagged)] pub(crate) enum PyPreTokenizerTypeWrapper { - #[display(fmt = "[{}]", "_0.iter() + #[display(fmt = "PreTokenizer.Sequence([{}])", "_0.iter() .map(|d| d.as_ref().read().unwrap().to_string()) .fold(String::new(), |mut acc, s| { if !acc.is_empty() { @@ -671,10 +670,10 @@ pub(crate) enum PyPreTokenizerTypeWrapper { } acc.push_str(&s); acc - })")] + })")] // This one is only used when the pre_tokenizer is set in python Sequence(Vec>>), #[display(fmt ="{}", "_0.as_ref().read().unwrap()")] - Single(Arc>), + Single(Arc>), // this one can actually be a sequence in rust } impl Serialize for PyPreTokenizerTypeWrapper { diff --git a/bindings/python/src/processors.rs b/bindings/python/src/processors.rs index 7fdc68ef1..8adff9c33 100644 --- a/bindings/python/src/processors.rs +++ b/bindings/python/src/processors.rs @@ -27,7 +27,7 @@ use tokenizers as tk; name = "PostProcessor", subclass )] -#[derive(Clone, Deserialize, Serialize, Display)] +#[derive(Clone, Deserialize, Serialize, Display, Debug)] pub struct PyPostProcessor { #[serde(flatten)] pub processor: Arc, diff --git a/bindings/python/src/tokenizer.rs b/bindings/python/src/tokenizer.rs index 637d34f14..2a7bdc1a1 100644 --- a/bindings/python/src/tokenizer.rs +++ b/bindings/python/src/tokenizer.rs @@ -1416,7 +1416,7 @@ impl PyTokenizer { } fn __repr__(&self) -> PyResult{ - Ok(format!("{}", self.tokenizer)) + Ok(format!("{:?}", self.tokenizer)) } }