Skip to content

Commit

Permalink
__repr__ should use Debug?
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Jun 6, 2024
1 parent 823eec3 commit f9740c8
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 35 deletions.
6 changes: 3 additions & 3 deletions bindings/python/src/decoders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use super::error::ToPyResult;
/// This class is not supposed to be instantiated directly. Instead, any implementation of
/// a Decoder will return an instance of this class when instantiated.
#[pyclass(dict, module = "tokenizers.decoders", name = "Decoder", subclass)]
#[derive(Clone, Deserialize, Serialize, Display)]
#[derive(Clone, Deserialize, Serialize, Display, Debug)]
#[display(fmt = "{}", decoder)]
pub struct PyDecoder {
#[serde(flatten)]
Expand Down Expand Up @@ -488,7 +488,7 @@ impl PySequenceDecoder {
}
}

#[derive(Clone, Display)]
#[derive(Clone, Display, Debug)]
pub(crate) struct CustomDecoder {
pub inner: PyObject,
}
Expand Down Expand Up @@ -541,7 +541,7 @@ impl<'de> Deserialize<'de> for CustomDecoder {
}
}

#[derive(Clone, Deserialize, Serialize, Display)]
#[derive(Clone, Deserialize, Serialize, Display, Debug)]
#[serde(untagged)]
pub(crate) enum PyDecoderWrapper {
#[display(fmt = "{}", "_0.as_ref().read().unwrap().inner")]
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/src/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use tokenizers as tk;
///
/// This class cannot be constructed directly. Please use one of the concrete models.
#[pyclass(module = "tokenizers.models", name = "Model", subclass)]
#[derive(Clone, Serialize, Deserialize, Display)]
#[derive(Clone, Serialize, Deserialize, Display, Debug)]
#[display(fmt = "{}", "model.as_ref().read().unwrap()")]
pub struct PyModel {
#[serde(flatten)]
Expand Down
50 changes: 29 additions & 21 deletions bindings/python/src/normalizers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ impl PyNormalizer {
},
})
}

}

impl Normalizer for PyNormalizer {
Expand Down Expand Up @@ -571,33 +570,42 @@ impl Serialize for PyNormalizerWrapper {
}
}

#[derive(Debug, Clone, Deserialize)]
#[derive(Debug, Clone, Deserialize, Display)]
#[serde(untagged)]
pub(crate) enum PyNormalizerTypeWrapper {
#[display(fmt = "Normalizer.Sequence([{}])", "_0.iter()
.map(|d| d.as_ref().read().unwrap().to_string())
.fold(String::new(), |mut acc, s| {
if !acc.is_empty() {
acc.push_str(\", \");
}
acc.push_str(&s);
acc
})")]
Sequence(Vec<Arc<RwLock<PyNormalizerWrapper>>>),
#[display(fmt ="Normalizer.{}", "_0.as_ref().read().unwrap()")]
Single(Arc<RwLock<PyNormalizerWrapper>>),
}

// Implement the Display trait for PyNormalizerTypeWrapper
impl std::fmt::Display for PyNormalizerTypeWrapper {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
PyNormalizerTypeWrapper::Sequence(ref decoders) => {
for decoder in decoders {
let decoder = decoder.read().unwrap();
writeln!(f, "{}", decoder)?;
// // Implement the Display trait for PyNormalizerTypeWrapper
// impl std::fmt::Display for PyNormalizerTypeWrapper {
// fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
// match self {
// PyNormalizerTypeWrapper::Sequence(ref decoders) => {
// for decoder in decoders {
// let decoder = decoder.read().unwrap();
// writeln!(f, "{}", decoder)?;

}
writeln!(f, "?????")?;
Ok(())
}
PyNormalizerTypeWrapper::Single(ref decoder) => {
let decoder = decoder.read().unwrap();
write!(f, "{}", decoder)
}
}
}
}
// }
// Ok(())
// }
// PyNormalizerTypeWrapper::Single(ref decoder) => {
// let decoder = decoder.read().unwrap();
// write!(f, "{}", decoder)
// }
// }
// }
// }

impl Serialize for PyNormalizerTypeWrapper {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
Expand Down
15 changes: 7 additions & 8 deletions bindings/python/src/pre_tokenizers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ use derive_more::Display;
name = "PreTokenizer",
subclass
)]
#[derive(Clone, Serialize, Deserialize, Display)]
#[display(fmt = "PreTokenizer(pretok={})", pretok)]
#[derive(Clone, Serialize, Deserialize, Display, Debug)]
pub struct PyPreTokenizer {
#[serde(flatten)]
pub(crate) pretok: PyPreTokenizerTypeWrapper,
Expand Down Expand Up @@ -596,7 +595,7 @@ impl PyUnicodeScripts {
}
}

#[derive(Clone, Display)]
#[derive(Clone, Display, Debug)]
pub(crate) struct CustomPreTokenizer {
inner: PyObject,
}
Expand Down Expand Up @@ -640,7 +639,7 @@ impl<'de> Deserialize<'de> for CustomPreTokenizer {
}
}

#[derive(Clone, Deserialize, Display)]
#[derive(Clone, Deserialize, Display, Debug)]
#[display(fmt="{}")]
#[serde(untagged)]
pub(crate) enum PyPreTokenizerWrapper {
Expand All @@ -660,21 +659,21 @@ impl Serialize for PyPreTokenizerWrapper {
}
}

#[derive(Clone, Deserialize, Display)]
#[derive(Clone, Deserialize, Display, Debug)]
#[serde(untagged)]
pub(crate) enum PyPreTokenizerTypeWrapper {
#[display(fmt = "[{}]", "_0.iter()
#[display(fmt = "PreTokenizer.Sequence([{}])", "_0.iter()
.map(|d| d.as_ref().read().unwrap().to_string())
.fold(String::new(), |mut acc, s| {
if !acc.is_empty() {
acc.push_str(\", \");
}
acc.push_str(&s);
acc
})")]
})")] // This variant is only used when the pre-tokenizer is set from Python.
Sequence(Vec<Arc<RwLock<PyPreTokenizerWrapper>>>),
#[display(fmt ="{}", "_0.as_ref().read().unwrap()")]
Single(Arc<RwLock<PyPreTokenizerWrapper>>),
Single(Arc<RwLock<PyPreTokenizerWrapper>>), // This variant can actually wrap a sequence on the Rust side.
}

impl Serialize for PyPreTokenizerTypeWrapper {
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/src/processors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use tokenizers as tk;
name = "PostProcessor",
subclass
)]
#[derive(Clone, Deserialize, Serialize, Display)]
#[derive(Clone, Deserialize, Serialize, Display, Debug)]
pub struct PyPostProcessor {
#[serde(flatten)]
pub processor: Arc<PostProcessorWrapper>,
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1416,7 +1416,7 @@ impl PyTokenizer {
}

fn __repr__(&self) -> PyResult<String>{
Ok(format!("{}", self.tokenizer))
Ok(format!("{:?}", self.tokenizer))
}
}

Expand Down

0 comments on commit f9740c8

Please sign in to comment.