diff --git a/tokenizers/src/normalizers/byte_level.rs b/tokenizers/src/normalizers/byte_level.rs index 42c7fa510..9d52f7eaa 100644 --- a/tokenizers/src/normalizers/byte_level.rs +++ b/tokenizers/src/normalizers/byte_level.rs @@ -2,10 +2,11 @@ use crate::processors::byte_level::bytes_char; use crate::tokenizer::{NormalizedString, Normalizer, Result}; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; +use crate::utils::macro_rules_attribute; -#[derive(Clone, Debug, Deserialize, Serialize)] -#[serde(tag = "type")] -pub struct ByteLevel {} +#[derive(Clone, Debug)] +#[macro_rules_attribute(impl_serde_type!)] +pub struct ByteLevel; lazy_static! { static ref BYTES_CHAR: HashMap = bytes_char(); diff --git a/tokenizers/src/normalizers/mod.rs b/tokenizers/src/normalizers/mod.rs index c5144be14..43f50bfda 100644 --- a/tokenizers/src/normalizers/mod.rs +++ b/tokenizers/src/normalizers/mod.rs @@ -73,3 +73,36 @@ impl_enum_from!(Precompiled, NormalizerWrapper, Precompiled); impl_enum_from!(Replace, NormalizerWrapper, Replace); impl_enum_from!(Prepend, NormalizerWrapper, Prepend); impl_enum_from!(ByteLevel, NormalizerWrapper, ByteLevel); + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn post_processor_deserialization_no_type() { + let json = r#"{"strip_left":false, "strip_right":true}"#; + let reconstructed = serde_json::from_str::(json); + assert!(matches!( + reconstructed.unwrap(), + NormalizerWrapper::StripNormalizer(_) + )); + + let json = + r#"{"sep":["",2], "cls":["",0], "trim_offsets":true, "add_prefix_space":true}"#; + let reconstructed = serde_json::from_str::(json).unwrap(); + println!("{:?}", reconstructed); + assert!(matches!( + reconstructed, + NormalizerWrapper::Sequence(_) + )); + + let json = r#"{"type":"RobertaProcessing", "sep":["",2] }"#; + let reconstructed = serde_json::from_str::(json); + match reconstructed { + Err(err) => assert_eq!( + err.to_string(), + "data did not match any variant of untagged enum NormalizerWrapper" + ), + _ => panic!("Expected an error here"), + } + } + } \ No newline at end of file