Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Jun 10, 2024
1 parent 7591f2b commit f50e4e0
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 4 deletions.
1 change: 0 additions & 1 deletion tokenizers/src/processors/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize, PartialEq, Debug, Clone, Eq, Display)]
#[serde(untagged)]
#[display(fmt = "{}")]
pub enum PostProcessorWrapper {
// Roberta must be before Bert for deserialization (serde does not validate tags)
Roberta(RobertaProcessing),
Expand Down
10 changes: 8 additions & 2 deletions tokenizers/src/processors/sequence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@ use serde::{Deserialize, Serialize};
#[macro_rules_attribute(impl_serde_type!)]
#[derive(Clone, Debug, PartialEq, Eq, Display)]
#[display(
fmt = "[{}]",
"processors.iter().map(|d| d.to_string()).collect::<Vec<_>>().join(\", \")"
fmt = "processors.Sequence([{}])",
"processors.iter().fold(String::new(), |mut acc, p| {
if !acc.is_empty() {
acc.push_str(\", \");
}
acc.push_str(&p.to_string());
acc
})"
)]
pub struct Sequence {
processors: Vec<PostProcessorWrapper>,
Expand Down
2 changes: 1 addition & 1 deletion tokenizers/src/tokenizer/added_vocabulary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ fn space_rightmost_at_start(sentence: &str) -> usize {
/// exist as required.
///
#[derive(Clone, Debug, Display)]
#[display(fmt="AddedVocabulary(added_tokens_map_r={{{}}}, encode_special_tokens={})", "&(0..=5).fold(String::new(), |mut acc, key| {if let Some(token) = added_tokens_map_r.get(&key){if !acc.is_empty(){acc.push_str(\", \");}acc.push_str(&format!(\"\n\t{}: {}\", key, &token.to_string()));}acc})", encode_special_tokens)]
#[display(fmt="AddedVocabulary(added_tokens_map_r={{{}, ...}}, encode_special_tokens={})", "&(0..=5).fold(String::new(), |mut acc, key| {if let Some(token) = added_tokens_map_r.get(&key){if !acc.is_empty(){acc.push_str(\", \");}acc.push_str(&format!(\"\n\t{}: {}\", key, &token.to_string()));}acc})", encode_special_tokens)]
pub struct AddedVocabulary {
/// Contains the mapping from String (token content) to ID. This map contains both special
/// tokens and classic added tokens that were added to this vocabulary.
Expand Down

0 comments on commit f50e4e0

Please sign in to comment.