Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Jul 11, 2024
1 parent 2f29cdd commit 7e83218
Showing 1 changed file with 0 additions and 33 deletions.
33 changes: 0 additions & 33 deletions tokenizers/src/tokenizer/pre_tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,39 +99,6 @@ impl PreTokenizedString {
self.splits = new_splits;
Ok(())
}

/// Splits every current split using `split_fn`, or `split_added_fn` when one
/// is provided (intended for handling added tokens differently).
///
/// Each closure receives the split's index and its `NormalizedString`, and
/// returns an iterator of items convertible into `Split`. Empty splits are
/// discarded. On success, `self.splits` is replaced by the rebuilt list.
///
/// # Errors
/// Propagates any error returned by the splitting closure.
pub fn split_with_added<F, U, R>(
    &mut self,
    mut split_fn: F,
    split_added_fn: Option<F>,
) -> Result<()>
where
    // `F: Copy` lets us reuse the optional closure on every iteration
    // without moving it out of the `Option`.
    F: FnMut(usize, NormalizedString) -> Result<U> + Copy,
    U: IntoIterator<Item = R>,
    R: Into<Split>,
{
    // new_splits is at least as big as self.splits
    let mut new_splits = Vec::with_capacity(self.splits.len());
    for (i, original_split) in self.splits.drain(..).enumerate() {
        // Prefer the added-token splitter when the caller supplied one.
        let splits = match split_added_fn {
            Some(mut added_fn) => added_fn(i, original_split.normalized)?,
            None => split_fn(i, original_split.normalized)?,
        };
        // Filter and extend the new_splits with non-empty splits
        new_splits.extend(splits.into_iter().filter_map(|split| {
            let split: Split = split.into();
            if split.normalized.is_empty() {
                None
            } else {
                Some(split)
            }
        }));
    }
    // Bug fix: install the rebuilt splits. Previously `new_splits` was
    // silently dropped, a spurious " " split was prepended to the (drained)
    // `self.splits`, and a leftover debug `println!` dumped `self`.
    self.splits = new_splits;
    Ok(())
}

/// Normalized all the splits that do not have attached `Tokens`, using the provided
/// `normalize` function.
pub fn normalize<F>(&mut self, normalize: F) -> Result<()>
Expand Down

0 comments on commit 7e83218

Please sign in to comment.