Skip to content

Commit

Permalink
clean up
Browse files: browse the repository at this point in the history
  • Loading branch information
rlrs committed Dec 12, 2023
1 parent 320a5f4 commit 41834c3
Showing 1 changed file with 1 addition and 21 deletions.
22 changes: 1 addition & 21 deletions tokenizers/src/tokenizer/normalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,9 +567,6 @@ impl NormalizedString {

/// Replace every match of `pattern` in the normalized string with `content`, rebuilding the alignments to match the new string.
pub fn replace<P: Pattern>(&mut self, pattern: P, content: &str) -> Result<()> {
let mut num_matches = 0;
let start = std::time::Instant::now();
let mut time_spent_in = std::time::Duration::new(0, 0);
let mut new_normalized = String::with_capacity(self.normalized.len()); // Initially allocate for the input size
let mut new_alignments: Vec<(usize, usize)> = Vec::with_capacity(self.alignments.len());
let mut last_end = 0; // Keep track of the last end position
Expand All @@ -579,10 +576,7 @@ impl NormalizedString {
.into_iter()
.for_each(|((start, end), is_match)| {
if is_match {
num_matches += 1;
let range = start..end;
//apply_signed!(range.start, offset);
//apply_signed!(range.end, offset);

let mut new_len = 0;
let removed_chars = self.normalized[range.clone()].chars().count();
Expand Down Expand Up @@ -620,7 +614,6 @@ impl NormalizedString {
(c, 1)
});
let mut offset = (initial_removed + n_range.start) as isize;
//let mut alignments = Vec::with_capacity(n_range.len());
let normalized = dest
.into_iter()
.map(|(c, changes): (char, i32)| {
Expand Down Expand Up @@ -666,17 +659,7 @@ impl NormalizedString {
})
.collect::<String>();

//self.alignments.splice(n_range.clone(), alignments);
new_normalized.push_str(&normalized);
/*unsafe {
self.normalized
.as_mut_vec()
.splice(n_range, normalized.bytes());
}*/
time_spent_in += this_start.elapsed();

let old_len = end - start;
offset += new_len as isize - old_len as isize;
last_end = end;
}
});
Expand All @@ -685,11 +668,8 @@ impl NormalizedString {
new_normalized.push_str(&self.normalized[last_end..]);
new_alignments.extend(&self.alignments[last_end..]);

self.normalized = new_normalized; //std::mem::take(&mut new_normalized);
self.normalized = new_normalized;
self.alignments = new_alignments;

println!("Replaced {} matches in {:?}", num_matches, start.elapsed());
println!("Time spent in transform: {:?}", time_spent_in);
Ok(())
}

Expand Down

0 comments on commit 41834c3

Please sign in to comment.