Skip to content

Commit

Permalink
fix offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Nov 10, 2023
1 parent 948d2dd commit dbe25cd
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions tokenizers/src/pre_tokenizers/metaspace.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use serde::{Deserialize, Deserializer, Serialize};

use crate::tokenizer::{Decoder, PreTokenizedString, PreTokenizer, Result, SplitDelimiterBehavior};
-use regex::Regex;

#[derive(Debug, Clone, PartialEq, Serialize, Eq)]
/// Replaces all the whitespaces by the provided meta character and then
Expand Down Expand Up @@ -240,7 +239,7 @@ mod tests {
.map(|(s, o, _)| (s, o))
.collect::<Vec<_>>(),
vec![
-("▁Hey", (0, 6)), ("▁my", (6, 11)), ("▁friend", (11, 20)), ("▁", (20, 23)), ("▁<s>", (23, 26)), ("▁how", (26, 29)), ("▁are", (29, 35)), ("▁you", (35, 41))
+("▁Hey", (0, 6)), ("▁my", (6, 11)), ("▁friend", (11, 20)), ("▁", (20, 23)), ("▁<s>", (23, 29)), ("▁how", (29, 35)), ("▁are", (35, 41)), ("▁you", (41, 47))
]
);
}
Expand Down

0 comments on commit dbe25cd

Please sign in to comment.