Update tokenizers/src/pre_tokenizers/byte_level.rs
Co-authored-by: Luc Georges <[email protected]>
ArthurZucker and McPatate committed Jul 12, 2024
1 parent dbbf905 commit 8213ad8
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion tokenizers/src/pre_tokenizers/byte_level.rs
@@ -11,7 +11,7 @@ use crate::utils::macro_rules_attribute;
 
 /// Converts bytes to unicode characters.
 /// See https://github.com/openai/gpt-2/blob/master/src/encoder.py#L9
-pub fn bytes_char() -> HashMap<u8, char> {
+pub(crate) fn bytes_char() -> HashMap<u8, char> {
     let mut bs: Vec<u8> = vec![];
     bs.extend(b'!'..=b'~');
     bs.extend(b'\xA1'..=b'\xAC');
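For context, the function whose visibility this commit narrows builds the GPT-2 byte-to-unicode table referenced in the doc comment. Below is a minimal, self-contained sketch of that mapping in the style of the linked encoder.py; the names bytes_char_sketch and the main demo are illustrative and are not part of the commit or the crate's API.

use std::collections::HashMap;

// Sketch of the GPT-2 byte-to-unicode table (hypothetical helper, not the crate's API):
// visible bytes map to themselves, and every other byte is shifted to 256 + n so the
// resulting 256-character alphabet has no whitespace/control characters and is reversible.
fn bytes_char_sketch() -> HashMap<u8, char> {
    let mut bs: Vec<u8> = vec![];
    bs.extend(b'!'..=b'~');       // printable ASCII
    bs.extend(b'\xA1'..=b'\xAC'); // Latin-1 punctuation/symbols
    bs.extend(b'\xAE'..=b'\xFF'); // remaining Latin-1 characters

    let mut cs: Vec<u32> = bs.iter().map(|b| *b as u32).collect();
    let mut n: u32 = 0;
    for b in 0..=255u8 {
        if !bs.contains(&b) {
            // Bytes outside the ranges above get the next free codepoint >= 256.
            bs.push(b);
            cs.push(256 + n);
            n += 1;
        }
    }

    bs.into_iter()
        .zip(cs)
        .map(|(b, c)| (b, char::from_u32(c).unwrap()))
        .collect()
}

fn main() {
    let map = bytes_char_sketch();
    assert_eq!(map.len(), 256);
    assert_eq!(map[&b'A'], 'A');        // printable bytes are unchanged
    assert_eq!(map[&b' '], '\u{0120}'); // space becomes 'Ġ', as seen in GPT-2 BPE vocabularies
}

With the visibility change to pub(crate), this table remains reachable everywhere inside the tokenizers crate, but downstream crates that previously called bytes_char() through the byte_level module would need another entry point.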
