Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ArthurZucker committed Oct 21, 2024
1 parent c81c34a commit b2c667c
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 13 deletions.
5 changes: 1 addition & 4 deletions tokenizers/src/models/bpe/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,7 @@ impl BpeBuilder {
.iter()
.map(|(key, val)| (*val, key.to_owned()))
.collect();
let cache = match self.config.cache_capacity {
0 => None,
capacity => Some(Cache::new(false)),
};
let cache =Some(Cache::new(self.config.cache_capacity));

let vocab = self.config.vocab;
let prefix_len = if let Some(prefix) = &self.config.continuing_subword_prefix {
Expand Down
20 changes: 11 additions & 9 deletions tokenizers/src/utils/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ use std::borrow::Borrow;
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::RwLock;
use sysinfo::{System};
use sysinfo::System;
use std::mem;


/// The default capacity for a `BPE`'s internal cache.
pub static DEFAULT_CACHE_CAPACITY: usize = 10000;

/// Provides a simple multithread cache to speed up BPE tokenization that will try to read values
/// concurrently but won't block if another thread is writing.
/// The goal is clearly not the accuracy of the content, both get and set
Expand Down Expand Up @@ -36,7 +40,7 @@ where
V: Clone,
{
fn default() -> Self {
Self::new(false)
Self::new(0)
}
}

Expand All @@ -46,11 +50,11 @@ where
V: Clone,
{
/// Create a new `Cache` with the given capacity.
pub(crate) fn new(use_default_capacity: bool) -> Self {
let capacity = if use_default_capacity{
DEFAULT_CACHE_CAPACITY
} else{
pub(crate) fn new(use_default_capacity: usize) -> Self {
let capacity = if use_default_capacity == 0{
default_cache_capacity::<K, V>()
} else{
use_default_capacity
};
let h_format = capacity / (1024 * 1024 * 1024);
println!("Using capacity {h_format} (nb of elements)");
Expand All @@ -60,7 +64,7 @@ where

/// Create a fresh `Cache` with the same configuration.
pub(crate) fn fresh(&self) -> Self {
Self::new(false)
Self::new(0)
}

/// Clear the cache.
Expand Down Expand Up @@ -144,5 +148,3 @@ fn default_cache_capacity<K, V>() -> usize {
return available_memory_bytes /entry_size
}

/// The default capacity for a `BPE`'s internal cache.
pub static DEFAULT_CACHE_CAPACITY: usize = 10_000;

0 comments on commit b2c667c

Please sign in to comment.