Skip to content

Commit

Permalink
make from_bytes less flaky
Browse files: browse the repository at this point in the history
  • Loading branch information
Daulet Zhanguzin committed Apr 7, 2023
1 parent c36680f commit 5a53a30
Show file tree
Hide file tree
Showing 4 changed files with 501,314 additions and 20 deletions.
15 changes: 4 additions & 11 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,10 @@ use std::ptr;
use tokenizers::tokenizer::Tokenizer;

// NOTE(review): this listing is a rendered diff whose +/- markers were lost, so
// pre-commit and post-commit lines are interleaved below. The "removed"/"added"
// labels are derived from the hunk header (-4,17 +4,10) and the
// "4 additions & 11 deletions" summary; confirm against the repository.
//
/// C-ABI entry point that builds a `Tokenizer` from a raw byte buffer and returns
/// the raw pointer produced by `Box::into_raw` (the allocation is not freed here).
///
/// `bytes` and `len` are passed straight to `std::slice::from_raw_parts`, so the
/// caller must supply a pointer that is valid for reads of `len` bytes.
///
/// Pre-commit behaviour: returns a null pointer when `Tokenizer::from_bytes` fails.
/// Post-commit behaviour: panics with "failed to create tokenizer" on failure.
#[no_mangle]
// Removed by the commit: signature returning a type-erased `*mut libc::c_void`.
pub extern "C" fn from_bytes(bytes: *const u8, len: u32) -> *mut libc::c_void {
// Added by the commit: signature returning a typed `*mut Tokenizer`.
pub extern "C" fn from_bytes(bytes: *const u8, len: u32) -> *mut Tokenizer {
// Unchanged context: view the caller's buffer as a byte slice without copying.
let bytes_slice = unsafe { std::slice::from_raw_parts(bytes, len as usize) };
// Removed by the commit (through the closing brace of this `match`): the
// error-tolerant body that maps a failure to a null pointer.
match Tokenizer::from_bytes(bytes_slice) {
Ok(tokenizer) => {
// Move the tokenizer to the heap and erase the pointer type for the C caller.
let ptr = Box::into_raw(Box::new(tokenizer));
ptr.cast()
}
Err(_) => {
// The error value is discarded; the caller only sees a null pointer.
ptr::null_mut()
}
}
// Added by the commit (next two lines): the replacement body, which panics on
// failure instead of returning null.
// NOTE(review): this `expect` can panic inside an `extern "C"` function — confirm
// that the callers on the other side of the FFI boundary can tolerate that.
let tokenizer = Tokenizer::from_bytes(bytes_slice).expect("failed to create tokenizer");
return Box::into_raw(Box::new(tokenizer));
}

#[no_mangle]
Expand Down Expand Up @@ -83,4 +76,4 @@ pub extern "C" fn vocab_size(ptr: *mut libc::c_void) -> u32 {
tokenizer = ptr.cast::<Tokenizer>().as_ref().expect("failed to cast tokenizer");
}
tokenizer.get_vocab_size(true) as u32
}
}
Loading

0 comments on commit 5a53a30

Please sign in to comment.