From efcbf7726743bc4ff4ee249f2455dcdc0eee4476 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Sat, 28 Sep 2024 20:01:46 -0700 Subject: [PATCH] Checkpoint: switch to ByteStr throughout builder API --- utils/zerotrie/benches/overview.rs | 14 +- utils/zerotrie/src/builder/bytestr.rs | 72 +++-- utils/zerotrie/src/builder/litemap.rs | 18 +- utils/zerotrie/src/builder/mod.rs | 2 +- .../zerotrie/src/builder/nonconst/builder.rs | 34 +-- utils/zerotrie/src/comparison.rs | 7 + utils/zerotrie/src/lib.rs | 1 + utils/zerotrie/src/serde.rs | 1 + utils/zerotrie/src/zerotrie.rs | 12 +- utils/zerotrie/tests/asciitrie_test.rs | 1 + utils/zerotrie/tests/builder_test.rs | 260 +++++++++--------- utils/zerotrie/tests/data/data.rs | 5 +- utils/zerotrie/tests/ignorecase_test.rs | 10 +- utils/zerotrie/tests/locale_aux_test.rs | 12 +- 14 files changed, 240 insertions(+), 209 deletions(-) diff --git a/utils/zerotrie/benches/overview.rs b/utils/zerotrie/benches/overview.rs index 832274378fa..7990a89eb42 100644 --- a/utils/zerotrie/benches/overview.rs +++ b/utils/zerotrie/benches/overview.rs @@ -13,8 +13,10 @@ use zerotrie::ZeroTrieSimpleAscii; use zerovec::ZeroHashMap; #[cfg(feature = "bench")] use zerovec::ZeroMap; +use zerotrie::ByteStr; mod testdata { + use zerotrie::ByteStr; include!("../tests/data/data.rs"); } @@ -137,7 +139,7 @@ fn get_subtags_bench_large(c: &mut Criterion) { fn get_subtags_bench_helper( mut g: criterion::BenchmarkGroup, strings: &[&str], - litemap: LiteMap<&[u8], usize>, + litemap: LiteMap<&ByteStr, usize>, ) { g.bench_function("SimpleAscii", |b| { let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap(); @@ -171,7 +173,7 @@ fn get_subtags_bench_helper( #[cfg(feature = "bench")] g.bench_function("ZeroMap/usize", |b| { - let zm: ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zm: ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { let actual = black_box(&zm).get_copied(key.as_bytes()); @@ -182,7 +184,7 @@ fn get_subtags_bench_helper( #[cfg(feature = "bench")] g.bench_function("ZeroMap/u8", |b| { - let zm: ZeroMap<[u8], u8> = litemap.iter().map(|(k, v)| (*k, *v as u8)).collect(); + let zm: ZeroMap<[u8], u8> = litemap.iter().map(|(k, v)| (k.as_bytes(), *v as u8)).collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { let actual = black_box(&zm).get_copied(key.as_bytes()); @@ -193,7 +195,7 @@ fn get_subtags_bench_helper( #[cfg(feature = "bench")] g.bench_function("HashMap", |b| { - let hm: HashMap<&[u8], usize> = litemap.iter().map(|(a, b)| (*a, *b)).collect(); + let hm: HashMap<&[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b)).collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { let actual = black_box(&hm).get(key.as_bytes()); @@ -206,7 +208,7 @@ fn get_subtags_bench_helper( g.bench_function("ZeroHashMap/usize", |b| { let zhm: ZeroHashMap<[u8], usize> = litemap .iter() - .map(|(a, b)| (*a, b)) + .map(|(a, b)| (a.as_bytes(), b)) .collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { @@ -220,7 +222,7 @@ fn get_subtags_bench_helper( #[cfg(feature = "bench")] g.bench_function("ZeroHashMap/u8", |b| { - let zhm: ZeroHashMap<[u8], u8> = litemap.iter().map(|(k, v)| (*k, *v as u8)).collect(); + let zhm: ZeroHashMap<[u8], u8> = litemap.iter().map(|(k, v)| (k.as_bytes(), *v as u8)).collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { let actual = black_box(&zhm).get(key.as_bytes()).copied(); diff --git a/utils/zerotrie/src/builder/bytestr.rs b/utils/zerotrie/src/builder/bytestr.rs index fad5f0b6078..3a098154729 100644 --- a/utils/zerotrie/src/builder/bytestr.rs +++ b/utils/zerotrie/src/builder/bytestr.rs @@ -3,63 +3,77 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::comparison; -use core::borrow::Borrow; +use core::cmp::Ordering; use core::fmt; #[cfg(feature = "serde")] use alloc::boxed::Box; -/// A struct transparent over `[u8]` with convenient helper functions. +/// A string key in a ZeroTrie. +/// +/// This type has a custom Ord impl, making it suitable for use in a sorted +/// map for ZeroTrie construction. #[repr(transparent)] -#[derive(PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct ByteStr([u8]); +#[derive(PartialEq, Eq)] +pub struct ByteStr([u8]); impl ByteStr { - pub const fn from_byte_slice_with_value<'a, 'l>( + #[inline] + pub(crate) const fn from_byte_slice_with_value<'a, 'l>( input: &'l [(&'a [u8], usize)], ) -> &'l [(&'a ByteStr, usize)] { // Safety: [u8] and ByteStr have the same layout and invariants unsafe { core::mem::transmute(input) } } - pub const fn from_str_slice_with_value<'a, 'l>( + #[inline] + pub(crate) const fn from_str_slice_with_value<'a, 'l>( input: &'l [(&'a str, usize)], ) -> &'l [(&'a ByteStr, usize)] { // Safety: str and ByteStr have the same layout, and ByteStr is less restrictive unsafe { core::mem::transmute(input) } } - pub fn from_bytes(input: &[u8]) -> &Self { + /// Casts a `&[u8]` to a `&ByteStr` + #[inline] + pub const fn from_bytes(input: &[u8]) -> &Self { // Safety: [u8] and ByteStr have the same layout and invariants unsafe { core::mem::transmute(input) } } - #[cfg(feature = "serde")] - pub fn from_boxed_bytes(input: Box<[u8]>) -> Box { + /// Casts a `Box<[u8]>` to a `Box` + #[cfg(feature = "alloc")] + pub const fn from_boxed_bytes(input: Box<[u8]>) -> Box { // Safety: [u8] and ByteStr have the same layout and invariants unsafe { core::mem::transmute(input) } } - #[allow(dead_code)] // may want this in the future - pub fn from_str(input: &str) -> &Self { + /// Casts a `&str` to a `&ByteStr` + pub const fn from_str(input: &str) -> &Self { Self::from_bytes(input.as_bytes()) } - #[allow(dead_code)] // may want this in the future - pub fn empty() -> &'static Self { + /// Creates an empty ByteStr + pub const fn empty() -> &'static Self { Self::from_bytes(&[]) } - #[allow(dead_code)] // not used in all features + /// Returns this ByteStr as a byte slice pub const fn as_bytes(&self) -> &[u8] { &self.0 } + /// Whether the ByteStr is an empty slice + pub const fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// How many bytes are in the ByteStr pub const fn len(&self) -> usize { self.0.len() } - #[allow(dead_code)] // not used in all features + /// Whether the ByteStr is all ASCII-range pub fn is_all_ascii(&self) -> bool { for byte in self.0.iter() { if !byte.is_ascii() { @@ -111,16 +125,18 @@ impl ByteStr { } } -impl Borrow<[u8]> for ByteStr { - fn borrow(&self) -> &[u8] { +// Note: Does NOT impl Borrow<[u8]> because the Ord impls differ. +// AsRef is okay to implement. + +impl AsRef<[u8]> for ByteStr { + fn as_ref(&self) -> &[u8] { self.as_bytes() } } -#[cfg(feature = "alloc")] -impl Borrow<[u8]> for alloc::boxed::Box { - fn borrow(&self) -> &[u8] { - self.as_bytes() +impl AsRef for ByteStr { + fn as_ref(&self) -> &ByteStr { + self } } @@ -133,3 +149,17 @@ impl fmt::Debug for ByteStr { } } } + +impl Ord for ByteStr { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + crate::comparison::cmp_slices(&self.0, &other.0) + } +} + +impl PartialOrd for ByteStr { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} diff --git a/utils/zerotrie/src/builder/litemap.rs b/utils/zerotrie/src/builder/litemap.rs index 6577b69357e..8b5d698b533 100644 --- a/utils/zerotrie/src/builder/litemap.rs +++ b/utils/zerotrie/src/builder/litemap.rs @@ -15,14 +15,13 @@ use litemap::LiteMap; impl ZeroTrieSimpleAscii> { #[doc(hidden)] - pub fn try_from_litemap_with_const_builder<'a, S>( - items: &LiteMap<&'a [u8], usize, S>, + pub fn try_from_litemap_with_const_builder<'a, 'b, S>( + items: &'a LiteMap<&'b ByteStr, usize, S>, ) -> Result where - S: litemap::store::StoreSlice<&'a [u8], usize, Slice = [(&'a [u8], usize)]>, + S: litemap::store::StoreSlice<&'b ByteStr, usize, Slice = [(&'b ByteStr, usize)]>, { - let tuples = items.as_slice(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(tuples); + let byte_str_slice = items.as_slice(); ZeroTrieBuilderConst::<10000>::from_sorted_const_tuple_slice::<100>(byte_str_slice.into()) .map(|s| Self { store: s.as_bytes().to_vec(), @@ -30,18 +29,17 @@ impl ZeroTrieSimpleAscii> { } } -impl<'a, K, S> TryFrom<&'a LiteMap> for ZeroTrie> +impl<'a, 'b, K, S> TryFrom<&'a LiteMap> for ZeroTrie> where // Borrow, not AsRef, because we rely on Ord being the same. Unfortunately // this means `LiteMap<&str, usize>` does not work. - K: Borrow<[u8]>, + K: Borrow, S: litemap::store::StoreSlice, { type Error = ZeroTrieBuildError; fn try_from(items: &LiteMap) -> Result { - let byte_litemap = items.to_borrowed_keys::<[u8], Vec<_>>(); - let byte_slice = byte_litemap.as_slice(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(byte_slice); + let byte_litemap = items.to_borrowed_keys::>(); + let byte_str_slice = byte_litemap.as_slice(); Self::try_from_tuple_slice(byte_str_slice) } } diff --git a/utils/zerotrie/src/builder/mod.rs b/utils/zerotrie/src/builder/mod.rs index 8086cc14dbd..1e3830bc46a 100644 --- a/utils/zerotrie/src/builder/mod.rs +++ b/utils/zerotrie/src/builder/mod.rs @@ -152,7 +152,7 @@ mod litemap; #[cfg(feature = "alloc")] pub(crate) mod nonconst; -use bytestr::ByteStr; +pub use bytestr::ByteStr; use super::ZeroTrieSimpleAscii; diff --git a/utils/zerotrie/src/builder/nonconst/builder.rs b/utils/zerotrie/src/builder/nonconst/builder.rs index c1b470a873f..5ccd58dde63 100644 --- a/utils/zerotrie/src/builder/nonconst/builder.rs +++ b/utils/zerotrie/src/builder/nonconst/builder.rs @@ -9,11 +9,9 @@ use super::store::NonConstLengthsStack; use super::store::TrieBuilderStore; use crate::builder::bytestr::ByteStr; use crate::byte_phf::PerfectByteHashMapCacheOwned; -use crate::comparison; use crate::error::ZeroTrieBuildError; use crate::options::*; use crate::varint; -use alloc::borrow::Cow; use alloc::vec::Vec; /// A low-level builder for ZeroTrie. Supports all options. @@ -102,11 +100,10 @@ impl ZeroTrieBuilder { let items = Vec::<(K, usize)>::from_iter(iter); let mut items = items .iter() - .map(|(k, v)| (k.as_ref(), *v)) - .collect::>(); - items.sort_by(|a, b| cmp_keys_values(&options, *a, *b)); - let ascii_str_slice = items.as_slice(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(ascii_str_slice); + .map(|(k, v)| (ByteStr::from_bytes(k.as_ref()), *v)) + .collect::>(); + items.sort_by(|a, b| cmp_keys_values(*a, *b)); + let byte_str_slice = items.as_slice(); Self::from_sorted_tuple_slice(byte_str_slice, options) } @@ -121,12 +118,14 @@ impl ZeroTrieBuilder { options: ZeroTrieBuilderOptions, ) -> Result { for ab in items.windows(2) { - debug_assert!(cmp_keys_values( - &options, - (ab[0].0.as_bytes(), ab[0].1), - (ab[1].0.as_bytes(), ab[1].1) - ) - .is_lt(), "{ab:?}"); + debug_assert!( + cmp_keys_values( + (&ab[0].0, ab[0].1), + (&ab[1].0, ab[1].1) + ) + .is_lt(), + "{ab:?}" + ); } let mut result = Self { data: S::atbs_new_empty(), @@ -389,11 +388,8 @@ impl ZeroTrieBuilder { } fn cmp_keys_values( - options: &ZeroTrieBuilderOptions, - a: (&[u8], usize), - b: (&[u8], usize), + a: (&ByteStr, usize), + b: (&ByteStr, usize), ) -> Ordering { - let a_iter = a.0.iter().copied().map(comparison::shift); - let b_iter = b.0.iter().copied().map(comparison::shift); - Iterator::cmp(a_iter, b_iter).then_with(|| a.1.cmp(&b.1)) + a.0.cmp(b.0).then_with(|| a.1.cmp(&b.1)) } diff --git a/utils/zerotrie/src/comparison.rs b/utils/zerotrie/src/comparison.rs index ba24b196223..debdf2bc4b5 100644 --- a/utils/zerotrie/src/comparison.rs +++ b/utils/zerotrie/src/comparison.rs @@ -21,6 +21,13 @@ pub(crate) fn cmpi(a: u8, b: u8) -> Ordering { shift(a.to_ascii_lowercase()).cmp(&shift(b.to_ascii_lowercase())) } +#[inline] +pub(crate) fn cmp_slices(a: &[u8], b: &[u8]) -> Ordering { + let a_iter = a.iter().copied().map(shift); + let b_iter = b.iter().copied().map(shift); + Iterator::cmp(a_iter, b_iter) +} + #[test] fn test_basic_cmp() { let mut all_bytes = (0u8..=255u8).collect::>(); diff --git a/utils/zerotrie/src/lib.rs b/utils/zerotrie/src/lib.rs index 7e2680d68e6..c82977a0bc0 100644 --- a/utils/zerotrie/src/lib.rs +++ b/utils/zerotrie/src/lib.rs @@ -74,6 +74,7 @@ pub use crate::zerotrie::ZeroTrieExtendedCapacity; pub use crate::zerotrie::ZeroTriePerfectHash; pub use crate::zerotrie::ZeroTrieSimpleAscii; pub use error::ZeroTrieBuildError; +pub use builder::ByteStr; #[cfg(feature = "alloc")] pub use crate::zerotrie::ZeroTrieStringIterator; diff --git a/utils/zerotrie/src/serde.rs b/utils/zerotrie/src/serde.rs index 48ae87193e8..d7bf2f54fe8 100644 --- a/utils/zerotrie/src/serde.rs +++ b/utils/zerotrie/src/serde.rs @@ -359,6 +359,7 @@ where #[cfg(test)] mod testdata { + use crate::ByteStr; include!("../tests/data/data.rs"); } diff --git a/utils/zerotrie/src/zerotrie.rs b/utils/zerotrie/src/zerotrie.rs index 21d6b430de2..fdc577b271e 100644 --- a/utils/zerotrie/src/zerotrie.rs +++ b/utils/zerotrie/src/zerotrie.rs @@ -483,17 +483,16 @@ macro_rules! impl_zerotrie_subtype { #[cfg(feature = "litemap")] impl<'a, K, S> TryFrom<&'a LiteMap> for $name> where - K: Borrow<[u8]>, + K: Borrow, S: litemap::store::StoreIterable<'a, K, usize>, { type Error = crate::error::ZeroTrieBuildError; fn try_from(map: &'a LiteMap) -> Result { - let tuples: Vec<(&[u8], usize)> = map + let byte_str_slice: Vec<(&ByteStr, usize)> = map .iter() .map(|(k, v)| (k.borrow(), *v)) .collect(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples); - Self::try_from_tuple_slice(byte_str_slice) + Self::try_from_tuple_slice(&byte_str_slice) } } #[cfg(feature = "litemap")] @@ -799,11 +798,10 @@ where fn from_iter>(iter: T) -> Self { // We need two Vecs because the first one anchors the `K`s that the second one borrows. let items = Vec::from_iter(iter); - let mut items: Vec<(&[u8], usize)> = items.iter().map(|(k, v)| (k.as_ref(), *v)).collect(); + let mut items: Vec<(&ByteStr, usize)> = items.iter().map(|(k, v)| (ByteStr::from_bytes(k.as_ref()), *v)).collect(); items.sort(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(&items); #[allow(clippy::unwrap_used)] // FromIterator is panicky - Self::try_from_tuple_slice(byte_str_slice).unwrap() + Self::try_from_tuple_slice(&items).unwrap() } } diff --git a/utils/zerotrie/tests/asciitrie_test.rs b/utils/zerotrie/tests/asciitrie_test.rs index ae9dcbcf9e3..5e83bd42239 100644 --- a/utils/zerotrie/tests/asciitrie_test.rs +++ b/utils/zerotrie/tests/asciitrie_test.rs @@ -9,6 +9,7 @@ use zerotrie::ZeroTrieSimpleAscii; use zerovec::ZeroMap; mod testdata { + use zerotrie::ByteStr; include!("data/data.rs"); } diff --git a/utils/zerotrie/tests/builder_test.rs b/utils/zerotrie/tests/builder_test.rs index d92ad8520dc..7aa6fa03e03 100644 --- a/utils/zerotrie/tests/builder_test.rs +++ b/utils/zerotrie/tests/builder_test.rs @@ -5,8 +5,10 @@ use litemap::LiteMap; use zerotrie::ZeroTriePerfectHash; use zerotrie::ZeroTrieSimpleAscii; +use zerotrie::ByteStr; mod testdata { + use zerotrie::ByteStr; include!("data/data.rs"); } @@ -23,13 +25,13 @@ macro_rules! assert_bytes_eq { }; } -fn check_simple_ascii_trie(items: &LiteMap<&[u8], usize>, trie: &ZeroTrieSimpleAscii) +fn check_simple_ascii_trie(items: &LiteMap<&ByteStr, usize>, trie: &ZeroTrieSimpleAscii) where S: AsRef<[u8]> + ?Sized, { // Check that each item is in the trie for (k, v) in items.iter() { - assert_eq!(trie.get(k), Some(*v)); + assert_eq!(trie.get(k.as_bytes()), Some(*v)); } // Check that some items are not in the trie for s in NON_EXISTENT_STRINGS.iter() { @@ -38,20 +40,20 @@ where // Check that the iterator returns items in the same order as the LiteMap assert!(items .iter() - .map(|(s, v)| (String::from_utf8(s.to_vec()).unwrap(), *v)) + .map(|(s, v)| (String::from_utf8(s.as_bytes().to_vec()).unwrap(), *v)) .eq(trie.iter())); // Check that the const builder works let const_trie = ZeroTrieSimpleAscii::try_from_litemap_with_const_builder(items).unwrap(); assert_eq!(trie.as_bytes(), const_trie.as_bytes()); } -fn check_phf_ascii_trie(items: &LiteMap<&[u8], usize>, trie: &ZeroTriePerfectHash) +fn check_phf_ascii_trie(items: &LiteMap<&ByteStr, usize>, trie: &ZeroTriePerfectHash) where S: AsRef<[u8]> + ?Sized, { // Check that each item is in the trie for (k, v) in items.iter() { - assert_eq!(trie.get(k), Some(*v)); + assert_eq!(trie.get(k.as_bytes()), Some(*v)); } // Check that some items are not in the trie for s in NON_EXISTENT_STRINGS.iter() { @@ -59,20 +61,20 @@ where } // Check that the iterator returns the contents of the LiteMap // Note: Since the items might not be in order, we collect them into a new LiteMap - let recovered_items: LiteMap<_, _> = trie.iter().collect(); + let recovered_items: LiteMap, usize> = trie.iter().map(|(k, v)| (ByteStr::from_boxed_bytes(k.into_boxed_slice()), v)).collect(); assert_eq!( - items.to_borrowed_keys_values::<[u8], usize, Vec<_>>(), + items.to_borrowed_keys_values::>(), recovered_items.to_borrowed_keys_values() ); } -fn check_phf_bytes_trie(items: &LiteMap<&[u8], usize>, trie: &ZeroTriePerfectHash) +fn check_phf_bytes_trie(items: &LiteMap<&ByteStr, usize>, trie: &ZeroTriePerfectHash) where S: AsRef<[u8]> + ?Sized, { // Check that each item is in the trie for (k, v) in items.iter() { - assert_eq!(trie.get(k), Some(*v), "{k:?}"); + assert_eq!(trie.get(k.as_bytes()), Some(*v), "{k:?}"); } // Check that some items are not in the trie for s in NON_EXISTENT_STRINGS.iter() { @@ -80,19 +82,19 @@ where } // Check that the iterator returns the contents of the LiteMap // Note: Since the items might not be in order, we collect them into a new LiteMap - let recovered_items: LiteMap<_, _> = trie.iter().collect(); + let recovered_items: LiteMap, usize> = trie.iter().map(|(k, v)| (ByteStr::from_boxed_bytes(k.into_boxed_slice()), v)).collect(); assert_eq!( - items.to_borrowed_keys_values::<[u8], usize, Vec<_>>(), + items.to_borrowed_keys_values::>(), recovered_items.to_borrowed_keys_values() ); } #[test] fn test_basic() { - let lm1a: LiteMap<&[u8], usize> = testdata::basic::DATA_ASCII.iter().copied().collect(); - let lm1b: LiteMap<&[u8], usize> = lm1a.to_borrowed_keys(); - let lm2: LiteMap<&[u8], usize> = testdata::basic::DATA_UNICODE.iter().copied().collect(); - let lm3: LiteMap<&[u8], usize> = testdata::basic::DATA_BINARY.iter().copied().collect(); + let lm1a: LiteMap<&ByteStr, usize> = testdata::basic::DATA_ASCII.iter().map(|(k, v)| (ByteStr::from_bytes(k), *v)).collect(); + let lm1b: LiteMap<&ByteStr, usize> = lm1a.to_borrowed_keys(); + let lm2: LiteMap<&ByteStr, usize> = testdata::basic::DATA_UNICODE.iter().map(|(k, v)| (ByteStr::from_bytes(k), *v)).collect(); + let lm3: LiteMap<&ByteStr, usize> = testdata::basic::DATA_BINARY.iter().map(|(k, v)| (ByteStr::from_bytes(k), *v)).collect(); let expected_bytes = testdata::basic::TRIE_ASCII; let trie = ZeroTrieSimpleAscii::try_from(&lm1a).unwrap(); @@ -116,7 +118,7 @@ fn test_basic() { #[test] fn test_empty() { - let trie = ZeroTrieSimpleAscii::try_from(&LiteMap::<&[u8], usize>::new_vec()).unwrap(); + let trie = ZeroTrieSimpleAscii::try_from(&LiteMap::<&ByteStr, usize>::new_vec()).unwrap(); assert_eq!(trie.byte_len(), 0); assert!(trie.is_empty()); assert_eq!(trie.get(b""), None); @@ -125,8 +127,8 @@ fn test_empty() { #[test] fn test_single_empty_value() { - let litemap: LiteMap<&[u8], usize> = [ - (&b""[..], 10), // + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str(""), 10), // ] .into_iter() .collect(); @@ -136,16 +138,15 @@ fn test_single_empty_value() { let expected_bytes = &[0b10001010]; assert_eq!(trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(1, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_single_byte_string() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"x"[..], 10), // + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("x"), 10), // ] .into_iter() .collect(); @@ -156,16 +157,15 @@ fn test_single_byte_string() { let expected_bytes = &[b'x', 0b10001010]; assert_bytes_eq!(2, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(2, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_single_string() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"xyz"[..], 10), // + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("xyz"), 10), // ] .into_iter() .collect(); @@ -178,15 +178,14 @@ fn test_single_string() { let expected_bytes = &[b'x', b'y', b'z', 0b10001010]; assert_bytes_eq!(4, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(4, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_prefix_strings() { - let litemap: LiteMap<&[u8], usize> = [(&b"x"[..], 0), (b"xy", 1)].into_iter().collect(); + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str("x"), 0), (ByteStr::from_str("xy"), 1)].into_iter().collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); assert_eq!(trie.get(b""), None); assert_eq!(trie.get(b"xyz"), None); @@ -194,15 +193,14 @@ fn test_prefix_strings() { let expected_bytes = &[b'x', 0b10000000, b'y', 0b10000001]; assert_bytes_eq!(4, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(4, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_single_byte_branch() { - let litemap: LiteMap<&[u8], usize> = [(&b"x"[..], 0), (b"y", 1)].into_iter().collect(); + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str("x"), 0), (ByteStr::from_str("y"), 1)].into_iter().collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); assert_eq!(trie.get(b""), None); assert_eq!(trie.get(b"xy"), None); @@ -210,15 +208,14 @@ fn test_single_byte_branch() { let expected_bytes = &[0b11000010, b'x', b'y', 1, 0b10000000, 0b10000001]; assert_bytes_eq!(6, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(6, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_multi_byte_branch() { - let litemap: LiteMap<&[u8], usize> = [(&b"axb"[..], 0), (b"ayc", 1)].into_iter().collect(); + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str("axb"), 0), (ByteStr::from_str("ayc"), 1)].into_iter().collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); assert_eq!(trie.get(b""), None); assert_eq!(trie.get(b"a"), None); @@ -230,15 +227,14 @@ fn test_multi_byte_branch() { ]; assert_bytes_eq!(9, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(9, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_linear_varint_values() { - let litemap: LiteMap<&[u8], usize> = [(&b""[..], 100), (b"x", 500), (b"xyz", 5000)] + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str(""), 100), (ByteStr::from_str("x"), 500), (ByteStr::from_str("xyz"), 5000)] .into_iter() .collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); @@ -249,15 +245,14 @@ fn test_linear_varint_values() { let expected_bytes = &[0x90, 0x54, b'x', 0x93, 0x64, b'y', b'z', 0x90, 0x96, 0x78]; assert_bytes_eq!(10, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(10, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_bug() { - let litemap: LiteMap<&[u8], usize> = [(&b"abc"[..], 100), (b"abcd", 500), (b"abcde", 5000)] + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str("abc"), 100), (ByteStr::from_str("abcd"), 500), (ByteStr::from_str("abcde"), 5000)] .into_iter() .collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); @@ -266,16 +261,15 @@ fn test_bug() { assert_eq!(trie.get(b"abCD"), None); check_simple_ascii_trie(&litemap, &trie); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_varint_branch() { let chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - let litemap: LiteMap<&[u8], usize> = (0..chars.len()) - .map(|i| (chars.get(i..i + 1).unwrap().as_bytes(), i)) + let litemap: LiteMap<&ByteStr, usize> = (0..chars.len()) + .map(|i| (ByteStr::from_str(chars.get(i..i + 1).unwrap()), i)) .collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); assert_eq!(trie.get(b""), None); @@ -347,25 +341,24 @@ fn test_varint_branch() { 0x80 | 13, 0x80 | 14, 0x90, 16, 0x90, 10, 0x90, 11, 0x90, 12, 0x90, 29, 0x90, 13, 0x90, 15, 0x90, 14, ]; - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(246, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_below_wide() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"abcdefghijklmnopqrstuvwxyz"[..], 1), - (b"bcdefghijklmnopqrstuvwxyza", 2), - (b"cdefghijklmnopqrstuvwxyzab", 3), - (b"defghijklmnopqrstuvwxyzabc", 4), - (b"efghijklmnopqrstuvwxyzabcd", 5), - (b"fghijklmnopqrstuvwxyzabcde", 6), - (b"ghijklmnopqrstuvwxyzabcdef", 7), - (b"hijklmnopqrstuvwxyzabcdefg", 8), - (b"ijklmnopqrstuvwxyzabcdefgh", 9), - (b"jklmnopqrstuvwxyzabcd", 10), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("abcdefghijklmnopqrstuvwxyz"), 1), + (ByteStr::from_str("bcdefghijklmnopqrstuvwxyza"), 2), + (ByteStr::from_str("cdefghijklmnopqrstuvwxyzab"), 3), + (ByteStr::from_str("defghijklmnopqrstuvwxyzabc"), 4), + (ByteStr::from_str("efghijklmnopqrstuvwxyzabcd"), 5), + (ByteStr::from_str("fghijklmnopqrstuvwxyzabcde"), 6), + (ByteStr::from_str("ghijklmnopqrstuvwxyzabcdef"), 7), + (ByteStr::from_str("hijklmnopqrstuvwxyzabcdefg"), 8), + (ByteStr::from_str("ijklmnopqrstuvwxyzabcdefgh"), 9), + (ByteStr::from_str("jklmnopqrstuvwxyzabcd"), 10), ] .into_iter() .collect(); @@ -417,17 +410,17 @@ fn test_below_wide() { #[test] fn test_at_wide() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"abcdefghijklmnopqrstuvwxyz"[..], 1), - (b"bcdefghijklmnopqrstuvwxyza", 2), - (b"cdefghijklmnopqrstuvwxyzab", 3), - (b"defghijklmnopqrstuvwxyzabc", 4), - (b"efghijklmnopqrstuvwxyzabcd", 5), - (b"fghijklmnopqrstuvwxyzabcde", 6), - (b"ghijklmnopqrstuvwxyzabcdef", 7), - (b"hijklmnopqrstuvwxyzabcdefg", 8), - (b"ijklmnopqrstuvwxyzabcdefgh", 9), - (b"jklmnopqrstuvwxyzabcde", 10), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("abcdefghijklmnopqrstuvwxyz"), 1), + (ByteStr::from_str("bcdefghijklmnopqrstuvwxyza"), 2), + (ByteStr::from_str("cdefghijklmnopqrstuvwxyzab"), 3), + (ByteStr::from_str("defghijklmnopqrstuvwxyzabc"), 4), + (ByteStr::from_str("efghijklmnopqrstuvwxyzabcd"), 5), + (ByteStr::from_str("fghijklmnopqrstuvwxyzabcde"), 6), + (ByteStr::from_str("ghijklmnopqrstuvwxyzabcdef"), 7), + (ByteStr::from_str("hijklmnopqrstuvwxyzabcdefg"), 8), + (ByteStr::from_str("ijklmnopqrstuvwxyzabcdefgh"), 9), + (ByteStr::from_str("jklmnopqrstuvwxyzabcde"), 10), ] .into_iter() .collect(); @@ -481,17 +474,17 @@ fn test_at_wide() { #[test] fn test_at_wide_plus() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"abcdefghijklmnopqrstuvwxyz"[..], 1), - (b"bcdefghijklmnopqrstuvwxyza", 2), - (b"cdefghijklmnopqrstuvwxyzab", 3), - (b"defghijklmnopqrstuvwxyzabc", 4), - (b"efghijklmnopqrstuvwxyzabcd", 5), - (b"fghijklmnopqrstuvwxyzabcde", 6), - (b"ghijklmnopqrstuvwxyzabcdef", 7), - (b"hijklmnopqrstuvwxyzabcdefg", 8), - (b"ijklmnopqrstuvwxyzabcdefgh", 9), - (b"jklmnopqrstuvwxyzabcdef", 10), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("abcdefghijklmnopqrstuvwxyz"), 1), + (ByteStr::from_str("bcdefghijklmnopqrstuvwxyza"), 2), + (ByteStr::from_str("cdefghijklmnopqrstuvwxyzab"), 3), + (ByteStr::from_str("defghijklmnopqrstuvwxyzabc"), 4), + (ByteStr::from_str("efghijklmnopqrstuvwxyzabcd"), 5), + (ByteStr::from_str("fghijklmnopqrstuvwxyzabcde"), 6), + (ByteStr::from_str("ghijklmnopqrstuvwxyzabcdef"), 7), + (ByteStr::from_str("hijklmnopqrstuvwxyzabcdefg"), 8), + (ByteStr::from_str("ijklmnopqrstuvwxyzabcdefgh"), 9), + (ByteStr::from_str("jklmnopqrstuvwxyzabcdef"), 10), ] .into_iter() .collect(); @@ -545,16 +538,16 @@ fn test_at_wide_plus() { #[test] fn test_everything() { - let litemap: LiteMap<&[u8], usize> = [ - (&b""[..], 0), - (b"axb", 100), - (b"ayc", 2), - (b"azd", 3), - (b"bxe", 4), - (b"bxefg", 500), - (b"bxefh", 6), - (b"bxei", 7), - (b"bxeikl", 8), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str(""), 0), + (ByteStr::from_str("axb"), 100), + (ByteStr::from_str("ayc"), 2), + (ByteStr::from_str("azd"), 3), + (ByteStr::from_str("bxe"), 4), + (ByteStr::from_str("bxefg"), 500), + (ByteStr::from_str("bxefh"), 6), + (ByteStr::from_str("bxei"), 7), + (ByteStr::from_str("bxeikl"), 8), ] .into_iter() .collect(); @@ -643,24 +636,23 @@ fn test_everything() { b'l', // 0b10001000, // value 8 ]; - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(36, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); - let zhm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 73); - let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 63); - let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 146); - let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 136); } @@ -675,19 +667,19 @@ macro_rules! utf8_byte { #[test] fn test_non_ascii() { - let litemap: LiteMap<&[u8], usize> = [ - ("".as_bytes(), 0), - ("axb".as_bytes(), 100), - ("ayc".as_bytes(), 2), - ("azd".as_bytes(), 3), - ("bxe".as_bytes(), 4), - ("bxefg".as_bytes(), 500), - ("bxefh".as_bytes(), 6), - ("bxei".as_bytes(), 7), - ("bxeikl".as_bytes(), 8), - ("bxeiklmΚαλημέρααα".as_bytes(), 9), - ("bxeiklmαnλo".as_bytes(), 10), - ("bxeiklmη".as_bytes(), 11), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str(""), 0), + (ByteStr::from_str("axb"), 100), + (ByteStr::from_str("ayc"), 2), + (ByteStr::from_str("azd"), 3), + (ByteStr::from_str("bxe"), 4), + (ByteStr::from_str("bxefg"), 500), + (ByteStr::from_str("bxefh"), 6), + (ByteStr::from_str("bxei"), 7), + (ByteStr::from_str("bxeikl"), 8), + (ByteStr::from_str("bxeiklmΚαλημέρααα"), 9), + (ByteStr::from_str("bxeiklmαnλo"), 10), + (ByteStr::from_str("bxeiklmη"), 11), ] .into_iter() .collect(); @@ -734,11 +726,20 @@ fn test_non_ascii() { 0b10100001, // span of length 1 utf8_byte!('Κ', 0), // NOTE: all three letters have the same lead byte 0b11000011, // branch of 3 - utf8_byte!('Κ', 1), utf8_byte!('α', 1), utf8_byte!('η', 1), - 21, - 27, + utf8_byte!('Κ', 1), + 6, + 7, + // 21, + // 27, + b'n', + 0b10100010, // span of length 2 + utf8_byte!('λ', 0), + utf8_byte!('λ', 1), + b'o', + 0b10001010, // value 10 + 0b10001011, // value 11 0b10110000, // span of length 18 (lead) 0b00000010, // span of length 18 (trail) utf8_byte!('α', 0), @@ -760,13 +761,6 @@ fn test_non_ascii() { utf8_byte!('α', 0), utf8_byte!('α', 1), 0b10001001, // value 9 - b'n', - 0b10100010, // span of length 2 - utf8_byte!('λ', 0), - utf8_byte!('λ', 1), - b'o', - 0b10001010, // value 10 - 0b10001011, // value 11 ]; let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(73, trie_phf.as_bytes(), expected_bytes); @@ -776,15 +770,15 @@ fn test_non_ascii() { #[test] fn test_max_branch() { // Evaluate a branch with all 256 possible children - let mut litemap: LiteMap<&[u8], usize> = LiteMap::new_vec(); + let mut litemap: LiteMap<&ByteStr, usize> = LiteMap::new_vec(); let all_bytes: Vec = (u8::MIN..=u8::MAX).collect(); assert_eq!(all_bytes.len(), 256); let all_bytes_prefixed: Vec<[u8; 2]> = (u8::MIN..=u8::MAX).map(|x| [b'\0', x]).collect(); for b in all_bytes.iter() { - litemap.insert(core::slice::from_ref(b), *b as usize); + litemap.insert(ByteStr::from_bytes(core::slice::from_ref(b)), *b as usize); } for s in all_bytes_prefixed.iter() { - litemap.insert(s, s[1] as usize); + litemap.insert(ByteStr::from_bytes(s), s[1] as usize); } let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_eq!(trie_phf.byte_len(), 3042); @@ -799,24 +793,23 @@ fn test_short_subtags_10pct() { assert_eq!(trie.byte_len(), 1050); check_simple_ascii_trie(&litemap, &trie); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_eq!(trie_phf.byte_len(), 1100); check_phf_ascii_trie(&litemap, &trie_phf); - let zhm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 1329); - let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 1328); - let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 2835); - let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 2834); } @@ -829,24 +822,23 @@ fn test_short_subtags() { assert_eq!(trie.byte_len(), 8793); check_simple_ascii_trie(&litemap, &trie); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_eq!(trie_phf.byte_len(), 9400); check_phf_ascii_trie(&litemap, &trie_phf); - let zm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zm).unwrap(); assert_eq!(zhm_buf.len(), 15180); - let zm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zm).unwrap(); assert_eq!(zhm_buf.len(), 13302); - let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 30198); - let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 28320); } diff --git a/utils/zerotrie/tests/data/data.rs b/utils/zerotrie/tests/data/data.rs index 2db9b0f3b42..0d7a63640d5 100644 --- a/utils/zerotrie/tests/data/data.rs +++ b/utils/zerotrie/tests/data/data.rs @@ -19,13 +19,12 @@ const fn single_byte_branch_equal(x: u8) -> u8 { use single_byte_branch_equal as single_byte_short_match; #[allow(dead_code)] -pub fn strings_to_litemap<'a>(strings: &[&'a str]) -> LiteMap<&'a [u8], usize> { +pub fn strings_to_litemap<'a>(strings: &[&'a str]) -> LiteMap<&'a ByteStr, usize> { strings .iter() .copied() - .map(|x| x.as_bytes()) .enumerate() - .map(|(i, s)| (s, i)) + .map(|(i, s)| (ByteStr::from_str(s), i)) .collect() } diff --git a/utils/zerotrie/tests/ignorecase_test.rs b/utils/zerotrie/tests/ignorecase_test.rs index fb73ef7cc33..ff9c38caa0c 100644 --- a/utils/zerotrie/tests/ignorecase_test.rs +++ b/utils/zerotrie/tests/ignorecase_test.rs @@ -3,8 +3,10 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use zerotrie::ZeroAsciiIgnoreCaseTrie; +use zerotrie::ByteStr; mod testdata { + use zerotrie::ByteStr; include!("data/data.rs"); } @@ -18,18 +20,20 @@ fn test_ignore_case_coverage() { ZeroAsciiIgnoreCaseTrie::try_from(&litemap).unwrap(); let trie = litemap .iter() - .map(|(k, v)| (*k, *v)) + .map(|(k, v)| (k.as_bytes(), *v)) .collect::>>(); // Test lookup for (k, v) in litemap.iter() { - assert_eq!(trie.get(k), Some(*v), "normal: {k:?}"); + assert_eq!(trie.get(k.as_bytes()), Some(*v), "normal: {k:?}"); let k_upper = k + .as_bytes() .iter() .map(|c| c.to_ascii_uppercase()) .collect::>(); assert_eq!(trie.get(k_upper), Some(*v), "upper: {k:?}"); let k_lower = k + .as_bytes() .iter() .map(|c| c.to_ascii_lowercase()) .collect::>(); @@ -40,7 +44,7 @@ fn test_ignore_case_coverage() { let problematic_strs = &["A", "ab", "abc", "aBcd", "aBcgHi"]; for problematic_str in problematic_strs { let mut litemap = litemap.clone(); - litemap.insert(problematic_str.as_bytes(), 100); + litemap.insert(ByteStr::from_str(problematic_str), 100); ZeroAsciiIgnoreCaseTrie::try_from(&litemap).expect_err(problematic_str); } } diff --git a/utils/zerotrie/tests/locale_aux_test.rs b/utils/zerotrie/tests/locale_aux_test.rs index eea27af7842..177d39b19c3 100644 --- a/utils/zerotrie/tests/locale_aux_test.rs +++ b/utils/zerotrie/tests/locale_aux_test.rs @@ -10,8 +10,10 @@ use writeable::Writeable; use zerotrie::ZeroTriePerfectHash; use zerotrie::ZeroTrieSimpleAscii; use zerovec::VarZeroVec; +use zerotrie::ByteStr; mod testdata { + use zerotrie::ByteStr; include!("data/data.rs"); } @@ -91,7 +93,7 @@ fn test_aux_split() { let mut total_simpleascii_len = 0; let mut total_perfecthash_len = 0; let mut total_vzv_len = 0; - let mut unique_locales = BTreeSet::new(); + let mut unique_locales = BTreeSet::>::new(); for private in aux_keys.iter() { let current_locales: Vec = locales .iter() @@ -102,10 +104,10 @@ fn test_aux_split() { l }) .collect(); - let litemap: LiteMap, usize> = current_locales + let litemap: LiteMap, usize> = current_locales .iter() .map(|l| { - (l.write_to_string().into_owned().into_bytes(), { + (ByteStr::from_boxed_bytes(l.write_to_string().into_owned().into_bytes().into_boxed_slice()), { cumulative_index += 1; cumulative_index - 1 }) @@ -118,8 +120,8 @@ fn test_aux_split() { let trie = ZeroTriePerfectHash::try_from(&litemap).unwrap(); total_perfecthash_len += trie.byte_len(); - for k in litemap.iter_keys() { - unique_locales.insert(k.clone()); + for (k, _) in litemap.into_iter() { + unique_locales.insert(k); } let strs: Vec = current_locales