From a27bf1f20dd07bdb42a131b1f695acf1e5575669 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Sat, 23 Nov 2024 20:38:43 -0800 Subject: [PATCH] Fix get_parameterized; migrate tests --- utils/zerotrie/src/builder/bytestr.rs | 6 ++++ utils/zerotrie/src/reader.rs | 6 +++- utils/zerotrie/src/zerotrie.rs | 45 +++++++++++++++------------ utils/zerotrie/tests/builder_test.rs | 43 +++++++++++++------------ 4 files changed, 57 insertions(+), 43 deletions(-) diff --git a/utils/zerotrie/src/builder/bytestr.rs b/utils/zerotrie/src/builder/bytestr.rs index 3a098154729..80ba4c3e256 100644 --- a/utils/zerotrie/src/builder/bytestr.rs +++ b/utils/zerotrie/src/builder/bytestr.rs @@ -140,6 +140,12 @@ impl AsRef for ByteStr { } } +impl<'a> From<&'a str> for &'a ByteStr { + fn from(other: &'a str) -> Self { + ByteStr::from_str(other) + } +} + impl fmt::Debug for ByteStr { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { if let Ok(s) = core::str::from_utf8(self.as_bytes()) { diff --git a/utils/zerotrie/src/reader.rs b/utils/zerotrie/src/reader.rs index a22952a01fa..400cca5ddbd 100644 --- a/utils/zerotrie/src/reader.rs +++ b/utils/zerotrie/src/reader.rs @@ -368,7 +368,11 @@ pub(crate) fn get_parameterized( if matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly) || x < 16 { // binary search (search, trie) = trie.debug_split_at(x); - let bsearch_result = search.binary_search(c); + let bsearch_result = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) { + search.binary_search_by(|p| comparison::cmpi(*p, *c)) + } else { + search.binary_search_by(|p| comparison::cmp(*p, *c)) + }; i = bsearch_result.ok()?; } else { // phf diff --git a/utils/zerotrie/src/zerotrie.rs b/utils/zerotrie/src/zerotrie.rs index fdc577b271e..b113de8ca65 100644 --- a/utils/zerotrie/src/zerotrie.rs +++ b/utils/zerotrie/src/zerotrie.rs @@ -47,11 +47,12 @@ use litemap::LiteMap; /// ``` /// use litemap::LiteMap; /// use zerotrie::ZeroTrie; +/// use zerotrie::ByteStr; /// -/// let mut map = LiteMap::<&[u8], usize>::new_vec(); -/// map.insert("foo".as_bytes(), 1); -/// map.insert("bar".as_bytes(), 2); -/// map.insert("bazzoo".as_bytes(), 3); +/// let mut map = LiteMap::<&ByteStr, usize>::new_vec(); +/// map.insert("foo".into(), 1); +/// map.insert("bar".into(), 2); +/// map.insert("bazzoo".into(), 3); /// /// let trie = ZeroTrie::try_from(&map)?; /// @@ -82,12 +83,13 @@ pub(crate) enum ZeroTrieFlavor { /// /// ``` /// use litemap::LiteMap; +/// use zerotrie::ByteStr; /// use zerotrie::ZeroTrieSimpleAscii; /// -/// let mut map = LiteMap::new_vec(); -/// map.insert(&b"foo"[..], 1); -/// map.insert(b"bar", 2); -/// map.insert(b"bazzoo", 3); +/// let mut map = LiteMap::<&ByteStr, _>::new_vec(); +/// map.insert("foo".into(), 1); +/// map.insert("bar".into(), 2); +/// map.insert("bazzoo".into(), 3); /// /// let trie = ZeroTrieSimpleAscii::try_from(&map)?; /// @@ -134,12 +136,13 @@ impl ZeroTrieSimpleAscii { /// /// ``` /// use litemap::LiteMap; +/// use zerotrie::ByteStr; /// use zerotrie::ZeroAsciiIgnoreCaseTrie; /// -/// let mut map = LiteMap::new_vec(); -/// map.insert(&b"foo"[..], 1); -/// map.insert(b"Bar", 2); -/// map.insert(b"Bazzoo", 3); +/// let mut map = LiteMap::<&ByteStr, _>::new_vec(); +/// map.insert("foo".into(), 1); +/// map.insert("Bar".into(), 2); +/// map.insert("Bazzoo".into(), 3); /// /// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?; /// @@ -156,14 +159,15 @@ impl ZeroTrieSimpleAscii { /// /// ``` /// use litemap::LiteMap; +/// use zerotrie::ByteStr; /// use zerotrie::ZeroAsciiIgnoreCaseTrie; /// -/// let mut map = LiteMap::new_vec(); -/// map.insert(&b"bar"[..], 1); +/// let mut map = LiteMap::<&ByteStr, _>::new_vec(); +/// map.insert("bar".into(), 1); /// // OK: 'r' and 'Z' are different letters -/// map.insert(b"baZ", 2); +/// map.insert("baZ".into(), 2); /// // Bad: we already inserted 'r' so we cannot also insert 'R' at the same position -/// map.insert(b"baR", 2); +/// map.insert("baR".into(), 2); /// /// ZeroAsciiIgnoreCaseTrie::try_from(&map).expect_err("mixed-case strings!"); /// ``` @@ -187,12 +191,13 @@ pub struct ZeroAsciiIgnoreCaseTrie { /// /// ``` /// use litemap::LiteMap; +/// use zerotrie::ByteStr; /// use zerotrie::ZeroTriePerfectHash; /// -/// let mut map = LiteMap::<&[u8], usize>::new_vec(); -/// map.insert("foo".as_bytes(), 1); -/// map.insert("bår".as_bytes(), 2); -/// map.insert("båzzøø".as_bytes(), 3); +/// let mut map = LiteMap::<&ByteStr, usize>::new_vec(); +/// map.insert("foo".into(), 1); +/// map.insert("bår".into(), 2); +/// map.insert("båzzøø".into(), 3); /// /// let trie = ZeroTriePerfectHash::try_from(&map)?; /// diff --git a/utils/zerotrie/tests/builder_test.rs b/utils/zerotrie/tests/builder_test.rs index 7aa6fa03e03..e8e22d69dfb 100644 --- a/utils/zerotrie/tests/builder_test.rs +++ b/utils/zerotrie/tests/builder_test.rs @@ -281,29 +281,28 @@ fn test_varint_branch() { 0b11100000, // branch varint lead 0x14, // branch varint trail // search array: - b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', - b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', - b'U', b'V', b'W', b'X', b'Y', b'Z', - b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', - b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', - b'u', b'v', b'w', b'x', b'y', b'z', + b'A', b'a', b'B', b'b', b'C', b'c', b'D', b'd', b'E', b'e', + b'F', b'f', b'G', b'g', b'H', b'h', b'I', b'i', b'J', b'j', + b'K', b'k', b'L', b'l', b'M', b'm', b'N', b'n', b'O', b'o', + b'P', b'p', b'Q', b'q', b'R', b'r', b'S', b's', b'T', b't', + b'U', b'u', b'V', b'v', b'W', b'w', b'X', b'x', b'Y', b'y', + b'Z', b'z', // offset array: - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, - 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, - 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, - 86, - // single-byte values: - 0x80, (0x80 | 1), (0x80 | 2), (0x80 | 3), (0x80 | 4), - (0x80 | 5), (0x80 | 6), (0x80 | 7), (0x80 | 8), (0x80 | 9), - (0x80 | 10), (0x80 | 11), (0x80 | 12), (0x80 | 13), (0x80 | 14), - (0x80 | 15), - // multi-byte values: - 0x90, 0, 0x90, 1, 0x90, 2, 0x90, 3, 0x90, 4, 0x90, 5, - 0x90, 6, 0x90, 7, 0x90, 8, 0x90, 9, 0x90, 10, 0x90, 11, - 0x90, 12, 0x90, 13, 0x90, 14, 0x90, 15, 0x90, 16, 0x90, 17, - 0x90, 18, 0x90, 19, 0x90, 20, 0x90, 21, 0x90, 22, 0x90, 23, - 0x90, 24, 0x90, 25, 0x90, 26, 0x90, 27, 0x90, 28, 0x90, 29, - 0x90, 30, 0x90, 31, 0x90, 32, 0x90, 33, 0x90, 34, 0x90, 35, + 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24, + 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46, + 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, + 78, 80, 82, 84, 86, + // values (mix of single-byte and multi-byte): + (0x80 | 0), 0x90, 10, (0x80 | 1), 0x90, 11, (0x80 | 2), 0x90, 12, + (0x80 | 3), 0x90, 13, (0x80 | 4), 0x90, 14, (0x80 | 5), 0x90, 15, + (0x80 | 6), 0x90, 16, (0x80 | 7), 0x90, 17, (0x80 | 8), 0x90, 18, + (0x80 | 9), 0x90, 19, (0x80 | 10), 0x90, 20, (0x80 | 11), 0x90, 21, + (0x80 | 12), 0x90, 22, (0x80 | 13), 0x90, 23, (0x80 | 14), 0x90, 24, + (0x80 | 15), 0x90, 25, + 0x90, 0, 0x90, 26, 0x90, 1, 0x90, 27, 0x90, 2, 0x90, 28, + 0x90, 3, 0x90, 29, 0x90, 4, 0x90, 30, 0x90, 5, 0x90, 31, + 0x90, 6, 0x90, 32, 0x90, 7, 0x90, 33, 0x90, 8, 0x90, 34, + 0x90, 9, 0x90, 35, ]; assert_bytes_eq!(193, trie.as_bytes(), expected_bytes);