Skip to content

Commit

Permalink
Fix get_parameterized; migrate tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc committed Nov 24, 2024
1 parent efcbf77 commit a27bf1f
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 43 deletions.
6 changes: 6 additions & 0 deletions utils/zerotrie/src/builder/bytestr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ impl AsRef<ByteStr> for ByteStr {
}
}

/// Allows a borrowed string slice to be converted into a borrowed [`ByteStr`]
/// with the same lifetime, so call sites can write `"foo".into()` wherever a
/// `&ByteStr` is expected.
impl<'a> From<&'a str> for &'a ByteStr {
    /// Wraps `text` as a `&ByteStr` by delegating to [`ByteStr::from_str`].
    fn from(text: &'a str) -> &'a ByteStr {
        ByteStr::from_str(text)
    }
}

impl fmt::Debug for ByteStr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
if let Ok(s) = core::str::from_utf8(self.as_bytes()) {
Expand Down
6 changes: 5 additions & 1 deletion utils/zerotrie/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,11 @@ pub(crate) fn get_parameterized<T: ZeroTrieWithOptions + ?Sized>(
if matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly) || x < 16 {
// binary search
(search, trie) = trie.debug_split_at(x);
let bsearch_result = search.binary_search(c);
let bsearch_result = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) {
search.binary_search_by(|p| comparison::cmpi(*p, *c))
} else {
search.binary_search_by(|p| comparison::cmp(*p, *c))
};
i = bsearch_result.ok()?;
} else {
// phf
Expand Down
45 changes: 25 additions & 20 deletions utils/zerotrie/src/zerotrie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,12 @@ use litemap::LiteMap;
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ZeroTrie;
/// use zerotrie::ByteStr;
///
/// let mut map = LiteMap::<&[u8], usize>::new_vec();
/// map.insert("foo".as_bytes(), 1);
/// map.insert("bar".as_bytes(), 2);
/// map.insert("bazzoo".as_bytes(), 3);
/// let mut map = LiteMap::<&ByteStr, usize>::new_vec();
/// map.insert("foo".into(), 1);
/// map.insert("bar".into(), 2);
/// map.insert("bazzoo".into(), 3);
///
/// let trie = ZeroTrie::try_from(&map)?;
///
Expand Down Expand Up @@ -82,12 +83,13 @@ pub(crate) enum ZeroTrieFlavor<Store> {
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ByteStr;
/// use zerotrie::ZeroTrieSimpleAscii;
///
/// let mut map = LiteMap::new_vec();
/// map.insert(&b"foo"[..], 1);
/// map.insert(b"bar", 2);
/// map.insert(b"bazzoo", 3);
/// let mut map = LiteMap::<&ByteStr, _>::new_vec();
/// map.insert("foo".into(), 1);
/// map.insert("bar".into(), 2);
/// map.insert("bazzoo".into(), 3);
///
/// let trie = ZeroTrieSimpleAscii::try_from(&map)?;
///
Expand Down Expand Up @@ -134,12 +136,13 @@ impl<Store> ZeroTrieSimpleAscii<Store> {
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ByteStr;
/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
///
/// let mut map = LiteMap::new_vec();
/// map.insert(&b"foo"[..], 1);
/// map.insert(b"Bar", 2);
/// map.insert(b"Bazzoo", 3);
/// let mut map = LiteMap::<&ByteStr, _>::new_vec();
/// map.insert("foo".into(), 1);
/// map.insert("Bar".into(), 2);
/// map.insert("Bazzoo".into(), 3);
///
/// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?;
///
Expand All @@ -156,14 +159,15 @@ impl<Store> ZeroTrieSimpleAscii<Store> {
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ByteStr;
/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
///
/// let mut map = LiteMap::new_vec();
/// map.insert(&b"bar"[..], 1);
/// let mut map = LiteMap::<&ByteStr, _>::new_vec();
/// map.insert("bar".into(), 1);
/// // OK: 'r' and 'Z' are different letters
/// map.insert(b"baZ", 2);
/// map.insert("baZ".into(), 2);
/// // Bad: we already inserted 'r' so we cannot also insert 'R' at the same position
/// map.insert(b"baR", 2);
/// map.insert("baR".into(), 2);
///
/// ZeroAsciiIgnoreCaseTrie::try_from(&map).expect_err("mixed-case strings!");
/// ```
Expand All @@ -187,12 +191,13 @@ pub struct ZeroAsciiIgnoreCaseTrie<Store: ?Sized> {
///
/// ```
/// use litemap::LiteMap;
/// use zerotrie::ByteStr;
/// use zerotrie::ZeroTriePerfectHash;
///
/// let mut map = LiteMap::<&[u8], usize>::new_vec();
/// map.insert("foo".as_bytes(), 1);
/// map.insert("bår".as_bytes(), 2);
/// map.insert("båzzøø".as_bytes(), 3);
/// let mut map = LiteMap::<&ByteStr, usize>::new_vec();
/// map.insert("foo".into(), 1);
/// map.insert("bår".into(), 2);
/// map.insert("båzzøø".into(), 3);
///
/// let trie = ZeroTriePerfectHash::try_from(&map)?;
///
Expand Down
43 changes: 21 additions & 22 deletions utils/zerotrie/tests/builder_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,29 +281,28 @@ fn test_varint_branch() {
0b11100000, // branch varint lead
0x14, // branch varint trail
// search array:
b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J',
b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
b'U', b'V', b'W', b'X', b'Y', b'Z',
b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j',
b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
b'u', b'v', b'w', b'x', b'y', b'z',
b'A', b'a', b'B', b'b', b'C', b'c', b'D', b'd', b'E', b'e',
b'F', b'f', b'G', b'g', b'H', b'h', b'I', b'i', b'J', b'j',
b'K', b'k', b'L', b'l', b'M', b'm', b'N', b'n', b'O', b'o',
b'P', b'p', b'Q', b'q', b'R', b'r', b'S', b's', b'T', b't',
b'U', b'u', b'V', b'v', b'W', b'w', b'X', b'x', b'Y', b'y',
b'Z', b'z',
// offset array:
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20,
22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52,
54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84,
86,
// single-byte values:
0x80, (0x80 | 1), (0x80 | 2), (0x80 | 3), (0x80 | 4),
(0x80 | 5), (0x80 | 6), (0x80 | 7), (0x80 | 8), (0x80 | 9),
(0x80 | 10), (0x80 | 11), (0x80 | 12), (0x80 | 13), (0x80 | 14),
(0x80 | 15),
// multi-byte values:
0x90, 0, 0x90, 1, 0x90, 2, 0x90, 3, 0x90, 4, 0x90, 5,
0x90, 6, 0x90, 7, 0x90, 8, 0x90, 9, 0x90, 10, 0x90, 11,
0x90, 12, 0x90, 13, 0x90, 14, 0x90, 15, 0x90, 16, 0x90, 17,
0x90, 18, 0x90, 19, 0x90, 20, 0x90, 21, 0x90, 22, 0x90, 23,
0x90, 24, 0x90, 25, 0x90, 26, 0x90, 27, 0x90, 28, 0x90, 29,
0x90, 30, 0x90, 31, 0x90, 32, 0x90, 33, 0x90, 34, 0x90, 35,
1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24,
25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46,
48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
78, 80, 82, 84, 86,
// values (mix of single-byte and multi-byte):
(0x80 | 0), 0x90, 10, (0x80 | 1), 0x90, 11, (0x80 | 2), 0x90, 12,
(0x80 | 3), 0x90, 13, (0x80 | 4), 0x90, 14, (0x80 | 5), 0x90, 15,
(0x80 | 6), 0x90, 16, (0x80 | 7), 0x90, 17, (0x80 | 8), 0x90, 18,
(0x80 | 9), 0x90, 19, (0x80 | 10), 0x90, 20, (0x80 | 11), 0x90, 21,
(0x80 | 12), 0x90, 22, (0x80 | 13), 0x90, 23, (0x80 | 14), 0x90, 24,
(0x80 | 15), 0x90, 25,
0x90, 0, 0x90, 26, 0x90, 1, 0x90, 27, 0x90, 2, 0x90, 28,
0x90, 3, 0x90, 29, 0x90, 4, 0x90, 30, 0x90, 5, 0x90, 31,
0x90, 6, 0x90, 32, 0x90, 7, 0x90, 33, 0x90, 8, 0x90, 34,
0x90, 9, 0x90, 35,
];
assert_bytes_eq!(193, trie.as_bytes(), expected_bytes);

Expand Down

0 comments on commit a27bf1f

Please sign in to comment.