forked from unicode-org/icu4x
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ZeroTrie: Add some tests for locale with aux key
- Loading branch information
Showing
4 changed files
with
157 additions
and
0 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use icu_locid::extensions::private::Private; | ||
use icu_locid::Locale; | ||
use litemap::LiteMap; | ||
use std::collections::BTreeSet; | ||
use writeable::Writeable; | ||
use zerotrie::ZeroTriePerfectHash; | ||
use zerotrie::ZeroTrieSimpleAscii; | ||
|
||
// Test fixtures: `include!` pastes the contents of `data/data.rs` into this module.
// This file pulls `locales_with_aux::{NUM_UNIQUE_BLOBS, STRINGS}` and
// `strings_to_litemap` out of it below.
mod testdata { | ||
include!("data/data.rs"); | ||
} | ||
|
||
use testdata::locales_with_aux::{NUM_UNIQUE_BLOBS, STRINGS}; | ||
use testdata::strings_to_litemap; | ||
|
||
/// Pins the storage footprint of indexing every string in `STRINGS` (locale
/// plus aux key as a single combined key) with one trie, and compares it to a
/// flat string-table baseline.
///
/// Totals that include pointer-sized slots are written in units of the
/// target's pointer width rather than as 64-bit-only literals, so the test
/// does not spuriously fail on 32-bit targets. With 8-byte pointers every
/// expected value is the same number the test has always asserted (noted in
/// the comments).
#[test]
fn test_combined() {
    // Size of one pointer-sized slot on the current target (8 on 64-bit).
    const PTR: usize = core::mem::size_of::<usize>();
    // Size of one compact u16 index slot.
    const U16: usize = core::mem::size_of::<u16>();

    // Fixture dimensions; pinned so the totals below are fully determined.
    assert_eq!(NUM_UNIQUE_BLOBS, 411);
    assert_eq!(STRINGS.len(), 1052);

    let litemap = strings_to_litemap(STRINGS);

    // NOTE(review): the trie byte lengths are asserted as plain literals on the
    // assumption that the serialized trie does not depend on the target's
    // pointer width — confirm on a 32-bit target.
    let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();

    // Lookup table size:
    assert_eq!(trie.byte_len(), 5104);

    // Size including pointer array (8392 with 8-byte pointers):
    assert_eq!(trie.byte_len() + NUM_UNIQUE_BLOBS * PTR, 5104 + 411 * PTR);

    let trie = ZeroTriePerfectHash::try_from(&litemap).unwrap();

    // Lookup table size:
    assert_eq!(trie.byte_len(), 5157);

    // Size including pointer array (8445 with 8-byte pointers):
    assert_eq!(trie.byte_len() + NUM_UNIQUE_BLOBS * PTR, 5157 + 411 * PTR);

    // Baseline: the raw key bytes stored without any trie.
    let total_str_len = litemap.iter_keys().map(|k| k.len()).sum::<usize>();
    assert_eq!(total_str_len, 8115);

    // Key bytes plus one pointer-sized slot per key
    // (16531 with 8-byte pointers):
    assert_eq!(total_str_len + STRINGS.len() * PTR, 8115 + 1052 * PTR);

    // Size including pointer array: 2x for the lookup array and value array
    // (24947 with 8-byte pointers):
    assert_eq!(
        total_str_len + 2 * STRINGS.len() * PTR,
        8115 + 2 * 1052 * PTR
    );

    // Size including u16 pointer array: one pointer-sized slot and one u16
    // slot per key, plus one pointer-sized slot per unique blob
    // (21923 with 8-byte pointers):
    assert_eq!(
        total_str_len + STRINGS.len() * PTR + STRINGS.len() * U16 + NUM_UNIQUE_BLOBS * PTR,
        8115 + 1052 * PTR + 1052 * U16 + 411 * PTR
    );
}
|
||
/// Pins the storage footprint when the locales in `STRINGS` are partitioned by
/// their private-use extension (the aux key) and each partition gets its own
/// trie keyed by the locale with that extension removed.
///
/// Totals that include pointer-sized slots are written in units of the
/// target's pointer width rather than as 64-bit-only literals, so the test
/// does not spuriously fail on 32-bit targets. With 8-byte pointers every
/// expected value is the same number the test has always asserted (noted in
/// the comments).
#[test]
fn test_aux_split() {
    // Size of one pointer-sized slot on the current target (8 on 64-bit).
    const PTR: usize = core::mem::size_of::<usize>();
    // Size of one compact u16 index slot.
    const U16: usize = core::mem::size_of::<u16>();

    // Fixture dimensions; pinned so the totals below are fully determined.
    assert_eq!(NUM_UNIQUE_BLOBS, 411);
    assert_eq!(STRINGS.len(), 1052);

    let locales: Vec<Locale> = STRINGS.iter().map(|s| s.parse().unwrap()).collect();

    // Distinct private-use extensions, one per partition.
    let aux_keys: BTreeSet<&Private> = locales.iter().map(|l| &l.extensions.private).collect();
    assert_eq!(aux_keys.len(), 6);

    let mut cumulative_index: usize = 0;
    let mut total_simpleascii_len = 0;
    let mut total_perfecthash_len = 0;
    let mut unique_locales = BTreeSet::new();
    for &private in &aux_keys {
        // Locales carrying this aux key, with the aux key stripped off.
        let current_locales: Vec<Locale> = locales
            .iter()
            .filter(|l| l.extensions.private == *private)
            .cloned()
            .map(|mut l| {
                l.extensions.private = Private::default();
                l
            })
            .collect();

        // Values are running indices that continue across partitions, so
        // every input locale receives a distinct index.
        let litemap: LiteMap<Vec<u8>, usize> = current_locales
            .iter()
            .zip(cumulative_index..)
            .map(|(l, index)| (l.write_to_string().into_owned().into_bytes(), index))
            .collect();
        cumulative_index += current_locales.len();

        let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
        total_simpleascii_len += trie.byte_len();

        let trie = ZeroTriePerfectHash::try_from(&litemap).unwrap();
        total_perfecthash_len += trie.byte_len();

        // Collect the stripped locale strings, deduplicated across partitions.
        unique_locales.extend(litemap.iter_keys().cloned());
    }
    // Every locale was assigned exactly one index.
    assert_eq!(cumulative_index, locales.len());

    // NOTE(review): the trie byte lengths are asserted as plain literals on the
    // assumption that the serialized tries do not depend on the target's
    // pointer width — confirm on a 32-bit target.
    assert_eq!(total_simpleascii_len, 5098);
    assert_eq!(total_perfecthash_len, 5302);

    let total_unique_locale_str_len = unique_locales.iter().map(|v| v.len()).sum::<usize>();
    assert_eq!(total_unique_locale_str_len, 945);

    // Size including pointer array (8386 and 8590 with 8-byte pointers):
    assert_eq!(
        total_simpleascii_len + NUM_UNIQUE_BLOBS * PTR,
        5098 + 411 * PTR
    );
    assert_eq!(
        total_perfecthash_len + NUM_UNIQUE_BLOBS * PTR,
        5302 + 411 * PTR
    );

    // 2x for the lookup arrays and value arrays (17777 with 8-byte pointers):
    assert_eq!(
        total_unique_locale_str_len + 2 * STRINGS.len() * PTR,
        945 + 2 * 1052 * PTR
    );

    // Size including u16 pointer array: one pointer-sized slot and one u16
    // slot per key, plus one pointer-sized slot per unique blob
    // (14753 with 8-byte pointers):
    assert_eq!(
        total_unique_locale_str_len
            + STRINGS.len() * PTR
            + STRINGS.len() * U16
            + NUM_UNIQUE_BLOBS * PTR,
        945 + 1052 * PTR + 1052 * U16 + 411 * PTR
    );
}