Skip to content

Commit

Permalink
ZeroTrie: Add some tests for locale with aux key
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc committed Oct 22, 2023
1 parent f3e3760 commit 3c6b7e4
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions experimental/zerotrie/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ rand_pcg = "0.3"
serde = { version = "1.0", default-features = false }
serde_json = "1.0"
zerovec = { path = "../../utils/zerovec", features = ["serde", "hashmap"] }
icu_locid = { path = "../../components/locid" }
writeable = { path = "../../utils/writeable" }

[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
criterion = "0.4"
Expand Down Expand Up @@ -71,3 +73,7 @@ required-features = ["alloc", "litemap"]
[[test]]
name = "builder_test"
required-features = ["alloc", "litemap"]

[[test]]
name = "locale_aux_test"
required-features = ["alloc", "litemap"]
6 changes: 6 additions & 0 deletions experimental/zerotrie/tests/data/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2202,3 +2202,9 @@ pub mod short_subtags_10pct {
"zia",
];
}

// Test fixture: locale identifiers that each carry a private-use (`-x-`) aux key.
//
// `dead_code` is allowed presumably because each test binary that `include!`s
// this file uses only some of its modules -- confirm before removing.
#[allow(dead_code)]
pub mod locales_with_aux {
// Count the size tests multiply by `size_of::<usize>()` to model a pointer array.
// NOTE(review): presumably the number of distinct data payloads that the keys in
// `STRINGS` resolve to -- confirm against whatever generated this fixture.
pub static NUM_UNIQUE_BLOBS: usize = 411;
// Locale strings, each ending in a `-x-<aux>` private-use suffix (for example
// `af-x-3` or `ar-DZ-x-3s`). The tests use them both as combined trie keys and,
// after parsing, split into locale and aux key.
pub static STRINGS: &[&str] = &["af-x-3", "af-x-3s", "af-x-4", "af-x-4s", "af-x-5", "af-x-5s", "am-x-3", "am-x-3s", "am-x-4", "am-x-4s", "am-x-5", "am-x-5s", "ar-DZ-x-3", "ar-DZ-x-3s", "ar-DZ-x-4", "ar-DZ-x-4s", "ar-DZ-x-5", "ar-DZ-x-5s", "ar-IQ-x-3", "ar-IQ-x-3s", "ar-IQ-x-4", "ar-IQ-x-4s", "ar-IQ-x-5", "ar-IQ-x-5s", "ar-JO-x-3", "ar-JO-x-3s", "ar-JO-x-4", "ar-JO-x-4s", "ar-JO-x-5", "ar-JO-x-5s", "ar-LB-x-3", "ar-LB-x-3s", "ar-LB-x-4", "ar-LB-x-4s", "ar-LB-x-5", "ar-LB-x-5s", "ar-MA-x-3", "ar-MA-x-3s", "ar-MA-x-4", "ar-MA-x-4s", "ar-MA-x-5", "ar-MA-x-5s", "ar-MR-x-3", "ar-MR-x-3s", "ar-MR-x-4", "ar-MR-x-4s", "ar-MR-x-5", "ar-MR-x-5s", "ar-PS-x-3", "ar-PS-x-3s", "ar-PS-x-4", "ar-PS-x-4s", "ar-PS-x-5", "ar-PS-x-5s", "ar-SY-x-3", "ar-SY-x-3s", "ar-SY-x-4", "ar-SY-x-4s", "ar-SY-x-5", "ar-SY-x-5s", "ar-TN-x-3", "ar-TN-x-3s", "ar-TN-x-4", "ar-TN-x-4s", "ar-TN-x-5", "ar-TN-x-5s", "ar-x-3", "ar-x-3s", "ar-x-4", "ar-x-4s", "ar-x-5", "ar-x-5s", "as-x-3", "as-x-3s", "as-x-4", "as-x-4s", "as-x-5", "as-x-5s", "ast-x-3", "ast-x-3s", "ast-x-4", "ast-x-4s", "ast-x-5", "ast-x-5s", "az-x-3", "az-x-3s", "az-x-5", "az-x-5s", "be-x-3", "be-x-3s", "be-x-4", "be-x-4s", "be-x-5", "be-x-5s", "bg-x-3", "bg-x-3s", "bg-x-4", "bg-x-4s", "bg-x-5", "bg-x-5s", "bgc-x-3", "bgc-x-3s", "bgc-x-5", "bgc-x-5s", "bho-x-3", "bho-x-3s", "bho-x-5", "bho-x-5s", "bn-IN-x-3", "bn-IN-x-3s", "bn-IN-x-4", "bn-IN-x-4s", "bn-x-3", "bn-x-3s", "bn-x-4", "bn-x-4s", "bn-x-5", "bn-x-5s", "br-x-3", "br-x-3s", "br-x-4", "br-x-4s", "br-x-5", "br-x-5s", "brx-x-3", "brx-x-3s", "brx-x-4", "brx-x-4s", "brx-x-5", "brx-x-5s", "bs-Cyrl-x-3", "bs-Cyrl-x-3s", "bs-Cyrl-x-4", "bs-Cyrl-x-4s", "bs-Cyrl-x-5", "bs-Cyrl-x-5s", "bs-x-3", "bs-x-3s", "bs-x-4", "bs-x-4s", "bs-x-5", "bs-x-5s", "ca-x-3", "ca-x-3s", "ca-x-4", "ca-x-4s", "ca-x-5", "ca-x-5s", "ceb-x-3", "ceb-x-3s", "ceb-x-4", "ceb-x-4s", "ceb-x-5", "ceb-x-5s", "chr-x-3", "chr-x-3s", "chr-x-4", "chr-x-4s", "chr-x-5", "chr-x-5s", "cs-x-3", "cs-x-3s", "cs-x-5", "cs-x-5s", "cv-x-3", 
"cv-x-3s", "cv-x-4", "cv-x-4s", "cv-x-5", "cv-x-5s", "cy-x-3", "cy-x-3s", "cy-x-4", "cy-x-4s", "cy-x-5", "cy-x-5s", "da-x-3", "da-x-3s", "da-x-4", "da-x-4s", "da-x-5", "da-x-5s", "de-AT-x-3", "de-AT-x-3s", "de-AT-x-5", "de-AT-x-5s", "de-IT-x-3", "de-IT-x-3s", "de-IT-x-5", "de-IT-x-5s", "de-x-3", "de-x-3s", "de-x-4", "de-x-4s", "de-x-5", "de-x-5s", "doi-x-3", "doi-x-3s", "doi-x-4", "doi-x-4s", "doi-x-5", "doi-x-5s", "dsb-x-3", "dsb-x-3s", "dsb-x-4", "dsb-x-4s", "dsb-x-5", "dsb-x-5s", "el-polyton-x-3", "el-polyton-x-5", "el-polyton-x-5s", "el-x-3", "el-x-3s", "el-x-4", "el-x-4s", "el-x-5", "el-x-5s", "en-001-x-3", "en-001-x-3s", "en-150-x-3", "en-150-x-3s", "en-AG-x-3", "en-AG-x-3s", "en-AI-x-3", "en-AI-x-3s", "en-AT-x-3", "en-AT-x-3s", "en-AU-x-3", "en-AU-x-3s", "en-BB-x-3", "en-BB-x-3s", "en-BE-x-3", "en-BE-x-3s", "en-BM-x-3", "en-BM-x-3s", "en-BS-x-3", "en-BS-x-3s", "en-BW-x-3", "en-BW-x-3s", "en-BZ-x-3", "en-BZ-x-3s", "en-CC-x-3", "en-CC-x-3s", "en-CH-x-3", "en-CH-x-3s", "en-CK-x-3", "en-CK-x-3s", "en-CM-x-3", "en-CM-x-3s", "en-CX-x-3", "en-CX-x-3s", "en-CY-x-3", "en-CY-x-3s", "en-DE-x-3", "en-DE-x-3s", "en-DG-x-3", "en-DG-x-3s", "en-DK-x-3", "en-DK-x-3s", "en-DM-x-3", "en-DM-x-3s", "en-ER-x-3", "en-ER-x-3s", "en-FI-x-3", "en-FI-x-3s", "en-FJ-x-3", "en-FJ-x-3s", "en-FK-x-3", "en-FK-x-3s", "en-FM-x-3", "en-FM-x-3s", "en-GB-x-3", "en-GB-x-3s", "en-GD-x-3", "en-GD-x-3s", "en-GG-x-3", "en-GG-x-3s", "en-GH-x-3", "en-GH-x-3s", "en-GI-x-3", "en-GI-x-3s", "en-GM-x-3", "en-GM-x-3s", "en-GY-x-3", "en-GY-x-3s", "en-HK-x-3", "en-HK-x-3s", "en-IE-x-3", "en-IE-x-3s", "en-IL-x-3", "en-IL-x-3s", "en-IM-x-3", "en-IM-x-3s", "en-IN-x-3", "en-IN-x-3s", "en-IO-x-3", "en-IO-x-3s", "en-JE-x-3", "en-JE-x-3s", "en-JM-x-3", "en-JM-x-3s", "en-KE-x-3", "en-KE-x-3s", "en-KI-x-3", "en-KI-x-3s", "en-KN-x-3", "en-KN-x-3s", "en-KY-x-3", "en-KY-x-3s", "en-LC-x-3", "en-LC-x-3s", "en-LR-x-3", "en-LR-x-3s", "en-LS-x-3", "en-LS-x-3s", "en-MG-x-3", "en-MG-x-3s", "en-MO-x-3", "en-MO-x-3s", "en-MS-x-3", 
"en-MS-x-3s", "en-MT-x-3", "en-MT-x-3s", "en-MU-x-3", "en-MU-x-3s", "en-MV-x-3", "en-MV-x-3s", "en-MW-x-3", "en-MW-x-3s", "en-MY-x-3", "en-MY-x-3s", "en-NA-x-3", "en-NA-x-3s", "en-NF-x-3", "en-NF-x-3s", "en-NG-x-3", "en-NG-x-3s", "en-NL-x-3", "en-NL-x-3s", "en-NR-x-3", "en-NR-x-3s", "en-NU-x-3", "en-NU-x-3s", "en-NZ-x-3", "en-NZ-x-3s", "en-PG-x-3", "en-PG-x-3s", "en-PK-x-3", "en-PK-x-3s", "en-PN-x-3", "en-PN-x-3s", "en-PW-x-3", "en-PW-x-3s", "en-RW-x-3", "en-RW-x-3s", "en-SB-x-3", "en-SB-x-3s", "en-SC-x-3", "en-SC-x-3s", "en-SD-x-3", "en-SD-x-3s", "en-SE-x-3", "en-SE-x-3s", "en-SG-x-3", "en-SG-x-3s", "en-SH-x-3", "en-SH-x-3s", "en-SI-x-3", "en-SI-x-3s", "en-SL-x-3", "en-SL-x-3s", "en-SS-x-3", "en-SS-x-3s", "en-SX-x-3", "en-SX-x-3s", "en-SZ-x-3", "en-SZ-x-3s", "en-TC-x-3", "en-TC-x-3s", "en-TK-x-3", "en-TK-x-3s", "en-TO-x-3", "en-TO-x-3s", "en-TT-x-3", "en-TT-x-3s", "en-TV-x-3", "en-TV-x-3s", "en-TZ-x-3", "en-TZ-x-3s", "en-UG-x-3", "en-UG-x-3s", "en-VC-x-3", "en-VC-x-3s", "en-VG-x-3", "en-VG-x-3s", "en-VU-x-3", "en-VU-x-3s", "en-WS-x-3", "en-WS-x-3s", "en-ZA-x-3", "en-ZA-x-3s", "en-ZM-x-3", "en-ZM-x-3s", "en-ZW-x-3", "en-ZW-x-3s", "en-x-3", "en-x-3s", "en-x-4", "en-x-4s", "en-x-5", "en-x-5s", "es-CL-x-3s", "es-CO-x-3s", "es-PE-x-3", "es-PE-x-3s", "es-PE-x-5", "es-PE-x-5s", "es-PY-x-3", "es-PY-x-3s", "es-UY-x-3", "es-UY-x-3s", "es-UY-x-5", "es-UY-x-5s", "es-VE-x-3", "es-VE-x-3s", "es-x-3", "es-x-3s", "es-x-4", "es-x-4s", "es-x-5", "es-x-5s", "et-x-3", "et-x-3s", "et-x-4", "et-x-4s", "et-x-5", "et-x-5s", "eu-x-3", "eu-x-3s", "eu-x-4", "eu-x-4s", "eu-x-5", "eu-x-5s", "fa-AF-x-3", "fa-AF-x-3s", "fa-AF-x-4", "fa-AF-x-4s", "fa-AF-x-5", "fa-AF-x-5s", "fa-x-3", "fa-x-3s", "fa-x-4", "fa-x-4s", "fa-x-5", "fa-x-5s", "ff-Adlm-x-3", "ff-Adlm-x-3s", "ff-Adlm-x-4", "ff-Adlm-x-4s", "ff-Adlm-x-5", "ff-Adlm-x-5s", "fi-x-3", "fi-x-3s", "fi-x-4", "fi-x-4s", "fi-x-5", "fi-x-5s", "fil-x-3", "fil-x-3s", "fil-x-4", "fil-x-4s", "fil-x-5", "fil-x-5s", "fo-x-3", "fo-x-3s", "fo-x-4", 
"fo-x-4s", "fo-x-5", "fo-x-5s", "fr-CA-x-3", "fr-CA-x-3s", "fr-MA-x-3", "fr-MA-x-3s", "fr-x-3", "fr-x-3s", "fr-x-4", "fr-x-4s", "fr-x-5", "fr-x-5s", "ga-x-3", "ga-x-3s", "ga-x-4", "ga-x-4s", "ga-x-5", "ga-x-5s", "gd-x-3", "gd-x-3s", "gd-x-4", "gd-x-4s", "gd-x-5", "gd-x-5s", "gl-x-3", "gl-x-3s", "gl-x-4", "gl-x-4s", "gl-x-5", "gl-x-5s", "gu-x-3", "gu-x-3s", "gu-x-4", "gu-x-4s", "gu-x-5", "gu-x-5s", "ha-x-3", "ha-x-3s", "ha-x-4", "ha-x-4s", "ha-x-5", "ha-x-5s", "he-x-3", "he-x-3s", "he-x-5", "he-x-5s", "hi-Latn-x-3", "hi-Latn-x-3s", "hi-Latn-x-4", "hi-Latn-x-4s", "hi-Latn-x-5", "hi-Latn-x-5s", "hi-x-3", "hi-x-3s", "hi-x-4", "hi-x-4s", "hi-x-5", "hi-x-5s", "hr-x-3", "hr-x-3s", "hr-x-4", "hr-x-4s", "hr-x-5", "hr-x-5s", "hsb-x-3", "hsb-x-3s", "hsb-x-4", "hsb-x-4s", "hsb-x-5", "hsb-x-5s", "hu-x-3", "hu-x-3s", "hu-x-4", "hu-x-4s", "hu-x-5", "hu-x-5s", "hy-x-3", "hy-x-3s", "hy-x-4", "hy-x-4s", "hy-x-5", "hy-x-5s", "ia-x-3", "ia-x-3s", "ia-x-4", "ia-x-5", "ia-x-5s", "id-x-3", "id-x-3s", "id-x-4", "id-x-4s", "id-x-5", "id-x-5s", "ig-x-3", "ig-x-3s", "ig-x-4", "ig-x-4s", "ig-x-5", "ig-x-5s", "is-x-3", "is-x-3s", "is-x-4", "is-x-4s", "is-x-5", "is-x-5s", "it-x-3", "it-x-3s", "it-x-4", "it-x-4s", "it-x-5", "it-x-5s", "ja-x-3", "ja-x-3s", "ja-x-5", "ja-x-5s", "jv-x-3", "jv-x-3s", "jv-x-4", "jv-x-4s", "jv-x-5", "jv-x-5s", "ka-x-3", "ka-x-3s", "ka-x-4", "ka-x-4s", "ka-x-5", "ka-x-5s", "kea-x-3", "kea-x-3s", "kea-x-4", "kea-x-4s", "kea-x-5", "kea-x-5s", "kgp-x-3", "kgp-x-3s", "kgp-x-4", "kgp-x-4s", "kgp-x-5", "kgp-x-5s", "kk-x-3", "kk-x-3s", "kk-x-4", "kk-x-4s", "kk-x-5", "kk-x-5s", "km-x-3", "km-x-3s", "km-x-4", "km-x-4s", "km-x-5", "km-x-5s", "kn-x-3", "kn-x-3s", "kn-x-4", "kn-x-4s", "kn-x-5", "kn-x-5s", "ko-x-3", "ko-x-3s", "ko-x-4", "ko-x-4s", "ko-x-5", "ko-x-5s", "kok-x-3", "kok-x-3s", "kok-x-5", "kok-x-5s", "ks-Deva-x-3", "ks-Deva-x-3s", "ks-Deva-x-4", "ks-Deva-x-4s", "ks-Deva-x-5", "ks-Deva-x-5s", "ks-x-3", "ks-x-3s", "ks-x-4", "ks-x-4s", "ks-x-5", "ks-x-5s", "ky-x-3", 
"ky-x-3s", "ky-x-4", "ky-x-4s", "ky-x-5", "ky-x-5s", "lo-x-3", "lo-x-3s", "lo-x-5", "lo-x-5s", "lt-x-3", "lt-x-3s", "lt-x-4", "lt-x-4s", "lt-x-5", "lt-x-5s", "lv-x-3", "lv-x-3s", "lv-x-4", "lv-x-4s", "lv-x-5", "lv-x-5s", "mai-x-3", "mai-x-3s", "mai-x-4", "mai-x-4s", "mai-x-5", "mai-x-5s", "mi-x-3", "mi-x-3s", "mi-x-4", "mi-x-4s", "mi-x-5", "mi-x-5s", "mk-x-3", "mk-x-3s", "mk-x-4", "mk-x-4s", "mk-x-5", "mk-x-5s", "ml-x-3", "ml-x-3s", "ml-x-4", "ml-x-4s", "ml-x-5", "ml-x-5s", "mn-x-3", "mn-x-3s", "mn-x-4", "mn-x-4s", "mn-x-5", "mn-x-5s", "mni-x-3", "mni-x-3s", "mni-x-4", "mni-x-4s", "mni-x-5", "mni-x-5s", "mr-x-3", "mr-x-3s", "mr-x-4", "mr-x-4s", "mr-x-5", "mr-x-5s", "ms-x-3", "ms-x-3s", "ms-x-4", "ms-x-4s", "ms-x-5", "ms-x-5s", "my-x-3", "my-x-3s", "my-x-4", "my-x-4s", "my-x-5", "my-x-5s", "nb-x-3", "nb-x-3s", "nb-x-4", "nb-x-4s", "nb-x-5", "nb-x-5s", "ne-x-3", "ne-x-3s", "ne-x-4", "ne-x-4s", "ne-x-5", "ne-x-5s", "nl-x-3", "nl-x-3s", "nl-x-4", "nl-x-4s", "nl-x-5", "nl-x-5s", "nn-x-3", "nn-x-3s", "nn-x-4", "nn-x-4s", "nn-x-5", "nn-x-5s", "no-x-3", "no-x-3s", "no-x-4", "no-x-4s", "no-x-5", "no-x-5s", "or-x-3", "or-x-3s", "or-x-4", "or-x-4s", "or-x-5", "or-x-5s", "pa-x-3", "pa-x-3s", "pa-x-4", "pa-x-4s", "pa-x-5", "pa-x-5s", "pcm-x-3", "pcm-x-3s", "pcm-x-4", "pcm-x-4s", "pcm-x-5", "pcm-x-5s", "pl-x-3", "pl-x-3s", "pl-x-4", "pl-x-4s", "pl-x-5", "pl-x-5s", "ps-x-3", "ps-x-3s", "ps-x-4", "ps-x-5", "ps-x-5s", "pt-x-3", "pt-x-3s", "pt-x-4", "pt-x-4s", "pt-x-5", "pt-x-5s", "qu-x-3", "qu-x-3s", "qu-x-5", "qu-x-5s", "raj-x-3", "raj-x-3s", "raj-x-5", "raj-x-5s", "rm-x-3", "rm-x-3s", "rm-x-4", "rm-x-4s", "rm-x-5", "rm-x-5s", "ro-x-3", "ro-x-3s", "ro-x-4", "ro-x-4s", "ro-x-5", "ro-x-5s", "ru-x-3", "ru-x-3s", "ru-x-4", "ru-x-4s", "ru-x-5", "ru-x-5s", "sa-x-3", "sa-x-3s", "sa-x-4", "sa-x-5", "sa-x-5s", "sat-x-3", "sat-x-3s", "sat-x-4", "sat-x-4s", "sat-x-5", "sat-x-5s", "sc-x-3", "sc-x-3s", "sc-x-4", "sc-x-4s", "sc-x-5", "sc-x-5s", "sd-Deva-x-3", "sd-Deva-x-3s", "sd-Deva-x-4", 
"sd-Deva-x-4s", "sd-Deva-x-5", "sd-Deva-x-5s", "sd-x-3", "sd-x-3s", "sd-x-4", "sd-x-4s", "sd-x-5", "sd-x-5s", "si-x-3", "si-x-3s", "si-x-4", "si-x-4s", "si-x-5", "si-x-5s", "sk-x-3", "sk-x-3s", "sk-x-4", "sk-x-4s", "sk-x-5", "sk-x-5s", "sl-x-3", "sl-x-3s", "sl-x-4", "sl-x-4s", "sl-x-5", "sl-x-5s", "so-x-3", "so-x-3s", "so-x-4", "so-x-4s", "so-x-5", "so-x-5s", "sq-x-3", "sq-x-3s", "sq-x-4", "sq-x-4s", "sq-x-5", "sq-x-5s", "sr-Latn-XK-x-3", "sr-Latn-XK-x-3s", "sr-Latn-x-3", "sr-Latn-x-3s", "sr-Latn-x-4", "sr-Latn-x-4s", "sr-Latn-x-5", "sr-Latn-x-5s", "sr-ME-x-3", "sr-ME-x-3s", "sr-XK-x-3", "sr-XK-x-3s", "sr-x-3", "sr-x-3s", "sr-x-4", "sr-x-4s", "sr-x-5", "sr-x-5s", "su-x-3", "su-x-3s", "su-x-4", "su-x-4s", "su-x-5", "su-x-5s", "sv-x-3", "sv-x-3s", "sv-x-4", "sv-x-4s", "sv-x-5", "sv-x-5s", "sw-x-3", "sw-x-3s", "sw-x-4", "sw-x-4s", "sw-x-5", "sw-x-5s", "ta-x-3", "ta-x-3s", "ta-x-4", "ta-x-4s", "ta-x-5", "ta-x-5s", "te-x-3", "te-x-3s", "te-x-4", "te-x-4s", "te-x-5", "te-x-5s", "tg-x-3", "tg-x-3s", "tg-x-4", "tg-x-4s", "tg-x-5", "tg-x-5s", "th-x-3", "th-x-3s", "th-x-4", "th-x-4s", "th-x-5", "th-x-5s", "ti-x-3", "ti-x-3s", "ti-x-4", "ti-x-4s", "ti-x-5", "ti-x-5s", "tk-x-3", "tk-x-3s", "tk-x-4", "tk-x-4s", "tk-x-5", "tk-x-5s", "to-x-3", "to-x-3s", "to-x-4", "to-x-4s", "to-x-5", "to-x-5s", "tr-x-3", "tr-x-3s", "tr-x-4", "tr-x-4s", "tr-x-5", "tr-x-5s", "tt-x-3", "tt-x-3s", "tt-x-5", "tt-x-5s", "uk-x-3", "uk-x-3s", "uk-x-4", "uk-x-4s", "uk-x-5", "uk-x-5s", "und-x-3", "und-x-3s", "und-x-4", "und-x-4s", "und-x-5", "und-x-5s", "ur-x-3", "ur-x-3s", "ur-x-4", "ur-x-4s", "ur-x-5", "ur-x-5s", "uz-Cyrl-x-3", "uz-Cyrl-x-3s", "uz-Cyrl-x-4", "uz-Cyrl-x-4s", "uz-Cyrl-x-5", "uz-Cyrl-x-5s", "uz-x-3", "uz-x-3s", "uz-x-4", "uz-x-4s", "uz-x-5", "uz-x-5s", "vi-x-3", "vi-x-3s", "vi-x-5", "vi-x-5s", "wo-x-3", "wo-x-3s", "wo-x-5", "wo-x-5s", "xh-x-3", "xh-x-3s", "xh-x-5", "xh-x-5s", "yo-BJ-x-3", "yo-BJ-x-3s", "yo-BJ-x-4", "yo-BJ-x-4s", "yo-BJ-x-5", "yo-BJ-x-5s", "yo-x-3", "yo-x-3s", "yo-x-4", 
"yo-x-4s", "yo-x-5", "yo-x-5s", "yrl-x-3", "yrl-x-3s", "yrl-x-4", "yrl-x-4s", "yrl-x-5", "yrl-x-5s", "yue-Hans-x-3", "yue-Hans-x-3s", "yue-Hans-x-5", "yue-Hans-x-5s", "yue-x-3", "yue-x-3s", "yue-x-5", "yue-x-5s", "zh-Hant-x-3", "zh-Hant-x-3s", "zh-Hant-x-5", "zh-Hant-x-5s", "zh-x-3", "zh-x-3s", "zh-x-5", "zh-x-5s", "zu-x-3", "zu-x-3s", "zu-x-4", "zu-x-4s", "zu-x-5", "zu-x-5s"];
}
143 changes: 143 additions & 0 deletions experimental/zerotrie/tests/locale_aux_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use icu_locid::extensions::private::Private;
use icu_locid::Locale;
use litemap::LiteMap;
use std::collections::BTreeSet;
use writeable::Writeable;
use zerotrie::ZeroTriePerfectHash;
use zerotrie::ZeroTrieSimpleAscii;

// Textually includes the shared fixture file `data/data.rs` (path relative to
// this test file), so its items are reachable as `testdata::...` from this test.
mod testdata {
include!("data/data.rs");
}

use testdata::locales_with_aux::{NUM_UNIQUE_BLOBS, STRINGS};
use testdata::strings_to_litemap;

/// Pins the footprint of storing every `STRINGS` entry (locale and `-x-` aux key
/// combined into one key) in each trie type, and compares it with the cost of
/// keeping the same keys as a plain string table.
///
/// The trie byte lengths and the summed key length are asserted on every target.
/// The totals that add pointer arrays multiply by `size_of::<usize>()`, and their
/// expected values were computed with 8-byte pointers. Those assertions therefore
/// run only when `target_pointer_width = "64"`; on any other target they would
/// fail for a reason that has nothing to do with the tries.
#[test]
fn test_combined() {
    let litemap = strings_to_litemap(STRINGS);
    let ptr_size = core::mem::size_of::<usize>();
    // `cfg!` (rather than `#[cfg]`) keeps the guarded code type-checked everywhere.
    let has_8_byte_pointers = cfg!(target_pointer_width = "64");

    let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();

    // Lookup table size:
    assert_eq!(trie.byte_len(), 5104);

    // Size including pointer array:
    if has_8_byte_pointers {
        assert_eq!(trie.byte_len() + NUM_UNIQUE_BLOBS * ptr_size, 8392);
    }

    let trie = ZeroTriePerfectHash::try_from(&litemap).unwrap();

    // Lookup table size:
    assert_eq!(trie.byte_len(), 5157);

    // Size including pointer array:
    if has_8_byte_pointers {
        assert_eq!(trie.byte_len() + NUM_UNIQUE_BLOBS * ptr_size, 8445);
    }

    let total_str_len = litemap.iter_keys().map(|k| k.len()).sum::<usize>();
    assert_eq!(total_str_len, 8115);

    if has_8_byte_pointers {
        // Lookup table size (string bytes plus one pointer-sized entry per string):
        assert_eq!(total_str_len + STRINGS.len() * ptr_size, 16531);

        // Size including pointer array: (2x for the lookup array and value array)
        assert_eq!(total_str_len + 2 * STRINGS.len() * ptr_size, 24947);

        // Size including u16 pointer array:
        assert_eq!(
            total_str_len
                + STRINGS.len() * ptr_size
                + STRINGS.len() * core::mem::size_of::<u16>()
                + NUM_UNIQUE_BLOBS * ptr_size,
            21923
        );
    }
}

#[test]
fn test_aux_split() {
let locales: Vec<Locale> = STRINGS.iter().map(|s| s.parse().unwrap()).collect();

let aux_keys: BTreeSet<&Private> = locales.iter().map(|l| &l.extensions.private).collect();
assert_eq!(aux_keys.len(), 6);

let mut cumulative_index = 0;
let mut total_simpleascii_len = 0;
let mut total_perfecthash_len = 0;
let mut unique_locales = BTreeSet::new();
for private in aux_keys.iter() {
let current_locales: Vec<Locale> = locales
.iter()
.filter(|l| l.extensions.private == **private)
.map(|l| {
let mut l = l.clone();
l.extensions.private = Private::default();
l
})
.collect();
let litemap: LiteMap<Vec<u8>, usize> = current_locales
.iter()
.map(|l| {
(l.write_to_string().into_owned().into_bytes(), {
cumulative_index += 1;
cumulative_index - 1
})
})
.collect();

let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap();
total_simpleascii_len += trie.byte_len();

let trie = ZeroTriePerfectHash::try_from(&litemap).unwrap();
total_perfecthash_len += trie.byte_len();

for k in litemap.iter_keys() {
unique_locales.insert(k.clone());
}
}
assert_eq!(cumulative_index, locales.len());

assert_eq!(total_simpleascii_len, 5098);
assert_eq!(total_perfecthash_len, 5302);

let total_unique_locale_str_len = unique_locales.iter().map(|v| v.len()).sum::<usize>();
assert_eq!(total_unique_locale_str_len, 945);

// Size including pointer array:
assert_eq!(
total_simpleascii_len + NUM_UNIQUE_BLOBS * core::mem::size_of::<usize>(),
8386
);
assert_eq!(
total_perfecthash_len + NUM_UNIQUE_BLOBS * core::mem::size_of::<usize>(),
8590
);
// 2x for the lookup arrays and value arrays
assert_eq!(
total_unique_locale_str_len + 2 * STRINGS.len() * core::mem::size_of::<usize>(),
17777
);

// Size including u16 pointer array:
assert_eq!(
total_unique_locale_str_len
+ STRINGS.len() * core::mem::size_of::<usize>()
+ STRINGS.len() * core::mem::size_of::<u16>()
+ NUM_UNIQUE_BLOBS * core::mem::size_of::<usize>(),
14753
);
}

0 comments on commit 3c6b7e4

Please sign in to comment.