From 4c28a99adfb9003ae434b5751827391e609c8022 Mon Sep 17 00:00:00 2001 From: Robert Bastian <4706271+robertbastian@users.noreply.github.com> Date: Wed, 18 Sep 2024 23:57:04 +0200 Subject: [PATCH] New properties API (#5548) --- components/casemap/src/titlecase.rs | 14 +- components/casemap/tests/gen_greek_to_me.rs | 9 +- components/collator/src/elements.rs | 2 +- components/collections/src/iterator_utils.rs | 178 +- .../experimental/src/personnames/formatter.rs | 9 +- .../specifications/derive_locale.rs | 24 +- .../src/transliterate/compile/mod.rs | 24 +- .../src/transliterate/compile/parse.rs | 7 +- .../src/unicodeset_parse/parse.rs | 78 +- components/normalizer/src/lib.rs | 2 +- components/normalizer/src/properties.rs | 4 +- components/normalizer/tests/tests.rs | 5 +- components/properties/Cargo.toml | 2 +- components/properties/README.md | 26 +- components/properties/src/bidi.rs | 394 +- components/properties/src/bidi_data.rs | 214 - components/properties/src/code_point_map.rs | 345 ++ components/properties/src/code_point_set.rs | 381 ++ components/properties/src/error.rs | 28 - components/properties/src/lib.rs | 72 +- components/properties/src/maps.rs | 653 --- components/properties/src/names.rs | 1494 +++++++ components/properties/src/props.rs | 3526 +++++++++-------- components/properties/src/provider.rs | 365 +- .../src/provider/{bidi_data.rs => bidi.rs} | 6 +- components/properties/src/provider/props.rs | 270 ++ components/properties/src/runtime.rs | 267 +- components/properties/src/script.rs | 217 +- components/properties/src/sets.rs | 2384 ----------- components/properties/src/trievalue.rs | 10 +- components/properties/src/unicode_set.rs | 172 + components/segmenter/src/line.rs | 4 +- components/segmenter/tests/spec_test.rs | 28 +- ffi/capi/Cargo.toml | 2 +- ffi/capi/bindings/c/CodePointSetData.h | 3 +- ffi/capi/bindings/c/Error.d.h | 1 - .../bindings/cpp/icu4x/CodePointSetData.d.hpp | 3 +- .../bindings/cpp/icu4x/CodePointSetData.hpp | 7 +- ffi/capi/bindings/cpp/icu4x/Error.d.hpp | 2 - ffi/capi/bindings/cpp/icu4x/Error.hpp | 1 - ffi/capi/bindings/dart/Bidi.g.dart | 2 + .../bindings/dart/CodePointMapData16.g.dart | 14 +- .../bindings/dart/CodePointMapData8.g.dart | 36 +- .../bindings/dart/CodePointSetData.g.dart | 152 +- ffi/capi/bindings/dart/Error.g.dart | 4 - .../GeneralCategoryNameToMaskMapper.g.dart | 10 +- .../dart/PropertyValueNameToEnumMapper.g.dart | 30 +- .../bindings/dart/ScriptWithExtensions.g.dart | 2 +- ffi/capi/bindings/dart/UnicodeSetData.g.dart | 12 +- ffi/capi/bindings/js/Bidi.d.ts | 2 + ffi/capi/bindings/js/Bidi.mjs | 2 + ffi/capi/bindings/js/CodePointMapData16.d.ts | 4 +- ffi/capi/bindings/js/CodePointMapData16.mjs | 4 +- ffi/capi/bindings/js/CodePointMapData8.d.ts | 4 +- ffi/capi/bindings/js/CodePointMapData8.mjs | 4 +- ffi/capi/bindings/js/CodePointSetData.d.ts | 5 +- ffi/capi/bindings/js/CodePointSetData.mjs | 9 +- ffi/capi/bindings/js/Error.d.ts | 1 - ffi/capi/bindings/js/Error.mjs | 3 - .../js/GeneralCategoryNameToMaskMapper.d.ts | 4 +- .../js/GeneralCategoryNameToMaskMapper.mjs | 4 +- .../js/PropertyValueNameToEnumMapper.d.ts | 6 +- .../js/PropertyValueNameToEnumMapper.mjs | 6 +- ffi/capi/bindings/js/UnicodeSetData.d.ts | 6 +- ffi/capi/bindings/js/UnicodeSetData.mjs | 6 +- ffi/capi/src/bidi.rs | 22 +- ffi/capi/src/collections_sets.rs | 8 +- ffi/capi/src/errors.rs | 15 - ffi/capi/src/properties_maps.rs | 140 +- ffi/capi/src/properties_names.rs | 176 +- ffi/capi/src/properties_sets.rs | 539 ++- ffi/capi/src/properties_unisets.rs | 24 +- ffi/capi/src/script.rs | 20 +- ffi/capi/tests/missing_apis.txt | 624 +-- ffi/harfbuzz/src/lib.rs | 38 +- ...idi_auxiliary_properties_v1_marker.rs.data | 20 +- ...bidi_class_name_to_value_v1_marker.rs.data | 20 +- .../data/bidi_class_v1_marker.rs.data | 2 +- ...class_value_to_long_name_v1_marker.rs.data | 20 +- ...lass_value_to_short_name_v1_marker.rs.data | 20 +- ...ning_class_name_to_value_v1_marker.rs.data | 20 +- ...anonical_combining_class_v1_marker.rs.data | 2 +- ...class_value_to_long_name_v1_marker.rs.data | 20 +- ...lass_value_to_short_name_v1_marker.rs.data | 20 +- ...sian_width_name_to_value_v1_marker.rs.data | 20 +- .../data/east_asian_width_v1_marker.rs.data | 2 +- ...width_value_to_long_name_v1_marker.rs.data | 20 +- ...idth_value_to_short_name_v1_marker.rs.data | 20 +- ...egory_mask_name_to_value_v1_marker.rs.data | 20 +- ...l_category_name_to_value_v1_marker.rs.data | 20 +- .../data/general_category_v1_marker.rs.data | 2 +- ...egory_value_to_long_name_v1_marker.rs.data | 20 +- ...gory_value_to_short_name_v1_marker.rs.data | 20 +- ...ster_break_name_to_value_v1_marker.rs.data | 20 +- .../grapheme_cluster_break_v1_marker.rs.data | 2 +- ...break_value_to_long_name_v1_marker.rs.data | 20 +- ...reak_value_to_short_name_v1_marker.rs.data | 20 +- ...lable_type_name_to_value_v1_marker.rs.data | 20 +- .../hangul_syllable_type_v1_marker.rs.data | 2 +- ..._type_value_to_long_name_v1_marker.rs.data | 20 +- ...type_value_to_short_name_v1_marker.rs.data | 20 +- ...c_category_name_to_value_v1_marker.rs.data | 20 +- .../indic_syllabic_category_v1_marker.rs.data | 2 +- ...egory_value_to_long_name_v1_marker.rs.data | 20 +- ...gory_value_to_short_name_v1_marker.rs.data | 20 +- ...ining_type_name_to_value_v1_marker.rs.data | 20 +- .../data/joining_type_v1_marker.rs.data | 2 +- ..._type_value_to_long_name_v1_marker.rs.data | 20 +- ...type_value_to_short_name_v1_marker.rs.data | 20 +- ...line_break_name_to_value_v1_marker.rs.data | 20 +- .../data/line_break_v1_marker.rs.data | 2 +- ...break_value_to_long_name_v1_marker.rs.data | 20 +- ...reak_value_to_short_name_v1_marker.rs.data | 20 +- provider/data/properties/data/mod.rs | 76 +- .../script_name_to_value_v1_marker.rs.data | 20 +- .../properties/data/script_v1_marker.rs.data | 2 +- ...cript_value_to_long_name_v1_marker.rs.data | 20 +- ...ript_value_to_short_name_v1_marker.rs.data | 20 +- ...ence_break_name_to_value_v1_marker.rs.data | 20 +- .../data/sentence_break_v1_marker.rs.data | 2 +- ...break_value_to_long_name_v1_marker.rs.data | 20 +- ...reak_value_to_short_name_v1_marker.rs.data | 20 +- ...word_break_name_to_value_v1_marker.rs.data | 20 +- .../data/word_break_v1_marker.rs.data | 2 +- ...break_value_to_long_name_v1_marker.rs.data | 20 +- ...reak_value_to_short_name_v1_marker.rs.data | 20 +- ...idi_auxiliary_properties_v1_marker.rs.data | 20 +- ...bidi_class_name_to_value_v1_marker.rs.data | 20 +- .../stubdata/bidi_class_v1_marker.rs.data | 2 +- ...class_value_to_long_name_v1_marker.rs.data | 20 +- ...lass_value_to_short_name_v1_marker.rs.data | 20 +- ...ning_class_name_to_value_v1_marker.rs.data | 20 +- ...anonical_combining_class_v1_marker.rs.data | 2 +- ...class_value_to_long_name_v1_marker.rs.data | 20 +- ...lass_value_to_short_name_v1_marker.rs.data | 20 +- ...sian_width_name_to_value_v1_marker.rs.data | 20 +- .../east_asian_width_v1_marker.rs.data | 2 +- ...width_value_to_long_name_v1_marker.rs.data | 20 +- ...idth_value_to_short_name_v1_marker.rs.data | 20 +- ...egory_mask_name_to_value_v1_marker.rs.data | 20 +- ...l_category_name_to_value_v1_marker.rs.data | 20 +- .../general_category_v1_marker.rs.data | 2 +- ...egory_value_to_long_name_v1_marker.rs.data | 20 +- ...gory_value_to_short_name_v1_marker.rs.data | 20 +- ...ster_break_name_to_value_v1_marker.rs.data | 20 +- .../grapheme_cluster_break_v1_marker.rs.data | 2 +- ...break_value_to_long_name_v1_marker.rs.data | 20 +- ...reak_value_to_short_name_v1_marker.rs.data | 20 +- ...lable_type_name_to_value_v1_marker.rs.data | 20 +- .../hangul_syllable_type_v1_marker.rs.data | 2 +- ..._type_value_to_long_name_v1_marker.rs.data | 20 +- ...type_value_to_short_name_v1_marker.rs.data | 20 +- ...c_category_name_to_value_v1_marker.rs.data | 20 +- .../indic_syllabic_category_v1_marker.rs.data | 2 +- ...egory_value_to_long_name_v1_marker.rs.data | 20 +- ...gory_value_to_short_name_v1_marker.rs.data | 20 +- ...ining_type_name_to_value_v1_marker.rs.data | 20 +- .../stubdata/joining_type_v1_marker.rs.data | 2 +- ..._type_value_to_long_name_v1_marker.rs.data | 20 +- ...type_value_to_short_name_v1_marker.rs.data | 20 +- ...line_break_name_to_value_v1_marker.rs.data | 20 +- .../stubdata/line_break_v1_marker.rs.data | 2 +- ...break_value_to_long_name_v1_marker.rs.data | 20 +- ...reak_value_to_short_name_v1_marker.rs.data | 20 +- provider/data/properties/stubdata/mod.rs | 76 +- .../script_name_to_value_v1_marker.rs.data | 20 +- .../stubdata/script_v1_marker.rs.data | 2 +- ...cript_value_to_long_name_v1_marker.rs.data | 20 +- ...ript_value_to_short_name_v1_marker.rs.data | 20 +- ...ence_break_name_to_value_v1_marker.rs.data | 20 +- .../stubdata/sentence_break_v1_marker.rs.data | 2 +- ...break_value_to_long_name_v1_marker.rs.data | 20 +- ...reak_value_to_short_name_v1_marker.rs.data | 20 +- ...word_break_name_to_value_v1_marker.rs.data | 20 +- .../stubdata/word_break_v1_marker.rs.data | 2 +- ...break_value_to_long_name_v1_marker.rs.data | 20 +- ...reak_value_to_short_name_v1_marker.rs.data | 20 +- provider/registry/src/lib.rs | 76 +- provider/source/src/currency/essentials.rs | 8 +- provider/source/src/list/mod.rs | 4 +- .../src/properties/{bidi_data.rs => bidi.rs} | 8 +- provider/source/src/properties/bin_cp_set.rs | 4 +- provider/source/src/properties/bin_uniset.rs | 18 +- .../src/properties/enum_codepointtrie.rs | 9 +- provider/source/src/properties/mod.rs | 2 +- provider/source/src/properties/script.rs | 14 +- provider/source/src/segmenter/mod.rs | 57 +- tools/make/diplomat-coverage/src/allowlist.rs | 41 +- tools/make/diplomat-coverage/src/main.rs | 26 +- 189 files changed, 7188 insertions(+), 7970 deletions(-) delete mode 100644 components/properties/src/bidi_data.rs create mode 100644 components/properties/src/code_point_map.rs create mode 100644 components/properties/src/code_point_set.rs delete mode 100644 components/properties/src/error.rs delete mode 100644 components/properties/src/maps.rs create mode 100644 components/properties/src/names.rs rename components/properties/src/provider/{bidi_data.rs => bidi.rs} (99%) create mode 100644 components/properties/src/provider/props.rs delete mode 100644 components/properties/src/sets.rs create mode 100644 components/properties/src/unicode_set.rs rename provider/source/src/properties/{bidi_data.rs => bidi.rs} (95%) diff --git a/components/casemap/src/titlecase.rs b/components/casemap/src/titlecase.rs index 49f189dea4f..7c13c20bfef 100644 --- a/components/casemap/src/titlecase.rs +++ b/components/casemap/src/titlecase.rs @@ -7,9 +7,9 @@ use crate::provider::CaseMapV1Marker; use crate::CaseMapper; use alloc::string::String; use icu_locale_core::LanguageIdentifier; -use icu_properties::maps::CodePointMapData; +use icu_properties::props::{GeneralCategory, GeneralCategoryGroup}; use icu_properties::provider::GeneralCategoryV1Marker; -use icu_properties::{GeneralCategory, GeneralCategoryGroup}; +use icu_properties::CodePointMapData; use icu_provider::prelude::*; use writeable::Writeable; @@ -221,7 +221,8 @@ impl TitlecaseMapper { pub const fn new() -> Self { Self { cm: CaseMapper::new(), - gc: icu_properties::maps::general_category().static_to_owned(), + gc: icu_properties::CodePointMapData::::new() + .static_to_owned(), } } @@ -240,7 +241,7 @@ impl TitlecaseMapper { P: DataProvider + DataProvider + ?Sized, { let cm = CaseMapper::try_new_unstable(provider)?; - let gc = icu_properties::maps::load_general_category(provider)?; + let gc = icu_properties::CodePointMapData::::try_new_unstable(provider)?; Ok(Self { cm, gc }) } } @@ -266,7 +267,8 @@ impl> TitlecaseMapper { pub const fn new_with_mapper(casemapper: CM) -> Self { Self { cm: casemapper, - gc: icu_properties::maps::general_category().static_to_owned(), + gc: icu_properties::CodePointMapData::::new() + .static_to_owned(), } } @@ -276,7 +278,7 @@ impl> TitlecaseMapper { where P: DataProvider + DataProvider + ?Sized, { - let gc = icu_properties::maps::load_general_category(provider)?; + let gc = icu_properties::CodePointMapData::::try_new_unstable(provider)?; Ok(Self { cm: casemapper, gc }) } diff --git a/components/casemap/tests/gen_greek_to_me.rs b/components/casemap/tests/gen_greek_to_me.rs index 35ebfcc6c66..565dc1d196c 100644 --- a/components/casemap/tests/gen_greek_to_me.rs +++ b/components/casemap/tests/gen_greek_to_me.rs @@ -7,14 +7,17 @@ use icu_casemap::greek_to_me::{ }; use icu_casemap::CaseMapper; use icu_normalizer::DecomposingNormalizerBorrowed; -use icu_properties::{maps, GeneralCategoryGroup, Script}; +use icu_properties::{ + props::{GeneralCategory, GeneralCategoryGroup, Script}, + CodePointMapData, +}; use std::collections::BTreeMap; use std::fmt::Write; fn main() { let decomposer = DecomposingNormalizerBorrowed::new_nfd(); - let script = maps::script(); - let gc = maps::general_category(); + let script = CodePointMapData::