diff --git a/components/collator/src/comparison.rs b/components/collator/src/comparison.rs index 3bf46f9bf23..c5b8bdefa85 100644 --- a/components/collator/src/comparison.rs +++ b/components/collator/src/comparison.rs @@ -29,9 +29,9 @@ use crate::provider::CollationTailoringV1Marker; use crate::{AlternateHandling, CollatorOptions, MaxVariable, ResolvedCollatorOptions, Strength}; use core::cmp::Ordering; use core::convert::TryFrom; -use icu_normalizer::provider::CanonicalDecompositionDataV1Marker; +use icu_normalizer::provider::CanonicalDecompositionDataV2Marker; use icu_normalizer::provider::CanonicalDecompositionTablesV1Marker; -use icu_normalizer::provider::DecompositionDataV1; +use icu_normalizer::provider::DecompositionDataV2; use icu_normalizer::provider::DecompositionTablesV1; use icu_normalizer::Decomposition; use icu_provider::prelude::*; @@ -220,7 +220,7 @@ pub struct Collator { diacritics: DataPayload, options: CollatorOptionsBitField, reordering: Option>, - decompositions: DataPayload, + decompositions: DataPayload, tables: DataPayload, lithuanian_dot_above: bool, } @@ -276,7 +276,7 @@ impl Collator { + DataProvider + DataProvider + DataProvider - + DataProvider + + DataProvider + DataProvider + ?Sized, { @@ -296,7 +296,7 @@ impl Collator { fn try_new_unstable_internal( provider: &D, root: DataPayload, - decompositions: DataPayload, + decompositions: DataPayload, tables: DataPayload, jamo: DataPayload, special_primaries: impl FnOnce() -> Result< @@ -364,7 +364,7 @@ pub struct CollatorBorrowed<'a> { diacritics: &'a CollationDiacriticsV1<'a>, options: CollatorOptionsBitField, reordering: Option<&'a CollationReorderingV1<'a>>, - decompositions: &'a DecompositionDataV1<'a>, + decompositions: &'a DecompositionDataV2<'a>, tables: &'a DecompositionTablesV1<'a>, lithuanian_dot_above: bool, } @@ -381,7 +381,7 @@ impl CollatorBorrowed<'static> { let provider = &crate::provider::Baked; let decompositions = - 
icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V1_MARKER; + icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V2_MARKER; let tables = icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_TABLES_V1_MARKER; let root = crate::provider::Baked::SINGLETON_COLLATION_ROOT_V1_MARKER; diff --git a/components/collator/src/elements.rs b/components/collator/src/elements.rs index 88088d92568..4384ca03c8b 100644 --- a/components/collator/src/elements.rs +++ b/components/collator/src/elements.rs @@ -21,7 +21,7 @@ use core::char::REPLACEMENT_CHARACTER; use icu_collections::char16trie::TrieResult; use icu_collections::codepointtrie::CodePointTrie; -use icu_normalizer::provider::DecompositionDataV1; +use icu_normalizer::provider::DecompositionDataV2; use icu_normalizer::provider::DecompositionTablesV1; use icu_properties::props::CanonicalCombiningClass; use smallvec::SmallVec; @@ -31,67 +31,66 @@ use zerovec::{zeroslice, ZeroSlice}; use crate::provider::CollationDataV1; -/// Marker that a complex decomposition isn't round-trippable -/// under re-composition. +/// Marker that the decomposition does not round trip via NFC. /// -/// TODO: When taking a data format break, swap this and -/// `BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER` around -/// to make backward-combiningness use the same bit in all -/// cases. -const NON_ROUND_TRIP_MARKER: u16 = 0b1; +/// See components/normalizer/trie-value-format.md +const NON_ROUND_TRIP_MARKER: u32 = 1 << 30; -/// Marker that a complex decomposition starts with a starter -/// that can combine backwards. -const BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER: u16 = 0b10; - -/// Values above this are treated as a BMP character. -const HIGHEST_MARKER: u16 = NON_ROUND_TRIP_MARKER | BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER; - -/// Marker value for U+FDFA in NFKD -const FDFA_MARKER: u16 = 3; +/// Marker that the first character of the decomposition +/// can combine backwards. 
+/// +/// See components/normalizer/trie-value-format.md +const BACKWARD_COMBINING_MARKER: u32 = 1 << 31; -/// Marker for starters that decompose to themselves but may -/// combine backwards under canonical composition. -/// (Main trie only; not used in the supplementary trie.) -const BACKWARD_COMBINING_STARTER_MARKER: u32 = 1; +/// Mask for the bits that have to be zero for this to be a BMP +/// singleton decomposition, or a value baked into the surrogate +/// range. +/// +/// See components/normalizer/trie-value-format.md +const HIGH_ZEROS_MASK: u32 = 0x3FFF0000; -// Magic marker trie value for characters whose decomposition -// starts with a non-starter. The actual decomposition is -// hard-coded. -const SPECIAL_NON_STARTER_DECOMPOSITION_MARKER: u32 = 2; +/// Mask for the bits that have to be zero for this to be a complex +/// decomposition. +/// +/// See components/normalizer/trie-value-format.md +const LOW_ZEROS_MASK: u32 = 0xFFE0; -/// `u16` version of the previous marker value. -const SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16: u16 = 2; +/// Marker value for U+FDFA in NFKD. (Unified with Hangul syllable marker, +/// but they differ by `NON_ROUND_TRIP_MARKER`.) +/// +/// See components/normalizer/trie-value-format.md +const FDFA_MARKER: u16 = 1; /// Checks if a trie value carries a (non-zero) canonical /// combining class. +/// +/// See components/normalizer/trie-value-format.md fn trie_value_has_ccc(trie_value: u32) -> bool { - (trie_value & 0xFFFFFF00) == 0xD800 + (trie_value & 0x3FFFFE00) == 0xD800 } /// Checks if the trie signifies a special non-starter decomposition. +/// +/// See components/normalizer/trie-value-format.md fn trie_value_indicates_special_non_starter_decomposition(trie_value: u32) -> bool { - trie_value == SPECIAL_NON_STARTER_DECOMPOSITION_MARKER + (trie_value & 0x3FFFFF00) == 0xD900 } /// Checks if a trie value signifies a character whose decomposition /// starts with a non-starter. 
+/// +/// See components/normalizer/trie-value-format.md fn decomposition_starts_with_non_starter(trie_value: u32) -> bool { trie_value_has_ccc(trie_value) - || trie_value_indicates_special_non_starter_decomposition(trie_value) } /// Extracts a canonical combining class (possibly zero) from a trie value. /// -/// # Panics -/// -/// The trie value must not be one that signifies a special non-starter -/// decomposition. (Debug-only) +/// See components/normalizer/trie-value-format.md fn ccc_from_trie_value(trie_value: u32) -> CanonicalCombiningClass { if trie_value_has_ccc(trie_value) { CanonicalCombiningClass(trie_value as u8) } else { - debug_assert_ne!(trie_value, SPECIAL_NON_STARTER_DECOMPOSITION_MARKER); CanonicalCombiningClass::NotReordered } } @@ -809,6 +808,8 @@ where /// The `CollationElement32` mapping for the Combining Diacritical Marks block. diacritics: &'data ZeroSlice, /// NFD main trie. + /// + /// See components/normalizer/trie-value-format.md trie: &'data CodePointTrie<'data, u32>, /// NFD complex decompositions on the BMP scalars16: &'data ZeroSlice, @@ -835,7 +836,7 @@ where tailoring: &'data CollationDataV1, jamo: &'data [::ULE; JAMO_COUNT], diacritics: &'data ZeroSlice, - decompositions: &'data DecompositionDataV1, + decompositions: &'data DecompositionDataV2, tables: &'data DecompositionTablesV1, numeric_primary: Option, lithuanian_dot_above: bool, @@ -1040,36 +1041,37 @@ where // Hangul syllables in lookahead, because Hangul isn't allowed to // participate in contractions, and the trie default is that a character // is its own decomposition. 
+ + // See components/normalizer/trie-value-format.md let decomposition = c.trie_val; - if decomposition <= BACKWARD_COMBINING_STARTER_MARKER { + if (decomposition & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) <= 1 { // The character is its own decomposition (or Hangul syllable) // Set the Canonical Combining Class to zero self.upcoming.push( CharacterAndClassAndTrieValue::new_with_non_decomposing_starter(c.character()), ); } else { - let trail_or_complex = (decomposition >> 16) as u16; - let lead = decomposition as u16; - if lead > HIGHEST_MARKER && trail_or_complex != 0 { + let high_zeros = (decomposition & HIGH_ZEROS_MASK) == 0; + let low_zeros = (decomposition & LOW_ZEROS_MASK) == 0; + if !high_zeros && !low_zeros { // Decomposition into two BMP characters: starter and non-starter - self.upcoming.push( - CharacterAndClassAndTrieValue::new_with_non_decomposing_starter(char_from_u16( - lead, - )), - ); - let low_c = char_from_u16(trail_or_complex); + let starter = char_from_u32(decomposition & 0x7FFF); + let low_c = char_from_u32((decomposition >> 15) & 0x7FFF); + self.upcoming + .push(CharacterAndClassAndTrieValue::new_with_non_decomposing_starter(starter)); let trie_value = self.trie.get(low_c); self.upcoming.push( CharacterAndClassAndTrieValue::new_with_non_special_decomposition_trie_val( low_c, trie_value, ), ); - } else if trail_or_complex == 0 { + } else if high_zeros { + let singleton = decomposition as u16; debug_assert_ne!( - lead, FDFA_MARKER, + singleton, FDFA_MARKER, "How come U+FDFA NFKD marker seen in NFD?" ); - if (lead & 0xFF00) == 0xD800 { + if (singleton & 0xFF00) == 0xD800 { // We're at the end of the stream, so we aren't dealing with the // next undecomposed starter but are dealing with an // already-decomposed non-starter. Just put it back. 
@@ -1078,35 +1080,22 @@ where #[cfg(debug_assertions)] debug_assert!(self.iter_exhausted); } else { - debug_assert_ne!(lead, SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16); // Decomposition into one BMP character self.upcoming.push( CharacterAndClassAndTrieValue::new_with_non_decomposing_starter( - char_from_u16(lead), + char_from_u16(singleton), ), ); } } else { - // Complex decomposition - // Format for 16-bit value: - // 15..13: length minus two for 16-bit case and length minus one for - // the 32-bit case. Length 8 needs to fit in three bits in - // the 16-bit case, and this way the value is future-proofed - // up to 9 in the 16-bit case. Zero is unused and length one - // in the 16-bit case goes directly into the trie. - // 12: 1 if all trailing characters are guaranteed non-starters, - // 0 if no guarantees about non-starterness. - // Note: The bit choice is this way around to allow for - // dynamically falling back to not having this but instead - // having one more bit for length by merely choosing - // different masks. - // 11..0: Start offset in storage. If less than the length of - // scalars16, the offset is into scalars16. Otherwise, - // the offset minus the length of scalars16 is an offset - // into scalars32. - let offset = usize::from(trail_or_complex & 0xFFF); + debug_assert!(low_zeros); + // Only 12 of 14 bits used as of Unicode 16. + let offset = (((decomposition & !(0b11 << 30)) >> 16) as usize) - 1; + // Only 3 of 4 bits used as of Unicode 16. 
+ let len_bits = decomposition & 0b1111; + let only_non_starters_in_trail = (decomposition & 0b10000) != 0; if offset < self.scalars16.len() { - let len = usize::from(trail_or_complex >> 13) + 2; + let len = (len_bits + 2) as usize; for u in unwrap_or_gigo( self.scalars16.get_subslice(offset..offset + len), SINGLE_REPLACEMENT_CHARACTER_U16, // single instead of empty for consistency with the other code path @@ -1119,7 +1108,7 @@ where .push(CharacterAndClassAndTrieValue::new_with_non_special_decomposition_trie_val(ch, trie_value)); } } else { - let len = usize::from(trail_or_complex >> 13) + 1; + let len = (len_bits + 1) as usize; let offset32 = offset - self.scalars16.len(); for ch in unwrap_or_gigo( self.scalars32.get_subslice(offset32..offset32 + len), @@ -1132,7 +1121,7 @@ where .push(CharacterAndClassAndTrieValue::new_with_non_special_decomposition_trie_val(ch, trie_value)); } } - search_start_combining = trail_or_complex & 0x1000 == 0; + search_start_combining = !only_non_starters_in_trail; } } let start_combining = if search_start_combining { @@ -1289,8 +1278,9 @@ where // optimize based on that bet. let hangul_offset = u32::from(c).wrapping_sub(HANGUL_S_BASE); // SIndex in the spec if hangul_offset >= HANGUL_S_COUNT { + // See components/normalizer/trie-value-format.md let decomposition = c_c_tv.trie_val; - if decomposition <= BACKWARD_COMBINING_STARTER_MARKER { + if (decomposition & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) == 0 { // The character is its own decomposition let jamo_index = (c as usize).wrapping_sub(HANGUL_L_BASE as usize); // Attribute belongs on an inner expression, but @@ -1344,22 +1334,22 @@ where // handle `Implicit` and `Offset` tags here. 
} } else { - let trail_or_complex = (decomposition >> 16) as u16; - let lead = decomposition as u16; - if lead > HIGHEST_MARKER && trail_or_complex != 0 { + let high_zeros = (decomposition & HIGH_ZEROS_MASK) == 0; + let low_zeros = (decomposition & LOW_ZEROS_MASK) == 0; + if !high_zeros && !low_zeros { // Decomposition into two BMP characters: starter and non-starter - c = char_from_u16(lead); + c = char_from_u32(decomposition & 0x7FFF); ce32 = data.ce32_for_char(c); if ce32 == FALLBACK_CE32 { data = self.root; ce32 = data.ce32_for_char(c); } - let combining = char_from_u16(trail_or_complex); + let combining = char_from_u32((decomposition >> 15) & 0x7FFF); if self.is_next_decomposition_starts_with_starter() { let diacritic_index = - (trail_or_complex as usize).wrapping_sub(COMBINING_DIACRITICS_BASE); + (combining as usize).wrapping_sub(COMBINING_DIACRITICS_BASE); if let Some(secondary) = self.diacritics.get(diacritic_index) { - debug_assert!(trail_or_complex != 0x0344, "Should never have COMBINING GREEK DIALYTIKA TONOS here, since it should have decomposed further."); + debug_assert_ne!(combining, '\u{0344}', "Should never have COMBINING GREEK DIALYTIKA TONOS here, since it should have decomposed further."); if let Some(ce) = ce32.to_ce_simple_or_long_primary() { let ce_for_combining = CollationElement::new_from_secondary(secondary); @@ -1409,13 +1399,14 @@ where } combining_characters .push(CharacterAndClass::new_with_placeholder(combining)); - } else if trail_or_complex == 0 { + } else if high_zeros { + let singleton = decomposition as u16; debug_assert_ne!( - lead, FDFA_MARKER, + singleton, FDFA_MARKER, "How come U+FDFA NFKD marker seen in NFD?" 
); // Decomposition into one BMP character - c = char_from_u16(lead); + c = char_from_u16(singleton); ce32 = data.ce32_for_char(c); if ce32 == FALLBACK_CE32 { data = self.root; @@ -1428,26 +1419,14 @@ where } } } else { - // Complex decomposition - // Format for 16-bit value: - // 15..13: length minus two for 16-bit case and length minus one for - // the 32-bit case. Length 8 needs to fit in three bits in - // the 16-bit case, and this way the value is future-proofed - // up to 9 in the 16-bit case. Zero is unused and length one - // in the 16-bit case goes directly into the trie. - // 12: 1 if all trailing characters are guaranteed non-starters, - // 0 if no guarantees about non-starterness. - // Note: The bit choice is this way around to allow for - // dynamically falling back to not having this but instead - // having one more bit for length by merely choosing - // different masks. - // 11..0: Start offset in storage. If less than the length of - // scalars16, the offset is into scalars16. Otherwise, - // the offset minus the length of scalars16 is an offset - // into scalars32. - let offset = usize::from(trail_or_complex & 0xFFF); + debug_assert!(low_zeros); + // Only 12 of 14 bits used as of Unicode 16. + let offset = (((decomposition & !(0b11 << 30)) >> 16) as usize) - 1; + // Only 3 of 4 bits used as of Unicode 16. 
+ let len_bits = decomposition & 0b1111; + let only_non_starters_in_trail = (decomposition & 0b10000) != 0; if offset < self.scalars16.len() { - let len = usize::from(trail_or_complex >> 13) + 2; + let len = (len_bits + 2) as usize; let (starter, tail) = self .scalars16 .get_subslice(offset..offset + len) @@ -1461,7 +1440,7 @@ where |(first, tail)| (char_from_u16(first), tail), ); c = starter; - if trail_or_complex & 0x1000 != 0 { + if only_non_starters_in_trail { for u in tail.iter() { let char_from_u = char_from_u16(u); let trie_value = self.trie.get(char_from_u); @@ -1498,7 +1477,7 @@ where } } } else { - let len = usize::from(trail_or_complex >> 13) + 1; + let len = (len_bits + 1) as usize; let offset32 = offset - self.scalars16.len(); let (starter, tail) = self .scalars32 @@ -1511,7 +1490,7 @@ where }); c = starter; - if trail_or_complex & 0x1000 != 0 { + if only_non_starters_in_trail { for ch in tail.iter() { let trie_value = self.trie.get(ch); let ccc = ccc_from_trie_value(trie_value); diff --git a/components/collator/tests/tests.rs b/components/collator/tests/tests.rs index 60722710f49..f8586852558 100644 --- a/components/collator/tests/tests.rs +++ b/components/collator/tests/tests.rs @@ -32,11 +32,11 @@ const _: () = { icu_normalizer_data::impl_canonical_compositions_v1_marker!(TestingProvider); icu_normalizer_data::impl_non_recursive_decomposition_supplement_v1_marker!(TestingProvider); - icu_normalizer_data::impl_canonical_decomposition_data_v1_marker!(TestingProvider); + icu_normalizer_data::impl_canonical_decomposition_data_v2_marker!(TestingProvider); icu_normalizer_data::impl_canonical_decomposition_tables_v1_marker!(TestingProvider); - icu_normalizer_data::impl_compatibility_decomposition_supplement_v1_marker!(TestingProvider); + icu_normalizer_data::impl_compatibility_decomposition_data_v2_marker!(TestingProvider); icu_normalizer_data::impl_compatibility_decomposition_tables_v1_marker!(TestingProvider); - 
icu_normalizer_data::impl_uts46_decomposition_supplement_v1_marker!(TestingProvider); + icu_normalizer_data::impl_uts46_decomposition_data_v2_marker!(TestingProvider); }; type StackString = arraystring::ArrayString; diff --git a/components/datetime/src/error.rs b/components/datetime/src/error.rs index dde16a054e9..7363d79072d 100644 --- a/components/datetime/src/error.rs +++ b/components/datetime/src/error.rs @@ -30,6 +30,12 @@ pub enum DateTimeFormatterLoadError { Data(DataError), } +impl From for DateTimeFormatterLoadError { + fn from(error: DataError) -> Self { + Self::Data(error) + } +} + /// An error from mixing calendar types in a formatter. #[derive(Display, Debug, Copy, Clone, PartialEq)] #[displaydoc("DateTimeFormatter for {this_kind} calendar was given a {date_kind:?} calendar")] diff --git a/components/datetime/src/neo.rs b/components/datetime/src/neo.rs index 988877f82a0..9b06d7e223b 100644 --- a/components/datetime/src/neo.rs +++ b/components/datetime/src/neo.rs @@ -21,7 +21,7 @@ use crate::MismatchedCalendarError; use core::fmt; use core::marker::PhantomData; use icu_calendar::any_calendar::IntoAnyCalendar; -use icu_calendar::{AnyCalendar, AnyCalendarPreferences}; +use icu_calendar::{AnyCalendar, AnyCalendarKind, AnyCalendarPreferences}; use icu_decimal::FixedDecimalFormatterPreferences; use icu_locale_core::preferences::extensions::unicode::keywords::{ CalendarAlgorithm, HourCycle, NumberingSystem, @@ -765,6 +765,36 @@ impl DateTimeFormatter { _calendar: PhantomData, }) } + + /// Returns the calendar system used in this formatter. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::calendar::AnyCalendarKind; + /// use icu::calendar::Date; + /// use icu::datetime::fieldsets::YMD; + /// use icu::datetime::DateTimeFormatter; + /// use icu::locale::locale; + /// use writeable::assert_writeable_eq; + /// + /// let formatter = DateTimeFormatter::try_new( + /// locale!("th").into(), + /// YMD::long(), + /// ) + /// .unwrap(); + /// + /// assert_writeable_eq!( + /// formatter.format_any_calendar(&Date::try_new_iso(2024, 12, 16).unwrap()), + /// "16 ธันวาคม 2567" + /// ); + /// + /// assert_eq!(formatter.calendar_kind(), AnyCalendarKind::Buddhist); + /// assert_eq!(formatter.calendar_kind().as_bcp47_string(), "buddhist"); + /// ``` + pub fn calendar_kind(&self) -> AnyCalendarKind { + self.calendar.kind() + } } /// A formatter optimized for time and time zone formatting. @@ -840,6 +870,9 @@ impl_display_with_writeable!(FormattedDateTime<'_>); impl FormattedDateTime<'_> { /// Gets the pattern used in this formatted value. + /// + /// From the pattern, one can check the properties of the included components, such as + /// the hour cycle being used for formatting. See [`DateTimePattern`]. 
pub fn pattern(&self) -> DateTimePattern { self.pattern.to_pattern() } diff --git a/components/datetime/src/pattern/pattern.rs b/components/datetime/src/pattern/pattern.rs index 1deb40e77bd..0888e31d0f6 100644 --- a/components/datetime/src/pattern/pattern.rs +++ b/components/datetime/src/pattern/pattern.rs @@ -75,6 +75,34 @@ size_test!(DateTimePattern, date_time_pattern_size, 32); /// assert_eq!(actual_components_bag, expected_components_bag); /// ``` /// +/// Check the hour cycle of a resolved pattern: +/// +/// ``` +/// use icu::calendar::Time; +/// use icu::datetime::fields::components; +/// use icu::datetime::fieldsets::T; +/// use icu::datetime::pattern::DateTimePattern; +/// use icu::datetime::TimeFormatter; +/// use icu::locale::locale; +/// use icu::locale::preferences::extensions::unicode::keywords::HourCycle; +/// use writeable::assert_writeable_eq; +/// +/// let pattern = TimeFormatter::try_new( +/// locale!("es-MX").into(), +/// T::medium(), +/// ) +/// .unwrap() +/// // The pattern can depend on the datetime being formatted. +/// .format(&Time::try_new(12, 0, 0, 0).unwrap()) +/// .pattern(); +/// +/// assert_writeable_eq!(pattern, "hh:mm:ss a"); +/// +/// // Get the hour cycle from the resolved components: +/// let components = components::Bag::from(&pattern); +/// assert_eq!(components.hour_cycle, Some(HourCycle::H12)); +/// ``` +/// /// [`DateTimeFormatter`]: crate::DateTimeFormatter /// [`FormattedDateTime::pattern`]: crate::FormattedDateTime::pattern /// [`TypedDateTimeNames`]: crate::pattern::TypedDateTimeNames diff --git a/components/decimal/src/format.rs b/components/decimal/src/format.rs index 967e65a85df..5c5a0d64860 100644 --- a/components/decimal/src/format.rs +++ b/components/decimal/src/format.rs @@ -4,11 +4,16 @@ //! Lower-level types for decimal formatting. 
+use core::fmt::Write; + use crate::grouper; use crate::options::*; +use crate::parts; use crate::provider::*; use fixed_decimal::Sign; use fixed_decimal::SignedFixedDecimal; +use writeable::Part; +use writeable::PartsWrite; use writeable::Writeable; /// An intermediate structure returned by [`FixedDecimalFormatter`](crate::FixedDecimalFormatter). @@ -23,43 +28,74 @@ pub struct FormattedFixedDecimal<'l> { impl FormattedFixedDecimal<'_> { /// Returns the affixes needed for the current sign, as (prefix, suffix) - fn get_affixes(&self) -> Option<(&str, &str)> { + fn get_affixes(&self) -> Option<(Part, (&str, &str))> { match self.value.sign() { Sign::None => None, - Sign::Negative => Some(self.symbols.minus_sign_affixes()), - Sign::Positive => Some(self.symbols.plus_sign_affixes()), + Sign::Negative => Some((parts::MINUS_SIGN, self.symbols.minus_sign_affixes())), + Sign::Positive => Some((parts::PLUS_SIGN, self.symbols.plus_sign_affixes())), } } } impl Writeable for FormattedFixedDecimal<'_> { - fn write_to(&self, sink: &mut W) -> core::result::Result<(), core::fmt::Error> + fn write_to_parts(&self, w: &mut W) -> core::result::Result<(), core::fmt::Error> where - W: core::fmt::Write + ?Sized, + W: writeable::PartsWrite + ?Sized, { let affixes = self.get_affixes(); - if let Some(affixes) = affixes { - sink.write_str(affixes.0)?; + if let Some((part, affixes)) = affixes { + w.with_part(part, |w| w.write_str(affixes.0))?; } let range = self.value.absolute.magnitude_range(); let upper_magnitude = *range.end(); - for m in range.rev() { - if m == -1 { - sink.write_str(self.symbols.decimal_separator())?; - } - #[allow(clippy::indexing_slicing)] // digit_at in 0..=9 - sink.write_char(self.digits.digits[self.value.digit_at(m) as usize])?; - if grouper::check( - upper_magnitude, - m, - self.options.grouping_strategy, - &self.symbols.grouping_sizes, - ) { - sink.write_str(self.symbols.grouping_separator())?; + let mut range = range.rev(); + let mut has_fraction = false; + 
w.with_part(parts::INTEGER, |w| { + loop { + let m = match range.next() { + Some(m) if m < 0 => { + has_fraction = true; + break Ok(()); + } + Some(m) => m, + None => { + break Ok(()); + } + }; + #[allow(clippy::indexing_slicing)] // digit_at in 0..=9 + w.write_char(self.digits.digits[self.value.digit_at(m) as usize])?; + if grouper::check( + upper_magnitude, + m, + self.options.grouping_strategy, + &self.symbols.grouping_sizes, + ) { + w.with_part(parts::GROUP, |w| { + w.write_str(self.symbols.grouping_separator()) + })?; + } } + })?; + if has_fraction { + w.with_part(parts::DECIMAL, |w| { + w.write_str(self.symbols.decimal_separator()) + })?; + w.with_part(parts::FRACTION, |w| { + let mut m = -1; // read in the previous loop + loop { + #[allow(clippy::indexing_slicing)] // digit_at in 0..=9 + w.write_char(self.digits.digits[self.value.digit_at(m) as usize])?; + m = match range.next() { + Some(m) => m, + None => { + break Ok(()); + } + }; + } + })?; } - if let Some(affixes) = affixes { - sink.write_str(affixes.1)?; + if let Some((part, affixes)) = affixes { + w.with_part(part, |w| w.write_str(affixes.1))?; } Ok(()) } diff --git a/components/decimal/src/lib.rs b/components/decimal/src/lib.rs index 49f706aa8d8..ddf961fc100 100644 --- a/components/decimal/src/lib.rs +++ b/components/decimal/src/lib.rs @@ -97,6 +97,7 @@ extern crate alloc; mod format; mod grouper; pub mod options; +pub mod parts; pub mod provider; pub(crate) mod size_test_macro; @@ -104,6 +105,7 @@ pub use format::FormattedFixedDecimal; use alloc::string::String; use fixed_decimal::SignedFixedDecimal; +use icu_locale_core::extensions::unicode::Value; use icu_locale_core::locale; use icu_locale_core::preferences::{ define_preferences, extensions::unicode::keywords::NumberingSystem, @@ -214,4 +216,47 @@ impl FixedDecimalFormatter { pub fn format_to_string(&self, value: &SignedFixedDecimal) -> String { self.format(value).write_to_string().into_owned() } + + /// Gets the resolved numbering system 
identifier of this formatter. + /// + /// # Examples + /// + /// ``` + /// use icu::decimal::FixedDecimalFormatter; + /// use icu::locale::locale; + /// + /// let fmt_en = FixedDecimalFormatter::try_new( + /// locale!("en").into(), + /// Default::default() + /// ) + /// .unwrap(); + /// + /// let fmt_bn = FixedDecimalFormatter::try_new( + /// locale!("bn").into(), + /// Default::default() + /// ) + /// .unwrap(); + /// + /// let fmt_zh_nu = FixedDecimalFormatter::try_new( + /// locale!("zh-u-nu-hanidec").into(), + /// Default::default() + /// ) + /// .unwrap(); + /// + /// assert_eq!(fmt_en.numbering_system(), "latn"); + /// assert_eq!(fmt_bn.numbering_system(), "beng"); + /// assert_eq!(fmt_zh_nu.numbering_system(), "hanidec"); + /// ``` + pub fn numbering_system(&self) -> Value { + match Value::try_from_str(self.symbols.get().numsys()) { + Ok(v) => v, + Err(e) => { + debug_assert!( + false, + "Problem converting numbering system ID to Value: {e}" + ); + Value::new_empty() + } + } + } } diff --git a/components/decimal/src/parts.rs b/components/decimal/src/parts.rs new file mode 100644 index 00000000000..6aa21a16ae1 --- /dev/null +++ b/components/decimal/src/parts.rs @@ -0,0 +1,75 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Parts of a formatted decimal. +//! +//! # Examples +//! +//! ``` +//! use icu::calendar::Gregorian; +//! use icu::calendar::{Date, Time}; +//! use icu::decimal::parts; +//! use icu::decimal::FixedDecimalFormatter; +//! use icu::locale::locale; +//! use writeable::assert_writeable_parts_eq; +//! +//! let dtf = FixedDecimalFormatter::try_new( +//! locale!("en").into(), +//! Default::default(), +//! ) +//! .unwrap(); +//! +//! let fixed_decimal = "-987654.321".parse().unwrap(); +//! +//! // Each component of the formatted number is annotated with its own part. +//! 
assert_writeable_parts_eq!( +//! dtf.format(&fixed_decimal), +//! "-987,654.321", +//! [ +//! (0, 1, parts::MINUS_SIGN), +//! (1, 8, parts::INTEGER), +//! (4, 5, parts::GROUP), +//! (8, 9, parts::DECIMAL), +//! (9, 12, parts::FRACTION), +//! ] +//! ); +//! ``` + +use writeable::Part; + +/// A [`Part`] used by [`FormattedFixedDecimal`](super::FormattedFixedDecimal). +pub const PLUS_SIGN: Part = Part { + category: "decimal", + value: "plusSign", +}; + +/// A [`Part`] used by [`FormattedFixedDecimal`](super::FormattedFixedDecimal). +pub const MINUS_SIGN: Part = Part { + category: "decimal", + value: "minusSign", +}; + +/// A [`Part`] used by [`FormattedFixedDecimal`](super::FormattedFixedDecimal). +pub const INTEGER: Part = Part { + category: "decimal", + value: "integer", +}; + +/// A [`Part`] used by [`FormattedFixedDecimal`](super::FormattedFixedDecimal). +pub const FRACTION: Part = Part { + category: "decimal", + value: "fraction", +}; + +/// A [`Part`] used by [`FormattedFixedDecimal`](super::FormattedFixedDecimal). +pub const GROUP: Part = Part { + category: "decimal", + value: "group", +}; + +/// A [`Part`] used by [`FormattedFixedDecimal`](super::FormattedFixedDecimal). 
+pub const DECIMAL: Part = Part { + category: "decimal", + value: "decimal", +}; diff --git a/components/experimental/src/dimension/units/format.rs b/components/experimental/src/dimension/units/format.rs index a39ab1cac8f..0b9c81717d9 100644 --- a/components/experimental/src/dimension/units/format.rs +++ b/components/experimental/src/dimension/units/format.rs @@ -21,15 +21,15 @@ pub struct FormattedUnit<'l> { } impl Writeable for FormattedUnit<'_> { - fn write_to(&self, sink: &mut W) -> core::result::Result<(), core::fmt::Error> + fn write_to_parts(&self, sink: &mut W) -> core::result::Result<(), core::fmt::Error> where - W: core::fmt::Write + ?Sized, + W: writeable::PartsWrite + ?Sized, { self.display_name .patterns .get(self.value.into(), self.plural_rules) .interpolate((self.fixed_decimal_formatter.format(self.value),)) - .write_to(sink) + .write_to_parts(sink) } } diff --git a/components/experimental/src/duration/format.rs b/components/experimental/src/duration/format.rs index b3b3e851240..925ec395c43 100644 --- a/components/experimental/src/duration/format.rs +++ b/components/experimental/src/duration/format.rs @@ -710,19 +710,28 @@ mod tests { [ (0, 6, parts::YEAR), (0, 6, icu_list::parts::ELEMENT), + (0, 1, icu_decimal::parts::MINUS_SIGN), + (1, 2, icu_decimal::parts::INTEGER), (6, 8, icu_list::parts::LITERAL), (8, 14, parts::MONTH), (8, 14, icu_list::parts::ELEMENT), + (8, 9, icu_decimal::parts::INTEGER), (14, 16, icu_list::parts::LITERAL), (16, 21, parts::WEEK), (16, 21, icu_list::parts::ELEMENT), + (16, 17, icu_decimal::parts::INTEGER), (21, 23, icu_list::parts::LITERAL), (23, 37, icu_list::parts::ELEMENT), + (23, 25, icu_decimal::parts::INTEGER), (23, 25, parts::HOUR), (25, 26, parts::LITERAL), + (26, 28, icu_decimal::parts::INTEGER), (26, 28, parts::MINUTE), (28, 29, parts::LITERAL), - (29, 37, parts::SECOND) + (29, 37, parts::SECOND), + (29, 31, icu_decimal::parts::INTEGER), + (31, 32, icu_decimal::parts::DECIMAL), + (32, 37, 
icu_decimal::parts::FRACTION), ] ); } diff --git a/components/experimental/src/transliterate/compile/mod.rs b/components/experimental/src/transliterate/compile/mod.rs index 00fa02465f4..d9daf1d8eac 100644 --- a/components/experimental/src/transliterate/compile/mod.rs +++ b/components/experimental/src/transliterate/compile/mod.rs @@ -69,7 +69,7 @@ impl Direction { /// {$AE} [:Lowercase:] → Ae; /// {$OE} [:Lowercase:] → Oe; /// {$UE} [:Lowercase:] → Ue; -/// +/// /// $AE → AE; /// $OE → OE; /// $UE → UE; @@ -232,8 +232,8 @@ impl RuleCollection { + DataProvider + DataProvider, NP: ?Sized - + DataProvider - + DataProvider + + DataProvider + + DataProvider + DataProvider + DataProvider + DataProvider, @@ -414,8 +414,8 @@ macro_rules! redirect { } redirect!( - CanonicalDecompositionDataV1Marker, - CompatibilityDecompositionSupplementV1Marker, + CanonicalDecompositionDataV2Marker, + CompatibilityDecompositionDataV2Marker, CanonicalDecompositionTablesV1Marker, CompatibilityDecompositionTablesV1Marker, CanonicalCompositionsV1Marker diff --git a/components/experimental/src/transliterate/transliterator/mod.rs b/components/experimental/src/transliterate/transliterator/mod.rs index 5cda2dac650..d6b3adc13aa 100644 --- a/components/experimental/src/transliterate/transliterator/mod.rs +++ b/components/experimental/src/transliterate/transliterator/mod.rs @@ -51,7 +51,7 @@ struct ComposingTransliterator(ComposingNormalizer); impl ComposingTransliterator { fn try_nfc

(provider: &P) -> Result where - P: DataProvider + P: DataProvider + DataProvider + DataProvider + ?Sized, @@ -63,8 +63,7 @@ impl ComposingTransliterator { fn try_nfkc

(provider: &P) -> Result where - P: DataProvider - + DataProvider + P: DataProvider + DataProvider + DataProvider + DataProvider @@ -90,7 +89,7 @@ struct DecomposingTransliterator(DecomposingNormalizer); impl DecomposingTransliterator { fn try_nfd

(provider: &P) -> Result where - P: DataProvider + P: DataProvider + DataProvider + ?Sized, { @@ -101,8 +100,7 @@ impl DecomposingTransliterator { fn try_nfkd

(provider: &P) -> Result where - P: DataProvider - + DataProvider + P: DataProvider + DataProvider + DataProvider + ?Sized, @@ -279,8 +277,8 @@ impl Transliterator { ) -> Result where PT: DataProvider + ?Sized, - PN: DataProvider - + DataProvider + PN: DataProvider + + DataProvider + DataProvider + DataProvider + DataProvider @@ -391,8 +389,8 @@ impl Transliterator { ) -> Result where PT: DataProvider + ?Sized, - PN: DataProvider - + DataProvider + PN: DataProvider + + DataProvider + DataProvider + DataProvider + DataProvider @@ -415,8 +413,8 @@ impl Transliterator { ) -> Result where PT: DataProvider + ?Sized, - PN: DataProvider - + DataProvider + PN: DataProvider + + DataProvider + DataProvider + DataProvider + DataProvider @@ -451,8 +449,8 @@ impl Transliterator { ) -> Result, DataError> where PT: DataProvider + ?Sized, - PN: DataProvider - + DataProvider + PN: DataProvider + + DataProvider + DataProvider + DataProvider + DataProvider @@ -500,8 +498,8 @@ impl Transliterator { normalizer_provider: &P, ) -> Option> where - P: DataProvider - + DataProvider + P: DataProvider + + DataProvider + DataProvider + DataProvider + DataProvider diff --git a/components/locale_core/src/extensions/unicode/value.rs b/components/locale_core/src/extensions/unicode/value.rs index 02c2e9df21b..818659909a3 100644 --- a/components/locale_core/src/extensions/unicode/value.rs +++ b/components/locale_core/src/extensions/unicode/value.rs @@ -142,6 +142,19 @@ impl Value { self.0.len() } + /// Creates an empty [`Value`], which corresponds to a "true" value. + /// + /// # Examples + /// + /// ``` + /// use icu::locale::extensions::unicode::{value, Value}; + /// + /// assert_eq!(value!("true"), Value::new_empty()); + /// ``` + pub const fn new_empty() -> Self { + Self(ShortBoxSlice::new()) + } + /// Returns `true` if the Value has no subtags. 
/// /// # Examples diff --git a/components/normalizer/src/lib.rs b/components/normalizer/src/lib.rs index 56b14417afa..7e4b44a8405 100644 --- a/components/normalizer/src/lib.rs +++ b/components/normalizer/src/lib.rs @@ -87,10 +87,10 @@ pub mod provider; pub mod uts46; use crate::provider::CanonicalCompositionsV1; -use crate::provider::CanonicalDecompositionDataV1Marker; -use crate::provider::CompatibilityDecompositionSupplementV1Marker; -use crate::provider::DecompositionDataV1; -use crate::provider::Uts46DecompositionSupplementV1Marker; +use crate::provider::CanonicalDecompositionDataV2Marker; +use crate::provider::CompatibilityDecompositionDataV2Marker; +use crate::provider::DecompositionDataV2; +use crate::provider::Uts46DecompositionDataV2Marker; use alloc::borrow::Cow; use alloc::string::String; use alloc::vec::Vec; @@ -106,7 +106,6 @@ use icu_provider::prelude::*; use provider::CanonicalCompositionsV1Marker; use provider::CanonicalDecompositionTablesV1Marker; use provider::CompatibilityDecompositionTablesV1Marker; -use provider::DecompositionSupplementV1; use provider::DecompositionTablesV1; use smallvec::SmallVec; use utf16_iter::Utf16CharsEx; @@ -123,21 +122,6 @@ struct CanonicalCombiningClass(pub(crate) u8); const CCC_NOT_REORDERED: CanonicalCombiningClass = ccc!(NotReordered, 0); const CCC_ABOVE: CanonicalCombiningClass = ccc!(Above, 230); -#[derive(Debug)] -enum SupplementPayloadHolder { - Compatibility(DataPayload), - Uts46(DataPayload), -} - -impl SupplementPayloadHolder { - fn get(&self) -> &DecompositionSupplementV1 { - match self { - SupplementPayloadHolder::Compatibility(d) => d.get(), - SupplementPayloadHolder::Uts46(d) => d.get(), - } - } -} - /// Treatment of the ignorable marker (0xFFFFFFFF) in data. #[derive(Debug, PartialEq, Eq)] enum IgnorableBehavior { @@ -150,79 +134,65 @@ enum IgnorableBehavior { ReplacementCharacter, } -/// Number of iterations allowed on the fast path before flushing. 
-/// Since a typical UTF-16 iteration advances over a 2-byte BMP -/// character, this means two memory pages. -/// Intel Core i7-4770 had the best results between 2 and 4 pages -/// when testing powers of two. Apple M1 didn't seem to care -/// about 1, 2, 4, or 8 pages. -/// -/// Curiously, the `str` case does not appear to benefit from -/// similar flushing, though the tested monomorphization never -/// passes an error through from `Write`. -const UTF16_FAST_PATH_FLUSH_THRESHOLD: usize = 4096; - /// Marker for UTS 46 ignorables. +/// +/// See trie-value-format.md const IGNORABLE_MARKER: u32 = 0xFFFFFFFF; -/// Marker for starters that decompose to themselves but may -/// combine backwards under canonical composition. -/// (Main trie only; not used in the supplementary trie.) -const BACKWARD_COMBINING_STARTER_MARKER: u32 = 1; - -/// Magic marker trie value for characters whose decomposition -/// starts with a non-starter. The actual decomposition is -/// hard-coded. -const SPECIAL_NON_STARTER_DECOMPOSITION_MARKER: u32 = 2; - -/// `u16` version of the previous marker value. -const SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16: u16 = 2; +/// Marker that the decomposition does not round trip via NFC. +/// +/// See trie-value-format.md +const NON_ROUND_TRIP_MARKER: u32 = 1 << 30; -/// Marker that a complex decomposition isn't round-trippable -/// under re-composition. +/// Marker that the first character of the decomposition +/// can combine backwards. /// -/// TODO: When taking a data format break, swap this and -/// `BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER` around -/// to make backward-combiningness use the same bit in all -/// cases. -const NON_ROUND_TRIP_MARKER: u16 = 0b1; +/// See trie-value-format.md +const BACKWARD_COMBINING_MARKER: u32 = 1 << 31; -/// Marker that a complex decomposition starts with a starter -/// that can combine backwards. 
-const BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER: u16 = 0b10; +/// Mask for the bits that have to be zero for this to be a BMP +/// singleton decomposition, or value baked into the surrogate +/// range. +/// +/// See trie-value-format.md +const HIGH_ZEROS_MASK: u32 = 0x3FFF0000; -/// Values above this are treated as a BMP character. -const HIGHEST_MARKER: u16 = NON_ROUND_TRIP_MARKER | BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER; +/// Mask for the bits that have to be zero for this to be a complex +/// decomposition. +/// +/// See trie-value-format.md +const LOW_ZEROS_MASK: u32 = 0xFFE0; /// Checks if a trie value carries a (non-zero) canonical /// combining class. +/// +/// See trie-value-format.md fn trie_value_has_ccc(trie_value: u32) -> bool { - (trie_value & 0xFFFFFF00) == 0xD800 + (trie_value & 0x3FFFFE00) == 0xD800 } /// Checks if the trie signifies a special non-starter decomposition. +/// +/// See trie-value-format.md fn trie_value_indicates_special_non_starter_decomposition(trie_value: u32) -> bool { - trie_value == SPECIAL_NON_STARTER_DECOMPOSITION_MARKER + (trie_value & 0x3FFFFF00) == 0xD900 } /// Checks if a trie value signifies a character whose decomposition /// starts with a non-starter. +/// +/// See trie-value-format.md fn decomposition_starts_with_non_starter(trie_value: u32) -> bool { trie_value_has_ccc(trie_value) - || trie_value_indicates_special_non_starter_decomposition(trie_value) } /// Extracts a canonical combining class (possibly zero) from a trie value. /// -/// # Panics -/// -/// The trie value must not be one that signifies a special non-starter -/// decomposition. 
(Debug-only) +/// See trie-value-format.md fn ccc_from_trie_value(trie_value: u32) -> CanonicalCombiningClass { if trie_value_has_ccc(trie_value) { CanonicalCombiningClass(trie_value as u8) } else { - debug_assert_ne!(trie_value, SPECIAL_NON_STARTER_DECOMPOSITION_MARKER); CCC_NOT_REORDERED } } @@ -234,8 +204,11 @@ static FDFA_NFKD: [u16; 17] = [ 0x633, 0x644, 0x645, ]; -/// Marker value for U+FDFA in NFKD -const FDFA_MARKER: u16 = 3; +/// Marker value for U+FDFA in NFKD. (Unified with Hangul syllable marker, +/// but they differ by `NON_ROUND_TRIP_MARKER`.) +/// +/// See trie-value-format.md +const FDFA_MARKER: u16 = 1; // These constants originate from page 143 of Unicode 14.0 /// Syllable base @@ -297,11 +270,6 @@ fn in_inclusive_range(c: char, start: char, end: char) -> bool { u32::from(c).wrapping_sub(u32::from(start)) <= (u32::from(end) - u32::from(start)) } -#[inline(always)] -fn in_inclusive_range32(u: u32, start: u32, end: u32) -> bool { - u.wrapping_sub(start) <= (end - start) -} - #[inline(always)] fn in_inclusive_range16(u: u16, start: u16, end: u16) -> bool { u.wrapping_sub(start) <= (end - start) @@ -371,6 +339,20 @@ fn compose_non_hangul(mut iter: Char16TrieIterator, starter: char, second: char) } } +/// See trie-value-format.md +#[inline(always)] +fn starter_and_decomposes_to_self_impl(trie_val: u32) -> bool { + // The REPLACEMENT CHARACTER has `NON_ROUND_TRIP_MARKER` set, + // and this function needs to ignore that. + (trie_val & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) == 0 +} + +/// See trie-value-format.md +#[inline(always)] +fn potential_passthrough_and_cannot_combine_backwards_impl(trie_val: u32) -> bool { + (trie_val & (NON_ROUND_TRIP_MARKER | BACKWARD_COMBINING_MARKER)) == 0 +} + /// Struct for holding together a character and the value /// looked up for it from the NFD trie in a more explicit /// way than an anonymous pair. 
@@ -378,8 +360,8 @@ fn compose_non_hangul(mut iter: Char16TrieIterator, starter: char, second: char) #[derive(Debug, PartialEq, Eq)] struct CharacterAndTrieValue { character: char, + /// See trie-value-format.md trie_val: u32, - from_supplement: bool, } impl CharacterAndTrieValue { @@ -388,84 +370,38 @@ impl CharacterAndTrieValue { CharacterAndTrieValue { character: c, trie_val: trie_value, - from_supplement: false, } } + #[inline(always)] - pub fn new_from_supplement(c: char, trie_value: u32) -> Self { - CharacterAndTrieValue { - character: c, - trie_val: trie_value, - from_supplement: true, - } + pub fn starter_and_decomposes_to_self(&self) -> bool { + starter_and_decomposes_to_self_impl(self.trie_val) } + + /// See trie-value-format.md #[inline(always)] - pub fn starter_and_decomposes_to_self(&self) -> bool { - if self.trie_val > BACKWARD_COMBINING_STARTER_MARKER { - return false; - } - // Hangul syllables get 0 as their trie value - u32::from(self.character).wrapping_sub(HANGUL_S_BASE) >= HANGUL_S_COUNT + pub fn starter_and_decomposes_to_self_except_replacement(&self) -> bool { + // This intentionally leaves `NON_ROUND_TRIP_MARKER` in the value + // to be compared with zero. U+FFFD has that flag set despite really + // being round-tripping in order to make UTF-8 errors + // ineligible for passthrough. 
+ (self.trie_val & !BACKWARD_COMBINING_MARKER) == 0 } + + /// See trie-value-format.md #[inline(always)] pub fn can_combine_backwards(&self) -> bool { - decomposition_starts_with_non_starter(self.trie_val) - || self.trie_val == BACKWARD_COMBINING_STARTER_MARKER - || (((self.trie_val as u16) & !1) == BACKWARD_COMBINING_STARTER_DECOMPOSITION_MARKER && (self.trie_val >> 16) != 0) // Combine with the previous condition when taking a data format break - || in_inclusive_range32(self.trie_val, 0x1161, 0x11C2) + (self.trie_val & BACKWARD_COMBINING_MARKER) != 0 } + /// See trie-value-format.md #[inline(always)] pub fn potential_passthrough(&self) -> bool { - self.potential_passthrough_impl(BACKWARD_COMBINING_STARTER_MARKER) + (self.trie_val & NON_ROUND_TRIP_MARKER) == 0 } + /// See trie-value-format.md #[inline(always)] pub fn potential_passthrough_and_cannot_combine_backwards(&self) -> bool { - self.potential_passthrough_impl(0) - } - #[inline(always)] - fn potential_passthrough_impl(&self, bound: u32) -> bool { - // This methods looks badly branchy, but most characters - // take the first return. 
- if self.trie_val <= bound { - return true; - } - if self.from_supplement { - return false; - } - let trail_or_complex = (self.trie_val >> 16) as u16; - if trail_or_complex == 0 { - return false; - } - let lead = self.trie_val as u16; - if lead == 0 { - return true; - } - if lead <= HIGHEST_MARKER { - return false; - } - if (trail_or_complex & 0x7F) == 0x3C - && in_inclusive_range16(trail_or_complex, 0x0900, 0x0BFF) - { - // Nukta - return false; - } - if in_inclusive_range(self.character, '\u{FB1D}', '\u{FB4E}') { - // Hebrew presentation forms - return false; - } - if in_inclusive_range(self.character, '\u{1F71}', '\u{1FFB}') { - // Polytonic Greek with oxia - return false; - } - // To avoid more branchiness, 4 characters that decompose to - // a BMP starter followed by a BMP non-starter are excluded - // from being encoded directly into the trie value and are - // handled as complex decompositions instead. These are: - // U+0F76 TIBETAN VOWEL SIGN VOCALIC R - // U+0F78 TIBETAN VOWEL SIGN VOCALIC L - // U+212B ANGSTROM SIGN - // U+2ADC FORKING - true + potential_passthrough_and_cannot_combine_backwards_impl(self.trie_val) } } @@ -566,13 +502,12 @@ where // However, when `Decomposition` appears inside a `Composition`, this // may become a non-starter before `decomposing_next()` is called. pending: Option, // None at end of stream + // See trie-value-format.md trie: &'data CodePointTrie<'data, u32>, - supplementary_trie: Option<&'data CodePointTrie<'data, u32>>, scalars16: &'data ZeroSlice, scalars24: &'data ZeroSlice, supplementary_scalars16: &'data ZeroSlice, supplementary_scalars24: &'data ZeroSlice, - half_width_voicing_marks_become_non_starters: bool, /// The lowest character for which either of the following does /// not hold: /// 1. Decomposes to self. 
@@ -597,13 +532,12 @@ where #[doc(hidden)] // used in collator pub fn new( delegate: I, - decompositions: &'data DecompositionDataV1, + decompositions: &'data DecompositionDataV2, tables: &'data DecompositionTablesV1, ) -> Self { Self::new_with_supplements( delegate, decompositions, - None, tables, None, 0xC0, @@ -619,19 +553,12 @@ where /// there's a good reason to use this constructor directly. fn new_with_supplements( delegate: I, - decompositions: &'data DecompositionDataV1, - supplementary_decompositions: Option<&'data DecompositionSupplementV1>, + decompositions: &'data DecompositionDataV2, tables: &'data DecompositionTablesV1, supplementary_tables: Option<&'data DecompositionTablesV1>, decomposition_passthrough_bound: u8, ignorable_behavior: IgnorableBehavior, ) -> Self { - let half_width_voicing_marks_become_non_starters = - if let Some(supplementary) = supplementary_decompositions { - supplementary.half_width_voicing_marks_become_non_starters() - } else { - false - }; let mut ret = Decomposition:: { delegate, buffer: SmallVec::new(), // Normalized @@ -640,7 +567,6 @@ where // the real stream starts with a non-starter. 
pending: Some(CharacterAndTrieValue::new('\u{FFFF}', 0)), trie: &decompositions.trie, - supplementary_trie: supplementary_decompositions.map(|s| &s.trie), scalars16: &tables.scalars16, scalars24: &tables.scalars24, supplementary_scalars16: if let Some(supplementary) = supplementary_tables { @@ -653,7 +579,6 @@ where } else { EMPTY_CHAR }, - half_width_voicing_marks_become_non_starters, decomposition_passthrough_bound: u32::from(decomposition_passthrough_bound), ignorable_behavior, }; @@ -663,11 +588,11 @@ where fn push_decomposition16( &mut self, - low: u16, offset: usize, + len: usize, + only_non_starters_in_trail: bool, slice16: &ZeroSlice, ) -> (char, usize) { - let len = usize::from(low >> 13) + 2; let (starter, tail) = slice16 .get_subslice(offset..offset + len) .and_then(|slice| slice.split_first()) @@ -679,7 +604,7 @@ where }, |(first, trail)| (char_from_u16(first), trail), ); - if low & 0x1000 != 0 { + if only_non_starters_in_trail { // All the rest are combining self.buffer.extend( tail.iter() @@ -708,11 +633,11 @@ where fn push_decomposition32( &mut self, - low: u16, offset: usize, + len: usize, + only_non_starters_in_trail: bool, slice32: &ZeroSlice, ) -> (char, usize) { - let len = usize::from(low >> 13) + 1; let (starter, tail) = slice32 .get_subslice(offset..offset + len) .and_then(|slice| slice.split_first()) @@ -721,7 +646,7 @@ where debug_assert!(false); (REPLACEMENT_CHARACTER, EMPTY_CHAR) }); - if low & 0x1000 != 0 { + if only_non_starters_in_trail { // All the rest are combining self.buffer .extend(tail.iter().map(CharacterAndClass::new_with_placeholder)); @@ -747,75 +672,39 @@ where #[inline(always)] fn attach_trie_value(&self, c: char) -> CharacterAndTrieValue { - if let Some(supplementary) = self.supplementary_trie { - if let Some(value) = self.attach_supplementary_trie_value(c, supplementary) { - return value; - } - } - CharacterAndTrieValue::new(c, self.trie.get(c)) } - #[inline(never)] - fn attach_supplementary_trie_value( - &self, - c: 
char, - supplementary: &CodePointTrie, - ) -> Option { - let voicing_mark = u32::from(c).wrapping_sub(0xFF9E); - if voicing_mark <= 1 && self.half_width_voicing_marks_become_non_starters { - return Some(CharacterAndTrieValue::new( - if voicing_mark == 0 { - '\u{3099}' - } else { - '\u{309A}' - }, - 0xD800 | ccc!(KanaVoicing, 8).0 as u32, - )); - } - let trie_value = supplementary.get32(u32::from(c)); - if trie_value != 0 { - return Some(CharacterAndTrieValue::new_from_supplement(c, trie_value)); - } - None - } - fn delegate_next_no_pending(&mut self) -> Option { debug_assert!(self.pending.is_none()); loop { let c = self.delegate.next()?; - // TODO(#2384): Measure if this check is actually an optimization even in the - // non-supplementary case of if this should go inside the supplementary - // `if` below. + // TODO(#2384): Measure if this check is actually an optimization. if u32::from(c) < self.decomposition_passthrough_bound { return Some(CharacterAndTrieValue::new(c, 0)); } - if let Some(supplementary) = self.supplementary_trie { - if let Some(value) = self.attach_supplementary_trie_value(c, supplementary) { - if value.trie_val == IGNORABLE_MARKER { - match self.ignorable_behavior { - IgnorableBehavior::Unsupported => { - debug_assert!(false); - } - IgnorableBehavior::ReplacementCharacter => { - return Some(CharacterAndTrieValue::new( - c, - u32::from(REPLACEMENT_CHARACTER), - )); - } - IgnorableBehavior::Ignored => { - // Else ignore this character by reading the next one from the delegate. - continue; - } - } + let trie_val = self.trie.get(c); + // TODO: Can we do something better about the cost of this branch in the + // non-UTS 46 case? 
+ if trie_val == IGNORABLE_MARKER { + match self.ignorable_behavior { + IgnorableBehavior::Unsupported => { + debug_assert!(false); + } + IgnorableBehavior::ReplacementCharacter => { + return Some(CharacterAndTrieValue::new( + c, + u32::from(REPLACEMENT_CHARACTER) | NON_ROUND_TRIP_MARKER, + )); + } + IgnorableBehavior::Ignored => { + // Else ignore this character by reading the next one from the delegate. + continue; } - return Some(value); } } - let trie_val = self.trie.get(c); - debug_assert_ne!(trie_val, IGNORABLE_MARKER); return Some(CharacterAndTrieValue::new(c, trie_val)); } } @@ -835,35 +724,76 @@ where fn decomposing_next(&mut self, c_and_trie_val: CharacterAndTrieValue) -> char { let (starter, combining_start) = { let c = c_and_trie_val.character; - let hangul_offset = u32::from(c).wrapping_sub(HANGUL_S_BASE); // SIndex in the spec - if hangul_offset >= HANGUL_S_COUNT { - let decomposition = c_and_trie_val.trie_val; - if decomposition <= BACKWARD_COMBINING_STARTER_MARKER { - // The character is its own decomposition - (c, 0) - } else { - let trail_or_complex = (decomposition >> 16) as u16; - let lead = decomposition as u16; - if lead > HIGHEST_MARKER && trail_or_complex != 0 { - // Decomposition into two BMP characters: starter and non-starter - let starter = char_from_u16(lead); - let combining = char_from_u16(trail_or_complex); - self.buffer - .push(CharacterAndClass::new_with_placeholder(combining)); - (starter, 0) - } else if trail_or_complex == 0 { - if lead != FDFA_MARKER { - debug_assert_ne!( - lead, SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16, - "Should not reach this point with non-starter marker" - ); + // See trie-value-format.md + let decomposition = c_and_trie_val.trie_val; + // The REPLACEMENT CHARACTER has `NON_ROUND_TRIP_MARKER` set, + // and that flag needs to be ignored here. 
+ if (decomposition & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) == 0 { + // The character is its own decomposition + (c, 0) + } else { + let high_zeros = (decomposition & HIGH_ZEROS_MASK) == 0; + let low_zeros = (decomposition & LOW_ZEROS_MASK) == 0; + if !high_zeros && !low_zeros { + // Decomposition into two BMP characters: starter and non-starter + let starter = char_from_u32(decomposition & 0x7FFF); + let combining = char_from_u32((decomposition >> 15) & 0x7FFF); + self.buffer + .push(CharacterAndClass::new_with_placeholder(combining)); + (starter, 0) + } else if high_zeros { + // Do the check by looking at `c` instead of looking at a marker + // in `singleton` below, because if we looked at the trie value, + // we'd still have to check that `c` is in the Hangul syllable + // range in order for the subsequent interpretations as `char` + // to be safe. + // Alternatively, `FDFA_MARKER` and the Hangul marker could + // be unified. That would add a branch for Hangul and remove + // a branch from singleton decompositions. It seems more + // important to favor Hangul syllables than singleton + // decompositions. + // Note that it would be valid to hoist this Hangul check + // one or even two steps earlier in this check hierarchy. + // Right now, it's assumed the kind of decompositions into + // BMP starter and non-starter, which occur in many languages, + // should be checked before Hangul syllables, which are about + // one language specifically. Hopefully, we get some + // instruction-level parallelism out of the disjointness of + // operations on `c` and `decomposition`. 
+ let hangul_offset = u32::from(c).wrapping_sub(HANGUL_S_BASE); // SIndex in the spec + if hangul_offset < HANGUL_S_COUNT { + debug_assert_eq!(decomposition, 1); + // Hangul syllable + // The math here comes from page 144 of Unicode 14.0 + let l = hangul_offset / HANGUL_N_COUNT; + let v = (hangul_offset % HANGUL_N_COUNT) / HANGUL_T_COUNT; + let t = hangul_offset % HANGUL_T_COUNT; + + // The unsafe blocks here are OK, because the values stay + // within the Hangul jamo block and, therefore, the scalar + // value range by construction. + self.buffer.push(CharacterAndClass::new_starter(unsafe { + core::char::from_u32_unchecked(HANGUL_V_BASE + v) + })); + let first = unsafe { core::char::from_u32_unchecked(HANGUL_L_BASE + l) }; + if t != 0 { + self.buffer.push(CharacterAndClass::new_starter(unsafe { + core::char::from_u32_unchecked(HANGUL_T_BASE + t) + })); + (first, 2) + } else { + (first, 1) + } + } else { + let singleton = decomposition as u16; + if singleton != FDFA_MARKER { // Decomposition into one BMP character - let starter = char_from_u16(lead); + let starter = char_from_u16(singleton); (starter, 0) } else { // Special case for the NFKD form of U+FDFA. self.buffer.extend(FDFA_NFKD.map(|u| { - // Safe, because `FDFA_NFKD` is known not to contain + // SAFETY: `FDFA_NFKD` is known not to contain // surrogates. CharacterAndClass::new_starter(unsafe { core::char::from_u32_unchecked(u32::from(u)) @@ -871,75 +801,50 @@ where })); ('\u{0635}', 17) } - } else { - // Complex decomposition - // Format for 16-bit value: - // 15..13: length minus two for 16-bit case and length minus one for - // the 32-bit case. Length 8 needs to fit in three bits in - // the 16-bit case, and this way the value is future-proofed - // up to 9 in the 16-bit case. Zero is unused and length one - // in the 16-bit case goes directly into the trie. - // 12: 1 if all trailing characters are guaranteed non-starters, - // 0 if no guarantees about non-starterness. 
- // Note: The bit choice is this way around to allow for - // dynamically falling back to not having this but instead - // having one more bit for length by merely choosing - // different masks. - // 11..0: Start offset in storage. The offset is to the logical - // sequence of scalars16, scalars32, supplementary_scalars16, - // supplementary_scalars32. - let offset = usize::from(trail_or_complex & 0xFFF); - if offset < self.scalars16.len() { - self.push_decomposition16(trail_or_complex, offset, self.scalars16) - } else if offset < self.scalars16.len() + self.scalars24.len() { - self.push_decomposition32( - trail_or_complex, - offset - self.scalars16.len(), - self.scalars24, - ) - } else if offset - < self.scalars16.len() - + self.scalars24.len() - + self.supplementary_scalars16.len() - { - self.push_decomposition16( - trail_or_complex, - offset - (self.scalars16.len() + self.scalars24.len()), - self.supplementary_scalars16, - ) - } else { - self.push_decomposition32( - trail_or_complex, - offset - - (self.scalars16.len() - + self.scalars24.len() - + self.supplementary_scalars16.len()), - self.supplementary_scalars24, - ) - } } - } - } else { - // Hangul syllable - // The math here comes from page 144 of Unicode 14.0 - let l = hangul_offset / HANGUL_N_COUNT; - let v = (hangul_offset % HANGUL_N_COUNT) / HANGUL_T_COUNT; - let t = hangul_offset % HANGUL_T_COUNT; - - // The unsafe blocks here are OK, because the values stay - // within the Hangul jamo block and, therefore, the scalar - // value range by construction. - self.buffer.push(CharacterAndClass::new_starter(unsafe { - core::char::from_u32_unchecked(HANGUL_V_BASE + v) - })); - let first = unsafe { core::char::from_u32_unchecked(HANGUL_L_BASE + l) }; - if t != 0 { - self.buffer.push(CharacterAndClass::new_starter(unsafe { - core::char::from_u32_unchecked(HANGUL_T_BASE + t) - })); - (first, 2) } else { - (first, 1) + debug_assert!(low_zeros); + // Only 12 of 14 bits used as of Unicode 16. 
+ let offset = (((decomposition & !(0b11 << 30)) >> 16) as usize) - 1; + // Only 3 of 4 bits used as of Unicode 16. + let len_bits = decomposition & 0b1111; + let only_non_starters_in_trail = (decomposition & 0b10000) != 0; + if offset < self.scalars16.len() { + self.push_decomposition16( + offset, + (len_bits + 2) as usize, + only_non_starters_in_trail, + self.scalars16, + ) + } else if offset < self.scalars16.len() + self.scalars24.len() { + self.push_decomposition32( + offset - self.scalars16.len(), + (len_bits + 1) as usize, + only_non_starters_in_trail, + self.scalars24, + ) + } else if offset + < self.scalars16.len() + + self.scalars24.len() + + self.supplementary_scalars16.len() + { + self.push_decomposition16( + offset - (self.scalars16.len() + self.scalars24.len()), + (len_bits + 2) as usize, + only_non_starters_in_trail, + self.supplementary_scalars16, + ) + } else { + self.push_decomposition32( + offset + - (self.scalars16.len() + + self.scalars24.len() + + self.supplementary_scalars16.len()), + (len_bits + 1) as usize, + only_non_starters_in_trail, + self.supplementary_scalars24, + ) + } } } }; @@ -953,12 +858,15 @@ where // Not a `for` loop to avoid holding a mutable reference to `self` across // the loop body. while let Some(ch_and_trie_val) = self.delegate_next() { - if trie_value_has_ccc(ch_and_trie_val.trie_val) { - self.buffer - .push(CharacterAndClass::new_with_trie_value(ch_and_trie_val)); - } else if trie_value_indicates_special_non_starter_decomposition( + if !trie_value_has_ccc(ch_and_trie_val.trie_val) { + self.pending = Some(ch_and_trie_val); + break; + } else if !trie_value_indicates_special_non_starter_decomposition( ch_and_trie_val.trie_val, ) { + self.buffer + .push(CharacterAndClass::new_with_trie_value(ch_and_trie_val)); + } else { // The Tibetan special cases are starters that decompose into non-starters. 
let mapped = match ch_and_trie_val.character { '\u{0340}' => { @@ -997,6 +905,14 @@ where .push(CharacterAndClass::new('\u{0F71}', ccc!(CCC129, 129))); CharacterAndClass::new('\u{0F80}', ccc!(CCC130, 130)) } + '\u{FF9E}' => { + // HALFWIDTH KATAKANA VOICED SOUND MARK + CharacterAndClass::new('\u{3099}', ccc!(KanaVoicing, 8)) + } + '\u{FF9F}' => { + // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK + CharacterAndClass::new('\u{309A}', ccc!(KanaVoicing, 8)) + } _ => { // GIGO case debug_assert!(false); @@ -1004,9 +920,6 @@ where } }; self.buffer.push(mapped); - } else { - self.pending = Some(ch_and_trie_val); - break; } } // Slicing succeeds by construction; we've always ensured that `combining_start` @@ -1640,8 +1553,7 @@ macro_rules! normalizer_methods { /// Borrowed version of a normalizer for performing decomposing normalization. #[derive(Debug)] pub struct DecomposingNormalizerBorrowed<'a> { - decompositions: &'a DecompositionDataV1<'a>, - supplementary_decompositions: Option<&'a DecompositionSupplementV1<'a>>, + decompositions: &'a DecompositionDataV2<'a>, tables: &'a DecompositionTablesV1<'a>, supplementary_tables: Option<&'a DecompositionTablesV1<'a>>, decomposition_passthrough_bound: u8, // never above 0xC0 @@ -1656,15 +1568,6 @@ impl DecomposingNormalizerBorrowed<'static> { pub const fn static_to_owned(self) -> DecomposingNormalizer { DecomposingNormalizer { decompositions: DataPayload::from_static_ref(self.decompositions), - supplementary_decompositions: if let Some(s) = self.supplementary_decompositions { - // `map` not available in const context - // TODO: Perhaps get rid of the holder enum, since we're just faking it here anyway.
- Some(SupplementPayloadHolder::Compatibility( - DataPayload::from_static_ref(s), - )) - } else { - None - }, tables: DataPayload::from_static_ref(self.tables), supplementary_tables: if let Some(s) = self.supplementary_tables { // `map` not available in const context @@ -1697,8 +1600,7 @@ impl DecomposingNormalizerBorrowed<'static> { DecomposingNormalizerBorrowed { decompositions: - crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V1_MARKER, - supplementary_decompositions: None, + crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V2_MARKER, tables: crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_TABLES_V1_MARKER, supplementary_tables: None, decomposition_passthrough_bound: 0xC0, @@ -1731,28 +1633,28 @@ impl DecomposingNormalizerBorrowed<'static> { ); const _: () = assert!( - crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_SUPPLEMENT_V1_MARKER + crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_DATA_V2_MARKER .passthrough_cap <= 0x0300, "invalid" ); let decomposition_capped = - if crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_SUPPLEMENT_V1_MARKER + if crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_DATA_V2_MARKER .passthrough_cap < 0xC0 { - crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_SUPPLEMENT_V1_MARKER + crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_DATA_V2_MARKER .passthrough_cap } else { 0xC0 }; let composition_capped = - if crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_SUPPLEMENT_V1_MARKER + if crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_DATA_V2_MARKER .passthrough_cap < 0x0300 { - crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_SUPPLEMENT_V1_MARKER + crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_DATA_V2_MARKER .passthrough_cap } else { 0x0300 @@ -1760,10 +1662,7 @@ impl DecomposingNormalizerBorrowed<'static> { DecomposingNormalizerBorrowed { decompositions: - 
crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V1_MARKER, - supplementary_decompositions: Some( - crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_SUPPLEMENT_V1_MARKER, - ), + crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_DATA_V2_MARKER, tables: crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_TABLES_V1_MARKER, supplementary_tables: Some( crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_TABLES_V1_MARKER, @@ -1793,39 +1692,30 @@ impl DecomposingNormalizerBorrowed<'static> { ); const _: () = assert!( - crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_SUPPLEMENT_V1_MARKER - .passthrough_cap + crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_DATA_V2_MARKER.passthrough_cap <= 0x0300, "invalid" ); let decomposition_capped = - if crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_SUPPLEMENT_V1_MARKER - .passthrough_cap + if crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_DATA_V2_MARKER.passthrough_cap < 0xC0 { - crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_SUPPLEMENT_V1_MARKER - .passthrough_cap + crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_DATA_V2_MARKER.passthrough_cap } else { 0xC0 }; let composition_capped = - if crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_SUPPLEMENT_V1_MARKER - .passthrough_cap + if crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_DATA_V2_MARKER.passthrough_cap < 0x0300 { - crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_SUPPLEMENT_V1_MARKER - .passthrough_cap + crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_DATA_V2_MARKER.passthrough_cap } else { 0x0300 }; DecomposingNormalizerBorrowed { - decompositions: - crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V1_MARKER, - supplementary_decompositions: Some( - crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_SUPPLEMENT_V1_MARKER, - ), + decompositions: crate::provider::Baked::SINGLETON_UTS46_DECOMPOSITION_DATA_V2_MARKER, tables: 
crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_TABLES_V1_MARKER, supplementary_tables: Some( crate::provider::Baked::SINGLETON_COMPATIBILITY_DECOMPOSITION_TABLES_V1_MARKER, @@ -1843,7 +1733,6 @@ impl DecomposingNormalizerBorrowed<'_> { Decomposition::new_with_supplements( iter, self.decompositions, - self.supplementary_decompositions, self.tables, self.supplementary_tables, self.decomposition_passthrough_bound, @@ -1958,10 +1847,18 @@ impl DecomposingNormalizerBorrowed<'_> { // is an upcoming byte. let upcoming = decomposition.delegate.next().unwrap(); let upcoming_with_trie_value = decomposition.attach_trie_value(upcoming); - if upcoming_with_trie_value.starter_and_decomposes_to_self() { - if upcoming != REPLACEMENT_CHARACTER { - continue 'fast; - } + if upcoming_with_trie_value.starter_and_decomposes_to_self_except_replacement() { + // Note: The trie value of the REPLACEMENT CHARACTER is + // intentionally formatted to fail the + // `starter_and_decomposes_to_self` test even though it + // really is a starter that decomposes to self. This + // allows moving the branch on REPLACEMENT CHARACTER + // below this `continue`. + continue 'fast; + } + + // TODO: Annotate as unlikely. + if upcoming == REPLACEMENT_CHARACTER { // We might have an error, so fall out of the fast path. // Since the U+FFFD might signify an error, we can't @@ -1979,6 +1876,7 @@ impl DecomposingNormalizerBorrowed<'_> { debug_assert!(decomposition.pending.is_none()); break 'fast; } + let consumed_so_far_slice = &pending_slice[..pending_slice.len() - decomposition.delegate.as_slice().len() - upcoming.len_utf8()]; @@ -2023,18 +1921,19 @@ impl DecomposingNormalizerBorrowed<'_> { as_slice, { let mut code_unit_iter = decomposition.delegate.as_slice().iter(); - // The purpose of the counter is to flush once in a while. If we flush - // too much, there is too much flushing overhead.
If we flush too rarely, - // the flush starts reading from too far behind compared to the hot - // recently-read memory. - let mut counter = UTF16_FAST_PATH_FLUSH_THRESHOLD; 'fast: loop { - counter -= 1; if let Some(&upcoming_code_unit) = code_unit_iter.next() { let mut upcoming32 = u32::from(upcoming_code_unit); - if upcoming32 < decomposition_passthrough_bound && counter != 0 { + if upcoming32 < decomposition_passthrough_bound { continue 'fast; } + // We might be doing a trie lookup by surrogate. Surrogates get + // a decomposition to U+FFFD. + let mut trie_value = decomposition.trie.get32(upcoming32); + if starter_and_decomposes_to_self_impl(trie_value) { + continue 'fast; + } + // We might now be looking at a surrogate. // The loop is only broken out of as goto forward #[allow(clippy::never_loop)] 'surrogateloop: loop { @@ -2049,6 +1948,11 @@ impl DecomposingNormalizerBorrowed<'_> { if in_inclusive_range16(low, 0xDC00, 0xDFFF) { upcoming32 = (upcoming32 << 10) + u32::from(low) - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32); + // Successfully-paired surrogate. Read from the trie again. + trie_value = decomposition.trie.get32(upcoming32); + if starter_and_decomposes_to_self_impl(trie_value) { + continue 'fast; + } break 'surrogateloop; } else { code_unit_iter = iter_backup; @@ -2056,23 +1960,14 @@ impl DecomposingNormalizerBorrowed<'_> { } } // unpaired surrogate - let slice_to_write = &pending_slice - [..pending_slice.len() - code_unit_iter.as_slice().len() - 1]; - sink.write_slice(slice_to_write)?; - undecomposed_starter = - CharacterAndTrieValue::new(REPLACEMENT_CHARACTER, 0); - debug_assert!(decomposition.pending.is_none()); - // We could instead call `gather_and_sort_combining` and `continue 'outer`, but - // assuming this is better for code size. - break 'fast; + upcoming32 = 0xFFFD; // Safe value for `char::from_u32_unchecked` and matches later potential error check. + // trie_value already holds a decomposition to U+FFFD. 
+ break 'surrogateloop; } - // Not unpaired surrogate + let upcoming = unsafe { char::from_u32_unchecked(upcoming32) }; - let upcoming_with_trie_value = - decomposition.attach_trie_value(upcoming); - if upcoming_with_trie_value.starter_and_decomposes_to_self() && counter != 0 { - continue 'fast; - } + let upcoming_with_trie_value = CharacterAndTrieValue::new(upcoming, trie_value); + let consumed_so_far_slice = &pending_slice[..pending_slice.len() - code_unit_iter.as_slice().len() - upcoming.len_utf16()]; @@ -2114,8 +2009,7 @@ impl DecomposingNormalizerBorrowed<'_> { /// A normalizer for performing decomposing normalization. #[derive(Debug)] pub struct DecomposingNormalizer { - decompositions: DataPayload, - supplementary_decompositions: Option, + decompositions: DataPayload, tables: DataPayload, supplementary_tables: Option>, decomposition_passthrough_bound: u8, // never above 0xC0 @@ -2127,10 +2021,6 @@ impl DecomposingNormalizer { pub fn as_borrowed(&self) -> DecomposingNormalizerBorrowed { DecomposingNormalizerBorrowed { decompositions: self.decompositions.get(), - supplementary_decompositions: self - .supplementary_decompositions - .as_ref() - .map(|s| s.get()), tables: self.tables.get(), supplementary_tables: self.supplementary_tables.as_ref().map(|s| s.get()), decomposition_passthrough_bound: self.decomposition_passthrough_bound, @@ -2162,11 +2052,11 @@ impl DecomposingNormalizer { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_nfd)] pub fn try_new_nfd_unstable(provider: &D) -> Result where - D: DataProvider + D: DataProvider + DataProvider + ?Sized, { - let decompositions: DataPayload = + let decompositions: DataPayload = provider.load(Default::default())?.payload; let tables: DataPayload = provider.load(Default::default())?.payload; @@ -2182,13 +2072,21 @@ impl DecomposingNormalizer { .with_marker(CanonicalDecompositionTablesV1Marker::INFO)); } + let cap = decompositions.get().passthrough_cap; + if cap > 0x0300 { + return Err( + 
DataError::custom("invalid").with_marker(CanonicalDecompositionDataV2Marker::INFO) + ); + } + let decomposition_capped = cap.min(0xC0); + let composition_capped = cap.min(0x0300); + Ok(DecomposingNormalizer { decompositions, - supplementary_decompositions: None, tables, supplementary_tables: None, - decomposition_passthrough_bound: 0xC0, - composition_passthrough_bound: 0x0300, + decomposition_passthrough_bound: decomposition_capped as u8, + composition_passthrough_bound: composition_capped, }) } @@ -2216,17 +2114,13 @@ impl DecomposingNormalizer { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_nfkd)] pub fn try_new_nfkd_unstable(provider: &D) -> Result where - D: DataProvider - + DataProvider + D: DataProvider + DataProvider + DataProvider + ?Sized, { - let decompositions: DataPayload = + let decompositions: DataPayload = provider.load(Default::default())?.payload; - let supplementary_decompositions: DataPayload< - CompatibilityDecompositionSupplementV1Marker, - > = provider.load(Default::default())?.payload; let tables: DataPayload = provider.load(Default::default())?.payload; let supplementary_tables: DataPayload = @@ -2248,19 +2142,16 @@ impl DecomposingNormalizer { .with_marker(CanonicalDecompositionTablesV1Marker::INFO)); } - let cap = supplementary_decompositions.get().passthrough_cap; + let cap = decompositions.get().passthrough_cap; if cap > 0x0300 { return Err(DataError::custom("invalid") - .with_marker(CompatibilityDecompositionSupplementV1Marker::INFO)); + .with_marker(CompatibilityDecompositionDataV2Marker::INFO)); } let decomposition_capped = cap.min(0xC0); let composition_capped = cap.min(0x0300); Ok(DecomposingNormalizer { - decompositions, - supplementary_decompositions: Some(SupplementPayloadHolder::Compatibility( - supplementary_decompositions, - )), + decompositions: decompositions.cast(), tables, supplementary_tables: Some(supplementary_tables), decomposition_passthrough_bound: decomposition_capped as u8, @@ -2287,16 
+2178,13 @@ impl DecomposingNormalizer { /// to other reorderable characters. pub(crate) fn try_new_uts46_decomposed_unstable(provider: &D) -> Result where - D: DataProvider - + DataProvider + D: DataProvider + DataProvider + DataProvider // UTS 46 tables merged into CompatibilityDecompositionTablesV1Marker + ?Sized, { - let decompositions: DataPayload = - provider.load(Default::default())?.payload; - let supplementary_decompositions: DataPayload = + let decompositions: DataPayload = provider.load(Default::default())?.payload; let tables: DataPayload = provider.load(Default::default())?.payload; @@ -2319,19 +2207,17 @@ impl DecomposingNormalizer { .with_marker(CanonicalDecompositionTablesV1Marker::INFO)); } - let cap = supplementary_decompositions.get().passthrough_cap; + let cap = decompositions.get().passthrough_cap; if cap > 0x0300 { - return Err(DataError::custom("invalid") - .with_marker(Uts46DecompositionSupplementV1Marker::INFO)); + return Err( + DataError::custom("invalid").with_marker(Uts46DecompositionDataV2Marker::INFO) + ); } let decomposition_capped = cap.min(0xC0); let composition_capped = cap.min(0x0300); Ok(DecomposingNormalizer { - decompositions, - supplementary_decompositions: Some(SupplementPayloadHolder::Uts46( - supplementary_decompositions, - )), + decompositions: decompositions.cast(), tables, supplementary_tables: Some(supplementary_tables), decomposition_passthrough_bound: decomposition_capped as u8, @@ -2422,7 +2308,6 @@ impl ComposingNormalizerBorrowed<'_> { Decomposition::new_with_supplements( iter, self.decomposing_normalizer.decompositions, - self.decomposing_normalizer.supplementary_decompositions, self.decomposing_normalizer.tables, self.decomposing_normalizer.supplementary_tables, self.decomposing_normalizer.decomposition_passthrough_bound, @@ -2453,13 +2338,7 @@ impl ComposingNormalizerBorrowed<'_> { // non-ASCII lead bytes is worthwhile is ever introduced. 
composition_passthrough_bound.min(0x80) as u8 }; - // This is basically an `Option` discriminant for `undecomposed_starter`, - // but making it a boolean so that writes in the tightest loop are as - // simple as possible (and potentially as peel-hoistable as possible). - // Furthermore, this reduces `unwrap()` later. - let mut undecomposed_starter_valid = true; - // Annotation belongs really on inner statements, but Rust doesn't - // allow it there. + // Attributes have to be on blocks, so hoisting all the way here. #[allow(clippy::unwrap_used)] 'fast: loop { let mut code_unit_iter = composition.decomposition.delegate.as_str().as_bytes().iter(); @@ -2467,7 +2346,6 @@ impl ComposingNormalizerBorrowed<'_> { if let Some(&upcoming_byte) = code_unit_iter.next() { if upcoming_byte < composition_passthrough_byte_bound { // Fast-track succeeded! - undecomposed_starter_valid = false; continue 'fastest; } composition.decomposition.delegate = pending_slice[pending_slice.len() - code_unit_iter.as_slice().len() - 1..].chars(); @@ -2486,26 +2364,19 @@ impl ComposingNormalizerBorrowed<'_> { // starter albeit past `composition_passthrough_bound` // Fast-track succeeded! - undecomposed_starter = upcoming_with_trie_value; - undecomposed_starter_valid = true; continue 'fast; } // We need to fall off the fast path. composition.decomposition.pending = Some(upcoming_with_trie_value); - let consumed_so_far_slice = if undecomposed_starter_valid { - &pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_str().len() - upcoming.len_utf8() - undecomposed_starter.character.len_utf8()] - } else { - // slicing and unwrap OK, because we've just evidently read enough previously. 
- let mut consumed_so_far = pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_str().len() - upcoming.len_utf8()].chars(); - // `unwrap` OK, because we've previously manage to read the previous character - undecomposed_starter = composition.decomposition.attach_trie_value(consumed_so_far.next_back().unwrap()); - undecomposed_starter_valid = true; - consumed_so_far.as_str() - }; + + // slicing and unwrap OK, because we've just evidently read enough previously. + let mut consumed_so_far = pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_str().len() - upcoming.len_utf8()].chars(); + // `unwrap` OK, because we've previously managed to read the previous character + undecomposed_starter = composition.decomposition.attach_trie_value(consumed_so_far.next_back().unwrap()); + let consumed_so_far_slice = consumed_so_far.as_str(); sink.write_str(consumed_so_far_slice)?; break 'fast; } - debug_assert!(undecomposed_starter_valid); }, text, sink, @@ -2529,18 +2400,26 @@ impl ComposingNormalizerBorrowed<'_> { false, as_slice, { - // This is basically an `Option` discriminant for `undecomposed_starter`, - // but making it a boolean so that writes in the tightest loop are as - // simple as possible (and potentially as peel-hoistable as possible). - // Furthermore, this reduces `unwrap()` later. - let mut undecomposed_starter_valid = true; 'fast: loop { if let Some(upcoming) = composition.decomposition.delegate.next() { if u32::from(upcoming) < composition_passthrough_bound { // Fast-track succeeded! - undecomposed_starter_valid = false; continue 'fast; } + // TODO: Be statically aware of fast/small trie.
+ let upcoming_with_trie_value = composition.decomposition.attach_trie_value(upcoming); + if upcoming_with_trie_value.potential_passthrough_and_cannot_combine_backwards() { + // Note: The trie value of the REPLACEMENT CHARACTER is + // intentionally formatted to fail the + // `potential_passthrough_and_cannot_combine_backwards` + // test even though it really is a starter that decomposes + // to self and cannot combine backwards. This + // allows moving the branch on REPLACEMENT CHARACTER + // below this `continue`. + continue 'fast; + } + // We need to fall off the fast path. + // TODO(#2006): Annotate as unlikely if upcoming == REPLACEMENT_CHARACTER { // Can't tell if this is an error or a literal U+FFFD in @@ -2554,35 +2433,24 @@ impl ComposingNormalizerBorrowed<'_> { let consumed_so_far_slice = consumed_so_far.as_slice(); sink.write_str(unsafe{ from_utf8_unchecked(consumed_so_far_slice)})?; undecomposed_starter = CharacterAndTrieValue::new(REPLACEMENT_CHARACTER, 0); - undecomposed_starter_valid = true; composition.decomposition.pending = None; break 'fast; } - let upcoming_with_trie_value = composition.decomposition.attach_trie_value(upcoming); - if upcoming_with_trie_value.potential_passthrough_and_cannot_combine_backwards() { - // Can't combine backwards, hence a plain (non-backwards-combining) - // starter albeit past `composition_passthrough_bound` - // Fast-track succeeded! - undecomposed_starter = upcoming_with_trie_value; - undecomposed_starter_valid = true; - continue 'fast; - } - // We need to fall off the fast path. composition.decomposition.pending = Some(upcoming_with_trie_value); - // Annotation belongs really on inner statement, but Rust doesn't - // allow it there. + // slicing and unwrap OK, because we've just evidently read enough previously.
+ // `unwrap` OK, because we've previously managed to read the previous character + let mut consumed_so_far = pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_slice().len() - upcoming.len_utf8()].chars(); #[allow(clippy::unwrap_used)] - let consumed_so_far_slice = if undecomposed_starter_valid { - &pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_slice().len() - upcoming.len_utf8() - undecomposed_starter.character.len_utf8()] - } else { - // slicing and unwrap OK, because we've just evidently read enough previously. - let mut consumed_so_far = pending_slice[..pending_slice.len() - composition.decomposition.delegate.as_slice().len() - upcoming.len_utf8()].chars(); - // `unwrap` OK, because we've previously manage to read the previous character + { + // TODO: If the previous character was below the passthrough bound, + // we really need to read from the trie. Otherwise, we could maintain + // the most-recent trie value. Need to measure what's more expensive: + // Remembering the trie value on each iteration or re-reading the + // last one after the fast-track run. undecomposed_starter = composition.decomposition.attach_trie_value(consumed_so_far.next_back().unwrap()); - undecomposed_starter_valid = true; - consumed_so_far.as_slice() - }; + } + let consumed_so_far_slice = consumed_so_far.as_slice(); sink.write_str(unsafe { from_utf8_unchecked(consumed_so_far_slice)})?; break 'fast; } @@ -2590,7 +2458,6 @@ impl ComposingNormalizerBorrowed<'_> { sink.write_str(unsafe {from_utf8_unchecked(pending_slice) })?; return Ok(()); } - debug_assert!(undecomposed_starter_valid); }, text, sink, @@ -2618,35 +2485,35 @@ impl ComposingNormalizerBorrowed<'_> { { let mut code_unit_iter = composition.decomposition.delegate.as_slice().iter(); let mut upcoming32; - // This is basically an `Option` discriminant for `undecomposed_starter`, - // but making it a boolean so that writes to it are are as - // simple as possible.
- // Furthermore, this removes the need for `unwrap()` later. - let mut undecomposed_starter_valid; - // The purpose of the counter is to flush once in a while. If we flush - // too much, there is too much flushing overhead. If we flush too rarely, - // the flush starts reading from too far behind compared to the hot - // recently-read memory. - let mut counter = UTF16_FAST_PATH_FLUSH_THRESHOLD; - // The purpose of this trickiness is to avoid writing to - // `undecomposed_starter_valid` from the tightest loop. Writing to it - // from there destroys performance. - let mut counter_reference = counter - 1; + // Declaring this up here is useful for getting compile errors about invalid changes + // to the code structure below. + let mut trie_value; 'fast: loop { - counter -= 1; if let Some(&upcoming_code_unit) = code_unit_iter.next() { upcoming32 = u32::from(upcoming_code_unit); // may be surrogate - if upcoming32 < composition_passthrough_bound && counter != 0 { + if upcoming32 < composition_passthrough_bound { // No need for surrogate or U+FFFD check, because // `composition_passthrough_bound` cannot be higher than // U+0300. + // Fast-track succeeded! + // At this point, `trie_value` is out of sync with `upcoming32`. + // However, we either 1) reach the end of `code_unit_iter`, at + // which point nothing reads `trie_value` anymore or we + // execute the line immediately below this loop. + continue 'fast; + } + // We might be doing a trie lookup by surrogate. Surrogates get + // a decomposition to U+FFFD. + trie_value = composition.decomposition.trie.get32(upcoming32); + if potential_passthrough_and_cannot_combine_backwards_impl(trie_value) { + // Can't combine backwards, hence a plain (non-backwards-combining) + // starter albeit past `composition_passthrough_bound` + // Fast-track succeeded! continue 'fast; } - // if `counter` equals `counter_reference`, the `continue 'fast` - // line above has not executed and `undecomposed_starter` is still - // valid. 
- undecomposed_starter_valid = counter == counter_reference; + + // We might now be looking at a surrogate. // The loop is only broken out of as goto forward #[allow(clippy::never_loop)] 'surrogateloop: loop { @@ -2661,6 +2528,12 @@ impl ComposingNormalizerBorrowed<'_> { if in_inclusive_range16(low, 0xDC00, 0xDFFF) { upcoming32 = (upcoming32 << 10) + u32::from(low) - (((0xD800u32 << 10) - 0x10000u32) + 0xDC00u32); + // Successfully-paired surrogate. Read from the trie again. + trie_value = composition.decomposition.trie.get32(upcoming32); + if potential_passthrough_and_cannot_combine_backwards_impl(trie_value) { + // Fast-track succeeded! + continue 'fast; + } break 'surrogateloop; } else { code_unit_iter = iter_backup; @@ -2668,44 +2541,29 @@ impl ComposingNormalizerBorrowed<'_> { } } // unpaired surrogate - let slice_to_write = &pending_slice[..pending_slice.len() - code_unit_iter.as_slice().len() - 1]; - sink.write_slice(slice_to_write)?; - undecomposed_starter = CharacterAndTrieValue::new(REPLACEMENT_CHARACTER, 0); - undecomposed_starter_valid = true; - composition.decomposition.pending = None; - break 'fast; + upcoming32 = 0xFFFD; // Safe value for `char::from_u32_unchecked` and matches later potential error check. + // trie_value already holds a decomposition to U+FFFD. + debug_assert_eq!(trie_value, NON_ROUND_TRIP_MARKER | BACKWARD_COMBINING_MARKER | 0xFFFD); + break 'surrogateloop; } - // Not unpaired surrogate - let upcoming = unsafe { char::from_u32_unchecked(upcoming32) }; - let upcoming_with_trie_value = composition.decomposition.attach_trie_value(upcoming); - if upcoming_with_trie_value.potential_passthrough_and_cannot_combine_backwards() && counter != 0 { - // Can't combine backwards, hence a plain (non-backwards-combining) - // starter albeit past `composition_passthrough_bound` - // Fast-track succeeded! - undecomposed_starter = upcoming_with_trie_value; - // Cause `undecomposed_starter_valid` to be set to true. 
- // This regresses English performance on Haswell by 11% - // compared to commenting out this assignment to - // `counter_reference`. - counter_reference = counter - 1; - continue 'fast; - } + // SAFETY: upcoming32 can no longer be a surrogate. + let upcoming = unsafe { char::from_u32_unchecked(upcoming32) }; + let upcoming_with_trie_value = CharacterAndTrieValue::new(upcoming, trie_value); // We need to fall off the fast path. composition.decomposition.pending = Some(upcoming_with_trie_value); - // Annotation belongs really on inner statement, but Rust doesn't - // allow it there. + let mut consumed_so_far = pending_slice[..pending_slice.len() - code_unit_iter.as_slice().len() - upcoming.len_utf16()].chars(); + // `unwrap` OK, because we've previously managed to read the previous character #[allow(clippy::unwrap_used)] - let consumed_so_far_slice = if undecomposed_starter_valid { - &pending_slice[..pending_slice.len() - code_unit_iter.as_slice().len() - upcoming.len_utf16() - undecomposed_starter.character.len_utf16()] - } else { - // slicing and unwrap OK, because we've just evidently read enough previously. - let mut consumed_so_far = pending_slice[..pending_slice.len() - code_unit_iter.as_slice().len() - upcoming.len_utf16()].chars(); - // `unwrap` OK, because we've previously manage to read the previous character + { + // TODO: If the previous character was below the passthrough bound, + // we really need to read from the trie. Otherwise, we could maintain + // the most-recent trie value. Need to measure what's more expensive: + // Remembering the trie value on each iteration or re-reading the + // last one after the fast-track run. 
undecomposed_starter = composition.decomposition.attach_trie_value(consumed_so_far.next_back().unwrap()); - undecomposed_starter_valid = true; - consumed_so_far.as_slice() - }; + } + let consumed_so_far_slice = consumed_so_far.as_slice(); sink.write_slice(consumed_so_far_slice)?; break 'fast; } @@ -2713,7 +2571,6 @@ impl ComposingNormalizerBorrowed<'_> { sink.write_slice(pending_slice)?; return Ok(()); } - debug_assert!(undecomposed_starter_valid); // Sync the main iterator composition.decomposition.delegate = code_unit_iter.as_slice().chars(); }, @@ -2767,7 +2624,7 @@ impl ComposingNormalizer { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_nfc)] pub fn try_new_nfc_unstable(provider: &D) -> Result where - D: DataProvider + D: DataProvider + DataProvider + DataProvider + ?Sized, @@ -2807,8 +2664,7 @@ impl ComposingNormalizer { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_nfkc)] pub fn try_new_nfkc_unstable(provider: &D) -> Result where - D: DataProvider - + DataProvider + D: DataProvider + DataProvider + DataProvider + DataProvider @@ -2828,8 +2684,7 @@ impl ComposingNormalizer { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_uts46)] pub(crate) fn try_new_uts46_unstable(provider: &D) -> Result where - D: DataProvider - + DataProvider + D: DataProvider + DataProvider + DataProvider // UTS 46 tables merged into CompatibilityDecompositionTablesV1Marker diff --git a/components/normalizer/src/properties.rs b/components/normalizer/src/properties.rs index 88c06ddf0e2..9090b4b14c0 100644 --- a/components/normalizer/src/properties.rs +++ b/components/normalizer/src/properties.rs @@ -12,19 +12,19 @@ //! glyph-availability-guided custom normalizer. 
use crate::char_from_u16; +use crate::char_from_u32; use crate::in_inclusive_range; use crate::provider::CanonicalCompositionsV1; use crate::provider::CanonicalCompositionsV1Marker; -use crate::provider::CanonicalDecompositionDataV1Marker; +use crate::provider::CanonicalDecompositionDataV2Marker; use crate::provider::CanonicalDecompositionTablesV1Marker; -use crate::provider::DecompositionDataV1; +use crate::provider::DecompositionDataV2; use crate::provider::DecompositionTablesV1; use crate::provider::NonRecursiveDecompositionSupplementV1; use crate::provider::NonRecursiveDecompositionSupplementV1Marker; use crate::trie_value_has_ccc; -use crate::trie_value_indicates_special_non_starter_decomposition; use crate::CanonicalCombiningClass; -use crate::BACKWARD_COMBINING_STARTER_MARKER; +use crate::BACKWARD_COMBINING_MARKER; use crate::FDFA_MARKER; use crate::HANGUL_L_BASE; use crate::HANGUL_N_COUNT; @@ -33,8 +33,9 @@ use crate::HANGUL_S_COUNT; use crate::HANGUL_T_BASE; use crate::HANGUL_T_COUNT; use crate::HANGUL_V_BASE; +use crate::HIGH_ZEROS_MASK; +use crate::LOW_ZEROS_MASK; use crate::NON_ROUND_TRIP_MARKER; -use crate::SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16; use icu_provider::prelude::*; /// Borrowed version of the raw canonical composition operation. @@ -188,7 +189,7 @@ pub enum Decomposed { /// glyph-availability-guided custom normalizer. 
#[derive(Debug)] pub struct CanonicalDecompositionBorrowed<'a> { - decompositions: &'a DecompositionDataV1<'a>, + decompositions: &'a DecompositionDataV2<'a>, tables: &'a DecompositionTablesV1<'a>, non_recursive: &'a NonRecursiveDecompositionSupplementV1<'a>, } @@ -233,7 +234,7 @@ impl CanonicalDecompositionBorrowed<'static> { Self { decompositions: - crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V1_MARKER, + crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V2_MARKER, tables: crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_TABLES_V1_MARKER, non_recursive: crate::provider::Baked::SINGLETON_NON_RECURSIVE_DECOMPOSITION_SUPPLEMENT_V1_MARKER, @@ -287,30 +288,30 @@ impl CanonicalDecompositionBorrowed<'_> { #[inline(always)] fn decompose_non_hangul(&self, c: char) -> Decomposed { let decomposition = self.decompositions.trie.get(c); - if decomposition <= BACKWARD_COMBINING_STARTER_MARKER { + // The REPLACEMENT CHARACTER has `NON_ROUND_TRIP_MARKER` set, + // and that flag needs to be ignored here. + if (decomposition & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) == 0 { return Decomposed::Default; } // The loop is only broken out of as goto forward #[allow(clippy::never_loop)] loop { - let trail_or_complex = (decomposition >> 16) as u16; - let lead = decomposition as u16; - if lead > NON_ROUND_TRIP_MARKER && trail_or_complex != 0 { + let high_zeros = (decomposition & HIGH_ZEROS_MASK) == 0; + let low_zeros = (decomposition & LOW_ZEROS_MASK) == 0; + if !high_zeros && !low_zeros { // Decomposition into two BMP characters: starter and non-starter if in_inclusive_range(c, '\u{1F71}', '\u{1FFB}') { // Look in the other trie due to oxia singleton // mappings to corresponding character with tonos. 
break; } - return Decomposed::Expansion(char_from_u16(lead), char_from_u16(trail_or_complex)); + let starter = char_from_u32(decomposition & 0x7FFF); + let combining = char_from_u32((decomposition >> 15) & 0x7FFF); + return Decomposed::Expansion(starter, combining); } - if lead > NON_ROUND_TRIP_MARKER { + if high_zeros { // Decomposition into one BMP character or non-starter - debug_assert_ne!( - lead, FDFA_MARKER, - "How come we got the U+FDFA NFKD marker here?" - ); - if lead == SPECIAL_NON_STARTER_DECOMPOSITION_MARKER_U16 { + if trie_value_has_ccc(decomposition) { // Non-starter if !in_inclusive_range(c, '\u{0340}', '\u{0F81}') { return Decomposed::Default; @@ -347,35 +348,24 @@ impl CanonicalDecompositionBorrowed<'_> { _ => Decomposed::Default, }; } - return Decomposed::Singleton(char_from_u16(lead)); + let singleton = decomposition as u16; + debug_assert_ne!( + singleton, FDFA_MARKER, + "How come we got the U+FDFA NFKD marker here?" + ); + return Decomposed::Singleton(char_from_u16(singleton)); } - // The recursive decomposition of ANGSTROM SIGN is in the complex - // decomposition structure to avoid a branch in `potential_passthrough` - // for the BMP case. if c == '\u{212B}' { // ANGSTROM SIGN return Decomposed::Singleton('\u{00C5}'); } - // Complex decomposition - // Format for 16-bit value: - // 15..13: length minus two for 16-bit case and length minus one for - // the 32-bit case. Length 8 needs to fit in three bits in - // the 16-bit case, and this way the value is future-proofed - // up to 9 in the 16-bit case. Zero is unused and length one - // in the 16-bit case goes directly into the trie. - // 12: 1 if all trailing characters are guaranteed non-starters, - // 0 if no guarantees about non-starterness. - // Note: The bit choice is this way around to allow for - // dynamically falling back to not having this but instead - // having one more bit for length by merely choosing - // different masks. - // 11..0: Start offset in storage. 
The offset is to the logical - // sequence of scalars16, scalars32, supplementary_scalars16, - // supplementary_scalars32. - let offset = usize::from(trail_or_complex & 0xFFF); + // Only 12 of 14 bits used as of Unicode 16. + let offset = (((decomposition & !(0b11 << 30)) >> 16) as usize) - 1; + // Only 3 of 4 bits used as of Unicode 16. + let len_bits = decomposition & 0b1111; let tables = self.tables; if offset < tables.scalars16.len() { - if usize::from(trail_or_complex >> 13) != 0 { + if len_bits != 0 { // i.e. logical len isn't 2 break; } @@ -389,7 +379,7 @@ impl CanonicalDecompositionBorrowed<'_> { debug_assert!(false); return Decomposed::Default; } - let len = usize::from(trail_or_complex >> 13) + 1; + let len = len_bits + 1; if len > 2 { break; } @@ -445,7 +435,7 @@ impl CanonicalDecompositionBorrowed<'_> { /// glyph-availability-guided custom normalizer. #[derive(Debug)] pub struct CanonicalDecomposition { - decompositions: DataPayload, + decompositions: DataPayload, tables: DataPayload, non_recursive: DataPayload, } @@ -491,12 +481,12 @@ impl CanonicalDecomposition { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] pub fn try_new_unstable(provider: &D) -> Result where - D: DataProvider + D: DataProvider + DataProvider + DataProvider + ?Sized, { - let decompositions: DataPayload = + let decompositions: DataPayload = provider.load(Default::default())?.payload; let tables: DataPayload = provider.load(Default::default())?.payload; @@ -537,7 +527,7 @@ impl CanonicalDecomposition { #[derive(Debug)] pub struct CanonicalCombiningClassMapBorrowed<'a> { /// The data trie - decompositions: &'a DecompositionDataV1<'a>, + decompositions: &'a DecompositionDataV2<'a>, } #[cfg(feature = "compiled_data")] @@ -567,7 +557,7 @@ impl CanonicalCombiningClassMapBorrowed<'static> { pub const fn new() -> Self { CanonicalCombiningClassMapBorrowed { decompositions: - crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V1_MARKER, + 
crate::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V2_MARKER, } } } @@ -594,11 +584,6 @@ impl CanonicalCombiningClassMapBorrowed<'_> { let trie_value = self.decompositions.trie.get32(c); if trie_value_has_ccc(trie_value) { trie_value as u8 - } else if trie_value_indicates_special_non_starter_decomposition(trie_value) { - match c { - 0x0340 | 0x0341 | 0x0343 | 0x0344 => ccc!(Above, 230).0, - _ => ccc!(NotReordered, 0).0, - } } else { ccc!(NotReordered, 0).0 } @@ -628,7 +613,7 @@ impl CanonicalCombiningClassMapBorrowed<'_> { #[derive(Debug)] pub struct CanonicalCombiningClassMap { /// The data trie - decompositions: DataPayload, + decompositions: DataPayload, } #[cfg(feature = "compiled_data")] @@ -669,9 +654,9 @@ impl CanonicalCombiningClassMap { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] pub fn try_new_unstable(provider: &D) -> Result where - D: DataProvider + ?Sized, + D: DataProvider + ?Sized, { - let decompositions: DataPayload = + let decompositions: DataPayload = provider.load(Default::default())?.payload; Ok(CanonicalCombiningClassMap { decompositions }) } diff --git a/components/normalizer/src/provider.rs b/components/normalizer/src/provider.rs index a1fa0188df6..e20561f5d0e 100644 --- a/components/normalizer/src/provider.rs +++ b/components/normalizer/src/provider.rs @@ -42,49 +42,26 @@ const _: () = { make_provider!(Baked); impl_canonical_compositions_v1_marker!(Baked); impl_non_recursive_decomposition_supplement_v1_marker!(Baked); - impl_canonical_decomposition_data_v1_marker!(Baked); + impl_canonical_decomposition_data_v2_marker!(Baked); impl_canonical_decomposition_tables_v1_marker!(Baked); - impl_compatibility_decomposition_supplement_v1_marker!(Baked); + impl_compatibility_decomposition_data_v2_marker!(Baked); impl_compatibility_decomposition_tables_v1_marker!(Baked); - impl_uts46_decomposition_supplement_v1_marker!(Baked); + impl_uts46_decomposition_data_v2_marker!(Baked); }; #[cfg(feature = "datagen")] /// 
The latest minimum set of markers required by this component. pub const MARKERS: &[DataMarkerInfo] = &[ CanonicalCompositionsV1Marker::INFO, - CanonicalDecompositionDataV1Marker::INFO, + CanonicalDecompositionDataV2Marker::INFO, CanonicalDecompositionTablesV1Marker::INFO, - CompatibilityDecompositionSupplementV1Marker::INFO, + CompatibilityDecompositionDataV2Marker::INFO, CompatibilityDecompositionTablesV1Marker::INFO, NonRecursiveDecompositionSupplementV1Marker::INFO, - Uts46DecompositionSupplementV1Marker::INFO, + Uts46DecompositionDataV2Marker::INFO, ]; -/// Main data for NFD -/// -///

-/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, -/// including in SemVer minor releases. While the serde representation of data structs is guaranteed -/// to be stable, their Rust representation might not be. Use with caution. -///
-#[icu_provider::data_struct(marker( - CanonicalDecompositionDataV1Marker, - "normalizer/nfd@1", - singleton -))] -#[derive(Debug, PartialEq, Clone)] -#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] -#[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))] -#[cfg_attr(feature = "serde", derive(serde::Deserialize))] -pub struct DecompositionDataV1<'data> { - /// Trie for NFD decomposition. - #[cfg_attr(feature = "serde", serde(borrow))] - pub trie: CodePointTrie<'data, u32>, -} - -/// Data that either NFKD or the decomposed form of UTS 46 needs -/// _in addition to_ the NFD data. +/// Decomposition data /// ///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, @@ -92,51 +69,24 @@ pub struct DecompositionDataV1<'data> { /// to be stable, their Rust representation might not be. Use with caution. ///
#[icu_provider::data_struct( - marker( - CompatibilityDecompositionSupplementV1Marker, - "normalizer/nfkd@1", - singleton - ), - marker(Uts46DecompositionSupplementV1Marker, "normalizer/uts46d@1", singleton) + marker(CanonicalDecompositionDataV2Marker, "normalizer/nfd@2", singleton), + marker(CompatibilityDecompositionDataV2Marker, "normalizer/nfkd@2", singleton), + marker(Uts46DecompositionDataV2Marker, "normalizer/uts46d@2", singleton) )] #[derive(Debug, PartialEq, Clone)] #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] #[cfg_attr(feature = "datagen", databake(path = icu_normalizer::provider))] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] -pub struct DecompositionSupplementV1<'data> { - /// Trie for the decompositions that differ from NFD. - /// Getting a zero from this trie means that you need - /// to make another lookup from `DecompositionDataV1::trie`. +pub struct DecompositionDataV2<'data> { + /// Trie for decomposition. #[cfg_attr(feature = "serde", serde(borrow))] pub trie: CodePointTrie<'data, u32>, - /// Flags that indicate how the set of characters whose - /// decompositions starts with a non-starter differs from - /// the set for NFD. - /// - /// Bit 0: Whether half-width kana voicing marks decompose - /// into non-starters (their full-width combining - /// counterparts). - /// Bit 1: Whether U+0345 COMBINING GREEK YPOGEGRAMMENI - /// decomposes into a starter (U+03B9 GREEK SMALL - /// LETTER IOTA). - /// (Other bits unused.) - pub flags: u8, /// The passthrough bounds of NFD/NFC are lowered to this /// maximum instead. (16-bit, because cannot be higher /// than 0x0300, which is the bound for NFC.) pub passthrough_cap: u16, } -impl DecompositionSupplementV1<'_> { - const HALF_WIDTH_VOICING_MARK_MASK: u8 = 1; - - /// Whether half-width kana voicing marks decompose into non-starters - /// (their full-width combining counterparts). 
- pub fn half_width_voicing_marks_become_non_starters(&self) -> bool { - (self.flags & DecompositionSupplementV1::HALF_WIDTH_VOICING_MARK_MASK) != 0 - } -} - /// The expansion tables for cases where the decomposition isn't /// contained in the trie value /// diff --git a/components/normalizer/src/uts46.rs b/components/normalizer/src/uts46.rs index 73e8fb2fd55..ddfd82d27ea 100644 --- a/components/normalizer/src/uts46.rs +++ b/components/normalizer/src/uts46.rs @@ -9,12 +9,11 @@ //! implementation, such as the `idna` crate. use crate::CanonicalCompositionsV1Marker; -use crate::CanonicalDecompositionDataV1Marker; use crate::CanonicalDecompositionTablesV1Marker; use crate::CompatibilityDecompositionTablesV1Marker; use crate::ComposingNormalizer; use crate::ComposingNormalizerBorrowed; -use crate::Uts46DecompositionSupplementV1Marker; +use crate::Uts46DecompositionDataV2Marker; use icu_provider::DataError; use icu_provider::DataProvider; @@ -164,8 +163,7 @@ impl Uts46Mapper { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] pub fn try_new(provider: &D) -> Result where - D: DataProvider - + DataProvider + D: DataProvider + DataProvider + DataProvider // UTS 46 tables merged into CompatibilityDecompositionTablesV1Marker diff --git a/components/normalizer/trie-value-format.md b/components/normalizer/trie-value-format.md new file mode 100644 index 00000000000..c9d23451d55 --- /dev/null +++ b/components/normalizer/trie-value-format.md @@ -0,0 +1,42 @@ +# The `icu_normalizer` Trie Value Format + +## General + +The trie values are 32-bit values. + +0xFFFFFFFF (all bits set to 1) marks UTS 46 ignorables and is not supported in NFD or NFKD data. + +## Common Flags + +Two flags common to all trie value types other than the above ignorable marker: + +Bit 31 (the most significant bit): 1 iff the first character of the decomposition can combine backwards. + +Bit 30: 1 iff applying NFC to the decomposition does not result in the character being decomposed. 
(Currently, this bit isn't actually useful for non-starters, and a future change might involve setting this flag on non-starters that decompose to themselves if that turns out to be useful for some optimization.) + +## Types of Trie Values + +The character is a starter (CCC == 0) that decomposes to itself: The 31 lower bits set to zero. (Bit 31 may be set to 1, but bit 30 cannot.) + +REPLACEMENT CHARACTER: Bit 30 set to 1 and all others set to zero. This is an exception to the above item in order to allow catching UTF-8 errors as a side effect of a passthrough check. + +The character is a non-starter (CCC != 0) that decomposes to itself: The highest bit is set to 1, the rest of the high half is set to zeros, the second-least-significant byte is 0xD8, and the least-significant byte is the CCC value. + +The character is a non-starter that decomposes to multiple characters such that the first character is a non-starter: The two highest bits are set to 1, the rest of the high half is set to zeros, the second-least-significant byte is 0xD9, and the least-significant byte is the CCC value of the _undecomposed_ character. (The actual decomposition is hard-coded.) + +The decomposition is the NFKD decomposition of U+FDFA: The highest bit is 0, the second-highest is 1, the lowest bit is 1, and the rest are zeros. (The actual decomposition is hard-coded.) The lowest bit is deliberately unified with Hangul syllables below to maximize the options for reordering the Hangul check relative to other checks. + +Hangul syllable: The trie value is 1. (I.e. only the lowest bit is set to 1.) + +The decomposition is a singleton decomposition to a single different BMP starter > 0x1F: The highest bit is 1 iff the decomposition can combine backwards (does not occur as of Unicode 16.0), the second-highest bit is 1, the low half is the decomposition.
+ +The character is not the ANGSTROM SIGN, and the decomposition is to a starter <= U+7FFF and > 0x1F that cannot combine backwards followed by a non-starter <= U+7FFF and > 0x1F: The highest bit is 0. The second-highest bit is set according to its general semantics. The lowest 15 bits are the leading starter. The next 15 bits are the trailing non-starter. + +Otherwise: This is a complex decomposition. It must start with a starter, which is theoretically not future-proof but is likely practically going to be OK. The two highest bits are set according to their general semantics. The lower 12 bits of the higher half are the _offset_ to the logical sequence of scalars16, scalars32, supplementary_scalars16, supplementary_scalars32. (The 14 lowest bits of the higher half are interpreted as the offset for forward compatibility, but technically the two highest of these are reserved and could be assigned to flags instead in the future.) The lowest 3 bits are _length_ minus 2 if all characters of the decomposition are within the BMP and _length_ minus 1 otherwise. The fourth-lowest bit is reserved and set to 0. (The four lowest bits are interpreted as length for forward-compatibility, but technically the fourth bit could be allocated to a flag instead.) The fifth bit is set to 1 if all the trailing characters are non-starters. If it is 0, there are no guarantees on the non-starterness of the trailing characters. The 11 highest bits of the lower half are set to zero. + +## Additional Tweaks + +In NFD and NFKD data, each surrogate has a singleton decomposition to U+FFFD. (This is not included in UTS 46 data, because this is used for UTF-16 slice mode optimization and UTS 46 is only supported in the iterator mode, because the UTS 46 ignorable marker is not supported in the slice mode.) + +In UTS 46 data, each disallowed character has a singleton decomposition to U+FFFD.
+ diff --git a/components/pattern/src/common.rs b/components/pattern/src/common.rs index a2e328e4baa..27790035b68 100644 --- a/components/pattern/src/common.rs +++ b/components/pattern/src/common.rs @@ -3,7 +3,7 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::Error; -use writeable::{Part, TryWriteable}; +use writeable::{TryWriteable, Writeable}; #[cfg(feature = "alloc")] use alloc::{borrow::Cow, boxed::Box}; @@ -110,30 +110,97 @@ pub trait PatternBackend: crate::private::Sealed + 'static + core::fmt::Debug { fn empty() -> &'static Self::Store; } -/// Default annotation for the literal portion of a pattern. +/// Trait implemented on collections that can produce [`TryWriteable`]s for interpolation. /// -/// For more information, see [`PlaceholderValueProvider`]. For an example, see [`Pattern`]. +/// This trait can add [`Part`]s for individual literals or placeholders. The implementations +/// of this trait on standard types do not add any [`Part`]s. /// -/// [`Pattern`]: crate::Pattern -pub const PATTERN_LITERAL_PART: Part = Part { - category: "pattern", - value: "literal", -}; - -/// Default annotation for the placeholder portion of a pattern. +/// # Examples /// -/// For more information, see [`PlaceholderValueProvider`]. For an example, see [`Pattern`]. +/// A custom implementation that adds parts: /// -/// [`Pattern`]: crate::Pattern -pub const PATTERN_PLACEHOLDER_PART: Part = Part { - category: "pattern", - value: "placeholder", -}; - -/// Trait implemented on collections that can produce [`TryWriteable`]s for interpolation. 
+/// ``` +/// use core::str::FromStr; +/// use icu_pattern::Pattern; +/// use icu_pattern::DoublePlaceholder; +/// use icu_pattern::DoublePlaceholderKey; +/// use icu_pattern::PlaceholderValueProvider; +/// use writeable::adapters::WithPart; +/// use writeable::adapters::WriteableAsTryWriteableInfallible; +/// use writeable::assert_writeable_parts_eq; +/// use writeable::Part; +/// use writeable::Writeable; +/// +/// let pattern = Pattern::::try_from_str( +/// "Hello, {0} and {1}!", +/// Default::default(), +/// ) +/// .unwrap(); +/// +/// struct ValuesWithParts<'a>(&'a str, &'a str); +/// +/// const PART_PLACEHOLDER_0: Part = Part { +/// category: "custom", +/// value: "placeholder0", +/// }; +/// const PART_PLACEHOLDER_1: Part = Part { +/// category: "custom", +/// value: "placeholder1", +/// }; +/// const PART_LITERAL: Part = Part { +/// category: "custom", +/// value: "literal", +/// }; +/// +/// impl PlaceholderValueProvider for ValuesWithParts<'_> { +/// type Error = core::convert::Infallible; +/// +/// type W<'a> = WriteableAsTryWriteableInfallible> +/// where +/// Self: 'a; +/// +/// type L<'a, 'l> = WithPart<&'l str> +/// where +/// Self: 'a; +/// +/// #[inline] +/// fn value_for(&self, key: DoublePlaceholderKey) -> Self::W<'_> { +/// let writeable = match key { +/// DoublePlaceholderKey::Place0 => WithPart { +/// writeable: self.0, +/// part: PART_PLACEHOLDER_0, +/// }, +/// DoublePlaceholderKey::Place1 => WithPart { +/// writeable: self.1, +/// part: PART_PLACEHOLDER_1, +/// }, +/// }; +/// WriteableAsTryWriteableInfallible(writeable) +/// } +/// +/// #[inline] +/// fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> { +/// WithPart { +/// writeable: literal, +/// part: PART_LITERAL, +/// } +/// } +/// } /// -/// This trait determines the [`Part`]s produced by the writeable. In this crate, implementations -/// of this trait default to using [`PATTERN_LITERAL_PART`] and [`PATTERN_PLACEHOLDER_PART`]. 
+/// assert_writeable_parts_eq!( +/// pattern.interpolate(ValuesWithParts("Alice", "Bob")), +/// "Hello, Alice and Bob!", +/// [ +/// (0, 7, PART_LITERAL), +/// (7, 12, PART_PLACEHOLDER_0), +/// (12, 17, PART_LITERAL), +/// (17, 20, PART_PLACEHOLDER_1), +/// (20, 21, PART_LITERAL), +/// ] +/// ); +/// ``` +/// +/// [`Part`]: writeable::Part pub trait PlaceholderValueProvider { type Error; @@ -141,11 +208,19 @@ pub trait PlaceholderValueProvider { where Self: 'a; - const LITERAL_PART: Part; + type L<'a, 'l>: Writeable + where + Self: 'a; - /// Returns the [`TryWriteable`] to substitute for the given placeholder - /// and the [`Part`] representing it. - fn value_for(&self, key: K) -> (Self::W<'_>, Part); + /// Returns the [`TryWriteable`] to substitute for the given placeholder. + /// + /// See [`PatternItem::Placeholder`] + fn value_for(&self, key: K) -> Self::W<'_>; + + /// Maps a literal string to a [`Writeable`] that could contain parts. + /// + /// See [`PatternItem::Literal`] + fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l>; } impl<'b, K, T> PlaceholderValueProvider for &'b T @@ -153,13 +228,21 @@ where T: PlaceholderValueProvider + ?Sized, { type Error = T::Error; + type W<'a> = T::W<'a> where - T: 'a, - 'b: 'a; - const LITERAL_PART: Part = T::LITERAL_PART; - fn value_for(&self, key: K) -> (Self::W<'_>, Part) { + Self: 'a; + + type L<'a, 'l> + = T::L<'a, 'l> + where + Self: 'a; + + fn value_for(&self, key: K) -> Self::W<'_> { (*self).value_for(key) } + fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> { + (*self).map_literal(literal) + } } diff --git a/components/pattern/src/double.rs b/components/pattern/src/double.rs index e5129e490fc..1dc8fd3c8fd 100644 --- a/components/pattern/src/double.rs +++ b/components/pattern/src/double.rs @@ -75,22 +75,28 @@ where W1: Writeable, { type Error = Infallible; + type W<'a> = WriteableAsTryWriteableInfallible> where - W0: 'a, - W1: 'a; - const LITERAL_PART: writeable::Part = 
crate::PATTERN_LITERAL_PART; + Self: 'a; + + type L<'a, 'l> + = &'l str + where + Self: 'a; + #[inline] - fn value_for(&self, key: DoublePlaceholderKey) -> (Self::W<'_>, writeable::Part) { + fn value_for(&self, key: DoublePlaceholderKey) -> Self::W<'_> { let writeable = match key { DoublePlaceholderKey::Place0 => Either::Left(&self.0), DoublePlaceholderKey::Place1 => Either::Right(&self.1), }; - ( - WriteableAsTryWriteableInfallible(writeable), - crate::PATTERN_PLACEHOLDER_PART, - ) + WriteableAsTryWriteableInfallible(writeable) + } + #[inline] + fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> { + literal } } @@ -99,22 +105,29 @@ where W: Writeable, { type Error = Infallible; + type W<'a> = WriteableAsTryWriteableInfallible<&'a W> where - W: 'a; - const LITERAL_PART: writeable::Part = crate::PATTERN_LITERAL_PART; + Self: 'a; + + type L<'a, 'l> + = &'l str + where + Self: 'a; + #[inline] - fn value_for(&self, key: DoublePlaceholderKey) -> (Self::W<'_>, writeable::Part) { + fn value_for(&self, key: DoublePlaceholderKey) -> Self::W<'_> { let [item0, item1] = self; let writeable = match key { DoublePlaceholderKey::Place0 => item0, DoublePlaceholderKey::Place1 => item1, }; - ( - WriteableAsTryWriteableInfallible(writeable), - crate::PATTERN_PLACEHOLDER_PART, - ) + WriteableAsTryWriteableInfallible(writeable) + } + #[inline] + fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> { + literal } } diff --git a/components/pattern/src/frontend/mod.rs b/components/pattern/src/frontend/mod.rs index 39a22c6a31f..a8cf38dad85 100644 --- a/components/pattern/src/frontend/mod.rs +++ b/components/pattern/src/frontend/mod.rs @@ -15,11 +15,7 @@ use crate::Parser; use crate::ParserOptions; #[cfg(feature = "alloc")] use alloc::{borrow::ToOwned, boxed::Box, str::FromStr, string::String}; -use core::{ - convert::Infallible, - fmt::{self, Write}, - marker::PhantomData, -}; +use core::{convert::Infallible, fmt, marker::PhantomData}; use 
writeable::{adapters::TryWriteableInfallibleAsWriteable, PartsWrite, TryWriteable, Writeable}; /// A string pattern with placeholders. @@ -38,21 +34,18 @@ use writeable::{adapters::TryWriteableInfallibleAsWriteable, PartsWrite, TryWrit /// /// # Format to Parts /// -/// [`Pattern`] supports interpolating with [writeable::Part]s, annotations for whether the -/// substring was a placeholder or a literal. -/// -/// By default, the substrings are annotated with [`PATTERN_LITERAL_PART`] and -/// [`PATTERN_PLACEHOLDER_PART`]. This can be customized with [`PlaceholderValueProvider`]. +/// [`Pattern`] propagates [`Part`]s from inner writeables. In addition, it supports annotating +/// [`Part`]s for individual literals or placeholders via the [`PlaceholderValueProvider`] trait. /// /// # Examples /// -/// Interpolating a [`SinglePlaceholder`] pattern with parts: +/// Interpolating a [`SinglePlaceholder`] pattern: /// /// ``` /// use core::str::FromStr; /// use icu_pattern::Pattern; /// use icu_pattern::SinglePlaceholder; -/// use writeable::assert_writeable_parts_eq; +/// use writeable::assert_writeable_eq; /// /// let pattern = Pattern::::try_from_str( /// "Hello, {0}!", @@ -60,20 +53,16 @@ use writeable::{adapters::TryWriteableInfallibleAsWriteable, PartsWrite, TryWrit /// ) /// .unwrap(); /// -/// assert_writeable_parts_eq!( +/// assert_writeable_eq!( /// pattern.interpolate(["Alice"]), -/// "Hello, Alice!", -/// [ -/// (0, 7, icu_pattern::PATTERN_LITERAL_PART), -/// (7, 12, icu_pattern::PATTERN_PLACEHOLDER_PART), -/// (12, 13, icu_pattern::PATTERN_LITERAL_PART), -/// ] +/// "Hello, Alice!" 
/// ); /// ``` /// /// [`SinglePlaceholder`]: crate::SinglePlaceholder /// [`DoublePlaceholder`]: crate::DoublePlaceholder /// [`MultiNamedPlaceholder`]: crate::MultiNamedPlaceholder +/// [`Part`]: writeable::Part #[cfg_attr(feature = "yoke", derive(yoke::Yokeable))] #[repr(transparent)] pub struct Pattern { @@ -344,17 +333,14 @@ where for item in it { match item { PatternItem::Literal(s) => { - sink.with_part(P::LITERAL_PART, |sink| sink.write_str(s))?; + self.value_provider.map_literal(s).write_to_parts(sink)?; } PatternItem::Placeholder(key) => { - let (element_writeable, part) = self.value_provider.value_for(key); - sink.with_part(part, |sink| { - if let Err(e) = element_writeable.try_write_to_parts(sink)? { - // Keep the first error if there was one - error.get_or_insert(e); - } - Ok(()) - })?; + let element_writeable = self.value_provider.value_for(key); + if let Err(e) = element_writeable.try_write_to_parts(sink)? { + // Keep the first error if there was one + error.get_or_insert(e); + } } } #[cfg(debug_assertions)] diff --git a/components/pattern/src/lib.rs b/components/pattern/src/lib.rs index 78b9cad560b..192427adbb1 100644 --- a/components/pattern/src/lib.rs +++ b/components/pattern/src/lib.rs @@ -66,8 +66,6 @@ pub use common::PatternItem; #[cfg(feature = "alloc")] pub use common::PatternItemCow; pub use common::PlaceholderValueProvider; -pub use common::PATTERN_LITERAL_PART; -pub use common::PATTERN_PLACEHOLDER_PART; pub use double::DoublePlaceholder; pub use double::DoublePlaceholderKey; pub use error::PatternError; diff --git a/components/pattern/src/multi_named.rs b/components/pattern/src/multi_named.rs index ace5e3b2474..2dfd4e021dd 100644 --- a/components/pattern/src/multi_named.rs +++ b/components/pattern/src/multi_named.rs @@ -94,22 +94,27 @@ where W: Writeable, { type Error = MissingNamedPlaceholderError<'k>; + type W<'a> = Result<&'a W, Self::Error> where - W: 'a, Self: 'a; - const LITERAL_PART: writeable::Part = crate::PATTERN_LITERAL_PART; + 
+ type L<'a, 'l> + = &'l str + where + Self: 'a; + #[inline] - fn value_for<'a>( - &'a self, - key: MultiNamedPlaceholderKey<'k>, - ) -> (Self::W<'a>, writeable::Part) { - let writeable = match self.get(key.0) { + fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> { + match self.get(key.0) { Some(value) => Ok(value), None => Err(MissingNamedPlaceholderError { name: key.0 }), - }; - (writeable, crate::PATTERN_PLACEHOLDER_PART) + } + } + #[inline] + fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> { + literal } } @@ -121,22 +126,27 @@ where S: litemap::store::Store, { type Error = MissingNamedPlaceholderError<'k>; + type W<'a> = Result<&'a W, Self::Error> where - W: 'a, Self: 'a; - const LITERAL_PART: writeable::Part = crate::PATTERN_LITERAL_PART; + + type L<'a, 'l> + = &'l str + where + Self: 'a; + #[inline] - fn value_for<'a>( - &'a self, - key: MultiNamedPlaceholderKey<'k>, - ) -> (Self::W<'a>, writeable::Part) { - let writeable = match self.get(key.0) { + fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> { + match self.get(key.0) { Some(value) => Ok(value), None => Err(MissingNamedPlaceholderError { name: key.0 }), - }; - (writeable, crate::PATTERN_PLACEHOLDER_PART) + } + } + #[inline] + fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> { + literal } } diff --git a/components/pattern/src/single.rs b/components/pattern/src/single.rs index 9f74913b317..9cf75228015 100644 --- a/components/pattern/src/single.rs +++ b/components/pattern/src/single.rs @@ -65,16 +65,23 @@ where W: Writeable, { type Error = Infallible; + type W<'a> = WriteableAsTryWriteableInfallible<&'a W> where - W: 'a; - const LITERAL_PART: writeable::Part = crate::PATTERN_LITERAL_PART; - fn value_for(&self, _key: SinglePlaceholderKey) -> (Self::W<'_>, writeable::Part) { - ( - WriteableAsTryWriteableInfallible(&self.0), - crate::PATTERN_PLACEHOLDER_PART, - ) + Self: 'a; + + type L<'a, 'l> + = &'l str + where 
+ Self: 'a; + + fn value_for(&self, _key: SinglePlaceholderKey) -> Self::W<'_> { + WriteableAsTryWriteableInfallible(&self.0) + } + #[inline] + fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> { + literal } } @@ -83,17 +90,24 @@ where W: Writeable, { type Error = Infallible; + type W<'a> = WriteableAsTryWriteableInfallible<&'a W> where - W: 'a; - const LITERAL_PART: writeable::Part = crate::PATTERN_LITERAL_PART; - fn value_for(&self, _key: SinglePlaceholderKey) -> (Self::W<'_>, writeable::Part) { + Self: 'a; + + type L<'a, 'l> + = &'l str + where + Self: 'a; + + fn value_for(&self, _key: SinglePlaceholderKey) -> Self::W<'_> { let [value] = self; - ( - WriteableAsTryWriteableInfallible(value), - crate::PATTERN_PLACEHOLDER_PART, - ) + WriteableAsTryWriteableInfallible(value) + } + #[inline] + fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> { + literal } } diff --git a/ffi/capi/bindings/c/DataProvider.h b/ffi/capi/bindings/c/DataProvider.h index b05b5839bd3..a27390b0c2d 100644 --- a/ffi/capi/bindings/c/DataProvider.h +++ b/ffi/capi/bindings/c/DataProvider.h @@ -17,16 +17,12 @@ -DataProvider* icu4x_DataProvider_compiled_mv1(void); - typedef struct icu4x_DataProvider_from_fs_mv1_result {union {DataProvider* ok; DataError err;}; bool is_ok;} icu4x_DataProvider_from_fs_mv1_result; icu4x_DataProvider_from_fs_mv1_result icu4x_DataProvider_from_fs_mv1(DiplomatStringView path); typedef struct icu4x_DataProvider_from_byte_slice_mv1_result {union {DataProvider* ok; DataError err;}; bool is_ok;} icu4x_DataProvider_from_byte_slice_mv1_result; icu4x_DataProvider_from_byte_slice_mv1_result icu4x_DataProvider_from_byte_slice_mv1(DiplomatU8View blob); -DataProvider* icu4x_DataProvider_empty_mv1(void); - typedef struct icu4x_DataProvider_fork_by_key_mv1_result {union { DataError err;}; bool is_ok;} icu4x_DataProvider_fork_by_key_mv1_result; icu4x_DataProvider_fork_by_key_mv1_result icu4x_DataProvider_fork_by_key_mv1(DataProvider* self, 
DataProvider* other); diff --git a/ffi/capi/bindings/c/DateFormatter.h b/ffi/capi/bindings/c/DateFormatter.h index d872bd3f509..3b5532aceb3 100644 --- a/ffi/capi/bindings/c/DateFormatter.h +++ b/ffi/capi/bindings/c/DateFormatter.h @@ -7,6 +7,7 @@ #include #include "diplomat_runtime.h" +#include "AnyCalendarKind.d.h" #include "DataProvider.d.h" #include "Date.d.h" #include "DateTime.d.h" @@ -42,6 +43,8 @@ icu4x_DateFormatter_format_datetime_mv1_result icu4x_DateFormatter_format_dateti typedef struct icu4x_DateFormatter_format_iso_datetime_mv1_result {union { DateTimeFormatError err;}; bool is_ok;} icu4x_DateFormatter_format_iso_datetime_mv1_result; icu4x_DateFormatter_format_iso_datetime_mv1_result icu4x_DateFormatter_format_iso_datetime_mv1(const DateFormatter* self, const IsoDateTime* value, DiplomatWrite* write); +AnyCalendarKind icu4x_DateFormatter_calendar_kind_mv1(const DateFormatter* self); + void icu4x_DateFormatter_destroy_mv1(DateFormatter* self); diff --git a/ffi/capi/bindings/c/DateTimeFormatter.h b/ffi/capi/bindings/c/DateTimeFormatter.h index 045724563e2..306caee11ca 100644 --- a/ffi/capi/bindings/c/DateTimeFormatter.h +++ b/ffi/capi/bindings/c/DateTimeFormatter.h @@ -7,6 +7,7 @@ #include #include "diplomat_runtime.h" +#include "AnyCalendarKind.d.h" #include "DataProvider.d.h" #include "DateTime.d.h" #include "DateTimeFormatError.d.h" @@ -34,6 +35,8 @@ icu4x_DateTimeFormatter_format_datetime_mv1_result icu4x_DateTimeFormatter_forma typedef struct icu4x_DateTimeFormatter_format_iso_datetime_mv1_result {union { DateTimeFormatError err;}; bool is_ok;} icu4x_DateTimeFormatter_format_iso_datetime_mv1_result; icu4x_DateTimeFormatter_format_iso_datetime_mv1_result icu4x_DateTimeFormatter_format_iso_datetime_mv1(const DateTimeFormatter* self, const IsoDateTime* value, DiplomatWrite* write); +AnyCalendarKind icu4x_DateTimeFormatter_calendar_kind_mv1(const DateTimeFormatter* self); + void icu4x_DateTimeFormatter_destroy_mv1(DateTimeFormatter* self); diff --git 
a/ffi/capi/bindings/c/FixedDecimalFormatter.h b/ffi/capi/bindings/c/FixedDecimalFormatter.h index c7e4468b73d..644a66bdff5 100644 --- a/ffi/capi/bindings/c/FixedDecimalFormatter.h +++ b/ffi/capi/bindings/c/FixedDecimalFormatter.h @@ -31,6 +31,8 @@ icu4x_FixedDecimalFormatter_create_with_manual_data_mv1_result icu4x_FixedDecima void icu4x_FixedDecimalFormatter_format_mv1(const FixedDecimalFormatter* self, const SignedFixedDecimal* value, DiplomatWrite* write); +void icu4x_FixedDecimalFormatter_numbering_system_mv1(const FixedDecimalFormatter* self, DiplomatWrite* write); + void icu4x_FixedDecimalFormatter_destroy_mv1(FixedDecimalFormatter* self); diff --git a/ffi/capi/bindings/c/SignedFixedDecimal.h b/ffi/capi/bindings/c/SignedFixedDecimal.h index d703e1cc38e..21fb5c8a8f6 100644 --- a/ffi/capi/bindings/c/SignedFixedDecimal.h +++ b/ffi/capi/bindings/c/SignedFixedDecimal.h @@ -67,6 +67,8 @@ void icu4x_SignedFixedDecimal_trim_start_mv1(SignedFixedDecimal* self); void icu4x_SignedFixedDecimal_trim_end_mv1(SignedFixedDecimal* self); +void icu4x_SignedFixedDecimal_trim_end_if_integer_mv1(SignedFixedDecimal* self); + void icu4x_SignedFixedDecimal_pad_start_mv1(SignedFixedDecimal* self, int16_t position); void icu4x_SignedFixedDecimal_pad_end_mv1(SignedFixedDecimal* self, int16_t position); diff --git a/ffi/capi/bindings/cpp/icu4x/DataProvider.d.hpp b/ffi/capi/bindings/cpp/icu4x/DataProvider.d.hpp index 9bca1b49c01..54915fb1b45 100644 --- a/ffi/capi/bindings/cpp/icu4x/DataProvider.d.hpp +++ b/ffi/capi/bindings/cpp/icu4x/DataProvider.d.hpp @@ -28,14 +28,10 @@ namespace icu4x { class DataProvider { public: - inline static std::unique_ptr compiled(); - inline static diplomat::result, icu4x::DataError> from_fs(std::string_view path); inline static diplomat::result, icu4x::DataError> from_byte_slice(diplomat::span blob); - inline static std::unique_ptr empty(); - inline diplomat::result fork_by_key(icu4x::DataProvider& other); inline diplomat::result 
fork_by_locale(icu4x::DataProvider& other); diff --git a/ffi/capi/bindings/cpp/icu4x/DataProvider.hpp b/ffi/capi/bindings/cpp/icu4x/DataProvider.hpp index 3eff3554fa1..1c63cbd76c5 100644 --- a/ffi/capi/bindings/cpp/icu4x/DataProvider.hpp +++ b/ffi/capi/bindings/cpp/icu4x/DataProvider.hpp @@ -18,16 +18,12 @@ namespace icu4x { namespace capi { extern "C" { - icu4x::capi::DataProvider* icu4x_DataProvider_compiled_mv1(void); - typedef struct icu4x_DataProvider_from_fs_mv1_result {union {icu4x::capi::DataProvider* ok; icu4x::capi::DataError err;}; bool is_ok;} icu4x_DataProvider_from_fs_mv1_result; icu4x_DataProvider_from_fs_mv1_result icu4x_DataProvider_from_fs_mv1(diplomat::capi::DiplomatStringView path); typedef struct icu4x_DataProvider_from_byte_slice_mv1_result {union {icu4x::capi::DataProvider* ok; icu4x::capi::DataError err;}; bool is_ok;} icu4x_DataProvider_from_byte_slice_mv1_result; icu4x_DataProvider_from_byte_slice_mv1_result icu4x_DataProvider_from_byte_slice_mv1(diplomat::capi::DiplomatU8View blob); - icu4x::capi::DataProvider* icu4x_DataProvider_empty_mv1(void); - typedef struct icu4x_DataProvider_fork_by_key_mv1_result {union { icu4x::capi::DataError err;}; bool is_ok;} icu4x_DataProvider_fork_by_key_mv1_result; icu4x_DataProvider_fork_by_key_mv1_result icu4x_DataProvider_fork_by_key_mv1(icu4x::capi::DataProvider* self, icu4x::capi::DataProvider* other); @@ -44,11 +40,6 @@ namespace capi { } // namespace capi } // namespace -inline std::unique_ptr icu4x::DataProvider::compiled() { - auto result = icu4x::capi::icu4x_DataProvider_compiled_mv1(); - return std::unique_ptr(icu4x::DataProvider::FromFFI(result)); -} - inline diplomat::result, icu4x::DataError> icu4x::DataProvider::from_fs(std::string_view path) { auto result = icu4x::capi::icu4x_DataProvider_from_fs_mv1({path.data(), path.size()}); return result.is_ok ? 
diplomat::result, icu4x::DataError>(diplomat::Ok>(std::unique_ptr(icu4x::DataProvider::FromFFI(result.ok)))) : diplomat::result, icu4x::DataError>(diplomat::Err(icu4x::DataError::FromFFI(result.err))); @@ -59,11 +50,6 @@ inline diplomat::result, icu4x::DataError> return result.is_ok ? diplomat::result, icu4x::DataError>(diplomat::Ok>(std::unique_ptr(icu4x::DataProvider::FromFFI(result.ok)))) : diplomat::result, icu4x::DataError>(diplomat::Err(icu4x::DataError::FromFFI(result.err))); } -inline std::unique_ptr icu4x::DataProvider::empty() { - auto result = icu4x::capi::icu4x_DataProvider_empty_mv1(); - return std::unique_ptr(icu4x::DataProvider::FromFFI(result)); -} - inline diplomat::result icu4x::DataProvider::fork_by_key(icu4x::DataProvider& other) { auto result = icu4x::capi::icu4x_DataProvider_fork_by_key_mv1(this->AsFFI(), other.AsFFI()); diff --git a/ffi/capi/bindings/cpp/icu4x/DateFormatter.d.hpp b/ffi/capi/bindings/cpp/icu4x/DateFormatter.d.hpp index 708f49b74fa..dc8b56c4bb5 100644 --- a/ffi/capi/bindings/cpp/icu4x/DateFormatter.d.hpp +++ b/ffi/capi/bindings/cpp/icu4x/DateFormatter.d.hpp @@ -24,6 +24,7 @@ namespace capi { struct IsoDateTime; } class IsoDateTime; namespace capi { struct Locale; } class Locale; +class AnyCalendarKind; class DateTimeFormatError; class DateTimeFormatterLoadError; class DateTimeLength; @@ -52,6 +53,8 @@ class DateFormatter { inline diplomat::result format_iso_datetime(const icu4x::IsoDateTime& value) const; + inline icu4x::AnyCalendarKind calendar_kind() const; + inline const icu4x::capi::DateFormatter* AsFFI() const; inline icu4x::capi::DateFormatter* AsFFI(); inline static const icu4x::DateFormatter* FromFFI(const icu4x::capi::DateFormatter* ptr); diff --git a/ffi/capi/bindings/cpp/icu4x/DateFormatter.hpp b/ffi/capi/bindings/cpp/icu4x/DateFormatter.hpp index 7879bf7fa5c..aad249f7e98 100644 --- a/ffi/capi/bindings/cpp/icu4x/DateFormatter.hpp +++ b/ffi/capi/bindings/cpp/icu4x/DateFormatter.hpp @@ -10,6 +10,7 @@ #include #include 
#include "../diplomat_runtime.hpp" +#include "AnyCalendarKind.hpp" #include "DataProvider.hpp" #include "Date.hpp" #include "DateTime.hpp" @@ -43,6 +44,8 @@ namespace capi { typedef struct icu4x_DateFormatter_format_iso_datetime_mv1_result {union { icu4x::capi::DateTimeFormatError err;}; bool is_ok;} icu4x_DateFormatter_format_iso_datetime_mv1_result; icu4x_DateFormatter_format_iso_datetime_mv1_result icu4x_DateFormatter_format_iso_datetime_mv1(const icu4x::capi::DateFormatter* self, const icu4x::capi::IsoDateTime* value, diplomat::capi::DiplomatWrite* write); + icu4x::capi::AnyCalendarKind icu4x_DateFormatter_calendar_kind_mv1(const icu4x::capi::DateFormatter* self); + void icu4x_DateFormatter_destroy_mv1(DateFormatter* self); @@ -99,6 +102,11 @@ inline diplomat::result icu4x::DateForm return result.is_ok ? diplomat::result(diplomat::Ok(std::move(output))) : diplomat::result(diplomat::Err(icu4x::DateTimeFormatError::FromFFI(result.err))); } +inline icu4x::AnyCalendarKind icu4x::DateFormatter::calendar_kind() const { + auto result = icu4x::capi::icu4x_DateFormatter_calendar_kind_mv1(this->AsFFI()); + return icu4x::AnyCalendarKind::FromFFI(result); +} + inline const icu4x::capi::DateFormatter* icu4x::DateFormatter::AsFFI() const { return reinterpret_cast(this); } diff --git a/ffi/capi/bindings/cpp/icu4x/DateTimeFormatter.d.hpp b/ffi/capi/bindings/cpp/icu4x/DateTimeFormatter.d.hpp index be7dcdd276e..56c4d5eb5f9 100644 --- a/ffi/capi/bindings/cpp/icu4x/DateTimeFormatter.d.hpp +++ b/ffi/capi/bindings/cpp/icu4x/DateTimeFormatter.d.hpp @@ -20,6 +20,7 @@ namespace capi { struct IsoDateTime; } class IsoDateTime; namespace capi { struct Locale; } class Locale; +class AnyCalendarKind; class DateTimeFormatError; class DateTimeFormatterLoadError; class DateTimeLength; @@ -44,6 +45,8 @@ class DateTimeFormatter { inline diplomat::result format_iso_datetime(const icu4x::IsoDateTime& value) const; + inline icu4x::AnyCalendarKind calendar_kind() const; + inline const 
icu4x::capi::DateTimeFormatter* AsFFI() const; inline icu4x::capi::DateTimeFormatter* AsFFI(); inline static const icu4x::DateTimeFormatter* FromFFI(const icu4x::capi::DateTimeFormatter* ptr); diff --git a/ffi/capi/bindings/cpp/icu4x/DateTimeFormatter.hpp b/ffi/capi/bindings/cpp/icu4x/DateTimeFormatter.hpp index e33aa20b0ce..65246177659 100644 --- a/ffi/capi/bindings/cpp/icu4x/DateTimeFormatter.hpp +++ b/ffi/capi/bindings/cpp/icu4x/DateTimeFormatter.hpp @@ -10,6 +10,7 @@ #include #include #include "../diplomat_runtime.hpp" +#include "AnyCalendarKind.hpp" #include "DataProvider.hpp" #include "DateTime.hpp" #include "DateTimeFormatError.hpp" @@ -35,6 +36,8 @@ namespace capi { typedef struct icu4x_DateTimeFormatter_format_iso_datetime_mv1_result {union { icu4x::capi::DateTimeFormatError err;}; bool is_ok;} icu4x_DateTimeFormatter_format_iso_datetime_mv1_result; icu4x_DateTimeFormatter_format_iso_datetime_mv1_result icu4x_DateTimeFormatter_format_iso_datetime_mv1(const icu4x::capi::DateTimeFormatter* self, const icu4x::capi::IsoDateTime* value, diplomat::capi::DiplomatWrite* write); + icu4x::capi::AnyCalendarKind icu4x_DateTimeFormatter_calendar_kind_mv1(const icu4x::capi::DateTimeFormatter* self); + void icu4x_DateTimeFormatter_destroy_mv1(DateTimeFormatter* self); @@ -73,6 +76,11 @@ inline diplomat::result icu4x::DateTime return result.is_ok ? 
diplomat::result(diplomat::Ok(std::move(output))) : diplomat::result(diplomat::Err(icu4x::DateTimeFormatError::FromFFI(result.err))); } +inline icu4x::AnyCalendarKind icu4x::DateTimeFormatter::calendar_kind() const { + auto result = icu4x::capi::icu4x_DateTimeFormatter_calendar_kind_mv1(this->AsFFI()); + return icu4x::AnyCalendarKind::FromFFI(result); +} + inline const icu4x::capi::DateTimeFormatter* icu4x::DateTimeFormatter::AsFFI() const { return reinterpret_cast(this); } diff --git a/ffi/capi/bindings/cpp/icu4x/FixedDecimalFormatter.d.hpp b/ffi/capi/bindings/cpp/icu4x/FixedDecimalFormatter.d.hpp index c9661b3b11d..1db040fed4b 100644 --- a/ffi/capi/bindings/cpp/icu4x/FixedDecimalFormatter.d.hpp +++ b/ffi/capi/bindings/cpp/icu4x/FixedDecimalFormatter.d.hpp @@ -41,6 +41,8 @@ class FixedDecimalFormatter { inline std::string format(const icu4x::SignedFixedDecimal& value) const; + inline std::string numbering_system() const; + inline const icu4x::capi::FixedDecimalFormatter* AsFFI() const; inline icu4x::capi::FixedDecimalFormatter* AsFFI(); inline static const icu4x::FixedDecimalFormatter* FromFFI(const icu4x::capi::FixedDecimalFormatter* ptr); diff --git a/ffi/capi/bindings/cpp/icu4x/FixedDecimalFormatter.hpp b/ffi/capi/bindings/cpp/icu4x/FixedDecimalFormatter.hpp index cfd8b63b972..64d7c7c7a35 100644 --- a/ffi/capi/bindings/cpp/icu4x/FixedDecimalFormatter.hpp +++ b/ffi/capi/bindings/cpp/icu4x/FixedDecimalFormatter.hpp @@ -32,6 +32,8 @@ namespace capi { void icu4x_FixedDecimalFormatter_format_mv1(const icu4x::capi::FixedDecimalFormatter* self, const icu4x::capi::SignedFixedDecimal* value, diplomat::capi::DiplomatWrite* write); + void icu4x_FixedDecimalFormatter_numbering_system_mv1(const icu4x::capi::FixedDecimalFormatter* self, diplomat::capi::DiplomatWrite* write); + void icu4x_FixedDecimalFormatter_destroy_mv1(FixedDecimalFormatter* self); @@ -76,6 +78,14 @@ inline std::string icu4x::FixedDecimalFormatter::format(const icu4x::SignedFixed return output; } +inline 
std::string icu4x::FixedDecimalFormatter::numbering_system() const { + std::string output; + diplomat::capi::DiplomatWrite write = diplomat::WriteFromString(output); + icu4x::capi::icu4x_FixedDecimalFormatter_numbering_system_mv1(this->AsFFI(), + &write); + return output; +} + inline const icu4x::capi::FixedDecimalFormatter* icu4x::FixedDecimalFormatter::AsFFI() const { return reinterpret_cast(this); } diff --git a/ffi/capi/bindings/cpp/icu4x/SignedFixedDecimal.d.hpp b/ffi/capi/bindings/cpp/icu4x/SignedFixedDecimal.d.hpp index 8b4adb670a0..22f17fd63f0 100644 --- a/ffi/capi/bindings/cpp/icu4x/SignedFixedDecimal.d.hpp +++ b/ffi/capi/bindings/cpp/icu4x/SignedFixedDecimal.d.hpp @@ -73,6 +73,8 @@ class SignedFixedDecimal { inline void trim_end(); + inline void trim_end_if_integer(); + inline void pad_start(int16_t position); inline void pad_end(int16_t position); diff --git a/ffi/capi/bindings/cpp/icu4x/SignedFixedDecimal.hpp b/ffi/capi/bindings/cpp/icu4x/SignedFixedDecimal.hpp index 524b5569bd5..e47ea847d62 100644 --- a/ffi/capi/bindings/cpp/icu4x/SignedFixedDecimal.hpp +++ b/ffi/capi/bindings/cpp/icu4x/SignedFixedDecimal.hpp @@ -69,6 +69,8 @@ namespace capi { void icu4x_SignedFixedDecimal_trim_end_mv1(icu4x::capi::SignedFixedDecimal* self); + void icu4x_SignedFixedDecimal_trim_end_if_integer_mv1(icu4x::capi::SignedFixedDecimal* self); + void icu4x_SignedFixedDecimal_pad_start_mv1(icu4x::capi::SignedFixedDecimal* self, int16_t position); void icu4x_SignedFixedDecimal_pad_end_mv1(icu4x::capi::SignedFixedDecimal* self, int16_t position); @@ -207,6 +209,10 @@ inline void icu4x::SignedFixedDecimal::trim_end() { icu4x::capi::icu4x_SignedFixedDecimal_trim_end_mv1(this->AsFFI()); } +inline void icu4x::SignedFixedDecimal::trim_end_if_integer() { + icu4x::capi::icu4x_SignedFixedDecimal_trim_end_if_integer_mv1(this->AsFFI()); +} + inline void icu4x::SignedFixedDecimal::pad_start(int16_t position) { icu4x::capi::icu4x_SignedFixedDecimal_pad_start_mv1(this->AsFFI(), position); 
diff --git a/ffi/capi/bindings/dart/DataProvider.g.dart b/ffi/capi/bindings/dart/DataProvider.g.dart index 22ef963d3ae..69990cac6fc 100644 --- a/ffi/capi/bindings/dart/DataProvider.g.dart +++ b/ffi/capi/bindings/dart/DataProvider.g.dart @@ -24,25 +24,6 @@ final class DataProvider implements ffi.Finalizable { static final _finalizer = ffi.NativeFinalizer(ffi.Native.addressOf(_icu4x_DataProvider_destroy_mv1)); - /// Constructs an [`DataProvider`] that uses compiled data. - /// - /// Requires the `compiled_data` feature. - /// - /// This provider cannot be modified or combined with other providers, so `enable_fallback`, - /// `enabled_fallback_with`, `fork_by_locale`, and `fork_by_key` will return `Err`s. - factory DataProvider.compiled() { - final result = _icu4x_DataProvider_compiled_mv1(); - return DataProvider._fromFfi(result, []); - } - - /// Constructs an empty [`DataProvider`]. - /// - /// See the [Rust documentation for `EmptyDataProvider`](https://docs.rs/icu_provider_adapters/latest/icu_provider_adapters/empty/struct.EmptyDataProvider.html) for more information. - factory DataProvider.empty() { - final result = _icu4x_DataProvider_empty_mv1(); - return DataProvider._fromFfi(result, []); - } - /// Creates a provider that tries the current provider and then, if the current provider /// doesn't support the data key, another provider `other`. 
/// @@ -95,16 +76,6 @@ final class DataProvider implements ffi.Finalizable { // ignore: non_constant_identifier_names external void _icu4x_DataProvider_destroy_mv1(ffi.Pointer self); -@meta.RecordUse() -@ffi.Native Function()>(isLeaf: true, symbol: 'icu4x_DataProvider_compiled_mv1') -// ignore: non_constant_identifier_names -external ffi.Pointer _icu4x_DataProvider_compiled_mv1(); - -@meta.RecordUse() -@ffi.Native Function()>(isLeaf: true, symbol: 'icu4x_DataProvider_empty_mv1') -// ignore: non_constant_identifier_names -external ffi.Pointer _icu4x_DataProvider_empty_mv1(); - @meta.RecordUse() @ffi.Native<_ResultVoidInt32 Function(ffi.Pointer, ffi.Pointer)>(isLeaf: true, symbol: 'icu4x_DataProvider_fork_by_key_mv1') // ignore: non_constant_identifier_names diff --git a/ffi/capi/bindings/dart/DateFormatter.g.dart b/ffi/capi/bindings/dart/DateFormatter.g.dart index 27ffae6a5f5..3635269998e 100644 --- a/ffi/capi/bindings/dart/DateFormatter.g.dart +++ b/ffi/capi/bindings/dart/DateFormatter.g.dart @@ -98,6 +98,14 @@ final class DateFormatter implements ffi.Finalizable { } return write.finalize(); } + + /// Returns the calendar system used in this formatter. + /// + /// See the [Rust documentation for `calendar_kind`](https://docs.rs/icu/latest/icu/datetime/struct.DateTimeFormatter.html#method.calendar_kind) for more information. 
+ AnyCalendarKind calendarKind() { + final result = _icu4x_DateFormatter_calendar_kind_mv1(_ffi); + return AnyCalendarKind.values[result]; + } } @meta.RecordUse() @@ -134,3 +142,8 @@ external _ResultVoidInt32 _icu4x_DateFormatter_format_datetime_mv1(ffi.Pointer, ffi.Pointer, ffi.Pointer)>(isLeaf: true, symbol: 'icu4x_DateFormatter_format_iso_datetime_mv1') // ignore: non_constant_identifier_names external _ResultVoidInt32 _icu4x_DateFormatter_format_iso_datetime_mv1(ffi.Pointer self, ffi.Pointer value, ffi.Pointer write); + +@meta.RecordUse() +@ffi.Native)>(isLeaf: true, symbol: 'icu4x_DateFormatter_calendar_kind_mv1') +// ignore: non_constant_identifier_names +external int _icu4x_DateFormatter_calendar_kind_mv1(ffi.Pointer self); diff --git a/ffi/capi/bindings/dart/DateTimeFormatter.g.dart b/ffi/capi/bindings/dart/DateTimeFormatter.g.dart index e803c83b190..fb2bc1c92f4 100644 --- a/ffi/capi/bindings/dart/DateTimeFormatter.g.dart +++ b/ffi/capi/bindings/dart/DateTimeFormatter.g.dart @@ -72,6 +72,14 @@ final class DateTimeFormatter implements ffi.Finalizable { } return write.finalize(); } + + /// Returns the calendar system used in this formatter. + /// + /// See the [Rust documentation for `calendar_kind`](https://docs.rs/icu/latest/icu/datetime/struct.DateTimeFormatter.html#method.calendar_kind) for more information. 
+ AnyCalendarKind calendarKind() { + final result = _icu4x_DateTimeFormatter_calendar_kind_mv1(_ffi); + return AnyCalendarKind.values[result]; + } } @meta.RecordUse() @@ -98,3 +106,8 @@ external _ResultVoidInt32 _icu4x_DateTimeFormatter_format_datetime_mv1(ffi.Point @ffi.Native<_ResultVoidInt32 Function(ffi.Pointer, ffi.Pointer, ffi.Pointer)>(isLeaf: true, symbol: 'icu4x_DateTimeFormatter_format_iso_datetime_mv1') // ignore: non_constant_identifier_names external _ResultVoidInt32 _icu4x_DateTimeFormatter_format_iso_datetime_mv1(ffi.Pointer self, ffi.Pointer value, ffi.Pointer write); + +@meta.RecordUse() +@ffi.Native)>(isLeaf: true, symbol: 'icu4x_DateTimeFormatter_calendar_kind_mv1') +// ignore: non_constant_identifier_names +external int _icu4x_DateTimeFormatter_calendar_kind_mv1(ffi.Pointer self); diff --git a/ffi/capi/bindings/dart/FixedDecimalFormatter.g.dart b/ffi/capi/bindings/dart/FixedDecimalFormatter.g.dart index 6ef3a34bfa4..c97fc5299da 100644 --- a/ffi/capi/bindings/dart/FixedDecimalFormatter.g.dart +++ b/ffi/capi/bindings/dart/FixedDecimalFormatter.g.dart @@ -72,6 +72,13 @@ final class FixedDecimalFormatter implements ffi.Finalizable { _icu4x_FixedDecimalFormatter_format_mv1(_ffi, value._ffi, write._ffi); return write.finalize(); } + + /// See the [Rust documentation for `numbering_system`](https://docs.rs/icu/latest/icu/decimal/struct.FixedDecimalFormatter.html#method.numbering_system) for more information. 
+ String numberingSystem() { + final write = _Write(); + _icu4x_FixedDecimalFormatter_numbering_system_mv1(_ffi, write._ffi); + return write.finalize(); + } } @meta.RecordUse() @@ -98,3 +105,8 @@ external _ResultOpaqueInt32 _icu4x_FixedDecimalFormatter_create_with_manual_data @ffi.Native, ffi.Pointer, ffi.Pointer)>(isLeaf: true, symbol: 'icu4x_FixedDecimalFormatter_format_mv1') // ignore: non_constant_identifier_names external void _icu4x_FixedDecimalFormatter_format_mv1(ffi.Pointer self, ffi.Pointer value, ffi.Pointer write); + +@meta.RecordUse() +@ffi.Native, ffi.Pointer)>(isLeaf: true, symbol: 'icu4x_FixedDecimalFormatter_numbering_system_mv1') +// ignore: non_constant_identifier_names +external void _icu4x_FixedDecimalFormatter_numbering_system_mv1(ffi.Pointer self, ffi.Pointer write); diff --git a/ffi/capi/bindings/dart/SignedFixedDecimal.g.dart b/ffi/capi/bindings/dart/SignedFixedDecimal.g.dart index ef5017554b0..d1054ee04c4 100644 --- a/ffi/capi/bindings/dart/SignedFixedDecimal.g.dart +++ b/ffi/capi/bindings/dart/SignedFixedDecimal.g.dart @@ -161,6 +161,11 @@ final class SignedFixedDecimal implements ffi.Finalizable { _icu4x_SignedFixedDecimal_trim_end_mv1(_ffi); } + /// See the [Rust documentation for `trim_end_if_integer`](https://docs.rs/fixed_decimal/latest/fixed_decimal/struct.UnsignedFixedDecimal.html#method.trim_end_if_integer) for more information. + void trimEndIfInteger() { + _icu4x_SignedFixedDecimal_trim_end_if_integer_mv1(_ffi); + } + /// Zero-pad the [`SignedFixedDecimal`] on the left to a particular position /// /// See the [Rust documentation for `pad_start`](https://docs.rs/fixed_decimal/latest/fixed_decimal/struct.FixedDecimal.html#method.pad_start) for more information. 
@@ -336,6 +341,11 @@ external void _icu4x_SignedFixedDecimal_trim_start_mv1(ffi.Pointer s // ignore: non_constant_identifier_names external void _icu4x_SignedFixedDecimal_trim_end_mv1(ffi.Pointer self); +@meta.RecordUse() +@ffi.Native)>(isLeaf: true, symbol: 'icu4x_SignedFixedDecimal_trim_end_if_integer_mv1') +// ignore: non_constant_identifier_names +external void _icu4x_SignedFixedDecimal_trim_end_if_integer_mv1(ffi.Pointer self); + @meta.RecordUse() @ffi.Native, ffi.Int16)>(isLeaf: true, symbol: 'icu4x_SignedFixedDecimal_pad_start_mv1') // ignore: non_constant_identifier_names diff --git a/ffi/capi/bindings/demo_gen/FixedDecimalFormatter.d.ts b/ffi/capi/bindings/demo_gen/FixedDecimalFormatter.d.ts index 0742354294a..867900b6360 100644 --- a/ffi/capi/bindings/demo_gen/FixedDecimalFormatter.d.ts +++ b/ffi/capi/bindings/demo_gen/FixedDecimalFormatter.d.ts @@ -2,3 +2,4 @@ import { FixedDecimalFormatter } from "icu4x" import { Locale } from "icu4x" import { SignedFixedDecimal } from "icu4x" export function format(name: string, groupingStrategy: FixedDecimalGroupingStrategy, f: number, magnitude: number); +export function numberingSystem(name: string, groupingStrategy: FixedDecimalGroupingStrategy); diff --git a/ffi/capi/bindings/demo_gen/FixedDecimalFormatter.mjs b/ffi/capi/bindings/demo_gen/FixedDecimalFormatter.mjs index fd0f6062631..74ce26c81f1 100644 --- a/ffi/capi/bindings/demo_gen/FixedDecimalFormatter.mjs +++ b/ffi/capi/bindings/demo_gen/FixedDecimalFormatter.mjs @@ -27,3 +27,22 @@ export function format(name, groupingStrategy, f, magnitude) { ] ); } +export function numberingSystem(name, groupingStrategy) { + return (function (...args) { return args[0].numberingSystem(...args.slice(1)) }).apply( + null, + [ + FixedDecimalFormatter.createWithGroupingStrategy.apply( + null, + [ + Locale.fromString.apply( + null, + [ + name + ] + ), + groupingStrategy + ] + ) + ] + ); +} diff --git a/ffi/capi/bindings/demo_gen/index.mjs b/ffi/capi/bindings/demo_gen/index.mjs 
index 29ea392dd0a..e3053f423ac 100644 --- a/ffi/capi/bindings/demo_gen/index.mjs +++ b/ffi/capi/bindings/demo_gen/index.mjs @@ -1087,6 +1087,27 @@ let termini = Object.assign({ ] }, + "FixedDecimalFormatter.numberingSystem": { + func: FixedDecimalFormatterDemo.numberingSystem, + // For avoiding webpacking minifying issues: + funcName: "FixedDecimalFormatter.numberingSystem", + parameters: [ + + { + name: "Self:Locale:Name", + type: "string", + typeUse: "string" + }, + + { + name: "Self:GroupingStrategy", + type: "FixedDecimalGroupingStrategy", + typeUse: "enumerator" + } + + ] + }, + "SignedFixedDecimal.toString": { func: SignedFixedDecimalDemo.toString, // For avoiding webpacking minifying issues: diff --git a/ffi/capi/bindings/js/DataProvider.d.ts b/ffi/capi/bindings/js/DataProvider.d.ts index 0c5823c7b75..fc8cd56c830 100644 --- a/ffi/capi/bindings/js/DataProvider.d.ts +++ b/ffi/capi/bindings/js/DataProvider.d.ts @@ -13,10 +13,6 @@ export class DataProvider { get ffiValue(): pointer; - static compiled(): DataProvider; - - static empty(): DataProvider; - forkByKey(other: DataProvider): void; forkByLocale(other: DataProvider): void; diff --git a/ffi/capi/bindings/js/DataProvider.mjs b/ffi/capi/bindings/js/DataProvider.mjs index fdedf0364b4..b7116e95bd9 100644 --- a/ffi/capi/bindings/js/DataProvider.mjs +++ b/ffi/capi/bindings/js/DataProvider.mjs @@ -40,26 +40,6 @@ export class DataProvider { return this.#ptr; } - static compiled() { - const result = wasm.icu4x_DataProvider_compiled_mv1(); - - try { - return new DataProvider(diplomatRuntime.internalConstructor, result, []); - } - - finally {} - } - - static empty() { - const result = wasm.icu4x_DataProvider_empty_mv1(); - - try { - return new DataProvider(diplomatRuntime.internalConstructor, result, []); - } - - finally {} - } - forkByKey(other) { const diplomatReceive = new diplomatRuntime.DiplomatReceiveBuf(wasm, 5, 4, true); diff --git a/ffi/capi/bindings/js/DateFormatter.d.ts 
b/ffi/capi/bindings/js/DateFormatter.d.ts index 97ca1f354c9..7a8c8880897 100644 --- a/ffi/capi/bindings/js/DateFormatter.d.ts +++ b/ffi/capi/bindings/js/DateFormatter.d.ts @@ -1,4 +1,5 @@ // generated by diplomat-tool +import type { AnyCalendarKind } from "./AnyCalendarKind" import type { DataProvider } from "./DataProvider" import type { Date } from "./Date" import type { DateTime } from "./DateTime" @@ -32,4 +33,6 @@ export class DateFormatter { formatDatetime(value: DateTime): string; formatIsoDatetime(value: IsoDateTime): string; + + calendarKind(): AnyCalendarKind; } \ No newline at end of file diff --git a/ffi/capi/bindings/js/DateFormatter.mjs b/ffi/capi/bindings/js/DateFormatter.mjs index 8d721c3e78a..4e1e7639161 100644 --- a/ffi/capi/bindings/js/DateFormatter.mjs +++ b/ffi/capi/bindings/js/DateFormatter.mjs @@ -1,4 +1,5 @@ // generated by diplomat-tool +import { AnyCalendarKind } from "./AnyCalendarKind.mjs" import { DataProvider } from "./DataProvider.mjs" import { Date } from "./Date.mjs" import { DateTime } from "./DateTime.mjs" @@ -171,4 +172,14 @@ export class DateFormatter { write.free(); } } + + calendarKind() { + const result = wasm.icu4x_DateFormatter_calendar_kind_mv1(this.ffiValue); + + try { + return new AnyCalendarKind(diplomatRuntime.internalConstructor, result); + } + + finally {} + } } \ No newline at end of file diff --git a/ffi/capi/bindings/js/DateTimeFormatter.d.ts b/ffi/capi/bindings/js/DateTimeFormatter.d.ts index 7658b058e76..78fbb2fbb16 100644 --- a/ffi/capi/bindings/js/DateTimeFormatter.d.ts +++ b/ffi/capi/bindings/js/DateTimeFormatter.d.ts @@ -1,4 +1,5 @@ // generated by diplomat-tool +import type { AnyCalendarKind } from "./AnyCalendarKind" import type { DataProvider } from "./DataProvider" import type { DateTime } from "./DateTime" import type { DateTimeFormatError } from "./DateTimeFormatError" @@ -26,4 +27,6 @@ export class DateTimeFormatter { formatDatetime(value: DateTime): string; formatIsoDatetime(value: IsoDateTime): 
string; + + calendarKind(): AnyCalendarKind; } \ No newline at end of file diff --git a/ffi/capi/bindings/js/DateTimeFormatter.mjs b/ffi/capi/bindings/js/DateTimeFormatter.mjs index 5d6e1359ac1..524e6bfad15 100644 --- a/ffi/capi/bindings/js/DateTimeFormatter.mjs +++ b/ffi/capi/bindings/js/DateTimeFormatter.mjs @@ -1,4 +1,5 @@ // generated by diplomat-tool +import { AnyCalendarKind } from "./AnyCalendarKind.mjs" import { DataProvider } from "./DataProvider.mjs" import { DateTime } from "./DateTime.mjs" import { DateTimeFormatError } from "./DateTimeFormatError.mjs" @@ -125,4 +126,14 @@ export class DateTimeFormatter { write.free(); } } + + calendarKind() { + const result = wasm.icu4x_DateTimeFormatter_calendar_kind_mv1(this.ffiValue); + + try { + return new AnyCalendarKind(diplomatRuntime.internalConstructor, result); + } + + finally {} + } } \ No newline at end of file diff --git a/ffi/capi/bindings/js/FixedDecimalFormatter.d.ts b/ffi/capi/bindings/js/FixedDecimalFormatter.d.ts index 396e47dead8..12d54cd908c 100644 --- a/ffi/capi/bindings/js/FixedDecimalFormatter.d.ts +++ b/ffi/capi/bindings/js/FixedDecimalFormatter.d.ts @@ -23,4 +23,6 @@ export class FixedDecimalFormatter { static createWithManualData(plusSignPrefix: string, plusSignSuffix: string, minusSignPrefix: string, minusSignSuffix: string, decimalSeparator: string, groupingSeparator: string, primaryGroupSize: number, secondaryGroupSize: number, minGroupSize: number, digits: Array, groupingStrategy: FixedDecimalGroupingStrategy | null): FixedDecimalFormatter; format(value: SignedFixedDecimal): string; + + numberingSystem(): string; } \ No newline at end of file diff --git a/ffi/capi/bindings/js/FixedDecimalFormatter.mjs b/ffi/capi/bindings/js/FixedDecimalFormatter.mjs index 20547087ee0..e83d1edd461 100644 --- a/ffi/capi/bindings/js/FixedDecimalFormatter.mjs +++ b/ffi/capi/bindings/js/FixedDecimalFormatter.mjs @@ -127,4 +127,17 @@ export class FixedDecimalFormatter { write.free(); } } + + numberingSystem() { 
+ const write = new diplomatRuntime.DiplomatWriteBuf(wasm); + wasm.icu4x_FixedDecimalFormatter_numbering_system_mv1(this.ffiValue, write.buffer); + + try { + return write.readString8(); + } + + finally { + write.free(); + } + } } \ No newline at end of file diff --git a/ffi/capi/bindings/js/SignedFixedDecimal.d.ts b/ffi/capi/bindings/js/SignedFixedDecimal.d.ts index 1b48a4e4bb1..c47f50f422d 100644 --- a/ffi/capi/bindings/js/SignedFixedDecimal.d.ts +++ b/ffi/capi/bindings/js/SignedFixedDecimal.d.ts @@ -51,6 +51,8 @@ export class SignedFixedDecimal { trimEnd(): void; + trimEndIfInteger(): void; + padStart(position: number): void; padEnd(position: number): void; diff --git a/ffi/capi/bindings/js/SignedFixedDecimal.mjs b/ffi/capi/bindings/js/SignedFixedDecimal.mjs index 0e5d069e6d9..40f4dc95d2b 100644 --- a/ffi/capi/bindings/js/SignedFixedDecimal.mjs +++ b/ffi/capi/bindings/js/SignedFixedDecimal.mjs @@ -245,6 +245,13 @@ export class SignedFixedDecimal { finally {} } + trimEndIfInteger() {wasm.icu4x_SignedFixedDecimal_trim_end_if_integer_mv1(this.ffiValue); + + try {} + + finally {} + } + padStart(position) {wasm.icu4x_SignedFixedDecimal_pad_start_mv1(this.ffiValue, position); try {} diff --git a/ffi/capi/src/bidi.rs b/ffi/capi/src/bidi.rs index bd1432a435e..5dfdebb7027 100644 --- a/ffi/capi/src/bidi.rs +++ b/ffi/capi/src/bidi.rs @@ -10,8 +10,8 @@ pub mod ffi { use alloc::vec::Vec; use core::fmt::Write; - use crate::errors::ffi::DataError; - use crate::provider::ffi::DataProvider; + #[cfg(feature = "buffer_provider")] + use crate::{errors::ffi::DataError, provider::ffi::DataProvider}; pub enum BidiDirection { Ltr, @@ -39,9 +39,9 @@ pub mod ffi { /// Creates a new [`Bidi`] from locale data, and a particular data source. 
#[diplomat::rust_link(icu::properties::bidi::BidiClassAdapter::new, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider(provider: &DataProvider) -> Result, DataError> { Ok(Box::new(Bidi(call_constructor_unstable!( - icu_properties::CodePointMapData::new [m => Ok(m.static_to_owned())], icu_properties::CodePointMapData::try_new_unstable, provider, )?))) diff --git a/ffi/capi/src/calendar.rs b/ffi/capi/src/calendar.rs index eb382fc354b..549946a19de 100644 --- a/ffi/capi/src/calendar.rs +++ b/ffi/capi/src/calendar.rs @@ -10,8 +10,10 @@ pub mod ffi { use alloc::sync::Arc; use core::fmt::Write; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] use crate::errors::ffi::DataError; use crate::locale_core::ffi::Locale; + #[cfg(feature = "buffer_provider")] use crate::provider::ffi::DataProvider; /// The various calendar types currently supported by [`Calendar`] @@ -126,34 +128,33 @@ pub mod ffi { #[diplomat::rust_link(icu::calendar::AnyCalendar::try_new, FnInEnum)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "for_locale_with_provider")] #[diplomat::demo(default_constructor)] + #[cfg(feature = "buffer_provider")] pub fn create_for_locale_with_provider( provider: &DataProvider, locale: &Locale, ) -> Result, DataError> { let prefs = (&locale.0).into(); - Ok(Box::new(Calendar(Arc::new(call_constructor!( - icu_calendar::AnyCalendar::try_new, - icu_calendar::AnyCalendar::try_new_with_any_provider, - icu_calendar::AnyCalendar::try_new_with_buffer_provider, - provider, - prefs + Ok(Box::new(Calendar(Arc::new(provider.call_constructor( + |provider| icu_calendar::AnyCalendar::try_new_with_buffer_provider(provider, prefs), )?)))) } /// Creates a new [`Calendar`] from the specified date and time, using a particular data source. 
#[diplomat::rust_link(icu::calendar::AnyCalendar::new_for_kind, FnInEnum)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "for_kind_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_for_kind_with_provider( provider: &DataProvider, kind: AnyCalendarKind, ) -> Result, DataError> { - Ok(Box::new(Calendar(Arc::new(call_constructor!( - icu_calendar::AnyCalendar::new_for_kind [r => Ok(r)], - icu_calendar::AnyCalendar::try_new_for_kind_with_any_provider, - icu_calendar::AnyCalendar::try_new_for_kind_with_buffer_provider, - provider, - kind.into() + Ok(Box::new(Calendar(Arc::new(provider.call_constructor( + |provider| { + icu_calendar::AnyCalendar::try_new_for_kind_with_buffer_provider( + provider, + kind.into(), + ) + }, )?)))) } diff --git a/ffi/capi/src/casemap.rs b/ffi/capi/src/casemap.rs index 7b45e68245f..5eadd0b336c 100644 --- a/ffi/capi/src/casemap.rs +++ b/ffi/capi/src/casemap.rs @@ -10,7 +10,11 @@ use icu_casemap::titlecase::TitlecaseOptions; pub mod ffi { use alloc::boxed::Box; - use crate::{errors::ffi::DataError, locale_core::ffi::Locale, provider::ffi::DataProvider}; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use crate::errors::ffi::DataError; + use crate::locale_core::ffi::Locale; + #[cfg(feature = "buffer_provider")] + use crate::provider::ffi::DataProvider; use diplomat_runtime::DiplomatOption; use writeable::Writeable; @@ -65,12 +69,10 @@ pub mod ffi { /// Construct a new CaseMapper instance using a particular data source. 
#[diplomat::rust_link(icu::casemap::CaseMapper::new, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider(provider: &DataProvider) -> Result, DataError> { - Ok(Box::new(CaseMapper(call_constructor!( - icu_casemap::CaseMapper::new [r => Ok(r)], - icu_casemap::CaseMapper::try_new_with_any_provider, + Ok(Box::new(CaseMapper(provider.call_constructor( icu_casemap::CaseMapper::try_new_with_buffer_provider, - provider, )?))) } /// Returns the full lowercase mapping of the given string @@ -230,14 +232,12 @@ pub mod ffi { #[diplomat::rust_link(icu::casemap::CaseMapCloser::new, FnInStruct)] #[diplomat::rust_link(icu::casemap::CaseMapCloser::new_with_mapper, FnInStruct, hidden)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(CaseMapCloser(call_constructor!( - icu_casemap::CaseMapCloser::new [r => Ok(r)], - icu_casemap::CaseMapCloser::try_new_with_any_provider, + Ok(Box::new(CaseMapCloser(provider.call_constructor( icu_casemap::CaseMapCloser::try_new_with_buffer_provider, - provider, )?))) } /// Adds all simple case mappings and the full case folding for `c` to `builder`. 
@@ -289,14 +289,12 @@ pub mod ffi { #[diplomat::rust_link(icu::casemap::TitlecaseMapper::new, FnInStruct)] #[diplomat::rust_link(icu::casemap::TitlecaseMapper::new_with_mapper, FnInStruct, hidden)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(TitlecaseMapper(call_constructor!( - icu_casemap::TitlecaseMapper::new [r => Ok(r)], - icu_casemap::TitlecaseMapper::try_new_with_any_provider, + Ok(Box::new(TitlecaseMapper(provider.call_constructor( icu_casemap::TitlecaseMapper::try_new_with_buffer_provider, - provider, )?))) } /// Returns the full titlecase mapping of the given string diff --git a/ffi/capi/src/collator.rs b/ffi/capi/src/collator.rs index b6e43cb5c7f..46e45ae25fc 100644 --- a/ffi/capi/src/collator.rs +++ b/ffi/capi/src/collator.rs @@ -8,7 +8,10 @@ pub mod ffi { use alloc::boxed::Box; - use crate::{errors::ffi::DataError, locale_core::ffi::Locale, provider::ffi::DataProvider}; + #[cfg(feature = "buffer_provider")] + use crate::provider::ffi::DataProvider; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use crate::{errors::ffi::DataError, locale_core::ffi::Locale}; use diplomat_runtime::DiplomatOption; #[diplomat::opaque] @@ -131,18 +134,21 @@ pub mod ffi { #[diplomat::rust_link(icu::collator::CollatorPreferences, Struct, hidden)] #[diplomat::attr(supports = fallible_constructors, constructor)] #[diplomat::attr(supports = non_exhaustive_structs, rename = "create_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_v1_with_provider( provider: &DataProvider, locale: &Locale, options: CollatorOptionsV1, ) -> Result, DataError> { - Ok(Box::new(Collator(call_constructor!( - icu_collator::Collator::try_new [r => Ok(r?.static_to_owned())], - icu_collator::Collator::try_new_with_any_provider, - icu_collator::Collator::try_new_with_buffer_provider, - 
provider, - icu_collator::CollatorPreferences::from(&locale.0), - icu_collator::CollatorOptions::from(options), + let options = options.into(); + Ok(Box::new(Collator(provider.call_constructor( + |provider| { + icu_collator::Collator::try_new_with_buffer_provider( + provider, + (&locale.0).into(), + options, + ) + }, )?))) } /// Compare two strings. diff --git a/ffi/capi/src/datetime_formatter.rs b/ffi/capi/src/datetime_formatter.rs index 98bb427d1a4..1ec9de85cec 100644 --- a/ffi/capi/src/datetime_formatter.rs +++ b/ffi/capi/src/datetime_formatter.rs @@ -7,17 +7,21 @@ #[diplomat::attr(auto, namespace = "icu4x")] pub mod ffi { use alloc::boxed::Box; - use icu_datetime::{ - fieldsets::{T, YMD, YMDT}, - options::Length, - }; - + use icu_datetime::fieldsets::{T, YMD, YMDT}; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use icu_datetime::options::Length; + + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use crate::errors::ffi::DateTimeFormatterLoadError; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use crate::locale_core::ffi::Locale; + #[cfg(feature = "buffer_provider")] + use crate::provider::ffi::DataProvider; use crate::{ + calendar::ffi::AnyCalendarKind, date::ffi::{Date, IsoDate}, datetime::ffi::{DateTime, IsoDateTime}, - errors::ffi::{DateTimeFormatError, DateTimeFormatterLoadError}, - locale_core::ffi::Locale, - provider::ffi::DataProvider, + errors::ffi::DateTimeFormatError, time::ffi::Time, }; @@ -55,6 +59,7 @@ pub mod ffi { /// Creates a new [`TimeFormatter`] using a particular data source. 
#[diplomat::attr(supports = fallible_constructors, named_constructor = "with_length_and_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_length_and_provider( provider: &DataProvider, locale: &Locale, @@ -63,14 +68,13 @@ pub mod ffi { let prefs = (&locale.0).into(); let options = T::with_length(Length::from(length)).hm(); - Ok(Box::new(TimeFormatter(call_constructor!( - icu_datetime::FixedCalendarDateTimeFormatter::try_new, - icu_datetime::FixedCalendarDateTimeFormatter::try_new_with_any_provider, - icu_datetime::FixedCalendarDateTimeFormatter::try_new_with_buffer_provider, - provider, - prefs, - options - )?))) + Ok(Box::new(TimeFormatter( + provider.call_constructor_custom_err(move |provider| { + icu_datetime::FixedCalendarDateTimeFormatter::try_new_with_buffer_provider( + provider, prefs, options, + ) + })?, + ))) } /// Formats a [`Time`] to a string. @@ -124,6 +128,7 @@ pub mod ffi { /// Creates a new [`GregorianDateFormatter`] using a particular data source. #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_length_and_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_length_and_provider( provider: &DataProvider, locale: &Locale, @@ -132,14 +137,13 @@ pub mod ffi { let prefs = (&locale.0).into(); let options = YMD::with_length(Length::from(length)); - Ok(Box::new(GregorianDateFormatter(call_constructor!( - icu_datetime::FixedCalendarDateTimeFormatter::try_new, - icu_datetime::FixedCalendarDateTimeFormatter::try_new_with_any_provider, - icu_datetime::FixedCalendarDateTimeFormatter::try_new_with_buffer_provider, - provider, - prefs, - options - )?))) + Ok(Box::new(GregorianDateFormatter( + provider.call_constructor_custom_err(move |provider| { + icu_datetime::FixedCalendarDateTimeFormatter::try_new_with_buffer_provider( + provider, prefs, options, + ) + })?, + ))) } /// Formats a [`IsoDate`] to a string. 
@@ -189,6 +193,7 @@ pub mod ffi { /// Creates a new [`GregorianDateTimeFormatter`] using a particular data source. #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_length_and_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_length_and_provider( provider: &DataProvider, locale: &Locale, @@ -197,14 +202,13 @@ pub mod ffi { let prefs = (&locale.0).into(); let options = YMDT::with_length(Length::from(length)).hm(); - Ok(Box::new(GregorianDateTimeFormatter(call_constructor!( - icu_datetime::FixedCalendarDateTimeFormatter::try_new, - icu_datetime::FixedCalendarDateTimeFormatter::try_new_with_any_provider, - icu_datetime::FixedCalendarDateTimeFormatter::try_new_with_buffer_provider, - provider, - prefs, - options - )?))) + Ok(Box::new(GregorianDateTimeFormatter( + provider.call_constructor_custom_err(move |provider| { + icu_datetime::FixedCalendarDateTimeFormatter::try_new_with_buffer_provider( + provider, prefs, options, + ) + })?, + ))) } /// Formats a [`IsoDateTime`] to a string. @@ -243,6 +247,7 @@ pub mod ffi { /// Creates a new [`DateFormatter`] using a particular data source. #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_length_and_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_length_and_provider( provider: &DataProvider, locale: &Locale, @@ -251,14 +256,13 @@ pub mod ffi { let prefs = (&locale.0).into(); let options = YMD::with_length(Length::from(length)); - Ok(Box::new(DateFormatter(call_constructor!( - icu_datetime::DateTimeFormatter::try_new, - icu_datetime::DateTimeFormatter::try_new_with_any_provider, - icu_datetime::DateTimeFormatter::try_new_with_buffer_provider, - provider, - prefs, - options - )?))) + Ok(Box::new(DateFormatter( + provider.call_constructor_custom_err(move |provider| { + icu_datetime::DateTimeFormatter::try_new_with_buffer_provider( + provider, prefs, options, + ) + })?, + ))) } /// Formats a [`Date`] to a string. 
@@ -306,6 +310,12 @@ pub mod ffi { let _infallible = self.0.format_any_calendar(&any).write_to(write); Ok(()) } + + /// Returns the calendar system used in this formatter. + #[diplomat::rust_link(icu::datetime::DateTimeFormatter::calendar_kind, FnInStruct)] + pub fn calendar_kind(&self) -> AnyCalendarKind { + self.0.calendar_kind().into() + } } #[diplomat::opaque] @@ -333,6 +343,7 @@ pub mod ffi { /// Creates a new [`DateTimeFormatter`] using a particular data source. #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_length_and_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_length_and_provider( provider: &DataProvider, locale: &Locale, @@ -341,14 +352,13 @@ pub mod ffi { let prefs = (&locale.0).into(); let options = YMDT::with_length(Length::from(length)).hm(); - Ok(Box::new(DateTimeFormatter(call_constructor!( - icu_datetime::DateTimeFormatter::try_new, - icu_datetime::DateTimeFormatter::try_new_with_any_provider, - icu_datetime::DateTimeFormatter::try_new_with_buffer_provider, - provider, - prefs, - options - )?))) + Ok(Box::new(DateTimeFormatter( + provider.call_constructor_custom_err(move |provider| { + icu_datetime::DateTimeFormatter::try_new_with_buffer_provider( + provider, prefs, options, + ) + })?, + ))) } /// Formats a [`DateTime`] to a string. pub fn format_datetime( @@ -372,5 +382,11 @@ pub mod ffi { let _infallible = self.0.format_any_calendar(&any).write_to(write); Ok(()) } + + /// Returns the calendar system used in this formatter. 
+ #[diplomat::rust_link(icu::datetime::DateTimeFormatter::calendar_kind, FnInStruct)] + pub fn calendar_kind(&self) -> AnyCalendarKind { + self.0.calendar_kind().into() + } } } diff --git a/ffi/capi/src/decimal.rs b/ffi/capi/src/decimal.rs index 79f0610f565..eaeb2a9e3f1 100644 --- a/ffi/capi/src/decimal.rs +++ b/ffi/capi/src/decimal.rs @@ -8,11 +8,14 @@ pub mod ffi { use alloc::boxed::Box; - use crate::{ - errors::ffi::DataError, fixed_decimal::ffi::SignedFixedDecimal, locale_core::ffi::Locale, - provider::ffi::DataProvider, - }; - use icu_decimal::{options::FixedDecimalFormatterOptions, FixedDecimalFormatterPreferences}; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use crate::locale_core::ffi::Locale; + #[cfg(feature = "buffer_provider")] + use crate::provider::ffi::DataProvider; + use crate::{errors::ffi::DataError, fixed_decimal::ffi::SignedFixedDecimal}; + use icu_decimal::options::FixedDecimalFormatterOptions; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use icu_decimal::FixedDecimalFormatterPreferences; use writeable::Writeable; @@ -56,6 +59,7 @@ pub mod ffi { #[diplomat::rust_link(icu::decimal::FixedDecimalFormatter::try_new, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_grouping_strategy_and_provider")] #[diplomat::demo(default_constructor)] + #[cfg(feature = "buffer_provider")] pub fn create_with_grouping_strategy_and_provider( provider: &DataProvider, locale: &Locale, @@ -67,14 +71,13 @@ pub mod ffi { options.grouping_strategy = grouping_strategy .map(Into::into) .unwrap_or(options.grouping_strategy); - Ok(Box::new(FixedDecimalFormatter(call_constructor!( - icu_decimal::FixedDecimalFormatter::try_new, - icu_decimal::FixedDecimalFormatter::try_new_with_any_provider, - icu_decimal::FixedDecimalFormatter::try_new_with_buffer_provider, - provider, - prefs, - options, - )?))) + Ok(Box::new(FixedDecimalFormatter( + provider.call_constructor_custom_err(move 
|provider| { + icu_decimal::FixedDecimalFormatter::try_new_with_buffer_provider( + provider, prefs, options, + ) + })?, + ))) } /// Creates a new [`FixedDecimalFormatter`] from preconstructed locale data. @@ -176,5 +179,10 @@ pub mod ffi { ) { let _infallible = self.0.format(&value.0).write_to(write); } + + #[diplomat::rust_link(icu::decimal::FixedDecimalFormatter::numbering_system, FnInStruct)] + pub fn numbering_system(&self, write: &mut diplomat_runtime::DiplomatWrite) { + let _infallible = self.0.numbering_system().write_to(write); + } } } diff --git a/ffi/capi/src/displaynames.rs b/ffi/capi/src/displaynames.rs index 28db7b258f8..18e89babbd5 100644 --- a/ffi/capi/src/displaynames.rs +++ b/ffi/capi/src/displaynames.rs @@ -8,8 +8,11 @@ pub mod ffi { use alloc::boxed::Box; - use crate::errors::ffi::{DataError, LocaleParseError}; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use crate::errors::ffi::DataError; + use crate::errors::ffi::LocaleParseError; use crate::locale_core::ffi::Locale; + #[cfg(feature = "buffer_provider")] use crate::provider::ffi::DataProvider; use diplomat_runtime::DiplomatOption; @@ -86,6 +89,7 @@ pub mod ffi { #[diplomat::attr(supports = non_exhaustive_structs, rename = "create_with_provider")] #[diplomat::attr(all(supports = fallible_constructors, supports = non_exhaustive_structs), named_constructor = "with_provider")] #[diplomat::attr(all(supports = fallible_constructors, not(supports = non_exhaustive_structs)), named_constructor = "v1_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_v1_with_provider( provider: &DataProvider, locale: &Locale, @@ -95,14 +99,9 @@ pub mod ffi { let options = icu_experimental::displaynames::DisplayNamesOptions::from(options); Ok(Box::new(LocaleDisplayNamesFormatter( - call_constructor!( - icu_experimental::displaynames::LocaleDisplayNamesFormatter::try_new, - icu_experimental::displaynames::LocaleDisplayNamesFormatter::try_new_with_any_provider, - 
icu_experimental::displaynames::LocaleDisplayNamesFormatter::try_new_with_buffer_provider, - provider, - prefs, + provider.call_constructor_custom_err(move |provider| icu_experimental::displaynames::LocaleDisplayNamesFormatter::try_new_with_buffer_provider(provider, prefs, options, - )?, + ))?, ))) } @@ -138,6 +137,7 @@ pub mod ffi { #[diplomat::attr(supports = non_exhaustive_structs, rename = "create_with_provider")] #[diplomat::attr(all(supports = fallible_constructors, supports = non_exhaustive_structs), named_constructor = "with_provider")] #[diplomat::attr(all(supports = fallible_constructors, not(supports = non_exhaustive_structs)), named_constructor = "v1_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_v1_with_provider( provider: &DataProvider, locale: &Locale, @@ -145,14 +145,9 @@ pub mod ffi { ) -> Result, DataError> { let prefs = (&locale.0).into(); let options = icu_experimental::displaynames::DisplayNamesOptions::from(options); - Ok(Box::new(RegionDisplayNames(call_constructor!( - icu_experimental::displaynames::RegionDisplayNames::try_new, - icu_experimental::displaynames::RegionDisplayNames::try_new_with_any_provider, - icu_experimental::displaynames::RegionDisplayNames::try_new_with_buffer_provider, - provider, - prefs, + Ok(Box::new(RegionDisplayNames(provider.call_constructor_custom_err(move |provider| icu_experimental::displaynames::RegionDisplayNames::try_new_with_buffer_provider(provider, prefs, options - )?))) + ))?))) } /// Returns the locale specific display name of a region. 
diff --git a/ffi/capi/src/exemplar_chars.rs b/ffi/capi/src/exemplar_chars.rs index 6fa23bec364..6514964e39b 100644 --- a/ffi/capi/src/exemplar_chars.rs +++ b/ffi/capi/src/exemplar_chars.rs @@ -8,9 +8,10 @@ pub mod ffi { use alloc::boxed::Box; - use crate::errors::ffi::DataError; - use crate::locale_core::ffi::Locale; + #[cfg(feature = "buffer_provider")] use crate::provider::ffi::DataProvider; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use crate::{errors::ffi::DataError, locale_core::ffi::Locale}; #[diplomat::opaque] /// A set of "exemplar characters" for a given locale. @@ -67,13 +68,13 @@ pub mod ffi { FnInStruct )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "main_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_main_with_provider( provider: &DataProvider, locale: &Locale, ) -> Result, DataError> { let locale = locale.to_datalocale(); Ok(Box::new(ExemplarCharacters(call_constructor_unstable!( - icu_locale::exemplar_chars::ExemplarCharacters::try_new_main [r => r.map(|r| r.static_to_owned())], icu_locale::exemplar_chars::ExemplarCharacters::try_new_main_unstable, provider, &locale @@ -100,13 +101,13 @@ pub mod ffi { FnInStruct )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "auxiliary_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_auxiliary_with_provider( provider: &DataProvider, locale: &Locale, ) -> Result, DataError> { let locale = locale.to_datalocale(); Ok(Box::new(ExemplarCharacters(call_constructor_unstable!( - icu_locale::exemplar_chars::ExemplarCharacters::try_new_auxiliary [r => r.map(|r| r.static_to_owned())], icu_locale::exemplar_chars::ExemplarCharacters::try_new_auxiliary_unstable, provider, &locale @@ -133,13 +134,13 @@ pub mod ffi { FnInStruct )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "punctuation_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_punctuation_with_provider( 
provider: &DataProvider, locale: &Locale, ) -> Result, DataError> { let locale = locale.to_datalocale(); Ok(Box::new(ExemplarCharacters(call_constructor_unstable!( - icu_locale::exemplar_chars::ExemplarCharacters::try_new_punctuation [r => r.map(|r| r.static_to_owned())], icu_locale::exemplar_chars::ExemplarCharacters::try_new_punctuation_unstable, provider, &locale @@ -167,13 +168,13 @@ pub mod ffi { FnInStruct )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "numbers_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_numbers_with_provider( provider: &DataProvider, locale: &Locale, ) -> Result, DataError> { let locale = locale.to_datalocale(); Ok(Box::new(ExemplarCharacters(call_constructor_unstable!( - icu_locale::exemplar_chars::ExemplarCharacters::try_new_numbers [r => r.map(|r| r.static_to_owned())], icu_locale::exemplar_chars::ExemplarCharacters::try_new_numbers_unstable, provider, &locale @@ -201,13 +202,13 @@ pub mod ffi { FnInStruct )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "index_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_index_with_provider( provider: &DataProvider, locale: &Locale, ) -> Result, DataError> { let locale = locale.to_datalocale(); Ok(Box::new(ExemplarCharacters(call_constructor_unstable!( - icu_locale::exemplar_chars::ExemplarCharacters::try_new_index [r => r.map(|r| r.static_to_owned())], icu_locale::exemplar_chars::ExemplarCharacters::try_new_index_unstable, provider, &locale diff --git a/ffi/capi/src/fallbacker.rs b/ffi/capi/src/fallbacker.rs index fa5435b7004..ea6310d395a 100644 --- a/ffi/capi/src/fallbacker.rs +++ b/ffi/capi/src/fallbacker.rs @@ -8,7 +8,9 @@ pub mod ffi { use alloc::boxed::Box; - use crate::{errors::ffi::DataError, locale_core::ffi::Locale, provider::ffi::DataProvider}; + use crate::locale_core::ffi::Locale; + #[cfg(feature = "buffer_provider")] + use crate::{errors::ffi::DataError, provider::ffi::DataProvider}; /// An 
object that runs the ICU4X locale fallback algorithm. #[diplomat::opaque] @@ -78,14 +80,12 @@ pub mod ffi { hidden )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(LocaleFallbacker(call_constructor!( - icu_locale::LocaleFallbacker::new [r => Ok(r.static_to_owned())], - icu_locale::LocaleFallbacker::try_new_with_any_provider, + Ok(Box::new(LocaleFallbacker(provider.call_constructor( icu_locale::LocaleFallbacker::try_new_with_buffer_provider, - provider, )?))) } diff --git a/ffi/capi/src/fixed_decimal.rs b/ffi/capi/src/fixed_decimal.rs index 58b3d7bc409..33dcd2c0c1d 100644 --- a/ffi/capi/src/fixed_decimal.rs +++ b/ffi/capi/src/fixed_decimal.rs @@ -278,6 +278,16 @@ pub mod ffi { self.0.absolute.trim_end() } + #[diplomat::rust_link(fixed_decimal::UnsignedFixedDecimal::trim_end_if_integer, FnInStruct)] + #[diplomat::rust_link( + fixed_decimal::UnsignedFixedDecimal::trimmed_end_if_integer, + FnInStruct, + hidden + )] + pub fn trim_end_if_integer(&mut self) { + self.0.absolute.trim_end_if_integer() + } + /// Zero-pad the [`SignedFixedDecimal`] on the left to a particular position #[diplomat::rust_link(fixed_decimal::FixedDecimal::pad_start, FnInStruct)] #[diplomat::rust_link(fixed_decimal::FixedDecimal::padded_start, FnInStruct, hidden)] diff --git a/ffi/capi/src/list.rs b/ffi/capi/src/list.rs index d7e40368e82..fcf5c0ff006 100644 --- a/ffi/capi/src/list.rs +++ b/ffi/capi/src/list.rs @@ -8,9 +8,13 @@ pub mod ffi { use alloc::boxed::Box; use diplomat_runtime::{DiplomatStr16Slice, DiplomatStrSlice}; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] use icu_list::{ListFormatterOptions, ListFormatterPreferences}; - use crate::{errors::ffi::DataError, locale_core::ffi::Locale, provider::ffi::DataProvider}; + #[cfg(feature = "buffer_provider")] + use crate::provider::ffi::DataProvider; 
+ #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use crate::{errors::ffi::DataError, locale_core::ffi::Locale}; use writeable::Writeable; @@ -45,6 +49,7 @@ pub mod ffi { /// Construct a new ListFormatter instance for And patterns #[diplomat::rust_link(icu::list::ListFormatter::try_new_and, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "and_with_length_and_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_and_with_length_and_provider( provider: &DataProvider, locale: &Locale, @@ -52,13 +57,12 @@ pub mod ffi { ) -> Result, DataError> { let prefs = ListFormatterPreferences::from(&locale.0); let options = ListFormatterOptions::default().with_length(length.into()); - Ok(Box::new(ListFormatter(call_constructor!( - icu_list::ListFormatter::try_new_and, - icu_list::ListFormatter::try_new_and_with_any_provider, - icu_list::ListFormatter::try_new_and_with_buffer_provider, - provider, - prefs, - options, + Ok(Box::new(ListFormatter(provider.call_constructor( + move |provider| { + icu_list::ListFormatter::try_new_and_with_buffer_provider( + provider, prefs, options, + ) + }, )?))) } @@ -80,6 +84,7 @@ pub mod ffi { /// Construct a new ListFormatter instance for And patterns #[diplomat::rust_link(icu::list::ListFormatter::try_new_or, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "or_with_length_and_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_or_with_length_and_provider( provider: &DataProvider, locale: &Locale, @@ -87,13 +92,12 @@ pub mod ffi { ) -> Result, DataError> { let prefs = ListFormatterPreferences::from(&locale.0); let options = ListFormatterOptions::default().with_length(length.into()); - Ok(Box::new(ListFormatter(call_constructor!( - icu_list::ListFormatter::try_new_or, - icu_list::ListFormatter::try_new_or_with_any_provider, - icu_list::ListFormatter::try_new_or_with_buffer_provider, - provider, - prefs, - options + 
Ok(Box::new(ListFormatter(provider.call_constructor( + move |provider| { + icu_list::ListFormatter::try_new_or_with_buffer_provider( + provider, prefs, options, + ) + }, )?))) } @@ -115,6 +119,7 @@ pub mod ffi { /// Construct a new ListFormatter instance for And patterns #[diplomat::rust_link(icu::list::ListFormatter::try_new_unit, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "unit_with_length_and_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_unit_with_length_and_provider( provider: &DataProvider, locale: &Locale, @@ -122,13 +127,12 @@ pub mod ffi { ) -> Result, DataError> { let prefs = ListFormatterPreferences::from(&locale.0); let options = ListFormatterOptions::default().with_length(length.into()); - Ok(Box::new(ListFormatter(call_constructor!( - icu_list::ListFormatter::try_new_unit, - icu_list::ListFormatter::try_new_unit_with_any_provider, - icu_list::ListFormatter::try_new_unit_with_buffer_provider, - provider, - prefs, - options + Ok(Box::new(ListFormatter(provider.call_constructor( + move |provider| { + icu_list::ListFormatter::try_new_unit_with_buffer_provider( + provider, prefs, options, + ) + }, )?))) } diff --git a/ffi/capi/src/locale.rs b/ffi/capi/src/locale.rs index 4408cdc5721..d63aa63d978 100644 --- a/ffi/capi/src/locale.rs +++ b/ffi/capi/src/locale.rs @@ -8,9 +8,9 @@ pub mod ffi { use alloc::boxed::Box; - use crate::errors::ffi::DataError; use crate::locale_core::ffi::Locale; - use crate::provider::ffi::DataProvider; + #[cfg(feature = "buffer_provider")] + use crate::{errors::ffi::DataError, provider::ffi::DataProvider}; #[diplomat::rust_link(icu::locale::TransformResult, Enum)] #[diplomat::enum_convert(icu_locale::TransformResult)] @@ -35,14 +35,12 @@ pub mod ffi { /// Create a new [`LocaleCanonicalizer`]. 
#[diplomat::rust_link(icu::locale::LocaleCanonicalizer::new, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(LocaleCanonicalizer(call_constructor!( - icu_locale::LocaleCanonicalizer::new [r => Ok(r)], - icu_locale::LocaleCanonicalizer::try_new_with_any_provider, + Ok(Box::new(LocaleCanonicalizer(provider.call_constructor( icu_locale::LocaleCanonicalizer::try_new_with_buffer_provider, - provider, )?))) } /// Create a new [`LocaleCanonicalizer`] with extended data using compiled data. @@ -59,21 +57,19 @@ pub mod ffi { /// Create a new [`LocaleCanonicalizer`] with extended data. #[diplomat::rust_link(icu::locale::LocaleCanonicalizer::new_with_expander, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "extended_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_extended_with_provider( provider: &DataProvider, ) -> Result, DataError> { - let expander = call_constructor!( - icu_locale::LocaleExpander::new_extended [r => Ok(r)], - icu_locale::LocaleExpander::try_new_with_any_provider, - icu_locale::LocaleExpander::try_new_with_buffer_provider, - provider, - )?; - Ok(Box::new(LocaleCanonicalizer(call_constructor!( - icu_locale::LocaleCanonicalizer::new_with_expander [r => Ok(r)], - icu_locale::LocaleCanonicalizer::try_new_with_expander_with_any_provider, - icu_locale::LocaleCanonicalizer::try_new_with_expander_with_buffer_provider, - provider, - expander + let expander = provider.call_constructor(|provider| { + icu_locale::LocaleExpander::try_new_with_buffer_provider(provider) + })?; + Ok(Box::new(LocaleCanonicalizer(provider.call_constructor( + move |provider| { + icu_locale::LocaleCanonicalizer::try_new_with_expander_with_buffer_provider( + provider, expander, + ) + }, )?))) } 
#[diplomat::rust_link(icu::locale::LocaleCanonicalizer::canonicalize, FnInStruct)] @@ -98,14 +94,12 @@ pub mod ffi { /// Create a new [`LocaleExpander`] using a particular data source. #[diplomat::rust_link(icu::locale::LocaleExpander::new, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(LocaleExpander(call_constructor!( - icu_locale::LocaleExpander::new [r => Ok(r)], - icu_locale::LocaleExpander::try_new_with_any_provider, + Ok(Box::new(LocaleExpander(provider.call_constructor( icu_locale::LocaleExpander::try_new_with_buffer_provider, - provider, )?))) } /// Create a new [`LocaleExpander`] with extended data using compiled data. @@ -118,14 +112,12 @@ pub mod ffi { /// Create a new [`LocaleExpander`] with extended data using a particular data source. #[diplomat::rust_link(icu::locale::LocaleExpander::new_extended, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "extended_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_extended_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(LocaleExpander(call_constructor!( - icu_locale::LocaleExpander::new_extended [r => Ok(r)], - icu_locale::LocaleExpander::try_new_with_any_provider, + Ok(Box::new(LocaleExpander(provider.call_constructor( icu_locale::LocaleExpander::try_new_with_buffer_provider, - provider, )?))) } #[diplomat::rust_link(icu::locale::LocaleExpander::maximize, FnInStruct)] diff --git a/ffi/capi/src/locale_directionality.rs b/ffi/capi/src/locale_directionality.rs index ccf5a316b3e..3a1765281da 100644 --- a/ffi/capi/src/locale_directionality.rs +++ b/ffi/capi/src/locale_directionality.rs @@ -8,12 +8,14 @@ pub mod ffi { use alloc::boxed::Box; - use crate::{ - errors::ffi::DataError, - locale::ffi::LocaleExpander, - locale_core::ffi::Locale, - 
provider::{ffi::DataProvider, DataProviderInner}, - }; + #[cfg(feature = "buffer_provider")] + use crate::errors::ffi::DataError; + #[cfg(feature = "buffer_provider")] + use crate::provider::{ffi::DataProvider, DataProviderInner}; + + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] + use crate::locale::ffi::LocaleExpander; + use crate::locale_core::ffi::Locale; #[diplomat::rust_link(icu::locale::Direction, Enum)] pub enum LocaleDirection { @@ -38,14 +40,12 @@ pub mod ffi { /// Construct a new LocaleDirectionality instance using a particular data source. #[diplomat::rust_link(icu::locale::LocaleDirectionality::new, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(LocaleDirectionality(call_constructor!( - icu_locale::LocaleDirectionality::new [r => Ok(r)], - icu_locale::LocaleDirectionality::try_new_with_any_provider, + Ok(Box::new(LocaleDirectionality(provider.call_constructor( icu_locale::LocaleDirectionality::try_new_with_buffer_provider, - provider, )?))) } @@ -62,6 +62,7 @@ pub mod ffi { /// Construct a new LocaleDirectionality instance with a custom expander and a particular data source. #[diplomat::rust_link(icu::locale::LocaleDirectionality::new_with_expander, FnInStruct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_expander_and_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_expander_and_provider( provider: &DataProvider, expander: &LocaleExpander, @@ -72,23 +73,13 @@ pub mod ffi { DataProviderInner::Destroyed => Err(icu_provider::DataError::custom( "This provider has been destroyed", ))?, - DataProviderInner::Empty => { - icu_locale::LocaleDirectionality::try_new_with_expander_unstable( - &icu_provider_adapters::empty::EmptyDataProvider::new(), - expander.0.clone(), - )? 
- } - #[cfg(feature = "buffer_provider")] + DataProviderInner::Buffer(buffer_provider) => { icu_locale::LocaleDirectionality::try_new_with_expander_unstable( &buffer_provider.as_deserializing(), expander.0.clone(), )? } - #[cfg(feature = "compiled_data")] - DataProviderInner::Compiled => { - icu_locale::LocaleDirectionality::new_with_expander(expander.0.clone()) - } }))) } diff --git a/ffi/capi/src/normalizer.rs b/ffi/capi/src/normalizer.rs index 4653318babb..9b1da84e757 100644 --- a/ffi/capi/src/normalizer.rs +++ b/ffi/capi/src/normalizer.rs @@ -8,6 +8,7 @@ pub mod ffi { use alloc::boxed::Box; + #[cfg(feature = "buffer_provider")] use crate::{errors::ffi::DataError, provider::ffi::DataProvider}; #[diplomat::opaque] @@ -39,14 +40,14 @@ pub mod ffi { hidden )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "nfc_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_nfc_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(ComposingNormalizer(call_constructor!( - icu_normalizer::ComposingNormalizer::new_nfc [r => Ok(r.static_to_owned())], - icu_normalizer::ComposingNormalizer::try_new_nfc_with_any_provider, - icu_normalizer::ComposingNormalizer::try_new_nfc_with_buffer_provider, - provider, + Ok(Box::new(ComposingNormalizer(provider.call_constructor( + |provider| { + icu_normalizer::ComposingNormalizer::try_new_nfc_with_buffer_provider(provider) + }, )?))) } /// Construct a new ComposingNormalizer instance for NFKC using compiled data. 
@@ -71,14 +72,14 @@ pub mod ffi { hidden )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "nfkc_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_nfkc_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(ComposingNormalizer(call_constructor!( - icu_normalizer::ComposingNormalizer::new_nfkc [r => Ok(r.static_to_owned())], - icu_normalizer::ComposingNormalizer::try_new_nfkc_with_any_provider, - icu_normalizer::ComposingNormalizer::try_new_nfkc_with_buffer_provider, - provider, + Ok(Box::new(ComposingNormalizer(provider.call_constructor( + |provider| { + icu_normalizer::ComposingNormalizer::try_new_nfkc_with_buffer_provider(provider) + }, )?))) } /// Normalize a string @@ -199,14 +200,16 @@ pub mod ffi { hidden )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "nfd_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_nfd_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(DecomposingNormalizer(call_constructor!( - icu_normalizer::DecomposingNormalizer::new_nfd [r => Ok(r.static_to_owned())], - icu_normalizer::DecomposingNormalizer::try_new_nfd_with_any_provider, - icu_normalizer::DecomposingNormalizer::try_new_nfd_with_buffer_provider, - provider, + Ok(Box::new(DecomposingNormalizer(provider.call_constructor( + |provider| { + icu_normalizer::DecomposingNormalizer::try_new_nfd_with_buffer_provider( + provider, + ) + }, )?))) } @@ -233,14 +236,16 @@ pub mod ffi { hidden )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "nfkd_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_nfkd_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(DecomposingNormalizer(call_constructor!( - icu_normalizer::DecomposingNormalizer::new_nfkd [r => Ok(r.static_to_owned())], - icu_normalizer::DecomposingNormalizer::try_new_nfkd_with_any_provider, - 
icu_normalizer::DecomposingNormalizer::try_new_nfkd_with_buffer_provider, - provider, + Ok(Box::new(DecomposingNormalizer(provider.call_constructor( + |provider| { + icu_normalizer::DecomposingNormalizer::try_new_nfkd_with_buffer_provider( + provider, + ) + }, )?))) } diff --git a/ffi/capi/src/normalizer_properties.rs b/ffi/capi/src/normalizer_properties.rs index 12e5f39edbc..046fbb89a63 100644 --- a/ffi/capi/src/normalizer_properties.rs +++ b/ffi/capi/src/normalizer_properties.rs @@ -8,6 +8,7 @@ pub mod ffi { use alloc::boxed::Box; + #[cfg(feature = "buffer_provider")] use crate::{errors::ffi::DataError, provider::ffi::DataProvider}; /// Lookup of the Canonical_Combining_Class Unicode property @@ -52,15 +53,11 @@ pub mod ffi { hidden )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(CanonicalCombiningClassMap(call_constructor!( - icu_normalizer::properties::CanonicalCombiningClassMap::new [r => Ok(r.static_to_owned())], - icu_normalizer::properties::CanonicalCombiningClassMap::try_new_with_any_provider, - icu_normalizer::properties::CanonicalCombiningClassMap::try_new_with_buffer_provider, - provider - )?))) + Ok(Box::new(CanonicalCombiningClassMap(provider.call_constructor(icu_normalizer::properties::CanonicalCombiningClassMap::try_new_with_buffer_provider)?))) } #[diplomat::rust_link( @@ -129,14 +126,16 @@ pub mod ffi { hidden )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(CanonicalComposition(call_constructor!( - icu_normalizer::properties::CanonicalComposition::new [r => Ok(r.static_to_owned())], - icu_normalizer::properties::CanonicalComposition::try_new_with_any_provider, - 
icu_normalizer::properties::CanonicalComposition::try_new_with_buffer_provider, - provider, + Ok(Box::new(CanonicalComposition(provider.call_constructor( + |provider| { + icu_normalizer::properties::CanonicalComposition::try_new_with_buffer_provider( + provider, + ) + }, )?))) } @@ -201,15 +200,11 @@ pub mod ffi { hidden )] #[diplomat::attr(supports = fallible_constructors, named_constructor = "with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_with_provider( provider: &DataProvider, ) -> Result, DataError> { - Ok(Box::new(CanonicalDecomposition(call_constructor!( - icu_normalizer::properties::CanonicalDecomposition::new [r => Ok(r.static_to_owned())], - icu_normalizer::properties::CanonicalDecomposition::try_new_with_any_provider, - icu_normalizer::properties::CanonicalDecomposition::try_new_with_buffer_provider, - provider, - )?))) + Ok(Box::new(CanonicalDecomposition(provider.call_constructor(icu_normalizer::properties::CanonicalDecomposition::try_new_with_buffer_provider)?))) } /// Performs non-recursive canonical decomposition (including for Hangul). 
diff --git a/ffi/capi/src/pluralrules.rs b/ffi/capi/src/pluralrules.rs index f0fe7801abb..6086c7e33a9 100644 --- a/ffi/capi/src/pluralrules.rs +++ b/ffi/capi/src/pluralrules.rs @@ -8,9 +8,12 @@ pub mod ffi { use alloc::boxed::Box; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] use crate::errors::ffi::DataError; use crate::errors::ffi::FixedDecimalParseError; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] use crate::locale_core::ffi::Locale; + #[cfg(feature = "buffer_provider")] use crate::provider::ffi::DataProvider; #[diplomat::rust_link(icu::plurals::PluralCategory, Enum)] @@ -56,17 +59,16 @@ pub mod ffi { #[diplomat::rust_link(icu::plurals::PluralRules::try_new, FnInStruct, hidden)] #[diplomat::rust_link(icu::plurals::PluralRuleType, Enum, hidden)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "cardinal_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_cardinal_with_provider( provider: &DataProvider, locale: &Locale, ) -> Result, DataError> { let prefs = icu_plurals::PluralRulesPreferences::from(&locale.0); - Ok(Box::new(PluralRules(call_constructor!( - icu_plurals::PluralRules::try_new_cardinal, - icu_plurals::PluralRules::try_new_cardinal_with_any_provider, - icu_plurals::PluralRules::try_new_cardinal_with_buffer_provider, - provider, - prefs + Ok(Box::new(PluralRules(provider.call_constructor( + |provider| { + icu_plurals::PluralRules::try_new_cardinal_with_buffer_provider(provider, prefs) + }, )?))) } /// Construct an [`PluralRules`] for the given locale, for ordinal numbers, using compiled data. 
@@ -86,17 +88,16 @@ pub mod ffi { #[diplomat::rust_link(icu::plurals::PluralRules::try_new, FnInStruct, hidden)] #[diplomat::rust_link(icu::plurals::PluralRuleType, Enum, hidden)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "ordinal_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_ordinal_with_provider( provider: &DataProvider, locale: &Locale, ) -> Result, DataError> { let prefs = icu_plurals::PluralRulesPreferences::from(&locale.0); - Ok(Box::new(PluralRules(call_constructor!( - icu_plurals::PluralRules::try_new_ordinal, - icu_plurals::PluralRules::try_new_ordinal_with_any_provider, - icu_plurals::PluralRules::try_new_ordinal_with_buffer_provider, - provider, - prefs + Ok(Box::new(PluralRules(provider.call_constructor( + |provider| { + icu_plurals::PluralRules::try_new_ordinal_with_buffer_provider(provider, prefs) + }, )?))) } /// Get the category for a given number represented as operands diff --git a/ffi/capi/src/properties_maps.rs b/ffi/capi/src/properties_maps.rs index f1fea94fe51..16f632fb7da 100644 --- a/ffi/capi/src/properties_maps.rs +++ b/ffi/capi/src/properties_maps.rs @@ -7,16 +7,17 @@ #[diplomat::attr(auto, namespace = "icu4x")] pub mod ffi { use alloc::boxed::Box; + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] use icu_properties::props::{ BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GraphemeClusterBreak, HangulSyllableType, IndicSyllabicCategory, JoiningType, LineBreak, Script, SentenceBreak, WordBreak, }; - use crate::errors::ffi::DataError; use crate::properties_iter::ffi::CodePointRangeIterator; use crate::properties_sets::ffi::CodePointSetData; - use crate::provider::ffi::DataProvider; + #[cfg(feature = "buffer_provider")] + use crate::{errors::ffi::DataError, provider::ffi::DataProvider}; #[diplomat::opaque] /// An ICU4X Unicode Map Property object, capable of querying whether a code point (key) to obtain the Unicode property value, for a specific 
Unicode property. @@ -34,6 +35,7 @@ pub mod ffi { )] pub struct CodePointMapData8(icu_properties::CodePointMapData); + #[cfg(any(feature = "compiled_data", feature = "buffer_provider"))] fn convert_8( data: icu_properties::CodePointMapData

, ) -> Box { @@ -139,11 +141,11 @@ pub mod ffi { /// Create a map for the `General_Category` property, using a particular data source #[diplomat::rust_link(icu::properties::props::GeneralCategory, Enum)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "general_category_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_general_category_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -160,11 +162,11 @@ pub mod ffi { /// Create a map for the `Bidi_Class` property, using a particular data source. #[diplomat::rust_link(icu::properties::props::BidiClass, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "bidi_class_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_bidi_class_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -180,11 +182,11 @@ pub mod ffi { /// Create a map for the `East_Asian_Width` property, using a particular data source. #[diplomat::rust_link(icu::properties::props::EastAsianWidth, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "east_asian_width_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_east_asian_width_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -201,11 +203,11 @@ pub mod ffi { /// Create a map for the `Hangul_Syllable_Type` property, using a particular data source. 
#[diplomat::rust_link(icu::properties::props::HangulSyllableType, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "hangul_syllable_type_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_hangul_syllable_type_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -222,11 +224,11 @@ pub mod ffi { /// Create a map for the `Indic_Syllabic_Property` property, using a particular data source. #[diplomat::rust_link(icu::properties::props::IndicSyllabicCategory, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "indic_syllabic_category_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_indic_syllabic_category_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -241,11 +243,11 @@ pub mod ffi { /// Create a map for the `Line_Break` property, using a particular data source. #[diplomat::rust_link(icu::properties::props::LineBreak, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "line_break_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_line_break_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -262,11 +264,11 @@ pub mod ffi { /// Create a map for the `Grapheme_Cluster_Break` property, using a particular data source. 
#[diplomat::rust_link(icu::properties::props::GraphemeClusterBreak, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "grapheme_cluster_break_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_grapheme_cluster_break_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -281,11 +283,11 @@ pub mod ffi { /// Create a map for the `Word_Break` property, using a particular data source. #[diplomat::rust_link(icu::properties::props::WordBreak, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "word_break_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_word_break_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -300,11 +302,11 @@ pub mod ffi { /// Create a map for the `Sentence_Break` property, using a particular data source. #[diplomat::rust_link(icu::properties::props::SentenceBreak, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "sentence_break_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_sentence_break_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -320,11 +322,11 @@ pub mod ffi { /// Create a map for the `Joining_Type` property, using a particular data source. 
#[diplomat::rust_link(icu::properties::props::JoiningType, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "joining_type_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_joining_type_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -342,11 +344,11 @@ pub mod ffi { /// Create a map for the `Canonical_Combining_Class` property, using a particular data source. #[diplomat::rust_link(icu::properties::props::CanonicalCombiningClass, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "canonical_combining_class_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_canonical_combining_class_with_provider( provider: &DataProvider, ) -> Result, DataError> { Ok(convert_8(call_constructor_unstable!( - icu_properties::CodePointMapData::::new [r => Ok(r.static_to_owned())], icu_properties::CodePointMapData::::try_new_unstable, provider, )?)) @@ -426,13 +428,13 @@ pub mod ffi { /// Create a map for the `Script` property, using a particular data source. #[diplomat::rust_link(icu::properties::props::Script, Struct)] #[diplomat::attr(supports = fallible_constructors, named_constructor = "script_with_provider")] + #[cfg(feature = "buffer_provider")] pub fn create_script_with_provider( provider: &DataProvider, ) -> Result, DataError> { #[allow(clippy::unwrap_used)] // script is a 16-bit property Ok(Box::new(CodePointMapData16( call_constructor_unstable!( - icu_properties::CodePointMapData::