Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split numbering systems out of decimal data #5822

Merged
merged 10 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion components/datetime/src/external_loaders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,9 @@ pub(crate) struct ExternalLoaderUnstable<'a, P: ?Sized>(pub &'a P);

impl<P> FixedDecimalFormatterLoader for ExternalLoaderUnstable<'_, P>
where
P: ?Sized + DataProvider<icu_decimal::provider::DecimalSymbolsV2Marker>,
P: ?Sized
+ DataProvider<icu_decimal::provider::DecimalSymbolsV2Marker>
+ DataProvider<icu_decimal::provider::DecimalDigitsV1Marker>,
{
#[inline]
fn load(
Expand Down
9 changes: 5 additions & 4 deletions components/datetime/src/format/neo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use icu_calendar::types::FormattingEra;
use icu_calendar::types::MonthCode;
use icu_decimal::options::FixedDecimalFormatterOptions;
use icu_decimal::options::GroupingStrategy;
use icu_decimal::provider::DecimalSymbolsV2Marker;
use icu_decimal::provider::{DecimalDigitsV1Marker, DecimalSymbolsV2Marker};
use icu_decimal::FixedDecimalFormatter;
use icu_provider::marker::NeverMarker;
use icu_provider::prelude::*;
Expand Down Expand Up @@ -262,7 +262,7 @@ where
size_test!(
TypedDateTimeNames<icu_calendar::Gregorian, DateTimeMarker>,
typed_date_time_names_size,
336
352
);

/// A low-level type that formats datetime patterns with localized names.
Expand Down Expand Up @@ -625,7 +625,7 @@ impl<C: CldrCalendar, R: DateTimeNamesMarker> TypedDateTimeNames<C, R> {
#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::try_new)]
pub fn try_new_unstable<P>(provider: &P, locale: &DataLocale) -> Result<Self, DataError>
where
P: DataProvider<DecimalSymbolsV2Marker> + ?Sized,
P: DataProvider<DecimalSymbolsV2Marker> + DataProvider<DecimalDigitsV1Marker> + ?Sized,
{
let mut names = Self {
locale: locale.clone(),
Expand Down Expand Up @@ -1418,7 +1418,7 @@ impl<C: CldrCalendar, R: DateTimeNamesMarker> TypedDateTimeNames<C, R> {
#[inline]
pub fn load_fixed_decimal_formatter<P>(&mut self, provider: &P) -> Result<&mut Self, DataError>
where
P: DataProvider<DecimalSymbolsV2Marker> + ?Sized,
P: DataProvider<DecimalSymbolsV2Marker> + DataProvider<DecimalDigitsV1Marker> + ?Sized,
{
self.inner
.load_fixed_decimal_formatter(&ExternalLoaderUnstable(provider), &self.locale)?;
Expand Down Expand Up @@ -1498,6 +1498,7 @@ impl<C: CldrCalendar, R: DateTimeNamesMarker> TypedDateTimeNames<C, R> {
+ DataProvider<tz::MzSpecificShortV1Marker>
+ DataProvider<tz::MzPeriodV1Marker>
+ DataProvider<DecimalSymbolsV2Marker>
+ DataProvider<DecimalDigitsV1Marker>
+ ?Sized,
{
let locale = &self.locale;
Expand Down
4 changes: 2 additions & 2 deletions components/datetime/src/neo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ macro_rules! gen_any_buffer_constructors_with_external_loader {
// }
// }

size_test!(FixedCalendarDateTimeFormatter<icu_calendar::Gregorian, crate::fieldset::YMD>, typed_neo_year_month_day_formatter_size, 336);
size_test!(FixedCalendarDateTimeFormatter<icu_calendar::Gregorian, crate::fieldset::YMD>, typed_neo_year_month_day_formatter_size, 352);

/// [`FixedCalendarDateTimeFormatter`] is a formatter capable of formatting dates and/or times from
/// a calendar selected at compile time.
Expand Down Expand Up @@ -352,7 +352,7 @@ where
size_test!(
DateTimeFormatter<crate::fieldset::YMD>,
neo_year_month_day_formatter_size,
392
408
);

/// [`DateTimeFormatter`] is a formatter capable of formatting dates and/or times from
Expand Down
11 changes: 8 additions & 3 deletions components/datetime/src/scaffold/fieldset_traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use icu_calendar::{
},
Date, Iso, Time,
};
use icu_decimal::provider::DecimalSymbolsV2Marker;
use icu_decimal::provider::{DecimalDigitsV1Marker, DecimalSymbolsV2Marker};
use icu_provider::{marker::NeverMarker, prelude::*};
use icu_timezone::scaffold::IntoOption;
use icu_timezone::{TimeZoneBcp47Id, UtcOffset, ZoneVariant};
Expand Down Expand Up @@ -368,10 +368,13 @@ where

/// Trait to consolidate data provider markers external to this crate
/// for datetime formatting with a fixed calendar.
pub trait AllFixedCalendarExternalDataMarkers: DataProvider<DecimalSymbolsV2Marker> {}
pub trait AllFixedCalendarExternalDataMarkers:
DataProvider<DecimalSymbolsV2Marker> + DataProvider<DecimalDigitsV1Marker>
{
}

impl<T> AllFixedCalendarExternalDataMarkers for T where
T: ?Sized + DataProvider<DecimalSymbolsV2Marker>
T: ?Sized + DataProvider<DecimalSymbolsV2Marker> + DataProvider<DecimalDigitsV1Marker>
{
}

Expand All @@ -385,6 +388,7 @@ pub trait AllAnyCalendarExternalDataMarkers:
+ DataProvider<JapaneseErasV1Marker>
+ DataProvider<JapaneseExtendedErasV1Marker>
+ DataProvider<DecimalSymbolsV2Marker>
+ DataProvider<DecimalDigitsV1Marker>
{
}

Expand All @@ -397,6 +401,7 @@ impl<T> AllAnyCalendarExternalDataMarkers for T where
+ DataProvider<JapaneseErasV1Marker>
+ DataProvider<JapaneseExtendedErasV1Marker>
+ DataProvider<DecimalSymbolsV2Marker>
+ DataProvider<DecimalDigitsV1Marker>
{
}

Expand Down
5 changes: 3 additions & 2 deletions components/decimal/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ databake = { workspace = true, features = ["derive"], optional = true}
serde = { workspace = true, features = ["derive", "alloc"], optional = true }

icu_decimal_data = { workspace = true, optional = true }
tinystr = { workspace = true }

[dev-dependencies]
icu = { path = "../../components/icu", default-features = false }
Expand All @@ -46,8 +47,8 @@ criterion = { workspace = true }
[features]
default = ["compiled_data"]
std = ["fixed_decimal/std", "icu_locale_core/std", "icu_provider/std"]
serde = ["dep:serde", "icu_provider/serde", "zerovec/serde"]
datagen = ["serde", "dep:databake", "zerovec/databake"]
serde = ["dep:serde", "icu_provider/serde", "zerovec/serde", "tinystr/serde"]
datagen = ["serde", "dep:databake", "zerovec/databake", "tinystr/databake"]
bench = ["serde"]
compiled_data = ["dep:icu_decimal_data"]

Expand Down
21 changes: 3 additions & 18 deletions components/decimal/benches/fixed_decimal_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@ use rand_pcg::Lcg64Xsh32;
use criterion::{black_box, criterion_group, criterion_main, Criterion};

use fixed_decimal::FixedDecimal;
use icu_decimal::provider::{Baked, DecimalSymbolsV2Marker};
use icu_decimal::FixedDecimalFormatter;
use icu_locale_core::locale;
use icu_provider::prelude::*;
use icu_provider_adapters::fixed::FixedProvider;

fn triangular_nums(range: f64) -> Vec<isize> {
// Use Lcg64Xsh32, a small, fast PRNG.
Expand All @@ -28,24 +24,13 @@ fn triangular_nums(range: f64) -> Vec<isize> {

fn overview_bench(c: &mut Criterion) {
let nums = triangular_nums(1e9);
let data = Baked
.load(DataRequest {
id: DataIdentifierBorrowed::for_locale(&locale!("en-US").into()),
..Default::default()
})
.unwrap()
.payload;
let provider = FixedProvider::<DecimalSymbolsV2Marker>::from_payload(data);

c.bench_function("icu_decimal/overview", |b| {
b.iter(|| {
// This benchmark demonstrates the performance of the format function on 1000 numbers
// ranging from -1e9 to 1e9.
let fdf = FixedDecimalFormatter::try_new_unstable(
&provider,
&Default::default(),
Default::default(),
)
.unwrap();
let fdf =
FixedDecimalFormatter::try_new(&Default::default(), Default::default()).unwrap();
for &num in &nums {
let fd = FixedDecimal::from(black_box(num));
fdf.format_to_string(&fd);
Expand Down
3 changes: 2 additions & 1 deletion components/decimal/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ pub struct FormattedFixedDecimal<'l> {
pub(crate) value: &'l FixedDecimal,
pub(crate) options: &'l FixedDecimalFormatterOptions,
pub(crate) symbols: &'l DecimalSymbolsV2<'l>,
pub(crate) digits: &'l DecimalDigitsV1,
}

impl FormattedFixedDecimal<'_> {
Expand Down Expand Up @@ -47,7 +48,7 @@ impl Writeable for FormattedFixedDecimal<'_> {
sink.write_str(self.symbols.decimal_separator())?;
}
#[allow(clippy::indexing_slicing)] // digit_at in 0..=9
sink.write_char(self.symbols.digits[self.value.digit_at(m) as usize])?;
sink.write_char(self.digits.digits[self.value.digit_at(m) as usize])?;
if grouper::check(
upper_magnitude,
m,
Expand Down
9 changes: 8 additions & 1 deletion components/decimal/src/grouper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ fn test_grouper() {
use fixed_decimal::FixedDecimal;
use icu_provider::prelude::*;
use icu_provider_adapters::fixed::FixedProvider;
use icu_provider_adapters::fork::ForkByMarkerProvider;
use writeable::assert_writeable_eq;

let western_sizes = GroupingSizesV1 {
Expand Down Expand Up @@ -154,12 +155,18 @@ fn test_grouper() {
for cas in &cases {
for i in 0..4 {
let dec = FixedDecimal::from(1).multiplied_pow10((i as i16) + 3);
let provider = FixedProvider::<DecimalSymbolsV2Marker>::from_owned(
let provider_symbols = FixedProvider::<DecimalSymbolsV2Marker>::from_owned(
crate::provider::DecimalSymbolsV2 {
grouping_sizes: cas.sizes,
..DecimalSymbolsV2::new_en_for_testing()
},
);
let provider_digits = FixedProvider::<DecimalDigitsV1Marker>::from_owned(
crate::provider::DecimalDigitsV1 {
digits: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
},
);
let provider = ForkByMarkerProvider::new(provider_symbols, provider_digits);
let options = options::FixedDecimalFormatterOptions {
grouping_strategy: cas.strategy,
..Default::default()
Expand Down
30 changes: 26 additions & 4 deletions components/decimal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,12 @@ pub use format::FormattedFixedDecimal;

use alloc::string::String;
use fixed_decimal::FixedDecimal;
use icu_locale_core::locale;
use icu_provider::prelude::*;
use size_test_macro::size_test;
use writeable::Writeable;

size_test!(FixedDecimalFormatter, fixed_decimal_formatter_size, 88);
size_test!(FixedDecimalFormatter, fixed_decimal_formatter_size, 104);

/// A formatter for [`FixedDecimal`], rendering decimal digits in an i18n-friendly way.
///
Expand All @@ -123,6 +124,7 @@ size_test!(FixedDecimalFormatter, fixed_decimal_formatter_size, 88);
pub struct FixedDecimalFormatter {
options: options::FixedDecimalFormatterOptions,
symbols: DataPayload<provider::DecimalSymbolsV2Marker>,
digits: DataPayload<provider::DecimalDigitsV1Marker>,
}

impl AsRef<FixedDecimalFormatter> for FixedDecimalFormatter {
Expand All @@ -138,12 +140,16 @@ impl FixedDecimalFormatter {
);

#[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::try_new)]
pub fn try_new_unstable<D: DataProvider<provider::DecimalSymbolsV2Marker> + ?Sized>(
pub fn try_new_unstable<
D: DataProvider<provider::DecimalSymbolsV2Marker>
+ DataProvider<provider::DecimalDigitsV1Marker>
+ ?Sized,
>(
provider: &D,
locale: &DataLocale,
options: options::FixedDecimalFormatterOptions,
) -> Result<Self, DataError> {
let symbols = provider
let symbols: DataPayload<provider::DecimalSymbolsV2Marker> = provider
.load(DataRequest {
id: DataIdentifierBorrowed::for_marker_attributes_and_locale(
DataMarkerAttributes::from_str_or_panic(
Expand All @@ -154,7 +160,22 @@ impl FixedDecimalFormatter {
..Default::default()
})?
.payload;
Ok(Self { options, symbols })

let digits = provider
.load(DataRequest {
id: DataIdentifierBorrowed::for_marker_attributes_and_locale(
DataMarkerAttributes::from_str_or_panic(&symbols.get().numsys),
&locale!("und").into(),
),
..Default::default()
})?
.payload;

Ok(Self {
options,
symbols,
digits,
})
}

/// Formats a [`FixedDecimal`], returning a [`FormattedFixedDecimal`].
Expand All @@ -163,6 +184,7 @@ impl FixedDecimalFormatter {
value,
options: &self.options,
symbols: self.symbols.get(),
digits: self.digits.get(),
}
}

Expand Down
41 changes: 38 additions & 3 deletions components/decimal/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

use alloc::borrow::Cow;
use icu_provider::prelude::*;
use tinystr::TinyStr8;
use zerovec::VarZeroCow;

#[cfg(feature = "compiled_data")]
Expand All @@ -41,11 +42,12 @@ const _: () = {
}
make_provider!(Baked);
impl_decimal_symbols_v2_marker!(Baked);
impl_decimal_digits_v1_marker!(Baked);
};

#[cfg(feature = "datagen")]
/// The latest minimum set of markers required by this component.
pub const MARKERS: &[DataMarkerInfo] = &[DecimalSymbolsV2Marker::INFO];
pub const MARKERS: &[DataMarkerInfo] = &[DecimalSymbolsV2Marker::INFO, DecimalDigitsV1Marker::INFO];

/// A collection of settings expressing where to put grouping separators in a decimal number.
/// For example, `1,000,000` has two grouping separators, positioned along every 3 digits.
Expand Down Expand Up @@ -75,7 +77,9 @@ pub struct GroupingSizesV1 {
pub min_grouping: u8,
}

/// The strings used in DecimalSymbolsV2
/// A stack representation of the strings used in [`DecimalSymbolsV2`], i.e. a builder type
/// for [`DecimalSymbolsStrs`]. This type can be obtained from a [`DecimalSymbolsStrs`]
/// the `From`/`Into` traits.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
Expand Down Expand Up @@ -114,6 +118,19 @@ pub struct DecimalSymbolStrsBuilder<'data> {
pub grouping_separator: Cow<'data, str>,
}

impl<'data> DecimalSymbolStrsBuilder<'data> {
/// Build a [`DecimalSymbolsStrs`]
pub fn build(&self) -> VarZeroCow<'static, DecimalSymbolsStrs> {
VarZeroCow::from_encodeable(self)
}
}

impl<'data> From<&'data DecimalSymbolsStrs> for DecimalSymbolStrsBuilder<'data> {
fn from(other: &'data DecimalSymbolsStrs) -> Self {
zerofrom::ZeroFrom::zero_from(other)
}
}
Manishearth marked this conversation as resolved.
Show resolved Hide resolved

/// Symbols and metadata required for formatting a [`FixedDecimal`](crate::FixedDecimal).
///
/// <div class="stab unstable">
Expand All @@ -136,6 +153,24 @@ pub struct DecimalSymbolsV2<'data> {
/// Settings used to determine where to place groups in the integer part of the number.
pub grouping_sizes: GroupingSizesV1,

/// The numbering system to use.
pub numsys: TinyStr8,
}

/// The digits for a given numbering system. This data ought to be stored in the `und` locale with an auxiliary key
/// set to the numbering system code.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[icu_provider::data_struct(DecimalDigitsV1Marker = "decimal/digits@1")]
#[derive(Debug, PartialEq, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_decimal::provider))]
pub struct DecimalDigitsV1 {
/// Digit characters for the current numbering system. In most systems, these digits are
/// contiguous, but in some systems, such as *hanidec*, they are not contiguous.
pub digits: [char; 10],
Expand Down Expand Up @@ -185,7 +220,7 @@ impl DecimalSymbolsV2<'static> {
secondary: 3,
min_grouping: 1,
},
digits: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'],
numsys: tinystr::tinystr!(8, "latn"),
}
}
}
Loading
Loading