Skip to content

Commit

Permalink
Moving root collation data into its own singleton marker (unicode-org…
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian authored Sep 6, 2024
1 parent d2c6e9c commit dae2b44
Show file tree
Hide file tree
Showing 41 changed files with 577 additions and 208,198 deletions.
27 changes: 17 additions & 10 deletions components/collator/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@ use crate::elements::{
NO_CE_SECONDARY, NO_CE_TERTIARY, OPTIMIZED_DIACRITICS_MAX_COUNT, QUATERNARY_MASK,
};
use crate::options::CollatorOptionsBitField;
use crate::provider::CollationDataV1Marker;
use crate::provider::CollationDiacriticsV1Marker;
use crate::provider::CollationJamoV1Marker;
use crate::provider::CollationMetadataV1Marker;
use crate::provider::CollationReorderingV1Marker;
use crate::provider::CollationRootV1Marker;
use crate::provider::CollationSpecialPrimariesV1Marker;
use crate::provider::CollationTailoringV1Marker;
use crate::{AlternateHandling, CollatorOptions, MaxVariable, ResolvedCollatorOptions, Strength};
use core::cmp::Ordering;
use core::convert::TryFrom;
Expand Down Expand Up @@ -55,8 +56,8 @@ impl AnyQuaternaryAccumulator {
#[derive(Debug)]
pub struct Collator {
special_primaries: Option<DataPayload<CollationSpecialPrimariesV1Marker>>,
root: DataPayload<CollationDataV1Marker>,
tailoring: Option<DataPayload<CollationDataV1Marker>>,
root: DataPayload<CollationRootV1Marker>,
tailoring: Option<DataPayload<CollationTailoringV1Marker>>,
jamo: DataPayload<CollationJamoV1Marker>,
diacritics: DataPayload<CollationDiacriticsV1Marker>,
options: CollatorOptionsBitField,
Expand All @@ -72,6 +73,9 @@ impl Collator {
pub fn try_new(locale: &DataLocale, options: CollatorOptions) -> Result<Self, DataError> {
Self::try_new_unstable_internal(
&crate::provider::Baked,
DataPayload::from_static_ref(
crate::provider::Baked::SINGLETON_COLLATION_ROOT_V1_MARKER,
),
DataPayload::from_static_ref(
icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V1_MARKER,
),
Expand Down Expand Up @@ -110,7 +114,8 @@ impl Collator {
) -> Result<Self, DataError>
where
D: DataProvider<CollationSpecialPrimariesV1Marker>
+ DataProvider<CollationDataV1Marker>
+ DataProvider<CollationRootV1Marker>
+ DataProvider<CollationTailoringV1Marker>
+ DataProvider<CollationDiacriticsV1Marker>
+ DataProvider<CollationJamoV1Marker>
+ DataProvider<CollationMetadataV1Marker>
Expand All @@ -124,14 +129,17 @@ impl Collator {
provider.load(Default::default())?.payload,
provider.load(Default::default())?.payload,
provider.load(Default::default())?.payload,
provider.load(Default::default())?.payload,
|| provider.load(Default::default()).map(|r| r.payload),
locale,
options,
)
}

#[allow(clippy::too_many_arguments)]
fn try_new_unstable_internal<D>(
provider: &D,
root: DataPayload<CollationRootV1Marker>,
decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
tables: DataPayload<CanonicalDecompositionTablesV1Marker>,
jamo: DataPayload<CollationJamoV1Marker>,
Expand All @@ -143,7 +151,8 @@ impl Collator {
options: CollatorOptions,
) -> Result<Self, DataError>
where
D: DataProvider<CollationDataV1Marker>
D: DataProvider<CollationRootV1Marker>
+ DataProvider<CollationTailoringV1Marker>
+ DataProvider<CollationDiacriticsV1Marker>
+ DataProvider<CollationMetadataV1Marker>
+ DataProvider<CollationReorderingV1Marker>
Expand Down Expand Up @@ -177,7 +186,7 @@ impl Collator {

let metadata = metadata_payload.get();

let tailoring: Option<DataPayload<crate::provider::CollationDataV1Marker>> =
let tailoring: Option<DataPayload<crate::provider::CollationTailoringV1Marker>> =
if metadata.tailored() {
Some(
provider
Expand Down Expand Up @@ -209,8 +218,6 @@ impl Collator {
}
}

let root: DataPayload<CollationDataV1Marker> = provider.load(Default::default())?.payload;

let tailored_diacritics = metadata.tailored_diacritics();
let diacritics: DataPayload<CollationDiacriticsV1Marker> = provider
.load(if tailored_diacritics {
Expand Down Expand Up @@ -339,7 +346,7 @@ impl Collator {
}

fn compare_impl<I: Iterator<Item = char>>(&self, left_chars: I, right_chars: I) -> Ordering {
let tailoring: &DataPayload<CollationDataV1Marker> =
let tailoring: &DataPayload<CollationTailoringV1Marker> =
if let Some(tailoring) = &self.tailoring {
tailoring
} else {
Expand All @@ -356,7 +363,7 @@ impl Collator {
// should we have a no-op tailoring that contains a
// specially-crafted CodePointTrie that always returns
// a FALLBACK_CE32 after a single branch?
&self.root
self.root.cast_ref()
};

// Sadly, it looks like variable CEs and backward second level
Expand Down
27 changes: 18 additions & 9 deletions components/collator/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ const _: () = {
pub use icu_collections as collections;
}
make_provider!(Baked);
impl_collation_data_v1_marker!(Baked);
impl_collation_root_v1_marker!(Baked);
impl_collation_tailoring_v1_marker!(Baked);
impl_collation_diacritics_v1_marker!(Baked);
impl_collation_jamo_v1_marker!(Baked);
impl_collation_metadata_v1_marker!(Baked);
Expand All @@ -70,7 +71,8 @@ const _: () = {
#[cfg(feature = "datagen")]
/// The latest minimum set of markers required by this component.
pub const MARKERS: &[DataMarkerInfo] = &[
CollationDataV1Marker::INFO,
CollationRootV1Marker::INFO,
CollationTailoringV1Marker::INFO,
CollationDiacriticsV1Marker::INFO,
CollationJamoV1Marker::INFO,
CollationMetadataV1Marker::INFO,
Expand Down Expand Up @@ -114,13 +116,20 @@ fn data_ce_to_primary(data_ce: u64, c: char) -> u32 {
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[icu_provider::data_struct(marker(
CollationDataV1Marker,
"collator/data@1",
// TODO(#3867): Use script fallback
fallback_by = "language",
attributes_domain = "collator",
))]
#[icu_provider::data_struct(
marker(
CollationRootV1Marker,
"collator/root@1",
singleton,
),
marker(
CollationTailoringV1Marker,
"collator/tailoring@1",
// TODO(#3867): Use script fallback
fallback_by = "language",
attributes_domain = "collator",
)
)]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
Expand Down
6 changes: 3 additions & 3 deletions components/collator/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1286,7 +1286,7 @@ fn test_nb_nn_no() {
strs.sort_by(|a, b| collator.compare(a, b));
assert_eq!(strs, expected);
assert_eq!(
DataProvider::<CollationDataV1Marker>::load(
DataProvider::<CollationTailoringV1Marker>::load(
&icu_collator::provider::Baked,
DataRequest {
id: DataIdentifierBorrowed::for_locale(&locale),
Expand All @@ -1306,7 +1306,7 @@ fn test_nb_nn_no() {
strs.sort_by(|a, b| collator.compare(a, b));
assert_eq!(strs, expected);
assert_eq!(
DataProvider::<CollationDataV1Marker>::load(
DataProvider::<CollationTailoringV1Marker>::load(
&icu_collator::provider::Baked,
DataRequest {
id: DataIdentifierBorrowed::for_locale(&locale),
Expand All @@ -1326,7 +1326,7 @@ fn test_nb_nn_no() {
strs.sort_by(|a, b| collator.compare(a, b));
assert_eq!(strs, expected);
assert_eq!(
DataProvider::<CollationDataV1Marker>::load(
DataProvider::<CollationTailoringV1Marker>::load(
&icu_collator::provider::Baked,
DataRequest {
id: DataIdentifierBorrowed::for_locale(&locale),
Expand Down
16 changes: 16 additions & 0 deletions provider/core/src/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,22 @@ where
})
}

/// Convert between two [`DynamicDataMarker`] types that are compatible with each other
/// with compile-time type checking.
///
/// This happens if they both have the same [`DynamicDataMarker::DataStruct`] type.
///
/// Can be used to erase the marker of a data payload in cases where multiple markers correspond
/// to the same data struct.
#[inline]
pub fn cast_ref<M2>(&self) -> &DataPayload<M2>
where
M2: DynamicDataMarker<DataStruct = M::DataStruct>,
{
// SAFETY: As seen in the implementation of `cast`, the struct is the same, it's just the generic that changes.
unsafe { core::mem::transmute(self) }
}

/// Convert a [`DataPayload`] to one of the same type with runtime type checking.
///
/// Primarily useful to convert from a generic to a concrete marker type.
Expand Down
173 changes: 0 additions & 173 deletions provider/data/collator/data/collation_data_v1_marker.rs.data

This file was deleted.

75 changes: 75 additions & 0 deletions provider/data/collator/data/collation_root_v1_marker.rs.data

Large diffs are not rendered by default.

83 changes: 83 additions & 0 deletions provider/data/collator/data/collation_tailoring_v1_marker.rs.data

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions provider/data/collator/data/mod.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit dae2b44

Please sign in to comment.