diff --git a/docs/design/features/globalization-hybrid-mode.md b/docs/design/features/globalization-hybrid-mode.md index 0e6ed5d7c1e7f..8b0be8251e530 100644 --- a/docs/design/features/globalization-hybrid-mode.md +++ b/docs/design/features/globalization-hybrid-mode.md @@ -277,7 +277,7 @@ new CultureInfo("de-DE").CompareInfo.IndexOf("strasse", "stra\u00DFe", 0, Compar For OSX platforms we are using native apis instead of ICU data. -**String comparison** +## String comparison Affected public APIs: - CompareInfo.Compare, @@ -292,44 +292,120 @@ The number of `CompareOptions` and `NSStringCompareOptions` combinations are lim - `None`: -`CompareOptions.None` is mapped to `NSStringCompareOptions.NSLiteralSearch` + `CompareOptions.None` is mapped to `NSStringCompareOptions.NSLiteralSearch` -There are some behaviour changes. Below are examples of such cases. + There are some behaviour changes. Below are examples of such cases. -| **character 1** | **character 2** | **CompareOptions** | **hybrid globalization** | **icu** | **comments** | -|:---------------:|:---------------:|--------------------|:------------------------:|:-------:|:-------------------------------------------------------:| -| `\u3042` あ | `\u30A1` ァ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | -| `\u304D\u3083` きゃ | `\u30AD\u30E3` キャ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | -| `\u304D\u3083` きゃ | `\u30AD\u3083` キゃ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | -| `\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C` ばびブベぼ | `\u30D0\u30D3\u3076\u30D9\uFF8E\uFF9E` バビぶベボ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | -| `\u3060` だ | `\u30C0` ダ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | -| `\u00C0` À | `A\u0300` À | None | 1 | 0 | This is not same character for native api | + | 
**character 1** | **character 2** | **CompareOptions** | **hybrid globalization** | **icu** | **comments** | + |:---------------:|:---------------:|--------------------|:------------------------:|:-------:|:-------------------------------------------------------:| + | `\u3042` あ | `\u30A1` ァ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | + | `\u304D\u3083` きゃ | `\u30AD\u30E3` キャ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | + | `\u304D\u3083` きゃ | `\u30AD\u3083` キゃ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | + | `\u3070\u3073\uFF8C\uFF9E\uFF8D\uFF9E\u307C` ばびブベぼ | `\u30D0\u30D3\u3076\u30D9\uFF8E\uFF9E` バビぶベボ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | + | `\u3060` だ | `\u30C0` ダ | None | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | - `StringSort` : -`CompareOptions.StringSort` is mapped to `NSStringCompareOptions.NSLiteralSearch` .ICU's default is to use "StringSort", i.e. nonalphanumeric symbols come before alphanumeric. That is how works also `NSLiteralSearch`. + `CompareOptions.StringSort` is mapped to `NSStringCompareOptions.NSLiteralSearch` .ICU's default is to use "StringSort", i.e. nonalphanumeric symbols come before alphanumeric. That is how works also `NSLiteralSearch`. - `IgnoreCase`: -`CompareOptions.IgnoreCase` is mapped to `NSStringCompareOptions.NSCaseInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch` + `CompareOptions.IgnoreCase` is mapped to `NSStringCompareOptions.NSCaseInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch` -There are some behaviour changes. Below are examples of such cases. + There are some behaviour changes. Below are examples of such cases. 
-| **character 1** | **character 2** | **CompareOptions** | **hybrid globalization** | **icu** | **comments** | -|:---------------:|:---------------:|--------------------|:------------------------:|:-------:|:-------------------------------------------------------:| -| `\u3060` だ | `\u30C0` ダ | IgnoreCase | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | -| `\u00C0` À | `a\u0300` à | IgnoreCase | 1 | 0 | This is related to above mentioned case under `CompareOptions.None` i.e. `\u00C0` À != À `A\u0300` | + | **character 1** | **character 2** | **CompareOptions** | **hybrid globalization** | **icu** | **comments** | + |:---------------:|:---------------:|--------------------|:------------------------:|:-------:|:-------------------------------------------------------:| + | `\u3060` だ | `\u30C0` ダ | IgnoreCase | 1 | -1 | hiragana and katakana characters are ordered differently compared to ICU | - `IgnoreNonSpace`: -`CompareOptions.IgnoreNonSpace` is mapped to `NSStringCompareOptions.NSDiacriticInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch` + `CompareOptions.IgnoreNonSpace` is mapped to `NSStringCompareOptions.NSDiacriticInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch` - `IgnoreWidth`: -`CompareOptions.IgnoreWidth` is mapped to `NSStringCompareOptions.NSWidthInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch` + `CompareOptions.IgnoreWidth` is mapped to `NSStringCompareOptions.NSWidthInsensitiveSearch | NSStringCompareOptions.NSLiteralSearch` - All combinations that contain below `CompareOptions` always throw `PlatformNotSupportedException`: -`IgnoreSymbols`, + `IgnoreSymbols`, + + `IgnoreKanaType`, + +## String starts with / ends with + +Affected public APIs: +- CompareInfo.IsPrefix +- CompareInfo.IsSuffix +- String.StartsWith +- String.EndsWith + +Mapped to Apple Native API `compare:options:range:locale:`(https://developer.apple.com/documentation/foundation/nsstring/1414561-compare?language=objc) 
+Apple Native API does not expose locale-sensitive endsWith/startsWith functions. As a workaround, both strings get normalized and weightless characters are removed. Resulting strings are cut to the same length and comparison is performed. As we are normalizing strings to be able to cut them, we cannot calculate the match length on the original strings. Methods that calculate this information throw `PlatformNotSupportedException`: + +- [CompareInfo.IsPrefix](https://learn.microsoft.com/dotnet/api/system.globalization.compareinfo.isprefix) +- [CompareInfo.IsSuffix](https://learn.microsoft.com/dotnet/api/system.globalization.compareinfo.issuffix) + +- `IgnoreSymbols` + + As there is no IgnoreSymbols equivalent in NSStringCompareOptions, all `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException`. + +## String indexing + +Affected public APIs: +- CompareInfo.IndexOf +- CompareInfo.LastIndexOf +- String.IndexOf +- String.LastIndexOf + +Mapped to Apple Native API [`rangeOfString:options:range:locale:`](https://developer.apple.com/documentation/foundation/nsstring/1417348-rangeofstring?language=objc) + +In `rangeOfString:options:range:locale:` objects are compared by checking the Unicode canonical equivalence of their code point sequences. +In cases where the search string contains diacritics and has a different normalization form than the source string, the result can be incorrect. + +Characters in general are represented by Unicode code points, and some characters can be represented in a single code point or by combining multiple characters (like diacritics/diaeresis). Normalization Form C will look to compress characters to their single code point format if they were originally represented as a sequence of multiple code points. Normalization Form D does the opposite and expands characters into their multiple code point formats if possible. 
+ +`NSString` `rangeOfString:options:range:locale:` uses canonical equivalence to find the position of the `searchString` within the `sourceString`; however, it does not automatically handle comparison of precomposed (single code point representation) or decomposed (multiple code point representation) forms. Because the `searchString` and `sourceString` can be of differing formats, to properly find the index, we need to ensure that the searchString is in the same form as the sourceString by checking the `rangeOfString:options:range:locale:` using every single normalization form. + +Here are the covered cases with diacritics: + 1. Search string contains a diacritic and has the same normalization form as in the source string. + 2. Search string contains a diacritic and the source string has the same letters with different char lengths, but the substring is normalized in the source. + + a. search string `normalizing to form C` is substring of source string. example: search string: `U\u0308` source string: `Source is \u00DC` => matchLength is 1 + + b. search string `normalizing to form D` is substring of source string. example: search string: `\u00FC` source string: `Source is \u0075\u0308` => matchLength is 2 + +Not covered case: + Source string's intended substring match containing characters of mixed composition forms cannot be matched by 2. because partial precomposition/decomposition is not performed. example: search string: `U\u0308 and \u00FC` (Ü and ü) source string: `Source is \u00DC and \u0075\u0308` (Source is Ü and ü) + As is visible from the example, normalizing the search string to form C or D will not help to find the substring in the source string. + +- `IgnoreSymbols` + + As there is no IgnoreSymbols equivalent in NSStringCompareOptions, all `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException`. + +- Some letters consist of more than one grapheme. + + Apple Native API does not guarantee that string will be segmented by letters but by graphemes. E.g. 
in `cs-CZ` and `sk-SK` "ch" is 1 letter, 2 graphemes. The following code with `HybridGlobalization` switched off returns -1 (not found) while with `HybridGlobalization` switched on, it returns 1. + + ``` C# + new CultureInfo("sk-SK").CompareInfo.IndexOf("ch", "h"); // -1 or 1 + ``` + +- Some graphemes have multi-grapheme equivalents. + E.g. in `de-DE` ß (%u00DF) is one letter and one grapheme and "ss" is one letter and is recognized as two graphemes. Apple Native API's equivalent of `IgnoreNonSpace` treats them as the same letter when comparing. Similar case: dz (%u01F3) and dz. + + Using `IgnoreNonSpace` for these two with `HybridGlobalization` off, also returns 0 (they are equal). However, the workaround used in `HybridGlobalization` will compare them grapheme-by-grapheme and will return -1. + + ``` C# + new CultureInfo("de-DE").CompareInfo.IndexOf("strasse", "stra\u00DFe", 0, CompareOptions.IgnoreNonSpace); // 0 or -1 + ``` + + +## SortKey + +Affected public APIs: +- CompareInfo.GetSortKey +- CompareInfo.GetSortKeyLength +- CompareInfo.GetHashCode -`IgnoreKanaType`, +Apple Native API does not have an equivalent, so they throw `PlatformNotSupportedException`. 
\ No newline at end of file diff --git a/src/libraries/Common/src/Interop/Interop.Collation.OSX.cs b/src/libraries/Common/src/Interop/Interop.Collation.OSX.cs index d9e502274a654..70e907efa68a1 100644 --- a/src/libraries/Common/src/Interop/Interop.Collation.OSX.cs +++ b/src/libraries/Common/src/Interop/Interop.Collation.OSX.cs @@ -8,9 +8,26 @@ internal static partial class Interop { + internal struct Range + { + public int Location; + public int Length; + } + internal static partial class Globalization { [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_CompareStringNative", StringMarshalling = StringMarshalling.Utf16)] internal static unsafe partial int CompareStringNative(string localeName, int lNameLen, char* lpStr1, int cwStr1Len, char* lpStr2, int cwStr2Len, CompareOptions options); + + [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_EndsWithNative", StringMarshalling = StringMarshalling.Utf16)] + [MethodImpl(MethodImplOptions.NoInlining)] + internal static unsafe partial int EndsWithNative(string localeName, int lNameLen, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options); + + [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_IndexOfNative", StringMarshalling = StringMarshalling.Utf16)] + internal static unsafe partial Range IndexOfNative(string localeName, int lNameLen, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, [MarshalAs(UnmanagedType.Bool)] bool fromBeginning); + + [LibraryImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_StartsWithNative", StringMarshalling = StringMarshalling.Utf16)] + [MethodImpl(MethodImplOptions.NoInlining)] + internal static unsafe partial int StartsWithNative(string localeName, int lNameLen, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options); } } diff --git 
a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs index eb4d7e94d80e8..846640c710dce 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.Compare.cs @@ -192,10 +192,10 @@ public static IEnumerable Compare_TestData() yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.None, -1 }; yield return new object[] { s_invariantCompare, "i", "\u0130", CompareOptions.IgnoreCase, -1 }; - yield return new object[] { s_invariantCompare, "\u00C0", "A\u0300", CompareOptions.None, PlatformDetection.IsHybridGlobalizationOnOSX ? 1 : 0 }; + yield return new object[] { s_invariantCompare, "\u00C0", "A\u0300", CompareOptions.None, 0 }; yield return new object[] { s_invariantCompare, "\u00C0", "A\u0300", CompareOptions.Ordinal, 1 }; yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.None, 1 }; - yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.IgnoreCase, PlatformDetection.IsHybridGlobalizationOnOSX ? 
1 : 0 }; + yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.IgnoreCase, 0 }; yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.Ordinal, 1 }; yield return new object[] { s_invariantCompare, "\u00C0", "a\u0300", CompareOptions.OrdinalIgnoreCase, 1 }; yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, -1 }; diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs index 55e3da38a4845..dea1a24959cbd 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IndexOf.cs @@ -33,7 +33,7 @@ public static IEnumerable IndexOf_TestData() yield return new object[] { s_invariantCompare, "foobardzsdzs", "rddzs", 0, 12, CompareOptions.Ordinal, -1, 0 }; // Slovak - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { yield return new object[] { s_slovakCompare, "ch", "h", 0, 2, CompareOptions.None, -1, 0 }; // Android has its own ICU, which doesn't work well with slovak @@ -82,7 +82,7 @@ public static IEnumerable IndexOf_TestData() yield return new object[] { s_invariantCompare, "hello", "\u200d", 1, 3, CompareOptions.IgnoreCase, 1, 0 }; // Ignore symbols - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.IgnoreSymbols, 5, 6 }; yield return new object[] { s_invariantCompare, "More Test's", "Tests", 0, 11, CompareOptions.None, -1, 0 }; yield return new object[] { s_invariantCompare, "cbabababdbaba", "ab", 0, 13, CompareOptions.None, 
2, 2 }; @@ -142,8 +142,11 @@ public static IEnumerable IndexOf_TestData() { yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 0, 23, supportedIgnoreCaseIgnoreNonSpaceOptions, 4, 7 }; yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 0, 21, supportedIgnoreCaseIgnoreNonSpaceOptions, 4, 6 }; - yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 0, 8, supportedIgnoreNonSpaceOption, 3, 2 }; - yield return new object[] { s_invariantCompare, "abc\u01F3xyz", "dz", 0, 7, supportedIgnoreNonSpaceOption, 3, 1 }; + if (!PlatformDetection.IsHybridGlobalizationOnOSX) + { + yield return new object[] { s_invariantCompare, "abcdzxyz", "\u01F3", 0, 8, supportedIgnoreNonSpaceOption, 3, 2 }; + yield return new object[] { s_invariantCompare, "abc\u01F3xyz", "dz", 0, 7, supportedIgnoreNonSpaceOption, 3, 1 }; + } } yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "xtra\u00DFe", 0, 23, supportedIgnoreCaseIgnoreNonSpaceOptions, -1, 0 }; yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Xtrasse", 0, 21, supportedIgnoreCaseIgnoreNonSpaceOptions, -1, 0 }; diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs index 1c8a2423b07c6..d3486a1d84157 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs @@ -25,7 +25,7 @@ public static IEnumerable IsPrefix_TestData() yield return new object[] { s_invariantCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 }; yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 }; yield return new object[] { s_invariantCompare, "dz", "d", CompareOptions.None, true, 1 }; - if 
(!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, false, 0 }; yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.Ordinal, true, 1 }; @@ -35,7 +35,8 @@ public static IEnumerable IsPrefix_TestData() if (!PlatformDetection.IsAndroid && !PlatformDetection.IsLinuxBionic) { yield return new object[] { s_turkishCompare, "interesting", "I", CompareOptions.IgnoreCase, false, 0 }; - yield return new object[] { s_turkishCompare, "interesting", "\u0130", CompareOptions.IgnoreCase, true, 1 }; + if (!PlatformDetection.IsHybridGlobalizationOnOSX) + yield return new object[] { s_turkishCompare, "interesting", "\u0130", CompareOptions.IgnoreCase, true, 1 }; } yield return new object[] { s_turkishCompare, "interesting", "\u0130", CompareOptions.None, false, 0 }; yield return new object[] { s_invariantCompare, "interesting", "I", CompareOptions.IgnoreCase, true, 1 }; @@ -71,7 +72,7 @@ public static IEnumerable IsPrefix_TestData() yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 }; // Ignore symbols - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 }; yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.None, false, 0 }; @@ -83,7 +84,7 @@ public static IEnumerable IsPrefix_TestData() (PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsBrowserDomSupportedOrNodeJS); if (behavesLikeNls) { - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && 
!PlatformDetection.IsHybridGlobalizationOnOSX) { yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 }; yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 }; @@ -95,11 +96,14 @@ public static IEnumerable IsPrefix_TestData() else { yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, false, 0 }; - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, false, 0 }; yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, false, 0 }; - yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, false, 0 }; - yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, false, 0 }; + if (!PlatformDetection.IsHybridGlobalizationOnOSX) + { + yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, false, 0 }; + yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, false, 0 }; + } } // ICU bugs @@ -110,7 +114,7 @@ public static IEnumerable IsPrefix_TestData() } // Prefixes where matched length does not equal value string length - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", supportedIgnoreNonSpaceOption, true, 2 }; yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", supportedIgnoreNonSpaceOption, true, 1 }; @@ -147,7 +151,7 @@ public void IsPrefix(CompareInfo compareInfo, string source, string value, Compa valueBoundedMemory.MakeReadonly(); 
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options)); - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength)); Assert.Equal(expectedMatchLength, actualMatchLength); diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsSuffix.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsSuffix.cs index e5d8a10527c05..8b83094efe3be 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsSuffix.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsSuffix.cs @@ -25,12 +25,12 @@ public static IEnumerable IsSuffix_TestData() yield return new object[] { s_invariantCompare, "foobardzsdzs", "rddzs", CompareOptions.None, false, 0 }; yield return new object[] { s_invariantCompare, "foobardzsdzs", "rddzs", CompareOptions.Ordinal, false, 0 }; yield return new object[] { s_invariantCompare, "dz", "z", CompareOptions.None, true, 1 }; - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) yield return new object[] { s_hungarianCompare, "dz", "z", CompareOptions.None, false, 0 }; yield return new object[] { s_hungarianCompare, "dz", "z", CompareOptions.Ordinal, true, 1 }; // Slovak - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { yield return new object[] { s_slovakCompare, "ch", "h", CompareOptions.None, false, 0 }; yield return new object[] { s_slovakCompare, "velmi chora", "hora", CompareOptions.None, false, 0 }; @@ -80,7 +80,7 @@ public static IEnumerable 
IsSuffix_TestData() yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 }; // Ignore symbols - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.IgnoreSymbols, true, 6 }; yield return new object[] { s_invariantCompare, "More Test's", "Tests", CompareOptions.None, false, 0 }; @@ -107,13 +107,16 @@ public static IEnumerable IsSuffix_TestData() { yield return new object[] { s_hungarianCompare, "foobardzsdzs", "rddzs", CompareOptions.None, false, 0 }; yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, false, 0 }; - yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uDC00", CompareOptions.None, false, 0 }; - yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uDC00", CompareOptions.IgnoreCase, false, 0 }; + if (!PlatformDetection.IsHybridGlobalizationOnOSX) + { + yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uDC00", CompareOptions.None, false, 0 }; + yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uDC00", CompareOptions.IgnoreCase, false, 0 }; + } } // Suffixes where matched length does not equal value string length yield return new object[] { s_germanCompare, "xyz Strasse", "xtra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 }; - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { yield return new object[] { s_invariantCompare, "xyzdz", "\u01F3", supportedIgnoreNonSpaceOption, true, 2 }; yield return new object[] { s_invariantCompare, "xyz\u01F3", "dz", supportedIgnoreNonSpaceOption, true, 1 }; @@ -149,7 +152,7 @@ public void IsSuffix(CompareInfo compareInfo, string source, string value, Compa 
valueBoundedMemory.MakeReadonly(); Assert.Equal(expected, compareInfo.IsSuffix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options)); - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { Assert.Equal(expected, compareInfo.IsSuffix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength)); Assert.Equal(expectedMatchLength, actualMatchLength); diff --git a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs index 76646ed916f49..498290ccf8213 100644 --- a/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs +++ b/src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.LastIndexOf.cs @@ -49,7 +49,7 @@ public static IEnumerable LastIndexOf_TestData() // Slovak yield return new object[] { s_slovakCompare, "ch", "h", 0, 1, CompareOptions.None, -1, 0 }; // Android has its own ICU, which doesn't work well with slovak - if (!PlatformDetection.IsAndroid && !PlatformDetection.IsLinuxBionic && !PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsAndroid && !PlatformDetection.IsLinuxBionic && !PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { yield return new object[] { s_slovakCompare, "hore chodit", "HO", 11, 12, CompareOptions.IgnoreCase, 0, 2 }; } @@ -104,7 +104,7 @@ public static IEnumerable LastIndexOf_TestData() yield return new object[] { s_invariantCompare, "AA\u200DA", "\u200d", 3, 4, CompareOptions.None, 4, 0}; // Ignore symbols - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, 
CompareOptions.IgnoreSymbols, 5, 6 }; yield return new object[] { s_invariantCompare, "More Test's", "Tests", 10, 11, CompareOptions.None, -1, 0 }; yield return new object[] { s_invariantCompare, "cbabababdbaba", "ab", 12, 13, CompareOptions.None, 10, 2 }; @@ -120,7 +120,7 @@ public static IEnumerable LastIndexOf_TestData() } // Inputs where matched length does not equal value string length - if (!PlatformDetection.IsHybridGlobalizationOnBrowser) + if (!PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsHybridGlobalizationOnOSX) { yield return new object[] { s_germanCompare, "abc Strasse Strasse xyz", "stra\u00DFe", 22, 23, supportedIgnoreCaseIgnoreNonSpaceOptions, 12, 7 }; yield return new object[] { s_germanCompare, "abc stra\u00DFe stra\u00DFe xyz", "Strasse", 20, 21, supportedIgnoreCaseIgnoreNonSpaceOptions, 11, 6 }; diff --git a/src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj b/src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj index e476255b31dcf..1c680de3e24e1 100644 --- a/src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj +++ b/src/libraries/System.Globalization/tests/Hybrid/System.Globalization.IOS.Tests.csproj @@ -34,5 +34,9 @@ + + + + diff --git a/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx b/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx index 981d4e2d5bad5..60ef1fbf28cad 100644 --- a/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx +++ b/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx @@ -4067,6 +4067,9 @@ CompareOptions = {0} are not supported when HybridGlobalization=true on this platform. Disable it to load larger ICU bundle, then use this option. + + Mixed compositions in string not supported when HybridGlobalization=true on this platform. Disable it to load larger ICU bundle, then use this option. 
+ CompareOptions = {0} are not supported for culture = {1} when HybridGlobalization=true on this platform. Disable it to load larger ICU bundle, then use this option. diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs index f29904d65e9f6..ee4c075845637 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.Icu.cs @@ -23,7 +23,13 @@ private void IcuInitSortHandle(string interopCultureName) { _isAsciiEqualityOrdinal = GetIsAsciiEqualityOrdinal(interopCultureName); if (!GlobalizationMode.Invariant) + { +#if TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS || TARGET_BROWSER + if (GlobalizationMode.Hybrid) + return; +#endif _sortHandle = SortHandleCache.GetCachedSortHandle(interopCultureName); + } } private bool GetIsAsciiEqualityOrdinal(string interopCultureName) @@ -78,6 +84,10 @@ private unsafe int IcuIndexOfCore(ReadOnlySpan source, ReadOnlySpan fixed (char* pSource = &MemoryMarshal.GetReference(source)) fixed (char* pTarget = &MemoryMarshal.GetReference(target)) { +#if TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + return IndexOfCoreNative(pTarget, target.Length, pSource, source.Length, options, fromBeginning, matchLengthPtr); +#endif if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, pTarget, target.Length, pSource, source.Length, options, matchLengthPtr); else @@ -193,6 +203,9 @@ private unsafe int IndexOfOrdinalIgnoreCaseHelper(ReadOnlySpan source, Rea throw new Exception((string)ex_result); return result; } +#elif TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + return IndexOfCoreNative(b, target.Length, a, source.Length, options, fromBeginning, matchLengthPtr); #endif if (fromBeginning) return 
Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); @@ -292,6 +305,9 @@ private unsafe int IndexOfOrdinalHelper(ReadOnlySpan source, ReadOnlySpan< throw new Exception((string)ex_result); return result; } +#elif TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + return IndexOfCoreNative(b, target.Length, a, source.Length, options, fromBeginning, matchLengthPtr); #endif if (fromBeginning) return Interop.Globalization.IndexOf(_sortHandle, b, target.Length, a, source.Length, options, matchLengthPtr); @@ -321,6 +337,10 @@ private unsafe bool IcuStartsWith(ReadOnlySpan source, ReadOnlySpan fixed (char* pSource = &MemoryMarshal.GetReference(source)) // could be null (or otherwise unable to be dereferenced) fixed (char* pPrefix = &MemoryMarshal.GetReference(prefix)) { +#if TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + return NativeStartsWith(pPrefix, prefix.Length, pSource, source.Length, options); +#endif return Interop.Globalization.StartsWith(_sortHandle, pPrefix, prefix.Length, pSource, source.Length, options, matchLengthPtr); } } @@ -400,6 +420,10 @@ private unsafe bool StartsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, return true; InteropCall: +#if TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + return NativeStartsWith(bp, prefix.Length, ap, source.Length, options); +#endif return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, source.Length, options, matchLengthPtr); } } @@ -468,6 +492,10 @@ private unsafe bool StartsWithOrdinalHelper(ReadOnlySpan source, ReadOnlyS return true; InteropCall: +#if TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS + if (GlobalizationMode.Hybrid) + return NativeStartsWith(bp, prefix.Length, ap, source.Length, options); +#endif return Interop.Globalization.StartsWith(_sortHandle, bp, prefix.Length, ap, 
source.Length, options, matchLengthPtr);
             }
         }
@@ -493,6 +521,10 @@ private unsafe bool IcuEndsWith(ReadOnlySpan source, ReadOnlySpan su
             fixed (char* pSource = &MemoryMarshal.GetReference(source)) // could be null (or otherwise unable to be dereferenced)
             fixed (char* pSuffix = &MemoryMarshal.GetReference(suffix))
             {
+#if TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
+                if (GlobalizationMode.Hybrid)
+                    return NativeEndsWith(pSuffix, suffix.Length, pSource, source.Length, options);
+#endif
                 return Interop.Globalization.EndsWith(_sortHandle, pSuffix, suffix.Length, pSource, source.Length, options, matchLengthPtr);
             }
         }
@@ -573,6 +605,10 @@ private unsafe bool EndsWithOrdinalIgnoreCaseHelper(ReadOnlySpan source, R
                 return true;
 
             InteropCall:
+#if TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
+                if (GlobalizationMode.Hybrid)
+                    return NativeEndsWith(bp, suffix.Length, ap, source.Length, options);
+#endif
                 return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options, matchLengthPtr);
             }
         }
@@ -641,6 +677,10 @@ private unsafe bool EndsWithOrdinalHelper(ReadOnlySpan source, ReadOnlySpa
                 return true;
 
             InteropCall:
+#if TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS
+                if (GlobalizationMode.Hybrid)
+                    return NativeEndsWith(bp, suffix.Length, ap, source.Length, options);
+#endif
                 return Interop.Globalization.EndsWith(_sortHandle, bp, suffix.Length, ap, source.Length, options, matchLengthPtr);
             }
         }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.OSX.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.OSX.cs
index b96541580bc92..6d72fbcc01530 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.OSX.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.OSX.cs
@@ -32,6 +32,47 @@ private unsafe int CompareStringNative(ReadOnlySpan string1, ReadOnlySpan<
             return result;
         }
 
+        // Hybrid-globalization IndexOf/LastIndexOf: forwards the search to Interop.Globalization.IndexOfNative
+        // using this instance's culture name (m_name); fromBeginning selects the first vs. the last occurrence.
+        // Stores the matched length in *matchLengthPtr when that pointer is non-null.
+        private unsafe int IndexOfCoreNative(char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, bool fromBeginning, int* matchLengthPtr)
+        {
+            AssertComparisonSupported(options);
+
+            Interop.Range result = Interop.Globalization.IndexOfNative(m_name, m_name.Length, target, cwTargetLength, pSource, cwSourceLength, options, fromBeginning);
+            // -2 is what the native side returns when it cannot map the options
+            Debug.Assert(result.Location != -2);
+            // -3 means the target is present but its index could not be determined natively
+            if (result.Location == -3)
+                throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMixedCompositions);
+            if (matchLengthPtr != null)
+                *matchLengthPtr = result.Length;
+
+            return result.Location;
+        }
+
+        // Hybrid-globalization prefix check: true when Interop.Globalization.StartsWithNative returns a positive value.
+        private unsafe bool NativeStartsWith(char* pPrefix, int cwPrefixLength, char* pSource, int cwSourceLength, CompareOptions options)
+        {
+            AssertComparisonSupported(options);
+
+            int result = Interop.Globalization.StartsWithNative(m_name, m_name.Length, pPrefix, cwPrefixLength, pSource, cwSourceLength, options);
+            Debug.Assert(result != -2);
+
+            return result > 0;
+        }
+
+        // Hybrid-globalization suffix check: true when Interop.Globalization.EndsWithNative returns a positive value.
+        private unsafe bool NativeEndsWith(char* pSuffix, int cwSuffixLength, char* pSource, int cwSourceLength, CompareOptions options)
+        {
+            AssertComparisonSupported(options);
+
+            int result = Interop.Globalization.EndsWithNative(m_name, m_name.Length, pSuffix, cwSuffixLength, pSource, cwSourceLength, options);
+            Debug.Assert(result != -2);
+
+            return result > 0;
+        }
+
         private static void AssertComparisonSupported(CompareOptions options)
         {
             if ((options | SupportedCompareOptions) != SupportedCompareOptions)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.WebAssembly.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.WebAssembly.cs
index 08256545244c3..dab9c1aac0ba7 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.WebAssembly.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.WebAssembly.cs
@@ -8,11 +8,6 @@ namespace System.Globalization
 {
     public partial class CompareInfo
     {
-        private void JsInit(string interopCultureName)
-        {
-            _isAsciiEqualityOrdinal = GetIsAsciiEqualityOrdinal(interopCultureName);
-        }
-
         private static void AssertHybridOnWasm(CompareOptions options)
         {
             Debug.Assert(!GlobalizationMode.Invariant);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs
index 53192ecd42b91..05bb9f758883d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.cs
@@ -170,13 +170,6 @@ private void InitSort(CultureInfo culture)
         {
             _sortName = culture.SortName;
 
-#if TARGET_BROWSER
-            if (GlobalizationMode.Hybrid)
-            {
-                JsInit(culture.InteropName!);
-                return;
-            }
-#endif
             if (GlobalizationMode.UseNls)
             {
                 NlsInitSortHandle();
@@ -622,7 +615,7 @@ public unsafe bool IsPrefix(ReadOnlySpan source, ReadOnlySpan prefix
             else
             {
                 // Linguistic comparison requested and we don't need to special-case any args.
-#if TARGET_BROWSER +#if TARGET_BROWSER || TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) { throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMatchLength); @@ -769,7 +762,7 @@ public unsafe bool IsSuffix(ReadOnlySpan source, ReadOnlySpan suffix else { // Linguistic comparison requested and we don't need to special-case any args. -#if TARGET_BROWSER +#if TARGET_BROWSER || TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) { throw new PlatformNotSupportedException(SR.PlatformNotSupported_HybridGlobalizationWithMatchLength); @@ -1457,7 +1450,7 @@ public SortKey GetSortKey(string source) private SortKey CreateSortKeyCore(string source, CompareOptions options) => GlobalizationMode.UseNls ? NlsCreateSortKey(source, options) : -#if TARGET_BROWSER +#if TARGET_BROWSER || TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS GlobalizationMode.Hybrid ? throw new PlatformNotSupportedException(GetPNSEText("SortKey")) : #endif @@ -1500,7 +1493,7 @@ public int GetSortKey(ReadOnlySpan source, Span destination, Compare private int GetSortKeyCore(ReadOnlySpan source, Span destination, CompareOptions options) => GlobalizationMode.UseNls ? NlsGetSortKey(source, destination, options) : -#if TARGET_BROWSER +#if TARGET_BROWSER || TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS GlobalizationMode.Hybrid ? throw new PlatformNotSupportedException(GetPNSEText("SortKey")) : #endif @@ -1537,7 +1530,7 @@ public int GetSortKeyLength(ReadOnlySpan source, CompareOptions options = private int GetSortKeyLengthCore(ReadOnlySpan source, CompareOptions options) => GlobalizationMode.UseNls ? NlsGetSortKeyLength(source, options) : -#if TARGET_BROWSER +#if TARGET_BROWSER || TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS GlobalizationMode.Hybrid ? 
throw new PlatformNotSupportedException(GetPNSEText("SortKey")) : #endif @@ -1614,7 +1607,7 @@ public int GetHashCode(ReadOnlySpan source, CompareOptions options) private unsafe int GetHashCodeOfStringCore(ReadOnlySpan source, CompareOptions options) => GlobalizationMode.UseNls ? NlsGetHashCodeOfString(source, options) : -#if TARGET_BROWSER +#if TARGET_BROWSER || TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS GlobalizationMode.Hybrid ? throw new PlatformNotSupportedException(GetPNSEText("HashCode")) : #endif @@ -1638,7 +1631,7 @@ public SortVersion Version } else { -#if TARGET_BROWSER +#if TARGET_BROWSER || TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS if (GlobalizationMode.Hybrid) { throw new PlatformNotSupportedException(GetPNSEText("SortVersion")); @@ -1654,7 +1647,7 @@ public SortVersion Version public int LCID => CultureInfo.GetCultureInfo(Name).LCID; -#if TARGET_BROWSER +#if TARGET_BROWSER || TARGET_OSX || TARGET_MACCATALYST || TARGET_IOS || TARGET_TVOS private static string GetPNSEText(string funcName) => SR.Format(SR.PlatformNotSupported_HybridGlobalization, funcName); #endif } diff --git a/src/native/libs/System.Globalization.Native/entrypoints.c b/src/native/libs/System.Globalization.Native/entrypoints.c index cc652d6a5e5f6..9e0e4f4276981 100644 --- a/src/native/libs/System.Globalization.Native/entrypoints.c +++ b/src/native/libs/System.Globalization.Native/entrypoints.c @@ -66,6 +66,9 @@ static const Entry s_globalizationNative[] = DllImportEntry(GlobalizationNative_GetLocaleInfoPrimaryGroupingSizeNative) DllImportEntry(GlobalizationNative_GetLocaleInfoSecondaryGroupingSizeNative) DllImportEntry(GlobalizationNative_GetLocaleTimeFormatNative) + DllImportEntry(GlobalizationNative_IndexOfNative) + DllImportEntry(GlobalizationNative_StartsWithNative) + DllImportEntry(GlobalizationNative_EndsWithNative) #endif }; diff --git a/src/native/libs/System.Globalization.Native/pal_collation.h 
b/src/native/libs/System.Globalization.Native/pal_collation.h
index 2aaff773dd449..a8b44ba164f4a 100644
--- a/src/native/libs/System.Globalization.Native/pal_collation.h
+++ b/src/native/libs/System.Globalization.Native/pal_collation.h
@@ -8,6 +8,11 @@
 #include "pal_errors.h"
 
 typedef struct SortHandle SortHandle;
+// Location and length of a match, as returned by GlobalizationNative_IndexOfNative.
+typedef struct _Range {
+    int32_t location;
+    int32_t length;
+} Range;
 
 PALEXPORT ResultCode GlobalizationNative_GetSortHandle(const char* lpLocaleName, SortHandle** ppSortHandle);
 
@@ -65,9 +70,39 @@ PALEXPORT int32_t GlobalizationNative_GetSortKey(SortHandle* pSortHandle,
 #ifdef __APPLE__
 PALEXPORT int32_t GlobalizationNative_CompareStringNative(const uint16_t* localeName,
                                                           int32_t lNameLength,
-                                                          const uint16_t* lpStr1,
-                                                          int32_t cwStr1Length,
-                                                          const uint16_t* lpStr2,
-                                                          int32_t cwStr2Length,
+                                                          const uint16_t* lpSource,
+                                                          int32_t cwSourceLength,
+                                                          const uint16_t* lpTarget,
+                                                          int32_t cwTargetLength,
                                                           int32_t options);
+
+// Returns the match as a Range. A negative location is a status code rather than an index
+// (e.g. -2: options cannot be mapped to native options, -3: match exists but its index cannot be computed).
+PALEXPORT Range GlobalizationNative_IndexOfNative(const uint16_t* localeName,
+                                                  int32_t lNameLength,
+                                                  const uint16_t* lpTarget,
+                                                  int32_t cwTargetLength,
+                                                  const uint16_t* lpSource,
+                                                  int32_t cwSourceLength,
+                                                  int32_t options,
+                                                  int32_t fromBeginning);
+
+// Returns 1 when lpSource starts with lpPrefix, 0 when it does not, -2 when options cannot be mapped.
+PALEXPORT int32_t GlobalizationNative_StartsWithNative(const uint16_t* localeName,
+                                                       int32_t lNameLength,
+                                                       const uint16_t* lpPrefix,
+                                                       int32_t cwPrefixLength,
+                                                       const uint16_t* lpSource,
+                                                       int32_t cwSourceLength,
+                                                       int32_t options);
+
+// Returns 1 when lpSource ends with lpSuffix, 0 when it does not, -2 when options cannot be mapped.
+PALEXPORT int32_t GlobalizationNative_EndsWithNative(const uint16_t* localeName,
+                                                     int32_t lNameLength,
+                                                     const uint16_t* lpSuffix,
+                                                     int32_t cwSuffixLength,
+                                                     const uint16_t* lpSource,
+                                                     int32_t cwSourceLength,
+                                                     int32_t options);
+
 #endif
diff --git a/src/native/libs/System.Globalization.Native/pal_collation.m b/src/native/libs/System.Globalization.Native/pal_collation.m
index f0120e5708f16..e6410f7a9de21 100644
--- a/src/native/libs/System.Globalization.Native/pal_collation.m
+++ b/src/native/libs/System.Globalization.Native/pal_collation.m
@@ -19,6
+19,23 @@
     StringSort = 536870912,
 } CompareOptions;
 
+// Returns the NSLocale identified by the UTF-16 localeName, or [NSLocale systemLocale]
+// when localeName is NULL or lNameLength is 0.
+static NSLocale* GetCurrentLocale(const uint16_t* localeName, int32_t lNameLength)
+{
+    NSLocale *currentLocale;
+    if(localeName == NULL || lNameLength == 0)
+    {
+        currentLocale = [NSLocale systemLocale];
+    }
+    else
+    {
+        NSString *locName = [NSString stringWithCharacters: localeName length: lNameLength];
+        currentLocale = [NSLocale localeWithLocaleIdentifier:locName];
+    }
+    return currentLocale;
+}
+
 static NSStringCompareOptions ConvertFromCompareOptionsToNSStringCompareOptions(int32_t comparisonOptions)
 {
     int32_t supportedOptions = None | IgnoreCase | IgnoreNonSpace | IgnoreWidth | StringSort;
@@ -45,33 +62,229 @@ static NSStringCompareOptions ConvertFromCompareOptionsToNSStringCompareOptions(
 Function:
 CompareString
 */
-int32_t GlobalizationNative_CompareStringNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpStr1, int32_t cwStr1Length,
-                                                const uint16_t* lpStr2, int32_t cwStr2Length, int32_t comparisonOptions)
+int32_t GlobalizationNative_CompareStringNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpSource, int32_t cwSourceLength,
+                                                const uint16_t* lpTarget, int32_t cwTargetLength, int32_t comparisonOptions)
 {
-    NSLocale *currentLocale;
-    if(localeName == NULL || lNameLength == 0)
+    NSLocale *currentLocale = GetCurrentLocale(localeName, lNameLength);
+    NSString *sourceString = [NSString stringWithCharacters: lpSource length: cwSourceLength];
+    NSString *sourceStrPrecomposed = sourceString.precomposedStringWithCanonicalMapping;
+    NSString *targetString = [NSString stringWithCharacters: lpTarget length: cwTargetLength];
+    NSString *targetStrPrecomposed = targetString.precomposedStringWithCanonicalMapping;
+
+    NSRange comparisonRange = NSMakeRange(0, sourceStrPrecomposed.length);
+    NSStringCompareOptions options = ConvertFromCompareOptionsToNSStringCompareOptions(comparisonOptions);
+
+    // in case mapping is not found
+    if (options == 0)
+        return -2;
+
+    return [sourceStrPrecomposed compare:targetStrPrecomposed
+                                 options:options
+                                   range:comparisonRange
+                                  locale:currentLocale];
+}
+
+// Removes the characters matched by the pattern below (U+200B-U+200D, U+FEFF and NUL) from source.
+// Returns source unchanged when the regular expression cannot be created.
+static NSString* RemoveWeightlessCharacters(NSString* source)
+{
+    NSError *error = nil;
+    NSRegularExpression *regex = [NSRegularExpression regularExpressionWithPattern:@"[\u200B-\u200D\uFEFF\0]" options:NSRegularExpressionCaseInsensitive error:&error];
+
+    // failure is reported through the nil return value; the error pointer alone is not a reliable indicator
+    if (regex == nil)
+        return source;
+
+    NSString *modifiedString = [regex stringByReplacingMatchesInString:source options:0 range:NSMakeRange(0, [source length]) withTemplate:@""];
+
+    return modifiedString;
+}
+
+// Decides whether the match already recorded (foundLocation) must be kept instead of a newly
+// found one (newLocation): the larger index wins for LastIndexOf, the smaller one for IndexOf.
+// foundLocation is negative while nothing has been recorded yet (callers pass result.location).
+// Returns 1 to keep foundLocation, 0 to take newLocation.
+static int32_t IsIndexFound(int32_t fromBeginning, int32_t foundLocation, int32_t newLocation)
+{
+    // last index
+    if (!fromBeginning && foundLocation > newLocation)
+        return 1;
+    // first index; index 0 is a valid earlier match, only negative values mean "nothing recorded"
+    if (fromBeginning && foundLocation >= 0 && foundLocation < newLocation)
+        return 1;
+    return 0;
+}
+
+/*
+Function: IndexOf
+Find detailed explanation how this function works in https://github.com/dotnet/runtime/blob/main/docs/design/features/globalization-hybrid-mode.md
+Returns the location and length of the match, computed on the source after RemoveWeightlessCharacters.
+location is -1 when the target does not occur in the source,
+-2 when comparisonOptions cannot be mapped to NSStringCompareOptions,
+-3 when the target occurs in the source but its index cannot be determined.
+*/
+Range GlobalizationNative_IndexOfNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpTarget, int32_t cwTargetLength,
+                                        const uint16_t* lpSource, int32_t cwSourceLength, int32_t comparisonOptions, int32_t fromBeginning)
+{
+    assert(cwTargetLength >= 0);
+    Range result = {-2, 0};
+    NSStringCompareOptions options = ConvertFromCompareOptionsToNSStringCompareOptions(comparisonOptions);
+
+    // in case mapping is not found
+    if (options == 0)
+        return result;
+
+    NSString *searchString = [NSString stringWithCharacters: lpTarget length: cwTargetLength];
+    NSString *searchStrCleaned = RemoveWeightlessCharacters(searchString);
+    NSString *sourceString = [NSString stringWithCharacters: lpSource length: cwSourceLength];
+    NSString *sourceStrCleaned = RemoveWeightlessCharacters(sourceString);
+
+    if (sourceStrCleaned.length == 0 || searchStrCleaned.length == 0)
     {
-        currentLocale = [NSLocale systemLocale];
+        result.location = fromBeginning ? 0 : sourceString.length;
+        return result;
     }
-    else
+
+    NSLocale *currentLocale = GetCurrentLocale(localeName, lNameLength);
+    NSString *searchStrPrecomposed = searchStrCleaned.precomposedStringWithCanonicalMapping;
+    NSString *sourceStrPrecomposed = sourceStrCleaned.precomposedStringWithCanonicalMapping;
+
+    // last index
+    if (!fromBeginning)
+        options |= NSBackwardsSearch;
+
+    // check if there is a possible match and return -1 if not
+    // doesn't matter which normalization form is used here
+    NSRange rangeOfReceiverToSearch = NSMakeRange(0, sourceStrPrecomposed.length);
+    NSRange containsRange = [sourceStrPrecomposed rangeOfString:searchStrPrecomposed
+                                                        options:options
+                                                          range:rangeOfReceiverToSearch
+                                                         locale:currentLocale];
+
+    if (containsRange.location == NSNotFound)
+    {
+        result.location = -1; // not found; -2 stays reserved for unmappable options
+        return result;
+    }
+
+    // in case search string is inside source string but we can't find the index return -3
+    result.location = -3;
+    // sourceString and searchString possibly have the same composition of characters
+    rangeOfReceiverToSearch = NSMakeRange(0, sourceStrCleaned.length);
+    NSRange nsRange = [sourceStrCleaned rangeOfString:searchStrCleaned
+                                              options:options
+                                                range:rangeOfReceiverToSearch
+                                               locale:currentLocale];
+
+    if (nsRange.location != NSNotFound)
+    {
+        result.location = nsRange.location;
+        result.length = nsRange.length;
+        // in case of CompareOptions.IgnoreCase if letters have different representations in source and search strings
+        // and case insensitive search appears more than one time in source string take last index for LastIndexOf and first index for IndexOf
+        // e.g. new CultureInfo().CompareInfo.LastIndexOf("Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?", "U\u0308", 25,18, CompareOptions.IgnoreCase);
+        // should return 24 but here it will be 9
+        if (!(comparisonOptions & IgnoreCase))
+            return result;
+    }
+
+    // check if sourceString has precomposed form of characters and searchString has decomposed form of characters
+    // convert searchString to a precomposed form
+    NSRange precomposedRange = [sourceStrCleaned rangeOfString:searchStrPrecomposed
+                                                       options:options
+                                                         range:rangeOfReceiverToSearch
+                                                        locale:currentLocale];
+
+    if (precomposedRange.location != NSNotFound)
     {
-        NSString *locName = [NSString stringWithCharacters: localeName length: lNameLength];
-        currentLocale = [[NSLocale alloc] initWithLocaleIdentifier:locName];
+        // in case of CompareOptions.IgnoreCase if letters have different representations in source and search strings
+        // and search appears more than one time in source string take last index for LastIndexOf and first index for IndexOf
+        // e.g. new CultureInfo().CompareInfo.LastIndexOf("Is \u0055\u0308 or \u0075\u0308 the same as \u00DC or \u00FC?", "U\u0308", 25,18, CompareOptions.IgnoreCase);
+        // this will return 24
+        if ((comparisonOptions & IgnoreCase) && IsIndexFound(fromBeginning, (int32_t)result.location, (int32_t)precomposedRange.location))
+            return result;
+
+        result.location = precomposedRange.location;
+        result.length = precomposedRange.length;
+        if (!(comparisonOptions & IgnoreCase))
+            return result;
     }
 
-    NSString *firstString = [NSString stringWithCharacters: lpStr1 length: cwStr1Length];
-    NSString *secondString = [NSString stringWithCharacters: lpStr2 length: cwStr2Length];
-    NSRange string1Range = NSMakeRange(0, cwStr1Length);
+    // check if sourceString has decomposed form of characters and searchString has precomposed form of characters
+    // convert searchString to a decomposed form
+    NSString *searchStrDecomposed = searchStrCleaned.decomposedStringWithCanonicalMapping;
+    NSRange decomposedRange = [sourceStrCleaned rangeOfString:searchStrDecomposed
+                                                      options:options
+                                                        range:rangeOfReceiverToSearch
+                                                       locale:currentLocale];
+
+    if (decomposedRange.location != NSNotFound)
+    {
+        if ((comparisonOptions & IgnoreCase) && IsIndexFound(fromBeginning, (int32_t)result.location, (int32_t)decomposedRange.location))
+            return result;
+
+        result.location = decomposedRange.location;
+        result.length = decomposedRange.length;
+        return result;
+    }
+
+    return result;
+}
+
+/*
+ Return value is a "Win32 BOOL" (1 = true, 0 = false),
+ or -2 when comparisonOptions cannot be mapped to NSStringCompareOptions
+ */
+int32_t GlobalizationNative_StartsWithNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpPrefix, int32_t cwPrefixLength,
+                                             const uint16_t* lpSource, int32_t cwSourceLength, int32_t comparisonOptions)
+{
     NSStringCompareOptions options = ConvertFromCompareOptionsToNSStringCompareOptions(comparisonOptions);
 
     // in case mapping is not found
     if (options == 0)
         return -2;
+
+    NSLocale *currentLocale = GetCurrentLocale(localeName, lNameLength);
+    NSString *prefixString = [NSString stringWithCharacters: lpPrefix length: cwPrefixLength];
+    NSString *prefixStrComposed = RemoveWeightlessCharacters(prefixString.precomposedStringWithCanonicalMapping);
+    NSString *sourceString = [NSString stringWithCharacters: lpSource length: cwSourceLength];
+    NSString *sourceStrComposed = RemoveWeightlessCharacters(sourceString.precomposedStringWithCanonicalMapping);
+
+    NSRange sourceRange = NSMakeRange(0, prefixStrComposed.length > sourceStrComposed.length ? sourceStrComposed.length : prefixStrComposed.length);
 
-    return [firstString compare:secondString
-                        options:options
-                          range:string1Range
-                         locale:currentLocale];
+    int32_t result = [sourceStrComposed compare:prefixStrComposed
+                                        options:options
+                                          range:sourceRange
+                                         locale:currentLocale];
+    return result == NSOrderedSame ? 1 : 0;
+}
+
+/*
+ Return value is a "Win32 BOOL" (1 = true, 0 = false),
+ or -2 when comparisonOptions cannot be mapped to NSStringCompareOptions
+ */
+int32_t GlobalizationNative_EndsWithNative(const uint16_t* localeName, int32_t lNameLength, const uint16_t* lpSuffix, int32_t cwSuffixLength,
+                                           const uint16_t* lpSource, int32_t cwSourceLength, int32_t comparisonOptions)
+{
+    NSStringCompareOptions options = ConvertFromCompareOptionsToNSStringCompareOptions(comparisonOptions);
+
+    // in case mapping is not found
+    if (options == 0)
+        return -2;
+
+    NSLocale *currentLocale = GetCurrentLocale(localeName, lNameLength);
+    NSString *suffixString = [NSString stringWithCharacters: lpSuffix length: cwSuffixLength];
+    NSString *suffixStrComposed = RemoveWeightlessCharacters(suffixString.precomposedStringWithCanonicalMapping);
+    NSString *sourceString = [NSString stringWithCharacters: lpSource length: cwSourceLength];
+    NSString *sourceStrComposed = RemoveWeightlessCharacters(sourceString.precomposedStringWithCanonicalMapping);
+    int32_t startIndex = suffixStrComposed.length > sourceStrComposed.length ? 0 : sourceStrComposed.length - suffixStrComposed.length;
+    NSRange sourceRange = NSMakeRange(startIndex, sourceStrComposed.length - startIndex);
+
+    int32_t result = [sourceStrComposed compare:suffixStrComposed
+                                        options:options
+                                          range:sourceRange
+                                         locale:currentLocale];
+    return result == NSOrderedSame ? 1 : 0;
 }
 
 #endif