From d51ad9ad46bf8b3d8c087ac22ac225d0e5e44f59 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Fri, 29 Nov 2024 13:07:28 +0700 Subject: [PATCH 1/5] define custom collations per writing system based on wsId and use for sorting --- .../MiniLcmTests/SortingTests.cs | 12 ++ .../MiniLcmTests/SortingTests.cs | 19 +++ backend/FwLite/LcmCrdt/CrdtMiniLcmApi.cs | 4 +- .../LcmCrdt/Data/SqlSortingExtensions.cs | 152 ++++++++++++++++++ backend/FwLite/LcmCrdt/LcmCrdtDbContext.cs | 14 +- backend/FwLite/LcmCrdt/LcmCrdtKernel.cs | 2 + .../FwLite/MiniLcm.Tests/SortingTestsBase.cs | 62 +++++++ 7 files changed, 261 insertions(+), 4 deletions(-) create mode 100644 backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs create mode 100644 backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs create mode 100644 backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs create mode 100644 backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs diff --git a/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs b/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs new file mode 100644 index 000000000..5681d7e93 --- /dev/null +++ b/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs @@ -0,0 +1,12 @@ +using FwDataMiniLcmBridge.Tests.Fixtures; + +namespace FwDataMiniLcmBridge.Tests.MiniLcmTests; + +[Collection(ProjectLoaderFixture.Name)] +public class SortingTests(ProjectLoaderFixture fixture) : SortingTestsBase +{ + protected override Task NewApi() + { + return Task.FromResult(fixture.NewProjectApi("sorting-test", "en", "en")); + } +} diff --git a/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs b/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs new file mode 100644 index 000000000..100c89e9d --- /dev/null +++ b/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs @@ -0,0 +1,19 @@ +namespace LcmCrdt.Tests.MiniLcmTests; + +public class SortingTests : SortingTestsBase +{ + private readonly MiniLcmApiFixture _fixture = 
new(); + + protected override async Task NewApi() + { + await _fixture.InitializeAsync(); + var api = _fixture.Api; + return api; + } + + public override async Task DisposeAsync() + { + await base.DisposeAsync(); + await _fixture.DisposeAsync(); + } +} diff --git a/backend/FwLite/LcmCrdt/CrdtMiniLcmApi.cs b/backend/FwLite/LcmCrdt/CrdtMiniLcmApi.cs index 3a1fbd07c..2ec34d44e 100644 --- a/backend/FwLite/LcmCrdt/CrdtMiniLcmApi.cs +++ b/backend/FwLite/LcmCrdt/CrdtMiniLcmApi.cs @@ -230,7 +230,7 @@ private async IAsyncEnumerable GetEntries( queryable = queryable.WhereExemplar(ws.Value, options.Exemplar.Value); } - var sortWs = (await GetWritingSystem(options.Order.WritingSystem, WritingSystemType.Vernacular))?.WsId; + var sortWs = (await GetWritingSystem(options.Order.WritingSystem, WritingSystemType.Vernacular)); if (sortWs is null) throw new NullReferenceException($"sort writing system {options.Order.WritingSystem} not found"); queryable = queryable @@ -238,7 +238,7 @@ private async IAsyncEnumerable GetEntries( .LoadWith(e => e.ComplexForms) .LoadWith(e => e.Components) .AsQueryable() - .OrderBy(e => e.Headword(sortWs.Value)) + .OrderBy(e => e.Headword(sortWs.WsId).CollateUnicode(sortWs)) .ThenBy(e => e.Id) .Skip(options.Offset) .Take(options.Count); diff --git a/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs b/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs new file mode 100644 index 000000000..e58b82071 --- /dev/null +++ b/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs @@ -0,0 +1,152 @@ +using System.Data.Common; +using System.Data.SQLite; +using System.Globalization; +using System.Linq.Expressions; +using LinqToDB; +using Microsoft.Data.Sqlite; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Diagnostics; +using Microsoft.Extensions.Caching.Memory; +using Microsoft.Extensions.Logging; +using SIL.WritingSystems; + +namespace LcmCrdt.Data; + +public static class SqlSortingExtensions +{ + public const string CollateUnicodeNoCase 
= "NOCASE_UNICODE"; + + [ExpressionMethod(nameof(CollateUnicodeExpression))] + internal static string CollateUnicode(this string value, WritingSystem ws) + { + //could optionally just return the value here, but it would work differently than sql + throw new InvalidOperationException("CollateUnicode is server-side only API."); + } + + private static Expression> CollateUnicodeExpression() + { + //todo maybe in the future we use a custom collation based on the writing system + return (s, ws) => s.Collate(CollationName(ws)); + } + + internal static string CollationName(WritingSystem ws) + { + //don't use ':' in the name, it won't work + return $"NOCASE_WS_{ws.WsId}"; + } +} + +public class SetupCollationInterceptor(IMemoryCache cache, ILogger logger) : IDbConnectionInterceptor, ISaveChangesInterceptor +{ + private WritingSystem[] GetWritingSystems(LcmCrdtDbContext dbContext, DbConnection connection) + { + //todo this needs to be invalidated when the writing systems change + return cache.GetOrCreate(CacheKey(connection), + entry => + { + entry.SlidingExpiration = TimeSpan.FromMinutes(30); + try + { + + return dbContext.WritingSystems.ToArray(); + } + catch (SqliteException e) + { + return []; + } + }) ?? 
[]; + } + + private static string CacheKey(DbConnection connection) + { + return $"writingSystems|{connection.ConnectionString}"; + } + + private void InvalidateWritingSystemsCache(DbConnection connection) + { + cache.Remove(CacheKey(connection)); + } + + public void ConnectionOpened(DbConnection connection, ConnectionEndEventData eventData) + { + var context = (LcmCrdtDbContext?)eventData.Context; + if (context is null) throw new InvalidOperationException("context is null"); + var sqliteConnection = (SqliteConnection)connection; + SetupCollations(sqliteConnection, GetWritingSystems(context, connection)); + + //setup general use collation + sqliteConnection.CreateCollation(SqlSortingExtensions.CollateUnicodeNoCase, + CultureInfo.CurrentCulture.CompareInfo, + (compareInfo, x, y) => compareInfo.Compare(x, y, CompareOptions.IgnoreCase)); + } + + public Task ConnectionOpenedAsync(DbConnection connection, + ConnectionEndEventData eventData, + CancellationToken cancellationToken = default) + { + ConnectionOpened(connection, eventData); + return Task.CompletedTask; + } + + public InterceptionResult SavingChanges(DbContextEventData eventData, InterceptionResult result) + { + UpdateCollationsOnSave(eventData.Context); + return result; + } + + public ValueTask> SavingChangesAsync(DbContextEventData eventData, + InterceptionResult result, + CancellationToken cancellationToken = default) + { + UpdateCollationsOnSave(eventData.Context); + return ValueTask.FromResult(result); + } + + private void UpdateCollationsOnSave(DbContext? 
dbContext) + { + if (dbContext is null) return; + var connection = (SqliteConnection)dbContext.Database.GetDbConnection(); + bool updateWs = false; + foreach (var entityEntry in dbContext.ChangeTracker.Entries()) + { + if (entityEntry.State is EntityState.Added or EntityState.Modified) + { + var writingSystem = entityEntry.Entity; + SetupCollation(connection, writingSystem); + updateWs = true; + } + } + + if (updateWs) + { + InvalidateWritingSystemsCache(connection); + } + } + + private void SetupCollations(SqliteConnection connection, WritingSystem[] writingSystems) + { + foreach (var writingSystem in writingSystems) + { + SetupCollation(connection, writingSystem); + } + } + + private void SetupCollation(SqliteConnection connection, WritingSystem writingSystem) + { + CompareInfo compareInfo; + try + { + //todo use ICU/SLDR instead + compareInfo = CultureInfo.CreateSpecificCulture(writingSystem.WsId.Code).CompareInfo; + } + catch (Exception e) + { + logger.LogError(e, "Failed to create compare info for '{WritingSystemId}'", writingSystem.WsId); + compareInfo = CultureInfo.InvariantCulture.CompareInfo; + } + connection.CreateCollation(SqlSortingExtensions.CollationName(writingSystem), + //todo use custom comparison based on the writing system + compareInfo, + static (compareInfo, x, y) => compareInfo.Compare(x, y, CompareOptions.IgnoreCase)); + } +} diff --git a/backend/FwLite/LcmCrdt/LcmCrdtDbContext.cs b/backend/FwLite/LcmCrdt/LcmCrdtDbContext.cs index 3a303f643..26e37bf6d 100644 --- a/backend/FwLite/LcmCrdt/LcmCrdtDbContext.cs +++ b/backend/FwLite/LcmCrdt/LcmCrdtDbContext.cs @@ -1,15 +1,25 @@ -using System.Text.Json; +using System.Data.Common; +using System.Text.Json; +using LcmCrdt.Data; +using Microsoft.Data.Sqlite; using SIL.Harmony; using SIL.Harmony.Db; using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Diagnostics; using Microsoft.EntityFrameworkCore.Storage.ValueConversion; using Microsoft.Extensions.Options; namespace LcmCrdt; 
-public class LcmCrdtDbContext(DbContextOptions dbContextOptions, IOptions options): DbContext(dbContextOptions), ICrdtDbContext +public class LcmCrdtDbContext(DbContextOptions dbContextOptions, IOptions options, SetupCollationInterceptor setupCollationInterceptor) + : DbContext(dbContextOptions), ICrdtDbContext { public DbSet ProjectData => Set(); + public IQueryable WritingSystems => Set().AsNoTracking(); + protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder) + { + optionsBuilder.AddInterceptors(setupCollationInterceptor); + } protected override void OnModelCreating(ModelBuilder modelBuilder) { diff --git a/backend/FwLite/LcmCrdt/LcmCrdtKernel.cs b/backend/FwLite/LcmCrdt/LcmCrdtKernel.cs index 74562c440..9bf02964f 100644 --- a/backend/FwLite/LcmCrdt/LcmCrdtKernel.cs +++ b/backend/FwLite/LcmCrdt/LcmCrdtKernel.cs @@ -5,6 +5,7 @@ using SIL.Harmony.Changes; using LcmCrdt.Changes; using LcmCrdt.Changes.Entries; +using LcmCrdt.Data; using LcmCrdt.Objects; using LcmCrdt.RemoteSync; using LinqToDB; @@ -27,6 +28,7 @@ public static IServiceCollection AddLcmCrdtClient(this IServiceCollection servic { LinqToDBForEFTools.Initialize(); services.AddMemoryCache(); + services.AddSingleton(); services.AddDbContext(ConfigureDbOptions); services.AddOptions().BindConfiguration("LcmCrdt"); diff --git a/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs new file mode 100644 index 000000000..be58f4bee --- /dev/null +++ b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs @@ -0,0 +1,62 @@ +namespace MiniLcm.Tests; + +public abstract class SortingTestsBase : MiniLcmTestBase +{ + public override async Task InitializeAsync() + { + await base.InitializeAsync(); + await Api.CreateWritingSystem(WritingSystemType.Analysis, + new WritingSystem() + { + Id = Guid.NewGuid(), + Type = WritingSystemType.Analysis, + WsId = "en", + Name = "English", + Abbreviation = "En", + Font = "Arial", + Exemplars = [] + }); + await 
Api.CreateWritingSystem(WritingSystemType.Vernacular, + new WritingSystem() + { + Id = Guid.NewGuid(), + Type = WritingSystemType.Vernacular, + WsId = "en-US", + Name = "English", + Abbreviation = "En", + Font = "Arial", + Exemplars = [] + }); + } + + private Task CreateEntry(string headword) + { + return Api.CreateEntry(new() { LexemeForm = { { "en", headword } }, }); + } + + + // ReSharper disable InconsistentNaming + const string Ru_A= "\u0410"; + const string Ru_a = "\u0430"; + const string Ru_Б= "\u0411"; + const string Ru_б = "\u0431"; + const string Ru_В= "\u0412"; + const string Ru_в = "\u0432"; + // ReSharper restore InconsistentNaming + + [Theory] + [InlineData("aa,ab,ac")] + [InlineData("aa,Ab,ac")] + [InlineData($"{Ru_a}{Ru_a},{Ru_a}{Ru_б},{Ru_a}{Ru_в}")] + [InlineData($"{Ru_a}{Ru_a},{Ru_A}{Ru_б},{Ru_a}{Ru_в}")] + public async Task EntriesAreSorted(string headwords) + { + var headwordList = headwords.Split(','); + foreach (var headword in headwordList.OrderBy(h => Random.Shared.Next())) + { + await CreateEntry(headword); + } + var entries = await Api.GetEntries().Select(e => e.Headword()).ToArrayAsync(); + entries.Should().Equal(headwordList); + } +} From 5d008dae11d84a60af1315b3fcdf2dbc87f728ea Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Fri, 29 Nov 2024 13:13:07 +0700 Subject: [PATCH 2/5] move interceptor into its own file --- .../LcmCrdt/Data/SetupCollationInterceptor.cs | 124 ++++++++++++++++++ .../LcmCrdt/Data/SqlSortingExtensions.cs | 124 +----------------- 2 files changed, 125 insertions(+), 123 deletions(-) create mode 100644 backend/FwLite/LcmCrdt/Data/SetupCollationInterceptor.cs diff --git a/backend/FwLite/LcmCrdt/Data/SetupCollationInterceptor.cs b/backend/FwLite/LcmCrdt/Data/SetupCollationInterceptor.cs new file mode 100644 index 000000000..0c3fdc971 --- /dev/null +++ b/backend/FwLite/LcmCrdt/Data/SetupCollationInterceptor.cs @@ -0,0 +1,124 @@ +using System.Data.Common; +using System.Globalization; +using Microsoft.Data.Sqlite; +using 
Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Diagnostics; +using Microsoft.Extensions.Caching.Memory; +using Microsoft.Extensions.Logging; + +namespace LcmCrdt.Data; + +internal class SetupCollationInterceptor(IMemoryCache cache, ILogger logger) : IDbConnectionInterceptor, ISaveChangesInterceptor +{ + private WritingSystem[] GetWritingSystems(LcmCrdtDbContext dbContext, DbConnection connection) + { + //todo this needs to be invalidated when the writing systems change + return cache.GetOrCreate(CacheKey(connection), + entry => + { + entry.SlidingExpiration = TimeSpan.FromMinutes(30); + try + { + + return dbContext.WritingSystems.ToArray(); + } + catch (SqliteException e) + { + return []; + } + }) ?? []; + } + + private static string CacheKey(DbConnection connection) + { + return $"writingSystems|{connection.ConnectionString}"; + } + + private void InvalidateWritingSystemsCache(DbConnection connection) + { + cache.Remove(CacheKey(connection)); + } + + public void ConnectionOpened(DbConnection connection, ConnectionEndEventData eventData) + { + var context = (LcmCrdtDbContext?)eventData.Context; + if (context is null) throw new InvalidOperationException("context is null"); + var sqliteConnection = (SqliteConnection)connection; + SetupCollations(sqliteConnection, GetWritingSystems(context, connection)); + + //setup general use collation + sqliteConnection.CreateCollation(SqlSortingExtensions.CollateUnicodeNoCase, + CultureInfo.CurrentCulture.CompareInfo, + (compareInfo, x, y) => compareInfo.Compare(x, y, CompareOptions.IgnoreCase)); + } + + public Task ConnectionOpenedAsync(DbConnection connection, + ConnectionEndEventData eventData, + CancellationToken cancellationToken = default) + { + ConnectionOpened(connection, eventData); + return Task.CompletedTask; + } + + public InterceptionResult SavingChanges(DbContextEventData eventData, InterceptionResult result) + { + UpdateCollationsOnSave(eventData.Context); + return result; + } + + public 
ValueTask> SavingChangesAsync(DbContextEventData eventData, + InterceptionResult result, + CancellationToken cancellationToken = default) + { + UpdateCollationsOnSave(eventData.Context); + return ValueTask.FromResult(result); + } + + private void UpdateCollationsOnSave(DbContext? dbContext) + { + if (dbContext is null) return; + var connection = (SqliteConnection)dbContext.Database.GetDbConnection(); + bool updateWs = false; + foreach (var entityEntry in dbContext.ChangeTracker.Entries()) + { + if (entityEntry.State is EntityState.Added or EntityState.Modified) + { + var writingSystem = entityEntry.Entity; + SetupCollation(connection, writingSystem); + updateWs = true; + } + } + + if (updateWs) + { + InvalidateWritingSystemsCache(connection); + } + } + + private void SetupCollations(SqliteConnection connection, WritingSystem[] writingSystems) + { + foreach (var writingSystem in writingSystems) + { + SetupCollation(connection, writingSystem); + } + } + + private void SetupCollation(SqliteConnection connection, WritingSystem writingSystem) + { + CompareInfo compareInfo; + try + { + //todo use ICU/SLDR instead + compareInfo = CultureInfo.CreateSpecificCulture(writingSystem.WsId.Code).CompareInfo; + } + catch (Exception e) + { + logger.LogError(e, "Failed to create compare info for '{WritingSystemId}'", writingSystem.WsId); + compareInfo = CultureInfo.InvariantCulture.CompareInfo; + } + connection.CreateCollation(SqlSortingExtensions.CollationName(writingSystem), + //todo use custom comparison based on the writing system + compareInfo, + static (compareInfo, x, y) => compareInfo.Compare(x, y, CompareOptions.IgnoreCase)); + } +} diff --git a/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs b/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs index e58b82071..417745265 100644 --- a/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs +++ b/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs @@ -1,13 +1,6 @@ -using System.Data.Common; -using System.Data.SQLite; -using 
System.Globalization; +using System.Data.SQLite; using System.Linq.Expressions; using LinqToDB; -using Microsoft.Data.Sqlite; -using Microsoft.EntityFrameworkCore; -using Microsoft.EntityFrameworkCore.Diagnostics; -using Microsoft.Extensions.Caching.Memory; -using Microsoft.Extensions.Logging; using SIL.WritingSystems; namespace LcmCrdt.Data; @@ -35,118 +28,3 @@ internal static string CollationName(WritingSystem ws) return $"NOCASE_WS_{ws.WsId}"; } } - -public class SetupCollationInterceptor(IMemoryCache cache, ILogger logger) : IDbConnectionInterceptor, ISaveChangesInterceptor -{ - private WritingSystem[] GetWritingSystems(LcmCrdtDbContext dbContext, DbConnection connection) - { - //todo this needs to be invalidated when the writing systems change - return cache.GetOrCreate(CacheKey(connection), - entry => - { - entry.SlidingExpiration = TimeSpan.FromMinutes(30); - try - { - - return dbContext.WritingSystems.ToArray(); - } - catch (SqliteException e) - { - return []; - } - }) ?? []; - } - - private static string CacheKey(DbConnection connection) - { - return $"writingSystems|{connection.ConnectionString}"; - } - - private void InvalidateWritingSystemsCache(DbConnection connection) - { - cache.Remove(CacheKey(connection)); - } - - public void ConnectionOpened(DbConnection connection, ConnectionEndEventData eventData) - { - var context = (LcmCrdtDbContext?)eventData.Context; - if (context is null) throw new InvalidOperationException("context is null"); - var sqliteConnection = (SqliteConnection)connection; - SetupCollations(sqliteConnection, GetWritingSystems(context, connection)); - - //setup general use collation - sqliteConnection.CreateCollation(SqlSortingExtensions.CollateUnicodeNoCase, - CultureInfo.CurrentCulture.CompareInfo, - (compareInfo, x, y) => compareInfo.Compare(x, y, CompareOptions.IgnoreCase)); - } - - public Task ConnectionOpenedAsync(DbConnection connection, - ConnectionEndEventData eventData, - CancellationToken cancellationToken = default) - { - 
ConnectionOpened(connection, eventData); - return Task.CompletedTask; - } - - public InterceptionResult SavingChanges(DbContextEventData eventData, InterceptionResult result) - { - UpdateCollationsOnSave(eventData.Context); - return result; - } - - public ValueTask> SavingChangesAsync(DbContextEventData eventData, - InterceptionResult result, - CancellationToken cancellationToken = default) - { - UpdateCollationsOnSave(eventData.Context); - return ValueTask.FromResult(result); - } - - private void UpdateCollationsOnSave(DbContext? dbContext) - { - if (dbContext is null) return; - var connection = (SqliteConnection)dbContext.Database.GetDbConnection(); - bool updateWs = false; - foreach (var entityEntry in dbContext.ChangeTracker.Entries()) - { - if (entityEntry.State is EntityState.Added or EntityState.Modified) - { - var writingSystem = entityEntry.Entity; - SetupCollation(connection, writingSystem); - updateWs = true; - } - } - - if (updateWs) - { - InvalidateWritingSystemsCache(connection); - } - } - - private void SetupCollations(SqliteConnection connection, WritingSystem[] writingSystems) - { - foreach (var writingSystem in writingSystems) - { - SetupCollation(connection, writingSystem); - } - } - - private void SetupCollation(SqliteConnection connection, WritingSystem writingSystem) - { - CompareInfo compareInfo; - try - { - //todo use ICU/SLDR instead - compareInfo = CultureInfo.CreateSpecificCulture(writingSystem.WsId.Code).CompareInfo; - } - catch (Exception e) - { - logger.LogError(e, "Failed to create compare info for '{WritingSystemId}'", writingSystem.WsId); - compareInfo = CultureInfo.InvariantCulture.CompareInfo; - } - connection.CreateCollation(SqlSortingExtensions.CollationName(writingSystem), - //todo use custom comparison based on the writing system - compareInfo, - static (compareInfo, x, y) => compareInfo.Compare(x, y, CompareOptions.IgnoreCase)); - } -} From 3223a256dfb44a4a953d31140b516353ce056fe2 Mon Sep 17 00:00:00 2001 From: Kevin Hahn 
Date: Fri, 29 Nov 2024 13:47:29 +0700 Subject: [PATCH 3/5] use a span-based comparison overload for custom collation to avoid allocating strings --- .../LcmCrdt/Data/SetupCollationInterceptor.cs | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/backend/FwLite/LcmCrdt/Data/SetupCollationInterceptor.cs b/backend/FwLite/LcmCrdt/Data/SetupCollationInterceptor.cs index 0c3fdc971..1a9a1033a 100644 --- a/backend/FwLite/LcmCrdt/Data/SetupCollationInterceptor.cs +++ b/backend/FwLite/LcmCrdt/Data/SetupCollationInterceptor.cs @@ -1,5 +1,7 @@ -using System.Data.Common; +using System.Data; +using System.Data.Common; using System.Globalization; +using System.Text; using Microsoft.Data.Sqlite; using Microsoft.EntityFrameworkCore; using Microsoft.EntityFrameworkCore.Diagnostics; @@ -8,7 +10,7 @@ namespace LcmCrdt.Data; -internal class SetupCollationInterceptor(IMemoryCache cache, ILogger logger) : IDbConnectionInterceptor, ISaveChangesInterceptor +public class SetupCollationInterceptor(IMemoryCache cache, ILogger logger) : IDbConnectionInterceptor, ISaveChangesInterceptor { private WritingSystem[] GetWritingSystems(LcmCrdtDbContext dbContext, DbConnection connection) { @@ -116,9 +118,35 @@ private void SetupCollation(SqliteConnection connection, WritingSystem writingSy logger.LogError(e, "Failed to create compare info for '{WritingSystemId}'", writingSystem.WsId); compareInfo = CultureInfo.InvariantCulture.CompareInfo; } - connection.CreateCollation(SqlSortingExtensions.CollationName(writingSystem), - //todo use custom comparison based on the writing system + + //todo use custom comparison based on the writing system + CreateSpanCollation(connection, SqlSortingExtensions.CollationName(writingSystem), compareInfo, static (compareInfo, x, y) => compareInfo.Compare(x, y, CompareOptions.IgnoreCase)); } + + //this is a premature optimization, but it avoids creating strings for each comparison and instead uses spans which avoids allocations + //if 
the new comparison function does not support spans then we can use SqliteConnection.CreateCollation instead which works with strings + private void CreateSpanCollation(SqliteConnection connection, + string name, T state, + Func, ReadOnlySpan, int> compare) + { + if (connection.State != ConnectionState.Open) + throw new InvalidOperationException("Unable to create custom collation Connection must be open."); + var rc = SQLitePCL.raw.sqlite3__create_collation_utf8(connection.Handle, + name, + Tuple.Create(state, compare), + static (s, x, y) => + { + var (state, compare) = (Tuple, ReadOnlySpan, int>>) s; + Span xSpan = stackalloc char[Encoding.UTF8.GetCharCount(x)]; + Span ySpan = stackalloc char[Encoding.UTF8.GetCharCount(y)]; + Encoding.UTF8.GetChars(x, xSpan); + Encoding.UTF8.GetChars(y, ySpan); + + return compare(state, xSpan, ySpan); + }); + SqliteException.ThrowExceptionForRC(rc, connection.Handle); + + } } From e32d297ae77eb4dc4f82c7de747dff62b657cfc1 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Fri, 29 Nov 2024 14:19:28 +0700 Subject: [PATCH 4/5] convert headword to lowercase when sorting in mongo --- backend/LfClassicData/LfClassicMiniLcmApi.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/LfClassicData/LfClassicMiniLcmApi.cs b/backend/LfClassicData/LfClassicMiniLcmApi.cs index cc7945e01..c8c25c8fa 100644 --- a/backend/LfClassicData/LfClassicMiniLcmApi.cs +++ b/backend/LfClassicData/LfClassicMiniLcmApi.cs @@ -206,7 +206,7 @@ private async IAsyncEnumerable Query(QueryOptions? options = null, string new BsonDocument("$ne", new BsonArray { new BsonDocument("$trim", new BsonDocument("input", $"$citationForm.{sortWs}.value")), "" }), }) }, - { "then", $"$citationForm.{sortWs}.value" }, + { "then", new BsonDocument("$toLower", $"$citationForm.{sortWs}.value") }, { "else", new BsonDocument("$cond", new BsonDocument { { "if", new BsonDocument("$and", new BsonArray @@ -216,7 +216,7 @@ private async IAsyncEnumerable Query(QueryOptions? 
options = null, string new BsonDocument("$ne", new BsonArray { new BsonDocument("$trim", new BsonDocument("input", $"$lexeme.{sortWs}.value")), "" }), }) }, - { "then", $"$lexeme.{sortWs}.value" }, + { "then", new BsonDocument("$toLower", $"$lexeme.{sortWs}.value") }, { "else", "" } }) } From 2465d7e0cdb73f1e1556026182efaf3762432ef5 Mon Sep 17 00:00:00 2001 From: Kevin Hahn Date: Fri, 29 Nov 2024 22:07:47 +0700 Subject: [PATCH 5/5] Update backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs Co-authored-by: Tim Haasdyk --- backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs b/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs index 417745265..8e5465fa2 100644 --- a/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs +++ b/backend/FwLite/LcmCrdt/Data/SqlSortingExtensions.cs @@ -13,7 +13,7 @@ public static class SqlSortingExtensions internal static string CollateUnicode(this string value, WritingSystem ws) { //could optionally just return the value here, but it would work differently than sql - throw new InvalidOperationException("CollateUnicode is server-side only API."); + throw new InvalidOperationException("CollateUnicode is a LinqToDB only API."); } private static Expression> CollateUnicodeExpression()