-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
* define custom collations per writing system based on wsId and use for sorting * use a span-based comparison overload for custom collation to avoid allocating strings * convert headword to lowercase when sorting in mongo --------- Co-authored-by: Tim Haasdyk <[email protected]>
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
using FwDataMiniLcmBridge.Tests.Fixtures; | ||
|
||
namespace FwDataMiniLcmBridge.Tests.MiniLcmTests; | ||
|
||
/// <summary>
/// Runs the shared <see cref="SortingTestsBase"/> suite against an API obtained from
/// <see cref="ProjectLoaderFixture"/>.
/// </summary>
[Collection(ProjectLoaderFixture.Name)]
public class SortingTests(ProjectLoaderFixture fixture) : SortingTestsBase
{
    // Hands the base class a project API created by the fixture, wrapped in an already-completed task.
    protected override Task<IMiniLcmApi> NewApi()
    {
        IMiniLcmApi projectApi = fixture.NewProjectApi("sorting-test", "en", "en");
        return Task.FromResult(projectApi);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
namespace LcmCrdt.Tests.MiniLcmTests; | ||
|
||
/// <summary>
/// Runs the shared <see cref="SortingTestsBase"/> suite against the API exposed by a
/// <see cref="MiniLcmApiFixture"/> owned by this test class.
/// </summary>
public class SortingTests : SortingTestsBase
{
    // The fixture is created per test class instance and torn down in DisposeAsync.
    private readonly MiniLcmApiFixture _fixture = new MiniLcmApiFixture();

    // Initializes the fixture first, then exposes its API to the base class.
    protected override async Task<IMiniLcmApi> NewApi()
    {
        await _fixture.InitializeAsync();
        return _fixture.Api;
    }

    // Base cleanup runs before the fixture is disposed.
    public override async Task DisposeAsync()
    {
        await base.DisposeAsync();
        await _fixture.DisposeAsync();
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
using System.Data; | ||
using System.Data.Common; | ||
using System.Globalization; | ||
using System.Text; | ||
using Microsoft.Data.Sqlite; | ||
using Microsoft.EntityFrameworkCore; | ||
using Microsoft.EntityFrameworkCore.Diagnostics; | ||
using Microsoft.Extensions.Caching.Memory; | ||
using Microsoft.Extensions.Logging; | ||
|
||
namespace LcmCrdt.Data; | ||
|
||
/// <summary>
/// EF Core interceptor that registers custom SQLite collations.
/// When a connection is opened, one case-insensitive collation is registered per known writing system
/// (named by <see cref="SqlSortingExtensions.CollationName"/>) plus the general purpose
/// <see cref="SqlSortingExtensions.CollateUnicodeNoCase"/> collation.
/// When changes are saved, collations for added or modified writing systems are (re)registered on the
/// context's connection and the cached writing system list for that connection string is dropped.
/// </summary>
public class SetupCollationInterceptor(IMemoryCache cache, ILogger<SetupCollationInterceptor> logger) : IDbConnectionInterceptor, ISaveChangesInterceptor
{
    // Largest decoded operand (in chars) that is placed on the stack inside the collation callback.
    // Longer values are decoded into pooled arrays so arbitrarily long database text cannot overflow the stack.
    private const int MaxStackallocChars = 256;

    /// <summary>
    /// Returns the writing systems for the given connection, cached per connection string with a
    /// 30 minute sliding expiration. The cache entry is removed by <see cref="UpdateCollationsOnSave"/>
    /// when a writing system is added or modified.
    /// </summary>
    private WritingSystem[] GetWritingSystems(LcmCrdtDbContext dbContext, DbConnection connection)
    {
        return cache.GetOrCreate(CacheKey(connection),
            entry =>
            {
                entry.SlidingExpiration = TimeSpan.FromMinutes(30);
                try
                {
                    return dbContext.WritingSystems.ToArray();
                }
                catch (SqliteException e)
                {
                    // Best effort: presumably the table does not exist yet (e.g. before migrations) - TODO confirm.
                    // Fall back to no writing systems, but leave a trace instead of swallowing silently.
                    logger.LogWarning(e, "Failed to load writing systems while setting up collations");
                    return [];
                }
            }) ?? [];
    }

    /// <summary>Builds the cache key under which the writing systems of a connection are stored.</summary>
    private static string CacheKey(DbConnection connection)
    {
        return $"writingSystems|{connection.ConnectionString}";
    }

    /// <summary>Drops the cached writing systems for the given connection.</summary>
    private void InvalidateWritingSystemsCache(DbConnection connection)
    {
        cache.Remove(CacheKey(connection));
    }

    /// <summary>
    /// Registers the per writing system collations and the general use collation on the opened connection.
    /// </summary>
    /// <exception cref="InvalidOperationException">The event data carries no context.</exception>
    public void ConnectionOpened(DbConnection connection, ConnectionEndEventData eventData)
    {
        var context = (LcmCrdtDbContext?)eventData.Context;
        if (context is null) throw new InvalidOperationException("context is null");
        var sqliteConnection = (SqliteConnection)connection;
        SetupCollations(sqliteConnection, GetWritingSystems(context, connection));

        //setup general use collation
        sqliteConnection.CreateCollation(SqlSortingExtensions.CollateUnicodeNoCase,
            CultureInfo.CurrentCulture.CompareInfo,
            (compareInfo, x, y) => compareInfo.Compare(x, y, CompareOptions.IgnoreCase));
    }

    /// <summary>Async entry point; the work is synchronous so it delegates to <see cref="ConnectionOpened"/>.</summary>
    public Task ConnectionOpenedAsync(DbConnection connection,
        ConnectionEndEventData eventData,
        CancellationToken cancellationToken = default)
    {
        ConnectionOpened(connection, eventData);
        return Task.CompletedTask;
    }

    /// <summary>Updates collations for changed writing systems, then lets the save proceed unchanged.</summary>
    public InterceptionResult<int> SavingChanges(DbContextEventData eventData, InterceptionResult<int> result)
    {
        UpdateCollationsOnSave(eventData.Context);
        return result;
    }

    /// <summary>Async counterpart of <see cref="SavingChanges"/>; the work itself is synchronous.</summary>
    public ValueTask<InterceptionResult<int>> SavingChangesAsync(DbContextEventData eventData,
        InterceptionResult<int> result,
        CancellationToken cancellationToken = default)
    {
        UpdateCollationsOnSave(eventData.Context);
        return ValueTask.FromResult(result);
    }

    /// <summary>
    /// Registers a collation for every tracked writing system that is added or modified and, if there was
    /// at least one, invalidates the cached writing system list. Does nothing when there is no context.
    /// </summary>
    private void UpdateCollationsOnSave(DbContext? dbContext)
    {
        if (dbContext is null) return;
        var connection = (SqliteConnection)dbContext.Database.GetDbConnection();
        bool updateWs = false;
        foreach (var entityEntry in dbContext.ChangeTracker.Entries<WritingSystem>())
        {
            if (entityEntry.State is EntityState.Added or EntityState.Modified)
            {
                var writingSystem = entityEntry.Entity;
                SetupCollation(connection, writingSystem);
                updateWs = true;
            }
        }

        if (updateWs)
        {
            InvalidateWritingSystemsCache(connection);
        }
    }

    /// <summary>Registers a collation for each of the given writing systems.</summary>
    private void SetupCollations(SqliteConnection connection, WritingSystem[] writingSystems)
    {
        foreach (var writingSystem in writingSystems)
        {
            SetupCollation(connection, writingSystem);
        }
    }

    /// <summary>
    /// Registers a case-insensitive collation for one writing system, based on the culture created from
    /// its writing system code. If no culture can be created, the invariant culture is used and the
    /// failure is logged.
    /// </summary>
    private void SetupCollation(SqliteConnection connection, WritingSystem writingSystem)
    {
        CompareInfo compareInfo;
        try
        {
            //todo use ICU/SLDR instead
            compareInfo = CultureInfo.CreateSpecificCulture(writingSystem.WsId.Code).CompareInfo;
        }
        catch (Exception e)
        {
            logger.LogError(e, "Failed to create compare info for '{WritingSystemId}'", writingSystem.WsId);
            compareInfo = CultureInfo.InvariantCulture.CompareInfo;
        }

        //todo use custom comparison based on the writing system
        CreateSpanCollation(connection, SqlSortingExtensions.CollationName(writingSystem),
            compareInfo,
            static (compareInfo, x, y) => compareInfo.Compare(x, y, CompareOptions.IgnoreCase));
    }

    //this is a premature optimization, but it avoids creating strings for each comparison and instead uses spans which avoids allocations
    //if the new comparison function does not support spans then we can use SqliteConnection.CreateCollation instead which works with strings
    /// <summary>
    /// Registers a collation named <paramref name="name"/> whose comparison receives the two operands as
    /// char spans decoded from the UTF-8 bytes handed over by SQLite.
    /// </summary>
    /// <param name="connection">An open connection to register the collation on.</param>
    /// <param name="name">Name of the collation.</param>
    /// <param name="state">State passed to every invocation of <paramref name="compare"/>.</param>
    /// <param name="compare">The comparison to run on the decoded operands.</param>
    /// <exception cref="InvalidOperationException">The connection is not open.</exception>
    private void CreateSpanCollation<T>(SqliteConnection connection,
        string name, T state,
        Func<T, ReadOnlySpan<char>, ReadOnlySpan<char>, int> compare)
    {
        if (connection.State != ConnectionState.Open)
            throw new InvalidOperationException("Unable to create custom collation Connection must be open.");
        var rc = SQLitePCL.raw.sqlite3__create_collation_utf8(connection.Handle,
            name,
            Tuple.Create(state, compare),
            static (s, x, y) =>
            {
                var (state, compare) = (Tuple<T, Func<T, ReadOnlySpan<char>, ReadOnlySpan<char>, int>>) s;
                var xLength = Encoding.UTF8.GetCharCount(x);
                var yLength = Encoding.UTF8.GetCharCount(y);
                char[]? xRented = null;
                char[]? yRented = null;
                try
                {
                    // Short operands are decoded on the stack; long ones use pooled buffers so that
                    // the size of a database value can never overflow the stack.
                    Span<char> xSpan = xLength <= MaxStackallocChars
                        ? stackalloc char[xLength]
                        : (xRented = System.Buffers.ArrayPool<char>.Shared.Rent(xLength)).AsSpan(0, xLength);
                    Span<char> ySpan = yLength <= MaxStackallocChars
                        ? stackalloc char[yLength]
                        : (yRented = System.Buffers.ArrayPool<char>.Shared.Rent(yLength)).AsSpan(0, yLength);
                    Encoding.UTF8.GetChars(x, xSpan);
                    Encoding.UTF8.GetChars(y, ySpan);

                    return compare(state, xSpan, ySpan);
                }
                finally
                {
                    if (xRented is not null) System.Buffers.ArrayPool<char>.Shared.Return(xRented);
                    if (yRented is not null) System.Buffers.ArrayPool<char>.Shared.Return(yRented);
                }
            });
        SqliteException.ThrowExceptionForRC(rc, connection.Handle);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
using System.Data.SQLite; | ||
using System.Linq.Expressions; | ||
using LinqToDB; | ||
using SIL.WritingSystems; | ||
|
||
namespace LcmCrdt.Data; | ||
|
||
/// <summary>
/// Names of the custom collations and the LinqToDB helper used to apply the per writing system
/// collation inside a query.
/// </summary>
public static class SqlSortingExtensions
{
    /// <summary>Name of the general use case-insensitive collation.</summary>
    public const string CollateUnicodeNoCase = "NOCASE_UNICODE";

    /// <summary>
    /// Query-only marker: LinqToDB substitutes <see cref="CollateUnicodeExpression"/> for calls to this
    /// method. Invoking it directly always throws.
    /// </summary>
    /// <exception cref="InvalidOperationException">Always, when called outside of a LinqToDB query.</exception>
    //could optionally just return the value here, but it would work differently than sql
    [ExpressionMethod(nameof(CollateUnicodeExpression))]
    internal static string CollateUnicode(this string value, WritingSystem ws) =>
        throw new InvalidOperationException("CollateUnicode is a LinqToDB only API.");

    // Expression used in place of CollateUnicode: collates the value with the writing system's collation.
    //todo maybe in the future we use a custom collation based on the writing system
    private static Expression<Func<string, WritingSystem, string>> CollateUnicodeExpression() =>
        (text, writingSystem) => text.Collate(CollationName(writingSystem));

    /// <summary>Builds the collation name for a writing system from its writing system id.</summary>
    //don't use ':' in the name, it won't work
    internal static string CollationName(WritingSystem ws) => $"NOCASE_WS_{ws.WsId}";
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
namespace MiniLcm.Tests; | ||
|
||
/// <summary>
/// Shared sorting tests: entries are created in random order and must come back from the API in the
/// order given by each test case.
/// </summary>
public abstract class SortingTestsBase : MiniLcmTestBase
{
    // After the base setup, creates an "en" analysis and an "en-US" vernacular writing system.
    public override async Task InitializeAsync()
    {
        await base.InitializeAsync();
        await Api.CreateWritingSystem(WritingSystemType.Analysis,
            NewEnglishWritingSystem(WritingSystemType.Analysis, "en"));
        await Api.CreateWritingSystem(WritingSystemType.Vernacular,
            NewEnglishWritingSystem(WritingSystemType.Vernacular, "en-US"));
    }

    // Builds an English writing system with a fresh id; only the type and the ws id vary between callers.
    private static WritingSystem NewEnglishWritingSystem(WritingSystemType type, string wsId)
    {
        return new WritingSystem()
        {
            Id = Guid.NewGuid(),
            Type = type,
            WsId = wsId,
            Name = "English",
            Abbreviation = "En",
            Font = "Arial",
            Exemplars = []
        };
    }

    // Creates an entry whose "en" lexeme form is the given headword.
    private Task CreateEntry(string headword) =>
        Api.CreateEntry(new() { LexemeForm = { { "en", headword } }, });

    // Cyrillic upper/lower case letters used to build non-ASCII test cases.
    // ReSharper disable InconsistentNaming
    const string Ru_A= "\u0410";
    const string Ru_a = "\u0430";
    const string Ru_Б= "\u0411";
    const string Ru_б = "\u0431";
    const string Ru_В= "\u0412";
    const string Ru_в = "\u0432";
    // ReSharper restore InconsistentNaming

    // Each case lists headwords, comma separated, in the order the API is expected to return them.
    [Theory]
    [InlineData("aa,ab,ac")]
    [InlineData("aa,Ab,ac")]
    [InlineData($"{Ru_a}{Ru_a},{Ru_a}{Ru_б},{Ru_a}{Ru_в}")]
    [InlineData($"{Ru_a}{Ru_a},{Ru_A}{Ru_б},{Ru_a}{Ru_в}")]
    public async Task EntriesAreSorted(string headwords)
    {
        var expectedOrder = headwords.Split(',');

        // Insert in a random order so the result cannot depend on creation order.
        var shuffled = expectedOrder.OrderBy(_ => Random.Shared.Next());
        foreach (var word in shuffled)
        {
            await CreateEntry(word);
        }

        var actualOrder = await Api.GetEntries().Select(entry => entry.Headword()).ToArrayAsync();
        actualOrder.Should().Equal(expectedOrder);
    }
}