From b3b568ddf7eb05b5dde3715bfb6c97147864f94d Mon Sep 17 00:00:00 2001 From: Matt Lyons Date: Mon, 7 Oct 2024 14:06:19 -0500 Subject: [PATCH] Fix problems with check result offset calculations and filtering (#1198) --- c-sharp-tests/Checks/CheckRunResultTests.cs | 4 +- .../ParatextUtils/UsfnBookIndexerTests.cs | 103 +++++++++++ .../Projects/LocalParatextProjectsTests.cs | 1 - c-sharp/Checks/CheckLocation.cs | 4 +- c-sharp/Checks/CheckResultsRecorder.cs | 108 +++++++---- c-sharp/Checks/CheckRunResult.cs | 21 ++- c-sharp/Checks/CheckRunner.cs | 115 ++++++++---- c-sharp/ParatextUtils/UsfmBookIndexer.cs | 173 ++++++++++++++++++ .../src/checking-results-list.web-view.tsx | 1 + 9 files changed, 447 insertions(+), 83 deletions(-) create mode 100644 c-sharp-tests/ParatextUtils/UsfnBookIndexerTests.cs create mode 100644 c-sharp/ParatextUtils/UsfmBookIndexer.cs diff --git a/c-sharp-tests/Checks/CheckRunResultTests.cs b/c-sharp-tests/Checks/CheckRunResultTests.cs index 131be63e88..318e1d9ec0 100644 --- a/c-sharp-tests/Checks/CheckRunResultTests.cs +++ b/c-sharp-tests/Checks/CheckRunResultTests.cs @@ -28,13 +28,13 @@ bool expectedResult CheckLocation start1 = new(vrefStart1, 1); VerseRef vrefEnd1 = new("GEN 1:2"); CheckLocation end1 = new(vrefEnd1, 5); - CheckRunResult checkRunResult1 = new("checkId", "projectId", "message", start1, end1); + CheckRunResult checkRunResult1 = new("checkId", "projectId", "message", "", start1, end1); VerseRef vrefStart2 = new(verseRefStart2); CheckLocation start2 = new(vrefStart2, offsetStart2); VerseRef vrefEnd2 = new(verseRefEnd2); CheckLocation end2 = new(vrefEnd2, offsetEnd2); - CheckRunResult checkRunResult2 = new(checkId2, projectId2, message2, start2, end2); + CheckRunResult checkRunResult2 = new(checkId2, projectId2, message2, "", start2, end2); Assert.That(checkRunResult1 == checkRunResult2, Is.EqualTo(expectedResult)); Assert.That(checkRunResult1.Equals(checkRunResult2), Is.EqualTo(expectedResult)); diff --git 
a/c-sharp-tests/ParatextUtils/UsfnBookIndexerTests.cs b/c-sharp-tests/ParatextUtils/UsfnBookIndexerTests.cs new file mode 100644 index 0000000000..6b1cff5d6e --- /dev/null +++ b/c-sharp-tests/ParatextUtils/UsfnBookIndexerTests.cs @@ -0,0 +1,103 @@ +using Paranext.DataProvider.ParatextUtils; + +namespace ParatextUtils.Tests; + +[TestFixture] +public class UsfmBookIndexerTests +{ + private static readonly string BOOK_CONTENT = """ +\id PHM Philemon +\h Philemon +\toc1 Paul's Letter to Philemon +\c 1 +\p +\v 1 Verse 1 +\p +\v 2 Verse 2 +\v 3 Verse 3 +\c 2 +\v 1 Another verse 1 +\v 2 Another verse 2 +"""; + + private UsfmBookIndexer _indexer; + + [SetUp] + public void Setup() + { + _indexer = new UsfmBookIndexer(BOOK_CONTENT); + } + + [TestCase(1, 0, 0)] + [TestCase(1, 1, 69)] + [TestCase(1, 2, 85)] + [TestCase(1, 3, 98)] + [TestCase(2, 0, 111)] + [TestCase(2, 1, 116)] + [TestCase(2, 2, 137)] + public void GetIndex_NormalInput_ReturnsNormalValues( + int chapterNum, + int verseNum, + int? expectedIndex + ) + { + var result = _indexer.GetIndex(chapterNum, verseNum); + Assert.That(result, Is.Not.Null); + Assert.That(result, Is.EqualTo(expectedIndex)); + } + + [TestCase(1, 4)] + [TestCase(2, 3)] + [TestCase(3, 1)] + [TestCase(999999, 1)] + public void GetIndex_NormalInput_ReturnsNullValues(int chapterNum, int verseNum) + { + var result = _indexer.GetIndex(chapterNum, verseNum); + Assert.That(result, Is.Null); + } + + [TestCase(0, 1)] + [TestCase(1, -1)] + [TestCase(-1, 1)] + [TestCase(-1, -1)] + public void GetIndex_InvalidInput_Throws(int chapterNum, int verseNum) + { + Assert.Throws(() => _indexer.GetIndex(chapterNum, verseNum)); + } + + [TestCase(1, 0, 69)] + [TestCase(1, 1, 85)] + [TestCase(1, 2, 98)] + [TestCase(1, 3, 111)] + [TestCase(1, 999, 111)] + [TestCase(2, 0, 116)] + [TestCase(2, 1, 137)] + public void GetIndexFollowing_NormalInput_ReturnsNormalValues( + int chapterNum, + int verseNum, + int? 
expectedIndex + ) + { + var result = _indexer.GetIndexFollowing(chapterNum, verseNum); + Assert.That(result, Is.Not.Null); + Assert.That(result, Is.EqualTo(expectedIndex)); + } + + [TestCase(2, 2)] + public void GetIndexFollowing_NormalInput_ReturnsNullValues(int chapterNum, int verseNum) + { + var result = _indexer.GetIndexFollowing(chapterNum, verseNum); + Assert.That(result, Is.Null); + } + + [TestCase(0, 1)] + [TestCase(1, -1)] + [TestCase(-1, 1)] + [TestCase(-1, -1)] + public void GetIndexFollowing_InvalidInput_Throws(int chapterNum, int verseNum) + { + Assert.Throws( + () => _indexer.GetIndexFollowing(chapterNum, verseNum) + ); + } +} diff --git a/c-sharp-tests/Projects/LocalParatextProjectsTests.cs b/c-sharp-tests/Projects/LocalParatextProjectsTests.cs index aa96f23d50..929202adb1 100644 --- a/c-sharp-tests/Projects/LocalParatextProjectsTests.cs +++ b/c-sharp-tests/Projects/LocalParatextProjectsTests.cs @@ -1,5 +1,4 @@ using System.Diagnostics.CodeAnalysis; -using Paranext.DataProvider.ParatextUtils; using Paranext.DataProvider.Projects; namespace TestParanextDataProvider.Projects; diff --git a/c-sharp/Checks/CheckLocation.cs b/c-sharp/Checks/CheckLocation.cs index d6b0b9aed1..54ebbc23b3 100644 --- a/c-sharp/Checks/CheckLocation.cs +++ b/c-sharp/Checks/CheckLocation.cs @@ -17,10 +17,10 @@ namespace Paranext.DataProvider.Checks; /// nothing about USJ and JsonPath, so we can only create /// representations in this class. See the TypeScript check data types for more details. /// -public sealed class CheckLocation(VerseRef verseRef, int offset): IEquatable +public sealed class CheckLocation(VerseRef verseRef, int offset) : IEquatable { public VerseRef VerseRef { get; } = verseRef; - public int Offset { get; } = offset; + public int Offset { get; set; } = offset; public override bool Equals(object? 
obj) { diff --git a/c-sharp/Checks/CheckResultsRecorder.cs b/c-sharp/Checks/CheckResultsRecorder.cs index 03290e5274..8ce8a8de08 100644 --- a/c-sharp/Checks/CheckResultsRecorder.cs +++ b/c-sharp/Checks/CheckResultsRecorder.cs @@ -1,3 +1,4 @@ +using Paranext.DataProvider.ParatextUtils; using Paratext.Checks; using Paratext.Data; using Paratext.Data.Checking; @@ -15,7 +16,7 @@ namespace Paranext.DataProvider.Checks; /// public sealed class CheckResultsRecorder(string checkId, string projectId) : IRecordCheckError { - public int CurrentBookNumber { get; set; } = 0; + public List CheckRunResults { get; } = []; public void RecordError( ITextToken token, @@ -27,16 +28,18 @@ public void RecordError( VerseListItemType type = VerseListItemType.Error ) { - var chapterVerse = token.ScrRefString.Split(":"); - int chapterNumber = int.Parse(chapterVerse[0]); - int verseNumber = GetVerseNumber(chapterVerse[1]); - VerseRef verseRef = new (CurrentBookNumber, chapterNumber, verseNumber); - CheckRunResults.Add(new CheckRunResult( - checkId, - projectId, - message, - new CheckLocation(verseRef, offset), - new CheckLocation(verseRef, offset + length))); + CheckRunResults.Add( + new CheckRunResult( + checkId, + projectId, + message, + // ParatextData adds a space at the end sometimes that isn't in the text + token.Text.TrimEnd(), + // Actual offsets will be calculated below after results have been filtered + new CheckLocation(token.VerseRef, offset), + new CheckLocation(token.VerseRef, 0) + ) + ); } public void RecordError( @@ -49,34 +52,77 @@ public void RecordError( VerseListItemType type = VerseListItemType.Error ) { - CheckRunResults.Add(new CheckRunResult( - checkId, - projectId, - message, - new CheckLocation(vref, selectionStart), - new CheckLocation(vref, selectionStart + text.Length))); + CheckRunResults.Add( + new CheckRunResult( + checkId, + projectId, + message, + // ParatextData adds a space at the end sometimes that isn't in the text + text.TrimEnd(), + // Actual offsets 
will be calculated below after results have been filtered + new CheckLocation(vref, selectionStart), + new CheckLocation(vref, 0) + ) + ); } - public List CheckRunResults { get; } = []; - - private static int GetVerseNumber(string verseNumber) + /// + /// Remove all results that are within the given book and return them + /// + /// All results that were removed + public List TrimResultsFromBook(int bookNum) { - ArgumentException.ThrowIfNullOrWhiteSpace(verseNumber); - if (!IsDigit(verseNumber[0])) - throw new ArgumentException($"verseNumber must start with an integer: {verseNumber}"); + var retVal = new List(); + for (int i = CheckRunResults.Count - 1; i >= 0; i--) + { + var result = CheckRunResults[i]; + var verseRef = result.Start.VerseRef; + if (verseRef.BookNum == bookNum) + { + retVal.Add(result); + CheckRunResults.RemoveAt(i); + } + } + return retVal; + } - int lastIndex = 1; - while (lastIndex < verseNumber.Length) + /// + /// Remove all results that are not within the given range + /// + public void FilterResults(CheckInputRange range) + { + for (int i = CheckRunResults.Count - 1; i >= 0; i--) { - if (!IsDigit(verseNumber[lastIndex])) - break; - lastIndex++; + var result = CheckRunResults[i]; + var verseRef = result.Start.VerseRef; + if (!range.IsWithinRange(result.ProjectId, verseRef.BookNum, verseRef.ChapterNum)) + CheckRunResults.RemoveAt(i); } - return int.Parse(verseNumber[..lastIndex]); } - private static bool IsDigit(char c) + /// + /// Given an indexed view of USFM text, determine the actual offsets to include for each result + /// + public void CalculateActualOffsets(UsfmBookIndexer indexer) { - return c >= '0' && c <= '9'; + foreach (var result in CheckRunResults) + { + var verseIndex = indexer.GetIndex(result.Start.VerseRef); + if (!verseIndex.HasValue) + { + result.Start.Offset = 0; + continue; + } + + var textIndex = indexer.Usfm.IndexOf(result.Text, verseIndex.Value); + if (textIndex < 0) + { + result.Start.Offset = 0; + continue; + } + + 
result.Start.Offset += textIndex - verseIndex.Value; + result.End.Offset = result.Start.Offset + result.Text.Length; + } } } diff --git a/c-sharp/Checks/CheckRunResult.cs b/c-sharp/Checks/CheckRunResult.cs index dbf82278ed..f50617413f 100644 --- a/c-sharp/Checks/CheckRunResult.cs +++ b/c-sharp/Checks/CheckRunResult.cs @@ -1,3 +1,5 @@ +using System.Text.Json.Serialization; + namespace Paranext.DataProvider.Checks; /// @@ -8,13 +10,17 @@ public sealed class CheckRunResult( string checkId, string projectId, string messageFormatString, + string text, CheckLocation start, - CheckLocation end) - : IEquatable + CheckLocation end +) : IEquatable { public string CheckId { get; } = checkId; public string ProjectId { get; } = projectId; public string MessageFormatString { get; } = messageFormatString; + + [JsonIgnore] + public string Text { get; } = text; public CheckLocation Start { get; } = start; public CheckLocation End { get; } = end; @@ -29,10 +35,11 @@ public bool Equals(CheckRunResult? other) return false; return CheckId == other.CheckId - && ProjectId == other.ProjectId - && MessageFormatString == other.MessageFormatString - && Start == other.Start - && End == other.End; + && ProjectId == other.ProjectId + && MessageFormatString == other.MessageFormatString + && Text == other.Text + && Start == other.Start + && End == other.End; } public static bool operator ==(CheckRunResult a, CheckRunResult b) @@ -47,6 +54,6 @@ public bool Equals(CheckRunResult? 
other) public override int GetHashCode() { - return HashCode.Combine(CheckId, ProjectId, MessageFormatString, Start, End); + return HashCode.Combine(CheckId, ProjectId, MessageFormatString, Text, Start, End); } } diff --git a/c-sharp/Checks/CheckRunner.cs b/c-sharp/Checks/CheckRunner.cs index 4a27a1df66..1374db9ae9 100644 --- a/c-sharp/Checks/CheckRunner.cs +++ b/c-sharp/Checks/CheckRunner.cs @@ -3,6 +3,7 @@ using Paranext.DataProvider.JsonUtils; using Paranext.DataProvider.MessageHandlers; using Paranext.DataProvider.MessageTransports; +using Paranext.DataProvider.ParatextUtils; using Paranext.DataProvider.Projects; using Paratext.Checks; using Paratext.Data; @@ -23,7 +24,7 @@ internal class CheckRunner(PapiClient papiClient) private sealed class CheckForProject(ScriptureCheckBase check, string checkId, string projectId) { public ScriptureCheckBase Check { get; } = check; - public CheckResultsRecorder ResultsRecorder { get; } = new (checkId, projectId); + public CheckResultsRecorder ResultsRecorder { get; } = new(checkId, projectId); } #endregion @@ -36,25 +37,30 @@ private sealed class CheckForProject(ScriptureCheckBase check, string checkId, s // Note that CheckType.Schema is not available outside Paratext itself due to dependencies // It cannot be easily copied, either, without some refactoring - private readonly Dictionary _checkDetailsByCheckId = new() - { - { CheckType.Capitalization.InternalValue, new (CheckType.Capitalization) }, - { CheckType.ChapterVerse.InternalValue, new (CheckType.ChapterVerse) }, - { CheckType.Character.InternalValue, new (CheckType.Character) }, - { CheckType.Marker.InternalValue, new (CheckType.Marker) }, - { CheckType.MatchedPairs.InternalValue, new (CheckType.MatchedPairs) }, - { CheckType.Numbers.InternalValue, new (CheckType.Numbers) }, - { CheckType.ParagraphFinalPunctuation.InternalValue, new (CheckType.ParagraphFinalPunctuation) }, - { CheckType.Punctuation.InternalValue, new (CheckType.Punctuation) }, - { 
CheckType.Quotation.InternalValue, new (CheckType.Quotation) }, - { CheckType.QuotationTypes.InternalValue, new (CheckType.QuotationTypes) }, - { CheckType.QuotedText.InternalValue, new (CheckType.QuotedText) }, - { CheckType.Reference.InternalValue, new (CheckType.Reference) }, - { CheckType.RepeatedWord.InternalValue, new (CheckType.RepeatedWord) }, - }; + private readonly Dictionary _checkDetailsByCheckId = + new() + { + { CheckType.Capitalization.InternalValue, new(CheckType.Capitalization) }, + { CheckType.ChapterVerse.InternalValue, new(CheckType.ChapterVerse) }, + { CheckType.Character.InternalValue, new(CheckType.Character) }, + { CheckType.Marker.InternalValue, new(CheckType.Marker) }, + { CheckType.MatchedPairs.InternalValue, new(CheckType.MatchedPairs) }, + { CheckType.Numbers.InternalValue, new(CheckType.Numbers) }, + { + CheckType.ParagraphFinalPunctuation.InternalValue, + new(CheckType.ParagraphFinalPunctuation) + }, + { CheckType.Punctuation.InternalValue, new(CheckType.Punctuation) }, + { CheckType.Quotation.InternalValue, new(CheckType.Quotation) }, + { CheckType.QuotationTypes.InternalValue, new(CheckType.QuotationTypes) }, + { CheckType.QuotedText.InternalValue, new(CheckType.QuotedText) }, + { CheckType.Reference.InternalValue, new(CheckType.Reference) }, + { CheckType.RepeatedWord.InternalValue, new(CheckType.RepeatedWord) }, + }; private CheckInputRange[] _activeRanges = []; private readonly Dictionary _dataSourcesByProjectId = []; - private readonly Dictionary<(string checkId, string projectId), CheckForProject> _checksByIds = []; + private readonly Dictionary<(string checkId, string projectId), CheckForProject> _checksByIds = + []; private readonly object _dataProviderLock = new(); #endregion @@ -88,12 +94,23 @@ protected override ResponseToRequest HandleRequest(string functionName, JsonArra return functionName switch { - "disableCheck" => DisableCheck(args[0].Deserialize() ?? "", args.Count > 1 ? 
args[1].Deserialize() : null), - "enableCheck" => EnableCheck(args[0].Deserialize() ?? "", args[1].Deserialize() ?? ""), + "disableCheck" + => DisableCheck( + args[0].Deserialize() ?? "", + args.Count > 1 ? args[1].Deserialize() : null + ), + "enableCheck" + => EnableCheck( + args[0].Deserialize() ?? "", + args[1].Deserialize() ?? "" + ), "getActiveRanges" => GetActiveRanges(), "getAvailableChecks" => GetAvailableChecks(), "getCheckResults" => GetCheckResults(), - "setActiveRanges" => SetActiveRanges(CheckInputRangeConverter.CreateCheckInputRangeArray(args[1])), + "setActiveRanges" + => SetActiveRanges( + CheckInputRangeConverter.CreateCheckInputRangeArray(args[1]) + ), _ => ResponseToRequest.Failed($"Unknown function: {functionName}"), }; } @@ -105,7 +122,9 @@ protected override ResponseToRequest HandleRequest(string functionName, JsonArra private ResponseToRequest GetAvailableChecks() { - return ResponseToRequest.Succeeded(new List(_checkDetailsByCheckId.Values)); + return ResponseToRequest.Succeeded( + new List(_checkDetailsByCheckId.Values) + ); } private ResponseToRequest GetActiveRanges() @@ -123,9 +142,9 @@ private ResponseToRequest SetActiveRanges(CheckInputRange[]? ranges) throw new ArgumentException("Ranges cannot span between books"); } - foreach(var projectId in _activeRanges.Select(range => range.ProjectId).Distinct()) + foreach (var projectId in _activeRanges.Select(range => range.ProjectId).Distinct()) GetOrCreateDataSource(projectId).ScrText.TextChanged -= RerunChecks; - foreach(var projectId in ranges.Select(range => range.ProjectId).Distinct()) + foreach (var projectId in ranges.Select(range => range.ProjectId).Distinct()) GetOrCreateDataSource(projectId).ScrText.TextChanged += RerunChecks; _activeRanges = ranges; @@ -136,7 +155,7 @@ private ResponseToRequest SetActiveRanges(CheckInputRange[]? 
ranges) notifyOfUpdatedCheckResults |= producedNewOrDifferentResults; } - List updateEvents = [ DATA_TYPE_ACTIVE_RANGES ]; + List updateEvents = [DATA_TYPE_ACTIVE_RANGES]; if (notifyOfUpdatedCheckResults) updateEvents.Add(DATA_TYPE_CHECK_RESULTS); @@ -190,7 +209,7 @@ private ResponseToRequest DisableCheck(string checkId, string? projectId) { ArgumentException.ThrowIfNullOrEmpty(checkId); - List updateEvents = [ DATA_TYPE_AVAILABLE_CHECKS ]; + List updateEvents = [DATA_TYPE_AVAILABLE_CHECKS]; var checkDetails = _checkDetailsByCheckId[checkId]; if (string.IsNullOrEmpty(projectId)) @@ -213,9 +232,11 @@ private ResponseToRequest DisableCheck(string checkId, string? projectId) var projectIds = checkDetails.EnabledProjectIds; int resultsCount = _checksByIds[(checkId, projectId)].ResultsRecorder.CheckRunResults.Count; _checksByIds.Remove((checkId, projectId)); - Console.WriteLine(projectIds.Remove(projectId) - ? $"Disabled check {checkId} for project {projectId}" - : $"Project {projectId} was not enabled for check {checkId}"); + Console.WriteLine( + projectIds.Remove(projectId) + ? $"Disabled check {checkId} for project {projectId}" + : $"Project {projectId} was not enabled for check {checkId}" + ); if (resultsCount > 0) updateEvents.Add(DATA_TYPE_CHECK_RESULTS); @@ -228,7 +249,9 @@ private void RerunChecks(object? 
sender, TextChangedEventArgs e) var projectId = _dataSourcesByProjectId.First((kvp) => kvp.Value.ScrText == e.ScrText).Key; if (string.IsNullOrEmpty(projectId)) { - Console.WriteLine($"Attempted to run checks on project {e.ScrText.Guid} but couldn't find its project ID"); + Console.WriteLine( + $"Attempted to run checks on project {e.ScrText.Guid} but couldn't find its project ID" + ); return; } @@ -252,8 +275,8 @@ private bool RunChecksForProject(string projectId) // Text has to be tokenized for the checks before the checks can run var enabledChecksForProject = _checksByIds - .Where((kvp) => kvp.Key.projectId == projectId) - .Select((kvp) => kvp.Value.Check); + .Where((kvp) => kvp.Key.projectId == projectId) + .Select((kvp) => kvp.Value.Check); CheckDataFormat neededDataFormat = 0; foreach (var check in enabledChecksForProject) neededDataFormat |= check.NeededFormat; @@ -264,12 +287,19 @@ private bool RunChecksForProject(string projectId) // "0" chapter number means all chapters _dataSourcesByProjectId[projectId].GetText(range.Start.BookNum, 0, neededDataFormat); - foreach (var checkId in _checkDetailsByCheckId.Values - .Where((checkDetails) => checkDetails.EnabledProjectIds.Contains(projectId)) - .Select((checkDetails) => checkDetails.CheckId)) + var scrText = LocalParatextProjects.GetParatextProject(projectId); + var indexer = new UsfmBookIndexer(scrText.GetText(range.Start.BookNum)); + + foreach ( + var checkId in _checkDetailsByCheckId + .Values.Where( + (checkDetails) => checkDetails.EnabledProjectIds.Contains(projectId) + ) + .Select((checkDetails) => checkDetails.CheckId) + ) { var check = _checksByIds[(checkId, projectId)].Check; - bool newResultsReturned = RunCheck(checkId, check, range); + bool newResultsReturned = RunCheck(checkId, check, range, indexer); retVal |= newResultsReturned; } } @@ -279,7 +309,12 @@ private bool RunChecksForProject(string projectId) /// /// Returns true if the check produced any new or different results, false otherwise. 
/// - private bool RunCheck(string checkId, ScriptureCheckBase check, CheckInputRange range) + private bool RunCheck( + string checkId, + ScriptureCheckBase check, + CheckInputRange range, + UsfmBookIndexer indexer + ) { CheckResultsRecorder recorder; if (!_checksByIds.TryGetValue((checkId, range.ProjectId), out var data)) @@ -291,11 +326,11 @@ private bool RunCheck(string checkId, ScriptureCheckBase check, CheckInputRange else recorder = data.ResultsRecorder; - var removedItems = recorder.CheckRunResults.FindAll((result) => result.Start.VerseRef.BookNum == range.Start.BookNum); - recorder.CheckRunResults.RemoveAll((result) => result.Start.VerseRef.BookNum == range.Start.BookNum); + var removedItems = recorder.TrimResultsFromBook(range.Start.BookNum); int totalBeforeRunning = recorder.CheckRunResults.Count; - recorder.CurrentBookNumber = range.Start.BookNum; check.Run(range.Start.BookNum, GetOrCreateDataSource(range.ProjectId), recorder); + recorder.FilterResults(range); + recorder.CalculateActualOffsets(indexer); int totalAfterRunning = recorder.CheckRunResults.Count; if (totalAfterRunning == totalBeforeRunning) diff --git a/c-sharp/ParatextUtils/UsfmBookIndexer.cs b/c-sharp/ParatextUtils/UsfmBookIndexer.cs new file mode 100644 index 0000000000..26ca47b97e --- /dev/null +++ b/c-sharp/ParatextUtils/UsfmBookIndexer.cs @@ -0,0 +1,173 @@ +using SIL.Scripture; + +namespace Paranext.DataProvider.ParatextUtils; + +public class UsfmBookIndexer +{ + #region Member variables + + private readonly string _usfm; + + // Created lazily or else this could be readonly + private Dictionary>? _indexes = null; + + #endregion + + #region Constructor + + public UsfmBookIndexer(string usfm) + { + _usfm = usfm; + } + + #endregion + + #region Properties + + public string Usfm { get { return _usfm;} } + + #endregion + + #region Public methods + + /// + /// Returns the index into the USFM string that corresponds to the start of the chapter and + /// verse in this VerseRef. 
If the verse number is 0, returns the index that corresponds to the + /// start of the chapter. If the chapter and verse can't be found, returns null. + ///
+ /// A returned index points to the backslash of the "\v" or "\c" marker. + ///
+ public int? GetIndex(VerseRef verseRef) + { + return GetIndex(verseRef.ChapterNum, verseRef.VerseNum); + } + + /// + /// Returns the index into the USFM string that corresponds to the start of the chapter and + /// verse provided. If the verse number is 0, returns the index that corresponds to the start of + /// the chapter. If the chapter and verse can't be found, returns null. + ///
+ /// A returned index points to the backslash of the "\v" or "\c" marker. + ///
+ public int? GetIndex(int chapterNum, int verseNum) + { + ArgumentOutOfRangeException.ThrowIfLessThan(chapterNum, 1); + ArgumentOutOfRangeException.ThrowIfNegative(verseNum); + + _indexes ??= BuildIndexes(); + + if (!_indexes.TryGetValue(chapterNum, out Dictionary? verses) || verses == null) + return null; + + return verses.TryGetValue(verseNum, out int retVal) ? retVal : null; + } + + /// + /// Returns the index into the USFM string that corresponds to the start of the verse + /// immediately after the given chapter and verse. If a new chapter is found immediately after + /// the given chapter and verse, then the start of the new chapter is returned instead. If the + /// given chapter and verse can't be found, returns null. If no chapter or verse is found after + /// the given chapter and verse, returns null. + ///
+ /// Note that the given chapter and verse may not exist in the USFM string. This just looks up + /// whatever would immediately follow that chapter and verse. + ///
+ /// A returned index points to the backslash of the "\v" or "\c" marker. + ///
+ public int? GetIndexFollowing(int chapterNum, int verseNum) + { + ArgumentOutOfRangeException.ThrowIfLessThan(chapterNum, 1); + ArgumentOutOfRangeException.ThrowIfNegative(verseNum); + + _indexes ??= BuildIndexes(); + + if (_indexes.TryGetValue(chapterNum, out Dictionary? chapter) && chapter != null) + { + var verseNums = new SortedSet(chapter.Keys); + var nextVerseNum = verseNums.GetViewBetween(verseNum + 1, 10000).FirstOrDefault(); + if (nextVerseNum != 0) + return chapter[nextVerseNum]; + } + + if (_indexes.TryGetValue(chapterNum + 1, out chapter) && chapter != null) + { + if (chapter.TryGetValue(0, out int retVal)) + return retVal; + Console.WriteLine("Unexpected missing verse 0 from chapter"); + } + + return null; + } + + #endregion + + #region Private methods + + private Dictionary> BuildIndexes() + { + var retVal = new Dictionary>(); + + if (string.IsNullOrEmpty(_usfm)) + return retVal; + + int onChapter = 1; + retVal.Add(1, []); + retVal[1].Add(0, 0); + + for (int i = 0; i < _usfm.Length - 1; i++) + { + if (_usfm[i] != '\\') + continue; + + if (_usfm[i+1] == 'c') + { + var chapterNumber = ExtractNumber(_usfm, i+2); + if (chapterNumber.HasValue && chapterNumber.Value > 1) + { + onChapter = chapterNumber.Value; + retVal.Add(onChapter, []); + retVal[onChapter].Add(0, i); + } + } + else if (_usfm[i+1] == 'v') + { + var verseNumber = ExtractNumber(_usfm, i+2); + if (verseNumber.HasValue) + retVal[onChapter].Add(verseNumber.Value, i); + } + } + + return retVal; + } + + private static int? 
ExtractNumber(string text, int index) + { + // Increment past the whitespace preceding the number + while (index < text.Length && char.IsWhiteSpace(text[index])) + { + index++; + } + + // Find all the consecutive digits + int start = index; + while (index < text.Length && char.IsDigit(text[index])) + { + index++; + } + + // See if we found any digits + int length = index - start; + if (length == 0) + return null; + + // Convert the number from a string to an integer + int number = 0; + for (int i = start; i < index; i++) + { + number = (number * 10) + (int)char.GetNumericValue(text[i]); + } + return number; + } + + #endregion +} diff --git a/extensions/src/platform-scripture/src/checking-results-list.web-view.tsx b/extensions/src/platform-scripture/src/checking-results-list.web-view.tsx index efe6c0028d..7eae9ac85d 100644 --- a/extensions/src/platform-scripture/src/checking-results-list.web-view.tsx +++ b/extensions/src/platform-scripture/src/checking-results-list.web-view.tsx @@ -83,6 +83,7 @@ const parseResults = ( (item) => JSON.stringify(item) === JSON.stringify(newData), ); if (!isDuplicate) resultsSet.data.push(newData); + else logger.debug(`duplicate result found ${JSON.stringify(newData)}`); }); return resultsSets; };