Skip to content

Commit

Permalink
fuzzy: added 'logo' as a known stop word to ignore during fuzzy name matching
Browse files Browse the repository at this point in the history
  • Loading branch information
stojy committed Sep 20, 2023
1 parent fd8506c commit 7b2ef9a
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 5 deletions.
1 change: 1 addition & 0 deletions ClrVpin.Tests/Shared/FuzzyTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ public void MatchTest(string gameName, string fileName, bool expectedSuccess)
[TestCase("Mephisto (Cirsa 1987)", "jp mephisto.directb2s", 145, TestName = "author 'jp'")]
[TestCase("Martian Queen (LTD do Brasil 1981)", "marqueen_w.directb2s", 149, TestName = "known word substitution: marqueen and trim single character 'w'")]
[TestCase("Martian Queen (LTD do Brasil 1981)", "q_marqueen_w.directb2s", 149, TestName = "known word substitution: marqueen and trim single character 'q' and 'w'")]
[TestCase("O Gaucho (LTD do Brasil 1975)", "o gaucho_logo.png", 145, TestName = "drop word: logo")]
public void MatchScoreTest(string databaseName, string fileOrFeedName, int expectedScore)
{
// exactly same as MatchTest.. with a score validation
Expand Down
8 changes: 4 additions & 4 deletions ClrVpin/Shared/Fuzzy/Fuzzy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ static Fuzzy()
// - https://regex101.com/r/DoztL5/1
Authors = new[] { "jps", "jp's", "jp", "sg1bson", "vpw", "starlion", "pinball58", "vp99", "balutito", "siggis", "uws" };
string[] language = { "a", "and", "n'", "'n", "n", "the", "en" };
string[] vpx = {"vpx8", "vpx", "mod", "vp10", "4k", "b2s", "4player", "2021", "2022", "2023", "2024" }; // order is important, e.g. vpx8 to be stripped before vpx
string[] vpx = {"vpx8", "vpx", "mod", "vp10", "4k", "b2s", "4player", "2021", "2022", "2023", "2024", "logo" }; // order is important, e.g. vpx8 to be stripped before vpx
string[] technologyTypes = { TableType.ElectroMagnetic.ToLower(), TableType.SolidState.ToLower(), TableType.PureMechanical.ToLower() };
string[] descriptions = { "no leds", "upgrade", "premium" };
string[] versions = { "beta1", "beta" }; // order is important, e.g. ensure beta1 is removed before beta
pattern = string.Join('|', Authors.Concat(language).Concat(vpx).Concat(technologyTypes).Concat(descriptions).Concat(versions));
_wholeWordRegex = new Regex($"(?<=^|[^a-z^A-Z])({pattern})(?=$|[^a-zA-Z])", RegexOptions.Compiled);
_stopWholeWordRegex = new Regex($"(?<=^|[^a-z^A-Z])({pattern})(?=$|[^a-zA-Z])", RegexOptions.Compiled);

// first pass single whitespace
// - performed BEFORE other checks that aren't sensitive to these changes
Expand Down Expand Up @@ -195,7 +195,7 @@ public static string CleanPostSplit(string name, bool removeAllWhiteSpace)
cleanName = cleanName.ToNullLowerAndTrim() ?? "";

// trim (whole) words
cleanName = _wholeWordRegex.Replace(cleanName, "");
cleanName = _stopWholeWordRegex.Replace(cleanName, "");

// trim pseudo white space, e.g. trailing '_' char caused by whole removal: blah_VPX8
cleanName = cleanName.TrimPseudoWhitespace();
Expand Down Expand Up @@ -663,7 +663,7 @@ private class MatchDetail
private static readonly Regex _fileNameInfoRegex;
private static readonly Regex _trimSpecialAndNonAsciiCharRegex;
private static readonly Regex _trimTrailingPeriodRegex;
private static readonly Regex _wholeWordRegex;
private static readonly Regex _stopWholeWordRegex;
private static readonly Regex _addSpacingFirstPassRegex;
private static readonly Regex _addSpacingSecondPassRegex;
private static readonly Regex _versionRegex;
Expand Down
2 changes: 1 addition & 1 deletion Utils/Extensions/StringExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -99,5 +99,5 @@ public static string TrimSingleLetterWords(this string source)
return source.TrimPseudoWhitespace();
}

private static readonly char[] _pseudoWhiteSpaceChars = { ' ', '_', '-' };
private static readonly char[] _pseudoWhiteSpaceChars = { ' ', '_', '-' }; // _ = snake case, - = kebab case
}

0 comments on commit 7b2ef9a

Please sign in to comment.