/// <summary>
/// Reflection helpers that let benchmarks read the data declared on xUnit test methods.
/// </summary>
internal static class BenchmarkHelper
{
    /// <summary>
    /// Collects the argument list of every <see cref="InlineDataAttribute"/> applied to a method.
    /// </summary>
    /// <typeparam name="TType">The type that declares the method.</typeparam>
    /// <param name="methodName">Name of the method to look up with <see cref="Type.GetMethod(string)"/>.</param>
    /// <returns>One object array per <see cref="InlineDataAttribute"/> found on the method.</returns>
    /// <exception cref="ArgumentException">No method named <paramref name="methodName"/> was found on <typeparamref name="TType"/>.</exception>
    public static List<object?[]> GetInlineData<TType>(string methodName)
    {
        var type = typeof(TType);
        var member = type.GetMethod(methodName);
        if (member is null)
        {
            // A specific exception type (still catchable as Exception) instead of a bare System.Exception.
            throw new ArgumentException($"Could not find a method named '{methodName}' on type '{type}'.", nameof(methodName));
        }

        // The attribute's params array is surfaced by reflection as a read-only collection of typed
        // arguments. OfType narrows the type safely, so no null-forgiving operator is needed.
        return member.CustomAttributes
            .Where(attribute => attribute.AttributeType == typeof(InlineDataAttribute))
            .Select(attribute => attribute.ConstructorArguments[0].Value)
            .OfType<ReadOnlyCollection<CustomAttributeTypedArgument>>()
            .Select(arguments => arguments.Select(argument => argument.Value).ToArray())
            .ToList();
    }
}
CompareIgnoreCaseWhiteSpace | 3,982.3 ns | 37.84 ns | 31.59 ns | 3,640 B | +| Compare_Span | 101.3 ns | 1.82 ns | 1.70 ns | - | +| CompareIgnoreCase_Span | 137.6 ns | 2.49 ns | 2.33 ns | - | +| CompareIgnoreWhiteSpace_Span | 1,632.6 ns | 12.74 ns | 9.95 ns | 376 B | +| CompareIgnoreCaseWhiteSpace_Span | 1,756.9 ns | 29.59 ns | 27.68 ns | 376 B | + +#### Large Random String + +| Method | Mean | Error | StdDev | Allocated | +|--------------------------------- |-------------:|-----------:|-----------:|----------:| +| Compare | 541.04 ns | 5.690 ns | 5.322 ns | 2,360 B | +| CompareIgnoreCase | 1,917.42 ns | 35.455 ns | 34.822 ns | 4,720 B | +| CompareIgnoreWhiteSpace | 9,676.14 ns | 190.178 ns | 177.892 ns | 7,328 B | +| CompareIgnoreCaseWhiteSpace | 10,953.94 ns | 147.037 ns | 137.539 ns | 10,816 B | +| Compare_Span | 32.67 ns | 0.689 ns | 1.052 ns | - | +| CompareIgnoreCase_Span | 24.55 ns | 0.416 ns | 0.369 ns | - | +| CompareIgnoreWhiteSpace_Span | 3,440.01 ns | 66.125 ns | 73.498 ns | 4,704 B | +| CompareIgnoreCaseWhiteSpace_Span | 3,797.10 ns | 49.217 ns | 41.098 ns | 4,688 B | + +#### Large Similar String + +| Method | Mean | Error | StdDev | Allocated | +|--------------------------------- |---------:|----------:|----------:|----------:| +| Compare | 4.988 us | 0.0638 us | 0.0597 us | 2,736 B | +| CompareIgnoreCase | 5.421 us | 0.0992 us | 0.0928 us | 2,736 B | +| CompareIgnoreWhiteSpace | 8.286 us | 0.1003 us | 0.0938 us | 4,712 B | +| CompareIgnoreCaseWhiteSpace | 8.806 us | 0.1670 us | 0.1787 us | 4,712 B | +| Compare_Span | 2.336 us | 0.0234 us | 0.0183 us | - | +| CompareIgnoreCase_Span | 2.905 us | 0.0336 us | 0.0314 us | - | +| CompareIgnoreWhiteSpace_Span | 3.189 us | 0.0609 us | 0.0570 us | 928 B | +| CompareIgnoreCaseWhiteSpace_Span | 3.757 us | 0.0733 us | 0.0612 us | 928 B | + +##### Test Machine +``` +BenchmarkDotNet=v0.13.1, OS=Windows 10.0.19044.1645 (21H2) +Intel Core i7-6700HQ CPU 2.60GHz (Skylake), 1 CPU, 8 logical and 4 physical cores +.NET 
SDK=6.0.202 +[Host] : .NET 6.0.4 (6.0.422.16404), X64 RyuJIT +DefaultJob : .NET 6.0.4 (6.0.422.16404), X64 RyuJIT +``` \ No newline at end of file diff --git a/Commons.Benchmark/Util/NaturalComparerBenchmarks.cs b/Commons.Benchmark/Util/NaturalComparerBenchmarks.cs new file mode 100644 index 0000000..f0c4d35 --- /dev/null +++ b/Commons.Benchmark/Util/NaturalComparerBenchmarks.cs @@ -0,0 +1,61 @@ +using BenchmarkDotNet.Attributes; +using CG.Commons.Util; +#pragma warning disable CS0612 + +namespace Commons.Benchmark.Util; + +[MemoryDiagnoser(false)] +public class NaturalComparerBenchmarks +{ + private NaturalComparerObsolete _comparerObsolete = null!; + private NaturalComparerObsolete _comparerObsoleteIgnoreCase = null!; + private NaturalComparerObsolete _comparerObsoleteIgnoreWhitespace = null!; + private NaturalComparerObsolete _comparerObsoleteIgnoreCaseWhitespace = null!; + private NaturalComparer _comparer = null!; + private NaturalComparer _comparerIgnoreCase = null!; + private NaturalComparer _comparerIgnoreWhitespace = null!; + private NaturalComparer _comparerIgnoreCaseWhitespace = null!; + + [GlobalSetup] + public void Setup() + { + _comparerObsolete = new NaturalComparerObsolete(); + _comparerObsoleteIgnoreCase = new NaturalComparerObsolete(NaturalComparerOptions.IgnoreCase); + _comparerObsoleteIgnoreWhitespace = new NaturalComparerObsolete(NaturalComparerOptions.IgnoreWhiteSpace); + _comparerObsoleteIgnoreCaseWhitespace = new NaturalComparerObsolete(NaturalComparerOptions.IgnoreCase | NaturalComparerOptions.IgnoreWhiteSpace); + + _comparer = new NaturalComparer(); + _comparerIgnoreCase = new NaturalComparer(NaturalComparerOptions.IgnoreCase); + _comparerIgnoreWhitespace = new NaturalComparer(NaturalComparerOptions.IgnoreWhiteSpace); + _comparerIgnoreCaseWhitespace = new NaturalComparer(NaturalComparerOptions.IgnoreCase | NaturalComparerOptions.IgnoreWhiteSpace); + } + + private const string Left = " ThisIsA StringWithANumber00201.3 "; + private const 
/// <summary>
/// Benchmarks <see cref="NaturalComparerObsolete"/> against <see cref="NaturalComparer"/> on two long
/// pseudo-random strings, once for each combination of the IgnoreCase and IgnoreWhiteSpace options.
/// </summary>
[MemoryDiagnoser(false)]
public class NaturalComparerLargeRandomStringBenchmarks
{
    // Fixed seed: BenchmarkDotNet executes [GlobalSetup] once per benchmark method, so an unseeded
    // Random would hand every benchmark a different pair of strings and make the rows incomparable.
    private const int Seed = 20220501;

    private const int LeftLength = 600;
    private const int RightLength = 555;

    // Inclusive range of generated characters (printable ASCII from space up to 'z').
    private const char FirstChar = ' ';
    private const char LastChar = 'z';

    private NaturalComparerObsolete _comparerObsolete = null!;
    private NaturalComparerObsolete _comparerObsoleteIgnoreCase = null!;
    private NaturalComparerObsolete _comparerObsoleteIgnoreWhitespace = null!;
    private NaturalComparerObsolete _comparerObsoleteIgnoreCaseWhitespace = null!;
    private NaturalComparer _comparer = null!;
    private NaturalComparer _comparerIgnoreCase = null!;
    private NaturalComparer _comparerIgnoreWhitespace = null!;
    private NaturalComparer _comparerIgnoreCaseWhitespace = null!;

    private string _left = null!;
    private string _right = null!;

    /// <summary>
    /// Creates one comparer per option combination and generates the two input strings.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _comparerObsolete = new NaturalComparerObsolete();
        _comparerObsoleteIgnoreCase = new NaturalComparerObsolete(NaturalComparerOptions.IgnoreCase);
        _comparerObsoleteIgnoreWhitespace = new NaturalComparerObsolete(NaturalComparerOptions.IgnoreWhiteSpace);
        _comparerObsoleteIgnoreCaseWhitespace = new NaturalComparerObsolete(NaturalComparerOptions.IgnoreCase | NaturalComparerOptions.IgnoreWhiteSpace);

        _comparer = new NaturalComparer();
        _comparerIgnoreCase = new NaturalComparer(NaturalComparerOptions.IgnoreCase);
        _comparerIgnoreWhitespace = new NaturalComparer(NaturalComparerOptions.IgnoreWhiteSpace);
        _comparerIgnoreCaseWhitespace = new NaturalComparer(NaturalComparerOptions.IgnoreCase | NaturalComparerOptions.IgnoreWhiteSpace);

        // One seeded generator for both strings keeps them different from each other
        // but identical across every benchmark method and every run.
        var random = new Random(Seed);
        _left = GenerateString(random, LeftLength);
        _right = GenerateString(random, RightLength);
    }

    /// <summary>
    /// Builds a string of <paramref name="length"/> characters drawn from <paramref name="random"/>.
    /// </summary>
    /// <param name="random">Source of pseudo-random numbers.</param>
    /// <param name="length">Number of characters to generate.</param>
    /// <returns>A string whose characters all lie between <see cref="FirstChar"/> and <see cref="LastChar"/>, inclusive.</returns>
    private static string GenerateString(Random random, int length)
    {
        var array = new char[length];
        for (var i = 0; i < array.Length; i++)
        {
            // Random.Next's upper bound is exclusive, hence the + 1 so LastChar can be produced.
            array[i] = (char)random.Next(FirstChar, LastChar + 1);
        }
        return new string(array);
    }


    [Benchmark]
    public void Compare() => _ = _comparerObsolete.Compare(_left, _right);

    [Benchmark]
    public void CompareIgnoreCase() => _ = _comparerObsoleteIgnoreCase.Compare(_left, _right);

    [Benchmark]
    public void CompareIgnoreWhiteSpace() => _ = _comparerObsoleteIgnoreWhitespace.Compare(_left, _right);

    [Benchmark]
    public void CompareIgnoreCaseWhiteSpace() => _ = _comparerObsoleteIgnoreCaseWhitespace.Compare(_left, _right);



    [Benchmark]
    public void Compare_Span() => _ = _comparer.Compare(_left, _right);

    [Benchmark]
    public void CompareIgnoreCase_Span() => _ = _comparerIgnoreCase.Compare(_left, _right);

    [Benchmark]
    public void CompareIgnoreWhiteSpace_Span() => _ = _comparerIgnoreWhitespace.Compare(_left, _right);

    [Benchmark]
    public void CompareIgnoreCaseWhiteSpace_Span() => _ = _comparerIgnoreCaseWhitespace.Compare(_left, _right);
}
GenerateString(20, "abcdefg01.01"); + _right = GenerateString(20, "abcdefg01.02"); + } + + public static string GenerateString(int times, string seed) + { + var decimals = seed.Count(c => c == '.'); + var array = new char[times * (seed.Length - decimals) + decimals]; + var i = 0; + foreach (var c in seed) + { + if (c == '.') + { + array[i++] = c; + } + else + { + for (var j = 0; j < times; j++, i++) + { + array[i] = c; + } + } + } + return new string(array); + } + + + [Benchmark] + public void Compare() => _ = _comparerObsolete.Compare(_left , _right); + + [Benchmark] + public void CompareIgnoreCase() => _ = _comparerObsoleteIgnoreCase.Compare(_left, _right); + + [Benchmark] + public void CompareIgnoreWhiteSpace() => _ = _comparerObsoleteIgnoreWhitespace.Compare(_left, _right); + + [Benchmark] + public void CompareIgnoreCaseWhiteSpace() => _ = _comparerObsoleteIgnoreCaseWhitespace.Compare(_left, _right); + + + + [Benchmark] + public void Compare_Span() => _ = _comparer.Compare(_left , _right); + + [Benchmark] + public void CompareIgnoreCase_Span() => _ = _comparerIgnoreCase.Compare(_left, _right); + + [Benchmark] + public void CompareIgnoreWhiteSpace_Span() => _ = _comparerIgnoreWhitespace.Compare(_left, _right); + + [Benchmark] + public void CompareIgnoreCaseWhiteSpace_Span() => _ = _comparerIgnoreCaseWhitespace.Compare(_left, _right); +} \ No newline at end of file diff --git a/Commons.Test/Commons.Test.csproj b/Commons.Test/Commons.Test.csproj index d0c35ad..9c119dd 100644 --- a/Commons.Test/Commons.Test.csproj +++ b/Commons.Test/Commons.Test.csproj @@ -1,7 +1,7 @@ - netcoreapp2.1 + net6.0 false @@ -11,13 +11,9 @@ - - + + - - all - runtime; build; native; contentfiles; analyzers - diff --git a/Commons.Test/Util/NaturalComparerTests.cs b/Commons.Test/Util/NaturalComparerTests.cs index 903eb79..9e6295f 100644 --- a/Commons.Test/Util/NaturalComparerTests.cs +++ b/Commons.Test/Util/NaturalComparerTests.cs @@ -1,4 +1,5 @@ using System; +using 
System.Collections.Generic; using CG.Commons.Util; using Xunit; @@ -80,28 +81,32 @@ public enum ComparerEquality [InlineData("aa", " a\ta", ComparerEquality.Equal, NaturalComparerOptions.IgnoreWhiteSpace)] //capitalization order [InlineData("added4", "Added11", ComparerEquality.LessThan)] + [InlineData("added4", "Added11", ComparerEquality.GreaterThan, NaturalComparerOptions.LowercaseFirst)] //double decimals [InlineData("12.4.1", "12.4.1", ComparerEquality.Equal)] [InlineData("12.41", "12.4.1", ComparerEquality.GreaterThan)] public void TestCompare(string left, string right, ComparerEquality expectedResult, NaturalComparerOptions options = NaturalComparerOptions.None) { + var comparerOld = new NaturalComparerObsolete(options); + DoTest(left, right, expectedResult, comparerOld, nameof(NaturalComparerObsolete)); + var comparer = new NaturalComparer(options); - DoTest(left, right, expectedResult, comparer); + DoTest(left, right, expectedResult, comparer, nameof(NaturalComparer)); } - private static void DoTest(string left, string right, ComparerEquality expectedResult, NaturalComparer comparer) + private static void DoTest(string left, string right, ComparerEquality expectedResult, IComparer comparer, string note) { var result = comparer.Compare(left, right); switch (expectedResult) { case ComparerEquality.LessThan: - Assert.True(result <= (int)expectedResult, $"Result: {result} Expected Result: {expectedResult}({(int)expectedResult})"); + Assert.True(result <= (int)expectedResult, $"Result: {result} Expected: {expectedResult}({(int)expectedResult}) - {note}"); break; case ComparerEquality.Equal: - Assert.True(result == (int)expectedResult, $"Result: {result} Expected Result: {expectedResult}({(int)expectedResult})"); + Assert.True(result == (int)expectedResult, $"Result: {result} Expected: {expectedResult}({(int)expectedResult}) - {note}"); break; case ComparerEquality.GreaterThan: - Assert.True(result >= (int)expectedResult, $"Result: {result} Expected Result: 
{expectedResult}({(int)expectedResult})"); + Assert.True(result >= (int)expectedResult, $"Result: {result} Expected: {expectedResult}({(int)expectedResult}) - {note}"); break; default: throw new ArgumentOutOfRangeException(nameof(expectedResult), expectedResult, null); diff --git a/Commons.Test/Util/StringCypherTest.cs b/Commons.Test/Util/StringCypherTest.cs index f1baad3..b3b384e 100644 --- a/Commons.Test/Util/StringCypherTest.cs +++ b/Commons.Test/Util/StringCypherTest.cs @@ -20,7 +20,7 @@ public void TestEncryptDecrypt() //assert encrypted.Should().NotBeNullOrEmpty(); - decrypted.Should().NotBeNullOrEmpty().And.Should().BeEquivalentTo(testString); + decrypted.Should().NotBeNullOrEmpty().And.BeEquivalentTo(testString); } } } \ No newline at end of file diff --git a/Commons.sln b/Commons.sln index 24f52a1..b606471 100644 --- a/Commons.sln +++ b/Commons.sln @@ -7,6 +7,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Commons", "Commons\Commons. EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Commons.Test", "Commons.Test\Commons.Test.csproj", "{253030B2-DD5F-4928-B420-24D811E26D4D}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Commons.Benchmark", "Commons.Benchmark\Commons.Benchmark.csproj", "{DD56C9FD-0083-45A3-96C8-5805573DC88F}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -21,6 +23,10 @@ Global {253030B2-DD5F-4928-B420-24D811E26D4D}.Debug|Any CPU.Build.0 = Debug|Any CPU {253030B2-DD5F-4928-B420-24D811E26D4D}.Release|Any CPU.ActiveCfg = Release|Any CPU {253030B2-DD5F-4928-B420-24D811E26D4D}.Release|Any CPU.Build.0 = Release|Any CPU + {DD56C9FD-0083-45A3-96C8-5805573DC88F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {DD56C9FD-0083-45A3-96C8-5805573DC88F}.Debug|Any CPU.Build.0 = Debug|Any CPU + {DD56C9FD-0083-45A3-96C8-5805573DC88F}.Release|Any CPU.ActiveCfg = Release|Any CPU + {DD56C9FD-0083-45A3-96C8-5805573DC88F}.Release|Any CPU.Build.0 = Release|Any CPU 
namespace CG.Commons.Util
{
    /// <summary>
    /// Compares strings in natural order: runs of digits are compared by numeric value,
    /// every other character is compared one at a time.
    /// </summary>
    public class NaturalComparer : IComparer<string>, IComparer
    {
        private readonly bool _checkTrailingDecimalLength;
        private readonly bool _decimalPrecision;
        private readonly bool _ignoreWhitespace;

        // Only created when IgnoreWhiteSpace is requested; left null otherwise.
        private readonly Regex _whitespaceRegex;

        // Selected once in the constructor so the comparison loop does not re-test option flags.
        private readonly Func<char, char, int> _singleCharComparer;

        /// <summary>
        /// Creates a comparer configured by <paramref name="options"/>.
        /// </summary>
        /// <param name="options">Flags that adjust the comparison; defaults to <see cref="NaturalComparerOptions.None"/>.</param>
        public NaturalComparer(NaturalComparerOptions options = NaturalComparerOptions.None)
        {
            _checkTrailingDecimalLength = options.HasFlag(NaturalComparerOptions.CheckTrailingDecimalLength);
            _decimalPrecision = options.HasFlag(NaturalComparerOptions.DecimalPrecision);

            // IgnoreCase wins over LowercaseFirst when both flags are set.
            if (options.HasFlag(NaturalComparerOptions.IgnoreCase))
            {
                _singleCharComparer = IgnoreCaseComparison;
            }
            else if (options.HasFlag(NaturalComparerOptions.LowercaseFirst))
            {
                _singleCharComparer = LowercaseFirstComparison;
            }
            else
            {
                _singleCharComparer = NormalComparison;
            }

            _ignoreWhitespace = options.HasFlag(NaturalComparerOptions.IgnoreWhiteSpace);
            if (_ignoreWhitespace)
            {
                _whitespaceRegex = new Regex(@"\s", RegexOptions.Compiled);
            }
        }

        /// <summary>
        /// Compares two strings using a natural sort. Nulls and empty strings are treated as equal.
        /// </summary>
        /// <param name="left">The left string to compare.</param>
        /// <param name="right">The right string to compare.</param>
        /// <returns>
        /// Less than zero if <paramref name="left"/> sorts before <paramref name="right"/>,
        /// greater than zero if it sorts after, and zero if the two are equal.
        /// </returns>
        public int Compare(string left, string right) => Compare(left.AsSpan(), right.AsSpan()); // AsSpan maps null to an empty span

        //less than zero = x is less than y
        //zero = x equals y
        //greater than zero = x is greater than y

        /// <summary>
        /// Compares two character spans using a natural sort.
        /// </summary>
        /// <param name="left">The left characters to compare.</param>
        /// <param name="right">The right characters to compare.</param>
        /// <returns>
        /// Less than zero if <paramref name="left"/> sorts before <paramref name="right"/>,
        /// greater than zero if it sorts after, and zero if the two are equal.
        /// </returns>
        public int Compare(ReadOnlySpan<char> left, ReadOnlySpan<char> right)
        {
            if (_ignoreWhitespace)
            {
                //TODO this is still not efficient, heap allocation and regular expression
                left = _whitespaceRegex.Replace(left.ToString(), string.Empty);
                right = _whitespaceRegex.Replace(right.ToString(), string.Empty);
            }
            else
            {
                // Leading and trailing whitespace never takes part in the comparison.
                left = left.Trim();
                right = right.Trim();
            }

            //we can't use iterator since we need to see the next value
            int xindex = -1, yindex = -1;
            while (true)
            {
                var xhas = xindex + 1 < left.Length;
                var yhas = yindex + 1 < right.Length;

                //neither string has another to compare so they are equal
                if (!(xhas || yhas)) return 0;

                //if one string is shorter than the other then the shorter comes first
                if (!xhas) return -1;
                if (!yhas) return 1;

                xindex++;
                yindex++;

                var xchar = left[xindex];
                var ychar = right[yindex];

                var xIsNum = char.IsDigit(xchar);
                var yIsNum = char.IsDigit(ychar);

                //if both characters are numeric then we have to compare the full numeric string part
                if (xIsNum && yIsNum)
                {
                    // GetNumericString leaves each index on the last character of its run.
                    var xnums = GetNumericString(left, ref xindex, _decimalPrecision);
                    var ynums = GetNumericString(right, ref yindex, _decimalPrecision);

                    // Digit-wise comparison instead of decimal.Parse: no dependency on the current
                    // culture, no OverflowException on very long digit runs, and no FormatException
                    // on the non-ASCII digits that char.IsDigit accepts.
                    var iresult = CompareNumericRuns(xnums, ynums);
                    if (iresult != 0) return iresult;

                    // Same value but written differently (e.g. "1" vs "01"): optionally order by length.
                    if (_checkTrailingDecimalLength && xnums.Length != ynums.Length)
                    {
                        return xnums.Length.CompareTo(ynums.Length);
                    }
                    continue;
                }

                //if one char is numeric but the other is not then the numeric comes first
                if (xIsNum) return -1;
                if (yIsNum) return 1;

                //both characters are not numeric so we simply compare
                var cresult = _singleCharComparer(xchar, ychar);
                if (cresult != 0) return cresult;
            }
        }

        /// <summary>Compares two characters after lowercasing both with the invariant culture.</summary>
        private static int IgnoreCaseComparison(char x, char y) => char.ToLowerInvariant(x).CompareTo(char.ToLowerInvariant(y));

        // NOTE(review): with LowercaseFirst set, a lowercase letter is ordered AFTER its uppercase
        // counterpart, and without it the lowercase letter is ordered before. The option name suggests
        // the opposite, but this matches NaturalComparerObsolete and the existing unit tests, so the
        // behaviour is kept as is.
        private static int LowercaseFirstComparison(char x, char y) => CompareWithCaseTieBreak(x, y, 1);

        private static int NormalComparison(char x, char y) => CompareWithCaseTieBreak(x, y, -1);

        /// <summary>
        /// Ordinal comparison of two characters, except that two letters are first compared
        /// case-insensitively and only fall back to case when they are the same letter.
        /// </summary>
        /// <param name="x">The left character.</param>
        /// <param name="y">The right character.</param>
        /// <param name="whenOnlyLeftIsLower">Result returned when only <paramref name="x"/> is lowercase; negated when only <paramref name="y"/> is.</param>
        private static int CompareWithCaseTieBreak(char x, char y, int whenOnlyLeftIsLower)
        {
            var result = x.CompareTo(y);
            if (result == 0 || !char.IsLetter(x) || !char.IsLetter(y)) return result;

            result = char.ToLowerInvariant(x).CompareTo(char.ToLowerInvariant(y));
            if (result != 0) return result;

            var xIsLower = char.IsLower(x);
            var yIsLower = char.IsLower(y);
            if (xIsLower == yIsLower) return 0;
            return xIsLower ? whenOnlyLeftIsLower : -whenOnlyLeftIsLower;
        }

        /// <summary>
        /// Compares two digit runs (each optionally containing a single '.') by numeric value.
        /// </summary>
        /// <returns>-1, 0 or 1.</returns>
        private static int CompareNumericRuns(ReadOnlySpan<char> x, ReadOnlySpan<char> y)
        {
            SplitNumericRun(x, out var xWhole, out var xFraction);
            SplitNumericRun(y, out var yWhole, out var yFraction);

            // Leading zeros are gone, so a longer whole part is always the larger number.
            if (xWhole.Length != yWhole.Length) return xWhole.Length.CompareTo(yWhole.Length);

            var result = CompareDigits(xWhole, yWhole);
            if (result != 0) return result;

            result = CompareDigits(xFraction, yFraction);
            if (result != 0) return result;

            // Trailing zeros are gone, so the fraction with extra digits is the larger one.
            return xFraction.Length.CompareTo(yFraction.Length);
        }

        /// <summary>
        /// Splits a digit run at its '.' (if any), dropping leading zeros from the whole part
        /// and trailing zeros from the fraction so equal values have equal representations.
        /// </summary>
        private static void SplitNumericRun(ReadOnlySpan<char> run, out ReadOnlySpan<char> whole, out ReadOnlySpan<char> fraction)
        {
            var point = run.IndexOf('.');
            if (point < 0)
            {
                whole = run;
                fraction = ReadOnlySpan<char>.Empty;
            }
            else
            {
                whole = run.Slice(0, point);
                fraction = run.Slice(point + 1);
            }

            var start = 0;
            while (start < whole.Length && DigitValue(whole[start]) == 0) start++;
            whole = whole.Slice(start);

            var end = fraction.Length;
            while (end > 0 && DigitValue(fraction[end - 1]) == 0) end--;
            fraction = fraction.Slice(0, end);
        }

        /// <summary>Compares the digits the two spans have in common, position by position.</summary>
        private static int CompareDigits(ReadOnlySpan<char> x, ReadOnlySpan<char> y)
        {
            var count = Math.Min(x.Length, y.Length);
            for (var i = 0; i < count; i++)
            {
                var difference = DigitValue(x[i]) - DigitValue(y[i]);
                if (difference != 0) return difference < 0 ? -1 : 1;
            }
            return 0;
        }

        // ASCII digits take the fast path; other characters accepted by char.IsDigit go through GetNumericValue.
        private static int DigitValue(char c) => c <= '9' ? c - '0' : (int)char.GetNumericValue(c);

        /// <summary>
        /// Returns the run of digits starting at <paramref name="index"/>, including at most one '.'
        /// when <paramref name="decimalPrecision"/> is set.
        /// </summary>
        /// <param name="source">The characters being scanned.</param>
        /// <param name="index">On entry the first character of the run; on exit the last character of the run.</param>
        /// <param name="decimalPrecision">Whether a single '.' is treated as part of the run.</param>
        private static ReadOnlySpan<char> GetNumericString(ReadOnlySpan<char> source, ref int index, bool decimalPrecision)
        {
            var pointAllowed = decimalPrecision;
            var start = index;
            while (index < source.Length)
            {
                var current = source[index];
                if (current == '.' && pointAllowed)
                {
                    pointAllowed = false;
                }
                else if (!char.IsDigit(current))
                {
                    break;
                }
                index++;
            }

            // Step back so the caller's index rests on the last consumed character.
            index--;
            return source.Slice(start, index - start + 1);
        }

        /// <summary>
        /// Compares two objects by their <see cref="object.ToString"/> representations; null is treated as empty.
        /// </summary>
        public int Compare(object x, object y) => Compare(x?.ToString(), y?.ToString());
    }
}
+ public int Compare(string left, string right) + { + //treat null and empty strings the same, also ignore leading and trailing whitespace + var x = left?.Trim() ?? string.Empty; + var y = right?.Trim() ?? string.Empty; + + if (_ignoreCase) + { + x = x.ToLower(); + y = y.ToLower(); + } + + if (_ignoreWhitespace) + { + var regex = new Regex(@"\s"); + x = regex.Replace(x, string.Empty); + y = regex.Replace(y, string.Empty); + } + + //we can't use iterator since we need to see the next value + var xarray = x.ToCharArray(); + var yarray = y.ToCharArray(); + int xindex = -1, yindex = -1; + while (true) + { + var xhas = xindex + 1 < xarray.Length; + var yhas = yindex + 1 < yarray.Length; + + //neither string has another to compare so they are equal + if (!(xhas || yhas)) return 0; + + //if one string is shorter than the other than the shorter comes first + if (!xhas) return -1; + if (!yhas) return 1; + + xindex++; + yindex++; + + var xchar = xarray[xindex]; + var ychar = yarray[yindex]; + + var xIsNum = char.IsDigit(xchar); + var yIsNum = char.IsDigit(ychar); + + //if both characters are numeric then we have to compare the full numeric string part + if (xIsNum && yIsNum) + { + var xnums = GetNumericString(xarray, ref xindex, _decimalPrecision); + var ynums = GetNumericString(yarray, ref yindex, _decimalPrecision); + var xnum = decimal.Parse(xnums); + var ynum = decimal.Parse(ynums); + var iresult = xnum.CompareTo(ynum); + if (_checkTrailingDecimalLength && iresult == 0 && xnums.Length != ynums.Length) + { + return xnums.Length.CompareTo(ynums.Length); + } + if (iresult != 0) return iresult; + continue; + } + + //if one char is numeric but the other is not then the numeric comes first + if (xIsNum) return -1; + if (yIsNum) return 1; + + //both characters are not numeric so we simply compare + var cresult = CapitalOrderComparison(xchar, ychar); + if (cresult != 0) return cresult; + } + + } + + private int CapitalOrderComparison(char x, char y) + { + var result = 
x.CompareTo(y); + if (result == 0) return result; + if (char.IsLetter(x) && char.IsLetter(y)) + { + result = char.ToLowerInvariant(x).CompareTo(char.ToLowerInvariant(y)); + if (result != 0) return result; + if (_lowercaseFirst) + { + if (!char.IsLower(x) && char.IsLower(y)) return -1; + if (char.IsLower(x) && !char.IsLower(y)) return 1; + } + else + { + if (!char.IsLower(x) && char.IsLower(y)) return 1; + if (char.IsLower(x) && !char.IsLower(y)) return -1; + } + } + return result; + } + + private static string GetNumericString(IReadOnlyList source, ref int index, bool decimalPrecision) + { + var sb = new StringBuilder(); + var point = true; + while (index < source.Count && (char.IsDigit(source[index]) || decimalPrecision && point && source[index] == '.')) + { + if (source[index] == '.') point = false; + sb.Append(source[index]); + index++; + } + index--; + return sb.ToString(); + } + + public int Compare(object x, object y) + { + return Compare(x?.ToString(), y?.ToString()); + } + } +} \ No newline at end of file diff --git a/README.md b/README.md index 82c19c5..592cb2b 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ [![NuGet](https://img.shields.io/nuget/v/CG.Commons.svg)](https://www.nuget.org/packages/CG.Commons/) -A collection of useful C# utilities, extensions, and data structures. \ No newline at end of file +A collection of useful C# utilities, extensions, and data structures. + + + +## Performance \ No newline at end of file