Skip to content

Commit

Permalink
Speed up word splitting by another 30%
Browse files Browse the repository at this point in the history
  • Loading branch information
hhvrc committed Sep 12, 2024
1 parent fc8385d commit bc4ca21
Showing 1 changed file with 27 additions and 10 deletions.
37 changes: 27 additions & 10 deletions DiscordBot/MessageHandler/MessageHandler.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Discord.WebSocket;
using System.Buffers;

namespace OpenShock.DiscordBot.MessageHandler;

Expand Down Expand Up @@ -54,26 +55,42 @@ private static void GetContainedWordsCountAndWeight(ReadOnlySpan<char> lowerCase
}

/// <summary>
/// Position of a single word inside the span that was split into words.
/// <c>Start</c> is the index of the word's first character and <c>End</c> is the
/// index one past its last character (i.e. the range is half-open: [Start, End)).
/// </summary>
private readonly record struct WordRange(int Start, int End);
/// <summary>
/// Characters treated as word separators when splitting: space, tab, carriage return and line feed.
/// Created once and reused so that <c>IndexOfAny</c> lookups do not rebuild the search set on every call.
/// </summary>
private static readonly SearchValues<char> _whiteSpaceSearchValues = SearchValues.Create([' ', '\t', '\r', '\n']);
private static List<WordRange> GetWordRanges(ReadOnlySpan<char> span)
{
List<WordRange> wordRanges = [];

int rangeStart = 0;
for (int i = 0; i < span.Length; i++)
int spanIndex = 0;
while (true)
{
if (span[i] is not (' ' or '\t' or '\r' or '\n')) continue;
// Find the next white space character
int index = span.IndexOfAny(_whiteSpaceSearchValues);
if (index < 0)
{
// Add the remaining word range if the word is not empty
if (span.Length > 0)
{
wordRanges.Add(new WordRange(spanIndex, spanIndex + span.Length));
}

if (rangeStart < i)
// Exit the loop, span is fully processed
break;
}

// Skip the white space character if it is the first character in the span
if (index == 0)
{
wordRanges.Add(new WordRange(rangeStart, i));
spanIndex++;
span = span[1..];
continue;
}

rangeStart = i + 1;
}
// Add the word range if the word is not empty
wordRanges.Add(new WordRange(spanIndex, spanIndex + index));

if (rangeStart < span.Length)
{
wordRanges.Add(new WordRange(rangeStart, span.Length));
// Move the span index to after the white space character
spanIndex += index + 1;
span = span[(index + 1)..];
}

return wordRanges;
Expand Down

0 comments on commit bc4ca21

Please sign in to comment.