From bc4ca21ec514e4d5caf96c87e4c7ff3d35a68c6a Mon Sep 17 00:00:00 2001 From: hhvrc Date: Fri, 13 Sep 2024 00:29:04 +0200 Subject: [PATCH] Speed up word splitting by another 30% --- DiscordBot/MessageHandler/MessageHandler.cs | 37 +++++++++++++++------ 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/DiscordBot/MessageHandler/MessageHandler.cs b/DiscordBot/MessageHandler/MessageHandler.cs index d4f9e2b..138adb4 100644 --- a/DiscordBot/MessageHandler/MessageHandler.cs +++ b/DiscordBot/MessageHandler/MessageHandler.cs @@ -1,4 +1,5 @@ using Discord.WebSocket; +using System.Buffers; namespace OpenShock.DiscordBot.MessageHandler; @@ -54,26 +55,42 @@ private static void GetContainedWordsCountAndWeight(ReadOnlySpan lowerCase } private readonly record struct WordRange(int Start, int End); + private static readonly SearchValues _whiteSpaceSearchValues = SearchValues.Create([' ', '\t', '\r', '\n']); private static List GetWordRanges(ReadOnlySpan span) { List wordRanges = []; - int rangeStart = 0; - for (int i = 0; i < span.Length; i++) + int spanIndex = 0; + while (true) { - if (span[i] is not (' ' or '\t' or '\r' or '\n')) continue; + // Find the next white space character + int index = span.IndexOfAny(_whiteSpaceSearchValues); + if (index < 0) + { + // Add the remaining word range if the word is not empty + if (span.Length > 0) + { + wordRanges.Add(new WordRange(spanIndex, spanIndex + span.Length)); + } - if (rangeStart < i) + // Exit the loop, span is fully processed + break; + } + + // Skip the white space character if it is the first character in the span + if (index == 0) { - wordRanges.Add(new WordRange(rangeStart, i)); + spanIndex++; + span = span[1..]; + continue; } - rangeStart = i + 1; - } + // Add the word range if the word is not empty + wordRanges.Add(new WordRange(spanIndex, spanIndex + index)); - if (rangeStart < span.Length) - { - wordRanges.Add(new WordRange(rangeStart, span.Length)); + // Move the span index to after the white space character + spanIndex += index + 1; + span = span[(index + 1)..]; } return wordRanges;