From 69571641f8424f930a3e0b262c9227dcb37f0926 Mon Sep 17 00:00:00 2001 From: hhvrc Date: Thu, 12 Sep 2024 20:12:20 +0200 Subject: [PATCH] Simplify algorithms --- DiscordBot/MessageHandler/MessageHandler.cs | 71 +++++++++++---------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/DiscordBot/MessageHandler/MessageHandler.cs b/DiscordBot/MessageHandler/MessageHandler.cs index 24c19a3..2109b95 100644 --- a/DiscordBot/MessageHandler/MessageHandler.cs +++ b/DiscordBot/MessageHandler/MessageHandler.cs @@ -32,69 +32,76 @@ public sealed class MessageHandler { "ass", 0.2f }, }; - private readonly record struct WordRange(int Start, int Length); - private static float CalculateWeight(float accumulated, float weight) => accumulated == 0 ? weight : MathF.Max(accumulated, weight) + 0.1f; - private static bool TryGetProfanityWeight(string str, out int count, out float weight) + private static void GetContainedWordsCountAndWeight(ReadOnlySpan lowerCaseSpan, ref int count, ref float weight) { - count = 0; - weight = 0; - - if (string.IsNullOrEmpty(str)) return false; - - ReadOnlySpan strSpan = str.AsSpan(); - - // Look trough string for all contained matches foreach (var item in _containedProfanities) { - ReadOnlySpan badWord = item.Key.AsSpan(); - float badWordWeight = item.Value; - - int start = 0; - while (true) { - int index = strSpan[start..].IndexOf(badWord, StringComparison.OrdinalIgnoreCase); + while (true) + { + int index = lowerCaseSpan.IndexOf(item.Key, StringComparison.OrdinalIgnoreCase); if (index < 0) break; - start += index + badWord.Length; + lowerCaseSpan = lowerCaseSpan[(index + item.Key.Length)..]; count++; - weight = CalculateWeight(weight, badWordWeight); + weight = CalculateWeight(weight, item.Value); } } + } - // Collect all word ranges - int rangeStart = 0; + private readonly record struct WordRange(int Start, int End); + private static List GetWordRanges(ReadOnlySpan span) + { List wordRanges = []; - for (int i = 0; i < strSpan.Length; i++) - { - char c = strSpan[i]; - if (c is not (' ' or '\t' or '\r' or '\n')) continue; + int rangeStart = 0; + for (int i = 0; i < span.Length; i++) + { + if (span[i] is not (' ' or '\t' or '\r' or '\n')) continue; if (rangeStart < i) { - wordRanges.Add(new WordRange(rangeStart, i - rangeStart)); + wordRanges.Add(new WordRange(rangeStart, i)); } rangeStart = i + 1; } + return wordRanges; + } + + private static void GetStandaloneWordsCountAndWeight(ReadOnlySpan lowerCaseSpan, ref int count, ref float weight) + { + List wordRanges = GetWordRanges(lowerCaseSpan); + // Check if any of the words are standalone matches foreach (var item in _standaloneProfanities) { - ReadOnlySpan badWord = item.Key.AsSpan(); - float badWordWeight = item.Value; - foreach (var wordRange in wordRanges) { - if (strSpan.Slice(wordRange.Start, wordRange.Length).Equals(badWord, StringComparison.OrdinalIgnoreCase)) + if (lowerCaseSpan[wordRange.Start..wordRange.End].Equals(item.Key, StringComparison.OrdinalIgnoreCase)) { count++; - weight = CalculateWeight(weight, badWordWeight); + weight = CalculateWeight(weight, item.Value); } } } + } + + private static bool TryGetProfanityWeight(string str, out int count, out float weight) + { + count = 0; + weight = 0; + + if (string.IsNullOrEmpty(str)) return false; + + str = str.ToLowerInvariant(); + ReadOnlySpan strSpan = str.AsSpan(); + + GetContainedWordsCountAndWeight(strSpan, ref count, ref weight); + GetStandaloneWordsCountAndWeight(strSpan, ref count, ref weight); // Roof the weight to 1.0 weight = MathF.Min(weight, 1.0f); @@ -113,7 +120,7 @@ public static async Task HandleMessageAsync(SocketMessage message) if (TryGetProfanityWeight(message.Content, out int count, out float weight)) { // Respond to the message - await message.Channel.SendMessageAsync($"Profanity detected! {count} bad {(count > 1 ? "words" : "word")} with total weight: {weight}"); + await message.Channel.SendMessageAsync($"Profanity detected! {count} bad {(count > 1 ? "words" : "word")}, shocking at {weight * 100f}%"); } } }