Skip to content

Commit

Permalink
Simplify algorithms
Browse files Browse the repository at this point in the history
  • Loading branch information
hhvrc committed Sep 12, 2024
1 parent dc8338e commit 6957164
Showing 1 changed file with 39 additions and 32 deletions.
71 changes: 39 additions & 32 deletions DiscordBot/MessageHandler/MessageHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,69 +32,76 @@ public sealed class MessageHandler
{ "ass", 0.2f },
};

private readonly record struct WordRange(int Start, int Length);

/// <summary>
/// Folds one more profanity weight into the running total.
/// </summary>
/// <param name="accumulated">Weight gathered so far; exactly 0 means nothing has been gathered yet.</param>
/// <param name="weight">Weight of the newly found word.</param>
/// <returns>
/// <paramref name="weight"/> when nothing was accumulated yet; otherwise the larger of the two values plus 0.1.
/// </returns>
private static float CalculateWeight(float accumulated, float weight)
{
    // First hit: the running total simply becomes the word's own weight.
    if (accumulated == 0)
    {
        return weight;
    }

    // Every further hit keeps the heaviest weight seen and adds a flat 0.1 on top.
    float heaviest = MathF.Max(accumulated, weight);
    return heaviest + 0.1f;
}

private static bool TryGetProfanityWeight(string str, out int count, out float weight)
/// <summary>
/// Scans the text for every non-overlapping occurrence of each key in
/// <c>_containedProfanities</c>, matching anywhere in the text (not only on word boundaries).
/// </summary>
/// <param name="lowerCaseSpan">
/// Text to scan. The name suggests the caller lower-cases it first; the comparison used here is
/// case-insensitive regardless.
/// </param>
/// <param name="count">Incremented once per occurrence found.</param>
/// <param name="weight">Updated through <c>CalculateWeight</c> with the entry's value for every occurrence found.</param>
private static void GetContainedWordsCountAndWeight(ReadOnlySpan<char> lowerCaseSpan, ref int count, ref float weight)
{
    foreach (var item in _containedProfanities)
    {
        // An empty key matches at index 0 without advancing, which would never terminate.
        if (item.Key.Length == 0) continue;

        // Every entry must search the whole input again. Slicing the parameter itself would make
        // later entries see only the text after the previous entry's last match.
        ReadOnlySpan<char> remaining = lowerCaseSpan;

        while (true)
        {
            int index = remaining.IndexOf(item.Key, StringComparison.OrdinalIgnoreCase);
            if (index < 0) break;

            // Continue after this match so occurrences are counted without overlap.
            remaining = remaining[(index + item.Key.Length)..];

            count++;
            weight = CalculateWeight(weight, item.Value);
        }
    }
}

// Collect all word ranges
int rangeStart = 0;
/// <summary>
/// Half-open character range <c>[Start, End)</c> of one word inside a span.
/// </summary>
private readonly record struct WordRange(int Start, int End);

/// <summary>
/// Splits a span into words separated by space, tab, carriage return or line feed.
/// </summary>
/// <param name="span">Text to split.</param>
/// <returns>
/// The ranges of all non-empty words in order of appearance, including a final word that is not
/// followed by whitespace. Empty for empty or whitespace-only input.
/// </returns>
private static List<WordRange> GetWordRanges(ReadOnlySpan<char> span)
{
    List<WordRange> wordRanges = [];

    int rangeStart = 0;
    for (int i = 0; i < span.Length; i++)
    {
        if (span[i] is not (' ' or '\t' or '\r' or '\n')) continue;

        // Consecutive separators produce an empty range; skip those.
        if (rangeStart < i)
        {
            wordRanges.Add(new WordRange(rangeStart, i));
        }

        rangeStart = i + 1;
    }

    // The loop only closes a word when it meets a separator, so a word that runs to the end of
    // the text has to be added here.
    if (rangeStart < span.Length)
    {
        wordRanges.Add(new WordRange(rangeStart, span.Length));
    }

    return wordRanges;
}

/// <summary>
/// Compares every word of the text (as split by <c>GetWordRanges</c>) against each key in
/// <c>_standaloneProfanities</c>; only whole-word, case-insensitive matches count.
/// </summary>
/// <param name="lowerCaseSpan">Text to inspect.</param>
/// <param name="count">Incremented once per matching word.</param>
/// <param name="weight">Updated through <c>CalculateWeight</c> with the entry's value for every matching word.</param>
private static void GetStandaloneWordsCountAndWeight(ReadOnlySpan<char> lowerCaseSpan, ref int count, ref float weight)
{
    List<WordRange> ranges = GetWordRanges(lowerCaseSpan);

    // Entries stay in the outer loop and words in the inner loop: the weight folding depends on
    // the order in which matches are reported.
    foreach (var entry in _standaloneProfanities)
    {
        for (int i = 0; i < ranges.Count; i++)
        {
            WordRange range = ranges[i];
            ReadOnlySpan<char> word = lowerCaseSpan.Slice(range.Start, range.End - range.Start);

            if (!word.Equals(entry.Key, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            count++;
            weight = CalculateWeight(weight, entry.Value);
        }
    }
}

private static bool TryGetProfanityWeight(string str, out int count, out float weight)
{
count = 0;
weight = 0;

if (string.IsNullOrEmpty(str)) return false;

str = str.ToLowerInvariant();
ReadOnlySpan<char> strSpan = str.AsSpan();

GetContainedWordsCountAndWeight(strSpan, ref count, ref weight);
GetStandaloneWordsCountAndWeight(strSpan, ref count, ref weight);

// Cap the weight at 1.0
weight = MathF.Min(weight, 1.0f);
Expand All @@ -113,7 +120,7 @@ public static async Task HandleMessageAsync(SocketMessage message)
if (TryGetProfanityWeight(message.Content, out int count, out float weight))
{
// Respond to the message
await message.Channel.SendMessageAsync($"Profanity detected! {count} bad {(count > 1 ? "words" : "word")} with total weight: {weight}");
await message.Channel.SendMessageAsync($"Profanity detected! {count} bad {(count > 1 ? "words" : "word")}, shocking at {weight * 100f}%");
}
}
}

0 comments on commit 6957164

Please sign in to comment.