From 192a2954e88f5ac92216160241b4235837d5d093 Mon Sep 17 00:00:00 2001
From: Ned Palacios <npdoesmc@gmail.com>
Date: Mon, 15 Apr 2024 10:50:48 +0800
Subject: [PATCH] fix: improve log analysis insertion and sorting

---
 server/cli/main.go                            | 50 +++++++++----------
 server/logger/analyzer/internal/structures.go | 22 +-------
 server/logger/analyzer/nearest/nearest.go     | 31 ++++++++++++
 3 files changed, 58 insertions(+), 45 deletions(-)
 create mode 100644 server/logger/analyzer/nearest/nearest.go

diff --git a/server/cli/main.go b/server/cli/main.go
index 2af5523..41811a7 100644
--- a/server/cli/main.go
+++ b/server/cli/main.go
@@ -7,10 +7,11 @@ import (
 	"log"
 	"os"
 	"path/filepath"
+	"slices"
+	"sort"
 	"strings"
 	"time"
 
-	"github.com/lithammer/fuzzysearch/fuzzy"
 	"github.com/nedpals/bugbuddy/server/daemon"
 	"github.com/nedpals/bugbuddy/server/daemon/types"
 	"github.com/nedpals/bugbuddy/server/executor"
@@ -18,6 +19,7 @@ import (
 	"github.com/nedpals/bugbuddy/server/logger"
 	log_analyzer "github.com/nedpals/bugbuddy/server/logger/analyzer"
 	errorquotient "github.com/nedpals/bugbuddy/server/logger/analyzer/error_quotient"
+	la_nearest "github.com/nedpals/bugbuddy/server/logger/analyzer/nearest"
 	red "github.com/nedpals/bugbuddy/server/logger/analyzer/repeated_error_density"
 	timetosolve "github.com/nedpals/bugbuddy/server/logger/analyzer/time_to_solve"
 	"github.com/nedpals/bugbuddy/server/lsp_server"
@@ -239,29 +241,20 @@ type analyzerResultEntry struct {
 }
 
 func (a *analyzerResultEntry) Write(name string, filePath string, value any) {
+	filePath = strings.TrimSpace(filePath)
+
+	// check if the filePath is already in the list
 	if _, ok := a.FilenamesIndices[filePath]; !ok {
-		// check if the filePath is already an alias
 		if alias, ok := a.FilenameAliases[filePath]; ok {
+			// filePath is a known alias: continue with the path it maps to
 			filePath = alias
-		} else if found := fuzzy.RankFindFold(filePath, a.Filenames); len(found) != 0 {
-			// find the closest file name first before adding the value
-			foundPath := found[0].Target
-
-			// check if the found path is a prefix of the file path
-			if found[0].Distance <= 5 && len(filePath) > len(foundPath) && strings.HasPrefix(filePath, foundPath) {
-				// if it is, replace the found path with the file path
-				a.FilenameAliases[foundPath] = filePath
-				a.FilenamesIndices[filePath] = a.FilenamesIndices[foundPath]
-				delete(a.FilenamesIndices, foundPath)
-
-				// replace the found path with the file path
-				a.Filenames[a.FilenamesIndices[filePath]] = filePath
-			} else {
-				// if it is not, add the file path
-				a.FilenamesIndices[filePath] = len(a.Filenames)
-				a.Filenames = append(a.Filenames, filePath)
-			}
-		} else {
+		} else if nearest := la_nearest.FilenameNearest(filePath, a.FilenamesIndices, a.Filenames); nearest != filePath && strings.HasPrefix(filePath, nearest) {
+			// nearest is a shorter prefix of filePath: replace it with the full file path
+			a.Filenames[a.FilenamesIndices[nearest]] = filePath
+			a.FilenameAliases[nearest] = filePath
+			a.FilenamesIndices[filePath] = a.FilenamesIndices[nearest]
+			delete(a.FilenamesIndices, nearest)
+		} else if _, ok := a.FilenamesIndices[filePath]; !ok {
 			// if it is not, add the file path
 			a.FilenamesIndices[filePath] = len(a.Filenames)
 			a.Filenames = append(a.Filenames, filePath)
@@ -418,16 +411,23 @@ var analyzeLogCmd = &cobra.Command{
 				sheet.SetColAutoWidth(aCellRow+2, adjustToTextWidth)
 			}
 
-			for fileIdx, filePath := range result.Filenames {
+			// sort filenames
+			sortedFilenames := slices.Clone(result.Filenames)
+			sort.Slice(sortedFilenames, func(i, j int) bool {
+				return sortedFilenames[i] < sortedFilenames[j]
+			})
+
+			for idx, filePath := range sortedFilenames {
 				if len(strings.TrimSpace(filePath)) == 0 {
 					continue
 				}
 
-				row, _ := sheet.Row(fileIdx + 1)
+				fileIdx := result.FilenamesIndices[filePath]
+				row, _ := sheet.Row(idx + 1)
 				row.AddCell().SetValue(filePath)
 
 				for _, analyzerName := range selectedAnalyzers {
-					cell, _ := sheet.Cell(fileIdx+1, analyzerCellLocations[analyzerName])
+					cell, _ := sheet.Cell(idx+1, analyzerCellLocations[analyzerName])
 
 					switch analyzerName {
 					case "eq":
@@ -437,7 +437,7 @@ var analyzeLogCmd = &cobra.Command{
 					case "tts":
 						cell.SetValue(result.TimeToSolve[fileIdx].Seconds())
 
-						hhMmSsCell, _ := sheet.Cell(fileIdx+1, analyzerCellLocations[analyzerName]+1)
+						hhMmSsCell, _ := sheet.Cell(idx+1, analyzerCellLocations[analyzerName]+1)
 						hhMmSsCell.SetValue(formatDuration(result.TimeToSolve[fileIdx]))
 					}
 				}
diff --git a/server/logger/analyzer/internal/structures.go b/server/logger/analyzer/internal/structures.go
index baa8566..e24cba3 100644
--- a/server/logger/analyzer/internal/structures.go
+++ b/server/logger/analyzer/internal/structures.go
@@ -3,7 +3,7 @@ package internal
 import (
 	"strings"
 
-	"github.com/lithammer/fuzzysearch/fuzzy"
+	"github.com/nedpals/bugbuddy/server/logger/analyzer/nearest"
 )
 
 type ResultStore[T any] struct {
@@ -14,25 +14,7 @@ type ResultStore[T any] struct {
 }
 
 func (r *ResultStore[T]) FilenameNearest(filePath string) string {
-	if _, ok := r.FilenamesIndices[filePath]; !ok {
-		// check if the filePath is already an alias
-		found := fuzzy.RankFindNormalizedFold(filePath, r.Filenames)
-
-		// if the file path is not found, return the file path
-		if len(found) == 0 {
-			return filePath
-		}
-
-		// find the closest file name first before adding the value
-		foundPath := found[0].Target
-		distance := found[0].Distance
-
-		if distance <= 6 && (strings.HasPrefix(foundPath, filePath) || strings.HasPrefix(filePath, foundPath)) {
-			return foundPath
-		}
-	}
-
-	return filePath
+	return nearest.FilenameNearest(filePath, r.FilenamesIndices, r.Filenames)
 }
 
 func (r *ResultStore[T]) checkAndUpdateFilename(filePath string) string {
diff --git a/server/logger/analyzer/nearest/nearest.go b/server/logger/analyzer/nearest/nearest.go
new file mode 100644
index 0000000..09fd3d5
--- /dev/null
+++ b/server/logger/analyzer/nearest/nearest.go
@@ -0,0 +1,31 @@
+package nearest
+
+import (
+	"strings"
+
+	"github.com/lithammer/fuzzysearch/fuzzy"
+)
+
+const MAX_CLOSEST_FILE_DISTANCE = 6
+
+// FilenameNearest returns filePath when it is a key of indices; otherwise it
+// returns the closest fuzzy match in filenames (distance at most
+// MAX_CLOSEST_FILE_DISTANCE) that shares a prefix relation with filePath,
+// falling back to filePath when no candidate qualifies.
+func FilenameNearest(filePath string, indices map[string]int, filenames []string) string {
+	if _, ok := indices[filePath]; ok {
+		return filePath
+	}
+
+	// RankFindNormalizedFold returns matches in filenames order, not by
+	// distance, so scan every candidate instead of trusting the first one.
+	nearest, best := filePath, MAX_CLOSEST_FILE_DISTANCE+1
+	for _, rank := range fuzzy.RankFindNormalizedFold(filePath, filenames) {
+		related := strings.HasPrefix(rank.Target, filePath) || strings.HasPrefix(filePath, rank.Target)
+		if related && rank.Distance < best {
+			nearest, best = rank.Target, rank.Distance
+		}
+	}
+
+	return nearest
+}