From 192a2954e88f5ac92216160241b4235837d5d093 Mon Sep 17 00:00:00 2001 From: Ned Palacios <npdoesmc@gmail.com> Date: Mon, 15 Apr 2024 10:50:48 +0800 Subject: [PATCH] fix: improve log analysis insertion and sorting --- server/cli/main.go | 50 +++++++++---------- server/logger/analyzer/internal/structures.go | 22 +------- server/logger/analyzer/nearest/nearest.go | 31 ++++++++++++ 3 files changed, 58 insertions(+), 45 deletions(-) create mode 100644 server/logger/analyzer/nearest/nearest.go diff --git a/server/cli/main.go b/server/cli/main.go index 2af5523..41811a7 100644 --- a/server/cli/main.go +++ b/server/cli/main.go @@ -7,10 +7,11 @@ import ( "log" "os" "path/filepath" + "slices" + "sort" "strings" "time" - "github.com/lithammer/fuzzysearch/fuzzy" "github.com/nedpals/bugbuddy/server/daemon" "github.com/nedpals/bugbuddy/server/daemon/types" "github.com/nedpals/bugbuddy/server/executor" @@ -18,6 +19,7 @@ import ( "github.com/nedpals/bugbuddy/server/logger" log_analyzer "github.com/nedpals/bugbuddy/server/logger/analyzer" errorquotient "github.com/nedpals/bugbuddy/server/logger/analyzer/error_quotient" + la_nearest "github.com/nedpals/bugbuddy/server/logger/analyzer/nearest" red "github.com/nedpals/bugbuddy/server/logger/analyzer/repeated_error_density" timetosolve "github.com/nedpals/bugbuddy/server/logger/analyzer/time_to_solve" "github.com/nedpals/bugbuddy/server/lsp_server" @@ -239,29 +241,20 @@ type analyzerResultEntry struct { } func (a *analyzerResultEntry) Write(name string, filePath string, value any) { + filePath = strings.TrimSpace(filePath) + + // check if the filePath is already in the list if _, ok := a.FilenamesIndices[filePath]; !ok { - // check if the filePath is already an alias if alias, ok := a.FilenameAliases[filePath]; ok { + // do not mutate the original file path filePath = alias - } else if found := fuzzy.RankFindFold(filePath, a.Filenames); len(found) != 0 { - // find the closest file name first before adding the value - foundPath := 
found[0].Target - - // check if the found path is a prefix of the file path - if found[0].Distance <= 5 && len(filePath) > len(foundPath) && strings.HasPrefix(filePath, foundPath) { - // if it is, replace the found path with the file path - a.FilenameAliases[foundPath] = filePath - a.FilenamesIndices[filePath] = a.FilenamesIndices[foundPath] - delete(a.FilenamesIndices, foundPath) - - // replace the found path with the file path - a.Filenames[a.FilenamesIndices[filePath]] = filePath - } else { - // if it is not, add the file path - a.FilenamesIndices[filePath] = len(a.Filenames) - a.Filenames = append(a.Filenames, filePath) - } - } else { + } else if nearest := la_nearest.FilenameNearest(filePath, a.FilenamesIndices, a.Filenames); nearest != filePath && strings.HasPrefix(filePath, nearest) { + // if it is, replace the found path with the file path + a.Filenames[a.FilenamesIndices[nearest]] = filePath + a.FilenameAliases[nearest] = filePath + a.FilenamesIndices[filePath] = a.FilenamesIndices[nearest] + delete(a.FilenamesIndices, nearest) + } else if _, ok := a.FilenamesIndices[filePath]; !ok { // if it is not, add the file path a.FilenamesIndices[filePath] = len(a.Filenames) a.Filenames = append(a.Filenames, filePath) @@ -418,16 +411,23 @@ var analyzeLogCmd = &cobra.Command{ sheet.SetColAutoWidth(aCellRow+2, adjustToTextWidth) } - for fileIdx, filePath := range result.Filenames { + // sort filenames + sortedFilenames := slices.Clone(result.Filenames) + sort.Slice(sortedFilenames, func(i, j int) bool { + return sortedFilenames[i] < sortedFilenames[j] + }) + + for idx, filePath := range sortedFilenames { if len(strings.TrimSpace(filePath)) == 0 { continue } - row, _ := sheet.Row(fileIdx + 1) + fileIdx := result.FilenamesIndices[filePath] + row, _ := sheet.Row(idx + 1) row.AddCell().SetValue(filePath) for _, analyzerName := range selectedAnalyzers { - cell, _ := sheet.Cell(fileIdx+1, analyzerCellLocations[analyzerName]) + cell, _ := sheet.Cell(idx+1, 
analyzerCellLocations[analyzerName]) switch analyzerName { case "eq": @@ -437,7 +437,7 @@ var analyzeLogCmd = &cobra.Command{ case "tts": cell.SetValue(result.TimeToSolve[fileIdx].Seconds()) - hhMmSsCell, _ := sheet.Cell(fileIdx+1, analyzerCellLocations[analyzerName]+1) + hhMmSsCell, _ := sheet.Cell(idx+1, analyzerCellLocations[analyzerName]+1) hhMmSsCell.SetValue(formatDuration(result.TimeToSolve[fileIdx])) } } diff --git a/server/logger/analyzer/internal/structures.go b/server/logger/analyzer/internal/structures.go index baa8566..e24cba3 100644 --- a/server/logger/analyzer/internal/structures.go +++ b/server/logger/analyzer/internal/structures.go @@ -3,7 +3,7 @@ package internal import ( "strings" - "github.com/lithammer/fuzzysearch/fuzzy" + "github.com/nedpals/bugbuddy/server/logger/analyzer/nearest" ) type ResultStore[T any] struct { @@ -14,25 +14,7 @@ type ResultStore[T any] struct { } func (r *ResultStore[T]) FilenameNearest(filePath string) string { - if _, ok := r.FilenamesIndices[filePath]; !ok { - // check if the filePath is already an alias - found := fuzzy.RankFindNormalizedFold(filePath, r.Filenames) - - // if the file path is not found, return the file path - if len(found) == 0 { - return filePath - } - - // find the closest file name first before adding the value - foundPath := found[0].Target - distance := found[0].Distance - - if distance <= 6 && (strings.HasPrefix(foundPath, filePath) || strings.HasPrefix(filePath, foundPath)) { - return foundPath - } - } - - return filePath + return nearest.FilenameNearest(filePath, r.FilenamesIndices, r.Filenames) } func (r *ResultStore[T]) checkAndUpdateFilename(filePath string) string { diff --git a/server/logger/analyzer/nearest/nearest.go b/server/logger/analyzer/nearest/nearest.go new file mode 100644 index 0000000..09fd3d5 --- /dev/null +++ b/server/logger/analyzer/nearest/nearest.go @@ -0,0 +1,31 @@ +package nearest + +import ( + "strings" + + "github.com/lithammer/fuzzysearch/fuzzy" +) + +const 
MAX_CLOSEST_FILE_DISTANCE = 6
+
+// FilenameNearest returns the entry of filenames with the lowest fuzzy rank distance to filePath when it is
+// within MAX_CLOSEST_FILE_DISTANCE and one path is a prefix of the other; otherwise it returns filePath unchanged.
+func FilenameNearest(filePath string, indices map[string]int, filenames []string) string {
+	if _, ok := indices[filePath]; ok {
+		return filePath // already tracked verbatim, nothing to resolve
+	}
+
+	// The ranks come back in input order, not sorted by distance, so select the lowest distance explicitly.
+	best, ok := fuzzy.Rank{}, false
+	for _, r := range fuzzy.RankFindNormalizedFold(filePath, filenames) {
+		if !ok || r.Distance < best.Distance {
+			best, ok = r, true
+		}
+	}
+
+	if ok && best.Distance <= MAX_CLOSEST_FILE_DISTANCE && (strings.HasPrefix(best.Target, filePath) || strings.HasPrefix(filePath, best.Target)) {
+		return best.Target
+	}
+
+	return filePath
+}