From 92b2a540e6ba52c7f0e1cbff1c7b9a12d21d2094 Mon Sep 17 00:00:00 2001 From: Jack Williamson <53199061+JackWilli@users.noreply.github.com> Date: Thu, 7 Nov 2024 16:19:50 -0500 Subject: [PATCH 1/4] feat: add graph serialization --- .gitignore | 2 ++ pkg/graph/graph.go | 85 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 pkg/graph/graph.go diff --git a/.gitignore b/.gitignore index 6f72f89..4dc0fa9 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ go.work.sum # env file .env + +oolong \ No newline at end of file diff --git a/pkg/graph/graph.go b/pkg/graph/graph.go new file mode 100644 index 0000000..45c2b6d --- /dev/null +++ b/pkg/graph/graph.go @@ -0,0 +1,85 @@ +package graph + +import ( + "encoding/json" + "path/filepath" + + "github.com/oolong-sh/oolong/pkg/keywords" + "github.com/oolong-sh/oolong/pkg/notes" +) + +type NodeJSON struct { + ID string `json:"id"` + Name string `json:"name"` + Val int `json:"val"` +} + +type LinkJSON struct { + Source string `json:"source"` + Target string `json:"target"` +} + +type Graph struct { + Nodes []NodeJSON `json:"nodes"` + Links []LinkJSON `json:"links"` +} + +func clamp(value, min, max float64) float64 { + if value < min { + return min + } + if value > max { + return max + } + return value +} + +const NOTE_NODE_VAL = 50 + +func SerializeGraph(keywordMap map[string]keywords.Keyword, notes []notes.Note, lowerBound, upperBound float64) (string, error) { + nodes := []NodeJSON{} + links := []LinkJSON{} + + for _, keyword := range keywordMap { + clampedWeight := clamp(keyword.Weight, lowerBound, upperBound) + nodes = append(nodes, NodeJSON{ + ID: keyword.Keyword, + Name: keyword.Keyword, + Val: int(clampedWeight), + }) + } + + for _, note := range notes { + // Add Note node + noteID := note.Path + noteName := filepath.Base(note.Path) // /home/patrick/notes/home/blogs/bayes.md -> bayes.md + nodes = append(nodes, NodeJSON{ + ID: noteID, + Name: noteName, + Val: NOTE_NODE_VAL, + }) + + // Link notes to keywords + for keywordID := range note.Weights { + keyword, exists := keywordMap[keywordID] + if exists { + links = append(links, LinkJSON{ + Source: noteID, + Target: keyword.Keyword, + }) + } + } + } + + graph := Graph{ + Nodes: nodes, + Links: links, + } + + jsonData, err := json.Marshal(graph) + if err != nil { + return "", err + } + + return string(jsonData), nil +} From dbedc4fc2d828c9d59c950b087deabefbe339555 Mon Sep 17 00:00:00 2001 From: Jack Williamson <53199061+JackWilli@users.noreply.github.com> Date: Thu, 7 Nov 2024 16:30:52 -0500 Subject: [PATCH 2/4] fix: use float instead of int for Node val --- pkg/graph/graph.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/graph/graph.go b/pkg/graph/graph.go index 45c2b6d..0ea2d35 100644 --- a/pkg/graph/graph.go +++ b/pkg/graph/graph.go @@ -9,9 +9,9 @@ import ( ) type NodeJSON struct { - ID string `json:"id"` - Name string `json:"name"` - Val int `json:"val"` + ID string `json:"id"` + Name string `json:"name"` + Val float64 `json:"val"` } type LinkJSON struct { @@ -45,7 +45,7 @@ func SerializeGraph(keywordMap map[string]keywords.Keyword, notes []notes.Note, nodes = append(nodes, NodeJSON{ ID: keyword.Keyword, Name: keyword.Keyword, - Val: int(clampedWeight), + Val: clampedWeight, }) } From 36c24f248de0df7f8472a3f80f3b5c6f33cd1caf Mon Sep 17 00:00:00 2001 From: ptdewey Date: Thu, 7 Nov 2024 16:50:30 -0500 Subject: [PATCH 3/4] feat: added extra keyword map function for serializing to graph --- internal/state/state.go | 12 ++++++++++++ pkg/graph/graph.go | 6 +++--- pkg/keywords/keywords.go | 18 ++++++++++++++++++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/internal/state/state.go b/internal/state/state.go index 698556c..2092d44 100644 --- a/internal/state/state.go +++ b/internal/state/state.go @@ -7,6 +7,7 @@ import ( "github.com/oolong-sh/oolong/internal/documents" "github.com/oolong-sh/oolong/internal/linking/ngrams" + "github.com/oolong-sh/oolong/pkg/graph" "github.com/oolong-sh/oolong/pkg/keywords" "github.com/oolong-sh/oolong/pkg/notes" ) @@ -87,5 +88,16 @@ func UpdateState(docs []*documents.Document) error { panic(err) } + kw := keywords.NGramsToKeywordsMap(state.NGrams) + n := notes.DocumentsToNotes(state.Documents) + + dat, err := graph.SerializeGraph(kw, n, 0.1, 80) + if err != nil { + return err + } + if err := os.WriteFile("graph.json", dat, 0644); err != nil { + return err + } + return nil } diff --git a/pkg/graph/graph.go b/pkg/graph/graph.go index 0ea2d35..f9c6610 100644 --- a/pkg/graph/graph.go +++ b/pkg/graph/graph.go @@ -36,7 +36,7 @@ func clamp(value, min, max float64) float64 { const NOTE_NODE_VAL = 50 -func SerializeGraph(keywordMap map[string]keywords.Keyword, notes []notes.Note, lowerBound, upperBound float64) (string, error) { +func SerializeGraph(keywordMap map[string]keywords.Keyword, notes []notes.Note, lowerBound, upperBound float64) ([]byte, error) { nodes := []NodeJSON{} links := []LinkJSON{} @@ -78,8 +78,8 @@ func SerializeGraph(keywordMap map[string]keywords.Keyword, notes []notes.Note, jsonData, err := json.Marshal(graph) if err != nil { - return "", err + return nil, err } - return string(jsonData), nil + return jsonData, nil } diff --git a/pkg/keywords/keywords.go b/pkg/keywords/keywords.go index c15a166..3d2f002 100644 --- a/pkg/keywords/keywords.go +++ b/pkg/keywords/keywords.go @@ -59,3 +59,21 @@ func NGramsToKeywords(ngmap map[string]*ngrams.NGram) []Keyword { return keywords } + +func NGramsToKeywordsMap(ngmap map[string]*ngrams.NGram) map[string]Keyword { + keywords := map[string]Keyword{} + threshold := 8.0 + + for k, v := range ngmap { + w := v.Weight() + + if w > threshold { + keywords[k] = Keyword{ + Keyword: k, + Weight: w, + } + } + } + + return keywords +} From 6ea7bcd62b5ddb95f62806549a46cd6911caf581 Mon Sep 17 00:00:00 2001 From: Jack Williamson <53199061+JackWilli@users.noreply.github.com> Date: Fri, 8 Nov 2024 13:53:36 -0500 Subject: [PATCH 4/4] fix: don't add keywords below the lower bound instead of clamping --- pkg/graph/graph.go | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/pkg/graph/graph.go b/pkg/graph/graph.go index f9c6610..89abaa6 100644 --- a/pkg/graph/graph.go +++ b/pkg/graph/graph.go @@ -41,16 +41,19 @@ func SerializeGraph(keywordMap map[string]keywords.Keyword, notes []notes.Note, links := []LinkJSON{} for _, keyword := range keywordMap { - clampedWeight := clamp(keyword.Weight, lowerBound, upperBound) - nodes = append(nodes, NodeJSON{ - ID: keyword.Keyword, - Name: keyword.Keyword, - Val: clampedWeight, - }) + // Only add nodes above the minimum threshold + if keyword.Weight >= lowerBound { + clampedWeight := clamp(keyword.Weight, lowerBound, upperBound) + nodes = append(nodes, NodeJSON{ + ID: keyword.Keyword, + Name: keyword.Keyword, + Val: clampedWeight, + }) + } } for _, note := range notes { - // Add Note node + // Add Note node with a fixed value noteID := note.Path noteName := filepath.Base(note.Path) // /home/patrick/notes/home/blogs/bayes.md -> bayes.md nodes = append(nodes, NodeJSON{ @@ -62,7 +65,7 @@ func SerializeGraph(keywordMap map[string]keywords.Keyword, notes []notes.Note, // Link notes to keywords for keywordID := range note.Weights { keyword, exists := keywordMap[keywordID] - if exists { + if exists && keyword.Weight >= lowerBound { links = append(links, LinkJSON{ Source: noteID, Target: keyword.Keyword,