From 9b1c91f60d7f03359f740d4b42c0a77687716e1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Simas?= Date: Tue, 2 Jul 2024 08:38:46 -0300 Subject: [PATCH] feat: collect backlinks Adds support for collecting backlinks. For simplicity, we now assume that all notes have unique base file names, since notes and link targets are matched by base name only. --- internal/collector/collector.go | 38 ++++++++++++++++++++-------- internal/collector/collector_test.go | 22 ++++++++-------- internal/collector/note.go | 12 ++++++--- internal/collector/note_test.go | 12 ++++----- 4 files changed, 54 insertions(+), 30 deletions(-) diff --git a/internal/collector/collector.go b/internal/collector/collector.go index 7fa9cf1..09ad738 100644 --- a/internal/collector/collector.go +++ b/internal/collector/collector.go @@ -52,10 +52,7 @@ func (c *Collector) CollectMetrics(root fs.FS, collectionTime time.Time) error { // collectMetrics collects all metrics from a Zettelkasten rooted in `root`. func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) { - var noteCount uint - var linkCount uint - var wordCount uint - notes := make(map[string]metrics.NoteMetrics) + noteMetrics := make(map[string]metrics.NoteMetrics) err := fs.WalkDir(root, ".", func(path string, dir fs.DirEntry, err error) error { if err != nil { @@ -86,11 +83,7 @@ func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) { slog.Error("Error reading file", slog.Any("error", err), slog.String("path", path)) return nil } - metrics := CollectNoteMetrics(content) - notes[path] = metrics - linkCount += metrics.LinkCount - wordCount += metrics.WordCount - noteCount += 1 + noteMetrics[nameFromFilename(path)] = CollectNoteMetrics(content) slog.Debug("collected metrics from file", slog.String("path", path), slog.Any("d", dir), slog.Any("err", err)) @@ -102,5 +95,30 @@ func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) { return metrics.Metrics{}, err } - return metrics.Metrics{NoteCount: noteCount, LinkCount: linkCount, WordCount: 
wordCount, Notes: notes}, nil + zettelkastenMetrics := aggregateMetrics(noteMetrics) + return zettelkastenMetrics, nil +} + +// aggregateMetrics aggregates all individual note metrics into metrics in the context of a full Zettelkasten. +func aggregateMetrics(noteMetrics map[string]metrics.NoteMetrics) metrics.Metrics { + zettelkastenMetrics := metrics.Metrics{ + NoteCount: 0, + LinkCount: 0, + WordCount: 0, + Notes: make(map[string]metrics.NoteMetrics), + } + + for name, metric := range noteMetrics { + // Aggregate totals + zettelkastenMetrics.NoteCount += 1 + zettelkastenMetrics.LinkCount += metric.LinkCount + zettelkastenMetrics.WordCount += metric.WordCount + // Collect backlinks + for _, n := range noteMetrics { + metric.BacklinkCount += n.Links[name] + } + zettelkastenMetrics.Notes[name] = metric + } + + return zettelkastenMetrics } diff --git a/internal/collector/collector_test.go b/internal/collector/collector_test.go index 6540054..ecb77ca 100644 --- a/internal/collector/collector_test.go +++ b/internal/collector/collector_test.go @@ -18,7 +18,7 @@ created-at: "2024-05-29" Testing a note with no links. 
But there's a [markdown link](./dir1/two.md) -[[./dir1/two.md]] +[[two]] ![[./image.png]] `)}, @@ -42,7 +42,7 @@ Links to [[one]] but also to [[two|two with an alias]] --- created-at: "2024-05-29" --- -Link to [one](./one.md) and also a full link [[./dir1/dir2/three]] and a [[./dir1/two.md|full link with .md]] +Link to [one](one.md) and also a full link [[./dir1/dir2/three]] and a [[dir1/two.md|full link with .md]] `)}, "ignoredir/foo": {Data: []byte("Foo contents")}, "ignoredir/bar": {Data: []byte("Bar contents")}, @@ -55,26 +55,26 @@ Link to [one](./one.md) and also a full link [[./dir1/dir2/three]] and a [[./dir LinkCount: 8, WordCount: 43, Notes: map[string]metrics.NoteMetrics{ - "zettel/one.md": { - Links: map[string]uint{"./dir1/two.md": 2}, + "one": { + Links: map[string]uint{"two": 2}, LinkCount: 2, WordCount: 13, - BacklinkCount: 0, + BacklinkCount: 3, }, - "zettel/dir1/two.md": { + "two": { Links: map[string]uint{"one": 1}, LinkCount: 1, WordCount: 5, - BacklinkCount: 0, + BacklinkCount: 4, }, - "zettel/dir1/dir2/three.md": { + "three": { Links: map[string]uint{"one": 1, "two": 1}, LinkCount: 2, WordCount: 10, - BacklinkCount: 0, + BacklinkCount: 1, }, - "zettel/four.md": { - Links: map[string]uint{"./one.md": 1, "./dir1/dir2/three": 1, "./dir1/two.md": 1}, + "four": { + Links: map[string]uint{"one": 1, "three": 1, "two": 1}, LinkCount: 3, WordCount: 15, BacklinkCount: 0, diff --git a/internal/collector/note.go b/internal/collector/note.go index f66e3e0..2a66616 100644 --- a/internal/collector/note.go +++ b/internal/collector/note.go @@ -54,11 +54,12 @@ func CollectNoteMetrics(content []byte) metrics.NoteMetrics { return ast.WalkContinue, nil } - v, ok := noteMetrics.Links[linkTarget] + targetName := nameFromFilename(linkTarget) + v, ok := noteMetrics.Links[targetName] if !ok { - noteMetrics.Links[linkTarget] = 0 + noteMetrics.Links[targetName] = 0 } - noteMetrics.Links[linkTarget] = v + 1 + noteMetrics.Links[targetName] = v + 1 return ast.WalkContinue, 
nil }) if err != nil { @@ -89,3 +90,8 @@ func isNoteTarget(target string) bool { isNoteTarget := extension == "" || extension == ".md" return isNoteTarget } + +// nameFromFilename extracts the base note name from a full path. +func nameFromFilename(filename string) string { + return strings.TrimSuffix(filepath.Base(filename), filepath.Ext(filename)) +} diff --git a/internal/collector/note_test.go b/internal/collector/note_test.go index 871babe..f369edb 100644 --- a/internal/collector/note_test.go +++ b/internal/collector/note_test.go @@ -40,7 +40,7 @@ another [[link]]`, name: "markdown link", content: "[Link](target.md)", expected: metrics.NoteMetrics{ - Links: map[string]uint{"target.md": 1}, + Links: map[string]uint{"target": 1}, LinkCount: 1, WordCount: 1, BacklinkCount: 0, @@ -48,9 +48,9 @@ another [[link]]`, }, { name: "repeated links", - content: "[[target.md|link]] [link](target.md) [[link]]", + content: "[[target|link]] [link](target.md) [[link]]", expected: metrics.NoteMetrics{ - Links: map[string]uint{"target.md": 2, "link": 1}, + Links: map[string]uint{"target": 2, "link": 1}, LinkCount: 3, WordCount: 3, BacklinkCount: 0, @@ -60,7 +60,7 @@ another [[link]]`, name: "ignore links to non markdown files", content: "![[note.md]] [[test.pdf]] ![[target.png]] ![](another.jpeg) [[link]] [](link)", expected: metrics.NoteMetrics{ - Links: map[string]uint{"link": 2, "note.md": 1}, + Links: map[string]uint{"link": 2, "note": 1}, LinkCount: 3, WordCount: 4, BacklinkCount: 0, @@ -70,7 +70,7 @@ another [[link]]`, name: "ignore http links", content: "[[one]] [this is an http link](https://go.dev/) [[not/an/http/link]]", expected: metrics.NoteMetrics{ - Links: map[string]uint{"one": 1, "not/an/http/link": 1}, + Links: map[string]uint{"one": 1, "link": 1}, LinkCount: 2, WordCount: 7, BacklinkCount: 0, @@ -95,7 +95,7 @@ Another list: 1. First 2. 
Second [link](link-ordered.md)`, expected: metrics.NoteMetrics{ - Links: map[string]uint{"target.md": 1, "linked": 1, "another": 1, "yet-another.md": 1, "link-unordered.md": 1, "link-ordered.md": 1}, + Links: map[string]uint{"target": 1, "linked": 1, "another": 1, "yet-another": 1, "link-unordered": 1, "link-ordered": 1}, LinkCount: 6, WordCount: 23, BacklinkCount: 0,