diff --git a/README.md b/README.md index 5a59394..addf755 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,21 @@ All configuration is supplied via environment variables. You should supply at le | IGNORE_FILES | Comma separated list of files that will be ignored in the collection | .git,obsidian,.trash,README.md | No | | LOG_LEVEL | The minimum log level | INFO | No | +## Metrics + +The exporter collects metrics by parsing the contents of the markdown files present in the Zettelkasten. Currently, the exporter stores metrics for individual notes and also aggregated metrics describing the entire Zettelkasten. The combination of raw and pre-processed metrics allows for both flexibility and efficiency when querying the data, at the cost of slightly higher storage usage. The two sets of metrics are stored in the same InfluxDB bucket under different [measurement names](https://docs.influxdata.com/influxdb/cloud/reference/key-concepts/data-elements/#measurement). + +The following table describes all metrics collected by the exporter and their respective measurement names: + +| Measurement | Name | Description | +|-------------|----------------|-----------------------------------------| +| notes | link_count | Number of links in the note | +| notes | word_count | Number of words in the note | +| notes | backlink_count | Number of links that reference the note | +| total | note_count | Number of notes in the Zettelkasten | +| total | link_count | Number of links in the Zettelkasten | +| total | word_count | Number of words in the Zettelkasten | + ## Roadmap These are some features that I'd like to include in the future. 
diff --git a/dashboards/Zettelkasten-InfluxDB.json b/dashboards/Zettelkasten-InfluxDB.json index 859a935..fb8ca15 100644 --- a/dashboards/Zettelkasten-InfluxDB.json +++ b/dashboards/Zettelkasten-InfluxDB.json @@ -17,19 +17,34 @@ }, "editable": true, "fiscalYearStartMonth": 0, - "graphTooltip": 0, + "graphTooltip": 2, "id": 1, "links": [], "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 11, + "panels": [], + "title": "Overview", + "type": "row" + }, { "datasource": { "type": "influxdb", "uid": "${datasource}" }, + "description": "Number of notes created in the visualization period", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "fixedColor": "blue", + "mode": "fixed" }, "mappings": [], "thresholds": { @@ -38,10 +53,6 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] } @@ -50,9 +61,9 @@ }, "gridPos": { "h": 5, - "w": 3, + "w": 6, "x": 0, - "y": 0 + "y": 1 }, "id": 4, "options": { @@ -71,18 +82,18 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.0.0", + "pluginVersion": "11.0.1", "targets": [ { "datasource": { "type": "influxdb", "uid": "edogaymh9y96of" }, - "query": "import \"experimental/date/boundaries\"\n\nlastWeek = boundaries.week(week_offset: -1)\n\nfrom(bucket: v.defaultBucket)\n|> range(start: lastWeek.start)\n|> group()\n|> aggregateWindow(\n every: 1w,\n fn: (column, tables=<-) => tables |> distinct(column: column) |> count(),\n column: \"name\",\n createEmpty: false\n)\n|> increase()\n|> last()", + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"total\" and r[\"_field\"] == \"note_count\")\n |> spread()", "refId": "A" } ], - "title": "New notes this week", + "title": "New notes", "type": "stat" }, { @@ -90,10 +101,12 @@ "type": "influxdb", "uid": "${datasource}" }, + "description": "Number of links created in the visualization 
period", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "fixedColor": "purple", + "mode": "fixed" }, "mappings": [], "thresholds": { @@ -102,10 +115,6 @@ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] } @@ -114,9 +123,9 @@ }, "gridPos": { "h": 5, - "w": 3, - "x": 3, - "y": 0 + "w": 6, + "x": 6, + "y": 1 }, "id": 5, "options": { @@ -135,18 +144,80 @@ "textMode": "auto", "wideLayout": true }, - "pluginVersion": "11.0.0", + "pluginVersion": "11.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "edogaymh9y96of" + }, + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"total\" and r[\"_field\"] == \"link_count\")\n |> spread()", + "refId": "A" + } + ], + "title": "New links", + "type": "stat" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + "description": "Number of words written in the visualization period", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "orange", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.1", "targets": [ { "datasource": { "type": "influxdb", "uid": "edogaymh9y96of" }, - "query": "import \"experimental/date/boundaries\"\n\nlastWeek = boundaries.week(week_offset: -1)\n\nfrom(bucket: v.defaultBucket)\n|> range(start: lastWeek.start)\n|> group()\n|> aggregateWindow(\n every: 1w,\n fn: (column, tables=<-) => tables |> group(columns: 
[\"name\", \"_start\", \"_stop\"]) |> last() |> group(columns: [\"_start\", \"_stop\"]) |> sum(),\n column: \"name\",\n createEmpty: false\n)\n|> increase()\n|> last()", + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"total\" and r[\"_field\"] == \"word_count\")\n |> spread()", "refId": "A" } ], - "title": "New links this week", + "title": "New words", "type": "stat" }, { @@ -154,10 +225,74 @@ "type": "influxdb", "uid": "${datasource}" }, + "description": "Time to read all notes created in the visualization period", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" + "fixedColor": "yellow", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "m" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "edogaymh9y96of" + }, + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"total\" and r[\"_field\"] == \"word_count\")\n |> map(fn: (r) => ({r with _value: r._value / uint(v: 212)}))\n |> spread()", + "refId": "A" + } + ], + "title": "Reading time", + "type": "stat" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + "description": "Metrics from each markdown note in the Zettelkasten", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, "custom": { "align": "auto", @@ -165,7 +300,8 @@ 
"type": "auto" }, "filterable": true, - "inspect": false + "inspect": false, + "minWidth": 50 }, "mappings": [], "thresholds": { @@ -178,13 +314,26 @@ ] } }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Reading time" + }, + "properties": [ + { + "id": "unit", + "value": "m" + } + ] + } + ] }, "gridPos": { - "h": 15, - "w": 12, - "x": 12, - "y": 0 + "h": 12, + "w": 24, + "x": 0, + "y": 6 }, "id": 3, "options": { @@ -202,18 +351,18 @@ "sortBy": [ { "desc": true, - "displayName": "Links" + "displayName": "Backlinks" } ] }, - "pluginVersion": "11.0.0", + "pluginVersion": "11.0.1", "targets": [ { "datasource": { "type": "influxdb", "uid": "edogaymh9y96of" }, - "query": "from(bucket: v.defaultBucket)\n|> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n|> last()\n|> group()", + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"notes\")\n |> last()\n |> pivot(rowKey: [\"name\"], columnKey: [\"_field\"], valueColumn: \"_value\")\n |> map(fn: (r) => ({\n _measurement: r._measurement,\n name: r.name,\n backlink_count: r.backlink_count,\n link_count: r.link_count,\n word_count: r.word_count,\n time_to_read: r.word_count / uint(v: 212)\n }))\n |> group()", "refId": "A" } ], @@ -223,40 +372,55 @@ "id": "organize", "options": { "excludeByName": { - "_field": true, "_measurement": true, "_start": true, - "_stop": true, - "_time": true + "_stop": true }, "includeByName": {}, "indexByName": { - "_field": 5, - "_measurement": 6, - "_start": 2, - "_stop": 3, - "_time": 4, - "_value": 1, - "name": 0 + "_measurement": 0, + "backlink_count": 3, + "link_count": 2, + "name": 1, + "time_to_read": 5, + "word_count": 4 }, "renameByName": { - "_value": "Links", - "name": "Note" + "_start": "", + "backlink_count": "Backlinks", + "link_count": "Links", + "name": "Name", + "time_to_read": "Reading time", + "word_count": "Words" } } } ], "type": "table" 
}, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 12, + "panels": [], + "title": "Historical data", + "type": "row" + }, { "datasource": { "type": "influxdb", "uid": "${datasource}" }, + "description": "Historical evolution of the number of notes in the Zettelkasten", "fieldConfig": { "defaults": { "color": { - "fixedColor": "purple", + "fixedColor": "blue", "mode": "fixed" }, "custom": { @@ -276,6 +440,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -306,9 +473,9 @@ }, "gridPos": { "h": 8, - "w": 24, + "w": 12, "x": 0, - "y": 15 + "y": 19 }, "id": 1, "options": { @@ -330,11 +497,11 @@ "type": "influxdb", "uid": "${datasource}" }, - "query": "from(bucket: v.defaultBucket)\n|> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n|> group()\n|> aggregateWindow(\n every: v.windowPeriod,\n fn: (column, tables=<-) => tables |> distinct(column: column) |> count(),\n column: \"name\",\n createEmpty: false\n)", + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"total\")\n |> filter(fn: (r) => r[\"_field\"] == \"note_count\")", "refId": "A" } ], - "title": "Note count", + "title": "Notes", "type": "timeseries" }, { @@ -342,10 +509,11 @@ "type": "influxdb", "uid": "${datasource}" }, + "description": "Historical evolution of words in the Zettelkasten", "fieldConfig": { "defaults": { "color": { - "fixedColor": "blue", + "fixedColor": "orange", "mode": "fixed" }, "custom": { @@ -394,10 +562,100 @@ "overrides": [] }, "gridPos": { - "h": 9, - "w": 24, + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + 
"datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"total\")\n |> filter(fn: (r) => r[\"_field\"] == \"word_count\")", + "refId": "A" + } + ], + "title": "Words", + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + "description": "Historical evolution of the number of links in the Zettelkasten", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "purple", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, "x": 0, - "y": 23 + "y": 27 }, "id": 2, "options": { @@ -420,15 +678,288 @@ "type": "influxdb", "uid": "${datasource}" }, - "query": "from(bucket: v.defaultBucket)\n|> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n|> group()\n|> aggregateWindow(\n every: v.windowPeriod,\n fn: (column, tables=<-) => tables |> group(columns: [\"name\", \"_start\", \"_stop\"]) |> last() |> group(columns: [\"_start\", \"_stop\"]) |> sum(),\n column: \"name\",\n createEmpty: false\n)", + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => 
r[\"_measurement\"] == \"total\")\n |> filter(fn: (r) => r[\"_field\"] == \"link_count\")", + "refId": "A" + } + ], + "title": "Links", + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + "description": "Historical evolution of the total reading time for the Zettelkasten", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "yellow", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "m" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"total\")\n |> filter(fn: (r) => r[\"_field\"] == \"word_count\")\n |> map(fn: (r) => ({r with _value: r._value / uint(v: 212)}))", + "refId": "A" + } + ], + "title": "Reading time", + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + 
"description": "Historical evolution of the average number of links per note in the Zettelkasten", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"notes\" and r[\"_field\"] == \"link_count\")\n |> group(columns: [\"__time\"])\n |> aggregateWindow(\n every: v.windowPeriod,\n fn: (column, tables=<-) => tables |> mean(),\n column: \"name\",\n createEmpty: false\n)", + "refId": "A" + } + ], + "title": "Average link count", + "type": "timeseries" + }, + { + "datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + "description": "Historical evolution of the average number of words per note in the Zettelkasten", + "fieldConfig": { + "defaults": { 
+ "color": { + "fixedColor": "red", + "mode": "fixed" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.0.1", + "targets": [ + { + "datasource": { + "type": "influxdb", + "uid": "${datasource}" + }, + "query": "from(bucket: v.defaultBucket)\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r[\"_measurement\"] == \"notes\" and r[\"_field\"] == \"word_count\")\n |> group(columns: [\"__time\"])\n |> aggregateWindow(\n every: v.windowPeriod,\n fn: (column, tables=<-) => tables |> mean(),\n column: \"name\",\n createEmpty: false\n)", "refId": "A" } ], - "title": "Link count", + "title": "Average word count", "type": "timeseries" } ], - "refresh": "10s", + "refresh": "", "schemaVersion": 39, "tags": [], "templating": { @@ -437,7 +968,7 @@ "current": { "selected": false, "text": "influxdb", - "value": "bdp8tekqeihogd" + "value": "cdqmo6y19tc74d" }, "hide": 0, "includeAll": false, @@ -454,7 +985,7 @@ ] }, "time": { - "from": "now-1h", + "from": "now-30d", 
"to": "now" }, "timeRangeUpdatedDuringEditOrView": false, @@ -462,6 +993,6 @@ "timezone": "browser", "title": "Zettelkasten", "uid": "fdoghlpqzr5kwe", - "version": 2, + "version": 11, "weekStart": "" } \ No newline at end of file diff --git a/docs/assets/dashboard.png b/docs/assets/dashboard.png index 99653d2..61687de 100644 Binary files a/docs/assets/dashboard.png and b/docs/assets/dashboard.png differ diff --git a/internal/collector/collector.go b/internal/collector/collector.go index 35a29d1..4aaf65e 100644 --- a/internal/collector/collector.go +++ b/internal/collector/collector.go @@ -16,11 +16,13 @@ type CollectorConfig struct { IgnorePatterns []string } +// Collector represents a metrics collector. type Collector struct { config CollectorConfig storage storage.Storage } +// NewCollector creates a new collector func NewCollector(ignorePatterns []string, storage storage.Storage) Collector { return Collector{ config: CollectorConfig{ @@ -30,6 +32,7 @@ func NewCollector(ignorePatterns []string, storage storage.Storage) Collector { } } +// CollectMetrics collects all metrics from a Zettelkasten rooted in `root` and writes them to the storage with a timestamp of `collectionTime`. func (c *Collector) CollectMetrics(root fs.FS, collectionTime time.Time) error { slog.Debug("Collecting metrics", slog.Time("collection_time", collectionTime)) start := time.Now() @@ -38,18 +41,15 @@ func (c *Collector) CollectMetrics(root fs.FS, collectionTime time.Time) error { return err } - for name, metric := range collected.Notes { - c.storage.WriteMetric(name, metric, collectionTime) - } + c.storage.WriteMetrics(collected, collectionTime) slog.Debug("Collected metrics", slog.Duration("duration", time.Since(start))) return nil } +// collectMetrics collects all metrics from a Zettelkasten rooted in `root`. 
func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) { - noteCount := 0 - linkCount := 0 - notes := make(map[string]metrics.NoteMetrics) + noteMetrics := make(map[string]metrics.NoteMetrics) err := fs.WalkDir(root, ".", func(path string, dir fs.DirEntry, err error) error { if err != nil { @@ -80,10 +80,7 @@ func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) { slog.Error("Error reading file", slog.Any("error", err), slog.String("path", path)) return nil } - metrics := CollectNoteMetrics(content) - notes[path] = metrics - linkCount += metrics.LinkCount - noteCount += 1 + noteMetrics[nameFromFilename(path)] = CollectNoteMetrics(content) slog.Debug("collected metrics from file", slog.String("path", path), slog.Any("d", dir), slog.Any("err", err)) @@ -95,5 +92,30 @@ func (c *Collector) collectMetrics(root fs.FS) (metrics.Metrics, error) { return metrics.Metrics{}, err } - return metrics.Metrics{NoteCount: noteCount, LinkCount: linkCount, Notes: notes}, nil + zettelkastenMetrics := aggregateMetrics(noteMetrics) + return zettelkastenMetrics, nil +} + +// aggregateMetrics aggregates all individual note metrics into metrics in the context of a full Zettelkasten. 
+func aggregateMetrics(noteMetrics map[string]metrics.NoteMetrics) metrics.Metrics { + zettelkastenMetrics := metrics.Metrics{ + NoteCount: 0, + LinkCount: 0, + WordCount: 0, + Notes: make(map[string]metrics.NoteMetrics), + } + + for name, metric := range noteMetrics { + // Aggregate totals + zettelkastenMetrics.NoteCount += 1 + zettelkastenMetrics.LinkCount += metric.LinkCount + zettelkastenMetrics.WordCount += metric.WordCount + // Collect backlinks + for _, n := range noteMetrics { + metric.BacklinkCount += n.Links[name] + } + zettelkastenMetrics.Notes[name] = metric + } + + return zettelkastenMetrics } diff --git a/internal/collector/collector_test.go b/internal/collector/collector_test.go index 06e460f..ecb77ca 100644 --- a/internal/collector/collector_test.go +++ b/internal/collector/collector_test.go @@ -18,7 +18,7 @@ created-at: "2024-05-29" Testing a note with no links. But there's a [markdown link](./dir1/two.md) -[[./dir1/two.md]] +[[two]] ![[./image.png]] `)}, @@ -42,7 +42,7 @@ Links to [[one]] but also to [[two|two with an alias]] --- created-at: "2024-05-29" --- -Link to [one](./one.md) and also a full link [[./dir1/dir2/three]] and a [[./dir1/two.md|full link with .md]] +Link to [one](one.md) and also a full link [[./dir1/dir2/three]] and a [[dir1/two.md|full link with .md]] `)}, "ignoredir/foo": {Data: []byte("Foo contents")}, "ignoredir/bar": {Data: []byte("Bar contents")}, @@ -53,22 +53,31 @@ Link to [one](./one.md) and also a full link [[./dir1/dir2/three]] and a [[./dir expected := metrics.Metrics{ NoteCount: 4, LinkCount: 8, + WordCount: 43, Notes: map[string]metrics.NoteMetrics{ - "zettel/one.md": { - Links: map[string]int{"./dir1/two.md": 2}, - LinkCount: 2, + "one": { + Links: map[string]uint{"two": 2}, + LinkCount: 2, + WordCount: 13, + BacklinkCount: 3, }, - "zettel/dir1/two.md": { - Links: map[string]int{"one": 1}, - LinkCount: 1, + "two": { + Links: map[string]uint{"one": 1}, + LinkCount: 1, + WordCount: 5, + BacklinkCount: 4, }, - 
"zettel/dir1/dir2/three.md": { - Links: map[string]int{"one": 1, "two": 1}, - LinkCount: 2, + "three": { + Links: map[string]uint{"one": 1, "two": 1}, + LinkCount: 2, + WordCount: 10, + BacklinkCount: 1, }, - "zettel/four.md": { - Links: map[string]int{"./one.md": 1, "./dir1/dir2/three": 1, "./dir1/two.md": 1}, - LinkCount: 3, + "four": { + Links: map[string]uint{"one": 1, "three": 1, "two": 1}, + LinkCount: 3, + WordCount: 15, + BacklinkCount: 0, }, }, } diff --git a/internal/collector/note.go b/internal/collector/note.go index bebfd64..2a66616 100644 --- a/internal/collector/note.go +++ b/internal/collector/note.go @@ -2,7 +2,10 @@ package collector import ( "log/slog" - "slices" + "net/url" + "path/filepath" + "strings" + "unicode" "github.com/luissimas/zettelkasten-exporter/internal/metrics" "github.com/yuin/goldmark" @@ -17,47 +20,78 @@ var md = goldmark.New( ), ) +// CollectNoteMetrics collects all note metrics from a note with the given `content`. func CollectNoteMetrics(content []byte) metrics.NoteMetrics { - links := collectLinks(content) - linkCount := 0 - for _, v := range links { - linkCount += v + noteMetrics := metrics.NoteMetrics{ + Links: make(map[string]uint), + LinkCount: 0, + WordCount: 0, + BacklinkCount: 0, } - return metrics.NoteMetrics{Links: links, LinkCount: linkCount} -} - -func collectLinks(content []byte) map[string]int { - linkKinds := []ast.NodeKind{ast.KindLink, wikilink.Kind} reader := text.NewReader(content) root := md.Parser().Parse(reader) - links := make(map[string]int) err := ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) { - if entering && slices.Contains(linkKinds, n.Kind()) { - var target string - switch v := n.(type) { - case *ast.Link: - target = string(v.Destination) - case *wikilink.Node: - if v.Embed { - return ast.WalkContinue, nil - } - target = string(v.Target) - default: - return ast.WalkContinue, nil - } - - // TODO: check if target is not a http link - v, ok := links[target] - if !ok { - 
links[target] = 0 - } - links[target] = v + 1 + if !entering { + return ast.WalkContinue, nil + } + + linkTarget := "" + + switch v := n.(type) { + case *ast.Link: + linkTarget = string(v.Destination) + case *wikilink.Node: + linkTarget = string(v.Target) + case *ast.Paragraph, *ast.ListItem: + text := string(n.Text(content)) + fields := strings.FieldsFunc(string(text), func(r rune) bool { return unicode.IsSpace(r) || r == '\n' }) + noteMetrics.WordCount += uint(len(fields)) + default: + return ast.WalkContinue, nil + } + + if !isNoteTarget(linkTarget) { + return ast.WalkContinue, nil + } + + targetName := nameFromFilename(linkTarget) + v, ok := noteMetrics.Links[targetName] + if !ok { + noteMetrics.Links[targetName] = 0 } + noteMetrics.Links[targetName] = v + 1 return ast.WalkContinue, nil }) if err != nil { slog.Error("Error walking note AST", slog.Any("error", err)) } - slog.Debug("Collected links", slog.Any("links", links)) - return links + for _, linkCount := range noteMetrics.Links { + noteMetrics.LinkCount += linkCount + } + return noteMetrics +} + +// isNoteTarget determines whether a link target points to a markdown note. +func isNoteTarget(target string) bool { + // Empty strings are not valid targets + if target == "" { + return false + } + + // Check if target is a URL + u, err := url.Parse(target) + isUrl := err == nil && u.Scheme != "" && u.Host != "" + if isUrl { + return false + } + + // Check if target is either a markdown file or has no extension + extension := filepath.Ext(target) + isNoteTarget := extension == "" || extension == ".md" + return isNoteTarget +} + +// nameFromFilename extracts the base note name from a full path. 
+func nameFromFilename(filename string) string { + return strings.TrimSuffix(filepath.Base(filename), filepath.Ext(filename)) } diff --git a/internal/collector/note_test.go b/internal/collector/note_test.go index 20f223a..f369edb 100644 --- a/internal/collector/note_test.go +++ b/internal/collector/note_test.go @@ -17,48 +17,107 @@ func TestCollectNoteMetrics(t *testing.T) { name: "empty file", content: "", expected: metrics.NoteMetrics{ - Links: map[string]int{}, - LinkCount: 0, + Links: map[string]uint{}, + LinkCount: 0, + WordCount: 0, + BacklinkCount: 0, }, }, { - name: "wiki links", - content: "[[Link]]aksdjf[[something|another]]\n[[link]]", + name: "wiki links", + content: ` +[[Link]] some words [[something|another]] + +another [[link]]`, expected: metrics.NoteMetrics{ - Links: map[string]int{"Link": 1, "something": 1, "link": 1}, - LinkCount: 3, + Links: map[string]uint{"Link": 1, "something": 1, "link": 1}, + LinkCount: 3, + WordCount: 6, + BacklinkCount: 0, }, }, { name: "markdown link", content: "[Link](target.md)", expected: metrics.NoteMetrics{ - Links: map[string]int{"target.md": 1}, - LinkCount: 1, + Links: map[string]uint{"target": 1}, + LinkCount: 1, + WordCount: 1, + BacklinkCount: 0, }, }, { - name: "mixed links", - content: "okok[Link](target.md)\n**ddk**[[linked]]`test`[[another|link]]\n\n[test](yet-another.md)", + name: "repeated links", + content: "[[target|link]] [link](target.md) [[link]]", expected: metrics.NoteMetrics{ - Links: map[string]int{"target.md": 1, "linked": 1, "another": 1, "yet-another.md": 1}, - LinkCount: 4, + Links: map[string]uint{"target": 2, "link": 1}, + LinkCount: 3, + WordCount: 3, + BacklinkCount: 0, }, }, { - name: "repeated links", - content: "[[target.md|link]]\n[link](target.md)\n[[link]]", + name: "ignore links to non markdown files", + content: "![[note.md]] [[test.pdf]] ![[target.png]] ![](another.jpeg) [[link]] [](link)", expected: metrics.NoteMetrics{ - Links: map[string]int{"target.md": 2, "link": 1}, - 
LinkCount: 3, + Links: map[string]uint{"link": 2, "note": 1}, + LinkCount: 3, + WordCount: 4, + BacklinkCount: 0, }, }, { - name: "ignore embeddedlinks", - content: "![[target.png]]\n!()[another.jpeg]\n[[link]]", + name: "ignore http links", + content: "[[one]] [this is an http link](https://go.dev/) [[not/an/http/link]]", + expected: metrics.NoteMetrics{ + Links: map[string]uint{"one": 1, "link": 1}, + LinkCount: 2, + WordCount: 7, + BacklinkCount: 0, + }, + }, + { + name: "mixed links", + content: ` +Ok [Link](target.md). + +Another paragraph **bold text** and [[linked]] /test/ [[another|link]]. + +> Quote in [test](yet-another.md) + +A list + +- One [[link-unordered.md]] +- Two + +Another list: + +1. First +2. Second [link](link-ordered.md)`, + expected: metrics.NoteMetrics{ + Links: map[string]uint{"target": 1, "linked": 1, "another": 1, "yet-another": 1, "link-unordered": 1, "link-ordered": 1}, + LinkCount: 6, + WordCount: 23, + BacklinkCount: 0, + }, + }, + { + name: "long note", + content: ` +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. + +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. 
Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. + +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. + +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis. 
+ +Lorem ipsum dolor sit amet, officia excepteur ex fugiat reprehenderit enim labore culpa sint ad nisi Lorem pariatur mollit ex esse exercitation amet. Nisi anim cupidatat excepteur officia. Reprehenderit nostrud nostrud ipsum Lorem est aliquip amet voluptate voluptate dolor minim nulla est proident. Nostrud officia pariatur ut officia. Sit irure elit esse ea nulla sunt ex occaecat reprehenderit commodo officia dolor Lorem duis laboris cupidatat officia voluptate. Culpa proident adipisicing id nulla nisi laboris ex in Lorem sunt duis officia eiusmod. Aliqua reprehenderit commodo ex non excepteur duis sunt velit enim. Voluptate laboris sint cupidatat ullamco ut ea consectetur et est culpa et culpa duis.`, expected: metrics.NoteMetrics{ - Links: map[string]int{"link": 1}, - LinkCount: 1, + Links: map[string]uint{}, + LinkCount: 0, + WordCount: 525, + BacklinkCount: 0, }, }, } @@ -66,7 +125,7 @@ func TestCollectNoteMetrics(t *testing.T) { for _, d := range data { t.Run(d.name, func(t *testing.T) { result := CollectNoteMetrics([]byte(d.content)) - assert.Equal(t, d.expected.Links, result.Links) + assert.Equal(t, d.expected, result) }) } } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index b0901f1..f6279b6 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -1,12 +1,15 @@ package metrics type Metrics struct { - NoteCount int - LinkCount int + NoteCount uint + LinkCount uint + WordCount uint Notes map[string]NoteMetrics } type NoteMetrics struct { - Links map[string]int - LinkCount int + Links map[string]uint + LinkCount uint + WordCount uint + BacklinkCount uint } diff --git a/internal/storage/fake.go b/internal/storage/fake.go index 92be232..8be9300 100644 --- a/internal/storage/fake.go +++ b/internal/storage/fake.go @@ -14,8 +14,7 @@ func NewFakeStorage() FakeStorage { return FakeStorage{} } -func (f FakeStorage) WriteMetric(noteName string, metric metrics.NoteMetrics, timestamp time.Time) { - +func (f FakeStorage) 
WriteMetrics(zettelkastenMetrics metrics.Metrics, timestamp time.Time) { } func (f FakeStorage) IsEmpty() bool { diff --git a/internal/storage/influxdb.go b/internal/storage/influxdb.go index d7c4cf2..777325c 100644 --- a/internal/storage/influxdb.go +++ b/internal/storage/influxdb.go @@ -5,11 +5,13 @@ import ( influxdb2 "github.com/influxdata/influxdb-client-go/v2" "github.com/influxdata/influxdb-client-go/v2/api" + "github.com/luissimas/zettelkasten-exporter/internal/metrics" ) -// The measurement name to be used for all metrics within the InfluxDB bucket. -const measurementName = "notes" +// The measurement names to be used for metrics within the InfluxDB bucket. +const notesMeasurementName = "notes" +const totalMeasurementName = "total" // InfluxDBStorage represents the implementation of a metric storage using InfluxDB. type InfluxDBStorage struct { @@ -25,12 +27,33 @@ func NewInfluxDBStorage(url, org, bucket, token string) InfluxDBStorage { return InfluxDBStorage{writeAPI: writeAPI, queryAPI: queryAPI} } -func (i InfluxDBStorage) WriteMetric(noteName string, metric metrics.NoteMetrics, timestamp time.Time) { +// WriteMetrics writes the aggregated and per-note metrics in `zettelkastenMetrics` to the storage with `timestamp`. 
+func (i InfluxDBStorage) WriteMetrics(zettelkastenMetrics metrics.Metrics, timestamp time.Time) { + // Aggregated metrics point := influxdb2.NewPoint( - measurementName, - map[string]string{"name": noteName}, - map[string]interface{}{"link_count": metric.LinkCount}, + totalMeasurementName, + map[string]string{}, + map[string]interface{}{ + "note_count": zettelkastenMetrics.NoteCount, + "link_count": zettelkastenMetrics.LinkCount, + "word_count": zettelkastenMetrics.WordCount, + }, timestamp, ) i.writeAPI.WritePoint(point) + + // Individual note metrics + for name, metric := range zettelkastenMetrics.Notes { + point := influxdb2.NewPoint( + notesMeasurementName, + map[string]string{"name": name}, + map[string]interface{}{ + "link_count": metric.LinkCount, + "word_count": metric.WordCount, + "backlink_count": metric.BacklinkCount, + }, + timestamp, + ) + i.writeAPI.WritePoint(point) + } } diff --git a/internal/storage/storage.go b/internal/storage/storage.go index 60d5395..7861ec9 100644 --- a/internal/storage/storage.go +++ b/internal/storage/storage.go @@ -8,6 +8,6 @@ import ( // Storage represents a storage for metrics. type Storage interface { - // WriteMetric writes the note metric to the storage. - WriteMetric(noteName string, metric metrics.NoteMetrics, timestamp time.Time) + // WriteMetrics writes the `zettelkastenMetrics` to the storage. + WriteMetrics(zettelkastenMetrics metrics.Metrics, timestamp time.Time) } diff --git a/internal/zettelkasten/git.go b/internal/zettelkasten/git.go index ccf1d1f..a380fc9 100644 --- a/internal/zettelkasten/git.go +++ b/internal/zettelkasten/git.go @@ -30,8 +30,7 @@ func (g GitZettelkasten) GetRoot() fs.FS { return os.DirFS(g.rootPath) } -// Ensure makes sure that the git repository is valid and updated with the -// latest changes from the remote. +// Ensure makes sure that the git repository is valid and updated with the latest changes from the remote. 
func (g GitZettelkasten) Ensure() error { f, err := os.Stat(g.rootPath) if errors.Is(err, fs.ErrNotExist) {