Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: file/dir watching #4

Merged
merged 6 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/go-test.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
name: Go

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Oolong looks for a configuration file at `~/.oolong.json`
3
],
"noteDirectories": [
"~/notes",
"~/notes"
],
"allowedExtensions": [
".md",
Expand Down
17 changes: 17 additions & 0 deletions examples/oolong.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"ngramRange": [
1,
2,
3
],
"noteDirectories": [],
"allowedExtensions": [
".md",
".mdx",
".tex",
".typ"
],
"pluginPaths": [
"./scripts/daily_note.lua"
]
}
5 changes: 5 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@ require (
github.com/aaaton/golem/v4 v4.0.0
github.com/aaaton/golem/v4/dicts/en v1.0.1
)

require (
github.com/fsnotify/fsnotify v1.8.0
golang.org/x/sys v0.13.0 // indirect
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,9 @@ github.com/aaaton/golem/v4 v4.0.0 h1:YHieBS+5Fqir298nJ7fk3EvMcKM/+T5gpMRt4TIAiZ8
github.com/aaaton/golem/v4 v4.0.0/go.mod h1:OfK/S5v9Exsx1yO21WorREuIVV+Y5K2hygP0A9oJCCI=
github.com/aaaton/golem/v4/dicts/en v1.0.1 h1:/BsOsh8JTgTkuevwM9axPnAi9CD4rK7TWHNdW/6V3Uo=
github.com/aaaton/golem/v4/dicts/en v1.0.1/go.mod h1:1YKRrQNng+KbS+peA7sj3TIa8eqR6T2UqdJ+Tc9xeoA=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10 changes: 6 additions & 4 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@ type OolongConfig struct {
NGramRange []int `json:"ngramRange"`
AllowedExtensions []string `json:"allowedExtensions"`
PluginPaths []string `json:"pluginPaths"`
IgnoreDirectories []string `json:"ignoredDirectories"`
}

func Config() OolongConfig { return config }

func NotesDirPaths() []string { return config.NotesDirPaths }
func NGramRange() []int { return config.NGramRange }
func AllowedExtensions() []string { return config.AllowedExtensions }
func PluginPaths() []string { return config.PluginPaths }
func NotesDirPaths() []string { return config.NotesDirPaths }
func NGramRange() []int { return config.NGramRange }
func AllowedExtensions() []string { return config.AllowedExtensions }
func PluginPaths() []string { return config.PluginPaths }
func IgnoredDirectories() []string { return config.IgnoreDirectories }

// TODO: file watcher for config file, reload on change

Expand Down
11 changes: 11 additions & 0 deletions internal/daemon/daemon.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package daemon

import "github.com/oolong-sh/oolong/internal/config"

// Launch perpetually running watchers and run application in the background as a daemon
func Run() {
go runNotesDirsWatcher(config.NotesDirPaths()...)

// run forever
<-make(chan struct{})
}
83 changes: 83 additions & 0 deletions internal/daemon/watcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package daemon

import (
"errors"
"io/fs"
"log"
"path/filepath"
"slices"
"time"

"github.com/fsnotify/fsnotify"
"github.com/oolong-sh/oolong/internal/config"
"github.com/oolong-sh/oolong/internal/documents"
)

// runNotesDirsWatcher registers an fsnotify watch on every directory found
// beneath dirs, skipping any directory whose base name is listed in the
// configured ignored directories, and then blocks handling watcher events.
// Each write event triggers a re-read of the modified file.
//
// It is meant to run for the lifetime of the process and only returns on
// failure: the watcher could not be created, a root directory could not be
// walked, a watch could not be added, or one of the watcher channels closed.
func runNotesDirsWatcher(dirs ...string) error {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return err
	}
	defer watcher.Close()

	dirIgnores := config.IgnoredDirectories()

	for _, dir := range dirs {
		// TODO: add oolong ignore system to blacklist certain subdirs/files
		walkErr := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
			if err != nil {
				// d is nil when the root itself could not be stat'ed; there is
				// nothing to watch in that case, so report the failure.
				if d == nil {
					return err
				}
				// An entry below the root could not be read: keep walking the
				// rest of the tree instead of giving up on the whole directory.
				log.Printf("Skipping '%s': %v", path, err)
				return nil
			}

			if !d.IsDir() {
				return nil
			}

			// NOTE: this may not be the exact desired behavior for ignores
			// - this logic also needs to be replicated in the document reader
			if slices.Contains(dirIgnores, filepath.Base(path)) {
				return filepath.SkipDir
			}

			// TEST: this may need to add path as absolute to get correct results
			if err := watcher.Add(path); err != nil {
				return err
			}
			log.Println("Added watcher on", path)

			return nil
		})
		if walkErr != nil {
			return walkErr
		}
	}

	// watcher handler; the whole function is expected to run as a goroutine,
	// so the event loop runs inline.
	for {
		select {
		case event, ok := <-watcher.Events:
			if !ok {
				log.Println("Watcher event channel returned bad result.")
				return errors.New("watcher events channel closed")
			}

			if !event.Has(fsnotify.Write) {
				continue
			}
			log.Println("Modified file:", event.Name)

			// write event is sent on write start, wait 500ms for write to finish
			time.Sleep(500 * time.Millisecond)

			// re-read document; a failed read must not take the watcher down
			// TODO: add dedup timer to prevent multi-write calls
			if err := documents.ReadDocuments(event.Name); err != nil {
				log.Printf("Failed to re-read '%s': %v", event.Name, err)
			}
		case err, ok := <-watcher.Errors:
			if !ok {
				return errors.New("watcher errors channel closed")
			}
			log.Println("error:", err)
		}
	}
}
146 changes: 57 additions & 89 deletions internal/documents/corpus.go
Original file line number Diff line number Diff line change
@@ -1,122 +1,90 @@
package documents

import (
"fmt"
"io/fs"
"os"
"log"
"path/filepath"
"slices"
"sync"

"github.com/oolong-sh/oolong/internal/config"
"github.com/oolong-sh/oolong/internal/linking/lexer"
"github.com/oolong-sh/oolong/internal/linking/ngrams"
)

// Read, lex, and extract NGrams for all documents in notes directories specified in config file
func ReadNotesDirs() ([]*Document, error) {
documents := []*Document{}
// DOC: meant to be called with watcher
// assumes paths should not be ignored (should be safe assumption due to watcher ignores)
func ReadDocuments(paths ...string) error {
// read all input files, update state with documents
docs := readHandler(paths...)

// merge ngram maps and calculate weights
err := updateState(docs)
if err != nil {
return err
}

for _, notesDirPath := range config.NotesDirPaths() {
// TODO: all weights change, but may not need to be recalculated every time

return nil
}

// Read, lex, and extract NGrams for all documents in notes directories specified in config file
func ReadNotesDirs() error {
docs := []*Document{}
for _, dir := range config.NotesDirPaths() {
// extract all note file paths from notes directory
notePaths := []string{}
if err := filepath.WalkDir(notesDirPath, func(path string, d fs.DirEntry, err error) error {
paths := []string{}
// TODO: add oolong ignore system to blacklist certain subdirs/files
if err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
if d.IsDir() {
if slices.Contains(config.IgnoredDirectories(), filepath.Base(path)) {
return filepath.SkipDir
}
return nil
}

if slices.Contains(config.AllowedExtensions(), filepath.Ext(path)) {
notePaths = append(notePaths, path)
paths = append(paths, path)
}

return nil
}); err != nil {
return nil, err
return err
}

// perform a parallel read of found notes files
var wg sync.WaitGroup
wg.Add(len(notePaths))
docs := make([]*Document, len(notePaths))

for i, notePath := range notePaths {
go func(i int, notePath string) {
doc, err := ReadDocument(notePath)
if err != nil {
fmt.Printf("Failed to read file: '%s' %v", notePath, err)
return
}
docs[i] = doc
wg.Done()
}(i, notePath)
}

wg.Wait()

// append results to output array
documents = append(documents, docs...)
// read all documents and append results
docs = append(docs, readHandler(paths...)...)
}

//
// TEST: for debugging, remove later
//
// write out tokens
b := []byte{}
for _, d := range documents {
for _, t := range d.tokens {
if t.Value == lexer.BreakToken {
continue
}
b = append(b, []byte(fmt.Sprintf("%s, %s, %d\n", t.Lemma, t.Value, t.Zone))...)
}
}
err := os.WriteFile("./tokens.txt", b, 0666)
// merge maps and calculate weights
err := updateState(docs)
if err != nil {
panic(err)
return err
}

b = []byte{}
b = append(b, []byte("ngram,weight,count\n")...)
ngmap := make(map[string]*ngrams.NGram)
for _, d := range documents {
ngrams.Merge(ngmap, d.ngrams)
}
ngrams.CalcWeights(ngmap, len(documents))
for _, d := range documents {
for _, ng := range d.ngrams {
b = append(b, []byte(fmt.Sprintf("%s, %f, %d\n", ng.Keyword(), ng.Weight(), ng.Count()))...)
}
}
err = os.WriteFile("./ngrams.txt", b, 0666)
if err != nil {
panic(err)
}
b = []byte{}
b = append(b, []byte("ngram,weight,count,ndocs\n")...)
mng := ngrams.FilterMeaningfulNGrams(ngmap, 2, int(float64(len(documents))/1.5), 4.0)
for _, s := range mng {
b = append(b, []byte(fmt.Sprintf("%s,%f,%d,%d\n", s, ngmap[s].Weight(), ngmap[s].Count(), len(ngmap[s].Documents())))...)
}
err = os.WriteFile("./meaningful-ngrams.csv", b, 0666)
if err != nil {
panic(err)
}
// ngrams.CosineSimilarity(ngmap)
return nil
}

// ngcounts := ngrams.Count(ngmap)
// freq := ngrams.OrderByFrequency(ngcounts, 10)
freq := ngrams.OrderByFrequency(ngmap)
b = []byte{}
for _, v := range freq {
b = append(b, []byte(fmt.Sprintf("%s %f\n", v.Key, v.Value))...)
}
err = os.WriteFile("./ngram-counts.txt", b, 0666)
if err != nil {
panic(err)
// readHandler reads the files at paths concurrently, one goroutine per path,
// and returns the documents that were read successfully. A file that fails to
// read is logged and left out of the result, so the returned slice may be
// shorter than paths; the relative order of the successful reads is kept.
func readHandler(paths ...string) []*Document {
	results := make([]*Document, len(paths))
	var wg sync.WaitGroup

	// perform a parallel read of found notes files
	wg.Add(len(paths))
	for i, p := range paths {
		go func(i int, notePath string) {
			// Deferred so the failure path also releases the WaitGroup;
			// otherwise a single unreadable file blocks wg.Wait forever.
			defer wg.Done()

			doc, err := readDocumentByFile(notePath)
			if err != nil {
				log.Printf("Failed to read file: '%s' %v", notePath, err)
				return
			}
			// TODO: this could be changed to use channels
			// each goroutine writes only its own index, so no lock is needed
			results[i] = doc
		}(i, p)
	}
	wg.Wait()

	// Drop the slots of failed reads so callers never receive nil documents.
	docs := results[:0]
	for _, doc := range results {
		if doc != nil {
			docs = append(docs, doc)
		}
	}

	return docs
}
8 changes: 4 additions & 4 deletions internal/documents/document.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package documents

import (
"fmt"
"io"
"log"
"os"

"github.com/oolong-sh/oolong/internal/config"
Expand All @@ -25,7 +25,7 @@ func (d *Document) KeywordWeights() map[string]float64 { return d.ngwgts }

// Read in a single document file, lex, and generate NGrams
// Wraps readDocument for explicit use with files
func ReadDocument(documentPath string) (*Document, error) {
func readDocumentByFile(documentPath string) (*Document, error) {
f, err := os.Open(documentPath)
if err != nil {
return nil, err
Expand All @@ -44,15 +44,15 @@ func ReadDocument(documentPath string) (*Document, error) {
// internal reader function that allows usage of io readers for generalized use
func readDocument(r io.Reader, documentPath string) (*Document, error) {
l := lexer.New()
fmt.Printf("Running lexer on %s...\n", documentPath)
log.Printf("Running lexer on %s...\n", documentPath)
l.Lex(r)

doc := &Document{
path: documentPath,
tokens: l.Output,
}

fmt.Printf("Generating NGrams for %s...\n", documentPath)
log.Printf("Generating NGrams for %s...\n", documentPath)
doc.ngrams = ngrams.Generate(doc.tokens, config.NGramRange(), doc.path)

// FIX: weight setting must occur after document NGRam maps are merged
Expand Down
Loading
Loading