Skip to content

Commit

Permalink
Merge pull request #4 from oolong-sh/patrick-dev
Browse files Browse the repository at this point in the history
feat: file/dir watching
  • Loading branch information
ptdewey authored Nov 6, 2024
2 parents 811217d + 9b68499 commit bd60a5a
Show file tree
Hide file tree
Showing 20 changed files with 337 additions and 175 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/go-test.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
name: Go

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Oolong looks for a configuration file at `~/.oolong.json`
3
],
"noteDirectories": [
"~/notes",
"~/notes"
],
"allowedExtensions": [
".md",
Expand Down
17 changes: 17 additions & 0 deletions examples/oolong.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"ngramRange": [
1,
2,
3
],
"noteDirectories": [],
"allowedExtensions": [
".md",
".mdx",
".tex",
".typ"
],
"pluginPaths": [
"./scripts/daily_note.lua"
]
}
5 changes: 5 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,8 @@ require (
github.com/aaaton/golem/v4 v4.0.0
github.com/aaaton/golem/v4/dicts/en v1.0.1
)

require (
github.com/fsnotify/fsnotify v1.8.0
golang.org/x/sys v0.13.0 // indirect
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,9 @@ github.com/aaaton/golem/v4 v4.0.0 h1:YHieBS+5Fqir298nJ7fk3EvMcKM/+T5gpMRt4TIAiZ8
github.com/aaaton/golem/v4 v4.0.0/go.mod h1:OfK/S5v9Exsx1yO21WorREuIVV+Y5K2hygP0A9oJCCI=
github.com/aaaton/golem/v4/dicts/en v1.0.1 h1:/BsOsh8JTgTkuevwM9axPnAi9CD4rK7TWHNdW/6V3Uo=
github.com/aaaton/golem/v4/dicts/en v1.0.1/go.mod h1:1YKRrQNng+KbS+peA7sj3TIa8eqR6T2UqdJ+Tc9xeoA=
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
10 changes: 6 additions & 4 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@ type OolongConfig struct {
NGramRange []int `json:"ngramRange"`
AllowedExtensions []string `json:"allowedExtensions"`
PluginPaths []string `json:"pluginPaths"`
IgnoreDirectories []string `json:"ignoredDirectories"`
}

func Config() OolongConfig { return config }

func NotesDirPaths() []string { return config.NotesDirPaths }
func NGramRange() []int { return config.NGramRange }
func AllowedExtensions() []string { return config.AllowedExtensions }
func PluginPaths() []string { return config.PluginPaths }
func NotesDirPaths() []string { return config.NotesDirPaths }
func NGramRange() []int { return config.NGramRange }
func AllowedExtensions() []string { return config.AllowedExtensions }
func PluginPaths() []string { return config.PluginPaths }
func IgnoredDirectories() []string { return config.IgnoreDirectories }

// TODO: file watcher for config file, reload on change

Expand Down
11 changes: 11 additions & 0 deletions internal/daemon/daemon.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package daemon

import (
	"log"

	"github.com/oolong-sh/oolong/internal/config"
)

// Run launches the perpetually running watchers and then blocks forever so
// the application keeps running in the background as a daemon.
//
// The notes directory watcher runs in its own goroutine. If it returns an
// error, the error is logged instead of being silently discarded.
func Run() {
	go func() {
		if err := runNotesDirsWatcher(config.NotesDirPaths()...); err != nil {
			log.Println("Notes directory watcher stopped:", err)
		}
	}()

	// run forever
	select {}
}
83 changes: 83 additions & 0 deletions internal/daemon/watcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package daemon

import (
"errors"
"io/fs"
"log"
"path/filepath"
"slices"
"time"

"github.com/fsnotify/fsnotify"
"github.com/oolong-sh/oolong/internal/config"
"github.com/oolong-sh/oolong/internal/documents"
)

// runNotesDirsWatcher initializes and runs the file update watcher for the
// given notes directories.
//
// Every directory found under each entry of dirs is added to an fsnotify
// watcher, except directories whose base name is listed in
// config.IgnoredDirectories(); those are skipped together with their
// contents. The function then blocks and handles watcher events: when a write
// is reported for a file whose extension is in config.AllowedExtensions(),
// the file is re-read with documents.ReadDocuments.
//
// It returns an error if the watcher cannot be created, if a root directory
// cannot be walked, if a directory cannot be added to the watcher, or if one
// of the watcher channels is closed. It does not return otherwise, so it is
// meant to be run in its own goroutine.
func runNotesDirsWatcher(dirs ...string) error {
	// a write event is sent when the write starts, give it time to finish
	const writeSettleDelay = 500 * time.Millisecond

	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return err
	}
	defer watcher.Close()

	dirIgnores := config.IgnoredDirectories()

	for _, dir := range dirs {
		// TODO: add oolong ignore system to blacklist certain subdirs/files
		if err = filepath.WalkDir(dir, func(path string, d fs.DirEntry, walkErr error) error {
			if walkErr != nil {
				// d is nil when the root itself could not be stat'ed; there is
				// nothing to watch for this root, so report the failure.
				if d == nil {
					return walkErr
				}
				// an unreadable subdirectory should not abort the whole walk
				log.Printf("Skipping unreadable path %s: %v", path, walkErr)
				if d.IsDir() {
					return filepath.SkipDir
				}
				return nil
			}

			if !d.IsDir() {
				return nil
			}

			// NOTE: this may not be the exact desired behavior for ignores
			// - this logic also needs to be replicated in the document reader
			if slices.Contains(dirIgnores, filepath.Base(path)) {
				return filepath.SkipDir
			}

			// TEST: this may need to add path as absolute to get correct results
			if addErr := watcher.Add(path); addErr != nil {
				return addErr
			}
			log.Println("Added watcher on", path)

			return nil
		}); err != nil {
			return err
		}
	}

	// watcher handler
	// the entire function is run as a goroutine, so the handler doesn't need to be one
	for {
		select {
		case event, ok := <-watcher.Events:
			if !ok {
				log.Println("Watcher event channel returned bad result.")
				return errors.New("Invalid watcher events channel value.")
			}

			if !event.Has(fsnotify.Write) {
				continue
			}

			// directories receive events for every file they contain (editor
			// swap files, etc.); only re-read files the initial directory
			// read would also have accepted
			if !slices.Contains(config.AllowedExtensions(), filepath.Ext(event.Name)) {
				continue
			}
			log.Println("Modified file:", event.Name)

			time.Sleep(writeSettleDelay)

			// re-read document
			if readErr := documents.ReadDocuments(event.Name); readErr != nil {
				log.Println("Failed to re-read document:", event.Name, readErr)
			}
			// TODO: add dedup timer to prevent multi-write calls
		case err, ok := <-watcher.Errors:
			if !ok {
				return errors.New("Invalid watcher errors channel value.")
			}
			log.Println("error:", err)
		}
	}
}
146 changes: 57 additions & 89 deletions internal/documents/corpus.go
Original file line number Diff line number Diff line change
@@ -1,122 +1,90 @@
package documents

import (
"fmt"
"io/fs"
"os"
"log"
"path/filepath"
"slices"
"sync"

"github.com/oolong-sh/oolong/internal/config"
"github.com/oolong-sh/oolong/internal/linking/lexer"
"github.com/oolong-sh/oolong/internal/linking/ngrams"
)

// Read, lex, and extract NGrams for all documents in notes directories specified in config file
func ReadNotesDirs() ([]*Document, error) {
documents := []*Document{}
// DOC: meant to be called with watcher
// assumes paths should not be ignored (should be safe assumption due to watcher ignores)
func ReadDocuments(paths ...string) error {
// read all input files, update state with documents
docs := readHandler(paths...)

// merge ngram maps and calculate weights
err := updateState(docs)
if err != nil {
return err
}

for _, notesDirPath := range config.NotesDirPaths() {
// TODO: all weights change, but may not need to be recalculated every time

return nil
}

// Read, lex, and extract NGrams for all documents in notes directories specified in config file
func ReadNotesDirs() error {
docs := []*Document{}
for _, dir := range config.NotesDirPaths() {
// extract all note file paths from notes directory
notePaths := []string{}
if err := filepath.WalkDir(notesDirPath, func(path string, d fs.DirEntry, err error) error {
paths := []string{}
// TODO: add oolong ignore system to blacklist certain subdirs/files
if err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
if d.IsDir() {
if slices.Contains(config.IgnoredDirectories(), filepath.Base(path)) {
return filepath.SkipDir
}
return nil
}

if slices.Contains(config.AllowedExtensions(), filepath.Ext(path)) {
notePaths = append(notePaths, path)
paths = append(paths, path)
}

return nil
}); err != nil {
return nil, err
return err
}

// perform a parallel read of found notes files
var wg sync.WaitGroup
wg.Add(len(notePaths))
docs := make([]*Document, len(notePaths))

for i, notePath := range notePaths {
go func(i int, notePath string) {
doc, err := ReadDocument(notePath)
if err != nil {
fmt.Printf("Failed to read file: '%s' %v", notePath, err)
return
}
docs[i] = doc
wg.Done()
}(i, notePath)
}

wg.Wait()

// append results to output array
documents = append(documents, docs...)
// read all documents and append results
docs = append(docs, readHandler(paths...)...)
}

//
// TEST: for debugging, remove later
//
// write out tokens
b := []byte{}
for _, d := range documents {
for _, t := range d.tokens {
if t.Value == lexer.BreakToken {
continue
}
b = append(b, []byte(fmt.Sprintf("%s, %s, %d\n", t.Lemma, t.Value, t.Zone))...)
}
}
err := os.WriteFile("./tokens.txt", b, 0666)
// merge maps and calculate weights
err := updateState(docs)
if err != nil {
panic(err)
return err
}

b = []byte{}
b = append(b, []byte("ngram,weight,count\n")...)
ngmap := make(map[string]*ngrams.NGram)
for _, d := range documents {
ngrams.Merge(ngmap, d.ngrams)
}
ngrams.CalcWeights(ngmap, len(documents))
for _, d := range documents {
for _, ng := range d.ngrams {
b = append(b, []byte(fmt.Sprintf("%s, %f, %d\n", ng.Keyword(), ng.Weight(), ng.Count()))...)
}
}
err = os.WriteFile("./ngrams.txt", b, 0666)
if err != nil {
panic(err)
}
b = []byte{}
b = append(b, []byte("ngram,weight,count,ndocs\n")...)
mng := ngrams.FilterMeaningfulNGrams(ngmap, 2, int(float64(len(documents))/1.5), 4.0)
for _, s := range mng {
b = append(b, []byte(fmt.Sprintf("%s,%f,%d,%d\n", s, ngmap[s].Weight(), ngmap[s].Count(), len(ngmap[s].Documents())))...)
}
err = os.WriteFile("./meaningful-ngrams.csv", b, 0666)
if err != nil {
panic(err)
}
// ngrams.CosineSimilarity(ngmap)
return nil
}

// ngcounts := ngrams.Count(ngmap)
// freq := ngrams.OrderByFrequency(ngcounts, 10)
freq := ngrams.OrderByFrequency(ngmap)
b = []byte{}
for _, v := range freq {
b = append(b, []byte(fmt.Sprintf("%s %f\n", v.Key, v.Value))...)
}
err = os.WriteFile("./ngram-counts.txt", b, 0666)
if err != nil {
panic(err)
// readHandler reads the files at paths in parallel, one goroutine per path,
// and returns the documents that were read successfully, in the same relative
// order as their paths.
//
// A file that fails to read is logged and left out of the result, so the
// returned slice may be shorter than paths.
// NOTE(review): failed reads used to leave nil entries in the result; confirm
// no caller relies on the result having exactly len(paths) elements.
func readHandler(paths ...string) []*Document {
	docs := make([]*Document, len(paths))
	var wg sync.WaitGroup

	// perform a parallel read of found notes files
	wg.Add(len(paths))
	for i, p := range paths {
		go func(i int, notePath string) {
			// release the wait group on every path, including read failures;
			// otherwise wg.Wait below would block forever
			defer wg.Done()

			doc, err := readDocumentByFile(notePath)
			if err != nil {
				log.Printf("Failed to read file: '%s' %v", notePath, err)
				return
			}
			// TODO: this could be changed to use channels
			docs[i] = doc
		}(i, p)
	}
	wg.Wait()

	// drop the slots left nil by failed reads
	read := docs[:0]
	for _, doc := range docs {
		if doc != nil {
			read = append(read, doc)
		}
	}

	return read
}
8 changes: 4 additions & 4 deletions internal/documents/document.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package documents

import (
"fmt"
"io"
"log"
"os"

"github.com/oolong-sh/oolong/internal/config"
Expand All @@ -25,7 +25,7 @@ func (d *Document) KeywordWeights() map[string]float64 { return d.ngwgts }

// Read in a single document file, lex, and generate NGrams
// Wraps readDocument for explicit use with files
func ReadDocument(documentPath string) (*Document, error) {
func readDocumentByFile(documentPath string) (*Document, error) {
f, err := os.Open(documentPath)
if err != nil {
return nil, err
Expand All @@ -44,15 +44,15 @@ func ReadDocument(documentPath string) (*Document, error) {
// internal reader function that allows usage of io readers for generalized use
func readDocument(r io.Reader, documentPath string) (*Document, error) {
l := lexer.New()
fmt.Printf("Running lexer on %s...\n", documentPath)
log.Printf("Running lexer on %s...\n", documentPath)
l.Lex(r)

doc := &Document{
path: documentPath,
tokens: l.Output,
}

fmt.Printf("Generating NGrams for %s...\n", documentPath)
log.Printf("Generating NGrams for %s...\n", documentPath)
doc.ngrams = ngrams.Generate(doc.tokens, config.NGramRange(), doc.path)

// FIX: weight setting must occur after document NGRam maps are merged
Expand Down
Loading

0 comments on commit bd60a5a

Please sign in to comment.