diff --git a/Makefile b/Makefile
index 8c8f2da..0da5ecc 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ $(eval isGuix := $(shell command -v guix > /dev/null 2>&1 && echo t || echo f))
 $(eval destDir := $(shell [ "${isGuix}" = t ] && echo $${dotf}/bin || echo ~/bin))
 orgRoamLink := ${HOME}/org-roam
 
-all: show-environment clean install-deps
+all: show-environment clean install-deps test
 	[ ! -L "${orgRoamLink}" ] && ln -s ${dev}/notes/notes "${orgRoamLink}" || :
 	[ ! -d ${destDir} ] && mkdir ${destDir} || :
 	raco exe -o ${destDir}/search-notes main.rkt
@@ -32,3 +32,7 @@ install-deps:
 clean:
 	rm -rf ./compiled/ ./scribblings/compiled/
 
+# `test` names a command, not a file: keep it runnable even if ./test exists.
+.PHONY: test
+test:
+	raco test ./
diff --git a/README.md b/README.md
index 6d7984b..2443446 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@ Installation: run `make`. See also Makefile.
 # Fix the 'loading code: version mismatch' error
 rm -rf ./compiled/ ./scribblings/compiled/
 raco pkg install --auto ansi-color
+# raco test ./ # optionally
 isGuix=$(command -v guix > /dev/null 2>&1 && echo t || echo f)
 [ ${isGuix} = t ] && destDir=$dotf/bin || destDir=~/bin
 [ ! -d ${destDir} ] && mkdir $destDir || :
diff --git a/main.rkt b/main.rkt
index 7069c97..e79119b 100644
--- a/main.rkt
+++ b/main.rkt
@@ -3,10 +3,6 @@
 ;; TODO try #lang hacket - haskell + racket
 ;; https://lexi-lambda.github.io/hackett/index.html
 
-(module+ test
-  (require rackunit
-           racket/match))
-
 ;; Notice
 ;; To install (from within the package directory):
 ;; $ raco pkg install
@@ -27,28 +23,98 @@
 ;; See the current version of the racket style guide here:
 ;; http://docs.racket-lang.org/style/index.html
 
-;; Code here
-
-
-
-(module+ test
-  ;; Any code in this `test` submodule runs when this file is run using DrRacket
-  ;; or with `raco test`. The code here does not run when this file is
-  ;; required by another module.
-
-  (check-equal? (+ 2 2) 4))
-
 (module+ main
   ;; (Optional) main submodule. Put code here if you need it to be executed when
   ;; this file is run using DrRacket or the `racket` executable. The code here
   ;; does not run when this file is required by another module. Documentation:
   ;; http://docs.racket-lang.org/guide/Module_Syntax.html#%28part._main-and-test%29
 
+  (provide regexp-normalize-match* regexp-normalize-split)
+
   (require
    ;; (prefix-in com: "common.rkt")
    "notes.rkt" ;; is used indeed
    "notes-reader.rkt"
-   ansi-color)
+   ansi-color
+
+   ;; for string-replace
+   racket/string
+   )
+
+  ;; Maps a base letter to a regex character class that also accepts its
+  ;; accented variants, e.g. "c" -> "[cčç]". Lookups fall back to the
+  ;; character itself, so anything without an entry stays unchanged.
+  (define diacritic-map
+    (hash "a" "[aáäàâæ]"
+          "c" "[cčç]"
+          "d" "[dď]"
+          "e" "[eéèêë]"
+          "i" "[iíîï]"
+          "l" "[lĺľ]"
+          "n" "[nň]"
+          "o" "[oóôöœ]"
+          "r" "[rŕř]"
+          "s" "[sš]"
+          "t" "[tť]"
+          "u" "[uúûüù]"
+          "y" "[yý]"
+          "z" "[zž]"
+          "A" "[AÁÄÀÂÆ]"
+          "C" "[CČÇ]"
+          "D" "[DĎ]"
+          "E" "[EÉÈÊË]"
+          "I" "[IÍÎÏ]"
+          "L" "[LĹĽ]"
+          "N" "[NŇ]"
+          "O" "[OÓÔÖŒ]"
+          "R" "[RŔŘ]"
+          "S" "[SŠ]"
+          "T" "[TŤ]"
+          "U" "[UÚÛÜÙ]"
+          "Y" "[YÝ]"
+          "Z" "[ZŽ]"
+          "ß" "ß")) ; German sharp S
+
+  ;; Tokenizer for regex source strings. Alternatives are tried in order:
+  ;;   \p{..} \P{..}   Unicode property escape
+  ;;   \<char>         any other backslash escape
+  ;;   [..]            a complete bracket expression
+  ;;   (?i: (?-s: ..   a mode prefix
+  ;;   <char>          any other single character
+  ;; Only single-character tokens can have an entry in `diacritic-map`, so
+  ;; regex syntax that happens to contain letters is never rewritten.
+  (define regex-token-rx
+    #rx"\\\\[pP]\\{[^}]*\\}|\\\\.|\\[\\^?\\]?[^]]*\\]|\\(\\?(?:-?[ism])+:|.")
+
+  (define (string-normalize s)
+    ;; Normalization Form C: canonical decomposition followed by canonical
+    ;; composition. E.g. 'é' written as 'e' plus a combining accent is
+    ;; recomposed into the single character 'é', so text and patterns compare
+    ;; equal regardless of how their accents were originally encoded.
+    (string-normalize-nfc s))
+
+  ;; Returns the regex source RXS with every plain letter that has an entry in
+  ;; `diacritic-map` replaced by its character class. Escapes, bracket
+  ;; expressions and mode prefixes are copied verbatim: rewriting them
+  ;; character by character would corrupt the pattern, e.g. "(?i:a)" would
+  ;; become "(?[iíîï]:[aáäàâæ])".
+  (define (diacritic-insensitive-pattern rxs)
+    (regexp-replace* regex-token-rx
+                     (string-normalize rxs)
+                     (lambda (token) (hash-ref diacritic-map token token))))
+
+  ;; Like `regexp-match*`, except that a plain letter in RXS (a regex source
+  ;; string) also matches its accented variants from `diacritic-map`.
+  ;; TARGET-STR is NFC-normalized first, so the matches are NFC strings.
+  (define (regexp-normalize-match* rxs target-str)
+    (regexp-match* (regexp (diacritic-insensitive-pattern rxs))
+                   (string-normalize target-str)))
+
+  ;; Like `regexp-split`, with the same pattern rewriting and target
+  ;; normalization as `regexp-normalize-match*`.
+  (define (regexp-normalize-split rxs target-str)
+    (regexp-split (regexp (diacritic-insensitive-pattern rxs))
+                  (string-normalize target-str)))
 
   (define pattern-param (make-parameter ""))
   (define filepaths-param (make-parameter ""))
@@ -164,10 +230,10 @@ racket main.rkt -e \"/home/bost/der/search-notes/main.rkt /home/bost/der/search-
                        (displayln first-file-string))
                      (colorize colorize-matches?
                                display-fn
-                               (regexp-split regexp-split-match
-                                             relevant-file-strings-joined)
-                               (regexp-match* regexp-split-match
-                                              relevant-file-strings-joined))
+                               (regexp-normalize-split
+                                regexp-split-match relevant-file-strings-joined)
+                               (regexp-normalize-match*
+                                regexp-split-match relevant-file-strings-joined))
                      (printf "\n\n")))
                  relevant-file-strings)))
         (curry map
diff --git a/scribblings/search-notes.scrbl b/scribblings/search-notes.scrbl
index 41fb30e..55b6166 100644
--- a/scribblings/search-notes.scrbl
+++ b/scribblings/search-notes.scrbl
@@ -1,5 +1,6 @@
 #lang scribble/manual
-@require[@for-label[search-notes
+@require[@for-label[
+                    ;; search-notes
                     racket/base]]
 
 @title{search-notes}
diff --git a/test.rkt b/test.rkt
new file mode 100644
index 0000000..a1749bd
--- /dev/null
+++ b/test.rkt
@@ -0,0 +1,34 @@
+#lang racket
+
+(module+ test
+  (require
+   rackunit
+   "main.rkt"
+   ;; NOTE(review): requiring the `main` submodule instantiates it, so
+   ;; whatever it does at startup also runs under `raco test`. Consider
+   ;; moving the helpers out of `main` if that turns out to be a problem.
+   (submod "main.rkt" main))
+
+  ;; Any code in this `test` submodule runs when this file is run using DrRacket
+  ;; or with `raco test`. The code here does not run when this file is
+  ;; required by another module.
+
+  (test-case "Test diacritics"
+    (define sdiacr "jkl \n abčd \n xyz \n 123 \n ábc \n 567")
+    (define splain "jkl \n abcd \n xyz \n 123 \n abc \n 567")
+    (define rxs "abc")
+
+    (check-equal? (length (regexp-normalize-match* rxs sdiacr))
+                  (length (regexp-match* rxs splain)))
+    (check-equal? (regexp-normalize-split rxs sdiacr)
+                  (regexp-split rxs splain))
+    (check-equal? (regexp-normalize-split rxs splain)
+                  (regexp-split rxs splain)))
+
+  (test-case "Regex syntax is left intact"
+    ;; The "i" of a mode prefix must not be rewritten into a character class.
+    (check-equal? (regexp-normalize-match* "(?i:xyz)" "XYZ") '("XYZ"))
+    ;; Bracket expressions are copied verbatim.
+    (check-equal? (regexp-normalize-match* "[xyz]+" "xyz") '("xyz"))
+    ;; Backslash escapes keep their escaped character.
+    (check-equal? (regexp-normalize-match* "a\\.c" "abc á.c") '("á.c"))))