Skip to content

Commit

Permalink
The search ignores diacritics
Browse files Browse the repository at this point in the history
  • Loading branch information
BostX committed Jan 28, 2024
1 parent 38b9a17 commit 94b068b
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 30 deletions.
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ $(eval isGuix := $(shell command -v guix > /dev/null 2>&1 && echo t || echo f))
$(eval destDir := $(shell [ "${isGuix}" = t ] && echo $${dotf}/bin || echo ~/bin))
orgRoamLink := ${HOME}/org-roam

all: show-environment clean install-deps
all: show-environment clean install-deps test
[ ! -L "${orgRoamLink}" ] && ln -s ${dev}/notes/notes "${orgRoamLink}" || :
[ ! -d ${destDir} ] && mkdir ${destDir} || :
raco exe -o ${destDir}/search-notes main.rkt
Expand All @@ -32,3 +32,5 @@ install-deps:
clean:
rm -rf ./compiled/ ./scribblings/compiled/

test:
raco test ./
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Installation: run `make`. See also Makefile.
# Fix the 'loading code: version mismatch' error
rm -rf ./compiled/ ./scribblings/compiled/
raco pkg install --auto ansi-color
# raco test ./ # optionally
isGuix=$(command -v guix > /dev/null 2>&1 && echo t || echo f)
[ ${isGuix} = t ] && destDir=$dotf/bin || destDir=~/bin
[ ! -d ${destDir} ] && mkdir $destDir || :
Expand Down
136 changes: 110 additions & 26 deletions main.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@
;; TODO try #lang hacket - haskell + racket
;; https://lexi-lambda.github.io/hackett/index.html

(module+ test
(require rackunit
racket/match))

;; Notice
;; To install (from within the package directory):
;; $ raco pkg install
Expand All @@ -27,28 +23,82 @@
;; See the current version of the racket style guide here:
;; http://docs.racket-lang.org/style/index.html

;; Code here



(module+ test
;; Any code in this `test` submodule runs when this file is run using DrRacket
;; or with `raco test`. The code here does not run when this file is
;; required by another module.

(check-equal? (+ 2 2) 4))

(module+ main
;; (Optional) main submodule. Put code here if you need it to be executed when
;; this file is run using DrRacket or the `racket` executable. The code here
;; does not run when this file is required by another module. Documentation:
;; http://docs.racket-lang.org/guide/Module_Syntax.html#%28part._main-and-test%29

(provide
create-pattern-with-diacritics
create-regexp-split-match
regexp-normalize-match*
regexp-normalize-split)

(require
;; (prefix-in com: "common.rkt")
"notes.rkt" ;; is used indeed
"notes-reader.rkt"
ansi-color)
ansi-color

;; for string-replace
racket/string
)

(define diacritic-map
(hash "a" "[aáäàâæ]"
"c" "[cčç]"
"d" "[dď]"
"e" "[eéèêë]"
"i" "[iíîï]"
"l" "[lĺľ]"
"n" "[nň]"
"o" "[oóôöœ]"
"r" "[rŕř]"
"s" "[sš]"
"t" "[tť]"
"u" "[uúûüù]"
"y" "[yý]"
"z" "[zž]"
"A" "[AÁÄÀÂÆ]"
"C" "[CČÇ]"
"D" "[DĎ]"
"E" "[EÉÈÊË]"
"I" "[IÍÎÏ]"
"L" "[LĹĽ]"
"N" "[NŇ]"
"O" "[OÓÔÖŒ]"
"R" "[RŔŘ]"
"S" "[SŠ]"
"T" "[TŤ]"
"U" "[UÚÛÜÙ]"
"Y" "[YÝ]"
"Z" "[ZŽ]"
"ß" "ß")) ; German sharp S

(define (string-normalize s)
;; Normalization Form C, Canonical Decomposition followed by Canonical
;; Composition:
;; Decompose characters and then recomposes them using canonical
;; equivalence. E.g., 'é' would first be split into 'e' and the combining
;; accent, and then recomposed back into 'é'.
;; Use this when you want to normalize characters to their composed forms
;; while still respecting canonical equivalence.
(string-normalize-nfc s))

(define (regexp-normalize-match* regex target-str)
;; (printf "[regexp-normalize-match*] regex: ~a\n" regex)
;; (printf "[regexp-normalize-match*] target-str : ~a\n" target-str)
(let* ((normalized-target (string-normalize target-str)))
;; (printf "[regexp-normalize-match*] normalized-target: ~a\n" normalized-target)
(regexp-match* regex normalized-target)))

(define (regexp-normalize-split regex target-str)
;; (printf "[regexp-normalize-split] regex: ~a\n" regex)
;; (printf "[regexp-normalize-split] target-str : ~a\n" target-str)
(let* ((normalized-target (string-normalize target-str)))
;; (printf "[regexp-normalize-split] normalized-target: ~a\n" normalized-target)
(regexp-split regex normalized-target)))

(define pattern-param (make-parameter ""))
(define filepaths-param (make-parameter ""))
Expand Down Expand Up @@ -81,6 +131,8 @@ racket main.rkt -n -p title
racket main.rkt -np title
racket main.rkt -p rackjure
racket main.rkt -e ./main.rkt -p [[:blank:]]install([[:cntrl:]]|[[:blank:]])
racket main.rkt -e ./main.rkt -p \"[eeeee]\"
racket main.rkt -e ./main.rkt -p \"[eéèêë]\"
racket main.rkt -e \"/home/bost/der/search-notes/main.rkt /home/bost/der/search-notes/README.md\" -p subdir
"

Expand Down Expand Up @@ -138,11 +190,31 @@ racket main.rkt -e \"/home/bost/der/search-notes/main.rkt /home/bost/der/search-
(colorize colorize-matches? display-fn
(cdr matches) (cdr patterns)))]))

(define (create-pattern-with-diacritics pattern)
(string-append*
(map (lambda (char)
(hash-ref diacritic-map (string char) (string char)))
(string->list pattern))))

(define pattern-with-diacritics (create-pattern-with-diacritics (pattern-param)))

;; - For regexp vs. pregexp - the same must be used also in notes.rkt;
;; - In contrary to the regexp-defining string in the notes.rkt no '.*' must
;; be used.
(define regexp-split-match
(pregexp (format "(?~a:~a)" (case-sensitivity-params) (pattern-param))))
(define (create-regexp-split-match case-sensitivity diacritic-pattern)
;; (printf "case-sensitivity : ~a\n" case-sensitivity)
;; (printf "diacritic-pattern : ~a\n" diacritic-pattern)
;; (printf "pattern : ~a\n" pattern)
(let* [
(rgx (pregexp (format "(?~a:~a)"
case-sensitivity diacritic-pattern)))]
;; (printf "diacritic-pattern : ~a\n" diacritic-pattern)
;; (printf "rgx : ~a\n" rgx)
rgx))

(define regexp-split-match (create-regexp-split-match
(case-sensitivity-params)
pattern-with-diacritics))

(define colorize-matches? (colorize-matches-param))

Expand All @@ -152,7 +224,10 @@ racket main.rkt -e \"/home/bost/der/search-notes/main.rkt /home/bost/der/search-
(lambda (_) (display ""))
(curry map
(lambda (all-file-strings)
(let ((relevant-file-strings (cdr all-file-strings)))
;; (printf "all-file-strings : ~a\n" all-file-strings)
(let [(relevant-file-strings (cdr all-file-strings))]
;; (printf "relevant-file-strings : ~a\n" relevant-file-strings)
;; (printf "(empty? relevant-file-strings) : ~a\n" (empty? relevant-file-strings))
(unless (empty? relevant-file-strings)
(let ((first-file-string (car all-file-strings))
(relevant-file-strings-joined
Expand All @@ -164,28 +239,37 @@ racket main.rkt -e \"/home/bost/der/search-notes/main.rkt /home/bost/der/search-
(displayln first-file-string))
(colorize colorize-matches?
display-fn
(regexp-split regexp-split-match
relevant-file-strings-joined)
(regexp-match* regexp-split-match
relevant-file-strings-joined))
(regexp-normalize-split
regexp-split-match relevant-file-strings-joined)
(regexp-normalize-match*
regexp-split-match relevant-file-strings-joined))
(printf "\n\n")))
relevant-file-strings)))
;; (lambda (files) (printf "1. files:\n~a\n" files) files)
(curry map
(lambda (f)
(let ((strs (call-with-input-file f
(lambda (input-file)
;; (printf "1. input-file:\n~a\n" input-file)
(define expression
`(notes
,@((compose
(curry cons (pattern-param))
;; (lambda (p) (printf "13. ~a\n" p) p)
(curry cons pattern-with-diacritics)
;; (lambda (p) (printf "12. ~a\n" p) p)
(curry cons (case-sensitivity-params))
(curry cons (colorize-matches-param)))
;; (lambda (p) (printf "11. ~a\n" p) p)
(curry cons (colorize-matches-param))
;; (lambda (p) (printf "10. ~a\n" p) p)
)
(parse-notes add-src-location-info
input-file))))
;; (printf "1. expression:\n~a\n" expression)
;; (printf "1. namespace:\n~a\n" namespace)
(eval expression namespace)))))
(if (empty? strs)
(list f)
(list f (string-join strs "\n\n"))))))
;; (lambda (files) (printf "files:\n~a\n" files) files)
;; (lambda (files) (printf "0. files:\n~a\n" files) files)
string-split)
(filepaths-param)))
6 changes: 4 additions & 2 deletions notes-reader.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
(define note (parse-note src in))
(if (eof-object? note)
'()
(cons note
(parse-notes src in))))
(let [(notes (parse-notes src in))]
;; (printf "vvvvvvvvvvv note:\n~a\n^^^^^^^^^^^^^^^^^\n" note)
;; (printf "vvvvvvvvvvv notes:\n~a\n^^^^^^^^^^^^^^^^^\n\n" notes)
(cons note notes))))

(define (parse-note src in)
;; Don't remove whitespace for better formatting (column alignment)
Expand Down
3 changes: 2 additions & 1 deletion scribblings/search-notes.scrbl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#lang scribble/manual
@require[@for-label[search-notes
@require[@for-label[
;; search-notes
racket/base]]

@title{search-notes}
Expand Down
34 changes: 34 additions & 0 deletions test.rkt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#lang racket

(module+ test
(require
;; main
rackunit
racket/match
"main.rkt"
(submod "main.rkt" main)
)

;; Any code in this `test` submodule runs when this file is run using DrRacket
;; or with `raco test`. The code here does not run when this file is
;; required by another module.

;; (test-case "Test for add function"
;; (check-equal? (+ 2 2) 4))

(test-case "Test diacritics"
(define sdiacr "jkl \n abčd \n xyz \n 123 \n ábc \n 567")
(define sd sdiacr)
(define splain "jkl \n abcd \n xyz \n 123 \n abc \n 567")
(define sp splain)
(define rxs "abc")
(define diarxs (create-regexp-split-match
"i"
(create-pattern-with-diacritics rxs)))

(check-equal? (length (regexp-normalize-match* diarxs sdiacr))
(length (regexp-match* rxs splain)))
(check-equal? (regexp-normalize-split diarxs sdiacr)
(regexp-split rxs splain))
(check-equal? (regexp-normalize-split diarxs splain)
(regexp-split rxs splain))))

0 comments on commit 94b068b

Please sign in to comment.