-
Notifications
You must be signed in to change notification settings - Fork 6
/
leela-ruby-extract.R
66 lines (46 loc) · 2.01 KB
/
leela-ruby-extract.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
## An ugly kludge to place all PDF annotations (and highlight with the
## Ruby library) on an org-mode file.
## It turns out both leela and the ruby library are missing MANY
## annotations made in the tablet. So this is getting to be useless.
## Seems now it is safer to extract annotations with Zotfile, and then, in
## Zotero, do an advanced search for the term in "Note" (not Annotation)
## Of course, that requires having first extracted annotations from all the PDFs.
## --- Configuration: machine-specific paths and switches ---
## Path to the leela executable; it is invoked below as
## `<leelaPath> annot "<pdf>"`.
leelaPath <- "~/Sources/Leela-master/leela"
## Directory that holds extract.rb; the script changes into it before
## running `ruby extract.rb <pdf>`.
pdfextractPath <- "~/Sources/extract.rb/5277732"
## Directory searched with `find` for files named *.pdf.
pdfDir <- "/home/ramon/Zotero-storage"
## Gates the Ruby extraction pass further down.
runRubyExtract <- TRUE ## might want to set it to FALSE as this is very slow
## The intermediate files (anot-leela.txt, anot-pdfe.txt) are written with
## relative names, so they are created in this working directory.
setwd("~/tmp")
## Collect the path of every *.pdf found under pdfDir (find(1) descends
## into subdirectories); the result is a character vector, one path per
## element.
## pdfDir is tilde-expanded and shell-quoted so that a directory name
## containing spaces or shell metacharacters still reaches `find` as a
## single argument. path.expand() is needed because quoting would
## otherwise stop the shell from expanding a leading "~".
list.of.pdfs <- system(
  paste("find", shQuote(path.expand(pdfDir)), "-name '*.pdf'"),
  intern = TRUE
)
## Run `leela annot` on every PDF. For each file we keep, in order: an
## empty line, an org-mode heading that links to the PDF, and whatever
## lines leela printed for it.
## lapply() (rather than sapply()) always yields a plain list, so the
## result never gets simplified into a matrix when all files happen to
## produce the same number of lines.
leela_anot <- lapply(list.of.pdfs, function(pdf) {
  heading <- paste0("* [[", pdf, "]]")
  ## shQuote() protects file names containing spaces, quotes, `$` or
  ## backticks. path.expand() is applied to leelaPath first because a
  ## quoted "~" would no longer be expanded by the shell.
  cmd <- paste(shQuote(path.expand(leelaPath)), "annot", shQuote(pdf))
  annotations <- system(cmd, intern = TRUE)
  c("", heading, annotations)
})
## One output line per element; written to the current working directory.
write(file = "anot-leela.txt", unlist(leela_anot))
## Strip leela output lines that carry no text of their own: bare link,
## highlight and underline markers, and "Citation Link" widgets. The
## filtered result is the org-mode file with the leela annotations.
## Remove more stuff, as I find it, or become annoyed by it.
##
## The filtering is done in R instead of a chain of `egrep -v` processes:
## `egrep` is obsolescent (recent GNU grep warns on every call), and one
## regular expression covers all four patterns.
local({
  leela_lines <- readLines("anot-leela.txt", warn = FALSE)
  noise <- paste0(
    "^<[0-9]+,[0-9]+:(link|highlight|underline)>$",
    "|^<[0-9]+,[0-9]+:widget>Citation Link$"
  )
  ## useBytes = TRUE: match and write byte-wise, as grep did, so lines
  ## with odd encodings neither raise errors nor get re-encoded.
  is_noise <- grepl(noise, leela_lines, useBytes = TRUE)
  writeLines(
    leela_lines[!is_noise],
    path.expand(file.path("~/Zotero-data/storage",
                          "leela-annotations-in-PDFs-of-refs.org")),
    useBytes = TRUE
  )
})
## It would be nice to remove those PDFs without any annotations. Some other time.
## Second pass: extract annotations with the Ruby script extract.rb and
## store them in a second org-mode file. Skipped entirely when
## runRubyExtract is FALSE.
## This is BAD! extract.rb is launched once per PDF, which is extremely
## slow. But I know no ruby.
if (runRubyExtract) {
  ## extract.rb is run from the directory it lives in.
  setwd(pdfextractPath)
  pdfe_anot <- lapply(list.of.pdfs, function(pdf) {
    heading <- paste0("* [[", pdf, "]]")
    ## The path must be shell-quoted: without it, any PDF whose name
    ## contains a space (or another shell metacharacter) is split into
    ## several arguments and extract.rb never sees the real file.
    annotations <- system(paste("ruby extract.rb", shQuote(pdf)),
                          intern = TRUE)
    ## Per file: empty line, org-mode heading linking to the PDF, then
    ## the lines printed by extract.rb.
    c("", heading, annotations)
  })
  ## Back to the scratch directory for the intermediate file, which is
  ## then moved to its final location.
  setwd("~/tmp")
  write(file = "anot-pdfe.txt", unlist(pdfe_anot))
  system("mv anot-pdfe.txt ~/Zotero-data/storage/pdfe-annotations-in-PDFs-of-refs.org")
}