-
Notifications
You must be signed in to change notification settings - Fork 26
/
cpdftoc.ml
223 lines (211 loc) · 8.98 KB
/
cpdftoc.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
open Pdfutil
(* We allow \n in titles. Split for typesetter. *)
let rec split_toc_title_inner a = function
| '\\'::'n'::r -> rev a :: split_toc_title_inner [] r
| x::xs -> split_toc_title_inner (x::a) xs
| [] -> [rev a]
let split_toc_title = split_toc_title_inner []
(* And for new bookmark for TOC, change \\n to \n *)
let rec real_newline = function
| '\\'::'n'::r -> '\n'::real_newline r
| x::r -> x::real_newline r
| [] -> []
let width_table_cache = null_hash ()
let rec width_of_runs runs =
match runs with
| Cpdftype.Font (id, f, fontsize)::Cpdftype.Text t::more ->
let width_table =
match Hashtbl.find width_table_cache (id, fontsize) with
| w -> w
| exception Not_found ->
let ws = Cpdftype.font_widths id f fontsize in Hashtbl.add width_table_cache (id, fontsize) ws; ws
in
Cpdftype.width_of_string width_table t +. width_of_runs more
| [] -> 0.
| _ -> failwith "width_of_runs"
(* Run of Font / Text elements from a fontpack and UTF8 text *)
let of_utf8 fontpack fontsize t =
let codepoints = Pdftext.codepoints_of_utf8 t in
let fonted = option_map (Cpdfembed.get_char fontpack) codepoints in
let collated = Cpdfembed.collate_runs fonted in
flatten
(map
(function
| [] -> []
| (_, n, font) as h::t ->
let charcodes = map (fun (c, _, _) -> char_of_int c) (h::t) in
[Cpdftype.Font (string_of_int n, font, fontsize); Cpdftype.Text charcodes])
collated)
(* Cpdftype codepoints from a font and PDFDocEndoding string *)
let of_pdfdocencoding fontpack fontsize t =
of_utf8 fontpack fontsize (Pdftext.utf8_of_pdfdocstring t)
(* Remove characters until it is below the length. Then remove three more and
add dots for an ellipsis *)
let rec shorten_text_inner l t =
match rev t with
| Cpdftype.Text text::Cpdftype.Font (id, f, fs)::more ->
let width_table =
match Hashtbl.find width_table_cache (id, fs) with
| w -> w
| exception Not_found ->
let ws = Cpdftype.font_widths id f fs in Hashtbl.add width_table_cache (id, fs) ws; ws
in
if Cpdftype.width_of_string width_table text > l then
shorten_text_inner l (rev (Cpdftype.Text (all_but_last text)::Cpdftype.Font (id, f, fs)::more))
else
t
| _ -> t
let shorten_text fontpack fontsize l t =
let short = shorten_text_inner l t in
if short = t then t else
let charcode, dotfontnum, dotfont =
unopt (Cpdfembed.get_char fontpack (int_of_char '.'))
in
let charcode = char_of_int charcode in
short @ [Cpdftype.Font (string_of_int dotfontnum, dotfont, fontsize); Cpdftype.Text [charcode; charcode; charcode]]
(* Calculate the used codepoints *)
let used pdf fastrefnums labels title marks =
let codepoints = null_hash () in
Hashtbl.add codepoints (int_of_char '.') ();
let addtext t =
iter
(fun c -> Hashtbl.replace codepoints c ())
(Pdftext.codepoints_of_utf8 (Pdftext.utf8_of_pdfdocstring t))
in
iter (fun c -> Hashtbl.replace codepoints c ()) (Pdftext.codepoints_of_utf8 title);
iter
(fun m ->
addtext m.Pdfmarks.text;
let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf m.Pdfmarks.target in
let labeltext =
try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum
in
addtext labeltext)
marks;
codepoints
(* Make a dot leader *)
let make_dots space fontpack fontsize =
let dotruns = of_utf8 fontpack fontsize "." in
let dotwidth = width_of_runs dotruns in
let runs = flatten (many dotruns (int_of_float (floor (space /. dotwidth)))) in
let remainder = space -. width_of_runs runs in
[Cpdftype.HGlue remainder] @ runs
(* Typeset a table of contents with given font, font size and title. Mediabox
(and CropBox) copied from first page of existing PDF. Margin of 10% inside
CropBox. Font size of title twice body font size. Null page labels added for
TOC, others bumped up and so preserved. *)
let typeset_table_of_contents ~font ~fontsize ~title ~bookmark ~dotleader pdf =
Hashtbl.clear width_table_cache;
let marks = Pdfmarks.read_bookmarks pdf in
if marks = [] then (Pdfe.log "No bookmarks, not making table of contents\n"; pdf) else
let labels = Pdfpagelabels.read pdf in
let refnums = Pdf.page_reference_numbers pdf in
let fastrefnums = hashtable_of_dictionary (combine refnums (indx refnums)) in
let codepoints = map fst (list_of_hashtbl (used pdf fastrefnums labels title marks)) in
let fontpack =
match font with
| Cpdfembed.PreMadeFontPack t -> t
| Cpdfembed.EmbedInfo {fontfile; fontname; encoding} ->
Cpdfembed.embed_truetype pdf ~fontfile ~fontname ~codepoints ~encoding
| Cpdfembed.ExistingNamedFont -> raise (Pdf.PDFError "Cannot use existing font with -table-of-contents")
in
let firstpage = hd (Pdfpage.pages_of_pagetree pdf) in
let width, firstpage_papersize, pmaxx, pmaxy, margin =
let width, height, xmax, ymax =
match Pdf.parse_rectangle pdf firstpage.Pdfpage.mediabox with
xmin, ymin, xmax, ymax -> xmax -. xmin, ymax -. ymin, xmax, ymax
in
width, Pdfpaper.make Pdfunits.PdfPoint width height, xmax, ymax, fmin width height *. 0.1
in
let firstpage_cropbox =
match Pdf.lookup_direct pdf "/CropBox" firstpage.Pdfpage.rest with
| Some r -> Some (Pdf.parse_rectangle pdf r)
| None -> None
in
let width =
match firstpage_cropbox with
| Some (xmin, _, xmax, _) -> xmax -. xmin
| None -> width
in
let lines =
map
(fun mark ->
let indent = float mark.Pdfmarks.level *. fontsize *. 2. in
let textruns = of_pdfdocencoding fontpack fontsize mark.Pdfmarks.text in
let labelruns =
if mark.Pdfmarks.target = NullDestination then of_pdfdocencoding fontpack fontsize "" else
let pnum = Pdfpage.pagenumber_of_target ~fastrefnums pdf mark.Pdfmarks.target in
let pde = try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> string_of_int pnum in
of_pdfdocencoding fontpack fontsize pde
in
let textgap = width -. margin *. 2. -. indent -. width_of_runs labelruns in
let textruns = shorten_text fontpack fontsize (textgap -. fontsize *. 3.) textruns in
let space = textgap -. width_of_runs textruns in
let leader =
if dotleader && labelruns <> []
then make_dots space fontpack fontsize
else [Cpdftype.HGlue space]
in
[Cpdftype.BeginDest mark.Pdfmarks.target; Cpdftype.HGlue indent] @ textruns @ leader @ labelruns
@ [Cpdftype.EndDest; Cpdftype.NewLine])
(Pdfmarks.read_bookmarks pdf)
in
let toc_pages, _ =
let title =
let glue = Cpdftype.VGlue (fontsize *. 2.) in
if title = "" then [] else
flatten
(map
(fun l -> l @ [Cpdftype.NewLine])
(map (of_utf8 fontpack (fontsize *. 2.)) (map implode (split_toc_title (explode title)))))
@ [glue]
in
let lm, rm, tm, bm =
match firstpage_cropbox with
| None -> (margin, margin, margin, margin)
| Some (cminx, cminy, cmaxx, cmaxy) ->
(cminx +. margin, (pmaxx -. cmaxx) +. margin, cminy +. margin, (pmaxy -. cmaxy) +. margin)
in
let firstfont =
hd (keep (function Cpdftype.Font _ -> true | _ -> false) (title @ flatten lines))
in
Cpdftype.typeset ~process_struct_tree:false lm rm tm bm firstpage_papersize pdf
([firstfont; Cpdftype.BeginDocument] @ title @ flatten lines)
in
let toc_pages =
match firstpage_cropbox with
| Some (a, b, c, d) ->
let rect =
Pdf.Array [Pdf.Real a; Pdf.Real b; Pdf.Real c; Pdf.Real d]
in
map
(fun p -> {p with Pdfpage.rest = Pdf.add_dict_entry p.Pdfpage.rest "/CropBox" rect})
toc_pages
| None -> toc_pages
in
let original_pages = Pdfpage.pages_of_pagetree pdf in
let toc_pages_len = length toc_pages in
let changes = map (fun n -> (n, n + toc_pages_len)) (indx original_pages) in
let pdf = Pdfpage.change_pages ~changes true pdf (toc_pages @ original_pages) in
let label =
{Pdfpagelabels.labelstyle = NoLabelPrefixOnly;
Pdfpagelabels.labelprefix = None;
Pdfpagelabels.startpage = 1;
Pdfpagelabels.startvalue = 1}
in
let labels' = label::map (fun l -> {l with Pdfpagelabels.startpage = l.Pdfpagelabels.startpage + toc_pages_len}) labels in
Pdfpagelabels.write pdf labels';
if bookmark then
let marks = Pdfmarks.read_bookmarks pdf in
let refnums = Pdf.page_reference_numbers pdf in
let newmark =
{Pdfmarks.level = 0;
Pdfmarks.text = Pdftext.pdfdocstring_of_utf8 (implode (real_newline (explode title)));
Pdfmarks.target = Pdfdest.XYZ (Pdfdest.PageObject (hd refnums), None, None, None);
Pdfmarks.isopen = false;
Pdfmarks.colour = (0., 0., 0.);
Pdfmarks.flags = 0}
in
Pdfmarks.add_bookmarks (newmark::marks) pdf
else
pdf