-
Notifications
You must be signed in to change notification settings - Fork 26
/
cpdfpage.ml
1049 lines (977 loc) · 41.1 KB
/
cpdfpage.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
open Pdfutil
open Cpdferror
(*(* FIXME: Need to take account of inherited resources (among Xobjects and their children - pages
are regularized upon loading). Would be nice to see a failing example first though.
FIXME: What would happen if a pattern was used in a transforming and non-transforming way - we
would have to dedupulicate - again, no failing example available. *)
(* Transparency group soft masks appear to need altering with the inverse of
the transformation matrix. We find them all, deduplicate, and then process
in place. *)
(* For each xobject, look in /Resources -> /ExtGState -> /G, and get object number. *)
let rec change_softmask_matrices_xobject pdf xobject =
let objnums = ref [] in
begin match xobject with
| Pdf.Indirect i ->
let dict = Pdf.lookup_obj pdf i in
begin match Pdf.lookup_direct pdf "/Resources" dict with
| Some d ->
begin match Pdf.lookup_direct pdf "/ExtGState" d with
| Some (Pdf.Dictionary extgstates) ->
iter
(function extgstate ->
match Pdf.direct pdf extgstate with
| Pdf.Dictionary d ->
begin match Pdf.lookup_direct pdf "/SMask" (Pdf.Dictionary d) with
| Some (Pdf.Dictionary d) ->
begin match lookup "/G" d with
| Some (Pdf.Indirect i) ->
objnums := i::!objnums
| _ -> ()
end
| _ -> ()
end
| _ -> ())
(map snd extgstates)
| _ -> ()
end
| _ -> ()
end
| _ -> ()
end;
let subxobjects =
match Pdf.lookup_direct pdf "/Resources" xobject with
| Some d ->
begin match Pdf.lookup_direct pdf "/XObject" d with
| Some (Pdf.Dictionary d) -> map snd d
| _ -> []
end
| _ -> []
in
let descendants =
flatten (map (change_softmask_matrices_xobject pdf) subxobjects)
in
descendants @ !objnums
let change_softmask_matrices_page pdf tr page =
let xobjects =
match Pdf.lookup_direct pdf "/XObject" page.Pdfpage.resources with
| Some (Pdf.Dictionary d) -> d
| _ -> []
in
let objnums = setify (flatten (map (change_softmask_matrices_xobject pdf) (map snd xobjects))) in
iter
(fun objnum ->
let dict = Pdf.lookup_obj pdf objnum in
let matrix = Pdf.parse_matrix pdf "/Matrix" dict in
let matrix' = Pdftransform.matrix_compose (Pdftransform.matrix_invert tr) matrix in
let dict = Pdf.add_dict_entry dict "/Matrix" (Pdf.make_matrix matrix') in
Pdf.addobj_given_num pdf (objnum, dict))
objnums*)
(* When we transform a page by wrapping in an [Op_cm], we must also
change any /Matrix entries in (some) pattern dictionaries, including inside
xobjects. We only change the ones used with scn, to avoid pattern dictionaries
used in other ways, which must not be transformed. *)
let patterns_used pdf content resources =
let used = null_hash () in
match Pdf.lookup_direct pdf "/Pattern" resources with
| None -> used
| Some _ ->
let ops = Pdfops.parse_operators pdf resources content in
iter
(function Pdfops.Op_scnName (x, []) | Pdfops.Op_SCNName (x, []) -> Hashtbl.replace used x () | _ -> ())
ops;
used
let pats_done = null_hash ()
let rec change_pattern_matrices_resources pdf tr resources names_used_with_scn =
begin match Pdf.lookup_direct pdf "/XObject" resources with
| Some (Pdf.Dictionary elts) ->
iter
(fun (k, v) ->
match v with
| Pdf.Indirect i ->
(*Printf.printf "Processing form xobject %s for patterns\n%!" k;*)
change_pattern_matrices_xobject pdf tr v i
| _ -> raise (Pdf.PDFError "change_pattern_matrices_page"))
elts
| _ -> ()
end;
begin match Pdf.lookup_direct pdf "/Pattern" resources with
| Some (Pdf.Dictionary patterns) ->
let entries =
map
(fun (name, p) ->
match Hashtbl.find names_used_with_scn name with
| exception Not_found -> (name, p)
| _ ->
(*Printf.printf "Changing matrices of pattern %s\n%!" name;*)
let old_pattern = Pdf.direct pdf p in
let new_pattern =
let existing_tr = Pdf.parse_matrix pdf "/Matrix" old_pattern in
let new_tr = Pdftransform.matrix_compose tr existing_tr in
Pdf.add_dict_entry old_pattern "/Matrix" (Pdf.make_matrix new_tr)
in
name, Pdf.Indirect (Pdf.addobj pdf new_pattern))
patterns
in
Pdf.add_dict_entry resources "/Pattern" (Pdf.Dictionary entries)
| _ -> resources
end
and change_pattern_matrices_xobject pdf tr xobj xobjnum =
match xobj with
| Pdf.Indirect i when (try ignore (Hashtbl.find pats_done i); true with Not_found -> false) -> ()
| _ ->
begin match xobj with Pdf.Indirect i -> Hashtbl.add pats_done i () | _ -> () end;
let xobj = Pdf.direct pdf xobj in
match Pdf.lookup_direct pdf "/Subtype" xobj with
| Some (Pdf.Name "/Form") ->
Pdfcodec.decode_pdfstream pdf xobj;
let resources = match Pdf.lookup_direct pdf "/Resources" xobj with Some d -> d | None -> Pdf.Dictionary [] in
let used = patterns_used pdf [xobj] resources in
begin match Pdf.lookup_direct pdf "/Resources" xobj with
| Some resources ->
let xobj' =
Pdf.add_dict_entry xobj "/Resources" (change_pattern_matrices_resources pdf tr resources used)
in
Pdf.addobj_given_num pdf (xobjnum, xobj')
| _ -> ()
end
| _ -> ()
let change_pattern_matrices_page pdf tr page =
Hashtbl.clear pats_done;
(*change_softmask_matrices_page pdf tr page;*)
let used = patterns_used pdf page.Pdfpage.content page.Pdfpage.resources in
(*Printf.printf "Patterns for translation, due to being used as cs / CS";
Hashtbl.iter (fun x _ -> Printf.printf "%s " x) used;
Printf.printf "\n%!";*)
{page with Pdfpage.resources = change_pattern_matrices_resources pdf tr page.Pdfpage.resources used}
(* Output information for each page *)
exception Exceptjson of Cpdfyojson.Safe.t
let output_page_info ?(json=false) ?(raisejson=false) unit pdf range =
let ugetnum pdf o =
let n = Pdf.getnum pdf o in
match unit with
| Pdfunits.PdfPoint -> n
| Pdfunits.Inch -> Pdfunits.inches n Pdfunits.PdfPoint
| Pdfunits.Centimetre -> Pdfunits.centimetres n Pdfunits.PdfPoint
| Pdfunits.Millimetre -> Pdfunits.millimetres n Pdfunits.PdfPoint
in
let pages = Pdfpage.pages_of_pagetree pdf in
let labels = Pdfpagelabels.read pdf in
let getbox page box =
if box = "/MediaBox" then
match page.Pdfpage.mediabox with
| Pdf.Array [a; b; c; d] ->
Printf.sprintf "%f %f %f %f"
(ugetnum pdf a) (ugetnum pdf b) (ugetnum pdf c) (ugetnum pdf d)
| _ -> ""
else
match Pdf.lookup_direct pdf box page.Pdfpage.rest with
| Some (Pdf.Array [a; b; c; d]) ->
Printf.sprintf "%f %f %f %f"
(ugetnum pdf a) (ugetnum pdf b) (ugetnum pdf c) (ugetnum pdf d)
| _ -> ""
in
let rotation page =
Pdfpage.int_of_rotation page.Pdfpage.rotate
in
let num_annots page =
match Pdf.lookup_direct pdf "/Annots" page.Pdfpage.rest with
| Some (Pdf.Array a) -> length a
| _ -> 0
in
let json_entry_of_pnum pnum =
let getbox_json page box =
match getbox page box with
| "" -> `Null
| s ->
let a, b, c, d = Cpdfcoord.parse_rectangle (Pdf.empty ()) s in
`List [`Float a; `Float b; `Float c; `Float d]
in
let page = select pnum pages in
`Assoc
[("Page", `Int pnum);
("Label", (`String (try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> "")));
("MediaBox", getbox_json page "/MediaBox");
("CropBox", getbox_json page "/CropBox");
("BleedBox", getbox_json page "/BleedBox");
("TrimBox", getbox_json page "/TrimBox");
("ArtBox", getbox_json page "/ArtBox");
("Rotation", `Int (rotation page));
("Annotations", `Int (num_annots page))]
in
if json then
let thejson = `List (map json_entry_of_pnum range) in
if raisejson then
raise (Exceptjson thejson)
else
flprint (Cpdfyojson.Safe.pretty_to_string thejson)
else
iter
(fun pnum ->
let page = select pnum pages in
Printf.printf "Page %i:\n" pnum;
Printf.printf "Label: %s\n"
(try Pdfpagelabels.pagelabeltext_of_pagenumber pnum labels with Not_found -> "");
Printf.printf "MediaBox: %s\n" (getbox page "/MediaBox");
Printf.printf "CropBox: %s\n" (getbox page "/CropBox");
Printf.printf "BleedBox: %s\n" (getbox page "/BleedBox");
Printf.printf "TrimBox: %s\n" (getbox page "/TrimBox");
Printf.printf "ArtBox: %s\n" (getbox page "/ArtBox");
Printf.printf "Rotation: %i\n" (rotation page);
Printf.printf "Annotations: %i\n" (num_annots page))
range
let json_page_info pdf range unit =
try output_page_info ~json:true ~raisejson:true unit pdf range; `List [] with
Exceptjson j -> j
let process_pages f pdf range =
let pages = Pdfpage.pages_of_pagetree pdf in
let pages', pagenumbers, matrices = (* new page objects, page number, matrix *)
split3
(map2
(fun n p -> if mem n range then f n p else (p, n, Pdftransform.i_matrix))
(ilist 1 (length pages))
pages)
in
Pdfpage.change_pages ~matrices:(combine pagenumbers matrices) true pdf pages'
let iter_pages f pdf range =
let pages = Pdfpage.pages_of_pagetree pdf in
iter2
(fun n p -> if mem n range then f n p)
(ilist 1 (length pages))
pages
let map_pages f pdf range =
let pages = Pdfpage.pages_of_pagetree pdf in
option_map2
(fun n p -> if mem n range then Some (f n p) else None)
(ilist 1 (length pages))
pages
(* Clip a page to one of its boxes, or the media box if that box is not
* present. This is a hard clip, done by using a clipping rectangle, so that
* the page may then be used as a stamp without extraneous material reapearing.
* *)
let hard_box pdf range boxname mediabox_if_missing fast =
process_pages
(Pdfpage.ppstub (fun pagenum page ->
let minx, miny, maxx, maxy =
if boxname = "/MediaBox" then
Pdf.parse_rectangle pdf page.Pdfpage.mediabox
else
match Pdf.lookup_direct pdf boxname page.Pdfpage.rest with
| Some a -> Pdf.parse_rectangle pdf a
| _ ->
if mediabox_if_missing
then Pdf.parse_rectangle pdf page.Pdfpage.mediabox
else error (Printf.sprintf "hard_box: box %s not found" boxname)
in
let ops = [Pdfops.Op_re (minx, miny, maxx -. minx, maxy -. miny); Pdfops.Op_W; Pdfops.Op_n] in
Pdfpage.prepend_operators pdf ops ~fast page))
pdf
range
let shift_page ?(fast=false) dxdylist pdf pnum page =
let dx, dy = List.nth dxdylist (pnum - 1) in
let transform_op = Pdfops.Op_cm (Pdftransform.matrix_of_op (Pdftransform.Translate (dx, dy))) in
let tr = Pdftransform.mktranslate dx dy in
let page = change_pattern_matrices_page pdf tr page in
Pdfannot.transform_annotations pdf tr page.Pdfpage.rest;
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, tr)
let shift_pdf ?(fast=false) dxdylist pdf range =
process_pages (shift_page ~fast dxdylist pdf) pdf range
(* Shift page data *)
let make_mediabox (xmin, ymin, xmax, ymax) =
Pdf.Array
[Pdf.Real xmin; Pdf.Real ymin; Pdf.Real xmax; Pdf.Real ymax]
(* Change the media box and other known boxes by the function [f] which takes
xmin, xmax, ymin, ymax as input. *)
let change_boxes f pdf page =
let names = ["/TrimBox"; "/ArtBox"; "/CropBox"; "/BleedBox"]
in let getbox n =
Pdf.lookup_direct pdf n page.Pdfpage.rest
in
let boxes = combine names (map getbox names) in
let toreplace = lose (function (_, None) -> true | _ -> false) boxes in
let toreplace =
map
(function (name, Some value) -> (name, value) | _ -> assert false)
toreplace
in
let rest' =
fold_left
(fun e (k, v) ->
let v =
make_mediabox (f (Pdf.parse_rectangle pdf v))
in
Pdf.replace_dict_entry e k v)
page.Pdfpage.rest
toreplace
in
{page with
Pdfpage.mediabox =
make_mediabox (f (Pdf.parse_rectangle pdf page.Pdfpage.mediabox));
Pdfpage.rest = rest'}
let shift_boxes dxdylist pdf range =
let dx, dy = match dxdylist with (a, b)::_ -> a, b | _ -> 0.0, 0.0 in
let f (xmin, ymin, xmax, ymax) = (xmin +. dx, ymin +. dy, xmax +. dx, ymax +. dy) in
let fpage _ p = change_boxes f pdf p in
process_pages (Pdfpage.ppstub fpage) pdf range
(* Scale contents *)
let scale_page_contents ?(fast=false) scale position pdf pnum page =
let (minx, miny, maxx, maxy) as box =
(* Use cropbox if available *)
Pdf.parse_rectangle
pdf
(match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
| Some r -> r
| None -> page.Pdfpage.mediabox)
in
let sx, sy, _ = Cpdfposition.calculate_position true 0. box position in
let tx, ty =
let open Cpdfposition in
match position with
| Top t -> 0., -.t
| TopLeft (a, b) -> a, -.b
| TopRight (a, b) -> -.a, -.b
| Left t -> t, 0.
| BottomLeft (a, b) -> a, b
| Bottom t -> 0., t
| BottomRight (a, b) -> -.a, b
| Right t -> -.t, 0.
| _ -> 0., 0. (* centre it... FIXME: We will add a center position, eventually, for text and this... *)
in
let transform =
Pdftransform.matrix_of_transform
[Pdftransform.Translate (tx, ty);
Pdftransform.Scale ((sx, sy), scale, scale)]
in
let transform_op = Pdfops.Op_cm transform in
let page = change_pattern_matrices_page pdf transform page in
Pdfannot.transform_annotations pdf transform page.Pdfpage.rest;
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, transform)
let scale_contents ?(fast=false) position scale pdf range =
process_pages (scale_page_contents ~fast scale position pdf) pdf range
(* Set media box *)
let set_mediabox xywhlist pdf range =
let crop_page pnum page =
let x, y, w, h = List.nth xywhlist (pnum - 1) in
{page with
Pdfpage.mediabox =
(Pdf.Array
[Pdf.Real x; Pdf.Real y;
Pdf.Real (x +. w); Pdf.Real (y +. h)])}
in
process_pages (Pdfpage.ppstub crop_page) pdf range
let remove_cropping_pdf pdf range =
let remove_cropping_page _ page =
{page with
Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/CropBox")}
in
process_pages (Pdfpage.ppstub remove_cropping_page) pdf range
let remove_trim_pdf pdf range =
let remove_trim_page _ page =
{page with
Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/TrimBox")}
in
process_pages (Pdfpage.ppstub remove_trim_page) pdf range
let remove_art_pdf pdf range =
let remove_art_page _ page =
{page with
Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/ArtBox")}
in
process_pages (Pdfpage.ppstub remove_art_page) pdf range
let remove_bleed_pdf pdf range =
let remove_bleed_page _ page =
{page with
Pdfpage.rest =
(Pdf.remove_dict_entry page.Pdfpage.rest "/BleedBox")}
in
process_pages (Pdfpage.ppstub remove_bleed_page) pdf range
(* Upright functionality *)
(* Return the pages from the pdf in the range, unordered. *)
let select_pages range pdf =
let pages = Pdfpage.pages_of_pagetree pdf in
option_map (function n -> try Some (select n pages) with _ -> None) range
(* If all pages are already upright, and the mediabox is (0,0)-based, do nothing
to save time. *)
let allupright range pdf =
let page_is_upright page =
page.Pdfpage.rotate = Pdfpage.Rotate0 &&
(let (minx, miny, _, _) = Pdf.parse_rectangle pdf page.Pdfpage.mediabox in
minx < 0.001 && miny < 0.001 && minx > ~-.0.001 && miny > ~-.0.001)
in
not (mem false (map page_is_upright (select_pages range pdf)))
(* Same, but don't care about mediabox origin. This is for -scale-to-fit, now that
it is ok with nonzero origins. *)
let alluprightonly range pdf =
let page_is_upright page = page.Pdfpage.rotate = Pdfpage.Rotate0 in
not (mem false (map page_is_upright (select_pages range pdf)))
let upright_transform pdf page =
let rotate =
Pdfpage.int_of_rotation page.Pdfpage.rotate
and cx, cy =
let minx, miny, maxx, maxy = Pdf.parse_rectangle pdf page.Pdfpage.mediabox in
(minx +. maxx) /. 2., (miny +. maxy) /. 2.
in
Pdftransform.mkrotate (cx, cy) (rad_of_deg (~-.(float rotate)))
let transform_boxes tr pdf page =
let f (minx, miny, maxx, maxy) =
let minx, miny = Pdftransform.transform_matrix tr (minx, miny)
and maxx, maxy = Pdftransform.transform_matrix tr (maxx, maxy) in
(minx, miny, maxx, maxy)
in
change_boxes f pdf page
let transform_contents ?(fast=false) tr pdf page =
let transform_op = Pdfops.Op_cm tr in
let page = change_pattern_matrices_page pdf tr page in
Pdfannot.transform_annotations pdf tr page.Pdfpage.rest;
Pdfpage.prepend_operators pdf [transform_op] ~fast page
(* Change a page's media box so its minimum x and y are 0, making other
operations simpler to think about. Any shift that is done is reflected in
other boxes (clip etc.) *)
let rectify_boxes ?(fast=false) pdf page =
let minx, miny, _, _ =
Pdf.parse_rectangle pdf page.Pdfpage.mediabox
in
let f (iminx, iminy, imaxx, imaxy) =
iminx -. minx, iminy -. miny, imaxx -. minx, imaxy -. miny
in
let page = change_boxes f pdf page in
if minx <> 0. || miny <> 0.
then
begin let p, _, _ = shift_page ~fast [(-.minx),(-.miny)] pdf 1 page in p end
else page
let upright ?(fast=false) range pdf =
if allupright range pdf then pdf else
let upright_page _ pnum page =
let tr = upright_transform pdf page in
let page = transform_boxes tr pdf page in
let page = transform_contents ~fast tr pdf page in
(rectify_boxes ~fast pdf {page with Pdfpage.rotate = Pdfpage.Rotate0}, pnum, tr)
in
process_pages (upright_page pdf) pdf range
(* Rotating pages *)
let rotate_pdf r pdf range =
let rotate_page _ page =
{page with Pdfpage.rotate =
Pdfpage.rotation_of_int r}
in
process_pages (Pdfpage.ppstub rotate_page) pdf range
let rotate_pdf_by r pdf range =
let rotate_page_by _ page =
{page with Pdfpage.rotate =
Pdfpage.rotation_of_int ((Pdfpage.int_of_rotation page.Pdfpage.rotate + r) mod 360)}
in
process_pages (Pdfpage.ppstub rotate_page_by) pdf range
let rotate_page_contents ~fast rotpoint r pdf pnum page =
let rotation_point =
match rotpoint with
| None ->
let minx, miny, maxx, maxy = Pdf.parse_rectangle pdf page.Pdfpage.mediabox in
(minx +. maxx) /. 2., (miny +. maxy) /. 2.
| Some point -> point
in
let tr =
Pdftransform.matrix_of_op (Pdftransform.Rotate (rotation_point, -.(rad_of_deg r)))
in
let transform_op = Pdfops.Op_cm tr in
let page = change_pattern_matrices_page pdf tr page in
Pdfannot.transform_annotations pdf tr page.Pdfpage.rest;
(Pdfpage.prepend_operators pdf [transform_op] ~fast page, pnum, tr)
let rotate_contents ?(fast=false) r pdf range =
process_pages (rotate_page_contents ~fast None r pdf) pdf range
(* Scale page data *)
let scale_pdf ?(fast=false) sxsylist pdf range =
let scale_page pnum page =
let sx, sy = List.nth sxsylist (pnum - 1) in
let f (xmin, ymin, xmax, ymax) =
xmin *. sx, ymin *. sy, xmax *. sx, ymax *. sy
in
let page = change_boxes f pdf page
and matrix = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), sx, sy)) in
let transform_op =
Pdfops.Op_cm matrix
in let page =
change_pattern_matrices_page pdf matrix page
in
Pdfannot.transform_annotations pdf matrix page.Pdfpage.rest;
(Pdfpage.prepend_operators pdf ~fast [transform_op] page, pnum, matrix)
in
process_pages scale_page pdf range
(* Scale without regard to aspect ratio. *)
let stretch ?(fast=false) sxsylist pdf range =
let stretch_page pnum page =
let sx, sy = List.nth sxsylist (pnum - 1) in
let (minx, miny, maxx, maxy) =
Pdf.parse_rectangle
pdf
(match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
| Some r -> r
| None -> page.Pdfpage.mediabox)
in
let scale_x, scale_y = sx /. (maxx -. minx), sy /. (maxy -. miny) in
let f (xmin, ymin, xmax, ymax) = xmin *. scale_x, ymin *. scale_y, xmax *. scale_x, ymax *. scale_y in
let page = change_boxes f pdf page in
let matrix = Pdftransform.matrix_of_op (Pdftransform.Scale ((0., 0.), scale_x, scale_y)) in
let transform_op = Pdfops.Op_cm matrix in
let page = change_pattern_matrices_page pdf matrix page in
Pdfannot.transform_annotations pdf matrix page.Pdfpage.rest;
(Pdfpage.prepend_operators pdf ~fast [transform_op] page, pnum, matrix)
in
process_pages stretch_page pdf range
(* Cropping *)
let crop_pdf ?(box="/CropBox") xywhlist pdf range =
let crop_page pagenum page =
{page with
Pdfpage.rest =
(Pdf.add_dict_entry
page.Pdfpage.rest
box
(let x, y, w, h = List.nth xywhlist (pagenum - 1) in
(Pdf.Array
[Pdf.Real x; Pdf.Real y;
Pdf.Real (x +. w); Pdf.Real (y +. h)])))}
in
process_pages (Pdfpage.ppstub crop_page) pdf range
(* Centre page content (crop box) on given page size, with no scaling. *)
let center_to_fit sxsylist pdf range =
let dxdylist =
let tx, ty = hd sxsylist in
map
(fun page ->
let (minx, miny, maxx, maxy) =
Pdf.parse_rectangle
pdf
(match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
| Some r -> r
| None -> page.Pdfpage.mediabox)
in
(~-.((tx -. (maxx -. minx)) /. 2.),
~-.((ty -. (maxy -. miny)) /. 2.)))
(Pdfpage.pages_of_pagetree pdf)
in
let list4 = map (fun (x, y) -> (0., 0., x, y)) sxsylist in
let pdf = set_mediabox list4 pdf range in
let pdf = crop_pdf list4 pdf range in
let pdf = remove_bleed_pdf pdf range in
let pdf = remove_art_pdf pdf range in
let pdf = remove_bleed_pdf pdf range in
shift_boxes dxdylist pdf range
(* Scale to fit page of size x * y *)
let scale_to_fit_pdf ?(fast=false) position input_scale xylist op pdf range =
let scale_page_to_fit pnum page =
let x, y = List.nth xylist (pnum - 1) in
let matrix =
let (minx, miny, maxx, maxy) =
Pdf.parse_rectangle
pdf
(match Pdf.lookup_direct pdf "/CropBox" page.Pdfpage.rest with
| Some r -> r
| None -> page.Pdfpage.mediabox)
in
if maxx <= 0. || maxy <= 0. then failwith "Zero-sized pages are invalid" else
let fx = x /. (maxx -. minx) in let fy = y /. (maxy -. miny) in
let scale = fmin fx fy *. input_scale in
let trans_x = (x -. (maxx *. scale)) /. 2. in
let trans_y = (y -. (maxy *. scale)) /. 2. in
let fixup_trans_x = -. (minx *. scale) /. 2. in
let fixup_trans_y = -. (miny *. scale) /. 2. in
let position_trans_x =
let dx = (x -. ((maxx -. minx) *. scale)) /. 2. in
match position with
| Cpdfposition.Left _ -> ~-. dx
| Cpdfposition.Right _ -> dx
| _ -> 0.
in
let position_trans_y =
let dy = (y -. ((maxy -. miny) *. scale)) /. 2. in
match position with
| Cpdfposition.Top _ -> dy
| Cpdfposition.Bottom _ -> ~-. dy
| _ -> 0.
in
(Pdftransform.matrix_of_transform
[Pdftransform.Translate (fixup_trans_x, fixup_trans_y);
Pdftransform.Translate (position_trans_x, position_trans_y);
Pdftransform.Translate (trans_x, trans_y);
Pdftransform.Scale ((0., 0.), scale, scale)])
in
let page =
change_boxes
(function (minx, miny, maxx, maxy) -> 0., 0., x, y)
pdf page
in
Pdfannot.transform_annotations pdf matrix page.Pdfpage.rest;
(Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast
(change_pattern_matrices_page pdf matrix page), pnum, matrix)
in
process_pages scale_page_to_fit pdf range
(* Add stack operators to a content stream to ensure it is composeable. On
-fast, we don't check for Q deficit, assuming PDF is ISO. *)
let protect fast pdf resources content =
let deficit =
if fast then 0 else
let ops = Pdfops.parse_operators pdf resources content in
let qs = length (keep (eq Pdfops.Op_q) ops) in
let bigqs = length (keep (eq Pdfops.Op_Q) ops) in
let deficit = if qs > bigqs then qs - bigqs else 0 in
if deficit <> 0 then Pdfe.log (Printf.sprintf "Q Deficit was nonzero. Fixing. %i\n" deficit);
deficit
in
let addstream ops = Pdf.addobj pdf (Pdfops.stream_of_ops ops) in
let q = addstream [Pdfops.Op_q] in
let qs = addstream (many Pdfops.Op_Q deficit @ [Pdfops.Op_Q]) in
[Pdf.Indirect q] @ content @ [Pdf.Indirect qs]
(* Does the page have a defined box e.g "/CropBox" *)
let hasbox pdf page boxname =
let pages = Pdfpage.pages_of_pagetree pdf in
if page > length pages || page < 1 then raise (Failure "hasbox: bad page") else
let p = select page pages in
match Pdf.lookup_direct pdf boxname p.Pdfpage.rest with
| Some _ -> true
| _ -> false
(* Flip pages *)
let flip_page ?(fast=false) transform_op pdf pnum page =
let minx, miny, maxx, maxy =
Pdf.parse_rectangle pdf page.Pdfpage.mediabox
in
let tr = transform_op minx miny maxx maxy in
let page = change_pattern_matrices_page pdf tr page in
Pdfannot.transform_annotations pdf tr page.Pdfpage.rest;
(Pdfpage.prepend_operators pdf [Pdfops.Op_cm tr] ~fast page, pnum, tr)
let vflip_pdf ?(fast=false) pdf range =
let transform_op _ miny _ maxy =
Pdftransform.matrix_of_op
(Pdftransform.Scale ((0., ((miny +. maxy) /. 2.)), 1., -.1.))
in
process_pages (flip_page ~fast transform_op pdf) pdf range
let hflip_pdf ?(fast=false) pdf range =
let transform_op minx _ maxx _ =
Pdftransform.matrix_of_op
(Pdftransform.Scale (((minx +. maxx) /. 2., 0.), -.1., 1.))
in
process_pages (flip_page ~fast transform_op pdf) pdf range
let stamp_shift_of_position topline midline sw sh w h p =
let half x = x /. 2.
and dy =
if midline then sh /. 2.
else if topline then sh
else 0.
in
let open Cpdfposition in
match p with
| PosCentre (ox, oy) -> ox -. half sw, oy -. dy
| PosLeft (ox, oy) -> ox, oy -. dy
| PosRight (ox, oy) -> ox -. sw, oy -. dy
| Top o -> half w -. half sw, h -. o -. sh -. dy
| TopLeft (a, b) -> a, h -. sh -. b -. dy
| TopRight (a, b) -> w -. sw -. a, h -. sh -. b -. dy
| Left o -> o, half h -. half sh -. dy
| BottomLeft (a, b) -> a, b -. dy
| Bottom o -> half w -. half sw, o -. dy
| BottomRight (a, b) -> w -. sw -. a, b -. dy
| Right o -> w -. sw -. o, half h -. half sh -. dy
| Diagonal | ReverseDiagonal | Centre ->
half w -. half sw, half h -. half sh -. dy
(* Combine Pdfpage.rest items for two PDFs. For now, we combine /Annots, and
* copy everything else from adict. What else should we combine? *)
let combine_page_items pdf adict bdict =
let getannots dict =
begin match dict with
Pdf.Dictionary d ->
begin match lookup "/Annots" d with
Some (Pdf.Array items) -> items
| _ -> []
end
| _ -> []
end
in
let a_annots = getannots adict in
let b_annots = getannots bdict in
match a_annots @ b_annots with
[] -> adict
| annots -> Pdf.add_dict_entry adict "/Annots" (Pdf.Array annots)
let do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover pdf o u opdf =
(* Scale page stamp o to fit page u *)
let sxmin, symin, sxmax, symax =
Pdf.parse_rectangle
pdf
(match Pdf.lookup_direct pdf "/CropBox" o.Pdfpage.rest with | Some r -> r | None -> o.Pdfpage.mediabox)
in let txmin, tymin, txmax, tymax =
Pdf.parse_rectangle
pdf
(match Pdf.lookup_direct pdf "/CropBox" u.Pdfpage.rest with | Some r -> r | None -> u.Pdfpage.mediabox)
in
let o =
if scale_to_fit then
let xmag = (txmax -. txmin) /. (sxmax -. sxmin) in
let ymag = (tymax -. tymin) /. (symax -. symin) in
let scale =
if xmag < 0.999 && ymag < 0.999 then
if xmag > ymag then xmag else ymag
else if xmag >= 1.001 && ymag >= 1.001 then
if xmag > ymag then ymag else xmag
else if xmag >= 1.001 then ymag
else xmag
in
let dx = txmin +. ((txmax -. txmin) -. (sxmax -. sxmin) *. scale) /. 2. in
let dy = tymin +. ((tymax -. tymin) -. (symax -. symin) *. scale) /. 2. in
let matrix =
(Pdftransform.matrix_of_transform
([Pdftransform.Translate (dx, dy)] @
(if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @
[Pdftransform.Scale ((sxmin, symin), scale, scale)]))
in
Pdfannot.transform_annotations pdf matrix o.Pdfpage.rest;
let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in
change_pattern_matrices_page pdf matrix r
else
let sw = sxmax -. sxmin and sh = symax -. symin
and w = txmax -. txmin and h = tymax -. tymin in
let dx, dy = stamp_shift_of_position topline midline sw sh w h position in
let matrix =
(Pdftransform.matrix_of_transform
((if relative_to_cropbox then [Pdftransform.Translate (txmin, tymin)] else []) @
[Pdftransform.Translate (dx, dy)]))
in
Pdfannot.transform_annotations pdf matrix o.Pdfpage.rest;
let r = Pdfpage.prepend_operators pdf [Pdfops.Op_cm matrix] ~fast o in
change_pattern_matrices_page pdf matrix r
in
{u with
Pdfpage.content =
(if isover then ( @ ) else ( @@ ))
(protect fast pdf u.Pdfpage.resources u.Pdfpage.content)
(protect fast pdf o.Pdfpage.resources o.Pdfpage.content);
Pdfpage.rest =
combine_page_items pdf u.Pdfpage.rest o.Pdfpage.rest;
Pdfpage.resources =
Pdfpage.combine_pdf_resources pdf u.Pdfpage.resources o.Pdfpage.resources}
let stamp relative_to_cropbox position topline midline fast scale_to_fit isover range over pdf =
let prefix = Pdfpage.shortest_unused_prefix pdf in
Pdfpage.add_prefix over prefix;
let marks = Pdfmarks.read_bookmarks pdf in
let marks_refnumbers = Pdf.page_reference_numbers pdf in
let pdf = Pdfmarks.remove_bookmarks pdf in
let over = Pdfmarks.remove_bookmarks over in
let pageseqs = ilist 1 (Pdfpage.endpage pdf) in
let over_firstpage_pdf =
match Pdfpage.pages_of_pagetree over with
| [] -> error "empty PDF"
| h::_ -> Pdfpage.change_pages ~changes:[(1, 1)] true over [h]
in
let merged =
Pdfmerge.merge_pdfs
false false ["a"; "b"] [pdf; over_firstpage_pdf] [pageseqs; [1]]
in
let merged =
{merged with Pdf.saved_encryption = pdf.Pdf.saved_encryption}
in
let merged = Cpdfmetadata.copy_id true pdf merged in
let merged_pages = Pdfpage.pages_of_pagetree merged in
let under_pages, over_page =
all_but_last merged_pages, last merged_pages
in
let new_pages =
map2
(fun pageseq under_page ->
do_stamp relative_to_cropbox fast position topline midline scale_to_fit isover merged
(if mem pageseq range then over_page else
Pdfpage.blankpage Pdfpaper.a4)
under_page over)
pageseqs
under_pages
in
let changed =
let changes =
map (fun x -> (x, x)) (ilist 1 (length new_pages))
in
Pdfpage.change_pages ~changes true merged new_pages
in
let new_refnumbers = Pdf.page_reference_numbers changed in
let changetable = hashtable_of_dictionary (combine marks_refnumbers new_refnumbers) in
let new_marks = map (Cpdfbookmarks.change_bookmark changetable) marks in
Pdfmarks.add_bookmarks new_marks changed
(* Combine pages from two PDFs. For now, assume equal length. FIXME: Why? *)
(* If [over] has more pages than [under], chop the excess. If the converse, pad
[over] to the same length *)
let equalize_pages under over =
let length_under = Pdfpage.endpage under in
let length_over = Pdfpage.endpage over in
if length_over > length_under then
let changes =
map (fun x -> (x, x)) (ilist 1 length_under)
in
(under,
(Pdfpage.change_pages
~changes true over (take (Pdfpage.pages_of_pagetree over) length_under)))
else if length_under > length_over then
let changes =
map (fun x -> (x, x)) (ilist 1 length_over)
in
(under,
Pdfpage.change_pages
~changes true over
(Pdfpage.pages_of_pagetree over @
(many (Pdfpage.blankpage Pdfpaper.a4) (length_under - length_over))))
else
under, over
let combine_pages fast under over scaletofit swap equalize =
let debug_combine_pages = false in
let debug_pdf pdf n =
if debug_combine_pages then
begin Pdf.remove_unreferenced pdf; Pdfwrite.pdf_to_file pdf n end
in
Pdfpage.add_prefix over (Pdfpage.shortest_unused_prefix under);
let marks_under, marks_over = Pdfmarks.read_bookmarks under, Pdfmarks.read_bookmarks over in
let under, over = if equalize then equalize_pages under over else under, over in
let under_length, over_length = Pdfpage.endpage under, Pdfpage.endpage over in
if under_length <> over_length then
raise (Pdf.PDFError "combine_pages: not of equal length")
else
let pageseqs_under = ilist 1 (Pdfpage.endpage under) in
let pageseqs_over = ilist 1 (Pdfpage.endpage over) in
let merged =
Pdfmerge.merge_pdfs
false false ["a"; "b"] [under; over] [pageseqs_under; pageseqs_over]
in
debug_pdf merged "merged.pdf";
let under_pages, over_pages =
cleave (Pdfpage.pages_of_pagetree merged) under_length
in
let new_pages =
map2
(fun o u ->
do_stamp
false fast (BottomLeft (0., 0.)) false false scaletofit (not swap) merged o u over)
over_pages under_pages
in
(* Build the changes. 123456 -> 123123 *)
let changes =
let len = length new_pages in
combine (ilist 1 (len * 2)) (let x = ilist 1 len in x @ x)
in
let changed = Pdfpage.change_pages ~changes true merged new_pages in
let r = Pdfmarks.add_bookmarks (marks_under @ marks_over) changed in
debug_pdf r "final.pdf";
r
(* Just used by cpdflib for historical reasons *)
let setBox box minx maxx miny maxy pdf range =
let set_box_page _ page =
{page with
Pdfpage.rest =
Pdf.add_dict_entry
page.Pdfpage.rest box
(Pdf.Array [Pdf.Real minx; Pdf.Real miny; Pdf.Real maxx; Pdf.Real maxy])}
in
process_pages (Pdfpage.ppstub set_box_page) pdf range
(* Add rectangles on top of pages to show Media, Crop, Art, Trim, Bleed boxes.
*
* We use different dash lengths and colours to help distinguish coincident
* boxes The sequence of operators is postpended to the page content,
* appropriately protected to prevent pollution of matrices.
*
* /MediaBox: Solid red line
* /CropBox: Dashed 7 on 7 off green line
* /ArtBox: Dashed 5 on 5 off blue line
* /TrimBox: Dashed 3 on 3 off orange line
* /BleedBox: Dashed 2 on 2 off pink line *)
let get_rectangle pdf page box =
if box = "/MediaBox" then
match page.Pdfpage.mediabox with
Pdf.Array [a; b; c; d] as r -> Some (Pdf.parse_rectangle pdf r)
| _ -> None
else
match Pdf.lookup_direct pdf box page.Pdfpage.rest with
Some (Pdf.Array [a; b; c; d] as r) -> Some (Pdf.parse_rectangle pdf r)
| _ -> None
let show_boxes_page fast pdf _ page =
let make_ops (r, g, b) on off boxname =
match get_rectangle pdf page boxname with
Some (r1, r2, r3, r4) ->
[Pdfops.Op_q;
Pdfops.Op_RG (r /. 255., g /. 255., b /. 255.);
Pdfops.Op_w 1.;
Pdfops.Op_d ((if on = 0. && off = 0. then [] else [on; off]), 0.);
Pdfops.Op_re (r1, r2, r3 -. r1, r4 -. r2);
Pdfops.Op_S;
Pdfops.Op_Q]
| None -> []
in
let ops =
make_ops (255., 0., 0.) 0. 0. "/MediaBox"
@ make_ops (0., 255., 0.) 7. 7. "/CropBox"
@ make_ops (0., 0., 255.) 5. 5. "/ArtBox"
@ make_ops (255.,150.,0.) 3. 3. "/TrimBox"
@ make_ops (255.,9.,147.) 2. 2. "/BleedBox"
in
Pdfpage.postpend_operators pdf ops ~fast page
let show_boxes ?(fast=false) pdf range =
process_pages (Pdfpage.ppstub (show_boxes_page fast pdf)) pdf range
let allowance = 9.
let line (x0, y0, x1, y1) =
[Pdfops.Op_m (x0, y0);
Pdfops.Op_l (x1, y1);
Pdfops.Op_s]
let trim_marks_page fast pdf n page =
match get_rectangle pdf page "/TrimBox", get_rectangle pdf page "/MediaBox" with
| Some (tminx, tminy, tmaxx, tmaxy), Some (minx, miny, maxx, maxy) ->
let ops =
[Pdfops.Op_q;
Pdfops.Op_K (1., 1., 1., 1.);
Pdfops.Op_w 1.]
@ line (minx, tmaxy, tminy -. allowance, tmaxy) (* top left *)
@ line (tminx, tmaxy +. allowance, tminx, maxy)
@ line (tmaxx +. allowance, tmaxy, maxx, tmaxy) (* top right *)
@ line (tmaxx, tmaxy +. allowance, tmaxx, maxy)
@ line (tmaxx +. allowance, tminy, maxx, tminy) (* bottom right *)
@ line (tmaxx, tminy -. allowance, tmaxx, miny)
@ line (tminx -. allowance, tminy, minx, tminy) (* bottom left *)
@ line (tminx, tminy -. allowance, tminx, miny)
@ [Pdfops.Op_Q]
in