-
Notifications
You must be signed in to change notification settings - Fork 1
/
pycpdflib.py
2343 lines (1826 loc) · 68.5 KB
/
pycpdflib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Pycpdflib: a python interface to cpdf.
Before using the library, you must load the libpycpdf and libcpdf DLLs. This is
achieved with the pycpdflib.loadDLL function, given the filename or full path
of the libpycpdf DLL. On Windows, you may have to call os.add_dll_directory
first. On MacOS, you may need to give the full path, and you may need to
install libcpdf.so in a standard location /usr/local/lib/, or use the
install_name_tool command to tell libpycpdf.so where to find libcpdf.so.
"""
"""
Loading the libpycpdf and libcpdf DLLs
--------------------------------------
Before using the library, you must load the ``libpycpdf`` and ``libcpdf`` DLLs.
This is achieved with the ``pycpdflib.loadDLL`` function, given the filename or
full path of the ``libpycpdf`` DLL.
On Windows, you may have to call ``os.add_dll_directory`` first. On MacOS, you
may need to give the full path, and you may need to install ``libcpdf.so`` in a
standard location ``/usr/local/lib/``, or use the ``install_name_tool`` command
to tell ``libpycpdf.so`` where to find ``libcpdf.so``.
Conventions
-----------
Any function may raise the exception ``CPDFError``, carrying a string describing
the error.
A 'range' is a list of integers specifying page numbers. Page numbers start at
1. Range arguments are called `r`.
Text arguments and results are in UTF8.
Units are in PDF points (1/72 inch).
Angles are in degrees.
Built-in values
---------------
Paper sizes:
a0portrait a1portrait a2portrait a3portrait a4portrait a5portrait a0landscape
a1landscape a2landscape a3landscape a4landscape a5landscape usletterportrait
usletterlandscape uslegalportrait uslegallandscape
Permissions:
noEdit noPrint noCopy noAnnot noForms noExtract noAssemble noHqPrint
Encryption methods:
pdf40bit pdf128bit aes128bitfalse aes128bittrue aes256bitfalse aes256bittrue
aes256bitisofalse aes256bitisotrue
Positions:
Positions with two numbers in a tuple e.g (posLeft, 10.0, 20.0)
posCentre posLeft posRight
Positions with one number in a tuple e.g (top, 5.0)
top topLeft topRight left bottomLeft bottomRight right
Positions with no numbers e.g diagonal
diagonal reverseDiagonal
Fonts:
timesRoman timesBold timesItalic timesBoldItalic helvetica helveticaBold
helveticaOblique helveticaBoldOblique courier courierBold courierOblique
courierBoldOblique
Justification:
leftJustify centreJustify rightJustify
Page layouts:
singlePage oneColumn twoColumnLeft twoColumnRight twoPageLeft twoPageRight
Page modes:
useNone useOutlines useThumbs useOC useAttachments
Page label styles:
decimalArabic uppercaseRoman lowercaseRoman uppercaseLetters lowercaseLetters
"""
from ctypes import *
import sys
libc = None
# CHAPTER 0. Preliminaries
class Pdf:
    """Handle for a PDF document held by the native library.

    The ``pdf`` attribute is the document number passed to the libpycpdf
    calls. When the handle is destroyed, the native library is asked to
    delete the corresponding document.
    """

    # Placeholder document number until __init__ stores the real one.
    pdf = -1

    def __init__(self, pdfnum):
        """Wrap the native document number ``pdfnum``."""
        self.pdf = pdfnum

    def __del__(self):
        """Delete the wrapped document in the native library."""
        libc.pycpdf_deletePdf(self.pdf)
def loadDLL(f):
    """Load the libpycpdf DLL from the file ``f`` and set up pycpdflib.

    Declares the ctypes result and argument types of the native functions
    that need them, then calls the library's startup routine with the
    contents of ``sys.argv``. Must be called before any other function in
    this module. Raises CPDFError if the library reports an error after
    startup.
    """
    global libc
    libc = CDLL(f)

    char_p = POINTER(c_char)
    byte_p = POINTER(c_uint8)

    def native(name):
        # Look up an exported function by its name without the prefix.
        return getattr(libc, 'pycpdf_' + name)

    # Functions whose result is a pointer to char.
    for name in (
            'version', 'lastErrorString', 'stringOfPagespec',
            'stampAsXObject',
            'getTitle', 'getAuthor', 'getSubject', 'getKeywords',
            'getCreator', 'getProducer', 'getCreationDate',
            'getModificationDate',
            'getTitleXMP', 'getAuthorXMP', 'getSubjectXMP', 'getKeywordsXMP',
            'getCreatorXMP', 'getProducerXMP', 'getCreationDateXMP',
            'getModificationDateXMP',
            'getBookmarkText', 'getAttachmentName',
            'getImageResolutionImageName',
            'getFontName', 'getFontType', 'getFontEncoding',
            'getPageLabelStringForPage', 'getPageLabelPrefix',
            'dateStringOfComponents', 'OCGListEntry'):
        native(name).restype = char_p

    # Functions whose result is a pointer to unsigned bytes.
    for name in (
            'toMemory', 'outputJSONMemory', 'annotationsJSON',
            'getBookmarksJSON', 'getMetadata', 'getDictEntries',
            'getAttachmentData'):
        native(name).restype = byte_p

    # Unit conversions take one double and return a double.
    for name in ('ptOfCm', 'ptOfMm', 'ptOfIn', 'cmOfPt', 'mmOfPt', 'inOfPt'):
        native(name).argtypes = [c_double]
        native(name).restype = c_double

    # Other functions returning a double.
    for name in ('getImageResolutionXRes', 'getImageResolutionYRes'):
        native(name).restype = c_double

    # Functions taking (int, int, double, double, double, double).
    for name in ('crop', 'setMediaBox', 'setCropBox', 'setTrimBox',
                 'setArtBox', 'setBleedBox'):
        native(name).argtypes = [
            c_int, c_int, c_double, c_double, c_double, c_double]

    # Remaining argument signatures, one entry per function.
    signatures = {
        'tableOfContents': [c_int, c_int, c_double, char_p, c_int],
        'blankDocument': [c_double, c_double, c_int],
        'textToPDF': [c_double, c_double, c_int, c_double, char_p],
        'textToPDFPaper': [c_int, c_int, c_double, char_p],
        'scalePages': [c_int, c_int, c_double, c_double],
        'scaleToFit': [c_int, c_int, c_double, c_double, c_double],
        'scaleToFitPaper': [c_int, c_int, c_int, c_double],
        'scaleContents': [c_int, c_int, c_int, c_double, c_double, c_double],
        'shiftContents': [c_int, c_int, c_double, c_double],
        'rotateContents': [c_int, c_int, c_double],
        'thinLines': [c_int, c_int, c_double],
        'addText': [
            c_int, c_int, c_int, char_p, c_int, c_double, c_double,
            c_double, c_int, c_int, c_double, c_double, c_double, c_double,
            c_int, c_int, c_int, c_double, c_int, c_int, c_int, char_p,
            c_double, c_int],
        'addTextSimple': [
            c_int, c_int, char_p, c_int, c_double, c_double, c_int,
            c_double],
        'startGetImageResolution': [c_int, c_double],
        'stampExtended': [
            c_int, c_int, c_int, c_int, c_int, c_int, c_double, c_double,
            c_int],
        'impose': [
            c_int, c_double, c_double, c_int, c_int, c_int, c_int, c_int,
            c_double, c_double, c_double],
    }
    for name, argtypes in signatures.items():
        native(name).argtypes = argtypes

    # Copy sys.argv into a C array of char pointers. The array has one more
    # slot than there are arguments; that last slot is never assigned.
    argv = (char_p * (len(sys.argv) + 1))()
    for index, arg in enumerate(sys.argv):
        argv[index] = create_string_buffer(arg.encode('utf-8'))
    libc.pycpdf_startup.argtypes = [POINTER(char_p)]
    libc.pycpdf_startup(argv)
    checkerror()
class CPDFError(Exception):
    """Exception raised by pycpdflib functions.

    The string describing what went wrong is available as ``message`` and
    as the exception's argument.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message
def lastError():
    """Return the library's last error code.

    Rarely needed directly, because pycpdflib functions raise exceptions.
    """
    code = libc.pycpdf_lastError()
    return code
def lastErrorString():
    """Return the library's last error message as a string.

    Rarely needed directly, because pycpdflib functions raise exceptions.
    """
    raw = string_at(libc.pycpdf_lastErrorString())
    return raw.decode()
def checkerror():
    """Raise CPDFError if the last library call left an error behind.

    The error state is cleared before the exception is raised. Not intended
    for direct use: the pycpdflib functions call this themselves.
    """
    if lastError() == 0:
        return
    message = lastErrorString()
    clearError()
    raise CPDFError(message)
def version():
    """Return the version number of the pycpdflib library as a string."""
    text = string_at(libc.pycpdf_version()).decode()
    checkerror()
    return text
def setFast():
    """Switch the library to fast mode, globally.

    Some operations have a fast mode. The default is 'slow' mode, which
    works even on old-fashioned files. See section 1.13 of the CPDF manual
    for details.
    """
    libc.pycpdf_setFast()
    checkerror()
def setSlow():
    """Switch the library to slow mode, globally.

    Some operations have a fast mode. The default is 'slow' mode, which
    works even on old-fashioned files. See section 1.13 of the CPDF manual
    for details.
    """
    libc.pycpdf_setSlow()
    checkerror()
def clearError():
    """Reset the library's current error state."""
    libc.pycpdf_clearError()
    # Checked like every other call, in case clearing itself reports an error.
    checkerror()
def onExit():
    """Print debug information about resource usage.

    Useful for detecting whether PDFs or ranges are being deallocated
    properly.
    """
    libc.pycpdf_onExit()
    checkerror()
# CHAPTER 1. Basics
def fromFile(filename, userpw):
    """Load a PDF document from the file ``filename``.

    ``userpw`` is the user password (possibly blank) in case the file is
    encrypted. The file is not decrypted, but the password is sometimes
    needed just to load it. Returns a Pdf.
    """
    name_bytes = str.encode(filename)
    pw_bytes = str.encode(userpw)
    document = Pdf(libc.pycpdf_fromFile(name_bytes, pw_bytes))
    checkerror()
    return document
def fromFileLazy(filename, userpw):
    """Load a PDF document from the file ``filename`` with minimal parsing.

    Objects are read and parsed only when they are actually needed, so use
    this when the whole file won't be required. ``userpw`` is the user
    password (possibly blank) in case the file is encrypted. The file is not
    decrypted, but the password is sometimes needed just to load it.
    Returns a Pdf.
    """
    name_bytes = str.encode(filename)
    pw_bytes = str.encode(userpw)
    document = Pdf(libc.pycpdf_fromFileLazy(name_bytes, pw_bytes))
    checkerror()
    return document
def fromMemory(data, userpw):
    """Load a PDF document from the byte array ``data``.

    ``userpw`` is the user password (blank if none). Returns a Pdf.
    """
    size = len(data)
    document = Pdf(libc.pycpdf_fromMemory(data, size, str.encode(userpw)))
    checkerror()
    return document
def fromMemoryLazy(data, userpw):
    """Load a PDF document from the byte array ``data``, lazily.

    Like fromMemory, but parsing is deferred as in fromFileLazy. ``userpw``
    is the user password (blank if none). Returns a Pdf.
    """
    size = len(data)
    document = Pdf(
        libc.pycpdf_fromMemoryLazy(data, size, str.encode(userpw)))
    checkerror()
    return document
def ptOfCm(i):
    """Convert a length in centimetres to points (72 points to 1 inch)."""
    points = libc.pycpdf_ptOfCm(i)
    checkerror()
    return points
def ptOfMm(i):
    """Convert a length in millimetres to points (72 points to 1 inch)."""
    points = libc.pycpdf_ptOfMm(i)
    checkerror()
    return points
def ptOfIn(i):
    """Convert a length in inches to points (72 points to 1 inch)."""
    points = libc.pycpdf_ptOfIn(i)
    checkerror()
    return points
def cmOfPt(i):
    """Convert a length in points to centimetres (72 points to 1 inch)."""
    centimetres = libc.pycpdf_cmOfPt(i)
    checkerror()
    return centimetres
def mmOfPt(i):
    """Convert a length in points to millimetres (72 points to 1 inch)."""
    millimetres = libc.pycpdf_mmOfPt(i)
    checkerror()
    return millimetres
def inOfPt(i):
    """Convert a length in points to inches (72 points to 1 inch)."""
    inches = libc.pycpdf_inOfPt(i)
    checkerror()
    return inches
def parsePagespec(pdf, pagespec):
    """Parse a page specification such as "1-3,8-end" into a range.

    The PDF is supplied so that page specifications referring to pages
    which do not exist are rejected. Returns the range as a list of page
    numbers.
    """
    handle = libc.pycpdf_parsePagespec(pdf.pdf, str.encode(pagespec))
    page_list = list_of_range(handle)
    deleteRange(handle)
    checkerror()
    return page_list
def validatePagespec(pagespec):
    """Validate a page specification without reference to a document.

    Returns the library's verdict on whether ``pagespec`` is valid, so far
    as that can be decided in the absence of the actual document.
    """
    verdict = libc.pycpdf_validatePagespec(str.encode(pagespec))
    checkerror()
    return verdict
def stringOfPagespec(pdf, r):
    """Build a page specification string from the page range ``r``.

    For example, the range containing 1,2,3,6,7,8 in a document of 8 pages
    might yield "1-3,6-end".
    """
    handle = range_of_list(r)
    raw = string_at(libc.pycpdf_stringOfPagespec(pdf.pdf, handle))
    spec = raw.decode()
    deleteRange(handle)
    checkerror()
    return spec
def blankRange():
    """Create a range with no pages in it, returned as a list."""
    handle = libc.pycpdf_blankRange()
    checkerror()
    page_list = list_of_range(handle)
    deleteRange(handle)
    return page_list
def pageRange(f, t):
    """Build a range from page ``f`` to page ``t`` inclusive.

    For example, pageRange(3,7) gives the range 3,4,5,6,7.
    """
    handle = libc.pycpdf_pageRange(f, t)
    page_list = list_of_range(handle)
    deleteRange(handle)
    checkerror()
    return page_list
def all(pdf):
    """Return the range containing all the pages in the given document."""
    handle = libc.pycpdf_all(pdf.pdf)
    page_list = list_of_range(handle)
    deleteRange(handle)
    checkerror()
    return page_list
def even(r):
    """Return a range containing just the even pages of the range ``r``."""
    source = range_of_list(r)
    filtered = libc.pycpdf_even(source)
    page_list = list_of_range(filtered)
    # Both native ranges are temporary; release them before returning.
    deleteRange(source)
    deleteRange(filtered)
    checkerror()
    return page_list
def odd(r):
    """Return a range containing just the odd pages of the range ``r``."""
    source = range_of_list(r)
    filtered = libc.pycpdf_odd(source)
    page_list = list_of_range(filtered)
    # Both native ranges are temporary; release them before returning.
    deleteRange(source)
    deleteRange(filtered)
    checkerror()
    return page_list
def rangeUnion(a, b):
    """Return the union of two ranges: the pages in range ``a`` and range
    ``b``."""
    first = range_of_list(a)
    second = range_of_list(b)
    combined = libc.pycpdf_rangeUnion(first, second)
    page_list = list_of_range(combined)
    # All three native ranges are temporary; release them before returning.
    deleteRange(first)
    deleteRange(second)
    deleteRange(combined)
    checkerror()
    return page_list
def difference(a, b):
    """Return the difference of two ranges: all the pages in ``a`` except
    for those which are also in ``b``."""
    first = range_of_list(a)
    second = range_of_list(b)
    remaining = libc.pycpdf_difference(first, second)
    page_list = list_of_range(remaining)
    # All three native ranges are temporary; release them before returning.
    deleteRange(first)
    deleteRange(second)
    deleteRange(remaining)
    checkerror()
    return page_list
def removeDuplicates(r):
    """Return a new range equal to ``r`` with duplicates removed."""
    source = range_of_list(r)
    unique = libc.pycpdf_removeDuplicates(source)
    page_list = list_of_range(unique)
    deleteRange(source)
    deleteRange(unique)
    checkerror()
    return page_list
def rangeLength(r):
    """Return the number of pages in the range ``r``."""
    handle = range_of_list(r)
    count = libc.pycpdf_rangeLength(handle)
    deleteRange(handle)
    checkerror()
    return count
def rangeGet(r, n):
    """Return the page number at position ``n`` in the range ``r``.

    ``n`` runs from 0 to rangeLength - 1.
    """
    handle = range_of_list(r)
    page = libc.pycpdf_rangeGet(handle, n)
    deleteRange(handle)
    checkerror()
    return page
def rangeAdd(r, p):
    """Return the range ``r`` with page ``p`` added, if it is not already
    there."""
    source = range_of_list(r)
    extended = libc.pycpdf_rangeAdd(source, p)
    page_list = list_of_range(extended)
    deleteRange(source)
    deleteRange(extended)
    checkerror()
    return page_list
def isInRange(r, p):
    """Report whether the page ``p`` is in the range ``r``.

    Returns the library's result, which is true when the page is present.
    """
    handle = range_of_list(r)
    present = libc.pycpdf_isInRange(handle, p)
    deleteRange(handle)
    checkerror()
    return present
def pages(pdf):
    """Return the number of pages in the given PDF."""
    count = libc.pycpdf_pages(pdf.pdf)
    checkerror()
    return count
def pagesFast(userpw, filename):
    """Return the number of pages in the PDF file ``filename``.

    ``userpw`` is the user password. The count is obtained as fast as
    possible, without loading the whole file.
    """
    pw_bytes = str.encode(userpw)
    name_bytes = str.encode(filename)
    count = libc.pycpdf_pagesFast(pw_bytes, name_bytes)
    checkerror()
    return count
def toFile(pdf, filename, linearize, make_id):
    """Write the PDF to the file ``filename``.

    If ``linearize`` is True, the file will be linearized, if supported by
    libcpdf. If ``make_id`` is True, it will be given a new ID. Raises
    CPDFError if the library reports an error.
    """
    # Pass the caller's flags through; they used to be hard-coded to False,
    # so linearize and make_id were silently ignored.
    libc.pycpdf_toFile(pdf.pdf, str.encode(filename), linearize, make_id)
    checkerror()
def toFileExt(pdf, filename, linearize, make_id, preserve_objstm,
              generate_objstm, compress_objstm):
    """Write the PDF to the file ``filename``, with object stream options.

    If ``linearize`` is True, the file will be linearized, if supported by
    libcpdf. If ``make_id`` is True, it will be given a new ID. If
    ``preserve_objstm`` is True, existing object streams will be preserved.
    If ``generate_objstm`` is True, object streams will be generated even if
    not originally present. If ``compress_objstm`` is True, object streams
    will be compressed (what we usually want).

    WARNING: the pdf argument will be invalid after this call and should
    not be used again.
    """
    name_bytes = str.encode(filename)
    libc.pycpdf_toFileExt(
        pdf.pdf, name_bytes, linearize, make_id,
        preserve_objstm, generate_objstm, compress_objstm)
    checkerror()
def toMemory(pdf, linearize, make_id):
    """Write the PDF to memory and return the result as ``bytes``.

    ``linearize`` and ``make_id`` are passed through to the library.
    """
    size = c_int32()
    native_buf = libc.pycpdf_toMemory(
        pdf.pdf, linearize, make_id, byref(size))
    # Copy the data out before the library frees its own buffer.
    copy = create_string_buffer(size.value)
    memmove(copy, native_buf, size.value)
    libc.pycpdf_toMemoryFree()
    checkerror()
    return copy.raw
def isEncrypted(pdf):
    """Report whether the document is encrypted.

    Returns the library's result, which is true for an encrypted document.
    """
    encrypted = libc.pycpdf_isEncrypted(pdf.pdf)
    checkerror()
    return encrypted
"""Permissions."""
# Numbered consecutively from 0, in this order.
(noEdit, noPrint, noCopy, noAnnot,
 noForms, noExtract, noAssemble, noHqPrint) = range(8)
"""Encryption Methods."""
# Numbered consecutively from 0, in this order.
(pdf40bit, pdf128bit,
 aes128bitfalse, aes128bittrue,
 aes256bitfalse, aes256bittrue,
 aes256bitisofalse, aes256bitisotrue) = range(8)
def toFileEncrypted(pdf, method, permissions, ownerpw, userpw, linearize,
                    makeid, filename):
    """Write the PDF to the file ``filename``, encrypted.

    ``method`` is the encryption method, ``permissions`` the list of
    permissions, and ``ownerpw`` and ``userpw`` the owner and user
    passwords. If ``linearize`` is True, the file will be linearized, if
    supported by libcpdf. If ``makeid`` is True, it will be given a new ID.
    """
    count = len(permissions)
    # The library takes the permissions as a C array of bytes plus a length.
    perm_array = (c_uint8 * count)(*permissions)
    libc.pycpdf_toFileEncrypted(
        pdf.pdf, method, perm_array, count,
        str.encode(ownerpw), str.encode(userpw),
        linearize, makeid, str.encode(filename))
    checkerror()
def toFileEncryptedExt(pdf, method, permissions, ownerpw, userpw, linearize,
                       makeid, preserve_objstm, generate_objstm,
                       compress_objstm, filename):
    """Write the PDF to the file ``filename``, encrypted, with object stream
    options.

    ``method`` is the encryption method, ``permissions`` the list of
    permissions, and ``ownerpw`` and ``userpw`` the owner and user
    passwords. If ``linearize`` is True, the file will be linearized, if
    supported by libcpdf. If ``makeid`` is True, it will be given a new ID.
    If ``preserve_objstm`` is True, existing object streams will be
    preserved. If ``generate_objstm`` is True, object streams will be
    generated even if not originally present. If ``compress_objstm`` is
    True, object streams will be compressed (what we usually want).

    WARNING: the pdf argument will be invalid after this call and should
    not be used again.
    """
    count = len(permissions)
    # The library takes the permissions as a C array of bytes plus a length.
    perm_array = (c_uint8 * count)(*permissions)
    libc.pycpdf_toFileEncryptedExt(
        pdf.pdf, method, perm_array, count,
        str.encode(ownerpw), str.encode(userpw),
        linearize, makeid, preserve_objstm,
        generate_objstm, compress_objstm,
        str.encode(filename))
    checkerror()
def decryptPdf(pdf, userpw):
    """Attempt to decrypt the PDF using the given user password.

    An exception is raised in the event of a bad password.
    """
    pw_bytes = str.encode(userpw)
    libc.pycpdf_decryptPdf(pdf.pdf, pw_bytes)
    checkerror()
def decryptPdfOwner(pdf, ownerpw):
    """Attempt to decrypt the PDF using the given owner password.

    An exception is raised in the event of a bad password.
    """
    pw_bytes = str.encode(ownerpw)
    libc.pycpdf_decryptPdfOwner(pdf.pdf, pw_bytes)
    checkerror()
def hasPermission(pdf, perm):
    """Report whether the given permission (restriction) is present on the
    document.

    Returns the library's result, which is true when it is present.
    """
    present = libc.pycpdf_hasPermission(pdf.pdf, perm)
    checkerror()
    return present
def encryptionKind(pdf):
    """Return the encryption method currently in use on the document."""
    method = libc.pycpdf_encryptionKind(pdf.pdf)
    checkerror()
    return method
# CHAPTER 2. Merging and Splitting
def mergeSimple(pdfs):
    """Merge the given list of PDFs into a new PDF, which is returned."""
    count = len(pdfs)
    # The library takes the document numbers as a C int array plus a length.
    numbers = [doc.pdf for doc in pdfs]
    c_numbers = (c_int * count)(*numbers)
    merged = Pdf(libc.pycpdf_mergeSimple(c_numbers, count))
    checkerror()
    return merged
def merge(pdfs, retain_numbering, remove_duplicate_fonts):
    """Merge the given list of PDFs into a new PDF, which is returned.

    If ``retain_numbering`` is True, page labels are not rewritten. If
    ``remove_duplicate_fonts`` is True, duplicate fonts are merged. This is
    useful when the source documents for merging originate from the same
    source.
    """
    count = len(pdfs)
    # The library takes the document numbers as a C int array plus a length.
    c_numbers = (c_int * count)(*(doc.pdf for doc in pdfs))
    merged = Pdf(libc.pycpdf_merge(
        c_numbers, count, retain_numbering, remove_duplicate_fonts))
    checkerror()
    return merged
def mergeSame(pdfs, retain_numbering, remove_duplicate_fonts, ranges):
    """Merge PDFs like merge, selecting pages with a list of page ranges.

    ``ranges`` is used to select the pages to pick from each PDF. This
    avoids duplication of information when multiple discrete parts of a
    single source PDF are included. Returns the merged Pdf.
    """
    handles = [range_of_list(page_list) for page_list in ranges]
    c_numbers = (c_int * len(pdfs))(*(doc.pdf for doc in pdfs))
    c_handles = (c_int * len(handles))(*handles)
    merged = Pdf(libc.pycpdf_mergeSame(
        c_numbers, len(pdfs),
        retain_numbering, remove_duplicate_fonts, c_handles))
    # The native ranges were only needed for the call above.
    for handle in handles:
        deleteRange(handle)
    checkerror()
    return merged
def selectPages(pdf, r):
    """Return a new document containing just those pages of ``pdf`` that are
    in the page range ``r``."""
    handle = range_of_list(r)
    selected = Pdf(libc.pycpdf_selectPages(pdf.pdf, handle))
    deleteRange(handle)
    checkerror()
    return selected
# CHAPTER 3. Pages
def scalePages(pdf, r, sx, sy):
    """Scale the page dimensions and content of the pages in the range by
    the scale (sx, sy), about (0, 0).

    Other boxes (crop etc.) are altered as appropriate.
    """
    handle = range_of_list(r)
    libc.pycpdf_scalePages(pdf.pdf, handle, sx, sy)
    deleteRange(handle)
    checkerror()
def scaleToFit(pdf, r, w, h, scale_to_fit_scale):
    """Scale the pages in the range to fit new page dimensions.

    The new dimensions are ``w`` and ``h`` multiplied by
    ``scale_to_fit_scale`` (typically 1.0). Other boxes (crop etc.) are
    altered as appropriate.
    """
    handle = range_of_list(r)
    libc.pycpdf_scaleToFit(pdf.pdf, handle, w, h, scale_to_fit_scale)
    deleteRange(handle)
    checkerror()
"""Paper sizes."""
# Numbered consecutively from 0, in this order.
(a0portrait, a1portrait, a2portrait,
 a3portrait, a4portrait, a5portrait,
 a0landscape, a1landscape, a2landscape,
 a3landscape, a4landscape, a5landscape,
 usletterportrait, usletterlandscape,
 uslegalportrait, uslegallandscape) = range(16)
def scaleToFitPaper(pdf, r, papersize, scale_to_fit_scale):
    """Scale the pages in the range to fit the given paper size, possibly
    multiplied by ``scale_to_fit_scale`` (typically 1.0)."""
    handle = range_of_list(r)
    libc.pycpdf_scaleToFitPaper(
        pdf.pdf, handle, papersize, scale_to_fit_scale)
    deleteRange(handle)
    checkerror()
"""Positions with two numbers in a tuple e.g (posLeft, 10.0, 20.0):"""
posCentre, posLeft, posRight = range(0, 3)
"""Positions with one number in a tuple e.g (top, 5.0):"""
(top, topLeft, topRight, left,
 bottomLeft, bottomRight, right) = range(3, 10)
"""Positions with no numbers e.g diagonal:"""
diagonal, reverseDiagonal = range(10, 12)
def tripleOfPosition(p):
    """Expand a position into a (kind, number, number) triple.

    A bare ``diagonal`` or ``reverseDiagonal`` gets two zero numbers. A
    one-number position such as ``(top, 5.0)`` gets a zero second number. A
    two-number position such as ``(posLeft, 10.0, 20.0)`` is returned with
    both of its numbers. Returns None if the position kind is not one of
    these.
    """
    if p in (diagonal, reverseDiagonal):
        return (p, 0.0, 0.0)
    kind = p[0]
    if kind in (top, topLeft, topRight, left,
                bottomLeft, bottomRight, right):
        return (kind, p[1], 0.0)
    if kind in (posCentre, posLeft, posRight):
        return (kind, p[1], p[2])
def scaleContents(pdf, r, pos, scale):
    """Scale the contents of the pages in the range by ``scale``, about the
    point given by the position ``pos``."""
    handle = range_of_list(r)
    # The library takes the position as a kind plus two numbers.
    kind, first, second = tripleOfPosition(pos)
    libc.pycpdf_scaleContents(pdf.pdf, handle, kind, first, second, scale)
    deleteRange(handle)
    checkerror()
def shiftContents(pdf, r, dx, dy):
    """Shift the content of the pages in the range by (dx, dy)."""
    handle = range_of_list(r)
    libc.pycpdf_shiftContents(pdf.pdf, handle, dx, dy)
    deleteRange(handle)
    checkerror()
def rotate(pdf, r, rotation):
    """Set the viewing rotation of the pages in the range to an absolute
    value.

    Appropriate rotations are 0, 90, 180, 270.
    """
    handle = range_of_list(r)
    libc.pycpdf_rotate(pdf.pdf, handle, rotation)
    deleteRange(handle)
    checkerror()
def rotateBy(pdf, r, rotation):
    """Change the viewing rotation of the pages in the range by a given
    number of degrees.

    Appropriate values are 90, 180, 270.
    """
    handle = range_of_list(r)
    libc.pycpdf_rotateBy(pdf.pdf, handle, rotation)
    deleteRange(handle)
    checkerror()
def rotateContents(pdf, r, rotation):
    """Rotate the content of the pages in the range about the centre of the
    page by the given number of degrees, in a clockwise direction."""
    handle = range_of_list(r)
    libc.pycpdf_rotateContents(pdf.pdf, handle, rotation)
    deleteRange(handle)
    checkerror()
def upright(pdf, r):
    """Change the viewing rotation of the pages in the range,
    counter-rotating the dimensions and content such that there is no
    visual change."""
    handle = range_of_list(r)
    libc.pycpdf_upright(pdf.pdf, handle)
    deleteRange(handle)
    checkerror()
def hFlip(pdf, r):
    """Flip the pages in the range horizontally."""
    handle = range_of_list(r)
    libc.pycpdf_hFlip(pdf.pdf, handle)
    deleteRange(handle)
    checkerror()
def vFlip(pdf, r):
    """Flip the pages in the range vertically."""
    handle = range_of_list(r)
    libc.pycpdf_vFlip(pdf.pdf, handle)
    deleteRange(handle)
    checkerror()
def crop(pdf, r, x, y, w, h):
    """Crop the pages in the range to the box defined by (x, y, w, h),
    replacing any existing crop box."""
    handle = range_of_list(r)
    libc.pycpdf_crop(pdf.pdf, handle, x, y, w, h)
    deleteRange(handle)
    checkerror()
def removeCrop(pdf, r):
    """Remove any crop box from the pages in the range."""
    handle = range_of_list(r)
    libc.pycpdf_removeCrop(pdf.pdf, handle)
    deleteRange(handle)
    checkerror()
def removeTrim(pdf, r):
    """Remove any trim box from the pages in the range."""
    handle = range_of_list(r)
    libc.pycpdf_removeTrim(pdf.pdf, handle)
    deleteRange(handle)
    checkerror()
def removeArt(pdf, r):
    """Remove any art box from the pages in the range."""
    handle = range_of_list(r)
    libc.pycpdf_removeArt(pdf.pdf, handle)
    deleteRange(handle)
    checkerror()
def removeBleed(pdf, r):
    """Remove any bleed box from the pages in the range."""
    handle = range_of_list(r)
    libc.pycpdf_removeBleed(pdf.pdf, handle)
    deleteRange(handle)
    checkerror()
def trimMarks(pdf, r):
    """Add trim marks to the pages in the range, if the trim box exists."""
    handle = range_of_list(r)
    libc.pycpdf_trimMarks(pdf.pdf, handle)
    deleteRange(handle)
    checkerror()
def showBoxes(pdf, r):
    """Show the boxes on the pages in the range, for debugging."""
    handle = range_of_list(r)
    libc.pycpdf_showBoxes(pdf.pdf, handle)
    deleteRange(handle)
    checkerror()
def hardBox(pdf, r, boxname):
    """Make the named box a 'hard box' on the pages in the range, i.e. clip
    it explicitly.

    ``boxname`` could be, for example, "/TrimBox".
    """
    handle = range_of_list(r)
    name_bytes = str.encode(boxname)
    libc.pycpdf_hardBox(pdf.pdf, handle, name_bytes)
    deleteRange(handle)
    checkerror()
# CHAPTER 4. Encryption
# Encryption covered under Chapter 1 in pycpdflib
# CHAPTER 5. Compression
def compress(pdf):
    """Compress any uncompressed streams in the PDF using the Flate
    algorithm."""
    document_number = pdf.pdf
    libc.pycpdf_compress(document_number)
    checkerror()
def decompress(pdf):
    """Decompress any streams in the PDF, so long as the compression method
    is supported."""
    document_number = pdf.pdf
    libc.pycpdf_decompress(document_number)
    checkerror()
def squeezeInMemory(pdf):
    """Squeeze the PDF in memory.

    Squeezing is a lossless compression method which works by rearranging
    a PDF's internal structure.
    """
    document_number = pdf.pdf
    libc.pycpdf_squeezeInMemory(document_number)
    checkerror()
# CHAPTER 6. Bookmarks
def getBookmarks(pdf):
    """Return the bookmarks of the PDF as a list of tuples.

    Each tuple has the form (level, page, text, openstatus), where text is
    a string and the other fields are the values returned by the library.
    """
    libc.pycpdf_startGetBookmarkInfo(pdf.pdf)
    count = libc.pycpdf_numberBookmarks()
    # Tuple elements are evaluated left to right: level, page, text, status.
    bookmarks = [
        (
            libc.pycpdf_getBookmarkLevel(index),
            libc.pycpdf_getBookmarkPage(pdf.pdf, index),
            string_at(libc.pycpdf_getBookmarkText(index)).decode(),
            libc.pycpdf_getBookmarkOpenStatus(index),
        )
        for index in range(count)
    ]
    libc.pycpdf_endGetBookmarkInfo(pdf.pdf)
    checkerror()
    return bookmarks
def setBookmarks(pdf, marks):