From 1aaed7fdb66924bf733006256b01a111b8e9975f Mon Sep 17 00:00:00 2001
From: Sampo Tolvanen
Date: Tue, 29 Aug 2023 18:08:43 +0300
Subject: [PATCH 1/2] Add configuration for PDF/A conformance level

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; re-check context-line whitespace against the tree
  before applying.
- The 6th argument of pdf2pdfa now defaults to 1, so callers that still pass
  five arguments keep getting plain -dPDFA instead of failing validation.
- The level is matched against ^[1-3]$ instead of being compared with
  -lt/-gt: a digit string such as "08" can make the [[ ]] arithmetic tests
  error out (evaluating false) and slip through the range check.
- The blob ids on the programs/pdf2pdfa "index" line predate these changes.

 programs/pdf2pdfa           | 13 ++++++++++++-
 resources/public/index.html |  6 +++++-
 src/laundry/pdf.clj         |  9 +++++----
 test/laundry/pdf_test.clj   |  1 +
 4 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/programs/pdf2pdfa b/programs/pdf2pdfa
index 084614b..33b5f58 100755
--- a/programs/pdf2pdfa
+++ b/programs/pdf2pdfa
@@ -7,6 +7,7 @@ OUTPUT=$2
 DPI=$3
 MAXBITMAP=$4
 PDFSETTINGS=$5
+PDFA_CONFORMANCE=${6:-1}
 
 if [ -z "$DPI" ]; then
   DPI=720
@@ -43,6 +44,16 @@ if ! [[ $MAXBITMAP =~ $re ]] ; then
   exit 2
 fi
 
+PDFA_CONFORMANCE_NUM=""
+if [[ ! $PDFA_CONFORMANCE =~ ^[1-3]$ ]]; then
+  echo "error: PDFA_CONFORMANCE invalid - only values from 1 to 3 are allowed"
+  exit 2
+fi
+if [ "$PDFA_CONFORMANCE" -gt 1 ]; then
+  # Per the Ghostscript documentation, -dPDFA only takes an explicit "=N" for levels 2 and 3; level 1 is passed as bare -dPDFA
+  PDFA_CONFORMANCE_NUM="=$PDFA_CONFORMANCE"
+fi
+
 docker run \
   --runtime="${LAUNDRY_DOCKER_RUNTIME:-runsc}" \
   --network=none \
@@ -53,5 +64,5 @@ docker run \
   --rm \
   laundry-programs \
   /bin/bash -c 'cat > /home/docconv/document.pdf && \
-    gs -q -dPDFA -dBATCH -dNOPAUSE -r'$DPI' -dMaxBitmap='$MAXBITMAP' '$PDFSETTINGS' -sProcessColorModel=DeviceCMYK -sDEVICE=pdfwrite -dPDFACompatibilityPolicy=1 -sOutputFile=- /home/docconv/document.pdf' \
+    gs -q -dPDFA'$PDFA_CONFORMANCE_NUM' -dBATCH -dNOPAUSE -r'$DPI' -dMaxBitmap='$MAXBITMAP' '$PDFSETTINGS' -sProcessColorModel=DeviceCMYK -sDEVICE=pdfwrite -dPDFACompatibilityPolicy=1 -sOutputFile=- /home/docconv/document.pdf' \
   < "$INPUT" > "$OUTPUT"
diff --git a/resources/public/index.html b/resources/public/index.html
index a75d6ac..65d42b7 100644
--- a/resources/public/index.html
+++ b/resources/public/index.html
@@ -9,7 +9,8
@@ var pdfParameters = [ {"type": "pdf/pdf2pdfa", key: "dpi", dtype: "number", wrapper: "pdf_dpi_elem", elem: "pdf_dpi" }, {"type": "pdf/pdf2pdfa", key: "maxbitmap", dtype: "number", wrapper: "pdf_maxbitmap_elem", elem: "pdf_maxbitmap" }, - {"type": "pdf/pdf2pdfa", key: "pdfsettings", wrapper: "pdf_pdfsettings_elem", elem: "pdf_pdfsettings" } + {"type": "pdf/pdf2pdfa", key: "pdfsettings", wrapper: "pdf_pdfsettings_elem", elem: "pdf_pdfsettings" }, + {"type": "pdf/pdf2pdfa", key: "pdfaconformance", dtype: "number", wrapper: "pdf_pdfaconformance_elem", elem: "pdf_pdfaconformance" } ] function updateConversionParameters() { @@ -104,6 +105,9 @@

Laundry

Pdfsettings
+
+ Pdf/a conformance (Valid values: 1-3) +
diff --git a/src/laundry/pdf.clj b/src/laundry/pdf.clj
index 21c7cd7..1d46095 100644
--- a/src/laundry/pdf.clj
+++ b/src/laundry/pdf.clj
@@ -18,14 +18,15 @@
          (io/delete-file path)))))
 
 ;; pdf/a converter
-(s/defn api-pdf2pdfa [env, tempfile :- java.io.File, dpinum :- s/Int, maxbitmapnum :- s/Int, pdfsettings :- s/Str]
+(s/defn api-pdf2pdfa [env, tempfile :- java.io.File, dpinum :- s/Int, maxbitmapnum :- s/Int, pdfsettings :- s/Str, pdfaconformancenum :- s/Int]
   (let [in-path (.getAbsolutePath tempfile)
         out-path (str (.getAbsolutePath tempfile) ".pdf")
         dpi (str dpinum)
         maxbitmap (str maxbitmapnum)
         pdfsettings (str pdfsettings)
+        pdfaconformance (str pdfaconformancenum)
         res (shell-out! (str (:tools env) "/pdf2pdfa")
-                        in-path out-path dpi maxbitmap pdfsettings)]
+                        in-path out-path dpi maxbitmap pdfsettings pdfaconformance)]
     (.delete tempfile)
     (if (= (:exit res) 0)
       (htresp/content-type
@@ -80,11 +81,11 @@
         (api-pdf2txt env tempfile)))
     (POST "/pdf2pdfa" []
       :summary "attempt to convert a PDF file to PDF/A"
-      :query-params #_{:clj-kondo/ignore [:unresolved-symbol]} [{dpi :- s/Int 720} {maxbitmap :- s/Int 0} {pdfsettings :- s/Str "/default"}]
+      :query-params #_{:clj-kondo/ignore [:unresolved-symbol]} [{dpi :- s/Int 720} {maxbitmap :- s/Int 0} {pdfsettings :- s/Str "/default"} {pdfaconformance :- s/Int 1}]
       :multipart-params [file :- upload/TempFileUpload]
       :middleware [wrap-multipart-params]
       (let [tempfile (:tempfile file)
             filename (:filename file)]
         (info "PDF converter received " filename "(" (:size file) "b)")
         (.deleteOnExit tempfile) ;; cleanup if VM is terminated
-        (api-pdf2pdfa env tempfile dpi maxbitmap pdfsettings))))))
+        (api-pdf2pdfa env tempfile dpi maxbitmap pdfsettings pdfaconformance))))))
diff --git a/test/laundry/pdf_test.clj b/test/laundry/pdf_test.clj
index bd47473..fc09f2f 100644
--- a/test/laundry/pdf_test.clj
+++ b/test/laundry/pdf_test.clj
@@ -47,6 +47,7 @@
                       (assoc-in [:query-params :dpi] 720)
                       (assoc-in [:query-params :maxbitmap] 0)
                       (assoc-in [:query-params :pdfsettings] "/default")
+                      (assoc-in [:query-params :pdfaconformance] 2)
                       (merge (peridot.multipart/build {:file file})))
           response (app request)
           body (ring.util.request/body-string response)]

From 4384133c4a494a7d975ee66c7483502000d905e3 Mon Sep 17 00:00:00 2001
From: Sampo Tolvanen
Date: Thu, 31 Aug 2023 15:49:05 +0300
Subject: [PATCH 2/2] Validate pdfsettings values

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch; re-check context-line whitespace against the tree
  before applying.
- The rejected-pdfsettings branch now deletes the uploaded temp file, the
  same way the conversion branch does right after shell-out!. Previously
  that branch returned without touching the file.
- NOTE(review): badness-resp is defined outside this patch. Its other call
  site passes the shell result as the second argument; confirm that passing
  the message twice is intended and that the response status it produces is
  appropriate for invalid client input.
- The diffstat and hunk header account for the three lines added during
  review (two comments and the .delete call); the post-image blob id on the
  "index" line predates them.

 src/laundry/pdf.clj | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/src/laundry/pdf.clj b/src/laundry/pdf.clj
index 1d46095..eb14a48 100644
--- a/src/laundry/pdf.clj
+++ b/src/laundry/pdf.clj
@@ -17,22 +17,32 @@
          (proxy-super close)
          (io/delete-file path)))))
 
+;; Returns an error message when pdfsettings is not one of the allowed values, nil otherwise.
+(s/defn validate-pdf-settings [pdfsettings :- s/Str]
+  (when-not (#{"/screen" "/ebook" "/printer" "/prepress" "/default"} pdfsettings) "pdfsettings is not given in correct format"))
+
 ;; pdf/a converter
 (s/defn api-pdf2pdfa [env, tempfile :- java.io.File, dpinum :- s/Int, maxbitmapnum :- s/Int, pdfsettings :- s/Str, pdfaconformancenum :- s/Int]
-  (let [in-path (.getAbsolutePath tempfile)
-        out-path (str (.getAbsolutePath tempfile) ".pdf")
-        dpi (str dpinum)
-        maxbitmap (str maxbitmapnum)
-        pdfsettings (str pdfsettings)
-        pdfaconformance (str pdfaconformancenum)
-        res (shell-out! (str (:tools env) "/pdf2pdfa")
-                        in-path out-path dpi maxbitmap pdfsettings pdfaconformance)]
-    (.delete tempfile)
-    (if (= (:exit res) 0)
-      (htresp/content-type
-       (htresp/ok (temp-file-input-stream out-path))
-       "application/pdf")
-      (badness-resp "pdf2pdfa conversion failed" res))))
+  (if-let [pdfsettings-error (validate-pdf-settings (str pdfsettings))]
+    (do
+      ;; the upload is never converted on this path, so remove it here like the conversion path does
+      (.delete tempfile)
+      (badness-resp pdfsettings-error pdfsettings-error))
+
+    (let [in-path (.getAbsolutePath tempfile)
+          out-path (str (.getAbsolutePath tempfile) ".pdf")
+          dpi (str dpinum)
+          maxbitmap (str maxbitmapnum)
+          pdfsettings (str pdfsettings)
+          pdfaconformance (str pdfaconformancenum)
+          res (shell-out! (str (:tools env) "/pdf2pdfa")
+                          in-path out-path dpi maxbitmap pdfsettings pdfaconformance)]
+      (.delete tempfile)
+      (if (= (:exit res) 0)
+        (htresp/content-type
+         (htresp/ok (temp-file-input-stream out-path))
+         "application/pdf")
+        (badness-resp "pdf2pdfa conversion failed" res)))))
 
 ;; pdf → txt conversion
 (s/defn api-pdf2txt [env, tempfile :- java.io.File]