diff --git a/Cargo.toml b/Cargo.toml index 221d315..66e0009 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,13 +15,14 @@ categories = ["command-line-utilities"] description = "Copy & Redact cli tool to securely copy and redact files removing Personal Identifiable Information (PII) across various filesystems." [features] -default = [] +default = ["pdf-render"] ci-gcp = [] # For testing on CI/GCP ci-aws = [] # For testing on CI/AWS ci-ms-presidio = [] # For testing on CI/MS Presidiom ci-gcp-llm = [] # For testing on CI/GCP with LLM models ci-open-ai = [] # For testing on CI/OpenAIP ci = ["ci-gcp", "ci-aws", "ci-ms-presidio", "ci-gcp-llm", "ci-open-ai"] +pdf-render = ["pdfium-render"] [dependencies] @@ -56,7 +57,7 @@ reqwest = { version = "0.12", default-features = false, features = ["multipart", tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } rand = "0.8" -pdfium-render = { version = "0.8", features = ["thread_safe", "image"] } +pdfium-render = { version = "0.8", features = ["thread_safe", "image"], optional = true } image = "0.25" bytes = { version = "1" } diff --git a/src/file_converters/mod.rs b/src/file_converters/mod.rs index c8f7669..8b1f178 100644 --- a/src/file_converters/mod.rs +++ b/src/file_converters/mod.rs @@ -3,6 +3,9 @@ use crate::AppResult; pub mod pdf; +#[cfg(feature = "pdf-render")] +mod pdf_image_converter; + pub struct FileConverters { pub pdf_image_converter: Option>, } @@ -14,8 +17,9 @@ impl FileConverters { } } + #[cfg(feature = "pdf-render")] pub async fn init(&mut self) -> AppResult<()> { - match pdf::PdfImageConverter::new().ok() { + match crate::file_converters::pdf_image_converter::PdfImageConverter::new().ok() { Some(pdf_image_converter) => { self.pdf_image_converter = Some(Box::new(pdf_image_converter)); Ok(()) @@ -23,4 +27,9 @@ impl FileConverters { None => Ok(()), } } + + #[cfg(not(feature = "pdf-render"))] + pub async fn init(&mut self) -> AppResult<()> { + Ok(()) + } } diff --git 
a/src/file_converters/pdf.rs b/src/file_converters/pdf.rs index 199d2fe..4be8c2f 100644 --- a/src/file_converters/pdf.rs +++ b/src/file_converters/pdf.rs @@ -1,8 +1,5 @@ -use crate::errors::AppError; use crate::AppResult; use gcloud_sdk::prost::bytes; -use gcloud_sdk::prost::bytes::Bytes; -use pdfium_render::prelude::*; #[derive(Debug, Clone)] pub struct PdfInfo { @@ -16,89 +13,14 @@ pub struct PdfPageInfo { pub page_as_images: image::DynamicImage, } +#[cfg(feature = "pdf-render")] +type PdfPoints = pdfium_render::prelude::PdfPoints; + +#[cfg(not(feature = "pdf-render"))] +type PdfPoints = f32; + pub trait PdfToImage { fn convert_to_images(&self, pdf_bytes: bytes::Bytes) -> AppResult<PdfInfo>; fn images_to_pdf(&self, pdf_info: PdfInfo) -> AppResult<bytes::Bytes>; } - -pub struct PdfImageConverter { - pdfium: Pdfium, -} - -impl PdfImageConverter { - pub fn new() -> AppResult<Self> { - let executable = std::env::current_exe()?; - let current_dir = executable - .parent() - .ok_or(AppError::SystemError { - message: "No parent directory for executable".to_string(), - })? - .to_path_buf(); - - let bindings = Pdfium::bind_to_library( - // Attempt to bind to a pdfium library in the current working directory... 
- Pdfium::pdfium_platform_library_name_at_path("./"), - ) - .or_else(|_| Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./lib"))) - .or_else(|_| { - Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path( - &current_dir - .parent() - .map(|p| p.join("lib")) - .unwrap_or(current_dir.clone()), - )) - }) - .or_else(|_| { - Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path(&current_dir)) - }) - .or_else(|_| Pdfium::bind_to_system_library())?; - - let pdfium = Pdfium::new(bindings); - Ok(Self { pdfium }) - } -} - -impl PdfToImage for PdfImageConverter { - fn convert_to_images(&self, pdf_bytes: Bytes) -> AppResult<PdfInfo> { - let render_config = PdfRenderConfig::new() - .set_target_width(2000) - .set_maximum_height(2000) - .rotate_if_landscape(PdfPageRenderRotation::Degrees90, true); - let document = self - .pdfium - .load_pdf_from_byte_vec(pdf_bytes.to_vec(), None)?; - let mut pdf_info = PdfInfo { pages: Vec::new() }; - for page in document.pages().iter() { - let image = page.render_with_config(&render_config)?.as_image(); - let page_info = PdfPageInfo { - height: page.height(), - width: page.width(), - page_as_images: image, - }; - pdf_info.pages.push(page_info); - } - Ok(pdf_info) - } - - fn images_to_pdf(&self, pdf_info: PdfInfo) -> AppResult<Bytes> { - let mut document = self.pdfium.create_new_pdf()?; - for src_page in pdf_info.pages { - let mut page = - document - .pages_mut() - .create_page_at_start(PdfPagePaperSize::from_points( - src_page.width, - src_page.height, - ))?; - let object = PdfPageImageObject::new_with_size( - &document, - &src_page.page_as_images, - src_page.width, - src_page.height, - )?; - page.objects_mut().add_image_object(object)?; - } - Ok(document.save_to_bytes()?.into()) - } -} diff --git a/src/file_converters/pdf_image_converter.rs b/src/file_converters/pdf_image_converter.rs new file mode 100644 index 0000000..ff60e15 --- /dev/null +++ b/src/file_converters/pdf_image_converter.rs @@ -0,0 +1,86 @@ +use 
crate::errors::AppError; +use crate::file_converters::pdf::{PdfInfo, PdfPageInfo, PdfToImage}; +use crate::AppResult; +use bytes::Bytes; +use pdfium_render::prelude::*; + +pub struct PdfImageConverter { + pdfium: Pdfium, +} + +impl PdfImageConverter { + pub fn new() -> AppResult<Self> { + let executable = std::env::current_exe()?; + let current_dir = executable + .parent() + .ok_or(AppError::SystemError { + message: "No parent directory for executable".to_string(), + })? + .to_path_buf(); + + let bindings = Pdfium::bind_to_library( + // Attempt to bind to a pdfium library in the current working directory... + Pdfium::pdfium_platform_library_name_at_path("./"), + ) + .or_else(|_| Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./lib"))) + .or_else(|_| { + Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path( + &current_dir + .parent() + .map(|p| p.join("lib")) + .unwrap_or(current_dir.clone()), + )) + }) + .or_else(|_| { + Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path(&current_dir)) + }) + .or_else(|_| Pdfium::bind_to_system_library())?; + + let pdfium = Pdfium::new(bindings); + Ok(Self { pdfium }) + } +} + +impl PdfToImage for PdfImageConverter { + fn convert_to_images(&self, pdf_bytes: Bytes) -> AppResult<PdfInfo> { + let render_config = PdfRenderConfig::new() + .set_target_width(2000) + .set_maximum_height(2000) + .rotate_if_landscape(PdfPageRenderRotation::Degrees90, true); + let document = self + .pdfium + .load_pdf_from_byte_vec(pdf_bytes.to_vec(), None)?; + let mut pdf_info = PdfInfo { pages: Vec::new() }; + for page in document.pages().iter() { + let image = page.render_with_config(&render_config)?.as_image(); + let page_info = PdfPageInfo { + height: page.height(), + width: page.width(), + page_as_images: image, + }; + pdf_info.pages.push(page_info); + } + Ok(pdf_info) + } + + fn images_to_pdf(&self, pdf_info: PdfInfo) -> AppResult<Bytes> { + let mut document = self.pdfium.create_new_pdf()?; + for src_page in pdf_info.pages { + 
let mut page = + document + .pages_mut() + .create_page_at_start(PdfPagePaperSize::from_points( + src_page.width, + src_page.height, + ))?; + let object = PdfPageImageObject::new_with_size( + &document, + &src_page.page_as_images, + src_page.width, + src_page.height, + )?; + page.objects_mut().add_image_object(object)?; + } + Ok(document.save_to_bytes()?.into()) + } +}