Skip to content

Commit

Permalink
Make PDF render feature flag to disable
Browse files Browse the repository at this point in the history
  • Loading branch information
abdolence committed Aug 14, 2024
1 parent 7584063 commit b8d6e2f
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 87 deletions.
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@ categories = ["command-line-utilities"]
description = "Copy & Redact cli tool to securely copy and redact files removing Personal Identifiable Information (PII) across various filesystems."

[features]
default = []
default = ["pdf-render"]
ci-gcp = [] # For testing on CI/GCP
ci-aws = [] # For testing on CI/AWS
ci-ms-presidio = [] # For testing on CI/MS Presidio
ci-gcp-llm = [] # For testing on CI/GCP with LLM models
ci-open-ai = [] # For testing on CI/OpenAI
ci = ["ci-gcp", "ci-aws", "ci-ms-presidio", "ci-gcp-llm", "ci-open-ai"]
pdf-render = ["pdfium-render"]


[dependencies]
Expand Down Expand Up @@ -56,7 +57,7 @@ reqwest = { version = "0.12", default-features = false, features = ["multipart",
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
rand = "0.8"
pdfium-render = { version = "0.8", features = ["thread_safe", "image"] }
pdfium-render = { version = "0.8", features = ["thread_safe", "image"], optional = true }
image = "0.25"
bytes = { version = "1" }

Expand Down
11 changes: 10 additions & 1 deletion src/file_converters/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ use crate::AppResult;

pub mod pdf;

#[cfg(feature = "pdf-render")]
mod pdf_image_converter;

/// Container for the optional file converters.
pub struct FileConverters {
/// Boxed `PdfToImage` implementation; `None` when no converter has been set.
pub pdf_image_converter: Option<Box<dyn PdfToImage + 'static>>,
}
Expand All @@ -14,13 +17,19 @@ impl FileConverters {
}
}

/// Tries to construct a `PdfImageConverter` and store it in
/// `self.pdf_image_converter`.
///
/// Only compiled when the `pdf-render` feature is enabled.
///
/// Construction errors are discarded by `.ok()`: when `new()` fails the field is
/// left untouched and `Ok(())` is still returned, so a missing converter is not
/// reported as an error by this function.
#[cfg(feature = "pdf-render")]
pub async fn init(&mut self) -> AppResult<()> {
match pdf::PdfImageConverter::new().ok() {
match crate::file_converters::pdf_image_converter::PdfImageConverter::new().ok() {
Some(pdf_image_converter) => {
self.pdf_image_converter = Some(Box::new(pdf_image_converter));
Ok(())
}
None => Ok(()),
}
}

/// No-op variant of `init`, compiled when the `pdf-render` feature is disabled.
///
/// Does not touch `self` and always returns `Ok(())`.
#[cfg(not(feature = "pdf-render"))]
pub async fn init(&mut self) -> AppResult<()> {
Ok(())
}
}
90 changes: 6 additions & 84 deletions src/file_converters/pdf.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use crate::errors::AppError;
use crate::AppResult;
use gcloud_sdk::prost::bytes;
use gcloud_sdk::prost::bytes::Bytes;
use pdfium_render::prelude::*;

#[derive(Debug, Clone)]
pub struct PdfInfo {
Expand All @@ -16,89 +13,14 @@ pub struct PdfPageInfo {
pub page_as_images: image::DynamicImage,
}

// With the `pdf-render` feature enabled, `PdfPoints` is the point type exported
// by the `pdfium_render` prelude.
#[cfg(feature = "pdf-render")]
type PdfPoints = pdfium_render::prelude::PdfPoints;

// Without the feature, `PdfPoints` falls back to a plain `f32` so that code
// naming this alias does not need the `pdfium_render` crate.
#[cfg(not(feature = "pdf-render"))]
type PdfPoints = f32;

/// Conversion between raw PDF bytes and a `PdfInfo` value, in both directions.
pub trait PdfToImage {
/// Converts the given PDF bytes into a `PdfInfo`.
fn convert_to_images(&self, pdf_bytes: bytes::Bytes) -> AppResult<PdfInfo>;

/// Builds PDF bytes from the given `PdfInfo`.
fn images_to_pdf(&self, pdf_info: PdfInfo) -> AppResult<bytes::Bytes>;
}

/// `PdfToImage` implementation that wraps a `Pdfium` instance.
pub struct PdfImageConverter {
/// The `Pdfium` instance built from the bound library in `new`.
pdfium: Pdfium,
}

impl PdfImageConverter {
    /// Creates a converter by binding to a pdfium dynamic library.
    ///
    /// Candidate locations are tried in this order, stopping at the first one
    /// that binds successfully:
    ///
    /// 1. `./` (relative to the current working directory)
    /// 2. `./lib`
    /// 3. the `lib` directory next to the executable's directory
    ///    (`<exe dir>/../lib`), or the executable's directory itself when it
    ///    has no parent
    /// 4. the executable's directory
    /// 5. the system library
    ///
    /// # Errors
    ///
    /// Returns an error when the path of the current executable cannot be
    /// determined, when that path has no parent directory, or when every
    /// binding attempt fails (the error of the last attempt is propagated).
    pub fn new() -> AppResult<Self> {
        let executable = std::env::current_exe()?;
        // Borrowed straight from `executable`; the error value is only built on
        // the failure path thanks to `ok_or_else`.
        let exe_dir = executable.parent().ok_or_else(|| AppError::SystemError {
            message: "No parent directory for executable".to_string(),
        })?;

        let bindings =
            Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./"))
                .or_else(|_| {
                    Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./lib"))
                })
                .or_else(|_| {
                    // Computed lazily: only needed once the relative lookups failed.
                    let sibling_lib_dir = exe_dir
                        .parent()
                        .map(|p| p.join("lib"))
                        .unwrap_or_else(|| exe_dir.to_path_buf());
                    Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path(
                        &sibling_lib_dir,
                    ))
                })
                .or_else(|_| {
                    Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path(exe_dir))
                })
                .or_else(|_| Pdfium::bind_to_system_library())?;

        Ok(Self {
            pdfium: Pdfium::new(bindings),
        })
    }
}

impl PdfToImage for PdfImageConverter {
fn convert_to_images(&self, pdf_bytes: Bytes) -> AppResult<PdfInfo> {
let render_config = PdfRenderConfig::new()
.set_target_width(2000)
.set_maximum_height(2000)
.rotate_if_landscape(PdfPageRenderRotation::Degrees90, true);
let document = self
.pdfium
.load_pdf_from_byte_vec(pdf_bytes.to_vec(), None)?;
let mut pdf_info = PdfInfo { pages: Vec::new() };
for page in document.pages().iter() {
let image = page.render_with_config(&render_config)?.as_image();
let page_info = PdfPageInfo {
height: page.height(),
width: page.width(),
page_as_images: image,
};
pdf_info.pages.push(page_info);
}
Ok(pdf_info)
}

fn images_to_pdf(&self, pdf_info: PdfInfo) -> AppResult<Bytes> {
let mut document = self.pdfium.create_new_pdf()?;
for src_page in pdf_info.pages {
let mut page =
document
.pages_mut()
.create_page_at_start(PdfPagePaperSize::from_points(
src_page.width,
src_page.height,
))?;
let object = PdfPageImageObject::new_with_size(
&document,
&src_page.page_as_images,
src_page.width,
src_page.height,
)?;
page.objects_mut().add_image_object(object)?;
}
Ok(document.save_to_bytes()?.into())
}
}
86 changes: 86 additions & 0 deletions src/file_converters/pdf_image_converter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
use crate::errors::AppError;
use crate::file_converters::pdf::{PdfInfo, PdfPageInfo, PdfToImage};
use crate::AppResult;
use bytes::Bytes;
use pdfium_render::prelude::*;

/// `PdfToImage` implementation that wraps a `Pdfium` instance.
pub struct PdfImageConverter {
/// The `Pdfium` instance built from the bound library in `new`.
pdfium: Pdfium,
}

impl PdfImageConverter {
    /// Creates a converter by binding to a pdfium dynamic library.
    ///
    /// Candidate locations are tried in this order, stopping at the first one
    /// that binds successfully:
    ///
    /// 1. `./` (relative to the current working directory)
    /// 2. `./lib`
    /// 3. the `lib` directory next to the executable's directory
    ///    (`<exe dir>/../lib`), or the executable's directory itself when it
    ///    has no parent
    /// 4. the executable's directory
    /// 5. the system library
    ///
    /// # Errors
    ///
    /// Returns an error when the path of the current executable cannot be
    /// determined, when that path has no parent directory, or when every
    /// binding attempt fails (the error of the last attempt is propagated).
    pub fn new() -> AppResult<Self> {
        let executable = std::env::current_exe()?;
        // Borrowed straight from `executable`; the error value is only built on
        // the failure path thanks to `ok_or_else`.
        let exe_dir = executable.parent().ok_or_else(|| AppError::SystemError {
            message: "No parent directory for executable".to_string(),
        })?;

        let bindings =
            Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./"))
                .or_else(|_| {
                    Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path("./lib"))
                })
                .or_else(|_| {
                    // Computed lazily: only needed once the relative lookups failed.
                    let sibling_lib_dir = exe_dir
                        .parent()
                        .map(|p| p.join("lib"))
                        .unwrap_or_else(|| exe_dir.to_path_buf());
                    Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path(
                        &sibling_lib_dir,
                    ))
                })
                .or_else(|_| {
                    Pdfium::bind_to_library(Pdfium::pdfium_platform_library_name_at_path(exe_dir))
                })
                .or_else(|_| Pdfium::bind_to_system_library())?;

        Ok(Self {
            pdfium: Pdfium::new(bindings),
        })
    }
}

impl PdfToImage for PdfImageConverter {
fn convert_to_images(&self, pdf_bytes: Bytes) -> AppResult<PdfInfo> {
let render_config = PdfRenderConfig::new()
.set_target_width(2000)
.set_maximum_height(2000)
.rotate_if_landscape(PdfPageRenderRotation::Degrees90, true);
let document = self
.pdfium
.load_pdf_from_byte_vec(pdf_bytes.to_vec(), None)?;
let mut pdf_info = PdfInfo { pages: Vec::new() };
for page in document.pages().iter() {
let image = page.render_with_config(&render_config)?.as_image();
let page_info = PdfPageInfo {
height: page.height(),
width: page.width(),
page_as_images: image,
};
pdf_info.pages.push(page_info);
}
Ok(pdf_info)
}

fn images_to_pdf(&self, pdf_info: PdfInfo) -> AppResult<Bytes> {
let mut document = self.pdfium.create_new_pdf()?;
for src_page in pdf_info.pages {
let mut page =
document
.pages_mut()
.create_page_at_start(PdfPagePaperSize::from_points(
src_page.width,
src_page.height,
))?;
let object = PdfPageImageObject::new_with_size(
&document,
&src_page.page_as_images,
src_page.width,
src_page.height,
)?;
page.objects_mut().add_image_object(object)?;
}
Ok(document.save_to_bytes()?.into())
}
}

0 comments on commit b8d6e2f

Please sign in to comment.