Skip to content

Commit

Permalink
Simplified Redacter options API
Browse files (browse the repository at this point in the history)
  • Loading branch information
abdolence committed Aug 10, 2024
1 parent 10fb5e3 commit 2af5985
Show file tree
Hide file tree
Showing 9 changed files with 34 additions and 123 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "redacter"
version = "0.5.0"
version = "0.5.1"
edition = "2021"
authors = ["Abdulla Abdurakhmanov <[email protected]>"]
license = "Apache-2.0"
Expand Down
21 changes: 14 additions & 7 deletions src/commands/copy_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use crate::filesystems::{
AbsoluteFilePath, DetectFileSystem, FileMatcher, FileMatcherResult, FileSystemConnection,
FileSystemRef,
};
use crate::redacters::{RedactSupportedOptions, Redacter, RedacterOptions, Redacters};
use crate::redacters::{
RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterOptions, Redacters,
};
use crate::reporter::AppReporter;
use crate::AppResult;
use console::{Style, Term};
Expand Down Expand Up @@ -81,7 +83,10 @@ pub async fn command_copy(
let mut destination_fs = DetectFileSystem::open(destination, &app_reporter).await?;

let maybe_redacter = match redacter_options {
Some(options) => Some(Redacters::new_redacter(options, &app_reporter).await?),
Some(options) => Some((
options.base_options,
Redacters::new_redacter(options.provider_options, &app_reporter).await?,
)),
None => None,
};

Expand Down Expand Up @@ -174,7 +179,7 @@ async fn transfer_and_redact_file<
source_fs: &mut SFS,
destination_fs: &mut DFS,
options: &CopyCommandOptions,
redacter: &Option<impl Redacter>,
redacter: &Option<(RedacterBaseOptions, impl Redacter)>,
) -> AppResult<TransferFileResult> {
let bold_style = Style::new().bold().white();
let (base_file_ref, source_reader) = source_fs.download(source_file_ref).await?;
Expand Down Expand Up @@ -208,15 +213,15 @@ async fn transfer_and_redact_file<
)
.as_str(),
);
let transfer_result = if let Some(ref redacter) = redacter {
let transfer_result = if let Some(ref redacter_with_options) = redacter {
redact_upload_file::<SFS, DFS, _>(
bar,
destination_fs,
bold_style,
source_reader,
&base_resolved_file_ref,
file_ref,
redacter,
redacter_with_options,
)
.await?
} else {
Expand All @@ -241,12 +246,14 @@ async fn redact_upload_file<
source_reader: S,
base_resolved_file_ref: &AbsoluteFilePath,
dest_file_ref: &FileSystemRef,
redacter: &impl Redacter,
redacter_with_options: &(RedacterBaseOptions, impl Redacter),
) -> AppResult<crate::commands::copy_command::TransferFileResult> {
let (redacter_base_options, redacter) = redacter_with_options;
let redacter_supported_options = redacter.redact_supported_options(dest_file_ref).await?;
if redacter_supported_options != RedactSupportedOptions::Unsupported {
match crate::redacters::redact_stream(
redacter,
redacter_base_options,
&redacter_supported_options,
source_reader,
dest_file_ref,
Expand All @@ -273,7 +280,7 @@ async fn redact_upload_file<
Ok(TransferFileResult::Skipped)
}
}
} else if redacter.options().allow_unsupported_copies {
} else if redacter_base_options.allow_unsupported_copies {
bar.println(
format!(
"Still copying {} {} because it is allowed by arguments",
Expand Down
23 changes: 2 additions & 21 deletions src/redacters/aws_comprehend.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use crate::errors::AppError;
use crate::filesystems::FileSystemRef;
use crate::redacters::{
RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterDataItem,
RedacterDataItemContent, Redacters,
RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, Redacters,
};
use crate::reporter::AppReporter;
use crate::AppResult;
Expand All @@ -17,13 +16,11 @@ pub struct AwsComprehendRedacterOptions {
#[derive(Clone)]
pub struct AwsComprehendRedacter<'a> {
client: aws_sdk_comprehend::Client,
base_options: RedacterBaseOptions,
reporter: &'a AppReporter<'a>,
}

impl<'a> AwsComprehendRedacter<'a> {
pub async fn new(
base_options: RedacterBaseOptions,
aws_dlp_options: AwsComprehendRedacterOptions,
reporter: &'a AppReporter<'a>,
) -> AppResult<Self> {
Expand All @@ -33,11 +30,7 @@ impl<'a> AwsComprehendRedacter<'a> {
.or_default_provider();
let shared_config = aws_config::from_env().region(region_provider).load().await;
let client = aws_sdk_comprehend::Client::new(&shared_config);
Ok(Self {
client,
base_options,
reporter,
})
Ok(Self { client, reporter })
}

pub async fn redact_text_file(
Expand Down Expand Up @@ -113,10 +106,6 @@ impl<'a> Redacter for AwsComprehendRedacter<'a> {
_ => RedactSupportedOptions::Unsupported,
})
}

fn options(&self) -> &RedacterBaseOptions {
&self.base_options
}
}

#[allow(unused_imports)]
Expand All @@ -142,15 +131,7 @@ mod tests {
let content = RedacterDataItemContent::Value(test_content.to_string());
let input = RedacterDataItem { file_ref, content };

let redacter_options = RedacterBaseOptions {
allow_unsupported_copies: false,
csv_headers_disable: false,
csv_delimiter: None,
sampling_size: None,
};

let redacter = AwsComprehendRedacter::new(
redacter_options,
AwsComprehendRedacterOptions {
region: Some(Region::new(test_aws_region)),
},
Expand Down
18 changes: 1 addition & 17 deletions src/redacters/gcp_dlp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ use crate::common_types::GcpProjectId;
use crate::errors::AppError;
use crate::filesystems::FileSystemRef;
use crate::redacters::{
RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterDataItem,
RedacterDataItemContent, Redacters,
RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, Redacters,
};
use crate::reporter::AppReporter;
use crate::AppResult;
Expand All @@ -16,7 +15,6 @@ use rvstruct::ValueStruct;
#[derive(Clone)]
pub struct GcpDlpRedacter<'a> {
client: GoogleApi<DlpServiceClient<GoogleAuthMiddleware>>,
base_options: RedacterBaseOptions,
gcp_dlp_options: GcpDlpRedacterOptions,
reporter: &'a AppReporter<'a>,
}
Expand Down Expand Up @@ -50,7 +48,6 @@ impl<'a> GcpDlpRedacter<'a> {
"ENCRYPTION_KEY",
];
pub async fn new(
base_options: RedacterBaseOptions,
gcp_dlp_options: GcpDlpRedacterOptions,
reporter: &'a AppReporter<'a>,
) -> AppResult<Self> {
Expand All @@ -59,7 +56,6 @@ impl<'a> GcpDlpRedacter<'a> {
.await?;
Ok(GcpDlpRedacter {
client,
base_options,
gcp_dlp_options,
reporter,
})
Expand Down Expand Up @@ -230,10 +226,6 @@ impl<'a> Redacter for GcpDlpRedacter<'a> {
},
)
}

fn options(&self) -> &RedacterBaseOptions {
&self.base_options
}
}

impl TryInto<gcloud_sdk::google::privacy::dlp::v2::ContentItem> for RedacterDataItemContent {
Expand Down Expand Up @@ -402,15 +394,7 @@ mod tests {
let content = RedacterDataItemContent::Value(test_content.to_string());
let input = RedacterDataItem { file_ref, content };

let redacter_options = RedacterBaseOptions {
allow_unsupported_copies: false,
csv_headers_disable: false,
csv_delimiter: None,
sampling_size: None,
};

let redacter = GcpDlpRedacter::new(
redacter_options,
GcpDlpRedacterOptions {
project_id: GcpProjectId::new(test_gcp_project_id),
},
Expand Down
18 changes: 1 addition & 17 deletions src/redacters/gemini_llm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ use crate::common_types::GcpProjectId;
use crate::errors::AppError;
use crate::filesystems::FileSystemRef;
use crate::redacters::{
RedactSupportedOptions, Redacter, RedacterBaseOptions, RedacterDataItem,
RedacterDataItemContent, Redacters,
RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, Redacters,
};
use crate::reporter::AppReporter;
use crate::AppResult;
Expand All @@ -24,7 +23,6 @@ pub struct GeminiLlmModelName(String);
#[derive(Clone)]
pub struct GeminiLlmRedacter<'a> {
client: GoogleApi<GenerativeServiceClient<GoogleAuthMiddleware>>,
base_options: RedacterBaseOptions,
gemini_llm_options: crate::redacters::GeminiLlmRedacterOptions,
reporter: &'a AppReporter<'a>,
}
Expand All @@ -33,7 +31,6 @@ impl<'a> GeminiLlmRedacter<'a> {
const DEFAULT_GEMINI_MODEL: &'static str = "models/gemini-1.5-flash";

pub async fn new(
base_options: RedacterBaseOptions,
gemini_llm_options: GeminiLlmRedacterOptions,
reporter: &'a AppReporter<'a>,
) -> AppResult<Self> {
Expand All @@ -47,7 +44,6 @@ impl<'a> GeminiLlmRedacter<'a> {
).await?;
Ok(GeminiLlmRedacter {
client,
base_options,
gemini_llm_options,
reporter,
})
Expand Down Expand Up @@ -198,10 +194,6 @@ impl<'a> Redacter for GeminiLlmRedacter<'a> {
_ => RedactSupportedOptions::Unsupported,
})
}

fn options(&self) -> &RedacterBaseOptions {
&self.base_options
}
}

#[allow(unused_imports)]
Expand All @@ -228,15 +220,7 @@ mod tests {
let content = RedacterDataItemContent::Value(test_content.to_string());
let input = RedacterDataItem { file_ref, content };

let redacter_options = RedacterBaseOptions {
allow_unsupported_copies: false,
csv_headers_disable: false,
csv_delimiter: None,
sampling_size: None,
};

let redacter = GeminiLlmRedacter::new(
redacter_options,
GeminiLlmRedacterOptions {
project_id: GcpProjectId::new(test_gcp_project_id),
gemini_model: None,
Expand Down
37 changes: 12 additions & 25 deletions src/redacters/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,25 +87,24 @@ impl Display for RedacterOptions {

impl<'a> Redacters<'a> {
pub async fn new_redacter(
redacter_options: RedacterOptions,
provider_options: RedacterProviderOptions,
reporter: &'a AppReporter<'a>,
) -> AppResult<Self> {
match redacter_options.provider_options {
match provider_options {
RedacterProviderOptions::GcpDlp(options) => Ok(Redacters::GcpDlp(
GcpDlpRedacter::new(redacter_options.base_options, options, reporter).await?,
GcpDlpRedacter::new(options, reporter).await?,
)),
RedacterProviderOptions::AwsComprehend(options) => Ok(Redacters::AwsComprehendDlp(
AwsComprehendRedacter::new(redacter_options.base_options, options, reporter)
.await?,
AwsComprehendRedacter::new(options, reporter).await?,
)),
RedacterProviderOptions::MsPresidio(options) => Ok(Redacters::MsPresidio(
MsPresidioRedacter::new(redacter_options.base_options, options, reporter).await?,
MsPresidioRedacter::new(options, reporter).await?,
)),
RedacterProviderOptions::GeminiLlm(options) => Ok(Redacters::GeminiLlm(
GeminiLlmRedacter::new(redacter_options.base_options, options, reporter).await?,
GeminiLlmRedacter::new(options, reporter).await?,
)),
RedacterProviderOptions::OpenAiLlm(options) => Ok(Redacters::OpenAiLlm(
OpenAiLlmRedacter::new(redacter_options.base_options, options, reporter).await?,
OpenAiLlmRedacter::new(options, reporter).await?,
)),
}
}
Expand Down Expand Up @@ -149,8 +148,6 @@ pub trait Redacter {
&self,
file_ref: &FileSystemRef,
) -> AppResult<RedactSupportedOptions>;

fn options(&self) -> &RedacterBaseOptions;
}

impl<'a> Redacter for Redacters<'a> {
Expand Down Expand Up @@ -178,22 +175,13 @@ impl<'a> Redacter for Redacters<'a> {
Redacters::OpenAiLlm(redacter) => redacter.redact_supported_options(file_ref).await,
}
}

fn options(&self) -> &RedacterBaseOptions {
match self {
Redacters::GcpDlp(redacter) => redacter.options(),
Redacters::AwsComprehendDlp(redacter) => redacter.options(),
Redacters::MsPresidio(redacter) => redacter.options(),
Redacters::GeminiLlm(redacter) => redacter.options(),
Redacters::OpenAiLlm(redacter) => redacter.options(),
}
}
}

pub async fn redact_stream<
S: Stream<Item = AppResult<bytes::Bytes>> + Send + Unpin + Sync + 'static,
>(
redacter: &impl Redacter,
redacter_base_options: &RedacterBaseOptions,
supported_options: &RedactSupportedOptions,
input: S,
file_ref: &FileSystemRef,
Expand All @@ -210,7 +198,7 @@ pub async fn redact_stream<
String::from_utf8(all_bytes).map_err(|e| AppError::SystemError {
message: format!("Failed to convert bytes to string: {}", e),
})?;
let content = if let Some(sampling_size) = redacter.options().sampling_size {
let content = if let Some(sampling_size) = redacter_base_options.sampling_size {
let sampling_size = std::cmp::min(sampling_size, whole_content.len());
whole_content
.chars()
Expand Down Expand Up @@ -240,17 +228,16 @@ pub async fn redact_stream<
input.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err)),
);
let mut reader = csv_async::AsyncReaderBuilder::default()
.has_headers(!redacter.options().csv_headers_disable)
.has_headers(!redacter_base_options.csv_headers_disable)
.delimiter(
redacter
.options()
redacter_base_options
.csv_delimiter
.as_ref()
.cloned()
.unwrap_or(b','),
)
.create_reader(reader);
let headers = if !redacter.options().csv_headers_disable {
let headers = if !redacter_base_options.csv_headers_disable {
reader
.headers()
.await?
Expand Down
Loading

0 comments on commit 2af5985

Please sign in to comment.