From 3fd6f8f3097c02a57c00579312b3d6fe7b7f8aa8 Mon Sep 17 00:00:00 2001 From: Abdulla Abdurakhmanov Date: Sun, 4 Aug 2024 21:40:49 +0200 Subject: [PATCH] CSV data table as text support in AWS --- src/commands/copy_command.rs | 6 +++-- src/redacters/aws_comprehend.rs | 30 ++++++++++++++++--------- src/redacters/gcp_dlp.rs | 24 ++++++++++++++------ src/redacters/mod.rs | 40 +++++++++++++++++++++++++-------- 4 files changed, 72 insertions(+), 28 deletions(-) diff --git a/src/commands/copy_command.rs b/src/commands/copy_command.rs index 6ffc1d3..dbbfc78 100644 --- a/src/commands/copy_command.rs +++ b/src/commands/copy_command.rs @@ -3,7 +3,7 @@ use crate::filesystems::{ AbsoluteFilePath, DetectFileSystem, FileMatcher, FileMatcherResult, FileSystemConnection, FileSystemRef, }; -use crate::redacters::{Redacter, RedacterOptions, Redacters}; +use crate::redacters::{RedactSupportedOptions, Redacter, RedacterOptions, Redacters}; use crate::reporter::AppReporter; use crate::AppResult; use console::{Style, Term}; @@ -228,7 +228,9 @@ async fn redact_upload_file<'a, SFS: FileSystemConnection<'a>, DFS: FileSystemCo dest_file_ref: &FileSystemRef, redacter: &impl Redacter, ) -> AppResult { - if redacter.is_redact_supported(dest_file_ref).await? { + if redacter.redact_supported_options(dest_file_ref).await? + != RedactSupportedOptions::Unsupported + { match redacter.redact_stream(source_reader, dest_file_ref).await { Ok(redacted_reader) => { destination_fs diff --git a/src/redacters/aws_comprehend.rs b/src/redacters/aws_comprehend.rs index 31c3772..142d598 100644 --- a/src/redacters/aws_comprehend.rs +++ b/src/redacters/aws_comprehend.rs @@ -1,7 +1,8 @@ use crate::errors::AppError; use crate::filesystems::FileSystemRef; use crate::redacters::{ - Redacter, RedacterDataItem, RedacterDataItemContent, RedacterOptions, Redacters, + RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, RedacterOptions, + Redacters, }; use crate::reporter::AppReporter; use crate::AppResult; @@ -89,19 +90,28 @@ impl<'a> AwsComprehendDlpRedacter<'a> { impl<'a> Redacter for AwsComprehendDlpRedacter<'a> { async fn redact(&self, input: RedacterDataItem) -> AppResult { match &input.content { - RedacterDataItemContent::Table { .. } | RedacterDataItemContent::Value(_) => { - self.redact_text_file(input).await + RedacterDataItemContent::Value(_) => self.redact_text_file(input).await, + RedacterDataItemContent::Table { .. } | RedacterDataItemContent::Image { .. } => { + Err(AppError::SystemError { + message: "Attempt to redact of unsupported image type".to_string(), + }) } - RedacterDataItemContent::Image { .. } => Err(AppError::SystemError { - message: "Attempt to redact of unsupported image type".to_string(), - }), } } - async fn is_redact_supported(&self, file_ref: &FileSystemRef) -> AppResult { - Ok(file_ref.media_type.as_ref().iter().all(|media_type| { - Redacters::is_mime_text(media_type) || Redacters::is_mime_table(media_type) - })) + async fn redact_supported_options( + &self, + file_ref: &FileSystemRef, + ) -> AppResult { + Ok(match file_ref.media_type.as_ref() { + Some(media_type) if Redacters::is_mime_text(media_type) => { + RedactSupportedOptions::Supported + } + Some(media_type) if Redacters::is_mime_table(media_type) => { + RedactSupportedOptions::SupportedAsText + } + _ => RedactSupportedOptions::Unsupported, + }) } fn options(&self) -> &RedacterOptions { diff --git a/src/redacters/gcp_dlp.rs b/src/redacters/gcp_dlp.rs index c0d9396..d22306a 100644 --- a/src/redacters/gcp_dlp.rs +++ b/src/redacters/gcp_dlp.rs @@ -2,7 +2,8 @@ use crate::common_types::GcpProjectId; use crate::errors::AppError; use crate::filesystems::FileSystemRef; use crate::redacters::{ - Redacter, RedacterDataItem, RedacterDataItemContent, RedacterOptions, Redacters, + RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, RedacterOptions, + Redacters, }; use crate::reporter::AppReporter; use crate::AppResult; @@ -204,12 +205,21 @@ impl<'a> Redacter for GcpDlpRedacter<'a> { } } - async fn is_redact_supported(&self, file_ref: &FileSystemRef) -> AppResult { - Ok(file_ref.media_type.as_ref().iter().all(|media_type| { - Redacters::is_mime_text(media_type) - || Redacters::is_mime_table(media_type) - || Self::check_supported_image_type(media_type) - })) + async fn redact_supported_options( + &self, + file_ref: &FileSystemRef, + ) -> AppResult { + Ok( + if file_ref.media_type.as_ref().iter().all(|media_type| { + Redacters::is_mime_text(media_type) + || Redacters::is_mime_table(media_type) + || Self::check_supported_image_type(media_type) + }) { + RedactSupportedOptions::Supported + } else { + RedactSupportedOptions::Unsupported + }, + ) } fn options(&self) -> &RedacterOptions { diff --git a/src/redacters/mod.rs b/src/redacters/mod.rs index f712fd8..7049630 100644 --- a/src/redacters/mod.rs +++ b/src/redacters/mod.rs @@ -108,10 +108,20 @@ impl<'a> Redacters<'a> { } } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RedactSupportedOptions { + Supported, + SupportedAsText, + Unsupported, +} + pub trait Redacter { async fn redact(&self, input: RedacterDataItem) -> AppResult; - async fn is_redact_supported(&self, file_ref: &FileSystemRef) -> AppResult; + async fn redact_supported_options( + &self, + file_ref: &FileSystemRef, + ) -> AppResult; fn options(&self) -> &RedacterOptions; @@ -123,14 +133,21 @@ pub trait Redacter { file_ref: &FileSystemRef, ) -> AppResult> + Send + Sync + Unpin + 'static>> { + let supported_options = self.redact_supported_options(file_ref).await?; let content_to_redact = match file_ref.media_type { - Some(ref mime) if Redacters::is_mime_text(mime) => { + Some(ref mime) + if Redacters::is_mime_text(mime) + || (Redacters::is_mime_table(mime) + && matches!( + supported_options, + RedactSupportedOptions::SupportedAsText + )) => + { let all_chunks: Vec = input.try_collect().await?; let all_bytes = all_chunks.concat(); - let content = - String::from_utf8(all_bytes).map_err(|e| crate::AppError::SystemError { - message: format!("Failed to convert bytes to string: {}", e), - })?; + let content = String::from_utf8(all_bytes).map_err(|e| AppError::SystemError { + message: format!("Failed to convert bytes to string: {}", e), + })?; Ok(RedacterDataItem { content: RedacterDataItemContent::Value(content), file_ref: file_ref.clone(), @@ -223,10 +240,15 @@ impl<'a> Redacter for Redacters<'a> { } } - async fn is_redact_supported(&self, file_ref: &FileSystemRef) -> AppResult { + async fn redact_supported_options( + &self, + file_ref: &FileSystemRef, + ) -> AppResult { match self { - Redacters::GcpDlp(redacter) => redacter.is_redact_supported(file_ref).await, - Redacters::AwsComprehendDlp(redacter) => redacter.is_redact_supported(file_ref).await, + Redacters::GcpDlp(redacter) => redacter.redact_supported_options(file_ref).await, + Redacters::AwsComprehendDlp(redacter) => { + redacter.redact_supported_options(file_ref).await + } } }