Skip to content

Commit

Permalink
CSV data table as text support in AWS
Browse files Browse the repository at this point in the history
  • Loading branch information
abdolence committed Aug 4, 2024
1 parent 150a3a7 commit 3fd6f8f
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 28 deletions.
6 changes: 4 additions & 2 deletions src/commands/copy_command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::filesystems::{
AbsoluteFilePath, DetectFileSystem, FileMatcher, FileMatcherResult, FileSystemConnection,
FileSystemRef,
};
use crate::redacters::{Redacter, RedacterOptions, Redacters};
use crate::redacters::{RedactSupportedOptions, Redacter, RedacterOptions, Redacters};
use crate::reporter::AppReporter;
use crate::AppResult;
use console::{Style, Term};
Expand Down Expand Up @@ -228,7 +228,9 @@ async fn redact_upload_file<'a, SFS: FileSystemConnection<'a>, DFS: FileSystemCo
dest_file_ref: &FileSystemRef,
redacter: &impl Redacter,
) -> AppResult<TransferFileResult> {
if redacter.is_redact_supported(dest_file_ref).await? {
if redacter.redact_supported_options(dest_file_ref).await?
!= RedactSupportedOptions::Unsupported
{
match redacter.redact_stream(source_reader, dest_file_ref).await {
Ok(redacted_reader) => {
destination_fs
Expand Down
30 changes: 20 additions & 10 deletions src/redacters/aws_comprehend.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use crate::errors::AppError;
use crate::filesystems::FileSystemRef;
use crate::redacters::{
Redacter, RedacterDataItem, RedacterDataItemContent, RedacterOptions, Redacters,
RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, RedacterOptions,
Redacters,
};
use crate::reporter::AppReporter;
use crate::AppResult;
Expand Down Expand Up @@ -89,19 +90,28 @@ impl<'a> AwsComprehendDlpRedacter<'a> {
impl<'a> Redacter for AwsComprehendDlpRedacter<'a> {
async fn redact(&self, input: RedacterDataItem) -> AppResult<RedacterDataItemContent> {
match &input.content {
RedacterDataItemContent::Table { .. } | RedacterDataItemContent::Value(_) => {
self.redact_text_file(input).await
RedacterDataItemContent::Value(_) => self.redact_text_file(input).await,
RedacterDataItemContent::Table { .. } | RedacterDataItemContent::Image { .. } => {
Err(AppError::SystemError {
message: "Attempt to redact of unsupported image type".to_string(),
})
}
RedacterDataItemContent::Image { .. } => Err(AppError::SystemError {
message: "Attempt to redact of unsupported image type".to_string(),
}),
}
}

async fn is_redact_supported(&self, file_ref: &FileSystemRef) -> AppResult<bool> {
Ok(file_ref.media_type.as_ref().iter().all(|media_type| {
Redacters::is_mime_text(media_type) || Redacters::is_mime_table(media_type)
}))
/// Classifies how this AWS Comprehend redacter can handle `file_ref`.
///
/// * text media types   -> `RedactSupportedOptions::Supported`
/// * table media types  -> `RedactSupportedOptions::SupportedAsText`
/// * anything else, or no media type at all -> `RedactSupportedOptions::Unsupported`
async fn redact_supported_options(
    &self,
    file_ref: &FileSystemRef,
) -> AppResult<RedactSupportedOptions> {
    // Without a known media type there is nothing to classify.
    let Some(mime) = file_ref.media_type.as_ref() else {
        return Ok(RedactSupportedOptions::Unsupported);
    };

    // The text check runs first, so it wins if both predicates would match.
    let verdict = if Redacters::is_mime_text(mime) {
        RedactSupportedOptions::Supported
    } else if Redacters::is_mime_table(mime) {
        RedactSupportedOptions::SupportedAsText
    } else {
        RedactSupportedOptions::Unsupported
    };

    Ok(verdict)
}

fn options(&self) -> &RedacterOptions {
Expand Down
24 changes: 17 additions & 7 deletions src/redacters/gcp_dlp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ use crate::common_types::GcpProjectId;
use crate::errors::AppError;
use crate::filesystems::FileSystemRef;
use crate::redacters::{
Redacter, RedacterDataItem, RedacterDataItemContent, RedacterOptions, Redacters,
RedactSupportedOptions, Redacter, RedacterDataItem, RedacterDataItemContent, RedacterOptions,
Redacters,
};
use crate::reporter::AppReporter;
use crate::AppResult;
Expand Down Expand Up @@ -204,12 +205,21 @@ impl<'a> Redacter for GcpDlpRedacter<'a> {
}
}

async fn is_redact_supported(&self, file_ref: &FileSystemRef) -> AppResult<bool> {
Ok(file_ref.media_type.as_ref().iter().all(|media_type| {
Redacters::is_mime_text(media_type)
|| Redacters::is_mime_table(media_type)
|| Self::check_supported_image_type(media_type)
}))
/// Classifies whether this GCP DLP redacter can handle `file_ref`.
///
/// Returns `RedactSupportedOptions::Supported` when the media type is text,
/// a table, or an image type accepted by `Self::check_supported_image_type`;
/// otherwise `RedactSupportedOptions::Unsupported`.
async fn redact_supported_options(
    &self,
    file_ref: &FileSystemRef,
) -> AppResult<RedactSupportedOptions> {
    let is_supported = match file_ref.media_type.as_ref() {
        // NOTE(review): a file without a media type is reported as supported
        // here, while the AWS redacter reports it as unsupported — confirm
        // that this asymmetry is intended.
        None => true,
        Some(mime) => {
            Redacters::is_mime_text(mime)
                || Redacters::is_mime_table(mime)
                || Self::check_supported_image_type(mime)
        }
    };

    if is_supported {
        Ok(RedactSupportedOptions::Supported)
    } else {
        Ok(RedactSupportedOptions::Unsupported)
    }
}

fn options(&self) -> &RedacterOptions {
Expand Down
40 changes: 31 additions & 9 deletions src/redacters/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,20 @@ impl<'a> Redacters<'a> {
}
}

/// Outcome of asking a redacter whether it can process a given file.
///
/// Returned by `Redacter::redact_supported_options`. The type is a plain
/// fieldless enum, so it is `Copy` and can be compared and passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RedactSupportedOptions {
    /// The redacter handles this media type directly.
    Supported,
    /// The redacter can process the file only when its content is handed over
    /// as plain text. `Redacter::redact_stream` checks for this variant to
    /// send table media types as a single text value.
    SupportedAsText,
    /// The redacter cannot process this media type.
    Unsupported,
}

pub trait Redacter {
async fn redact(&self, input: RedacterDataItem) -> AppResult<RedacterDataItemContent>;

async fn is_redact_supported(&self, file_ref: &FileSystemRef) -> AppResult<bool>;
/// Reports whether, and in which form, this redacter can process the file
/// described by `file_ref`.
///
/// Implementations visible in this change decide from `file_ref.media_type`.
/// Callers compare the result against `RedactSupportedOptions::Unsupported`
/// to decide whether to attempt redaction at all, and `redact_stream` uses
/// `RedactSupportedOptions::SupportedAsText` to pass table content as text.
async fn redact_supported_options(
&self,
file_ref: &FileSystemRef,
) -> AppResult<RedactSupportedOptions>;

fn options(&self) -> &RedacterOptions;

Expand All @@ -123,14 +133,21 @@ pub trait Redacter {
file_ref: &FileSystemRef,
) -> AppResult<Box<dyn Stream<Item = AppResult<bytes::Bytes>> + Send + Sync + Unpin + 'static>>
{
let supported_options = self.redact_supported_options(file_ref).await?;
let content_to_redact = match file_ref.media_type {
Some(ref mime) if Redacters::is_mime_text(mime) => {
Some(ref mime)
if Redacters::is_mime_text(mime)
|| (Redacters::is_mime_table(mime)
&& matches!(
supported_options,
RedactSupportedOptions::SupportedAsText
)) =>
{
let all_chunks: Vec<bytes::Bytes> = input.try_collect().await?;
let all_bytes = all_chunks.concat();
let content =
String::from_utf8(all_bytes).map_err(|e| crate::AppError::SystemError {
message: format!("Failed to convert bytes to string: {}", e),
})?;
let content = String::from_utf8(all_bytes).map_err(|e| AppError::SystemError {
message: format!("Failed to convert bytes to string: {}", e),
})?;
Ok(RedacterDataItem {
content: RedacterDataItemContent::Value(content),
file_ref: file_ref.clone(),
Expand Down Expand Up @@ -223,10 +240,15 @@ impl<'a> Redacter for Redacters<'a> {
}
}

async fn is_redact_supported(&self, file_ref: &FileSystemRef) -> AppResult<bool> {
async fn redact_supported_options(
&self,
file_ref: &FileSystemRef,
) -> AppResult<RedactSupportedOptions> {
match self {
Redacters::GcpDlp(redacter) => redacter.is_redact_supported(file_ref).await,
Redacters::AwsComprehendDlp(redacter) => redacter.is_redact_supported(file_ref).await,
Redacters::GcpDlp(redacter) => redacter.redact_supported_options(file_ref).await,
Redacters::AwsComprehendDlp(redacter) => {
redacter.redact_supported_options(file_ref).await
}
}
}

Expand Down

0 comments on commit 3fd6f8f

Please sign in to comment.