Skip to content

Commit

Permalink
Support user defined InfoTypes for GCP DLP redacter
Browse files Browse the repository at this point in the history
  • Loading branch information
abdolence committed Aug 15, 2024
1 parent e2e58e8 commit 0223805
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 10 deletions.
23 changes: 20 additions & 3 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,27 @@ pub struct RedacterArgs {
#[arg(short = 'd', long, value_enum, help = "List of redacters to use")]
redact: Option<Vec<RedacterType>>,

#[arg(
long,
help = "Allow unsupported types to be copied without redaction",
default_value = "false"
)]
pub allow_unsupported_copies: bool,

#[arg(
long,
help = "GCP project id that will be used to redact and bill API calls"
)]
pub gcp_project_id: Option<GcpProjectId>,

#[arg(long, help = "Additional GCP DLP built in info types for redaction")]
pub gcp_dlp_built_in_info_type: Option<Vec<String>>,

#[arg(
long,
help = "Allow unsupported types to be copied without redaction",
default_value = "false"
help = "Additional GCP DLP user defined stored info types for redaction"
)]
pub allow_unsupported_copies: bool,
pub gcp_dlp_stored_info_type: Option<Vec<String>>,

#[arg(
long,
Expand Down Expand Up @@ -184,6 +193,14 @@ impl TryInto<RedacterOptions> for RedacterArgs {
Some(ref project_id) => {
Ok(RedacterProviderOptions::GcpDlp(GcpDlpRedacterOptions {
project_id: project_id.clone(),
user_defined_built_in_info_types: self
.gcp_dlp_built_in_info_type
.clone()
.unwrap_or_default(),
user_defined_stored_info_types: self
.gcp_dlp_stored_info_type
.clone()
.unwrap_or_default(),
}))
}
None => Err(AppError::RedacterConfigError {
Expand Down
70 changes: 63 additions & 7 deletions src/redacters/gcp_dlp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use gcloud_sdk::tonic::metadata::MetadataValue;
use gcloud_sdk::{tonic, GoogleApi, GoogleAuthMiddleware};
use mime::Mime;
use rvstruct::ValueStruct;
use std::collections::HashSet;
use tokio_util::bytes;

#[derive(Clone)]
Expand All @@ -25,6 +26,8 @@ pub struct GcpDlpRedacter<'a> {
/// Configuration for the GCP DLP redacter.
#[derive(Debug, Clone)]
pub struct GcpDlpRedacterOptions {
/// GCP project id; interpolated into the `projects/{id}/...` resource names sent with DLP requests.
pub project_id: GcpProjectId,
/// Extra built-in info type names, combined with the redacter's default `INFO_TYPES` list.
pub user_defined_built_in_info_types: Vec<String>,
/// Stored info type names; each is referenced as `projects/{project_id}/storedInfoTypes/{name}`.
pub user_defined_stored_info_types: Vec<String>,
}

impl<'a> GcpDlpRedacter<'a> {
Expand Down Expand Up @@ -71,8 +74,8 @@ impl<'a> GcpDlpRedacter<'a> {
"projects/{}/locations/global",
self.gcp_dlp_options.project_id.value()
),
inspect_config: Some(Self::create_inspect_config()),
deidentify_config: Some(Self::create_deidentify_config()),
inspect_config: Some(self.create_inspect_config()),
deidentify_config: Some(self.create_deidentify_config()),
item: Some(input.content.try_into()?),
..gcloud_sdk::google::privacy::dlp::v2::DeidentifyContentRequest::default()
},
Expand Down Expand Up @@ -108,7 +111,7 @@ impl<'a> GcpDlpRedacter<'a> {
"projects/{}/locations/global",
self.gcp_dlp_options.project_id.value()
),
inspect_config: Some(Self::create_inspect_config()),
inspect_config: Some(self.create_inspect_config()),
byte_item: Some(input_bytes_content),
..gcloud_sdk::google::privacy::dlp::v2::RedactImageRequest::default()
});
Expand Down Expand Up @@ -142,26 +145,63 @@ impl<'a> GcpDlpRedacter<'a> {
}
}

fn create_inspect_config() -> gcloud_sdk::google::privacy::dlp::v2::InspectConfig {
fn create_inspect_config(&self) -> gcloud_sdk::google::privacy::dlp::v2::InspectConfig {
gcloud_sdk::google::privacy::dlp::v2::InspectConfig {
info_types: Self::INFO_TYPES
info_types: self
.create_built_in_info_types()
.iter()
.map(|v| gcloud_sdk::google::privacy::dlp::v2::InfoType {
name: v.to_string(),
..gcloud_sdk::google::privacy::dlp::v2::InfoType::default()
})
.collect(),
custom_info_types: self
.gcp_dlp_options
.user_defined_stored_info_types
.iter()
.map(
|stored_info_type_name| {
gcloud_sdk::google::privacy::dlp::v2::CustomInfoType {
info_type: Some(gcloud_sdk::google::privacy::dlp::v2::InfoType {
name: stored_info_type_name.clone(),
..gcloud_sdk::google::privacy::dlp::v2::InfoType::default()
}),
r#type: Some(
gcloud_sdk::google::privacy::dlp::v2::custom_info_type::Type::StoredType(
gcloud_sdk::google::privacy::dlp::v2::StoredType {
name: format!(
"projects/{}/storedInfoTypes/{}",
self.gcp_dlp_options.project_id.value(),
stored_info_type_name
),
..gcloud_sdk::google::privacy::dlp::v2::StoredType::default()
},
),
),
..gcloud_sdk::google::privacy::dlp::v2::CustomInfoType::default()
}
},
)
.collect(),
..gcloud_sdk::google::privacy::dlp::v2::InspectConfig::default()
}
}

fn create_deidentify_config() -> gcloud_sdk::google::privacy::dlp::v2::DeidentifyConfig {
fn create_deidentify_config(&self) -> gcloud_sdk::google::privacy::dlp::v2::DeidentifyConfig {
let user_stored_info_types_set: HashSet<&str> = self
.gcp_dlp_options
.user_defined_stored_info_types
.iter()
.map(|s| s.as_str())
.collect();
gcloud_sdk::google::privacy::dlp::v2::DeidentifyConfig {
transformation: Some(gcloud_sdk::google::privacy::dlp::v2::deidentify_config::Transformation::InfoTypeTransformations(
gcloud_sdk::google::privacy::dlp::v2::InfoTypeTransformations {
transformations: vec![
gcloud_sdk::google::privacy::dlp::v2::info_type_transformations::InfoTypeTransformation {
info_types: Self::INFO_TYPES.iter().map(|v| gcloud_sdk::google::privacy::dlp::v2::InfoType {
info_types: self.create_built_in_info_types().union(
&user_stored_info_types_set
).collect::<Vec<_>>().iter().map(|v| gcloud_sdk::google::privacy::dlp::v2::InfoType {
name: v.to_string(),
..gcloud_sdk::google::privacy::dlp::v2::InfoType::default()
}).collect(),
Expand All @@ -183,6 +223,20 @@ impl<'a> GcpDlpRedacter<'a> {
}
}

/// Collects the names of every built-in info type to use.
///
/// The result combines the default `Self::INFO_TYPES` with the names from
/// `user_defined_built_in_info_types`. Because the names are gathered into a
/// `HashSet`, a name present in both lists appears only once.
fn create_built_in_info_types(&self) -> HashSet<&str> {
    // Start from the defaults, then add the user-supplied names in place.
    let mut info_type_names: HashSet<&str> = Self::INFO_TYPES.iter().copied().collect();
    info_type_names.extend(
        self.gcp_dlp_options
            .user_defined_built_in_info_types
            .iter()
            .map(String::as_str),
    );
    info_type_names
}

fn check_supported_image_type(mime_type: &Mime) -> bool {
Redacters::is_mime_image(mime_type)
&& (mime_type.subtype() == "png"
Expand Down Expand Up @@ -410,6 +464,8 @@ mod tests {
let redacter = GcpDlpRedacter::new(
GcpDlpRedacterOptions {
project_id: GcpProjectId::new(test_gcp_project_id),
user_defined_built_in_info_types: vec![],
user_defined_stored_info_types: vec![],
},
&reporter,
)
Expand Down

0 comments on commit 0223805

Please sign in to comment.