Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support user defined InfoTypes for GCP DLP redacter #18

Merged
merged 1 commit into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,18 +118,27 @@ pub struct RedacterArgs {
#[arg(short = 'd', long, value_enum, help = "List of redacters to use")]
redact: Option<Vec<RedacterType>>,

#[arg(
long,
help = "Allow unsupported types to be copied without redaction",
default_value = "false"
)]
pub allow_unsupported_copies: bool,

#[arg(
long,
help = "GCP project id that will be used to redact and bill API calls"
)]
pub gcp_project_id: Option<GcpProjectId>,

#[arg(long, help = "Additional GCP DLP built in info types for redaction")]
pub gcp_dlp_built_in_info_type: Option<Vec<String>>,

#[arg(
long,
help = "Allow unsupported types to be copied without redaction",
default_value = "false"
help = "Additional GCP DLP user defined stored info types for redaction"
)]
pub allow_unsupported_copies: bool,
pub gcp_dlp_stored_info_type: Option<Vec<String>>,

#[arg(
long,
Expand Down Expand Up @@ -184,6 +193,14 @@ impl TryInto<RedacterOptions> for RedacterArgs {
Some(ref project_id) => {
Ok(RedacterProviderOptions::GcpDlp(GcpDlpRedacterOptions {
project_id: project_id.clone(),
user_defined_built_in_info_types: self
.gcp_dlp_built_in_info_type
.clone()
.unwrap_or_default(),
user_defined_stored_info_types: self
.gcp_dlp_stored_info_type
.clone()
.unwrap_or_default(),
}))
}
None => Err(AppError::RedacterConfigError {
Expand Down
70 changes: 63 additions & 7 deletions src/redacters/gcp_dlp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use gcloud_sdk::tonic::metadata::MetadataValue;
use gcloud_sdk::{tonic, GoogleApi, GoogleAuthMiddleware};
use mime::Mime;
use rvstruct::ValueStruct;
use std::collections::HashSet;
use tokio_util::bytes;

#[derive(Clone)]
Expand All @@ -25,6 +26,8 @@ pub struct GcpDlpRedacter<'a> {
/// Configuration for the GCP DLP based redacter.
///
/// Built from the command line arguments (`gcp_project_id`,
/// `gcp_dlp_built_in_info_type`, `gcp_dlp_stored_info_type`); the two lists
/// default to empty when the corresponding argument is not supplied.
#[derive(Debug, Clone)]
pub struct GcpDlpRedacterOptions {
/// GCP project id. Used to build the `projects/{}/locations/global` request
/// parent and the `projects/{}/storedInfoTypes/{}` resource names.
pub project_id: GcpProjectId,
/// Extra built-in DLP info type names, merged with the redacter's default
/// `INFO_TYPES` list when the inspect/de-identify configs are created.
pub user_defined_built_in_info_types: Vec<String>,
/// Names of user defined stored info types. Each one is sent as a custom
/// info type whose stored type is `projects/{project_id}/storedInfoTypes/{name}`.
pub user_defined_stored_info_types: Vec<String>,
}

impl<'a> GcpDlpRedacter<'a> {
Expand Down Expand Up @@ -71,8 +74,8 @@ impl<'a> GcpDlpRedacter<'a> {
"projects/{}/locations/global",
self.gcp_dlp_options.project_id.value()
),
inspect_config: Some(Self::create_inspect_config()),
deidentify_config: Some(Self::create_deidentify_config()),
inspect_config: Some(self.create_inspect_config()),
deidentify_config: Some(self.create_deidentify_config()),
item: Some(input.content.try_into()?),
..gcloud_sdk::google::privacy::dlp::v2::DeidentifyContentRequest::default()
},
Expand Down Expand Up @@ -108,7 +111,7 @@ impl<'a> GcpDlpRedacter<'a> {
"projects/{}/locations/global",
self.gcp_dlp_options.project_id.value()
),
inspect_config: Some(Self::create_inspect_config()),
inspect_config: Some(self.create_inspect_config()),
byte_item: Some(input_bytes_content),
..gcloud_sdk::google::privacy::dlp::v2::RedactImageRequest::default()
});
Expand Down Expand Up @@ -142,26 +145,63 @@ impl<'a> GcpDlpRedacter<'a> {
}
}

fn create_inspect_config() -> gcloud_sdk::google::privacy::dlp::v2::InspectConfig {
fn create_inspect_config(&self) -> gcloud_sdk::google::privacy::dlp::v2::InspectConfig {
gcloud_sdk::google::privacy::dlp::v2::InspectConfig {
info_types: Self::INFO_TYPES
info_types: self
.create_built_in_info_types()
.iter()
.map(|v| gcloud_sdk::google::privacy::dlp::v2::InfoType {
name: v.to_string(),
..gcloud_sdk::google::privacy::dlp::v2::InfoType::default()
})
.collect(),
custom_info_types: self
.gcp_dlp_options
.user_defined_stored_info_types
.iter()
.map(
|stored_info_type_name| {
gcloud_sdk::google::privacy::dlp::v2::CustomInfoType {
info_type: Some(gcloud_sdk::google::privacy::dlp::v2::InfoType {
name: stored_info_type_name.clone(),
..gcloud_sdk::google::privacy::dlp::v2::InfoType::default()
}),
r#type: Some(
gcloud_sdk::google::privacy::dlp::v2::custom_info_type::Type::StoredType(
gcloud_sdk::google::privacy::dlp::v2::StoredType {
name: format!(
"projects/{}/storedInfoTypes/{}",
self.gcp_dlp_options.project_id.value(),
stored_info_type_name
),
..gcloud_sdk::google::privacy::dlp::v2::StoredType::default()
},
),
),
..gcloud_sdk::google::privacy::dlp::v2::CustomInfoType::default()
}
},
)
.collect(),
..gcloud_sdk::google::privacy::dlp::v2::InspectConfig::default()
}
}

fn create_deidentify_config() -> gcloud_sdk::google::privacy::dlp::v2::DeidentifyConfig {
fn create_deidentify_config(&self) -> gcloud_sdk::google::privacy::dlp::v2::DeidentifyConfig {
let user_stored_info_types_set: HashSet<&str> = self
.gcp_dlp_options
.user_defined_stored_info_types
.iter()
.map(|s| s.as_str())
.collect();
gcloud_sdk::google::privacy::dlp::v2::DeidentifyConfig {
transformation: Some(gcloud_sdk::google::privacy::dlp::v2::deidentify_config::Transformation::InfoTypeTransformations(
gcloud_sdk::google::privacy::dlp::v2::InfoTypeTransformations {
transformations: vec![
gcloud_sdk::google::privacy::dlp::v2::info_type_transformations::InfoTypeTransformation {
info_types: Self::INFO_TYPES.iter().map(|v| gcloud_sdk::google::privacy::dlp::v2::InfoType {
info_types: self.create_built_in_info_types().union(
&user_stored_info_types_set
).collect::<Vec<_>>().iter().map(|v| gcloud_sdk::google::privacy::dlp::v2::InfoType {
name: v.to_string(),
..gcloud_sdk::google::privacy::dlp::v2::InfoType::default()
}).collect(),
Expand All @@ -183,6 +223,20 @@ impl<'a> GcpDlpRedacter<'a> {
}
}

/// Returns the set of built-in DLP info type names to use for inspection.
///
/// The set is the redacter's default `INFO_TYPES` plus every name in
/// `gcp_dlp_options.user_defined_built_in_info_types`. Because the result is
/// a `HashSet`, a name that appears in both sources is kept only once, and
/// the iteration order of the returned set is unspecified.
fn create_built_in_info_types(&self) -> HashSet<&str> {
    // Seed the set straight from the defaults instead of materialising
    // intermediate vectors that would only be concatenated and discarded.
    let mut info_types: HashSet<&str> = Self::INFO_TYPES.iter().copied().collect();
    info_types.extend(
        self.gcp_dlp_options
            .user_defined_built_in_info_types
            .iter()
            .map(|v| v.as_str()),
    );
    info_types
}

fn check_supported_image_type(mime_type: &Mime) -> bool {
Redacters::is_mime_image(mime_type)
&& (mime_type.subtype() == "png"
Expand Down Expand Up @@ -410,6 +464,8 @@ mod tests {
let redacter = GcpDlpRedacter::new(
GcpDlpRedacterOptions {
project_id: GcpProjectId::new(test_gcp_project_id),
user_defined_built_in_info_types: vec![],
user_defined_stored_info_types: vec![],
},
&reporter,
)
Expand Down
Loading