diff --git a/Cargo.toml b/Cargo.toml index dd5916f..62342e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,8 @@ ci-gcp = [] # For testing on CI/GCP ci-aws = [] # For testing on CI/AWS ci-ms-presidio = [] # For testing on CI/MS Presidiom ci-gcp-llm = [] # For testing on CI/GCP with LLM models -ci = ["ci-gcp", "ci-aws", "ci-ms-presidio", "ci-gcp-llm"] +ci-open-ai = [] # For testing on CI/OpenAI +ci = ["ci-gcp", "ci-aws", "ci-ms-presidio", "ci-gcp-llm", "ci-open-ai"] [dependencies] diff --git a/README.md b/README.md index 59b3b83..a62cdb7 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,8 @@ Google Cloud Platform's DLP API. * images * [Gemini LLM](https://ai.google.dev/gemini-api/docs) based redaction * text, html, csv, json files + * [Open AI LLM](https://openai.com/) based redaction + * text, html, csv, json files * ... more DLP providers can be added in the future. * **CLI:** Easy-to-use command-line interface for streamlined workflows. * Built with Rust to ensure speed, safety, and reliability. @@ -67,7 +69,7 @@ Options: -f, --filename-filter Filter by name using glob patterns such as *.txt -d, --redact - Redacter type [possible values: gcp-dlp, aws-comprehend, ms-presidio, gemini-llm] + Redacter type [possible values: gcp-dlp, aws-comprehend, ms-presidio, gemini-llm, open-ai-llm] --gcp-project-id GCP project id that will be used to redact and bill API calls --allow-unsupported-copies @@ -86,6 +88,10 @@ Options: Gemini model name for Gemini LLM redacter. Default is 'models/gemini-1.5-flash' --sampling-size Sampling size in bytes before redacting files. Disabled by default + --open-ai-api-key + API key for OpenAI LLM redacter + --open-ai-model + Open AI model name for OpenAI LLM redacter. Default is 'gpt-4o-mini' -h, --help Print help ``` @@ -135,6 +141,11 @@ To be able to use GCP DLP you need to: official [instructions](https://ai.google.dev/gemini-api/docs/oauth#set-cloud). - provide a GCP project id using `--gcp-project-id` option. 
+### Open AI LLM + +To be able to use Open AI LLM you need to provide an API key using `--open-ai-api-key` command line option. +Optionally, you can provide a model name using `--open-ai-model` option. Default is `gpt-4o-mini`. + ## Examples: ```sh diff --git a/src/args.rs b/src/args.rs index 9af2036..c228c60 100644 --- a/src/args.rs +++ b/src/args.rs @@ -1,7 +1,7 @@ use crate::common_types::GcpProjectId; use crate::errors::AppError; use crate::redacters::{ - GcpDlpRedacterOptions, GeminiLlmModelName, OpenAiLlmApiKey, RedacterOptions, + GcpDlpRedacterOptions, GeminiLlmModelName, OpenAiLlmApiKey, OpenAiModelName, RedacterOptions, RedacterProviderOptions, }; use clap::*; @@ -144,6 +144,12 @@ pub struct RedacterArgs { #[arg(long, help = "API key for OpenAI LLM redacter")] pub open_ai_api_key: Option, + + #[arg( + long, + help = "Open AI model name for OpenAI LLM redacter. Default is 'gpt-4o-mini'" + )] + pub open_ai_model: Option, } impl TryInto for RedacterArgs { @@ -200,6 +206,7 @@ impl TryInto for RedacterArgs { message: "OpenAI API key is required for OpenAI LLM redacter" .to_string(), })?, + model: self.open_ai_model, }, )), None => Err(AppError::RedacterConfigError { diff --git a/src/redacters/open_ai_llm.rs b/src/redacters/open_ai_llm.rs index ca24f99..698ea2f 100644 --- a/src/redacters/open_ai_llm.rs +++ b/src/redacters/open_ai_llm.rs @@ -14,9 +14,13 @@ use crate::AppResult; #[derive(Debug, Clone, ValueStruct)] pub struct OpenAiLlmApiKey(String); +#[derive(Debug, Clone, ValueStruct)] +pub struct OpenAiModelName(String); + #[derive(Debug, Clone)] pub struct OpenAiLlmRedacterOptions { pub api_key: OpenAiLlmApiKey, + pub model: Option, } #[derive(Clone)] @@ -50,6 +54,8 @@ struct OpenAiLlmAnalyzeChoice { } impl<'a> OpenAiLlmRedacter<'a> { + const DEFAULT_MODEL: &'static str = "gpt-4o-mini"; + pub async fn new( redacter_options: RedacterOptions, open_ai_llm_options: OpenAiLlmRedacterOptions, @@ -84,7 +90,7 @@ impl<'a> OpenAiLlmRedacter<'a> { let 
generate_random_text_separator = format!("---{}", rand.gen::()); let analyze_request = OpenAiLlmAnalyzeRequest { - model: "gpt-4o".to_string(), + model: self.open_ai_llm_options.model.as_ref().map(|v| v.value().clone()).unwrap_or_else(|| Self::DEFAULT_MODEL.to_string()), messages: vec![ OpenAiLlmAnalyzeMessage { role: "system".to_string(),