Skip to content

Commit

Permalink
Documentation update
Browse files Browse the repository at this point in the history
  • Loading branch information
abdolence committed Aug 9, 2024
1 parent 9973840 commit 7fbb883
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 4 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ ci-gcp = [] # For testing on CI/GCP
ci-aws = [] # For testing on CI/AWS
ci-ms-presidio = [] # For testing on CI/MS Presidio
ci-gcp-llm = [] # For testing on CI/GCP with LLM models
ci = ["ci-gcp", "ci-aws", "ci-ms-presidio", "ci-gcp-llm"]
ci-open-ai = [] # For testing on CI/OpenAI
ci = ["ci-gcp", "ci-aws", "ci-ms-presidio", "ci-gcp-llm", "ci-open-ai"]


[dependencies]
Expand Down
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ Google Cloud Platform's DLP API.
* images
* [Gemini LLM](https://ai.google.dev/gemini-api/docs) based redaction
* text, html, csv, json files
* [OpenAI LLM](https://openai.com/) based redaction
* text, html, csv, json files
* ... more DLP providers can be added in the future.
* **CLI:** Easy-to-use command-line interface for streamlined workflows.
* Built with Rust to ensure speed, safety, and reliability.
Expand Down Expand Up @@ -67,7 +69,7 @@ Options:
-f, --filename-filter <FILENAME_FILTER>
Filter by name using glob patterns such as *.txt
-d, --redact <REDACT>
Redacter type [possible values: gcp-dlp, aws-comprehend, ms-presidio, gemini-llm]
Redacter type [possible values: gcp-dlp, aws-comprehend, ms-presidio, gemini-llm, open-ai-llm]
--gcp-project-id <GCP_PROJECT_ID>
GCP project id that will be used to redact and bill API calls
--allow-unsupported-copies
Expand All @@ -86,6 +88,10 @@ Options:
Gemini model name for Gemini LLM redacter. Default is 'models/gemini-1.5-flash'
--sampling-size <SAMPLING_SIZE>
Sampling size in bytes before redacting files. Disabled by default
--open-ai-api-key <OPEN_AI_API_KEY>
API key for OpenAI LLM redacter
--open-ai-model <OPEN_AI_MODEL>
Open AI model name for OpenAI LLM redacter. Default is 'gpt-4o-mini'
-h, --help
Print help
```
Expand Down Expand Up @@ -135,6 +141,11 @@ To be able to use GCP DLP you need to:
official [instructions](https://ai.google.dev/gemini-api/docs/oauth#set-cloud).
- provide a GCP project id using `--gcp-project-id` option.

### OpenAI LLM

To be able to use the OpenAI LLM redacter you need to provide an API key using the `--open-ai-api-key` command line option.
Optionally, you can provide a model name using the `--open-ai-model` option. The default is `gpt-4o-mini`.

## Examples:

```sh
Expand Down
9 changes: 8 additions & 1 deletion src/args.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::common_types::GcpProjectId;
use crate::errors::AppError;
use crate::redacters::{
GcpDlpRedacterOptions, GeminiLlmModelName, OpenAiLlmApiKey, RedacterOptions,
GcpDlpRedacterOptions, GeminiLlmModelName, OpenAiLlmApiKey, OpenAiModelName, RedacterOptions,
RedacterProviderOptions,
};
use clap::*;
Expand Down Expand Up @@ -144,6 +144,12 @@ pub struct RedacterArgs {

#[arg(long, help = "API key for OpenAI LLM redacter")]
pub open_ai_api_key: Option<OpenAiLlmApiKey>,

#[arg(
long,
help = "Open AI model name for OpenAI LLM redacter. Default is 'gpt-4o-mini'"
)]
pub open_ai_model: Option<OpenAiModelName>,
}

impl TryInto<RedacterOptions> for RedacterArgs {
Expand Down Expand Up @@ -200,6 +206,7 @@ impl TryInto<RedacterOptions> for RedacterArgs {
message: "OpenAI API key is required for OpenAI LLM redacter"
.to_string(),
})?,
model: self.open_ai_model,
},
)),
None => Err(AppError::RedacterConfigError {
Expand Down
8 changes: 7 additions & 1 deletion src/redacters/open_ai_llm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@ use crate::AppResult;
/// Newtype wrapper around the OpenAI API key string
/// (supplied via the `--open-ai-api-key` CLI option).
#[derive(Debug, Clone, ValueStruct)]
pub struct OpenAiLlmApiKey(String);

/// Newtype wrapper around an OpenAI model name string
/// (supplied via the `--open-ai-model` CLI option).
#[derive(Debug, Clone, ValueStruct)]
pub struct OpenAiModelName(String);

/// Configuration for the OpenAI LLM redacter.
#[derive(Debug, Clone)]
pub struct OpenAiLlmRedacterOptions {
    // API key used to authenticate requests to the OpenAI API; required.
    pub api_key: OpenAiLlmApiKey,
    // Optional model override; when `None` the redacter falls back to its
    // built-in default ("gpt-4o-mini" per `DEFAULT_MODEL`).
    pub model: Option<OpenAiModelName>,
}

#[derive(Clone)]
Expand Down Expand Up @@ -50,6 +54,8 @@ struct OpenAiLlmAnalyzeChoice {
}

impl<'a> OpenAiLlmRedacter<'a> {
const DEFAULT_MODEL: &'static str = "gpt-4o-mini";

pub async fn new(
redacter_options: RedacterOptions,
open_ai_llm_options: OpenAiLlmRedacterOptions,
Expand Down Expand Up @@ -84,7 +90,7 @@ impl<'a> OpenAiLlmRedacter<'a> {
let generate_random_text_separator = format!("---{}", rand.gen::<u64>());

let analyze_request = OpenAiLlmAnalyzeRequest {
model: "gpt-4o".to_string(),
model: self.open_ai_llm_options.model.as_ref().map(|v| v.value().clone()).unwrap_or_else(|| Self::DEFAULT_MODEL.to_string()),
messages: vec![
OpenAiLlmAnalyzeMessage {
role: "system".to_string(),
Expand Down

0 comments on commit 7fbb883

Please sign in to comment.