Skip to content

Commit

Permalink
List (ls) command implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
abdolence committed Aug 5, 2024
1 parent 522ae67 commit 7a6d613
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 3 deletions.
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,28 @@ MS Presidio redacter:
redacter cp -d ms-presidio --ms-presidio-text-analyze-url http://localhost:5002/analyze --ms-presidio-image-redact-url http://localhost:5003/redact ...
```

## List (LS) command

For convenience, the tool can also list the files in a source directory, so you can see which files would be copied:

```
Usage: redacter ls [OPTIONS] <SOURCE>
Arguments:
<SOURCE> Source directory or file such as /tmp, /tmp/file.txt or gs://bucket/file.txt and others supported providers
Options:
-m, --max-size-limit <MAX_SIZE_LIMIT> Maximum size of files to copy in bytes
-f, --filename-filter <FILENAME_FILTER> Filter by name using glob patterns such as *.txt
-h, --help Print help
```

Example: list files in a GCS bucket:

```sh
redacter ls gs://my-little-bucket/my-big-files/
```

## Security considerations

- Your file contents are sent to the DLP API for redaction. Make sure you trust the DLP API provider.
Expand Down
16 changes: 16 additions & 0 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub struct CliArgs {
}

#[derive(Subcommand, Debug)]
#[allow(clippy::large_enum_variant)]
pub enum CliCommand {
#[command(about = "Copy and redact files from source to destination")]
Cp {
Expand All @@ -36,6 +37,21 @@ pub enum CliCommand {
#[command(flatten)]
redacter_args: Option<RedacterArgs>,
},
#[command(about = "List files in the source")]
Ls {
#[arg(
help = "Source directory or file such as /tmp, /tmp/file.txt or gs://bucket/file.txt and others supported providers"
)]
source: String,
#[arg(short = 'm', long, help = "Maximum size of files to copy in bytes")]
max_size_limit: Option<u64>,
#[arg(
short = 'f',
long,
help = "Filter by name using glob patterns such as *.txt"
)]
filename_filter: Option<globset::Glob>,
},
}

#[derive(ValueEnum, Debug, Clone)]
Expand Down
113 changes: 113 additions & 0 deletions src/commands/ls_command.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
use crate::filesystems::FileSystemConnection;
use crate::filesystems::{DetectFileSystem, FileMatcher};
use crate::AppResult;
use console::{pad_str, Alignment, Style, Term};
use indicatif::{HumanBytes, TermLike};
use rvstruct::ValueStruct;

/// Options accepted by the `ls` command.
#[derive(Debug, Clone)]
pub struct LsCommandOptions {
/// Matcher handed to the source file system when listing files.
/// `LsCommandOptions::new` builds it from the optional filename glob and
/// the optional maximum size limit.
pub file_matcher: FileMatcher,
}

impl LsCommandOptions {
    /// Builds the `ls` options from the optional command-line filters.
    ///
    /// When `filename_filter` is present it is compiled into a glob matcher;
    /// the compiled matcher (if any) and `max_size_limit` are then passed to
    /// `FileMatcher::new`.
    pub fn new(filename_filter: Option<globset::Glob>, max_size_limit: Option<u64>) -> Self {
        let compiled_glob = filename_filter
            .as_ref()
            .map(globset::Glob::compile_matcher);
        let file_matcher = FileMatcher::new(compiled_glob, max_size_limit);
        Self { file_matcher }
    }
}

/// Lists the files found at `source` and prints them as a table on `term`.
///
/// Opens the file system detected for `source`, lists the files accepted by
/// `options.file_matcher`, and writes one row per file (relative path, media
/// type, human-readable size). A summary follows with the number of files
/// found, their total size and the number of skipped entries. The file system
/// is closed once all output has been written.
///
/// # Errors
///
/// Propagates any error returned while opening, listing or closing the file
/// system, or while writing to the terminal.
pub async fn command_ls(term: &Term, source: &str, options: LsCommandOptions) -> AppResult<()> {
    // Fixed widths of the "Media Type" and "Size" columns.
    const MEDIA_TYPE_WIDTH: usize = 40;
    const SIZE_WIDTH: usize = 16;
    // Padding added to the longest file name, and the fallback name length.
    const FILENAME_PADDING: usize = 5;
    const DEFAULT_FILENAME_LEN: usize = 25;

    let bold_style = Style::new().bold();
    let highlighted = bold_style.clone().white();
    let dimmed_style = Style::new().dim();

    term.write_line(&format!(
        "Listing files in {}.",
        bold_style.apply_to(source)
    ))?;

    let app_reporter = crate::reporter::AppReporter::from(term);
    let mut source_fs = DetectFileSystem::open(source, &app_reporter).await?;
    let list_files_result = source_fs.list_files(Some(&options.file_matcher)).await?;
    let files = &list_files_result.files;

    // Files without a known size count as zero bytes.
    let total_size: u64 = files.iter().map(|f| f.file_size.unwrap_or(0)).sum();

    if !files.is_empty() {
        let longest_name = files
            .iter()
            .map(|f| f.relative_path.value().len())
            .max()
            .unwrap_or(DEFAULT_FILENAME_LEN);
        // Widen to usize *before* multiplying so `width * 2` cannot overflow
        // the terminal's u16 width.
        let width_cap = usize::from(term.width()) * 2 / 3;
        // The filename column never takes more than two thirds of the terminal.
        let max_filename_width = (longest_name + FILENAME_PADDING).min(width_cap);

        // Header row.
        term.write_line(&format!(
            "\n {} {} {}",
            dimmed_style.apply_to(pad_str(
                "Filename",
                max_filename_width,
                Alignment::Left,
                None
            )),
            dimmed_style.apply_to(pad_str(
                "Media Type",
                MEDIA_TYPE_WIDTH,
                Alignment::Left,
                None
            )),
            dimmed_style.apply_to(pad_str("Size", SIZE_WIDTH, Alignment::Left, None))
        ))?;

        // One row per file; over-long names are truncated with "...".
        for file in files {
            let media_type = file
                .media_type
                .as_ref()
                .map(|mime| mime.to_string())
                .unwrap_or_default();
            let size = HumanBytes(file.file_size.unwrap_or(0)).to_string();

            term.write_line(&format!(
                "- {} {} {}",
                highlighted.apply_to(pad_str(
                    file.relative_path.value(),
                    max_filename_width,
                    Alignment::Left,
                    Some("...")
                )),
                pad_str(&media_type, MEDIA_TYPE_WIDTH, Alignment::Left, None),
                highlighted.apply_to(pad_str(&size, SIZE_WIDTH, Alignment::Left, None))
            ))?;
        }
        term.write_line("")?;
    }

    // Summary lines are printed even when nothing was found.
    term.write_line(&format!(
        "{} files found. Total size: {}",
        highlighted.apply_to(files.len()),
        highlighted.apply_to(HumanBytes(total_size))
    ))?;
    term.write_line(&format!(
        "{} files skipped/filtered out.",
        dimmed_style.apply_to(list_files_result.skipped.to_string())
    ))?;

    source_fs.close().await?;
    Ok(())
}
3 changes: 3 additions & 0 deletions src/commands/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
mod copy_command;
pub use copy_command::*;

mod ls_command;
pub use ls_command::*;
15 changes: 12 additions & 3 deletions src/filesystems/gcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,12 @@ impl<'a> GoogleCloudStorageFileSystem<'a> {
let path = path.trim_start_matches("gs://");
let parts: Vec<&str> = path.split('/').collect();
let bucket = parts[0];
let object = parts[1..].join("/");
(bucket.to_string(), object.to_string())
if parts.len() == 1 || (parts.len() == 2 && parts[1].is_empty()) {
(bucket.to_string(), "/".to_string())
} else {
let object = parts[1..].join("/");
(bucket.to_string(), object.to_string())
}
}

#[async_recursion::async_recursion]
Expand Down Expand Up @@ -202,7 +206,12 @@ impl<'a> FileSystemConnection<'a> for GoogleCloudStorageFileSystem<'a> {
self.bucket_name, self.object_name
))?;
if self.object_name.ends_with('/') {
self.list_files_with_token(Some(self.object_name.clone()), None, &file_matcher)
let prefix = if self.object_name != "/" {
Some(self.object_name.clone())
} else {
None
};
self.list_files_with_token(prefix, None, &file_matcher)
.await
} else {
Ok(ListFilesResult::EMPTY)
Expand Down
8 changes: 8 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,14 @@ async fn handle_args(cli: CliArgs, term: &Term) -> AppResult<()> {
.as_str(),
)?;
}
CliCommand::Ls {
source,
max_size_limit,
filename_filter,
} => {
let options = LsCommandOptions::new(filename_filter, max_size_limit);
command_ls(term, &source, options).await?;
}
}

Ok(())
Expand Down

0 comments on commit 7a6d613

Please sign in to comment.