diff --git a/Cargo.lock b/Cargo.lock index a8fea16..4d1627a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c378d78423fdad8089616f827526ee33c19f2fddbd5de1629152c9593ba4783" +dependencies = [ + "memchr", +] + [[package]] name = "anstream" version = "0.3.2" @@ -139,10 +148,11 @@ dependencies = [ [[package]] name = "graby" -version = "0.1.0" +version = "1.0.0" dependencies = [ "anyhow", "clap", + "regex", ] [[package]] @@ -180,6 +190,12 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +[[package]] +name = "memchr" +version = "2.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" + [[package]] name = "once_cell" version = "1.18.0" @@ -204,6 +220,35 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + [[package]] name = "rustix" version = "0.38.4" diff --git a/Cargo.toml b/Cargo.toml index faa59f2..d9da826 100644 --- a/Cargo.toml +++ 
b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "graby" -version = "0.1.1" +version = "1.0.0" edition = "2021" authors = ["Raghav "] license = "MIT" @@ -17,3 +17,4 @@ categories = ["command-line-utilities"] [dependencies] clap = { version = "4.3.19", features = ["derive"] } anyhow = "1.0" +regex = "1.9.5" \ No newline at end of file diff --git a/README.md b/README.md index a21274b..f8c985e 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ # Introduction This is a small implementation of `grep` command line tool in rust (see References). -Unlike `grep` this implementation does not support `regex`. +From version 1.0.0 onwards, regular expressions (`regex`) are also supported. For complete implementation of `grep` in rust, check `ripgrep`. ## Installation @@ -17,7 +17,7 @@ cargo add graby ``` or manually add following in `Cargo.toml` file. ``` -graby = "0.1.1" # graby = "version" +graby = "1.0.0" # graby = "version" ``` To build `graby` from source you need to install rust on your device and run the following commands: ``` @@ -25,6 +25,7 @@ git clone https://github.com/Raghav-Bell/graby.git cd graby cargo run -- --help ``` +or you can also install it from the released binaries. ## Usage For searching `QUERY` pattern in `FILE_PATH` use following command: ``` @@ -33,6 +34,18 @@ graby --q QUERY --f FILE_PATH For more options run ``` graby --help + +Usage: graby.exe [OPTIONS] --query --file-path + +Options: + -q, --query Pattern to search in the file + -r, --regex-match Take pattern as regular expression + -f, --file-path Path to the file + -i, --ignore-case Ignore case distinctions while searching QUERY in FILE_PATH + -v, --invert-match Print the lines without QUERY pattern in the FILE_PATH + -h, --help Print help + -V, --version Print version + ``` or check [documentation](https://docs.rs/graby/0.1.0/graby/).
It is licensed under MIT. diff --git a/src/lib.rs b/src/lib.rs index 084927a..5a034dc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ #![allow(unused)] use anyhow::{Context, Result}; +pub mod regex_feature; use clap::Parser; use std::io::{self, read_to_string, BufRead, BufReader, Write}; use std::{error::Error, fs::File, path::PathBuf}; @@ -11,6 +12,9 @@ pub struct Config { /// Pattern to search in the file. #[arg(short, long)] pub query: String, + /// Take pattern as regular expression. + #[arg(short = 'r', long)] + pub regex_match: bool, /// Path to the file. #[arg(short, long)] pub file_path: PathBuf, @@ -21,8 +25,17 @@ pub struct Config { #[arg(short = 'v', long)] pub invert_match: bool, } -/// Search for the query pattern in the given file and display the lines that contain it. +/// Checks if pattern is regular expression or not. pub fn run(config: Config) -> Result<()> { + if config.regex_match { + regex_feature::run_regex(config) + } else { + run_string(config) + } +} + +/// Search for the string query in the given file and display the lines that contain it. +pub fn run_string(config: Config) -> Result<()> { // Open the file let file = File::open(&config.file_path) .with_context(|| format!("could not open the file `{:?}`", &config.file_path))?; @@ -30,7 +43,7 @@ pub fn run(config: Config) -> Result<()> { let reader = BufReader::new(file); let contents = read_to_string(reader) .with_context(|| format!("could not read the file `{:?}`", &config.file_path))?; - // Searching for the query with ignore_case option in the contents. + // Searching for the query string with ignore_case option in the contents. let results = if config.ignore_case { search_case_insensitive(&config.query, &contents, config.invert_match) } else { @@ -43,7 +56,7 @@ pub fn run(config: Config) -> Result<()> { } Ok(()) } -/// This function search for the query with exact case. +/// This function search for the query string with exact case. 
pub fn search<'a>(query: &str, contents: &'a str, invert: bool) -> Vec<&'a str> { let mut results = Vec::new(); //Branches if invert_match option is active or not. @@ -62,9 +75,9 @@ pub fn search<'a>(query: &str, contents: &'a str, invert: bool) -> Vec<&'a str> } results } -/// This function search for the query without case distinction. +/// This function search for the query string without case distinction. pub fn search_case_insensitive<'a>(query: &str, contents: &'a str, invert: bool) -> Vec<&'a str> { - // Lower the case of query pattern. + // Lower the case of query string. let query = query.to_lowercase(); let mut results = Vec::new(); if invert { @@ -87,7 +100,7 @@ pub fn search_case_insensitive<'a>(query: &str, contents: &'a str, invert: bool) mod tests { use super::*; #[test] - // Test for case sensitive search function. + // Test for case sensitive `search` function. fn case_sensitive() { let query = "duct"; let contents = "\ @@ -101,7 +114,7 @@ Duckt three."; } #[test] - // Test for case search_case_insensitive function. + // Test for `case search_case_insensitive function`. fn case_insensitive() { let query = "rUsT"; let contents = "\ @@ -115,7 +128,7 @@ Trust me."; ); } #[test] - // Test for the invert_match option. + // Test for the `invert_match` option. fn invert_search() { let query = "Duckt"; let contents = "\ diff --git a/src/regex_feature.rs b/src/regex_feature.rs new file mode 100644 index 0000000..7d9a27d --- /dev/null +++ b/src/regex_feature.rs @@ -0,0 +1,112 @@ +#![allow(unused)] +use crate::Config; +use anyhow::{Context, Result}; +use regex::Regex; +use std::io::{self, read_to_string, BufRead, BufReader, Write}; +use std::{error::Error, fs::File, path::PathBuf}; + +/// Search for the query regex in the given file and display the lines that contain it. 
+pub fn run_regex(config: Config) -> Result<()> { + // Open the file + let file = File::open(&config.file_path) + .with_context(|| format!("could not open the file `{:?}`", &config.file_path))?; + // Read the file in buffer (8 KB). + let reader = BufReader::new(file); + let contents = read_to_string(reader) + .with_context(|| format!("could not read the file `{:?}`", &config.file_path))?; + + // Searching for the query regex with ignore_case option in the contents. + let results = if config.ignore_case { + // adding regex ignore case flag in query string. + let query_formatted = format!("(?i){}", &config.query); + // Making regex from given `query_formatted`. + let query_re = Regex::new(&query_formatted[..]) + .with_context(|| format!("given regular expression `{:?}` is wrong", &config.query))?; + search_regex(&query_re, &contents, config.invert_match) + } else { + // Making regex from given query. + let query_re = Regex::new(&config.query) + .with_context(|| format!("given regular expression `{:?}` is wrong", &config.query))?; + search_regex(&query_re, &contents, config.invert_match) + }; + + // Writing buffer of lines on the terminal which satisfies the command. + for line in results { + writeln!(io::BufWriter::new(io::stdout().lock()), "{line}"); + } + Ok(()) +} +/// This function search for the query regex with exact case. +pub fn search_regex<'a>(query_re: &Regex, contents: &'a str, invert: bool) -> Vec<&'a str> { + let mut results = Vec::new(); + + //Branches if `invert_match` option is active or not. + if invert { + for line in contents.lines() { + if !query_re.is_match(line) { + results.push(line); + } + } + } else { + for line in contents.lines() { + if query_re.is_match(line) { + results.push(line); + } + } + } + results +} + +/// Small unitest to check the functionality of the program. +#[cfg(test)] +mod tests { + use super::*; + #[test] + // Test for exact case regex search function. 
+ fn regex_case_sensitive() { + let query = Regex::new("HR[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap(); + let contents = "\ +Rust: +safe, fast, productive. +Duckt three. +phone: HR111-222-3333"; + assert_eq!( + vec!["phone: HR111-222-3333"], + search_regex(&query, contents, false) + ); + } + + #[test] + // Test for ignore case regex search. + fn regex_case_insensitive() { + let query = Regex::new("(?i)rUsT[0-9]{3}").unwrap(); + let contents = "\ +Rust123: +RUst324 +safe, fast, productive. +Pick three. +Trust me."; + assert_eq!( + vec!["Rust123:", "RUst324"], + search_regex(&query, contents, false) + ); + } + #[test] + // Test for the `invert_match` option. + fn regex_invert_search() { + let query = Regex::new("Duckt[0-9]{3}").unwrap(); + let query_insensitive = Regex::new("(?i)Duckt[0-9]{3}").unwrap(); + let contents = "\ +safe, fast, productive. +Duckt567 three. +duckt980"; + assert_ne!( + vec!["Duckt567 three."], + search_regex(&query, contents, true) + ); + assert_eq!( + vec!["safe, fast, productive."], + search_regex(&query_insensitive, contents, true) + ); + } +}