From ec563d33a05580eff1b2561e512a46b5b0898d00 Mon Sep 17 00:00:00 2001 From: Adam Cimarosti Date: Mon, 29 Jan 2024 12:28:50 +0000 Subject: [PATCH] feat: excluded control chars in values --- README.md | 7 ++++--- src/lib.rs | 7 ++++++- tests/tests.rs | 19 +++++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b049537..2a89f85 100644 --- a/README.md +++ b/README.md @@ -14,9 +14,10 @@ service::key1=value1;key2=value2;key3=value3; A few rules: * The last semicolon is mandatory. * Service name and keys are case-insensitive. -* Values are case-sensitive. * Keys are ASCII alphanumeric and start with a letter. -* If a semicolon `;` appears in a value, escaped it as a double semicolon `;;`. +* Values are case-sensitive unicode strings which can contain any characters, + * Except control characters (`0x00..=0x1f` and `0x7f..=0x9f`). + * If a semicolon `;` appears in a value, it is escaped as a double semicolon `;;`. ## Grammar @@ -33,7 +34,7 @@ alpha ::= "a".."z" | "A".."Z" alphanumeric ::= "a".."z" | "A".."Z" | "0".."9" value_char ::= non_semicolon_char | escaped_semicolon escaped_semicolon ::= ";;" -non_semicolon_char ::= ? any character except ';' ? +non_semicolon_char ::= ? any unicode character except ';', 0x00..=0x1f and 0x7f..=0x9f ? 
``` ## Usage diff --git a/src/lib.rs b/src/lib.rs index 1d4ad95..bcb2baf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -91,6 +91,7 @@ pub enum ErrorKind { ExpectedIdentifierNotEmpty, BadSeparator((char, char)), IncompleteKeyValue, + InvalidValueChar(char), MissingTrailingSemicolon, DuplicateKey(String), } @@ -128,6 +129,7 @@ impl Display for ErrorKind { ErrorKind::IncompleteKeyValue => { write!(f, "incomplete key-value pair before end of input") } + ErrorKind::InvalidValueChar(c) => write!(f, "invalid value char {:?}", c), ErrorKind::MissingTrailingSemicolon => write!(f, "missing trailing semicolon"), ErrorKind::DuplicateKey(s) => write!(f, "duplicate key {:?}", s), } @@ -215,7 +217,10 @@ fn parse_value( value.push(';'); } (Some((_, ';')), _) => break, - (Some((_, c)), _) => { + (Some((p, c)), _) => { + if matches!(c, '\u{0}'..='\u{1f}' | '\u{7f}'..='\u{9f}') { + return Err(parse_err(ErrorKind::InvalidValueChar(c), p)); + } value.push(c); let _ = iter.next(); } diff --git a/tests/tests.rs b/tests/tests.rs index f3988a8..f537351 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -328,3 +328,22 @@ fn unicode_value() -> Result<(), ParsingError> { assert_eq!(config.get("x"), Some("協定")); Ok(()) } + +#[test] +fn invalid_ctrl_chars_in_value() { + let bad_chars = [ + '\x00', '\x01', '\x02', '\x03', '\x04', '\x1f', '\x7f', '\u{80}', '\u{8a}', '\u{9f}', + ]; + for bad in bad_chars { + let input = format!("http::x={};", bad); + let config = parse_conf_str(&input); + assert!(config.is_err()); + let err = config.unwrap_err(); + assert_eq!(err.kind(), ErrorKind::InvalidValueChar(bad)); + assert_eq!(err.position(), 8); + assert_eq!( + err.to_string(), + format!("invalid value char {:?} at position 8", bad) + ); + } +}