From 2a0f501e89c967563a4bf32d924ff02910515354 Mon Sep 17 00:00:00 2001 From: dekkonot Date: Tue, 18 Aug 2020 13:16:43 -0700 Subject: [PATCH] Add lint to guard against bad escape sequences (#146) * Add lint to guard against bad escape sequences closes #46 * Don't mention feature flag in bad_string_escape docs * Change ReasonWhy enum to be in alphabetical order * Add spacing around operators * Clean up nonsense with pointers: - Dereferenced quote_type - Swap to using `str` instead of `&str` where possible --- CHANGELOG.md | 1 + docs/src/lints/bad_string_escape.md | 26 ++ selene-lib/src/lib.rs | 1 + selene-lib/src/rules.rs | 1 + selene-lib/src/rules/bad_string_escape.rs | 257 ++++++++++++++++++ .../lua51_string_escapes.lua | 34 +++ .../lua51_string_escapes.stderr | 105 +++++++ .../roblox_string_escapes.lua | 34 +++ .../roblox_string_escapes.std.toml | 2 + .../roblox_string_escapes.stderr | 67 +++++ 10 files changed, 528 insertions(+) create mode 100644 docs/src/lints/bad_string_escape.md create mode 100644 selene-lib/src/rules/bad_string_escape.rs create mode 100644 selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.lua create mode 100644 selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.stderr create mode 100644 selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.lua create mode 100644 selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.std.toml create mode 100644 selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.stderr diff --git a/CHANGELOG.md b/CHANGELOG.md index 8dcf705a..6a5ad6dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Added `RaycastParams.new`. - Added support for `string.pack`, `string.packsize`, and `string.unpack` to the Roblox standard library. - Added lint `compare_nan` to guard against comparing directly to nan (e.g. `x ~= 0/0`). +- Add lint `bad_string_escape` to guard invalid or malformed string escape sequences. ### Fixed - Fixed `coroutine.yield` only accepting coroutines as a first argument. diff --git a/docs/src/lints/bad_string_escape.md b/docs/src/lints/bad_string_escape.md new file mode 100644 index 00000000..4a590998 --- /dev/null +++ b/docs/src/lints/bad_string_escape.md @@ -0,0 +1,26 @@ +# bad_string_escape +## What it does +Checks for invalid, malformed, or unnecessary string escape sequences. + +## Why this is bad +Invalid string escapes don't do anything, so should obviously be caught in dealt with. Additionally, in double strings, you shouldn't escape single quote strings since it makes the string less readable. Same with single quote strings and double quotes. + +In some cases (specifically `\x` and `\u` in a Roblox codebase) it's possible to write an escape sequence that looks right but doesn't work when ran. Because this is probably not intentional, they are caught by this lint. + +## Example +```lua +print("\m") -- This escape sequence is invalid. + +print("don\'t") -- This escape makes the string less readable than `don't` + +print('\"foo\"') -- This escape makes the string less readable than `"foo"` +``` + +In Roblox: +```lua +print("\x1") -- This escape sequence is malformed (\x expects two hex digits after it) + +print("\u{1234") -- This escape sequence is *also* malformed (\u needs a closing bracket) + +print("\u{110000}") -- This escape sequence is invalid because the max codepoint passed to \u is `10ffff`. +``` \ No newline at end of file diff --git a/selene-lib/src/lib.rs b/selene-lib/src/lib.rs index 495e30cb..783f3a99 100644 --- a/selene-lib/src/lib.rs +++ b/selene-lib/src/lib.rs @@ -214,6 +214,7 @@ pub struct CheckerDiagnostic { use_rules! { almost_swapped: rules::almost_swapped::AlmostSwappedLint, + bad_string_escape: rules::bad_string_escape::BadStringEscapeLint, compare_nan: rules::compare_nan::CompareNanLint, divide_by_zero: rules::divide_by_zero::DivideByZeroLint, empty_if: rules::empty_if::EmptyIfLint, diff --git a/selene-lib/src/rules.rs b/selene-lib/src/rules.rs index 3635efd6..d28cc003 100644 --- a/selene-lib/src/rules.rs +++ b/selene-lib/src/rules.rs @@ -8,6 +8,7 @@ use full_moon::node::Node; use serde::de::DeserializeOwned; pub mod almost_swapped; +pub mod bad_string_escape; pub mod compare_nan; pub mod divide_by_zero; pub mod empty_if; diff --git a/selene-lib/src/rules/bad_string_escape.rs b/selene-lib/src/rules/bad_string_escape.rs new file mode 100644 index 00000000..f8ae8dee --- /dev/null +++ b/selene-lib/src/rules/bad_string_escape.rs @@ -0,0 +1,257 @@ +use super::*; +use std::convert::Infallible; + +use full_moon::{ + ast::{self, Ast}, + tokenizer, + visitors::Visitor, +}; +use regex::Regex; + +lazy_static::lazy_static! { + static ref STRING_ESCAPE_REGEX: Regex = Regex::new(r"\\(u\{|.)([\da-fA-F]*)(\}?)").unwrap(); +} + +enum ReasonWhy { + CodepointTooHigh, + DecimalTooHigh, + DoubleInSingle, + Invalid, + Malformed, + SingleInDouble, +} + +pub struct BadStringEscapeLint; + +impl Rule for BadStringEscapeLint { + type Config = (); + type Error = Infallible; + + fn new(_: Self::Config) -> Result { + Ok(BadStringEscapeLint) + } + + fn pass(&self, ast: &Ast, context: &Context) -> Vec { + let mut visitor = BadStringEscapeVisitor { + sequences: Vec::new(), + roblox: context.is_roblox(), + }; + + visitor.visit_ast(&ast); + + visitor + .sequences + .iter() + .map(|sequence| match sequence.issue { + ReasonWhy::Invalid => Diagnostic::new( + "bad_string_escape", + "string escape sequence doesn't exist".to_owned(), + Label::new(sequence.range.to_owned()), + ), + ReasonWhy::Malformed => Diagnostic::new( + "bad_string_escape", + "string escape sequence is malformed".to_owned(), + Label::new(sequence.range.to_owned()), + ), + ReasonWhy::DecimalTooHigh => Diagnostic::new_complete( + "bad_string_escape", + "decimal escape is too high".to_owned(), + Label::new(sequence.range.to_owned()), + vec![ + "help: the maximum codepoint allowed in decimal escapes is `255`" + .to_owned(), + ], + Vec::new(), + ), + ReasonWhy::CodepointTooHigh => Diagnostic::new_complete( + "bad_string_escape", + "unicode codepoint is too high for this escape sequence".to_owned(), + Label::new(sequence.range.to_owned()), + vec![ + "help: the maximum codepoint allowed in unicode escapes is `10ffff`" + .to_owned(), + ], + Vec::new(), + ), + ReasonWhy::DoubleInSingle => Diagnostic::new( + "bad_string_escape", + "double quotes do not have to be escaped when inside single quoted strings" + .to_owned(), + Label::new(sequence.range.to_owned()), + ), + ReasonWhy::SingleInDouble => Diagnostic::new( + "bad_string_escape", + "single quotes do not have to be escaped when inside double quoted strings" + .to_owned(), + Label::new(sequence.range.to_owned()), + ), + }) + .collect() + } + + fn severity(&self) -> Severity { + Severity::Warning + } + + fn rule_type(&self) -> RuleType { + RuleType::Correctness + } +} + +struct BadStringEscapeVisitor { + sequences: Vec, + roblox: bool, +} + +struct StringEscapeSequence { + range: (usize, usize), + issue: ReasonWhy, +} + +impl Visitor<'_> for BadStringEscapeVisitor { + fn visit_value(&mut self, node: &ast::Value) { + if_chain::if_chain! { + if let ast::Value::String(token) = node; + if let tokenizer::TokenType::StringLiteral { literal, multi_line, quote_type } = token.token_type(); + if multi_line.is_none(); + then { + let quote_type = *quote_type; + let value_start = node.range().unwrap().0.bytes(); + + for captures in STRING_ESCAPE_REGEX.captures_iter(literal) { + let start = value_start + captures.get(1).unwrap().start(); + + match &captures[1] { + "a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\" => {}, + "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" => { + if captures[2].len() > 1 { + let hundreds = u16::from_str_radix(&captures[1], 10).unwrap() * 100; + let tens = u16::from_str_radix(&captures[2][1..2], 10).unwrap(); + if hundreds + tens > 0xff { + self.sequences.push( + StringEscapeSequence{ + range: (start, start + 4), + issue: ReasonWhy::DecimalTooHigh, + } + ); + } + } + }, + "\"" => { + if quote_type == tokenizer::StringLiteralQuoteType::Single { + self.sequences.push( + StringEscapeSequence{ + range: (start, start + 2), + issue: ReasonWhy::DoubleInSingle, + } + ); + } + }, + "'" => { + if quote_type == tokenizer::StringLiteralQuoteType::Double { + self.sequences.push( + StringEscapeSequence{ + range: (start, start + 2), + issue: ReasonWhy::SingleInDouble, + } + ); + } + }, + "z" => { + if !self.roblox { + self.sequences.push( + StringEscapeSequence{ + range: (start, start + 2), + issue: ReasonWhy::Invalid, + } + ); + } + }, + "x" => { + if !self.roblox { + self.sequences.push( + StringEscapeSequence{ + range: (start, start + 2), + issue: ReasonWhy::Invalid, + } + ); + continue; + } + let second_capture_len = captures[2].len(); + if second_capture_len != 2 { + self.sequences.push( + StringEscapeSequence{ + range: (start, start + second_capture_len + 2), + issue: ReasonWhy::Malformed + } + ); + } + }, + "u{" => { + if !self.roblox { + self.sequences.push( + StringEscapeSequence{ + range: (start, start + 2), + issue: ReasonWhy::Invalid, + } + ); + continue; + } + let second_capture_len = captures[2].len(); + if captures[3].len() == 0 { + self.sequences.push( + StringEscapeSequence{ + range: (start, start + second_capture_len + 3), + issue: ReasonWhy::Malformed, + } + ); + continue; + } + let codepoint = u32::from_str_radix(&captures[2], 16).unwrap_or(0x110000); + if codepoint > 0x10ffff { + self.sequences.push( + StringEscapeSequence { + range: (start, start + second_capture_len + 4), + issue: ReasonWhy::CodepointTooHigh, + } + ); + } + }, + _ => { + self.sequences.push( + StringEscapeSequence{ + range: (start, start + 2), + issue: ReasonWhy::Invalid, + } + ); + }, + } + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::{super::test_util::test_lint, *}; + + #[test] + fn test_bad_string_escape() { + test_lint( + BadStringEscapeLint::new(()).unwrap(), + "bad_string_escape", + "lua51_string_escapes", + ); + } + + #[test] + #[cfg(feature = "roblox")] + fn test_bad_string_escape_roblox() { + test_lint( + BadStringEscapeLint::new(()).unwrap(), + "bad_string_escape", + "roblox_string_escapes", + ); + } +} diff --git a/selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.lua b/selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.lua new file mode 100644 index 00000000..6d113668 --- /dev/null +++ b/selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.lua @@ -0,0 +1,34 @@ +local x = "\a" +local x = "\b" +local x = "\f" +local x = "\n" +local x = "\r" +local x = "\t" +local x = "\v" +local x = "\"" +local x = "\\" + +local x = "\'\"" + +local x = '\"\'' + +local x = "\97" + +-- local x = "\ -- Full-moon doesn't like this style of string +-- " + +local bad = "\z" + +local bad2 = "\x1\x10" + +local bad3 = "\u{1337}\u{1234\u{1337}" + +local bad4 = "\u{10ffff}\u{110000}" + +local bad5 = "\m" + +local bad6 = "\999" + +local bad7 = "\u{ffffffffff}" + +local good = [[\z\x1\u{1234\u{110000}\m\"\'\999]] \ No newline at end of file diff --git a/selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.stderr b/selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.stderr new file mode 100644 index 00000000..d0f12add --- /dev/null +++ b/selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.stderr @@ -0,0 +1,105 @@ +error[bad_string_escape]: single quotes do not have to be escaped when inside double quoted strings + + ┌── lua51_string_escapes.lua:11:12 ─── + │ + 11 │ local x = "\'\"" + │ ^^ + │ + +error[bad_string_escape]: double quotes do not have to be escaped when inside single quoted strings + + ┌── lua51_string_escapes.lua:13:12 ─── + │ + 13 │ local x = '\"\'' + │ ^^ + │ + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:20:14 ─── + │ + 20 │ local bad = "\z" + │ ^^ + │ + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:22:15 ─── + │ + 22 │ local bad2 = "\x1\x10" + │ ^^ + │ + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:22:18 ─── + │ + 22 │ local bad2 = "\x1\x10" + │ ^^ + │ + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:24:15 ─── + │ + 24 │ local bad3 = "\u{1337}\u{1234\u{1337}" + │ ^^ + │ + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:24:23 ─── + │ + 24 │ local bad3 = "\u{1337}\u{1234\u{1337}" + │ ^^ + │ + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:24:30 ─── + │ + 24 │ local bad3 = "\u{1337}\u{1234\u{1337}" + │ ^^ + │ + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:26:15 ─── + │ + 26 │ local bad4 = "\u{10ffff}\u{110000}" + │ ^^ + │ + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:26:25 ─── + │ + 26 │ local bad4 = "\u{10ffff}\u{110000}" + │ ^^ + │ + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:28:15 ─── + │ + 28 │ local bad5 = "\m" + │ ^^ + │ + +error[bad_string_escape]: decimal escape is too high + + ┌── lua51_string_escapes.lua:30:15 ─── + │ + 30 │ local bad6 = "\999" + │ ^^^^ + │ + = help: the maximum codepoint allowed in decimal escapes is `255` + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── lua51_string_escapes.lua:32:15 ─── + │ + 32 │ local bad7 = "\u{ffffffffff}" + │ ^^ + │ + diff --git a/selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.lua b/selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.lua new file mode 100644 index 00000000..6d113668 --- /dev/null +++ b/selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.lua @@ -0,0 +1,34 @@ +local x = "\a" +local x = "\b" +local x = "\f" +local x = "\n" +local x = "\r" +local x = "\t" +local x = "\v" +local x = "\"" +local x = "\\" + +local x = "\'\"" + +local x = '\"\'' + +local x = "\97" + +-- local x = "\ -- Full-moon doesn't like this style of string +-- " + +local bad = "\z" + +local bad2 = "\x1\x10" + +local bad3 = "\u{1337}\u{1234\u{1337}" + +local bad4 = "\u{10ffff}\u{110000}" + +local bad5 = "\m" + +local bad6 = "\999" + +local bad7 = "\u{ffffffffff}" + +local good = [[\z\x1\u{1234\u{110000}\m\"\'\999]] \ No newline at end of file diff --git a/selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.std.toml b/selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.std.toml new file mode 100644 index 00000000..98886350 --- /dev/null +++ b/selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.std.toml @@ -0,0 +1,2 @@ +[selene] +name = "roblox" diff --git a/selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.stderr b/selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.stderr new file mode 100644 index 00000000..12d510c7 --- /dev/null +++ b/selene-lib/tests/lints/bad_string_escape/roblox_string_escapes.stderr @@ -0,0 +1,67 @@ +error[bad_string_escape]: single quotes do not have to be escaped when inside double quoted strings + + ┌── roblox_string_escapes.lua:11:12 ─── + │ + 11 │ local x = "\'\"" + │ ^^ + │ + +error[bad_string_escape]: double quotes do not have to be escaped when inside single quoted strings + + ┌── roblox_string_escapes.lua:13:12 ─── + │ + 13 │ local x = '\"\'' + │ ^^ + │ + +error[bad_string_escape]: string escape sequence is malformed + + ┌── roblox_string_escapes.lua:22:15 ─── + │ + 22 │ local bad2 = "\x1\x10" + │ ^^^ + │ + +error[bad_string_escape]: string escape sequence is malformed + + ┌── roblox_string_escapes.lua:24:23 ─── + │ + 24 │ local bad3 = "\u{1337}\u{1234\u{1337}" + │ ^^^^^^^ + │ + +error[bad_string_escape]: unicode codepoint is too high for this escape sequence + + ┌── roblox_string_escapes.lua:26:25 ─── + │ + 26 │ local bad4 = "\u{10ffff}\u{110000}" + │ ^^^^^^^^^^ + │ + = help: the maximum codepoint allowed in unicode escapes is `10ffff` + +error[bad_string_escape]: string escape sequence doesn't exist + + ┌── roblox_string_escapes.lua:28:15 ─── + │ + 28 │ local bad5 = "\m" + │ ^^ + │ + +error[bad_string_escape]: decimal escape is too high + + ┌── roblox_string_escapes.lua:30:15 ─── + │ + 30 │ local bad6 = "\999" + │ ^^^^ + │ + = help: the maximum codepoint allowed in decimal escapes is `255` + +error[bad_string_escape]: unicode codepoint is too high for this escape sequence + + ┌── roblox_string_escapes.lua:32:15 ─── + │ + 32 │ local bad7 = "\u{ffffffffff}" + │ ^^^^^^^^^^^^^^ + │ + = help: the maximum codepoint allowed in unicode escapes is `10ffff` +