-
-
Notifications
You must be signed in to change notification settings - Fork 77
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add lint to guard against bad escape sequences (#146)
* Add lint to guard against bad escape sequences closes #46 * Don't mention feature flag in bad_string_escape docs * Change ReasonWhy enum to be in alphabetical order * Add spacing around operators * Clean up nonsense with pointers: - Dereferenced quote_type - Swap to using `str` instead of `&str` where possible
- Loading branch information
Showing
10 changed files
with
528 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# bad_string_escape | ||
## What it does | ||
Checks for invalid, malformed, or unnecessary string escape sequences. | ||
|
||
## Why this is bad | ||
Invalid string escapes don't do anything, so should obviously be caught in dealt with. Additionally, in double strings, you shouldn't escape single quote strings since it makes the string less readable. Same with single quote strings and double quotes. | ||
|
||
In some cases (specifically `\x` and `\u` in a Roblox codebase) it's possible to write an escape sequence that looks right but doesn't work when ran. Because this is probably not intentional, they are caught by this lint. | ||
|
||
## Example | ||
```lua | ||
print("\m") -- This escape sequence is invalid. | ||
|
||
print("don\'t") -- This escape makes the string less readable than `don't` | ||
|
||
print('\"foo\"') -- This escape makes the string less readable than `"foo"` | ||
``` | ||
|
||
In Roblox: | ||
```lua | ||
print("\x1") -- This escape sequence is malformed (\x expects two hex digits after it) | ||
|
||
print("\u{1234") -- This escape sequence is *also* malformed (\u needs a closing bracket) | ||
|
||
print("\u{110000}") -- This escape sequence is invalid because the max codepoint passed to \u is `10ffff`. | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,257 @@ | ||
use super::*; | ||
use std::convert::Infallible; | ||
|
||
use full_moon::{ | ||
ast::{self, Ast}, | ||
tokenizer, | ||
visitors::Visitor, | ||
}; | ||
use regex::Regex; | ||
|
||
lazy_static::lazy_static! { | ||
static ref STRING_ESCAPE_REGEX: Regex = Regex::new(r"\\(u\{|.)([\da-fA-F]*)(\}?)").unwrap(); | ||
} | ||
|
||
enum ReasonWhy { | ||
CodepointTooHigh, | ||
DecimalTooHigh, | ||
DoubleInSingle, | ||
Invalid, | ||
Malformed, | ||
SingleInDouble, | ||
} | ||
|
||
pub struct BadStringEscapeLint; | ||
|
||
impl Rule for BadStringEscapeLint { | ||
type Config = (); | ||
type Error = Infallible; | ||
|
||
fn new(_: Self::Config) -> Result<Self, Self::Error> { | ||
Ok(BadStringEscapeLint) | ||
} | ||
|
||
fn pass(&self, ast: &Ast, context: &Context) -> Vec<Diagnostic> { | ||
let mut visitor = BadStringEscapeVisitor { | ||
sequences: Vec::new(), | ||
roblox: context.is_roblox(), | ||
}; | ||
|
||
visitor.visit_ast(&ast); | ||
|
||
visitor | ||
.sequences | ||
.iter() | ||
.map(|sequence| match sequence.issue { | ||
ReasonWhy::Invalid => Diagnostic::new( | ||
"bad_string_escape", | ||
"string escape sequence doesn't exist".to_owned(), | ||
Label::new(sequence.range.to_owned()), | ||
), | ||
ReasonWhy::Malformed => Diagnostic::new( | ||
"bad_string_escape", | ||
"string escape sequence is malformed".to_owned(), | ||
Label::new(sequence.range.to_owned()), | ||
), | ||
ReasonWhy::DecimalTooHigh => Diagnostic::new_complete( | ||
"bad_string_escape", | ||
"decimal escape is too high".to_owned(), | ||
Label::new(sequence.range.to_owned()), | ||
vec![ | ||
"help: the maximum codepoint allowed in decimal escapes is `255`" | ||
.to_owned(), | ||
], | ||
Vec::new(), | ||
), | ||
ReasonWhy::CodepointTooHigh => Diagnostic::new_complete( | ||
"bad_string_escape", | ||
"unicode codepoint is too high for this escape sequence".to_owned(), | ||
Label::new(sequence.range.to_owned()), | ||
vec![ | ||
"help: the maximum codepoint allowed in unicode escapes is `10ffff`" | ||
.to_owned(), | ||
], | ||
Vec::new(), | ||
), | ||
ReasonWhy::DoubleInSingle => Diagnostic::new( | ||
"bad_string_escape", | ||
"double quotes do not have to be escaped when inside single quoted strings" | ||
.to_owned(), | ||
Label::new(sequence.range.to_owned()), | ||
), | ||
ReasonWhy::SingleInDouble => Diagnostic::new( | ||
"bad_string_escape", | ||
"single quotes do not have to be escaped when inside double quoted strings" | ||
.to_owned(), | ||
Label::new(sequence.range.to_owned()), | ||
), | ||
}) | ||
.collect() | ||
} | ||
|
||
fn severity(&self) -> Severity { | ||
Severity::Warning | ||
} | ||
|
||
fn rule_type(&self) -> RuleType { | ||
RuleType::Correctness | ||
} | ||
} | ||
|
||
struct BadStringEscapeVisitor { | ||
sequences: Vec<StringEscapeSequence>, | ||
roblox: bool, | ||
} | ||
|
||
struct StringEscapeSequence { | ||
range: (usize, usize), | ||
issue: ReasonWhy, | ||
} | ||
|
||
impl Visitor<'_> for BadStringEscapeVisitor { | ||
fn visit_value(&mut self, node: &ast::Value) { | ||
if_chain::if_chain! { | ||
if let ast::Value::String(token) = node; | ||
if let tokenizer::TokenType::StringLiteral { literal, multi_line, quote_type } = token.token_type(); | ||
if multi_line.is_none(); | ||
then { | ||
let quote_type = *quote_type; | ||
let value_start = node.range().unwrap().0.bytes(); | ||
|
||
for captures in STRING_ESCAPE_REGEX.captures_iter(literal) { | ||
let start = value_start + captures.get(1).unwrap().start(); | ||
|
||
match &captures[1] { | ||
"a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\" => {}, | ||
"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" => { | ||
if captures[2].len() > 1 { | ||
let hundreds = u16::from_str_radix(&captures[1], 10).unwrap() * 100; | ||
let tens = u16::from_str_radix(&captures[2][1..2], 10).unwrap(); | ||
if hundreds + tens > 0xff { | ||
self.sequences.push( | ||
StringEscapeSequence{ | ||
range: (start, start + 4), | ||
issue: ReasonWhy::DecimalTooHigh, | ||
} | ||
); | ||
} | ||
} | ||
}, | ||
"\"" => { | ||
if quote_type == tokenizer::StringLiteralQuoteType::Single { | ||
self.sequences.push( | ||
StringEscapeSequence{ | ||
range: (start, start + 2), | ||
issue: ReasonWhy::DoubleInSingle, | ||
} | ||
); | ||
} | ||
}, | ||
"'" => { | ||
if quote_type == tokenizer::StringLiteralQuoteType::Double { | ||
self.sequences.push( | ||
StringEscapeSequence{ | ||
range: (start, start + 2), | ||
issue: ReasonWhy::SingleInDouble, | ||
} | ||
); | ||
} | ||
}, | ||
"z" => { | ||
if !self.roblox { | ||
self.sequences.push( | ||
StringEscapeSequence{ | ||
range: (start, start + 2), | ||
issue: ReasonWhy::Invalid, | ||
} | ||
); | ||
} | ||
}, | ||
"x" => { | ||
if !self.roblox { | ||
self.sequences.push( | ||
StringEscapeSequence{ | ||
range: (start, start + 2), | ||
issue: ReasonWhy::Invalid, | ||
} | ||
); | ||
continue; | ||
} | ||
let second_capture_len = captures[2].len(); | ||
if second_capture_len != 2 { | ||
self.sequences.push( | ||
StringEscapeSequence{ | ||
range: (start, start + second_capture_len + 2), | ||
issue: ReasonWhy::Malformed | ||
} | ||
); | ||
} | ||
}, | ||
"u{" => { | ||
if !self.roblox { | ||
self.sequences.push( | ||
StringEscapeSequence{ | ||
range: (start, start + 2), | ||
issue: ReasonWhy::Invalid, | ||
} | ||
); | ||
continue; | ||
} | ||
let second_capture_len = captures[2].len(); | ||
if captures[3].len() == 0 { | ||
self.sequences.push( | ||
StringEscapeSequence{ | ||
range: (start, start + second_capture_len + 3), | ||
issue: ReasonWhy::Malformed, | ||
} | ||
); | ||
continue; | ||
} | ||
let codepoint = u32::from_str_radix(&captures[2], 16).unwrap_or(0x110000); | ||
if codepoint > 0x10ffff { | ||
self.sequences.push( | ||
StringEscapeSequence { | ||
range: (start, start + second_capture_len + 4), | ||
issue: ReasonWhy::CodepointTooHigh, | ||
} | ||
); | ||
} | ||
}, | ||
_ => { | ||
self.sequences.push( | ||
StringEscapeSequence{ | ||
range: (start, start + 2), | ||
issue: ReasonWhy::Invalid, | ||
} | ||
); | ||
}, | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::{super::test_util::test_lint, *}; | ||
|
||
#[test] | ||
fn test_bad_string_escape() { | ||
test_lint( | ||
BadStringEscapeLint::new(()).unwrap(), | ||
"bad_string_escape", | ||
"lua51_string_escapes", | ||
); | ||
} | ||
|
||
#[test] | ||
#[cfg(feature = "roblox")] | ||
fn test_bad_string_escape_roblox() { | ||
test_lint( | ||
BadStringEscapeLint::new(()).unwrap(), | ||
"bad_string_escape", | ||
"roblox_string_escapes", | ||
); | ||
} | ||
} |
34 changes: 34 additions & 0 deletions
34
selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.lua
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
local x = "\a" | ||
local x = "\b" | ||
local x = "\f" | ||
local x = "\n" | ||
local x = "\r" | ||
local x = "\t" | ||
local x = "\v" | ||
local x = "\"" | ||
local x = "\\" | ||
|
||
local x = "\'\"" | ||
|
||
local x = '\"\'' | ||
|
||
local x = "\97" | ||
|
||
-- local x = "\ -- Full-moon doesn't like this style of string | ||
-- " | ||
|
||
local bad = "\z" | ||
|
||
local bad2 = "\x1\x10" | ||
|
||
local bad3 = "\u{1337}\u{1234\u{1337}" | ||
|
||
local bad4 = "\u{10ffff}\u{110000}" | ||
|
||
local bad5 = "\m" | ||
|
||
local bad6 = "\999" | ||
|
||
local bad7 = "\u{ffffffffff}" | ||
|
||
local good = [[\z\x1\u{1234\u{110000}\m\"\'\999]] |
Oops, something went wrong.