Skip to content

Commit

Permalink
Add lint to guard against bad escape sequences (#146)
Browse files Browse the repository at this point in the history
* Add lint to guard against bad escape sequences

closes #46

* Don't mention feature flag in bad_string_escape docs

* Change ReasonWhy enum to be in alphabetical order

* Add spacing around operators

* Clean up nonsense with pointers:
 - Dereferenced quote_type
 - Swap to using `str` instead of `&str` where possible
  • Loading branch information
Dekkonot authored Aug 18, 2020
1 parent 7700dc0 commit 2a0f501
Show file tree
Hide file tree
Showing 10 changed files with 528 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- Added `RaycastParams.new`.
- Added support for `string.pack`, `string.packsize`, and `string.unpack` to the Roblox standard library.
- Added lint `compare_nan` to guard against comparing directly to nan (e.g. `x ~= 0/0`).
- Add lint `bad_string_escape` to guard invalid or malformed string escape sequences.

### Fixed
- Fixed `coroutine.yield` only accepting coroutines as a first argument.
Expand Down
26 changes: 26 additions & 0 deletions docs/src/lints/bad_string_escape.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# bad_string_escape
## What it does
Checks for invalid, malformed, or unnecessary string escape sequences.

## Why this is bad
Invalid string escapes don't do anything, so should obviously be caught in dealt with. Additionally, in double strings, you shouldn't escape single quote strings since it makes the string less readable. Same with single quote strings and double quotes.

In some cases (specifically `\x` and `\u` in a Roblox codebase) it's possible to write an escape sequence that looks right but doesn't work when ran. Because this is probably not intentional, they are caught by this lint.

## Example
```lua
print("\m") -- This escape sequence is invalid.

print("don\'t") -- This escape makes the string less readable than `don't`

print('\"foo\"') -- This escape makes the string less readable than `"foo"`
```

In Roblox:
```lua
print("\x1") -- This escape sequence is malformed (\x expects two hex digits after it)

print("\u{1234") -- This escape sequence is *also* malformed (\u needs a closing bracket)

print("\u{110000}") -- This escape sequence is invalid because the max codepoint passed to \u is `10ffff`.
```
1 change: 1 addition & 0 deletions selene-lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ pub struct CheckerDiagnostic {

use_rules! {
almost_swapped: rules::almost_swapped::AlmostSwappedLint,
bad_string_escape: rules::bad_string_escape::BadStringEscapeLint,
compare_nan: rules::compare_nan::CompareNanLint,
divide_by_zero: rules::divide_by_zero::DivideByZeroLint,
empty_if: rules::empty_if::EmptyIfLint,
Expand Down
1 change: 1 addition & 0 deletions selene-lib/src/rules.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use full_moon::node::Node;
use serde::de::DeserializeOwned;

pub mod almost_swapped;
pub mod bad_string_escape;
pub mod compare_nan;
pub mod divide_by_zero;
pub mod empty_if;
Expand Down
257 changes: 257 additions & 0 deletions selene-lib/src/rules/bad_string_escape.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
use super::*;
use std::convert::Infallible;

use full_moon::{
ast::{self, Ast},
tokenizer,
visitors::Visitor,
};
use regex::Regex;

lazy_static::lazy_static! {
static ref STRING_ESCAPE_REGEX: Regex = Regex::new(r"\\(u\{|.)([\da-fA-F]*)(\}?)").unwrap();
}

enum ReasonWhy {
CodepointTooHigh,
DecimalTooHigh,
DoubleInSingle,
Invalid,
Malformed,
SingleInDouble,
}

pub struct BadStringEscapeLint;

impl Rule for BadStringEscapeLint {
type Config = ();
type Error = Infallible;

fn new(_: Self::Config) -> Result<Self, Self::Error> {
Ok(BadStringEscapeLint)
}

fn pass(&self, ast: &Ast, context: &Context) -> Vec<Diagnostic> {
let mut visitor = BadStringEscapeVisitor {
sequences: Vec::new(),
roblox: context.is_roblox(),
};

visitor.visit_ast(&ast);

visitor
.sequences
.iter()
.map(|sequence| match sequence.issue {
ReasonWhy::Invalid => Diagnostic::new(
"bad_string_escape",
"string escape sequence doesn't exist".to_owned(),
Label::new(sequence.range.to_owned()),
),
ReasonWhy::Malformed => Diagnostic::new(
"bad_string_escape",
"string escape sequence is malformed".to_owned(),
Label::new(sequence.range.to_owned()),
),
ReasonWhy::DecimalTooHigh => Diagnostic::new_complete(
"bad_string_escape",
"decimal escape is too high".to_owned(),
Label::new(sequence.range.to_owned()),
vec![
"help: the maximum codepoint allowed in decimal escapes is `255`"
.to_owned(),
],
Vec::new(),
),
ReasonWhy::CodepointTooHigh => Diagnostic::new_complete(
"bad_string_escape",
"unicode codepoint is too high for this escape sequence".to_owned(),
Label::new(sequence.range.to_owned()),
vec![
"help: the maximum codepoint allowed in unicode escapes is `10ffff`"
.to_owned(),
],
Vec::new(),
),
ReasonWhy::DoubleInSingle => Diagnostic::new(
"bad_string_escape",
"double quotes do not have to be escaped when inside single quoted strings"
.to_owned(),
Label::new(sequence.range.to_owned()),
),
ReasonWhy::SingleInDouble => Diagnostic::new(
"bad_string_escape",
"single quotes do not have to be escaped when inside double quoted strings"
.to_owned(),
Label::new(sequence.range.to_owned()),
),
})
.collect()
}

fn severity(&self) -> Severity {
Severity::Warning
}

fn rule_type(&self) -> RuleType {
RuleType::Correctness
}
}

struct BadStringEscapeVisitor {
sequences: Vec<StringEscapeSequence>,
roblox: bool,
}

struct StringEscapeSequence {
range: (usize, usize),
issue: ReasonWhy,
}

impl Visitor<'_> for BadStringEscapeVisitor {
fn visit_value(&mut self, node: &ast::Value) {
if_chain::if_chain! {
if let ast::Value::String(token) = node;
if let tokenizer::TokenType::StringLiteral { literal, multi_line, quote_type } = token.token_type();
if multi_line.is_none();
then {
let quote_type = *quote_type;
let value_start = node.range().unwrap().0.bytes();

for captures in STRING_ESCAPE_REGEX.captures_iter(literal) {
let start = value_start + captures.get(1).unwrap().start();

match &captures[1] {
"a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\" => {},
"0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" => {
if captures[2].len() > 1 {
let hundreds = u16::from_str_radix(&captures[1], 10).unwrap() * 100;
let tens = u16::from_str_radix(&captures[2][1..2], 10).unwrap();
if hundreds + tens > 0xff {
self.sequences.push(
StringEscapeSequence{
range: (start, start + 4),
issue: ReasonWhy::DecimalTooHigh,
}
);
}
}
},
"\"" => {
if quote_type == tokenizer::StringLiteralQuoteType::Single {
self.sequences.push(
StringEscapeSequence{
range: (start, start + 2),
issue: ReasonWhy::DoubleInSingle,
}
);
}
},
"'" => {
if quote_type == tokenizer::StringLiteralQuoteType::Double {
self.sequences.push(
StringEscapeSequence{
range: (start, start + 2),
issue: ReasonWhy::SingleInDouble,
}
);
}
},
"z" => {
if !self.roblox {
self.sequences.push(
StringEscapeSequence{
range: (start, start + 2),
issue: ReasonWhy::Invalid,
}
);
}
},
"x" => {
if !self.roblox {
self.sequences.push(
StringEscapeSequence{
range: (start, start + 2),
issue: ReasonWhy::Invalid,
}
);
continue;
}
let second_capture_len = captures[2].len();
if second_capture_len != 2 {
self.sequences.push(
StringEscapeSequence{
range: (start, start + second_capture_len + 2),
issue: ReasonWhy::Malformed
}
);
}
},
"u{" => {
if !self.roblox {
self.sequences.push(
StringEscapeSequence{
range: (start, start + 2),
issue: ReasonWhy::Invalid,
}
);
continue;
}
let second_capture_len = captures[2].len();
if captures[3].len() == 0 {
self.sequences.push(
StringEscapeSequence{
range: (start, start + second_capture_len + 3),
issue: ReasonWhy::Malformed,
}
);
continue;
}
let codepoint = u32::from_str_radix(&captures[2], 16).unwrap_or(0x110000);
if codepoint > 0x10ffff {
self.sequences.push(
StringEscapeSequence {
range: (start, start + second_capture_len + 4),
issue: ReasonWhy::CodepointTooHigh,
}
);
}
},
_ => {
self.sequences.push(
StringEscapeSequence{
range: (start, start + 2),
issue: ReasonWhy::Invalid,
}
);
},
}
}
}
}
}
}

#[cfg(test)]
mod tests {
use super::{super::test_util::test_lint, *};

#[test]
fn test_bad_string_escape() {
test_lint(
BadStringEscapeLint::new(()).unwrap(),
"bad_string_escape",
"lua51_string_escapes",
);
}

#[test]
#[cfg(feature = "roblox")]
fn test_bad_string_escape_roblox() {
test_lint(
BadStringEscapeLint::new(()).unwrap(),
"bad_string_escape",
"roblox_string_escapes",
);
}
}
34 changes: 34 additions & 0 deletions selene-lib/tests/lints/bad_string_escape/lua51_string_escapes.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
local x = "\a"
local x = "\b"
local x = "\f"
local x = "\n"
local x = "\r"
local x = "\t"
local x = "\v"
local x = "\""
local x = "\\"

local x = "\'\""

local x = '\"\''

local x = "\97"

-- local x = "\ -- Full-moon doesn't like this style of string
-- "

local bad = "\z"

local bad2 = "\x1\x10"

local bad3 = "\u{1337}\u{1234\u{1337}"

local bad4 = "\u{10ffff}\u{110000}"

local bad5 = "\m"

local bad6 = "\999"

local bad7 = "\u{ffffffffff}"

local good = [[\z\x1\u{1234\u{110000}\m\"\'\999]]
Loading

0 comments on commit 2a0f501

Please sign in to comment.