Skip to content

Commit

Permalink
Rollup merge of rust-lang#134192 - nnethercote:rm-Lexer-Parser-dep, r…
Browse files Browse the repository at this point in the history
…=compiler-errors

Remove `Lexer`'s dependency on `Parser`.

Lexing precedes parsing, as you'd expect: `Lexer` creates a `TokenStream` and `Parser` then parses that `TokenStream`.

But, in a horrendous violation of layering abstractions and common sense, `Lexer` depends on `Parser`! The `Lexer::unclosed_delim_err` method does some error recovery that relies on creating a `Parser` to do some post-processing of the `TokenStream` that the `Lexer` just created.

This commit just removes `unclosed_delim_err`. This change removes `Lexer`'s dependency on `Parser`, and also means that `lex_token_tree`'s return value can have a more typical form.

The cost is slightly worse error messages in two obscure cases, as shown in these tests:
- tests/ui/parser/brace-in-let-chain.rs: there is slightly less explanation in this case involving an extra `{`.
- tests/ui/parser/diff-markers/unclosed-delims{,-in-macro}.rs: the diff marker detection is no longer supported (because that detection is implemented in the parser).

In my opinion this cost is outweighed by the magnitude of the code cleanup.

r? ```@chenyukang```
  • Loading branch information
matthiaskrgr authored Dec 13, 2024
2 parents 7d1aba6 + 2e412fe commit af2f25b
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 174 deletions.
40 changes: 23 additions & 17 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,30 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
token: Token::dummy(),
diag_info: TokenTreeDiagInfo::default(),
};
let (_open_spacing, stream, res) = lexer.lex_token_trees(/* is_delimited */ false);
let unmatched_delims = lexer.diag_info.unmatched_delims;

if res.is_ok() && unmatched_delims.is_empty() {
Ok(stream)
} else {
// Return error if there are unmatched delimiters or unclosed delimiters.
// We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
// because the delimiter mismatch is more likely to be the root cause of error
let mut buffer: Vec<_> = unmatched_delims
.into_iter()
.filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
.collect();
if let Err(errs) = res {
// Add unclosing delimiter or diff marker errors
buffer.extend(errs);
let res = lexer.lex_token_trees(/* is_delimited */ false);

let mut unmatched_delims: Vec<_> = lexer
.diag_info
.unmatched_delims
.into_iter()
.filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
.collect();

match res {
Ok((_open_spacing, stream)) => {
if unmatched_delims.is_empty() {
Ok(stream)
} else {
// Return error if there are unmatched delimiters or unclosed delimiters.
Err(unmatched_delims)
}
}
Err(errs) => {
// We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
// because the delimiter mismatch is more likely to be the root cause of error
unmatched_delims.extend(errs);
Err(unmatched_delims)
}
Err(buffer)
}
}

Expand Down
94 changes: 14 additions & 80 deletions compiler/rustc_parse/src/lexer/tokentrees.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
use rustc_ast::token::{self, Delimiter, Token};
use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
use rustc_ast_pretty::pprust::token_to_string;
use rustc_errors::{Applicability, Diag};
use rustc_span::symbol::kw;
use rustc_errors::Diag;

use super::diagnostics::{report_suspicious_mismatch_block, same_indentation_level};
use super::{Lexer, UnmatchedDelim};
use crate::Parser;

impl<'psess, 'src> Lexer<'psess, 'src> {
// Lex into a token stream. The `Spacing` in the result is that of the
// opening delimiter.
pub(super) fn lex_token_trees(
&mut self,
is_delimited: bool,
) -> (Spacing, TokenStream, Result<(), Vec<Diag<'psess>>>) {
) -> Result<(Spacing, TokenStream), Vec<Diag<'psess>>> {
// Move past the opening delimiter.
let open_spacing = self.bump_minimal();

Expand All @@ -27,25 +25,25 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
buf.push(match self.lex_token_tree_open_delim(delim) {
Ok(val) => val,
Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)),
Err(errs) => return Err(errs),
})
}
token::CloseDelim(delim) => {
// Invisible delimiters cannot occur here because `TokenTreesReader` parses
// code directly from strings, with no macro expansion involved.
debug_assert!(!matches!(delim, Delimiter::Invisible(_)));
return (
open_spacing,
TokenStream::new(buf),
if is_delimited { Ok(()) } else { Err(vec![self.close_delim_err(delim)]) },
);
return if is_delimited {
Ok((open_spacing, TokenStream::new(buf)))
} else {
Err(vec![self.close_delim_err(delim)])
};
}
token::Eof => {
return (
open_spacing,
TokenStream::new(buf),
if is_delimited { Err(vec![self.eof_err()]) } else { Ok(()) },
);
return if is_delimited {
Err(vec![self.eof_err()])
} else {
Ok((open_spacing, TokenStream::new(buf)))
};
}
_ => {
// Get the next normal token.
Expand Down Expand Up @@ -107,10 +105,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
// Lex the token trees within the delimiters.
// We stop at any delimiter so we can try to recover if the user
// uses an incorrect delimiter.
let (open_spacing, tts, res) = self.lex_token_trees(/* is_delimited */ true);
if let Err(errs) = res {
return Err(self.unclosed_delim_err(tts, errs));
}
let (open_spacing, tts) = self.lex_token_trees(/* is_delimited */ true)?;

// Expand to cover the entire delimited token tree.
let delim_span = DelimSpan::from_pair(pre_span, self.token.span);
Expand Down Expand Up @@ -247,67 +242,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
this_spacing
}

fn unclosed_delim_err(
&mut self,
tts: TokenStream,
mut errs: Vec<Diag<'psess>>,
) -> Vec<Diag<'psess>> {
// If there are unclosed delims, see if there are diff markers and if so, point them
// out instead of complaining about the unclosed delims.
let mut parser = Parser::new(self.psess, tts, None);
let mut diff_errs = vec![];
// Suggest removing a `{` we think appears in an `if`/`while` condition.
// We want to suggest removing a `{` only if we think we're in an `if`/`while` condition,
// but we have no way of tracking this in the lexer itself, so we piggyback on the parser.
let mut in_cond = false;
while parser.token != token::Eof {
if let Err(diff_err) = parser.err_vcs_conflict_marker() {
diff_errs.push(diff_err);
} else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) {
in_cond = true;
} else if matches!(
parser.token.kind,
token::CloseDelim(Delimiter::Brace) | token::FatArrow
) {
// End of the `if`/`while` body, or the end of a `match` guard.
in_cond = false;
} else if in_cond && parser.token == token::OpenDelim(Delimiter::Brace) {
// Store the `&&` and `let` to use their spans later when creating the diagnostic
let maybe_andand = parser.look_ahead(1, |t| t.clone());
let maybe_let = parser.look_ahead(2, |t| t.clone());
if maybe_andand == token::OpenDelim(Delimiter::Brace) {
// This might be the beginning of the `if`/`while` body (i.e., the end of the
// condition).
in_cond = false;
} else if maybe_andand == token::AndAnd && maybe_let.is_keyword(kw::Let) {
let mut err = parser.dcx().struct_span_err(
parser.token.span,
"found a `{` in the middle of a let-chain",
);
err.span_suggestion(
parser.token.span,
"consider removing this brace to parse the `let` as part of the same chain",
"",
Applicability::MachineApplicable,
);
err.span_label(
maybe_andand.span.to(maybe_let.span),
"you might have meant to continue the let-chain here",
);
errs.push(err);
}
}
parser.bump();
}
if !diff_errs.is_empty() {
for err in errs {
err.cancel();
}
return diff_errs;
}
errs
}

fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> {
// An unexpected closing delimiter (i.e., there is no matching opening delimiter).
let token_str = token_to_string(&self.token);
Expand Down
4 changes: 2 additions & 2 deletions tests/ui/parser/brace-in-let-chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
#![feature(let_chains)]
fn main() {
if let () = ()
&& let () = () { //~ERROR: found a `{` in the middle of a let-chain
&& let () = () {
&& let () = ()
{
}
}

fn quux() {
while let () = ()
&& let () = () { //~ERROR: found a `{` in the middle of a let-chain
&& let () = () {
&& let () = ()
{
}
Expand Down
30 changes: 1 addition & 29 deletions tests/ui/parser/brace-in-let-chain.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -27,33 +27,5 @@ LL | }
LL | }
| ^

error: found a `{` in the middle of a let-chain
--> $DIR/brace-in-let-chain.rs:14:24
|
LL | && let () = () {
| ^
LL | && let () = ()
| ------ you might have meant to continue the let-chain here
|
help: consider removing this brace to parse the `let` as part of the same chain
|
LL - && let () = () {
LL + && let () = ()
|

error: found a `{` in the middle of a let-chain
--> $DIR/brace-in-let-chain.rs:6:24
|
LL | && let () = () {
| ^
LL | && let () = ()
| ------ you might have meant to continue the let-chain here
|
help: consider removing this brace to parse the `let` as part of the same chain
|
LL - && let () = () {
LL + && let () = ()
|

error: aborting due to 3 previous errors
error: aborting due to 1 previous error

6 changes: 4 additions & 2 deletions tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
// The diff marker detection was removed for this example, because it relied on
// the lexer having a dependency on the parser, which was horrible.

macro_rules! foo {
<<<<<<< HEAD
//~^ ERROR encountered diff marker
() {
=======
() { //
>>>>>>> 7a4f13c blah blah blah
}
}
} //~ this file contains an unclosed delimiter
27 changes: 10 additions & 17 deletions tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
error: encountered diff marker
--> $DIR/unclosed-delims-in-macro.rs:2:1
error: this file contains an unclosed delimiter
--> $DIR/unclosed-delims-in-macro.rs:11:48
|
LL | macro_rules! foo {
| - unclosed delimiter
LL | <<<<<<< HEAD
| ^^^^^^^ between this marker and `=======` is the code that we're merging into
LL | () {
| - this delimiter might not be properly closed...
...
LL | =======
| ------- between this marker and `>>>>>>>` is the incoming code
LL | () { //
LL | >>>>>>> 7a4f13c blah blah blah
| ^^^^^^^ this marker concludes the conflict region
|
= note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts
to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers
= help: if you're having merge conflicts after pulling new code:
the top section is the code you already had and the bottom section is the remote code
if you're in the middle of a rebase:
the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased
= note: for an explanation on these markers from the `git` documentation:
visit <https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging#_checking_out_conflicts>
LL | }
| - ^
| |
| ...as it matches this but it has different indentation

error: aborting due to 1 previous error

14 changes: 4 additions & 10 deletions tests/ui/parser/diff-markers/unclosed-delims.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
// The diff marker detection was removed for this example, because it relied on
// the lexer having a dependency on the parser, which was horrible.

mod tests {
#[test]
<<<<<<< HEAD
//~^ ERROR encountered diff marker
//~| NOTE between this marker and `=======`

//~| NOTE conflict markers indicate that
//~| HELP if you're having merge conflicts
//~| NOTE for an explanation on these markers

fn test1() {
=======
//~^ NOTE between this marker and `>>>>>>>`
fn test2() {
>>>>>>> 7a4f13c blah blah blah
//~^ NOTE this marker concludes the conflict region
}
}
} //~ this file contains an unclosed delimiter
27 changes: 10 additions & 17 deletions tests/ui/parser/diff-markers/unclosed-delims.stderr
Original file line number Diff line number Diff line change
@@ -1,23 +1,16 @@
error: encountered diff marker
--> $DIR/unclosed-delims.rs:3:1
error: this file contains an unclosed delimiter
--> $DIR/unclosed-delims.rs:12:48
|
LL | <<<<<<< HEAD
| ^^^^^^^ between this marker and `=======` is the code that we're merging into
LL | mod tests {
| - unclosed delimiter
...
LL | =======
| ------- between this marker and `>>>>>>>` is the incoming code
LL | fn test1() {
| - this delimiter might not be properly closed...
...
LL | >>>>>>> 7a4f13c blah blah blah
| ^^^^^^^ this marker concludes the conflict region
|
= note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts
to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers
= help: if you're having merge conflicts after pulling new code:
the top section is the code you already had and the bottom section is the remote code
if you're in the middle of a rebase:
the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased
= note: for an explanation on these markers from the `git` documentation:
visit <https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging#_checking_out_conflicts>
LL | }
| - ^
| |
| ...as it matches this but it has different indentation

error: aborting due to 1 previous error

0 comments on commit af2f25b

Please sign in to comment.