From 2e412fef75ff2f45186863a76fdd7c5a9ec1f018 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 12 Dec 2024 11:49:31 +1100 Subject: [PATCH] Remove `Lexer`'s dependency on `Parser`. Lexing precedes parsing, as you'd expect: `Lexer` creates a `TokenStream` and `Parser` then parses that `TokenStream`. But, in a horrendous violation of layering abstractions and common sense, `Lexer` depends on `Parser`! The `Lexer::unclosed_delim_err` method does some error recovery that relies on creating a `Parser` to do some post-processing of the `TokenStream` that the `Lexer` just created. This commit just removes `unclosed_delim_err`. This change removes `Lexer`'s dependency on `Parser`, and also means that `lex_token_tree`'s return value can have a more typical form. The cost is slightly worse error messages in two obscure cases, as shown in these tests: - tests/ui/parser/brace-in-let-chain.rs: there is slightly less explanation in this case involving an extra `{`. - tests/ui/parser/diff-markers/unclosed-delims{,-in-macro}.rs: the diff marker detection is no longer supported (because that detection is implemented in the parser). In my opinion this cost is outweighed by the magnitude of the code cleanup. --- compiler/rustc_parse/src/lexer/mod.rs | 40 ++++---- compiler/rustc_parse/src/lexer/tokentrees.rs | 94 +++---------------- tests/ui/parser/brace-in-let-chain.rs | 4 +- tests/ui/parser/brace-in-let-chain.stderr | 30 +----- .../diff-markers/unclosed-delims-in-macro.rs | 6 +- .../unclosed-delims-in-macro.stderr | 27 ++---- .../ui/parser/diff-markers/unclosed-delims.rs | 14 +-- .../diff-markers/unclosed-delims.stderr | 27 ++---- 8 files changed, 68 insertions(+), 174 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 2426eb81678e1..443ddfc94ec2b 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -69,24 +69,30 @@ pub(crate) fn lex_token_trees<'psess, 'src>( token: Token::dummy(), diag_info: TokenTreeDiagInfo::default(), }; - let (_open_spacing, stream, res) = lexer.lex_token_trees(/* is_delimited */ false); - let unmatched_delims = lexer.diag_info.unmatched_delims; - - if res.is_ok() && unmatched_delims.is_empty() { - Ok(stream) - } else { - // Return error if there are unmatched delimiters or unclosed delimiters. - // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch - // because the delimiter mismatch is more likely to be the root cause of error - let mut buffer: Vec<_> = unmatched_delims - .into_iter() - .filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess)) - .collect(); - if let Err(errs) = res { - // Add unclosing delimiter or diff marker errors - buffer.extend(errs); + let res = lexer.lex_token_trees(/* is_delimited */ false); + + let mut unmatched_delims: Vec<_> = lexer + .diag_info + .unmatched_delims + .into_iter() + .filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess)) + .collect(); + + match res { + Ok((_open_spacing, stream)) => { + if unmatched_delims.is_empty() { + Ok(stream) + } else { + // Return error if there are unmatched delimiters or unclosed delimiters. + Err(unmatched_delims) + } + } + Err(errs) => { + // We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch + // because the delimiter mismatch is more likely to be the root cause of error + unmatched_delims.extend(errs); + Err(unmatched_delims) } - Err(buffer) } } diff --git a/compiler/rustc_parse/src/lexer/tokentrees.rs b/compiler/rustc_parse/src/lexer/tokentrees.rs index ee38f16d4ecd5..b3f83a320241e 100644 --- a/compiler/rustc_parse/src/lexer/tokentrees.rs +++ b/compiler/rustc_parse/src/lexer/tokentrees.rs @@ -1,12 +1,10 @@ use rustc_ast::token::{self, Delimiter, Token}; use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree}; use rustc_ast_pretty::pprust::token_to_string; -use rustc_errors::{Applicability, Diag}; -use rustc_span::symbol::kw; +use rustc_errors::Diag; use super::diagnostics::{report_suspicious_mismatch_block, same_indentation_level}; use super::{Lexer, UnmatchedDelim}; -use crate::Parser; impl<'psess, 'src> Lexer<'psess, 'src> { // Lex into a token stream. The `Spacing` in the result is that of the @@ -14,7 +12,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { pub(super) fn lex_token_trees( &mut self, is_delimited: bool, - ) -> (Spacing, TokenStream, Result<(), Vec>>) { + ) -> Result<(Spacing, TokenStream), Vec>> { // Move past the opening delimiter. let open_spacing = self.bump_minimal(); @@ -27,25 +25,25 @@ impl<'psess, 'src> Lexer<'psess, 'src> { debug_assert!(!matches!(delim, Delimiter::Invisible(_))); buf.push(match self.lex_token_tree_open_delim(delim) { Ok(val) => val, - Err(errs) => return (open_spacing, TokenStream::new(buf), Err(errs)), + Err(errs) => return Err(errs), }) } token::CloseDelim(delim) => { // Invisible delimiters cannot occur here because `TokenTreesReader` parses // code directly from strings, with no macro expansion involved. debug_assert!(!matches!(delim, Delimiter::Invisible(_))); - return ( - open_spacing, - TokenStream::new(buf), - if is_delimited { Ok(()) } else { Err(vec![self.close_delim_err(delim)]) }, - ); + return if is_delimited { + Ok((open_spacing, TokenStream::new(buf))) + } else { + Err(vec![self.close_delim_err(delim)]) + }; } token::Eof => { - return ( - open_spacing, - TokenStream::new(buf), - if is_delimited { Err(vec![self.eof_err()]) } else { Ok(()) }, - ); + return if is_delimited { + Err(vec![self.eof_err()]) + } else { + Ok((open_spacing, TokenStream::new(buf))) + }; } _ => { // Get the next normal token. @@ -107,10 +105,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { // Lex the token trees within the delimiters. // We stop at any delimiter so we can try to recover if the user // uses an incorrect delimiter. - let (open_spacing, tts, res) = self.lex_token_trees(/* is_delimited */ true); - if let Err(errs) = res { - return Err(self.unclosed_delim_err(tts, errs)); - } + let (open_spacing, tts) = self.lex_token_trees(/* is_delimited */ true)?; // Expand to cover the entire delimited token tree. let delim_span = DelimSpan::from_pair(pre_span, self.token.span); @@ -247,67 +242,6 @@ impl<'psess, 'src> Lexer<'psess, 'src> { this_spacing } - fn unclosed_delim_err( - &mut self, - tts: TokenStream, - mut errs: Vec>, - ) -> Vec> { - // If there are unclosed delims, see if there are diff markers and if so, point them - // out instead of complaining about the unclosed delims. - let mut parser = Parser::new(self.psess, tts, None); - let mut diff_errs = vec![]; - // Suggest removing a `{` we think appears in an `if`/`while` condition. - // We want to suggest removing a `{` only if we think we're in an `if`/`while` condition, - // but we have no way of tracking this in the lexer itself, so we piggyback on the parser. - let mut in_cond = false; - while parser.token != token::Eof { - if let Err(diff_err) = parser.err_vcs_conflict_marker() { - diff_errs.push(diff_err); - } else if parser.is_keyword_ahead(0, &[kw::If, kw::While]) { - in_cond = true; - } else if matches!( - parser.token.kind, - token::CloseDelim(Delimiter::Brace) | token::FatArrow - ) { - // End of the `if`/`while` body, or the end of a `match` guard. - in_cond = false; - } else if in_cond && parser.token == token::OpenDelim(Delimiter::Brace) { - // Store the `&&` and `let` to use their spans later when creating the diagnostic - let maybe_andand = parser.look_ahead(1, |t| t.clone()); - let maybe_let = parser.look_ahead(2, |t| t.clone()); - if maybe_andand == token::OpenDelim(Delimiter::Brace) { - // This might be the beginning of the `if`/`while` body (i.e., the end of the - // condition). - in_cond = false; - } else if maybe_andand == token::AndAnd && maybe_let.is_keyword(kw::Let) { - let mut err = parser.dcx().struct_span_err( - parser.token.span, - "found a `{` in the middle of a let-chain", - ); - err.span_suggestion( - parser.token.span, - "consider removing this brace to parse the `let` as part of the same chain", - "", - Applicability::MachineApplicable, - ); - err.span_label( - maybe_andand.span.to(maybe_let.span), - "you might have meant to continue the let-chain here", - ); - errs.push(err); - } - } - parser.bump(); - } - if !diff_errs.is_empty() { - for err in errs { - err.cancel(); - } - return diff_errs; - } - errs - } - fn close_delim_err(&mut self, delim: Delimiter) -> Diag<'psess> { // An unexpected closing delimiter (i.e., there is no matching opening delimiter). let token_str = token_to_string(&self.token); diff --git a/tests/ui/parser/brace-in-let-chain.rs b/tests/ui/parser/brace-in-let-chain.rs index 1f34c73a2c3bd..2009bc88d9e8d 100644 --- a/tests/ui/parser/brace-in-let-chain.rs +++ b/tests/ui/parser/brace-in-let-chain.rs @@ -3,7 +3,7 @@ #![feature(let_chains)] fn main() { if let () = () - && let () = () { //~ERROR: found a `{` in the middle of a let-chain + && let () = () { && let () = () { } @@ -11,7 +11,7 @@ fn main() { fn quux() { while let () = () - && let () = () { //~ERROR: found a `{` in the middle of a let-chain + && let () = () { && let () = () { } diff --git a/tests/ui/parser/brace-in-let-chain.stderr b/tests/ui/parser/brace-in-let-chain.stderr index 913a34700dfc9..12af95c278688 100644 --- a/tests/ui/parser/brace-in-let-chain.stderr +++ b/tests/ui/parser/brace-in-let-chain.stderr @@ -27,33 +27,5 @@ LL | } LL | } | ^ -error: found a `{` in the middle of a let-chain - --> $DIR/brace-in-let-chain.rs:14:24 - | -LL | && let () = () { - | ^ -LL | && let () = () - | ------ you might have meant to continue the let-chain here - | -help: consider removing this brace to parse the `let` as part of the same chain - | -LL - && let () = () { -LL + && let () = () - | - -error: found a `{` in the middle of a let-chain - --> $DIR/brace-in-let-chain.rs:6:24 - | -LL | && let () = () { - | ^ -LL | && let () = () - | ------ you might have meant to continue the let-chain here - | -help: consider removing this brace to parse the `let` as part of the same chain - | -LL - && let () = () { -LL + && let () = () - | - -error: aborting due to 3 previous errors +error: aborting due to 1 previous error diff --git a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs index da1774acea542..41a7de03d4b79 100644 --- a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs +++ b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.rs @@ -1,9 +1,11 @@ +// The diff marker detection was removed for this example, because it relied on +// the lexer having a dependency on the parser, which was horrible. + macro_rules! foo { <<<<<<< HEAD - //~^ ERROR encountered diff marker () { ======= () { // >>>>>>> 7a4f13c blah blah blah } -} +} //~ this file contains an unclosed delimiter diff --git a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr index 927821ddfaedb..b33f2b5d1b8b2 100644 --- a/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr +++ b/tests/ui/parser/diff-markers/unclosed-delims-in-macro.stderr @@ -1,23 +1,16 @@ -error: encountered diff marker - --> $DIR/unclosed-delims-in-macro.rs:2:1 +error: this file contains an unclosed delimiter + --> $DIR/unclosed-delims-in-macro.rs:11:48 | +LL | macro_rules! foo { + | - unclosed delimiter LL | <<<<<<< HEAD - | ^^^^^^^ between this marker and `=======` is the code that we're merging into +LL | () { + | - this delimiter might not be properly closed... ... -LL | ======= - | ------- between this marker and `>>>>>>>` is the incoming code -LL | () { // -LL | >>>>>>> 7a4f13c blah blah blah - | ^^^^^^^ this marker concludes the conflict region - | - = note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts - to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers - = help: if you're having merge conflicts after pulling new code: - the top section is the code you already had and the bottom section is the remote code - if you're in the middle of a rebase: - the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased - = note: for an explanation on these markers from the `git` documentation: - visit +LL | } + | - ^ + | | + | ...as it matches this but it has different indentation error: aborting due to 1 previous error diff --git a/tests/ui/parser/diff-markers/unclosed-delims.rs b/tests/ui/parser/diff-markers/unclosed-delims.rs index 7d400c3827bb6..827c1eebb9d5f 100644 --- a/tests/ui/parser/diff-markers/unclosed-delims.rs +++ b/tests/ui/parser/diff-markers/unclosed-delims.rs @@ -1,18 +1,12 @@ +// The diff marker detection was removed for this example, because it relied on +// the lexer having a dependency on the parser, which was horrible. + mod tests { #[test] <<<<<<< HEAD -//~^ ERROR encountered diff marker -//~| NOTE between this marker and `=======` - -//~| NOTE conflict markers indicate that -//~| HELP if you're having merge conflicts -//~| NOTE for an explanation on these markers - fn test1() { ======= -//~^ NOTE between this marker and `>>>>>>>` fn test2() { >>>>>>> 7a4f13c blah blah blah -//~^ NOTE this marker concludes the conflict region } -} +} //~ this file contains an unclosed delimiter diff --git a/tests/ui/parser/diff-markers/unclosed-delims.stderr b/tests/ui/parser/diff-markers/unclosed-delims.stderr index 1eab96442b4f2..b2541aa47baed 100644 --- a/tests/ui/parser/diff-markers/unclosed-delims.stderr +++ b/tests/ui/parser/diff-markers/unclosed-delims.stderr @@ -1,23 +1,16 @@ -error: encountered diff marker - --> $DIR/unclosed-delims.rs:3:1 +error: this file contains an unclosed delimiter + --> $DIR/unclosed-delims.rs:12:48 | -LL | <<<<<<< HEAD - | ^^^^^^^ between this marker and `=======` is the code that we're merging into +LL | mod tests { + | - unclosed delimiter ... -LL | ======= - | ------- between this marker and `>>>>>>>` is the incoming code +LL | fn test1() { + | - this delimiter might not be properly closed... ... -LL | >>>>>>> 7a4f13c blah blah blah - | ^^^^^^^ this marker concludes the conflict region - | - = note: conflict markers indicate that a merge was started but could not be completed due to merge conflicts - to resolve a conflict, keep only the code you want and then delete the lines containing conflict markers - = help: if you're having merge conflicts after pulling new code: - the top section is the code you already had and the bottom section is the remote code - if you're in the middle of a rebase: - the top section is the code being rebased onto and the bottom section is the code coming from the current commit being rebased - = note: for an explanation on these markers from the `git` documentation: - visit +LL | } + | - ^ + | | + | ...as it matches this but it has different indentation error: aborting due to 1 previous error