From 39d3d2950fa11f2b322dffd8e48a2021dd4d568e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Povi=C5=A1er?= Date: Thu, 3 Oct 2024 20:22:25 +0200 Subject: [PATCH] Fix locations in presence of nested trivia, take 2 (#1136) --- include/slang/parsing/Token.h | 12 +++++----- source/parsing/Preprocessor.cpp | 5 +++-- source/parsing/Token.cpp | 31 +++++++++++++++++++++----- tests/unittests/parsing/LexerTests.cpp | 17 +++++++++++++- 4 files changed, 51 insertions(+), 14 deletions(-) diff --git a/include/slang/parsing/Token.h b/include/slang/parsing/Token.h index 8c8031ef8..553356367 100644 --- a/include/slang/parsing/Token.h +++ b/include/slang/parsing/Token.h @@ -95,13 +95,15 @@ class SLANG_EXPORT Trivia { explicit operator bool() const { return valid(); } - /// If the trivia is raw source text, creates a new trivia with the specified location - /// (instead of implicitly offset from the parent token). If this trivia is for a - /// directive or skipped tokens, returns a copy without modification. - [[nodiscard]] Trivia withLocation(BumpAllocator& alloc, SourceLocation location) const; + /// If the trivia is raw source text, creates a new trivia attached from behind + /// to the specified location (instead of implicitly offset from the parent token). + /// If this trivia is for a directive or skipped tokens, returns a copy without + /// modification. + [[nodiscard]] Trivia withLocation(BumpAllocator& alloc, SourceLocation anchorLocation) const; /// Gets the source location of the trivia if one is explicitly known. If not, nullopt - /// is returned to signify that the location is implicitly relative to the parent token. + /// is returned to signify that the location is implicitly relative to the parent token + /// or subsequent trivia. std::optional getExplicitLocation() const; /// If this trivia is tracking a skipped syntax node or a directive, returns that node. diff --git a/source/parsing/Preprocessor.cpp b/source/parsing/Preprocessor.cpp index be574c328..0a053abd6 100644 --- a/source/parsing/Preprocessor.cpp +++ b/source/parsing/Preprocessor.cpp @@ -438,9 +438,10 @@ Token Preprocessor::nextRaw() { // that one, but we do want to merge its trivia with whatever comes next. SmallVector trivia; auto appendTrivia = [&trivia, this](Token token) { + trivia.append_range(token.trivia()); SourceLocation loc = token.location(); - for (const auto& t : token.trivia()) - trivia.push_back(t.withLocation(alloc, loc)); + if (!token.trivia().empty()) + trivia.back() = trivia.back().withLocation(alloc, loc); }; appendTrivia(token); diff --git a/source/parsing/Token.cpp b/source/parsing/Token.cpp index 08683be3c..bf3aaa78e 100644 --- a/source/parsing/Token.cpp +++ b/source/parsing/Token.cpp @@ -94,7 +94,7 @@ Trivia::Trivia(TriviaKind kind, std::span tokens) : Trivia::Trivia(TriviaKind kind, SyntaxNode* syntax) : syntaxNode(syntax), kind(kind) { } -Trivia Trivia::withLocation(BumpAllocator& alloc, SourceLocation location) const { +Trivia Trivia::withLocation(BumpAllocator& alloc, SourceLocation anchorLocation) const { switch (kind) { case TriviaKind::Directive: case TriviaKind::SkippedSyntax: @@ -104,23 +104,42 @@ Trivia Trivia::withLocation(BumpAllocator& alloc, SourceLocation location) const break; } + auto resultLocation = alloc.emplace(); + resultLocation->text = getRawText(); + resultLocation->location = anchorLocation - resultLocation->text.size(); + Trivia result; result.kind = kind; result.hasFullLocation = true; - result.fullLocation = alloc.emplace(); - result.fullLocation->text = getRawText(); - result.fullLocation->location = location; + result.fullLocation = resultLocation; return result; } +// Get the start location of a token including its trivia +static SourceLocation tokenLocationInclTrivia(const Token& token) { + size_t locOffset = 0; + + // We iterate over trivia until we hit one which has explicit location. + // All trivia without explicit location must be raw source text, for which + // we can easily query its length and add it to the offset. + for (const Trivia& trivia : token.trivia()) { + if (auto loc = trivia.getExplicitLocation()) + return *loc - locOffset; + else + locOffset += trivia.getRawText().size(); + } + + return token.location() - locOffset; +} + std::optional Trivia::getExplicitLocation() const { switch (kind) { case TriviaKind::Directive: case TriviaKind::SkippedSyntax: - return syntaxNode->getFirstToken().location(); + return tokenLocationInclTrivia(syntaxNode->getFirstToken()); case TriviaKind::SkippedTokens: SLANG_ASSERT(tokens.len); - return tokens.ptr[0].location(); + return tokenLocationInclTrivia(tokens.ptr[0]); default: if (hasFullLocation) return fullLocation->location; diff --git a/tests/unittests/parsing/LexerTests.cpp b/tests/unittests/parsing/LexerTests.cpp index 345fc419d..34cae390c 100644 --- a/tests/unittests/parsing/LexerTests.cpp +++ b/tests/unittests/parsing/LexerTests.cpp @@ -1226,7 +1226,22 @@ TEST_CASE("Token with lots of trivia") { trivia = trivia.withLocation(alloc, SourceLocation(BufferID(1, "asdf"), 5)); CHECK(trivia.getRawText() == "/**/"); - CHECK(trivia.getExplicitLocation()->offset() == 5); + CHECK(trivia.getExplicitLocation()->offset() == 1); +} + +TEST_CASE("Directive trivia location") { + auto& text = "//bar\n`define FOO bar"; + Token token = lexToken(text); + + CHECK(token.kind == TokenKind::EndOfFile); + REQUIRE(token.trivia().size() == 1); + + Trivia t = token.trivia()[0]; + CHECK(t.kind == TriviaKind::Directive); + REQUIRE(t.syntax()->kind == SyntaxKind::DefineDirective); + CHECK(t.getExplicitLocation()->offset() == 0); + + CHECK_DIAGNOSTICS_EMPTY; } void testExpect(TokenKind kind) {