From 85f7289921ac9629084d910f5dc8b525c0b04ee1 Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Tue, 2 Nov 2021 16:50:08 +0100 Subject: [PATCH 1/2] Add convenience function: TextStructureView::GetRangeForToken(). There are various places in which we need the line column range for a token. Put that in one place. Signed-off-by: Henner Zeller --- common/analysis/file_analyzer.cc | 37 ++++++++-------------- common/strings/line_column_map.h | 17 +++++++--- common/strings/line_column_map_test.cc | 27 ++++++++++++++++ common/text/text_structure.cc | 6 ++++ common/text/text_structure.h | 4 +++ verilog/tools/ls/document-symbol-filler.cc | 9 ++---- verilog/tools/ls/verible-lsp-adapter.cc | 10 +++--- 7 files changed, 70 insertions(+), 40 deletions(-) diff --git a/common/analysis/file_analyzer.cc b/common/analysis/file_analyzer.cc index 96216a301..2cb42b47f 100644 --- a/common/analysis/file_analyzer.cc +++ b/common/analysis/file_analyzer.cc @@ -104,26 +104,25 @@ absl::Status FileAnalyzer::Parse(Parser* parser) { std::string FileAnalyzer::TokenErrorMessage( const TokenInfo& error_token) const { // TODO(fangism): accept a RejectedToken to get an explanation message. - const absl::string_view base_text = Data().Contents(); std::ostringstream output_stream; if (!error_token.isEOF()) { // TODO(hzeller): simply print LineColumnRange ? - const auto left = Data().GetLineColAtOffset(error_token.left(base_text)); - auto right = Data().GetLineColAtOffset(error_token.right(base_text)); - --right.column; // Point to last character, not one-past-the-end. - output_stream << "token: \"" << error_token.text() << "\" at " << left; - if (left.line == right.line) { + LineColumnRange range = Data().GetRangeForToken(error_token); + --range.end.column; // Point to last character, not one-past-the-end. + output_stream << "token: \"" << error_token.text() << "\" at " + << range.start; + if (range.start.line == range.end.line) { // Only print upper bound if it differs by > 1 character. - if (left.column + 1 < right.column) { + if (range.start.column + 1 < range.end.column) { // .column is 0-based index, so +1 to get 1-based index. - output_stream << '-' << right.column + 1; + output_stream << '-' << range.end.column + 1; } } else { // Already prints 1-based index. - output_stream << '-' << right; + output_stream << '-' << range.end; } } else { - const auto end = Data().GetLineColAtOffset(base_text.length()); + const auto end = Data().GetLineColAtOffset(Data().Contents().length()); output_stream << "token: <> at " << end; } return output_stream.str(); @@ -141,25 +140,15 @@ std::vector FileAnalyzer::TokenErrorMessages() const { void FileAnalyzer::ExtractLinterTokenErrorDetail( const RejectedToken& error_token, const ReportLinterErrorFunction& error_report) const { - const absl::string_view base_text = Data().Contents(); - - const LineColumn start = - error_token.token_info.isEOF() - ? Data().GetLineColAtOffset(base_text.length()) - : Data().GetLineColAtOffset(error_token.token_info.left(base_text)); - const LineColumn end = - error_token.token_info.isEOF() - ? start - : Data().GetLineColAtOffset(error_token.token_info.right(base_text)); - + const LineColumnRange range = Data().GetRangeForToken(error_token.token_info); absl::string_view context_line = ""; const auto& lines = Data().Lines(); - if (start.line < static_cast(lines.size())) { - context_line = lines[start.line]; + if (range.start.line < static_cast(lines.size())) { + context_line = lines[range.start.line]; } // TODO(b/63893567): Explain syntax errors by inspecting state stack. error_report( - filename_, {start, end}, error_token.phase, + filename_, range, error_token.phase, error_token.token_info.isEOF() ? "" : error_token.token_info.text(), context_line, error_token.explanation); } diff --git a/common/strings/line_column_map.h b/common/strings/line_column_map.h index d37099666..cd12e4028 100644 --- a/common/strings/line_column_map.h +++ b/common/strings/line_column_map.h @@ -38,21 +38,30 @@ struct LineColumn { int line; // 0-based index int column; // 0-based index - bool operator==(const LineColumn& r) const { + constexpr bool operator==(const LineColumn& r) const { return line == r.line && column == r.column; } + constexpr bool operator<(const LineColumn& r) const { + if (line < r.line) return true; + if (line > r.line) return false; + return column < r.column; + } + constexpr bool operator>=(const LineColumn& r) const { return !(*this < r); } }; std::ostream& operator<<(std::ostream&, const LineColumn&); // A complete range. struct LineColumnRange { - LineColumn start; - LineColumn end; + LineColumn start; // Inclusive + LineColumn end; // Exclusive - bool operator==(const LineColumnRange& r) const { + constexpr bool operator==(const LineColumnRange& r) const { return start == r.start && end == r.end; } + constexpr bool PositionInRange(const LineColumn& pos) const { + return pos >= start && pos < end; + } }; std::ostream& operator<<(std::ostream&, const LineColumnRange&); diff --git a/common/strings/line_column_map_test.cc b/common/strings/line_column_map_test.cc index 87a2793c9..808ee89b2 100644 --- a/common/strings/line_column_map_test.cc +++ b/common/strings/line_column_map_test.cc @@ -179,5 +179,32 @@ TEST(LineColumnMapTest, Lookup) { } } +TEST(LineColumnTest, LineColumnComparison) { + constexpr LineColumn before_line{.line = 41, .column = 1}; + constexpr LineColumn before_col{.line = 42, .column = 1}; + constexpr LineColumn center{.line = 42, .column = 8}; + constexpr LineColumn after_col{.line = 42, .column = 8}; + + EXPECT_LT(before_line, center); + EXPECT_LT(before_col, center); + EXPECT_EQ(center, center); + EXPECT_GE(center, center); + EXPECT_GE(after_col, center); +} + +TEST(LineColumnTest, LineColumnRangeComparison) { + constexpr LineColumnRange range{{.line = 42, .column = 17}, + {.line = 42, .column = 22}}; + + constexpr LineColumn before{.line = 42, .column = 16}; + constexpr LineColumn inside_start{.line = 42, .column = 17}; + constexpr LineColumn inside_end{.line = 42, .column = 21}; + constexpr LineColumn outside_after_end{.line = 42, .column = 22}; + + EXPECT_FALSE(range.PositionInRange(before)); + EXPECT_TRUE(range.PositionInRange(inside_start)); + EXPECT_TRUE(range.PositionInRange(inside_end)); + EXPECT_FALSE(range.PositionInRange(outside_after_end)); +} } // namespace } // namespace verible diff --git a/common/text/text_structure.cc b/common/text/text_structure.cc index 7ea8628d0..8c890e06a 100644 --- a/common/text/text_structure.cc +++ b/common/text/text_structure.cc @@ -127,6 +127,12 @@ TokenRange TextStructureView::TokenRangeSpanningOffsets(size_t lower, return make_range(left, right); } +LineColumnRange TextStructureView::GetRangeForToken( + const TokenInfo& token) const { + return {GetLineColAtOffset(token.left(Contents())), + GetLineColAtOffset(token.right(Contents()))}; +} + TokenRange TextStructureView::TokenRangeOnLine(size_t lineno) const { if (lineno + 1 < line_token_map_.size()) { return make_range(line_token_map_[lineno], line_token_map_[lineno + 1]); diff --git a/common/text/text_structure.h b/common/text/text_structure.h index 7f2a1492f..9064213e3 100644 --- a/common/text/text_structure.h +++ b/common/text/text_structure.h @@ -95,10 +95,14 @@ class TextStructureView { const LineColumnMap& GetLineColumnMap() const { return line_column_map_; } + // Given a byte offset, return the line/column LineColumn GetLineColAtOffset(int bytes_offset) const { return line_column_map_.GetLineColAtOffset(contents_, bytes_offset); } + // Convenience function: Given the token, return the range it covers. + LineColumnRange GetRangeForToken(const TokenInfo& token) const; + const std::vector& GetLineTokenMap() const { return line_token_map_; } diff --git a/verilog/tools/ls/document-symbol-filler.cc b/verilog/tools/ls/document-symbol-filler.cc index 7723ac60e..59df1fade 100644 --- a/verilog/tools/ls/document-symbol-filler.cc +++ b/verilog/tools/ls/document-symbol-filler.cc @@ -147,11 +147,8 @@ verible::lsp::Range DocumentSymbolFiller::RangeFromLeaf( verible::lsp::Range DocumentSymbolFiller::RangeFromToken( const verible::TokenInfo &token) const { - verible::LineColumn start = - text_view_.GetLineColAtOffset(token.left(text_view_.Contents())); - verible::LineColumn end = - text_view_.GetLineColAtOffset(token.right(text_view_.Contents())); - return {.start = {.line = start.line, .character = start.column}, - .end = {.line = end.line, .character = end.column}}; + const verible::LineColumnRange range = text_view_.GetRangeForToken(token); + return {.start = {.line = range.start.line, .character = range.start.column}, + .end = {.line = range.end.line, .character = range.end.column}}; } } // namespace verilog diff --git a/verilog/tools/ls/verible-lsp-adapter.cc b/verilog/tools/ls/verible-lsp-adapter.cc index c2b51030b..613db8aae 100644 --- a/verilog/tools/ls/verible-lsp-adapter.cc +++ b/verilog/tools/ls/verible-lsp-adapter.cc @@ -30,16 +30,14 @@ static verible::lsp::Diagnostic ViolationToDiagnostic( const verilog::LintViolationWithStatus &v, const verible::TextStructureView &text) { const verible::LintViolation &violation = *v.violation; - verible::LineColumn start = - text.GetLineColAtOffset(violation.token.left(text.Contents())); - verible::LineColumn end = - text.GetLineColAtOffset(violation.token.right(text.Contents())); + const verible::LineColumnRange range = text.GetRangeForToken(violation.token); const char *fix_msg = violation.autofixes.empty() ? "" : " (fix available)"; return verible::lsp::Diagnostic{ .range = { - .start = {.line = start.line, .character = start.column}, - .end = {.line = end.line, .character = end.column}, + .start = {.line = range.start.line, + .character = range.start.column}, + .end = {.line = range.end.line, .character = range.end.column}, }, .message = absl::StrCat(violation.reason, " ", v.status->url, "[", v.status->lint_rule_name, "]", fix_msg), From a00981883197b7533623ec4924b3ad32e1eb9cf0 Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Tue, 2 Nov 2021 19:39:42 +0100 Subject: [PATCH 2/2] Add unit test for GetRangeForToken(). Treat EOF universally. The EOF token is not always relative to the TextStructureView but the static TokenInfo::EOFToken() constant. Make sure that this is also properly handled in the GetRangeForToken() method. Signed-off-by: Henner Zeller --- common/text/text_structure.cc | 6 ++++++ common/text/text_structure_test.cc | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/common/text/text_structure.cc b/common/text/text_structure.cc index 8c890e06a..026cc618d 100644 --- a/common/text/text_structure.cc +++ b/common/text/text_structure.cc @@ -129,6 +129,12 @@ TokenRange TextStructureView::TokenRangeSpanningOffsets(size_t lower, LineColumnRange TextStructureView::GetRangeForToken( const TokenInfo& token) const { + if (token.isEOF()) { + // In particular some unit tests pass in an artificial EOF token, not a + // EOF token generated from this view. So handle this directly. + const LineColumn eofPos = GetLineColAtOffset(Contents().length()); + return {eofPos, eofPos}; + } return {GetLineColAtOffset(token.left(Contents())), GetLineColAtOffset(token.right(Contents()))}; } diff --git a/common/text/text_structure_test.cc b/common/text/text_structure_test.cc index c8c46fd0d..1c0dda678 100644 --- a/common/text/text_structure_test.cc +++ b/common/text/text_structure_test.cc @@ -204,6 +204,25 @@ TEST_F(TokenRangeTest, CalculateFirstTokensPerLineTest) { EXPECT_EQ(line_token_map[3], tokens.begin() + 11); } +TEST_F(TokenRangeTest, GetRangeOfTokenVerifyAllRangesExclusive) { + // Bulk testing: let's see that we constantly progress in emitted ranges. + LineColumnRange previous{{0, 0}, {0, 0}}; + for (const TokenInfo& token : data_.TokenStream()) { + LineColumnRange token_range = data_.GetRangeForToken(token); + EXPECT_EQ(token_range.start, previous.end); + EXPECT_LT(previous.end, token_range.end); + EXPECT_GE(token_range.end, token_range.start); + previous = token_range; + } +} + +TEST_F(TokenRangeTest, GetRangeOfTokenEofTokenAcceptedUniversally) { + // For the EOF token, the returned range should automatically be relative + // to the TextView no matter where it comes from. + EXPECT_EQ(data_.GetRangeForToken(data_.EOFToken()), + data_.GetRangeForToken(TokenInfo::EOFToken())); +} + // Checks that when lower == upper, returned range is empty. TEST_F(TokenRangeTest, TokenRangeSpanningOffsetsEmpty) { const size_t test_offsets[] = {0, 1, 4, 12, 18, 22, 26};