Skip to content

Commit

Permalink
Output token columns to be _character_ not _byte_-offset in line.
Browse files Browse the repository at this point in the history
Take multi-byte characters into account.

Remove the non-descript operator() in LineColumnMap and replace
with GetLineColAtOffset(). Also add such a convenience method
in text view to reduce abstraction leakage. Ideally, we would
actually never have to deal with 'raw' LineColumnMaps inside
code, only with attached text view.

The GetLineColAtOffset() needs the original text to correctly
count UTF8-characters in the line in question; this should
probably be replaced with either
  * remembering the original string-view from constructing
    the LineColumnMap (but CAVE: lifetime issues)
  * or maybe a sub-structure that remembers multi-byte
    offset within a line.
For now, this is not a big deal as the original string-view
always is available.

Fixes chipsalliance#1047 chipsalliance#1059

Signed-off-by: Henner Zeller <[email protected]>
  • Loading branch information
hzeller committed Oct 27, 2021
1 parent ffc6265 commit 8dd719f
Show file tree
Hide file tree
Showing 12 changed files with 104 additions and 46 deletions.
14 changes: 6 additions & 8 deletions common/analysis/file_analyzer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,12 @@ absl::Status FileAnalyzer::Parse(Parser* parser) {
std::string FileAnalyzer::TokenErrorMessage(
const TokenInfo& error_token) const {
// TODO(fangism): accept a RejectedToken to get an explanation message.
const LineColumnMap& line_column_map = Data().GetLineColumnMap();
const absl::string_view base_text = Data().Contents();
std::ostringstream output_stream;
if (!error_token.isEOF()) {
// TODO(hzeller): simply print LineColumnRange ?
const auto left = line_column_map(error_token.left(base_text));
auto right = line_column_map(error_token.right(base_text));
const auto left = Data().GetLineColAtOffset(error_token.left(base_text));
auto right = Data().GetLineColAtOffset(error_token.right(base_text));
--right.column; // Point to last character, not one-past-the-end.
output_stream << "token: \"" << error_token.text() << "\" at " << left;
if (left.line == right.line) {
Expand All @@ -124,7 +123,7 @@ std::string FileAnalyzer::TokenErrorMessage(
output_stream << '-' << right;
}
} else {
const auto end = line_column_map(base_text.length());
const auto end = Data().GetLineColAtOffset(base_text.length());
output_stream << "token: <<EOF>> at " << end;
}
return output_stream.str();
Expand All @@ -142,17 +141,16 @@ std::vector<std::string> FileAnalyzer::TokenErrorMessages() const {
void FileAnalyzer::ExtractLinterTokenErrorDetail(
const RejectedToken& error_token,
const ReportLinterErrorFunction& error_report) const {
const LineColumnMap& line_column_map = Data().GetLineColumnMap();
const absl::string_view base_text = Data().Contents();

const LineColumn start =
error_token.token_info.isEOF()
? line_column_map(base_text.length())
: line_column_map(error_token.token_info.left(base_text));
? Data().GetLineColAtOffset(base_text.length())
: Data().GetLineColAtOffset(error_token.token_info.left(base_text));
const LineColumn end =
error_token.token_info.isEOF()
? start
: line_column_map(error_token.token_info.right(base_text));
: Data().GetLineColAtOffset(error_token.token_info.right(base_text));

absl::string_view context_line = "";
const auto& lines = Data().Lines();
Expand Down
7 changes: 5 additions & 2 deletions common/analysis/lint_rule_status.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ void LintStatusFormatter::FormatLintRuleStatuses(
*stream << " (autofix available)";
}
*stream << std::endl;
auto cursor = line_column_map_(violation.violation->token.left(base));
auto cursor = line_column_map_.GetLineColAtOffset(
base, violation.violation->token.left(base));
if (cursor.line < static_cast<int>(lines.size())) {
*stream << lines[cursor.line] << std::endl;
*stream << verible::Spacer(cursor.column) << "^" << std::endl;
Expand All @@ -143,7 +144,9 @@ void LintStatusFormatter::FormatViolation(std::ostream* stream,
absl::string_view rule_name) const {
// TODO(fangism): Use the context member to print which named construct or
// design element the violation appears in (or full stack thereof).
(*stream) << path << ':' << line_column_map_(violation.token.left(base))
(*stream) << path << ':'
<< line_column_map_.GetLineColAtOffset(base,
violation.token.left(base))
<< ": " << violation.reason << ' ' << url << " [" << rule_name
<< ']';
}
Expand Down
19 changes: 19 additions & 0 deletions common/analysis/lint_rule_status_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,25 @@ TEST(LintRuleStatusFormatterTestWithContext, MultipleStatusesSimpleOutput) {
RunLintStatusesTest(test, true);
}

TEST(LintRuleStatusFormatterTestWithContext, PointToCorrectUtf8Char) {
SymbolPtr root = Node();
static const int dont_care_tag = 0;
constexpr absl::string_view text("äöüß\n");
// ^ä^ü
LintStatusTest test = {
"rule",
"URL",
"some/file.sv",
text,
{{"reason1", TokenInfo(dont_care_tag, text.substr(0, 2)),
"some/file.sv:1:1: reason1 URL "
"[rule]\näöüß\n"},
{"reason2", TokenInfo(dont_care_tag, text.substr(strlen("äö"), 2)),
"some/file.sv:1:3: reason2 URL "
"[rule]\näöüß\n "}}};
RunLintStatusesTest(test, true);
}

TEST(AutoFixTest, ValidUseCases) {
// 0123456789abcdef
static constexpr absl::string_view text("This is an image");
Expand Down
8 changes: 5 additions & 3 deletions common/analysis/lint_waiver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ void LintWaiver::RegexToLines(absl::string_view contents,
for (std::cregex_iterator i(contents.begin(), contents.end(), *re);
i != std::cregex_iterator(); i++) {
std::cmatch match = *i;
WaiveOneLine(rule.first, line_map(match.position()).line);
WaiveOneLine(rule.first, line_map.LineAtOffset(match.position()));
}
}
}
Expand Down Expand Up @@ -251,7 +251,8 @@ static absl::Status WaiveCommandHandler(
LineColumn regex_token_pos = {};

for (const auto& token : tokens) {
token_pos = line_map(token.left(waive_content));
token_pos =
line_map.GetLineColAtOffset(waive_content, token.left(waive_content));

switch (token.token_enum()) {
case CFG_TK_COMMAND:
Expand Down Expand Up @@ -434,7 +435,8 @@ absl::Status LintWaiverBuilder::ApplyExternalWaivers(
for (const auto c_range : commands) {
const auto command = make_container_range(c_range.begin(), c_range.end());

command_pos = line_map(command.begin()->left(waivers_config_content));
command_pos = line_map.GetLineColAtOffset(
waivers_config_content, command.begin()->left(waivers_config_content));

if (command[0].token_enum() == CFG_TK_COMMENT) {
continue;
Expand Down
5 changes: 4 additions & 1 deletion common/strings/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,10 @@ cc_library(
"//verilog/analysis:__pkg__",
"//verilog/formatting:__pkg__",
],
deps = ["@com_google_absl//absl/strings"],
deps = [
"utf8",
"@com_google_absl//absl/strings"
],
)

cc_test(
Expand Down
22 changes: 15 additions & 7 deletions common/strings/line_column_map.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <vector>

#include "absl/strings/string_view.h"
#include "common/strings/utf8.h"

namespace verible {

Expand Down Expand Up @@ -71,16 +72,23 @@ LineColumnMap::LineColumnMap(const std::vector<absl::string_view>& lines) {
}
}

// Translate byte-offset into line-column.
// Byte offsets beyond the end-of-file will return an unspecified result.
LineColumn LineColumnMap::operator()(int offset) const {
LineColumn LineColumnMap::GetLineColAtOffset(absl::string_view base,
int bytes_offset) const {
const auto begin = beginning_of_line_offsets_.begin();
const auto end = beginning_of_line_offsets_.end();
const auto base = std::upper_bound(begin, end, offset) - 1;
// std::upper_bound is a binary search.
const int line_number = std::distance(begin, base);
const int column = offset - *base;
return LineColumn{line_number, column};
const auto line_at_offset = std::upper_bound(begin, end, bytes_offset) - 1;
const int line_number = std::distance(begin, line_at_offset);
const int len_within_line = bytes_offset - *line_at_offset;
absl::string_view line = base.substr(*line_at_offset, len_within_line);
return LineColumn{line_number, utf8_len(line)};
}

int LineColumnMap::LineAtOffset(int bytes_offset) const {
const auto begin = beginning_of_line_offsets_.begin();
const auto end = beginning_of_line_offsets_.end();
// std::upper_bound is a binary search.
const auto line_at_offset = std::upper_bound(begin, end, bytes_offset) - 1;
return std::distance(begin, line_at_offset);
}
} // namespace verible
13 changes: 11 additions & 2 deletions common/strings/line_column_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,17 @@ class LineColumnMap {
return beginning_of_line_offsets_[index];
}

// Translate byte-offset into line and column.
LineColumn operator()(int bytes_offset) const;
// Get line number at the given byte offset.
int LineAtOffset(int bytes_offset) const;

// Get line and column at the given offset. The column takes multi-byte
// encodings into account, so the column represents the true character not
// simply the byte-offset within the line.
//
// TODO(hzeller): technically, we don't need the base as we already got it
// in the constructor, but change separately after lifetime questions have
// been considered.
LineColumn GetLineColAtOffset(absl::string_view base, int bytes_offset) const;

const std::vector<int>& GetBeginningOfLineOffsets() const {
return beginning_of_line_offsets_;
Expand Down
14 changes: 11 additions & 3 deletions common/strings/line_column_map_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,9 @@ struct LineColumnMapTestData {

// Raw line and column are 0-indexed.
const LineColumnMapTestData map_test_data[] = {
{"", {0}, {{0, {0, 0}}, {1, {0, 1}}}}, // empty file
{"_", {0}, {{0, {0, 0}}, {1, {0, 1}}}}, // no \n before EOF
// Also testing beyond the end of the file - it should return last position
{"", {0}, {{0, {0, 0}}, {1, {0, 0}}, {100, {0, 0}}}}, // empty file
{"_", {0}, {{0, {0, 0}}, {1, {0, 1}}}}, // no \n before EOF
{"abc", {0}, {{0, {0, 0}}, {2, {0, 2}}, {3, {0, 3}}}},
{"\n", {0, 1}, {{0, {0, 0}}, {1, {1, 0}}}}, // one empty line
{"\n\n", {0, 1, 2}, {{0, {0, 0}}, {1, {1, 0}}, {2, {2, 0}}}},
Expand All @@ -60,6 +61,12 @@ const LineColumnMapTestData map_test_data[] = {
{"hello\ndarkness\nmy old friend\n",
{0, 6, 15, 29},
{{0, {0, 0}}, {10, {1, 4}}, {15, {2, 0}}, {20, {2, 5}}}},
// Multi-byte characters. Let's use strlen() to count the bytes, the
// column should accurately point to the character.
{"😀😀😀", {0}, {{static_cast<int>(2 * strlen("😀")), {0, 2}}}},
{"Heizölrückstoßabdämpfung",
{0},
{{static_cast<int>(strlen("Heizölrückstoß")), {0, 14}}}},
};

// Test test verifies that line-column offset appear to the user correctly.
Expand Down Expand Up @@ -163,7 +170,8 @@ TEST(LineColumnMapTest, Lookup) {
for (const auto& test_case : map_test_data) {
const LineColumnMap line_map(test_case.text);
for (const auto& q : test_case.queries) {
EXPECT_EQ(q.line_col, line_map(q.offset))
EXPECT_EQ(q.line_col,
line_map.GetLineColAtOffset(test_case.text, q.offset))
<< "Text: \"" << test_case.text << "\"\n"
<< "Failed testing offset " << q.offset;
}
Expand Down
6 changes: 5 additions & 1 deletion common/text/text_structure.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ class TextStructureView {

const LineColumnMap& GetLineColumnMap() const { return line_column_map_; }

LineColumn GetLineColAtOffset(int bytes_offset) const {
return line_column_map_.GetLineColAtOffset(contents_, bytes_offset);
}

const std::vector<TokenSequence::const_iterator>& GetLineTokenMap() const {
return line_token_map_;
}
Expand Down Expand Up @@ -201,7 +205,7 @@ class TextStructureView {
// the contents_ string view.
absl::Status SyntaxTreeConsistencyCheck() const;

private:
private:
void RecalculateLineColumnMap() {
line_column_map_ = LineColumnMap(contents_);
}
Expand Down
2 changes: 1 addition & 1 deletion verilog/analysis/verilog_linter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ static void AppendLintRuleStatuses(
[&](const verible::LintViolation& violation) {
// Lookup the line number on which the offending token resides.
const size_t offset = violation.token.left(text_base);
const size_t line = line_map(offset).line;
const size_t line = line_map.LineAtOffset(offset);
// Check that line number against the set of waived lines.
const bool waived =
LintWaiver::LineNumberSetContains(*waived_lines, line);
Expand Down
7 changes: 5 additions & 2 deletions verilog/formatting/formatter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,8 @@ static void PrintLargestPartitions(
stream << hline << "\n[" << partition->Size() << " tokens";
if (!partition->IsEmpty()) {
stream << ", starting at line:col "
<< line_column_map(
<< line_column_map.GetLineColAtOffset(
base_text,
partition->TokensRange().front().token->left(base_text));
}
stream << "]: " << *partition << std::endl;
Expand Down Expand Up @@ -616,7 +617,9 @@ class ContinuationCommentAligner {
private:
int GetTokenColumn(const verible::TokenInfo* token) {
CHECK_NOTNULL(token);
const int column = line_column_map_(token->left(base_text_)).column;
const int column =
line_column_map_.GetLineColAtOffset(base_text_, token->left(base_text_))
.column;
CHECK_GE(column, 0);
return column;
}
Expand Down
33 changes: 17 additions & 16 deletions verilog/tools/ls/verible-lsp-adapter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@
namespace verilog {
// Convert our representation of a linter violation to a LSP-Diagnostic
static verible::lsp::Diagnostic ViolationToDiagnostic(
const verilog::LintViolationWithStatus &v, absl::string_view base,
const verible::LineColumnMap &lc_map) {
const verilog::LintViolationWithStatus &v,
const verible::TextStructureView &text) {
const verible::LintViolation &violation = *v.violation;
verible::LineColumn start = lc_map(violation.token.left(base));
verible::LineColumn end = lc_map(violation.token.right(base));
verible::LineColumn start =
text.GetLineColAtOffset(violation.token.left(text.Contents()));
verible::LineColumn end =
text.GetLineColAtOffset(violation.token.right(text.Contents()));
const char *fix_msg = violation.autofixes.empty() ? "" : " (fix available)";
return verible::lsp::Diagnostic{
.range =
Expand Down Expand Up @@ -81,24 +83,24 @@ std::vector<verible::lsp::Diagnostic> CreateDiagnostics(
if (--remaining <= 0) break;
}

const absl::string_view base = current->parser().Data().Contents();
const verible::LineColumnMap& line_column_map = current->parser().Data().GetLineColumnMap();
for (const auto &v : lint_violations) {
result.emplace_back(ViolationToDiagnostic(v, base, line_column_map));
result.emplace_back(ViolationToDiagnostic(v, current->parser().Data()));
if (--remaining <= 0) break;
}
return result;
}

static std::vector<verible::lsp::TextEdit> AutofixToTextEdits(
const verible::AutoFix &fix, absl::string_view base,
const verible::LineColumnMap &lc_map) {
const verible::AutoFix &fix, const verible::TextStructureView &text) {
std::vector<verible::lsp::TextEdit> result;
// TODO(hzeller): figure out if edits are stacking or are all based
// on the same start status.
const absl::string_view base = text.Contents();
for (const verible::ReplacementEdit &edit : fix.Edits()) {
verible::LineColumn start = lc_map(edit.fragment.begin() - base.begin());
verible::LineColumn end = lc_map(edit.fragment.end() - base.begin());
verible::LineColumn start =
text.GetLineColAtOffset(edit.fragment.begin() - base.begin());
verible::LineColumn end =
text.GetLineColAtOffset(edit.fragment.end() - base.begin());
result.emplace_back(verible::lsp::TextEdit{
.range =
{
Expand All @@ -122,13 +124,12 @@ std::vector<verible::lsp::CodeAction> GenerateLinterCodeActions(
verilog::GetSortedViolations(current->lint_result());
if (lint_violations.empty()) return result;

const absl::string_view base = current->parser().Data().Contents();
const verible::LineColumnMap& line_column_map = current->parser().Data().GetLineColumnMap();
const verible::TextStructureView &text = current->parser().Data();

for (const auto &v : lint_violations) {
const verible::LintViolation &violation = *v.violation;
if (violation.autofixes.empty()) continue;
auto diagnostic = ViolationToDiagnostic(v, base, line_column_map);
auto diagnostic = ViolationToDiagnostic(v, text);

// The editor usually has the cursor on a line or word, so we
// only want to output edits that are relevant.
Expand All @@ -144,8 +145,8 @@ std::vector<verible::lsp::CodeAction> GenerateLinterCodeActions(
// The following is translated from json, map uri -> edits.
// We're only sending changes for one document, the current one.
.edit = {.changes = {{p.textDocument.uri,
AutofixToTextEdits(fix, base,
line_column_map)}}},
AutofixToTextEdits(fix,
current->parser().Data())}}},
});
preferred_fix = false; // only the first is preferred.
}
Expand Down

0 comments on commit 8dd719f

Please sign in to comment.