From a184acaebf126aebdb3f32ac2c17e7d84cabb801 Mon Sep 17 00:00:00 2001 From: Henner Zeller Date: Thu, 17 Feb 2022 17:58:56 -0800 Subject: [PATCH] Store string_view in Signature() array, not std::string. All strings are backed by the text structure ultimately. There is one string that is generated ad-hoc, which is the assembled location string "@123:456"; this is now kept in a stable backing store. In a test with a particular large file, this resulted in some significant improvement: 311s -> 232s (25% improvement). TODO: storing these vectors of strings seems like a waste of space; couldn't we just have a string-view and a pointer to the parent signature ? Signed-off-by: Henner Zeller --- verilog/tools/kythe/BUILD | 3 ++- verilog/tools/kythe/kythe_facts.h | 9 ++++----- verilog/tools/kythe/kythe_facts_extractor.cc | 13 +++++++++++-- verilog/tools/kythe/scope_resolver.h | 2 +- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/verilog/tools/kythe/BUILD b/verilog/tools/kythe/BUILD index aa2d0a0ca..756bc7453 100644 --- a/verilog/tools/kythe/BUILD +++ b/verilog/tools/kythe/BUILD @@ -49,8 +49,8 @@ cc_library( ":kythe_facts", "//common/util:auto_pop_stack", "//common/util:iterator_range", - "@com_google_absl//absl/strings", "@com_google_absl//absl/container:node_hash_map", + "@com_google_absl//absl/strings", ], ) @@ -94,6 +94,7 @@ cc_library( "//verilog/analysis:verilog_project", "@com_google_absl//absl/container:btree", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:node_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", ], diff --git a/verilog/tools/kythe/kythe_facts.h b/verilog/tools/kythe/kythe_facts.h index a3d2608a8..81b251222 100644 --- a/verilog/tools/kythe/kythe_facts.h +++ b/verilog/tools/kythe/kythe_facts.h @@ -30,12 +30,11 @@ inline constexpr absl::string_view kEmptyKytheLanguage = ""; // Unique identifier for Kythe facts. class Signature { public: - explicit Signature(absl::string_view name = "") - : names_({std::string(name)}) {} + explicit Signature(absl::string_view name = "") : names_({name}) {} Signature(const Signature& parent, absl::string_view name) : names_(parent.Names()) { - names_.push_back(std::string(name)); + names_.push_back(name); } bool operator==(const Signature& other) const { @@ -52,7 +51,7 @@ class Signature { // Returns the signature concatenated as a string in base 64. std::string ToBase64() const; - const std::vector& Names() const { return names_; } + const std::vector& Names() const { return names_; } private: // List that uniquely determines this signature and differentiates it from any @@ -65,7 +64,7 @@ class Signature { // // for "m" ==> ["m"] // for "x" ==> ["m", "x"] - std::vector names_; + std::vector names_; }; template H AbslHashValue(H state, const Signature& v) { diff --git a/verilog/tools/kythe/kythe_facts_extractor.cc b/verilog/tools/kythe/kythe_facts_extractor.cc index 05fac7d51..759f6b295 100644 --- a/verilog/tools/kythe/kythe_facts_extractor.cc +++ b/verilog/tools/kythe/kythe_facts_extractor.cc @@ -24,6 +24,7 @@ #include "absl/container/btree_map.h" #include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_set.h" #include "absl/memory/memory.h" #include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" @@ -279,6 +280,13 @@ class KytheFactsExtractor { // Contains resulting kythe facts and edges to output. KytheIndexingData kythe_data_; + + // Location signature backing store. + // Inside signatures, we record locations as string_views, + // this is the backing store for assembled strings as + // location markers such as "@123:456" + // Needs to be a node_hash_set to provide value stability. + absl::node_hash_set signature_locations_; }; void StreamKytheFactsEntries(KytheOutput* kythe_output, @@ -1266,10 +1274,11 @@ VName KytheFactsExtractor::CreateAnchor(const Anchor& anchor) { const int start_location = std::distance(source_text.begin(), anchor.Text().begin()); const int end_location = start_location + anchor.Text().length(); + const auto [location_str, _] = signature_locations_.emplace( + absl::StrCat("@", start_location, ":", end_location)); const VName anchor_vname = {.path = FilePath(), .root = "", - .signature = Signature(absl::StrCat( - "@", start_location, ":", end_location)), + .signature = Signature(*location_str), .corpus = Corpus()}; CreateFact(anchor_vname, kFactNodeKind, kNodeAnchor); diff --git a/verilog/tools/kythe/scope_resolver.h b/verilog/tools/kythe/scope_resolver.h index e36328c0c..54a9bd8d3 100644 --- a/verilog/tools/kythe/scope_resolver.h +++ b/verilog/tools/kythe/scope_resolver.h @@ -54,7 +54,7 @@ class Scope { // Appends the member of the given scope to the current scope. void AppendScope(const Scope& scope); - using MemberMap = absl::node_hash_map; + using MemberMap = absl::node_hash_map; const MemberMap& Members() const { return members_; } const Signature& GetSignature() const { return signature_; }