forked from chipsalliance/verible
-
Notifications
You must be signed in to change notification settings - Fork 0
/
symbol_table.h
525 lines (431 loc) · 21.7 KB
/
symbol_table.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
// Copyright 2017-2020 The Verible Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef VERIBLE_VERILOG_ANALYSIS_SYMBOL_TABLE_H_
#define VERIBLE_VERILOG_ANALYSIS_SYMBOL_TABLE_H_
#include <functional>
#include <iosfwd>
#include <map>
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "common/strings/compare.h"
#include "common/text/symbol.h"
#include "common/util/map_tree.h"
#include "common/util/vector_tree.h"
#include "verilog/analysis/verilog_project.h"
namespace verilog {
struct SymbolInfo; // forward declaration, defined below
// SymbolTableNode represents a named element in the syntax.
// When it represents a scope, it may have named subtrees.
// The string_view key carries positional information, it corresponds to a
// substring owned by a VerilogSourceFile (which must outlive the symbol table),
// and can be used to look up file origin and position within file.
using SymbolTableNode =
verible::MapTree<absl::string_view, SymbolInfo, verible::StringViewCompare>;
std::ostream& SymbolTableNodeFullPath(std::ostream&, const SymbolTableNode&);
// Classify what type of element a particular symbol is defining.
enum class SymbolMetaType {
kRoot,
kClass,
kModule,
kGenerate, // loop or conditional generate block
kPackage,
kParameter,
kTypeAlias, // typedef
kDataNetVariableInstance,
kFunction,
kTask,
kStruct,
kEnumType,
kEnumConstant,
kInterface,
// The following enums represent classes/groups of the above types,
// and are used for validating metatypes of symbol references.
kUnspecified, // matches all of the above (any type)
kCallable, // matches only kFunction or kTask
};
std::ostream& operator<<(std::ostream&, SymbolMetaType);
// This classifies the type of reference that a single identifier is.
enum class ReferenceType {
// The base identifier in any chain.
// These symbols are resolved by potentially searching up-scope from the
// current context.
kUnqualified,
// Similar to kUnqualified in that it is in base position, however, this
// symbol must be resolved only locally in the current context, without any
// upward search. This is suitable for out-of-line definitions, where the
// base (in "base::member") must be resolved in only the enclosing scope.
kImmediate,
// ::id (for packages, and class static members)
// These symbols are resolved by searching in the parent symbol's
// context (or its imported/inherited namespaces).
kDirectMember,
// .id (for object of struct/class type members)
// These symbols are resolved by searching in the parent object's *type* scope
// (or its imported/inherited namespaces).
kMemberOfTypeOfParent,
};
std::ostream& operator<<(std::ostream&, ReferenceType);
// References may form "trees" of dependencies (ReferenceComponentNode).
// ReferenceComponent is the data portion of each node an a reference tree.
// The overall tree structure drives the order in which references are resolved.
//
// Examples:
// * "a::b" and "a.b", resolving "b" depends on resolving "a" first.
// * "foo bar(.x(y));" resolving "x" depends on typeof(bar) -> resolving
// "foo", but "y" is only resolved using local context, and forms its own
// independent reference chain.
struct ReferenceComponent {
// This is the token substring of the identifier being referenced.
// String memory is expected to be owned by a VerilogSourceFile.
// This string_view also carries positional information: it can be used to
// locate the originating VerilogSourceFile via
// VerilogProject::LookupFileOrigin() and in-file position from the file's
// LineColumnMap.
const absl::string_view identifier;
// What kind of reference is this, and how should it be resolved?
// See enum definition above.
const ReferenceType ref_type;
// Inform the symbol resolver that this symbol must be of a certain metatype.
// SymbolInfo::kUnspecified is interpreted as "any metatype" for cases
// where it is not known in advance.
const SymbolMetaType required_metatype = SymbolMetaType::kUnspecified;
// This points to the definition with which this symbol was resolved.
// During symbol table construction, this remains nullptr.
// If symbol resolution succeeds, this will be updated to non-nullptr.
// This may remain as nullptr if symbol resolution fails.
// Symbol table merge operations may invalidate these pointers, so make sure
// merges are done before attempting symbol resolution.
// This should only be set by ResolveSymbol().
// TODO: privatize this member.
const SymbolTableNode* resolved_symbol = nullptr;
public:
ReferenceComponent(const ReferenceComponent&) = default; // safe to copy
ReferenceComponent(ReferenceComponent&&) = default;
ReferenceComponent& operator=(const ReferenceComponent&) = delete;
ReferenceComponent& operator=(ReferenceComponent&&) = delete;
absl::Status MatchesMetatype(SymbolMetaType) const;
// Resolves this symbol and verifies that metatypes are compatible, which is
// reflected in the returned Status.
absl::Status ResolveSymbol(const SymbolTableNode&);
// Only print ref_type and identifier.
std::ostream& PrintPathComponent(std::ostream&) const;
// Print everything, showing symbol path if it is resolved.
std::ostream& PrintVerbose(std::ostream&) const;
// Structural consistency check.
void VerifySymbolTableRoot(const SymbolTableNode* root) const;
};
std::ostream& operator<<(std::ostream&, const ReferenceComponent&);
// A node in a tree of *dependent* hierchical references.
// An expression like "x.y.z" will form a linear chain, where resolving 'y'
// depends on 'x', and resolving 'z' depends on 'y'. Named ports are manifest as
// wide nodes: in "f(.a(...), .b(...))", both 'a' and 'b' depend on resolving
// 'f' (and thus are siblings).
using ReferenceComponentNode = verible::VectorTree<ReferenceComponent>;
// Human-readable representation of a node's path from root.
std::ostream& ReferenceNodeFullPath(std::ostream&,
const ReferenceComponentNode&);
// View a ReferenceComponentNode's children as an ordered map, keyed by
// reference string. A single node may have multiple references to the same
// child id, and they are expected to resolve the same way, so one of them is
// arbitrarily chosen to be included in the map, while the others are dropped.
// Primarily for debugging and visualization.
using ReferenceComponentMap =
std::map<absl::string_view, const ReferenceComponentNode*,
verible::StringViewCompare>;
ReferenceComponentMap ReferenceComponentNodeMapView(
const ReferenceComponentNode&);
// Represents any (chained) qualified or unqualified reference.
struct DependentReferences {
// Sequence of identifiers in a chain like "a.b.c", or "x::y::z".
// The first element always has ReferenceType::kUnqualified.
// This is wrapped in a unique_ptr to guarantee address stability on-move.
std::unique_ptr<ReferenceComponentNode> components;
public:
DependentReferences() = default;
// move-only
DependentReferences(const DependentReferences&) = delete;
DependentReferences(DependentReferences&&) = default;
DependentReferences& operator=(const DependentReferences&) = delete;
DependentReferences& operator=(DependentReferences&&) = delete;
// Returns true of no references were collected.
bool Empty() const { return components == nullptr; }
// Returns the current terminal descendant.
const ReferenceComponentNode* LastLeaf() const;
// Returns the last type component of a reference tree.
// e.g. from "A#(.B())::C#(.D())" -> "C"
const ReferenceComponentNode* LastTypeComponent() const;
ReferenceComponentNode* LastTypeComponent();
// Structural consistency check.
// When traversing an unqualified or qualified reference, use this to grow a
// new leaf node in the reference tree.
// Returns a pointer to the new node.
ReferenceComponentNode* PushReferenceComponent(
const ReferenceComponent& component);
// Structural consistency check.
void VerifySymbolTableRoot(const SymbolTableNode* root) const;
// Attempt to resolve all symbol references.
void Resolve(const SymbolTableNode& context,
std::vector<absl::Status>* diagnostics) const;
// Attempt to resolve only local symbol references.
void ResolveLocally(const SymbolTableNode& context) const;
// Attempt to only resolve the base of the reference (the first component).
absl::StatusOr<SymbolTableNode*> ResolveOnlyBaseLocally(
SymbolTableNode* context);
};
std::ostream& operator<<(std::ostream&, const DependentReferences&);
// Contains information about a type used to declare data/instances/variables.
struct DeclarationTypeInfo {
// Pointer to the syntax tree origin, e.g. a NodeEnum::kDataType node.
// This is useful for diagnostic that detail the relevant text,
// which can be recovered by StringSpanOfSymbol(const verible::Symbol&).
const verible::Symbol* syntax_origin = nullptr;
// Pointer to the reference node that represents a user-defined type, if
// applicable.
// For built-in and primitive types, this is left as nullptr.
// This will be used to lookup named members of instances/objects of this
// type.
//
// These pointers should point to an element inside a
// DependentReferences::components in the same SymbolTable.
// (Note: to verify this membership at run-time could be expensive because it
// would require checking all DeclarationTypeInfos in the SymbolTable
// hierarchy.)
//
// This pointer must remain stable, even as reference trees grow, which
// mandates reserve()-ing ReferenceComponentNodes' children one-time in
// advance, and only ever moving ReferenceComponents, never copying them.
const ReferenceComponentNode* user_defined_type = nullptr;
// Indicates that this is implicit declaration.
// FIXME(ldk): Check if this could be replaced by user_defined_type pointing
// to default implicit type.
bool implicit = false;
public:
DeclarationTypeInfo() = default;
// copy-able, move-able, assignable
DeclarationTypeInfo(const DeclarationTypeInfo&) = default;
DeclarationTypeInfo(DeclarationTypeInfo&&) = default;
DeclarationTypeInfo& operator=(const DeclarationTypeInfo&) = default;
DeclarationTypeInfo& operator=(DeclarationTypeInfo&&) = default;
// Structural consistency check.
void VerifySymbolTableRoot(const SymbolTableNode* root) const;
};
std::ostream& operator<<(std::ostream&, const DeclarationTypeInfo&);
// This data type holds information about what each SystemVerilog symbol is.
// An alternative implementation could be done using an abstract base class,
// and subclasses for each element type.
struct SymbolInfo {
// What is this symbol? (package, module, type, variable, etc.)
SymbolMetaType metatype;
// This symbol's name is stored in the .Key() of the SymbolTableNode that
// encloses this structure. (This is SymbolTableNode::Value().)
// In which file is this considered "defined"?
// Technically, this can be recovered by looking up a string_view range of
// text in VerilogProject (StringSpanOfSymbol(*syntax_origin)).
const VerilogSourceFile* file_origin = nullptr;
// Pointer to the syntax tree origin.
// An easy way to view this text is StringSpanOfSymbol(*syntax_origin).
// Reminder: Parts of the syntax tree may originate from included files.
const verible::Symbol* syntax_origin = nullptr;
// What is the type associated with this symbol?
// Only applicable to typed elements: variables, nets, instances,
// typedefs, etc.
// This is only set after resolving type references.
DeclarationTypeInfo declared_type;
// Collection of generated scope names that exists for the sake of persistent
// string memory storage (since all other symbol table node keys rely on
// string_views that belong the source file's string memory buffer).
// We cannot use std::string directly due to the move-ability requirements
// along with the possibility of short-string-optimization.
// std::vector is move-able on reallocation and unique_ptr guarantees
// move-stability.
std::vector<std::unique_ptr<const std::string>> anonymous_scope_names;
// TODO(fangism): symbol attributes
// visibility: is this symbol (as a member of its parent) public?
// ports and parameters can be public, whereas local variables are not.
// Is this class-member static or non-static?
// Is this definition complete or only a forward declaration?
// TODO(fangism): imported symbols and namespaces
// Applicable to packages, classes, modules.
// These should be looked up when searching for symbols (without copying).
// For elements with inheritance only: this points to a base class.
// Currently limited to single-inheritance.
// TODO: extend to support multiple-implementation and interface-class
// multiple inheritance.
DeclarationTypeInfo parent_type;
// Collection of references to resolve and bind that appear in the same
// context. There is no sequential ordering dependency among these references,
// theoretically, they could all be resolved in parallel.
// This does not need to be insertion-iterator-stable.
std::vector<DependentReferences> local_references_to_bind;
// TODO(fangism): make this searchable by substring offsets.
// TODO(fangism): cache/memoize the resolution of this node so that resolution
// can be triggered on-demand, and also avoid duplicate work.
public: // methods
SymbolInfo() = default;
// move-only
SymbolInfo(const SymbolInfo&) = delete;
SymbolInfo(SymbolInfo&&) = default;
SymbolInfo& operator=(const SymbolInfo&) = delete;
SymbolInfo& operator=(SymbolInfo&&) = delete;
// Generate a scope name whose string memory lives and moves with this object.
// 'base' is used as part of the generated name.
absl::string_view CreateAnonymousScope(absl::string_view base);
// Attempt to resolve all symbol references.
void Resolve(const SymbolTableNode& context,
std::vector<absl::Status>* diagnostics);
// Attempt to resolve only symbols local to 'context' (no upward search).
void ResolveLocally(const SymbolTableNode& context);
// Internal consistency check.
void VerifySymbolTableRoot(const SymbolTableNode* root) const;
// Show definition info of this symbol.
std::ostream& PrintDefinition(std::ostream& stream, size_t indent = 0) const;
// Show references that are to be resolved starting with this node's scope.
std::ostream& PrintReferences(std::ostream& stream, size_t indent = 0) const;
// Functor to compare string starting address, for positional sorting.
struct StringAddressCompare {
using is_transparent = void; // heterogeneous lookup
static absl::string_view ToString(absl::string_view s) { return s; }
static absl::string_view ToString(const DependentReferences* ref) {
return ref->components->Value().identifier;
}
template <typename L, typename R>
bool operator()(L l, R r) const {
static constexpr std::less<const void*> compare_address;
return compare_address(ToString(l).begin(), ToString(r).begin());
}
};
typedef std::set<const DependentReferences*, StringAddressCompare>
address_ordered_set_type;
typedef std::map<absl::string_view, address_ordered_set_type,
verible::StringViewCompare>
references_map_view_type;
// For testing only, quickly find reference candidates by name, and positional
// occurence. The outer map is ordered by string contents, and the inner
// associative container is ordered by substring memory address as a secondary
// key. The nested map is storage-inefficient (compared to a flat
// std::vector), and is only suitable for testing.
references_map_view_type LocalReferencesMapViewForTesting() const;
};
// This map type represents the global namespace of preprocessor macro
// definitions.
// The string_view key should be a substring of text whose memory is owned by
// any VerilogSourceFile that defines the macro.
// TODO(fangism): This should come from a preprocessor and possibly maintained
// per translation-unit in multi-file compilation mode.
using MacroSymbolMap =
std::map<absl::string_view, SymbolInfo, verible::StringViewCompare>;
// SymbolTable maintains a named hierarchy of named symbols and scopes for
// SystemVerilog. This can be built up separately per translation unit,
// or in a unified table across all translation units.
//
// Typical usage:
// VerilogProject project(...);
// project.OpenTranslationUnit(...); // open files in loop
//
// SymbolTable symbol_table(&project);
//
// std::vector<absl::Status> diagnostics;
// symbol_table.Build(&diagnostics);
// // ... optional work ...
//
// symbol_table.Resolve(&diagnostics);
// // report diagnostics
// // navigate results from symbol_table.Root().
//
class SymbolTable {
public:
class Builder; // implementation detail
class Tester; // test-only
public:
// If 'project' is nullptr, caller assumes responsibility for managing files
// and string memory, otherwise string memory is owned by 'project'.
explicit SymbolTable(VerilogProject* project)
: project_(project),
symbol_table_root_(SymbolInfo{.metatype = SymbolMetaType::kRoot}) {}
// can become move-able when needed
SymbolTable(const SymbolTable&) = delete;
SymbolTable(SymbolTable&&) = delete;
SymbolTable& operator=(const SymbolTable&) = delete;
SymbolTable& operator=(SymbolTable&&) = delete;
~SymbolTable() { CheckIntegrity(); }
const SymbolTableNode& Root() const { return symbol_table_root_; }
const VerilogProject* Project() const { return project_; }
// TODO(fangism): multi-translation-unit merge operation,
// to be done before any symbol resolution
// Incrementally construct the symbol table from one translation unit.
// This gives the user control over the ordering of processing.
// It is safe to build the same unit multiple times, subsequent invocations
// will not change the symbol table structure, but will give duplicate symbol
// diagnostics.
void BuildSingleTranslationUnit(absl::string_view referenced_file_name,
std::vector<absl::Status>* diagnostics);
// Construct symbol table definitions and references hierarchically, but do
// not attempt to resolve the symbols.
// The ordering of translation units processing is implementation defined,
// and should not be relied upon, but this only maatters when there are
// duplicate definitions among translation units.
void Build(std::vector<absl::Status>* diagnostics);
// Lookup all symbol references, and bind references where successful.
// Only attempt to resolve after merging symbol tables.
void Resolve(std::vector<absl::Status>* diagnostics);
// A "weaker" version of Resolve() that only attempts to resolve symbol
// references to definitions belonging to the same scope as the reference
// (without upward search).
// This can dramatically prune the number of unresolved references that
// require root-level resolution.
// No diagnostics are collected because silent no-change-on-failure behavior
// is intended.
void ResolveLocallyOnly();
// Print only the information about symbols defined (no references).
// This will print the results of Build().
std::ostream& PrintSymbolDefinitions(std::ostream&) const;
// Print only the information about symbol references, and possibly resolved
// links to definitions.
// This will print the results of Build() or Resolve().
std::ostream& PrintSymbolReferences(std::ostream&) const;
protected: // methods
// Direct mutation is only intended for the Builder implementation.
SymbolTableNode& MutableRoot() { return symbol_table_root_; }
// Verify internal structural and pointer consistency.
void CheckIntegrity() const;
private: // data
// This owns all files used to construct the symbol table and therefore,
// owns all string_views inside the symbol table and outlives objects of
// this class.
// The project needs to be mutable for the sake of opening `include-d files
// encountered during tree traversal.
// TODO(fangism): once a preprocessor is implemented, includes can be expanded
// before symbol table construction, and this can become read-only.
VerilogProject* const project_;
// Global symbol table root for SystemVerilog language elements:
// modules, packages, classes, tasks, functions, interfaces, etc.
// Known limitation: All of the above elements share the same namespace,
// but the language actually compartmentalizes namespaces of some element
// types.
SymbolTableNode symbol_table_root_;
// All macro definitions/references interact through this global namespace.
MacroSymbolMap macro_symbols_;
};
// Construct a partial symbol table and bindings locations from a single source
// file. This does not actually resolve symbol references, there is an
// opportunity to merge symbol tables across files before resolving references.
// 'source' should already be Parse()d in advance, so that its syntax tree can
// be accessed.
// If 'project' is provided, then it can be used to open preprocessing-included
// files, otherwise include directives will be ignored.
std::vector<absl::Status> BuildSymbolTable(const VerilogSourceFile& source,
SymbolTable* symbol_table,
VerilogProject* project = nullptr);
} // namespace verilog
#endif // VERIBLE_VERILOG_ANALYSIS_SYMBOL_TABLE_H_