From c13cf115c772fb2647d86f8d6a43221aa21ee322 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Fri, 23 Dec 2022 23:41:18 +0800 Subject: [PATCH 01/45] file based filter --- refresh.template.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 03f28ec..f37618c 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -835,7 +835,9 @@ def _get_commands(target: str, flags: str): # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") - additional_flags = shlex.split(flags) + sys.argv[1:] + additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')] + file_flags = [arg for arg in sys.argv[1:] if arg.startswith('--file=')] + assert len(file_flags) < 2, f"Only one or zero --file is supported current args = {sys.argv[1:]}" # Detect anything that looks like a build target in the flags, and issue a warning. # Note that positional arguments after -- are all interpreted as target patterns. (If it's at the end, then no worries.) @@ -858,6 +860,18 @@ def _get_commands(target: str, flags: str): if {exclude_external_sources}: # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. target_statment = f"filter('^(//|@//)',{target_statment})" + if (len(file_flags) == 1): + # Strip --file= + file_path = file_flags[0][7:] + # Query escape + file_path = file_path.replace("+", "\+").replace("-", "\-") + # For header file we try to find from hdrs and srcs to get the targets + if file_path.endswith('.h'): + # Since attr function can't query with full path, get the file name to query + head, tail = os.path.split(file_path) + target_statment = f"attr(hdrs, '{tail}', {target_statment}) + attr(srcs, '{tail}', {target_statment})" + else: + target_statment = f"inputs('{file_path}', {target_statment})" aquery_args = [ 'bazel', 'aquery', From 0ea3fd662a54bdb835107f65850f9fb0bf737d8c Mon Sep 17 00:00:00 2001 From: Christopher Peterson Sauer Date: Fri, 23 Dec 2022 16:53:22 -0800 Subject: [PATCH 02/45] Unify --file parsing logic Promote multiple --file assert to log_error Why? It's a usage error we're communicating to the user, not a code invariant we're checking in debug mode. We could recover, but we don't, because we expect only automated usage and want to give clear feedback. More pythonic if Improve my --file flag messaging consistency Call library to escape regex Helps avoid edge cases! Also handles the most common special character, '.' Add an error to catch unsupported --file form Tweak -- logic now that --file arguments could be after --file docs Make aquery spacing consistent Flip the if statement in constructor target_statment Use let to combine attr query Merge compile_commands.json if --file Small simplification: endswith already can take a tuple of possibilities (as elsewhere) Small simplification of header path for file flag. (This code section likely to be replaced later, though.) Fix my typo on --file= discoverable docs Some tweaks to merge Adds note about behavior, some edge case handling around other flags that might start with --file, permissions issues, etc. --- refresh.template.py | 49 +++++++++++++++++++++++------------- refresh_compile_commands.bzl | 2 ++ 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index f37618c..4b16402 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -3,8 +3,9 @@ Interface (after template expansion): - `bazel run` to regenerate compile_commands.json, so autocomplete (and any other clang tooling!) reflect the latest Bazel build files. - - No arguments are needed; info from the rule baked into the template expansion. + - No arguments are needed; info from the rule is baked into the template expansion. - Any arguments passed are interpreted as arguments needed for the builds being analyzed. + - The one exception is --file=, which can be used to update commands for just one file. This is intended for programmatic use from editor plugins. - Requires being run under Bazel so we can access the workspace root environment variable. - Output: a compile_commands.json in the workspace root that clang tooling (or you!) can look at to figure out how files are being compiled by Bazel - Crucially, this output is de-Bazeled; The result is a command that could be run from the workspace root directly, with no Bazel-specific requirements, environment variables, etc. @@ -835,14 +836,20 @@ def _get_commands(target: str, flags: str): # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") + # Pass along all arguments to aquery, except for --file= additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')] - file_flags = [arg for arg in sys.argv[1:] if arg.startswith('--file=')] - assert len(file_flags) < 2, f"Only one or zero --file is supported current args = {sys.argv[1:]}" + file_flags = [arg[len('--file='):] for arg in sys.argv[1:] if arg.startswith('--file=')] + if len(file_flags) > 1: + log_error(">>> At most one --file flag is supported.") + sys.exit(1) + if any(arg.startswith('--file') for arg in additional_flags): + log_error(">>> Only the --file= form is supported.") + sys.exit(1) # Detect anything that looks like a build target in the flags, and issue a warning. - # Note that positional arguments after -- are all interpreted as target patterns. (If it's at the end, then no worries.) + # Note that positional arguments after -- are all interpreted as target patterns. # And that we have to look for targets. checking for a - prefix is not enough. Consider the case of `-c opt` leading to a false positive - if ('--' in additional_flags[:-1] + if ('--' in additional_flags or any(re.match(r'-?(@|:|//)', f) for f in additional_flags)): log_warning(""">>> The flags you passed seem to contain targets. Try adding them as targets in your refresh_compile_commands rather than flags. @@ -860,25 +867,22 @@ def _get_commands(target: str, flags: str): if {exclude_external_sources}: # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. target_statment = f"filter('^(//|@//)',{target_statment})" - if (len(file_flags) == 1): - # Strip --file= - file_path = file_flags[0][7:] - # Query escape - file_path = file_path.replace("+", "\+").replace("-", "\-") - # For header file we try to find from hdrs and srcs to get the targets - if file_path.endswith('.h'): - # Since attr function can't query with full path, get the file name to query - head, tail = os.path.split(file_path) - target_statment = f"attr(hdrs, '{tail}', {target_statment}) + attr(srcs, '{tail}', {target_statment})" + if file_flags: + file_path = file_flags[0] + if file_path.endswith(_get_files.source_extensions): + target_statment = f"inputs('{re.escape(file_path)}', {target_statment})" else: - target_statment = f"inputs('{file_path}', {target_statment})" + # For header files we try to find from hdrs and srcs to get the targets + # Since attr function can't query with full path, get the file name to query + fname = os.path.basename(file_path) + target_statment = f"let v = {target_statment} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" aquery_args = [ 'bazel', 'aquery', # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto # One bummer, not described in the docs, is that aquery filters over *all* actions for a given target, rather than just those that would be run by a build to produce a given output. This mostly isn't a problem, but can sometimes surface extra, unnecessary, misconfigured actions. Chris has emailed the authors to discuss and filed an issue so anyone reading this could track it: https://github.com/bazelbuild/bazel/issues/14156. - f"mnemonic('(Objc|Cpp)Compile',{target_statment})", + f"mnemonic('(Objc|Cpp)Compile', {target_statment})", # We switched to jsonproto instead of proto because of https://github.com/bazelbuild/bazel/issues/13404. We could change back when fixed--reverting most of the commit that added this line and tweaking the build file to depend on the target in that issue. That said, it's kinda nice to be free of the dependency, unless (OPTIMNOTE) jsonproto becomes a performance bottleneck compated to binary protos. '--output=jsonproto', # We'll disable artifact output for efficiency, since it's large and we don't use them. Small win timewise, but dramatically less json output from aquery. @@ -1081,6 +1085,17 @@ def _ensure_cwd_is_workspace_root(): log_error(""">>> Not (over)writing compile_commands.json, since no commands were extracted and an empty file is of no use. There should be actionable warnings, above, that led to this.""") sys.exit(1) + # --file triggers incremental update of compile_commands.json + if any(arg.startswith('--file=') for arg in sys.argv[1:]) and os.path.isfile('compile_commands.json'): + previous_compile_command_entries = [] + try: + with open('compile_commands.json') as compile_commands_file: + previous_compile_command_entries = json.load(compile_commands_file) + except: + log_warning(">>> Couldn't read previous compile_commands.json. Overwriting instead of merging...") + else: + updated_files = set(entry['file'] for entry in compile_command_entries) + compile_command_entries += [entry for entry in previous_compile_command_entries if entry['file'] not in updated_files] # Chain output into compile_commands.json with open('compile_commands.json', 'w') as output_file: diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index b9c5d32..70cf8f5 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -49,6 +49,8 @@ refresh_compile_commands( # exclude_headers = "external", # Still not fast enough? # Make sure you're specifying just the targets you care about by setting `targets`, above. + # That's still not enough; I'm working on a huge codebase! + # This tool supports a fast, incremental mode that can be used to add/update commands as individual files are opened. If you'd be willing to collaborate on writing a simple editor plugin invokes this tool on file open, please write in! (And see --file flag in refresh.template.py) ``` """ From 4244747c788fea5dd99a012aa3e5dcda584bb7b1 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Sun, 19 Mar 2023 03:37:40 +0800 Subject: [PATCH 03/45] file based filter: deal with header lib --- refresh.template.py | 172 +++++++++++++++++++++++++------------------- 1 file changed, 98 insertions(+), 74 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 4b16402..57f257e 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -832,7 +832,75 @@ def _convert_compile_commands(aquery_output): def _get_commands(target: str, flags: str): - """Yields compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" + """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" + def _get_commands(target_statment): + aquery_args = [ + 'bazel', + 'aquery', + # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html + # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto + # One bummer, not described in the docs, is that aquery filters over *all* actions for a given target, rather than just those that would be run by a build to produce a given output. This mostly isn't a problem, but can sometimes surface extra, unnecessary, misconfigured actions. Chris has emailed the authors to discuss and filed an issue so anyone reading this could track it: https://github.com/bazelbuild/bazel/issues/14156. + f"mnemonic('(Objc|Cpp)Compile', {target_statment})", + # We switched to jsonproto instead of proto because of https://github.com/bazelbuild/bazel/issues/13404. We could change back when fixed--reverting most of the commit that added this line and tweaking the build file to depend on the target in that issue. That said, it's kinda nice to be free of the dependency, unless (OPTIMNOTE) jsonproto becomes a performance bottleneck compated to binary protos. + '--output=jsonproto', + # We'll disable artifact output for efficiency, since it's large and we don't use them. Small win timewise, but dramatically less json output from aquery. + '--include_artifacts=false', + # Shush logging. Just for readability. + '--ui_event_filters=-info', + '--noshow_progress', + # Disable param files, which would obscure compile actions + # Mostly, people enable param files on Windows to avoid the relatively short command length limit. + # For more, see compiler_param_file in https://bazel.build/docs/windows + # They are, however, technically supported on other platforms/compilers. + # That's all well and good, but param files would prevent us from seeing compile actions before the param files had been generated by compilation. + # Since clangd has no such length limit, we'll disable param files for our aquery run. + '--features=-compiler_param_file', + # Disable layering_check during, because it causes large-scale dependence on generated module map files that prevent header extraction before their generation + # For more context, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/83 + # If https://github.com/clangd/clangd/issues/123 is resolved and we're not doing header extraction, we could try removing this, checking that there aren't erroneous red squigglies squigglies before the module maps are generated. + # If Bazel starts supporting modules (https://github.com/bazelbuild/bazel/issues/4005), we'll probably need to make changes that subsume this. + '--features=-layering_check', + ] + additional_flags + + aquery_process = subprocess.run( + aquery_args, + # MIN_PY=3.7: Replace PIPEs with capture_output. + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding=locale.getpreferredencoding(), + check=False, # We explicitly ignore errors from `bazel aquery` and carry on. + ) + + + # Filter aquery error messages to just those the user should care about. + # Shush known warnings about missing graph targets. + # The missing graph targets are not things we want to introspect anyway. + # Tracking issue https://github.com/bazelbuild/bazel/issues/13007 + missing_targets_warning: typing.Pattern[str] = re.compile(r'(\(\d+:\d+:\d+\) )?(\033\[[\d;]+m)?WARNING: (\033\[[\d;]+m)?Targets were missing from graph:') # Regex handles --show_timestamps and --color=yes. Could use "in" if we ever need more flexibility. + aquery_process.stderr = '\n'.join(line for line in aquery_process.stderr.splitlines() if not missing_targets_warning.match(line)) + if aquery_process.stderr: print(aquery_process.stderr, file=sys.stderr) + + # Parse proto output from aquery + try: + # object_hook -> SimpleNamespace allows object.member syntax, like a proto, while avoiding the protobuf dependency + parsed_aquery_output = json.loads(aquery_process.stdout, object_hook=lambda d: types.SimpleNamespace(**d)) + except json.JSONDecodeError: + print("Bazel aquery failed. Command:", aquery_args, file=sys.stderr) + log_warning(f">>> Failed extracting commands for {target}\n Continuing gracefully...") + return [] + + if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) + if aquery_process.stderr: + log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. + Continuing gracefully...""") + else: + log_warning(f""">>> Bazel lists no applicable compile commands for {target} + If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). + Continuing gracefully...""") + return [] + + return _convert_compile_commands(parsed_aquery_output) + # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") @@ -862,90 +930,46 @@ def _get_commands(target: str, flags: str): Try adding them as flags in your refresh_compile_commands rather than targets. In a moment, Bazel will likely fail to parse.""") + compile_commands = [] # First, query Bazel's C-family compile actions for that configured target target_statment = f'deps({target})' - if {exclude_external_sources}: - # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. - target_statment = f"filter('^(//|@//)',{target_statment})" + if file_flags: file_path = file_flags[0] - if file_path.endswith(_get_files.source_extensions): - target_statment = f"inputs('{re.escape(file_path)}', {target_statment})" + found = False + target_statment_canidates = [] + if file_flags[0].endswith(_get_files.source_extensions): + target_statment_canidates.append(f"inputs('{re.escape(file_path)}', {target_statment})") else: - # For header files we try to find from hdrs and srcs to get the targets - # Since attr function can't query with full path, get the file name to query fname = os.path.basename(file_path) - target_statment = f"let v = {target_statment} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" - aquery_args = [ - 'bazel', - 'aquery', - # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html - # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto - # One bummer, not described in the docs, is that aquery filters over *all* actions for a given target, rather than just those that would be run by a build to produce a given output. This mostly isn't a problem, but can sometimes surface extra, unnecessary, misconfigured actions. Chris has emailed the authors to discuss and filed an issue so anyone reading this could track it: https://github.com/bazelbuild/bazel/issues/14156. - f"mnemonic('(Objc|Cpp)Compile', {target_statment})", - # We switched to jsonproto instead of proto because of https://github.com/bazelbuild/bazel/issues/13404. We could change back when fixed--reverting most of the commit that added this line and tweaking the build file to depend on the target in that issue. That said, it's kinda nice to be free of the dependency, unless (OPTIMNOTE) jsonproto becomes a performance bottleneck compated to binary protos. - '--output=jsonproto', - # We'll disable artifact output for efficiency, since it's large and we don't use them. Small win timewise, but dramatically less json output from aquery. - '--include_artifacts=false', - # Shush logging. Just for readability. - '--ui_event_filters=-info', - '--noshow_progress', - # Disable param files, which would obscure compile actions - # Mostly, people enable param files on Windows to avoid the relatively short command length limit. - # For more, see compiler_param_file in https://bazel.build/docs/windows - # They are, however, technically supported on other platforms/compilers. - # That's all well and good, but param files would prevent us from seeing compile actions before the param files had been generated by compilation. - # Since clangd has no such length limit, we'll disable param files for our aquery run. - '--features=-compiler_param_file', - # Disable layering_check during, because it causes large-scale dependence on generated module map files that prevent header extraction before their generation - # For more context, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/83 - # If https://github.com/clangd/clangd/issues/123 is resolved and we're not doing header extraction, we could try removing this, checking that there aren't erroneous red squigglies squigglies before the module maps are generated. - # If Bazel starts supporting modules (https://github.com/bazelbuild/bazel/issues/4005), we'll probably need to make changes that subsume this. - '--features=-layering_check', - ] + additional_flags - - aquery_process = subprocess.run( - aquery_args, - # MIN_PY=3.7: Replace PIPEs with capture_output. - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding=locale.getpreferredencoding(), - check=False, # We explicitly ignore errors from `bazel aquery` and carry on. - ) - - - # Filter aquery error messages to just those the user should care about. - # Shush known warnings about missing graph targets. - # The missing graph targets are not things we want to introspect anyway. - # Tracking issue https://github.com/bazelbuild/bazel/issues/13007 - missing_targets_warning: typing.Pattern[str] = re.compile(r'(\(\d+:\d+:\d+\) )?(\033\[[\d;]+m)?WARNING: (\033\[[\d;]+m)?Targets were missing from graph:') # Regex handles --show_timestamps and --color=yes. Could use "in" if we ever need more flexibility. - aquery_process.stderr = '\n'.join(line for line in aquery_process.stderr.splitlines() if not missing_targets_warning.match(line)) - if aquery_process.stderr: print(aquery_process.stderr, file=sys.stderr) - - # Parse proto output from aquery - try: - # object_hook -> SimpleNamespace allows object.member syntax, like a proto, while avoiding the protobuf dependency - parsed_aquery_output = json.loads(aquery_process.stdout, object_hook=lambda d: types.SimpleNamespace(**d)) - except json.JSONDecodeError: - print("Bazel aquery failed. Command:", aquery_args, file=sys.stderr) - log_warning(f">>> Failed extracting commands for {target}\n Continuing gracefully...") - return - - if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) - if aquery_process.stderr: - log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. - Continuing gracefully...""") - else: + target_statment_canidates.extend([ + f"let v = {target_statment} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)", + f"inputs('{re.escape(file_path)}', {target_statment})", + f'deps({target})', + ]) + + for target_statment in target_statment_canidates: + compile_commands.extend( _get_commands(target_statment)) + if any(command['file'].endswith(file_path) for command in reversed(compile_commands)): + found = True + break + if not found: + log_warning(f""">>> Bazel lists no applicable compile commands for {file_path} in {target}. + Continuing gracefully...""") + else: + if {exclude_external_sources}: + # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. + target_statment = f"filter('^(//|@//)',{target_statment})" + compile_commands.extend(_get_commands(target_statment)) + if len(compile_commands) == 0: log_warning(f""">>> Bazel lists no applicable compile commands for {target} - If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). - Continuing gracefully...""") - return - - yield from _convert_compile_commands(parsed_aquery_output) + If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). + Continuing gracefully...""") # Log clear completion messages log_success(f">>> Finished extracting commands for {target}") + return compile_commands def _ensure_external_workspaces_link_exists(): From 0befc0907e28860649ca485806573d4c5ccf8139 Mon Sep 17 00:00:00 2001 From: snorlax Date: Fri, 3 Mar 2023 21:50:59 +0800 Subject: [PATCH 04/45] End loop if header is found & Add max attempts limit --- refresh.template.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 57f257e..e8b2dd6 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -37,6 +37,8 @@ import time import types import typing # MIN_PY=3.9: Switch e.g. typing.List[str] -> list[str] +import threading +import itertools @enum.unique @@ -182,6 +184,7 @@ def _get_cached_adjusted_modified_time(path: str): # Roughly 1 year into the future. This is safely below bazel's 10 year margin, but large enough that no sane normal file should be past this. BAZEL_INTERNAL_SOURCE_CUTOFF = time.time() + 60*60*24*365 +BAZEL_INTERNAL_MAX_HEADER_SEARCH_COUNT = 500 def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_key: str): """Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.) @@ -766,11 +769,15 @@ def _all_platform_patch(compile_args: typing.List[str]): return compile_args -def _get_cpp_command_for_files(compile_action): +def _get_cpp_command_for_files(args): """Reformat compile_action into a compile command clangd can understand. Undo Bazel-isms and figures out which files clangd should apply the command to. """ + (compile_action, event, should_stop_lambda) = args + if event.is_set(): + return set(), set(), [] + # Patch command by platform compile_action.arguments = _all_platform_patch(compile_action.arguments) compile_action.arguments = _apple_platform_patch(compile_action.arguments) @@ -778,10 +785,12 @@ def _get_cpp_command_for_files(compile_action): source_files, header_files = _get_files(compile_action) + if not event.is_set() and should_stop_lambda(source_files, header_files): + event.set() return source_files, header_files, compile_action.arguments -def _convert_compile_commands(aquery_output): +def _convert_compile_commands(aquery_output, should_stop_lambda): """Converts from Bazel's aquery format to de-Bazeled compile_commands.json entries. Input: jsonproto output from aquery, pre-filtered to (Objective-)C(++) compile actions for a given build. @@ -805,8 +814,8 @@ def _convert_compile_commands(aquery_output): with concurrent.futures.ThreadPoolExecutor( max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor ) as threadpool: - outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions) - + event = threading.Event() + outputs = threadpool.map(_get_cpp_command_for_files, map(lambda action: (action, event, should_stop_lambda), aquery_output.actions)) # Yield as compile_commands.json entries header_files_already_written = set() for source_files, header_files, compile_command_args in outputs: @@ -833,7 +842,19 @@ def _convert_compile_commands(aquery_output): def _get_commands(target: str, flags: str): """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" - def _get_commands(target_statment): + lock = threading.RLock() + counter = itertools.count() + def _should_stop(headers, file_path): + if file_path: + with lock: + tried_count = next(counter) + if tried_count >= BAZEL_INTERNAL_MAX_HEADER_SEARCH_COUNT: + log_warning(f""">>> Bazel lists no applicable compile commands for {file_path} in {target} under {tried_count} Attempt.""") + return True + return any(header.endswith(file_path) for header in headers) + return False + + def _get_commands(target_statment, file_path): aquery_args = [ 'bazel', 'aquery', @@ -899,7 +920,7 @@ def _get_commands(target_statment): Continuing gracefully...""") return [] - return _convert_compile_commands(parsed_aquery_output) + return _convert_compile_commands(parsed_aquery_output, lambda _, headers: _should_stop(headers, file_path)) # Log clear completion messages log_info(f">>> Analyzing commands used in {target}") @@ -949,7 +970,7 @@ def _get_commands(target_statment): ]) for target_statment in target_statment_canidates: - compile_commands.extend( _get_commands(target_statment)) + compile_commands.extend( _get_commands(target_statment, file_path)) if any(command['file'].endswith(file_path) for command in reversed(compile_commands)): found = True break @@ -960,7 +981,7 @@ def _get_commands(target_statment): if {exclude_external_sources}: # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. target_statment = f"filter('^(//|@//)',{target_statment})" - compile_commands.extend(_get_commands(target_statment)) + compile_commands.extend(_get_commands(target_statment, None)) if len(compile_commands) == 0: log_warning(f""">>> Bazel lists no applicable compile commands for {target} If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). From 92e83c177f5aa7ff1b95e16375fd2610836976ab Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Sat, 18 Mar 2023 16:03:05 -0700 Subject: [PATCH 05/45] Improve my language for --file docstring --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index e8b2dd6..beed66f 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -5,7 +5,7 @@ - `bazel run` to regenerate compile_commands.json, so autocomplete (and any other clang tooling!) reflect the latest Bazel build files. - No arguments are needed; info from the rule is baked into the template expansion. - Any arguments passed are interpreted as arguments needed for the builds being analyzed. - - The one exception is --file=, which can be used to update commands for just one file. This is intended for programmatic use from editor plugins. + - The one exception is --file=, which can be used to incrementally update commands, focused around that one file. This is intended for programmatic use from editor plugins. - Requires being run under Bazel so we can access the workspace root environment variable. - Output: a compile_commands.json in the workspace root that clang tooling (or you!) can look at to figure out how files are being compiled by Bazel - Crucially, this output is de-Bazeled; The result is a command that could be run from the workspace root directly, with no Bazel-specific requirements, environment variables, etc. From bf0405e0fb32fcef2fcf2cdcdadff0da060428c8 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Sat, 18 Mar 2023 16:03:29 -0700 Subject: [PATCH 06/45] rm dupe itertools import --- refresh.template.py | 1 - 1 file changed, 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index beed66f..b309c9e 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -38,7 +38,6 @@ import types import typing # MIN_PY=3.9: Switch e.g. typing.List[str] -> list[str] import threading -import itertools @enum.unique From 9d7f9ab3f6371aefd2d2c08c5c78fee4c9d3d8fe Mon Sep 17 00:00:00 2001 From: Snorlax Date: Mon, 27 Mar 2023 18:45:37 +0800 Subject: [PATCH 07/45] use variable above --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index b309c9e..061d88e 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -958,7 +958,7 @@ def _get_commands(target_statment, file_path): file_path = file_flags[0] found = False target_statment_canidates = [] - if file_flags[0].endswith(_get_files.source_extensions): + if file_path.endswith(_get_files.source_extensions): target_statment_canidates.append(f"inputs('{re.escape(file_path)}', {target_statment})") else: fname = os.path.basename(file_path) From 9903410b0f21124d6628844d96442028195bcf0f Mon Sep 17 00:00:00 2001 From: Snorlax Date: Mon, 27 Mar 2023 18:47:12 +0800 Subject: [PATCH 08/45] check file if resolved in a smaller scope --- refresh.template.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 061d88e..9b250ce 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -969,8 +969,10 @@ def _get_commands(target_statment, file_path): ]) for target_statment in target_statment_canidates: - compile_commands.extend( _get_commands(target_statment, file_path)) - if any(command['file'].endswith(file_path) for command in reversed(compile_commands)): + commands = _get_commands(target_statment, file_path) + # Prevent waste and return them + compile_commands.extend(commands) + if any(command['file'].endswith(file_path) for command in commands): found = True break if not found: From d7e1e31306a211764094b065b92e8cfa7e35ccbe Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Mon, 24 Apr 2023 16:18:09 -0700 Subject: [PATCH 09/45] Improve --file editor wrapper message --- refresh_compile_commands.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 70cf8f5..2e853d2 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -50,7 +50,7 @@ refresh_compile_commands( # Still not fast enough? # Make sure you're specifying just the targets you care about by setting `targets`, above. # That's still not enough; I'm working on a huge codebase! - # This tool supports a fast, incremental mode that can be used to add/update commands as individual files are opened. If you'd be willing to collaborate on writing a simple editor plugin invokes this tool on file open, please write in! (And see --file flag in refresh.template.py) + # This tool supports a fast, incremental mode that can be used to add/update commands as individual files are opened. Let's work together to get it integrated into editors, invoking this tool on file open. Please file an issue to let us know about your interest--or willingness to help (https://github.com/hedronvision/bazel-compile-commands-extractor/issues/new)! (See also: --file flag in refresh.template.py) ``` """ From d6f60cb84211445b273d15f0f358af3f08e52f60 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Mon, 24 Apr 2023 16:40:55 -0700 Subject: [PATCH 10/45] Fix no output error message and spacing. Also removes my out of date comment where code is self-explanatory. --- refresh.template.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 9b250ce..fb258f6 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -1128,9 +1128,9 @@ def _ensure_cwd_is_workspace_root(): compile_command_entries.extend(_get_commands(target, flags)) if not compile_command_entries: - log_error(""">>> Not (over)writing compile_commands.json, since no commands were extracted and an empty file is of no use. - There should be actionable warnings, above, that led to this.""") + log_error(">>> Not writing to compile_commands.json, since no commands were extracted.") sys.exit(1) + # --file triggers incremental update of compile_commands.json if any(arg.startswith('--file=') for arg in sys.argv[1:]) and os.path.isfile('compile_commands.json'): previous_compile_command_entries = [] @@ -1143,7 +1143,6 @@ def _ensure_cwd_is_workspace_root(): updated_files = set(entry['file'] for entry in compile_command_entries) compile_command_entries += [entry for entry in previous_compile_command_entries if entry['file'] not in updated_files] - # Chain output into compile_commands.json with open('compile_commands.json', 'w') as output_file: json.dump( compile_command_entries, From 2b69f05749bde0751d3ea51a50599ea3d9a93cd5 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Mon, 1 May 2023 17:56:39 -0700 Subject: [PATCH 11/45] Fix small old typo --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index fb258f6..10f0aa9 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -1008,7 +1008,7 @@ def _ensure_external_workspaces_link_exists(): # Traverse into output_base via bazel-out, keeping the workspace position-independent, so it can be moved without rerunning dest = pathlib.Path('bazel-out/../../../external') if is_windows: - # On Windows, unfortunately, bazel-out is a junction, and acessing .. of a junction brings you back out the way you came. So we have to resolve bazel-out first. Not position-independent, but I think the best we can do + # On Windows, unfortunately, bazel-out is a junction, and accessing .. of a junction brings you back out the way you came. So we have to resolve bazel-out first. Not position-independent, but I think the best we can do dest = (pathlib.Path('bazel-out').resolve()/'../../../external').resolve() # Handle problem cases where //external exists From 242d6014296d89d4bc581fd604e0ab40bbbcaa0d Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Mon, 1 May 2023 19:48:21 -0700 Subject: [PATCH 12/45] Prevent template substitution in comment --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 10f0aa9..ac75fff 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -479,7 +479,7 @@ def _get_headers(compile_action, source_path: str): return set() elif {exclude_headers} == "external" and not {exclude_external_sources} and compile_action.is_external: # Shortcut - an external action can't include headers in the workspace (or, non-external headers) - # The `not {exclude_external_sources}`` clause makes sure is_external was precomputed; there are no external actions if they've already been filtered in the process of excluding external sources. + # The `not exclude_external_sources` clause makes sure is_external was precomputed; there are no external actions if they've already been filtered in the process of excluding external sources. return set() output_file = None From e17428fe9799bbff405e238bc158462707c19c32 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Mon, 1 May 2023 20:27:28 -0700 Subject: [PATCH 13/45] ws --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index ac75fff..5ce1475 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -832,7 +832,7 @@ def _convert_compile_commands(aquery_output, should_stop_lambda): yield { # Docs about compile_commands.json format: https://clang.llvm.org/docs/JSONCompilationDatabase.html#format 'file': file, - # Using `arguments' instead of 'command' because it's now preferred by clangd. Heads also that shlex.join doesn't work for windows cmd, so you'd need to use windows_list2cmdline if we ever switched back. For more, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/8#issuecomment-1090262263 + # Using `arguments' instead of 'command' because it's now preferred by clangd. Heads also that shlex.join doesn't work for windows cmd, so you'd need to use windows_list2cmdline if we ever switched back. For more, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/8#issuecomment-1090262263 'arguments': compile_command_args, # Bazel gotcha warning: If you were tempted to use `bazel info execution_root` as the build working directory for compile_commands...search ImplementationReadme.md to learn why that breaks. 'directory': os.environ["BUILD_WORKSPACE_DIRECTORY"], From 5be98b188cfbf370b02247b856288c25d85a66aa Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Mon, 1 May 2023 20:28:44 -0700 Subject: [PATCH 14/45] Further improve --file editor wrapper message --- refresh_compile_commands.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 2e853d2..b298e24 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -50,7 +50,7 @@ refresh_compile_commands( # Still not fast enough? # Make sure you're specifying just the targets you care about by setting `targets`, above. # That's still not enough; I'm working on a huge codebase! - # This tool supports a fast, incremental mode that can be used to add/update commands as individual files are opened. Let's work together to get it integrated into editors, invoking this tool on file open. Please file an issue to let us know about your interest--or willingness to help (https://github.com/hedronvision/bazel-compile-commands-extractor/issues/new)! (See also: --file flag in refresh.template.py) + # This tool supports a fast, incremental mode that can be used to add/update commands for individual files. Let's work together to configure editors to use it, invoking this tool on file open and BUILD-file save. Please file an issue to let us know about your interest--or willingness to help (https://github.com/hedronvision/bazel-compile-commands-extractor/issues/new)! (See also: --file flag in refresh.template.py) ``` """ From 2e980ab8f440ef60e0563cc0a81fe9e853f5b138 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 13:47:54 -0700 Subject: [PATCH 15/45] Alphabetize threading import --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 5ce1475..8877bcd 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -34,10 +34,10 @@ import shutil import subprocess import tempfile +import threading import time import types import typing # MIN_PY=3.9: Switch e.g. typing.List[str] -> list[str] -import threading @enum.unique From 8d5f2fe644208ac658ad2a369e3299c5617f1212 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 16:44:56 -0700 Subject: [PATCH 16/45] Document interaction of --file and exclude_* template arguments --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 8877bcd..7997d8c 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -5,7 +5,7 @@ - `bazel run` to regenerate compile_commands.json, so autocomplete (and any other clang tooling!) reflect the latest Bazel build files. - No arguments are needed; info from the rule is baked into the template expansion. - Any arguments passed are interpreted as arguments needed for the builds being analyzed. - - The one exception is --file=, which can be used to incrementally update commands, focused around that one file. This is intended for programmatic use from editor plugins. + - The one exception is --file=, which can be used to incrementally add/update commands, focused around that one file. This is intended to be auto-called by editor plugins. It overrides any exclude_* template arguments enough that you can get a command for the file you asked about. - Requires being run under Bazel so we can access the workspace root environment variable. - Output: a compile_commands.json in the workspace root that clang tooling (or you!) can look at to figure out how files are being compiled by Bazel - Crucially, this output is de-Bazeled; The result is a command that could be run from the workspace root directly, with no Bazel-specific requirements, environment variables, etc. From e288d49b83548d601247792b0a8ef0f4b8e11111 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 16:52:14 -0700 Subject: [PATCH 17/45] Entangled cleanups around thread structure Move to timeout instead of dynamically computed depth cap, unwinding stop lambda. Un-nests the aquery call function for readibility and a smaller diff Starts marking potential problems to resolve with TODO --- refresh.template.py | 183 ++++++++++++++++++++++---------------------- 1 file changed, 91 insertions(+), 92 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 7997d8c..d4b32dc 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -183,7 +183,6 @@ def _get_cached_adjusted_modified_time(path: str): # Roughly 1 year into the future. This is safely below bazel's 10 year margin, but large enough that no sane normal file should be past this. BAZEL_INTERNAL_SOURCE_CUTOFF = time.time() + 60*60*24*365 -BAZEL_INTERNAL_MAX_HEADER_SEARCH_COUNT = 500 def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_key: str): """Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.) @@ -464,7 +463,7 @@ def _file_is_in_main_workspace_and_not_external(file_str: str): return True -def _get_headers(compile_action, source_path: str): +def _get_headers(compile_action, source_path: str, found_header_focused_upon: threading.Event, focused_on_file: str = None): """Gets the headers used by a particular compile command. Relatively slow. Requires running the C preprocessor. @@ -560,7 +559,7 @@ def _get_headers(compile_action, source_path: str): _get_headers.has_logged = False -def _get_files(compile_action): +def _get_files(compile_action, found_header_focused_upon: threading.Event, focused_on_file: str = None): """Gets the ({source files}, {header files}) clangd should be told the command applies to.""" # Getting the source file is a little trickier than it might seem. @@ -620,7 +619,7 @@ def _get_files(compile_action): if os.path.splitext(source_file)[1] in _get_files.assembly_source_extensions: return {source_file}, set() - header_files = _get_headers(compile_action, source_file) + header_files = _get_headers(compile_action, source_file, found_header_focused_upon, focused_on_file) # Ambiguous .h headers need a language specified if they aren't C, or clangd sometimes makes mistakes # Delete this and unused extension variables when clangd >= 16 is released, since their underlying issues are resolved at HEAD @@ -768,28 +767,22 @@ def _all_platform_patch(compile_args: typing.List[str]): return compile_args -def _get_cpp_command_for_files(args): +def _get_cpp_command_for_files(compile_action, found_header_focused_upon: threading.Event, focused_on_file: str = None): """Reformat compile_action into a compile command clangd can understand. Undo Bazel-isms and figures out which files clangd should apply the command to. """ - (compile_action, event, should_stop_lambda) = args - if event.is_set(): - return set(), set(), [] - # Patch command by platform compile_action.arguments = _all_platform_patch(compile_action.arguments) compile_action.arguments = _apple_platform_patch(compile_action.arguments) # Android and Linux and grailbio LLVM toolchains: Fine as is; no special patching needed. - source_files, header_files = _get_files(compile_action) + source_files, header_files = _get_files(compile_action, found_header_focused_upon, focused_on_file) - if not event.is_set() and should_stop_lambda(source_files, header_files): - event.set() return source_files, header_files, compile_action.arguments -def _convert_compile_commands(aquery_output, should_stop_lambda): +def _convert_compile_commands(aquery_output, focused_on_file: str = None): """Converts from Bazel's aquery format to de-Bazeled compile_commands.json entries. Input: jsonproto output from aquery, pre-filtered to (Objective-)C(++) compile actions for a given build. @@ -813,8 +806,24 @@ def _convert_compile_commands(aquery_output, should_stop_lambda): with concurrent.futures.ThreadPoolExecutor( max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor ) as threadpool: - event = threading.Event() - outputs = threadpool.map(_get_cpp_command_for_files, map(lambda action: (action, event, should_stop_lambda), aquery_output.actions)) + found_header_focused_upon = threading.Event() # MIN_PY=3.9: Consider replacing with threadpool.shutdown(cancel_futures=True), possibly also eliminating threading import + output_iterator = threadpool.map( + lambda compile_action: _get_cpp_command_for_files(compile_action, found_header_focused_upon, focused_on_file), # Binding along side inputs + aquery_output.actions, + timeout=3 if focused_on_file else None # If running in fast, interactive mode with --file, we need to cap latency. + ) + # Collect outputs, tolerating any timeouts + outputs = [] + try: + for output in output_iterator: + outputs.append(output) + except concurrent.futures.TimeoutError: + assert focused_on_file, "Timeout should only have been set in the fast, interactive --file mode, focused on a single file." + if not found_header_focused_upon.is_set(): + log_warning(f""">>> Timed out looking for a command for {focused_on_file} + If that's a source file, please report this. We should work to improve the performance. + If that's a header file, we should probably do the same, but it may be unavoidable.""") + # Yield as compile_commands.json entries header_files_already_written = set() for source_files, header_files, compile_command_args in outputs: @@ -839,89 +848,79 @@ def _convert_compile_commands(aquery_output, should_stop_lambda): } -def _get_commands(target: str, flags: str): - """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" - lock = threading.RLock() - counter = itertools.count() - def _should_stop(headers, file_path): - if file_path: - with lock: - tried_count = next(counter) - if tried_count >= BAZEL_INTERNAL_MAX_HEADER_SEARCH_COUNT: - log_warning(f""">>> Bazel lists no applicable compile commands for {file_path} in {target} under {tried_count} Attempt.""") - return True - return any(header.endswith(file_path) for header in headers) - return False +def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aquery_flags: typing.List[str], focused_on_file: str = None): + """Return compile_commands.json entries for a given aquery invocation and file focus.""" + aquery_args = [ + 'bazel', + 'aquery', + # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html + # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto + # One bummer, not described in the docs, is that aquery filters over *all* actions for a given target, rather than just those that would be run by a build to produce a given output. This mostly isn't a problem, but can sometimes surface extra, unnecessary, misconfigured actions. Chris has emailed the authors to discuss and filed an issue so anyone reading this could track it: https://github.com/bazelbuild/bazel/issues/14156. + f"mnemonic('(Objc|Cpp)Compile', {aquery_target_statement})", + # We switched to jsonproto instead of proto because of https://github.com/bazelbuild/bazel/issues/13404. We could change back when fixed--reverting most of the commit that added this line and tweaking the build file to depend on the target in that issue. That said, it's kinda nice to be free of the dependency, unless (OPTIMNOTE) jsonproto becomes a performance bottleneck compated to binary protos. + '--output=jsonproto', + # We'll disable artifact output for efficiency, since it's large and we don't use them. Small win timewise, but dramatically less json output from aquery. + '--include_artifacts=false', + # Shush logging. Just for readability. + '--ui_event_filters=-info', + '--noshow_progress', + # Disable param files, which would obscure compile actions + # Mostly, people enable param files on Windows to avoid the relatively short command length limit. + # For more, see compiler_param_file in https://bazel.build/docs/windows + # They are, however, technically supported on other platforms/compilers. + # That's all well and good, but param files would prevent us from seeing compile actions before the param files had been generated by compilation. + # Since clangd has no such length limit, we'll disable param files for our aquery run. + '--features=-compiler_param_file', + # Disable layering_check during, because it causes large-scale dependence on generated module map files that prevent header extraction before their generation + # For more context, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/83 + # If https://github.com/clangd/clangd/issues/123 is resolved and we're not doing header extraction, we could try removing this, checking that there aren't erroneous red squigglies squigglies before the module maps are generated. + # If Bazel starts supporting modules (https://github.com/bazelbuild/bazel/issues/4005), we'll probably need to make changes that subsume this. + '--features=-layering_check', + ] + additional_aquery_flags + + aquery_process = subprocess.run( + aquery_args, + # MIN_PY=3.7: Replace PIPEs with capture_output. + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + encoding=locale.getpreferredencoding(), + check=False, # We explicitly ignore errors from `bazel aquery` and carry on. + ) - def _get_commands(target_statment, file_path): - aquery_args = [ - 'bazel', - 'aquery', - # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html - # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto - # One bummer, not described in the docs, is that aquery filters over *all* actions for a given target, rather than just those that would be run by a build to produce a given output. This mostly isn't a problem, but can sometimes surface extra, unnecessary, misconfigured actions. Chris has emailed the authors to discuss and filed an issue so anyone reading this could track it: https://github.com/bazelbuild/bazel/issues/14156. - f"mnemonic('(Objc|Cpp)Compile', {target_statment})", - # We switched to jsonproto instead of proto because of https://github.com/bazelbuild/bazel/issues/13404. We could change back when fixed--reverting most of the commit that added this line and tweaking the build file to depend on the target in that issue. That said, it's kinda nice to be free of the dependency, unless (OPTIMNOTE) jsonproto becomes a performance bottleneck compated to binary protos. - '--output=jsonproto', - # We'll disable artifact output for efficiency, since it's large and we don't use them. Small win timewise, but dramatically less json output from aquery. - '--include_artifacts=false', - # Shush logging. Just for readability. - '--ui_event_filters=-info', - '--noshow_progress', - # Disable param files, which would obscure compile actions - # Mostly, people enable param files on Windows to avoid the relatively short command length limit. - # For more, see compiler_param_file in https://bazel.build/docs/windows - # They are, however, technically supported on other platforms/compilers. - # That's all well and good, but param files would prevent us from seeing compile actions before the param files had been generated by compilation. - # Since clangd has no such length limit, we'll disable param files for our aquery run. - '--features=-compiler_param_file', - # Disable layering_check during, because it causes large-scale dependence on generated module map files that prevent header extraction before their generation - # For more context, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/83 - # If https://github.com/clangd/clangd/issues/123 is resolved and we're not doing header extraction, we could try removing this, checking that there aren't erroneous red squigglies squigglies before the module maps are generated. - # If Bazel starts supporting modules (https://github.com/bazelbuild/bazel/issues/4005), we'll probably need to make changes that subsume this. - '--features=-layering_check', - ] + additional_flags - - aquery_process = subprocess.run( - aquery_args, - # MIN_PY=3.7: Replace PIPEs with capture_output. - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - encoding=locale.getpreferredencoding(), - check=False, # We explicitly ignore errors from `bazel aquery` and carry on. - ) + # Filter aquery error messages to just those the user should care about. + # Shush known warnings about missing graph targets. + # The missing graph targets are not things we want to introspect anyway. + # Tracking issue https://github.com/bazelbuild/bazel/issues/13007 + missing_targets_warning: typing.Pattern[str] = re.compile(r'(\(\d+:\d+:\d+\) )?(\033\[[\d;]+m)?WARNING: (\033\[[\d;]+m)?Targets were missing from graph:') # Regex handles --show_timestamps and --color=yes. Could use "in" if we ever need more flexibility. + aquery_process.stderr = '\n'.join(line for line in aquery_process.stderr.splitlines() if not missing_targets_warning.match(line)) + if aquery_process.stderr: print(aquery_process.stderr, file=sys.stderr) - # Filter aquery error messages to just those the user should care about. - # Shush known warnings about missing graph targets. - # The missing graph targets are not things we want to introspect anyway. - # Tracking issue https://github.com/bazelbuild/bazel/issues/13007 - missing_targets_warning: typing.Pattern[str] = re.compile(r'(\(\d+:\d+:\d+\) )?(\033\[[\d;]+m)?WARNING: (\033\[[\d;]+m)?Targets were missing from graph:') # Regex handles --show_timestamps and --color=yes. Could use "in" if we ever need more flexibility. - aquery_process.stderr = '\n'.join(line for line in aquery_process.stderr.splitlines() if not missing_targets_warning.match(line)) - if aquery_process.stderr: print(aquery_process.stderr, file=sys.stderr) + # Parse proto output from aquery + try: + # object_hook -> SimpleNamespace allows object.member syntax, like a proto, while avoiding the protobuf dependency + parsed_aquery_output = json.loads(aquery_process.stdout, object_hook=lambda d: types.SimpleNamespace(**d)) + except json.JSONDecodeError: + print("Bazel aquery failed. Command:", aquery_args, file=sys.stderr) + log_warning(f">>> Failed extracting commands for {target}\n Continuing gracefully...") + return [] + + if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) + # TODO other stderr & focused on file intersection + if aquery_process.stderr: + log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. + Continuing gracefully...""") + elif not focused_on_file: + log_warning(f""">>> Bazel lists no applicable compile commands for {target} + If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). + Continuing gracefully...""") + return [] - # Parse proto output from aquery - try: - # object_hook -> SimpleNamespace allows object.member syntax, like a proto, while avoiding the protobuf dependency - parsed_aquery_output = json.loads(aquery_process.stdout, object_hook=lambda d: types.SimpleNamespace(**d)) - except json.JSONDecodeError: - print("Bazel aquery failed. Command:", aquery_args, file=sys.stderr) - log_warning(f">>> Failed extracting commands for {target}\n Continuing gracefully...") - return [] - - if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) - if aquery_process.stderr: - log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. - Continuing gracefully...""") - else: - log_warning(f""">>> Bazel lists no applicable compile commands for {target} - If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). - Continuing gracefully...""") - return [] + return _convert_compile_commands(parsed_aquery_output, focused_on_file) - return _convert_compile_commands(parsed_aquery_output, lambda _, headers: _should_stop(headers, file_path)) - # Log clear completion messages +def _get_commands(target: str, flags: str): + """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" log_info(f">>> Analyzing commands used in {target}") # Pass along all arguments to aquery, except for --file= From 4a251e5490e24778bbb926e31774b1eda6be61f9 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 16:54:27 -0700 Subject: [PATCH 18/45] Reduce unnecessary work with in header finding with --file flag Also fixes a preexisting bug where we weren't filtering external headers from cache --- refresh.template.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index d4b32dc..5d0a56a 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -463,6 +463,19 @@ def _file_is_in_main_workspace_and_not_external(file_str: str): return True +def _filter_through_headers(headers: set, found_header_focused_upon: threading.Event, focused_on_file: str = None): + """Call me before returning a set of headers from _get_headers(). + + Either checks to see if the header being focused on with --file has been found or excludes external headers, if appropriate. + """ + if focused_on_file: + if focused_on_file in headers: + found_header_focused_upon.set() + elif {exclude_headers} == "external": + headers = {header for header in headers if _file_is_in_main_workspace_and_not_external(header)} + return headers + + def _get_headers(compile_action, source_path: str, found_header_focused_upon: threading.Event, focused_on_file: str = None): """Gets the headers used by a particular compile command. @@ -474,7 +487,14 @@ def _get_headers(compile_action, source_path: str, found_header_focused_upon: th # As an alternative approach, you might consider trying to get the headers by inspecting the Middlemen actions in the aquery output, but I don't see a way to get just the ones actually #included--or an easy way to get the system headers--without invoking the preprocessor's header search logic. # For more on this, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/5#issuecomment-1031148373 - if {exclude_headers} == "all": + # First, check to see if there's a reason we shouldn't be running this relatively expensive header search + if focused_on_file: + # No need to get headers if we're focused on a source file. + # Do not un-nest; if we're explicitly told to focus on a header, we shouldn't exclude headers. + # OR We've already found what we were looking for! -> Don't do unnecessary, time consuming work. + if focused_on_file.endswith(_get_files.source_extensions) or found_header_focused_upon.is_set(): + return set() + elif {exclude_headers} == "all": return set() elif {exclude_headers} == "external" and not {exclude_external_sources} and compile_action.is_external: # Shortcut - an external action can't include headers in the workspace (or, non-external headers) @@ -528,7 +548,7 @@ def _get_headers(compile_action, source_path: str, found_header_focused_upon: th if (action_key == compile_action.actionKey and _get_cached_adjusted_modified_time(source_path) <= cache_last_modified and all(_get_cached_adjusted_modified_time(header_path) <= cache_last_modified for header_path in headers)): - return set(headers) + return _filter_through_headers(set(headers), found_header_focused_upon, focused_on_file) if compile_action.arguments[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe headers = _get_headers_msvc(compile_action.arguments, source_path) @@ -552,10 +572,7 @@ def _get_headers(compile_action, source_path: str, found_header_focused_upon: th with open(cache_file_path, 'w') as cache_file: json.dump((compile_action.actionKey, list(headers)), cache_file) - if {exclude_headers} == "external": - headers = {header for header in headers if _file_is_in_main_workspace_and_not_external(header)} - - return headers + return _filter_through_headers(headers, found_header_focused_upon, focused_on_file) _get_headers.has_logged = False @@ -793,7 +810,7 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): """ # Tag actions as external if we're going to need to know that later. - if {exclude_headers} == "external" and not {exclude_external_sources}: + if not focused_on_file and {exclude_headers} == "external" and not {exclude_external_sources}: targets_by_id = {target.id : target.label for target in aquery_output.targets} for action in aquery_output.actions: # Tag action as external if it's created by an external target From 0d8603851af8b52688725def35771652e6cd2b8c Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 16:54:50 -0700 Subject: [PATCH 19/45] improve comments in _get_command --- refresh.template.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 5d0a56a..9125a1e 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -940,7 +940,7 @@ def _get_commands(target: str, flags: str): """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" log_info(f">>> Analyzing commands used in {target}") - # Pass along all arguments to aquery, except for --file= + # Parse the --file= flag, if any, passing along all other arguments to aquery additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')] file_flags = [arg[len('--file='):] for arg in sys.argv[1:] if arg.startswith('--file=')] if len(file_flags) > 1: @@ -950,9 +950,12 @@ def _get_commands(target: str, flags: str): log_error(">>> Only the --file= form is supported.") sys.exit(1) + + # Screen the remaining flags for obvious issues to help people debug. + # Detect anything that looks like a build target in the flags, and issue a warning. # Note that positional arguments after -- are all interpreted as target patterns. - # And that we have to look for targets. checking for a - prefix is not enough. Consider the case of `-c opt` leading to a false positive + # And that we have to look for targets. Checking for a - prefix is not enough. Consider the case of `-c opt`, leading to a false positive. if ('--' in additional_flags or any(re.match(r'-?(@|:|//)', f) for f in additional_flags)): log_warning(""">>> The flags you passed seem to contain targets. From da95ea6fd93a24d85ee606bb6fdc26703a6d775f Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 16:58:56 -0700 Subject: [PATCH 20/45] Mark problems, fix typos, fix depleted iterator bug --- refresh.template.py | 46 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 9125a1e..e858b31 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -923,7 +923,8 @@ def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aq return [] if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) - # TODO other stderr & focused on file intersection + # TODO other stderr + # TODO simplify/unify logic around warning about not being able to find commands. if aquery_process.stderr: log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. Continuing gracefully...""") @@ -969,46 +970,43 @@ def _get_commands(target: str, flags: str): Try adding them as flags in your refresh_compile_commands rather than targets. In a moment, Bazel will likely fail to parse.""") - compile_commands = [] - # First, query Bazel's C-family compile actions for that configured target - target_statment = f'deps({target})' + # Then, actually query Bazel's compile actions for that configured target + target_statement = f'deps({target})' + compile_commands = [] # TODO simplify loop? Move messages outside. TODO fallback in other dimentsion if file_flags: file_path = file_flags[0] found = False - target_statment_canidates = [] + target_statement_canidates = [] if file_path.endswith(_get_files.source_extensions): - target_statment_canidates.append(f"inputs('{re.escape(file_path)}', {target_statment})") + target_statement_canidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: - fname = os.path.basename(file_path) - target_statment_canidates.extend([ - f"let v = {target_statment} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)", - f"inputs('{re.escape(file_path)}', {target_statment})", + fname = os.path.basename(file_path) # TODO query to be more specific. Outputs function + target_statement_canidates.extend([ + f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)", + f"inputs('{re.escape(file_path)}', {target_statement})", # TODO doesn't actually work because it doesn't pick up transitive dependencies. f'deps({target})', - ]) + ]) # TODO check sort--and filter to files that depend on this - for target_statment in target_statment_canidates: - commands = _get_commands(target_statment, file_path) - # Prevent waste and return them - compile_commands.extend(commands) + for target_statement in target_statement_canidates: + commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) + compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. if any(command['file'].endswith(file_path) for command in commands): found = True break if not found: - log_warning(f""">>> Bazel lists no applicable compile commands for {file_path} in {target}. - Continuing gracefully...""") + log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target} + Continuing gracefully...""") # TODO simplify/unify logic around warning about not being able to find commands. else: if {exclude_external_sources}: # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. - target_statment = f"filter('^(//|@//)',{target_statment})" - compile_commands.extend(_get_commands(target_statment, None)) - if len(compile_commands) == 0: + target_statement = f"filter('^(//|@//)',{target_statement})" + compile_commands.extend(_get_compile_commands_for_aquery(target_statement, additional_flags)) + if not compile_commands: log_warning(f""">>> Bazel lists no applicable compile commands for {target} - If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). - Continuing gracefully...""") - + If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). + Continuing gracefully...""") # TODO simplify/unify logic around warning about not being able to find commands. - # Log clear completion messages log_success(f">>> Finished extracting commands for {target}") return compile_commands From 2a1020765ba68af3d0c969091c1625f0e4eaecf3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 2 May 2023 23:59:07 +0000 Subject: [PATCH 21/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- refresh.template.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index e858b31..6e13f1f 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -465,7 +465,7 @@ def _file_is_in_main_workspace_and_not_external(file_str: str): def _filter_through_headers(headers: set, found_header_focused_upon: threading.Event, focused_on_file: str = None): """Call me before returning a set of headers from _get_headers(). - + Either checks to see if the header being focused on with --file has been found or excludes external headers, if appropriate. """ if focused_on_file: @@ -490,7 +490,7 @@ def _get_headers(compile_action, source_path: str, found_header_focused_upon: th # First, check to see if there's a reason we shouldn't be running this relatively expensive header search if focused_on_file: # No need to get headers if we're focused on a source file. - # Do not un-nest; if we're explicitly told to focus on a header, we shouldn't exclude headers. + # Do not un-nest; if we're explicitly told to focus on a header, we shouldn't exclude headers. # OR We've already found what we were looking for! -> Don't do unnecessary, time consuming work. if focused_on_file.endswith(_get_files.source_extensions) or found_header_focused_upon.is_set(): return set() From f4fa941834fcb86ef31923ca2c2978a120d14d9c Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 17:35:30 -0700 Subject: [PATCH 22/45] Fix aquery inputs() statement not working for headers --- refresh.template.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 6e13f1f..a9096ca 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -982,9 +982,10 @@ def _get_commands(target: str, flags: str): target_statement_canidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: fname = os.path.basename(file_path) # TODO query to be more specific. Outputs function + header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" target_statement_canidates.extend([ - f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)", - f"inputs('{re.escape(file_path)}', {target_statement})", # TODO doesn't actually work because it doesn't pick up transitive dependencies. + header_target_statement, + f"allpaths({target}, {header_target_statement})", f'deps({target})', ]) # TODO check sort--and filter to files that depend on this From 2e10a53f6779c0743a32d19216df1a544e9c168f Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 18:58:36 -0700 Subject: [PATCH 23/45] Add a few more WIP notes --- refresh.template.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index a9096ca..e17c0dd 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -444,6 +444,8 @@ def _is_relative_to(sub: pathlib.PurePath, parent: pathlib.PurePath): def _file_is_in_main_workspace_and_not_external(file_str: str): file_path = pathlib.PurePath(file_str) if file_path.is_absolute(): + # MSVC emits absolute paths for all headers, so we need to the case where there are absolute paths into the workspace + # TODO be careful about this case with --file workspace_absolute = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"]) if not _is_relative_to(file_path, workspace_absolute): return False @@ -973,7 +975,7 @@ def _get_commands(target: str, flags: str): # Then, actually query Bazel's compile actions for that configured target target_statement = f'deps({target})' - compile_commands = [] # TODO simplify loop? Move messages outside. TODO fallback in other dimentsion + compile_commands = [] # TODO simplify loop? Move messages outside. if file_flags: file_path = file_flags[0] found = False @@ -986,7 +988,7 @@ def _get_commands(target: str, flags: str): target_statement_canidates.extend([ header_target_statement, f"allpaths({target}, {header_target_statement})", - f'deps({target})', + f'deps({target})', # TODO: Let's detect out-of-bazel paths and only run this if and only if we're looking for a system header. ]) # TODO check sort--and filter to files that depend on this for target_statement in target_statement_canidates: @@ -1135,6 +1137,7 @@ def _ensure_cwd_is_workspace_root(): _ensure_gitignore_entries_exist() _ensure_external_workspaces_link_exists() + # TODO for --file, don't continue traversing targets after the first command has been found target_flag_pairs = [ # Begin: template filled by Bazel {target_flag_pairs} From 0851bcad92bad5817b3b5eba143d8cdf816c0895 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 May 2023 01:58:47 +0000 Subject: [PATCH 24/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index e17c0dd..dddf4a6 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -988,7 +988,7 @@ def _get_commands(target: str, flags: str): target_statement_canidates.extend([ header_target_statement, f"allpaths({target}, {header_target_statement})", - f'deps({target})', # TODO: Let's detect out-of-bazel paths and only run this if and only if we're looking for a system header. + f'deps({target})', # TODO: Let's detect out-of-bazel paths and only run this if and only if we're looking for a system header. ]) # TODO check sort--and filter to files that depend on this for target_statement in target_statement_canidates: From 0f120b6182bbd9b60d59f8044413161e96112852 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 19:31:57 -0700 Subject: [PATCH 25/45] Fix typo --- refresh.template.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index dddf4a6..1f49a07 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -979,19 +979,19 @@ def _get_commands(target: str, flags: str): if file_flags: file_path = file_flags[0] found = False - target_statement_canidates = [] + target_statement_candidates = [] if file_path.endswith(_get_files.source_extensions): - target_statement_canidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") + target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: fname = os.path.basename(file_path) # TODO query to be more specific. Outputs function header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" - target_statement_canidates.extend([ + target_statement_candidates.extend([ header_target_statement, f"allpaths({target}, {header_target_statement})", f'deps({target})', # TODO: Let's detect out-of-bazel paths and only run this if and only if we're looking for a system header. ]) # TODO check sort--and filter to files that depend on this - for target_statement in target_statement_canidates: + for target_statement in target_statement_candidates: commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. if any(command['file'].endswith(file_path) for command in commands): From 9d238666a3df641a7ee2d4c8504cdf6a49aac1a5 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Tue, 2 May 2023 22:36:57 -0700 Subject: [PATCH 26/45] Add note about allpaths issue --- refresh.template.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 1f49a07..e18ec6b 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -987,10 +987,9 @@ def _get_commands(target: str, flags: str): header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" target_statement_candidates.extend([ header_target_statement, - f"allpaths({target}, {header_target_statement})", + f"allpaths({target}, {header_target_statement})", # Ordering is correct. TODO needs --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 f'deps({target})', # TODO: Let's detect out-of-bazel paths and only run this if and only if we're looking for a system header. ]) # TODO check sort--and filter to files that depend on this - for target_statement in target_statement_candidates: commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. From 2d09c75a886228120db3ddad1173568b285cb41e Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Wed, 3 May 2023 21:26:59 -0700 Subject: [PATCH 27/45] Refine and note discussion points in _filter_through_headers --- refresh.template.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/refresh.template.py b/refresh.template.py index e18ec6b..69846f4 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -471,8 +471,13 @@ def _filter_through_headers(headers: set, found_header_focused_upon: threading.E Either checks to see if the header being focused on with --file has been found or excludes external headers, if appropriate. """ if focused_on_file: + # TODO full paths on windows--maybe easier to normalize them to relative, above. + # TODO discuss the below. + # If we're focusing on a header, we return just that header. Why? Only source files are useful for background indexing, since they include headers, and we'll already save the work for header finding in our internal caches. if focused_on_file in headers: found_header_focused_upon.set() + return {focused_on_file} + return set() elif {exclude_headers} == "external": headers = {header for header in headers if _file_is_in_main_workspace_and_not_external(header)} return headers From 07226151c82bf13b4256ee9c346727b28d285750 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Wed, 3 May 2023 22:24:16 -0700 Subject: [PATCH 28/45] Improve TODOs --- refresh.template.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 69846f4..f30bd32 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -444,7 +444,7 @@ def _is_relative_to(sub: pathlib.PurePath, parent: pathlib.PurePath): def _file_is_in_main_workspace_and_not_external(file_str: str): file_path = pathlib.PurePath(file_str) if file_path.is_absolute(): - # MSVC emits absolute paths for all headers, so we need to the case where there are absolute paths into the workspace + # MSVC emits absolute paths for all headers, so we need to handle the case where there are absolute paths into the workspace # TODO be careful about this case with --file workspace_absolute = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"]) if not _is_relative_to(file_path, workspace_absolute): @@ -471,7 +471,7 @@ def _filter_through_headers(headers: set, found_header_focused_upon: threading.E Either checks to see if the header being focused on with --file has been found or excludes external headers, if appropriate. """ if focused_on_file: - # TODO full paths on windows--maybe easier to normalize them to relative, above. + # TODO full paths on windows--maybe easier to normalize them to relative upon generation, above. # TODO discuss the below. # If we're focusing on a header, we return just that header. Why? Only source files are useful for background indexing, since they include headers, and we'll already save the work for header finding in our internal caches. if focused_on_file in headers: @@ -834,7 +834,7 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): output_iterator = threadpool.map( lambda compile_action: _get_cpp_command_for_files(compile_action, found_header_focused_upon, focused_on_file), # Binding along side inputs aquery_output.actions, - timeout=3 if focused_on_file else None # If running in fast, interactive mode with --file, we need to cap latency. + timeout=3 if focused_on_file else None # If running in fast, interactive mode with --file, we need to cap latency. #TODO test timeout--if it doesn't work, cap input length here, before passing in array. Might also want to divide timeout/cap by #targets ) # Collect outputs, tolerating any timeouts outputs = [] @@ -862,7 +862,7 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): for file in itertools.chain(source_files, header_files_not_already_written): if file == 'external/bazel_tools/src/tools/launcher/dummy.cc': continue # Suppress Bazel internal files leaking through. Hopefully will prevent issues like https://github.com/hedronvision/bazel-compile-commands-extractor/issues/77 - yield { + yield { # TODO for consistency, let's have this return a list if its parent is returning a list. That'd also let us strip the list() cast in the found = True block, below # Docs about compile_commands.json format: https://clang.llvm.org/docs/JSONCompilationDatabase.html#format 'file': file, # Using `arguments' instead of 'command' because it's now preferred by clangd. Heads also that shlex.join doesn't work for windows cmd, so you'd need to use windows_list2cmdline if we ever switched back. For more, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/8#issuecomment-1090262263 @@ -930,7 +930,7 @@ def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aq return [] if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) - # TODO other stderr + # TODO the different flags warning in stderr is common enough that we might want to either filter it or adjust this messaging. # TODO simplify/unify logic around warning about not being able to find commands. if aquery_process.stderr: log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. @@ -980,7 +980,7 @@ def _get_commands(target: str, flags: str): # Then, actually query Bazel's compile actions for that configured target target_statement = f'deps({target})' - compile_commands = [] # TODO simplify loop? Move messages outside. + compile_commands = [] # TODO simplify loop, especially if we can reduce it to one command per case (see below)? Move messages outside? if file_flags: file_path = file_flags[0] found = False @@ -988,13 +988,13 @@ def _get_commands(target: str, flags: str): if file_path.endswith(_get_files.source_extensions): target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: - fname = os.path.basename(file_path) # TODO query to be more specific. Outputs function + fname = os.path.basename(file_path) # TODO consider running a preliminary aquery to make this more specific, getting the targets that generate the given file? Use outputs aquery function. Should also test the case where the file is generated/is coming in as a filegroup. header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" target_statement_candidates.extend([ header_target_statement, - f"allpaths({target}, {header_target_statement})", # Ordering is correct. TODO needs --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 - f'deps({target})', # TODO: Let's detect out-of-bazel paths and only run this if and only if we're looking for a system header. - ]) # TODO check sort--and filter to files that depend on this + f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth first from the deepest dependency, despite the docs. TODO There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). We might also also want to *just* run this query, not the whole list, since it captures the former and is therfore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets. + f'deps({target})', # TODO: Let's detect out-of-bazel paths and run this if and only if we're looking for a system header. + ]) for target_statement in target_statement_candidates: commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. @@ -1141,7 +1141,7 @@ def _ensure_cwd_is_workspace_root(): _ensure_gitignore_entries_exist() _ensure_external_workspaces_link_exists() - # TODO for --file, don't continue traversing targets after the first command has been found + # TODO for --file, don't continue traversing targets after the first command has been found. Probably push this looping and template expansion inside of _get_commands(). target_flag_pairs = [ # Begin: template filled by Bazel {target_flag_pairs} From 14e707e34267189d2df9c08a78b548d2c5ab203c Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Thu, 4 May 2023 14:39:35 -0700 Subject: [PATCH 29/45] Enrich todos and notes after another read through of the bazel [ac]query docs --- refresh.template.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index f30bd32..8dbd065 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -888,13 +888,13 @@ def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aq # Shush logging. Just for readability. '--ui_event_filters=-info', '--noshow_progress', - # Disable param files, which would obscure compile actions + # Don't hide command-line arguments in param files, which would obscure compile commands. # Mostly, people enable param files on Windows to avoid the relatively short command length limit. # For more, see compiler_param_file in https://bazel.build/docs/windows # They are, however, technically supported on other platforms/compilers. # That's all well and good, but param files would prevent us from seeing compile actions before the param files had been generated by compilation. # Since clangd has no such length limit, we'll disable param files for our aquery run. - '--features=-compiler_param_file', + '--features=-compiler_param_file', # TODO switch to --include_param_files (somewhat confusingly named, but includes the *contents* of param files on the command line) and test on Windows. Including in this diff to not create conflicts. # Disable layering_check during, because it causes large-scale dependence on generated module map files that prevent header extraction before their generation # For more context, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/83 # If https://github.com/clangd/clangd/issues/123 is resolved and we're not doing header extraction, we could try removing this, checking that there aren't erroneous red squigglies squigglies before the module maps are generated. @@ -979,8 +979,8 @@ def _get_commands(target: str, flags: str): # Then, actually query Bazel's compile actions for that configured target - target_statement = f'deps({target})' - compile_commands = [] # TODO simplify loop, especially if we can reduce it to one command per case (see below)? Move messages outside? + target_statement = f'deps({target})' # TODO we should always be quoting targets when we splice them in. Let's use single quotes like with mnemonic, above. See slightly down in https://bazel.build/query/language#tokens + compile_commands = [] # TODO simplify loop, especially if we can reduce it to one command per case (see below)? Move warning messages outside? if file_flags: file_path = file_flags[0] found = False @@ -988,12 +988,12 @@ def _get_commands(target: str, flags: str): if file_path.endswith(_get_files.source_extensions): target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: - fname = os.path.basename(file_path) # TODO consider running a preliminary aquery to make this more specific, getting the targets that generate the given file? Use outputs aquery function. Should also test the case where the file is generated/is coming in as a filegroup. - header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" + fname = os.path.basename(file_path) # TODO consider running a preliminary aquery to make this more specific, getting the targets that generate the given file. Use outputs() aquery function. Should also test the case where the file is generated/is coming in as a filegroup--that's likely to be fixed by this change. + header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. target_statement_candidates.extend([ header_target_statement, - f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth first from the deepest dependency, despite the docs. TODO There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). We might also also want to *just* run this query, not the whole list, since it captures the former and is therfore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets. - f'deps({target})', # TODO: Let's detect out-of-bazel paths and run this if and only if we're looking for a system header. + f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. + f'deps({target})', # TODO: Let's detect out-of-bazel, absolute paths and run this if and only if we're looking for a system header. We need to think about how we want to handle absolute paths more generally, perhaps normalizing them to relative if possible, like with the windows absolute path issue, above. ]) for target_statement in target_statement_candidates: commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) From bd7bde160b16d2258b82f75010c5e2fd2d97acc0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 4 May 2023 21:40:00 +0000 Subject: [PATCH 30/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 8dbd065..75f8e53 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -979,7 +979,7 @@ def _get_commands(target: str, flags: str): # Then, actually query Bazel's compile actions for that configured target - target_statement = f'deps({target})' # TODO we should always be quoting targets when we splice them in. Let's use single quotes like with mnemonic, above. See slightly down in https://bazel.build/query/language#tokens + target_statement = f'deps({target})' # TODO we should always be quoting targets when we splice them in. Let's use single quotes like with mnemonic, above. See slightly down in https://bazel.build/query/language#tokens compile_commands = [] # TODO simplify loop, especially if we can reduce it to one command per case (see below)? Move warning messages outside? if file_flags: file_path = file_flags[0] From 89f583a40cad5bca085130e319f9bf049fcfc6fa Mon Sep 17 00:00:00 2001 From: Snorlax Date: Fri, 5 May 2023 19:14:25 +0800 Subject: [PATCH 31/45] refresh.template.py: Estimate the number of actions that can be run based on timeout --- refresh.template.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 75f8e53..482c1d5 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -831,11 +831,20 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor ) as threadpool: found_header_focused_upon = threading.Event() # MIN_PY=3.9: Consider replacing with threadpool.shutdown(cancel_futures=True), possibly also eliminating threading import + timeout = 3 + measure_action_count = len(aquery_output.actions) if not focused_on_file else 100 + measure_start_time = time.time() output_iterator = threadpool.map( lambda compile_action: _get_cpp_command_for_files(compile_action, found_header_focused_upon, focused_on_file), # Binding along side inputs - aquery_output.actions, - timeout=3 if focused_on_file else None # If running in fast, interactive mode with --file, we need to cap latency. #TODO test timeout--if it doesn't work, cap input length here, before passing in array. Might also want to divide timeout/cap by #targets + aquery_output.actions[:measure_action_count] ) + cost_time = time.time() - measure_start_time + if focused_on_file and cost_time < timeout: + estimate_action_count = min(int((timeout - cost_time) / cost_time * measure_action_count), 5000) + output_iterator = itertools.chain(output_iterator, threadpool.map( + lambda compile_action: _get_cpp_command_for_files(compile_action, found_header_focused_upon, focused_on_file), # Binding along side inputs + aquery_output.actions[measure_action_count:measure_action_count + estimate_action_count] + )) # Collect outputs, tolerating any timeouts outputs = [] try: From ef335095c91eb5ca53a7c02d6e4ba7b15bccf3f0 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Fri, 5 May 2023 19:16:29 +0800 Subject: [PATCH 32/45] refresh.template.py: quoting target_statement --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 482c1d5..fb7a064 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -988,7 +988,7 @@ def _get_commands(target: str, flags: str): # Then, actually query Bazel's compile actions for that configured target - target_statement = f'deps({target})' # TODO we should always be quoting targets when we splice them in. Let's use single quotes like with mnemonic, above. See slightly down in https://bazel.build/query/language#tokens + target_statement = f"deps('{target}')" compile_commands = [] # TODO simplify loop, especially if we can reduce it to one command per case (see below)? Move warning messages outside? if file_flags: file_path = file_flags[0] From f8b046e3e23e4fce4a65bac0eef8ac57cb0f5e4f Mon Sep 17 00:00:00 2001 From: Snorlax Date: Fri, 5 May 2023 19:17:38 +0800 Subject: [PATCH 33/45] refersh.template.py: running a preliminary query to get specified file label --- refresh.template.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index fb7a064..0befd3a 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -997,8 +997,13 @@ def _get_commands(target: str, flags: str): if file_path.endswith(_get_files.source_extensions): target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: - fname = os.path.basename(file_path) # TODO consider running a preliminary aquery to make this more specific, getting the targets that generate the given file. Use outputs() aquery function. Should also test the case where the file is generated/is coming in as a filegroup--that's likely to be fixed by this change. - header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. + fname = os.path.basename(file_path) + label_candidates = subprocess.check_output(['bazel', 'query', f"filter('{fname}$', {target_statement})"], stderr = subprocess.PIPE, text = True).split() + # TODO compatible with windows file path + file_candidates = list(filter(lambda label: file_path in label.replace(':', '/'), label_candidates)) + file_statement = '|'.join(file_candidates) if len(file_candidates) > 0 else fname + + header_target_statement = f"let v = {target_statement} in attr(hdrs, '{file_statement}', $v) + attr(srcs, '{file_statement}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. target_statement_candidates.extend([ header_target_statement, f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. From 9ce17168b54a2838705114b94c28bba3fb3efb84 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Fri, 5 May 2023 20:40:42 +0800 Subject: [PATCH 34/45] refresh.template.py: simplify/unify logic around target_statement_candidates and warning --- refresh.template.py | 49 +++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 0befd3a..5225e76 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -939,14 +939,8 @@ def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aq return [] if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) - # TODO the different flags warning in stderr is common enough that we might want to either filter it or adjust this messaging. - # TODO simplify/unify logic around warning about not being able to find commands. if aquery_process.stderr: log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. - Continuing gracefully...""") - elif not focused_on_file: - log_warning(f""">>> Bazel lists no applicable compile commands for {target} - If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). Continuing gracefully...""") return [] @@ -988,12 +982,13 @@ def _get_commands(target: str, flags: str): # Then, actually query Bazel's compile actions for that configured target - target_statement = f"deps('{target}')" - compile_commands = [] # TODO simplify loop, especially if we can reduce it to one command per case (see below)? Move warning messages outside? + target_statement_candidates = [] + file_path = None + compile_commands = [] + if file_flags: file_path = file_flags[0] - found = False - target_statement_candidates = [] + target_statement = f"deps('{target}')" if file_path.endswith(_get_files.source_extensions): target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: @@ -1009,24 +1004,30 @@ def _get_commands(target: str, flags: str): f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. f'deps({target})', # TODO: Let's detect out-of-bazel, absolute paths and run this if and only if we're looking for a system header. We need to think about how we want to handle absolute paths more generally, perhaps normalizing them to relative if possible, like with the windows absolute path issue, above. ]) - for target_statement in target_statement_candidates: - commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) - compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. - if any(command['file'].endswith(file_path) for command in commands): - found = True - break - if not found: - log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target} - Continuing gracefully...""") # TODO simplify/unify logic around warning about not being able to find commands. else: if {exclude_external_sources}: # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. - target_statement = f"filter('^(//|@//)',{target_statement})" - compile_commands.extend(_get_compile_commands_for_aquery(target_statement, additional_flags)) - if not compile_commands: - log_warning(f""">>> Bazel lists no applicable compile commands for {target} + target_statement_candidates.append(f"filter('^(//|@//)',{target_statement})") + + found = False + for target_statement in target_statement_candidates: + commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) + compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. + if file_flags: + if any(command['file'].endswith(file_path) for command in commands): + found = True + break + log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement} + Continuing gracefully...""") + + if file_flags and not found: + log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target} + Continuing gracefully...""") + + if not compile_commands: + log_warning(f""">>> Bazel lists no applicable compile commands for {target} If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). - Continuing gracefully...""") # TODO simplify/unify logic around warning about not being able to find commands. + Continuing gracefully...""") log_success(f">>> Finished extracting commands for {target}") return compile_commands From 40b9131c9dd4821b538c750aaf98c084762ec026 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Fri, 5 May 2023 21:23:50 +0800 Subject: [PATCH 35/45] refresh.template.py: changed the _convert_compile_commands to return a list of SimpleNamespace objects instead of yielding a dictionary & Removal type cast --- refresh.template.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 5225e76..5af6b1d 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -810,7 +810,7 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): """Converts from Bazel's aquery format to de-Bazeled compile_commands.json entries. Input: jsonproto output from aquery, pre-filtered to (Objective-)C(++) compile actions for a given build. - Yields: Corresponding entries for a compile_commands.json, with commas after each entry, describing all ways every file is being compiled. + Returns: Corresponding entries for a compile_commands.json, with commas after each entry, describing all ways every file is being compiled. Also includes one entry per header, describing one way it is compiled (to work around https://github.com/clangd/clangd/issues/123). Crucially, this de-Bazels the compile commands it takes as input, leaving something clangd can understand. The result is a command that could be run from the workspace root directly, with no bazel-specific environment variables, etc. @@ -857,7 +857,8 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): If that's a source file, please report this. We should work to improve the performance. If that's a header file, we should probably do the same, but it may be unavoidable.""") - # Yield as compile_commands.json entries + # Return as compile_commands.json entries + output_list = [] header_files_already_written = set() for source_files, header_files, compile_command_args in outputs: # Only emit one entry per header @@ -871,14 +872,8 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): for file in itertools.chain(source_files, header_files_not_already_written): if file == 'external/bazel_tools/src/tools/launcher/dummy.cc': continue # Suppress Bazel internal files leaking through. Hopefully will prevent issues like https://github.com/hedronvision/bazel-compile-commands-extractor/issues/77 - yield { # TODO for consistency, let's have this return a list if its parent is returning a list. That'd also let us strip the list() cast in the found = True block, below - # Docs about compile_commands.json format: https://clang.llvm.org/docs/JSONCompilationDatabase.html#format - 'file': file, - # Using `arguments' instead of 'command' because it's now preferred by clangd. Heads also that shlex.join doesn't work for windows cmd, so you'd need to use windows_list2cmdline if we ever switched back. For more, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/8#issuecomment-1090262263 - 'arguments': compile_command_args, - # Bazel gotcha warning: If you were tempted to use `bazel info execution_root` as the build working directory for compile_commands...search ImplementationReadme.md to learn why that breaks. - 'directory': os.environ["BUILD_WORKSPACE_DIRECTORY"], - } + output_list.append(types.SimpleNamespace(file=file, arguments=compile_command_args, directory=os.environ["BUILD_WORKSPACE_DIRECTORY"])) + return output_list def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aquery_flags: typing.List[str], focused_on_file: str = None): @@ -1011,10 +1006,10 @@ def _get_commands(target: str, flags: str): found = False for target_statement in target_statement_candidates: - commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) + commands = _get_compile_commands_for_aquery(target_statement, additional_flags, file_path) compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. if file_flags: - if any(command['file'].endswith(file_path) for command in commands): + if any(command.file.endswith(file_path) for command in commands): found = True break log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement} @@ -1176,16 +1171,16 @@ def _ensure_cwd_is_workspace_root(): previous_compile_command_entries = [] try: with open('compile_commands.json') as compile_commands_file: - previous_compile_command_entries = json.load(compile_commands_file) + previous_compile_command_entries = [types.SimpleNamespace(**d) for d in json.load(compile_commands_file)] except: log_warning(">>> Couldn't read previous compile_commands.json. Overwriting instead of merging...") else: - updated_files = set(entry['file'] for entry in compile_command_entries) - compile_command_entries += [entry for entry in previous_compile_command_entries if entry['file'] not in updated_files] + updated_files = set(entry.file for entry in compile_command_entries) + compile_command_entries += [entry for entry in previous_compile_command_entries if entry.file not in updated_files] with open('compile_commands.json', 'w') as output_file: json.dump( - compile_command_entries, + [vars(entry) for entry in compile_command_entries], output_file, indent=2, # Yay, human readability! check_circular=False # For speed. From 6a6777a8a5054c2cf8302537863998d3783f9a9b Mon Sep 17 00:00:00 2001 From: Snorlax Date: Fri, 5 May 2023 21:47:15 +0800 Subject: [PATCH 36/45] normalize file path if the --file is a subpath of the cwd --- refresh.template.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 5af6b1d..4c02afa 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -983,6 +983,11 @@ def _get_commands(target: str, flags: str): if file_flags: file_path = file_flags[0] + rel_path = os.path.relpath(file_path, os.getcwd()) + if not rel_path.startswith(".."): + log_info(f">>> Detected file path {file_path} is relative path changed to {rel_path}") + file_path = rel_path + target_statement = f"deps('{target}')" if file_path.endswith(_get_files.source_extensions): target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") @@ -997,7 +1002,7 @@ def _get_commands(target: str, flags: str): target_statement_candidates.extend([ header_target_statement, f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. - f'deps({target})', # TODO: Let's detect out-of-bazel, absolute paths and run this if and only if we're looking for a system header. We need to think about how we want to handle absolute paths more generally, perhaps normalizing them to relative if possible, like with the windows absolute path issue, above. + target_statement, ]) else: if {exclude_external_sources}: From 0453be435112dfec20ff549273ab69347c28df4b Mon Sep 17 00:00:00 2001 From: Snorlax Date: Fri, 5 May 2023 22:11:57 +0800 Subject: [PATCH 37/45] refresh.template.py: for --file, don't continue traversing targets after the first command has been found --- refresh.template.py | 178 +++++++++++++++++++++++--------------------- 1 file changed, 92 insertions(+), 86 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 4c02afa..df73615 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -942,95 +942,104 @@ def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aq return _convert_compile_commands(parsed_aquery_output, focused_on_file) -def _get_commands(target: str, flags: str): - """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" - log_info(f">>> Analyzing commands used in {target}") - - # Parse the --file= flag, if any, passing along all other arguments to aquery - additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')] - file_flags = [arg[len('--file='):] for arg in sys.argv[1:] if arg.startswith('--file=')] - if len(file_flags) > 1: - log_error(">>> At most one --file flag is supported.") - sys.exit(1) - if any(arg.startswith('--file') for arg in additional_flags): - log_error(">>> Only the --file= form is supported.") - sys.exit(1) +def _get_commands(pairs: list): + """Return compile_commands.json entries for a given list of target and flags, gracefully tolerating errors.""" + + all_compile_commands = [] + + for target, flags in pairs: + log_info(f">>> Analyzing commands used in {target}") + # Parse the --file= flag, if any, passing along all other arguments to aquery + additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')] + file_flags = [arg[len('--file='):] for arg in sys.argv[1:] if arg.startswith('--file=')] + if len(file_flags) > 1: + log_error(">>> At most one --file flag is supported.") + sys.exit(1) + if any(arg.startswith('--file') for arg in additional_flags): + log_error(">>> Only the --file= form is supported.") + sys.exit(1) + + + # Screen the remaining flags for obvious issues to help people debug. + + # Detect anything that looks like a build target in the flags, and issue a warning. + # Note that positional arguments after -- are all interpreted as target patterns. + # And that we have to look for targets. Checking for a - prefix is not enough. Consider the case of `-c opt`, leading to a false positive. + if ('--' in additional_flags + or any(re.match(r'-?(@|:|//)', f) for f in additional_flags)): + log_warning(""">>> The flags you passed seem to contain targets. + Try adding them as targets in your refresh_compile_commands rather than flags. + [Specifying targets at runtime isn't supported yet, and in a moment, Bazel will likely fail to parse without our help. If you need to be able to specify targets at runtime, and can't easily just add them to your refresh_compile_commands, please open an issue or file a PR. You may also want to refer to https://github.com/hedronvision/bazel-compile-commands-extractor/issues/62.]""") + + # Quick (imperfect) effort at detecting flags in the targets. + # Can't detect flags starting with -, because they could be subtraction patterns. + if any(target.startswith('--') for target in shlex.split(target)): + log_warning(""">>> The target you specified seems to contain flags. + Try adding them as flags in your refresh_compile_commands rather than targets. + In a moment, Bazel will likely fail to parse.""") + + # Then, actually query Bazel's compile actions for that configured target + target_statement_candidates = [] + file_path = None - # Screen the remaining flags for obvious issues to help people debug. - - # Detect anything that looks like a build target in the flags, and issue a warning. - # Note that positional arguments after -- are all interpreted as target patterns. - # And that we have to look for targets. Checking for a - prefix is not enough. Consider the case of `-c opt`, leading to a false positive. - if ('--' in additional_flags - or any(re.match(r'-?(@|:|//)', f) for f in additional_flags)): - log_warning(""">>> The flags you passed seem to contain targets. - Try adding them as targets in your refresh_compile_commands rather than flags. - [Specifying targets at runtime isn't supported yet, and in a moment, Bazel will likely fail to parse without our help. If you need to be able to specify targets at runtime, and can't easily just add them to your refresh_compile_commands, please open an issue or file a PR. You may also want to refer to https://github.com/hedronvision/bazel-compile-commands-extractor/issues/62.]""") - - # Quick (imperfect) effort at detecting flags in the targets. - # Can't detect flags starting with -, because they could be subtraction patterns. - if any(target.startswith('--') for target in shlex.split(target)): - log_warning(""">>> The target you specified seems to contain flags. - Try adding them as flags in your refresh_compile_commands rather than targets. - In a moment, Bazel will likely fail to parse.""") - - - # Then, actually query Bazel's compile actions for that configured target - target_statement_candidates = [] - file_path = None - compile_commands = [] - - if file_flags: - file_path = file_flags[0] - rel_path = os.path.relpath(file_path, os.getcwd()) - if not rel_path.startswith(".."): - log_info(f">>> Detected file path {file_path} is relative path changed to {rel_path}") - file_path = rel_path - - target_statement = f"deps('{target}')" - if file_path.endswith(_get_files.source_extensions): - target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") + if file_flags: + file_path = file_flags[0] + rel_path = os.path.relpath(file_path, os.getcwd()) + if not rel_path.startswith(".."): + log_info(f">>> Detected file path {file_path} is relative path changed to {rel_path}") + file_path = rel_path + + target_statement = f"deps('{target}')" + if file_path.endswith(_get_files.source_extensions): + target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") + else: + fname = os.path.basename(file_path) + label_candidates = subprocess.check_output(['bazel', 'query', f"filter('{fname}$', {target_statement})"], stderr = subprocess.PIPE, text = True).split() + # TODO compatible with windows file path + file_candidates = list(filter(lambda label: file_path in label.replace(':', '/'), label_candidates)) + file_statement = '|'.join(file_candidates) if len(file_candidates) > 0 else fname + + header_target_statement = f"let v = {target_statement} in attr(hdrs, '{file_statement}', $v) + attr(srcs, '{file_statement}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. + target_statement_candidates.extend([ + header_target_statement, + f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. + target_statement, + ]) else: - fname = os.path.basename(file_path) - label_candidates = subprocess.check_output(['bazel', 'query', f"filter('{fname}$', {target_statement})"], stderr = subprocess.PIPE, text = True).split() - # TODO compatible with windows file path - file_candidates = list(filter(lambda label: file_path in label.replace(':', '/'), label_candidates)) - file_statement = '|'.join(file_candidates) if len(file_candidates) > 0 else fname - - header_target_statement = f"let v = {target_statement} in attr(hdrs, '{file_statement}', $v) + attr(srcs, '{file_statement}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. - target_statement_candidates.extend([ - header_target_statement, - f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. - target_statement, - ]) - else: - if {exclude_external_sources}: - # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. - target_statement_candidates.append(f"filter('^(//|@//)',{target_statement})") - - found = False - for target_statement in target_statement_candidates: - commands = _get_compile_commands_for_aquery(target_statement, additional_flags, file_path) - compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. + if {exclude_external_sources}: + # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. + target_statement_candidates.append(f"filter('^(//|@//)',{target_statement})") + + found = False + compile_commands = [] + for target_statement in target_statement_candidates: + commands = _get_compile_commands_for_aquery(target_statement, additional_flags, file_path) + compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. + if file_flags: + if any(command.file.endswith(file_path) for command in commands): + found = True + break + log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement} + Continuing gracefully...""") + + all_compile_commands.extend(compile_commands) + if file_flags: - if any(command.file.endswith(file_path) for command in commands): - found = True + if found: + log_success(f">>> Finished extracting commands for {target} with --file {file_path}") break - log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement} - Continuing gracefully...""") - - if file_flags and not found: - log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target} - Continuing gracefully...""") + else: + log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target} + Continuing gracefully...""") - if not compile_commands: - log_warning(f""">>> Bazel lists no applicable compile commands for {target} - If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). - Continuing gracefully...""") + if not compile_commands: + log_warning(f""">>> Bazel lists no applicable compile commands for {target} + If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). + Continuing gracefully...""") - log_success(f">>> Finished extracting commands for {target}") - return compile_commands + log_success(f">>> Finished extracting commands for {target}") + return all_compile_commands def _ensure_external_workspaces_link_exists(): @@ -1156,16 +1165,13 @@ def _ensure_cwd_is_workspace_root(): _ensure_gitignore_entries_exist() _ensure_external_workspaces_link_exists() - # TODO for --file, don't continue traversing targets after the first command has been found. Probably push this looping and template expansion inside of _get_commands(). target_flag_pairs = [ # Begin: template filled by Bazel {target_flag_pairs} # End: template filled by Bazel ] - compile_command_entries = [] - for (target, flags) in target_flag_pairs: - compile_command_entries.extend(_get_commands(target, flags)) + compile_command_entries = _get_commands(target_flag_pairs) if not compile_command_entries: log_error(">>> Not writing to compile_commands.json, since no commands were extracted.") From 97644fd6eaa4e2e0b58313a90233fb92e7f0a2d9 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Sat, 6 May 2023 13:44:24 +0800 Subject: [PATCH 38/45] Revert "refresh.template.py: for --file, don't continue traversing targets after the first command has been found" This reverts commit 0453be435112dfec20ff549273ab69347c28df4b. --- refresh.template.py | 178 +++++++++++++++++++++----------------------- 1 file changed, 86 insertions(+), 92 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index df73615..4c02afa 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -942,104 +942,95 @@ def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aq return _convert_compile_commands(parsed_aquery_output, focused_on_file) -def _get_commands(pairs: list): - """Return compile_commands.json entries for a given list of target and flags, gracefully tolerating errors.""" - - all_compile_commands = [] - - for target, flags in pairs: - log_info(f">>> Analyzing commands used in {target}") - # Parse the --file= flag, if any, passing along all other arguments to aquery - additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')] - file_flags = [arg[len('--file='):] for arg in sys.argv[1:] if arg.startswith('--file=')] - if len(file_flags) > 1: - log_error(">>> At most one --file flag is supported.") - sys.exit(1) - if any(arg.startswith('--file') for arg in additional_flags): - log_error(">>> Only the --file= form is supported.") - sys.exit(1) - - - # Screen the remaining flags for obvious issues to help people debug. - - # Detect anything that looks like a build target in the flags, and issue a warning. - # Note that positional arguments after -- are all interpreted as target patterns. - # And that we have to look for targets. Checking for a - prefix is not enough. Consider the case of `-c opt`, leading to a false positive. - if ('--' in additional_flags - or any(re.match(r'-?(@|:|//)', f) for f in additional_flags)): - log_warning(""">>> The flags you passed seem to contain targets. - Try adding them as targets in your refresh_compile_commands rather than flags. - [Specifying targets at runtime isn't supported yet, and in a moment, Bazel will likely fail to parse without our help. If you need to be able to specify targets at runtime, and can't easily just add them to your refresh_compile_commands, please open an issue or file a PR. You may also want to refer to https://github.com/hedronvision/bazel-compile-commands-extractor/issues/62.]""") - - # Quick (imperfect) effort at detecting flags in the targets. - # Can't detect flags starting with -, because they could be subtraction patterns. - if any(target.startswith('--') for target in shlex.split(target)): - log_warning(""">>> The target you specified seems to contain flags. - Try adding them as flags in your refresh_compile_commands rather than targets. - In a moment, Bazel will likely fail to parse.""") - +def _get_commands(target: str, flags: str): + """Return compile_commands.json entries for a given target and flags, gracefully tolerating errors.""" + log_info(f">>> Analyzing commands used in {target}") + + # Parse the --file= flag, if any, passing along all other arguments to aquery + additional_flags = shlex.split(flags) + [arg for arg in sys.argv[1:] if not arg.startswith('--file=')] + file_flags = [arg[len('--file='):] for arg in sys.argv[1:] if arg.startswith('--file=')] + if len(file_flags) > 1: + log_error(">>> At most one --file flag is supported.") + sys.exit(1) + if any(arg.startswith('--file') for arg in additional_flags): + log_error(">>> Only the --file= form is supported.") + sys.exit(1) - # Then, actually query Bazel's compile actions for that configured target - target_statement_candidates = [] - file_path = None - if file_flags: - file_path = file_flags[0] - rel_path = os.path.relpath(file_path, os.getcwd()) - if not rel_path.startswith(".."): - log_info(f">>> Detected file path {file_path} is relative path changed to {rel_path}") - file_path = rel_path - - target_statement = f"deps('{target}')" - if file_path.endswith(_get_files.source_extensions): - target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") - else: - fname = os.path.basename(file_path) - label_candidates = subprocess.check_output(['bazel', 'query', f"filter('{fname}$', {target_statement})"], stderr = subprocess.PIPE, text = True).split() - # TODO compatible with windows file path - file_candidates = list(filter(lambda label: file_path in label.replace(':', '/'), label_candidates)) - file_statement = '|'.join(file_candidates) if len(file_candidates) > 0 else fname - - header_target_statement = f"let v = {target_statement} in attr(hdrs, '{file_statement}', $v) + attr(srcs, '{file_statement}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. - target_statement_candidates.extend([ - header_target_statement, - f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. - target_statement, - ]) + # Screen the remaining flags for obvious issues to help people debug. + + # Detect anything that looks like a build target in the flags, and issue a warning. + # Note that positional arguments after -- are all interpreted as target patterns. + # And that we have to look for targets. Checking for a - prefix is not enough. Consider the case of `-c opt`, leading to a false positive. + if ('--' in additional_flags + or any(re.match(r'-?(@|:|//)', f) for f in additional_flags)): + log_warning(""">>> The flags you passed seem to contain targets. + Try adding them as targets in your refresh_compile_commands rather than flags. + [Specifying targets at runtime isn't supported yet, and in a moment, Bazel will likely fail to parse without our help. If you need to be able to specify targets at runtime, and can't easily just add them to your refresh_compile_commands, please open an issue or file a PR. You may also want to refer to https://github.com/hedronvision/bazel-compile-commands-extractor/issues/62.]""") + + # Quick (imperfect) effort at detecting flags in the targets. + # Can't detect flags starting with -, because they could be subtraction patterns. + if any(target.startswith('--') for target in shlex.split(target)): + log_warning(""">>> The target you specified seems to contain flags. + Try adding them as flags in your refresh_compile_commands rather than targets. + In a moment, Bazel will likely fail to parse.""") + + + # Then, actually query Bazel's compile actions for that configured target + target_statement_candidates = [] + file_path = None + compile_commands = [] + + if file_flags: + file_path = file_flags[0] + rel_path = os.path.relpath(file_path, os.getcwd()) + if not rel_path.startswith(".."): + log_info(f">>> Detected file path {file_path} is relative path changed to {rel_path}") + file_path = rel_path + + target_statement = f"deps('{target}')" + if file_path.endswith(_get_files.source_extensions): + target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: - if {exclude_external_sources}: - # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. - target_statement_candidates.append(f"filter('^(//|@//)',{target_statement})") - - found = False - compile_commands = [] - for target_statement in target_statement_candidates: - commands = _get_compile_commands_for_aquery(target_statement, additional_flags, file_path) - compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. - if file_flags: - if any(command.file.endswith(file_path) for command in commands): - found = True - break - log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement} - Continuing gracefully...""") - - all_compile_commands.extend(compile_commands) - + fname = os.path.basename(file_path) + label_candidates = subprocess.check_output(['bazel', 'query', f"filter('{fname}$', {target_statement})"], stderr = subprocess.PIPE, text = True).split() + # TODO compatible with windows file path + file_candidates = list(filter(lambda label: file_path in label.replace(':', '/'), label_candidates)) + file_statement = '|'.join(file_candidates) if len(file_candidates) > 0 else fname + + header_target_statement = f"let v = {target_statement} in attr(hdrs, '{file_statement}', $v) + attr(srcs, '{file_statement}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. + target_statement_candidates.extend([ + header_target_statement, + f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. + target_statement, + ]) + else: + if {exclude_external_sources}: + # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. + target_statement_candidates.append(f"filter('^(//|@//)',{target_statement})") + + found = False + for target_statement in target_statement_candidates: + commands = _get_compile_commands_for_aquery(target_statement, additional_flags, file_path) + compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. if file_flags: - if found: - log_success(f">>> Finished extracting commands for {target} with --file {file_path}") + if any(command.file.endswith(file_path) for command in commands): + found = True break - else: - log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target} - Continuing gracefully...""") + log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement} + Continuing gracefully...""") + + if file_flags and not found: + log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target} + Continuing gracefully...""") - if not compile_commands: - log_warning(f""">>> Bazel lists no applicable compile commands for {target} - If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). - Continuing gracefully...""") + if not compile_commands: + log_warning(f""">>> Bazel lists no applicable compile commands for {target} + If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). + Continuing gracefully...""") - log_success(f">>> Finished extracting commands for {target}") - return all_compile_commands + log_success(f">>> Finished extracting commands for {target}") + return compile_commands def _ensure_external_workspaces_link_exists(): @@ -1165,13 +1156,16 @@ def _ensure_cwd_is_workspace_root(): _ensure_gitignore_entries_exist() _ensure_external_workspaces_link_exists() + # TODO for --file, don't continue traversing targets after the first command has been found. Probably push this looping and template expansion inside of _get_commands(). target_flag_pairs = [ # Begin: template filled by Bazel {target_flag_pairs} # End: template filled by Bazel ] - compile_command_entries = _get_commands(target_flag_pairs) + compile_command_entries = [] + for (target, flags) in target_flag_pairs: + compile_command_entries.extend(_get_commands(target, flags)) if not compile_command_entries: log_error(">>> Not writing to compile_commands.json, since no commands were extracted.") From e8a986b6a6b6a00c194d3ff8a0953e92a2c369bc Mon Sep 17 00:00:00 2001 From: Snorlax Date: Sat, 6 May 2023 13:44:31 +0800 Subject: [PATCH 39/45] Revert "normalize file path if the --file is a subpath of the cwd" This reverts commit 6a6777a8a5054c2cf8302537863998d3783f9a9b. --- refresh.template.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 4c02afa..5af6b1d 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -983,11 +983,6 @@ def _get_commands(target: str, flags: str): if file_flags: file_path = file_flags[0] - rel_path = os.path.relpath(file_path, os.getcwd()) - if not rel_path.startswith(".."): - log_info(f">>> Detected file path {file_path} is relative path changed to {rel_path}") - file_path = rel_path - target_statement = f"deps('{target}')" if file_path.endswith(_get_files.source_extensions): target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") @@ -1002,7 +997,7 @@ def _get_commands(target: str, flags: str): target_statement_candidates.extend([ header_target_statement, f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. - target_statement, + f'deps({target})', # TODO: Let's detect out-of-bazel, absolute paths and run this if and only if we're looking for a system header. We need to think about how we want to handle absolute paths more generally, perhaps normalizing them to relative if possible, like with the windows absolute path issue, above. ]) else: if {exclude_external_sources}: From e67c0bd2b625b350f25c4c5ca50746117b47435c Mon Sep 17 00:00:00 2001 From: Snorlax Date: Sat, 6 May 2023 13:44:39 +0800 Subject: [PATCH 40/45] Revert "refresh.template.py: changed the _convert_compile_commands to return a list of SimpleNamespace objects instead of yielding a dictionary & Removal type cast" This reverts commit 40b9131c9dd4821b538c750aaf98c084762ec026. --- refresh.template.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 5af6b1d..5225e76 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -810,7 +810,7 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): """Converts from Bazel's aquery format to de-Bazeled compile_commands.json entries. Input: jsonproto output from aquery, pre-filtered to (Objective-)C(++) compile actions for a given build. - Returns: Corresponding entries for a compile_commands.json, with commas after each entry, describing all ways every file is being compiled. + Yields: Corresponding entries for a compile_commands.json, with commas after each entry, describing all ways every file is being compiled. Also includes one entry per header, describing one way it is compiled (to work around https://github.com/clangd/clangd/issues/123). Crucially, this de-Bazels the compile commands it takes as input, leaving something clangd can understand. The result is a command that could be run from the workspace root directly, with no bazel-specific environment variables, etc. @@ -857,8 +857,7 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): If that's a source file, please report this. We should work to improve the performance. If that's a header file, we should probably do the same, but it may be unavoidable.""") - # Return as compile_commands.json entries - output_list = [] + # Yield as compile_commands.json entries header_files_already_written = set() for source_files, header_files, compile_command_args in outputs: # Only emit one entry per header @@ -872,8 +871,14 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): for file in itertools.chain(source_files, header_files_not_already_written): if file == 'external/bazel_tools/src/tools/launcher/dummy.cc': continue # Suppress Bazel internal files leaking through. Hopefully will prevent issues like https://github.com/hedronvision/bazel-compile-commands-extractor/issues/77 - output_list.append(types.SimpleNamespace(file=file, arguments=compile_command_args, directory=os.environ["BUILD_WORKSPACE_DIRECTORY"])) - return output_list + yield { # TODO for consistency, let's have this return a list if its parent is returning a list. That'd also let us strip the list() cast in the found = True block, below + # Docs about compile_commands.json format: https://clang.llvm.org/docs/JSONCompilationDatabase.html#format + 'file': file, + # Using `arguments' instead of 'command' because it's now preferred by clangd. Heads also that shlex.join doesn't work for windows cmd, so you'd need to use windows_list2cmdline if we ever switched back. For more, see https://github.com/hedronvision/bazel-compile-commands-extractor/issues/8#issuecomment-1090262263 + 'arguments': compile_command_args, + # Bazel gotcha warning: If you were tempted to use `bazel info execution_root` as the build working directory for compile_commands...search ImplementationReadme.md to learn why that breaks. + 'directory': os.environ["BUILD_WORKSPACE_DIRECTORY"], + } def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aquery_flags: typing.List[str], focused_on_file: str = None): @@ -1006,10 +1011,10 @@ def _get_commands(target: str, flags: str): found = False for target_statement in target_statement_candidates: - commands = _get_compile_commands_for_aquery(target_statement, additional_flags, file_path) + commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. if file_flags: - if any(command.file.endswith(file_path) for command in commands): + if any(command['file'].endswith(file_path) for command in commands): found = True break log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement} @@ -1171,16 +1176,16 @@ def _ensure_cwd_is_workspace_root(): previous_compile_command_entries = [] try: with open('compile_commands.json') as compile_commands_file: - previous_compile_command_entries = [types.SimpleNamespace(**d) for d in json.load(compile_commands_file)] + previous_compile_command_entries = json.load(compile_commands_file) except: log_warning(">>> Couldn't read previous compile_commands.json. Overwriting instead of merging...") else: - updated_files = set(entry.file for entry in compile_command_entries) - compile_command_entries += [entry for entry in previous_compile_command_entries if entry.file not in updated_files] + updated_files = set(entry['file'] for entry in compile_command_entries) + compile_command_entries += [entry for entry in previous_compile_command_entries if entry['file'] not in updated_files] with open('compile_commands.json', 'w') as output_file: json.dump( - [vars(entry) for entry in compile_command_entries], + compile_command_entries, output_file, indent=2, # Yay, human readability! check_circular=False # For speed. From b785c3e01e29a47c68b9abee39a779998c4c219f Mon Sep 17 00:00:00 2001 From: Snorlax Date: Sat, 6 May 2023 13:44:45 +0800 Subject: [PATCH 41/45] Revert "refresh.template.py: simplify/unify logic around target_statement_candidates and warning" This reverts commit 9ce17168b54a2838705114b94c28bba3fb3efb84. --- refresh.template.py | 49 ++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 5225e76..0befd3a 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -939,8 +939,14 @@ def _get_compile_commands_for_aquery(aquery_target_statement: str, additional_aq return [] if not getattr(parsed_aquery_output, 'actions', None): # Unifies cases: No actions (or actions list is empty) + # TODO the different flags warning in stderr is common enough that we might want to either filter it or adjust this messaging. + # TODO simplify/unify logic around warning about not being able to find commands. if aquery_process.stderr: log_warning(f""">>> Bazel lists no applicable compile commands for {target}, probably because of errors in your BUILD files, printed above. + Continuing gracefully...""") + elif not focused_on_file: + log_warning(f""">>> Bazel lists no applicable compile commands for {target} + If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). Continuing gracefully...""") return [] @@ -982,13 +988,12 @@ def _get_commands(target: str, flags: str): # Then, actually query Bazel's compile actions for that configured target - target_statement_candidates = [] - file_path = None - compile_commands = [] - + target_statement = f"deps('{target}')" + compile_commands = [] # TODO simplify loop, especially if we can reduce it to one command per case (see below)? Move warning messages outside? if file_flags: file_path = file_flags[0] - target_statement = f"deps('{target}')" + found = False + target_statement_candidates = [] if file_path.endswith(_get_files.source_extensions): target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: @@ -1004,30 +1009,24 @@ def _get_commands(target: str, flags: str): f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. f'deps({target})', # TODO: Let's detect out-of-bazel, absolute paths and run this if and only if we're looking for a system header. We need to think about how we want to handle absolute paths more generally, perhaps normalizing them to relative if possible, like with the windows absolute path issue, above. ]) - else: - if {exclude_external_sources}: - # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. - target_statement_candidates.append(f"filter('^(//|@//)',{target_statement})") - - found = False - for target_statement in target_statement_candidates: - commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) - compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. - if file_flags: + for target_statement in target_statement_candidates: + commands = list(_get_compile_commands_for_aquery(target_statement, additional_flags, file_path)) + compile_commands.extend(commands) # If we did the work to generate a command, we'll update it, whether it's for the requested file or not. if any(command['file'].endswith(file_path) for command in commands): found = True break - log_info(f""">>> Couldn't quickly find a compile command for {file_path} in {target} under {target_statement} - Continuing gracefully...""") - - if file_flags and not found: - log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target} - Continuing gracefully...""") - - if not compile_commands: - log_warning(f""">>> Bazel lists no applicable compile commands for {target} + if not found: + log_warning(f""">>> Couldn't quickly find a compile command for {file_path} in {target} + Continuing gracefully...""") # TODO simplify/unify logic around warning about not being able to find commands. + else: + if {exclude_external_sources}: + # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. + target_statement = f"filter('^(//|@//)',{target_statement})" + compile_commands.extend(_get_compile_commands_for_aquery(target_statement, additional_flags)) + if not compile_commands: + log_warning(f""">>> Bazel lists no applicable compile commands for {target} If this is a header-only library, please instead specify a test or binary target that compiles it (search "header-only" in README.md). - Continuing gracefully...""") + Continuing gracefully...""") # TODO simplify/unify logic around warning about not being able to find commands. log_success(f">>> Finished extracting commands for {target}") return compile_commands From a1520557359da57c45971d0574eae7a246838687 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Sat, 6 May 2023 13:44:51 +0800 Subject: [PATCH 42/45] Revert "refersh.template.py: running a preliminary query to get specified file label" This reverts commit f8b046e3e23e4fce4a65bac0eef8ac57cb0f5e4f. --- refresh.template.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 0befd3a..fb7a064 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -997,13 +997,8 @@ def _get_commands(target: str, flags: str): if file_path.endswith(_get_files.source_extensions): target_statement_candidates.append(f"inputs('{re.escape(file_path)}', {target_statement})") else: - fname = os.path.basename(file_path) - label_candidates = subprocess.check_output(['bazel', 'query', f"filter('{fname}$', {target_statement})"], stderr = subprocess.PIPE, text = True).split() - # TODO compatible with windows file path - file_candidates = list(filter(lambda label: file_path in label.replace(':', '/'), label_candidates)) - file_statement = '|'.join(file_candidates) if len(file_candidates) > 0 else fname - - header_target_statement = f"let v = {target_statement} in attr(hdrs, '{file_statement}', $v) + attr(srcs, '{file_statement}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. + fname = os.path.basename(file_path) # TODO consider running a preliminary aquery to make this more specific, getting the targets that generate the given file. Use outputs() aquery function. Should also test the case where the file is generated/is coming in as a filegroup--that's likely to be fixed by this change. + header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. target_statement_candidates.extend([ header_target_statement, f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. From 1d8f62e638397691323fcb86ddef362c90c33e3f Mon Sep 17 00:00:00 2001 From: Snorlax Date: Sat, 6 May 2023 13:44:57 +0800 Subject: [PATCH 43/45] Revert "refresh.template.py: quoting target_statement" This reverts commit ef335095c91eb5ca53a7c02d6e4ba7b15bccf3f0. --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index fb7a064..482c1d5 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -988,7 +988,7 @@ def _get_commands(target: str, flags: str): # Then, actually query Bazel's compile actions for that configured target - target_statement = f"deps('{target}')" + target_statement = f'deps({target})' # TODO we should always be quoting targets when we splice them in. Let's use single quotes like with mnemonic, above. See slightly down in https://bazel.build/query/language#tokens compile_commands = [] # TODO simplify loop, especially if we can reduce it to one command per case (see below)? Move warning messages outside? if file_flags: file_path = file_flags[0] From 33b2310cdfa69858ddcf9621caefa936914b6826 Mon Sep 17 00:00:00 2001 From: Snorlax Date: Sat, 6 May 2023 13:45:04 +0800 Subject: [PATCH 44/45] Revert "refresh.template.py: Estimate the number of actions that can be run based on timeout" This reverts commit 89f583a40cad5bca085130e319f9bf049fcfc6fa. --- refresh.template.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 482c1d5..75f8e53 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -831,20 +831,11 @@ def _convert_compile_commands(aquery_output, focused_on_file: str = None): max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor ) as threadpool: found_header_focused_upon = threading.Event() # MIN_PY=3.9: Consider replacing with threadpool.shutdown(cancel_futures=True), possibly also eliminating threading import - timeout = 3 - measure_action_count = len(aquery_output.actions) if not focused_on_file else 100 - measure_start_time = time.time() output_iterator = threadpool.map( lambda compile_action: _get_cpp_command_for_files(compile_action, found_header_focused_upon, focused_on_file), # Binding along side inputs - aquery_output.actions[:measure_action_count] + aquery_output.actions, + timeout=3 if focused_on_file else None # If running in fast, interactive mode with --file, we need to cap latency. #TODO test timeout--if it doesn't work, cap input length here, before passing in array. Might also want to divide timeout/cap by #targets ) - cost_time = time.time() - measure_start_time - if focused_on_file and cost_time < timeout: - estimate_action_count = min(int((timeout - cost_time) / cost_time * measure_action_count), 5000) - output_iterator = itertools.chain(output_iterator, threadpool.map( - lambda compile_action: _get_cpp_command_for_files(compile_action, found_header_focused_upon, focused_on_file), # Binding along side inputs - aquery_output.actions[measure_action_count:measure_action_count + estimate_action_count] - )) # Collect outputs, tolerating any timeouts outputs = [] try: From 84822bad197c258d1699f0497c12b48657c265d9 Mon Sep 17 00:00:00 2001 From: Chris Sauer Date: Fri, 12 May 2023 16:09:54 -0700 Subject: [PATCH 45/45] Add link to deduped aspects cquery/aquery aspects issue --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 75f8e53..5d5f12d 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -992,7 +992,7 @@ def _get_commands(target: str, flags: str): header_target_statement = f"let v = {target_statement} in attr(hdrs, '{fname}', $v) + attr(srcs, '{fname}', $v)" # Bazel does not list headers as direct inputs, but rather hides them behind "middlemen", necessitating a query like this. target_statement_candidates.extend([ header_target_statement, - f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. + f"allpaths({target}, {header_target_statement})", # Ordering is ideal, breadth-first from the deepest dependency, despite the docs. TODO (1) There's a bazel bug that produces extra actions, not on the path but downstream, so we probably want to pass --noinclude_aspects per https://github.com/bazelbuild/bazel/issues/18289 and https://github.com/bazelbuild/bazel/issues/16310 to eliminate them (at the cost of some valid aspects). (2) We might want to benchmark with --infer_universe_scope (if supported) and --universe-scope=target with query allrdeps({header_target_statement}, ) or rdeps, checking speed but also ordering (the docs indicate it is likely to be lost, which is a problem) and for inclusion of the header target. We'd guess it'll have the same aspects bug as allpaths. (3) We probably also also want to *just* run this query, not the whole list, since it captures the former and is therefore unlikely to add much latency, since a given header is probabably either used internally to the target (find on first match) for header-only (must traverse all paths in all targets until you get a match) for all top-level targets, and since we can separate out the last, see below. f'deps({target})', # TODO: Let's detect out-of-bazel, absolute paths and run this if and only if we're looking for a system header. We need to think about how we want to handle absolute paths more generally, perhaps normalizing them to relative if possible, like with the windows absolute path issue, above. ]) for target_statement in target_statement_candidates: