From 357b6269deaa88eb8b5e3d435c7e18d7666ecc3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Urba=C5=84ski?= <122983254+mjurbanski-reef@users.noreply.github.com> Date: Fri, 10 Nov 2023 10:34:59 +0100 Subject: [PATCH] Fix stdout noise on auth breaking ls json output (#942) * fix `ls --json` being corrupted by `Using https://...` msg * refactor ls json streaming * remove leftovers after ls json streaming lefactor --- CHANGELOG.md | 6 +++ b2/console_tool.py | 81 ++++++++++++++++++++++------------ test/integration/helpers.py | 1 + test/unit/test_console_tool.py | 61 +++++++++++++------------ 4 files changed, 91 insertions(+), 58 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2d2f873d..4d7e6429a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +* Emit `Using https://api.backblazeb2.com` message to stderr instead of stdout, therefor prevent JSON output corruption + +### Changed +* Stream `ls --json` JSON output instead of dumping it only after all objects have been fetched + ## [3.12.0] - 2023-10-28 ### Added diff --git a/b2/console_tool.py b/b2/console_tool.py index c2f25d711..c9f8d0eac 100644 --- a/b2/console_tool.py +++ b/b2/console_tool.py @@ -685,23 +685,50 @@ def _parse_file_infos(cls, args_info): file_infos[parts[0]] = parts[1] return file_infos - def _print_json(self, data): - self._print( + def _print_json(self, data) -> None: + return self._print( json.dumps(data, indent=4, sort_keys=True, cls=B2CliJsonEncoder), enforce_output=True ) - def _print(self, *args, enforce_output=False): - self._print_standard_descriptor(self.stdout, 'stdout', *args, enforce_output=enforce_output) + def _print(self, *args, enforce_output: bool = False, end: Optional[str] = None) -> None: + return self._print_standard_descriptor( + self.stdout, "stdout", *args, enforce_output=enforce_output, end=end + ) - def _print_stderr(self, *args, **kwargs): - self._print_standard_descriptor(self.stderr, 'stderr', *args, enforce_output=True) + def _print_stderr(self, *args, end: Optional[str] = None) -> None: + return self._print_standard_descriptor( + self.stderr, "stderr", *args, enforce_output=True, end=end + ) - def _print_standard_descriptor(self, descriptor, descriptor_name, *args, enforce_output=False): + def _print_standard_descriptor( + self, + descriptor, + descriptor_name: str, + *args, + enforce_output: bool = False, + end: Optional[str] = None, + ) -> None: + """ + Prints to fd, unless quiet is set. + + :param descriptor: file descriptor to print to + :param descriptor_name: name of the descriptor, used for error reporting + :param args: object to be printed + :param enforce_output: overrides quiet setting; Should not be used for anything other than data + :param end: end of the line characters; None for default newline + """ if not self.quiet or enforce_output: - self._print_helper(descriptor, descriptor.encoding, descriptor_name, *args) + self._print_helper(descriptor, descriptor.encoding, descriptor_name, *args, end=end) @classmethod - def _print_helper(cls, descriptor, descriptor_encoding, descriptor_name, *args): + def _print_helper( + cls, + descriptor, + descriptor_encoding: str, + descriptor_name: str, + *args, + end: Optional[str] = None + ): try: descriptor.write(' '.join(args)) except UnicodeEncodeError: @@ -714,7 +741,7 @@ def _print_helper(cls, descriptor, descriptor_encoding, descriptor_name, *args): args = [arg.encode('ascii', 'backslashreplace').decode() for arg in args] sys.stderr.write("Trying to print: %s\n" % args) descriptor.write(' '.join(args)) - descriptor.write('\n') + descriptor.write("\n" if end is None else end) def __str__(self): return f'{self.__class__.__module__}.{self.__class__.__name__}' @@ -861,7 +888,7 @@ def authorize(self, application_key_id, application_key, realm): :return: exit status """ url = REALM_URLS.get(realm, realm) - self._print('Using %s' % url) + self._print_stderr(f"Using {url}") try: self.api.authorize_account(realm, application_key_id, application_key) @@ -1889,15 +1916,12 @@ def _setup_parser(cls, parser): parser.add_argument('folderName', nargs='?').completer = file_name_completer super()._setup_parser(parser) - def run(self, args): - super().run(args) + def _print_files(self, args): generator = self._get_ls_generator(args) for file_version, folder_name in generator: self._print_file_version(args, file_version, folder_name) - return 0 - def _print_file_version( self, args, @@ -1991,17 +2015,19 @@ def _setup_parser(cls, parser): super()._setup_parser(parser) def run(self, args): + super().run(args) if args.json: - # TODO: Make this work for an infinite generation. - # Use `_print_file_version` to print a single `file_version` and manage the external JSON list - # e.g. manually. However, to do it right, some sort of state needs to be kept e.g. info whether - # at least one element was written to the stream, so we can add a `,` on the start of another. - # That would sadly lead to an ugly formatting, so `_print` needs to be expanded with an ability - # to not print end-line character(s). - self._print_json([file_version for file_version, _ in self._get_ls_generator(args)]) - return 0 - - return super().run(args) + i = -1 + for i, (file_version, _) in enumerate(self._get_ls_generator(args)): + if i: + self._print(',', end='') + else: + self._print('[') + self._print_json(file_version) + self._print(']' if i >= 0 else '[]') + else: + self._print_files(args) + return 0 def _print_file_version( self, @@ -2215,9 +2241,10 @@ def _setup_parser(cls, parser): super()._setup_parser(parser) def run(self, args): - if args.dryRun: - return super().run(args) super().run(args) + if args.dryRun: + self._print_files(args) + return 0 failed_on_any_file = False messages_queue = queue.Queue() diff --git a/test/integration/helpers.py b/test/integration/helpers.py index 5a5e6d298..3d6893855 100755 --- a/test/integration/helpers.py +++ b/test/integration/helpers.py @@ -337,6 +337,7 @@ def should_equal(expected, actual): class CommandLine: EXPECTED_STDERR_PATTERNS = [ + re.compile(r'^Using https?://[\w.]+$'), # account auth re.compile(r'.*B/s]$', re.DOTALL), # progress bar re.compile(r'^\r?$'), # empty line re.compile( diff --git a/test/unit/test_console_tool.py b/test/unit/test_console_tool.py index 9416345b3..57aa6a606 100644 --- a/test/unit/test_console_tool.py +++ b/test/unit/test_console_tool.py @@ -84,7 +84,7 @@ def _run_command_ignore_output(self, argv): print('ACTUAL STDERR: ', repr(actual_stderr)) print(actual_stderr) - self.assertEqual('', actual_stderr, 'stderr') + assert re.match(r'^(|Using https?://[\w.]+)$', actual_stderr), f"stderr: {actual_stderr!r}" self.assertEqual(0, actual_status, 'exit status code') def _trim_leading_spaces(self, s): @@ -253,11 +253,9 @@ def _upload_multiple_files(cls, bucket): class TestConsoleTool(BaseConsoleToolTest): def test_authorize_with_bad_key(self): - expected_stdout = ''' - Using http://production.example.com - ''' - + expected_stdout = '' expected_stderr = ''' + Using http://production.example.com ERROR: unable to authorize account: Invalid authorization token. Server said: secret key is wrong (unauthorized) ''' @@ -271,12 +269,12 @@ def test_authorize_with_good_key_using_hyphen(self): assert self.account_info.get_account_auth_token() is None # Authorize an account with a good api key. - expected_stdout = """ + expected_stderr = """ Using http://production.example.com """ self._run_command( - ['authorize-account', self.account_id, self.master_key], expected_stdout, '', 0 + ['authorize-account', self.account_id, self.master_key], '', expected_stderr, 0 ) # Auth token should be in account info now @@ -287,12 +285,11 @@ def test_authorize_with_good_key_using_underscore(self): assert self.account_info.get_account_auth_token() is None # Authorize an account with a good api key. - expected_stdout = """ + expected_stderr = """ Using http://production.example.com """ - self._run_command( - ['authorize_account', self.account_id, self.master_key], expected_stdout, '', 0 + ['authorize_account', self.account_id, self.master_key], '', expected_stderr, 0 ) # Auth token should be in account info now @@ -303,7 +300,7 @@ def test_authorize_using_env_variables(self): assert self.account_info.get_account_auth_token() is None # Authorize an account with a good api key. - expected_stdout = """ + expected_stderr = """ Using http://production.example.com """ @@ -317,7 +314,7 @@ def test_authorize_using_env_variables(self): assert B2_APPLICATION_KEY_ID_ENV_VAR in os.environ assert B2_APPLICATION_KEY_ENV_VAR in os.environ - self._run_command(['authorize-account'], expected_stdout, '', 0) + self._run_command(['authorize-account'], '', expected_stderr, 0) # Auth token should be in account info now assert self.account_info.get_account_auth_token() is not None @@ -327,7 +324,7 @@ def test_authorize_towards_custom_realm(self): assert self.account_info.get_account_auth_token() is None # Authorize an account with a good api key. - expected_stdout = """ + expected_stderr = """ Using http://custom.example.com """ @@ -336,13 +333,13 @@ def test_authorize_towards_custom_realm(self): [ 'authorize-account', '--environment', 'http://custom.example.com', self.account_id, self.master_key - ], expected_stdout, '', 0 + ], '', expected_stderr, 0 ) # Auth token should be in account info now assert self.account_info.get_account_auth_token() is not None - expected_stdout = """ + expected_stderr = """ Using http://custom2.example.com """ # realm provided with env var @@ -352,7 +349,7 @@ def test_authorize_towards_custom_realm(self): } ): self._run_command( - ['authorize-account', self.account_id, self.master_key], expected_stdout, '', 0 + ['authorize-account', self.account_id, self.master_key], '', expected_stderr, 0 ) def test_create_key_and_authorize_with_it(self): @@ -370,8 +367,8 @@ def test_create_key_and_authorize_with_it(self): # Authorize with the key self._run_command( ['authorize-account', 'appKeyId0', 'appKey0'], - 'Using http://production.example.com\n', '', + 'Using http://production.example.com\n', 0, ) @@ -394,9 +391,8 @@ def test_create_key_with_authorization_from_env_vars(self): # The first time we're running on this cache there will be output from the implicit "authorize-account" call self._run_command( ['create-key', 'key1', 'listBuckets,listKeys'], - 'Using http://production.example.com\n' 'appKeyId0 appKey0\n', - '', + 'Using http://production.example.com\n', 0, ) @@ -417,9 +413,8 @@ def test_create_key_with_authorization_from_env_vars(self): # "authorize-account" is called when the key changes self._run_command( ['create-key', 'key1', 'listBuckets,listKeys'], - 'Using http://production.example.com\n' 'appKeyId2 appKey2\n', - '', + 'Using http://production.example.com\n', 0, ) @@ -431,9 +426,8 @@ def test_create_key_with_authorization_from_env_vars(self): ): self._run_command( ['create-key', 'key1', 'listBuckets,listKeys'], - 'Using http://custom.example.com\n' 'appKeyId3 appKey3\n', - '', + 'Using http://custom.example.com\n', 0, ) @@ -446,7 +440,8 @@ def test_authorize_key_without_list_buckets(self): # Authorize with the key self._run_command( ['authorize-account', 'appKeyId0', 'appKey0'], - 'Using http://production.example.com\n', + '', + 'Using http://production.example.com\n' 'ERROR: application key has no listBuckets capability, which is required for the b2 command-line tool\n', 1, ) @@ -519,8 +514,8 @@ def test_create_bucket_key_and_authorize_with_it(self): # Authorize with the key self._run_command( ['authorize-account', 'appKeyId0', 'appKey0'], - 'Using http://production.example.com\n', '', + 'Using http://production.example.com\n', 0, ) @@ -772,8 +767,8 @@ def test_keys(self): # authorize and make calls using application key with no restrictions self._run_command( - ['authorize-account', 'appKeyId0', 'appKey0'], 'Using http://production.example.com\n', - '', 0 + ['authorize-account', 'appKeyId0', 'appKey0'], '', + 'Using http://production.example.com\n', 0 ) self._run_command( ['list-buckets'], @@ -798,8 +793,8 @@ def test_keys(self): # authorize and make calls using an application key with bucket restrictions self._run_command( - ['authorize-account', 'appKeyId1', 'appKey1'], 'Using http://production.example.com\n', - '', 0 + ['authorize-account', 'appKeyId1', 'appKey1'], '', + 'Using http://production.example.com\n', 0 ) self._run_command( @@ -2351,7 +2346,8 @@ def test_list_buckets_not_allowed_for_app_key(self): # Authorize with the key, which should result in an error. self._run_command( ['authorize-account', 'appKeyId0', 'appKey0'], - 'Using http://production.example.com\n', + '', + 'Using http://production.example.com\n' 'ERROR: application key has no listBuckets capability, which is required for the b2 command-line tool\n', 1, ) @@ -2379,7 +2375,8 @@ def test_bucket_missing_for_bucket_key(self): # to be able to look up the name of the bucket. self._run_command( ['authorize-account', 'appKeyId0', 'appKey0'], - 'Using http://production.example.com\n', + '', + "Using http://production.example.com\n" "ERROR: unable to authorize account: Application key is restricted to a bucket that doesn't exist\n", 1, ) @@ -2739,6 +2736,8 @@ def test_rm_skipping_over_errors(self): ''' self._run_command(['ls', '--recursive', 'my-bucket'], expected_stdout) + +class TestVersionConsoleTool(BaseConsoleToolTest): def test_version(self): self._run_command(['version', '--short'], expected_stdout=f'{VERSION}\n') self._run_command(['version'], expected_stdout=f'b2 command line tool, version {VERSION}\n')