From f11821ea82aeda58729ba153c2ef6f15bbe4aa8b Mon Sep 17 00:00:00 2001 From: Maciej Urbanski Date: Mon, 13 Nov 2023 18:11:28 +0100 Subject: [PATCH] add `cat` command and basic B2 URI support --- CHANGELOG.md | 3 + b2/_utils/uri.py | 67 ++++++++++++++++++++ b2/arg_parser.py | 18 ++++++ b2/console_tool.py | 55 ++++++++++++++++ test/integration/test_b2_command_line.py | 8 +++ test/unit/_utils/test_uri.py | 63 ++++++++++++++++++ test/unit/console_tool/test_download_file.py | 51 ++++++++++++--- test/unit/test_console_tool.py | 8 ++- 8 files changed, 262 insertions(+), 11 deletions(-) create mode 100644 b2/_utils/uri.py create mode 100644 test/unit/_utils/test_uri.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b707b9c6..6e6613b4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +* Add `cat` command for downloading file contents directly to stdout + ### Fixed * Emit `Using https://api.backblazeb2.com` message to stderr instead of stdout, therefor prevent JSON output corruption diff --git a/b2/_utils/uri.py b/b2/_utils/uri.py new file mode 100644 index 000000000..c625c4e60 --- /dev/null +++ b/b2/_utils/uri.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import dataclasses +import pathlib +import urllib +from pathlib import Path + + +class B2URIBase: + pass + + +@dataclasses.dataclass +class B2URI(B2URIBase): + bucket: str + path: str + + def __str__(self) -> str: + return f"b2://{self.bucket}{self.path}" + + def is_dir(self) -> bool: + """ + Return whether the path is a directory. + + Please note this is symbolic. + It is possible for a file to have a trailing slash, but it is HIGHLY discouraged, and not supported by B2 CLI. 
+ + :return: True if the path is a directory, False if it's a file + """ + return self.path.endswith("/") + + +@dataclasses.dataclass +class B2FileIdURI(B2URIBase): + file_id: str + + def __str__(self) -> str: + return f"b2id://{self.file_id}" + + +def parse_uri(uri: str) -> Path | B2URI | B2FileIdURI: + parsed = urllib.parse.urlparse(uri) + if parsed.scheme == "": + return pathlib.Path(uri) + return _parse_b2_uri(uri, parsed) + + +def parse_b2_uri(uri: str) -> B2URI | B2FileIdURI: + parsed = urllib.parse.urlparse(uri) + return _parse_b2_uri(uri, parsed) + + +def _parse_b2_uri(uri, parsed: urllib.parse.ParseResult) -> B2URI | B2FileIdURI: + if parsed.scheme in ("b2", "b2id"): + if not parsed.netloc: + raise ValueError(f"Invalid B2 URI: {uri!r}") + elif parsed.password or parsed.username: + raise ValueError( + "Invalid B2 URI: credentials passed using `user@password:` syntax are not supported in URI" + ) + + if parsed.scheme == "b2": + return B2URI(bucket=parsed.netloc, path=parsed.path[1:]) + elif parsed.scheme == "b2id": + return B2FileIdURI(file_id=parsed.netloc) + else: + raise ValueError(f"Unsupported URI scheme: {parsed.scheme!r}") diff --git a/b2/arg_parser.py b/b2/arg_parser.py index 3f61ae3aa..45d60673f 100644 --- a/b2/arg_parser.py +++ b/b2/arg_parser.py @@ -9,6 +9,7 @@ ###################################################################### import argparse +import functools import locale import re import sys @@ -148,3 +149,20 @@ def parse_default_retention_period(s): 'default retention period must be in the form of "X days|years "' ) return RetentionPeriod(**{m.group('unit'): int(m.group('duration'))}) + + +def wrap_with_argument_type_error(func, translator=str, exc_type=ValueError): + """ + Wrap a function that may raise an exception into a function that raises ArgumentTypeError instead. 
+ """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as e: + if isinstance(e, exc_type): + raise argparse.ArgumentTypeError(translator(e)) + raise + + return wrapper diff --git a/b2/console_tool.py b/b2/console_tool.py index f90a84c32..3f254e080 100644 --- a/b2/console_tool.py +++ b/b2/console_tool.py @@ -122,12 +122,14 @@ from b2._cli.obj_loads import validated_loads from b2._cli.shell import detect_shell from b2._utils.filesystem import STDOUT_FILE_PATH, points_to_fifo +from b2._utils.uri import B2URI, B2FileIdURI, B2URIBase, parse_b2_uri from b2.arg_parser import ( ArgumentParser, parse_comma_separated_list, parse_default_retention_period, parse_millis_from_float_timestamp, parse_range, + wrap_with_argument_type_error, ) from b2.json_encoder import B2CliJsonEncoder from b2.version import VERSION @@ -202,6 +204,9 @@ def local_path_to_b2_path(path): return path.replace(os.path.sep, '/') +B2_URI_ARG_TYPE = wrap_with_argument_type_error(parse_b2_uri) + + def keyboard_interrupt_handler(signum, frame): raise KeyboardInterrupt() @@ -1514,6 +1519,56 @@ def run(self, args): return 0 +@B2.register_subcommand +class Cat( + DownloadFileMixin, +): + """ + Download content of a file identified by B2 URI directly to stdout. + + {PROGRESSMIXIN} + {SOURCESSEMIXIN} + {WRITEBUFFERSIZEMIXIN} + {SKIPHASHVERIFICATIONMIXIN} + + Requires capability: + + - **readFiles** + """ + + @classmethod + def _setup_parser(cls, parser): + parser.add_argument( + 'b2uri', + type=B2_URI_ARG_TYPE, + help= + "B2 URI identifying the file to print, e.g. 
b2://yourBucket/file.txt or b2id://fileId", + ) + super()._setup_parser(parser) + + def download_by_b2_uri( + self, b2_uri: B2URIBase, args: argparse.Namespace, local_filename + ) -> DownloadedFile: + progress_listener = make_progress_listener(local_filename, args.noProgress or args.quiet) + encryption_setting = self._get_source_sse_setting(args) + if isinstance(b2_uri, B2FileIdURI): + download = functools.partial(self.api.download_file_by_id, b2_uri.file_id) + elif isinstance(b2_uri, B2URI): + bucket = self.api.get_bucket_by_name(b2_uri.bucket) + download = functools.partial(bucket.download_file_by_name, b2_uri.path) + else: # This should never happen since there are no more subclasses of B2URIBase + raise ValueError(f'Unsupported B2 URI: {b2_uri!r}') + + return download(progress_listener=progress_listener, encryption=encryption_setting) + + def run(self, args): + super().run(args) + local_filename = self.get_local_output_filename('-') + downloaded_file = self.download_by_b2_uri(args.b2uri, args, local_filename) + downloaded_file.save_to(local_filename) + return 0 + + @B2.register_subcommand class GetAccountInfo(Command): """ diff --git a/test/integration/test_b2_command_line.py b/test/integration/test_b2_command_line.py index 3396b1d9c..737a1ca30 100755 --- a/test/integration/test_b2_command_line.py +++ b/test/integration/test_b2_command_line.py @@ -2698,3 +2698,11 @@ def test_download_file_stdout( assert b2_tool.should_succeed( ['download-file-by-id', '--quiet', uploaded_sample_file['fileId'], '-'], ) == sample_filepath.read_text() + + +def test_cat(b2_tool, bucket_name, sample_filepath, tmp_path, uploaded_sample_file): + assert b2_tool.should_succeed( + ['cat', f"b2://{bucket_name}/{uploaded_sample_file['fileName']}"], + ) == sample_filepath.read_text() + assert b2_tool.should_succeed(['cat', f"b2id://{uploaded_sample_file['fileId']}"] + ) == sample_filepath.read_text() diff --git a/test/unit/_utils/test_uri.py b/test/unit/_utils/test_uri.py new file mode 
100644 index 000000000..b93b0e51d --- /dev/null +++ b/test/unit/_utils/test_uri.py @@ -0,0 +1,63 @@ +from pathlib import Path + +import pytest + +from b2._utils.uri import B2URI, B2FileIdURI, parse_uri + + +def test_b2pathuri_str(): + uri = B2URI(bucket="testbucket", path="/path/to/file") + assert str(uri) == "b2://testbucket/path/to/file" + + +def test_b2pathuri_is_dir_true(): + uri = B2URI(bucket="testbucket", path="/path/to/directory/") + assert uri.is_dir() is True + + +def test_b2pathuri_is_dir_false(): + uri = B2URI(bucket="testbucket", path="/path/to/file") + assert uri.is_dir() is False + + +def test_b2fileuri_str(): + uri = B2FileIdURI(file_id="file123") + assert str(uri) == "b2id://file123" + + +@pytest.mark.parametrize( + "uri,expected", + [ + ("some/local/path", Path("some/local/path")), + ("./some/local/path", Path("some/local/path")), + ("b2://bucket/path/to/dir/", B2URI(bucket="bucket", path="path/to/dir/")), + ("b2id://file123", B2FileIdURI(file_id="file123")), + ], +) +def test_parse_uri(uri, expected): + assert parse_uri(uri) == expected + + +@pytest.mark.parametrize( + "uri, expected_exception_message", + [ + # Test cases for invalid B2 URIs (missing netloc part) + ("b2://", "Invalid B2 URI: 'b2://'"), + ("b2id://", "Invalid B2 URI: 'b2id://'"), + # Test cases for B2 URIs with credentials + ( + "b2://user@password:bucket/path", + "Invalid B2 URI: credentials passed using `user@password:` syntax are not supported in URI", + ), + ( + "b2id://user@password:file123", + "Invalid B2 URI: credentials passed using `user@password:` syntax are not supported in URI", + ), + # Test cases for unsupported URI schemes + ("unknown://bucket/path", "Unsupported URI scheme: 'unknown'"), + ], +) +def test_parse_uri_exceptions(uri, expected_exception_message): + with pytest.raises(ValueError) as exc_info: + parse_uri(uri) + assert expected_exception_message in str(exc_info.value) diff --git a/test/unit/console_tool/test_download_file.py 
b/test/unit/console_tool/test_download_file.py index 174129fc2..5d80c0bdf 100644 --- a/test/unit/console_tool/test_download_file.py +++ b/test/unit/console_tool/test_download_file.py @@ -50,7 +50,7 @@ def uploaded_file(b2_cli, bucket, local_file): b2_cli.run(['upload-file', bucket, str(local_file), filename]) return { 'bucket': bucket, - 'filename': filename, + 'fileName': filename, 'content': local_file.read_text(), } @@ -61,7 +61,7 @@ def test_download_file_by_name(b2_cli, local_file, uploaded_file, tmp_path): b2_cli.run( [ 'download-file-by-name', '--noProgress', uploaded_file['bucket'], - uploaded_file['filename'], + uploaded_file['fileName'], str(output_path) ], expected_stdout=EXPECTED_STDOUT_DOWNLOAD @@ -74,7 +74,7 @@ def test_download_file_by_name_quietly(b2_cli, uploaded_file, tmp_path): b2_cli.run( [ - 'download-file-by-name', '--quiet', uploaded_file['bucket'], uploaded_file['filename'], + 'download-file-by-name', '--quiet', uploaded_file['bucket'], uploaded_file['fileName'], str(output_path) ], expected_stdout='' @@ -117,7 +117,7 @@ def reader(): b2_cli.run( [ 'download-file-by-name', '--noProgress', uploaded_file['bucket'], - uploaded_file['filename'], + uploaded_file['fileName'], str(output_path) ], expected_stdout=EXPECTED_STDOUT_DOWNLOAD @@ -126,11 +126,44 @@ def reader(): assert output_string == uploaded_file['content'] -def test_download_file_by_name__to_stdout_by_alias(b2_cli, bucket, local_file, tmp_path): - """Test download_file_by_name stdout alias support""" +@pytest.fixture +def uploaded_stdout_txt(b2_cli, bucket, local_file, tmp_path): local_file.write_text('non-mocked /dev/stdout test ignore me') - b2_cli.run(['upload-file', bucket, str(local_file), 'stdout']) + b2_cli.run(['upload-file', bucket, str(local_file), 'stdout.txt']) + return { + 'bucket': bucket, + 'fileName': 'stdout.txt', + 'content': local_file.read_text(), + } + - b2_cli.run(['download-file-by-name', '--noProgress', bucket, 'stdout', '-'],) - assert True # the only 
expectation we have is that this doesn't explode, as we cannot capture /dev/stdout +def test_download_file_by_name__to_stdout_by_alias( + b2_cli, bucket, uploaded_stdout_txt, tmp_path, capfd +): + """Test download_file_by_name stdout alias support""" + b2_cli.run( + ['download-file-by-name', '--noProgress', bucket, uploaded_stdout_txt['fileName'], '-'], + ) + assert capfd.readouterr().out == uploaded_stdout_txt['content'] assert not pathlib.Path('-').exists() + + +def test_cat__b2_uri(b2_cli, bucket, uploaded_stdout_txt, tmp_path, capfd): + """Test that cat prints the content of a file addressed by a b2:// URI to stdout""" + b2_cli.run(['cat', '--noProgress', f"b2://{bucket}/{uploaded_stdout_txt['fileName']}"],) + assert capfd.readouterr().out == uploaded_stdout_txt['content'] + + +def test_cat__b2_uri__invalid(b2_cli, capfd): + b2_cli.run( + ['cat', "nothing/meaningful"], + expected_stderr=None, + expected_status=2, + ) + assert "argument b2uri: Unsupported URI scheme: ''" in capfd.readouterr().err + + +def test_cat__b2id_uri(b2_cli, bucket, uploaded_stdout_txt, tmp_path, capfd): + """Test that cat prints the content of a file addressed by a b2id:// URI to stdout""" + b2_cli.run(['cat', '--noProgress', "b2id://9999"],) + assert capfd.readouterr().out == uploaded_stdout_txt['content'] diff --git a/test/unit/test_console_tool.py b/test/unit/test_console_tool.py index 57aa6a606..285ba3312 100644 --- a/test/unit/test_console_tool.py +++ b/test/unit/test_console_tool.py @@ -134,6 +134,8 @@ def _remove_api_version_number(self, s): return re.sub(self.RE_API_VERSION, '/vx/', s) def _normalize_expected_output(self, text, format_vars=None): + if text is None: + return None format_vars = format_vars or {} return self._trim_leading_spaces(text).format( account_id=self.account_id, master_key=self.master_key, **format_vars @@ -213,7 +215,7 @@ def _run_command( ) print('EXPECTED TO FIND IN STDOUT:', repr(expected_part_of_stdout)) print('ACTUAL STDOUT: ', repr(actual_stdout)) - if expected_stderr != actual_stderr: + if expected_stderr is 
not None and expected_stderr != actual_stderr: print('EXPECTED STDERR:', repr(expected_stderr)) print('ACTUAL STDERR: ', repr(actual_stderr)) print(actual_stderr) @@ -235,8 +237,10 @@ def _run_command( self.assertIn(expected_part_of_stdout, actual_stdout) if unexpected_part_of_stdout is not None: self.assertNotIn(unexpected_part_of_stdout, actual_stdout) - self.assertEqual(expected_stderr, actual_stderr, 'stderr') + if expected_stderr is not None: + self.assertEqual(expected_stderr, actual_stderr, 'stderr') self.assertEqual(expected_status, actual_status, 'exit status code') + return actual_status, actual_stdout, actual_stderr @classmethod def _upload_multiple_files(cls, bucket):