Skip to content

Commit

Permalink
Merge pull request #946 from reef-technologies/download_to_stdout
Browse files Browse the repository at this point in the history
Download to stdout & cat command
  • Loading branch information
mjurbanski-reef authored Nov 16, 2023
2 parents 38cb1ae + 06f2a9d commit 26a90d4
Show file tree
Hide file tree
Showing 22 changed files with 778 additions and 388 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,13 @@ jobs:
fail-fast: false
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "pypy-3.10"]
# pypy version pin was required due to 7.3.13 being broken: https://foss.heptapod.net/pypy/pypy/-/issues/4021
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "pypy-3.10-v7.3.12"]
exclude:
- os: "macos-latest"
python-version: "pypy-3.10"
python-version: "pypy-3.10-v7.3.12"
- os: "windows-latest"
python-version: "pypy-3.10"
python-version: "pypy-3.10-v7.3.12"
steps:
- uses: actions/checkout@v3
with:
Expand All @@ -103,7 +104,7 @@ jobs:
run: nox -vs integration -- -m "not require_secrets"
- name: Run integration tests (with secrets)
# Limit CI workload by running integration tests with secrets only on edge Python versions.
if: ${{ env.B2_TEST_APPLICATION_KEY != '' && env.B2_TEST_APPLICATION_KEY_ID != '' && contains(fromJSON('["3.7", "pypy-3.10", "3.11"]'), matrix.python-version) }}
if: ${{ env.B2_TEST_APPLICATION_KEY != '' && env.B2_TEST_APPLICATION_KEY_ID != '' && contains(fromJSON('["3.7", "pypy-3.10-v7.3.12", "3.11"]'), matrix.python-version) }}
run: nox -vs integration -- -m "require_secrets" --cleanup
test-docker:
needs: cleanup_buckets
Expand Down
6 changes: 4 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
* Add linux/arm64 as a build platform for the official Docker image
* Add linux/arm64 platform support to the official Docker image
* Add `cat` command for downloading file contents directly to stdout

### Fixed
* Emit `Using https://api.backblazeb2.com` message to stderr instead of stdout, thereby preventing JSON output corruption

### Changed
* Stream `ls --json` JSON output instead of dumping it only after all objects have been fetched
* Alias `-` to stdout in `download-file-by-name` or `download-file-by-id` command

## [3.12.0] - 2023-10-28

Expand Down Expand Up @@ -61,7 +63,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
* Add s3 endpoint to `get-account-info`

### Deprecated
* Support of `-` as a valid filename in `upload-file` command. In future `-` will be an alias for standard input.
* Deprecate support of `-` as a valid filename in `upload-file` command. In the future `-` will always be interpreted as standard input

### Changed
* Better help text for --corsRules
Expand Down
2 changes: 1 addition & 1 deletion b2/_cli/argcompleters.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from functools import wraps
from itertools import islice

from b2sdk.v2 import LIST_FILE_NAMES_MAX_LIMIT
from b2sdk.v2.api import B2Api

from b2._cli.b2api import _get_b2api_for_profile
from b2._cli.const import LIST_FILE_NAMES_MAX_LIMIT


def _with_api(func):
Expand Down
3 changes: 0 additions & 3 deletions b2/_cli/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,4 @@
DEFAULT_THREADS = 10

# Constants used in the B2 API
# TODO B2-47 move API related constants to b2sdk
CREATE_BUCKET_TYPES = ('allPublic', 'allPrivate')
DEFAULT_MIN_PART_SIZE = 5 * 1000 * 1000 # 5MB
LIST_FILE_NAMES_MAX_LIMIT = 10000 # https://www.backblaze.com/b2/docs/b2_list_file_names.html
20 changes: 0 additions & 20 deletions b2/_utils/filesystem.py

This file was deleted.

65 changes: 65 additions & 0 deletions b2/_utils/uri.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
######################################################################
#
# File: b2/_utils/uri.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
from __future__ import annotations

import dataclasses
import pathlib
import urllib
import urllib.parse
from pathlib import Path


class B2URIBase:
    """Common base class of the B2 URI value types; defines no members of its own."""


@dataclasses.dataclass
class B2URI(B2URIBase):
    """
    Reference to a file addressed by bucket name and path (``b2://`` scheme).
    """

    # Bucket name; `_parse_b2_uri` fills it from the URI's netloc.
    bucket: str
    # Path inside the bucket; `_parse_b2_uri` stores it WITHOUT the leading "/".
    path: str

    def __str__(self) -> str:
        """Render the reference back as a ``b2://bucket/path`` URI."""
        # `path` carries no leading slash, so the separator has to be put back here;
        # otherwise the result would read `b2://bucketpath` and could not be parsed again.
        return f"b2://{self.bucket}/{self.path}"


@dataclasses.dataclass
class B2FileIdURI(B2URIBase):
    """Reference to a file addressed directly by its file id (``b2id://`` scheme)."""

    # File id; `_parse_b2_uri` fills it from the URI's netloc.
    file_id: str

    def __str__(self) -> str:
        """Render the reference back as a ``b2id://<file_id>`` URI."""
        return "b2id://{}".format(self.file_id)


def parse_uri(uri: str) -> Path | B2URI | B2FileIdURI:
    """
    Interpret ``uri`` either as a local path or as a B2 URI.

    A string without any URI scheme is returned as a ``pathlib.Path``;
    everything else is handed over to ``_parse_b2_uri``.
    """
    components = urllib.parse.urlparse(uri)
    if components.scheme:
        return _parse_b2_uri(uri, components)
    # no scheme at all -> plain filesystem path
    return pathlib.Path(uri)


def parse_b2_uri(uri: str) -> B2URI | B2FileIdURI:
    """
    Parse a ``b2://bucket/path`` or ``b2id://fileId`` string.

    :param uri: URI string to parse
    :return: ``B2URI`` for the ``b2`` scheme, ``B2FileIdURI`` for the ``b2id`` scheme
    :raises ValueError: if the scheme is not supported, the bucket / file id part
                        is missing, or the URI carries credentials
    """
    parsed = urllib.parse.urlparse(uri)
    return _parse_b2_uri(uri, parsed)


def _parse_b2_uri(uri, parsed: urllib.parse.ParseResult) -> B2URI | B2FileIdURI:
    """
    Build the B2 URI object out of an already split URI.

    :param uri: the original URI string, used only in error messages
    :param parsed: result of ``urllib.parse.urlparse(uri)``
    :raises ValueError: see :func:`parse_b2_uri`
    """
    if parsed.scheme not in ("b2", "b2id"):
        raise ValueError(f"Unsupported URI scheme: {parsed.scheme!r}")

    if not parsed.netloc:
        # netloc holds the bucket name (b2://) or the file id (b2id://), so it is mandatory
        raise ValueError(f"Invalid B2 URI: {uri!r}")
    if parsed.password or parsed.username:
        # userinfo in a URI is written as `user:password@host`
        raise ValueError(
            "Invalid B2 URI: credentials passed using `user:password@` syntax are not supported in URI"
        )

    if parsed.scheme == "b2":
        # NOTE(review): only `parsed.path` is used, so anything urlparse moved into
        # params/query/fragment (text after `;`, `?` or `#`) is not part of the path - confirm intended
        return B2URI(bucket=parsed.netloc, path=parsed.path[1:])  # [1:] drops the leading "/"
    return B2FileIdURI(file_id=parsed.netloc)
16 changes: 16 additions & 0 deletions b2/arg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
######################################################################

import argparse
import functools
import locale
import re
import sys
Expand Down Expand Up @@ -148,3 +149,18 @@ def parse_default_retention_period(s):
'default retention period must be in the form of "X days|years "'
)
return RetentionPeriod(**{m.group('unit'): int(m.group('duration'))})


def wrap_with_argument_type_error(func, translator=str, exc_type=ValueError):
    """
    Wrap function that may raise an exception into a function that raises ArgumentTypeError error.

    :param func: callable to wrap; it is called with the wrapper's arguments unchanged
    :param translator: callable turning the caught exception into the ArgumentTypeError message
    :param exc_type: exception class (or tuple of classes) to translate; any other
                     exception raised by ``func`` propagates untouched
    :return: wrapper around ``func`` carrying its metadata (via ``functools.wraps``)
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except exc_type as e:
            # chain explicitly so the original error stays available as __cause__
            raise argparse.ArgumentTypeError(translator(e)) from e

    return wrapper
89 changes: 80 additions & 9 deletions b2/console_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,13 @@
B2_ACCOUNT_INFO_DEFAULT_FILE,
B2_ACCOUNT_INFO_ENV_VAR,
B2_ACCOUNT_INFO_PROFILE_FILE,
DEFAULT_MIN_PART_SIZE,
DEFAULT_SCAN_MANAGER,
NO_RETENTION_BUCKET_SETTING,
REALM_URLS,
SRC_LAST_MODIFIED_MILLIS,
SSE_C_KEY_ID_FILE_INFO_KEY_NAME,
STDOUT_FILEPATH,
UNKNOWN_KEY_ID,
XDG_CONFIG_HOME_ENV_VAR,
ApplicationKey,
Expand Down Expand Up @@ -86,6 +88,7 @@
get_included_sources,
make_progress_listener,
parse_sync_folder,
points_to_fifo,
)
from b2sdk.v2.exception import (
B2Error,
Expand Down Expand Up @@ -116,18 +119,18 @@
B2_SOURCE_SSE_C_KEY_B64_ENV_VAR,
B2_USER_AGENT_APPEND_ENV_VAR,
CREATE_BUCKET_TYPES,
DEFAULT_MIN_PART_SIZE,
DEFAULT_THREADS,
)
from b2._cli.obj_loads import validated_loads
from b2._cli.shell import detect_shell
from b2._utils.filesystem import points_to_fifo
from b2._utils.uri import B2URI, B2FileIdURI, B2URIBase, parse_b2_uri
from b2.arg_parser import (
ArgumentParser,
parse_comma_separated_list,
parse_default_retention_period,
parse_millis_from_float_timestamp,
parse_range,
wrap_with_argument_type_error,
)
from b2.json_encoder import B2CliJsonEncoder
from b2.version import VERSION
Expand Down Expand Up @@ -202,6 +205,9 @@ def local_path_to_b2_path(path):
return path.replace(os.path.sep, '/')


# Callable for argparse `type=`: parses a B2 URI with `parse_b2_uri` and re-raises
# its ValueError as argparse.ArgumentTypeError.
B2_URI_ARG_TYPE = wrap_with_argument_type_error(parse_b2_uri)


def keyboard_interrupt_handler(signum, frame):
    """
    Raise ``KeyboardInterrupt`` unconditionally.

    Both arguments are ignored; the ``(signum, frame)`` signature matches what
    ``signal.signal`` passes to a handler.
    """
    raise KeyboardInterrupt

Expand Down Expand Up @@ -1412,18 +1418,27 @@ def _represent_legal_hold(cls, legal_hold: LegalHold):
def _print_file_attribute(self, label, value):
    """Print ``label:`` left-justified to 20 characters, then a space and ``value``."""
    padded_label = (label + ':').ljust(20)
    self._print(padded_label + ' ' + value)

def get_local_output_filepath(self, filename: str) -> pathlib.Path:
    """
    Turn the output file name given by the user into a path object.

    The special name ``-`` maps to ``STDOUT_FILEPATH``; any other name is
    wrapped in ``pathlib.Path`` as is.
    """
    return STDOUT_FILEPATH if filename == '-' else pathlib.Path(filename)


@B2.register_subcommand
class DownloadFileById(
ThreadsMixin, ProgressMixin, SourceSseMixin, WriteBufferSizeMixin, SkipHashVerificationMixin,
MaxDownloadStreamsMixin, DownloadCommand
ThreadsMixin,
ProgressMixin,
SourceSseMixin,
WriteBufferSizeMixin,
SkipHashVerificationMixin,
MaxDownloadStreamsMixin,
DownloadCommand,
):
"""
Downloads the given file, and stores it in the given local file.
{PROGRESSMIXIN}
{THREADSMIXIN}
{THREADSMIXIN}
{SOURCESSEMIXIN}
{WRITEBUFFERSIZEMIXIN}
{SKIPHASHVERIFICATIONMIXIN}
Expand Down Expand Up @@ -1452,16 +1467,17 @@ def run(self, args):
)

self._print_download_info(downloaded_file)
downloaded_file.save_to(args.localFileName)
output_filepath = self.get_local_output_filepath(args.localFileName)
downloaded_file.save_to(output_filepath)
self._print('Download finished')

return 0


@B2.register_subcommand
class DownloadFileByName(
ProgressMixin,
ThreadsMixin,
ProgressMixin,
SourceSseMixin,
WriteBufferSizeMixin,
SkipHashVerificationMixin,
Expand Down Expand Up @@ -1503,12 +1519,67 @@ def run(self, args):
)

self._print_download_info(downloaded_file)
downloaded_file.save_to(args.localFileName)
output_filepath = self.get_local_output_filepath(args.localFileName)
downloaded_file.save_to(output_filepath)
self._print('Download finished')

return 0


@B2.register_subcommand
class Cat(
    ProgressMixin,
    SourceSseMixin,
    WriteBufferSizeMixin,
    SkipHashVerificationMixin,
    DownloadCommand,
):
    """
    Download content of a file identified by B2 URI directly to stdout.
    {PROGRESSMIXIN}
    {SOURCESSEMIXIN}
    {WRITEBUFFERSIZEMIXIN}
    {SKIPHASHVERIFICATIONMIXIN}
    Requires capability:
    - **readFiles**
    """

    @classmethod
    def _setup_parser(cls, parser):
        # the positional B2 URI is registered first, then the mixins add their options
        parser.add_argument(
            'b2uri',
            type=B2_URI_ARG_TYPE,
            help=
            "B2 URI identifying the file to print, e.g. b2://yourBucket/file.txt or b2id://fileId",
        )
        super()._setup_parser(parser)

    def download_by_b2_uri(
        self, b2_uri: B2URIBase, args: argparse.Namespace, local_filename: str
    ) -> DownloadedFile:
        # Start the download of the file referenced by `b2_uri`, either by file id
        # (B2FileIdURI) or by bucket name and path (B2URI).
        listener = make_progress_listener(local_filename, args.noProgress or args.quiet)
        sse_setting = self._get_source_sse_setting(args)

        if isinstance(b2_uri, B2FileIdURI):
            start_download = self.api.download_file_by_id
            target = b2_uri.file_id
        elif isinstance(b2_uri, B2URI):
            start_download = self.api.get_bucket_by_name(b2_uri.bucket).download_file_by_name
            target = b2_uri.path
        else:  # This should never happen since there are no more subclasses of B2URIBase
            raise ValueError(f'Unsupported B2 URI: {b2_uri!r}')

        return start_download(target, progress_listener=listener, encryption=sse_setting)

    def run(self, args):
        super().run(args)
        stdout_alias = '-'  # `cat` always writes to stdout
        downloaded_file = self.download_by_b2_uri(args.b2uri, args, stdout_alias)
        destination = self.get_local_output_filepath(stdout_alias)
        downloaded_file.save_to(destination)
        return 0


@B2.register_subcommand
class GetAccountInfo(Command):
"""
Expand Down Expand Up @@ -2913,7 +2984,7 @@ def get_input_stream(self, filename: str) -> 'str | int | io.BinaryIO':
if filename == "-":
if os.path.exists('-'):
self._print_stderr(
"WARNING: Filename `-` won't be supported in the future and will be treated as stdin alias."
"WARNING: Filename `-` won't be supported in the future and will always be treated as stdin alias."
)
else:
return sys.stdin.buffer if platform.system() == "Windows" else sys.stdin.fileno()
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def run_integration_test(session, pytest_posargs):
'test/integration',
'-s',
'-n',
'auto',
'2' if CI else 'auto',
'--log-level',
'INFO',
'-W',
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
argcomplete>=2,<4
arrow>=1.0.2,<2.0.0
b2sdk>=1.24.1,<2
b2sdk>=1.25.0,<2
docutils>=0.18.1
idna~=3.4; platform_system == 'Java'
importlib-metadata~=3.3; python_version < '3.8'
Expand Down
2 changes: 1 addition & 1 deletion test/integration/cleanup_buckets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ def test_cleanup_buckets(b2_api):
# this is not a test, but it is intended to be called
# via pytest because it reuses fixtures which have everything
# set up
b2_api.clean_buckets()
pass # b2_api calls b2_api.clean_buckets() in its finalizer
Loading

0 comments on commit 26a90d4

Please sign in to comment.