diff --git a/docs/changelog.md b/docs/changelog.md index e4a31ad..b2bd574 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,16 @@ # Changelog +## Version 2.1.0 + +### Added + +* A progress bar for file downloads + +### Changed + +* All terminal output is now through the `logging` module. You can use the new `--log-level` CLI parameter to configure the amount of info that is printed out. +* Update the CLI default concurrency to 2 for chunks and 1 for files. This seems to be moderately performant without ever failing + ## Version 2.0.0 ### Added diff --git a/filesender/api.py b/filesender/api.py index bd04701..ca0bbb3 100644 --- a/filesender/api.py +++ b/filesender/api.py @@ -1,5 +1,5 @@ -from typing import Any, Iterable, List, Optional, Tuple, AsyncIterator, Set -from bs4 import BeautifulSoup +from typing import Any, Iterable, List, Optional, Tuple, AsyncIterator, Union +from filesender.download import files_from_page, DownloadFile import filesender.response_types as response import filesender.request_types as request from urllib.parse import urlparse, urlunparse, unquote @@ -10,7 +10,7 @@ import aiofiles from aiostream import stream from contextlib import contextmanager -from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception +from tenacity import RetryCallState, retry, stop_after_attempt, wait_fixed, retry_if_exception import logging from tqdm.asyncio import tqdm @@ -25,9 +25,13 @@ def should_retry(e: BaseException) -> bool: # Seems to be just a bug in the backend # https://github.com/encode/httpx/discussions/2941 return True - elif isinstance(e, HTTPStatusError) and e.response.status_code == 500 and e.response.json()["message"] == "auth_remote_too_late": + elif isinstance(e, HTTPStatusError) and e.response.status_code == 500: + message = e.response.json()["message"] + if message == "auth_remote_too_late": + return True + if message == "auth_remote_signature_check_failed": + return True # These errors are caused by lag 
between creating the response and it being received - return True return False @@ -40,6 +44,13 @@ def url_without_scheme(url: str) -> str: """ return unquote(urlunparse(urlparse(url)._replace(scheme="")).lstrip("/")) +def exception_to_message(e: BaseException) -> str: + if isinstance(e, HTTPStatusError): + return f"Request failed with content {e.response.text} for request {e.request.method} {e.request.url}." + elif isinstance(e, RequestError): + return f"Request failed for request {e.request.method} {e.request.url}. {repr(e)}" + else: + return repr(e) @contextmanager def raise_status(): @@ -49,16 +60,8 @@ def raise_status(): """ try: yield - except HTTPStatusError as e: - raise Exception( - f"Request failed with content {e.response.text} for request {e.request.method} {e.request.url}" - ) from e - except RequestError as e: - # TODO: check for SSL read error - raise Exception( - f"Request failed for request {e.request.method} {e.request.url}" - ) from e - + except Exception as e: + raise Exception(exception_to_message(e)) from e async def yield_chunks(path: Path, chunk_size: int) -> AsyncIterator[Tuple[bytes, int]]: """ @@ -166,11 +169,21 @@ async def _sign_send(self, request: Request) -> Any: with raise_status(): return await self._sign_send_inner(request) + @staticmethod + def on_retry(state: RetryCallState) -> None: + message = str(state.outcome) + if state.outcome is not None: + e = state.outcome.exception() + if e is not None: + message = exception_to_message(e) + + logger.warning(f"Attempt {state.attempt_number}. 
{message}") + @retry( retry=retry_if_exception(should_retry), wait=wait_fixed(0.1), stop=stop_after_attempt(5), - before_sleep=lambda x: logger.warn(f"Attempt {x.attempt_number}.{x.outcome}") + before_sleep=on_retry.__func__ ) async def _sign_send_inner(self, request: Request) -> Any: # Needs to be a separate function to handle retry policy correctly @@ -313,19 +326,14 @@ async def create_guest(self, body: request.Guest) -> response.Guest: self.http_client.build_request("POST", f"{self.base_url}/guest", json=body) ) - async def _files_from_token(self, token: str) -> Set[int]: + async def _files_from_token(self, token: str) -> Iterable[DownloadFile]: """ Internal function that returns a list of file IDs for a given guest token """ download_page = await self.http_client.get( "https://filesender.aarnet.edu.au", params={"s": "download", "token": token} ) - files: Set[int] = set() - for file in BeautifulSoup(download_page.content, "html.parser").find_all( - class_="file" - ): - files.add(int(file.attrs["data-id"])) - return files + return files_from_page(download_page.content) async def download_files( self, @@ -342,12 +350,12 @@ async def download_files( out_dir: The path to write the downloaded files. """ - file_ids = await self._files_from_token(token) + file_meta = await self._files_from_token(token) - async def _download_args() -> AsyncIterator[Tuple[str, Any, Path]]: + async def _download_args() -> AsyncIterator[Tuple[str, Any, Path, int, str]]: "Yields tuples of arguments to pass to download_file" - for file_id in file_ids: - yield token, file_id, out_dir + for file in file_meta: + yield token, file["id"], out_dir, file["size"], file["name"] # Each file is downloaded in parallel # Pyright messes this up @@ -358,8 +366,8 @@ async def download_file( token: str, file_id: int, out_dir: Path, - key: Optional[bytes] = None, - algorithm: Optional[str] = None, + file_size: Union[int, float, None] = None, + file_name: Optional[str] = None ) -> None: """ Downloads a single file. 
@@ -368,6 +376,8 @@ async def download_file( token: Obtained from the transfer email. The same as [`GuestAuth`][filesender.GuestAuth]'s `guest_token`. file_id: A single file ID indicating the file to be downloaded. out_dir: The path to write the downloaded file. + file_size: The file size in bytes, optionally. + file_name: The file name of the file being downloaded. This will impact the name by which it's saved. """ download_endpoint = urlunparse( urlparse(self.base_url)._replace(path="/download.php") @@ -375,16 +385,26 @@ async def download_file( async with self.http_client.stream( "GET", download_endpoint, params={"files_ids": file_id, "token": token} ) as res: - for content_param in res.headers["Content-Disposition"].split(";"): - if "filename" in content_param: - filename = content_param.split("=")[1].lstrip('"').rstrip('"') - break - else: - raise Exception("No filename found") - - async with aiofiles.open(out_dir / filename, "wb") as fp: - async for chunk in res.aiter_raw(chunk_size=8192): - await fp.write(chunk) + # Determine filename from response, if not provided + if file_name is None: + for content_param in res.headers["Content-Disposition"].split(";"): + if "filename" in content_param: + file_name = content_param.split("=")[1].lstrip('"').rstrip('"') + break + else: + raise Exception("No filename found") + + file_path = out_dir / file_name + file_path.parent.mkdir(parents=True, exist_ok=True) + chunk_size = 8192 + bytes_per_mb = 1024 * 1024 + with tqdm(desc=file_name, unit="MB", total=None if file_size is None else int(file_size / bytes_per_mb)) as progress: + async with aiofiles.open(file_path, "wb") as fp: + # We can't add the total here, because we don't know it: + # https://github.com/filesender/filesender/issues/1555 + async for chunk in res.aiter_raw(chunk_size=chunk_size): + await fp.write(chunk) + progress.update(len(chunk) / bytes_per_mb) async def get_server_info(self) -> response.ServerInfo: """ diff --git a/filesender/download.py 
b/filesender/download.py new file mode 100644 index 0000000..aed9b93 --- /dev/null +++ b/filesender/download.py @@ -0,0 +1,50 @@ +from typing import Iterable, TypedDict + +from bs4 import BeautifulSoup + + +class DownloadFile(TypedDict): + client_entropy: str + encrypted: str + encrypted_size: int + fileaead: str + fileiv: str + id: int + key_salt: str + key_version: int + mime: str + #: filename + name: str + password_encoding: str + password_hash_iterations: int + password_version: int + size: int + transferid: int + +def files_from_page(content: bytes) -> Iterable[DownloadFile]: + """ + Yields dictionaries describing the files listed on a FileSender web page + + Params: + content: The HTML content of the FileSender download page + """ + for file in BeautifulSoup(content, "html.parser").find_all( + class_="file" + ): + yield { + "client_entropy": file.attrs["data-client-entropy"], + "encrypted": file.attrs["data-encrypted"], + "encrypted_size": int(file.attrs["data-encrypted-size"]), + "fileaead": file.attrs["data-fileaead"], + "fileiv": file.attrs["data-fileiv"], + "id": int(file.attrs["data-id"]), + "key_salt": file.attrs["data-key-salt"], + "key_version": int(file.attrs["data-key-version"]), + "mime": file.attrs["data-mime"], + "name": file.attrs["data-name"], + "password_encoding": file.attrs["data-password-encoding"], + "password_hash_iterations": int(file.attrs["data-password-hash-iterations"]), + "password_version": int(file.attrs["data-password-version"]), + "size": int(file.attrs["data-size"]), + "transferid": int(file.attrs["data-transferid"]), + } diff --git a/filesender/log.py b/filesender/log.py new file mode 100644 index 0000000..ce0e2f5 --- /dev/null +++ b/filesender/log.py @@ -0,0 +1,46 @@ +from typing import Union +from click import ParamType, Context, Parameter +from enum import Enum +import logging + +class LogLevel(Enum): + NOTSET = 0 + DEBUG = 10 + #: Used for verbose logging that the average user wouldn't want + VERBOSE = 15 + INFO = 20 + 
#: Used for basic feedback that a CLI user would expect + FEEDBACK = 25 + WARNING = 30 + ERROR = 40 + CRITICAL = 50 + + def configure_label(self): + """ + Configures the logging module to understand this log level + """ + logging.addLevelName(self.value, self.name) + +def configure_extra_levels(): + """ + Configures the logging module to understand the additional log levels + """ + for level in (LogLevel.VERBOSE, LogLevel.FEEDBACK): + level.configure_label() + +class LogParam(ParamType): + name = "LogParam" + + def convert(self, value: Union[int, str], param: Union[Parameter, None], ctx: Union[Context, None]) -> int: + if isinstance(value, int): + return value + + # Convert string representation to int + if value not in LogLevel.__members__: + self.fail(f"{value!r} is not a valid log level", param, ctx) + + return LogLevel[value].value + + def get_metavar(self, param: Parameter) -> Union[str, None]: + # Print out the choices + return "|".join(LogLevel.__members__) diff --git a/filesender/main.py b/filesender/main.py index be67ad0..21299fb 100644 --- a/filesender/main.py +++ b/filesender/main.py @@ -1,15 +1,21 @@ from __future__ import annotations +import logging from typing import Any, List, Optional, Callable, Coroutine, Dict from typing_extensions import Annotated, ParamSpec, TypeVar from filesender.api import FileSenderClient from typer import Typer, Option, Argument, Context, Exit from rich import print +from rich.pretty import pretty_repr from pathlib import Path from filesender.auth import Auth, UserAuth, GuestAuth from filesender.config import get_defaults from functools import wraps from asyncio import run from importlib.metadata import version +from rich.logging import RichHandler +from filesender.log import LogParam, LogLevel, configure_extra_levels + +logger = logging.getLogger(__name__) from filesender.response_types import Guest, Transfer @@ -42,8 +48,11 @@ def version_callback(value: bool): @app.callback(context_settings=context) def common_args( - 
base_url: Annotated[str, Option(help="The URL of the FileSender REST API")], context: Context, + base_url: Annotated[str, Option(help="The URL of the FileSender REST API")], + log_level: Annotated[ + int, Option(click_type=LogParam(), help="Logging verbosity", ) + ] = LogLevel.FEEDBACK.value, version: Annotated[ Optional[bool], Option("--version", callback=version_callback) ] = None @@ -51,6 +60,14 @@ def common_args( context.obj = { "base_url": base_url } + configure_extra_levels() + logging.basicConfig( + level=log_level, + format= "%(message)s", + datefmt="[%X]", + handlers=[RichHandler()] + ) + @app.command(context_settings=context) def invite( @@ -58,7 +75,6 @@ def invite( apikey: Annotated[str, Option(help="Your API token. This is the token of the person doing the inviting, not the person being invited.")], recipient: Annotated[str, Argument(help="The email address of the person to invite")], context: Context, - verbose: Verbose = False, # Although these parameters are exact duplicates of those in GuestOptions, # typer doesn't support re-using argument lists: https://github.com/tiangolo/typer/discussions/665 one_time: Annotated[bool, Option(help="If true, this voucher is only valid for one use, otherwise it can be re-used.")] = True, @@ -99,9 +115,8 @@ def invite( } } })) - if verbose: - print(result) - print("Invitation successfully sent") + logger.log(LogLevel.VERBOSE.value, pretty_repr(result)) + logger.log(LogLevel.FEEDBACK.value, "Invitation successfully sent") @app.command(context_settings=context) @typer_async @@ -110,10 +125,9 @@ async def upload_voucher( guest_token: Annotated[str, Option(help="The guest token. 
This is the part of the upload URL after 'vid='")], email: Annotated[str, Option(help="The email address that was invited to upload files")], context: Context, - concurrent_files: ConcurrentFiles = None, - concurrent_chunks: ConcurrentChunks = None, + concurrent_files: ConcurrentFiles = 1, + concurrent_chunks: ConcurrentChunks = 2, chunk_size: ChunkSize = None, - verbose: Verbose = False ): """ Uploads files to a voucher that you have been invited to @@ -129,9 +143,8 @@ async def upload_voucher( await auth.prepare(client.http_client) await client.prepare() result: Transfer = await client.upload_workflow(files, {"from": email, "recipients": []}) - if verbose: - print(result) - print("Upload completed successfully") + logger.log(LogLevel.VERBOSE.value, pretty_repr(result)) + logger.log(LogLevel.FEEDBACK.value, "Upload completed successfully") @app.command(context_settings=context) @typer_async @@ -141,9 +154,8 @@ async def upload( files: UploadFiles, recipients: Annotated[List[str], Option(show_default=False, help="One or more email addresses to send the files")], context: Context, - verbose: Verbose = False, - concurrent_files: ConcurrentFiles = None, - concurrent_chunks: ConcurrentChunks = None, + concurrent_files: ConcurrentFiles = 1, + concurrent_chunks: ConcurrentChunks = 2, chunk_size: ChunkSize = None, delay: Delay = 0 ): @@ -163,9 +175,8 @@ async def upload( ) await client.prepare() result: Transfer = await client.upload_workflow(files, {"recipients": recipients, "from": username}) - if verbose: - print(result) - print("Upload completed successfully") + logger.log(LogLevel.VERBOSE.value, pretty_repr(result)) + logger.log(LogLevel.FEEDBACK.value, "Upload completed successfully") @app.command(context_settings=context) def download( @@ -182,7 +193,7 @@ def download( token=token, out_dir=out_dir )) - print(f"Download completed successfully. Files can be found in {out_dir}") + logger.log(LogLevel.FEEDBACK.value, f"Download completed successfully. 
Files can be found in {out_dir}") @app.command(context_settings=context) @typer_async @@ -192,7 +203,7 @@ async def server_info( """Prints out information about the FileSender server you are interfacing with""" client = FileSenderClient(base_url=context.obj["base_url"]) result = await client.get_server_info() - print(result) + logger.log(LogLevel.FEEDBACK.value, pretty_repr(result)) if __name__ == "__main__": app() diff --git a/pyproject.toml b/pyproject.toml index 7c15c92..f38d3e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "filesender-client" description = "FileSender Python CLI and API client" -version = "2.0.0" +version = "2.1.0" readme = "README.md" requires-python = ">=3.8" keywords = ["one", "two"] diff --git a/test/test_client.py b/test/test_client.py index cca61dd..4d36d0f 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -6,6 +6,11 @@ from filesender.request_types import GuestOptions from filesender.benchmark import make_tempfile, make_tempfiles, benchmark +def count_files_recursively(path: Path) -> int: + """ + Returns a recursive count of the number of files within a directory. Subdirectories are not counted. 
+ """ + return sum(1 for child in path.rglob("*") if child.is_file()) @pytest.mark.asyncio async def test_round_trip(base_url: str, username: str, apikey: str, recipient: str): @@ -34,7 +39,7 @@ async def test_round_trip(base_url: str, username: str, apikey: str, recipient: file_id=transfer["files"][0]["id"], out_dir=Path(download_dir), ) - assert len(list(Path(download_dir).iterdir())) == 1 + assert count_files_recursively(Path(download_dir)) == 1 @pytest.mark.asyncio @@ -62,7 +67,7 @@ async def test_round_trip_dir(base_url: str, username: str, apikey: str, recipie token=transfer["recipients"][0]["token"], out_dir=Path(download_dir), ) - assert len(list(Path(download_dir).iterdir())) == 2 + assert count_files_recursively(Path(download_dir)) == 2 @pytest.mark.asyncio @@ -113,7 +118,7 @@ async def test_voucher_round_trip( file_id=transfer["files"][0]["id"], out_dir=Path(download_dir), ) - assert len(list(Path(download_dir).iterdir())) == 1 + assert count_files_recursively(Path(download_dir)) == 1 @pytest.mark.asyncio