Skip to content

Commit

Permalink
Merge pull request #27 from WEHI-ResearchComputing/logging
Browse files Browse the repository at this point in the history
Logging
  • Loading branch information
multimeric authored Sep 6, 2024
2 parents 99d6c73 + 86a0d03 commit a494f6d
Show file tree
Hide file tree
Showing 7 changed files with 205 additions and 62 deletions.
11 changes: 11 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Changelog

## Version 2.1.0

### Added

* A progress bar for file downloads

### Changed

* All terminal output is now through the `logging` module. You can use the new `--log-level` CLI parameter to configure the amount of info that is printed out.
* Updated the CLI default concurrency to 2 for chunks and 1 for files. This seems to be moderately performant without ever failing.

## Version 2.0.0

### Added
Expand Down
98 changes: 59 additions & 39 deletions filesender/api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Any, Iterable, List, Optional, Tuple, AsyncIterator, Set
from bs4 import BeautifulSoup
from typing import Any, Iterable, List, Optional, Tuple, AsyncIterator, Union
from filesender.download import files_from_page, DownloadFile
import filesender.response_types as response
import filesender.request_types as request
from urllib.parse import urlparse, urlunparse, unquote
Expand All @@ -10,7 +10,7 @@
import aiofiles
from aiostream import stream
from contextlib import contextmanager
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception
from tenacity import RetryCallState, retry, stop_after_attempt, wait_fixed, retry_if_exception
import logging
from tqdm.asyncio import tqdm

Expand All @@ -25,9 +25,13 @@ def should_retry(e: BaseException) -> bool:
# Seems to be just a bug in the backend
# https://github.com/encode/httpx/discussions/2941
return True
elif isinstance(e, HTTPStatusError) and e.response.status_code == 500 and e.response.json()["message"] == "auth_remote_too_late":
elif isinstance(e, HTTPStatusError) and e.response.status_code == 500:
message = e.response.json()["message"]
if message == "auth_remote_too_late":
return True
if message == "auth_remote_signature_check_failed":
return True
# These errors are caused by lag between creating the response and it being received
return True
return False


Expand All @@ -40,6 +44,13 @@ def url_without_scheme(url: str) -> str:
"""
return unquote(urlunparse(urlparse(url)._replace(scheme="")).lstrip("/"))

def exception_to_message(e: BaseException) -> str:
    """
    Renders an exception as a human-readable message, including the HTTP
    request (and response, where one exists) details for httpx errors.
    """
    if isinstance(e, HTTPStatusError):
        # The server replied with an error status, so a response body is available
        return f"Request failed with content {e.response.text} for request {e.request.method} {e.request.url}."
    if isinstance(e, RequestError):
        # The request never completed, so there is no response body to include
        return f"Request failed for request {e.request.method} {e.request.url}. {repr(e)}"
    # Not an httpx error: fall back to the exception's repr
    return repr(e)

@contextmanager
def raise_status():
Expand All @@ -49,16 +60,8 @@ def raise_status():
"""
try:
yield
except HTTPStatusError as e:
raise Exception(
f"Request failed with content {e.response.text} for request {e.request.method} {e.request.url}"
) from e
except RequestError as e:
# TODO: check for SSL read error
raise Exception(
f"Request failed for request {e.request.method} {e.request.url}"
) from e

except BaseException as e:
raise Exception(exception_to_message(e)) from e

async def yield_chunks(path: Path, chunk_size: int) -> AsyncIterator[Tuple[bytes, int]]:
"""
Expand Down Expand Up @@ -166,11 +169,21 @@ async def _sign_send(self, request: Request) -> Any:
with raise_status():
return await self._sign_send_inner(request)

@staticmethod
def on_retry(state: RetryCallState) -> None:
    """
    Logs a warning describing a failed attempt, for use as a tenacity
    `before_sleep` callback.

    Params:
        state: The tenacity retry state for the attempt that just failed.
    """
    # Fall back to the raw outcome repr when no exception can be extracted
    message = str(state.outcome)
    if state.outcome is not None:
        e = state.outcome.exception()
        if e is not None:
            message = exception_to_message(e)

    # logger.warn is a deprecated alias; logger.warning is the supported API
    logger.warning(f"Attempt {state.attempt_number}. {message}")

@retry(
retry=retry_if_exception(should_retry),
wait=wait_fixed(0.1),
stop=stop_after_attempt(5),
before_sleep=lambda x: logger.warn(f"Attempt {x.attempt_number}.{x.outcome}")
before_sleep=on_retry
)
async def _sign_send_inner(self, request: Request) -> Any:
# Needs to be a separate function to handle retry policy correctly
Expand Down Expand Up @@ -313,19 +326,14 @@ async def create_guest(self, body: request.Guest) -> response.Guest:
self.http_client.build_request("POST", f"{self.base_url}/guest", json=body)
)

async def _files_from_token(self, token: str) -> Set[int]:
async def _files_from_token(self, token: str) -> Iterable[DownloadFile]:
"""
Internal function that returns a list of file IDs for a given guest token
"""
download_page = await self.http_client.get(
"https://filesender.aarnet.edu.au", params={"s": "download", "token": token}
)
files: Set[int] = set()
for file in BeautifulSoup(download_page.content, "html.parser").find_all(
class_="file"
):
files.add(int(file.attrs["data-id"]))
return files
return files_from_page(download_page.content)

async def download_files(
self,
Expand All @@ -342,12 +350,12 @@ async def download_files(
out_dir: The path to write the downloaded files.
"""

file_ids = await self._files_from_token(token)
file_meta = await self._files_from_token(token)

async def _download_args() -> AsyncIterator[Tuple[str, Any, Path]]:
async def _download_args() -> AsyncIterator[Tuple[str, Any, Path, int, str]]:
"Yields tuples of arguments to pass to download_file"
for file_id in file_ids:
yield token, file_id, out_dir
for file in file_meta:
yield token, file["id"], out_dir, file["size"], file["name"]

# Each file is downloaded in parallel
# Pyright messes this up
Expand All @@ -358,8 +366,8 @@ async def download_file(
token: str,
file_id: int,
out_dir: Path,
key: Optional[bytes] = None,
algorithm: Optional[str] = None,
file_size: Union[int, float, None] = None,
file_name: Optional[str] = None
) -> None:
"""
Downloads a single file.
Expand All @@ -368,23 +376,35 @@ async def download_file(
token: Obtained from the transfer email. The same as [`GuestAuth`][filesender.GuestAuth]'s `guest_token`.
file_id: A single file ID indicating the file to be downloaded.
out_dir: The path to write the downloaded file.
file_size: The file size in bytes, optionally.
file_name: The file name of the file being downloaded. This will impact the name by which it's saved.
"""
download_endpoint = urlunparse(
urlparse(self.base_url)._replace(path="/download.php")
)
async with self.http_client.stream(
"GET", download_endpoint, params={"files_ids": file_id, "token": token}
) as res:
for content_param in res.headers["Content-Disposition"].split(";"):
if "filename" in content_param:
filename = content_param.split("=")[1].lstrip('"').rstrip('"')
break
else:
raise Exception("No filename found")

async with aiofiles.open(out_dir / filename, "wb") as fp:
async for chunk in res.aiter_raw(chunk_size=8192):
await fp.write(chunk)
# Determine filename from response, if not provided
if file_name is None:
for content_param in res.headers["Content-Disposition"].split(";"):
if "filename" in content_param:
file_name = content_param.split("=")[1].lstrip('"').rstrip('"')
break
else:
raise Exception("No filename found")

file_path = out_dir / file_name
file_path.parent.mkdir(parents=True, exist_ok=True)
chunk_size = 8192
chunk_size_mb = chunk_size / 1024 / 1024
with tqdm(desc=file_name, unit="MB", total=None if file_size is None else int(file_size / 1024 / 1024)) as progress:
async with aiofiles.open(out_dir / file_name, "wb") as fp:
# We can't add the total here, because we don't know it:
# https://github.com/filesender/filesender/issues/1555
async for chunk in res.aiter_raw(chunk_size=chunk_size):
await fp.write(chunk)
progress.update(chunk_size_mb)

async def get_server_info(self) -> response.ServerInfo:
"""
Expand Down
50 changes: 50 additions & 0 deletions filesender/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Iterable, TypedDict

from bs4 import BeautifulSoup


class DownloadFile(TypedDict):
    """
    Metadata for a single file listed on a FileSender download page,
    scraped from the ``data-*`` attributes of each ``.file`` element.
    """
    # presumably client-side entropy used for end-to-end encryption — TODO confirm
    client_entropy: str
    # whether the file is encrypted; kept as the raw attribute string
    encrypted: str
    encrypted_size: int
    # NOTE(review): likely the AEAD mode / authentication tag info — confirm against FileSender docs
    fileaead: str
    # presumably the encryption initialization vector — TODO confirm
    fileiv: str
    id: int
    key_salt: str
    key_version: int
    mime: str
    #: filename
    name: str
    password_encoding: str
    password_hash_iterations: int
    password_version: int
    # file size in bytes
    size: int
    # ID of the transfer this file belongs to
    transferid: int

def files_from_page(content: bytes) -> Iterable[DownloadFile]:
    """
    Yields dictionaries describing the files listed on a FileSender web page.

    Params:
        content: The HTML content of the FileSender download page

    Yields:
        One `DownloadFile` per element with class ``file``, with its
        ``data-*`` attributes converted to the appropriate Python types.
    """
    for file in BeautifulSoup(content, "html.parser").find_all(
        class_="file"
    ):
        # Numeric attributes are parsed to int; everything else stays a string
        yield {
            # Fixed: this was f"data-client-entropy", a pointless f-string
            # prefix with no placeholders
            "client_entropy": file.attrs["data-client-entropy"],
            "encrypted": file.attrs["data-encrypted"],
            "encrypted_size": int(file.attrs["data-encrypted-size"]),
            "fileaead": file.attrs["data-fileaead"],
            "fileiv": file.attrs["data-fileiv"],
            "id": int(file.attrs["data-id"]),
            "key_salt": file.attrs["data-key-salt"],
            "key_version": int(file.attrs["data-key-version"]),
            "mime": file.attrs["data-mime"],
            "name": file.attrs["data-name"],
            "password_encoding": file.attrs["data-password-encoding"],
            "password_hash_iterations": int(file.attrs["data-password-hash-iterations"]),
            "password_version": int(file.attrs["data-password-version"]),
            "size": int(file.attrs["data-size"]),
            "transferid": int(file.attrs["data-transferid"]),
        }
46 changes: 46 additions & 0 deletions filesender/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from typing import Union
from click import ParamType, Context, Parameter
from enum import Enum
import logging

class LogLevel(Enum):
    """
    Log levels accepted by the CLI: the standard ``logging`` numeric levels
    plus two custom intermediate levels (VERBOSE and FEEDBACK).
    """
    NOTSET = 0
    DEBUG = 10
    #: Used for verbose logging that the average user wouldn't want
    VERBOSE = 15
    INFO = 20
    #: Used for basic feedback that a CLI user would expect
    FEEDBACK = 25
    WARNING = 30
    ERROR = 40
    CRITICAL = 50

    def configure_label(self):
        """
        Configures the logging module to understand this log level
        """
        # Registers the level name so log records render e.g. "VERBOSE"
        # instead of "Level 15"
        logging.addLevelName(self.value, self.name)

def configure_extra_levels():
    """
    Registers the non-standard log levels (VERBOSE and FEEDBACK) with the
    ``logging`` module so that their names appear in log output.
    """
    LogLevel.VERBOSE.configure_label()
    LogLevel.FEEDBACK.configure_label()

class LogParam(ParamType):
    """
    Click parameter type that accepts either a log level name (e.g. "INFO")
    or a raw integer level, and converts it to a numeric logging level.
    """
    name = "LogParam"

    def convert(self, value: Union[int, str], param: Union[Parameter, None], ctx: Union[Context, None]) -> int:
        """
        Converts a CLI-supplied value into a numeric log level, failing the
        parse (via click) when the name is not a known level.
        """
        # Already numeric, e.g. when a default is supplied as an int
        if isinstance(value, int):
            return value

        # Check the enum's public member mapping rather than hasattr():
        # hasattr(LogLevel, "mro") is True even though "mro" is not a level,
        # so hasattr would let such names through to an uncaught KeyError in
        # the lookup below.
        if value not in LogLevel.__members__:
            self.fail(f"{value!r} is not a valid log level", param, ctx)

        return LogLevel[value].value

    def get_metavar(self, param: Parameter) -> Union[str, None]:
        """
        Shows the valid level names as choices in the CLI help text.
        """
        # __members__ is the public equivalent of the private _member_map_
        return "|".join(LogLevel.__members__)
Loading

0 comments on commit a494f6d

Please sign in to comment.