From 4c39c1868a88c814a18a5168d10db8c2d4fa8311 Mon Sep 17 00:00:00 2001 From: Peter Kraus Date: Tue, 10 Sep 2024 17:53:35 +0200 Subject: [PATCH 1/8] Hook up cli beam --- beam/__init__.py | 43 +++++++++++++++++++++++++++++++++++++++---- pyproject.toml | 3 +++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/beam/__init__.py b/beam/__init__.py index a4dea37..55a66d6 100644 --- a/beam/__init__.py +++ b/beam/__init__.py @@ -24,10 +24,12 @@ import urllib.error import urllib.request import venv +import argparse from enum import Enum from pathlib import Path from types import ModuleType from typing import Any, Callable, Optional +from importlib import metadata __all__ = ("extract", "Extractor") @@ -46,6 +48,35 @@ class SupportedInstallationMethod(Enum): CONDA = "conda" +def run_beam(): + argparser = argparse.ArgumentParser() + argparser.add_argument( + "--version", + action="version", + version=f'%(prog)s version {metadata.version("datatractor_beam")}', + ) + + argparser.add_argument( + "filetype", + help="FileType.ID of the input file", + default=None, + ) + + argparser.add_argument( + "infile", + help="Path of the input file", + default=None, + ) + + args = argparser.parse_args() + + extract( + input_path=args.infile, + input_type=args.filetype, + preferred_mode=SupportedExecutionMethod.CLI, + ) + + def extract( input_path: Path | str, input_type: str, @@ -61,15 +92,16 @@ def extract( Parameters: input_path: The path or URL of the file to parse. - input_type: The ID of the `FileType` in the registry. + input_type: The ID of the ``FileType`` in the registry. output_path: The path to write the output to. If not provided, the output will be requested to be written - to a file with the same name as the input file, but with a .json extension. + to a file with the same name as the input file, but with an extension as + defined using the ``output_type``. Defaults to ``.out``. output_type: A string specifying the desired output type. 
preferred_mode: The preferred execution method. If the extractor supports both Python and CLI, this will be used to determine which to use. If the extractor only supports one method, this will be ignored. - Accepts the `SupportedExecutionMethod` values of "cli" or "python". + Accepts the ``SupportedExecutionMethod`` values of "cli" or "python". install: Whether to install the extractor package before running it. Defaults to True. extractor_definition: A dictionary containing the extractor definition to use instead of a registry lookup. @@ -265,7 +297,10 @@ def execute( ) if output_path is None: - output_path = input_path.with_suffix(".json") + suffix = ".out" if output_type is None else f".{output_type}" + output_path = input_path.with_suffix(suffix) + + print(f"{output_type=}") command = self.apply_template_args( command, diff --git a/pyproject.toml b/pyproject.toml index 81ed56b..60b5cf9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,9 @@ dev = [ [project.urls] repository = "https://github.com/datatractor/beam" +[project.scripts] +beam = "beam:run_beam" + [tool.ruff] extend-exclude = [ "providers", From 553382d3d672bb71326fa3a32b6c7e3a741d700a Mon Sep 17 00:00:00 2001 From: Peter Kraus Date: Tue, 10 Sep 2024 17:59:58 +0200 Subject: [PATCH 2/8] Add test. --- beam/__init__.py | 13 ++++++++++--- tests/test_mpr.py | 11 +++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/beam/__init__.py b/beam/__init__.py index 55a66d6..a215154 100644 --- a/beam/__init__.py +++ b/beam/__init__.py @@ -13,7 +13,7 @@ .. 
_yardsite: https://yard.datatractor.org/ """ - +import argparse import json import multiprocessing.managers import multiprocessing.shared_memory @@ -24,12 +24,11 @@ import urllib.error import urllib.request import venv -import argparse from enum import Enum +from importlib import metadata from pathlib import Path from types import ModuleType from typing import Any, Callable, Optional -from importlib import metadata __all__ = ("extract", "Extractor") @@ -68,11 +67,19 @@ def run_beam(): default=None, ) + argparser.add_argument( + "--outfile", + "-o", + help="Optional path of the output file", + default=None, + ) + args = argparser.parse_args() extract( input_path=args.infile, input_type=args.filetype, + output_path=args.outfile, preferred_mode=SupportedExecutionMethod.CLI, ) diff --git a/tests/test_mpr.py b/tests/test_mpr.py index e6edd71..a8df145 100644 --- a/tests/test_mpr.py +++ b/tests/test_mpr.py @@ -1,3 +1,4 @@ +import subprocess import urllib.request from pathlib import Path @@ -146,3 +147,13 @@ def test_extractorplan_python_method(): function, args, kwargs = ExtractorPlan._prepare_python( 'extract(filename="example.txt", type={"test": "example", "dictionary": "example"})' ) + + +def test_biologic_beam(tmp_path, test_mprs): + for ind, test_mpr in enumerate(test_mprs): + input_path = tmp_path / test_mpr + output_path = tmp_path / test_mpr.name.replace(".mpr", ".nc") + task = ["beam", "biologic-mpr", str(input_path), "--outfile", str(output_path)] + subprocess.run(task) + assert output_path.exists() + break From cf4db8cd1a3413dd566566abb4338b9a12ef68d0 Mon Sep 17 00:00:00 2001 From: Peter Kraus Date: Wed, 11 Sep 2024 09:33:23 +0200 Subject: [PATCH 3/8] ruff --- beam/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/beam/__init__.py b/beam/__init__.py index a215154..965ee1f 100644 --- a/beam/__init__.py +++ b/beam/__init__.py @@ -13,6 +13,7 @@ .. 
_yardsite: https://yard.datatractor.org/ """ + import argparse import json import multiprocessing.managers From cc90e224f85c686479f2396768acd14ca49e8139 Mon Sep 17 00:00:00 2001 From: Peter Kraus Date: Thu, 17 Oct 2024 15:52:17 +0200 Subject: [PATCH 4/8] Changes requested by Matt. --- beam/__init__.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/beam/__init__.py b/beam/__init__.py index 965ee1f..57630ae 100644 --- a/beam/__init__.py +++ b/beam/__init__.py @@ -15,6 +15,7 @@ """ import argparse +import importlib.metadata import json import multiprocessing.managers import multiprocessing.shared_memory @@ -26,12 +27,12 @@ import urllib.request import venv from enum import Enum -from importlib import metadata from pathlib import Path from types import ModuleType from typing import Any, Callable, Optional __all__ = ("extract", "Extractor") +__version__ = importlib.metadata.version("datatractor-beam") REGISTRY_BASE_URL = "https://yard.datatractor.org/api/v0.1.0" BIN = "Scripts" if platform.system() == "Windows" else "bin" @@ -49,11 +50,15 @@ class SupportedInstallationMethod(Enum): def run_beam(): - argparser = argparse.ArgumentParser() + argparser = argparse.ArgumentParser( + prog="beam", + description="""CLI for datatractor extractors that takes a filename and a filetype, then installs and runs an appropriate extractor, if available, from the chosen registry (default: https://registry.datatractor.org/). Filetype IDs can be found in the registry API at e.g., https://registry.datatractor.org/api/filetypes. If a matching extractor is found at https://registry.datatractor.org/api/extractors, it will be installed into a virtual environment local to the beam installation. 
The results of the extractor will be written out to a file at --outfile, or in the default location for that output file type.""", + ) + argparser.add_argument( "--version", action="version", - version=f'%(prog)s version {metadata.version("datatractor_beam")}', + version=f"%(prog)s version {__version__}", ) argparser.add_argument( @@ -308,8 +313,6 @@ def execute( suffix = ".out" if output_type is None else f".{output_type}" output_path = input_path.with_suffix(suffix) - print(f"{output_type=}") - command = self.apply_template_args( command, method, From 55fcac0a0224f6c9745cd0400bde03363eda539a Mon Sep 17 00:00:00 2001 From: Peter Kraus Date: Thu, 17 Oct 2024 15:57:59 +0200 Subject: [PATCH 5/8] Update readme. --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8c3a417..bc99bbe 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,12 @@ ret = extract("example.mpr", "biologic-mpr", output_path="output.nc", preferred_ In this case, the `ret` will be empty bytes, and the output of the extractor should appear in the `output.nc` file. +Finally, `beam` can also be executed from the command line, implying `preferred_mode="cli"`. The command line invocation equivalent to the above python syntax is: + +```bash +beam biologic-mpr example.mpr --outfile output.nc +``` + ### Plans @@ -77,7 +83,7 @@ In this case, the `ret` will be empty bytes, and the output of the extractor sho across subprocesses without any extractor specific classes, e.g., raw JSON/Python dicts, pandas dataframes or xarray datasets (as optional requirements, by demand). 
-- [ ] A command-line for quickly running e.g., `beam ` +- [x] A command-line for quickly running e.g., `beam ` - [ ] Extractor scaffold/template/plugin - If it can be kept similarly low-dependency, this package could also implement an extractor scaffold for those who want to modify existing From a99b1b764a91a68297cc0d6173753f9b26bff0df Mon Sep 17 00:00:00 2001 From: Peter Kraus Date: Thu, 17 Oct 2024 14:01:02 +0000 Subject: [PATCH 6/8] Update README.md Co-authored-by: Matthew Evans <7916000+ml-evs@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bc99bbe..b9a44dd 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ ret = extract("example.mpr", "biologic-mpr", output_path="output.nc", preferred_ In this case, the `ret` will be empty bytes, and the output of the extractor should appear in the `output.nc` file. -Finally, `beam` can also be executed from the command line, implying `preferred_mode="cli"`. The command line invocation equivalent to the above python syntax is: +Finally, `beam` can also be executed from the command line, implying `preferred_mode="cli"`. The command line invocation equivalent to the above Python syntax is: ```bash beam biologic-mpr example.mpr --outfile output.nc From a7001ecdd92ad85318612d7f2d62abb3faae1772 Mon Sep 17 00:00:00 2001 From: Peter Kraus Date: Thu, 17 Oct 2024 16:03:01 +0200 Subject: [PATCH 7/8] datatractor_beam to datatractor-beam --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b9a44dd..b19573a 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ Repository containing the reference implementation of the Datatractor API, published at [![Datatractor Yard](https://badgen.net/static/%F0%9F%9A%9Cdatatractor/yard)](https://yard.datatractor.org/). 
-## `datatractor_beam` package +## `datatractor-beam` package This repository contains a draft Python 3.10 package, located under the `./beam` directory. The package can be used to: From eb35ce0cb6e8d311d589349c3e97333803a074d0 Mon Sep 17 00:00:00 2001 From: Peter Kraus Date: Thu, 17 Oct 2024 16:04:43 +0200 Subject: [PATCH 8/8] Add tested versions into pyproject. --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 60b5cf9..a31d4f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,8 @@ dynamic = ["version"] classifiers = [ "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Science/Research", "Intended Audience :: System Administrators",