diff --git a/dp_creator_ii/__init__.py b/dp_creator_ii/__init__.py index d42f46c..1cb98dd 100644 --- a/dp_creator_ii/__init__.py +++ b/dp_creator_ii/__init__.py @@ -1,8 +1,7 @@ """DP Creator II makes it easier to get started with Differential Privacy.""" -import os from pathlib import Path -from argparse import ArgumentParser +from argparse import ArgumentParser, ArgumentTypeError import shiny @@ -10,12 +9,21 @@ __version__ = "0.0.1" +def existing_csv(arg): + path = Path(arg) + if not path.exists(): + raise ArgumentTypeError(f"No such file: {arg}") + if path.suffix != ".csv": + raise ArgumentTypeError(f'Must have ".csv" extension: {arg}') + return path + + def get_arg_parser(): parser = ArgumentParser(description=__doc__) parser.add_argument( "--csv", dest="csv_path", - type=Path, + type=existing_csv, help="Path to CSV containing private data", ) parser.add_argument( @@ -29,13 +37,12 @@ def get_arg_parser(): def main(): # pragma: no cover # We call parse_args() again inside the app. - # We only call it here so "--help" is handled. + # We only call it here so "--help" is handled, + # and to validate inputs. get_arg_parser().parse_args() - # run_app() depends on the CWD. - os.chdir(Path(__file__).parent) - - run_app_kwargs = { - "reload": True, - } - shiny.run_app(launch_browser=True, **run_app_kwargs) + shiny.run_app( + app="dp_creator_ii.app", + launch_browser=True, + reload=True, + ) diff --git a/dp_creator_ii/app/dataset_panel.py b/dp_creator_ii/app/dataset_panel.py index 10e93d3..57464e3 100644 --- a/dp_creator_ii/app/dataset_panel.py +++ b/dp_creator_ii/app/dataset_panel.py @@ -3,13 +3,19 @@ from shiny import ui, reactive, render from dp_creator_ii import get_arg_parser +from dp_creator_ii.csv_helper import read_field_names def dataset_ui(): return ui.nav_panel( "Select Dataset", "TODO: Pick dataset", - ui.output_text("csv_path_text"), + ui.input_file("csv_path_from_ui", "Choose CSV file", accept=[".csv"]), + "CSV path from either CLI or UI:", + ui.output_text("csv_path"), + "CSV fields:", + ui.output_text("csv_fields"), + "Unit of privacy:", ui.output_text("unit_of_privacy_text"), ui.input_action_button("go_to_analysis", "Define analysis"), value="dataset_panel", @@ -26,12 +32,30 @@ def dataset_server(input, output, session): arg_csv_path = args.csv_path arg_unit_of_privacy = args.unit_of_privacy - csv_path = reactive.value(arg_csv_path) + csv_path_from_cli_value = reactive.value(arg_csv_path) unit_of_privacy = reactive.value(arg_unit_of_privacy) + @reactive.calc + def csv_path_calc(): + csv_path_from_ui = input.csv_path_from_ui() + if csv_path_from_ui is not None: + return csv_path_from_ui[0]["datapath"] + return csv_path_from_cli_value.get() + + @render.text + def csv_path(): + return csv_path_calc() + + @reactive.calc + def csv_fields_calc(): + path = csv_path_calc() + if path is None: + return None + return read_field_names(path) + @render.text - def csv_path_text(): - return str(csv_path.get()) + def csv_fields(): + return csv_fields_calc() @render.text def unit_of_privacy_text(): diff --git a/dp_creator_ii/csv_helper.py b/dp_creator_ii/csv_helper.py new file mode 100644 index 0000000..577df68 --- /dev/null +++ b/dp_creator_ii/csv_helper.py @@ -0,0 +1,7 @@ +import csv + + +def read_field_names(csv_path): + with open(csv_path, newline="") as csv_handle: + reader = csv.DictReader(csv_handle) + return reader.fieldnames diff --git a/dp_creator_ii/tests/fixtures/fake.csv b/dp_creator_ii/tests/fixtures/fake.csv index 4393e1a..c59dfdd 100644 --- a/dp_creator_ii/tests/fixtures/fake.csv +++ b/dp_creator_ii/tests/fixtures/fake.csv @@ -1 +1,7 @@ -fake-column \ No newline at end of file +student_id,class_year,assignment_type,grade +1234,1,quiz,90 +1234,1,quiz,95 +1234,1,exam,85 +6789,2,quiz,70 +6789,2,quiz,100 +6789,2,exam,90 diff --git a/dp_creator_ii/tests/test_app.py b/dp_creator_ii/tests/test_app.py index b1845fc..5264a6f 100644 --- a/dp_creator_ii/tests/test_app.py +++ b/dp_creator_ii/tests/test_app.py @@ -1,3 +1,5 @@ +from pathlib import Path + from shiny.run import ShinyAppProc from playwright.sync_api import Page, expect from shiny.pytest import create_app_fixture @@ -19,24 +21,37 @@ def expect_visible(text): def expect_not_visible(text): expect(page.get_by_text(text)).not_to_be_visible() + def expect_no_error(): + expect(page.locator(".shiny-output-error")).not_to_be_attached() + page.goto(app.url) expect(page).to_have_title("DP Creator II") expect_visible(pick_dataset_text) expect_not_visible(perform_analysis_text) expect_not_visible(download_results_text) + expect_no_error() + + csv_path = Path(__file__).parent / "fixtures" / "fake.csv" + page.get_by_label("Choose CSV file").set_input_files(csv_path.resolve()) + expect_visible("student_id") + expect_no_error() page.get_by_role("button", name="Define analysis").click() expect_not_visible(pick_dataset_text) expect_visible(perform_analysis_text) expect_not_visible(download_results_text) + expect_no_error() page.get_by_role("button", name="Download results").click() expect_not_visible(pick_dataset_text) expect_not_visible(perform_analysis_text) expect_visible(download_results_text) + expect_no_error() with page.expect_download() as download_info: page.get_by_text("Download script").click() + expect_no_error() + download = download_info.value script = download.path().read_text() assert "privacy_unit=dp.unit_of(contributions=1)" in script diff --git a/dp_creator_ii/tests/test_arg_parser.py b/dp_creator_ii/tests/test_arg_parser.py new file mode 100644 index 0000000..f57d45e --- /dev/null +++ b/dp_creator_ii/tests/test_arg_parser.py @@ -0,0 +1,38 @@ +from pathlib import Path +from argparse import ArgumentTypeError + +import pytest + +from dp_creator_ii import get_arg_parser, existing_csv + + +def test_help(): + help = ( + get_arg_parser() + .format_help() + # argparse doesn't actually know the name of the script + # and inserts the name of the running program instead. + .replace("__main__.py", "dp-creator-ii") + .replace("pytest", "dp-creator-ii") + # Text is different under Python 3.9: + .replace("optional arguments:", "options:") + ) + print(help) + + readme_md = (Path(__file__).parent.parent.parent / "README.md").read_text() + assert help in readme_md + + +def test_arg_validation_no_file(): + with pytest.raises(ArgumentTypeError, match="No such file: no-such-file"): + existing_csv("no-such-file") + + +def test_arg_validation_not_csv(): + with pytest.raises(ArgumentTypeError, match='Must have ".csv" extension:'): + existing_csv(Path(__file__).parent / "fixtures" / "fake.ipynb") + + +def test_arg_validation_works(): + path = existing_csv(Path(__file__).parent / "fixtures" / "fake.csv") + assert path.name == "fake.csv" diff --git a/dp_creator_ii/tests/test_csv.py b/dp_creator_ii/tests/test_csv.py index bf59515..51e2e86 100644 --- a/dp_creator_ii/tests/test_csv.py +++ b/dp_creator_ii/tests/test_csv.py @@ -3,6 +3,15 @@ import polars.testing import tempfile import pytest +from pathlib import Path + +from dp_creator_ii.csv_helper import read_field_names + + +def test_read_field_names(): + csv_path = Path(__file__).parent / "fixtures" / "fake.csv" + field_names = read_field_names(csv_path) + assert field_names == ["student_id", "class_year", "assignment_type", "grade"] @pytest.mark.parametrize("encoding", ["latin1", "utf8"]) diff --git a/dp_creator_ii/tests/test_help.py b/dp_creator_ii/tests/test_help.py deleted file mode 100644 index 2f56769..0000000 --- a/dp_creator_ii/tests/test_help.py +++ /dev/null @@ -1,20 +0,0 @@ -from pathlib import Path - -import dp_creator_ii - - -def test_help(): - help = ( - dp_creator_ii.get_arg_parser() - .format_help() - # argparse doesn't actually know the name of the script - # and inserts the name of the running program instead. - .replace("__main__.py", "dp-creator-ii") - .replace("pytest", "dp-creator-ii") - # Text is different under Python 3.9: - .replace("optional arguments:", "options:") - ) - print(help) - - readme_md = (Path(__file__).parent.parent.parent / "README.md").read_text() - assert help in readme_md