Read CSV columns from uploaded file (#45)

* column reading utility
* file upload
* Get a CSV path, either from the CLI or UI
* use render.text and reactive.calc together
* hardcoded CSV path works
* split calc and text: Multiple decorators do not work?
* stronger validation of CLI parameter
* read csv in Playwright
* add a test of arg parsing
* Add failing test
* look for any shiny error
* do not parse if no file present
opendp · Oct 10, 2024 · e8ecf6e · e8ecf6e
1 parent ceb4674
commit e8ecf6e
Show file tree

Hide file tree

Showing 8 changed files with 122 additions and 36 deletions.
diff --git a/dp_creator_ii/__init__.py b/dp_creator_ii/__init__.py
@@ -1,21 +1,29 @@
 """DP Creator II makes it easier to get started with Differential Privacy."""
 
-import os
 from pathlib import Path
-from argparse import ArgumentParser
+from argparse import ArgumentParser, ArgumentTypeError
 
 import shiny
 
 
 __version__ = "0.0.1"
 
 
+def existing_csv(arg):
+    path = Path(arg)
+    if not path.exists():
+        raise ArgumentTypeError(f"No such file: {arg}")
+    if path.suffix != ".csv":
+        raise ArgumentTypeError(f'Must have ".csv" extension: {arg}')
+    return path
+
+
 def get_arg_parser():
     parser = ArgumentParser(description=__doc__)
     parser.add_argument(
         "--csv",
         dest="csv_path",
-        type=Path,
+        type=existing_csv,
         help="Path to CSV containing private data",
     )
     parser.add_argument(
@@ -29,13 +37,12 @@ def get_arg_parser():
 
 def main():  # pragma: no cover
     # We call parse_args() again inside the app.
-    # We only call it here so "--help" is handled.
+    # We only call it here so "--help" is handled,
+    # and to validate inputs.
     get_arg_parser().parse_args()
 
-    # run_app() depends on the CWD.
-    os.chdir(Path(__file__).parent)
-
-    run_app_kwargs = {
-        "reload": True,
-    }
-    shiny.run_app(launch_browser=True, **run_app_kwargs)
+    shiny.run_app(
+        app="dp_creator_ii.app",
+        launch_browser=True,
+        reload=True,
+    )
diff --git a/dp_creator_ii/app/dataset_panel.py b/dp_creator_ii/app/dataset_panel.py
@@ -3,13 +3,19 @@
 from shiny import ui, reactive, render
 
 from dp_creator_ii import get_arg_parser
+from dp_creator_ii.csv_helper import read_field_names
 
 
 def dataset_ui():
     return ui.nav_panel(
         "Select Dataset",
         "TODO: Pick dataset",
-        ui.output_text("csv_path_text"),
+        ui.input_file("csv_path_from_ui", "Choose CSV file", accept=[".csv"]),
+        "CSV path from either CLI or UI:",
+        ui.output_text("csv_path"),
+        "CSV fields:",
+        ui.output_text("csv_fields"),
+        "Unit of privacy:",
         ui.output_text("unit_of_privacy_text"),
         ui.input_action_button("go_to_analysis", "Define analysis"),
         value="dataset_panel",
@@ -26,12 +32,30 @@ def dataset_server(input, output, session):
         arg_csv_path = args.csv_path
         arg_unit_of_privacy = args.unit_of_privacy
 
-    csv_path = reactive.value(arg_csv_path)
+    csv_path_from_cli_value = reactive.value(arg_csv_path)
     unit_of_privacy = reactive.value(arg_unit_of_privacy)
 
+    @reactive.calc
+    def csv_path_calc():
+        csv_path_from_ui = input.csv_path_from_ui()
+        if csv_path_from_ui is not None:
+            return csv_path_from_ui[0]["datapath"]
+        return csv_path_from_cli_value.get()
+
+    @render.text
+    def csv_path():
+        return csv_path_calc()
+
+    @reactive.calc
+    def csv_fields_calc():
+        path = csv_path_calc()
+        if path is None:
+            return None
+        return read_field_names(path)
+
     @render.text
-    def csv_path_text():
-        return str(csv_path.get())
+    def csv_fields():
+        return csv_fields_calc()
 
     @render.text
     def unit_of_privacy_text():

diff --git a/dp_creator_ii/csv_helper.py b/dp_creator_ii/csv_helper.py
@@ -0,0 +1,7 @@
+import csv
+
+
+def read_field_names(csv_path):
+    with open(csv_path, newline="") as csv_handle:
+        reader = csv.DictReader(csv_handle)
+        return reader.fieldnames
diff --git a/dp_creator_ii/tests/fixtures/fake.csv b/dp_creator_ii/tests/fixtures/fake.csv
@@ -1 +1,7 @@
-fake-column
+student_id,class_year,assignment_type,grade
+1234,1,quiz,90
+1234,1,quiz,95
+1234,1,exam,85
+6789,2,quiz,70
+6789,2,quiz,100
+6789,2,exam,90
diff --git a/dp_creator_ii/tests/test_app.py b/dp_creator_ii/tests/test_app.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 from shiny.run import ShinyAppProc
 from playwright.sync_api import Page, expect
 from shiny.pytest import create_app_fixture
@@ -19,24 +21,37 @@ def expect_visible(text):
     def expect_not_visible(text):
         expect(page.get_by_text(text)).not_to_be_visible()
 
+    def expect_no_error():
+        expect(page.locator(".shiny-output-error")).not_to_be_attached()
+
     page.goto(app.url)
     expect(page).to_have_title("DP Creator II")
     expect_visible(pick_dataset_text)
     expect_not_visible(perform_analysis_text)
     expect_not_visible(download_results_text)
+    expect_no_error()
+
+    csv_path = Path(__file__).parent / "fixtures" / "fake.csv"
+    page.get_by_label("Choose CSV file").set_input_files(csv_path.resolve())
+    expect_visible("student_id")
+    expect_no_error()
 
     page.get_by_role("button", name="Define analysis").click()
     expect_not_visible(pick_dataset_text)
     expect_visible(perform_analysis_text)
     expect_not_visible(download_results_text)
+    expect_no_error()
 
     page.get_by_role("button", name="Download results").click()
     expect_not_visible(pick_dataset_text)
     expect_not_visible(perform_analysis_text)
     expect_visible(download_results_text)
+    expect_no_error()
 
     with page.expect_download() as download_info:
         page.get_by_text("Download script").click()
+    expect_no_error()
+
     download = download_info.value
     script = download.path().read_text()
     assert "privacy_unit=dp.unit_of(contributions=1)" in script
diff --git a/dp_creator_ii/tests/test_arg_parser.py b/dp_creator_ii/tests/test_arg_parser.py
@@ -0,0 +1,38 @@
+from pathlib import Path
+from argparse import ArgumentTypeError
+
+import pytest
+
+from dp_creator_ii import get_arg_parser, existing_csv
+
+
+def test_help():
+    help = (
+        get_arg_parser()
+        .format_help()
+        # argparse doesn't actually know the name of the script
+        # and inserts the name of the running program instead.
+        .replace("__main__.py", "dp-creator-ii")
+        .replace("pytest", "dp-creator-ii")
+        # Text is different under Python 3.9:
+        .replace("optional arguments:", "options:")
+    )
+    print(help)
+
+    readme_md = (Path(__file__).parent.parent.parent / "README.md").read_text()
+    assert help in readme_md
+
+
+def test_arg_validation_no_file():
+    with pytest.raises(ArgumentTypeError, match="No such file: no-such-file"):
+        existing_csv("no-such-file")
+
+
+def test_arg_validation_not_csv():
+    with pytest.raises(ArgumentTypeError, match='Must have ".csv" extension:'):
+        existing_csv(Path(__file__).parent / "fixtures" / "fake.ipynb")
+
+
+def test_arg_validation_works():
+    path = existing_csv(Path(__file__).parent / "fixtures" / "fake.csv")
+    assert path.name == "fake.csv"
diff --git a/dp_creator_ii/tests/test_csv.py b/dp_creator_ii/tests/test_csv.py
@@ -3,6 +3,15 @@
 import polars.testing
 import tempfile
 import pytest
+from pathlib import Path
+
+from dp_creator_ii.csv_helper import read_field_names
+
+
+def test_read_field_names():
+    csv_path = Path(__file__).parent / "fixtures" / "fake.csv"
+    field_names = read_field_names(csv_path)
+    assert field_names == ["student_id", "class_year", "assignment_type", "grade"]
 
 
 @pytest.mark.parametrize("encoding", ["latin1", "utf8"])

diff --git a/dp_creator_ii/tests/test_help.py b/dp_creator_ii/tests/test_help.py