Skip to content

Commit

Permalink
Read CSV columns from uploaded file (#45)
Browse files Browse the repository at this point in the history
* column reading utility

* file upload

* Get a CSV path, either from the CLI or UI

* use render.text  and reactive.calc together

* hardcoded CSV path works

* split calc and text: Multiple decorators do not work?

* stronger validation of CLI parameter

* read csv in Playwright

* add a test of arg parsing

* Add failing test

* look for any shiny error

* do not parse if no file present
  • Loading branch information
mccalluc authored Oct 10, 2024
1 parent ceb4674 commit e8ecf6e
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 36 deletions.
29 changes: 18 additions & 11 deletions dp_creator_ii/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
"""DP Creator II makes it easier to get started with Differential Privacy."""

import os
from pathlib import Path
from argparse import ArgumentParser
from argparse import ArgumentParser, ArgumentTypeError

import shiny


__version__ = "0.0.1"


def existing_csv(arg):
    """Validate a command-line argument as a path to an existing CSV file.

    Intended for use as an argparse ``type=`` callable.

    Args:
        arg: The path supplied by the user, as a string or path-like object.

    Returns:
        The argument converted to a ``pathlib.Path``.

    Raises:
        ArgumentTypeError: If the path is not an existing regular file,
            or if its suffix is not ".csv".
    """
    path = Path(arg)
    # is_file() rather than exists(): a directory (even one named "x.csv")
    # cannot be opened as a CSV, so reject it here instead of failing later.
    if not path.is_file():
        raise ArgumentTypeError(f"No such file: {arg}")
    if path.suffix != ".csv":
        raise ArgumentTypeError(f'Must have ".csv" extension: {arg}')
    return path


def get_arg_parser():
parser = ArgumentParser(description=__doc__)
parser.add_argument(
"--csv",
dest="csv_path",
type=Path,
type=existing_csv,
help="Path to CSV containing private data",
)
parser.add_argument(
Expand All @@ -29,13 +37,12 @@ def get_arg_parser():

def main(): # pragma: no cover
# We call parse_args() again inside the app.
# We only call it here so "--help" is handled.
# We only call it here so "--help" is handled,
# and to validate inputs.
get_arg_parser().parse_args()

# run_app() depends on the CWD.
os.chdir(Path(__file__).parent)

run_app_kwargs = {
"reload": True,
}
shiny.run_app(launch_browser=True, **run_app_kwargs)
shiny.run_app(
app="dp_creator_ii.app",
launch_browser=True,
reload=True,
)
32 changes: 28 additions & 4 deletions dp_creator_ii/app/dataset_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,19 @@
from shiny import ui, reactive, render

from dp_creator_ii import get_arg_parser
from dp_creator_ii.csv_helper import read_field_names


def dataset_ui():
return ui.nav_panel(
"Select Dataset",
"TODO: Pick dataset",
ui.output_text("csv_path_text"),
ui.input_file("csv_path_from_ui", "Choose CSV file", accept=[".csv"]),
"CSV path from either CLI or UI:",
ui.output_text("csv_path"),
"CSV fields:",
ui.output_text("csv_fields"),
"Unit of privacy:",
ui.output_text("unit_of_privacy_text"),
ui.input_action_button("go_to_analysis", "Define analysis"),
value="dataset_panel",
Expand All @@ -26,12 +32,30 @@ def dataset_server(input, output, session):
arg_csv_path = args.csv_path
arg_unit_of_privacy = args.unit_of_privacy

csv_path = reactive.value(arg_csv_path)
csv_path_from_cli_value = reactive.value(arg_csv_path)
unit_of_privacy = reactive.value(arg_unit_of_privacy)

@reactive.calc
def csv_path_calc():
    """Return the CSV path currently in effect.

    The "datapath" of the first entry from the "csv_path_from_ui" file
    input is returned when that input is not None; otherwise the value
    stored from the command line is returned.
    """
    uploads = input.csv_path_from_ui()
    if uploads is None:
        # Nothing from the file input: use the command-line value.
        return csv_path_from_cli_value.get()
    first_upload = uploads[0]
    return first_upload["datapath"]

@render.text
def csv_path():
    """Render the value of csv_path_calc() as text output."""
    selected_path = csv_path_calc()
    return selected_path

@reactive.calc
def csv_fields_calc():
    """Return the field names of the current CSV, or None without a path."""
    current_path = csv_path_calc()
    # Do not try to parse anything until a file is actually available.
    return None if current_path is None else read_field_names(current_path)

@render.text
def csv_path_text():
return str(csv_path.get())
def csv_fields():
return csv_fields_calc()

@render.text
def unit_of_privacy_text():
Expand Down
7 changes: 7 additions & 0 deletions dp_creator_ii/csv_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import csv


def read_field_names(csv_path, *, encoding="utf-8-sig"):
    """Return the column names from the header row of a CSV file.

    Args:
        csv_path: Path to the CSV file (string or path-like).
        encoding: Text encoding used to read the file. The default,
            "utf-8-sig", reads plain UTF-8 and also strips a leading
            byte-order mark so it does not leak into the first field name.

    Returns:
        A list of field names taken from the first row, or None if the
        file is empty.
    """
    # newline="" lets the csv module do its own newline handling.
    # An explicit encoding keeps results independent of the platform default.
    with open(csv_path, newline="", encoding=encoding) as csv_handle:
        return csv.DictReader(csv_handle).fieldnames
8 changes: 7 additions & 1 deletion dp_creator_ii/tests/fixtures/fake.csv
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
fake-column
student_id,class_year,assignment_type,grade
1234,1,quiz,90
1234,1,quiz,95
1234,1,exam,85
6789,2,quiz,70
6789,2,quiz,100
6789,2,exam,90
15 changes: 15 additions & 0 deletions dp_creator_ii/tests/test_app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

from shiny.run import ShinyAppProc
from playwright.sync_api import Page, expect
from shiny.pytest import create_app_fixture
Expand All @@ -19,24 +21,37 @@ def expect_visible(text):
def expect_not_visible(text):
expect(page.get_by_text(text)).not_to_be_visible()

def expect_no_error():
expect(page.locator(".shiny-output-error")).not_to_be_attached()

page.goto(app.url)
expect(page).to_have_title("DP Creator II")
expect_visible(pick_dataset_text)
expect_not_visible(perform_analysis_text)
expect_not_visible(download_results_text)
expect_no_error()

csv_path = Path(__file__).parent / "fixtures" / "fake.csv"
page.get_by_label("Choose CSV file").set_input_files(csv_path.resolve())
expect_visible("student_id")
expect_no_error()

page.get_by_role("button", name="Define analysis").click()
expect_not_visible(pick_dataset_text)
expect_visible(perform_analysis_text)
expect_not_visible(download_results_text)
expect_no_error()

page.get_by_role("button", name="Download results").click()
expect_not_visible(pick_dataset_text)
expect_not_visible(perform_analysis_text)
expect_visible(download_results_text)
expect_no_error()

with page.expect_download() as download_info:
page.get_by_text("Download script").click()
expect_no_error()

download = download_info.value
script = download.path().read_text()
assert "privacy_unit=dp.unit_of(contributions=1)" in script
38 changes: 38 additions & 0 deletions dp_creator_ii/tests/test_arg_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pathlib import Path
from argparse import ArgumentTypeError

import pytest

from dp_creator_ii import get_arg_parser, existing_csv


def test_help():
    """The README must contain the argument parser's current help text."""
    help_text = get_arg_parser().format_help()
    # argparse doesn't actually know the name of the script
    # and inserts the name of the running program instead.
    for program_name in ("__main__.py", "pytest"):
        help_text = help_text.replace(program_name, "dp-creator-ii")
    # Text is different under Python 3.9:
    help_text = help_text.replace("optional arguments:", "options:")
    print(help_text)

    readme_path = Path(__file__).parent.parent.parent / "README.md"
    assert help_text in readme_path.read_text()


def test_arg_validation_no_file():
    """A path that does not exist is rejected with a "No such file" error."""
    missing = "no-such-file"
    expected_message = "No such file: no-such-file"
    with pytest.raises(ArgumentTypeError, match=expected_message):
        existing_csv(missing)


def test_arg_validation_not_csv():
    """The fake.ipynb fixture is rejected for not having a ".csv" suffix."""
    fixtures_dir = Path(__file__).parent / "fixtures"
    notebook_path = fixtures_dir / "fake.ipynb"
    with pytest.raises(ArgumentTypeError, match='Must have ".csv" extension:'):
        existing_csv(notebook_path)


def test_arg_validation_works():
    """The fake.csv fixture passes validation and comes back as a path."""
    fixture = Path(__file__).parent.joinpath("fixtures", "fake.csv")
    validated = existing_csv(fixture)
    assert validated.name == "fake.csv"
9 changes: 9 additions & 0 deletions dp_creator_ii/tests/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
import polars.testing
import tempfile
import pytest
from pathlib import Path

from dp_creator_ii.csv_helper import read_field_names


def test_read_field_names():
    """The header row of the fake.csv fixture is returned in file order."""
    expected = ["student_id", "class_year", "assignment_type", "grade"]
    fixture = Path(__file__).parent.joinpath("fixtures", "fake.csv")
    assert read_field_names(fixture) == expected


@pytest.mark.parametrize("encoding", ["latin1", "utf8"])
Expand Down
20 changes: 0 additions & 20 deletions dp_creator_ii/tests/test_help.py

This file was deleted.

0 comments on commit e8ecf6e

Please sign in to comment.