Skip to content

Commit

Permalink
Add --demo CLI option (#61)
Browse files Browse the repository at this point in the history
* move argparse details to new file

* add underscores to distinguish private functions

* Copy CSV generation from notebook in other branch

* pytest.ini. Add doctests

* tweak ranges on fake csv

* a little more coverage
  • Loading branch information
mccalluc authored Oct 18, 2024
1 parent 792a768 commit 236be93
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 61 deletions.
10 changes: 10 additions & 0 deletions .pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[pytest]

# Treat warnings as errors:
filterwarnings =
error

addopts = --doctest-glob '*.md' --doctest-modules --ignore dp_creator_ii/templates/ --ignore dp_creator_ii/tests/fixtures/

# If an xfail starts passing unexpectedly, that should count as a failure:
xfail_strict=true
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@ We plan to implement a [proof of concept](https://docs.google.com/document/d/1dt
## Usage

```
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--contrib CONTRIB]
DP Creator II makes it easier to get started with Differential Privacy.
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--contrib CONTRIB] [--demo]
options:
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--contrib CONTRIB How many rows can an individual contribute?
--demo Use generated fake CSV for a quick demo
```


Expand Down
39 changes: 4 additions & 35 deletions dp_creator_ii/__init__.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,16 @@
"""DP Creator II makes it easier to get started with Differential Privacy."""

from pathlib import Path
from argparse import ArgumentParser, ArgumentTypeError

import shiny
from dp_creator_ii.argparse_helpers import get_csv_contrib


__version__ = "0.0.1"


def existing_csv(arg):
    """Argparse ``type`` callable that accepts only an existing ".csv" file.

    Returns the argument converted to a ``Path``. Raises ``ArgumentTypeError``
    if nothing exists at that location, or if the suffix is not ".csv".
    """
    candidate = Path(arg)
    if candidate.exists():
        if candidate.suffix == ".csv":
            return candidate
        raise ArgumentTypeError(f'Must have ".csv" extension: {arg}')
    # Existence is checked first, so a missing file is reported as missing
    # regardless of its extension.
    raise ArgumentTypeError(f"No such file: {arg}")


def get_arg_parser():
    """Build the command-line parser, described by this module's docstring.

    Two options are registered: "--csv" (validated by ``existing_csv`` and
    stored as ``csv_path``) and "--contrib" (an int stored as
    ``contributions``, defaulting to 1).
    """
    cli = ArgumentParser(description=__doc__)

    csv_settings = {
        "dest": "csv_path",
        "type": existing_csv,
        "help": "Path to CSV containing private data",
    }
    cli.add_argument("--csv", **csv_settings)

    contrib_settings = {
        "dest": "contributions",
        "metavar": "CONTRIB",
        "type": int,
        "default": 1,
        "help": "How many rows can an individual contribute?",
    }
    cli.add_argument("--contrib", **contrib_settings)

    return cli


def main(): # pragma: no cover
# We call parse_args() again inside the app.
# We only call it here so "--help" is handled,
# and to validate inputs.
get_arg_parser().parse_args()
# We only call this here so "--help" is handled,
# and to validate inputs before starting the server.
get_csv_contrib()

shiny.run_app(
app="dp_creator_ii.app",
Expand Down
23 changes: 5 additions & 18 deletions dp_creator_ii/app/dataset_panel.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,13 @@
from sys import argv

from shiny import ui, reactive, render

from dp_creator_ii import get_arg_parser
from dp_creator_ii.argparse_helpers import get_csv_contrib
from dp_creator_ii.csv_helper import read_field_names
from dp_creator_ii.app.ui_helpers import output_code_sample
from dp_creator_ii.template import make_privacy_unit_block


def get_args():
    """Parse the CLI arguments, ignoring ARGV when it starts with "run --port".

    Returns the ``Namespace`` produced by the parser from ``get_arg_parser``.
    """
    # A Playwright test run pollutes ARGV, so parse an empty list in that case.
    # Passing None makes argparse read the real command line, as usual.
    polluted = argv[1:3] == ["run", "--port"]
    return get_arg_parser().parse_args([] if polluted else None)


def dataset_ui():
args = get_args()
(_csv_path, contributions) = get_csv_contrib()

return ui.nav_panel(
"Select Dataset",
Expand All @@ -34,17 +21,17 @@ def dataset_ui():
'This is the "unit of privacy" which will be protected.'
),
ui.output_text("csv_fields"),
ui.input_numeric("contributions", "Contributions", args.contributions),
ui.input_numeric("contributions", "Contributions", contributions),
output_code_sample("unit_of_privacy_python"),
ui.input_action_button("go_to_analysis", "Define analysis"),
value="dataset_panel",
)


def dataset_server(input, output, session):
args = get_args()
(csv_path, _contributions) = get_csv_contrib()

csv_path_from_cli_value = reactive.value(args.csv_path)
csv_path_from_cli_value = reactive.value(csv_path)

@reactive.calc
def csv_path_calc():
Expand Down
113 changes: 113 additions & 0 deletions dp_creator_ii/argparse_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from sys import argv
from pathlib import Path
from argparse import ArgumentParser, ArgumentTypeError
import csv
import random
from warnings import warn


def _existing_csv_type(arg):
    """
    Argparse ``type`` callable for "--csv": the value must name an existing
    file whose suffix is ".csv".

    Returns the value as a ``Path``; raises ``ArgumentTypeError`` otherwise.
    """
    csv_file = Path(arg)
    # The existence check comes first, so a missing file is always reported
    # as missing, whatever its extension.
    problem = None
    if not csv_file.exists():
        problem = f"No such file: {arg}"
    elif csv_file.suffix != ".csv":
        problem = f'Must have ".csv" extension: {arg}'
    if problem is not None:
        raise ArgumentTypeError(problem)
    return csv_file


def _get_arg_parser():
    """
    Assemble the ArgumentParser offering "--csv", "--contrib" and "--demo".
    """
    # NOTE(review): __doc__ here is this module's own docstring. No module
    # docstring is visible in this file, so the "--help" description is
    # presumably None -- confirm whether that is intended.
    cli = ArgumentParser(description=__doc__)

    # Each entry is (flag, keyword arguments for add_argument), in help order.
    option_table = [
        (
            "--csv",
            {
                "dest": "csv_path",
                "type": _existing_csv_type,
                "help": "Path to CSV containing private data",
            },
        ),
        (
            "--contrib",
            {
                "dest": "contributions",
                "metavar": "CONTRIB",
                "type": int,
                "default": 1,
                "help": "How many rows can an individual contribute?",
            },
        ),
        (
            "--demo",
            {
                "action": "store_true",
                "help": "Use generated fake CSV for a quick demo",
            },
        ),
    ]
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli


def _get_args():
    """
    Parse the command line, or fall back to the defaults when ARGV looks like
    it belongs to a test runner.

    >>> _get_args()
    Namespace(csv_path=None, contributions=1, demo=False)
    """
    # Only these three flags are treated as evidence of a test run.
    test_markers = ("--port", "-v", "-k")
    running_test = any(marker in argv for marker in test_markers)

    parser = _get_arg_parser()
    if not running_test:
        # Normal parsing of the real command line:
        return parser.parse_args()  # pragma: no cover
    # ARGV is polluted by the test runner, so parse an empty list instead:
    return parser.parse_args([])


def _clip(n, lower, upper):
    """
    Limit n to the range from lower to upper, inclusive.

    >>> _clip(-5, 0, 10)
    0
    >>> _clip(5, 0, 10)
    5
    >>> _clip(15, 0, 10)
    10
    """
    # Cap at the upper bound first, then raise to the lower bound.
    capped = min(n, upper)
    return max(capped, lower)


def _get_demo_csv_contrib():
    """
    Write a reproducible fake gradebook CSV and report where it is.

    Returns a ``(csv_path, contributions)`` tuple: the path (as a string) of
    the generated "demo.csv" in the system temporary directory, and the number
    of homework rows written for each student.

    >>> csv_path, contributions = _get_demo_csv_contrib()
    >>> with open(csv_path, newline="") as csv_handle:
    ...     reader = csv.DictReader(csv_handle)
    ...     reader.fieldnames
    ...     rows = list(reader)
    ...     rows[0]
    ...     rows[-1]
    ['student_id', 'class_year', 'hw_number', 'grade']
    {'student_id': '1', 'class_year': '2', 'hw_number': '1', 'grade': '73'}
    {'student_id': '100', 'class_year': '1', 'hw_number': '10', 'grade': '78'}
    """
    import tempfile  # Local import: only demo mode needs it.

    # A private generator seeded with 0 keeps the mock data stable across
    # runs without reseeding the process-wide ``random`` state.
    rng = random.Random(0)

    # Use the platform's temp directory rather than assuming "/tmp" exists.
    csv_path = str(Path(tempfile.gettempdir()) / "demo.csv")
    contributions = 10

    with open(csv_path, "w", newline="", encoding="utf-8") as demo_handle:
        fields = ["student_id", "class_year", "hw_number", "grade"]
        writer = csv.DictWriter(demo_handle, fieldnames=fields)
        writer.writeheader()
        for student_id in range(1, 101):
            class_year = int(_clip(rng.gauss(2, 1), 1, 4))
            # Older students do slightly better in the class:
            mean_grade = rng.gauss(80, 5) + class_year * 2
            for hw_number in range(1, contributions + 1):
                grade = int(_clip(rng.gauss(mean_grade, 5), 0, 100))
                writer.writerow(
                    {
                        "student_id": student_id,
                        "class_year": class_year,
                        "hw_number": hw_number,
                        "grade": grade,
                    }
                )

    return csv_path, contributions


def get_csv_contrib():  # pragma: no cover
    """
    Return the ``(csv_path, contributions)`` pair chosen on the command line.

    With "--demo", a fake CSV is generated and its path and contribution
    count are returned instead. A warning is issued if "--csv" was also
    given, or if "--contrib" differs from its default, because demo mode
    ignores both.
    """
    args = _get_args()
    if not args.demo:
        return (args.csv_path, args.contributions)

    # "--contrib" always has a value because of its default, so comparing it
    # with None would warn on every demo run. Compare with the parser's
    # default instead. An explicit "--contrib" equal to the default cannot be
    # told apart from an omitted one.
    default_contributions = _get_arg_parser().get_default("contributions")
    overridden = (
        args.csv_path is not None or args.contributions != default_contributions
    )
    if overridden:
        warn('"--demo" overrides "--csv" and "--contrib"')
    return _get_demo_csv_contrib()
10 changes: 5 additions & 5 deletions dp_creator_ii/tests/test_arg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

import pytest

from dp_creator_ii import get_arg_parser, existing_csv
from dp_creator_ii.argparse_helpers import _get_arg_parser, _existing_csv_type


def test_help():
help = (
get_arg_parser()
_get_arg_parser()
.format_help()
# argparse doesn't actually know the name of the script
# and inserts the name of the running program instead.
Expand All @@ -25,14 +25,14 @@ def test_help():

def test_arg_validation_no_file():
with pytest.raises(ArgumentTypeError, match="No such file: no-such-file"):
existing_csv("no-such-file")
_existing_csv_type("no-such-file")


def test_arg_validation_not_csv():
with pytest.raises(ArgumentTypeError, match='Must have ".csv" extension:'):
existing_csv(Path(__file__).parent / "fixtures" / "fake.ipynb")
_existing_csv_type(Path(__file__).parent / "fixtures" / "fake.ipynb")


def test_arg_validation_works():
path = existing_csv(Path(__file__).parent / "fixtures" / "fake.csv")
path = _existing_csv_type(Path(__file__).parent / "fixtures" / "fake.csv")
assert path.name == "fake.csv"

0 comments on commit 236be93

Please sign in to comment.