Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --demo CLI option #61

Merged
merged 7 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[pytest]

# Treat warnings as errors:
filterwarnings =
error

addopts = --doctest-glob '*.md' --doctest-modules --ignore dp_creator_ii/templates/ --ignore dp_creator_ii/tests/fixtures/

# If an xfail starts passing unexpectedly, that should count as a failure:
xfail_strict=true
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@ We plan to implement a [proof of concept](https://docs.google.com/document/d/1dt
## Usage

```
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--contrib CONTRIB]
DP Creator II makes it easier to get started with Differential Privacy.
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--contrib CONTRIB] [--demo]
options:
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--contrib CONTRIB How many rows can an individual contribute?
--demo Use generated fake CSV for a quick demo
```


Expand Down
39 changes: 4 additions & 35 deletions dp_creator_ii/__init__.py
Original file line number Diff line number Diff line change
@@ -1,47 +1,16 @@
"""DP Creator II makes it easier to get started with Differential Privacy."""

from pathlib import Path
from argparse import ArgumentParser, ArgumentTypeError

import shiny
from dp_creator_ii.argparse_helpers import get_csv_contrib


__version__ = "0.0.1"


def existing_csv(arg):
    """
    Argparse ``type=`` callable: validate a path to an existing CSV file.

    Returns the argument as a ``Path``.  Raises ``ArgumentTypeError`` if
    nothing exists at the path, if it is not a regular file (for example
    a directory that happens to be named ``something.csv``), or if the
    suffix is not ".csv".
    """
    path = Path(arg)
    if not path.exists():
        raise ArgumentTypeError(f"No such file: {arg}")
    # exists() is also true for directories; reject them here rather than
    # failing later when the CSV is opened.
    if not path.is_file():
        raise ArgumentTypeError(f"Not a file: {arg}")
    if path.suffix != ".csv":
        raise ArgumentTypeError(f'Must have ".csv" extension: {arg}')
    return path


def get_arg_parser():
    """
    Build the command-line parser, described by this module's docstring.

    Options: "--csv" (validated by ``existing_csv``, stored as
    ``csv_path``) and "--contrib" (an int stored as ``contributions``,
    defaulting to 1).
    """
    option_specs = (
        (
            "--csv",
            dict(
                dest="csv_path",
                type=existing_csv,
                help="Path to CSV containing private data",
            ),
        ),
        (
            "--contrib",
            dict(
                dest="contributions",
                metavar="CONTRIB",
                type=int,
                default=1,
                help="How many rows can an individual contribute?",
            ),
        ),
    )
    cli_parser = ArgumentParser(description=__doc__)
    for flag, settings in option_specs:
        cli_parser.add_argument(flag, **settings)
    return cli_parser


def main(): # pragma: no cover
# We call parse_args() again inside the app.
# We only call it here so "--help" is handled,
# and to validate inputs.
get_arg_parser().parse_args()
# We only call this here so "--help" is handled,
# and to validate inputs before starting the server.
get_csv_contrib()

shiny.run_app(
app="dp_creator_ii.app",
Expand Down
23 changes: 5 additions & 18 deletions dp_creator_ii/app/dataset_panel.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,13 @@
from sys import argv

from shiny import ui, reactive, render

from dp_creator_ii import get_arg_parser
from dp_creator_ii.argparse_helpers import get_csv_contrib
from dp_creator_ii.csv_helper import read_field_names
from dp_creator_ii.app.ui_helpers import output_code_sample
from dp_creator_ii.template import make_privacy_unit_block


def get_args():
    """
    Parse the CLI arguments, or an empty argument list under Playwright.

    A Playwright test run starts the process with "run --port ..." in
    argv, which the parser would not accept, so in that case the parser
    defaults are returned instead.
    """
    launched_by_playwright = argv[1:3] == ["run", "--port"]
    # Passing None makes argparse read sys.argv, exactly like parse_args().
    tokens = [] if launched_by_playwright else None
    return get_arg_parser().parse_args(tokens)


def dataset_ui():
args = get_args()
(_csv_path, contributions) = get_csv_contrib()

return ui.nav_panel(
"Select Dataset",
Expand All @@ -34,17 +21,17 @@ def dataset_ui():
'This is the "unit of privacy" which will be protected.'
),
ui.output_text("csv_fields"),
ui.input_numeric("contributions", "Contributions", args.contributions),
ui.input_numeric("contributions", "Contributions", contributions),
output_code_sample("unit_of_privacy_python"),
ui.input_action_button("go_to_analysis", "Define analysis"),
value="dataset_panel",
)


def dataset_server(input, output, session):
args = get_args()
(csv_path, _contributions) = get_csv_contrib()

csv_path_from_cli_value = reactive.value(args.csv_path)
csv_path_from_cli_value = reactive.value(csv_path)

@reactive.calc
def csv_path_calc():
Expand Down
113 changes: 113 additions & 0 deletions dp_creator_ii/argparse_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from sys import argv
from pathlib import Path
from argparse import ArgumentParser, ArgumentTypeError
import csv
import random
from warnings import warn


def _existing_csv_type(arg):
    """
    Argparse ``type=`` callable: validate a path to an existing CSV file.

    Returns the argument as a ``Path``.  Raises ``ArgumentTypeError`` if
    nothing exists at the path, if it is not a regular file (for example
    a directory that happens to be named ``something.csv``), or if the
    suffix is not ".csv".
    """
    path = Path(arg)
    if not path.exists():
        raise ArgumentTypeError(f"No such file: {arg}")
    # exists() is also true for directories; reject them here rather than
    # failing later when the CSV is opened.
    if not path.is_file():
        raise ArgumentTypeError(f"Not a file: {arg}")
    if path.suffix != ".csv":
        raise ArgumentTypeError(f'Must have ".csv" extension: {arg}')
    return path


def _get_arg_parser():
    """
    Build the command-line parser for the application.

    Options: "--csv" (validated by ``_existing_csv_type``, stored as
    ``csv_path``), "--contrib" (an int stored as ``contributions``,
    defaulting to 1) and the boolean flag "--demo".
    """
    # NOTE(review): no module docstring is visible in this file, so __doc__
    # looks to be None here and the help output carries no description.
    cli_parser = ArgumentParser(description=__doc__)
    option_specs = (
        (
            "--csv",
            dict(
                dest="csv_path",
                type=_existing_csv_type,
                help="Path to CSV containing private data",
            ),
        ),
        (
            "--contrib",
            dict(
                dest="contributions",
                metavar="CONTRIB",
                type=int,
                default=1,
                help="How many rows can an individual contribute?",
            ),
        ),
        (
            "--demo",
            dict(
                action="store_true",
                help="Use generated fake CSV for a quick demo",
            ),
        ),
    )
    for flag, settings in option_specs:
        cli_parser.add_argument(flag, **settings)
    return cli_parser


def _get_args():
    """
    Parse the command line, falling back to defaults during a test run.

    >>> _get_args()
    Namespace(csv_path=None, contributions=1, demo=False)
    """
    test_runner_flags = ("--port", "-v", "-k")
    cli_parser = _get_arg_parser()
    if not any(flag in argv for flag in test_runner_flags):
        # Ordinary invocation: parse the real command line.
        return cli_parser.parse_args()  # pragma: no cover
    # One of these flags in argv means a test is running and argv holds
    # the test runner's options, so parse an empty argument list instead.
    return cli_parser.parse_args([])


def _clip(n, lower, upper):
    """
    Limit ``n`` to the range from ``lower`` to ``upper``, inclusive.

    >>> _clip(-5, 0, 10)
    0
    >>> _clip(5, 0, 10)
    5
    >>> _clip(15, 0, 10)
    10
    """
    # Apply the ceiling first, then the floor.
    capped = min(n, upper)
    return max(capped, lower)


def _get_demo_csv_contrib():
    """
    Write a reproducible fake gradebook CSV; return ``(csv_path, contributions)``.

    The file has one row per student per homework: 100 students with
    ``contributions`` (10) homework grades each.  ``csv_path`` is a ``str``
    naming "demo.csv" in the platform's temporary directory; the file is
    overwritten on every call.

    >>> csv_path, contributions = _get_demo_csv_contrib()
    >>> with open(csv_path, newline="") as csv_handle:
    ...     reader = csv.DictReader(csv_handle)
    ...     reader.fieldnames
    ...     rows = list(reader)
    ...     rows[0]
    ...     rows[-1]
    ['student_id', 'class_year', 'hw_number', 'grade']
    {'student_id': '1', 'class_year': '2', 'hw_number': '1', 'grade': '73'}
    {'student_id': '100', 'class_year': '1', 'hw_number': '10', 'grade': '78'}
    """
    import tempfile  # Local import: only the demo path needs it.

    # A private, seeded generator keeps the mock data stable across runs
    # without reseeding the process-wide `random` module as a side effect.
    rng = random.Random(0)

    # Use the platform temp directory rather than a hard-coded "/tmp".
    csv_path = str(Path(tempfile.gettempdir()) / "demo.csv")
    contributions = 10

    with open(csv_path, "w", newline="") as demo_handle:
        fields = ["student_id", "class_year", "hw_number", "grade"]
        writer = csv.DictWriter(demo_handle, fieldnames=fields)
        writer.writeheader()
        for student_id in range(1, 101):
            class_year = int(_clip(rng.gauss(2, 1), 1, 4))
            # Older students do slightly better in the class:
            mean_grade = rng.gauss(80, 5) + class_year * 2
            for hw_number in range(1, contributions + 1):
                grade = int(_clip(rng.gauss(mean_grade, 5), 0, 100))
                writer.writerow(
                    {
                        "student_id": student_id,
                        "class_year": class_year,
                        "hw_number": hw_number,
                        "grade": grade,
                    }
                )

    return csv_path, contributions


def get_csv_contrib():  # pragma: no cover
    """
    Return ``(csv_path, contributions)`` for this run.

    With "--demo", a generated CSV and its contribution count are returned
    in place of anything given by "--csv" / "--contrib", and a warning is
    issued if either of those was also supplied.  Otherwise the parsed
    "--csv" value (``None`` when omitted) and "--contrib" value are returned.
    """
    args = _get_args()
    if not args.demo:
        return (args.csv_path, args.contributions)

    # "--contrib" has a non-None parser default, so an "is not None" test is
    # always true and would warn on every "--demo" run.  Compare with the
    # parser default instead.  (An explicit "--contrib <default>" cannot be
    # told apart from omitting the option.)
    default_contributions = _get_arg_parser().get_default("contributions")
    if args.csv_path is not None or args.contributions != default_contributions:
        warn('"--demo" overrides "--csv" and "--contrib"')
    return _get_demo_csv_contrib()
10 changes: 5 additions & 5 deletions dp_creator_ii/tests/test_arg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

import pytest

from dp_creator_ii import get_arg_parser, existing_csv
from dp_creator_ii.argparse_helpers import _get_arg_parser, _existing_csv_type


def test_help():
help = (
get_arg_parser()
_get_arg_parser()
.format_help()
# argparse doesn't actually know the name of the script
# and inserts the name of the running program instead.
Expand All @@ -25,14 +25,14 @@ def test_help():

def test_arg_validation_no_file():
with pytest.raises(ArgumentTypeError, match="No such file: no-such-file"):
existing_csv("no-such-file")
_existing_csv_type("no-such-file")


def test_arg_validation_not_csv():
with pytest.raises(ArgumentTypeError, match='Must have ".csv" extension:'):
existing_csv(Path(__file__).parent / "fixtures" / "fake.ipynb")
_existing_csv_type(Path(__file__).parent / "fixtures" / "fake.ipynb")


def test_arg_validation_works():
path = existing_csv(Path(__file__).parent / "fixtures" / "fake.csv")
path = _existing_csv_type(Path(__file__).parent / "fixtures" / "fake.csv")
assert path.name == "fake.csv"