Skip to content

Commit

Permalink
Merge branch 'main' into 46-dp-demo-notebook
Browse files Browse the repository at this point in the history
  • Loading branch information
mccalluc committed Oct 10, 2024
2 parents 28e5ca1 + 1f02fa9 commit e283a99
Show file tree
Hide file tree
Showing 24 changed files with 427 additions and 146 deletions.
4 changes: 2 additions & 2 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ extend-select = B950
extend-ignore = E203,E501,E701

per-file-ignores =
# Ignore undefined names
*/templates/*:F821,F401
# Ignore undefined names in templates.
*/templates/*:F821,F401,E302
50 changes: 25 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,19 @@ Building on what we've learned from [DP Creator](https://github.com/opendp/dpcre
- UI development in Python with [Shiny](https://shiny.posit.co/py/)
- Tracking of cumulative privacy consumption between sessions

We plan to implement a [proof of concept](https://docs.google.com/document/d/1dteip598-jYj6KFuoYRyrZDPUuwDl9fHgxARiSieVGw/edit) over a couple of months, and then get feedback from users before deciding on next steps.

## Usage

```
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--unit UNIT_OF_PRIVACY]
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--contrib CONTRIB]
DP Creator II makes it easier to get started with Differential Privacy.
options:
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--unit UNIT_OF_PRIVACY
Unit of privacy: How many rows can an individual
contribute?
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--contrib CONTRIB How many rows can an individual contribute?
```


Expand All @@ -32,40 +32,40 @@ options:
### Getting Started

To get started, clone the repo and install dev dependencies in a virtual environment:
```
git clone https://github.com/opendp/dp-creator-ii.git
cd dp-creator-ii
python3 -m venv .venv
source .venv/bin/activate
pip install -r requirements-dev.txt
pre-commit install
playwright install
```shell
$ git clone https://github.com/opendp/dp-creator-ii.git
$ cd dp-creator-ii
$ python3 -m venv .venv
$ source .venv/bin/activate
$ pip install -r requirements-dev.txt
$ pre-commit install
$ playwright install
```

Now install the application itself and run it:
```
flit install --symlink
dp-creator-ii
```shell
$ flit install --symlink
$ dp-creator-ii
```
Your browser should open and connect you to the application.

### Testing

Tests should pass, and code coverage should be complete (except blocks we explicitly ignore):
```
coverage run -m pytest -v
coverage report
```shell
$ coverage run -m pytest -v
$ coverage report
```

We're using [Playwright](https://playwright.dev/python/) for end-to-end tests. You can use it to [generate test code](https://playwright.dev/python/docs/codegen-intro) just by interacting with the app in a browser:
```
dp-creator-ii&
playwright codegen http://127.0.0.1:8000/
```shell
$ dp-creator-ii # The server will continue to run, so open a new terminal to continue.
$ playwright codegen http://127.0.0.1:8000/
```

You can also [step through these tests](https://playwright.dev/python/docs/running-tests#debugging-tests) and see what the browser sees:
```
PWDEBUG=1 pytest
```shell
$ PWDEBUG=1 pytest
```

### Conventions
Expand Down
37 changes: 23 additions & 14 deletions dp_creator_ii/__init__.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,50 @@
"""DP Creator II makes it easier to get started with Differential Privacy."""

import os
from pathlib import Path
from argparse import ArgumentParser
from argparse import ArgumentParser, ArgumentTypeError

import shiny


__version__ = "0.0.1"


def existing_csv(arg):
    """
    Convert a command-line string to a Path, requiring an existing CSV file.

    Intended for use as an argparse ``type=`` callable.

    Args:
        arg: The raw command-line value.

    Returns:
        A ``Path`` for ``arg``.

    Raises:
        ArgumentTypeError: If ``arg`` is not an existing regular file,
            or if its extension is not ".csv".
    """
    path = Path(arg)
    # is_file() rather than exists(): a directory that happens to be named
    # "something.csv" is not readable as a CSV and should be rejected here.
    if not path.is_file():
        raise ArgumentTypeError(f"No such file: {arg}")
    if path.suffix != ".csv":
        raise ArgumentTypeError(f'Must have ".csv" extension: {arg}')
    return path


def get_arg_parser():
parser = ArgumentParser(description=__doc__)
parser.add_argument(
"--csv",
dest="csv_path",
type=Path,
type=existing_csv,
help="Path to CSV containing private data",
)
parser.add_argument(
"--unit",
dest="unit_of_privacy",
"--contrib",
dest="contributions",
metavar="CONTRIB",
type=int,
help="Unit of privacy: How many rows can an individual contribute?",
default=1,
help="How many rows can an individual contribute?",
)
return parser


def main(): # pragma: no cover
# We call parse_args() again inside the app.
# We only call it here so "--help" is handled.
# We only call it here so "--help" is handled,
# and to validate inputs.
get_arg_parser().parse_args()

# run_app() depends on the CWD.
os.chdir(Path(__file__).parent)

run_app_kwargs = {
"reload": True,
}
shiny.run_app(launch_browser=True, **run_app_kwargs)
shiny.run_app(
app="dp_creator_ii.app",
launch_browser=True,
reload=True,
)
20 changes: 19 additions & 1 deletion dp_creator_ii/app/analysis_panel.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,34 @@
from shiny import ui, reactive
from shiny import ui, reactive, render

from dp_creator_ii.mock_data import mock_data, ColumnDef
from dp_creator_ii.app.plots import plot_error_bars_with_cutoff


def analysis_ui():
    """Build the "Define Analysis" nav panel: placeholder text, a demo plot, and a button."""
    panel_children = [
        "TODO: Define analysis",
        # Filled in by the "plot_preview" render function on the server side.
        ui.output_plot("plot_preview"),
        "(This plot is only to demonstrate that plotting works.)",
        ui.input_action_button("go_to_results", "Download results"),
    ]
    return ui.nav_panel(
        "Define Analysis",
        *panel_children,
        value="analysis_panel",
    )


def analysis_server(input, output, session):
@render.plot()
def plot_preview():
min_x = 0
max_x = 100
df = mock_data({"col_0_100": ColumnDef(min_x, max_x)}, row_count=20)
return plot_error_bars_with_cutoff(
df["col_0_100"].to_list(),
x_min_label=min_x,
x_max_label=max_x,
y_cutoff=30,
y_error=5,
)

@reactive.effect
@reactive.event(input.go_to_results)
def go_to_results():
Expand Down
64 changes: 48 additions & 16 deletions dp_creator_ii/app/dataset_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,71 @@
from shiny import ui, reactive, render

from dp_creator_ii import get_arg_parser
from dp_creator_ii.csv_helper import read_field_names
from dp_creator_ii.app.ui_helpers import output_code_sample
from dp_creator_ii.template import make_privacy_unit_block


def get_args():
    """
    Parse the application's command-line arguments.

    When the process was started with "run --port" as its first two
    arguments, the real argv is ignored and the parser's defaults are returned.
    """
    parser = get_arg_parser()
    # A Playwright test run pollutes ARGV with "run --port ...",
    # which our parser does not understand, so parse an empty list instead.
    started_by_playwright = argv[1:3] == ["run", "--port"]
    if started_by_playwright:
        return parser.parse_args([])
    # Normal parsing of the real command line.
    return parser.parse_args()


def dataset_ui():
args = get_args()

return ui.nav_panel(
"Select Dataset",
"TODO: Pick dataset",
ui.output_text("csv_path_text"),
ui.output_text("unit_of_privacy_text"),
ui.input_file("csv_path_from_ui", "Choose CSV file", accept=[".csv"]),
"CSV path from either CLI or UI:",
ui.output_text("csv_path"),
"CSV fields:",
ui.output_text("csv_fields"),
ui.input_numeric("contributions", "Contributions", args.contributions),
output_code_sample("unit_of_privacy_python"),
ui.input_action_button("go_to_analysis", "Define analysis"),
value="dataset_panel",
)


def dataset_server(input, output, session):
if argv[1:3] == ["run", "--port"]:
# Started by playwright
arg_csv_path = None
arg_unit_of_privacy = None
else:
args = get_arg_parser().parse_args()
arg_csv_path = args.csv_path
arg_unit_of_privacy = args.unit_of_privacy
args = get_args()

csv_path_from_cli_value = reactive.value(args.csv_path)

csv_path = reactive.value(arg_csv_path)
unit_of_privacy = reactive.value(arg_unit_of_privacy)
@reactive.calc
def csv_path_calc():
csv_path_from_ui = input.csv_path_from_ui()
if csv_path_from_ui is not None:
return csv_path_from_ui[0]["datapath"]
return csv_path_from_cli_value.get()

@render.text
def csv_path_text():
return str(csv_path.get())
def csv_path():
return csv_path_calc()

@reactive.calc
def csv_fields_calc():
path = csv_path_calc()
if path is None:
return None
return read_field_names(path)

@render.text
def unit_of_privacy_text():
return str(unit_of_privacy.get())
def csv_fields():
return csv_fields_calc()

@render.code
def unit_of_privacy_python():
contributions = input.contributions()
return make_privacy_unit_block(contributions)

@reactive.effect
@reactive.event(input.go_to_analysis)
Expand Down
40 changes: 40 additions & 0 deletions dp_creator_ii/app/plots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import matplotlib.pyplot as plt
import numpy as np


def plot_error_bars_with_cutoff(
    y_values, x_min_label="min", x_max_label="max", y_cutoff=0, y_error=0
):
    """
    Draw a bar chart with error bars, styling bars differently around a cutoff.

    Bars whose value is below ``y_cutoff`` are drawn white with a colored
    outline; all other bars are filled. A dotted horizontal line marks the
    cutoff. Only the first and last bars get x tick labels, and y ticks are
    hidden.

    Args:
        y_values: Sized sequence of bar heights. May be empty, in which case
            an empty set of axes is returned.
        x_min_label: Tick label for the first bar.
        x_max_label: Tick label for the last bar.
        y_cutoff: Height at which the dotted line is drawn, and below which
            bars are drawn unfilled.
        y_error: Passed to matplotlib as ``yerr`` for every bar.

    Returns:
        The matplotlib figure containing the chart.
    """
    bar_count = len(y_values)
    figure, axes = plt.subplots()

    if bar_count == 0:
        # With no bars, max(y_values) and the first/last tick lookups below
        # would raise, so return bare axes instead of crashing.
        axes.get_xaxis().set_ticks([])
        axes.get_yaxis().set_ticks([])
        return figure

    # Center each bar in its unit-wide slot: 0.5, 1.5, 2.5, ...
    x_values = 0.5 + np.arange(bar_count)
    x_values_above = []
    x_values_below = []
    y_values_above = []
    y_values_below = []
    for x, y in zip(x_values, y_values):
        if y < y_cutoff:
            x_values_below.append(x)
            y_values_below.append(y)
        else:
            x_values_above.append(x)
            y_values_above.append(y)

    color = "skyblue"
    # Settings common to both groups, so filled and unfilled bars line up.
    shared = {
        "width": 0.8,
        "edgecolor": color,
        "linewidth": 1,
        "yerr": y_error,
    }
    axes.bar(x_values_above, y_values_above, color=color, **shared)
    axes.bar(x_values_below, y_values_below, color="white", **shared)
    axes.hlines([y_cutoff], 0, bar_count, colors=["black"], linestyles=["dotted"])

    # NOTE(review): the y-axis tops out at the tallest bar, so that bar's upper
    # error whisker (and any cutoff above it) falls outside the view.
    # Confirm this is intended.
    axes.set(xlim=(0, bar_count), ylim=(0, max(y_values)))
    axes.get_xaxis().set_ticks(
        ticks=[x_values[0], x_values[-1]],
        labels=[x_min_label, x_max_label],
    )
    axes.get_yaxis().set_ticks([])

    return figure
9 changes: 6 additions & 3 deletions dp_creator_ii/app/results_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ def results_server(input, output, session):
media_type="text/x-python",
)
async def download_script():
contributions = input.contributions()
script_py = make_script_py(
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand All @@ -32,9 +33,10 @@ async def download_script():
media_type="application/x-ipynb+json",
)
async def download_notebook_unexecuted():
contributions = input.contributions()
notebook_py = make_notebook_py(
csv_path="todo.csv",
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand All @@ -46,9 +48,10 @@ async def download_notebook_unexecuted():
media_type="application/x-ipynb+json",
)
async def download_notebook_executed():
contributions = input.contributions()
notebook_py = make_notebook_py(
csv_path="todo.csv",
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand Down
9 changes: 9 additions & 0 deletions dp_creator_ii/app/ui_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from htmltools.tags import details, summary
from shiny import ui


def output_code_sample(name_of_render_function):
    """
    Wrap a code output in a <details> element with the summary "Code sample".

    Args:
        name_of_render_function: The output id passed to ``ui.output_code``.
    """
    heading = summary("Code sample")
    code_output = ui.output_code(name_of_render_function)
    return details(heading, code_output)
7 changes: 7 additions & 0 deletions dp_creator_ii/csv_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import csv


def read_field_names(csv_path):
    """
    Return the column names from the header row of a CSV file.

    Args:
        csv_path: Path to the CSV file.

    Returns:
        The list of field names from the first row,
        or None if the file has no rows.
    """
    # "utf-8-sig" decodes UTF-8 with or without a leading byte-order mark.
    # Without it, a BOM ends up inside the first field name, and the
    # decoding would otherwise depend on the platform's default encoding.
    with open(csv_path, newline="", encoding="utf-8-sig") as csv_handle:
        reader = csv.DictReader(csv_handle)
        return reader.fieldnames
Loading

0 comments on commit e283a99

Please sign in to comment.