Skip to content

Commit

Permalink
User-provided contributions flow all the way to the generated code (#41)
Browse files: browse the repository at this point in the history

* input for unit-of-privacy, and show code

* factor out code sample into helper

* value is now in generated code

* rename CLI param

* add a test

* use shiny code instead of pre

* privacy unit template

* renaming: unit -> contributions

* use contributions on results page

* test that user input shows up in generated notebook

* better unit tests for template filling

* flush instead of close so the temporary file is cleaned up on exit

* check that script is what we expect

* finally style script correctly?
  • Loading branch information
mccalluc authored Oct 10, 2024
1 parent e0c2ab6 commit 1f02fa9
Show file tree
Hide file tree
Showing 12 changed files with 143 additions and 54 deletions.
4 changes: 2 additions & 2 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ extend-select = B950
extend-ignore = E203,E501,E701

per-file-ignores =
# Ignore undefined names
*/templates/*:F821,F401
# Ignore undefined names in templates.
*/templates/*:F821,F401,E302
10 changes: 4 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,14 @@ We plan to implement a [proof of concept](https://docs.google.com/document/d/1dt
## Usage

```
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--unit UNIT_OF_PRIVACY]
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--contrib CONTRIB]
DP Creator II makes it easier to get started with Differential Privacy.
options:
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--unit UNIT_OF_PRIVACY
Unit of privacy: How many rows can an individual
contribute?
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--contrib CONTRIB How many rows can an individual contribute?
```


Expand Down
8 changes: 5 additions & 3 deletions dp_creator_ii/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ def get_arg_parser():
help="Path to CSV containing private data",
)
parser.add_argument(
"--unit",
dest="unit_of_privacy",
"--contrib",
dest="contributions",
metavar="CONTRIB",
type=int,
help="Unit of privacy: How many rows can an individual contribute?",
default=1,
help="How many rows can an individual contribute?",
)
return parser

Expand Down
38 changes: 23 additions & 15 deletions dp_creator_ii/app/dataset_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,24 @@

from dp_creator_ii import get_arg_parser
from dp_creator_ii.csv_helper import read_field_names
from dp_creator_ii.app.ui_helpers import output_code_sample
from dp_creator_ii.template import make_privacy_unit_block


def get_args():
    """Parse the command-line arguments for the app.

    When the process was started as ``<prog> run --port ...`` we are inside
    a Playwright test and ``argv`` holds the launcher's options rather than
    ours, so an empty argument list is parsed instead and the options fall
    back to their defaults.
    """
    arg_parser = get_arg_parser()
    started_by_playwright = argv[1:3] == ["run", "--port"]
    # An explicit empty list ignores the polluted argv; ``None`` makes
    # argparse read the real command line, exactly like a bare parse_args().
    cli_args = [] if started_by_playwright else None
    return arg_parser.parse_args(cli_args)


def dataset_ui():
args = get_args()

return ui.nav_panel(
"Select Dataset",
"TODO: Pick dataset",
Expand All @@ -15,25 +30,17 @@ def dataset_ui():
ui.output_text("csv_path"),
"CSV fields:",
ui.output_text("csv_fields"),
"Unit of privacy:",
ui.output_text("unit_of_privacy_text"),
ui.input_numeric("contributions", "Contributions", args.contributions),
output_code_sample("unit_of_privacy_python"),
ui.input_action_button("go_to_analysis", "Define analysis"),
value="dataset_panel",
)


def dataset_server(input, output, session):
if argv[1:3] == ["run", "--port"]:
# Started by playwright
arg_csv_path = None
arg_unit_of_privacy = None
else:
args = get_arg_parser().parse_args()
arg_csv_path = args.csv_path
arg_unit_of_privacy = args.unit_of_privacy
args = get_args()

csv_path_from_cli_value = reactive.value(arg_csv_path)
unit_of_privacy = reactive.value(arg_unit_of_privacy)
csv_path_from_cli_value = reactive.value(args.csv_path)

@reactive.calc
def csv_path_calc():
Expand All @@ -57,9 +64,10 @@ def csv_fields_calc():
def csv_fields():
return csv_fields_calc()

@render.text
def unit_of_privacy_text():
return str(unit_of_privacy.get())
@render.code
def unit_of_privacy_python():
contributions = input.contributions()
return make_privacy_unit_block(contributions)

@reactive.effect
@reactive.event(input.go_to_analysis)
Expand Down
9 changes: 6 additions & 3 deletions dp_creator_ii/app/results_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ def results_server(input, output, session):
media_type="text/x-python",
)
async def download_script():
contributions = input.contributions()
script_py = make_script_py(
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand All @@ -32,9 +33,10 @@ async def download_script():
media_type="application/x-ipynb+json",
)
async def download_notebook_unexecuted():
contributions = input.contributions()
notebook_py = make_notebook_py(
csv_path="todo.csv",
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand All @@ -46,9 +48,10 @@ async def download_notebook_unexecuted():
media_type="application/x-ipynb+json",
)
async def download_notebook_executed():
contributions = input.contributions()
notebook_py = make_notebook_py(
csv_path="todo.csv",
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand Down
9 changes: 9 additions & 0 deletions dp_creator_ii/app/ui_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from htmltools.tags import details, summary
from shiny import ui


def output_code_sample(name_of_render_function):
    """Wrap a code output in a ``<details>`` element titled "Code sample".

    ``name_of_render_function`` is forwarded unchanged to ``ui.output_code``
    as the id of the output to display.
    """
    heading = summary("Code sample")
    code_output = ui.output_code(name_of_render_function)
    return details(heading, code_output)
29 changes: 20 additions & 9 deletions dp_creator_ii/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,57 +57,68 @@ def __str__(self):
return self._template


def _make_context_for_notebook(csv_path, unit, loss, weights):
def _make_context_for_notebook(csv_path, contributions, loss, weights):
privacy_unit_block = make_privacy_unit_block(contributions)
return str(
_Template("context.py").fill_values(
_Template("context.py")
.fill_values(
CSV_PATH=csv_path,
UNIT=unit,
LOSS=loss,
WEIGHTS=weights,
)
.fill_blocks(
PRIVACY_UNIT_BLOCK=privacy_unit_block,
)
)


def _make_context_for_script(unit, loss, weights):
def _make_context_for_script(contributions, loss, weights):
privacy_unit_block = make_privacy_unit_block(contributions)
return str(
_Template("context.py")
.fill_expressions(
CSV_PATH="csv_path",
)
.fill_values(
UNIT=unit,
LOSS=loss,
WEIGHTS=weights,
)
.fill_blocks(
PRIVACY_UNIT_BLOCK=privacy_unit_block,
)
)


def _make_imports():
return str(_Template("imports.py").fill_values())


def make_notebook_py(csv_path, unit, loss, weights):
def make_notebook_py(csv_path, contributions, loss, weights):
return str(
_Template("notebook.py").fill_blocks(
IMPORTS_BLOCK=_make_imports(),
CONTEXT_BLOCK=_make_context_for_notebook(
csv_path=csv_path,
unit=unit,
contributions=contributions,
loss=loss,
weights=weights,
),
)
)


def make_script_py(unit, loss, weights):
def make_script_py(contributions, loss, weights):
return str(
_Template("script.py").fill_blocks(
IMPORTS_BLOCK=_make_imports(),
CONTEXT_BLOCK=_make_context_for_script(
unit=unit,
contributions=contributions,
loss=loss,
weights=weights,
),
)
)


def make_privacy_unit_block(contributions):
    """Return the ``privacy_unit.py`` template as a string.

    The template's ``CONTRIBUTIONS`` slot is filled with ``contributions``
    via ``fill_values``.
    """
    template = _Template("privacy_unit.py")
    filled = template.fill_values(CONTRIBUTIONS=contributions)
    return str(filled)
3 changes: 2 additions & 1 deletion dp_creator_ii/templates/context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
PRIVACY_UNIT_BLOCK
context = dp.Context.compositor(
data=pl.scan_csv(CSV_PATH, encoding="utf8-lossy"),
privacy_unit=dp.unit_of(contributions=UNIT),
privacy_unit=privacy_unit,
privacy_loss=dp.loss_of(epsilon=LOSS),
split_by_weights=WEIGHTS,
)
1 change: 1 addition & 0 deletions dp_creator_ii/templates/privacy_unit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
privacy_unit = dp.unit_of(contributions=CONTRIBUTIONS)
29 changes: 29 additions & 0 deletions dp_creator_ii/tests/fixtures/expected-script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from argparse import ArgumentParser

import polars as pl
import opendp.prelude as dp

dp.enable_features("contrib")


def get_context(csv_path):
privacy_unit = dp.unit_of(contributions=1)

context = dp.Context.compositor(
data=pl.scan_csv(csv_path, encoding="utf8-lossy"),
privacy_unit=privacy_unit,
privacy_loss=dp.loss_of(epsilon=1),
split_by_weights=[1],
)

return context


if __name__ == "__main__":
parser = ArgumentParser(
description="Creates a differentially private release from a csv"
)
parser.add_argument("--csv", help="Path to csv containing private data")
args = parser.parse_args()
context = get_context(csv_path=args.csv)
print(context)
7 changes: 5 additions & 2 deletions dp_creator_ii/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

# TODO: Why is incomplete coverage reported here?
# https://github.com/opendp/dp-creator-ii/issues/18
def test_app(page: Page, app: ShinyAppProc): # pragma: no cover
def test_navigation(page: Page, app: ShinyAppProc): # pragma: no cover
pick_dataset_text = "TODO: Pick dataset"
perform_analysis_text = "TODO: Define analysis"
download_results_text = "TODO: Download results"
Expand All @@ -29,6 +29,9 @@ def expect_no_error():
expect_visible(pick_dataset_text)
expect_not_visible(perform_analysis_text)
expect_not_visible(download_results_text)
page.get_by_label("Contributions").fill("42")
page.get_by_text("Code sample").click()
expect_visible("dp.unit_of(contributions=42)")
expect_no_error()

csv_path = Path(__file__).parent / "fixtures" / "fake.csv"
Expand All @@ -54,4 +57,4 @@ def expect_no_error():

download = download_info.value
script = download.path().read_text()
assert "privacy_unit=dp.unit_of(contributions=1)" in script
assert "privacy_unit = dp.unit_of(contributions=42)" in script
50 changes: 37 additions & 13 deletions dp_creator_ii/tests/test_template.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from tempfile import NamedTemporaryFile
import subprocess
from pathlib import Path
import re
import pytest
import opendp.prelude as dp
Expand All @@ -9,17 +10,28 @@
fake_csv = "dp_creator_ii/tests/fixtures/fake.csv"


def test_fill_expressions():
    """``fill_expressions`` substitutes each slot with the given text as-is."""
    replacements = {
        "VERB": "expects",
        "ADJ": "Spanish",
        "NOUN": "Inquisition",
    }
    template = _Template(None, template="No one VERB the ADJ NOUN!")
    result = template.fill_expressions(**replacements)
    assert str(result) == "No one expects the Spanish Inquisition!"


def test_fill_values():
context_template = _Template("context.py")
context_block = str(
context_template.fill_values(
CSV_PATH=fake_csv,
UNIT=1,
LOSS=1,
WEIGHTS=[1],
template = _Template(None, template="assert [STRING] * NUM == LIST")
filled = str(
template.fill_values(
STRING="🙂",
NUM=3,
LIST=["🙂", "🙂", "🙂"],
)
)
assert f"data=pl.scan_csv('{fake_csv}', encoding=\"utf8-lossy\")" in context_block
assert filled == "assert ['🙂'] * 3 == ['🙂', '🙂', '🙂']"


def test_fill_blocks():
Expand Down Expand Up @@ -65,15 +77,18 @@ def test_fill_template_unfilled_slots():
context_template = _Template("context.py")
with pytest.raises(
Exception,
match=re.escape("context.py has unfilled slots: CSV_PATH, LOSS, UNIT, WEIGHTS"),
match=re.escape(
"context.py has unfilled slots: "
"CSV_PATH, LOSS, PRIVACY_UNIT_BLOCK, WEIGHTS"
),
):
str(context_template.fill_values())


def test_make_notebook():
notebook = make_notebook_py(
csv_path=fake_csv,
unit=1,
contributions=1,
loss=1,
weights=[1],
)
Expand All @@ -84,14 +99,23 @@ def test_make_notebook():

def test_make_script():
script = make_script_py(
unit=1,
contributions=1,
loss=1,
weights=[1],
)

with NamedTemporaryFile(mode="w", delete=False) as fp:
def clear_empty_lines(text):
# Cleanup whitespace after indenting blocks
return re.sub(r"^\s+$", "", text, flags=re.MULTILINE).strip()

expected_script = (
Path(__file__).parent / "fixtures" / "expected-script.py"
).read_text()
assert clear_empty_lines(script) == clear_empty_lines(expected_script)

with NamedTemporaryFile(mode="w") as fp:
fp.write(script)
fp.close()
fp.flush()

result = subprocess.run(["python", fp.name, "--csv", fake_csv])
assert result.returncode == 0

0 comments on commit 1f02fa9

Please sign in to comment.