Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

User-provided contributions flow all the way to the generated code #41

Merged
merged 15 commits into from
Oct 10, 2024
Merged
4 changes: 2 additions & 2 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ extend-select = B950
extend-ignore = E203,E501,E701

per-file-ignores =
# Ignore undefined names
*/templates/*:F821,F401
# In templates, ignore undefined names (F821), unused imports (F401), and missing blank lines before definitions (E302).
*/templates/*:F821,F401,E302
10 changes: 4 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,14 @@ We plan to implement a [proof of concept](https://docs.google.com/document/d/1dt
## Usage

```
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--unit UNIT_OF_PRIVACY]
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--contrib CONTRIB]

DP Creator II makes it easier to get started with Differential Privacy.

options:
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--unit UNIT_OF_PRIVACY
Unit of privacy: How many rows can an individual
contribute?
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--contrib CONTRIB How many rows can an individual contribute?
```


Expand Down
8 changes: 5 additions & 3 deletions dp_creator_ii/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ def get_arg_parser():
help="Path to CSV containing private data",
)
parser.add_argument(
"--unit",
dest="unit_of_privacy",
"--contrib",
dest="contributions",
metavar="CONTRIB",
type=int,
help="Unit of privacy: How many rows can an individual contribute?",
default=1,
help="How many rows can an individual contribute?",
)
return parser

Expand Down
38 changes: 23 additions & 15 deletions dp_creator_ii/app/dataset_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,24 @@

from dp_creator_ii import get_arg_parser
from dp_creator_ii.csv_helper import read_field_names
from dp_creator_ii.app.ui_helpers import output_code_sample
from dp_creator_ii.template import make_privacy_unit_block


def get_args():
    """Return the parsed command-line arguments for the app.

    When the process was started by a Playwright test, ``argv`` holds the
    test runner's own options (``run --port ...``) rather than ours, so the
    parser is given an empty argument list and only its defaults are returned.
    """
    parser = get_arg_parser()
    started_by_playwright = argv[1:3] == ["run", "--port"]
    if started_by_playwright:
        # ARGV is polluted by the test runner: ignore it entirely.
        return parser.parse_args([])
    # Normal start-up: parse the real command line.
    return parser.parse_args()


def dataset_ui():
args = get_args()
ekraffmiller marked this conversation as resolved.
Show resolved Hide resolved

return ui.nav_panel(
"Select Dataset",
"TODO: Pick dataset",
Expand All @@ -15,25 +30,17 @@ def dataset_ui():
ui.output_text("csv_path"),
"CSV fields:",
ui.output_text("csv_fields"),
"Unit of privacy:",
ui.output_text("unit_of_privacy_text"),
ui.input_numeric("contributions", "Contributions", args.contributions),
output_code_sample("unit_of_privacy_python"),
ui.input_action_button("go_to_analysis", "Define analysis"),
value="dataset_panel",
)


def dataset_server(input, output, session):
if argv[1:3] == ["run", "--port"]:
# Started by playwright
arg_csv_path = None
arg_unit_of_privacy = None
else:
args = get_arg_parser().parse_args()
arg_csv_path = args.csv_path
arg_unit_of_privacy = args.unit_of_privacy
args = get_args()

csv_path_from_cli_value = reactive.value(arg_csv_path)
unit_of_privacy = reactive.value(arg_unit_of_privacy)
csv_path_from_cli_value = reactive.value(args.csv_path)

@reactive.calc
def csv_path_calc():
Expand All @@ -57,9 +64,10 @@ def csv_fields_calc():
def csv_fields():
return csv_fields_calc()

@render.text
def unit_of_privacy_text():
return str(unit_of_privacy.get())
@render.code
def unit_of_privacy_python():
contributions = input.contributions()
return make_privacy_unit_block(contributions)

@reactive.effect
@reactive.event(input.go_to_analysis)
Expand Down
9 changes: 6 additions & 3 deletions dp_creator_ii/app/results_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ def results_server(input, output, session):
media_type="text/x-python",
)
async def download_script():
contributions = input.contributions()
script_py = make_script_py(
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand All @@ -32,9 +33,10 @@ async def download_script():
media_type="application/x-ipynb+json",
)
async def download_notebook_unexecuted():
contributions = input.contributions()
notebook_py = make_notebook_py(
csv_path="todo.csv",
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand All @@ -46,9 +48,10 @@ async def download_notebook_unexecuted():
media_type="application/x-ipynb+json",
)
async def download_notebook_executed():
contributions = input.contributions()
notebook_py = make_notebook_py(
csv_path="todo.csv",
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand Down
9 changes: 9 additions & 0 deletions dp_creator_ii/app/ui_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from htmltools.tags import details, summary
from shiny import ui


def output_code_sample(name_of_render_function):
    """Build a ``details`` element, summarized as "Code sample", around a code output.

    ``name_of_render_function`` is passed straight to ``ui.output_code`` as the
    id of the output to display.
    """
    heading = summary("Code sample")
    code_output = ui.output_code(name_of_render_function)
    return details(heading, code_output)
29 changes: 20 additions & 9 deletions dp_creator_ii/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,57 +57,68 @@ def __str__(self):
return self._template


def _make_context_for_notebook(csv_path, unit, loss, weights):
def _make_context_for_notebook(csv_path, contributions, loss, weights):
privacy_unit_block = make_privacy_unit_block(contributions)
return str(
_Template("context.py").fill_values(
_Template("context.py")
.fill_values(
CSV_PATH=csv_path,
UNIT=unit,
LOSS=loss,
WEIGHTS=weights,
)
.fill_blocks(
PRIVACY_UNIT_BLOCK=privacy_unit_block,
)
)


def _make_context_for_script(unit, loss, weights):
def _make_context_for_script(contributions, loss, weights):
privacy_unit_block = make_privacy_unit_block(contributions)
return str(
_Template("context.py")
.fill_expressions(
CSV_PATH="csv_path",
)
.fill_values(
UNIT=unit,
LOSS=loss,
WEIGHTS=weights,
)
.fill_blocks(
PRIVACY_UNIT_BLOCK=privacy_unit_block,
)
)


def _make_imports():
return str(_Template("imports.py").fill_values())


def make_notebook_py(csv_path, unit, loss, weights):
def make_notebook_py(csv_path, contributions, loss, weights):
return str(
_Template("notebook.py").fill_blocks(
IMPORTS_BLOCK=_make_imports(),
CONTEXT_BLOCK=_make_context_for_notebook(
csv_path=csv_path,
unit=unit,
contributions=contributions,
loss=loss,
weights=weights,
),
)
)


def make_script_py(unit, loss, weights):
def make_script_py(contributions, loss, weights):
return str(
_Template("script.py").fill_blocks(
IMPORTS_BLOCK=_make_imports(),
CONTEXT_BLOCK=_make_context_for_script(
unit=unit,
contributions=contributions,
loss=loss,
weights=weights,
),
)
)


def make_privacy_unit_block(contributions):
    """Return the ``privacy_unit.py`` template as text, with CONTRIBUTIONS filled in.

    ``contributions`` is supplied to the template through ``fill_values``.
    """
    template = _Template("privacy_unit.py")
    filled = template.fill_values(CONTRIBUTIONS=contributions)
    return str(filled)
3 changes: 2 additions & 1 deletion dp_creator_ii/templates/context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
PRIVACY_UNIT_BLOCK
context = dp.Context.compositor(
data=pl.scan_csv(CSV_PATH, encoding="utf8-lossy"),
privacy_unit=dp.unit_of(contributions=UNIT),
privacy_unit=privacy_unit,
privacy_loss=dp.loss_of(epsilon=LOSS),
split_by_weights=WEIGHTS,
)
1 change: 1 addition & 0 deletions dp_creator_ii/templates/privacy_unit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
privacy_unit = dp.unit_of(contributions=CONTRIBUTIONS)
29 changes: 29 additions & 0 deletions dp_creator_ii/tests/fixtures/expected-script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from argparse import ArgumentParser

import polars as pl
import opendp.prelude as dp

dp.enable_features("contrib")


def get_context(csv_path):
privacy_unit = dp.unit_of(contributions=1)

context = dp.Context.compositor(
data=pl.scan_csv(csv_path, encoding="utf8-lossy"),
privacy_unit=privacy_unit,
privacy_loss=dp.loss_of(epsilon=1),
split_by_weights=[1],
)

return context


if __name__ == "__main__":
parser = ArgumentParser(
description="Creates a differentially private release from a csv"
)
parser.add_argument("--csv", help="Path to csv containing private data")
args = parser.parse_args()
context = get_context(csv_path=args.csv)
print(context)
7 changes: 5 additions & 2 deletions dp_creator_ii/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

# TODO: Why is incomplete coverage reported here?
# https://github.com/opendp/dp-creator-ii/issues/18
def test_app(page: Page, app: ShinyAppProc): # pragma: no cover
def test_navigation(page: Page, app: ShinyAppProc): # pragma: no cover
pick_dataset_text = "TODO: Pick dataset"
perform_analysis_text = "TODO: Define analysis"
download_results_text = "TODO: Download results"
Expand All @@ -29,6 +29,9 @@ def expect_no_error():
expect_visible(pick_dataset_text)
expect_not_visible(perform_analysis_text)
expect_not_visible(download_results_text)
page.get_by_label("Contributions").fill("42")
page.get_by_text("Code sample").click()
expect_visible("dp.unit_of(contributions=42)")
expect_no_error()

csv_path = Path(__file__).parent / "fixtures" / "fake.csv"
Expand All @@ -54,4 +57,4 @@ def expect_no_error():

download = download_info.value
script = download.path().read_text()
assert "privacy_unit=dp.unit_of(contributions=1)" in script
assert "privacy_unit = dp.unit_of(contributions=42)" in script
50 changes: 37 additions & 13 deletions dp_creator_ii/tests/test_template.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from tempfile import NamedTemporaryFile
import subprocess
from pathlib import Path
import re
import pytest
import opendp.prelude as dp
Expand All @@ -9,17 +10,28 @@
fake_csv = "dp_creator_ii/tests/fixtures/fake.csv"


def test_fill_expressions():
    """Each slot name in the template is replaced by the text supplied for it."""
    slots = {
        "VERB": "expects",
        "ADJ": "Spanish",
        "NOUN": "Inquisition",
    }
    template = _Template(None, template="No one VERB the ADJ NOUN!")
    filled = str(template.fill_expressions(**slots))
    assert filled == "No one expects the Spanish Inquisition!"


def test_fill_values():
context_template = _Template("context.py")
context_block = str(
context_template.fill_values(
CSV_PATH=fake_csv,
UNIT=1,
LOSS=1,
WEIGHTS=[1],
template = _Template(None, template="assert [STRING] * NUM == LIST")
filled = str(
template.fill_values(
STRING="🙂",
NUM=3,
LIST=["🙂", "🙂", "🙂"],
)
)
assert f"data=pl.scan_csv('{fake_csv}', encoding=\"utf8-lossy\")" in context_block
assert filled == "assert ['🙂'] * 3 == ['🙂', '🙂', '🙂']"


def test_fill_blocks():
Expand Down Expand Up @@ -65,15 +77,18 @@ def test_fill_template_unfilled_slots():
context_template = _Template("context.py")
with pytest.raises(
Exception,
match=re.escape("context.py has unfilled slots: CSV_PATH, LOSS, UNIT, WEIGHTS"),
match=re.escape(
"context.py has unfilled slots: "
"CSV_PATH, LOSS, PRIVACY_UNIT_BLOCK, WEIGHTS"
),
):
str(context_template.fill_values())


def test_make_notebook():
notebook = make_notebook_py(
csv_path=fake_csv,
unit=1,
contributions=1,
loss=1,
weights=[1],
)
Expand All @@ -84,14 +99,23 @@ def test_make_notebook():

def test_make_script():
script = make_script_py(
unit=1,
contributions=1,
loss=1,
weights=[1],
)

with NamedTemporaryFile(mode="w", delete=False) as fp:
def clear_empty_lines(text):
# Cleanup whitespace after indenting blocks
return re.sub(r"^\s+$", "", text, flags=re.MULTILINE).strip()

expected_script = (
Path(__file__).parent / "fixtures" / "expected-script.py"
).read_text()
assert clear_empty_lines(script) == clear_empty_lines(expected_script)

with NamedTemporaryFile(mode="w") as fp:
fp.write(script)
fp.close()
fp.flush()

result = subprocess.run(["python", fp.name, "--csv", fake_csv])
assert result.returncode == 0