Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

User-provided contributions flow all the way to the generated code #41

Merged
merged 15 commits into from
Oct 10, 2024
10 changes: 4 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,14 @@ Building on what we've learned from [DP Creator](https://github.com/opendp/dpcre
## Usage

```
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--unit UNIT_OF_PRIVACY]
usage: dp-creator-ii [-h] [--csv CSV_PATH] [--contrib CONTRIB]

DP Creator II makes it easier to get started with Differential Privacy.

options:
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--unit UNIT_OF_PRIVACY
Unit of privacy: How many rows can an individual
contribute?
-h, --help show this help message and exit
--csv CSV_PATH Path to CSV containing private data
--contrib CONTRIB How many rows can an individual contribute?
```


Expand Down
8 changes: 5 additions & 3 deletions dp_creator_ii/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ def get_arg_parser():
help="Path to CSV containing private data",
)
parser.add_argument(
"--unit",
dest="unit_of_privacy",
"--contrib",
dest="contributions",
metavar="CONTRIB",
type=int,
help="Unit of privacy: How many rows can an individual contribute?",
default=1,
help="How many rows can an individual contribute?",
)
return parser

Expand Down
37 changes: 23 additions & 14 deletions dp_creator_ii/app/dataset_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,48 @@
from shiny import ui, reactive, render

from dp_creator_ii import get_arg_parser
from dp_creator_ii.app.ui_helpers import output_code_sample
from dp_creator_ii.template import make_privacy_unit_block


def get_args():
    """Parse the command-line arguments for the app.

    When the process was started by a Playwright test, argv begins with
    ``run --port`` and does not hold this app's own options, so an empty
    argument list is parsed instead and the parser defaults are returned.
    """
    parser = get_arg_parser()
    started_by_playwright = argv[1:3] == ["run", "--port"]
    # ``None`` makes argparse read sys.argv itself (normal parsing);
    # an empty list makes it ignore the polluted argv entirely.
    cli_args = [] if started_by_playwright else None
    return parser.parse_args(cli_args)


def dataset_ui():
args = get_args()
ekraffmiller marked this conversation as resolved.
Show resolved Hide resolved

return ui.nav_panel(
"Select Dataset",
"TODO: Pick dataset",
ui.output_text("csv_path_text"),
ui.output_text("unit_of_privacy_text"),
ui.input_numeric("contributions", "Contributions", args.contributions),
output_code_sample("unit_of_privacy_python"),
ui.input_action_button("go_to_analysis", "Define analysis"),
value="dataset_panel",
)


def dataset_server(input, output, session):
if argv[1:3] == ["run", "--port"]:
# Started by playwright
arg_csv_path = None
arg_unit_of_privacy = None
else:
args = get_arg_parser().parse_args()
arg_csv_path = args.csv_path
arg_unit_of_privacy = args.unit_of_privacy
args = get_args()

csv_path = reactive.value(arg_csv_path)
unit_of_privacy = reactive.value(arg_unit_of_privacy)
csv_path = reactive.value(args.csv_path)

@render.text
def csv_path_text():
return str(csv_path.get())

@render.text
def unit_of_privacy_text():
return str(unit_of_privacy.get())
@render.code
def unit_of_privacy_python():
contributions = input.contributions()
return make_privacy_unit_block(contributions)

@reactive.effect
@reactive.event(input.go_to_analysis)
Expand Down
9 changes: 6 additions & 3 deletions dp_creator_ii/app/results_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ def results_server(input, output, session):
media_type="text/x-python",
)
async def download_script():
contributions = input.contributions()
script_py = make_script_py(
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand All @@ -32,9 +33,10 @@ async def download_script():
media_type="application/x-ipynb+json",
)
async def download_notebook_unexecuted():
contributions = input.contributions()
notebook_py = make_notebook_py(
csv_path="todo.csv",
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand All @@ -46,9 +48,10 @@ async def download_notebook_unexecuted():
media_type="application/x-ipynb+json",
)
async def download_notebook_executed():
contributions = input.contributions()
notebook_py = make_notebook_py(
csv_path="todo.csv",
unit=1,
contributions=contributions,
loss=1,
weights=[1],
)
Expand Down
9 changes: 9 additions & 0 deletions dp_creator_ii/app/ui_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from htmltools.tags import details, summary
from shiny import ui


def output_code_sample(name_of_render_function):
    """Build a ``<details>`` element labelled "Code sample".

    The element wraps a Shiny code output bound to the render function
    whose name is ``name_of_render_function``.
    """
    label = summary("Code sample")
    code_output = ui.output_code(name_of_render_function)
    return details(label, code_output)
29 changes: 20 additions & 9 deletions dp_creator_ii/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,57 +57,68 @@ def __str__(self):
return self._template


def _make_context_for_notebook(csv_path, unit, loss, weights):
def _make_context_for_notebook(csv_path, contributions, loss, weights):
privacy_unit_block = make_privacy_unit_block(contributions)
return str(
_Template("context.py").fill_values(
_Template("context.py")
.fill_values(
CSV_PATH=csv_path,
UNIT=unit,
LOSS=loss,
WEIGHTS=weights,
)
.fill_blocks(
PRIVACY_UNIT_BLOCK=privacy_unit_block,
)
)


def _make_context_for_script(unit, loss, weights):
def _make_context_for_script(contributions, loss, weights):
privacy_unit_block = make_privacy_unit_block(contributions)
return str(
_Template("context.py")
.fill_expressions(
CSV_PATH="csv_path",
)
.fill_values(
UNIT=unit,
LOSS=loss,
WEIGHTS=weights,
)
.fill_blocks(
PRIVACY_UNIT_BLOCK=privacy_unit_block,
)
)


def _make_imports():
return str(_Template("imports.py").fill_values())


def make_notebook_py(csv_path, unit, loss, weights):
def make_notebook_py(csv_path, contributions, loss, weights):
return str(
_Template("notebook.py").fill_blocks(
IMPORTS_BLOCK=_make_imports(),
CONTEXT_BLOCK=_make_context_for_notebook(
csv_path=csv_path,
unit=unit,
contributions=contributions,
loss=loss,
weights=weights,
),
)
)


def make_script_py(unit, loss, weights):
def make_script_py(contributions, loss, weights):
return str(
_Template("script.py").fill_blocks(
IMPORTS_BLOCK=_make_imports(),
CONTEXT_BLOCK=_make_context_for_script(
unit=unit,
contributions=contributions,
loss=loss,
weights=weights,
),
)
)


def make_privacy_unit_block(contributions):
    """Return the ``privacy_unit.py`` template as a string.

    The template's ``CONTRIBUTIONS`` slot is filled with ``contributions``.
    """
    template = _Template("privacy_unit.py")
    filled = template.fill_values(CONTRIBUTIONS=contributions)
    return str(filled)
4 changes: 3 additions & 1 deletion dp_creator_ii/templates/context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
PRIVACY_UNIT_BLOCK

context = dp.Context.compositor(
data=pl.scan_csv(CSV_PATH, encoding="utf8-lossy"),
privacy_unit=dp.unit_of(contributions=UNIT),
privacy_unit=privacy_unit,
privacy_loss=dp.loss_of(epsilon=LOSS),
split_by_weights=WEIGHTS,
)
1 change: 1 addition & 0 deletions dp_creator_ii/templates/privacy_unit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
privacy_unit = dp.unit_of(contributions=CONTRIBUTIONS)
39 changes: 22 additions & 17 deletions dp_creator_ii/tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,42 @@
app = create_app_fixture("../app/__init__.py")


def expect_visible(page, text):
    """Assert that the element located by ``text`` is visible on ``page``."""
    locator = page.get_by_text(text)
    expect(locator).to_be_visible()


def expect_not_visible(page, text):
    """Assert that the element located by ``text`` is not visible on ``page``."""
    locator = page.get_by_text(text)
    expect(locator).not_to_be_visible()


# TODO: Why is incomplete coverage reported here?
# https://github.com/opendp/dp-creator-ii/issues/18
def test_app(page: Page, app: ShinyAppProc): # pragma: no cover
def test_navigation(page: Page, app: ShinyAppProc): # pragma: no cover
pick_dataset_text = "TODO: Pick dataset"
perform_analysis_text = "TODO: Define analysis"
download_results_text = "TODO: Download results"

def expect_visible(text):
expect(page.get_by_text(text)).to_be_visible()

def expect_not_visible(text):
expect(page.get_by_text(text)).not_to_be_visible()

page.goto(app.url)
expect(page).to_have_title("DP Creator II")
expect_visible(pick_dataset_text)
expect_not_visible(perform_analysis_text)
expect_not_visible(download_results_text)
expect_visible(page, pick_dataset_text)
expect_not_visible(page, perform_analysis_text)
expect_not_visible(page, download_results_text)
page.get_by_label("Contributions").fill("42")
page.get_by_text("Code sample").click()
expect_visible(page, "dp.unit_of(contributions=42)")

page.get_by_role("button", name="Define analysis").click()
expect_not_visible(pick_dataset_text)
expect_visible(perform_analysis_text)
expect_not_visible(download_results_text)
expect_not_visible(page, pick_dataset_text)
expect_visible(page, perform_analysis_text)
expect_not_visible(page, download_results_text)

page.get_by_role("button", name="Download results").click()
expect_not_visible(pick_dataset_text)
expect_not_visible(perform_analysis_text)
expect_visible(download_results_text)
expect_not_visible(page, pick_dataset_text)
expect_not_visible(page, perform_analysis_text)
expect_visible(page, download_results_text)

with page.expect_download() as download_info:
page.get_by_text("Download script").click()
download = download_info.value
script = download.path().read_text()
assert "privacy_unit=dp.unit_of(contributions=1)" in script
assert "privacy_unit = dp.unit_of(contributions=42)" in script
9 changes: 6 additions & 3 deletions dp_creator_ii/tests/test_template.py
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes in this file for the PR look good, but I wanted to ask about test_fill_values() - can there be more assertions here, for the other lines in the context_block? Also in test_make_script(), the test is checking that the python script runs without error, but it doesn't check the contents of the script. Even though you are testing the other functions individually, it could be good to test that the final script has the correct contents.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For test_fill_values, I was thinking of it as a unit test of the template filling, so I think the best resolution is to make it simpler, so it only does that.

I will make test_make_script stronger, since it is more of an integration test. Thanks!

Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,18 @@ def test_fill_template_unfilled_slots():
context_template = _Template("context.py")
with pytest.raises(
Exception,
match=re.escape("context.py has unfilled slots: CSV_PATH, LOSS, UNIT, WEIGHTS"),
match=re.escape(
"context.py has unfilled slots: "
"CSV_PATH, LOSS, PRIVACY_UNIT_BLOCK, WEIGHTS"
),
):
str(context_template.fill_values())


def test_make_notebook():
notebook = make_notebook_py(
csv_path=fake_csv,
unit=1,
contributions=1,
loss=1,
weights=[1],
)
Expand All @@ -84,7 +87,7 @@ def test_make_notebook():

def test_make_script():
script = make_script_py(
unit=1,
contributions=1,
loss=1,
weights=[1],
)
Expand Down