Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Text (yaml) report for download #181

Merged
merged 18 commits into from
Dec 3, 2024
Merged
85 changes: 73 additions & 12 deletions dp_wizard/app/results_panel.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from pathlib import Path

from shiny import ui, render, reactive, Inputs, Outputs, Session
from faicons import icon_svg
from htmltools.tags import table, tr, td

from dp_wizard.utils.code_generators import (
NotebookGenerator,
Expand All @@ -9,17 +13,38 @@
from dp_wizard.utils.converters import convert_py_to_nb


wait_message = "Please wait."


def td_button(name: str, ext: str, icon: str):
    """
    Build a table cell holding one download button.

    The button id is "download_" followed by the lower-cased name with
    spaces turned into underscores; the label is an icon followed by
    "Download <name> (<ext>)".

    Note that the result is a one-element tuple wrapping the cell,
    not the bare cell.
    """
    button_id = "download_" + name.lower().replace(" ", "_")
    label = [
        icon_svg(icon, margin_right="0.5em"),
        f"Download {name} ({ext})",
    ]
    button = ui.download_button(button_id, label, width="20em")
    return (td(button),)


def results_ui():
return ui.nav_panel(
"Download results",
ui.markdown("You can now make a differentially private release of your data."),
ui.download_button(
"download_script",
"Download Script (.py)",
),
ui.download_button(
"download_notebook",
"Download Notebook (.ipynb)",
table(
tr(
td_button("Notebook", ".ipynb", "book"),
td_button("Script", ".py", "python"),
),
tr(
td_button("Report", ".txt", "file-lines"),
td_button("Table", ".csv", "file-csv"),
),
),
value="results_panel",
)
Expand Down Expand Up @@ -58,19 +83,55 @@ def analysis_plan() -> AnalysisPlan:
columns=columns,
)

@reactive.calc
def notebook_nb():
    """
    Generate the notebook source for the current analysis plan,
    convert it to a notebook, and execute it.

    Executing the notebook also drops the report files in the tmp dir,
    so other downloads call this just for that side effect.
    This could be slow, but reactive calcs are lazy.
    """
    generator = NotebookGenerator(analysis_plan())
    return convert_py_to_nb(generator.make_py(), execute=True)

@render.download(
filename="dp-wizard-script.py",
media_type="text/x-python",
)
async def download_script():
script_py = ScriptGenerator(analysis_plan()).make_py()
yield script_py
with ui.Progress() as progress:
progress.set(message=wait_message)
yield ScriptGenerator(analysis_plan()).make_py()

@render.download(
filename="dp-wizard-notebook.ipynb",
media_type="application/x-ipynb+json",
)
async def download_notebook():
notebook_py = NotebookGenerator(analysis_plan()).make_py()
notebook_nb = convert_py_to_nb(notebook_py, execute=True)
yield notebook_nb
with ui.Progress() as progress:
progress.set(message=wait_message)
yield notebook_nb()

@render.download(
    filename="dp-wizard-report.txt",
    media_type="text/plain",
)
async def download_report():
    """
    Serve the text report as a download.

    The report file is only written when the notebook is executed,
    so the notebook is evaluated first and the file is read afterwards.
    """
    report_path = Path(__file__).parent.parent / "tmp" / "report.txt"
    with ui.Progress() as progress:
        progress.set(message=wait_message)
        # The return value is not needed here:
        # evaluating the notebook is what creates the report file.
        notebook_nb()
        yield report_path.read_text()

@render.download(
    filename="dp-wizard-report.csv",
    # CSV content should be labelled "text/csv" rather than "text/plain",
    # so the declared type agrees with the ".csv" filename.
    media_type="text/csv",
)
async def download_table():
    """
    Serve the flattened CSV report as a download.

    The report file is only written when the notebook is executed,
    so the notebook is evaluated first and the file is read afterwards.
    """
    with ui.Progress() as progress:
        progress.set(message=wait_message)
        notebook_nb()  # Evaluate just for the side effect of creating report.
        report_csv = (
            Path(__file__).parent.parent / "tmp" / "report.csv"
        ).read_text()
        yield report_csv
2 changes: 2 additions & 0 deletions dp_wizard/tmp/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
demo.csv
report.txt
report.csv
50 changes: 45 additions & 5 deletions dp_wizard/utils/code_generators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,17 @@ def __init__(self, analysis_plan: AnalysisPlan):
@abstractmethod
def _make_context(self) -> str: ... # pragma: no cover

def _make_extra_blocks(self):
    """
    Return additional template blocks, keyed by block name.

    The default is no extra blocks; the result is unpacked as
    keyword arguments when the root template is filled.
    """
    return dict()

def make_py(self):
return str(
Template(self.root_template).fill_blocks(
IMPORTS_BLOCK=_make_imports(),
COLUMNS_BLOCK=self._make_columns(self.columns),
COLUMNS_BLOCK=self._make_columns(),
CONTEXT_BLOCK=self._make_context(),
QUERIES_BLOCK=self._make_queries(self.columns.keys()),
QUERIES_BLOCK=self._make_queries(),
**self._make_extra_blocks(),
)
)

Expand All @@ -66,22 +70,23 @@ def _make_margins_dict(self, bin_names: Iterable[str]):
margins_dict = "{" + "".join(margins) + "\n }"
return margins_dict

def _make_columns(self, columns: dict[str, AnalysisPlanColumn]):
def _make_columns(self):
return "\n".join(
make_column_config_block(
name=name,
lower_bound=col.lower_bound,
upper_bound=col.upper_bound,
bin_count=col.bin_count,
)
for name, col in columns.items()
for name, col in self.columns.items()
)

def _make_queries(self, column_names: Iterable[str]):
def _make_queries(self):
confidence_note = (
"The actual value is within the shown range "
f"with {int(confidence * 100)}% confidence."
)
column_names = self.columns.keys()
return f"confidence = {confidence} # {confidence_note}\n\n" + "\n".join(
_make_query(column_name) for column_name in column_names
)
Expand Down Expand Up @@ -115,6 +120,41 @@ class NotebookGenerator(_CodeGenerator):
def _make_context(self):
return str(self._make_partial_context().fill_values(CSV_PATH=self.csv_path))

def _make_extra_blocks(self):
    """
    Build the block of notebook code that writes the summary reports.

    One "report_kv" entry is rendered per column and the entries are
    joined into a dict-literal expression. That expression, the column
    settings, the input path, epsilon, and the two report paths
    (report.txt and report.csv in the "tmp" directory three levels
    above this file) are filled into the "reports" template.

    Returns a dict with the single key "REPORTS_BLOCK".
    """

    def make_report_entry(name):
        # Render the key/value source text for a single column.
        identifier = name_to_identifier(name)
        entry_template = (
            Template("report_kv")
            .fill_values(
                NAME=name,
                CONFIDENCE=confidence,
            )
            .fill_expressions(
                IDENTIFIER_HISTOGRAM=f"{identifier}_histogram",
                IDENTIFIER_ACCURACY=f"{identifier}_accuracy",
            )
        )
        return str(entry_template)

    entries = [make_report_entry(name) for name in self.columns]
    outputs_expression = "{" + ",".join(entries) + "}"

    tmp_path = Path(__file__).parent.parent.parent / "tmp"
    txt_report_path = str(tmp_path / "report.txt")
    csv_report_path = str(tmp_path / "report.csv")

    reports_template = (
        Template("reports")
        .fill_expressions(
            OUTPUTS=outputs_expression,
            COLUMNS={k: v._asdict() for k, v in self.columns.items()},
        )
        .fill_values(
            CSV_PATH=self.csv_path,
            EPSILON=self.epsilon,
            TXT_REPORT_PATH=txt_report_path,
            CSV_REPORT_PATH=csv_report_path,
        )
    )
    return {"REPORTS_BLOCK": str(reports_template)}


class ScriptGenerator(_CodeGenerator):
root_template = "script"
Expand Down
6 changes: 6 additions & 0 deletions dp_wizard/utils/code_generators/no-tests/_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@
# +
QUERIES_BLOCK
# -

# The code below produces a summary report.

# +
REPORTS_BLOCK
# -
5 changes: 5 additions & 0 deletions dp_wizard/utils/code_generators/no-tests/_report_kv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
NAME: {
"histogram": dict(zip(*df_to_columns(IDENTIFIER_HISTOGRAM))),
"accuracy": IDENTIFIER_ACCURACY,
"confidence": CONFIDENCE,
}
53 changes: 53 additions & 0 deletions dp_wizard/utils/code_generators/no-tests/_reports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from yaml import dump
from pathlib import Path
import csv


# https://stackoverflow.com/a/6027615/10727889
def flatten_dict(dictionary, parent_key=""):
    """
    Walk tree to return flat dictionary.

    Nested keys are joined with ": ". Keys below the top level are
    formatted as text, so numeric keys are accepted as well as strings.
    A nested dictionary that is empty contributes no entries.

    >>> from pprint import pp
    >>> pp(flatten_dict({
    ...     "inputs": {
    ...         "data": "fake.csv"
    ...     },
    ...     "outputs": {
    ...         "a column": {
    ...             "(0, 1]": 24,
    ...             "(1, 2]": 42,
    ...         }
    ...     }
    ... }))
    {'inputs: data': 'fake.csv',
     'outputs: a column: (0, 1]': 24,
     'outputs: a column: (1, 2]': 42}
    >>> flatten_dict({"histogram": {1: 10}})
    {'histogram: 1': 10}
    """
    separator = ": "
    flat = {}
    for key, value in dictionary.items():
        # Formatting, rather than "+", avoids an error
        # when a nested key is not a string.
        new_key = f"{parent_key}{separator}{key}" if parent_key else key
        if isinstance(value, dict):
            flat.update(flatten_dict(value, new_key))
        else:
            flat[new_key] = value
    return flat


# Gather the inputs and the outputs of the analysis in one nested structure.
report = {
    "inputs": {
        "data": CSV_PATH,
        "epsilon": EPSILON,
        "columns": COLUMNS,
    },
    "outputs": OUTPUTS,
}

# Serialize once, then show the text and save the same text to a file.
report_text = dump(report)
print(report_text)
Path(TXT_REPORT_PATH).write_text(report_text)

# The table has one row per leaf value: the flattened key, then the value.
flat_report = flatten_dict(report)
with Path(CSV_REPORT_PATH).open(mode="w", newline="") as handle:
    csv.writer(handle).writerows(flat_report.items())
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies = [
"jupyter-client",
"nbconvert",
"ipykernel",
"pyyaml",
]

[project.scripts]
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ scipy<1.14
# Conversion:
jupytext
jupyter-client
pyyaml
nbconvert
ipykernel
# May also require:
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ python-slugify==8.0.4
# via pytest-playwright
pyyaml==6.0.2
# via
# -r requirements-dev.in
# jupytext
# pre-commit
pyzmq==26.2.0
Expand Down
34 changes: 31 additions & 3 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,42 @@ def expect_no_error():
expect_visible(download_results_text)
expect_no_error()

with page.expect_download() as download_info:
# Text Report:
with page.expect_download() as text_report_download_info:
page.get_by_text("Download report (.txt)").click()
expect_no_error()

report_download = text_report_download_info.value
report = report_download.path().read_text()
assert "confidence: 0.95" in report

# CSV Report:
with page.expect_download() as csv_report_download_info:
page.get_by_text("Download table (.csv)").click()
expect_no_error()

report_download = csv_report_download_info.value
report = report_download.path().read_text()
assert "outputs: grade: confidence,0.95" in report

# Script:
with page.expect_download() as script_download_info:
page.get_by_text("Download script").click()
expect_no_error()

download = download_info.value
script = download.path().read_text()
script_download = script_download_info.value
script = script_download.path().read_text()
assert "privacy_unit = dp.unit_of(contributions=42)" in script

# Notebook:
with page.expect_download() as notebook_download_info:
page.get_by_text("Download notebook").click()
expect_no_error()

notebook_download = notebook_download_info.value
notebook = notebook_download.path().read_text()
assert "privacy_unit = dp.unit_of(contributions=42)" in notebook

# -- Feedback --
page.get_by_text("Feedback").click()
iframe = page.locator("#feedback-iframe")
Expand Down
Loading