From 4305b14566c8051d55077f89bb6d1bc7fba246b0 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Mon, 2 Dec 2024 17:11:50 -0500 Subject: [PATCH] CSV report (#182) * use original column names in our report * flatten util * checkpoint on CSV report; tests not passing * csv report works * add a test * button grid * factor out button function * update test to match --- dp_wizard/app/results_panel.py | 51 +++++++++++++++---- dp_wizard/tmp/.gitignore | 1 + dp_wizard/utils/code_generators/__init__.py | 15 +++--- .../code_generators/no-tests/_report_kv.py | 2 +- .../code_generators/no-tests/_reports.py | 41 ++++++++++++++- tests/test_app.py | 19 +++++-- 6 files changed, 103 insertions(+), 26 deletions(-) diff --git a/dp_wizard/app/results_panel.py b/dp_wizard/app/results_panel.py index 5d5ce02..fa02296 100644 --- a/dp_wizard/app/results_panel.py +++ b/dp_wizard/app/results_panel.py @@ -1,6 +1,8 @@ from pathlib import Path from shiny import ui, render, reactive, Inputs, Outputs, Session +from faicons import icon_svg +from htmltools.tags import table, tr, td from dp_wizard.utils.code_generators import ( NotebookGenerator, @@ -14,21 +16,35 @@ wait_message = "Please wait." +def td_button(name: str, ext: str, icon: str): + function_name = f'download_{name.lower().replace(" ", "_")}' + return ( + td( + ui.download_button( + function_name, + [ + icon_svg(icon, margin_right="0.5em"), + f"Download {name} ({ext})", + ], + width="20em", + ) + ), + ) + + def results_ui(): return ui.nav_panel( "Download results", ui.markdown("You can now make a differentially private release of your data."), - ui.download_button( - "download_report", - "Download Report (.txt)", - ), - ui.download_button( - "download_script", - "Download Script (.py)", - ), - ui.download_button( - "download_notebook", - "Download Notebook (.ipynb)", + table( + tr( + td_button("Notebook", ".ipynb", "book"), + td_button("Script", ".py", "python"), + ), + tr( + td_button("Report", ".txt", "file-lines"), + td_button("Table", ".csv", "file-csv"), + ), ), value="results_panel", ) @@ -106,3 +122,16 @@ async def download_report(): Path(__file__).parent.parent / "tmp" / "report.txt" ).read_text() yield report_txt + + @render.download( + filename="dp-wizard-report.csv", + media_type="text/plain", + ) + async def download_table(): + with ui.Progress() as progress: + progress.set(message=wait_message) + notebook_nb() # Evaluate just for the side effect of creating report. + report_csv = ( + Path(__file__).parent.parent / "tmp" / "report.csv" + ).read_text() + yield report_csv diff --git a/dp_wizard/tmp/.gitignore b/dp_wizard/tmp/.gitignore index 1e64c47..8f15d7b 100644 --- a/dp_wizard/tmp/.gitignore +++ b/dp_wizard/tmp/.gitignore @@ -1,2 +1,3 @@ demo.csv report.txt +report.csv diff --git a/dp_wizard/utils/code_generators/__init__.py b/dp_wizard/utils/code_generators/__init__.py index 92d62f1..d1e02ca 100644 --- a/dp_wizard/utils/code_generators/__init__.py +++ b/dp_wizard/utils/code_generators/__init__.py @@ -120,25 +120,25 @@ def _make_context(self): return str(self._make_partial_context().fill_values(CSV_PATH=self.csv_path)) def _make_extra_blocks(self): - identifiers = [name_to_identifier(name) for name in self.columns.keys()] outputs_expression = ( "{" + ",".join( str( Template("report_kv") .fill_values( - IDENTIFIER=id, + NAME=name, CONFIDENCE=confidence, ) .fill_expressions( - IDENTIFIER_HISTOGRAM=f"{id}_histogram", - IDENTIFIER_ACCURACY=f"{id}_accuracy", + IDENTIFIER_HISTOGRAM=f"{name_to_identifier(name)}_histogram", + IDENTIFIER_ACCURACY=f"{name_to_identifier(name)}_accuracy", ) ) - for id in identifiers + for name in self.columns.keys() ) + "}" ) + tmp_path = Path(__file__).parent.parent.parent / "tmp" reports_block = str( Template("reports") .fill_expressions( @@ -146,9 +146,8 @@ def _make_extra_blocks(self): ) .fill_values( CSV_PATH=self.csv_path, - REPORT_PATH=str( - Path(__file__).parent.parent.parent / "tmp" / "report.txt" - ), + TXT_REPORT_PATH=str(tmp_path / "report.txt"), + CSV_REPORT_PATH=str(tmp_path / "report.csv"), ) ) return {"REPORTS_BLOCK": reports_block} diff --git a/dp_wizard/utils/code_generators/no-tests/_report_kv.py b/dp_wizard/utils/code_generators/no-tests/_report_kv.py index 544c85a..b27bf5f 100644 --- a/dp_wizard/utils/code_generators/no-tests/_report_kv.py +++ b/dp_wizard/utils/code_generators/no-tests/_report_kv.py @@ -1,4 +1,4 @@ -IDENTIFIER: { +NAME: { "histogram": dict(zip(*df_to_columns(IDENTIFIER_HISTOGRAM))), "accuracy": IDENTIFIER_ACCURACY, "confidence": CONFIDENCE, diff --git a/dp_wizard/utils/code_generators/no-tests/_reports.py b/dp_wizard/utils/code_generators/no-tests/_reports.py index 3b4c84b..bfbbcb1 100644 --- a/dp_wizard/utils/code_generators/no-tests/_reports.py +++ b/dp_wizard/utils/code_generators/no-tests/_reports.py @@ -1,5 +1,38 @@ from yaml import dump from pathlib import Path +import csv + + +# https://stackoverflow.com/a/6027615/10727889 +def flatten_dict(dictionary, parent_key=""): + """ + Walk tree to return flat dictionary. + >>> from pprint import pp + >>> pp(flatten_dict({ + ... "inputs": { + ... "data": "fake.csv" + ... }, + ... "outputs": { + ... "a column": { + ... "(0, 1]": 24, + ... "(1, 2]": 42, + ... } + ... } + ... })) + {'inputs: data': 'fake.csv', + 'outputs: a column: (0, 1]': 24, + 'outputs: a column: (1, 2]': 42} + """ + separator = ": " + items = [] + for key, value in dictionary.items(): + new_key = parent_key + separator + key if parent_key else key + if isinstance(value, dict): + items.extend(flatten_dict(value, new_key).items()) + else: + items.append((new_key, value)) + return dict(items) + report = { "inputs": { @@ -9,4 +42,10 @@ } print(dump(report)) -Path(REPORT_PATH).write_text(dump(report)) +Path(TXT_REPORT_PATH).write_text(dump(report)) + +flat_report = flatten_dict(report) +with Path(CSV_REPORT_PATH).open(mode="w", newline="") as handle: + writer = csv.writer(handle) + for kv_pair in flat_report.items(): + writer.writerow(kv_pair) diff --git a/tests/test_app.py b/tests/test_app.py index 93c0ed6..64c46cc 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -117,14 +117,23 @@ def expect_no_error(): expect_visible(download_results_text) expect_no_error() - # Notebook: - with page.expect_download() as report_download_info: - page.get_by_text("Download report").click() + # Text Report: + with page.expect_download() as text_report_download_info: + page.get_by_text("Download report (.txt)").click() + expect_no_error() + + report_download = text_report_download_info.value + report = report_download.path().read_text() + assert "confidence: 0.95" in report + + # CSV Report: + with page.expect_download() as csv_report_download_info: + page.get_by_text("Download table (.csv)").click() expect_no_error() - report_download = report_download_info.value + report_download = csv_report_download_info.value report = report_download.path().read_text() - assert "inputs:" in report + assert "outputs: grade: confidence,0.95" in report # Script: with page.expect_download() as script_download_info: