From 3b89001a19034100110427825583990fe3eab02f Mon Sep 17 00:00:00 2001
From: Chuck McCallum <mccalluc@users.noreply.github.com>
Date: Mon, 18 Nov 2024 09:22:00 -0500
Subject: [PATCH] Make notebook with plots for columns (#152)

* lower and upper more consistently

* one more

* handle bounds/bins/counts the same way

* lots of reactive dicts, but the UI has not changed

* data dump on the results page

* add a pragma: no cover

* reset widget values after checkbox change

* do not clean up values

* put tooltips in labels

* pull warning up to analysis panel. TODO: conditional

* move warning to bottom of list

* analysis definition JSON

* stubs for python

* stub a script on results page

* include column info in generated script

* closer to a runnable notebook

* stuck on split_by_weight... maybe a library bug?

* margin stubs

* format python identifiers correctly

* script has gotten longer: does not make sense to check for exact equality

* fix syntactic problems in generated code

* fill in columns, but still WIP

* fix column names; tests pass

* move confidence

* simplify download panel

* add markdown cells

* tidy up

* fix copy-paste of util functions

* sort the intervals
---
 WHAT-WE-LEARNED.md                            |   2 +
 dp_wizard/app/components/column_module.py     |   2 +-
 dp_wizard/app/components/plots.py             |  29 -----
 dp_wizard/app/results_panel.py                | 108 +++++++++---------
 dp_wizard/utils/csv_helper.py                 |   8 +-
 dp_wizard/utils/dp_helper.py                  |  21 +---
 dp_wizard/utils/shared.py                     |  57 +++++++++
 dp_wizard/utils/templates/__init__.py         | 103 +++++++++++++++--
 .../templates/no-tests/_column_config.py      |   8 +-
 .../utils/templates/no-tests/_context.py      |   3 +-
 .../utils/templates/no-tests/_imports.py      |   3 +
 .../utils/templates/no-tests/_notebook.py     |  35 +++++-
 dp_wizard/utils/templates/no-tests/_query.py  |   4 +
 dp_wizard/utils/templates/no-tests/_script.py |   5 +-
 tests/fixtures/expected-script.py             |  31 -----
 tests/utils/test_templates.py                 |  34 +++---
 16 files changed, 290 insertions(+), 163 deletions(-)
 delete mode 100644 dp_wizard/app/components/plots.py
 create mode 100644 dp_wizard/utils/shared.py
 create mode 100644 dp_wizard/utils/templates/no-tests/_query.py
 delete mode 100644 tests/fixtures/expected-script.py

diff --git a/WHAT-WE-LEARNED.md b/WHAT-WE-LEARNED.md
index dff73d5..812b320 100644
--- a/WHAT-WE-LEARNED.md
+++ b/WHAT-WE-LEARNED.md
@@ -46,6 +46,8 @@ but that returns an error:
 Renderer.__call__() missing 1 required positional argument: '_fn'
 ```
 
+If I just refer to a reactive calc directly in the UI, there is no error in the log, just a spinner in the UI.
+
 ## No component testing
 
 It feels like a gap in the library that there is no component testing. The only advice is to pull out testable logic from the server functions, and for the rest, use end-to-end tests: There's not a recommended way to test the ui+server interaction for just one component.
diff --git a/dp_wizard/app/components/column_module.py b/dp_wizard/app/components/column_module.py
index a84a13d..3e152bf 100644
--- a/dp_wizard/app/components/column_module.py
+++ b/dp_wizard/app/components/column_module.py
@@ -3,7 +3,7 @@
 from shiny import ui, render, module, reactive
 
 from dp_wizard.utils.dp_helper import make_confidence_accuracy_histogram
-from dp_wizard.app.components.plots import plot_histogram
+from dp_wizard.utils.shared import plot_histogram
 from dp_wizard.utils.templates import make_column_config_block
 from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
 
diff --git a/dp_wizard/app/components/plots.py b/dp_wizard/app/components/plots.py
deleted file mode 100644
index bd017b6..0000000
--- a/dp_wizard/app/components/plots.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import matplotlib.pyplot as plt
-
-
-def _df_to_columns(df):
-    """
-    >>> import polars as pl
-    >>> df = pl.DataFrame({
-    ...     "bin": ["A", "B", "C"],
-    ...     "len": [0, 10, 20],
-    ... })
-    >>> _df_to_columns(df)
-    (['A', 'B', 'C'], [0, 10, 20])
-    """
-    return tuple(list(df[col]) for col in df.columns)
-
-
-def plot_histogram(histogram_df, error, cutoff):  # pragma: no cover
-    bins, values = _df_to_columns(histogram_df)
-    mod = (len(bins) // 12) + 1
-    majors = [label for i, label in enumerate(bins) if i % mod == 0]
-    minors = [label for i, label in enumerate(bins) if i % mod != 0]
-    _figure, axes = plt.subplots()
-    bar_colors = ["blue" if v > cutoff else "lightblue" for v in values]
-    axes.bar(bins, values, color=bar_colors, yerr=error)
-    axes.set_xticks(majors, majors)
-    axes.set_xticks(minors, ["" for _ in minors], minor=True)
-    axes.axhline(cutoff, color="lightgrey", zorder=-1)
-    axes.set_ylim(bottom=0)
-    # TODO: Since this seems to return None, how does the information flow?
diff --git a/dp_wizard/app/results_panel.py b/dp_wizard/app/results_panel.py
index b50aca1..58a23a6 100644
--- a/dp_wizard/app/results_panel.py
+++ b/dp_wizard/app/results_panel.py
@@ -1,6 +1,6 @@
 from json import dumps
 
-from shiny import ui, render
+from shiny import ui, render, reactive
 
 from dp_wizard.utils.templates import make_notebook_py, make_script_py
 from dp_wizard.utils.converters import convert_py_to_nb
@@ -9,24 +9,13 @@
 def results_ui():
     return ui.nav_panel(
         "Download results",
-        ui.p("TODO: Use this information to fill in a template!"),
-        ui.output_code("data_dump"),
-        ui.markdown(
-            "You can now make a differentially private release of your data. "
-            "This will lock the configuration you’ve provided on the previous pages."
-        ),
-        ui.markdown("TODO: Button: “Download Report (.txt)” (implemented as yaml?)"),
-        ui.markdown("TODO: Button: “Download Report (.csv)"),
-        ui.markdown(
-            "You can also download code that can be executed to produce a DP release. "
-            "Downloaded code does not lock the configuration."
-        ),
+        ui.markdown("You can now make a differentially private release of your data."),
         ui.download_button(
             "download_script",
             "Download Script (.py)",
         ),
         ui.download_button(
-            "download_notebook_unexecuted",
+            "download_notebook",
             "Download Notebook (.ipynb)",
         ),
         value="results_panel",
@@ -45,32 +34,64 @@ def results_server(
     weights,
     epsilon,
 ):  # pragma: no cover
-    @render.code
-    def data_dump():
-        # TODO: Use this information in a template!
+    @reactive.calc
+    def analysis_dict():
+        # weights().keys() will reflect the desired columns:
+        # The other inputs (bounds and bin counts) retain inactive columns,
+        # so user inputs aren't lost when toggling checkboxes.
+        columns = {
+            col: {
+                "lower_bound": lower_bounds()[col],
+                "upper_bound": upper_bounds()[col],
+                "bin_count": int(bin_counts()[col]),
+                # TODO: Floats should work for weight, but they don't:
+                # https://github.com/opendp/opendp/issues/2140
+                "weight": int(weights()[col]),
+            }
+            for col in weights().keys()
+        }
+        return {
+            "csv_path": csv_path(),
+            "contributions": contributions(),
+            "epsilon": epsilon(),
+            "columns": columns,
+        }
+
+    @reactive.calc
+    def analysis_json():
         return dumps(
-            {
-                "csv_path": csv_path(),
-                "contributions": contributions(),
-                "lower_bounds": lower_bounds(),
-                "upper_bounds": upper_bounds(),
-                "bin_counts": bin_counts(),
-                "weights": weights(),
-                "epsilon": epsilon(),
-            },
+            analysis_dict(),
             indent=2,
         )
 
+    @render.text
+    def analysis_json_text():
+        return analysis_json()
+
+    @reactive.calc
+    def analysis_python():
+        analysis = analysis_dict()
+        return make_notebook_py(
+            csv_path=analysis["csv_path"],
+            contributions=analysis["contributions"],
+            epsilon=analysis["epsilon"],
+            columns=analysis["columns"],
+        )
+
+    @render.text
+    def analysis_python_text():
+        return analysis_python()
+
     @render.download(
         filename="dp-wizard-script.py",
         media_type="text/x-python",
     )
     async def download_script():
-        contributions = input.contributions()
+        analysis = analysis_dict()
         script_py = make_script_py(
-            contributions=contributions,
-            epsilon=1,
-            weights=[1],
+            contributions=analysis["contributions"],
+            epsilon=analysis["epsilon"],
+            columns=analysis["columns"],
         )
         yield script_py
 
@@ -78,28 +99,13 @@ async def download_script():
         filename="dp-wizard-notebook.ipynb",
         media_type="application/x-ipynb+json",
     )
-    async def download_notebook_unexecuted():
-        contributions = input.contributions()
-        notebook_py = make_notebook_py(
-            csv_path="todo.csv",
-            contributions=contributions,
-            epsilon=1,
-            weights=[1],
-        )
-        notebook_nb = convert_py_to_nb(notebook_py)
-        yield notebook_nb
-
-    @render.download(
-        filename="dp-wizard-notebook-executed.ipynb",
-        media_type="application/x-ipynb+json",
-    )
-    async def download_notebook_executed():
-        contributions = input.contributions()
+    async def download_notebook():
+        analysis = analysis_dict()
         notebook_py = make_notebook_py(
-            csv_path="todo.csv",
-            contributions=contributions,
-            epsilon=1,
-            weights=[1],
+            csv_path=analysis["csv_path"],
+            contributions=analysis["contributions"],
+            epsilon=analysis["epsilon"],
+            columns=analysis["columns"],
         )
         notebook_nb = convert_py_to_nb(notebook_py, execute=True)
         yield notebook_nb
diff --git a/dp_wizard/utils/csv_helper.py b/dp_wizard/utils/csv_helper.py
index 4c279ee..b7b92dd 100644
--- a/dp_wizard/utils/csv_helper.py
+++ b/dp_wizard/utils/csv_helper.py
@@ -2,9 +2,11 @@
 We'll use the following terms consistently throughout the application:
 - name: This is the exact column header in the CSV.
 - label: This is the string we'll display.
-- id: This is the string we'll pass as a module ID.
+- id: This is the opaque string we'll pass as a module ID.
+- identifier: This is a form of the name that can be used as a Python identifier.
 """
 
+import re
 import polars as pl
 
 
@@ -34,3 +36,7 @@ def name_to_id(name):
     # Shiny is fussy about module IDs,
     # but we don't need them to be human readable.
     return str(hash(name)).replace("-", "_")
+
+
+def name_to_identifier(name):
+    return re.sub(r"\W+", "_", name).lower()
diff --git a/dp_wizard/utils/dp_helper.py b/dp_wizard/utils/dp_helper.py
index b7949bd..f7ec1d9 100644
--- a/dp_wizard/utils/dp_helper.py
+++ b/dp_wizard/utils/dp_helper.py
@@ -2,28 +2,11 @@
 import opendp.prelude as dp
 
 from dp_wizard.utils.mock_data import mock_data, ColumnDef
+from dp_wizard.utils.shared import make_cut_points
 
 dp.enable_features("contrib")
 
 
-def _make_cut_points(lower, upper, bin_count):
-    """
-    Returns one more cut point than the bin_count.
-    (There are actually two more bins, extending to
-    -inf and +inf, but we'll ignore those.)
-    Cut points are evenly spaced from lower to upper.
-
-    >>> _make_cut_points(0, 10, 1)
-    [0.0, 10.0]
-    >>> _make_cut_points(0, 10, 2)
-    [0.0, 5.0, 10.0]
-    >>> _make_cut_points(0, 10, 3)
-    [0.0, 3.33, 6.67, 10.0]
-    """
-    bin_width = (upper - lower) / bin_count
-    return [round(lower + i * bin_width, 2) for i in range(bin_count + 1)]
-
-
 def make_confidence_accuracy_histogram(
     lower=None, upper=None, bin_count=None, contributions=None, weighted_epsilon=None
 ):
@@ -58,7 +41,7 @@ def make_confidence_accuracy_histogram(
     # TODO: When this is stable, merge it to templates, so we can be
     # sure that we're using the same code in the preview that we
     # use in the generated notebook.
-    cut_points = _make_cut_points(lower, upper, bin_count)
+    cut_points = make_cut_points(lower, upper, bin_count)
     context = dp.Context.compositor(
         data=pl.LazyFrame(df).with_columns(
             # The cut() method returns a Polars categorical type.
diff --git a/dp_wizard/utils/shared.py b/dp_wizard/utils/shared.py
new file mode 100644
index 0000000..75719ff
--- /dev/null
+++ b/dp_wizard/utils/shared.py
@@ -0,0 +1,57 @@
+# These functions are used both in the application and in generated notebooks.
+
+
+def make_cut_points(lower_bound, upper_bound, bin_count):
+    """
+    Returns one more cut point than the bin_count.
+    (There are actually two more bins, extending to
+    -inf and +inf, but we'll ignore those.)
+    Cut points are evenly spaced from lower_bound to upper_bound.
+    >>> make_cut_points(0, 10, 2)
+    [0.0, 5.0, 10.0]
+    """
+    bin_width = (upper_bound - lower_bound) / bin_count
+    return [round(lower_bound + i * bin_width, 2) for i in range(bin_count + 1)]
+
+
+def interval_bottom(interval):
+    """
+    >>> interval_bottom("(10, 20]")
+    10.0
+    """
+    return float(interval.split(",")[0][1:])
+
+
+def df_to_columns(df):
+    """
+    Transform a Dataframe into a format that is easier to plot,
+    parsing the interval strings to sort them as numbers.
+    >>> import polars as pl
+    >>> df = pl.DataFrame({
+    ...     "bin": ["(-inf, 5]", "(10, 20]", "(5, 10]"],
+    ...     "len": [0, 20, 10],
+    ... })
+    >>> df_to_columns(df)
+    (('(-inf, 5]', '(5, 10]', '(10, 20]'), (0, 10, 20))
+    """
+    sorted_rows = sorted(df.rows(), key=lambda pair: interval_bottom(pair[0]))
+    return tuple(zip(*sorted_rows))
+
+
+def plot_histogram(histogram_df, error, cutoff):  # pragma: no cover
+    """
+    Given a Dataframe for a histogram, plot the data.
+    """
+    import matplotlib.pyplot as plt
+
+    bins, values = df_to_columns(histogram_df)
+    mod = (len(bins) // 12) + 1
+    majors = [label for i, label in enumerate(bins) if i % mod == 0]
+    minors = [label for i, label in enumerate(bins) if i % mod != 0]
+    _figure, axes = plt.subplots()
+    bar_colors = ["blue" if v > cutoff else "lightblue" for v in values]
+    axes.bar(bins, values, color=bar_colors, yerr=error)
+    axes.set_xticks(majors, majors)
+    axes.set_xticks(minors, ["" for _ in minors], minor=True)
+    axes.axhline(cutoff, color="lightgrey", zorder=-1)
+    axes.set_ylim(bottom=0)
diff --git a/dp_wizard/utils/templates/__init__.py b/dp_wizard/utils/templates/__init__.py
index 8251890..591e1eb 100644
--- a/dp_wizard/utils/templates/__init__.py
+++ b/dp_wizard/utils/templates/__init__.py
@@ -7,6 +7,7 @@
 
 from pathlib import Path
 import re
+from dp_wizard.utils.csv_helper import name_to_identifier
 
 
 class _Template:
@@ -70,11 +71,41 @@ def __str__(self):
         return self._template
 
 
-def _make_context_for_notebook(csv_path, contributions, epsilon, weights):
+def _make_margins_dict(bin_names):
+    # TODO: Don't worry too much about the formatting here.
+    # Plan to run the output through black for consistency.
+    # https://github.com/opendp/dp-creator-ii/issues/50
+    margins = (
+        [
+            """
+        (): dp.polars.Margin(
+            public_info="lengths",
+        ),"""
+        ]
+        + [
+            f"""
+        ("{bin_name}",): dp.polars.Margin(
+            public_info="keys",
+        ),"""
+            for bin_name in bin_names
+        ]
+    )
+
+    margins_dict = "{" + "".join(margins) + "\n    }"
+    return margins_dict
+
+
+def _make_context_for_notebook(csv_path, contributions, epsilon, weights, column_names):
     privacy_unit_block = make_privacy_unit_block(contributions)
     privacy_loss_block = make_privacy_loss_block(epsilon)
+    margins_dict = _make_margins_dict([f"{name}_bin" for name in column_names])
+    columns = ", ".join([f"{name}_config" for name in column_names])
     return str(
         _Template("context")
+        .fill_expressions(
+            MARGINS_DICT=margins_dict,
+            COLUMNS=columns,
+        )
         .fill_values(
             CSV_PATH=csv_path,
             WEIGHTS=weights,
@@ -86,13 +117,17 @@ def _make_context_for_notebook(csv_path, contributions, epsilon, weights):
     )
 
 
-def _make_context_for_script(contributions, epsilon, weights):
+def _make_context_for_script(contributions, epsilon, weights, column_names):
     privacy_unit_block = make_privacy_unit_block(contributions)
     privacy_loss_block = make_privacy_loss_block(epsilon)
+    margins_dict = _make_margins_dict([f"{name}_bin" for name in column_names])
+    columns = ",".join([f"{name}_config" for name in column_names])
     return str(
         _Template("context")
         .fill_expressions(
             CSV_PATH="csv_path",
+            MARGINS_DICT=margins_dict,
+            COLUMNS=columns,
         )
         .fill_values(
             WEIGHTS=weights,
@@ -100,37 +135,81 @@ def _make_context_for_script(contributions, epsilon, weights):
         .fill_blocks(
             PRIVACY_UNIT_BLOCK=privacy_unit_block,
             PRIVACY_LOSS_BLOCK=privacy_loss_block,
+            MARGINS_DICT=margins_dict,
         )
     )
 
 
 def _make_imports():
-    return str(_Template("imports").fill_values())
+    return (
+        str(_Template("imports").fill_values())
+        + (Path(__file__).parent.parent / "shared.py").read_text()
+    )
+
+
+def _make_columns(columns):
+    return "\n".join(
+        make_column_config_block(
+            name=name,
+            lower_bound=col["lower_bound"],
+            upper_bound=col["upper_bound"],
+            bin_count=col["bin_count"],
+        )
+        for name, col in columns.items()
+    )
 
 
-def make_notebook_py(csv_path, contributions, epsilon, weights):
+def _make_query(column_name):
+    indentifier = name_to_identifier(column_name)
+    return str(
+        _Template("query")
+        .fill_values(
+            BIN_NAME=f"{indentifier}_bin",
+        )
+        .fill_expressions(
+            QUERY_NAME=f"{indentifier}_query",
+            ACCURACY_NAME=f"{indentifier}_accuracy",
+            HISTOGRAM_NAME=f"{indentifier}_histogram",
+        )
+    )
+
+
+def _make_queries(column_names):
+    return "confidence = 0.95\n\n" + "\n".join(
+        _make_query(column_name) for column_name in column_names
+    )
+
+
+def make_notebook_py(csv_path, contributions, epsilon, columns):
     return str(
         _Template("notebook").fill_blocks(
             IMPORTS_BLOCK=_make_imports(),
+            COLUMNS_BLOCK=_make_columns(columns),
             CONTEXT_BLOCK=_make_context_for_notebook(
                 csv_path=csv_path,
                 contributions=contributions,
                 epsilon=epsilon,
-                weights=weights,
+                weights=[column["weight"] for column in columns.values()],
+                column_names=[name_to_identifier(name) for name in columns.keys()],
             ),
+            QUERIES_BLOCK=_make_queries(columns.keys()),
         )
     )
 
 
-def make_script_py(contributions, epsilon, weights):
+def make_script_py(contributions, epsilon, columns):
     return str(
         _Template("script").fill_blocks(
             IMPORTS_BLOCK=_make_imports(),
+            COLUMNS_BLOCK=_make_columns(columns),
             CONTEXT_BLOCK=_make_context_for_script(
+                # csv_path is a CLI parameter in the script
                 contributions=contributions,
                 epsilon=epsilon,
-                weights=weights,
+                weights=[column["weight"] for column in columns.values()],
+                column_names=[name_to_identifier(name) for name in columns.keys()],
             ),
+            QUERIES_BLOCK=_make_queries(columns.keys()),
         )
     )
 
@@ -151,8 +230,12 @@ def make_column_config_block(name, lower_bound, upper_bound, bin_count):
     ...     upper_bound=100,
     ...     bin_count=10
     ... ))
-    # From the public information, determine the bins:
-    hw_grade_cut_points = make_cut_points(0, 100, 10)
+    # From the public information, determine the bins for 'HW GRADE':
+    hw_grade_cut_points = make_cut_points(
+        lower_bound=0,
+        upper_bound=100,
+        bin_count=10,
+    )
     <BLANKLINE>
     # Use these bins to define a Polars column:
     hw_grade_config = (
@@ -173,7 +256,7 @@ def make_column_config_block(name, lower_bound, upper_bound, bin_count):
         .fill_values(
             LOWER_BOUND=lower_bound,
             UPPER_BOUND=upper_bound,
-            BINS=bin_count,
+            BIN_COUNT=bin_count,
             COLUMN_NAME=name,
             BIN_COLUMN_NAME=f"{snake_name}_bin",
         )
diff --git a/dp_wizard/utils/templates/no-tests/_column_config.py b/dp_wizard/utils/templates/no-tests/_column_config.py
index ddb44bd..bb367e6 100644
--- a/dp_wizard/utils/templates/no-tests/_column_config.py
+++ b/dp_wizard/utils/templates/no-tests/_column_config.py
@@ -1,5 +1,9 @@
-# From the public information, determine the bins:
-CUT_LIST_NAME = make_cut_points(LOWER_BOUND, UPPER_BOUND, BINS)
+# From the public information, determine the bins for COLUMN_NAME:
+CUT_LIST_NAME = make_cut_points(
+    lower_bound=LOWER_BOUND,
+    upper_bound=UPPER_BOUND,
+    bin_count=BIN_COUNT,
+)
 
 # Use these bins to define a Polars column:
 POLARS_CONFIG_NAME = (
diff --git a/dp_wizard/utils/templates/no-tests/_context.py b/dp_wizard/utils/templates/no-tests/_context.py
index cdd8194..32ca440 100644
--- a/dp_wizard/utils/templates/no-tests/_context.py
+++ b/dp_wizard/utils/templates/no-tests/_context.py
@@ -1,8 +1,9 @@
 PRIVACY_UNIT_BLOCK
 PRIVACY_LOSS_BLOCK
 context = dp.Context.compositor(
-    data=pl.scan_csv(CSV_PATH, encoding="utf8-lossy"),
+    data=pl.scan_csv(CSV_PATH, encoding="utf8-lossy").with_columns(COLUMNS),
     privacy_unit=privacy_unit,
     privacy_loss=privacy_loss,
     split_by_weights=WEIGHTS,
+    margins=MARGINS_DICT,
 )
diff --git a/dp_wizard/utils/templates/no-tests/_imports.py b/dp_wizard/utils/templates/no-tests/_imports.py
index 5df8d79..9418f72 100644
--- a/dp_wizard/utils/templates/no-tests/_imports.py
+++ b/dp_wizard/utils/templates/no-tests/_imports.py
@@ -1,4 +1,7 @@
 import polars as pl
 import opendp.prelude as dp
+import matplotlib.pyplot as plt
 
+# The OpenDP team is working to vet the core algorithms.
+# Until that is complete we need to opt-in to use these features.
 dp.enable_features("contrib")
diff --git a/dp_wizard/utils/templates/no-tests/_notebook.py b/dp_wizard/utils/templates/no-tests/_notebook.py
index c6aeed3..16862ba 100644
--- a/dp_wizard/utils/templates/no-tests/_notebook.py
+++ b/dp_wizard/utils/templates/no-tests/_notebook.py
@@ -1,9 +1,36 @@
-# This is a demonstration how OpenDP can be used to create
-# a differentially private release. To customize this,
-# see the documentation for OpenDP: https://docs.opendp.org/
+# This is a demonstration of how OpenDP can be used to create
+# a differentially private release. To learn more about what's
+# going on here, see the documentation for OpenDP: https://docs.opendp.org/
 
 # +
 IMPORTS_BLOCK
+# -
 
+# Based on the input you provided, for each column we'll create a set of cut points,
+# and a Polars expression that describes how we want to summarize that column.
+
+# +
+COLUMNS_BLOCK
+# -
+
+# Next, we'll define our Context. This is where we set the privacy budget,
+# and set the weight for each query under that overall budget.
+# If we try to run one more query than we have weights, it will error.
+# Once the privacy budget is consumed, you shouldn't run more queries.
+
+# +
 CONTEXT_BLOCK
-print(context)
+# -
+
+# A note on `utf8-lossy`: CSVs can use different "character encodings" to
+# represent characters outside the plain ASCII character set, but out of the box
+# the Polars library only supports UTF-8. Specifying `utf8-lossy` preserves as
+# much information as possible, and any unrecognized characters will be replaced
+# by "�". If this is not sufficient, you will need to preprocess your data to
+# re-encode it as UTF-8.
+#
+# Finally, we run the queries and plot the results.
+
+# +
+QUERIES_BLOCK
+# -
diff --git a/dp_wizard/utils/templates/no-tests/_query.py b/dp_wizard/utils/templates/no-tests/_query.py
new file mode 100644
index 0000000..0d78f0d
--- /dev/null
+++ b/dp_wizard/utils/templates/no-tests/_query.py
@@ -0,0 +1,4 @@
+QUERY_NAME = context.query().group_by(BIN_NAME).agg(pl.len().dp.noise())
+ACCURACY_NAME = QUERY_NAME.summarize(alpha=1 - confidence)["accuracy"].item()
+HISTOGRAM_NAME = QUERY_NAME.release().collect().sort(BIN_NAME)
+plot_histogram(HISTOGRAM_NAME, ACCURACY_NAME, 0)
diff --git a/dp_wizard/utils/templates/no-tests/_script.py b/dp_wizard/utils/templates/no-tests/_script.py
index 37be6c5..ab43f5c 100644
--- a/dp_wizard/utils/templates/no-tests/_script.py
+++ b/dp_wizard/utils/templates/no-tests/_script.py
@@ -2,6 +2,8 @@
 
 IMPORTS_BLOCK
 
+COLUMNS_BLOCK
+
 
 def get_context(csv_path):
     CONTEXT_BLOCK
@@ -15,4 +17,5 @@ def get_context(csv_path):
     parser.add_argument("--csv", help="Path to csv containing private data")
     args = parser.parse_args()
     context = get_context(csv_path=args.csv)
-    print(context)
+
+    QUERIES_BLOCK
diff --git a/tests/fixtures/expected-script.py b/tests/fixtures/expected-script.py
deleted file mode 100644
index 2f83281..0000000
--- a/tests/fixtures/expected-script.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from argparse import ArgumentParser
-
-import polars as pl
-import opendp.prelude as dp
-
-dp.enable_features("contrib")
-
-
-def get_context(csv_path):
-    privacy_unit = dp.unit_of(contributions=1)
-
-    privacy_loss = dp.loss_of(epsilon=1, delta=1e-7)
-
-    context = dp.Context.compositor(
-        data=pl.scan_csv(csv_path, encoding="utf8-lossy"),
-        privacy_unit=privacy_unit,
-        privacy_loss=privacy_loss,
-        split_by_weights=[1],
-    )
-
-    return context
-
-
-if __name__ == "__main__":
-    parser = ArgumentParser(
-        description="Creates a differentially private release from a csv"
-    )
-    parser.add_argument("--csv", help="Path to csv containing private data")
-    args = parser.parse_args()
-    context = get_context(csv_path=args.csv)
-    print(context)
diff --git a/tests/utils/test_templates.py b/tests/utils/test_templates.py
index a5970a2..7bcdfc3 100644
--- a/tests/utils/test_templates.py
+++ b/tests/utils/test_templates.py
@@ -83,10 +83,7 @@ def test_fill_template_unfilled_slots():
     context_template = _Template("context")
     with pytest.raises(
         Exception,
-        match=re.escape(
-            "context.py has unfilled slots: "
-            "CSV_PATH, PRIVACY_LOSS_BLOCK, PRIVACY_UNIT_BLOCK, WEIGHTS"
-        ),
+        match=re.escape("context.py has unfilled slots"),
     ):
         str(context_template.fill_values())
 
@@ -96,8 +93,18 @@ def test_make_notebook():
         csv_path=fake_csv,
         contributions=1,
         epsilon=1,
-        weights=[1],
+        columns={
+            # For a strong test, use a column whose name
+            # doesn't work as a python identifier.
+            "hw-number": {
+                "lower_bound": 5,
+                "upper_bound": 15,
+                "bin_count": 20,
+                "weight": 4,
+            }
+        },
     )
+    print(notebook)
     globals = {}
     exec(notebook, globals)
     assert isinstance(globals["context"], dp.Context)
@@ -107,15 +114,16 @@ def test_make_script():
     script = make_script_py(
         contributions=1,
         epsilon=1,
-        weights=[1],
+        columns={
+            "hw-number": {
+                "lower_bound": 5,
+                "upper_bound": 15,
+                "bin_count": 20,
+                "weight": 4,
+            }
+        },
     )
-
-    def clear_empty_lines(text):
-        # Cleanup whitespace after indenting blocks
-        return re.sub(r"^\s+$", "", text, flags=re.MULTILINE).strip()
-
-    expected_script = (fixtures_path / "expected-script.py").read_text()
-    assert clear_empty_lines(script) == clear_empty_lines(expected_script)
+    print(script)
 
     with NamedTemporaryFile(mode="w") as fp:
         fp.write(script)