From fc2aa18212747fbb93e2087acf91c53ce7fd6be4 Mon Sep 17 00:00:00 2001
From: Chuck McCallum <mccalluc@users.noreply.github.com>
Date: Wed, 20 Nov 2024 09:49:52 -0500
Subject: [PATCH] OO-ify the code generation code (#168)

* stub classes, but not really oo. Remove unused app code

* named tuple for analysis plan

* Move functions which depend on analysis plan into base class

* function params -> constructor params

* define "root_template" and dedup

* fix copy paste in make_context

* make base class abstract

* rename to code_generators: templates are just a means

* Template class to own file

* create a README

* AnalysisPlanColumn

* pass analysis plan to code generators

* analysis plan in tests

* check for expression and value slots

* more systematic tests of bad template filling

* more tests of template filling
---
 .flake8                                       |   2 +-
 .pytest.ini                                   |   2 +-
 dp_wizard/app/analysis_panel.py               |   2 +-
 dp_wizard/app/components/column_module.py     |   2 +-
 dp_wizard/app/dataset_panel.py                |   2 +-
 dp_wizard/app/results_panel.py                |  75 ++---
 dp_wizard/utils/code_generators/__init__.py   | 204 +++++++++++++
 dp_wizard/utils/code_generators/_template.py  |  86 ++++++
 .../utils/code_generators/no-tests/README.md  |   4 +
 .../no-tests/_column_config.py                |   0
 .../no-tests/_context.py                      |   0
 .../no-tests/_imports.py                      |   0
 .../no-tests/_notebook.py                     |   0
 .../no-tests/_privacy_loss.py                 |   0
 .../no-tests/_privacy_unit.py                 |   0
 .../no-tests/_query.py                        |   0
 .../no-tests/_script.py                       |   0
 dp_wizard/utils/templates/__init__.py         | 271 ------------------
 tests/utils/test_code_generators.py           | 202 +++++++++++++
 tests/utils/test_templates.py                 | 133 ---------
 20 files changed, 521 insertions(+), 464 deletions(-)
 create mode 100644 dp_wizard/utils/code_generators/__init__.py
 create mode 100644 dp_wizard/utils/code_generators/_template.py
 create mode 100644 dp_wizard/utils/code_generators/no-tests/README.md
 rename dp_wizard/utils/{templates => code_generators}/no-tests/_column_config.py (100%)
 rename dp_wizard/utils/{templates => code_generators}/no-tests/_context.py (100%)
 rename dp_wizard/utils/{templates => code_generators}/no-tests/_imports.py (100%)
 rename dp_wizard/utils/{templates => code_generators}/no-tests/_notebook.py (100%)
 rename dp_wizard/utils/{templates => code_generators}/no-tests/_privacy_loss.py (100%)
 rename dp_wizard/utils/{templates => code_generators}/no-tests/_privacy_unit.py (100%)
 rename dp_wizard/utils/{templates => code_generators}/no-tests/_query.py (100%)
 rename dp_wizard/utils/{templates => code_generators}/no-tests/_script.py (100%)
 delete mode 100644 dp_wizard/utils/templates/__init__.py
 create mode 100644 tests/utils/test_code_generators.py
 delete mode 100644 tests/utils/test_templates.py

diff --git a/.flake8 b/.flake8
index 05c4fa7..9873a77 100644
--- a/.flake8
+++ b/.flake8
@@ -9,4 +9,4 @@ extend-ignore = E203,E501,E701
 
 per-file-ignores =
     # Ignore undefined names in templates.
-    */templates/no-tests/*.py:F821,F401,E302
+    */code_generators/no-tests/*.py:F821,F401,E302
diff --git a/.pytest.ini b/.pytest.ini
index 25385dd..5aa7a5d 100644
--- a/.pytest.ini
+++ b/.pytest.ini
@@ -4,7 +4,7 @@
 filterwarnings =
 	error
 
-addopts = --doctest-glob '*.md' --doctest-modules --ignore dp_wizard/utils/templates/no-tests --ignore dp_wizard/tests/fixtures/ --tracing=retain-on-failure
+addopts = --doctest-glob '*.md' --doctest-modules --ignore dp_wizard/utils/code_generators/no-tests --ignore dp_wizard/tests/fixtures/ --tracing=retain-on-failure
 
 # If an xfail starts passing unexpectedly, that should count as a failure:
 xfail_strict=true
diff --git a/dp_wizard/app/analysis_panel.py b/dp_wizard/app/analysis_panel.py
index a9171a6..8465c1b 100644
--- a/dp_wizard/app/analysis_panel.py
+++ b/dp_wizard/app/analysis_panel.py
@@ -6,7 +6,7 @@
 from dp_wizard.app.components.column_module import column_ui, column_server
 from dp_wizard.utils.csv_helper import read_csv_ids_labels, read_csv_ids_names
 from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
-from dp_wizard.utils.templates import make_privacy_loss_block
+from dp_wizard.utils.code_generators import make_privacy_loss_block
 from dp_wizard.app.components.column_module import col_widths
 
 
diff --git a/dp_wizard/app/components/column_module.py b/dp_wizard/app/components/column_module.py
index 3e152bf..7296943 100644
--- a/dp_wizard/app/components/column_module.py
+++ b/dp_wizard/app/components/column_module.py
@@ -4,7 +4,7 @@
 
 from dp_wizard.utils.dp_helper import make_confidence_accuracy_histogram
 from dp_wizard.utils.shared import plot_histogram
-from dp_wizard.utils.templates import make_column_config_block
+from dp_wizard.utils.code_generators import make_column_config_block
 from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
 
 
diff --git a/dp_wizard/app/dataset_panel.py b/dp_wizard/app/dataset_panel.py
index 52ed4f4..9cd9836 100644
--- a/dp_wizard/app/dataset_panel.py
+++ b/dp_wizard/app/dataset_panel.py
@@ -4,7 +4,7 @@
 
 from dp_wizard.utils.argparse_helpers import get_cli_info
 from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
-from dp_wizard.utils.templates import make_privacy_unit_block
+from dp_wizard.utils.code_generators import make_privacy_unit_block
 
 
 def dataset_ui():
diff --git a/dp_wizard/app/results_panel.py b/dp_wizard/app/results_panel.py
index 58a23a6..8d448c5 100644
--- a/dp_wizard/app/results_panel.py
+++ b/dp_wizard/app/results_panel.py
@@ -1,8 +1,11 @@
-from json import dumps
-
 from shiny import ui, render, reactive
 
-from dp_wizard.utils.templates import make_notebook_py, make_script_py
+from dp_wizard.utils.code_generators import (
+    NotebookGenerator,
+    ScriptGenerator,
+    AnalysisPlan,
+    AnalysisPlanColumn,
+)
 from dp_wizard.utils.converters import convert_py_to_nb
 
 
@@ -35,64 +38,32 @@ def results_server(
     epsilon,
 ):  # pragma: no cover
     @reactive.calc
-    def analysis_dict():
+    def analysis_plan() -> AnalysisPlan:
         # weights().keys() will reflect the desired columns:
         # The others retain inactive columns, so user
         # inputs aren't lost when toggling checkboxes.
         columns = {
-            col: {
-                "lower_bound": lower_bounds()[col],
-                "upper_bound": upper_bounds()[col],
-                "bin_count": int(bin_counts()[col]),
-                # TODO: Floats should work for weight, but they don't:
-                # https://github.com/opendp/opendp/issues/2140
-                "weight": int(weights()[col]),
-            }
+            col: AnalysisPlanColumn(
+                lower_bound=lower_bounds()[col],
+                upper_bound=upper_bounds()[col],
+                bin_count=int(bin_counts()[col]),
+                weight=int(weights()[col]),
+            )
             for col in weights().keys()
         }
-        return {
-            "csv_path": csv_path(),
-            "contributions": contributions(),
-            "epsilon": epsilon(),
-            "columns": columns,
-        }
-
-    @reactive.calc
-    def analysis_json():
-        return dumps(
-            analysis_dict(),
-            indent=2,
+        return AnalysisPlan(
+            csv_path=csv_path(),
+            contributions=contributions(),
+            epsilon=epsilon(),
+            columns=columns,
         )
 
-    @render.text
-    def analysis_json_text():
-        return analysis_json()
-
-    @reactive.calc
-    def analysis_python():
-        analysis = analysis_dict()
-        return make_notebook_py(
-            csv_path=analysis["csv_path"],
-            contributions=analysis["contributions"],
-            epsilon=analysis["epsilon"],
-            columns=analysis["columns"],
-        )
-
-    @render.text
-    def analysis_python_text():
-        return analysis_python()
-
     @render.download(
         filename="dp-wizard-script.py",
         media_type="text/x-python",
     )
     async def download_script():
-        analysis = analysis_dict()
-        script_py = make_script_py(
-            contributions=analysis["contributions"],
-            epsilon=analysis["epsilon"],
-            columns=analysis["columns"],
-        )
+        script_py = ScriptGenerator(analysis_plan()).make_py()
         yield script_py
 
     @render.download(
@@ -100,12 +71,6 @@ async def download_script():
         media_type="application/x-ipynb+json",
     )
     async def download_notebook():
-        analysis = analysis_dict()
-        notebook_py = make_notebook_py(
-            csv_path=analysis["csv_path"],
-            contributions=analysis["contributions"],
-            epsilon=analysis["epsilon"],
-            columns=analysis["columns"],
-        )
+        notebook_py = NotebookGenerator(analysis_plan()).make_py()
         notebook_nb = convert_py_to_nb(notebook_py, execute=True)
         yield notebook_nb
diff --git a/dp_wizard/utils/code_generators/__init__.py b/dp_wizard/utils/code_generators/__init__.py
new file mode 100644
index 0000000..521c4a4
--- /dev/null
+++ b/dp_wizard/utils/code_generators/__init__.py
@@ -0,0 +1,204 @@
+from typing import NamedTuple
+from abc import ABC, abstractmethod
+from pathlib import Path
+import re
+from dp_wizard.utils.csv_helper import name_to_identifier
+from dp_wizard.utils.code_generators._template import Template
+
+
+class AnalysisPlanColumn(NamedTuple):
+    lower_bound: float
+    upper_bound: float
+    bin_count: int
+    weight: int
+
+
+class AnalysisPlan(NamedTuple):
+    csv_path: str
+    contributions: int
+    epsilon: float
+    columns: dict[str, AnalysisPlanColumn]
+
+
+class _CodeGenerator(ABC):
+    def __init__(self, analysis_plan):
+        self.csv_path = analysis_plan.csv_path
+        self.contributions = analysis_plan.contributions
+        self.epsilon = analysis_plan.epsilon
+        self.columns = analysis_plan.columns
+
+    @abstractmethod
+    def _make_context(self): ...  # pragma: no cover
+
+    def make_py(self):
+        return str(
+            Template(self.root_template).fill_blocks(
+                IMPORTS_BLOCK=_make_imports(),
+                COLUMNS_BLOCK=self._make_columns(self.columns),
+                CONTEXT_BLOCK=self._make_context(),
+                QUERIES_BLOCK=self._make_queries(self.columns.keys()),
+            )
+        )
+
+    def _make_margins_dict(self, bin_names):
+        # TODO: Don't worry too much about the formatting here.
+        # Plan to run the output through black for consistency.
+        # https://github.com/opendp/dp-creator-ii/issues/50
+        margins = (
+            [
+                """
+            (): dp.polars.Margin(
+                public_info="lengths",
+            ),"""
+            ]
+            + [
+                f"""
+            ("{bin_name}",): dp.polars.Margin(
+                public_info="keys",
+            ),"""
+                for bin_name in bin_names
+            ]
+        )
+
+        margins_dict = "{" + "".join(margins) + "\n    }"
+        return margins_dict
+
+    def _make_columns(self, columns):
+        return "\n".join(
+            make_column_config_block(
+                name=name,
+                lower_bound=col.lower_bound,
+                upper_bound=col.upper_bound,
+                bin_count=col.bin_count,
+            )
+            for name, col in columns.items()
+        )
+
+    def _make_queries(self, column_names):
+        return "confidence = 0.95\n\n" + "\n".join(
+            _make_query(column_name) for column_name in column_names
+        )
+
+    def _make_partial_context(self):
+        weights = [column.weight for column in self.columns.values()]
+        column_names = [name_to_identifier(name) for name in self.columns.keys()]
+        privacy_unit_block = make_privacy_unit_block(self.contributions)
+        privacy_loss_block = make_privacy_loss_block(self.epsilon)
+        margins_dict = self._make_margins_dict([f"{name}_bin" for name in column_names])
+        columns = ", ".join([f"{name}_config" for name in column_names])
+        return (
+            Template("context")
+            .fill_expressions(
+                MARGINS_DICT=margins_dict,
+                COLUMNS=columns,
+            )
+            .fill_values(
+                WEIGHTS=weights,
+            )
+            .fill_blocks(
+                PRIVACY_UNIT_BLOCK=privacy_unit_block,
+                PRIVACY_LOSS_BLOCK=privacy_loss_block,
+            )
+        )
+
+
+class NotebookGenerator(_CodeGenerator):
+    root_template = "notebook"
+
+    def _make_context(self):
+        return str(self._make_partial_context().fill_values(CSV_PATH=self.csv_path))
+
+
+class ScriptGenerator(_CodeGenerator):
+    root_template = "script"
+
+    def _make_context(self):
+        return str(self._make_partial_context().fill_expressions(CSV_PATH="csv_path"))
+
+
+# Public functions used to generate code snippets in the UI.
+# These do not require an entire analysis plan, so they stand on their own.
+
+
+def make_privacy_unit_block(contributions):
+    return str(Template("privacy_unit").fill_values(CONTRIBUTIONS=contributions))
+
+
+def make_privacy_loss_block(epsilon):
+    return str(Template("privacy_loss").fill_values(EPSILON=epsilon))
+
+
+def make_column_config_block(name, lower_bound, upper_bound, bin_count):
+    """
+    >>> print(make_column_config_block(
+    ...     name="HW GRADE",
+    ...     lower_bound=0,
+    ...     upper_bound=100,
+    ...     bin_count=10
+    ... ))
+    # From the public information, determine the bins for 'HW GRADE':
+    hw_grade_cut_points = make_cut_points(
+        lower_bound=0,
+        upper_bound=100,
+        bin_count=10,
+    )
+    <BLANKLINE>
+    # Use these bins to define a Polars column:
+    hw_grade_config = (
+        pl.col('HW GRADE')
+        .cut(hw_grade_cut_points)
+        .alias('hw_grade_bin')  # Give the new column a name.
+        .cast(pl.String)
+    )
+    <BLANKLINE>
+    """
+    snake_name = _snake_case(name)
+    return str(
+        Template("column_config")
+        .fill_expressions(
+            CUT_LIST_NAME=f"{snake_name}_cut_points",
+            POLARS_CONFIG_NAME=f"{snake_name}_config",
+        )
+        .fill_values(
+            LOWER_BOUND=lower_bound,
+            UPPER_BOUND=upper_bound,
+            BIN_COUNT=bin_count,
+            COLUMN_NAME=name,
+            BIN_COLUMN_NAME=f"{snake_name}_bin",
+        )
+    )
+
+
+# Private helper functions:
+# These do not depend on the AnalysisPlan,
+# so it's better to keep them out of the class.
+
+
+def _make_query(column_name):
+    """
+    Fill the "query" template with names derived from the column identifier.
+    """
+    identifier = name_to_identifier(column_name)
+    template = Template("query").fill_values(BIN_NAME=f"{identifier}_bin")
+    return str(
+        template.fill_expressions(
+            QUERY_NAME=f"{identifier}_query",
+            ACCURACY_NAME=f"{identifier}_accuracy",
+            HISTOGRAM_NAME=f"{identifier}_histogram",
+        )
+    )
+
+
+def _snake_case(name: str):
+    """
+    >>> _snake_case("HW GRADE")
+    'hw_grade'
+    """
+    return re.sub(r"\W+", "_", name.lower())
+
+
+def _make_imports():
+    return (
+        str(Template("imports").fill_values())
+        + (Path(__file__).parent.parent / "shared.py").read_text()
+    )
diff --git a/dp_wizard/utils/code_generators/_template.py b/dp_wizard/utils/code_generators/_template.py
new file mode 100644
index 0000000..35d2d97
--- /dev/null
+++ b/dp_wizard/utils/code_generators/_template.py
@@ -0,0 +1,86 @@
+import re
+from pathlib import Path
+
+
+class Template:
+    def __init__(self, path, template=None):
+        # Exactly one source must be given; check before touching the filesystem.
+        if (path is None) == (template is None):
+            raise Exception('Need one: "path" and "template" are mutually exclusive')
+        if template is not None:
+            self._path = "template-instead-of-path"
+            self._template = template
+        else:
+            self._path = f"_{path}.py"
+            template_path = Path(__file__).parent / "no-tests" / self._path
+            self._template = template_path.read_text()
+        # Snapshot initial slots: substitutions may add all-caps text resembling slots.
+        self._initial_slots = self._find_slots()
+
+    def _find_slots(self):
+        # Slots:
+        # - are all caps or underscores
+        # - have word boundary on either side
+        # - are at least three characters
+        slot_re = r"\b[A-Z][A-Z_]{2,}\b"
+        return set(re.findall(slot_re, self._template))
+
+    def fill_expressions(self, **kwargs):
+        for k, v in kwargs.items():
+            k_re = re.escape(k)
+            self._template, count = re.subn(rf"\b{k_re}\b", str(v), self._template)
+            if count == 0:
+                raise Exception(
+                    f"No '{k}' slot to fill with '{v}' in "
+                    f"'{self._path}':\n\n{self._template}"
+                )
+        return self
+
+    def fill_values(self, **kwargs):
+        for k, v in kwargs.items():
+            k_re = re.escape(k)
+            self._template, count = re.subn(rf"\b{k_re}\b", repr(v), self._template)
+            if count == 0:
+                raise Exception(
+                    f"No '{k}' slot to fill with '{v}' in "
+                    f"'{self._path}':\n\n{self._template}"
+                )
+        return self
+
+    def fill_blocks(self, **kwargs):
+        for k, v in kwargs.items():
+
+            def match_indent(match):
+                # This does what we want, but binding is confusing.
+                return "\n".join(
+                    match.group(1) + line for line in v.split("\n")  # noqa: B023
+                )
+
+            k_re = re.escape(k)
+            self._template, count = re.subn(
+                rf"^([ \t]*){k_re}$",
+                match_indent,
+                self._template,
+                flags=re.MULTILINE,
+            )
+            if count == 0:
+                base_message = (
+                    f"No '{k}' slot to fill with '{v}' in "
+                    f"'{self._path}':\n\n{self._template}"
+                )
+                if k in self._template:
+                    raise Exception(
+                        f"Block slots must be alone on line; {base_message}"
+                    )
+                else:
+                    raise Exception(base_message)
+        return self
+
+    def __str__(self):
+        unfilled_slots = self._initial_slots & self._find_slots()
+        if unfilled_slots:
+            slots_str = ", ".join(sorted(f"'{slot}'" for slot in unfilled_slots))
+            raise Exception(
+                f"{slots_str} slot not filled in '{self._path}':\n\n{self._template}"
+            )
+        return self._template
diff --git a/dp_wizard/utils/code_generators/no-tests/README.md b/dp_wizard/utils/code_generators/no-tests/README.md
new file mode 100644
index 0000000..fbdcd5e
--- /dev/null
+++ b/dp_wizard/utils/code_generators/no-tests/README.md
@@ -0,0 +1,4 @@
+Strings of ALL CAPS are replaced in these templates.
+Keeping them in a format which can actually be parsed as Python
+makes some things easier, but it is also reinventing the wheel.
+We may revisit this.
diff --git a/dp_wizard/utils/templates/no-tests/_column_config.py b/dp_wizard/utils/code_generators/no-tests/_column_config.py
similarity index 100%
rename from dp_wizard/utils/templates/no-tests/_column_config.py
rename to dp_wizard/utils/code_generators/no-tests/_column_config.py
diff --git a/dp_wizard/utils/templates/no-tests/_context.py b/dp_wizard/utils/code_generators/no-tests/_context.py
similarity index 100%
rename from dp_wizard/utils/templates/no-tests/_context.py
rename to dp_wizard/utils/code_generators/no-tests/_context.py
diff --git a/dp_wizard/utils/templates/no-tests/_imports.py b/dp_wizard/utils/code_generators/no-tests/_imports.py
similarity index 100%
rename from dp_wizard/utils/templates/no-tests/_imports.py
rename to dp_wizard/utils/code_generators/no-tests/_imports.py
diff --git a/dp_wizard/utils/templates/no-tests/_notebook.py b/dp_wizard/utils/code_generators/no-tests/_notebook.py
similarity index 100%
rename from dp_wizard/utils/templates/no-tests/_notebook.py
rename to dp_wizard/utils/code_generators/no-tests/_notebook.py
diff --git a/dp_wizard/utils/templates/no-tests/_privacy_loss.py b/dp_wizard/utils/code_generators/no-tests/_privacy_loss.py
similarity index 100%
rename from dp_wizard/utils/templates/no-tests/_privacy_loss.py
rename to dp_wizard/utils/code_generators/no-tests/_privacy_loss.py
diff --git a/dp_wizard/utils/templates/no-tests/_privacy_unit.py b/dp_wizard/utils/code_generators/no-tests/_privacy_unit.py
similarity index 100%
rename from dp_wizard/utils/templates/no-tests/_privacy_unit.py
rename to dp_wizard/utils/code_generators/no-tests/_privacy_unit.py
diff --git a/dp_wizard/utils/templates/no-tests/_query.py b/dp_wizard/utils/code_generators/no-tests/_query.py
similarity index 100%
rename from dp_wizard/utils/templates/no-tests/_query.py
rename to dp_wizard/utils/code_generators/no-tests/_query.py
diff --git a/dp_wizard/utils/templates/no-tests/_script.py b/dp_wizard/utils/code_generators/no-tests/_script.py
similarity index 100%
rename from dp_wizard/utils/templates/no-tests/_script.py
rename to dp_wizard/utils/code_generators/no-tests/_script.py
diff --git a/dp_wizard/utils/templates/__init__.py b/dp_wizard/utils/templates/__init__.py
deleted file mode 100644
index 591e1eb..0000000
--- a/dp_wizard/utils/templates/__init__.py
+++ /dev/null
@@ -1,271 +0,0 @@
-"""
-Strings of ALL CAPS are replaced in these templates.
-Keeping them in a format which can actually be parsed as python
-makes some things easier, but it is also reinventing the wheel.
-We may revisit this.
-"""
-
-from pathlib import Path
-import re
-from dp_wizard.utils.csv_helper import name_to_identifier
-
-
-class _Template:
-    def __init__(self, path, template=None):
-        if path is not None:
-            self._path = f"_{path}.py"
-            template_path = Path(__file__).parent / "no-tests" / self._path
-            self._template = template_path.read_text()
-        if template is not None:
-            if path is not None:
-                raise Exception('"path" and "template" are mutually exclusive')
-            self._path = "template-instead-of-path"
-            self._template = template
-        self._initial_slots = self._find_slots()
-
-    def _find_slots(self):
-        # Slots:
-        # - are all caps or underscores
-        # - have word boundary on either side
-        # - are at least three characters
-        slot_re = r"\b[A-Z][A-Z_]{2,}\b"
-        return set(re.findall(slot_re, self._template))
-
-    def fill_expressions(self, **kwargs):
-        for k, v in kwargs.items():
-            k_re = re.escape(k)
-            self._template = re.sub(rf"\b{k_re}\b", str(v), self._template)
-        return self
-
-    def fill_values(self, **kwargs):
-        for k, v in kwargs.items():
-            k_re = re.escape(k)
-            self._template = re.sub(rf"\b{k_re}\b", repr(v), self._template)
-        return self
-
-    def fill_blocks(self, **kwargs):
-        for k, v in kwargs.items():
-
-            def match_indent(match):
-                # This does what we want, but binding is confusing.
-                return "\n".join(
-                    match.group(1) + line for line in v.split("\n")  # noqa: B023
-                )
-
-            k_re = re.escape(k)
-            self._template = re.sub(
-                rf"^([ \t]*){k_re}$",
-                match_indent,
-                self._template,
-                flags=re.MULTILINE,
-            )
-        return self
-
-    def __str__(self):
-        unfilled_slots = self._initial_slots & self._find_slots()
-        if unfilled_slots:
-            raise Exception(
-                f"Template {self._path} has unfilled slots: "
-                f'{", ".join(sorted(unfilled_slots))}\n\n{self._template}'
-            )
-        return self._template
-
-
-def _make_margins_dict(bin_names):
-    # TODO: Don't worry too much about the formatting here.
-    # Plan to run the output through black for consistency.
-    # https://github.com/opendp/dp-creator-ii/issues/50
-    margins = (
-        [
-            """
-        (): dp.polars.Margin(
-            public_info="lengths",
-        ),"""
-        ]
-        + [
-            f"""
-        ("{bin_name}",): dp.polars.Margin(
-            public_info="keys",
-        ),"""
-            for bin_name in bin_names
-        ]
-    )
-
-    margins_dict = "{" + "".join(margins) + "\n    }"
-    return margins_dict
-
-
-def _make_context_for_notebook(csv_path, contributions, epsilon, weights, column_names):
-    privacy_unit_block = make_privacy_unit_block(contributions)
-    privacy_loss_block = make_privacy_loss_block(epsilon)
-    margins_dict = _make_margins_dict([f"{name}_bin" for name in column_names])
-    columns = ", ".join([f"{name}_config" for name in column_names])
-    return str(
-        _Template("context")
-        .fill_expressions(
-            MARGINS_DICT=margins_dict,
-            COLUMNS=columns,
-        )
-        .fill_values(
-            CSV_PATH=csv_path,
-            WEIGHTS=weights,
-        )
-        .fill_blocks(
-            PRIVACY_UNIT_BLOCK=privacy_unit_block,
-            PRIVACY_LOSS_BLOCK=privacy_loss_block,
-        )
-    )
-
-
-def _make_context_for_script(contributions, epsilon, weights, column_names):
-    privacy_unit_block = make_privacy_unit_block(contributions)
-    privacy_loss_block = make_privacy_loss_block(epsilon)
-    margins_dict = _make_margins_dict([f"{name}_bin" for name in column_names])
-    columns = ",".join([f"{name}_config" for name in column_names])
-    return str(
-        _Template("context")
-        .fill_expressions(
-            CSV_PATH="csv_path",
-            MARGINS_DICT=margins_dict,
-            COLUMNS=columns,
-        )
-        .fill_values(
-            WEIGHTS=weights,
-        )
-        .fill_blocks(
-            PRIVACY_UNIT_BLOCK=privacy_unit_block,
-            PRIVACY_LOSS_BLOCK=privacy_loss_block,
-            MARGINS_DICT=margins_dict,
-        )
-    )
-
-
-def _make_imports():
-    return (
-        str(_Template("imports").fill_values())
-        + (Path(__file__).parent.parent / "shared.py").read_text()
-    )
-
-
-def _make_columns(columns):
-    return "\n".join(
-        make_column_config_block(
-            name=name,
-            lower_bound=col["lower_bound"],
-            upper_bound=col["upper_bound"],
-            bin_count=col["bin_count"],
-        )
-        for name, col in columns.items()
-    )
-
-
-def _make_query(column_name):
-    indentifier = name_to_identifier(column_name)
-    return str(
-        _Template("query")
-        .fill_values(
-            BIN_NAME=f"{indentifier}_bin",
-        )
-        .fill_expressions(
-            QUERY_NAME=f"{indentifier}_query",
-            ACCURACY_NAME=f"{indentifier}_accuracy",
-            HISTOGRAM_NAME=f"{indentifier}_histogram",
-        )
-    )
-
-
-def _make_queries(column_names):
-    return "confidence = 0.95\n\n" + "\n".join(
-        _make_query(column_name) for column_name in column_names
-    )
-
-
-def make_notebook_py(csv_path, contributions, epsilon, columns):
-    return str(
-        _Template("notebook").fill_blocks(
-            IMPORTS_BLOCK=_make_imports(),
-            COLUMNS_BLOCK=_make_columns(columns),
-            CONTEXT_BLOCK=_make_context_for_notebook(
-                csv_path=csv_path,
-                contributions=contributions,
-                epsilon=epsilon,
-                weights=[column["weight"] for column in columns.values()],
-                column_names=[name_to_identifier(name) for name in columns.keys()],
-            ),
-            QUERIES_BLOCK=_make_queries(columns.keys()),
-        )
-    )
-
-
-def make_script_py(contributions, epsilon, columns):
-    return str(
-        _Template("script").fill_blocks(
-            IMPORTS_BLOCK=_make_imports(),
-            COLUMNS_BLOCK=_make_columns(columns),
-            CONTEXT_BLOCK=_make_context_for_script(
-                # csv_path is a CLI parameter in the script
-                contributions=contributions,
-                epsilon=epsilon,
-                weights=[column["weight"] for column in columns.values()],
-                column_names=[name_to_identifier(name) for name in columns.keys()],
-            ),
-            QUERIES_BLOCK=_make_queries(columns.keys()),
-        )
-    )
-
-
-def make_privacy_unit_block(contributions):
-    return str(_Template("privacy_unit").fill_values(CONTRIBUTIONS=contributions))
-
-
-def make_privacy_loss_block(epsilon):
-    return str(_Template("privacy_loss").fill_values(EPSILON=epsilon))
-
-
-def make_column_config_block(name, lower_bound, upper_bound, bin_count):
-    """
-    >>> print(make_column_config_block(
-    ...     name="HW GRADE",
-    ...     lower_bound=0,
-    ...     upper_bound=100,
-    ...     bin_count=10
-    ... ))
-    # From the public information, determine the bins for 'HW GRADE':
-    hw_grade_cut_points = make_cut_points(
-        lower_bound=0,
-        upper_bound=100,
-        bin_count=10,
-    )
-    <BLANKLINE>
-    # Use these bins to define a Polars column:
-    hw_grade_config = (
-        pl.col('HW GRADE')
-        .cut(hw_grade_cut_points)
-        .alias('hw_grade_bin')  # Give the new column a name.
-        .cast(pl.String)
-    )
-    <BLANKLINE>
-    """
-    snake_name = _snake_case(name)
-    return str(
-        _Template("column_config")
-        .fill_expressions(
-            CUT_LIST_NAME=f"{snake_name}_cut_points",
-            POLARS_CONFIG_NAME=f"{snake_name}_config",
-        )
-        .fill_values(
-            LOWER_BOUND=lower_bound,
-            UPPER_BOUND=upper_bound,
-            BIN_COUNT=bin_count,
-            COLUMN_NAME=name,
-            BIN_COLUMN_NAME=f"{snake_name}_bin",
-        )
-    )
-
-
-def _snake_case(name: str):
-    """
-    >>> _snake_case("HW GRADE")
-    'hw_grade'
-    """
-    return re.sub(r"\W+", "_", name.lower())
diff --git a/tests/utils/test_code_generators.py b/tests/utils/test_code_generators.py
new file mode 100644
index 0000000..aad08d4
--- /dev/null
+++ b/tests/utils/test_code_generators.py
@@ -0,0 +1,202 @@
+from tempfile import NamedTemporaryFile
+import subprocess
+from pathlib import Path
+import pytest
+import opendp.prelude as dp
+from dp_wizard.utils.code_generators import (
+    Template,
+    ScriptGenerator,
+    NotebookGenerator,
+    AnalysisPlan,
+    AnalysisPlanColumn,
+)
+
+
+fixtures_path = Path(__file__).parent.parent / "fixtures"  # i.e. tests/fixtures
+fake_csv = str(fixtures_path / "fake.csv")  # Anchored to this file, not the cwd.
+
+
+def test_param_conflict():  # A template name plus inline text must be rejected.
+    with pytest.raises(Exception, match=r"mutually exclusive"):
+        Template("context", template="Not allowed if path present")
+
+
+def test_fill_expressions():
+    """Expression slots are replaced by the given text, without quoting."""
+    words = {
+        "VERB": "expects",
+        "ADJ": "Spanish",
+        "NOUN": "Inquisition",
+    }
+    sentence = Template(None, template="No one VERB the ADJ NOUN!")
+    sentence = sentence.fill_expressions(**words)
+    assert str(sentence) == "No one expects the Spanish Inquisition!"
+
+
+def test_fill_expressions_missing_slot_in_template():
+    """Filling an expression slot the template lacks raises an error."""
+    words = {"VERB": "expects", "ADJ": "Spanish", "NOUN": "Inquisition"}
+    expected_error = r"No 'VERB' slot to fill with 'expects'"
+    # The template has "..." where "VERB" would be: nothing to substitute.
+    sentence = Template(None, template="No one ... the ADJ NOUN!")
+    with pytest.raises(Exception, match=expected_error):
+        filled = sentence.fill_expressions(**words)
+        # Render too, in case the check is deferred until str().
+        str(filled)
+
+
+def test_fill_expressions_extra_slot_in_template():
+    """An expression slot left unfilled in the template raises an error."""
+    words = {"VERB": "expects", "ADJ": "Spanish", "NOUN": "Inquisition"}
+    expected_error = r"'ARTICLE' slot not filled"
+    # No replacement is supplied for "ARTICLE".
+    sentence = Template(None, template="No one VERB ARTICLE ADJ NOUN!")
+    with pytest.raises(Exception, match=expected_error):
+        filled = sentence.fill_expressions(**words)
+        # Render too, in case the check is deferred until str().
+        str(filled)
+
+
+def test_fill_values():
+    """Value slots are rendered as Python literals (strings gain quotes)."""
+    values = {
+        "STRING": "🙂",
+        "NUM": 3,
+        "LIST": ["🙂", "🙂", "🙂"],
+    }
+    assertion = Template(None, template="assert [STRING] * NUM == LIST")
+    assertion = assertion.fill_values(**values)
+    assert str(assertion) == "assert ['🙂'] * 3 == ['🙂', '🙂', '🙂']"
+
+
+def test_fill_values_missing_slot_in_template():
+    """Filling a value slot the template lacks raises an error."""
+    values = {"STRING": "🙂", "NUM": 3, "LIST": ["🙂", "🙂", "🙂"]}
+    expected_error = r"No 'NUM' slot to fill with '3'"
+    # The template has "..." where "NUM" would be: nothing to substitute.
+    assertion = Template(None, template="assert [STRING] * ... == LIST")
+    with pytest.raises(Exception, match=expected_error):
+        filled = assertion.fill_values(**values)
+        # Render too, in case the check is deferred until str().
+        str(filled)
+
+
+def test_fill_values_extra_slot_in_template():
+    """A value slot left unfilled in the template raises an error."""
+    values = {"STRING": "🙂", "NUM": 3, "LIST": ["🙂", "🙂", "🙂"]}
+    expected_error = r"'CMD' slot not filled"
+    # No value is supplied for "CMD".
+    assertion = Template(None, template="CMD [STRING] * NUM == LIST")
+    with pytest.raises(Exception, match=expected_error):
+        filled = assertion.fill_values(**values)
+        # Render too, in case the check is deferred until str().
+        str(filled)
+
+
+def test_fill_blocks():
+    # "OK" has fewer than three characters, so it is not treated as a slot.
+    template = Template(
+        None,
+        template="""# MixedCase is OK
+
+FIRST
+
+with fake:
+    SECOND
+    if True:
+        THIRD
+""",
+    )
+    template.fill_blocks(  # Result is read back via str(template).
+        FIRST="\n".join(f"import {i}" for i in "abc"),
+        SECOND="\n".join(f"f({i})" for i in "123"),
+        THIRD="\n".join(f"{i}()" for i in "xyz"),
+    )
+    assert (
+        str(template)
+        == """# MixedCase is OK
+
+import a
+import b
+import c
+
+with fake:
+    f(1)
+    f(2)
+    f(3)
+    if True:
+        x()
+        y()
+        z()
+"""
+    )
+
+
+def test_fill_blocks_missing_slot_in_template_alone():
+    template = Template(None, template="No block slot")  # No "SLOT" at all.
+    with pytest.raises(Exception, match=r"No 'SLOT' slot"):
+        str(template.fill_blocks(SLOT="placeholder"))
+
+
+def test_fill_blocks_missing_slot_in_template_not_alone():
+    """A block slot that shares its line with other text is rejected."""
+    expected_error = r"Block slots must be alone on line; No 'SLOT' slot"
+    template = Template(None, template="No block SLOT")
+    with pytest.raises(Exception, match=expected_error):
+        str(template.fill_blocks(SLOT="placeholder"))
+
+
+def test_fill_blocks_extra_slot_in_template():
+    template = Template(None, template="EXTRA\nSLOT")  # EXTRA is never filled.
+    with pytest.raises(Exception, match=r"'EXTRA' slot not filled"):
+        str(template.fill_blocks(SLOT="placeholder"))
+
+
+def test_make_notebook():
+    notebook = NotebookGenerator(
+        AnalysisPlan(
+            csv_path=fake_csv,
+            contributions=1,
+            epsilon=1,
+            columns={
+                # For a strong test, use a column whose name
+                # doesn't work as a Python identifier.
+                "hw-number": AnalysisPlanColumn(
+                    lower_bound=5,
+                    upper_bound=15,
+                    bin_count=20,
+                    weight=4,
+                )
+            },
+        )
+    ).make_py()
+    print(notebook)  # Aids debugging when the generated code fails.
+    namespace = {}  # Not named "globals": that would shadow the builtin.
+    exec(notebook, namespace)  # Actually run the generated notebook code.
+    assert isinstance(namespace["context"], dp.Context)
+
+
+def test_make_script():
+    import sys  # Local import: run the script under this same interpreter.
+
+    script = ScriptGenerator(
+        AnalysisPlan(
+            csv_path=None,  # The script is given its CSV via --csv below.
+            contributions=1,
+            epsilon=1,
+            columns={
+                "hw-number": AnalysisPlanColumn(
+                    lower_bound=5,
+                    upper_bound=15,
+                    bin_count=20,
+                    weight=4,
+                )
+            },
+        )
+    ).make_py()
+    print(script)  # Aids debugging when the generated script fails.
+    with NamedTemporaryFile(mode="w") as fp:
+        fp.write(script)
+        fp.flush()  # Make sure the child process sees the full script.
+        result = subprocess.run([sys.executable, fp.name, "--csv", fake_csv])
+        assert result.returncode == 0
diff --git a/tests/utils/test_templates.py b/tests/utils/test_templates.py
deleted file mode 100644
index 7bcdfc3..0000000
--- a/tests/utils/test_templates.py
+++ /dev/null
@@ -1,133 +0,0 @@
-from tempfile import NamedTemporaryFile
-import subprocess
-from pathlib import Path
-import re
-import pytest
-import opendp.prelude as dp
-from dp_wizard.utils.templates import _Template, make_notebook_py, make_script_py
-
-
-fixtures_path = Path(__file__).parent.parent / "fixtures"
-fake_csv = "tests/fixtures/fake.csv"
-
-
-def test_param_conflict():
-    with pytest.raises(Exception, match=r"mutually exclusive"):
-        _Template("context", template="Not allowed if path present")
-
-
-def test_fill_expressions():
-    template = _Template(None, template="No one VERB the ADJ NOUN!")
-    filled = str(
-        template.fill_expressions(
-            VERB="expects",
-            ADJ="Spanish",
-            NOUN="Inquisition",
-        )
-    )
-    assert filled == "No one expects the Spanish Inquisition!"
-
-
-def test_fill_values():
-    template = _Template(None, template="assert [STRING] * NUM == LIST")
-    filled = str(
-        template.fill_values(
-            STRING="🙂",
-            NUM=3,
-            LIST=["🙂", "🙂", "🙂"],
-        )
-    )
-    assert filled == "assert ['🙂'] * 3 == ['🙂', '🙂', '🙂']"
-
-
-def test_fill_blocks():
-    # "OK" is less than three characters, so it is not a slot.
-    template = _Template(
-        None,
-        template="""# MixedCase is OK
-
-FIRST
-
-with fake:
-    SECOND
-    if True:
-        THIRD
-""",
-    )
-    template.fill_blocks(
-        FIRST="\n".join(f"import {i}" for i in "abc"),
-        SECOND="\n".join(f"f({i})" for i in "123"),
-        THIRD="\n".join(f"{i}()" for i in "xyz"),
-    )
-    assert (
-        str(template)
-        == """# MixedCase is OK
-
-import a
-import b
-import c
-
-with fake:
-    f(1)
-    f(2)
-    f(3)
-    if True:
-        x()
-        y()
-        z()
-"""
-    )
-
-
-def test_fill_template_unfilled_slots():
-    context_template = _Template("context")
-    with pytest.raises(
-        Exception,
-        match=re.escape("context.py has unfilled slots"),
-    ):
-        str(context_template.fill_values())
-
-
-def test_make_notebook():
-    notebook = make_notebook_py(
-        csv_path=fake_csv,
-        contributions=1,
-        epsilon=1,
-        columns={
-            # For a strong test, use a column whose name
-            # doesn't work as a python identifier.
-            "hw-number": {
-                "lower_bound": 5,
-                "upper_bound": 15,
-                "bin_count": 20,
-                "weight": 4,
-            }
-        },
-    )
-    print(notebook)
-    globals = {}
-    exec(notebook, globals)
-    assert isinstance(globals["context"], dp.Context)
-
-
-def test_make_script():
-    script = make_script_py(
-        contributions=1,
-        epsilon=1,
-        columns={
-            "hw-number": {
-                "lower_bound": 5,
-                "upper_bound": 15,
-                "bin_count": 20,
-                "weight": 4,
-            }
-        },
-    )
-    print(script)
-
-    with NamedTemporaryFile(mode="w") as fp:
-        fp.write(script)
-        fp.flush()
-
-        result = subprocess.run(["python", fp.name, "--csv", fake_csv])
-        assert result.returncode == 0