From 87b6f8e40288c7fb8efe0493994ddfface55c2b2 Mon Sep 17 00:00:00 2001
From: Chuck McCallum <mccalluc@users.noreply.github.com>
Date: Thu, 7 Nov 2024 17:13:56 -0500
Subject: [PATCH] More tooltips (#135)

---
 WHAT-WE-LEARNED.md                            |  6 ++-
 dp_creator_ii/app/analysis_panel.py           | 27 +++++++++++-
 dp_creator_ii/app/components/column_module.py | 44 ++++++++++++++++++-
 3 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/WHAT-WE-LEARNED.md b/WHAT-WE-LEARNED.md
index cbddfaa..72a09ca 100644
--- a/WHAT-WE-LEARNED.md
+++ b/WHAT-WE-LEARNED.md
@@ -8,7 +8,7 @@ Unless I'm missing something, there doesn't seem to be any warning when there is
 
 ## UI and Server functions don't really separate concerns
 
-My first impression was that the UI function would be something like a "view" and the server would be a "controller", but for any kind of conditional display I need a `render.ui`, so that distinction breaks down quickly.
+My first impression was that the UI function would be something like a "view" and the server would be a "controller", but for any kind of conditional display I need a `render.ui`, so that distinction breaks down quickly. Just maintaining a naming convention for these little bits of UI in the server gets to be a chore. It would be kludgy, but what if we could supply lambdas instead of names?
 
 ## Refactoring: values vs. reactive values
 
@@ -63,6 +63,10 @@ I've had to tweak the CSS a few times:
 The different flavors of "Shiny" are a bit of nuissance when trying to find examples.
 The maturity of Shiny for R means that the vast majority of the examples are for R, even with Python in the search. It would be nice if the docs site remembered that I only want to look at docs for Core.
 
+## It's easy to forget `return`
+
+This is simple, but I was still scratching my head for a while. While there are some cases where returning `None` is intended, is it more likely to be an error? What if it raised a warning, and an explicit empty string could be returned if that's really what you want?
+
 ## Shiny docs could have better formatting
 
 - https://shiny.posit.co/py/api/core/ui.layout_columns.html: bullet list not rendered correctly.
diff --git a/dp_creator_ii/app/analysis_panel.py b/dp_creator_ii/app/analysis_panel.py
index ab17b60..2a256da 100644
--- a/dp_creator_ii/app/analysis_panel.py
+++ b/dp_creator_ii/app/analysis_panel.py
@@ -5,7 +5,7 @@
 from dp_creator_ii.app.components.inputs import log_slider
 from dp_creator_ii.app.components.column_module import column_ui, column_server
 from dp_creator_ii.utils.csv_helper import read_field_names
-from dp_creator_ii.app.components.outputs import output_code_sample
+from dp_creator_ii.app.components.outputs import output_code_sample, demo_tooltip
 from dp_creator_ii.utils.templates import make_privacy_loss_block
 
 
@@ -18,6 +18,7 @@ def analysis_ui():
             "the number of bins for the histogram, "
             "and its relative share of the privacy budget."
         ),
+        ui.output_ui("columns_checkbox_group_tooltip_ui"),
         ui.input_checkbox_group("columns_checkbox_group", None, []),
         ui.output_ui("columns_ui"),
         ui.markdown(
@@ -25,6 +26,7 @@ def analysis_ui():
             "Values above 1 will add less noise to the data, "
             "but have a greater risk of revealing individual data."
         ),
+        ui.output_ui("epsilon_tooltip_ui"),
         log_slider("log_epsilon_slider", 0.1, 10.0),
         ui.output_text("epsilon"),
         output_code_sample("Privacy Loss", "privacy_loss_python"),
@@ -75,6 +77,17 @@ def _on_column_set_change():
         column_ids_selected = input.columns_checkbox_group()
         clear_column_weights(column_ids_selected)
 
+    @render.ui
+    def columns_checkbox_group_tooltip_ui():
+        return demo_tooltip(
+            is_demo,
+            """
+            Not all columns need analysis. For this demo, just check
+            "class_year" and "grade". With more columns selected,
+            each column has a smaller share of the privacy budget.
+            """,
+        )
+
     @render.ui
     def columns_ui():
         column_ids = input.columns_checkbox_group()
@@ -86,6 +99,7 @@ def columns_ui():
                 epsilon=epsilon_calc(),
                 set_column_weight=set_column_weight,
                 get_weights_sum=get_weights_sum,
+                is_demo=is_demo,
             )
         return [
             [
@@ -103,6 +117,17 @@ def csv_fields_calc():
     def csv_fields():
         return csv_fields_calc()
 
+    @render.ui
+    def epsilon_tooltip_ui():
+        return demo_tooltip(
+            is_demo,
+            """
+            If you set epsilon above one, you'll see that the distribution
+            becomes less noisy, and the confidence intervals become smaller...
+            but increased accuracy risks revealing personal information.
+            """,
+        )
+
     @reactive.calc
     def epsilon_calc():
         return pow(10, input.log_epsilon_slider())
diff --git a/dp_creator_ii/app/components/column_module.py b/dp_creator_ii/app/components/column_module.py
index 7bc95d8..c27e51d 100644
--- a/dp_creator_ii/app/components/column_module.py
+++ b/dp_creator_ii/app/components/column_module.py
@@ -5,7 +5,7 @@
 from dp_creator_ii.utils.dp_helper import make_confidence_accuracy_histogram
 from dp_creator_ii.app.components.plots import plot_histogram
 from dp_creator_ii.utils.templates import make_column_config_block
-from dp_creator_ii.app.components.outputs import output_code_sample
+from dp_creator_ii.app.components.outputs import output_code_sample, demo_tooltip
 
 
 @module.ui
@@ -13,9 +13,12 @@ def column_ui():  # pragma: no cover
     width = "10em"  # Just wide enough so the text isn't trucated.
     return ui.layout_columns(
         [
+            ui.output_ui("bounds_tooltip_ui"),
             ui.input_numeric("min", "Min", 0, width=width),
             ui.input_numeric("max", "Max", 10, width=width),
+            ui.output_ui("bins_tooltip_ui"),
             ui.input_numeric("bins", "Bins", 10, width=width),
+            ui.output_ui("weight_tooltip_ui"),
             ui.input_select(
                 "weight",
                 "Weight",
@@ -60,6 +63,7 @@ def column_server(
     epsilon,
     set_column_weight,
     get_weights_sum,
+    is_demo,
 ):  # pragma: no cover
     @reactive.effect
     @reactive.event(input.weight)
@@ -75,6 +79,44 @@ def column_config():
             "weight": float(input.weight()),
         }
 
+    @render.ui
+    def bounds_tooltip_ui():
+        return demo_tooltip(
+            is_demo,
+            """
+            DP requires that we limit the sensitivity to the contributions
+            of any individual. To do this, we need an estimate of the lower
+            and upper bounds for each variable. We should not look at the
+            data when estimating the bounds! In this case, we could imagine
+            that "class year" would vary between 1 and 4, and we could limit
+            "grade" to values between 50 and 100.
+            """,
+        )
+
+    @render.ui
+    def bins_tooltip_ui():
+        return demo_tooltip(
+            is_demo,
+            """
+            Different statistics can be measured with DP.
+            This tool provides a histogram. If you increase the number of bins,
+            you'll see that each individual bin becomes noisier to provide
+            the same overall privacy guarantee. For this example, give
+            "class_year" 4 bins and "grade" 5 bins.
+            """,
+        )
+
+    @render.ui
+    def weight_tooltip_ui():
+        return demo_tooltip(
+            is_demo,
+            """
+            You have a finite privacy budget, but you can choose
+            how to allocate it. For simplicity, we limit the options here,
+            but when using the library you can fine tune this.
+            """,
+        )
+
     @render.code
     def column_code():
         config = column_config()