Resolve conflict in test_app by commenting out the conflicting lines

opendp · Dec 2, 2024 · 588280a · 588280a
2 parents b5d7c17 + 55bd929
commit 588280a
Show file tree

Hide file tree

Showing 4 changed files with 114 additions and 85 deletions.
diff --git a/dp_wizard/app/analysis_panel.py b/dp_wizard/app/analysis_panel.py
@@ -9,33 +9,67 @@
 from dp_wizard.utils.dp_helper import confidence
 from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
 from dp_wizard.utils.code_generators import make_privacy_loss_block
-from dp_wizard.app.components.column_module import col_widths
 
 
 def analysis_ui():
     return ui.nav_panel(
         "Define Analysis",
-        ui.markdown(
-            "Select numeric columns of interest, "
-            "and for each numeric column indicate the expected range, "
-            "the number of bins for the histogram, "
-            "and its relative share of the privacy budget."
-        ),
-        ui.input_checkbox_group(
-            "columns_checkbox_group",
-            ["Columns", ui.output_ui("columns_checkbox_group_tooltip_ui")],
-            [],
+        ui.layout_columns(
+            ui.card(
+                ui.card_header("Columns"),
+                ui.markdown(
+                    "Select numeric columns of interest, "
+                    "and for each numeric column indicate the expected range, "
+                    "the number of bins for the histogram, "
+                    "and its relative share of the privacy budget."
+                ),
+                ui.input_checkbox_group(
+                    "columns_checkbox_group",
+                    ["Columns", ui.output_ui("columns_checkbox_group_tooltip_ui")],
+                    [],
+                ),
+            ),
+            ui.card(
+                ui.card_header("Privacy Budget"),
+                ui.markdown(
+                    "What is your privacy budget for this release? "
+                    "Values above 1 will add less noise to the data, "
+                    "but have a greater risk of revealing individual data."
+                ),
+                ui.output_ui("epsilon_tooltip_ui"),
+                log_slider("log_epsilon_slider", 0.1, 10.0),
+                ui.output_text("epsilon_text"),
+                output_code_sample("Privacy Loss", "privacy_loss_python"),
+            ),
+            ui.card(
+                ui.card_header("Simulation"),
+                ui.markdown(
+                    f"""
+                    This simulation will assume a normal distribution
+                    between the specified lower and upper bounds.
+                    Until you make a release, your CSV will not be
+                    read except to determine the columns.
+
+                    The actual value is within the error bar
+                    with {int(confidence * 100)}% confidence.
+                    """
+                ),
+                ui.markdown(
+                    """
+                    What is the approximate number of rows in the dataset?
+                    This number is only used for the simulation
+                    and not the final calculation.
+                    """
+                ),
+                ui.input_select(
+                    "row_count",
+                    "Estimated Rows",
+                    choices=["100", "1000", "10000"],
+                    selected="100",
+                ),
+            ),
         ),
         ui.output_ui("columns_ui"),
-        ui.markdown(
-            "What is your privacy budget for this release? "
-            "Values above 1 will add less noise to the data, "
-            "but have a greater risk of revealing individual data."
-        ),
-        ui.output_ui("epsilon_tooltip_ui"),
-        log_slider("log_epsilon_slider", 0.1, 10.0),
-        ui.output_text("epsilon_text"),
-        output_code_sample("Privacy Loss", "privacy_loss_python"),
         ui.output_ui("download_results_button_ui"),
         value="analysis_panel",
     )
@@ -101,48 +135,21 @@ def columns_checkbox_group_tooltip_ui():
     def columns_ui():
         column_ids = input.columns_checkbox_group()
         column_ids_to_names = csv_ids_names_calc()
-        column_ids_to_labels = csv_ids_labels_calc()
         for column_id in column_ids:
             column_server(
                 column_id,
                 name=column_ids_to_names[column_id],
                 contributions=contributions(),
                 epsilon=epsilon(),
+                row_count=int(input.row_count()),
                 lower_bounds=lower_bounds,
                 upper_bounds=upper_bounds,
                 bin_counts=bin_counts,
                 weights=weights,
                 is_demo=is_demo,
                 is_single_column=len(column_ids) == 1,
             )
-        confidence_percent = f"{int(confidence * 100)}%"
-        note_md = f"""
-        This simulation assumes a normal distribution between the specified
-        lower and upper bounds. Your CSV has not been read except to
-        determine the columns.
-
-        The confidence interval is {confidence_percent}.
-        """
-        return [
-            [
-                [
-                    ui.h3(column_ids_to_labels[column_id]),
-                    column_ui(column_id),
-                ]
-                for column_id in column_ids
-            ],
-            [
-                (
-                    ui.layout_columns(
-                        [],
-                        [ui.markdown(note_md)],
-                        col_widths=col_widths,  # type: ignore
-                    )
-                    if column_ids
-                    else []
-                )
-            ],
-        ]
+        return [column_ui(column_id) for column_id in column_ids]
 
     @reactive.calc
     def csv_ids_names_calc():

diff --git a/dp_wizard/app/components/column_module.py b/dp_wizard/app/components/column_module.py
@@ -10,39 +10,46 @@
 
 default_weight = "2"
 label_width = "10em"  # Just wide enough so the text isn't truncated.
-col_widths = {
-    # Controls stay roughly a constant width;
-    # Graph expands to fill space.
-    "sm": [4, 8],
-    "md": [3, 9],
-    "lg": [2, 10],
-}
 
 
 @module.ui
 def column_ui():  # pragma: no cover
-    return ui.layout_columns(
-        [
-            # The initial values on these inputs
-            # should be overridden by the reactive.effect.
-            ui.input_numeric(
-                "lower",
-                ["Lower", ui.output_ui("bounds_tooltip_ui")],
-                0,
-                width=label_width,
-            ),
-            ui.input_numeric("upper", "Upper", 0, width=label_width),
-            ui.input_numeric(
-                "bins", ["Bins", ui.output_ui("bins_tooltip_ui")], 0, width=label_width
-            ),
-            ui.output_ui("optional_weight_ui"),
-        ],
-        [
-            ui.output_plot("column_plot", height="300px"),
-            # Make plot smaller than default: about the same size as the other column.
-            output_code_sample("Column Definition", "column_code"),
-        ],
-        col_widths=col_widths,  # type: ignore
+    col_widths = {
+        # Controls stay roughly a constant width;
+        # Graph expands to fill space.
+        "sm": [4, 8],
+        "md": [3, 9],
+        "lg": [2, 10],
+    }
+    return ui.card(
+        ui.card_header(ui.output_text("card_header")),
+        ui.layout_columns(
+            [
+                # The initial values on these inputs
+                # should be overridden by the reactive.effect.
+                ui.input_numeric(
+                    "lower",
+                    ["Lower", ui.output_ui("bounds_tooltip_ui")],
+                    0,
+                    width=label_width,
+                ),
+                ui.input_numeric("upper", "Upper", 0, width=label_width),
+                ui.input_numeric(
+                    "bins",
+                    ["Bins", ui.output_ui("bins_tooltip_ui")],
+                    0,
+                    width=label_width,
+                ),
+                ui.output_ui("optional_weight_ui"),
+            ],
+            [
+                ui.output_plot("column_plot", height="300px"),
+                # Make plot smaller than default:
+                # about the same size as the other column.
+                output_code_sample("Column Definition", "column_code"),
+            ],
+            col_widths=col_widths,  # type: ignore
+        ),
     )
 
 
@@ -54,6 +61,7 @@ def column_server(
     name: str,
     contributions: int,
     epsilon: float,
+    row_count: int,
     lower_bounds: reactive.Value[dict[str, float]],
     upper_bounds: reactive.Value[dict[str, float]],
     bin_counts: reactive.Value[dict[str, int]],
@@ -89,6 +97,10 @@ def _set_bins():
     def _set_weight():
         weights.set({**weights(), name: input.weight()})
 
+    @render.text
+    def card_header():
+        return name
+
     @render.ui
     def bounds_tooltip_ui():
         return demo_tooltip(
@@ -166,6 +178,7 @@ def column_plot():
             # Exit early to avoid divide-by-zero.
             return None
         accuracy, histogram = make_accuracy_histogram(
+            row_count=row_count,
             lower=lower_x,
             upper=upper_x,
             bin_count=bin_count,

diff --git a/dp_wizard/utils/dp_helper.py b/dp_wizard/utils/dp_helper.py
@@ -13,6 +13,7 @@
 
 
 def make_accuracy_histogram(
+    row_count: int,
     lower: float,
     upper: float,
     bin_count: int,
@@ -22,7 +23,12 @@ def make_accuracy_histogram(
     """
     Creates fake data between lower and upper, and then returns a DP histogram from it.
     >>> accuracy, histogram = make_accuracy_histogram(
-    ...     lower=0, upper=10, bin_count=5, contributions=1, weighted_epsilon=1)
+    ...     row_count=100,
+    ...     lower=0, upper=10,
+    ...     bin_count=5,
+    ...     contributions=1,
+    ...     weighted_epsilon=1
+    ... )
     >>> accuracy
     3.37...
     >>> histogram
@@ -42,7 +48,6 @@ def make_accuracy_histogram(
     # Mock data only depends on lower and upper bounds, so it could be cached,
     # but I'd guess this is dominated by the DP operations,
     # so not worth optimizing.
-    row_count = 100
     df = mock_data({"value": ColumnDef(lower, upper)}, row_count=row_count)
 
     # TODO: When this is stable, merge it to templates, so we can be

diff --git a/tests/test_app.py b/tests/test_app.py
@@ -9,7 +9,7 @@
 default_app = create_app_fixture(Path(__file__).parent / "fixtures/default_app.py")
 tooltip = "#choose_csv_demo_tooltip_ui svg"
 for_the_demo = "For the demo, we'll imagine"
-simulation = "This simulation assumes a normal distribution"
+simulation = "This simulation will assume a normal distribution"
 
 
 # TODO: Why is incomplete coverage reported here?
@@ -85,6 +85,10 @@ def expect_no_error():
     download_results_button = page.get_by_role("button", name="Download results")
     assert download_results_button.is_disabled()
 
+    # Currently the only change when the estimated rows changes is the plot,
+    # but we could have the confidence interval in the text...
+    page.get_by_label("Estimated Rows").select_option("1000")
+
     # Set column details:
     page.get_by_label("grade").check()
     expect_visible(simulation)
@@ -96,15 +100,15 @@ def expect_no_error():
     page.get_by_label("Upper").fill(new_value)
     # Uncheck the column:
     page.get_by_label("grade").uncheck()
-    expect_not_visible(simulation)
+    expect_visible(simulation)
     # Recheck the column:
     page.get_by_label("grade").check()
     expect_visible(simulation)
     assert page.get_by_label("Upper").input_value() == new_value
     # Add a second column:
-    page.get_by_label("blank").check()
-    # TODO: Flaky test
-    # expect_visible("Weight")
+    # page.get_by_label("blank").check()
+    # TODO: Test is flaky?
+    # expect(page.get_by_text("Weight")).to_have_count(2)
     # TODO: Setting more inputs without checking for updates
     # causes recalculations to pile up, and these cause timeouts on CI:
     # It is still rerendering the graph after hitting "Download results".