Skip to content

Commit

Permalink
Let user set number of rows in simulated data (#183)
Browse files Browse the repository at this point in the history
* add selector for row count

* add to test; fix fragile test
  • Loading branch information
mccalluc authored Nov 26, 2024
1 parent 2b42029 commit 006643a
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 4 deletions.
13 changes: 12 additions & 1 deletion dp_wizard/app/analysis_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ def analysis_ui():
[],
),
ui.output_ui("columns_ui"),
ui.markdown(
"What is the approximate number of rows in the dataset? "
"This number is only used for the simulation and not the final calculation."
),
ui.input_select(
"row_count",
"Estimated Rows",
choices=["100", "1000", "10000"],
selected="100",
),
ui.markdown(
"What is your privacy budget for this release? "
"Values above 1 will add less noise to the data, "
Expand Down Expand Up @@ -108,6 +118,7 @@ def columns_ui():
name=column_ids_to_names[column_id],
contributions=contributions(),
epsilon=epsilon(),
row_count=int(input.row_count()),
lower_bounds=lower_bounds,
upper_bounds=upper_bounds,
bin_counts=bin_counts,
Expand Down Expand Up @@ -135,7 +146,7 @@ def columns_ui():
(
ui.layout_columns(
[],
[ui.markdown(note_md)],
ui.markdown(note_md),
col_widths=col_widths, # type: ignore
)
if column_ids
Expand Down
2 changes: 2 additions & 0 deletions dp_wizard/app/components/column_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def column_server(
name: str,
contributions: int,
epsilon: float,
row_count: int,
lower_bounds: reactive.Value[dict[str, float]],
upper_bounds: reactive.Value[dict[str, float]],
bin_counts: reactive.Value[dict[str, int]],
Expand Down Expand Up @@ -166,6 +167,7 @@ def column_plot():
# Exit early to avoid divide-by-zero.
return None
accuracy, histogram = make_accuracy_histogram(
row_count=row_count,
lower=lower_x,
upper=upper_x,
bin_count=bin_count,
Expand Down
9 changes: 7 additions & 2 deletions dp_wizard/utils/dp_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@


def make_accuracy_histogram(
row_count: int,
lower: float,
upper: float,
bin_count: int,
Expand All @@ -22,7 +23,12 @@ def make_accuracy_histogram(
"""
Creates fake data between lower and upper, and then returns a DP histogram from it.
>>> accuracy, histogram = make_accuracy_histogram(
... lower=0, upper=10, bin_count=5, contributions=1, weighted_epsilon=1)
... row_count=100,
... lower=0, upper=10,
... bin_count=5,
... contributions=1,
... weighted_epsilon=1
... )
>>> accuracy
3.37...
>>> histogram
Expand All @@ -42,7 +48,6 @@ def make_accuracy_histogram(
# Mock data depends only on row_count and the lower and upper bounds,
# so it could be cached, but I'd guess this is dominated by the DP operations,
# so not worth optimizing.
row_count = 100
df = mock_data({"value": ColumnDef(lower, upper)}, row_count=row_count)

# TODO: When this is stable, merge it to templates, so we can be
Expand Down
6 changes: 5 additions & 1 deletion tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ def expect_no_error():
download_results_button = page.get_by_role("button", name="Download results")
assert download_results_button.is_disabled()

# Currently, changing the estimated row count only affects the plot,
# but we could also show the confidence interval in the text...
page.get_by_label("Estimated Rows").select_option("1000")

# Set column details:
page.get_by_label("grade").check()
expect_visible(simulation)
Expand All @@ -103,7 +107,7 @@ def expect_no_error():
assert page.get_by_label("Upper").input_value() == new_value
# Add a second column:
page.get_by_label("blank").check()
expect_visible("Weight")
expect(page.get_by_text("Weight")).to_have_count(2)
# TODO: Setting more inputs without checking for updates
# causes recalculations to pile up, and these cause timeouts on CI:
# It is still rerendering the graph after hitting "Download results".
Expand Down

0 comments on commit 006643a

Please sign in to comment.