Skip to content

Commit

Permalink
resolve conflict in test_app by commenting out conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
mccalluc committed Dec 2, 2024
2 parents b5d7c17 + 55bd929 commit 588280a
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 85 deletions.
105 changes: 56 additions & 49 deletions dp_wizard/app/analysis_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,67 @@
from dp_wizard.utils.dp_helper import confidence
from dp_wizard.app.components.outputs import output_code_sample, demo_tooltip
from dp_wizard.utils.code_generators import make_privacy_loss_block
from dp_wizard.app.components.column_module import col_widths


def analysis_ui():
return ui.nav_panel(
"Define Analysis",
ui.markdown(
"Select numeric columns of interest, "
"and for each numeric column indicate the expected range, "
"the number of bins for the histogram, "
"and its relative share of the privacy budget."
),
ui.input_checkbox_group(
"columns_checkbox_group",
["Columns", ui.output_ui("columns_checkbox_group_tooltip_ui")],
[],
ui.layout_columns(
ui.card(
ui.card_header("Columns"),
ui.markdown(
"Select numeric columns of interest, "
"and for each numeric column indicate the expected range, "
"the number of bins for the histogram, "
"and its relative share of the privacy budget."
),
ui.input_checkbox_group(
"columns_checkbox_group",
["Columns", ui.output_ui("columns_checkbox_group_tooltip_ui")],
[],
),
),
ui.card(
ui.card_header("Privacy Budget"),
ui.markdown(
"What is your privacy budget for this release? "
"Values above 1 will add less noise to the data, "
"but have a greater risk of revealing individual data."
),
ui.output_ui("epsilon_tooltip_ui"),
log_slider("log_epsilon_slider", 0.1, 10.0),
ui.output_text("epsilon_text"),
output_code_sample("Privacy Loss", "privacy_loss_python"),
),
ui.card(
ui.card_header("Simulation"),
ui.markdown(
f"""
This simulation will assume a normal distribution
between the specified lower and upper bounds.
Until you make a release, your CSV will not be
read except to determine the columns.
The actual value is within the error bar
with {int(confidence * 100)}% confidence.
"""
),
ui.markdown(
"""
What is the approximate number of rows in the dataset?
This number is only used for the simulation
and not the final calculation.
"""
),
ui.input_select(
"row_count",
"Estimated Rows",
choices=["100", "1000", "10000"],
selected="100",
),
),
),
ui.output_ui("columns_ui"),
ui.markdown(
"What is your privacy budget for this release? "
"Values above 1 will add less noise to the data, "
"but have a greater risk of revealing individual data."
),
ui.output_ui("epsilon_tooltip_ui"),
log_slider("log_epsilon_slider", 0.1, 10.0),
ui.output_text("epsilon_text"),
output_code_sample("Privacy Loss", "privacy_loss_python"),
ui.output_ui("download_results_button_ui"),
value="analysis_panel",
)
Expand Down Expand Up @@ -101,48 +135,21 @@ def columns_checkbox_group_tooltip_ui():
def columns_ui():
column_ids = input.columns_checkbox_group()
column_ids_to_names = csv_ids_names_calc()
column_ids_to_labels = csv_ids_labels_calc()
for column_id in column_ids:
column_server(
column_id,
name=column_ids_to_names[column_id],
contributions=contributions(),
epsilon=epsilon(),
row_count=int(input.row_count()),
lower_bounds=lower_bounds,
upper_bounds=upper_bounds,
bin_counts=bin_counts,
weights=weights,
is_demo=is_demo,
is_single_column=len(column_ids) == 1,
)
confidence_percent = f"{int(confidence * 100)}%"
note_md = f"""
This simulation assumes a normal distribution between the specified
lower and upper bounds. Your CSV has not been read except to
determine the columns.
The confidence interval is {confidence_percent}.
"""
return [
[
[
ui.h3(column_ids_to_labels[column_id]),
column_ui(column_id),
]
for column_id in column_ids
],
[
(
ui.layout_columns(
[],
[ui.markdown(note_md)],
col_widths=col_widths, # type: ignore
)
if column_ids
else []
)
],
]
return [column_ui(column_id) for column_id in column_ids]

@reactive.calc
def csv_ids_names_calc():
Expand Down
71 changes: 42 additions & 29 deletions dp_wizard/app/components/column_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,39 +10,46 @@

default_weight = "2"
label_width = "10em" # Just wide enough so the text isn't truncated.
col_widths = {
# Controls stay roughly a constant width;
# Graph expands to fill space.
"sm": [4, 8],
"md": [3, 9],
"lg": [2, 10],
}


@module.ui
def column_ui(): # pragma: no cover
return ui.layout_columns(
[
# The initial values on these inputs
# should be overridden by the reactive.effect.
ui.input_numeric(
"lower",
["Lower", ui.output_ui("bounds_tooltip_ui")],
0,
width=label_width,
),
ui.input_numeric("upper", "Upper", 0, width=label_width),
ui.input_numeric(
"bins", ["Bins", ui.output_ui("bins_tooltip_ui")], 0, width=label_width
),
ui.output_ui("optional_weight_ui"),
],
[
ui.output_plot("column_plot", height="300px"),
# Make plot smaller than default: about the same size as the other column.
output_code_sample("Column Definition", "column_code"),
],
col_widths=col_widths, # type: ignore
col_widths = {
# Controls stay roughly a constant width;
# Graph expands to fill space.
"sm": [4, 8],
"md": [3, 9],
"lg": [2, 10],
}
return ui.card(
ui.card_header(ui.output_text("card_header")),
ui.layout_columns(
[
# The initial values on these inputs
# should be overridden by the reactive.effect.
ui.input_numeric(
"lower",
["Lower", ui.output_ui("bounds_tooltip_ui")],
0,
width=label_width,
),
ui.input_numeric("upper", "Upper", 0, width=label_width),
ui.input_numeric(
"bins",
["Bins", ui.output_ui("bins_tooltip_ui")],
0,
width=label_width,
),
ui.output_ui("optional_weight_ui"),
],
[
ui.output_plot("column_plot", height="300px"),
# Make plot smaller than default:
# about the same size as the other column.
output_code_sample("Column Definition", "column_code"),
],
col_widths=col_widths, # type: ignore
),
)


Expand All @@ -54,6 +61,7 @@ def column_server(
name: str,
contributions: int,
epsilon: float,
row_count: int,
lower_bounds: reactive.Value[dict[str, float]],
upper_bounds: reactive.Value[dict[str, float]],
bin_counts: reactive.Value[dict[str, int]],
Expand Down Expand Up @@ -89,6 +97,10 @@ def _set_bins():
def _set_weight():
weights.set({**weights(), name: input.weight()})

@render.text
def card_header():
return name

@render.ui
def bounds_tooltip_ui():
return demo_tooltip(
Expand Down Expand Up @@ -166,6 +178,7 @@ def column_plot():
# Exit early to avoid divide-by-zero.
return None
accuracy, histogram = make_accuracy_histogram(
row_count=row_count,
lower=lower_x,
upper=upper_x,
bin_count=bin_count,
Expand Down
9 changes: 7 additions & 2 deletions dp_wizard/utils/dp_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@


def make_accuracy_histogram(
row_count: int,
lower: float,
upper: float,
bin_count: int,
Expand All @@ -22,7 +23,12 @@ def make_accuracy_histogram(
"""
Creates fake data between lower and upper, and then returns a DP histogram from it.
>>> accuracy, histogram = make_accuracy_histogram(
... lower=0, upper=10, bin_count=5, contributions=1, weighted_epsilon=1)
... row_count=100,
... lower=0, upper=10,
... bin_count=5,
... contributions=1,
... weighted_epsilon=1
... )
>>> accuracy
3.37...
>>> histogram
Expand All @@ -42,7 +48,6 @@ def make_accuracy_histogram(
# Mock data only depends on lower and upper bounds, so it could be cached,
# but I'd guess this is dominated by the DP operations,
# so not worth optimizing.
row_count = 100
df = mock_data({"value": ColumnDef(lower, upper)}, row_count=row_count)

# TODO: When this is stable, merge it to templates, so we can be
Expand Down
14 changes: 9 additions & 5 deletions tests/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
default_app = create_app_fixture(Path(__file__).parent / "fixtures/default_app.py")
tooltip = "#choose_csv_demo_tooltip_ui svg"
for_the_demo = "For the demo, we'll imagine"
simulation = "This simulation assumes a normal distribution"
simulation = "This simulation will assume a normal distribution"


# TODO: Why is incomplete coverage reported here?
Expand Down Expand Up @@ -85,6 +85,10 @@ def expect_no_error():
download_results_button = page.get_by_role("button", name="Download results")
assert download_results_button.is_disabled()

# Currently the only thing that changes when the estimated row count changes
# is the plot, but we could also show the confidence interval in the text...
page.get_by_label("Estimated Rows").select_option("1000")

# Set column details:
page.get_by_label("grade").check()
expect_visible(simulation)
Expand All @@ -96,15 +100,15 @@ def expect_no_error():
page.get_by_label("Upper").fill(new_value)
# Uncheck the column:
page.get_by_label("grade").uncheck()
expect_not_visible(simulation)
expect_visible(simulation)
# Recheck the column:
page.get_by_label("grade").check()
expect_visible(simulation)
assert page.get_by_label("Upper").input_value() == new_value
# Add a second column:
page.get_by_label("blank").check()
# TODO: Flaky test
# expect_visible("Weight")
# page.get_by_label("blank").check()
# TODO: Test is flaky?
# expect(page.get_by_text("Weight")).to_have_count(2)
# TODO: Setting more inputs without checking for updates
# causes recalculations to pile up, and these cause timeouts on CI:
# It is still rerendering the graph after hitting "Download results".
Expand Down

0 comments on commit 588280a

Please sign in to comment.