Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create mock data and plot #35

Merged
merged 17 commits into from
Oct 10, 2024
Merged
20 changes: 19 additions & 1 deletion dp_creator_ii/app/analysis_panel.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,34 @@
from shiny import ui, reactive
from shiny import ui, reactive, render

from dp_creator_ii.mock_data import mock_data, ColumnDef
from dp_creator_ii.app.plots import plot_error_bars_with_cutoff


def analysis_ui():
    """Build the "Define Analysis" navigation panel.

    The panel holds placeholder text, the ``plot_preview`` plot output,
    a short note about that plot, and the ``go_to_results`` action button.
    It is registered under the value ``"analysis_panel"``.
    """
    panel_children = [
        "TODO: Define analysis",
        ui.output_plot("plot_preview"),
        "(This plot is only to demonstrate that plotting works.)",
        ui.input_action_button("go_to_results", "Download results"),
    ]
    return ui.nav_panel(
        "Define Analysis",
        *panel_children,
        value="analysis_panel",
    )


def analysis_server(input, output, session):
@render.plot()
def plot_preview():
min_x = 0
max_x = 100
df = mock_data({"col_0_100": ColumnDef(min_x, max_x)}, row_count=20)
return plot_error_bars_with_cutoff(
df["col_0_100"].to_list(),
x_min_label=min_x,
x_max_label=max_x,
y_cutoff=30,
ekraffmiller marked this conversation as resolved.
Show resolved Hide resolved
y_error=5,
)

@reactive.effect
@reactive.event(input.go_to_results)
def go_to_results():
Expand Down
40 changes: 40 additions & 0 deletions dp_creator_ii/app/plots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import matplotlib.pyplot as plt
import numpy as np


def plot_error_bars_with_cutoff(
    y_values, x_min_label="min", x_max_label="max", y_cutoff=0, y_error=0
):
    """Draw a bar chart with error bars and a dotted horizontal cutoff line.

    Bars whose value is at or above ``y_cutoff`` are filled sky blue; bars
    below it are drawn with a white fill and a sky-blue outline. Only the
    first and last bars get x tick labels, and y ticks are hidden.

    Args:
        y_values: Sequence of bar heights, one bar per value.
        x_min_label: Tick label placed under the first bar.
        x_max_label: Tick label placed under the last bar.
        y_cutoff: Height of the dotted line, and the threshold that decides
            how each bar is filled.
        y_error: Error-bar size passed to matplotlib as ``yerr`` for
            every bar.

    Returns:
        The matplotlib figure holding the chart. For empty ``y_values``
        the figure contains an empty, tick-less axes instead of raising.
    """
    heights = np.asarray(y_values, dtype=float)
    bar_count = len(heights)

    figure, axes = plt.subplots()
    axes.get_yaxis().set_ticks([])
    if bar_count == 0:
        # Nothing to draw: max() and the first/last tick lookups below
        # would fail on empty input, so return the bare axes.
        axes.get_xaxis().set_ticks([])
        return figure

    # Bars are centered on 0.5, 1.5, ... so they sit inside (0, bar_count).
    x_values = 0.5 + np.arange(bar_count)
    is_below = heights < y_cutoff

    color = "skyblue"
    shared = {
        "width": 0.8,
        "edgecolor": color,
        "linewidth": 1,
        "yerr": y_error,
    }
    axes.bar(x_values[~is_below], heights[~is_below], color=color, **shared)
    axes.bar(x_values[is_below], heights[is_below], color="white", **shared)
    axes.hlines([y_cutoff], 0, bar_count, colors=["black"], linestyles=["dotted"])

    # Leave headroom for the tallest bar's upper error whisker, and keep the
    # cutoff line in view even when it lies above every bar.
    y_top = max(float(heights.max()) + float(np.max(y_error)), y_cutoff)
    axes.set(xlim=(0, bar_count), ylim=(0, y_top))
    axes.get_xaxis().set_ticks(
        ticks=[x_values[0], x_values[-1]],
        labels=[x_min_label, x_max_label],
    )

    return figure
28 changes: 28 additions & 0 deletions dp_creator_ii/mock_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from collections import namedtuple
import polars as pl
from scipy.stats import norm # type: ignore

ColumnDef = namedtuple("ColumnDef", ["min", "max"])


def mock_data(column_defs, row_count=1000):
    """Generate deterministic mock columns shaped like a normal distribution.

    For each column, ``row_count`` evenly spaced quantiles spanning the
    central 95% of the standard normal distribution are mapped through the
    normal inverse CDF and rescaled linearly so the first row lands on the
    column's ``min`` and the last row on its ``max``. Values are therefore
    sorted ascending, denser in the middle of the range and sparser at
    the extremes.

    Args:
        column_defs: Mapping of column name to an object with ``min`` and
            ``max`` attributes (e.g. ``ColumnDef``).
        row_count: Number of rows to generate. With ``row_count == 1`` the
            single row uses the median quantile, i.e. approximately the
            midpoint of the range.

    Returns:
        A polars DataFrame with one float column per entry in
        ``column_defs``.
    """
    # The details here don't really matter: Any method that
    # deterministically gave us more values in the middle of the range
    # and fewer at the extremes would do.
    quantile_width = 95 / 100
    tail = (1 - quantile_width) / 2
    min_ppf = norm.ppf(tail)
    max_ppf = norm.ppf(1 - tail)

    if row_count == 1:
        # A single row cannot span the range; the evenly-spaced formula
        # would divide by zero, so use the median instead.
        quantiles = [0.5]
    else:
        quantiles = [
            quantile_width * i / (row_count - 1) + tail for i in range(row_count)
        ]
    # The standard-normal positions are the same for every column, so
    # compute them once and only rescale per column.
    ppfs = [norm.ppf(quantile) for quantile in quantiles]

    data = {}
    for column_name, column_def in column_defs.items():
        min_value = column_def.min
        max_value = column_def.max
        slope = (max_value - min_value) / (max_ppf - min_ppf)
        intercept = min_value - slope * min_ppf
        data[column_name] = [slope * ppf + intercept for ppf in ppfs]

    schema = {column_name: float for column_name in column_defs}
    return pl.DataFrame(data=data, schema=schema)
15 changes: 15 additions & 0 deletions dp_creator_ii/tests/test_mock_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import polars as pl

from dp_creator_ii.mock_data import mock_data, ColumnDef


def test_mock_data():
    """Check the default row count and that each column spans its bounds."""
    bounds = {
        "col_0_100": (0, 100),
        "col_neg_pos": (-10, 10),
    }
    column_defs = {
        name: ColumnDef(low, high) for name, (low, high) in bounds.items()
    }
    df = mock_data(column_defs)

    assert df.select(pl.len()).item() == 1000
    # First and last rows must land exactly on the declared min and max.
    for name, (low, high) in bounds.items():
        column = df.get_column(name)
        assert column[0] == low
        assert column[999] == high
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ dynamic = ["version", "description"]
dependencies = [
"shiny",
"shinywidgets",
"matplotlib",
"opendp[polars]",
"jupytext",
"jupyter-client",
Expand Down
3 changes: 3 additions & 0 deletions requirements-dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,6 @@ ipykernel
# Shiny:
shiny
shinywidgets

# Visualization:
matplotlib
18 changes: 18 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,12 @@ comm==0.2.2
# via
# ipykernel
# ipywidgets
contourpy==1.3.0
# via matplotlib
coverage==7.6.1
# via -r requirements-dev.in
cycler==0.12.1
# via matplotlib
debugpy==1.8.6
# via ipykernel
decorator==5.1.1
Expand All @@ -73,6 +77,8 @@ flit==3.9.0
# via -r requirements-dev.in
flit-core==3.9.0
# via flit
fonttools==4.54.1
# via matplotlib
greenlet==3.0.3
# via playwright
h11==0.14.0
Expand Down Expand Up @@ -124,6 +130,8 @@ jupyterlab-widgets==3.0.13
# via ipywidgets
jupytext==1.16.4
# via -r requirements-dev.in
kiwisolver==1.4.7
# via matplotlib
linkify-it-py==2.0.3
# via shiny
markdown-it-py==3.0.0
Expand All @@ -135,6 +143,8 @@ markupsafe==2.1.5
# via
# jinja2
# nbconvert
matplotlib==3.9.2
# via -r requirements-dev.in
matplotlib-inline==0.1.7
# via
# ipykernel
Expand Down Expand Up @@ -170,6 +180,8 @@ nodeenv==1.9.1
# via pre-commit
numpy==1.26.4
# via
# contourpy
# matplotlib
# opendp
# pyarrow
# randomgen
Expand All @@ -184,6 +196,7 @@ packaging==24.1
# htmltools
# ipykernel
# jupytext
# matplotlib
# nbconvert
# pytest
# shiny
Expand All @@ -195,6 +208,8 @@ pathspec==0.12.1
# via black
pexpect==4.9.0
# via ipython
pillow==10.4.0
# via matplotlib
pip-tools==7.4.1
# via -r requirements-dev.in
platformdirs==4.3.6
Expand Down Expand Up @@ -233,6 +248,8 @@ pygments==2.18.0
# via
# ipython
# nbconvert
pyparsing==3.1.4
# via matplotlib
pyproject-hooks==1.1.0
# via
# build
Expand All @@ -249,6 +266,7 @@ pytest-playwright==0.5.2
python-dateutil==2.9.0.post0
# via
# jupyter-client
# matplotlib
# shinywidgets
python-multipart==0.0.9
# via shiny
Expand Down