From 73e27090631e8d746b690b9f6d9e943953054aaa Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 2 Oct 2024 13:46:55 -0400 Subject: [PATCH 01/15] make dataframe, still need inverse cdf --- dp_creator_ii/mock_data.py | 15 +++++++++++++++ dp_creator_ii/tests/test_mock_data.py | 9 +++++++++ 2 files changed, 24 insertions(+) create mode 100644 dp_creator_ii/mock_data.py create mode 100644 dp_creator_ii/tests/test_mock_data.py diff --git a/dp_creator_ii/mock_data.py b/dp_creator_ii/mock_data.py new file mode 100644 index 0000000..2ce2b6d --- /dev/null +++ b/dp_creator_ii/mock_data.py @@ -0,0 +1,15 @@ +from collections import namedtuple +import polars as pl + +ColumnDef = namedtuple("ColumnDef", ["min", "max"]) + + +def mock_data(column_defs, row_count=1000): + schema = {column_name: float for column_name in column_defs.keys()} + data = {column_name: [] for column_name in column_defs.keys()} + for i in range(row_count): + for column_name, column_def in column_defs.items(): + scale = column_def.max - column_def.min + value = scale * i / row_count + column_def.min + data[column_name].append(value) + return pl.DataFrame(data=data, schema=schema) diff --git a/dp_creator_ii/tests/test_mock_data.py b/dp_creator_ii/tests/test_mock_data.py new file mode 100644 index 0000000..1afa516 --- /dev/null +++ b/dp_creator_ii/tests/test_mock_data.py @@ -0,0 +1,9 @@ +from dp_creator_ii.mock_data import mock_data, ColumnDef + + +def test_mock_data(): + col_0_100 = ColumnDef(0, 100) + col_neg_pos = ColumnDef(-10, 10) + df = mock_data({"col_0_100": col_0_100, "col_neg_pos": col_neg_pos}, row_count=5) + print(df) + assert df == None From a610fe640af4c963f648fc68002acad6a8a467b7 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 2 Oct 2024 15:14:51 -0400 Subject: [PATCH 02/15] use norm --- dp_creator_ii/mock_data.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dp_creator_ii/mock_data.py b/dp_creator_ii/mock_data.py index 2ce2b6d..2e82462 100644 --- a/dp_creator_ii/mock_data.py +++ b/dp_creator_ii/mock_data.py @@ -1,5 +1,6 @@ from collections import namedtuple import polars as pl +from scipy.stats import norm ColumnDef = namedtuple("ColumnDef", ["min", "max"]) @@ -9,7 +10,7 @@ def mock_data(column_defs, row_count=1000): data = {column_name: [] for column_name in column_defs.keys()} for i in range(row_count): for column_name, column_def in column_defs.items(): - scale = column_def.max - column_def.min - value = scale * i / row_count + column_def.min + quantile = i / row_count / 2 + 0.25 # ie, 25th to 75th percentiles + value = norm.ppf(quantile) data[column_name].append(value) return pl.DataFrame(data=data, schema=schema) From a3670ee69afe1e48f19ebeb4f04fd076400eb9ec Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 2 Oct 2024 17:18:31 -0400 Subject: [PATCH 03/15] math is right --- dp_creator_ii/mock_data.py | 10 +++++++--- dp_creator_ii/tests/test_mock_data.py | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/dp_creator_ii/mock_data.py b/dp_creator_ii/mock_data.py index 2e82462..9a43f22 100644 --- a/dp_creator_ii/mock_data.py +++ b/dp_creator_ii/mock_data.py @@ -8,9 +8,13 @@ def mock_data(column_defs, row_count=1000): schema = {column_name: float for column_name in column_defs.keys()} data = {column_name: [] for column_name in column_defs.keys()} - for i in range(row_count): + for i in range(row_count + 1): for column_name, column_def in column_defs.items(): - quantile = i / row_count / 2 + 0.25 # ie, 25th to 75th percentiles - value = norm.ppf(quantile) + quantile_width = 2 / 3 + quantile = (quantile_width * i / row_count) + (1 - quantile_width) / 2 + ppf = norm.ppf(quantile) + scale = column_def.max - column_def.min + center = (column_def.max + column_def.min) / 2 + value = ppf * scale / 2 + center data[column_name].append(value) return pl.DataFrame(data=data, schema=schema) diff --git a/dp_creator_ii/tests/test_mock_data.py b/dp_creator_ii/tests/test_mock_data.py index 1afa516..feda4cb 100644 --- a/dp_creator_ii/tests/test_mock_data.py +++ b/dp_creator_ii/tests/test_mock_data.py @@ -4,6 +4,6 @@ def test_mock_data(): col_0_100 = ColumnDef(0, 100) col_neg_pos = ColumnDef(-10, 10) - df = mock_data({"col_0_100": col_0_100, "col_neg_pos": col_neg_pos}, row_count=5) + df = mock_data({"col_0_100": col_0_100, "col_neg_pos": col_neg_pos}) print(df) assert df == None From a768ce9632e9f283fbf9455dfe4ea1d90a27847f Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 2 Oct 2024 17:21:05 -0400 Subject: [PATCH 04/15] switch loops --- dp_creator_ii/mock_data.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dp_creator_ii/mock_data.py b/dp_creator_ii/mock_data.py index 9a43f22..7f4cbbd 100644 --- a/dp_creator_ii/mock_data.py +++ b/dp_creator_ii/mock_data.py @@ -8,13 +8,14 @@ def mock_data(column_defs, row_count=1000): schema = {column_name: float for column_name in column_defs.keys()} data = {column_name: [] for column_name in column_defs.keys()} - for i in range(row_count + 1): - for column_name, column_def in column_defs.items(): - quantile_width = 2 / 3 + + quantile_width = 2 / 3 + for column_name, column_def in column_defs.items(): + scale = column_def.max - column_def.min + center = (column_def.max + column_def.min) / 2 + for i in range(row_count + 1): quantile = (quantile_width * i / row_count) + (1 - quantile_width) / 2 ppf = norm.ppf(quantile) - scale = column_def.max - column_def.min - center = (column_def.max + column_def.min) / 2 value = ppf * scale / 2 + center data[column_name].append(value) return pl.DataFrame(data=data, schema=schema) From 025167323bf0d0ec5d73c1d8cf971d13285b8eff Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 2 Oct 2024 17:31:41 -0400 Subject: [PATCH 05/15] add a test --- dp_creator_ii/mock_data.py | 4 ++-- dp_creator_ii/tests/test_mock_data.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/dp_creator_ii/mock_data.py b/dp_creator_ii/mock_data.py index 7f4cbbd..e235e33 100644 --- a/dp_creator_ii/mock_data.py +++ b/dp_creator_ii/mock_data.py @@ -13,8 +13,8 @@ def mock_data(column_defs, row_count=1000): for column_name, column_def in column_defs.items(): scale = column_def.max - column_def.min center = (column_def.max + column_def.min) / 2 - for i in range(row_count + 1): - quantile = (quantile_width * i / row_count) + (1 - quantile_width) / 2 + for i in range(row_count): + quantile = (quantile_width * i / (row_count - 1)) + (1 - quantile_width) / 2 ppf = norm.ppf(quantile) value = ppf * scale / 2 + center data[column_name].append(value) diff --git a/dp_creator_ii/tests/test_mock_data.py b/dp_creator_ii/tests/test_mock_data.py index feda4cb..80ea5a4 100644 --- a/dp_creator_ii/tests/test_mock_data.py +++ b/dp_creator_ii/tests/test_mock_data.py @@ -1,3 +1,5 @@ +import polars as pl + from dp_creator_ii.mock_data import mock_data, ColumnDef @@ -5,5 +7,4 @@ def test_mock_data(): col_0_100 = ColumnDef(0, 100) col_neg_pos = ColumnDef(-10, 10) df = mock_data({"col_0_100": col_0_100, "col_neg_pos": col_neg_pos}) - print(df) - assert df == None + assert df.select(pl.len()).item() == 1000 From 193e9673706ab9a387ae14e1c137a71b4ed4d38b Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 2 Oct 2024 17:42:55 -0400 Subject: [PATCH 06/15] test mock_data distribution --- dp_creator_ii/tests/test_mock_data.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dp_creator_ii/tests/test_mock_data.py b/dp_creator_ii/tests/test_mock_data.py index 80ea5a4..5db6be9 100644 --- a/dp_creator_ii/tests/test_mock_data.py +++ b/dp_creator_ii/tests/test_mock_data.py @@ -1,4 +1,5 @@ import polars as pl +from pytest import approx from dp_creator_ii.mock_data import mock_data, ColumnDef @@ -7,4 +8,10 @@ def test_mock_data(): col_0_100 = ColumnDef(0, 100) col_neg_pos = ColumnDef(-10, 10) df = mock_data({"col_0_100": col_0_100, "col_neg_pos": col_neg_pos}) + assert df.select(pl.len()).item() == 1000 + assert 0 < df.get_column("col_0_100")[0] < 2 + assert 98 < df.get_column("col_0_100")[999] < 100 + assert df.get_column("col_neg_pos")[0] + df.get_column("col_neg_pos")[ + 999 + ] == approx(0) From c4cc8262bef6fd8ca5d5cf311a78ec4b002681ec Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 2 Oct 2024 17:43:49 -0400 Subject: [PATCH 07/15] reformat --- dp_creator_ii/tests/test_mock_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dp_creator_ii/tests/test_mock_data.py b/dp_creator_ii/tests/test_mock_data.py index 5db6be9..44a99d8 100644 --- a/dp_creator_ii/tests/test_mock_data.py +++ b/dp_creator_ii/tests/test_mock_data.py @@ -12,6 +12,6 @@ def test_mock_data(): assert df.select(pl.len()).item() == 1000 assert 0 < df.get_column("col_0_100")[0] < 2 assert 98 < df.get_column("col_0_100")[999] < 100 - assert df.get_column("col_neg_pos")[0] + df.get_column("col_neg_pos")[ - 999 - ] == approx(0) + assert ( + df.get_column("col_neg_pos")[0] + df.get_column("col_neg_pos")[999] + ) == approx(0) From 67b275d6289f08883efd1ac385c70ba857d2f315 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Wed, 2 Oct 2024 18:01:40 -0400 Subject: [PATCH 08/15] Ignore scipy typing (difficult because we are pinned to an old version) --- dp_creator_ii/mock_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dp_creator_ii/mock_data.py b/dp_creator_ii/mock_data.py index e235e33..dd7a0bb 100644 --- a/dp_creator_ii/mock_data.py +++ b/dp_creator_ii/mock_data.py @@ -1,6 +1,6 @@ from collections import namedtuple import polars as pl -from scipy.stats import norm +from scipy.stats import norm # type: ignore ColumnDef = namedtuple("ColumnDef", ["min", "max"]) From 21a69cb380aa1382f4341edf4fa427ae43c28678 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 3 Oct 2024 11:26:46 -0400 Subject: [PATCH 09/15] add a notebook while we figure out the visualization --- dp_creator_ii/junk.ipynb | 121 +++++++++++++++++++++++++++++++++++++ dp_creator_ii/mock_data.py | 3 + pyproject.toml | 1 + requirements-dev.in | 3 + requirements-dev.txt | 18 ++++++ 5 files changed, 146 insertions(+) create mode 100644 dp_creator_ii/junk.ipynb diff --git a/dp_creator_ii/junk.ipynb b/dp_creator_ii/junk.ipynb new file mode 100644 index 0000000..da343c7 --- /dev/null +++ b/dp_creator_ii/junk.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (20, 2)
col_0_100col_neg_pos
f64f64
1.628922-9.674216
8.231443-8.353711
14.175125-7.164975
19.650658-6.069868
24.783898-5.04322
75.2161025.04322
80.3493426.069868
85.8248757.164975
91.7685578.353711
98.3710789.674216
" + ], + "text/plain": [ + "shape: (20, 2)\n", + "┌───────────┬─────────────┐\n", + "│ col_0_100 ┆ col_neg_pos │\n", + "│ --- ┆ --- │\n", + "│ f64 ┆ f64 │\n", + "╞═══════════╪═════════════╡\n", + "│ 1.628922 ┆ -9.674216 │\n", + "│ 8.231443 ┆ -8.353711 │\n", + "│ 14.175125 ┆ -7.164975 │\n", + "│ 19.650658 ┆ -6.069868 │\n", + "│ 24.783898 ┆ -5.04322 │\n", + "│ … ┆ … │\n", + "│ 75.216102 ┆ 5.04322 │\n", + "│ 80.349342 ┆ 6.069868 │\n", + "│ 85.824875 ┆ 7.164975 │\n", + "│ 91.768557 ┆ 8.353711 │\n", + "│ 98.371078 ┆ 9.674216 │\n", + "└───────────┴─────────────┘" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dp_creator_ii.mock_data import mock_data, ColumnDef\n", + "\n", + "col_0_100 = ColumnDef(0, 100)\n", + "col_neg_pos = ColumnDef(-10, 10)\n", + "df = mock_data({\"col_0_100\": col_0_100, \"col_neg_pos\": col_neg_pos}, row_count=20)\n", + "df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOUAAADqCAYAAABdn4LDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAALxElEQVR4nO3dX2yT9R7H8U/bsw2GHRMn2yoMh0clUdiJRJbFcGG2DLgw+OcCEy8wMRpxXPgvORcnCjnJCf5JvNAQvZN4g8oFGkmOAQeb0QyMuMQYzQJmEcw2FjFI3Zir6/dc7LRSYKPtuj3ftu9XsrB1z55+u4fPft89z7ddyMxMANwIB10AgEyEEnCGUALOEErAGUIJOEMoAWcIJeDM3/L5omQyqaGhIUWjUYVCoULXBBQVM1M8HlcsFlM4PPd1Lq9QDg0NaeXKlXO+c6CUnD17VitWrJjzfvIKZTQalSQNDg5q2bJlcy4iCIlEQocPH1ZnZ6cqKiqCLicvPIaFNTY2plgsJml6YVqyZIkk6ddff1Vzc3M6F3OVVyhTLWs0GlVNTU1BClloiURC1dXVqqmpcf+fYSY8hoUViUTS79fU1KRDmUgkJKlgv8rlFUqgnFyanFJFJKQ/p5Lp2/6cSqY/DoUjM31pXgglcB0VkZD+/q//Kjk5kb5t7e7DClcukiQl/xgv6P1xSQRwhlACztC+AlkKVy7Sqn8emv/7mfd7AJATQgk4QygBZwgl4AyhBDQ9QhcKhRQKhTQ2NhZoLYQScIZQAs4QSsAZhgdQtlKD5pJmHDYPAqFE2UoNmkuacdhckk7/Z8uC1kX7CjhDKAFnaF8BLdyweTZYKQFnCCXgDKEEnCGUKGmeZlqzRSgBZwgl4AyhBJzhOiVKjteZ1mwRSpQcrzOt2aJ9BZwhlIAztK8oaZ5mWrPFSgk4QygBZwglilIxjs9li1ACzhBKwBlCCTjDJREUjWIfn8sWoUTRKPbxuWzRvgLOEErAGdpXFKViHJ/LFisl4AyhhCulPKmTLUIJOEMoAWcIJeAMZ18RuI33tysUjlw1mVNqkzrZIpQIXLR6cVlM6mSL9hVwhlACztC+wpVSntTJFislFgRDAdkjlIAzhBJwhlACznCiB/Mq9RIeMw0FhMKRoEpzi1BiXqVewoOhgOzRvgLOEErAGdpXLAiGArLHSgk4QygxZ0zrFBahBJwhlIAznOhBXsrl73oEgVAiL+Xydz2CQPsKOEMoAWdoXzFnDAYUFislZsT1x2AQSsAZQgk4QygBZzjRgwwMBQSPUCIDQwHBo30FnCGUgDO0r5gRQwHBYKUsQwwF+EYoAWcIJeAMoQSc4URPmWAooHgQyjLBUEDxoH0FnCGUgDO0r2WIoQDfWClLCEMBpYFQAs4QSsAZQgk4w4meIrfx/naFwpGrhgAYCihehLLIRasXMxRQYmhfAWcIJeAM7WsJYSigNLBSFgGGAsoLoQScIZSAM/xO6VjqickzXX8MhSNBlYZ5RCgdSz0xmeuP5YX2FXCGUALO0L4WAa4/lhdWyoBxDRJXIpSAM4QScIZQAs5woicAvFo5ZkMoA8CrlWM2tK+AM4QScIb2NWAMBuBKrJTzhKEA5ItQAs4QSsAZQgk4w4meAmIoAIVAKAuIoQAUAu0r4AyhBJyhfZ0nDAUgX6yUOWIoAPONUALOEErAGUIJOMOJniwwFICFRCizwFAAFhLtK+AMoQScoX3NEUMBmG+slP/HUAC8IJSAM4QScIZQAs6U9Ymejfe3KxSOXDUEwFAAglTWoYxWL2YoAO7QvgLOEErAmbJuXy/HUAC8YKUEnCn5UDKpg2JT8qEEig2hBJwhlIAzJXv2NfUSHjNN6oTCkaBKA2ZVsqFMvYQHkzooNrSvgDMlu1KmMBSAYsNKCThDKAFnijqUTOugFBV1KIFSRCgBZwgl4EzRXRLhj+2g1BVdKPljOyh1tK+AM4QScKbo2tfLMUKHUsRKCTjjMpRM6qCcuQwlUM4IJeAMoQSccXP2lUkdYJqbUDKpA0yjfQWcIZSAM27a18sxqYNyxkoJOEMoAWcIJeDMgoaSmVbg+lgpAWcIJeAMoQScmffrlMy0ArmZ91Ay0wrkhvYVcIZQAs4s6OwrM63A9bFSAs4QSsAZQgk4QygBZwoSSgbNgcJhpQScIZSAM4QScGZOwwOhcOSqwXIGzYG5mVMo//HvIwpXVTNoDhRQXqE0M0lS8o/x6X8vC2Xyj3HJ/lopL168mN5uNkFs57m2bLfzXFu223muLZvtUp9L5WKuQpbHnn7++WetXLmyIAUApeLs2bNasWLFnPeTVyiTyaSGhoYUjUYVCoXmXARQzMxM8XhcsVhM4fDcz53mFUoA84dLIoAzhBJwhlACzhBKwJm8Qrl3717deuutWrRokVpbW/XVV18Vuq55s3v37vQzWlJva9asCbqsWX3++ed64IEHFIvFFAqF9NFHH2V83sz08ssvq7GxUYsXL1ZHR4dOnToVTLEzuN5jePzxx686Lps3bw6m2Bns2bNH9957r6LRqJYvX64HH3xQAwMDGdtMTEyoq6tLN910k2644QY98sgjOnfuXE73k3MoP/jgAz3//PPatWuXvvnmG7W0tGjTpk0aHR3NdVeBueuuuzQ8PJx+++KLL4IuaVZjY2NqaWnR3r17r/n51157TW+++abeeecdnThxQkuWLNGmTZs0MTFxze2DcL3HIEmbN2/OOC779+9fwAqvr7e3V11dXTp+/LiOHDmiRCKhzs7OjKcrPvfcc/rkk0904MAB9fb2amhoSA8//HBud2Q52rBhg3V1daU/npqaslgsZnv27Ml1V4HYtWuXtbS0BF1G3iTZwYMH0x8nk0lraGiw119/PX3bhQsXrKqqyvbv3x9Ahdd35WMwM9u+fbtt3bo1kHryNTo6apKst7fXzKa/7xUVFXbgwIH0Nj/88INJsr6+vqz3m9NKOTk5qZMnT6qjoyN9WzgcVkdHh/r6+nL7aRCgU6dOKRaLafXq1Xrsscd05syZoEvK2+DgoEZGRjKOydKlS9Xa2lpUx0SSenp6tHz5ct15553asWOHzp8/H3RJs/rtt98kScuWLZMknTx5UolEIuNYrFmzRk1NTTkdi5xC+csvv2hqakr19fUZt9fX12tkZCSXXQWmtbVV+/bt06effqq3335bg4OD2rhxo+LxeNCl5SX1fS/mYyJNt67vvfeeuru79eqrr6q3t1dbtmzR1NRU0KVdUzKZ1LPPPqv77rtPd999t6TpY1FZWana2tqMbXM9Fgv6uq8ebNny17NW1q1bp9bWVq1atUoffvihnnjiiQArK2+PPvpo+v21a9dq3bp1uu2229TT06P29vYAK7u2rq4ufffdd/NyPiKnlbKurk6RSOSqs0nnzp1TQ0NDQQtbKLW1tbrjjjt0+vTpoEvJS+r7XkrHRJJWr16turo6l8dl586dOnTokI4dO5YxgN7Q0KDJyUlduHAhY/tcj0VOoaysrNT69evV3d2dvi2ZTKq7u1ttbW257MqN33//XT/++KMaGxuDLiUvzc3NamhoyDgmFy9e1IkTJ4r2mEjTz0Q6f/68q+NiZtq5c6cOHjyoo0ePqrm5OePz69evV0VFRcaxGBgY0JkzZ3I7FrmecXr//fetqqrK9u3bZ99//7099dRTVltbayMjI7nuKhAvvPCC9fT02ODgoH355ZfW0dFhdXV1Njo6GnRpM4rH49bf32/9/f0myd544w3r7++3n376yczMXnnlFautrbWPP/7Yvv32W9u6das1NzfbpUuXAq78L7M9hng8bi+++KL19fXZ4OCgffbZZ3bPPffY7bffbhMTE0GXnrZjxw5bunSp9fT02PDwcPptfHw8vc3TTz9tTU1NdvToUfv666+tra3N2tracrqfnENpZvbWW29ZU1OTVVZW2oYNG+z48eP57CYQ27Zts8bGRqusrLRbbrnFtm3bZqdPnw66rFkdO3bMJF31tn37djObvizy0ksvWX19vVVVVVl7e7sNDAwEW/QVZnsM4+Pj1tnZaTfffLNVVFTYqlWr7Mknn3T3g/5a9Uuyd999N73NpUuX7JlnnrEbb7zRqqur7aGHHrLh4eGc7oenbgHOMPsKOEMoAWcIJeAMoQScIZSAM4QScIZQAs4QSsAZQgk4QygBZwgl4Mz/AH/QByhRQX5/AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "# plt.style.use('_mpl-gallery')\n", + "\n", + "# make data:\n", + "y = df['col_0_100'].to_list()\n", + "x = 0.5 + np.arange(len(y))\n", + "\n", + "# plot\n", + "_figure, axes = plt.subplots()\n", + "\n", + "axes.bar(x, y, width=1, edgecolor=\"white\", linewidth=0.7, yerr=2)\n", + "\n", + "axes.set(xlim=(0, 20),\n", + " ylim=(0, 100))\n", + "axes.get_yaxis().set_ticks([])\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/dp_creator_ii/mock_data.py b/dp_creator_ii/mock_data.py index dd7a0bb..dcfb462 100644 --- a/dp_creator_ii/mock_data.py +++ b/dp_creator_ii/mock_data.py @@ -9,6 +9,9 @@ def mock_data(column_defs, row_count=1000): schema = {column_name: float for column_name in column_defs.keys()} data = {column_name: [] for column_name in column_defs.keys()} + # The details here don't really matter: Any method that + # deterministically gave us more values in the middle of the range + # and fewer at the extremes would do. quantile_width = 2 / 3 for column_name, column_def in column_defs.items(): scale = column_def.max - column_def.min diff --git a/pyproject.toml b/pyproject.toml index 59751d1..a24fdfc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ dynamic = ["version", "description"] dependencies = [ "shiny", "shinywidgets", + "matplotlib", "opendp[polars]", "jupytext", "jupyter-client", diff --git a/requirements-dev.in b/requirements-dev.in index f2eb714..493433e 100644 --- a/requirements-dev.in +++ b/requirements-dev.in @@ -35,3 +35,6 @@ ipykernel # Shiny: shiny shinywidgets + +# Visualization: +matplotlib diff --git a/requirements-dev.txt b/requirements-dev.txt index 7916b16..dd96103 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -45,8 +45,12 @@ comm==0.2.2 # via # ipykernel # ipywidgets +contourpy==1.3.0 + # via matplotlib coverage==7.6.1 # via -r requirements-dev.in +cycler==0.12.1 + # via matplotlib debugpy==1.8.6 # via ipykernel decorator==5.1.1 @@ -73,6 +77,8 @@ flit==3.9.0 # via -r requirements-dev.in flit-core==3.9.0 # via flit +fonttools==4.54.1 + # via matplotlib greenlet==3.0.3 # via playwright h11==0.14.0 @@ -124,6 +130,8 @@ jupyterlab-widgets==3.0.13 # via ipywidgets jupytext==1.16.4 # via -r requirements-dev.in +kiwisolver==1.4.7 + # via matplotlib linkify-it-py==2.0.3 # via shiny markdown-it-py==3.0.0 @@ -135,6 +143,8 @@ markupsafe==2.1.5 # via # jinja2 # nbconvert +matplotlib==3.9.2 + # via -r requirements-dev.in matplotlib-inline==0.1.7 # via # ipykernel @@ -170,6 +180,8 @@ nodeenv==1.9.1 # via pre-commit numpy==1.26.4 # via + # contourpy + # matplotlib # opendp # pyarrow # randomgen @@ -184,6 +196,7 @@ packaging==24.1 # htmltools # ipykernel # jupytext + # matplotlib # nbconvert # pytest # shiny @@ -195,6 +208,8 @@ pathspec==0.12.1 # via black pexpect==4.9.0 # via ipython +pillow==10.4.0 + # via matplotlib pip-tools==7.4.1 # via -r requirements-dev.in platformdirs==4.3.6 @@ -233,6 +248,8 @@ pygments==2.18.0 # via # ipython # nbconvert +pyparsing==3.1.4 + # via matplotlib pyproject-hooks==1.1.0 # via # build @@ -249,6 +266,7 @@ pytest-playwright==0.5.2 python-dateutil==2.9.0.post0 # via # jupyter-client + # matplotlib # shinywidgets python-multipart==0.0.9 # via shiny From a100c069d8e08f5a456f5a2e23aa1a47c7c62c84 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 3 Oct 2024 12:01:28 -0400 Subject: [PATCH 10/15] everything we need in the plot --- dp_creator_ii/junk.ipynb | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/dp_creator_ii/junk.ipynb b/dp_creator_ii/junk.ipynb index da343c7..82c5939 100644 --- a/dp_creator_ii/junk.ipynb +++ b/dp_creator_ii/junk.ipynb @@ -54,14 +54,14 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 40, "metadata": {}, "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOUAAADqCAYAAABdn4LDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAALxElEQVR4nO3dX2yT9R7H8U/bsw2GHRMn2yoMh0clUdiJRJbFcGG2DLgw+OcCEy8wMRpxXPgvORcnCjnJCf5JvNAQvZN4g8oFGkmOAQeb0QyMuMQYzQJmEcw2FjFI3Zir6/dc7LRSYKPtuj3ftu9XsrB1z55+u4fPft89z7ddyMxMANwIB10AgEyEEnCGUALOEErAGUIJOEMoAWcIJeDM3/L5omQyqaGhIUWjUYVCoULXBBQVM1M8HlcsFlM4PPd1Lq9QDg0NaeXKlXO+c6CUnD17VitWrJjzfvIKZTQalSQNDg5q2bJlcy4iCIlEQocPH1ZnZ6cqKiqCLicvPIaFNTY2plgsJml6YVqyZIkk6ddff1Vzc3M6F3OVVyhTLWs0GlVNTU1BClloiURC1dXVqqmpcf+fYSY8hoUViUTS79fU1KRDmUgkJKlgv8rlFUqgnFyanFJFJKQ/p5Lp2/6cSqY/DoUjM31pXgglcB0VkZD+/q//Kjk5kb5t7e7DClcukiQl/xgv6P1xSQRwhlACztC+AlkKVy7Sqn8emv/7mfd7AJATQgk4QygBZwgl4AyhBDQ9QhcKhRQKhTQ2NhZoLYQScIZQAs4QSsAZhgdQtlKD5pJmHDYPAqFE2UoNmkuacdhckk7/Z8uC1kX7CjhDKAFnaF8BLdyweTZYKQFnCCXgDKEEnCGUKGmeZlqzRSgBZwgl4AyhBJzhOiVKjteZ1mwRSpQcrzOt2aJ9BZwhlIAztK8oaZ5mWrPFSgk4QygBZwglilIxjs9li1ACzhBKwBlCCTjDJREUjWIfn8sWoUTRKPbxuWzRvgLOEErAGdpXFKViHJ/LFisl4AyhhCulPKmTLUIJOEMoAWcIJeAMZ18RuI33tysUjlw1mVNqkzrZIpQIXLR6cVlM6mSL9hVwhlACztC+wpVSntTJFislFgRDAdkjlIAzhBJwhlACznCiB/Mq9RIeMw0FhMKRoEpzi1BiXqVewoOhgOzRvgLOEErAGdpXLAiGArLHSgk4QygxZ0zrFBahBJwhlIAznOhBXsrl73oEgVAiL+Xydz2CQPsKOEMoAWdoXzFnDAYUFislZsT1x2AQSsAZQgk4QygBZzjRgwwMBQSPUCIDQwHBo30FnCGUgDO0r5gRQwHBYKUsQwwF+EYoAWcIJeAMoQSc4URPmWAooHgQyjLBUEDxoH0FnCGUgDO0r2WIoQDfWClLCEMBpYFQAs4QSsAZQgk4w4meIrfx/naFwpGrhgAYCihehLLIRasXMxRQYmhfAWcIJeAM7WsJYSigNLBSFgGGAsoLoQScIZSAM/xO6VjqickzXX8MhSNBlYZ5RCgdSz0xmeuP5YX2FXCGUALO0L4WAa4/lhdWyoBxDRJXIpSAM4QScIZQAs5woicAvFo5ZkMoA8CrlWM2tK+AM4QScIb2NWAMBuBKrJTzhKEA5ItQAs4QSsAZQgk4w4meAmIoAIVAKAuIoQAUAu0r4AyhBJyhfZ0nDAUgX6yUOWIoAPONUALOEErAGUIJOMOJniwwFICFRCizwFAAFhLtK+AMoQScoX3NEUMBmG+slP/HUAC8IJSAM4QScIZQAs6U9Ymejfe3KxSOXDUEwFAAglTWoYxWL2YoAO7QvgLOEErAmbJuXy/HUAC8YKUEnCn5UDKpg2JT8qEEig2hBJwhlIAzJXv2NfUSHjNN6oTCkaBKA2ZVsqFMvYQHkzooNrSvgDMlu1KmMBSAYsNKCThDKAFnijqUTOugFBV1KIFSRCgBZwgl4EzRXRLhj+2g1BVdKPljOyh1tK+AM4QScKbo2tfLMUKHUsRKCTjjMpRM6qCcuQwlUM4IJeAMoQSccXP2lUkdYJqbUDKpA0yjfQWcIZSAM27a18sxqYNyxkoJOEMoAWcIJeDMgoaSmVbg+lgpAWcIJeAMoQScmffrlMy0ArmZ91Ay0wrkhvYVcIZQAs4s6OwrM63A9bFSAs4QSsAZQgk4QygBZwoSSgbNgcJhpQScIZSAM4QScGZOwwOhcOSqwXIGzYG5mVMo//HvIwpXVTNoDhRQXqE0M0lS8o/x6X8vC2Xyj3HJ/lopL168mN5uNkFs57m2bLfzXFu223muLZvtUp9L5WKuQpbHnn7++WetXLmyIAUApeLs2bNasWLFnPeTVyiTyaSGhoYUjUYVCoXmXARQzMxM8XhcsVhM4fDcz53mFUoA84dLIoAzhBJwhlACzhBKwJm8Qrl3717deuutWrRokVpbW/XVV18Vuq55s3v37vQzWlJva9asCbqsWX3++ed64IEHFIvFFAqF9NFHH2V83sz08ssvq7GxUYsXL1ZHR4dOnToVTLEzuN5jePzxx686Lps3bw6m2Bns2bNH9957r6LRqJYvX64HH3xQAwMDGdtMTEyoq6tLN910k2644QY98sgjOnfuXE73k3MoP/jgAz3//PPatWuXvvnmG7W0tGjTpk0aHR3NdVeBueuuuzQ8PJx+++KLL4IuaVZjY2NqaWnR3r17r/n51157TW+++abeeecdnThxQkuWLNGmTZs0MTFxze2DcL3HIEmbN2/OOC779+9fwAqvr7e3V11dXTp+/LiOHDmiRCKhzs7OjKcrPvfcc/rkk0904MAB9fb2amhoSA8//HBud2Q52rBhg3V1daU/npqaslgsZnv27Ml1V4HYtWuXtbS0BF1G3iTZwYMH0x8nk0lraGiw119/PX3bhQsXrKqqyvbv3x9Ahdd35WMwM9u+fbtt3bo1kHryNTo6apKst7fXzKa/7xUVFXbgwIH0Nj/88INJsr6+vqz3m9NKOTk5qZMnT6qjoyN9WzgcVkdHh/r6+nL7aRCgU6dOKRaLafXq1Xrsscd05syZoEvK2+DgoEZGRjKOydKlS9Xa2lpUx0SSenp6tHz5ct15553asWOHzp8/H3RJs/rtt98kScuWLZMknTx5UolEIuNYrFmzRk1NTTkdi5xC+csvv2hqakr19fUZt9fX12tkZCSXXQWmtbVV+/bt06effqq3335bg4OD2rhxo+LxeNCl5SX1fS/mYyJNt67vvfeeuru79eqrr6q3t1dbtmzR1NRU0KVdUzKZ1LPPPqv77rtPd999t6TpY1FZWana2tqMbXM9Fgv6uq8ebNny17NW1q1bp9bWVq1atUoffvihnnjiiQArK2+PPvpo+v21a9dq3bp1uu2229TT06P29vYAK7u2rq4ufffdd/NyPiKnlbKurk6RSOSqs0nnzp1TQ0NDQQtbKLW1tbrjjjt0+vTpoEvJS+r7XkrHRJJWr16turo6l8dl586dOnTokI4dO5YxgN7Q0KDJyUlduHAhY/tcj0VOoaysrNT69evV3d2dvi2ZTKq7u1ttbW257MqN33//XT/++KMaGxuDLiUvzc3NamhoyDgmFy9e1IkTJ4r2mEjTz0Q6f/68q+NiZtq5c6cOHjyoo0ePqrm5OePz69evV0VFRcaxGBgY0JkzZ3I7FrmecXr//fetqqrK9u3bZ99//7099dRTVltbayMjI7nuKhAvvPCC9fT02ODgoH355ZfW0dFhdXV1Njo6GnRpM4rH49bf32/9/f0myd544w3r7++3n376yczMXnnlFautrbWPP/7Yvv32W9u6das1NzfbpUuXAq78L7M9hng8bi+++KL19fXZ4OCgffbZZ3bPPffY7bffbhMTE0GXnrZjxw5bunSp9fT02PDwcPptfHw8vc3TTz9tTU1NdvToUfv666+tra3N2tracrqfnENpZvbWW29ZU1OTVVZW2oYNG+z48eP57CYQ27Zts8bGRqusrLRbbrnFtm3bZqdPnw66rFkdO3bMJF31tn37djObvizy0ksvWX19vVVVVVl7e7sNDAwEW/QVZnsM4+Pj1tnZaTfffLNVVFTYqlWr7Mknn3T3g/5a9Uuyd999N73NpUuX7JlnnrEbb7zRqqur7aGHHrLh4eGc7oenbgHOMPsKOEMoAWcIJeAMoQScIZSAM4QScIZQAs4QSsAZQgk4QygBZwgl4Mz/AH/QByhRQX5/AAAAAElFTkSuQmCC", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAakAAADqCAYAAAAYl7otAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAPxUlEQVR4nO3dbWxU55nG8evMizHx2EZOAgjhBLYvSqKkiUQIQZEiVWKDkCqVTUaqdiMtSZH6oQYpcboq3aZLuiyJtqiaJDK0qiKRql1HFLUoCRFtCQRHqZyigpSXhlCBEAx1bOwCNgzYM/ac/TDi4AOesWfm2L5n5v/7ZB5uzhx4jufi3D7PM47ruq4AADAoNNsnAABAPoQUAMAsQgoAYBYhBQAwi5ACAJhFSAEAzCKkAABmRUr5Q9lsVj09PWpsbJTjOEGfEwCgQrmuq0uXLmnRokUKhcq/DyoppHp6etTa2lr2iwMAqlMymdTixYvLPk5JIdXY2ChJOnXqlFpaWso+CQCAbalUSosWLZKUu1FpaGiYsO78+fNaunSplxPlKimkrrX4Ghsb1dTUFMiJAADsCofD3tdNTU15QyqTyUhSYD8KKimkAADV5Wp6TNFw/mAZHcv6vh7/6/GcUHjC8VIRUgAARcOOvvzDfXl/P5se9r6+74U/KlRXP3HdyJVAz4tH0AEAZhFSAACzaPcBACYVqqvXnd/fO/OvO+OvCADAFBFSAACzCCkAgFmEFADALEIKAKpQKpWS4zhyHEepVGq2T6dkhBQAwCxCCgBgFiEFADCLxbwAUIGC2hDWOkIKACpQUBvCXnNi65rAzi1ItPsAAGYRUgAAs2j3AUAVmq0NYYPGnRQAwCxCCgBgFiEFADCLkAIAA6plr72gEVIAALMIKQCAWYQUAMAs1kkBwAwptN9etey1FzRCCgBmSKH99qplr72g0e4DAJhFSAEAzKLdBwAGVMtee0HjTgoAYBYhBQAwi5ACgBKxldH0I6QAAGYRUgAAswgpAIBZPIIOAHkU2sZIYiujmUBIAUAehbYxktjKaCbQ7gMAmEVIAQDMot0HACViK6Ppx50UAMAsQgpAzWCHiMpDSAEAzCKkAABmEVIAALN4ug9A1WCHiOpDSAGoGuwQUX1o9wEAzCKkAABm0e4DUDPYIaLycCcFwDQW4NY2QgoAYBYhBQAwi5ACAJjFgxMAZhULcFEIIQVgVgW5AJfFt9WHdh8AwCxCCgBgFu0+AKaxALe2cScFADCLkAIQKHaIQJAIKQCAWYQUAMAsHpwAUBQW32ImEVIAisKn32Im0e4DAJhFSAEAzKLdByBQLL5FkLiTAmoc65pgGSEFADCLkAIAmEVIAQDM4sEJoAYUWoDL4ltYRkgBNaDQAlwW38Iy2n0AALMIKQCAWbT7gBrH4ltYxp0UUIFYgItaQUgBAMwipAAAZhFSAACzeHACMIhPvwVyCCnAID79Fsih3QcAMIuQAgCYRbsPqEAswEWt4E4KmAEsvgVKQ0gBAMwipAAAZhFSAACzeHACCACLb4HpQUgBAWDxLTA9aPcBAMwipAAAZtHuA2YAi2+B0nAnBeTBAlxg9hFSAACzCCkAgFn8TAo1i7VNgH2EFGpWkGubWNcETA/afQAAswgpAIBZtPuAPFjbBMw+7qRQNVjXBFQfQgoAYBYhBQAwi5ACAJjFgxOoGCy+BWoPIYWKwQcLArWHdh8AwCxCCgBgFu0+VA0W3wLVhzspzBoW3wKYDCEFADCLkAIAmEVIAQDM4sEJTKtCC3BZfAtgMoQUplWhBbgsvgUwGdp9AACzCCkAgFm0+zBrWHwLYDLcSaEoLMAFMJMIKQCAWYQUAMAsQgoAYBYPTsCHT78FYAkhBR8+/RaAJbT7AABmEVIAALNo96EoLMAFMJO4k6pyLL4FUMkIKQCAWYQUAMAsQgoAYBYPTlQ4Ft8CqGaEVIVj8S2Aaka7DwBgFiEFADCLdl+VY/EtgErGnRQAwCxCyiB2iQCAHEIKAGAWIQUAMIuQAgCYxdN9s4BdIgBgagipWRDkLhHsEAGgmtHuAwCYxZ2UQSzABYAc7qQAAGYRUgAAswipALBDBABMD0IKAGAWIQUAMIuQAgCYxSPoU8AOEQAwOwipKQhyhwiJXSIAYKpo9wEAzCKkAABm0e4LANsYAcD04E4KAGBWTYYUO0QAQGWoyZACAFQGQgoAYBYhBQAwq2qf7iu0SwQ7RABAZajakCq0SwQ7RABAZaDdBwAwi5ACAJhVte2+QtghAgAqA3dSAACzCCkAgFlltftSqZRaWlrkOLlHvdPptDKZjCKRiObMmeOrk6S5c+cqFMrlYiaTUTqdVjgcVn19fUm1V65ckeu6qq+vVzgcliSNjo5qZGREWdd/rtnMsORKTiQqJ5SrdbNjckczkuMoFJ0zrnZEct0AaqVQ9Pr5uqNpudmsMpnM9bFJap1wRE44N02um5WbSUuS74lEdzQjNzsmJxyWE46WUOvKzYx4X3u1Yxm5Y/lrnegcb+692lBYTiTqm89selhOtE6OEypYe+2pS3/tqNyxUTkh//+nJpzPcbVOpK5wbXYsd26ZkUDmfrL5LPY6SaVSub/LpHNf7nVyfT6nWptKpeS67qRzf30+J79OJpt733wWqB0evv7kbu7fvZjv+5trR0dHc68X0HuE9+9bzPd9ntrh4eHc99YMv0dMOJ831AaprDupO++8UwMDA96vt23bplgspg0bNvjq5s+fr1gspjNnznhj27dvVywW0/r16321S5YsUSwW07Fjx7yx119/XbFYTHPnzvXtt3fPPfcoFovp6NGjXu2uXbsUi8X0L2u/6Ttu7y/blUzENXL2r97Y1ROHlUzEdW7X877avs5NSibiunrq+nGHT3+sZCKu3l9/z1d7bvdmJRNxXflbtzc20nNcyURcX+zc6Kvt3/Oikom4Ojv/zxvL9J9WMhFXzy++46sd2PtTJRNxXf7o997Y6IUvlEzEdXbHOl/tP/7QoWQirqG/vOWNjV0+r2QiruQr3/LVnj/4mpKJuAa7f+ONuSOpXG0irtHRUW/84vu/UjIR18X3f3X9ANkxr9Ydub7v4WD3b5RMxHX+4Gu+17vt1hYlE3GNXT7vjQ395S0lE3H94w8dvtqzO9blzuHCF97Y5Y9+r2QiroG9P/XV9vziO0om4sr0n/bGUp8dUjIRV/+eF321X+zcmJv7nuPe2JW/dWtec5PO7d7sq+399feUTMQ1fPpjb+zqqaNKJuLq69zkqz236/ncdXLisDc2cvavuevkl+2+2v7fblEyEVfq2PveWLrvZG7uX/uuv/atn2hec5Muf/KuN5YZSCqZiOvvP/+2r3Zg3ytKJuK6dPQdb2xsqD93nXQ86as9v/9nuevkz7/zxrJXh7z5HO/CoZ25uf/TG96YmxlRMhHXvOYm7w1Lki7+6Q0lE3FdOLTTd4xrx81eHfLGhv78u9x1sv9nvtqzHU/mrpOhfm/s0tF3cnO/7xVf7d9//u3c3A8kvbHLn7yrZCKuJ//tX321Pa99V8lEXOm+k95Y6tj7uevkt1t8tRO9R7zzzt5ZeY9IfXbIG8v3HvHUun+flfcIZce88XzvEUGi3QcAMMtxS7g3GxoaUnNzs06fPq3W1tYZafdduHBBCxYskCRdvnxZDQ0Nk7b77vufQ95xy2n3ndi6Rl/6wd7A2n3HX/yG7n7h3SnVTnYrf2LrGn3p+28Fdit/8idr9ZXnc/8zC6Ld99F/Pqr7XvhjIO2+k//7TW+BdhDtvo+f/7ru+/H+QNo4J1/6hr78w32Btfs+/q9V+tqWg4G1+/JfJ8W3+z554TF9bWtXYO2+E1vX6J/+Y0/BuZ9qu++zLWt075b3Jp37qbb7Pv/xP+urP3g7sHbfZNdJMe2+T3/0dd3zo32BtftObF2TO7cy231jwymdfeVbGhwcVFNTk8pV1s+kGhoavJOVpLq6OtXV1U1Yd6NoNKpoNDrl2onGb7nllpvGIpGIIpHITVsdjZ/ca5xQWE5d+Kbx8RdYkLVOpE6O5Pt7T1brG3NCcibYHcOJROUoWkat49WOn08nHPUuvIlqfeMT1Eq5+bxxR498tRPt/DH+G9BXO9F8FlMbCufO7Yb5m665L/Y6aWho8P1d8s9nuddJnvksUHvj931x8xnA3BeoHf8fXqnY7/ubayORyISvN93vEVOpra+vv/l7awbeI3zjed4jgmRmnVShvfYk9tsDgFpkJqQK7bUnsd8eANQiHpwAAJhFSAEAzDLT7psM++0BQO3hTgoAYBYhBQAwi5ACAJhFSAEAzJq2kEqlUrlVyuM2hAUAoBjcSQEAzCKkAABmEVIAALPKWszrhMJ5N3plQ1gAQLnKCqkH/nu/QnNu/rgMiQ1hAQDlKymkrn1OYnbkSt6a8SGVHbkiuYXvpIaGhgoer1hBHo9zs3E8zs3G8Tg3G8ezem7XjlHC5+lOqKRP5j179qxaW1sDOQEAQPVJJpNavHhx2ccpKaSy2ax6enrU2NgY+KcwAgAql+u6unTpkhYtWqRQqPxn80oKKQAAZgKPoAMAzCKkAABmEVIAALMIKQCAWSWF1Pbt27VkyRLV19drxYoVOnz4cNDnBQAw7KWXXtLy5cvV2Nio+fPna+3atTp+/LivZnh4WG1tbbr11lsVi8X0xBNPqK+vr6jXKTqkdu3apfb2dm3evFlHjx7V/fffr9WrV+vcuXPFHgoAUKG6urrU1tamDz/8UPv371cmk9Fjjz3m+2imZ599Vm+//bZ2796trq4u9fT06PHHHy/qdYp+BH3FihVavny5Ojo6JOXWTLW2tmrjxo3atGlTUS8OAKgO/f39mj9/vrq6uvToo49qcHBQt99+uzo7OxWPxyVJn3/+ue6++251d3fr4YcfntJxi7qTSqfTOnLkiFatWnX9AKGQVq1ape7u7mIOBQCoIoODg5KklpYWSdKRI0eUyWR8eXHXXXfpjjvuKCovigqpgYEBjY2NacGCBb7xBQsWqLe3t5hDAQCqRDab1TPPPKNHHnlE9957rySpt7dXdXV1mjdvnq+22Lwoaxd0AADa2tr06aef6oMPPgj82EXdSd12220Kh8M3PZ3R19enhQsXBnpiAAD7NmzYoL179+q9997zbSi7cOFCpdNpXbx40VdfbF4UFVJ1dXVatmyZDhw44I1ls1kdOHBAK1euLOZQAIAK5rquNmzYoD179ujgwYNaunSp7/eXLVumaDTqy4vjx4/rzJkzReVF0e2+9vZ2rVu3Tg8++KAeeughvfzyy0qlUnr66aeLPRQAoEK1tbWps7NTb775phobG72fMzU3N2vu3Llqbm7W+vXr1d7erpaWFjU1NWnjxo1auXLllJ/sk0rcBb2jo0Pbtm1Tb2+vHnjgAb366qtasWJFsYcBAFSofB/TtHPnTj311FOScot5n3vuOb3xxhsaGRnR6tWrtWPHjqLafXxUBwDALPbuAwCYRUgBAMwipAAAZhFSAACzCCkAgFmEFADALEIKAGAWIQUAMIuQAgCYRUgBAMwipAAAZv0/+JS7kjoIspcAAAAASUVORK5CYII=", "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -72,19 +72,18 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", - "# plt.style.use('_mpl-gallery')\n", + "# plot\n", + "figure, axes = plt.subplots()\n", + "figure.set_size_inches(4, 2)\n", "\n", "# make data:\n", "y = df['col_0_100'].to_list()\n", "x = 0.5 + np.arange(len(y))\n", - "\n", - "# plot\n", - "_figure, axes = plt.subplots()\n", - "\n", "axes.bar(x, y, width=1, edgecolor=\"white\", linewidth=0.7, yerr=2)\n", + "axes.hlines([10], 0, 20, colors=[\"black\"], linestyles=[\"dotted\"])\n", "\n", - "axes.set(xlim=(0, 20),\n", - " ylim=(0, 100))\n", + "axes.set(xlim=(0, 20), ylim=(0, 100))\n", + "axes.get_xaxis().set_ticks([0, 20])\n", "axes.get_yaxis().set_ticks([])\n", "plt.show()" ] From 4f861ad44e60ca5286764d4a0d5d9e3391bf5f74 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 3 Oct 2024 12:22:35 -0400 Subject: [PATCH 11/15] plotting in app works --- dp_creator_ii/app/analysis_panel.py | 41 +++++++++- dp_creator_ii/junk.ipynb | 120 ---------------------------- 2 files changed, 40 insertions(+), 121 deletions(-) delete mode 100644 dp_creator_ii/junk.ipynb diff --git a/dp_creator_ii/app/analysis_panel.py b/dp_creator_ii/app/analysis_panel.py index a52a926..056f7f8 100644 --- a/dp_creator_ii/app/analysis_panel.py +++ b/dp_creator_ii/app/analysis_panel.py @@ -1,16 +1,55 @@ -from shiny import ui, reactive +from shiny import ui, reactive, render +import matplotlib.pyplot as plt +import numpy as np + +from dp_creator_ii.mock_data import mock_data, ColumnDef def analysis_ui(): return ui.nav_panel( "Define Analysis", "TODO: Define analysis", + ui.output_plot("plot_preview"), + "(This plot is only to demonstrate that plotting works.)", ui.input_action_button("go_to_results", "Download results"), value="analysis_panel", ) def analysis_server(input, output, session): + @render.plot() + def plot_preview(): + col_0_100 = ColumnDef(0, 100) + col_neg_pos = ColumnDef(-10, 10) + df = mock_data({"col_0_100": col_0_100}, row_count=20) + + # plot + figure, axes = plt.subplots() + figure.set_size_inches(4, 2) + + # make data: + y = df["col_0_100"].to_list() + x = 0.5 + np.arange(len(y)) + axes.bar( + x, y, width=0.8, edgecolor="skyblue", linewidth=1, yerr=2, color="skyblue" + ) + axes.bar( + x[:5], + y[:5], + width=0.8, + edgecolor="skyblue", + linewidth=0.5, + yerr=2, + color="white", + ) + axes.hlines([10], 0, 20, colors=["black"], linestyles=["dotted"]) + + axes.set(xlim=(0, 20), ylim=(0, 100)) + axes.get_xaxis().set_ticks([0, 20]) + axes.get_yaxis().set_ticks([]) + + return figure + @reactive.effect @reactive.event(input.go_to_results) def go_to_results(): diff --git a/dp_creator_ii/junk.ipynb b/dp_creator_ii/junk.ipynb deleted file mode 100644 index 82c5939..0000000 --- a/dp_creator_ii/junk.ipynb +++ /dev/null @@ -1,120 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (20, 2)
col_0_100col_neg_pos
f64f64
1.628922-9.674216
8.231443-8.353711
14.175125-7.164975
19.650658-6.069868
24.783898-5.04322
75.2161025.04322
80.3493426.069868
85.8248757.164975
91.7685578.353711
98.3710789.674216
" - ], - "text/plain": [ - "shape: (20, 2)\n", - "┌───────────┬─────────────┐\n", - "│ col_0_100 ┆ col_neg_pos │\n", - "│ --- ┆ --- │\n", - "│ f64 ┆ f64 │\n", - "╞═══════════╪═════════════╡\n", - "│ 1.628922 ┆ -9.674216 │\n", - "│ 8.231443 ┆ -8.353711 │\n", - "│ 14.175125 ┆ -7.164975 │\n", - "│ 19.650658 ┆ -6.069868 │\n", - "│ 24.783898 ┆ -5.04322 │\n", - "│ … ┆ … │\n", - "│ 75.216102 ┆ 5.04322 │\n", - "│ 80.349342 ┆ 6.069868 │\n", - "│ 85.824875 ┆ 7.164975 │\n", - "│ 91.768557 ┆ 8.353711 │\n", - "│ 98.371078 ┆ 9.674216 │\n", - "└───────────┴─────────────┘" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from dp_creator_ii.mock_data import mock_data, ColumnDef\n", - "\n", - "col_0_100 = ColumnDef(0, 100)\n", - "col_neg_pos = ColumnDef(-10, 10)\n", - "df = mock_data({\"col_0_100\": col_0_100, \"col_neg_pos\": col_neg_pos}, row_count=20)\n", - "df\n" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAakAAADqCAYAAAAYl7otAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAPxUlEQVR4nO3dbWxU55nG8evMizHx2EZOAgjhBLYvSqKkiUQIQZEiVWKDkCqVTUaqdiMtSZH6oQYpcboq3aZLuiyJtqiaJDK0qiKRql1HFLUoCRFtCQRHqZyigpSXhlCBEAx1bOwCNgzYM/ac/TDi4AOesWfm2L5n5v/7ZB5uzhx4jufi3D7PM47ruq4AADAoNNsnAABAPoQUAMAsQgoAYBYhBQAwi5ACAJhFSAEAzCKkAABmRUr5Q9lsVj09PWpsbJTjOEGfEwCgQrmuq0uXLmnRokUKhcq/DyoppHp6etTa2lr2iwMAqlMymdTixYvLPk5JIdXY2ChJOnXqlFpaWso+CQCAbalUSosWLZKUu1FpaGiYsO78+fNaunSplxPlKimkrrX4Ghsb1dTUFMiJAADsCofD3tdNTU15QyqTyUhSYD8KKimkAADV5Wp6TNFw/mAZHcv6vh7/6/GcUHjC8VIRUgAARcOOvvzDfXl/P5se9r6+74U/KlRXP3HdyJVAz4tH0AEAZhFSAACzaPcBACYVqqvXnd/fO/OvO+OvCADAFBFSAACzCCkAgFmEFADALEIKAKpQKpWS4zhyHEepVGq2T6dkhBQAwCxCCgBgFiEFADCLxbwAUIGC2hDWOkIKACpQUBvCXnNi65rAzi1ItPsAAGYRUgAAs2j3AUAVmq0NYYPGnRQAwCxCCgBgFiEFADCLkAIAA6plr72gEVIAALMIKQCAWYQUAMAs1kkBwAwptN9etey1FzRCCgBmSKH99qplr72g0e4DAJhFSAEAzKLdBwAGVMtee0HjTgoAYBYhBQAwi5ACgBKxldH0I6QAAGYRUgAAswgpAIBZPIIOAHkU2sZIYiujmUBIAUAehbYxktjKaCbQ7gMAmEVIAQDMot0HACViK6Ppx50UAMAsQgpAzWCHiMpDSAEAzCKkAABmEVIAALN4ug9A1WCHiOpDSAGoGuwQUX1o9wEAzCKkAABm0e4DUDPYIaLycCcFwDQW4NY2QgoAYBYhBQAwi5ACAJjFgxMAZhULcFEIIQVgVgW5AJfFt9WHdh8AwCxCCgBgFu0+AKaxALe2cScFADCLkAIQKHaIQJAIKQCAWYQUAMAsHpwAUBQW32ImEVIAisKn32Im0e4DAJhFSAEAzKLdByBQLL5FkLiTAmoc65pgGSEFADCLkAIAmEVIAQDM4sEJoAYUWoDL4ltYRkgBNaDQAlwW38Iy2n0AALMIKQCAWbT7gBrH4ltYxp0UUIFYgItaQUgBAMwipAAAZhFSAACzeHACMIhPvwVyCCnAID79Fsih3QcAMIuQAgCYRbsPqEAswEWt4E4KmAEsvgVKQ0gBAMwipAAAZhFSAACzeHACCACLb4HpQUgBAWDxLTA9aPcBAMwipAAAZtHuA2YAi2+B0nAnBeTBAlxg9hFSAACzCCkAgFn8TAo1i7VNgH2EFGpWkGubWNcETA/afQAAswgpAIBZtPuAPFjbBMw+7qRQNVjXBFQfQgoAYBYhBQAwi5ACAJjFgxOoGCy+BWoPIYWKwQcLArWHdh8AwCxCCgBgFu0+VA0W3wLVhzspzBoW3wKYDCEFADCLkAIAmEVIAQDM4sEJTKtCC3BZfAtgMoQUplWhBbgsvgUwGdp9AACzCCkAgFm0+zBrWHwLYDLcSaEoLMAFMJMIKQCAWYQUAMAsQgoAYBYPTsCHT78FYAkhBR8+/RaAJbT7AABmEVIAALNo96EoLMAFMJO4k6pyLL4FUMkIKQCAWYQUAMAsQgoAYBYPTlQ4Ft8CqGaEVIVj8S2Aaka7DwBgFiEFADCLdl+VY/EtgErGnRQAwCxCyiB2iQCAHEIKAGAWIQUAMIuQAgCYxdN9s4BdIgBgagipWRDkLhHsEAGgmtHuAwCYxZ2UQSzABYAc7qQAAGYRUgAAswipALBDBABMD0IKAGAWIQUAMIuQAgCYxSPoU8AOEQAwOwipKQhyhwiJXSIAYKpo9wEAzCKkAABm0e4LANsYAcD04E4KAGBWTYYUO0QAQGWoyZACAFQGQgoAYBYhBQAwq2qf7iu0SwQ7RABAZajakCq0SwQ7RABAZaDdBwAwi5ACAJhVte2+QtghAgAqA3dSAACzCCkAgFlltftSqZRaWlrkOLlHvdPptDKZjCKRiObMmeOrk6S5c+cqFMrlYiaTUTqdVjgcVn19fUm1V65ckeu6qq+vVzgcliSNjo5qZGREWdd/rtnMsORKTiQqJ5SrdbNjckczkuMoFJ0zrnZEct0AaqVQ9Pr5uqNpudmsMpnM9bFJap1wRE44N02um5WbSUuS74lEdzQjNzsmJxyWE46WUOvKzYx4X3u1Yxm5Y/lrnegcb+692lBYTiTqm89selhOtE6OEypYe+2pS3/tqNyxUTkh//+nJpzPcbVOpK5wbXYsd26ZkUDmfrL5LPY6SaVSub/LpHNf7nVyfT6nWptKpeS67qRzf30+J79OJpt733wWqB0evv7kbu7fvZjv+5trR0dHc68X0HuE9+9bzPd9ntrh4eHc99YMv0dMOJ831AaprDupO++8UwMDA96vt23bplgspg0bNvjq5s+fr1gspjNnznhj27dvVywW0/r16321S5YsUSwW07Fjx7yx119/XbFYTHPnzvXtt3fPPfcoFovp6NGjXu2uXbsUi8X0L2u/6Ttu7y/blUzENXL2r97Y1ROHlUzEdW7X877avs5NSibiunrq+nGHT3+sZCKu3l9/z1d7bvdmJRNxXflbtzc20nNcyURcX+zc6Kvt3/Oikom4Ojv/zxvL9J9WMhFXzy++46sd2PtTJRNxXf7o997Y6IUvlEzEdXbHOl/tP/7QoWQirqG/vOWNjV0+r2QiruQr3/LVnj/4mpKJuAa7f+ONuSOpXG0irtHRUW/84vu/UjIR18X3f3X9ANkxr9Ydub7v4WD3b5RMxHX+4Gu+17vt1hYlE3GNXT7vjQ395S0lE3H94w8dvtqzO9blzuHCF97Y5Y9+r2QiroG9P/XV9vziO0om4sr0n/bGUp8dUjIRV/+eF321X+zcmJv7nuPe2JW/dWtec5PO7d7sq+399feUTMQ1fPpjb+zqqaNKJuLq69zkqz236/ncdXLisDc2cvavuevkl+2+2v7fblEyEVfq2PveWLrvZG7uX/uuv/atn2hec5Muf/KuN5YZSCqZiOvvP/+2r3Zg3ytKJuK6dPQdb2xsqD93nXQ86as9v/9nuevkz7/zxrJXh7z5HO/CoZ25uf/TG96YmxlRMhHXvOYm7w1Lki7+6Q0lE3FdOLTTd4xrx81eHfLGhv78u9x1sv9nvtqzHU/mrpOhfm/s0tF3cnO/7xVf7d9//u3c3A8kvbHLn7yrZCKuJ//tX321Pa99V8lEXOm+k95Y6tj7uevkt1t8tRO9R7zzzt5ZeY9IfXbIG8v3HvHUun+flfcIZce88XzvEUGi3QcAMMtxS7g3GxoaUnNzs06fPq3W1tYZafdduHBBCxYskCRdvnxZDQ0Nk7b77vufQ95xy2n3ndi6Rl/6wd7A2n3HX/yG7n7h3SnVTnYrf2LrGn3p+28Fdit/8idr9ZXnc/8zC6Ld99F/Pqr7XvhjIO2+k//7TW+BdhDtvo+f/7ru+/H+QNo4J1/6hr78w32Btfs+/q9V+tqWg4G1+/JfJ8W3+z554TF9bWtXYO2+E1vX6J/+Y0/BuZ9qu++zLWt075b3Jp37qbb7Pv/xP+urP3g7sHbfZNdJMe2+T3/0dd3zo32BtftObF2TO7cy231jwymdfeVbGhwcVFNTk8pV1s+kGhoavJOVpLq6OtXV1U1Yd6NoNKpoNDrl2onGb7nllpvGIpGIIpHITVsdjZ/ca5xQWE5d+Kbx8RdYkLVOpE6O5Pt7T1brG3NCcibYHcOJROUoWkat49WOn08nHPUuvIlqfeMT1Eq5+bxxR498tRPt/DH+G9BXO9F8FlMbCufO7Yb5m665L/Y6aWho8P1d8s9nuddJnvksUHvj931x8xnA3BeoHf8fXqnY7/ubayORyISvN93vEVOpra+vv/l7awbeI3zjed4jgmRmnVShvfYk9tsDgFpkJqQK7bUnsd8eANQiHpwAAJhFSAEAzDLT7psM++0BQO3hTgoAYBYhBQAwi5ACAJhFSAEAzJq2kEqlUrlVyuM2hAUAoBjcSQEAzCKkAABmEVIAALPKWszrhMJ5N3plQ1gAQLnKCqkH/nu/QnNu/rgMiQ1hAQDlKymkrn1OYnbkSt6a8SGVHbkiuYXvpIaGhgoer1hBHo9zs3E8zs3G8Tg3G8ezem7XjlHC5+lOqKRP5j179qxaW1sDOQEAQPVJJpNavHhx2ccpKaSy2ax6enrU2NgY+KcwAgAql+u6unTpkhYtWqRQqPxn80oKKQAAZgKPoAMAzCKkAABmEVIAALMIKQCAWSWF1Pbt27VkyRLV19drxYoVOnz4cNDnBQAw7KWXXtLy5cvV2Nio+fPna+3atTp+/LivZnh4WG1tbbr11lsVi8X0xBNPqK+vr6jXKTqkdu3apfb2dm3evFlHjx7V/fffr9WrV+vcuXPFHgoAUKG6urrU1tamDz/8UPv371cmk9Fjjz3m+2imZ599Vm+//bZ2796trq4u9fT06PHHHy/qdYp+BH3FihVavny5Ojo6JOXWTLW2tmrjxo3atGlTUS8OAKgO/f39mj9/vrq6uvToo49qcHBQt99+uzo7OxWPxyVJn3/+ue6++251d3fr4YcfntJxi7qTSqfTOnLkiFatWnX9AKGQVq1ape7u7mIOBQCoIoODg5KklpYWSdKRI0eUyWR8eXHXXXfpjjvuKCovigqpgYEBjY2NacGCBb7xBQsWqLe3t5hDAQCqRDab1TPPPKNHHnlE9957rySpt7dXdXV1mjdvnq+22Lwoaxd0AADa2tr06aef6oMPPgj82EXdSd12220Kh8M3PZ3R19enhQsXBnpiAAD7NmzYoL179+q9997zbSi7cOFCpdNpXbx40VdfbF4UFVJ1dXVatmyZDhw44I1ls1kdOHBAK1euLOZQAIAK5rquNmzYoD179ujgwYNaunSp7/eXLVumaDTqy4vjx4/rzJkzReVF0e2+9vZ2rVu3Tg8++KAeeughvfzyy0qlUnr66aeLPRQAoEK1tbWps7NTb775phobG72fMzU3N2vu3Llqbm7W+vXr1d7erpaWFjU1NWnjxo1auXLllJ/sk0rcBb2jo0Pbtm1Tb2+vHnjgAb366qtasWJFsYcBAFSofB/TtHPnTj311FOScot5n3vuOb3xxhsaGRnR6tWrtWPHjqLafXxUBwDALPbuAwCYRUgBAMwipAAAZhFSAACzCCkAgFmEFADALEIKAGAWIQUAMIuQAgCYRUgBAMwipAAAZv0/+JS7kjoIspcAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "# plot\n", - "figure, axes = plt.subplots()\n", - "figure.set_size_inches(4, 2)\n", - "\n", - "# make data:\n", - "y = df['col_0_100'].to_list()\n", - "x = 0.5 + np.arange(len(y))\n", - "axes.bar(x, y, width=1, edgecolor=\"white\", linewidth=0.7, yerr=2)\n", - "axes.hlines([10], 0, 20, colors=[\"black\"], linestyles=[\"dotted\"])\n", - "\n", - "axes.set(xlim=(0, 20), ylim=(0, 100))\n", - "axes.get_xaxis().set_ticks([0, 20])\n", - "axes.get_yaxis().set_ticks([])\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 03611bd503b4ff6fee22738695703c1d25763326 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 3 Oct 2024 13:07:50 -0400 Subject: [PATCH 12/15] fix the math --- dp_creator_ii/mock_data.py | 12 ++++++++---- dp_creator_ii/tests/test_mock_data.py | 9 ++++----- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/dp_creator_ii/mock_data.py b/dp_creator_ii/mock_data.py index dcfb462..92b626e 100644 --- a/dp_creator_ii/mock_data.py +++ b/dp_creator_ii/mock_data.py @@ -12,13 +12,17 @@ def mock_data(column_defs, row_count=1000): # The details here don't really matter: Any method that # deterministically gave us more values in the middle of the range # and fewer at the extremes would do. - quantile_width = 2 / 3 + quantile_width = 95 / 100 for column_name, column_def in column_defs.items(): - scale = column_def.max - column_def.min - center = (column_def.max + column_def.min) / 2 + min_ppf = norm.ppf((1 - quantile_width) / 2) + max_ppf = norm.ppf(1 - (1 - quantile_width) / 2) + min_value = column_def.min + max_value = column_def.max + slope = (max_value - min_value) / (max_ppf - min_ppf) + intercept = min_value - slope * min_ppf for i in range(row_count): quantile = (quantile_width * i / (row_count - 1)) + (1 - quantile_width) / 2 ppf = norm.ppf(quantile) - value = ppf * scale / 2 + center + value = slope * ppf + intercept data[column_name].append(value) return pl.DataFrame(data=data, schema=schema) diff --git a/dp_creator_ii/tests/test_mock_data.py b/dp_creator_ii/tests/test_mock_data.py index 44a99d8..ef5969e 100644 --- a/dp_creator_ii/tests/test_mock_data.py +++ b/dp_creator_ii/tests/test_mock_data.py @@ -10,8 +10,7 @@ def test_mock_data(): df = mock_data({"col_0_100": col_0_100, "col_neg_pos": col_neg_pos}) assert df.select(pl.len()).item() == 1000 - assert 0 < df.get_column("col_0_100")[0] < 2 - assert 98 < df.get_column("col_0_100")[999] < 100 - assert ( - df.get_column("col_neg_pos")[0] + df.get_column("col_neg_pos")[999] - ) == approx(0) + assert df.get_column("col_0_100")[0] == 0 + assert df.get_column("col_0_100")[999] == 100 + assert df.get_column("col_neg_pos")[0] == -10 + assert df.get_column("col_neg_pos")[999] == 10 From ed9c7876d903c3620ea146dc977914287105531b Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 3 Oct 2024 13:22:49 -0400 Subject: [PATCH 13/15] Pull plot into helper function --- dp_creator_ii/app/analysis_panel.py | 65 ++++++++++++++++------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/dp_creator_ii/app/analysis_panel.py b/dp_creator_ii/app/analysis_panel.py index 056f7f8..97be201 100644 --- a/dp_creator_ii/app/analysis_panel.py +++ b/dp_creator_ii/app/analysis_panel.py @@ -16,39 +16,44 @@ def analysis_ui(): ) +def plot(y_values, x_min_label="min", x_max_label="max", y_cutoff=0): + figure, axes = plt.subplots() + # figure.set_size_inches(4, 2) + + x_values = 0.5 + np.arange(len(y_values)) + axes.bar( + x_values, + y_values, + width=0.8, + edgecolor="skyblue", + linewidth=1, + yerr=2, + color="skyblue", + ) + axes.bar( + x_values[:5], + y_values[:5], + width=0.8, + edgecolor="skyblue", + linewidth=0.5, + yerr=2, + color="white", + ) + axes.hlines([y_cutoff], 0, len(y_values), colors=["black"], linestyles=["dotted"]) + + axes.set(xlim=(0, len(y_values)), ylim=(0, max(y_values))) + axes.get_xaxis().set_ticks([]) + axes.get_yaxis().set_ticks([]) + + return figure + + def analysis_server(input, output, session): @render.plot() def plot_preview(): - col_0_100 = ColumnDef(0, 100) - col_neg_pos = ColumnDef(-10, 10) - df = mock_data({"col_0_100": col_0_100}, row_count=20) - - # plot - figure, axes = plt.subplots() - figure.set_size_inches(4, 2) - - # make data: - y = df["col_0_100"].to_list() - x = 0.5 + np.arange(len(y)) - axes.bar( - x, y, width=0.8, edgecolor="skyblue", linewidth=1, yerr=2, color="skyblue" - ) - axes.bar( - x[:5], - y[:5], - width=0.8, - edgecolor="skyblue", - linewidth=0.5, - yerr=2, - color="white", - ) - axes.hlines([10], 0, 20, colors=["black"], linestyles=["dotted"]) - - axes.set(xlim=(0, 20), ylim=(0, 100)) - axes.get_xaxis().set_ticks([0, 20]) - axes.get_yaxis().set_ticks([]) - - return figure + df = mock_data({"col_0_100": ColumnDef(0, 100)}, row_count=20) + + return plot(df["col_0_100"].to_list(), y_cutoff=10) @reactive.effect @reactive.event(input.go_to_results) From 3cded8ef383f796e8f9e720f81f674ca6d38c4c8 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 3 Oct 2024 13:53:22 -0400 Subject: [PATCH 14/15] move plot to separate file --- dp_creator_ii/app/analysis_panel.py | 48 +++++++---------------------- dp_creator_ii/app/plots.py | 40 ++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 37 deletions(-) create mode 100644 dp_creator_ii/app/plots.py diff --git a/dp_creator_ii/app/analysis_panel.py b/dp_creator_ii/app/analysis_panel.py index 97be201..31c87ce 100644 --- a/dp_creator_ii/app/analysis_panel.py +++ b/dp_creator_ii/app/analysis_panel.py @@ -1,8 +1,7 @@ from shiny import ui, reactive, render -import matplotlib.pyplot as plt -import numpy as np from dp_creator_ii.mock_data import mock_data, ColumnDef +from dp_creator_ii.app.plots import plot_error_bars_with_cutoff def analysis_ui(): @@ -16,44 +15,19 @@ def analysis_ui(): ) -def plot(y_values, x_min_label="min", x_max_label="max", y_cutoff=0): - figure, axes = plt.subplots() - # figure.set_size_inches(4, 2) - - x_values = 0.5 + np.arange(len(y_values)) - axes.bar( - x_values, - y_values, - width=0.8, - edgecolor="skyblue", - linewidth=1, - yerr=2, - color="skyblue", - ) - axes.bar( - x_values[:5], - y_values[:5], - width=0.8, - edgecolor="skyblue", - linewidth=0.5, - yerr=2, - color="white", - ) - axes.hlines([y_cutoff], 0, len(y_values), colors=["black"], linestyles=["dotted"]) - - axes.set(xlim=(0, len(y_values)), ylim=(0, max(y_values))) - axes.get_xaxis().set_ticks([]) - axes.get_yaxis().set_ticks([]) - - return figure - - def analysis_server(input, output, session): @render.plot() def plot_preview(): - df = mock_data({"col_0_100": ColumnDef(0, 100)}, row_count=20) - - return plot(df["col_0_100"].to_list(), y_cutoff=10) + min_x = 0 + max_x = 100 + df = mock_data({"col_0_100": ColumnDef(min_x, max_x)}, row_count=20) + return plot_error_bars_with_cutoff( + df["col_0_100"].to_list(), + x_min_label=min_x, + x_max_label=max_x, + y_cutoff=30, + y_error=5, + ) @reactive.effect @reactive.event(input.go_to_results) diff --git a/dp_creator_ii/app/plots.py b/dp_creator_ii/app/plots.py new file mode 100644 index 0000000..0ca6118 --- /dev/null +++ b/dp_creator_ii/app/plots.py @@ -0,0 +1,40 @@ +import matplotlib.pyplot as plt +import numpy as np + + +def plot_error_bars_with_cutoff( + y_values, x_min_label="min", x_max_label="max", y_cutoff=0, y_error=0 +): + x_values = 0.5 + np.arange(len(y_values)) + x_values_above = [] + x_values_below = [] + y_values_above = [] + y_values_below = [] + for x, y in zip(x_values, y_values): + if y < y_cutoff: + x_values_below.append(x) + y_values_below.append(y) + else: + x_values_above.append(x) + y_values_above.append(y) + + figure, axes = plt.subplots() + color = "skyblue" + shared = { + "width": 0.8, + "edgecolor": color, + "linewidth": 1, + "yerr": y_error, + } + axes.bar(x_values_above, y_values_above, color=color, **shared) + axes.bar(x_values_below, y_values_below, color="white", **shared) + axes.hlines([y_cutoff], 0, len(y_values), colors=["black"], linestyles=["dotted"]) + + axes.set(xlim=(0, len(y_values)), ylim=(0, max(y_values))) + axes.get_xaxis().set_ticks( + ticks=[x_values[0], x_values[-1]], + labels=[x_min_label, x_max_label], + ) + axes.get_yaxis().set_ticks([]) + + return figure From ea5a7f74db332e3f18ab1eb205c6832a97d53e1e Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 3 Oct 2024 13:54:39 -0400 Subject: [PATCH 15/15] unused import --- dp_creator_ii/tests/test_mock_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dp_creator_ii/tests/test_mock_data.py b/dp_creator_ii/tests/test_mock_data.py index ef5969e..4530a75 100644 --- a/dp_creator_ii/tests/test_mock_data.py +++ b/dp_creator_ii/tests/test_mock_data.py @@ -1,5 +1,4 @@ import polars as pl -from pytest import approx from dp_creator_ii.mock_data import mock_data, ColumnDef