Calculate rapid test rates after smoothing and averaging (#123)
roecla authored Jul 21, 2021
1 parent baba22e commit 66b73fb
Showing 5 changed files with 139 additions and 64 deletions.
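
The gist of the change: rapid test rates such as the true positive rate are no longer written out by the simulation day by day; they are now computed afterwards from the smoothed and averaged count columns. A minimal sketch of why that order of operations matters, on made-up data (the 7-day rolling mean mirrors the new task; all names and numbers are hypothetical):

import numpy as np
import pandas as pd

idx = pd.date_range("2021-03-01", periods=14)
rng = np.random.default_rng(0)
# hypothetical daily counts: true positives and all positive tests
num = pd.Series(rng.poisson(4, 14), index=idx).astype(float)
denom = num + pd.Series(rng.poisson(16, 14), index=idx).astype(float)

# smooth numerator and denominator first, divide afterwards
stable_rate = (
    num.rolling(window=7, min_periods=1).mean()
    / denom.rolling(window=7, min_periods=1).mean()
)

# divide first, smooth afterwards: days with small denominators dominate
noisy_rate = (num / denom).rolling(window=7, min_periods=1).mean()

Smoothing first suppresses the day-to-day noise before the division; dividing the raw daily counts first lets single noisy days distort the averaged rate.
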
8 changes: 7 additions & 1 deletion src/plotting/task_plot_rapid_test_statistics.py
@@ -15,9 +15,15 @@
)
from src.simulation.task_process_rapid_test_statistics import CHANNELS
from src.simulation.task_process_rapid_test_statistics import OUTCOMES
from src.simulation.task_process_rapid_test_statistics import RATES
from src.simulation.task_process_rapid_test_statistics import SHARE_TYPES

RATES = [
    "true_positive_rate",
    "false_positive_rate",
    "true_negative_rate",
    "false_negative_rate",
]


def _create_rapid_test_plot_parametrization():
    signature = "depends_on, plot_single_runs, ylabel, produces"
137 changes: 111 additions & 26 deletions src/simulation/task_process_rapid_test_statistics.py
@@ -5,7 +5,7 @@

from src.config import BLD
from src.simulation.scenario_config import (
    create_path_to_rapid_test_statistic_time_series,
    create_path_to_rapid_test_statistic_time_series as get_ts_path,
)
from src.simulation.scenario_config import create_path_to_raw_rapid_test_statistics
from src.simulation.scenario_config import get_named_scenarios
@@ -29,32 +29,18 @@
]
SHARE_TYPES = ["number", "popshare", "testshare"]

RATES = [
    "false_negative_rate",
    "false_positive_rate",
    "true_negative_rate",
    "true_positive_rate",
]

RAPID_TEST_STATISTICS = []
for out, channel, share_type in product(OUTCOMES, CHANNELS, SHARE_TYPES):
    RAPID_TEST_STATISTICS.append(f"{share_type}_{out}_by_{channel}")
for out, channel in product(RATES, CHANNELS):
    RAPID_TEST_STATISTICS.append(f"{out}_by_{channel}")


_PARAMETRIZATION = [
    (
        column,
        create_path_to_rapid_test_statistic_time_series("spring_baseline", column),
    )
    for column in RAPID_TEST_STATISTICS
_SINGLE_COL_PARAMETRIZATION = [
    (column, get_ts_path("spring_baseline", column)) for column in RAPID_TEST_STATISTICS
]


@pytask.mark.skipif(_N_SEEDS == 0, reason="spring baseline did not run.")
@pytask.mark.depends_on(_DEPENDENCIES)
@pytask.mark.parametrize("column, produces", _PARAMETRIZATION)
@pytask.mark.parametrize("column, produces", _SINGLE_COL_PARAMETRIZATION)
def task_process_rapid_test_statistics(depends_on, column, produces):
    dfs = {
        seed: pd.read_csv(path, parse_dates=["date"], index_col="date")
@@ -70,6 +56,113 @@ def task_process_rapid_test_statistics(depends_on, column, produces):
    df.to_pickle(produces)


def _get_rate_parametrization(channels):
    rate_parametrization = []
    for channel in channels:
        rate_parametrization += [
            (
                f"true_positive_rate_by_{channel}",
                {
                    "numerator": get_ts_path(
                        "spring_baseline", f"number_true_positive_by_{channel}"
                    ),
                    "denominator": get_ts_path(
                        "spring_baseline", f"number_tested_positive_by_{channel}"
                    ),
                },
                get_ts_path("spring_baseline", f"true_positive_rate_by_{channel}"),
            ),
            (
                f"false_positive_rate_by_{channel}",
                {
                    "numerator": get_ts_path(
                        "spring_baseline", f"number_false_positive_by_{channel}"
                    ),
                    "denominator": get_ts_path(
                        "spring_baseline", f"number_tested_positive_by_{channel}"
                    ),
                },
                get_ts_path("spring_baseline", f"false_positive_rate_by_{channel}"),
            ),
            (
                f"true_negative_rate_by_{channel}",
                {
                    "numerator": get_ts_path(
                        "spring_baseline", f"number_true_negative_by_{channel}"
                    ),
                    "denominator": get_ts_path(
                        "spring_baseline", f"number_tested_negative_by_{channel}"
                    ),
                },
                get_ts_path("spring_baseline", f"true_negative_rate_by_{channel}"),
            ),
            (
                f"false_negative_rate_by_{channel}",
                {
                    "numerator": get_ts_path(
                        "spring_baseline", f"number_false_negative_by_{channel}"
                    ),
                    "denominator": get_ts_path(
                        "spring_baseline", f"number_tested_negative_by_{channel}"
                    ),
                },
                get_ts_path("spring_baseline", f"false_negative_rate_by_{channel}"),
            ),
        ]
    return rate_parametrization


_RATE_PARAMETRIZATION = _get_rate_parametrization(CHANNELS)


@pytask.mark.parametrize("name, depends_on, produces", _RATE_PARAMETRIZATION)
def task_create_rapid_test_statistic_ratios(name, depends_on, produces):
numerator = pd.read_pickle(depends_on["numerator"])
denominator = pd.read_pickle(depends_on["denominator"])

seeds = list(range(_N_SEEDS))
rate_df = pd.DataFrame()
# needed for plotting single runs
for s in seeds:
smooth_num = numerator[s].rolling(window=7, min_periods=1, center=False).mean()
smooth_denom = (
denominator[s].rolling(window=7, min_periods=1, center=False).mean()
)
rate_df[s] = smooth_num / smooth_denom

# it's important to first average and smooth and **then** divide to get rid of noise
# before the division.
rate_df[name] = (
# use that the mean is created **after** the seeds have been added
numerator[numerator.columns[-1]]
/ denominator[denominator.columns[-1]]
)
rate_df.to_pickle(produces)


_ALL_RAPID_TEST_STATISTICS = [path for col, path in _SINGLE_COL_PARAMETRIZATION] + [
    spec[-1] for spec in _RATE_PARAMETRIZATION
]


@pytask.mark.depends_on(_ALL_RAPID_TEST_STATISTICS)
@pytask.mark.produces(BLD / "tables" / "rapid_test_statistics.csv")
def task_create_nice_rapid_test_statistic_table_for_lookup(produces):
    column_names = [col for col, _ in _SINGLE_COL_PARAMETRIZATION] + [
        spec[0] for spec in _RATE_PARAMETRIZATION
    ]
    assert len(set(column_names)) == len(column_names), (
        "There are duplicate names in the rapid test statistic columns. "
        "You probably forgot to specify a channel as part of the column name."
    )

    to_concat = [
        pd.read_pickle(path)[[column]] for column, path in _SINGLE_COL_PARAMETRIZATION
    ] + [pd.read_pickle(path)[[column]] for column, _, path in _RATE_PARAMETRIZATION]
    df = pd.concat(to_concat, axis=1)
    df.round(4).to_csv(produces)


@pytask.mark.depends_on(_DEPENDENCIES)
def task_check_that_a_table_was_created_for_each_rapid_test_statistic(depends_on):
    statistics_saved_by_sid = pd.read_csv(depends_on[0]).columns
@@ -78,11 +171,3 @@ def task_check_that_a_table_was_created_for_each_rapid_test_statistic(depends_on
    assert set(should_have_a_table) == set(
        RAPID_TEST_STATISTICS
    ), "Some rapid test statistic columns that should have a table do not."


@pytask.mark.depends_on([path for col, path in _PARAMETRIZATION])
@pytask.mark.produces(BLD / "tables" / "rapid_test_statistics.csv")
def task_create_nice_rapid_test_statistic_table_for_lookup(produces):
    to_concat = [pd.read_pickle(path)[[column]] for column, path in _PARAMETRIZATION]
    df = pd.concat(to_concat, axis=1)
    df.round(4).to_csv(produces)
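
For orientation: task_create_rapid_test_statistic_ratios above relies on a convention established by task_process_rapid_test_statistics, namely that each pickled time series holds one column per seed plus an across-seed mean appended as the last column, which is why numerator.columns[-1] picks out the mean. A toy sketch of that assumed layout (the column name is hypothetical):

import pandas as pd

idx = pd.date_range("2021-03-01", periods=3)
# one column per seed (seeds 0 and 1 here), made-up counts
df = pd.DataFrame({0: [2.0, 4.0, 6.0], 1: [4.0, 6.0, 8.0]}, index=idx)
# the mean column is appended last, after all seed columns
df["number_true_positive_by_overall"] = df.mean(axis=1)

assert df.columns[-1] == "number_true_positive_by_overall"
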
20 changes: 3 additions & 17 deletions src/testing/create_rapid_test_statistics.py
@@ -67,10 +67,9 @@ def _calculate_rapid_test_statistics_by_channel(
    Naming convention for the denominators:

        - testshare -> n_tests
        - popshare -> n_people
        - number -> n_people / POPULATION_GERMANY
        - rate -> n_{pos/neg}_tests
        - testshare -> n_tests
        - popshare -> n_people
        - number -> n_people / POPULATION_GERMANY

    Args:
        states (pandas.DataFrame): sid states DataFrame.
@@ -113,17 +112,4 @@
statistics[f"popshare_{name}_by_{channel_name}"] = sr.sum() / n_obs
statistics[f"testshare_{name}_by_{channel_name}"] = sr.sum() / n_tested

statistics[f"true_positive_rate_by_{channel_name}"] = (
individual_outcomes["true_positive"].sum() / n_tested_positive
)
statistics[f"true_negative_rate_by_{channel_name}"] = (
individual_outcomes["true_negative"].sum() / n_tested_negative
)
statistics[f"false_positive_rate_by_{channel_name}"] = (
individual_outcomes["false_positive"].sum() / n_tested_positive
)
statistics[f"false_negative_rate_by_{channel_name}"] = (
individual_outcomes["false_negative"].sum() / n_tested_negative
)

    return statistics
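
As a reading aid for the docstring's naming convention, a toy sketch of how the three remaining denominators relate. POPULATION_GERMANY stands in for the constant defined elsewhere in the project; the sample and the test count are made up:

import pandas as pd

POPULATION_GERMANY = 83_000_000  # assumption: stand-in for the project-wide constant

sr = pd.Series([True, False, True, True, False])  # toy outcome indicator per person
n_obs = len(sr)  # individuals in the synthetic sample
n_tested = 4  # hypothetical number of rapid tests among them

number = sr.sum() / (n_obs / POPULATION_GERMANY)  # count scaled to the full population
popshare = sr.sum() / n_obs  # share of the population
testshare = sr.sum() / n_tested  # share of administered tests
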
18 changes: 18 additions & 0 deletions src/testing/task_get_and_plot_share_of_tests_for_symptomatics.py
@@ -9,6 +9,7 @@
from src.config import PLOT_SIZE
from src.config import PLOT_START_DATE
from src.config import SRC
from src.plotting.plotting import BLUE
from src.plotting.plotting import style_plot
from src.testing.shared import convert_weekly_to_daily
from src.testing.shared import get_date_from_year_and_week
@@ -46,6 +47,11 @@
/ "data"
/ "testing"
/ "share_of_pcr_tests_going_to_symptomatics.pdf",
"used_share_pcr_going_to_symptomatic": BLD
/ "figures"
/ "data"
/ "testing"
/ "used_share_of_pcr_tests_going_to_symptomatics.pdf",
}
)
def task_prepare_characteristics_of_the_tested(depends_on, produces):
@@ -106,6 +112,18 @@ def task_prepare_characteristics_of_the_tested(depends_on, produces):
    df = df.reset_index().rename(columns={"index": "date"})
    df.to_csv(produces["data"])

    fig, ax = plt.subplots(figsize=PLOT_SIZE)
    sns.lineplot(
        x=share_of_tests_for_symptomatics_series.index,
        y=share_of_tests_for_symptomatics_series,
        color=BLUE,
        linewidth=3.0,
        alpha=0.6,
    )
    fig, ax = style_plot(fig, ax)
    fig.tight_layout()
    fig.savefig(produces["used_share_pcr_going_to_symptomatic"])


def _clean_data(df):
    share_sym_de = "Anteil keine, bzw. keine für COVID-19 bedeutsamen Symptome"
20 changes: 0 additions & 20 deletions tests/test_create_rapid_test_statistics.py
@@ -1,4 +1,3 @@
import numpy as np
import pandas as pd
from pandas.testing import assert_series_equal

@@ -61,21 +60,6 @@ def mocked_sample_test_outcome(states, receives_rapid_test, params, seed):
        {
            0: {
                "date": date,
                # overall shares
                "true_positive_rate_by_overall": 0.5,
                "true_negative_rate_by_overall": 0.5,
                "false_negative_rate_by_overall": 0.5,
                "false_positive_rate_by_overall": 0.5,
                # shares in a
                "true_positive_rate_by_a": 0.5,
                "true_negative_rate_by_a": 0.0,
                "false_negative_rate_by_a": 1.0,
                "false_positive_rate_by_a": 0.5,
                # shares in b
                "true_positive_rate_by_b": np.nan,
                "true_negative_rate_by_b": 0.5,
                "false_negative_rate_by_b": 0.5,
                "false_positive_rate_by_b": np.nan,
                # numbers
                "number_false_negative_by_a": 2 * scaling,
                "number_false_negative_by_b": 2 * scaling,
@@ -197,10 +181,6 @@ def test_calculate_rapid_test_statistics_by_channel():
"testshare_false_negative_by_channel": 2 / 5,
"testshare_true_positive_by_channel": 1 / 5,
"testshare_true_negative_by_channel": 1 / 5,
"true_positive_rate_by_channel": 1 / 2,
"true_negative_rate_by_channel": 1 / 3,
"false_positive_rate_by_channel": 1 / 2,
"false_negative_rate_by_channel": 2 / 3,
}
)
assert_series_equal(res.loc[expected.index], expected, check_names=False)
