Skip to content

Commit

Permalink
Add y labels. Save rapid test statistics as .csv and plot each rapid …
Browse files Browse the repository at this point in the history
…test statistic separately. (#118)
  • Loading branch information
roecla authored Jun 25, 2021
1 parent 7eb914a commit 6a27233
Show file tree
Hide file tree
Showing 16 changed files with 184 additions and 106 deletions.
32 changes: 25 additions & 7 deletions src/plotting/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,21 @@
"r_effective": "effective reproduction number as estimated by the RKI",
}

# Maps an outcome name to the y axis label used when plotting that outcome.
# Adjacent string literals are concatenated by Python, so every entry is one
# label string; some of them carry hard-coded "\n" line breaks.
OUTCOME_TO_Y_LABEL = {
"newly_infected": "weekly total new cases per 100,000 inhabitants",
"new_known_case": "weekly reported new cases per 100,000 inhabitants",
"newly_deceased": "weekly deaths per 100,000 inhabitants",
"share_ever_rapid_test": "share of the population that has \n"
"ever done a rapid test",
"share_rapid_test_in_last_week": "share of the population that has done \na rapid "
"test within the last seven days",
"share_b117": "share of B.1.1.7 among new infections",
"share_doing_rapid_test_today": "share of the population doing "
"a rapid test per day",
"ever_vaccinated": "share of the population that has been vaccinated",
"r_effective": "effective reproduction number $R_t$",
}


def calculate_virus_strain_shares(results):
"""Create the weekly incidences from a list of simulation runs.
Expand Down Expand Up @@ -87,6 +102,7 @@ def plot_incidences(
scenario_starts=None,
fig=None,
ax=None,
ylabel=None,
):
"""Plot incidences.
Expand All @@ -101,6 +117,7 @@ def plot_incidences(
will plot all runs.
scenario_starts (list, optional): the scenario start points. Each consists of a
tuple of the date and a label.
ylabel (str, optional): Label of the y axis.
Returns:
fig, ax
Expand Down Expand Up @@ -148,12 +165,10 @@ def plot_incidences(

fig, ax = style_plot(fig, ax)
ax.set_title(title)
if "New Cases" in title or "New Deaths" in title:
ax.set_ylabel("smoothed weekly incidence")
elif "share" in title.lower():
ax.set_ylabel("share")
elif "Effective" in title:
ax.set_ylabel("$R_t$")

if ylabel is not None:
ax.set_ylabel(ylabel)

x, y, width, height = 0.0, -0.3, 1, 0.2
ax.legend(loc="upper center", bbox_to_anchor=(x, y, width, height), ncol=2)
fig.tight_layout()
Expand Down Expand Up @@ -189,6 +204,7 @@ def plot_share_known_cases(share_known_cases, title, groupby, plot_single_runs=F

fig, ax = style_plot(fig, ax)
ax.set_title(title)
ax.set_ylabel("share of infections that is confirmed\nthrough PCR tests")

# Reduce legend to have each age group only once and move it to below the plot
x, y, width, height = 0.0, -0.3, 1, 0.2
Expand All @@ -214,7 +230,7 @@ def plot_share_known_cases(share_known_cases, title, groupby, plot_single_runs=F
return fig, ax


def plot_group_time_series(df, title, rki=None):
def plot_group_time_series(df, title, rki=None, ylabel=None):
"""Plot a time series by group with more than one run.
Args:
Expand All @@ -223,6 +239,7 @@ def plot_group_time_series(df, title, rki=None):
title (str): the title of the plot
rki (pandas.Series, optional): Series with the RKI data. Must have the same
index as df.
ylabel (str, optional): label of the y axis.
"""
df = df.swaplevel().copy(deep=True)
Expand All @@ -249,6 +266,7 @@ def plot_group_time_series(df, title, rki=None):
scenario_starts=None,
fig=fig,
ax=ax,
ylabel=ylabel,
)

if rki is not None:
Expand Down
11 changes: 9 additions & 2 deletions src/plotting/task_plot_decomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,16 @@
_ORDERED_CHANNELS = ["Rapid Tests", "Seasonality", "Vaccinations"]
_ORDERED_RAPID_TEST_CHANNELS = ["Private", "Work", "School"]

# Evaluated once at import time and reused as ``skipif`` conditions by the plot
# tasks: each flag is True only if every entry obtained by iterating over the
# respective scenario collection is contained in _AVAILABLE_SCENARIOS.
# NOTE(review): the *_TO_MEMBERS names suggest dicts, in which case the entries
# checked are the dict keys -- confirm.
_ALL_CHANNEL_SCENARIOS_AVAILABLE = all(
i in _AVAILABLE_SCENARIOS for i in _CHANNEL_SCENARIOS_TO_MEMBERS
)
_ALL_RAPID_TEST_SCENARIOS_AVAILABLE = all(
i in _AVAILABLE_SCENARIOS for i in _RAPID_TEST_SCENARIOS_TO_MEMBERS
)


@pytask.mark.skipif(
not all(i in _AVAILABLE_SCENARIOS for i in _CHANNEL_SCENARIOS_TO_MEMBERS),
not _ALL_CHANNEL_SCENARIOS_AVAILABLE,
reason="required scenarios are not available",
)
@pytask.mark.depends_on(
Expand Down Expand Up @@ -103,7 +110,7 @@ def task_plot_decomposition_of_infection_channels_in_spring(depends_on, produces


@pytask.mark.skipif(
not all(i in _AVAILABLE_SCENARIOS for i in _RAPID_TEST_SCENARIOS_TO_MEMBERS),
not _ALL_RAPID_TEST_SCENARIOS_AVAILABLE,
reason="required scenarios are not available",
)
@pytask.mark.depends_on(
Expand Down
29 changes: 28 additions & 1 deletion src/plotting/task_plot_incidences_by_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from src.config import BLD
from src.config import SRC
from src.plotting.plotting import make_nice_outcome
from src.plotting.plotting import OUTCOME_TO_Y_LABEL
from src.plotting.plotting import plot_group_time_series
from src.simulation.load_simulation_inputs import create_period_outputs
from src.simulation.scenario_config import create_path_to_group_incidence_plot
Expand Down Expand Up @@ -92,7 +93,33 @@ def task_plot_age_group_incidences_in_one_scenario(

nice_outcome = make_nice_outcome(outcome)
title = f"{nice_outcome} in " + "{group}"
fig, ax = plot_group_time_series(incidences, title, rki)
ylabel = _get_ylabel(outcome)
fig, ax = plot_group_time_series(
df=incidences,
title=title,
rki=rki,
ylabel=ylabel,
)

fig.savefig(produces)
plt.close()


def _get_ylabel(outcome):
    """Return the y axis label of an outcome, wrapped onto several lines.

    The label is looked up in ``OUTCOME_TO_Y_LABEL``. Labels with more than 45
    characters are distributed over three lines, labels with more than 24
    characters over two lines; shorter labels are returned unchanged. Line
    breaks are only inserted between words. When a label is re-wrapped, any
    whitespace it already contains (including hard-coded line breaks) is
    normalized first.

    Args:
        outcome (str): name of the outcome that is plotted.

    Returns:
        str or None: the (possibly multi-line) label. None if no label is
            defined for the outcome, so that the plot is left without a y
            label instead of failing with a KeyError.

    """
    ylabel = OUTCOME_TO_Y_LABEL.get(outcome)
    if ylabel is None:
        return None

    if len(ylabel) > 45:
        n_lines = 3
    elif len(ylabel) > 24:
        n_lines = 2
    else:
        return ylabel

    words = ylabel.split()
    # Never ask for more lines than there are words. Otherwise the leading
    # chunks would be empty and the label would start with blank lines.
    n_lines = min(n_lines, len(words))
    if n_lines < 2:
        return ylabel

    # All lines but the last get ``step`` words; the last one takes the rest.
    step = len(words) // n_lines
    lines = [words[i * step : (i + 1) * step] for i in range(n_lines - 1)]
    lines.append(words[(n_lines - 1) * step :])
    return "\n".join(" ".join(line) for line in lines)
157 changes: 82 additions & 75 deletions src/plotting/task_plot_rapid_test_statistics.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytask
import seaborn as sns

from src.config import BLD
from src.config import PLOT_END_DATE
from src.config import PLOT_SIZE
from src.plotting.plotting import BLUE
from src.plotting.plotting import RED
Expand All @@ -14,35 +14,38 @@

SEEDS = get_named_scenarios()["combined_baseline"]["n_seeds"]

_DEPENDENCIES = {
_CSV_DEPENDENCIES = {
seed: create_path_to_rapid_test_statistics("combined_baseline", seed)
for seed in range(SEEDS)
}

# Rapid test demand columns; each one gets its own .csv table (see
# _CSV_PARAMETRIZATION below).
DEMAND_SHARE_COLS = [
"private_demand_share",
"work_demand_share",
"educ_demand_share",
# parts of the private demand share:
"hh_demand",
"sym_without_pcr_demand",
"other_contact_demand",
]

# One "share_infected_among_<demand column>" name per demand column, with any
# "_share" removed from the column name first, e.g.
# "private_demand_share" -> "share_infected_among_private_demand".
SHARE_INFECTED_COLS = [
f"share_infected_among_{col.replace('_share', '')}" for col in DEMAND_SHARE_COLS
]

# Directory below BLD that holds one .csv table per column.
TABLE_PATH = BLD / "tables" / "rapid_test_statistics"


# (column, path of the .csv table produced for that column) pairs covering all
# demand and share-infected columns.
_CSV_PARAMETRIZATION = [
(column, TABLE_PATH / f"{column}.csv")
for column in DEMAND_SHARE_COLS + SHARE_INFECTED_COLS
]


@pytask.mark.skipif(SEEDS == 0, reason="combined_baseline did not run.")
@pytask.mark.depends_on(_DEPENDENCIES)
@pytask.mark.produces(
{
"demand_shares": BLD
/ "figures"
/ "rapid_test_statistics"
/ "demand_shares.pdf",
"share_infected_among_demand": BLD
/ "figures"
/ "rapid_test_statistics"
/ "share_infected_among_demand.pdf",
"demand_shares_with_single_runs": BLD
/ "figures"
/ "rapid_test_statistics"
/ "demand_shares_with_single_runs.pdf",
"share_infected_among_demand_with_single_runs": BLD
/ "figures"
/ "rapid_test_statistics"
/ "share_infected_among_demand_with_single_runs.pdf",
}
)
def task_plot_rapid_test_statistics(depends_on, produces):
@pytask.mark.depends_on(_CSV_DEPENDENCIES)
@pytask.mark.parametrize("column, produces", _CSV_PARAMETRIZATION)
def task_process_rapid_test_statistics(depends_on, column, produces):
dfs = {
seed: pd.read_csv(path, parse_dates=["date"], index_col="date")
for seed, path in depends_on.items()
Expand All @@ -52,61 +55,65 @@ def task_plot_rapid_test_statistics(depends_on, produces):
"Duplicates in a rapid test statistic DataFrame's index. "
"The csv file must be deleted before every run."
)
df = pd.concat({seed: df[column] for seed, df in dfs.items()}, axis=1)
df["smoothed_mean"] = (
df.mean(axis=1).rolling(window=7, min_periods=1, center=False).mean()
)
df.to_csv(produces)

demand_share_cols = [
"private_demand_share",
"work_demand_share",
"educ_demand_share",
"hh_demand",
"sym_without_pcr_demand",
"other_contact_demand",
]
fig = _plot_columns(dfs, demand_share_cols, BLUE, False)
fig.savefig(produces["demand_shares"])

fig = _plot_columns(dfs, demand_share_cols, BLUE, True)
fig.savefig(produces["demand_shares_with_single_runs"])

share_infected_cols = [
"share_infected_among_private_demand",
"share_infected_among_work_demand",
"share_infected_among_educ_demand",
"share_infected_among_hh_demand",
"share_infected_among_sym_without_pcr_demand",
"share_infected_among_other_contact_demand",
]

fig = _plot_columns(dfs, share_infected_cols, RED, False)
fig.savefig(produces["share_infected_among_demand"])

fig = _plot_columns(dfs, share_infected_cols, RED, True)
fig.savefig(produces["share_infected_among_demand_with_single_runs"])

# Build one plot specification per column and per variant (with and without the
# single runs). Demand columns are drawn in blue, share-infected columns in red.
# Each specification is a tuple of
# (csv table to read, line color, plot_single_runs, y axis label, output pdf).
_PLOT_PARAMETRIZATION = []
for columns, color, ylabel in [
    (
        DEMAND_SHARE_COLS,
        BLUE,
        "share of the population demanding a rapid test",
    ),
    (
        SHARE_INFECTED_COLS,
        RED,
        "share of rapid tests demanded by infected individuals",
    ),
]:
    for column in columns:
        for plot_single_runs in [True, False]:
            depends_on = TABLE_PATH / f"{column}.csv"
            suffix = "_with_single_runs" if plot_single_runs else ""
            file_name = f"{column}{suffix}.pdf"
            produces = BLD / "figures" / "rapid_test_statistics" / file_name
            spec = (depends_on, color, plot_single_runs, ylabel, produces)
            _PLOT_PARAMETRIZATION.append(spec)


@pytask.mark.parametrize(
    "depends_on, color, plot_single_runs, ylabel, produces", _PLOT_PARAMETRIZATION
)
def task_plot_rapid_test_statistics(
    depends_on, color, plot_single_runs, ylabel, produces
):
    """Plot one rapid test statistic from its .csv table and save the figure.

    Args:
        depends_on: path to the .csv table of one statistic. It is read with
            its "date" column parsed as dates and used as index.
        color: color handed to the plotting function for all lines.
        plot_single_runs (bool): handed to the plotting function; selects
            whether the single runs are drawn in addition to the smoothed mean.
        ylabel (str): label of the y axis.
        produces: path under which the figure is saved.

    """
    df = pd.read_csv(depends_on, index_col="date", parse_dates=["date"])
    fig = _plot_df(df=df, color=color, plot_single_runs=plot_single_runs, ylabel=ylabel)
    fig.savefig(produces)
    # Close exactly the figure created for this task. A bare ``plt.close()``
    # closes whichever figure happens to be current, which could leave this
    # one open while many tasks run in the same process.
    plt.close(fig)


def _plot_columns(dfs, cols, color, plot_single_runs):
n_rows = int(np.ceil(len(cols) / 2))
fig, axes = plt.subplots(
ncols=2,
nrows=n_rows,
figsize=(PLOT_SIZE[0] * n_rows, PLOT_SIZE[1] * 2),
sharex=True,
def _plot_df(df, color, plot_single_runs, ylabel):
fig, ax = plt.subplots(figsize=PLOT_SIZE)

sns.lineplot(
x=df["smoothed_mean"].index,
y=df["smoothed_mean"],
ax=ax,
linewidth=4,
color=color,
alpha=0.8,
)
for col, ax in zip(cols, axes.flatten()):
mean = pd.concat([df[col] for df in dfs.values()], axis=1).mean(axis=1)
smoothed_mean = mean.rolling(window=7, min_periods=1, center=False).mean()
sns.lineplot(
x=smoothed_mean.index,
y=smoothed_mean,
ax=ax,
linewidth=4,
color=color,
alpha=0.8,
)

if plot_single_runs:
for df in dfs.values():
if plot_single_runs:
for col in df.columns:
if col != "smoothed_mean":
sns.lineplot(
x=df.index,
y=df[col].rolling(window=7, min_periods=1, center=False).mean(),
Expand All @@ -115,9 +122,9 @@ def _plot_columns(dfs, cols, color, plot_single_runs):
color=color,
alpha=0.6,
)
ax.set_xlim(pd.Timestamp("2021-03-15"), pd.Timestamp(PLOT_END_DATE))
fig, ax = style_plot(fig, ax)

ax.set_title(col.replace("_", " ").title())
fig, ax = style_plot(fig, ax)

ax.set_ylabel(ylabel)
fig.tight_layout()
return fig
3 changes: 3 additions & 0 deletions src/plotting/task_plot_scenario_comparisons.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from src.plotting.plotting import make_nice_outcome
from src.plotting.plotting import ORANGE
from src.plotting.plotting import OUTCOME_TO_EMPIRICAL_LABEL
from src.plotting.plotting import OUTCOME_TO_Y_LABEL
from src.plotting.plotting import plot_incidences
from src.plotting.plotting import PURPLE
from src.plotting.plotting import RED
Expand Down Expand Up @@ -436,6 +437,7 @@ def task_plot_scenario_comparison(
colors=colors,
scenario_starts=scenario_starts,
n_single_runs=0,
ylabel=OUTCOME_TO_Y_LABEL.get(outcome, None),
)
ax.set_xlim(xlims)

Expand All @@ -456,6 +458,7 @@ def task_plot_scenario_comparison(
colors=colors,
scenario_starts=scenario_starts,
n_single_runs=None,
ylabel=OUTCOME_TO_Y_LABEL.get(outcome, None),
)
ax_with_lines.set_xlim(xlims)
if "new_work_scenarios" in str(produces):
Expand Down
1 change: 1 addition & 0 deletions src/plotting/task_plot_share_known_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def _create_parametrization():
title = (
f"Share Known Cases {'By Age Group' if groupby else ''} in {nice_name}"
)
title = None
parametrization.append((depends_on, title, groupby, produces))

return "depends_on, title, groupby, produces", parametrization
Expand Down
Loading

0 comments on commit 6a27233

Please sign in to comment.