Add y labels. Save rapid test statistics as .csv and plot each rapid test statistic separately. (#118)
covid-19-impact-lab · Jun 25, 2021 · 6a27233 · 6a27233
1 parent 7eb914a
commit 6a27233
Show file tree

Hide file tree

Showing 16 changed files with 184 additions and 106 deletions.
diff --git a/src/plotting/plotting.py b/src/plotting/plotting.py
@@ -47,6 +47,21 @@
     "r_effective": "effective reproduction number as estimated by the RKI",
 }
 
+OUTCOME_TO_Y_LABEL = {
+    "newly_infected": "weekly total new cases per 100,000 inhabitants",
+    "new_known_case": "weekly reported new cases per 100,000 inhabitants",
+    "newly_deceased": "weekly deaths per 100,000 inhabitants",
+    "share_ever_rapid_test": "share of the population that has \n"
+    "ever done a rapid test",
+    "share_rapid_test_in_last_week": "share of the population that has done \na rapid "
+    "test within the last seven days",
+    "share_b117": "share of B.1.1.7 among new infections",
+    "share_doing_rapid_test_today": "share of the population doing "
+    "a rapid test per day",
+    "ever_vaccinated": "share of the population that has been vaccinated",
+    "r_effective": "effective reproduction number $R_t$",
+}
+
 
 def calculate_virus_strain_shares(results):
     """Create the weekly incidences from a list of simulation runs.
@@ -87,6 +102,7 @@ def plot_incidences(
     scenario_starts=None,
     fig=None,
     ax=None,
+    ylabel=None,
 ):
     """Plot incidences.
 
@@ -101,6 +117,7 @@ def plot_incidences(
             will plot all runs.
         scenario_starts (list, optional): the scenario start points. Each consists of a
             tuple of the date and a label.
+        ylabel (str, optional): Label of the y axis.
 
     Returns:
         fig, ax
@@ -148,12 +165,10 @@ def plot_incidences(
 
     fig, ax = style_plot(fig, ax)
     ax.set_title(title)
-    if "New Cases" in title or "New Deaths" in title:
-        ax.set_ylabel("smoothed weekly incidence")
-    elif "share" in title.lower():
-        ax.set_ylabel("share")
-    elif "Effective" in title:
-        ax.set_ylabel("$R_t$")
+
+    if ylabel is not None:
+        ax.set_ylabel(ylabel)
+
     x, y, width, height = 0.0, -0.3, 1, 0.2
     ax.legend(loc="upper center", bbox_to_anchor=(x, y, width, height), ncol=2)
     fig.tight_layout()
@@ -189,6 +204,7 @@ def plot_share_known_cases(share_known_cases, title, groupby, plot_single_runs=F
 
     fig, ax = style_plot(fig, ax)
     ax.set_title(title)
+    ax.set_ylabel("share of infections that is confirmed\nthrough PCR tests")
 
     # Reduce legend to have each age group only once and move it to below the plot
     x, y, width, height = 0.0, -0.3, 1, 0.2
@@ -214,7 +230,7 @@ def plot_share_known_cases(share_known_cases, title, groupby, plot_single_runs=F
     return fig, ax
 
 
-def plot_group_time_series(df, title, rki=None):
+def plot_group_time_series(df, title, rki=None, ylabel=None):
     """Plot a time series by group with more than one run.
 
     Args:
@@ -223,6 +239,7 @@ def plot_group_time_series(df, title, rki=None):
         title (str): the title of the plot
         rki (pandas.Series, optional): Series with the RKI data. Must have the same
             index as df.
+        ylabel (str, optional): label of the y axis.
 
     """
     df = df.swaplevel().copy(deep=True)
@@ -249,6 +266,7 @@ def plot_group_time_series(df, title, rki=None):
             scenario_starts=None,
             fig=fig,
             ax=ax,
+            ylabel=ylabel,
         )
 
         if rki is not None:

diff --git a/src/plotting/task_plot_decomposition.py b/src/plotting/task_plot_decomposition.py
@@ -55,9 +55,16 @@
 _ORDERED_CHANNELS = ["Rapid Tests", "Seasonality", "Vaccinations"]
 _ORDERED_RAPID_TEST_CHANNELS = ["Private", "Work", "School"]
 
+_ALL_CHANNEL_SCENARIOS_AVAILABLE = all(
+    i in _AVAILABLE_SCENARIOS for i in _CHANNEL_SCENARIOS_TO_MEMBERS
+)
+_ALL_RAPID_TEST_SCENARIOS_AVAILABLE = all(
+    i in _AVAILABLE_SCENARIOS for i in _RAPID_TEST_SCENARIOS_TO_MEMBERS
+)
+
 
 @pytask.mark.skipif(
-    not all(i in _AVAILABLE_SCENARIOS for i in _CHANNEL_SCENARIOS_TO_MEMBERS),
+    not _ALL_CHANNEL_SCENARIOS_AVAILABLE,
     reason="required scenarios are not available",
 )
 @pytask.mark.depends_on(
@@ -103,7 +110,7 @@ def task_plot_decomposition_of_infection_channels_in_spring(depends_on, produces
 
 
 @pytask.mark.skipif(
-    not all(i in _AVAILABLE_SCENARIOS for i in _RAPID_TEST_SCENARIOS_TO_MEMBERS),
+    not _ALL_RAPID_TEST_SCENARIOS_AVAILABLE,
     reason="required scenarios are not available",
 )
 @pytask.mark.depends_on(

diff --git a/src/plotting/task_plot_incidences_by_group.py b/src/plotting/task_plot_incidences_by_group.py
@@ -7,6 +7,7 @@
 from src.config import BLD
 from src.config import SRC
 from src.plotting.plotting import make_nice_outcome
+from src.plotting.plotting import OUTCOME_TO_Y_LABEL
 from src.plotting.plotting import plot_group_time_series
 from src.simulation.load_simulation_inputs import create_period_outputs
 from src.simulation.scenario_config import create_path_to_group_incidence_plot
@@ -92,7 +93,33 @@ def task_plot_age_group_incidences_in_one_scenario(
 
     nice_outcome = make_nice_outcome(outcome)
     title = f"{nice_outcome} in " + "{group}"
-    fig, ax = plot_group_time_series(incidences, title, rki)
+    ylabel = _get_ylabel(outcome)
+    fig, ax = plot_group_time_series(
+        df=incidences,
+        title=title,
+        rki=rki,
+        ylabel=ylabel,
+    )
 
     fig.savefig(produces)
     plt.close()
+
+
+def _get_ylabel(outcome):
+    """Return the y label of ``outcome``, wrapped onto up to three lines.
+
+    Labels longer than 45 characters are split into three lines, labels longer
+    than 24 characters into two. Lines that would be empty (labels with fewer
+    words than lines) are dropped instead of emitted as blank lines.
+
+    """
+    ylabel = OUTCOME_TO_Y_LABEL[outcome]
+    n_lines = 3 if len(ylabel) > 45 else 2 if len(ylabel) > 24 else 1
+    if n_lines == 1:
+        return ylabel
+    words = ylabel.split()
+    step = len(words) // n_lines
+    # The last line takes the remainder, as the words rarely divide evenly.
+    cuts = [i * step for i in range(n_lines)] + [len(words)]
+    lines = [" ".join(words[a:b]) for a, b in zip(cuts, cuts[1:])]
+    return "\n".join(line for line in lines if line)
diff --git a/src/plotting/task_plot_rapid_test_statistics.py b/src/plotting/task_plot_rapid_test_statistics.py
@@ -1,10 +1,10 @@
 import matplotlib.pyplot as plt
-import numpy as np
 import pandas as pd
 import pytask
 import seaborn as sns
 
 from src.config import BLD
+from src.config import PLOT_END_DATE
 from src.config import PLOT_SIZE
 from src.plotting.plotting import BLUE
 from src.plotting.plotting import RED
@@ -14,35 +14,38 @@
 
 SEEDS = get_named_scenarios()["combined_baseline"]["n_seeds"]
 
-_DEPENDENCIES = {
+_CSV_DEPENDENCIES = {
     seed: create_path_to_rapid_test_statistics("combined_baseline", seed)
     for seed in range(SEEDS)
 }
 
+DEMAND_SHARE_COLS = [
+    "private_demand_share",
+    "work_demand_share",
+    "educ_demand_share",
+    # parts of the private demand share:
+    "hh_demand",
+    "sym_without_pcr_demand",
+    "other_contact_demand",
+]
+
+SHARE_INFECTED_COLS = [
+    f"share_infected_among_{col.replace('_share', '')}" for col in DEMAND_SHARE_COLS
+]
+
+TABLE_PATH = BLD / "tables" / "rapid_test_statistics"
+
+
+_CSV_PARAMETRIZATION = [
+    (column, TABLE_PATH / f"{column}.csv")
+    for column in DEMAND_SHARE_COLS + SHARE_INFECTED_COLS
+]
+
 
 @pytask.mark.skipif(SEEDS == 0, reason="combined_baseline did not run.")
-@pytask.mark.depends_on(_DEPENDENCIES)
-@pytask.mark.produces(
-    {
-        "demand_shares": BLD
-        / "figures"
-        / "rapid_test_statistics"
-        / "demand_shares.pdf",
-        "share_infected_among_demand": BLD
-        / "figures"
-        / "rapid_test_statistics"
-        / "share_infected_among_demand.pdf",
-        "demand_shares_with_single_runs": BLD
-        / "figures"
-        / "rapid_test_statistics"
-        / "demand_shares_with_single_runs.pdf",
-        "share_infected_among_demand_with_single_runs": BLD
-        / "figures"
-        / "rapid_test_statistics"
-        / "share_infected_among_demand_with_single_runs.pdf",
-    }
-)
-def task_plot_rapid_test_statistics(depends_on, produces):
+@pytask.mark.depends_on(_CSV_DEPENDENCIES)
+@pytask.mark.parametrize("column, produces", _CSV_PARAMETRIZATION)
+def task_process_rapid_test_statistics(depends_on, column, produces):
     dfs = {
         seed: pd.read_csv(path, parse_dates=["date"], index_col="date")
         for seed, path in depends_on.items()
@@ -52,61 +55,65 @@ def task_plot_rapid_test_statistics(depends_on, produces):
             "Duplicates in a rapid test statistic DataFrame's index. "
             "The csv file must be deleted before every run."
         )
+    df = pd.concat({seed: df[column] for seed, df in dfs.items()}, axis=1)
+    df["smoothed_mean"] = (
+        df.mean(axis=1).rolling(window=7, min_periods=1, center=False).mean()
+    )
+    df.to_csv(produces)
 
-    demand_share_cols = [
-        "private_demand_share",
-        "work_demand_share",
-        "educ_demand_share",
-        "hh_demand",
-        "sym_without_pcr_demand",
-        "other_contact_demand",
-    ]
-    fig = _plot_columns(dfs, demand_share_cols, BLUE, False)
-    fig.savefig(produces["demand_shares"])
-
-    fig = _plot_columns(dfs, demand_share_cols, BLUE, True)
-    fig.savefig(produces["demand_shares_with_single_runs"])
-
-    share_infected_cols = [
-        "share_infected_among_private_demand",
-        "share_infected_among_work_demand",
-        "share_infected_among_educ_demand",
-        "share_infected_among_hh_demand",
-        "share_infected_among_sym_without_pcr_demand",
-        "share_infected_among_other_contact_demand",
-    ]
-
-    fig = _plot_columns(dfs, share_infected_cols, RED, False)
-    fig.savefig(produces["share_infected_among_demand"])
-
-    fig = _plot_columns(dfs, share_infected_cols, RED, True)
-    fig.savefig(produces["share_infected_among_demand_with_single_runs"])
 
+def _create_plot_specs(columns, color, ylabel):
+    """Return the plot specs of ``columns``, each with and without single runs."""
+    figure_path = BLD / "figures" / "rapid_test_statistics"
+    specs = []
+    for column in columns:
+        for plot_single_runs in [True, False]:
+            suffix = "_with_single_runs" if plot_single_runs else ""
+            produces = figure_path / f"{column}{suffix}.pdf"
+            depends_on = TABLE_PATH / f"{column}.csv"
+            specs.append((depends_on, color, plot_single_runs, ylabel, produces))
+    return specs
+
+
+_PLOT_PARAMETRIZATION = [
+    *_create_plot_specs(
+        DEMAND_SHARE_COLS, BLUE, "share of the population demanding a rapid test"
+    ),
+    *_create_plot_specs(
+        SHARE_INFECTED_COLS,
+        RED,
+        "share of rapid tests demanded by infected individuals",
+    ),
+]
+
+
+@pytask.mark.parametrize(
+    "depends_on, color, plot_single_runs, ylabel, produces", _PLOT_PARAMETRIZATION
+)
+def task_plot_rapid_test_statistics(
+    depends_on, color, plot_single_runs, ylabel, produces
+):
+    df = pd.read_csv(depends_on, index_col="date", parse_dates=["date"])
+    fig = _plot_df(df=df, color=color, plot_single_runs=plot_single_runs, ylabel=ylabel)
+    fig.savefig(produces)
     plt.close()
 
 
-def _plot_columns(dfs, cols, color, plot_single_runs):
-    n_rows = int(np.ceil(len(cols) / 2))
-    fig, axes = plt.subplots(
-        ncols=2,
-        nrows=n_rows,
-        figsize=(PLOT_SIZE[0] * n_rows, PLOT_SIZE[1] * 2),
-        sharex=True,
+def _plot_df(df, color, plot_single_runs, ylabel):
+    fig, ax = plt.subplots(figsize=PLOT_SIZE)
+
+    sns.lineplot(
+        x=df["smoothed_mean"].index,
+        y=df["smoothed_mean"],
+        ax=ax,
+        linewidth=4,
+        color=color,
+        alpha=0.8,
     )
-    for col, ax in zip(cols, axes.flatten()):
-        mean = pd.concat([df[col] for df in dfs.values()], axis=1).mean(axis=1)
-        smoothed_mean = mean.rolling(window=7, min_periods=1, center=False).mean()
-        sns.lineplot(
-            x=smoothed_mean.index,
-            y=smoothed_mean,
-            ax=ax,
-            linewidth=4,
-            color=color,
-            alpha=0.8,
-        )
 
-        if plot_single_runs:
-            for df in dfs.values():
+    if plot_single_runs:
+        for col in df.columns:
+            if col != "smoothed_mean":
                 sns.lineplot(
                     x=df.index,
                     y=df[col].rolling(window=7, min_periods=1, center=False).mean(),
@@ -115,9 +122,9 @@ def _plot_columns(dfs, cols, color, plot_single_runs):
                     color=color,
                     alpha=0.6,
                 )
+    ax.set_xlim(pd.Timestamp("2021-03-15"), pd.Timestamp(PLOT_END_DATE))
+    fig, ax = style_plot(fig, ax)
 
-        ax.set_title(col.replace("_", " ").title())
-        fig, ax = style_plot(fig, ax)
-
+    ax.set_ylabel(ylabel)
     fig.tight_layout()
     return fig
diff --git a/src/plotting/task_plot_scenario_comparisons.py b/src/plotting/task_plot_scenario_comparisons.py
@@ -13,6 +13,7 @@
 from src.plotting.plotting import make_nice_outcome
 from src.plotting.plotting import ORANGE
 from src.plotting.plotting import OUTCOME_TO_EMPIRICAL_LABEL
+from src.plotting.plotting import OUTCOME_TO_Y_LABEL
 from src.plotting.plotting import plot_incidences
 from src.plotting.plotting import PURPLE
 from src.plotting.plotting import RED
@@ -436,6 +437,7 @@ def task_plot_scenario_comparison(
         colors=colors,
         scenario_starts=scenario_starts,
         n_single_runs=0,
+        ylabel=OUTCOME_TO_Y_LABEL.get(outcome, None),
     )
     ax.set_xlim(xlims)
 
@@ -456,6 +458,7 @@ def task_plot_scenario_comparison(
         colors=colors,
         scenario_starts=scenario_starts,
         n_single_runs=None,
+        ylabel=OUTCOME_TO_Y_LABEL.get(outcome, None),
     )
     ax_with_lines.set_xlim(xlims)
     if "new_work_scenarios" in str(produces):

diff --git a/src/plotting/task_plot_share_known_cases.py b/src/plotting/task_plot_share_known_cases.py
@@ -34,6 +34,7 @@ def _create_parametrization():
             title = (
                 f"Share Known Cases {'By Age Group' if groupby else ''} in {nice_name}"
             )
+            title = None
             parametrization.append((depends_on, title, groupby, produces))
 
     return "depends_on, title, groupby, produces", parametrization