From 6a433c73a1975215fc1769365fad79c06c207209 Mon Sep 17 00:00:00 2001
From: rkansal47 <raghavk1997@yahoo.co.in>
Date: Fri, 31 May 2024 17:30:16 -0500
Subject: [PATCH] john arc results

---
 paper/latex_tables.ipynb                      |  70 ++++-
 paper/limit_plots.ipynb                       | 259 ++++++++++++++++
 paper/tables/nonres_lpsfs.tex                 |   9 +
 .../postprocessing/InferenceAnalysis.ipynb    |  90 +++---
 src/HHbbVV/postprocessing/PostProcess.ipynb   | 279 ++++++++----------
 src/HHbbVV/postprocessing/TrainBDT.py         |   3 +
 src/HHbbVV/postprocessing/plotting.py         |  32 +-
 src/HHbbVV/postprocessing/postprocessing.py   |   2 +
 8 files changed, 541 insertions(+), 203 deletions(-)
 create mode 100644 paper/limit_plots.ipynb
 create mode 100644 paper/tables/nonres_lpsfs.tex

diff --git a/paper/latex_tables.ipynb b/paper/latex_tables.ipynb
index e4f5a55d..875c2b14 100644
--- a/paper/latex_tables.ipynb
+++ b/paper/latex_tables.ipynb
@@ -2,14 +2,16 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
     "import yaml\n",
     "import json\n",
     "import numpy as np\n",
-    "from copy import deepcopy"
+    "import pandas as pd\n",
+    "from copy import deepcopy\n",
+    "from pathlib import Path"
    ]
   },
   {
@@ -343,12 +345,72 @@
     "    f.writelines(lines)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Lund plane SFs"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Nonresonant"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "templates_dir = Path(\"../src/HHbbVV/postprocessing/templates/24Apr26NonresBDT995AllSigs\")\n",
+    "dfs = {\n",
+    "    \"ggF\": pd.read_csv(templates_dir / \"lpsfs_passggf.csv\").to_numpy(),\n",
+    "    \"VBF\": pd.read_csv(templates_dir / \"lpsfs_passvbf.csv\").to_numpy(),\n",
+    "}\n",
+    "\n",
+    "sig_map = {\n",
+    "    \"HHbbVV\": r\"SM ggF \\HH\",\n",
+    "    \"VBFHHbbVV\": r\"SM VBF \\HH\",\n",
+    "    \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": r\"VBF \\HH ($\\kapvv = 0$)\",\n",
+    "    \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": r\"VBF \\HH ($\\kapvv = 2$)\",\n",
+    "}\n",
+    "\n",
+    "df = dfs[\"ggF\"]\n",
+    "\n",
+    "lines = []\n",
+    "\n",
+    "for j, (region, df) in enumerate(dfs.items()):\n",
+    "    sigs = df[:, 0]\n",
+    "    for i, (sig, siglabel) in enumerate(sig_map.items()):\n",
+    "        if i == 0:\n",
+    "            region_label = rf\"\\multirow{{{len(sig_map)}}}{{*}}{{{region}}}\"\n",
+    "        else:\n",
+    "            region_label = \"\"\n",
+    "\n",
+    "        line = [region_label, siglabel]\n",
+    "        sigidx = np.where(sigs == sig)[0][0]\n",
+    "        row = df[sigidx, 1:]\n",
+    "\n",
+    "        sf = row[0].split(\" ± \")\n",
+    "        line.append(rf\"${sf[0]} \\pm {sf[1]}$\")\n",
+    "\n",
+    "        for i in range(1, len(row)):\n",
+    "            line.append(f\"{row[i]:.2f}\")\n",
+    "\n",
+    "        lines.append(\" & \".join(line) + r\" \\\\\" + \"\\n\")\n",
+    "\n",
+    "    if j != len(dfs) - 1:\n",
+    "        lines.append(r\"\\hline\" + \"\\n\")\n",
+    "\n",
+    "# remove trailing \"\\\\\" and \"\\n\"\n",
+    "lines[-1] = lines[-1][:-4]\n",
+    "\n",
+    "with Path(\"tables/nonres_lpsfs.tex\").open(\"w\") as f:\n",
+    "    f.writelines(lines)"
+   ]
   }
  ],
  "metadata": {
diff --git a/paper/limit_plots.ipynb b/paper/limit_plots.ipynb
new file mode 100644
index 00000000..036621a5
--- /dev/null
+++ b/paper/limit_plots.ipynb
@@ -0,0 +1,259 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.ticker as mticker\n",
+    "import mplhep as hep\n",
+    "\n",
+    "plt.style.use(hep.style.CMS)\n",
+    "hep.style.use(\"CMS\")\n",
+    "formatter = mticker.ScalarFormatter(useMathText=True)\n",
+    "formatter.set_powerlimits((-3, 3))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.patches as patches\n",
+    "import mplhep as hep\n",
+    "\n",
+    "# Apply the CMS style\n",
+    "plt.style.use(hep.style.CMS)\n",
+    "\n",
+    "# Define the observed and expected data for the first set (from the first PDF)\n",
+    "observed_value_1 = 142\n",
+    "expected_median_1 = 69\n",
+    "expected_68_low_1, expected_68_high_1 = 50, 80  # 68% confidence interval\n",
+    "expected_95_low_1, expected_95_high_1 = 40, 100  # 95% confidence interval\n",
+    "\n",
+    "# Define the observed and expected data for the second set (from the second PDF)\n",
+    "observed_value_2 = 1.1\n",
+    "expected_median_2 = 0.9\n",
+    "expected_68_low_2, expected_68_high_2 = 0.7, 1.1  # 68% confidence interval\n",
+    "expected_95_low_2, expected_95_high_2 = 0.5, 1.3  # 95% confidence interval\n",
+    "\n",
+    "# Create the figure with custom dimensions\n",
+    "fig, ax = plt.subplots(figsize=(10, 6))  # Adjust the dimensions as needed\n",
+    "\n",
+    "# Plot for the first set\n",
+    "# Add rectangles for the confidence intervals\n",
+    "rect_95_1 = patches.Rectangle(\n",
+    "    (expected_95_low_1, 0.7),\n",
+    "    expected_95_high_1 - expected_95_low_1,\n",
+    "    0.2,\n",
+    "    linewidth=0,\n",
+    "    edgecolor=\"none\",\n",
+    "    facecolor=\"yellow\",\n",
+    "    alpha=0.5,\n",
+    ")\n",
+    "ax.add_patch(rect_95_1)\n",
+    "rect_68_1 = patches.Rectangle(\n",
+    "    (expected_68_low_1, 0.7),\n",
+    "    expected_68_high_1 - expected_68_low_1,\n",
+    "    0.2,\n",
+    "    linewidth=0,\n",
+    "    edgecolor=\"none\",\n",
+    "    facecolor=\"green\",\n",
+    "    alpha=0.5,\n",
+    ")\n",
+    "ax.add_patch(rect_68_1)\n",
+    "ax.plot([expected_median_1, expected_median_1], [0.7, 0.9], \"k--\")\n",
+    "ax.plot([observed_value_1, observed_value_1], [0.7, 0.9], \"k-\", linewidth=2)\n",
+    "\n",
+    "# Plot for the second set\n",
+    "# Add rectangles for the confidence intervals\n",
+    "rect_95_2 = patches.Rectangle(\n",
+    "    (expected_95_low_2, 0.4),\n",
+    "    expected_95_high_2 - expected_95_low_2,\n",
+    "    0.2,\n",
+    "    linewidth=0,\n",
+    "    edgecolor=\"none\",\n",
+    "    facecolor=\"yellow\",\n",
+    "    alpha=0.5,\n",
+    ")\n",
+    "ax.add_patch(rect_95_2)\n",
+    "rect_68_2 = patches.Rectangle(\n",
+    "    (expected_68_low_2, 0.4),\n",
+    "    expected_68_high_2 - expected_68_low_2,\n",
+    "    0.2,\n",
+    "    linewidth=0,\n",
+    "    edgecolor=\"none\",\n",
+    "    facecolor=\"green\",\n",
+    "    alpha=0.5,\n",
+    ")\n",
+    "ax.add_patch(rect_68_2)\n",
+    "ax.plot([expected_median_2, expected_median_2], [0.4, 0.6], \"k--\")\n",
+    "ax.plot([observed_value_2, observed_value_2], [0.4, 0.6], \"k-\", linewidth=2)\n",
+    "\n",
+    "# Set the x and y axis labels\n",
+    "ax.set_xlabel(r\"95% CL limit on $\\sigma(pp \\rightarrow HH) / \\sigma$\")\n",
+    "ax.set_yticks([0.8, 0.5])\n",
+    "ax.set_yticklabels(\n",
+    "    [\n",
+    "        r\"$\\kappa_{\\lambda} = 1, \\kappa_{t} = 2, \\kappa_{V} = 1$\",\n",
+    "        r\"$\\kappa_{\\lambda} = 1, \\kappa_{t} = 1, \\kappa_{V} = 0$\",\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "# Set the title\n",
+    "ax.set_title(\"CMS Work in Progress\")\n",
+    "\n",
+    "# Set x-axis to logarithmic scale\n",
+    "ax.set_xscale(\"log\")\n",
+    "\n",
+    "# Add a legend in the top right without the limit values\n",
+    "legend_elements = [\n",
+    "    patches.Patch(color=\"green\", alpha=0.5, label=\"68% expected\"),\n",
+    "    patches.Patch(color=\"yellow\", alpha=0.5, label=\"95% expected\"),\n",
+    "    plt.Line2D([0], [0], color=\"k\", linestyle=\"--\", label=\"Median expected\"),\n",
+    "    plt.Line2D([0], [0], color=\"k\", linewidth=2, label=\"Observed\"),\n",
+    "]\n",
+    "ax.legend(handles=legend_elements, loc=\"upper right\")\n",
+    "\n",
+    "# Set x-axis limits\n",
+    "ax.set_xlim(0.1, 200)\n",
+    "\n",
+    "# Use scientific notation for the x-axis\n",
+    "ax.xaxis.set_major_formatter(plt.ScalarFormatter(useMathText=True))\n",
+    "ax.ticklabel_format(style=\"sci\", axis=\"x\", scilimits=(0, 0))\n",
+    "\n",
+    "# Show grid\n",
+    "ax.grid(True, axis=\"x\")\n",
+    "\n",
+    "# Apply CMS label with `mplhep`\n",
+    "hep.cms.label(ax=ax, data=True, lumi=138, com=13)\n",
+    "\n",
+    "# Adjust layout\n",
+    "plt.tight_layout()\n",
+    "\n",
+    "# Show the plot\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.patches as patches\n",
+    "\n",
+    "# Define the observed and expected data\n",
+    "observed_value = 142\n",
+    "expected_median = 69\n",
+    "expected_68_low, expected_68_high = 50, 80  # 68% confidence interval\n",
+    "expected_95_low, expected_95_high = 40, 100  # 95% confidence interval\n",
+    "\n",
+    "# Create the plot with custom dimensions\n",
+    "fig, ax = plt.subplots(figsize=(8, 2))  # Adjust the dimensions as needed\n",
+    "\n",
+    "# Add rectangles for the confidence intervals\n",
+    "# 95% confidence interval\n",
+    "rect_95 = patches.Rectangle(\n",
+    "    (expected_95_low, -0.1),\n",
+    "    expected_95_high - expected_95_low,\n",
+    "    0.2,\n",
+    "    linewidth=0,\n",
+    "    edgecolor=\"none\",\n",
+    "    facecolor=\"yellow\",\n",
+    "    alpha=0.5,\n",
+    "    label=\"95% expected\",\n",
+    ")\n",
+    "ax.add_patch(rect_95)\n",
+    "\n",
+    "# 68% confidence interval\n",
+    "rect_68 = patches.Rectangle(\n",
+    "    (expected_68_low, -0.1),\n",
+    "    expected_68_high - expected_68_low,\n",
+    "    0.2,\n",
+    "    linewidth=0,\n",
+    "    edgecolor=\"none\",\n",
+    "    facecolor=\"green\",\n",
+    "    alpha=0.5,\n",
+    "    label=\"68% expected\",\n",
+    ")\n",
+    "ax.add_patch(rect_68)\n",
+    "\n",
+    "# Plot the expected median line\n",
+    "ax.plot([expected_median, expected_median], [-0.1, 0.1], \"k--\", label=\"Median expected: 69\")\n",
+    "\n",
+    "# Plot the observed value as a solid black line\n",
+    "ax.plot([observed_value, observed_value], [-0.1, 0.1], \"k-\", linewidth=2, label=\"Observed: 142\")\n",
+    "\n",
+    "# Set the x and y axis labels\n",
+    "ax.set_xlabel(r\"95% CL limit on $\\sigma(pp \\rightarrow HH) / \\sigma$\")\n",
+    "ax.set_yticks([])  # Remove y-axis ticks\n",
+    "\n",
+    "# Set the title\n",
+    "ax.set_title(\"CMS Work in Progress\")\n",
+    "\n",
+    "# Add a legend\n",
+    "ax.legend()\n",
+    "\n",
+    "# Set x-axis limits\n",
+    "ax.set_xlim(0, 150)  # Adjust based on the range of your data\n",
+    "ax.set_ylim(-0.2, 0.3)  # Adjust y-axis to provide space for the kappa values\n",
+    "\n",
+    "# Use scientific notation for the x-axis\n",
+    "ax.xaxis.set_major_formatter(plt.ScalarFormatter(useMathText=True))\n",
+    "ax.ticklabel_format(style=\"sci\", axis=\"x\", scilimits=(0, 0))\n",
+    "\n",
+    "# Add kappa values above the observed limit within the figure box\n",
+    "kappa_values = r\"$\\kappa_{\\lambda} = 1, \\kappa_{t} = 2, \\kappa_{V} = 1$\"\n",
+    "plt.text(\n",
+    "    observed_value,\n",
+    "    0.2,\n",
+    "    kappa_values,\n",
+    "    fontsize=12,\n",
+    "    horizontalalignment=\"center\",\n",
+    "    verticalalignment=\"bottom\",\n",
+    ")\n",
+    "\n",
+    "# Show grid\n",
+    "ax.grid(True, axis=\"x\")\n",
+    "\n",
+    "# Show the plot\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/paper/tables/nonres_lpsfs.tex b/paper/tables/nonres_lpsfs.tex
new file mode 100644
index 00000000..71e813dd
--- /dev/null
+++ b/paper/tables/nonres_lpsfs.tex
@@ -0,0 +1,9 @@
+\multirow{4}{*}{ggF} & SM ggF \HH & $1.05 \pm 0.24$ & 0.16 & 0.05 & 0.00 & 0.16 \\
+ & SM VBF \HH & $1.17 \pm 0.45$ & 0.35 & 0.05 & 0.00 & 0.16 \\
+ & VBF \HH ($\kapvv = 0$) & $1.09 \pm 0.18$ & 0.02 & 0.04 & 0.01 & 0.15 \\
+ & VBF \HH ($\kapvv = 2$) & $1.10 \pm 0.18$ & 0.02 & 0.05 & 0.01 & 0.15 \\
+\midrule
+\multirow{4}{*}{VBF} & SM ggF \HH & $0.95 \pm 0.28$ & 0.26 & 0.08 & 0.01 & 0.12 \\
+ & SM VBF \HH & $1.08 \pm 0.46$ & 0.38 & 0.05 & 0.01 & 0.19 \\
+ & VBF \HH ($\kapvv = 0$) & $0.93 \pm 0.27$ & 0.16 & 0.06 & 0.02 & 0.23 \\
+ & VBF \HH ($\kapvv = 2$) & $0.94 \pm 0.27$ & 0.16 & 0.05 & 0.02 & 0.23
diff --git a/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb b/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb
index 8b8918ff..5b874918 100644
--- a/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb
+++ b/src/HHbbVV/postprocessing/InferenceAnalysis.ipynb
@@ -58,7 +58,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "plot_dir = MAIN_DIR / \"plots/TaggerAnalysis/24May20\"\n",
+    "plot_dir = MAIN_DIR / \"plots/TaggerAnalysis/24May29\"\n",
     "# plot_dir = MAIN_DIR / \"plots/BDT/24Apr9\"\n",
     "plot_dir.mkdir(parents=True, exist_ok=True)\n",
     "\n",
@@ -315,12 +315,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.metrics import roc_curve, auc, integrate\n",
+    "from sklearn.metrics import roc_curve, auc\n",
+    "from scipy import integrate\n",
     "\n",
     "rocs = {}\n",
     "# sig_key = \"HHbbVV\"\n",
-    "bg_keys = [\"TT\", \"QCD\"]\n",
+    "tot_bg_keys = [\"TT\", \"QCD\"]\n",
     "bg_skip = 1\n",
+    "weight_key = \"finalWeight\"\n",
     "\n",
     "\n",
     "for cutstr in cut_labels:\n",
@@ -329,42 +331,47 @@
     "    # for sig_key in tqdm(nonres_sig_keys + res_sig_keys):\n",
     "    for sig_key in tqdm(nonres_sig_keys):\n",
     "        rocs[cutstr][sig_key] = {}\n",
-    "        sig_cut = cuts_dict[sig_key][cutstr]\n",
-    "        bg_cuts = [cuts_dict[bg_key][cutstr] for bg_key in bg_keys]\n",
-    "\n",
-    "        y_true = np.concatenate(\n",
-    "            [\n",
-    "                np.ones(np.sum(sig_cut)),\n",
-    "                np.zeros(int(np.ceil(np.sum(np.concatenate(bg_cuts)) / bg_skip))),\n",
-    "            ]\n",
-    "        )\n",
-    "        # print(y_true[np.sum(sig_cut):])\n",
-    "\n",
-    "        weights = np.concatenate(\n",
-    "            [events_dict[sig_key][\"weight\"][sig_cut]]\n",
-    "            + [\n",
-    "                events_dict[bg_key][\"weight\"][bg_cut][::bg_skip]\n",
-    "                for bg_key, bg_cut in zip(bg_keys, bg_cuts)\n",
-    "            ],\n",
-    "        )\n",
+    "        # sig_cut = cuts_dict[sig_key][cutstr]\n",
+    "        for bg_label, bg_keys in (\n",
+    "            {\"Combined\": tot_bg_keys} | {bg_key: [bg_key] for bg_key in tot_bg_keys}\n",
+    "        ).items():\n",
+    "            rocs[cutstr][sig_key][bg_label] = {}\n",
+    "            # bg_cuts = [cuts_dict[bg_key][cutstr] for bg_key in bg_keys]\n",
+    "\n",
+    "            y_true = np.concatenate(\n",
+    "                [\n",
+    "                    np.ones(len(events_dict[sig_key])),\n",
+    "                    np.zeros(\n",
+    "                        int(\n",
+    "                            np.ceil(\n",
+    "                                np.sum([len(events_dict[bg_key]) for bg_key in bg_keys]) / bg_skip\n",
+    "                            )\n",
+    "                        )\n",
+    "                    ),\n",
+    "                ]\n",
+    "            )\n",
+    "            # print(y_true[np.sum(sig_cut):])\n",
     "\n",
-    "        for t, pvars in plot_vars.items():\n",
-    "            score_label = pvars[\"score_label\"]\n",
-    "            scores = np.concatenate(\n",
-    "                [events_dict[sig_key][score_label][sig_cut]]\n",
-    "                + [\n",
-    "                    events_dict[bg_key][score_label][bg_cut][::bg_skip]\n",
-    "                    for bg_key, bg_cut in zip(bg_keys, bg_cuts)\n",
-    "                ],\n",
+    "            weights = np.concatenate(\n",
+    "                [events_dict[sig_key][weight_key]]\n",
+    "                + [events_dict[bg_key][weight_key][::bg_skip] for bg_key in bg_keys],\n",
     "            )\n",
-    "            # print(scores[np.sum(sig_cut):])\n",
-    "            fpr, tpr, thresholds = roc_curve(y_true, scores, sample_weight=weights)\n",
-    "            rocs[cutstr][sig_key][t] = {\n",
-    "                \"fpr\": fpr,\n",
-    "                \"tpr\": tpr,\n",
-    "                \"thresholds\": thresholds,\n",
-    "                # \"auc\": auc(fpr, tpr),\n",
-    "            }"
+    "\n",
+    "            for t, pvars in plot_vars.items():\n",
+    "                score_label = pvars[\"score_label\"]\n",
+    "                scores = np.concatenate(\n",
+    "                    [events_dict[sig_key][score_label]]\n",
+    "                    + [events_dict[bg_key][score_label][::bg_skip] for bg_key in bg_keys],\n",
+    "                )\n",
+    "                # print(scores[np.sum(sig_cut):])\n",
+    "                fpr, tpr, thresholds = roc_curve(y_true, scores, sample_weight=weights)\n",
+    "                rocs[cutstr][sig_key][bg_label][t] = {\n",
+    "                    \"fpr\": fpr,\n",
+    "                    \"tpr\": tpr,\n",
+    "                    \"thresholds\": thresholds,\n",
+    "                    \"auc\": integrate.trapz(tpr, fpr),\n",
+    "                    \"label\": bg_label,\n",
+    "                }"
    ]
   },
   {
@@ -417,10 +424,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "roc = rocs[cutstr][sig_key][t]\n",
-    "roc_auc = integrate.trapz(y=roc[\"tpr\"], x=roc[\"fpr\"])\n",
-    "print(\"AUC:\", roc_auc)\n",
-    "plotting.rocCurve(roc[\"fpr\"], roc[\"tpr\"], show=True, plot_dir=plot_dir, name=\"THVV\", auc=roc_auc)"
+    "roc = rocs[cutstr][sig_key][\"Combined\"][t]\n",
+    "plotting.rocCurve(roc[\"fpr\"], roc[\"tpr\"], show=True, plot_dir=plot_dir, name=\"THVV\", auc=roc[\"auc\"])\n",
+    "\n",
+    "# bg_rocs = {key: val[t] for key, val in rocs[cutstr][sig_key].items()}\n",
+    "# plotting.multiROCCurveGrey({\"all\": bg_rocs}, [], xlim=[0, 0.8], show=True, plot_dir=plot_dir, name=\"THVV_sep_bgs\")"
    ]
   },
   {
diff --git a/src/HHbbVV/postprocessing/PostProcess.ipynb b/src/HHbbVV/postprocessing/PostProcess.ipynb
index a34b3d17..b8cda9bb 100644
--- a/src/HHbbVV/postprocessing/PostProcess.ipynb
+++ b/src/HHbbVV/postprocessing/PostProcess.ipynb
@@ -84,8 +84,8 @@
     "year = \"2018\"\n",
     "bdt_preds_dir = samples_dir / \"24_04_05_k2v0_training_eqsig_vbf_vars_rm_deta/inferences\"\n",
     "\n",
-    "date = \"24May16\"\n",
-    "plot_dir = MAIN_DIR / f\"plots/PostProcessing/{date}/\"\n",
+    "date = \"24May31\"\n",
+    "plot_dir = MAIN_DIR / f\"plots/PostProcessing/{date}LPSFs\"\n",
     "templates_dir = f\"templates/{date}\"\n",
     "_ = os.system(f\"mkdir -p {plot_dir}\")\n",
     "_ = os.system(f\"mkdir -p {plot_dir}/cutflows/\")\n",
@@ -116,7 +116,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -182,23 +181,6 @@
     "## Control plots"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sel, _ = utils.make_selection(\n",
-    "    {\n",
-    "        \"bbFatJetPt\": [450, CUT_MAX_VAL],\n",
-    "        \"VVFatJetPt\": [450, CUT_MAX_VAL],\n",
-    "        \"vbf_Mass_jj\": [500, CUT_MAX_VAL],\n",
-    "    },\n",
-    "    events_dict,\n",
-    "    bb_masks,\n",
-    ")"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -240,14 +222,14 @@
     "    # ShapeVar(var=\"nGoodMuons\", label=r\"# of Muons\", bins=[3, 0, 3]),\n",
     "    # ShapeVar(var=\"nGoodElectrons\", label=r\"# of Electrons\", bins=[3, 0, 3]),\n",
     "    # ShapeVar(var=\"nGoodJets\", label=r\"# of AK4 B-Jets\", bins=[5, 0, 5]),\n",
-    "    ShapeVar(var=\"VBFJetPt0\", label=r\"Leading VBF-tagged Jet $p_T$\", bins=[20, 20, 300]),\n",
-    "    ShapeVar(var=\"VBFJetPt1\", label=r\"Sub-leading VBF-tagged Jet $p_T$\", bins=[20, 20, 300]),\n",
-    "    ShapeVar(var=\"VBFJetEta0\", label=r\"Leading VBF-tagged Jet $\\eta$\", bins=[9, -4.5, 4.5]),\n",
-    "    ShapeVar(var=\"VBFJetEta1\", label=r\"Sub-leading VBF-tagged Jet $\\eta$\", bins=[9, -4.5, 4.5]),\n",
-    "    ShapeVar(var=\"VBFJetPhi0\", label=r\"Leading VBF-tagged Jet $\\varphi$\", bins=[10, -3, 3]),\n",
-    "    ShapeVar(var=\"VBFJetPhi1\", label=r\"Sub-leading VBF-tagged Jet $\\varphi$\", bins=[10, -3, 3]),\n",
-    "    ShapeVar(var=\"vbf_Mass_jj\", label=r\"$m_{jj}^{VBF}$\", bins=[20, 0, 1000]),\n",
-    "    ShapeVar(var=\"vbf_dEta_jj\", label=r\"$|\\Delta\\eta_{jj}^{VBF}|$\", bins=[20, 0, 6]),\n",
+    "    # ShapeVar(var=\"VBFJetPt0\", label=r\"Leading VBF-tagged Jet $p_T$\", bins=[20, 20, 300]),\n",
+    "    # ShapeVar(var=\"VBFJetPt1\", label=r\"Sub-leading VBF-tagged Jet $p_T$\", bins=[20, 20, 300]),\n",
+    "    # ShapeVar(var=\"VBFJetEta0\", label=r\"Leading VBF-tagged Jet $\\eta$\", bins=[9, -4.5, 4.5]),\n",
+    "    # ShapeVar(var=\"VBFJetEta1\", label=r\"Sub-leading VBF-tagged Jet $\\eta$\", bins=[9, -4.5, 4.5]),\n",
+    "    # ShapeVar(var=\"VBFJetPhi0\", label=r\"Leading VBF-tagged Jet $\\varphi$\", bins=[10, -3, 3]),\n",
+    "    # ShapeVar(var=\"VBFJetPhi1\", label=r\"Sub-leading VBF-tagged Jet $\\varphi$\", bins=[10, -3, 3]),\n",
+    "    # ShapeVar(var=\"vbf_Mass_jj\", label=r\"$m_{jj}^{VBF}$\", bins=[20, 0, 1000]),\n",
+    "    # ShapeVar(var=\"vbf_dEta_jj\", label=r\"$|\\Delta\\eta_{jj}^{VBF}|$\", bins=[20, 0, 6]),\n",
     "    # ShapeVar(var=\"BDTScore\", label=r\"BDT Score\", bins=[50, 0, 1]),\n",
     "]\n",
     "\n",
@@ -366,100 +348,58 @@
     "postprocessing.plot_bdt_sculpting(events_dict, bb_masks, plot_dir, year, show=True)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Check tagger mass sculpting"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "cuts = [0, 0.1, 0.5, 0.9, 0.95]\n",
-    "# bdtvars = [\"\", \"TT\", \"VJets\"]\n",
-    "bdtvars = [\"\"]\n",
-    "plot_keys = [\"TT\"]\n",
+    "weight_key = \"finalWeight\"\n",
+    "show = True\n",
     "\n",
-    "shape_var = ShapeVar(\n",
-    "    var=\"bbFatJetParticleNetMass\", label=r\"$m^{bb}_{reg}$ (GeV)\", bins=[20, 50, 250]\n",
-    ")\n",
+    "cuts = {\n",
+    "    \"bb\": [0.8, 0.9, 0.95],\n",
+    "    \"VV\": [0, 0.1, 0.5, 0.8],\n",
+    "}\n",
+    "plot_keys = [data_key, \"QCD\", \"TT\", \"Z+Jets\", \"HHbbVV\", \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\"]\n",
+    "# plot_keys = [\"QCD\"]\n",
+    "\n",
+    "taggers = {\n",
+    "    \"bb\": (\"bbFatJetParticleNetMD_Txbb\", r\"$T^{bb}_{Xbb}$\"),\n",
+    "    \"VV\": (\"VVFatJetParTMD_THWWvsT\", r\"$T^{VV}_{HWW}$\"),\n",
+    "}\n",
+    "\n",
+    "for jet in [\"bb\", \"VV\"]:\n",
+    "    shape_var = ShapeVar(\n",
+    "        var=f\"{jet}FatJetParticleNetMass\", label=rf\"$m^{{{jet}}}_{{reg}}$ (GeV)\", bins=[20, 50, 250]\n",
+    "    )\n",
+    "\n",
+    "    cut_var, cut_var_label = taggers[jet]\n",
     "\n",
-    "for var in bdtvars:\n",
     "    for key in plot_keys:\n",
     "        ed_key = {key: events_dict[key]}\n",
     "        bbm_key = {key: bb_masks[key]}\n",
     "\n",
-    "        fig, ax = plt.subplots(1, 1, figsize=(12, 12))\n",
-    "        plt.rcParams.update({\"font.size\": 24})\n",
-    "\n",
-    "        for i, cut in enumerate(cuts):\n",
-    "            sel, _ = utils.make_selection({f\"BDTScore{var}\": [cut, CUT_MAX_VAL]}, ed_key, bbm_key)\n",
-    "            h = utils.singleVarHist(ed_key, shape_var, bbm_key, selection=sel)\n",
-    "\n",
-    "            hep.histplot(\n",
-    "                h[key, ...] / np.sum(h[key, ...].values()),\n",
-    "                yerr=True,\n",
-    "                label=f\"BDTScore >= {cut}\",\n",
-    "                # density=True,\n",
-    "                ax=ax,\n",
-    "                linewidth=2,\n",
-    "                alpha=0.8,\n",
-    "            )\n",
-    "\n",
-    "        ax.set_xlabel(shape_var.label)\n",
-    "        ax.set_ylabel(\"Fraction of Events\")\n",
-    "        ax.legend()\n",
-    "\n",
-    "        hep.cms.label(ax=ax, data=False, year=year, lumi=round(LUMI[year] / 1e3))\n",
-    "        plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "cuts = [0.01, 0.1, 0.5, 0.9, 0.99]\n",
-    "# bdtvars = [\"\", \"TT\", \"VJets\"]\n",
-    "bdtvars = [\"\"]\n",
-    "sig_scales = [1e5, 5e4, 2e4, 1e4, 2e3]\n",
-    "\n",
-    "# for ttcut in [0.01, 0.1, 0.5, 0.9, 0.99]:\n",
-    "#     ttsel, _ = utils.make_selection({\"BDTScoreTT\": [ttcut, CUT_MAX_VAL]}, events_dict, bb_masks)\n",
-    "#     cutstr = f\"tt{ttcut}\"\n",
-    "\n",
-    "#     hists = postprocessing.control_plots(\n",
-    "#         events_dict,\n",
-    "#         bb_masks,\n",
-    "#         nonres_sig_keys,\n",
-    "#         control_plot_vars,\n",
-    "#         f\"{plot_dir}/ControlPlots/{year}/\",\n",
-    "#         year,\n",
-    "#         hists={},\n",
-    "#         bg_keys=[\"QCD\", \"TT\", \"ST\", \"V+Jets\", \"Diboson\"],\n",
-    "#         selection=ttsel,\n",
-    "#         cutstr=cutstr,\n",
-    "#         show=True,\n",
-    "#     )\n",
-    "\n",
-    "for var in bdtvars:\n",
-    "    for i, cut in enumerate(cuts):\n",
-    "        sel, _ = utils.make_selection({f\"BDTScore{var}\": [cut, CUT_MAX_VAL]}, events_dict, bb_masks)\n",
-    "        cutstr = f\"bdt{var}{cut}\"\n",
-    "        sig_scale = sig_scales[i]\n",
-    "\n",
-    "        hists = postprocessing.control_plots(\n",
-    "            events_dict,\n",
-    "            bb_masks,\n",
-    "            nonres_sig_keys,\n",
-    "            control_plot_vars,\n",
-    "            f\"{plot_dir}/ControlPlots/{year}/\",\n",
+    "        plotting.cutsLinePlot(\n",
+    "            ed_key,\n",
+    "            shape_var,\n",
+    "            key,\n",
+    "            cut_var,\n",
+    "            cut_var_label,\n",
+    "            cuts[jet],\n",
     "            year,\n",
-    "            hists={},\n",
-    "            bg_keys=[\"QCD\", \"TT\", \"ST\", \"V+Jets\", \"Diboson\"],\n",
-    "            selection=sel,\n",
-    "            cutstr=cutstr,\n",
-    "            sig_scale_dict={\"HHbbVV\": sig_scale},\n",
-    "            combine_pdf=False,\n",
-    "            show=True,\n",
+    "            weight_key,\n",
+    "            bb_masks=bbm_key,\n",
+    "            plot_dir=plot_dir,\n",
+    "            name=f\"{year}_{cut_var}Cuts_{shape_var.var}_{key}\",\n",
+    "            show=show,\n",
     "        )"
    ]
   },
@@ -468,16 +408,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Overall BDT SF"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "nonres_sig_keys"
+    "## Get Lund plane weights"
    ]
   },
   {
@@ -487,50 +418,96 @@
    "outputs": [],
    "source": [
     "postprocessing.lpsfs(\n",
+    "    list(nonres_samples.keys()),\n",
+    "    selection_regions[\"lpsf_passggf\"],\n",
+    "    systematics,\n",
     "    events_dict,\n",
     "    bb_masks,\n",
-    "    nonres_sig_keys,\n",
-    "    nonres_samples,\n",
-    "    cutflow,\n",
-    "    selection_regions[\"lpsf\"],\n",
-    "    systematics,\n",
     "    all_years=False,\n",
     ")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Check distributions with and without LP weights"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "events = events_dict[\"HHbbVV\"]\n",
-    "bb_mask = bb_masks[\"HHbbVV\"]\n",
-    "weight = events[\"finalWeight\"].values.squeeze()\n",
-    "weight_lp = weight * events[\"VV_lp_sf_nom\"].values.squeeze()\n",
-    "weight_lp_sys_up = weight * events[\"VV_lp_sf_sys_up\"].values.squeeze()\n",
-    "weight_lp_sys_down = weight * events[\"VV_lp_sf_sys_down\"].values.squeeze()\n",
-    "\n",
-    "plt.hist(\n",
-    "    utils.get_feat(events, \"bbFatJetPt\", bb_mask),\n",
-    "    np.linspace(250, 1200, 31),\n",
-    "    weights=weight,\n",
-    "    histtype=\"step\",\n",
-    "    label=\"Pre-LP\",\n",
-    ")\n",
-    "plt.hist(\n",
-    "    utils.get_feat(events, \"bbFatJetPt\", bb_mask),\n",
-    "    np.linspace(250, 1200, 31),\n",
-    "    weights=weight_lp,\n",
-    "    histtype=\"step\",\n",
-    "    label=\"Post-LP\",\n",
-    ")\n",
-    "plt.title(\"2018 HHbbVV\")\n",
-    "plt.xlabel(r\"$p_T^{VV}$ (GeV)\")\n",
-    "plt.ylabel(\"Events\")\n",
-    "# plt.hist(utils.get_feat(events, \"VVFatJetPt\", bb_mask), np.linspace(250, 2000, 31), weights=weight_lp_sys_up, histtype=\"step\", label=\"Post-LP Sys Up\")\n",
-    "# plt.hist(utils.get_feat(events, \"VVFatJetPt\", bb_mask), np.linspace(250, 2000, 31), weights=weight_lp_sys_down, histtype=\"step\", label=\"Post-LP Sys Down\")\n",
-    "plt.legend()"
+    "plt.rcParams.update({\"font.size\": 24})\n",
+    "\n",
+    "control_plot_vars = [\n",
+    "    # ShapeVar(\n",
+    "    #     var=\"bbFatJetPt\", label=r\"$p^{bb}_T$ (GeV)\", bins=[20, 300, 2300], significance_dir=\"right\"\n",
+    "    # ),\n",
+    "    # ShapeVar(var=\"bbFatJetParticleNetMass\", label=r\"$m^{bb}_{reg}$ (GeV)\", bins=[20, 50, 250]),\n",
+    "    ShapeVar(var=\"BDTScore\", label=r\"BDT Score\", bins=[20, 0, 1]),\n",
+    "]\n",
+    "\n",
+    "\n",
+    "for sig_key in nonres_samples.keys():\n",
+    "    events = events_dict[sig_key]\n",
+    "    bb_mask = bb_masks[sig_key]\n",
+    "    weight = events[\"finalWeight\"].values.squeeze()\n",
+    "    weight_lp = weight * events[\"VV_lp_sf_nom\"].values.squeeze()\n",
+    "    weight_lp_sys_up = weight * events[\"VV_lp_sf_sys_up\"].values.squeeze()\n",
+    "    weight_lp_sys_down = weight * events[\"VV_lp_sf_sys_down\"].values.squeeze()\n",
+    "\n",
+    "    for shape_var in control_plot_vars:\n",
+    "        h = Hist(\n",
+    "            hist.axis.StrCategory([\"Pre-LP\", \"Post-LP\"], name=\"lptype\"),\n",
+    "            shape_var.axis,\n",
+    "            storage=\"weight\",\n",
+    "        )\n",
+    "\n",
+    "        h.fill(\n",
+    "            **{\n",
+    "                \"lptype\": \"Pre-LP\",\n",
+    "                shape_var.var: utils.get_feat(events, shape_var.var, bb_mask),\n",
+    "                \"weight\": weight,\n",
+    "            }\n",
+    "        )\n",
+    "\n",
+    "        h.fill(\n",
+    "            **{\n",
+    "                \"lptype\": \"Post-LP\",\n",
+    "                shape_var.var: utils.get_feat(events, shape_var.var, bb_mask),\n",
+    "                \"weight\": weight_lp,\n",
+    "            }\n",
+    "        )\n",
+    "\n",
+    "        for norm in [True, False]:\n",
+    "            fig, ax = plt.subplots(figsize=(10, 10))\n",
+    "\n",
+    "            for l in [\"Pre-LP\", \"Post-LP\"]:\n",
+    "                plot_hist = (h[l, ...] / h[l, ...].values().sum()) if norm else h[l, ...]\n",
+    "                hep.histplot(\n",
+    "                    plot_hist,\n",
+    "                    ax=ax,\n",
+    "                    histtype=\"step\",\n",
+    "                    label=l,\n",
+    "                )\n",
+    "\n",
+    "            plt.title(plotting.sample_label_map[sig_key], y=1.08)\n",
+    "            plt.xlabel(shape_var.label)\n",
+    "\n",
+    "            ylabel = \"Normalized Events\" if norm else \"Events\"\n",
+    "            plt.ylabel(ylabel)\n",
+    "            plt.legend()\n",
+    "            hep.cms.label(label=\"Preliminary\", data=False, com=13)\n",
+    "\n",
+    "            norm_str = \"_norm\" if norm else \"\"\n",
+    "            plt.savefig(\n",
+    "                plot_dir / f\"{year}_{shape_var.var}_{sig_key}_lpsf{norm_str}.pdf\",\n",
+    "                bbox_inches=\"tight\",\n",
+    "            )\n",
+    "            plt.show()"
    ]
   },
   {
diff --git a/src/HHbbVV/postprocessing/TrainBDT.py b/src/HHbbVV/postprocessing/TrainBDT.py
index c5ecc9d2..3a5dfd52 100644
--- a/src/HHbbVV/postprocessing/TrainBDT.py
+++ b/src/HHbbVV/postprocessing/TrainBDT.py
@@ -584,6 +584,7 @@ def plot_mass_shapes(train, test, sig_keys, model_dir, training_keys):
                         shape_var,
                         key,
                         f"BDTScore{sig_key}",
+                        "BDTScore",
                         cuts,
                         year,
                         weight_key,
@@ -604,6 +605,7 @@ def plot_mass_shapes(train, test, sig_keys, model_dir, training_keys):
                         shape_var,
                         key,
                         f"BDTScore{sig_key}",
+                        "BDTScore",
                         cuts,
                         "all",
                         weight_key,
@@ -618,6 +620,7 @@ def plot_mass_shapes(train, test, sig_keys, model_dir, training_keys):
                         shape_var,
                         key,
                         f"BDTScore{sig_key}",
+                        "BDTScore",
                         cuts,
                         "all",
                         weight_key,
diff --git a/src/HHbbVV/postprocessing/plotting.py b/src/HHbbVV/postprocessing/plotting.py
index e60d2dd8..35af0256 100644
--- a/src/HHbbVV/postprocessing/plotting.py
+++ b/src/HHbbVV/postprocessing/plotting.py
@@ -835,7 +835,7 @@ def rocCurve(
 
     plt.xlim(*xlim)
     plt.ylim(*ylim)
-    hep.cms.label(data=False, rlabel="(13 TeV)")
+    hep.cms.label(data=False, label="Preliminary", rlabel="(13 TeV)")
 
     if len(name):
         plt.savefig(plot_dir / f"{name}.pdf", bbox_inches="tight")
@@ -853,19 +853,35 @@ def _find_nearest(array, value):
 
 
 def multiROCCurveGrey(
-    rocs: dict, sig_effs: list[float], plot_dir: Path, name: str = "", show: bool = False
+    rocs: dict,
+    sig_effs: list[float],
+    plot_dir: Path,
+    xlim=None,
+    ylim=None,
+    name: str = "",
+    show: bool = False,
 ):
-    xlim = [0, 1]
-    ylim = [1e-6, 1]
+    """_summary_
+
+    Args:
+        rocs (dict): {label: {sig_key1: roc, sig_key2: roc, ...}, ...} where label is e.g Test or Train
+        sig_effs (list[float]): plot signal efficiency lines
+    """
+    if ylim is None:
+        ylim = [1e-06, 1]
+    if xlim is None:
+        xlim = [0, 1]
     line_style = {"colors": "lightgrey", "linestyles": "dashed"}
 
     plt.figure(figsize=(12, 12))
     for roc_sigs in rocs.values():
         for roc in roc_sigs.values():
+            auc_label = f" (AUC: {roc['auc']:.2f})" if "auc" in roc else ""
+
             plt.plot(
                 roc["tpr"],
                 roc["fpr"],
-                label=roc["label"],
+                label=roc["label"] + auc_label,
                 linewidth=2,
             )
 
@@ -874,13 +890,14 @@ def multiROCCurveGrey(
                 plt.hlines(y=y, xmin=0, xmax=sig_eff, **line_style)
                 plt.vlines(x=sig_eff, ymin=0, ymax=y, **line_style)
 
-    hep.cms.label(data=False, rlabel="")
+    hep.cms.label(data=False, label="Preliminary", rlabel="(13 TeV)")
     plt.yscale("log")
     plt.xlabel("Signal efficiency")
     plt.ylabel("Background efficiency")
     plt.xlim(*xlim)
     plt.ylim(*ylim)
     plt.legend(loc="upper left")
+    plt.grid(which="major")
 
     if len(name):
         plt.savefig(plot_dir / f"{name}.pdf", bbox_inches="tight")
@@ -1111,6 +1128,7 @@ def cutsLinePlot(
     shape_var: utils.ShapeVar,
     plot_key: str,
     cut_var: str,
+    cut_var_label: str,
     cuts: list[float],
     year: str,
     weight_key: str,
@@ -1154,7 +1172,7 @@ def cutsLinePlot(
         hep.histplot(
             hists[cut],
             yerr=True,
-            label=f"BDTScore >= {cut}",
+            label=f"{cut_var_label} >= {cut}",
             ax=ax,
             linewidth=2,
             alpha=0.8,
diff --git a/src/HHbbVV/postprocessing/postprocessing.py b/src/HHbbVV/postprocessing/postprocessing.py
index b9774902..5275dbf5 100644
--- a/src/HHbbVV/postprocessing/postprocessing.py
+++ b/src/HHbbVV/postprocessing/postprocessing.py
@@ -1298,6 +1298,7 @@ def lpsfs(
             assert bb_masks is not None, "Need bb_masks to calculate LP SFs for single year"
 
             events_dict[sig_key] = postprocess_lpsfs(events_dict[sig_key])
+            continue
 
         for lp_region in lp_selection_regions:
             rlabel = lp_region.lpsf_region
@@ -1533,6 +1534,7 @@ def plot_bdt_sculpting(
                 shape_var,
                 key,
                 f"BDTScore{var}",
+                r"$BDT_{ggF}$" if var == "" else r"$BDT_{VBF}$",
                 cuts,
                 year,
                 weight_key,