diff --git a/inference_scans/run_law.sh b/inference_scans/run_law.sh index 0734ee1f..d4f5f102 100755 --- a/inference_scans/run_law.sh +++ b/inference_scans/run_law.sh @@ -61,7 +61,7 @@ while true; do ;; --vbf) pois="r_qqhh" - vbfargs="--x-min 0.1 --x-max 10 --parameter-values C2V=0" + vbfargs="--x-min 0.1 --x-max 10 --parameter-values C2V=1" ;; --noggf) pois="r_qqhh" @@ -128,13 +128,13 @@ fi if [ $limits_at_point = 1 ]; then law run PlotUpperLimitsAtPoint \ $common_args $vbfargs \ - --datacard-names bbVV \ + --datacard-names bbVV \ --multi-datacards $cards \ --pois $pois \ --show-parameters kl,kt,C2V,CV \ --UpperLimits-workflow $workflow \ --UpperLimits-tasks-per-job 1 \ - --UpperLimits-custom-args="--cl $cl" \ + --UpperLimits-custom-args="--cl $cl --verbose 9 --rMax 10" \ --x-log \ --h-lines 1 \ --save-hep-data True \ diff --git a/src/HHbbVV/postprocessing/InterpolateSignal.ipynb b/src/HHbbVV/postprocessing/InterpolateSignal.ipynb new file mode 100644 index 00000000..1b3a5be3 --- /dev/null +++ b/src/HHbbVV/postprocessing/InterpolateSignal.ipynb @@ -0,0 +1,353 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle, json, gzip\n", + "import numpy as np\n", + "import hist\n", + "from hist import Hist\n", + "\n", + "from typing import Optional, List, Dict\n", + "from copy import copy\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import mplhep as hep\n", + "from matplotlib import colors\n", + "\n", + "from tqdm import tqdm\n", + "\n", + "from pathlib import Path\n", + "import os\n", + "\n", + "from HHbbVV.hh_vars import years, bg_keys\n", + "from HHbbVV.postprocessing import datacardHelpers\n", + "from postprocessing import nonres_shape_vars as shape_vars\n", + "import plotting\n", + "\n", + "plt.rcParams.update({\"font.size\": 16})\n", + "plt.style.use(hep.style.CMS)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_dir = Path(\"../../plots/Interpolate/24Aug1\")\n", + "plot_dir.mkdir(parents=True, exist_ok=True)\n", + "\n", + "templates_dir = Path(\"templates/24Apr26NonresBDT995AllSigs\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load and process templates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "templates_dict: dict[str, dict[str, Hist]] = {}\n", + "\n", + "for year in years:\n", + " with (templates_dir / f\"{year}_templates.pkl\").open(\"rb\") as f:\n", + " templates_dict[year] = datacardHelpers.rem_neg(pickle.load(f))\n", + "\n", + "templates = datacardHelpers.sum_templates(templates_dict, years)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vbf_keys = [\n", + " \"VBFHHbbVV\",\n", + " \"qqHH_CV_1_C2V_1_kl_0_HHbbVV\",\n", + " \"qqHH_CV_1_C2V_1_kl_2_HHbbVV\",\n", + " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\",\n", + " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\",\n", + " \"qqHH_CV_1p5_C2V_1_kl_1_HHbbVV\",\n", + "]\n", + "\n", + "vbf_hists = {}\n", + "\n", + "for key, h in templates.items():\n", + " vbf_hists[key] = []\n", + " for vbf_key in vbf_keys:\n", + " vbf_hists[key].append(h[vbf_key, ...])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interpolation coefficients" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import sympy\n", + "\n", + "csamples = [\n", + " # CV, C2V, kl\n", + " (1.0, 1.0, 1.0),\n", + " (1.0, 1.0, 0.0),\n", + " (1.0, 1.0, 2.0),\n", + " (1.0, 0.0, 1.0),\n", + " (1.0, 2.0, 1.0),\n", + " # (0.5, 1.0, 1.0),\n", + " (1.5, 1.0, 1.0),\n", + "]\n", + "\n", + "M = sympy.Matrix(\n", + " [\n", + " [\n", + " CV**2 * kl**2,\n", + " CV**4,\n", + " C2V**2,\n", + " CV**3 * kl,\n", + " CV * C2V * kl,\n", + " CV**2 * C2V,\n", + " ]\n", + " for i, (CV, C2V, kl) in enumerate(csamples)\n", + " ]\n", + ")\n", + "\n", + "# the vector of couplings\n", + "CV, C2V, kl = sympy.symbols(\"CV C2V kl\")\n", + "c = sympy.Matrix(\n", + " [\n", + " [CV**2 * kl**2],\n", + " [CV**4],\n", + " [C2V**2],\n", + " [CV**3 * kl],\n", + " [CV * C2V * kl],\n", + " [CV**2 * C2V],\n", + " ]\n", + ")\n", + "\n", + "# the vector of symbolic sample cross sections\n", + "s = sympy.Matrix([[sympy.Symbol(\"xs{}\".format(i))] for i in range(len(csamples))])\n", + "\n", + "# actual computation, i.e., matrix inversion and multiplications with vectors\n", + "M_inv = M.pinv()\n", + "coeffs = c.transpose() * M_inv\n", + "sigma = coeffs * s" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_hist_interp(cv, c2v, Kl, hists):\n", + " sigma_val = sigma.subs({CV: cv, C2V: c2v, kl: Kl})\n", + " counts = []\n", + " errs = []\n", + " for i in range(len(hists[0].values())):\n", + " count = np.array(\n", + " sigma_val.subs(\n", + " {sympy.Symbol(f\"xs{j}\"): hists[j].values()[i] for j in range(len(vbf_keys))}\n", + " )\n", + " )[0][0]\n", + " err = np.array(\n", + " sigma_val.subs(\n", + " {\n", + " sympy.Symbol(f\"xs{j}\"): np.nan_to_num(\n", + " np.sqrt(hists[j].variances()[i]) / hists[j].values()[i]\n", + " )\n", + " for j in range(len(vbf_keys))\n", + " }\n", + " )\n", + " )[0][0]\n", + "\n", + " if count < 1e-12:\n", + " count = 0\n", + "\n", + " counts.append(count)\n", + " errs.append(err)\n", + "\n", + " return np.array(counts), np.array(errs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add interpolated signals to templates" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "interp_points = np.arange(-1.0, 3.1, 0.1)\n", + "samples = [f\"qqHH_CV_1_C2V_{c:.1f}_kl_1_HHbbVV\" for c in interp_points]\n", + "\n", + "interp_hists = {}\n", + "\n", + "for region in [\"passvbf\", \"passggf\", \"fail\"]:\n", + " print(region)\n", + " h = Hist(\n", + " hist.axis.StrCategory(samples, name=\"Sample\"),\n", + " *templates[\"passvbf\"].axes[1:],\n", + " storage=hist.storage.Weight(),\n", + " )\n", + " for i, c in tqdm(enumerate(interp_points)):\n", + " c_h, c_err = get_hist_interp(1.0, c, 1.0, vbf_hists[region])\n", + " h.values()[i, :] = c_h\n", + " h.variances()[i, :] = (c_err * c_h) ** 2\n", + "\n", + " interp_hists[region] = h" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ctemplates = {}\n", + "\n", + "for region in [\"passvbf\", \"passggf\", \"fail\"]:\n", + " template = templates[region]\n", + " # combined sig + bg samples\n", + " csamples = list(template.axes[0]) + samples\n", + "\n", + " # new hist with all samples\n", + " ctemplate = Hist(\n", + " hist.axis.StrCategory(csamples, name=\"Sample\"),\n", + " *template.axes[1:],\n", + " storage=\"weight\",\n", + " )\n", + "\n", + " # add background hists\n", + " for sample in template.axes[0]:\n", + " sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n", + " ctemplate.view(flow=True)[sample_key_index, ...] = template[sample, ...].view(flow=True)\n", + "\n", + " # add signal hists\n", + " for sample in samples:\n", + " sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n", + " ctemplate.view(flow=True)[sample_key_index, ...] = interp_hists[region][sample, ...].view(\n", + " flow=True\n", + " )\n", + "\n", + " ctemplates[region] = ctemplate" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "selection_regions = {\n", + " \"passvbf\": \"VBF\",\n", + " \"passggf\": \"ggF\",\n", + " # \"fail\": \"Fail\",\n", + "}\n", + "\n", + "ylims = {\"passggf\": 200, \"passvbf\": 100, \"fail\": 7e5}\n", + "\n", + "sig_scale_dict = {\n", + " # \"HHbbVV\": 100,\n", + " # \"VBFHHbbVV\": 2000,\n", + " \"qqHH_CV_1_C2V_1.6_kl_1_HHbbVV\": 1,\n", + " \"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\": 1,\n", + " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 1,\n", + " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": 1,\n", + "}\n", + "\n", + "for region, region_label in selection_regions.items():\n", + " pass_region = region.startswith(\"pass\")\n", + " for i, shape_var in enumerate(shape_vars):\n", + " plot_params = {\n", + " \"hists\": ctemplates[region],\n", + " \"sig_keys\": list(sig_scale_dict.keys()),\n", + " \"bg_keys\": [],\n", + " \"sig_scale_dict\": sig_scale_dict if pass_region else None,\n", + " \"show\": True,\n", + " \"year\": \"all\",\n", + " \"ylim\": ylims[region],\n", + " \"title\": f\"Pre-fit {region_label} Region\",\n", + " \"name\": f\"{plot_dir}/interp_{region}_{shape_var.var}_signal_log.pdf\",\n", + " \"ncol\": 2, # if region == \"passvbf\" else 1,\n", + " \"ratio_ylims\": [0, 5] if region == \"passvbf\" else [0, 2],\n", + " \"cmslabel\": \"Preliminary\",\n", + " \"plot_data\": False,\n", + " \"log\": True,\n", + " }\n", + "\n", + " plotting.ratioHistPlot(**plot_params, data_err=True)\n", + "\n", + "# break\n", + "# break" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python310", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/HHbbVV/postprocessing/plotting.py b/src/HHbbVV/postprocessing/plotting.py index 20f57791..d6953d7f 100644 --- a/src/HHbbVV/postprocessing/plotting.py +++ b/src/HHbbVV/postprocessing/plotting.py @@ -360,14 +360,15 @@ def ratioHistPlot( ax.set_ylabel(y_label) # background samples - hep.histplot( - [hists[sample, :] for sample in bg_keys], - ax=ax, - histtype="fill", - stack=True, - label=bg_labels, - color=bg_colours, - ) + if len(bg_keys): + hep.histplot( + [hists[sample, :] for sample in bg_keys], + ax=ax, + histtype="fill", + stack=True, + label=bg_labels, + color=bg_colours, + ) # signal samples if len(sig_scale_dict): @@ -482,8 +483,8 @@ def ratioHistPlot( # new: plotting data errors (black lines) and background errors (shaded) separately yerr = np.nan_to_num( np.abs( - poisson_interval(pre_divide_hists[data_key, ...]) - - pre_divide_hists[data_key, ...] + poisson_interval(pre_divide_hists[data_key, ...].values()) + - pre_divide_hists[data_key, ...].values() ) / (bg_tot.values() + 1e-5) )