From a52e5f468f40aefd71b05190d39a6db1f6d91da9 Mon Sep 17 00:00:00 2001 From: rkansal47 Date: Thu, 22 Feb 2024 16:45:39 -0600 Subject: [PATCH] semiresolved veto gen study and mass info --- src/HHbbVV/bash/run_local.sh | 25 +- .../processors/SemiResolvedVetoGenStudy.ipynb | 430 ++++++++++++++++++ src/HHbbVV/processors/bbVVSkimmer.py | 37 +- 3 files changed, 474 insertions(+), 18 deletions(-) create mode 100644 src/HHbbVV/processors/SemiResolvedVetoGenStudy.ipynb diff --git a/src/HHbbVV/bash/run_local.sh b/src/HHbbVV/bash/run_local.sh index 221a80f9..c3e3ac42 100755 --- a/src/HHbbVV/bash/run_local.sh +++ b/src/HHbbVV/bash/run_local.sh @@ -3,20 +3,25 @@ mkdir -p tmp/test_outputs/ year=2017 -extraargs="--no-inference" +extraargs="" +# extraargs="--no-inference" python -W ignore src/run.py --processor skimmer --year $year --samples HH --subsamples GluGluToHHTobbVV_node_cHHH1 --save-systematics --starti 0 --endi 1 $extraargs mv "0-1.parquet" tmp/test_outputs/hhbbvv.parquet mv "outfiles/0-1.pkl" tmp/test_outputs/hhbbvv.pkl -python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs -mv "0-1.parquet" tmp/test_outputs/ttsl.parquet -mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl +python -W ignore src/run.py --processor skimmer --year $year --samples XHY --subsamples NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 --save-systematics --starti 0 --endi 1 $extraargs +mv "0-1.parquet" tmp/test_outputs/xhy.parquet +mv "outfiles/0-1.pkl" tmp/test_outputs/xhy.pkl -python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs -mv "0-1.parquet" tmp/test_outputs/qcd.parquet -mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl +# python -W ignore src/run.py --processor skimmer --year $year --samples TTbar --subsamples TTToSemiLeptonic --save-systematics --starti 0 --endi 1 $extraargs +# mv "0-1.parquet" tmp/test_outputs/ttsl.parquet +# mv "outfiles/0-1.pkl" tmp/test_outputs/ttsl.pkl -python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs -mv "0-1.parquet" tmp/test_outputs/data.parquet -mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl \ No newline at end of file +# python -W ignore src/run.py --processor skimmer --year $year --samples QCD --subsamples QCD_HT1000to1500 --save-systematics --starti 0 --endi 1 $extraargs +# mv "0-1.parquet" tmp/test_outputs/qcd.parquet +# mv "outfiles/0-1.pkl" tmp/test_outputs/qcd.pkl + +# python -W ignore src/run.py --processor skimmer --year $year --samples "JetHT$year" --subsamples "JetHT_Run${year}D" --save-systematics --starti 0 --endi 1 $extraargs +# mv "0-1.parquet" tmp/test_outputs/data.parquet +# mv "outfiles/0-1.pkl" tmp/test_outputs/data.pkl \ No newline at end of file diff --git a/src/HHbbVV/processors/SemiResolvedVetoGenStudy.ipynb b/src/HHbbVV/processors/SemiResolvedVetoGenStudy.ipynb new file mode 100644 index 00000000..8f12ca08 --- /dev/null +++ b/src/HHbbVV/processors/SemiResolvedVetoGenStudy.ipynb @@ -0,0 +1,430 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Studying efficiency / mistag rate of a veto of the semi-resolved X-HY channel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import uproot\n", + "import awkward as ak\n", + "from coffea import nanoevents\n", + "from coffea.nanoevents.methods.base import NanoEventsArray\n", + "from coffea.analysis_tools import Weights, PackedSelection\n", + "from coffea.nanoevents.methods import nanoaod\n", + "from coffea.nanoevents.methods import vector\n", + "from coffea.lookup_tools.dense_lookup import dense_lookup\n", + "\n", + "ak.behavior.update(vector.behavior)\n", + "\n", + "import pickle, json, gzip\n", + "import numpy as np\n", + "\n", + "from typing import Optional, List, Dict\n", + "from copy import copy\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import mplhep as hep\n", + "from matplotlib import colors\n", + "\n", + "from utils import pad_val" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "events = nanoevents.NanoEventsFactory.from_root(\n", + " \"root://cmseos.fnal.gov///store/user/lpcpfnano/ammitra/v2_3/2017/XHY/NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250_TuneCP5_13TeV-madgraph-pythia8/NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250/230323_182603/0000/nano_mc2017_1-1.root\",\n", + " schemaclass=nanoevents.NanoAODSchema,\n", + ").events()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fatjets = events.FatJet\n", + "txbb = fatjets.particleNetMD_Xbb / (fatjets.particleNetMD_QCD + fatjets.particleNetMD_Xbb)\n", + "Wqq_score = (fatjets.particleNetMD_Xqq + fatjets.particleNetMD_Xcc) / (\n", + " fatjets.particleNetMD_Xqq + fatjets.particleNetMD_Xcc + fatjets.particleNetMD_QCD\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wtg2 = ak.sum(Wqq_score[:, :3] >= 0.8, axis=1) >= 2\n", + "sorted_wqq_score = np.argsort(pad_val(Wqq_score, 3, 0, 1), axis=1)\n", + "lowest_wqq_index = np.argsort(pad_val(Wqq_score, 3, 0, 1), axis=1)[:, 0]\n", + "ltxbb = pad_val(txbb, 3, 0, 1)[np.arange(len(fatjets)), lowest_wqq_index]\n", + "passveto = wtg2 & (ltxbb >= 0.98)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.mean(passveto)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fj1 = fatjets[passveto][np.arange(len(fatjets[passveto])), sorted_wqq_score[passveto][:, 2]]\n", + "fj2 = fatjets[passveto][np.arange(len(fatjets[passveto])), sorted_wqq_score[passveto][:, 1]]\n", + "fj3 = fatjets[passveto][np.arange(len(fatjets[passveto])), sorted_wqq_score[passveto][:, 0]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.stack((fj1.pt, fj2.pt)).to_numpy().T" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(fj1.delta_r(fj2), np.arange(0, 4, 0.2), histtype=\"step\")\n", + "plt.xlabel(\"dR between W-tagged fatjets\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure(figsize=(8, 8))\n", + "plt.hist((fj1 + fj2).mass, np.arange(0, 4000, 400), histtype=\"step\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "d_PDGID = 1\n", + "u_PDGID = 2\n", + "s_PDGID = 3\n", + "c_PDGID = 4\n", + "b_PDGID = 5\n", + "g_PDGID = 21\n", + "TOP_PDGID = 6\n", + "\n", + "ELE_PDGID = 11\n", + "vELE_PDGID = 12\n", + "MU_PDGID = 13\n", + "vMU_PDGID = 14\n", + "TAU_PDGID = 15\n", + "vTAU_PDGID = 16\n", + "\n", + "G_PDGID = 22\n", + "Z_PDGID = 23\n", + "W_PDGID = 24\n", + "HIGGS_PDGID = 25\n", + "Y_PDGID = 35\n", + "\n", + "b_PDGIDS = [511, 521, 523]\n", + "\n", + "GRAV_PDGID = 39\n", + "\n", + "GEN_FLAGS = [\"fromHardProcess\", \"isLastCopy\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "higgs = events.GenPart[\n", + " (abs(events.GenPart.pdgId) == HIGGS_PDGID) * events.GenPart.hasFlags(GEN_FLAGS)\n", + "]\n", + "is_bb = abs(higgs.children.pdgId) == b_PDGID\n", + "has_bb = ak.sum(ak.flatten(is_bb, axis=2), axis=1) == 2\n", + "\n", + "bb = ak.flatten(higgs.children[is_bb], axis=2)\n", + "\n", + "# gen Y and kids\n", + "Ys = events.GenPart[(abs(events.GenPart.pdgId) == Y_PDGID) * events.GenPart.hasFlags(GEN_FLAGS)]\n", + "is_VV = (abs(Ys.children.pdgId) == W_PDGID) + (abs(Ys.children.pdgId) == Z_PDGID)\n", + "has_VV = ak.sum(ak.flatten(is_VV, axis=2), axis=1) == 2\n", + "\n", + "VV = ak.flatten(Ys.children[is_VV], axis=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(VV[passveto][:, 0].delta_r(VV[passveto][:, 1]), np.arange(0, 4, 0.2), histtype=\"step\")\n", + "plt.xlabel(\"dR between gen Ws\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(\n", + " np.min((fj1.delta_r(Ys[passveto]), fj2.delta_r(Ys[passveto])), axis=0),\n", + " np.arange(0, 4, 0.2),\n", + " histtype=\"step\",\n", + ")\n", + "plt.xlabel(\"dR between closer w-tagged fatjet and gen Y\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(\n", + " np.max((fj1.delta_r(Ys[passveto]), fj2.delta_r(Ys[passveto])), axis=0),\n", + " np.arange(0, 4, 0.2),\n", + " histtype=\"step\",\n", + ")\n", + "plt.xlabel(\"dR between farther w-tagged fatjet and gen Y\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(fj3.delta_r(higgs[passveto]), np.arange(0, 4, 0.2), histtype=\"step\")\n", + "plt.xlabel(\"dR between bb-tagged fatjet and gen Higgs\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fj1ws = (fj1.particleNetMD_Xqq + fj1.particleNetMD_Xcc) / (\n", + " fj1.particleNetMD_Xqq + fj1.particleNetMD_Xcc + fj1.particleNetMD_QCD\n", + ")\n", + "fj2ws = (fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc) / (\n", + " fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc + fj2.particleNetMD_QCD\n", + ")\n", + "\n", + "fj1closer = ak.flatten(fj1.delta_r(Ys[passveto]) <= fj2.delta_r(Ys[passveto]))\n", + "\n", + "plt.title(\"Higgs-matched w-tagged fatjet\")\n", + "plt.hist(\n", + " np.concatenate((fj1ws[fj1closer], fj2ws[~fj1closer])), np.arange(0.8, 1, 0.02), histtype=\"step\"\n", + ")\n", + "plt.xlabel(\"Wqq Score\")\n", + "plt.show()\n", + "\n", + "plt.title(\"Non-Higgs-matched w-tagged fatjet\")\n", + "plt.hist(\n", + " np.concatenate((fj1ws[~fj1closer], fj2ws[fj1closer])), np.arange(0.8, 1, 0.02), histtype=\"step\"\n", + ")\n", + "plt.xlabel(\"Wqq Score\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fj1ws = (fj1.particleNetMD_Xqq + fj1.particleNetMD_Xcc) / (\n", + " fj1.particleNetMD_Xqq + fj1.particleNetMD_Xcc + fj1.particleNetMD_QCD\n", + ")\n", + "fj2ws = (fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc) / (\n", + " fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc + fj2.particleNetMD_QCD\n", + ")\n", + "\n", + "fj1closer = ak.flatten(fj1.delta_r(Ys[passveto]) <= fj2.delta_r(Ys[passveto]))\n", + "\n", + "plt.title(\"Higgs-matched w-tagged fatjet\")\n", + "plt.hist(np.concatenate((fj1.msoftdrop[fj1closer], fj2.msoftdrop[~fj1closer])), histtype=\"step\")\n", + "plt.xlabel(\"FatJet SD Mass\")\n", + "plt.show()\n", + "\n", + "plt.title(\"Non-Higgs-matched w-tagged fatjet\")\n", + "plt.hist(np.concatenate((fj1.msoftdrop[~fj1closer], fj2.msoftdrop[fj1closer])), histtype=\"step\")\n", + "plt.xlabel(\"FatJet SD Mass\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fj1ws = (fj1.particleNetMD_Xqq + fj1.particleNetMD_Xcc) / (\n", + " fj1.particleNetMD_Xqq + fj1.particleNetMD_Xcc + fj1.particleNetMD_QCD\n", + ")\n", + "fj2ws = (fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc) / (\n", + " fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc + fj2.particleNetMD_QCD\n", + ")\n", + "\n", + "fj1closer = ak.flatten(fj1.delta_r(Ys[passveto]) <= fj2.delta_r(Ys[passveto]))\n", + "\n", + "plt.title(\"Higgs-matched w-tagged fatjet\")\n", + "plt.hist(\n", + " np.concatenate((fj1.pt[fj1closer], fj2.pt[~fj1closer])),\n", + " np.arange(0, 2000, 200),\n", + " histtype=\"step\",\n", + ")\n", + "plt.xlabel(\"FatJet pT\")\n", + "plt.show()\n", + "\n", + "plt.title(\"Non-Higgs-matched w-tagged fatjet\")\n", + "plt.hist(\n", + " np.concatenate((fj1.pt[~fj1closer], fj2.pt[fj1closer])),\n", + " np.arange(0, 2000, 200),\n", + " histtype=\"step\",\n", + ")\n", + "plt.xlabel(\"FatJet pT\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fatjets.t" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fj1t2 = fj1.tau2 / fj1.tau1\n", + "fj2t2 = fj2.tau2 / fj2.tau1\n", + "\n", + "fj1closer = ak.flatten(fj1.delta_r(Ys[passveto]) <= fj2.delta_r(Ys[passveto]))\n", + "\n", + "plt.title(\"Higgs-matched w-tagged fatjet\")\n", + "plt.hist(np.concatenate((fj1t2[fj1closer], fj2t2[~fj1closer])), histtype=\"step\")\n", + "plt.xlabel(\"tau 2 / 1\")\n", + "plt.show()\n", + "\n", + "plt.title(\"Non-Higgs-matched w-tagged fatjet\")\n", + "plt.hist(np.concatenate((fj1t2[~fj1closer], fj2t2[fj1closer])), histtype=\"step\")\n", + "plt.xlabel(\"tau 2 / 1\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fj1t2 = fj1.tau4 / fj1.tau2\n", + "fj2t2 = fj2.tau4 / fj2.tau2\n", + "\n", + "fj1closer = ak.flatten(fj1.delta_r(Ys[passveto]) <= fj2.delta_r(Ys[passveto]))\n", + "\n", + "plt.title(\"Higgs-matched w-tagged fatjet\")\n", + "plt.hist(np.concatenate((fj1t2[fj1closer], fj2t2[~fj1closer])), histtype=\"step\")\n", + "plt.xlabel(\"tau 4 / 2\")\n", + "plt.show()\n", + "\n", + "plt.title(\"Non-Higgs-matched w-tagged fatjet\")\n", + "plt.hist(np.concatenate((fj1t2[~fj1closer], fj2t2[fj1closer])), histtype=\"step\")\n", + "plt.xlabel(\"tau 4 / 2\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ak.flatten(fj1closer)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fj2ws = (fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc) / (\n", + " fj2.particleNetMD_Xqq + fj2.particleNetMD_Xcc + fj2.particleNetMD_QCD\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = plt.hist(ltxbb[wtg2], bins, histtype=\"step\", label=label, density=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "bbVV", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/HHbbVV/processors/bbVVSkimmer.py b/src/HHbbVV/processors/bbVVSkimmer.py index 52965028..077e154d 100644 --- a/src/HHbbVV/processors/bbVVSkimmer.py +++ b/src/HHbbVV/processors/bbVVSkimmer.py @@ -122,6 +122,7 @@ class bbVVSkimmer(processor.ProcessorABC): "VBFJetPt", "VBFJetPhi", "VBFJetMass", + "nGoodVBFJets", "ak8FatJetHbb", "ak8FatJetHVV", "ak8FatJetHVVNumProngs", @@ -133,6 +134,10 @@ class bbVVSkimmer(processor.ProcessorABC): "nGoodElectronsHbb", "nGoodMuonsHH", "nGoodMuonsHbb", + "ak8FatJetNumWTagged", + "ak8FatJetLowestWTaggedTxbb", + "ak8FatJetWTaggedMsd", + "ak8FatJetWTaggedParticleNetMass", ] for shift in jec_shifts: @@ -475,14 +480,6 @@ def process(self, events: ak.Array): ) add_selection("ak8bb_txbb", txbb_cut, *selection_args) - # XHY->bbWW semi-resolved channel veto - Wqq_score = (fatjets.particleNetMD_Xqq + fatjets.particleNetMD_Xcc) / ( - fatjets.particleNetMD_Xqq + fatjets.particleNetMD_Xcc + fatjets.particleNetMD_QCD - ) - - print("Num W tagged:", ak.sum(Wqq_score >= 0.8, axis=1).to_numpy()) - skimmed_events["ak8FatJetNumWTagged"] = ak.sum(Wqq_score >= 0.8, axis=1).to_numpy() - # 2018 HEM cleaning # https://indico.cern.ch/event/1249623/contributions/5250491/attachments/2594272/4477699/HWW_0228_Draft.pdf if year == "2018": @@ -589,6 +586,30 @@ def process(self, events: ak.Array): skimmed_events["nGoodMuonsHbb"] = nmuonsHbb.to_numpy() skimmed_events["nGoodMuonsHH"] = nmuonsHH.to_numpy() + # XHY->bbWW semi-resolved channel veto + Wqq_score = (fatjets.particleNetMD_Xqq + fatjets.particleNetMD_Xcc) / ( + fatjets.particleNetMD_Xqq + fatjets.particleNetMD_Xcc + fatjets.particleNetMD_QCD + ) + + skimmed_events["ak8FatJetNumWTagged"] = ak.sum(Wqq_score[:, :3] >= 0.8, axis=1).to_numpy() + + sorted_wqq_score = np.argsort(pad_val(Wqq_score, 3, 0, 1), axis=1) + + # get TXbb score of the lowest-Wqq-tagged jet + skimmed_events["ak8FatJetLowestWTaggedTxbb"] = pad_val(fatjets["Txbb"], 3, 0, 1)[ + np.arange(len(fatjets)), sorted_wqq_score[:, 0] + ] + + # save both SD and regressed masses of the two W-tagged AK8 jets + # Amitav will optimize mass cut soon + + mass_dict = {"particleNet_mass": "ParticleNetMass", "msoftdrop": "Msd"} + for mkey, mlabel in mass_dict.items(): + mass_vals = pad_val(fatjets[mkey], 3, 0, 1) + mv_fj1 = mass_vals[np.arange(len(fatjets)), sorted_wqq_score[:, 2]] + mv_fj2 = mass_vals[np.arange(len(fatjets)), sorted_wqq_score[:, 1]] + skimmed_events[f"ak8FatJetWTagged{mlabel}"] = np.stack([mv_fj1, mv_fj2]).T + ###################### # Remove branches ######################