Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Mar 24, 2024
1 parent 3535605 commit 907f8e3
Show file tree
Hide file tree
Showing 26 changed files with 182 additions and 64 deletions.
88 changes: 37 additions & 51 deletions src/HHbbVV/postprocessing/PostProcessRes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,12 @@
"outputs": [],
"source": [
"# del nonres_samples[\"VBFHHbbVV\"]\n",
"nonres_sig_keys = [\"HHbbVV\", \"VBFHHbbVV\"]\n",
"nonres_sig_keys = [\n",
" \"HHbbVV\",\n",
" \"VBFHHbbVV\",\n",
" \"qqHH_CV_1_C2V_1_kl_2_HHbbVV\",\n",
" \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\",\n",
"]\n",
"nonres_samples = {key: nonres_samples[key] for key in nonres_sig_keys}\n",
"\n",
"# bg_keys = [\"QCD\", \"TT\", \"ST\", \"V+Jets\", \"Diboson\"]\n",
Expand All @@ -91,7 +96,7 @@
"outputs": [],
"source": [
"MAIN_DIR = Path(\"../../../\")\n",
"samples_dir = MAIN_DIR / \"../data/skimmer/24Mar6AllYearsBDTVars\"\n",
"samples_dir = MAIN_DIR / \"../data/skimmer/24Mar14UpdateData\"\n",
"# samples_dir = f\"{MAIN_DIR}/../data/skimmer/Feb24\"\n",
"# nonres_signal_samples_dir = f\"{MAIN_DIR}/../data/skimmer/Jun10\"\n",
"# res_signal_samples_dir = f\"{MAIN_DIR}/../data/skimmer/Apr11\"\n",
Expand All @@ -100,7 +105,7 @@
"# res_signal_samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Apr11/\"\n",
"year = \"2016APV\"\n",
"\n",
"date = \"24Mar6\"\n",
"date = \"24Mar18\"\n",
"plot_dir = MAIN_DIR / f\"plots/PostProcessing/{date}/\"\n",
"templates_dir = Path(f\"templates/{date}/\")\n",
"\n",
Expand Down Expand Up @@ -138,6 +143,7 @@
" {**nonres_samples}, # , **res_samples, **samples},\n",
" year,\n",
" load_filters,\n",
" variations=False,\n",
")\n",
"\n",
"utils.add_to_cutflow(events_dict, \"Preselection\", \"finalWeight\", cutflow)\n",
Expand All @@ -150,44 +156,34 @@
"metadata": {},
"outputs": [],
"source": [
"events = events_dict[\"HHbbVV\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list(events.columns)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"events[\"DijetMass\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"events = pd.read_parquet(f\"{samples_dir}/{year}/GluGluToHHTobbVV_node_cHHH1/parquet\")\n",
"events"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list(events.columns)"
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.ticker as mticker\n",
"import mplhep as hep\n",
"\n",
"plt.style.use(hep.style.CMS)\n",
"hep.style.use(\"CMS\")\n",
"formatter = mticker.ScalarFormatter(useMathText=True)\n",
"formatter.set_powerlimits((-3, 3))\n",
"\n",
"# this is needed for some reason to update the font size for the first plot\n",
"fig, ax = plt.subplots(1, 1, figsize=(12, 12))\n",
"plt.rcParams.update({\"font.size\": 24})\n",
"plt.close()\n",
"\n",
"fig, ax = plt.subplots(1, 1, figsize=(8, 8))\n",
"for sample, events in events_dict.items():\n",
" plt.hist(\n",
" events[\"ak8FatJetPt\"].to_numpy().reshape(-1),\n",
" bins=np.arange(300, 1200, 50),\n",
" label=sample,\n",
" histtype=\"step\",\n",
" density=True,\n",
" )\n",
"\n",
"plt.legend()\n",
"plt.xlabel(\"AK8 Jet pT (GeV)\")\n",
"plt.show()"
]
},
{
Expand Down Expand Up @@ -218,16 +214,6 @@
"Control Plots"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"events = events_dict[\"HHbbVV\"]\n",
"events['VBFJetPt'][]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
51 changes: 51 additions & 0 deletions src/HHbbVV/postprocessing/bash_scripts/LPSFYearCheck.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash
# shellcheck disable=SC2086,SC2043

####################################################################################################
# Checking LP SF for each year
# Author: Raghav Kansal
#
# Usage: ./LPSFYearCheck.sh --tag <tag>
#
# Runs `postprocessing.py --templates` once per data-taking year with --no-lp-sf-all-years,
# writing plots to plots/PostProcessing/<tag> and templates to test_templates/<tag><year>.
#
# Notes:
#   - All paths are relative, and postprocessing.py is invoked from the current working
#     directory, so run this from the directory that contains postprocessing.py.
#   - Long-option parsing relies on the enhanced (util-linux) `getopt`.
####################################################################################################

MAIN_DIR="../../.."
data_dir="$MAIN_DIR/../data/skimmer/24Mar14UpdateData"
TAG=""

# getopt prints its own diagnostic for unknown options / missing arguments and returns non-zero;
# stop here instead of continuing with a partially parsed command line.
if ! options=$(getopt -o "" --long "tag:" -- "$@"); then
  exit 1
fi
eval set -- "$options"

while true; do
  case "$1" in
    --tag)
      TAG=$2
      shift 2
      ;;
    --)
      shift
      break
      ;;
    *)
      # Only reachable if getopt produced something unexpected (e.g. a non-enhanced getopt that
      # emitted no "--" terminator); bail out rather than looping forever.
      echo "Invalid option: $1" >&2
      exit 1
      ;;
  esac
done

if [[ -z "$TAG" ]]; then
  echo "Tag required using the --tag option. Exiting" >&2
  exit 1
fi

for year in 2016APV 2016 2017 2018; do
  # Optional extra arguments for a signal-only check of a single sample:
  # --sig-samples qqHH_CV_1_C2V_1_kl_2_HHbbVV --bg-keys "" --no-data \
  # NOTE(review): --no-lp-sf-all-years presumably derives the LP SF from each year separately
  # rather than from all years combined - confirm against postprocessing.py.
  python -u postprocessing.py --year "$year" --data-dir "$data_dir" --templates \
    --no-lp-sf-all-years \
    --bdt-preds-dir "$data_dir/24_03_07_new_samples_max_depth_5/inferences" \
    --plot-dir "${MAIN_DIR}/plots/PostProcessing/$TAG" \
    --template-dir "test_templates/${TAG}${year}" --no-do-jshifts --vbf
done
17 changes: 12 additions & 5 deletions src/HHbbVV/postprocessing/bash_scripts/MassPlots.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,20 @@
####################################################################################################

MAIN_DIR="../../.."
data_dir="$MAIN_DIR/../data/skimmer/24Mar14UpdateData"
TAG=""
samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150"
# samples="HHbbVV VBFHHbbVV NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80"
resonant="--resonant"

options=$(getopt -o "" --long "tag:" -- "$@")
options=$(getopt -o "" --long "nonresonant,tag:" -- "$@")
eval set -- "$options"

while true; do
case "$1" in
--nonresonant)
resonant=""
samples="HHbbVV VBFHHbbVV qqHH_CV_1_C2V_0_kl_1_HHbbVV qqHH_CV_1_C2V_2_kl_1_HHbbVV"
;;
--tag)
shift
TAG=$1
Expand All @@ -46,11 +51,13 @@ if [[ -z $TAG ]]; then
exit 1
fi

for year in 2016APV 2016 2017 2018
# for year in 2016APV 2016 2017 2018
for year in 2016APV 2016 2017
do
python -u postprocessing.py --control-plots --year $year --resonant \
--data-dir "${MAIN_DIR}/../data/skimmer/24Mar5AllYears" \
python -u postprocessing.py --control-plots --year $year $resonant \
--data-dir $data_dir \
--sig-samples $samples \
--plot-dir "${MAIN_DIR}/plots/PostProcessing/$TAG" \
--bdt-preds-dir "$data_dir/24_03_07_new_samples_max_depth_5/inferences" \
--mass-plots
done
3 changes: 1 addition & 2 deletions src/HHbbVV/postprocessing/bash_scripts/NonresTemplates.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ if [[ -z $TAG ]]; then
exit 1
fi

# for year in 2016APV 2016 2017 2018
for year in 2017 2018 2016APV 2016
for year in 2016APV 2016 2017 2018
do
python -u postprocessing.py --year $year --data-dir "$data_dir" --templates \
--bdt-preds-dir "$data_dir/24_03_07_new_samples_max_depth_5/inferences" \
Expand Down
19 changes: 13 additions & 6 deletions src/HHbbVV/postprocessing/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def main(args):
derive_variables(
events_dict,
bb_masks,
nonres_vars=args.vbf or args.control_plots,
nonres_vars=args.vbf or (args.control_plots and not args.mass_plots),
# nonres_vars=args.vbf,
vbf_vars=args.vbf,
do_jshifts=args.vbf, # only need shifts for BDT pre-processing
Expand All @@ -437,7 +437,7 @@ def main(args):
print("\nCutflow", cutflow)

# Load BDT Scores
if not args.resonant and not args.vbf:
if not args.resonant and not args.vbf and not args.mass_plots:
print("\nLoading BDT predictions")
load_bdt_preds(
events_dict,
Expand Down Expand Up @@ -466,10 +466,10 @@ def main(args):
else:
p_sig_keys = plot_sig_keys_nonres
sig_scale_dict = {
"HHbbVV": 1e5,
"VBFHHbbVV": 2e5,
"qqHH_CV_1_C2V_0_kl_1_HHbbVV": 2e3,
"qqHH_CV_1_C2V_2_kl_1_HHbbVV": 2e3,
"HHbbVV": 3e5,
"VBFHHbbVV": 3e6,
"qqHH_CV_1_C2V_0_kl_1_HHbbVV": 6e3,
"qqHH_CV_1_C2V_2_kl_1_HHbbVV": 6e3,
}

control_plots(
Expand All @@ -483,6 +483,7 @@ def main(args):
sig_scale_dict=sig_scale_dict,
# sig_splits=sig_splits,
HEM2d=args.HEM2d,
plot_ratio=not args.mass_plots, # don't need data/MC ratio for mreg vs msd comparison
same_ylim=args.mass_plots,
show=False,
)
Expand Down Expand Up @@ -1069,6 +1070,9 @@ def apply_trigger_weights(events_dict: dict[str, pd.DataFrame], year: str, cutfl

def qcd_sf(events_dict: dict[str, pd.DataFrame], cutflow: pd.DataFrame):
"""Applies a QCD scale factor."""
if qcd_key not in events_dict:
return

trig_yields = cutflow.iloc[:, -1]
non_qcd_bgs_yield = np.sum(
[
Expand Down Expand Up @@ -1440,6 +1444,7 @@ def control_plots(
sig_scale_dict: dict[str, float] = None,
combine_pdf: bool = True,
HEM2d: bool = False,
plot_ratio: bool = True,
plot_significance: bool = False,
same_ylim: bool = False,
show: bool = False,
Expand All @@ -1453,6 +1458,7 @@ def control_plots(
{var1: ([num bins, min, max], label), var2...}.
sig_splits: split up signals into different plots (in case there are too many for one)
HEM2d: whether to plot 2D hists of FatJet phi vs eta for bb and VV jets as a check for HEM cleaning.
plot_ratio: whether to plot the data/MC ratio.
plot_significance: whether to plot the significance as well as the ratio plot.
same_ylim: whether to use the same y-axis limits for all plots.
log: True or False if plot on log scale or not - or "both" if both.
Expand Down Expand Up @@ -1516,6 +1522,7 @@ def control_plots(
show=show,
log=log,
ylim=ylim if not log else 1e15,
plot_ratio=plot_ratio,
)
merger_control_plots.append(name)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,Pre-selection,LP SF,bbFatJetPt >= 300,VVFatJetPt >= 300,BDTScore >= 0.998,bbFatJetParticleNetMD_Txbb >= 0.9735,0 ≤ nGoodElectronsHbb < 0.9,0 ≤ nGoodMuonsHbb < 0.9
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.0173959132140592,0.0173959132140592,0.017299990201845,0.017214419089199705,0.00042044328198753,0.0002600195116698255,0.0002558856684327143,0.0002558856684327143
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,Pre-selection,LP SF,bbFatJetPt >= 300,VVFatJetPt >= 300,BDTScore >= 0.998,bbFatJetParticleNetMD_Txbb >= 0.9737,0 ≤ nGoodElectronsHbb < 0.9,0 ≤ nGoodMuonsHbb < 0.9
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.021501518954477026,0.021501518954477026,0.021387774292197015,0.02128691499756289,0.0005347383241229343,0.0003420753830479197,0.0003420753830479197,0.0003420753830479197
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,Pre-selection,LP SF,bbFatJetPt >= 300,VVFatJetPt >= 300,BDTScore >= 0.998,bbFatJetParticleNetMD_Txbb >= 0.9714,0 ≤ nGoodElectronsHbb < 0.9,0 ≤ nGoodMuonsHbb < 0.9
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.03499528349928152,0.03499528349928152,0.034883938631222695,0.034828787476897716,0.0011217067019144162,0.0007799503782162023,0.0007798089829283552,0.0007711790124800327
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,Pre-selection,LP SF,bbFatJetPt >= 300,VVFatJetPt >= 300,BDTScore >= 0.998,bbFatJetParticleNetMD_Txbb >= 0.9734,0 ≤ nGoodElectronsHbb < 0.9,0 ≤ nGoodMuonsHbb < 0.9
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.04105557177629756,0.04105557177629756,0.04090476204464955,0.0407915993104881,0.0015553429231248339,0.0011102815028561314,0.0011071970472941277,0.0011060993774949018
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,SF,sj_matching_unc,sj_pt_unc,stat_unc,syst_unc
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.86 ± 0.59,0.14285714285714285,0.010252321392099382,0.12946452895544497,0.6522582240401613
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"2016APV": {}, "qqHH_CV_1_C2V_1_kl_2_HHbbVV": {"lp_sf": 0.8604309258381978, "lp_sf_unc": 0.6802316726269908, "lp_sf_uncs": {"syst_unc": 0.6522582240401613, "stat_unc": 0.12946452895544497, "sj_pt_unc": 0.010252321392099382, "sj_matching_unc": 0.14285714285714285}}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,Pre-selection,LP SF,bbFatJetPt >= 300,VVFatJetPt >= 300,BDTScore >= 0.998,bbFatJetParticleNetMD_Txbb >= 0.9735,0 ≤ nGoodElectronsHbb < 0.9,0 ≤ nGoodMuonsHbb < 0.9
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.0173959132140592,0.0173959132140592,0.017299990201845,0.017214419089199705,0.00042044328198753,0.0002600195116698255,0.0002558856684327143,0.0002558856684327143
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,SF,sj_matching_unc,sj_pt_unc,stat_unc,syst_unc
qqHH_CV_1_C2V_1_kl_2_HHbbVV,1.24 ± 0.67,0.1399108138238573,0.008987658498025358,0.1001598592479952,0.5155430106030157
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"2016": {}, "qqHH_CV_1_C2V_1_kl_2_HHbbVV": {"lp_sf": 1.2411820701630025, "lp_sf_unc": 0.5435737365034577, "lp_sf_uncs": {"syst_unc": 0.5155430106030157, "stat_unc": 0.1001598592479952, "sj_pt_unc": 0.008987658498025358, "sj_matching_unc": 0.1399108138238573}}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,Pre-selection,LP SF,bbFatJetPt >= 300,VVFatJetPt >= 300,BDTScore >= 0.998,bbFatJetParticleNetMD_Txbb >= 0.9737,0 ≤ nGoodElectronsHbb < 0.9,0 ≤ nGoodMuonsHbb < 0.9
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.021501518954477026,0.021501518954477026,0.021387774292197015,0.02128691499756289,0.0005347383241229343,0.0003420753830479197,0.0003420753830479197,0.0003420753830479197
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,SF,sj_matching_unc,sj_pt_unc,stat_unc,syst_unc
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.86 ± 0.59,0.14285714285714285,0.010252321392099382,0.12946452895544497,0.6522582240401613
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"2016APV": {}, "qqHH_CV_1_C2V_1_kl_2_HHbbVV": {"lp_sf": 0.8604309258381978, "lp_sf_unc": 0.6802316726269908, "lp_sf_uncs": {"syst_unc": 0.6522582240401613, "stat_unc": 0.12946452895544497, "sj_pt_unc": 0.010252321392099382, "sj_matching_unc": 0.14285714285714285}}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,Pre-selection,LP SF,bbFatJetPt >= 300,VVFatJetPt >= 300,BDTScore >= 0.998,bbFatJetParticleNetMD_Txbb >= 0.9714,0 ≤ nGoodElectronsHbb < 0.9,0 ≤ nGoodMuonsHbb < 0.9
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.03499528349928152,0.03499528349928152,0.034883938631222695,0.034828787476897716,0.0011217067019144162,0.0007799503782162023,0.0007798089829283552,0.0007711790124800327
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,SF,sj_matching_unc,sj_pt_unc,stat_unc,syst_unc
qqHH_CV_1_C2V_1_kl_2_HHbbVV,1.13 ± 0.57,0.12784248474764282,0.007395479581722196,0.06913171449227407,0.48201850196491147
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"2017": {}, "qqHH_CV_1_C2V_1_kl_2_HHbbVV": {"lp_sf": 1.1269971767791451, "lp_sf_unc": 0.5035071242890548, "lp_sf_uncs": {"syst_unc": 0.48201850196491147, "stat_unc": 0.06913171449227407, "sj_pt_unc": 0.007395479581722196, "sj_matching_unc": 0.12784248474764282}}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,Pre-selection,LP SF,bbFatJetPt >= 300,VVFatJetPt >= 300,BDTScore >= 0.998,bbFatJetParticleNetMD_Txbb >= 0.9734,0 ≤ nGoodElectronsHbb < 0.9,0 ≤ nGoodMuonsHbb < 0.9
qqHH_CV_1_C2V_1_kl_2_HHbbVV,0.04105557177629756,0.04105557177629756,0.04090476204464955,0.0407915993104881,0.0015553429231248339,0.0011102815028561314,0.0011071970472941277,0.0011060993774949018
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
,SF,sj_matching_unc,sj_pt_unc,stat_unc,syst_unc
qqHH_CV_1_C2V_1_kl_2_HHbbVV,1.11 ± 0.64,0.11687725631768953,0.004125334945509687,0.07621464039715414,0.5657408058437927
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"2018": {}, "qqHH_CV_1_C2V_1_kl_2_HHbbVV": {"lp_sf": 1.1068573499629724, "lp_sf_unc": 0.5827080248635804, "lp_sf_uncs": {"syst_unc": 0.5657408058437927, "stat_unc": 0.07621464039715414, "sj_pt_unc": 0.004125334945509687, "sj_matching_unc": 0.11687725631768953}}}
Loading

0 comments on commit 907f8e3

Please sign in to comment.