diff --git a/pyproject.toml b/pyproject.toml index 9e76f9c2..2475231c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,6 +136,7 @@ ignore = [ "ISC001", # Conflicts with formatter "PLR", # Design related pylint codes "T201", # print statements + "T203", # pprint statements "EM101", # Error message "EM102", "G002", # Logging statement format diff --git a/src/HHbbVV/combine/run_blinded.sh b/src/HHbbVV/combine/run_blinded.sh index 7ccc7184..c86f3d92 100755 --- a/src/HHbbVV/combine/run_blinded.sh +++ b/src/HHbbVV/combine/run_blinded.sh @@ -19,6 +19,9 @@ # Specify resonant with --resonant / -r, otherwise does nonresonant # Specify seed with --seed (default 42) and number of toys with --numtoys (default 100) # +# For nonresonant, will try to load all the regions automatically based on which text files exist +# Can use --noggf, --novbf to exclude ggF and VBF regions respectively +# # Usage ./run_blinded.sh [-wblsdgt] [--numtoys 100] [--seed 42] # # Author: Raghav Kansal @@ -46,8 +49,10 @@ numtoys=100 bias=-1 mintol=0.1 # --cminDefaultMinimizerTolerance # maxcalls=1000000000 # --X-rtd MINIMIZER_MaxCalls +nonresggf=1 +nonresvbf=1 -options=$(getopt -o "wblsdrgti" --long "workspace,bfit,limits,significance,dfit,dfitasimov,resonant,gofdata,goftoys,impactsi,impactsf:,impactsc:,bias:,seed:,numtoys:,mintol:" -- "$@") +options=$(getopt -o "wblsdrgti" --long "workspace,bfit,limits,significance,dfit,dfitasimov,resonant,noggf,novbf,gofdata,goftoys,impactsi,impactsf:,impactsc:,bias:,seed:,numtoys:,mintol:" -- "$@") eval set -- "$options" while true; do @@ -73,6 +78,12 @@ while true; do -r|--resonant) resonant=1 ;; + --noggf) + nonresggf=0 + ;; + --novbf) + nonresvbf=0 + ;; -g|--gofdata) gofdata=1 ;; @@ -143,22 +154,45 @@ outsdir=${cards_dir}/outs mkdir -p $outsdir if [ $resonant = 0 ]; then + # nonresonant args + if [ -f "mXbin0pass.txt" ]; then echo -e "\nWARNING: This is doing nonresonant fits - did you mean to pass -r|--resonant?\n" fi CMS_PARAMS_LABEL="CMS_bbWW_hadronic" - 
# nonresonant args - ccargs="fail=${cards_dir}/fail.txt failBlinded=${cards_dir}/failBlinded.txt pass=${cards_dir}/pass.txt passBlinded=${cards_dir}/passBlinded.txt" - maskunblindedargs="mask_pass=1,mask_fail=1,mask_passBlinded=0,mask_failBlinded=0" - maskblindedargs="mask_pass=0,mask_fail=0,mask_passBlinded=1,mask_failBlinded=1" + if [ -f "pass.txt" ]; then + echo "Single pass region" + ccargs="fail=${cards_dir}/fail.txt failBlinded=${cards_dir}/failBlinded.txt pass=${cards_dir}/pass.txt passBlinded=${cards_dir}/passBlinded.txt" + maskunblindedargs="mask_pass=1,mask_fail=1,mask_passBlinded=0,mask_failBlinded=0" + maskblindedargs="mask_pass=0,mask_fail=0,mask_passBlinded=1,mask_failBlinded=1" + else + ccargs="fail=${cards_dir}/fail.txt failBlinded=${cards_dir}/failBlinded.txt" + maskunblindedargs="mask_fail=1,mask_failBlinded=0" + maskblindedargs="mask_fail=0,mask_failBlinded=1" + + if [ -f "passggf.txt" ] && [ $nonresggf = 1 ]; then + echo "passggf region" + ccargs+=" passggf=${cards_dir}/passggf.txt passggfBlinded=${cards_dir}/passggfBlinded.txt" + maskunblindedargs+=",mask_passggf=1,mask_passggfBlinded=0" + maskblindedargs+=",mask_passggf=0,mask_passggfBlinded=1" + fi + + if [ -f "passvbf.txt" ] && [ $nonresvbf = 1 ]; then + echo "passvbf region" + ccargs+=" passvbf=${cards_dir}/passvbf.txt passvbfBlinded=${cards_dir}/passvbfBlinded.txt" + maskunblindedargs+=",mask_passvbf=1,mask_passvbfBlinded=0" + maskblindedargs+=",mask_passvbf=0,mask_passvbfBlinded=1" + fi + fi - # freeze qcd params in blinded bins + # freeze fail region qcd params in blinded bins setparamsblinded="" freezeparamsblinded="" for bin in {5..9} do + # would need to use regex here for multiple fail regions setparamsblinded+="${CMS_PARAMS_LABEL}_tf_dataResidual_Bin${bin}=0," freezeparamsblinded+="${CMS_PARAMS_LABEL}_tf_dataResidual_Bin${bin}," done @@ -167,7 +201,6 @@ if [ $resonant = 0 ]; then setparamsblinded=${setparamsblinded%,} freezeparamsblinded=${freezeparamsblinded%,} - # floating 
parameters using var{} floats a bunch of parameters which shouldn't be floated, # so countering this inside --freezeParameters which takes priority. # Although, practically even if those are set to "float", I didn't see them ever being fitted, @@ -239,7 +272,7 @@ echo "$unblindedparams" ulimit -s unlimited if [ $workspace = 1 ]; then - echo "Combining cards" + echo "Combining cards $ccargs" combineCards.py $ccargs > $ws.txt echo "Running text2workspace" @@ -263,7 +296,7 @@ if [ $bfit = 1 ]; then -n Snapshot 2>&1 | tee $outsdir/MultiDimFit.txt else if [ ! -f "higgsCombineSnapshot.MultiDimFit.mH125.root" ]; then - echo "Background-only fit snapshot doesn't exist! Use the -b|--bfit option to run fit first" + echo "Background-only fit snapshot doesn't exist! Use the -b|--bfit option to run fit first. (Ignore this if you're only creating the workspace.)" exit 1 fi fi diff --git a/src/HHbbVV/combine/run_ftest_nonres.sh b/src/HHbbVV/combine/run_ftest_nonres.sh index 5e067c85..19e12b4b 100755 --- a/src/HHbbVV/combine/run_ftest_nonres.sh +++ b/src/HHbbVV/combine/run_ftest_nonres.sh @@ -103,8 +103,8 @@ setparamsblinded="" freezeparamsblinded="" for bin in {5..9} do - setparamsblinded+="CMS_bbWW_boosted_ggf_qcdparam_msdbin${bin}=0," - freezeparamsblinded+="CMS_bbWW_boosted_ggf_qcdparam_msdbin${bin}," + setparamsblinded+="${CMS_PARAMS_LABEL}_tf_dataResidual_Bin${bin}=0," + freezeparamsblinded+="${CMS_PARAMS_LABEL}_tf_dataResidual_Bin${bin}," done # remove last comma diff --git a/src/HHbbVV/combine/submit/submit_ftest_nonres.templ.sh b/src/HHbbVV/combine/submit/submit_ftest_nonres.templ.sh index 7b5e0cb9..da262391 100644 --- a/src/HHbbVV/combine/submit/submit_ftest_nonres.templ.sh +++ b/src/HHbbVV/combine/submit/submit_ftest_nonres.templ.sh @@ -61,8 +61,8 @@ setparamsblinded="" freezeparamsblinded="" for bin in {5..9} do - setparamsblinded+="${CMS_PARAMS_LABEL}_tf_dataResidual_Bin${bin}=0," - freezeparamsblinded+="${CMS_PARAMS_LABEL}_tf_dataResidual_Bin${bin}," + 
setparamsblinded+="rgx{${CMS_PARAMS_LABEL}_tf_dataResidual.*_Bin${bin}}=0," + freezeparamsblinded+="rgx{${CMS_PARAMS_LABEL}_tf_dataResidual.*_Bin${bin}}," done # remove last comma diff --git a/src/HHbbVV/postprocessing/CombineTemplates.ipynb b/src/HHbbVV/postprocessing/CombineTemplates.ipynb index 7bc9f509..1d52ed53 100644 --- a/src/HHbbVV/postprocessing/CombineTemplates.ipynb +++ b/src/HHbbVV/postprocessing/CombineTemplates.ipynb @@ -60,14 +60,14 @@ " for region, h in ggf_templates.items():\n", " if region.startswith(\"pass\"):\n", " # rename pass regions\n", - " combined_templates[region.replace(\"pass\", \"pass_ggf\")] = h\n", + " combined_templates[region.replace(\"pass\", \"passggf\")] = h\n", " else:\n", " combined_templates[region] = h\n", "\n", " for region, h in vbf_templates.items():\n", " if region.startswith(\"pass\"):\n", " # rename pass regions\n", - " combined_templates[region.replace(\"pass\", \"pass_vbf\")] = h\n", + " combined_templates[region.replace(\"pass\", \"passvbf\")] = h\n", " else:\n", " # checking that fail regions are identical\n", " assert combined_templates[region] == h\n", @@ -90,7 +90,7 @@ "metadata": {}, "outputs": [], "source": [ - "combined_systematics = {\"pass_ggf\": {}, \"pass_vbf\": {}}\n", + "combined_systematics = {\"passggf\": {}, \"passvbf\": {}}\n", "\n", "with (ggf_templates_path / \"systematics.json\").open(\"r\") as f:\n", " ggf_systematics = json.load(f)\n", @@ -104,25 +104,25 @@ " for region, val in sval.items():\n", " if region.startswith(\"pass\"):\n", " # rename pass regions\n", - " combined_systematics[skey][region.replace(\"pass\", \"pass_ggf\")] = val\n", + " combined_systematics[skey][region.replace(\"pass\", \"passggf\")] = val\n", " else:\n", " combined_systematics[skey][region] = val\n", " else:\n", " # LP SFs\n", - " combined_systematics[\"pass_ggf\"][skey] = sval\n", + " combined_systematics[\"passggf\"][skey] = sval\n", "\n", "for skey, sval in vbf_systematics.items():\n", " if skey in years:\n", " for 
region, val in sval.items():\n", " if region.startswith(\"pass\"):\n", " # rename pass regions\n", - " combined_systematics[skey][region.replace(\"pass\", \"pass_vbf\")] = val\n", + " combined_systematics[skey][region.replace(\"pass\", \"passvbf\")] = val\n", " else:\n", " # checking that fail regions are identical\n", " assert combined_systematics[skey][region] == val\n", " else:\n", " # LP SFs\n", - " combined_systematics[\"pass_vbf\"][skey] = sval\n", + " combined_systematics[\"passvbf\"][skey] = sval\n", "\n", "with open(templates_path / \"systematics.json\", \"w\") as f:\n", " json.dump(combined_systematics, f, indent=4)" diff --git a/src/HHbbVV/postprocessing/CreateDatacard.py b/src/HHbbVV/postprocessing/CreateDatacard.py index a6bea068..2b828aec 100644 --- a/src/HHbbVV/postprocessing/CreateDatacard.py +++ b/src/HHbbVV/postprocessing/CreateDatacard.py @@ -15,7 +15,9 @@ import json import logging import pickle +import pprint import sys +import warnings from collections import OrderedDict from pathlib import Path @@ -69,6 +71,13 @@ default=False, ) +parser.add_argument( + "--nonres-regions", + default="all", + type=str, + help="nonresonant regions for which to make cards", + choices=["ggf", "vbf", "all"], +) parser.add_argument("--cards-dir", default="cards", type=str, help="output card directory") parser.add_argument("--mcstats-threshold", default=100, type=float, help="mcstats threshold n_eff") @@ -94,8 +103,8 @@ default=None, nargs="*", type=int, - help="order of polynomial for TF in [dim 1, dim 2] = [mH(bb), -] for nonresonant or [mY, mX] for resonant." - "Default is 0 for nonresonant and (1, 2) for resonant.", + help="order of polynomial for TF in [dim/cat 1, dim/cat 2] = [mH(bb) for ggF, mH(bb) for VBF] for nonresonant or [mY, mX] for resonant." 
+ "Default is [0, 1] for nonresonant and [1, 2] for resonant.", ) parser.add_argument("--model-name", default=None, type=str, help="output model name") @@ -113,10 +122,29 @@ CMS_PARAMS_LABEL = "CMS_bbWW_hadronic" if not args.resonant else "CMS_XHYbbWW_boosted" +MCB_LABEL = "Blinded" # for templates where MC is "blinded" to get background estimates qcd_data_key = "qcd_datadriven" if args.nTF is None: - args.nTF = [1, 2] if args.resonant else [0] + if args.resonant: + args.nTF = [1, 2] + else: + if args.nonres_regions == "all": + args.nTF = [0, 1] + elif args.nonres_regions == "ggf": + args.nTF = [0] + elif args.nonres_regions == "vbf": + args.nTF = [1] + +if not args.resonant: + if args.nonres_regions == "all": + signal_regions = ["passggf", "passvbf"] + elif args.nonres_regions == "ggf": + signal_regions = ["passggf"] + elif args.nonres_regions == "vbf": + signal_regions = ["passvbf"] +else: + signal_regions = ["pass"] # (name in templates, name in cards) mc_samples = OrderedDict( @@ -246,11 +274,13 @@ ), } -for sig_key in sig_keys: - # values will be added in from the systematics JSON - nuisance_params[f"{CMS_PARAMS_LABEL}_lp_sf_{mc_samples[sig_key]}"] = Syst( - prior="lnN", samples=[sig_key] - ) +# LP SFs - uncorrelated across regions to be more conservative (?) 
+for sr in signal_regions: + for sig_key in sig_keys: + # values will be added in from the systematics JSON + nuisance_params[f"{CMS_PARAMS_LABEL}_lp_sf_{sr}_{mc_samples[sig_key]}"] = Syst( + prior="lnN", samples=[sig_key], regions=[sr] + ) if args.year != "all": # remove other years' keys @@ -295,6 +325,10 @@ # "top_pt": Syst(name="CMS_top_pT_reweighting", prior="shape", samples=["TT"]) # TODO } +if not args.resonant: + # AK4 jets only used for nonresonant + corr_year_shape_systs["pileupID"] = Syst(name="CMS_pileup_id", prior="shape", samples=all_mc) + uncorr_year_shape_systs = { "pileup": Syst(name="CMS_pileup", prior="shape", samples=all_mc), # TODO: add 2016APV template into this @@ -393,14 +427,25 @@ def get_templates( return templates_dict, templates_summed -def process_systematics_combined(systematics: dict): - """Get total uncertainties from per-year systs in ``systematics``""" - for sig_key in sig_keys: - # already for all years - nuisance_params[f"{CMS_PARAMS_LABEL}_lp_sf_{mc_samples[sig_key]}"].value = ( - 1 + systematics[sig_key]["lp_sf_unc"] - ) +def _process_lpsfs(systematics: dict): + for sr in signal_regions: + for sig_key in sig_keys: + # already for all years + try: + lp_sf_unc = systematics[sr][sig_key]["lp_sf_unc"] + except KeyError: + warnings.warn( + f"No {sr} region in systematics? 
Trying old convention for LP SFs.", + stacklevel=2, + ) + lp_sf_unc = systematics[sig_key]["lp_sf_unc"] + + nuisance_params[f"{CMS_PARAMS_LABEL}_lp_sf_{sr}_{mc_samples[sig_key]}"].value = ( + 1 + lp_sf_unc + ) + +def _process_triggereffs(systematics: dict): tdict = {} for region in systematics[years[0]]: if len(years) > 1: @@ -423,37 +468,22 @@ def process_systematics_combined(systematics: dict): nuisance_params[f"{CMS_PARAMS_LABEL}_triggerEffSF_uncorrelated"].value = tdict -def process_systematics_separate(bg_systematics: dict, sig_systs: dict[str, dict]): - """Get total uncertainties from per-year systs separated into bg and sig systs""" - for sig_key in sig_keys: - # already for all years - nuisance_params[f"{CMS_PARAMS_LABEL}_lp_sf_{mc_samples[sig_key]}"].value = ( - 1 + sig_systs[sig_key][sig_key]["lp_sf_unc"] - ) - - # use only bg trig uncs. - tdict = {} - for region in bg_systematics[years[0]]: - if len(years) > 1: - trig_totals, trig_total_errs = [], [] - for year in years: - trig_totals.append(bg_systematics[year][region]["trig_total"]) - trig_total_errs.append(bg_systematics[year][region]["trig_total_err"]) +def process_systematics_combined(systematics: dict): + """Get total uncertainties from per-year systs in ``systematics``""" + _process_lpsfs(systematics) + _process_triggereffs(systematics) - trig_total = np.sum(trig_totals) - trig_total_errs = np.linalg.norm(trig_total_errs) + print("Nuisance Parameters") + pprint.pprint(nuisance_params) - tdict[region] = 1 + (trig_total_errs / trig_total) - else: - year = years[0] - tdict[region] = 1 + ( - bg_systematics[year][region]["trig_total_err"] - / bg_systematics[year][region]["trig_total"] - ) - nuisance_params[f"{CMS_PARAMS_LABEL}_triggerEffSF_uncorrelated"].value = tdict +def process_systematics_separate(bg_systs: dict, sig_systs: dict[str, dict]): + """Get total uncertainties from per-year systs separated into bg and sig systs""" + _process_lpsfs(sig_systs) + _process_triggereffs(bg_systs) - 
print("Nuisance Parameters\n", nuisance_params) + print("Nuisance Parameters") + pprint.pprint(nuisance_params) def process_systematics(templates_dir: str, sig_separate: bool): @@ -465,14 +495,14 @@ def process_systematics(templates_dir: str, sig_separate: bool): process_systematics_combined(systematics) # LP SF and trig effs. else: with (templates_dir / "backgrounds/systematics.json").open("r") as f: - bg_systematics = json.load(f) + bg_systs = json.load(f) sig_systs = {} for sig_key in sig_keys: with (templates_dir / f"{hist_names[sig_key]}/systematics.json").open("r") as f: sig_systs[sig_key] = json.load(f) - process_systematics_separate(bg_systematics, sig_systs) # LP SF and trig effs. + process_systematics_separate(bg_systs, sig_systs) # LP SF and trig effs. # TODO: separate function for VBF? @@ -562,29 +592,32 @@ def fill_regions( region_templates = templates_summed[region][:, :, mX_bin] pass_region = region.startswith("pass") - region_noblinded = region.split("Blinded")[0] - blind_str = "Blinded" if region.endswith("Blinded") else "" + region_noblinded = region.split(MCB_LABEL)[0] + blind_str = MCB_LABEL if region.endswith(MCB_LABEL) else "" + print("\n\n") logging.info("starting region: %s" % region) binstr = "" if mX_bin is None else f"mXbin{mX_bin}" ch = rl.Channel(binstr + region.replace("_", "")) # can't have '_'s in name model.addChannel(ch) for sample_name, card_name in mc_samples.items(): + print("") # don't add signals in fail regions # also skip resonant signals in pass blinded - they are ignored in the validation fits anyway if sample_name in sig_keys and ( - not pass_region or (mX_bin is not None and region == "passBlinded") + not pass_region + or (mX_bin is not None and region not in [sr + MCB_LABEL for sr in signal_regions]) ): - logging.info(f"\nSkipping {sample_name} in {region} region\n") + logging.info(f"Skipping {sample_name} in {region} region") continue # single top only in fail regions if sample_name == "ST" and pass_region: - 
logging.info(f"\nSkipping ST in {region} region\n") + logging.info(f"Skipping ST in {region} region") continue - logging.info("get templates for: %s" % sample_name) + logging.info("Getting templates for: %s" % sample_name) sample_template = region_templates[sample_name, :] @@ -624,7 +657,14 @@ def fill_regions( # rate systematics for skey, syst in nuisance_params.items(): - if sample_name not in syst.samples or (not pass_region and syst.pass_only): + region_name = region if args.resonant else region_noblinded + + if ( + sample_name not in syst.samples + or (not pass_region and syst.pass_only) + or (syst.regions is not None and region_name not in syst.regions) + ): + logging.info(f"Skipping {skey} rate") continue logging.info(f"Getting {skey} rate") @@ -633,7 +673,6 @@ def fill_regions( val, val_down = syst.value, syst.value_down if syst.diff_regions: - region_name = region if args.resonant else region_noblinded val = val[region_name] val_down = val_down[region_name] if val_down is not None else val_down if syst.diff_samples: @@ -644,7 +683,12 @@ def fill_regions( # correlated shape systematics for skey, syst in corr_year_shape_systs.items(): - if sample_name not in syst.samples or (not pass_region and syst.pass_only): + if ( + sample_name not in syst.samples + or (not pass_region and syst.pass_only) + or (syst.regions is not None and region_name not in syst.regions) + ): + logging.info(f"Skipping {skey} shapes") continue logging.info(f"Getting {skey} shapes") @@ -684,7 +728,12 @@ def fill_regions( # uncorrelated shape systematics for skey, syst in uncorr_year_shape_systs.items(): - if sample_name not in syst.samples or (not pass_region and syst.pass_only): + if ( + sample_name not in syst.samples + or (not pass_region and syst.pass_only) + or (syst.regions is not None and region_name not in syst.regions) + ): + logging.info(f"Skipping {skey} shapes") continue logging.info(f"Getting {skey} shapes") @@ -737,25 +786,11 @@ def nonres_alphabet_fit( shape_var = 
shape_vars[0] m_obs = rl.Observable(shape_var.name, shape_var.bins) - # QCD overall pass / fail efficiency - qcd_eff = ( - templates_summed["pass"][qcd_key, :].sum().value - / templates_summed["fail"][qcd_key, :].sum().value - ) + ########################## + # Setup fail region first + ########################## - # transfer factor - tf_dataResidual = rl.BasisPoly( - f"{CMS_PARAMS_LABEL}_tf_dataResidual", - (shape_var.order,), - [shape_var.name], - basis="Bernstein", - limits=(-20, 20), - square_params=True, - ) - tf_dataResidual_params = tf_dataResidual(shape_var.scaled) - tf_params_pass = qcd_eff * tf_dataResidual_params # scale params initially by qcd eff - - # qcd params + # Independent nuisances to float QCD in each fail bin qcd_params = np.array( [ rl.IndependentParameter(f"{CMS_PARAMS_LABEL}_tf_dataResidual_Bin{i}", 0) @@ -763,23 +798,19 @@ def nonres_alphabet_fit( ] ) - for blind_str in ["", "Blinded"]: - # for blind_str in ["Blinded"]: - passChName = f"pass{blind_str}".replace("_", "") + fail_qcd_samples = {} + + for blind_str in ["", MCB_LABEL]: failChName = f"fail{blind_str}".replace("_", "") - logging.info( - f"setting transfer factor for pass region {passChName}, fail region {failChName}" - ) + logging.info(f"Setting up fail region {failChName}") failCh = model[failChName] - passCh = model[passChName] - # sideband fail # was integer, and numpy complained about subtracting float from it initial_qcd = failCh.getObservation().astype(float) for sample in failCh: if sample.sampletype == rl.Sample.SIGNAL: continue - logging.debug("subtracting %s from qcd" % sample._name) + logging.debug("Subtracting %s from qcd" % sample._name) initial_qcd -= sample.getExpectation(nominal=True) if np.any(initial_qcd < 0.0): @@ -806,14 +837,46 @@ def nonres_alphabet_fit( ) failCh.addSample(fail_qcd) - pass_qcd = rl.TransferFactorSample( - f"{passChName}_{CMS_PARAMS_LABEL}_qcd_datadriven", - rl.Sample.BACKGROUND, - tf_params_pass, - fail_qcd, - min_val=min_qcd_val, + 
fail_qcd_samples[blind_str] = fail_qcd + + ########################## + # Now do signal regions + ########################## + + for sr in signal_regions: + # QCD overall pass / fail efficiency + qcd_eff = ( + templates_summed[sr][qcd_key, :].sum().value + / templates_summed["fail"][qcd_key, :].sum().value ) - passCh.addSample(pass_qcd) + + # transfer factor + tf_dataResidual = rl.BasisPoly( + f"{CMS_PARAMS_LABEL}_tf_dataResidual_{sr}", + (shape_var.orders[sr],), + [shape_var.name], + basis="Bernstein", + limits=(-20, 20), + square_params=True, + ) + # dependent parameters of the TF params representing QCD in each bin of pass region + tf_dataResidual_params = tf_dataResidual(shape_var.scaled) + tf_params_pass = qcd_eff * tf_dataResidual_params # scale params initially by qcd eff + + for blind_str in ["", MCB_LABEL]: + # for blind_str in [MCB_LABEL]: + passChName = f"{sr}{blind_str}".replace("_", "") + logging.info(f"setting transfer factor for pass region {passChName}") + passCh = model[passChName] + + pass_qcd = rl.TransferFactorSample( + f"{passChName}_{CMS_PARAMS_LABEL}_qcd_datadriven", + rl.Sample.BACKGROUND, + tf_params_pass, + fail_qcd_samples[blind_str], + min_val=min_qcd_val, + ) + passCh.addSample(pass_qcd) def res_alphabet_fit( @@ -825,6 +888,7 @@ def res_alphabet_fit( ): shape_var_mY, shape_var_mX = shape_vars m_obs = rl.Observable(shape_var_mY.name, shape_var_mY.bins) + sr = signal_regions[0] # QCD overall pass / fail efficiency qcd_eff = ( @@ -835,7 +899,7 @@ def res_alphabet_fit( # transfer factor tf_dataResidual = rl.BasisPoly( f"{CMS_PARAMS_LABEL}_tf_dataResidual", - (shape_var_mX.order, shape_var_mY.order), + (shape_var_mX.orders[sr], shape_var_mY.orders[sr]), [shape_var_mX.name, shape_var_mY.name], basis="Bernstein", limits=(-20, 20), @@ -859,8 +923,8 @@ def res_alphabet_fit( ] ) - for blind_str in ["", "Blinded"]: - # for blind_str in ["Blinded"]: + for blind_str in ["", MCB_LABEL]: + # for blind_str in [MCB_LABEL]: passChName = 
f"mXbin{mX_bin}pass{blind_str}".replace("_", "") failChName = f"mXbin{mX_bin}fail{blind_str}".replace("_", "") logging.info( @@ -914,9 +978,9 @@ def res_alphabet_fit( def createDatacardAlphabet(args, templates_dict, templates_summed, shape_vars): - # (pass, fail) x (unblinded, blinded) + # (*signal_regions, fail) x (MC not-blinded, MC blinded) regions: list[str] = [ - f"{pf}{blind_str}" for pf in ["pass", "fail"] for blind_str in ["", "Blinded"] + f"{pf}{blind_str}" for pf in [*signal_regions, "fail"] for blind_str in ["", MCB_LABEL] ] # build actual fit model now @@ -1212,14 +1276,29 @@ def main(args): # TODO: check if / how to include signal trig eff uncs. (rn only using bg uncs.) process_systematics(args.templates_dir, args.sig_separate) - # random template from which to extract shape vars + # arbitrary template from which to extract shape vars sample_templates: Hist = templates_summed[next(iter(templates_summed.keys()))] # [mH(bb)] for nonresonant, [mY, mX] for resonant - shape_vars = [ - ShapeVar(name=axis.name, bins=axis.edges, order=args.nTF[i]) - for i, axis in enumerate(sample_templates.axes[1:]) - ] + if not args.resonant: + shape_vars = [ + ShapeVar( + name=axis.name, + bins=axis.edges, + orders={sr: args.nTF[i] for i, sr in enumerate(signal_regions)}, + ) + for _, axis in enumerate(sample_templates.axes[1:]) + ] + else: + if len(signal_regions) != 1: + raise NotImplementedError( + "Need to update shape vars for multiple resonant signal regions." 
+ ) + + shape_vars = [ + ShapeVar(name=axis.name, bins=axis.edges, orders={signal_regions[0]: args.nTF[i]}) + for i, axis in enumerate(sample_templates.axes[1:]) + ] args.cards_dir.mkdir(parents=True, exist_ok=True) with (args.cards_dir / "templates.txt").open("w") as f: diff --git a/src/HHbbVV/postprocessing/PlotFits.ipynb b/src/HHbbVV/postprocessing/PlotFits.ipynb index cce0c1dd..d77a0c5a 100644 --- a/src/HHbbVV/postprocessing/PlotFits.ipynb +++ b/src/HHbbVV/postprocessing/PlotFits.ipynb @@ -43,14 +43,16 @@ "MAIN_DIR = Path(\"../../../\")\n", "nTF = 1\n", "\n", - "plot_dir = MAIN_DIR / f\"plots/PostFit/24Apr10ggFMP9965/nTF{nTF}\"\n", + "vbf_only = False\n", + "ggf_only = False\n", + "k2v0sig = True\n", + "\n", + "plot_dir = MAIN_DIR / \"plots/PostFit/24Apr10NonresCombined\"\n", "# plot_dir = (\n", "# MAIN_DIR\n", "# / \"plots/PostFit/24Apr9ggFScan/nTF1/ggf_txbb_MP_ggf_bdt_0.9965_vbf_txbb_HP_vbf_bdt_0.999_lepton_veto_Hbb\"\n", "# )\n", - "plot_dir.mkdir(exist_ok=True, parents=True)\n", - "\n", - "vbf = False" + "plot_dir.mkdir(exist_ok=True, parents=True)" ] }, { @@ -59,8 +61,7 @@ "metadata": {}, "outputs": [], "source": [ - "# cards_dir = \"24Apr8VBFHP999\"\n", - "cards_dir = f\"f_tests/24Apr10ggFMP9965/nTF_{nTF}\"\n", + "cards_dir = \"24Apr10NonresCombined/\"\n", "# cards_dir = \"24Apr9ggFScan_nTF0/ggf_txbb_HP_ggf_bdt_0.996_vbf_txbb_HP_vbf_bdt_0.999_lepton_veto_Hbb\"\n", "asimov = False\n", "\n", @@ -78,7 +79,7 @@ "outputs": [], "source": [ "# templates_dir = Path(f\"templates/{cards_dir}\")\n", - "templates_dir = Path(f\"templates/24Apr9ggFMP9965\")\n", + "templates_dir = Path(f\"templates/24Apr10NonresCombined\")\n", "# templates_dir = Path(\n", "# f\"templates/24Apr9ggFScan/ggf_txbb_HP_ggf_bdt_0.996_vbf_txbb_HP_vbf_bdt_0.999_lepton_veto_Hbb\"\n", "# )\n", @@ -114,8 +115,8 @@ "\n", "hist_label_map = {val: key for key, val in hist_label_map_inverse.items()}\n", "\n", - "sig_keys = [\"HHbbVV\", \"VBFHHbbVV\"]\n", - "sig_keys = [\"qqHH_CV_1_C2V_0_kl_1_HHbbVV\"] 
if vbf else [\"HHbbVV\"]\n", + "sig_keys = [\"HHbbVV\", \"VBFHHbbVV\", \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\", \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\"]\n", + "# sig_keys = [\"qqHH_CV_1_C2V_0_kl_1_HHbbVV\"] if k2v0sig else [\"HHbbVV\"]\n", "samples = bg_keys + sig_keys + [data_key]" ] }, @@ -134,9 +135,16 @@ "shape_vars = nonres_shape_vars\n", "\n", "selection_regions = {\n", - " \"pass\": \"VBF\" if vbf else \"ggF\",\n", + " \"passvbf\": \"VBF\",\n", + " \"passggf\": \"ggF\",\n", " \"fail\": \"Fail\",\n", - "}" + "}\n", + "\n", + "if vbf_only:\n", + " selection_regions.pop(\"passggf\")\n", + "\n", + "if ggf_only:\n", + " selection_regions.pop(\"passvbf\")" ] }, { @@ -191,10 +199,14 @@ "metadata": {}, "outputs": [], "source": [ - "pass_ylim = 8 if vbf else 22\n", - "fail_ylim = 600000\n", + "ylims = {\"passggf\": 20, \"passvbf\": 7, \"fail\": 6e5}\n", "title_label = \" Asimov Dataset\" if asimov else \"\"\n", - "sig_scale_dict = {\"HHbbVV\": 100, \"VBFHHbbVV\": 2000, \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 1}\n", + "sig_scale_dict = {\n", + " \"HHbbVV\": 100,\n", + " \"VBFHHbbVV\": 2000,\n", + " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 1,\n", + " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": 1,\n", + "}\n", "sig_scale_dict = {key: val for key, val in sig_scale_dict.items() if key in sig_keys}\n", "\n", "for shape, shape_label in shapes.items():\n", @@ -209,7 +221,7 @@ " \"sig_scale_dict\": sig_scale_dict if pass_region else None,\n", " \"show\": True,\n", " \"year\": \"all\",\n", - " \"ylim\": pass_ylim if pass_region else fail_ylim,\n", + " \"ylim\": ylims[region],\n", " \"title\": f\"{shape_label} {region_label} Region{title_label}\",\n", " \"name\": f\"{plot_dir}/{shape}_{region}_{shape_var.var}.pdf\",\n", " }\n", diff --git a/src/HHbbVV/postprocessing/datacardHelpers.py b/src/HHbbVV/postprocessing/datacardHelpers.py index 8d61b1db..f817df85 100644 --- a/src/HHbbVV/postprocessing/datacardHelpers.py +++ b/src/HHbbVV/postprocessing/datacardHelpers.py @@ -1,6 +1,5 @@ from __future__ import annotations 
-import logging from dataclasses import dataclass, field from string import Template @@ -38,6 +37,8 @@ class Syst: uncorr_years: list[str] = field(default_factory=lambda: all_years) pass_only: bool = False # is it applied only in the pass regions + regions: list[str] = None # regions affected by it (if None, this is ignored) + def __post_init__(self): if isinstance(self.value, dict) and not (self.diff_regions or self.diff_samples): raise RuntimeError( @@ -51,7 +52,7 @@ class ShapeVar: name: str = None bins: np.ndarray = None # bin edges - order: int = None # TF order + orders: dict = None # TF order: dict of categories -> order def __post_init__(self): # use bin centers for polynomial fit @@ -235,11 +236,11 @@ def get_effect_updown(values_nominal, values_up, values_down, mask, logger, epsi effect_up[mask_up & zero_up] = values_nominal[mask_up & zero_up] * epsilon effect_down[mask_down & zero_down] = values_nominal[mask_down & zero_down] * epsilon - _shape_checks(values_up, values_down, values_nominal, effect_up, effect_down, logger) + # _shape_checks(values_up, values_down, values_nominal, effect_up, effect_down, logger) - logging.debug(f"nominal : {values_nominal}") - logging.debug(f"effect_up : {effect_up}") - logging.debug(f"effect_down: {effect_down}") + logger.debug(f"nominal : {values_nominal}") + logger.debug(f"effect_up : {effect_up}") + logger.debug(f"effect_down: {effect_down}") return effect_up, effect_down diff --git a/src/HHbbVV/postprocessing/postprocessing.py b/src/HHbbVV/postprocessing/postprocessing.py index 7854460a..4d3491d3 100644 --- a/src/HHbbVV/postprocessing/postprocessing.py +++ b/src/HHbbVV/postprocessing/postprocessing.py @@ -387,7 +387,7 @@ def main(args): ) with systs_file.open("w") as f: - json.dump(systematics, f) + json.dump(systematics, f, indent=4) def _init(args): @@ -1153,7 +1153,7 @@ def _lpsfs(args, filters, scan, scan_cuts, scan_wps, sig_keys, sig_samples): wsysts[region.lpsf_region] = systematics[region.lpsf_region + "_" + 
cutstr] with systs_file.open("w") as f: - json.dump(wsysts, f) + json.dump(wsysts, f, indent=4) def _get_signal_all_years( @@ -1312,7 +1312,7 @@ def lpsfs( if systs_file is not None: with systs_file.open("w") as f: - json.dump(systematics, f) + json.dump(systematics, f, indent=4) if template_dir is not None: sf_table = OrderedDict() # format SFs for each sig key in a table diff --git a/src/HHbbVV/postprocessing/regions.py b/src/HHbbVV/postprocessing/regions.py index 1c17910f..6fbb1ac7 100644 --- a/src/HHbbVV/postprocessing/regions.py +++ b/src/HHbbVV/postprocessing/regions.py @@ -1,5 +1,6 @@ """ Defines all the analysis regions. +****Important****: Region names used in the analysis cannot have underscores because of a rhalphalib convention. Author(s): Raghav Kansal """ @@ -65,7 +66,7 @@ def get_nonres_selection_regions( regions = { # {label: {cutvar: [min, max], ...}, ...} - "pass_vbf": Region( + "passvbf": Region( cuts={ "bbFatJetPt": pt_cuts, "VVFatJetPt": pt_cuts, @@ -76,7 +77,7 @@ def get_nonres_selection_regions( signal=True, label="VBF", ), - "pass_ggf": Region( + "passggf": Region( cuts={ "bbFatJetPt": pt_cuts, "VVFatJetPt": pt_cuts, @@ -98,37 +99,37 @@ def get_nonres_selection_regions( label="Fail", ), # cuts for which LP SF is calculated - "lpsf_pass_vbf": Region( + "lpsf_passvbf": Region( cuts={ "BDTScoreVBF": [vbf_bdt_wp, CUT_MAX_VAL], }, lpsf=True, - lpsf_region="pass_vbf", + lpsf_region="passvbf", label="LP SF VBF Cut", ), - "lpsf_pass_ggf": Region( + "lpsf_passggf": Region( cuts={ "BDTScoreVBF": [-CUT_MAX_VAL, vbf_bdt_wp], # veto VBF BDT cut "BDTScore": [ggf_bdt_wp, CUT_MAX_VAL], }, lpsf=True, - lpsf_region="pass_ggf", + lpsf_region="passggf", label="LP SF ggF Cut", ), } if region == "ggf": - regions.pop("pass_vbf") - region.pop("lpsf_pass_vbf") + regions.pop("passvbf") + regions.pop("lpsf_passvbf") elif region == "vbf": - regions.pop("pass_ggf") - region.pop("lpsf_pass_ggf") + regions.pop("passggf") + regions.pop("lpsf_passggf") elif region == 
"ggf_no_vbf": # old version without any VBF category - lpregion = regions["lpsf_pass_ggf"] + lpregion = regions["lpsf_passggf"] lpregion.lpsf_region = "pass" regions = { - "pass": regions["pass_ggf"], + "pass": regions["passggf"], "fail": regions["fail"], "lpsf": lpregion, } diff --git a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2016APV_templates.pkl b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2016APV_templates.pkl index 540ef457..1aeb9965 100644 Binary files a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2016APV_templates.pkl and b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2016APV_templates.pkl differ diff --git a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2016_templates.pkl b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2016_templates.pkl index 1b66ed61..4f081235 100644 Binary files a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2016_templates.pkl and b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2016_templates.pkl differ diff --git a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2017_templates.pkl b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2017_templates.pkl index e1b99025..dd8b890a 100644 Binary files a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2017_templates.pkl and b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2017_templates.pkl differ diff --git a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2018_templates.pkl b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2018_templates.pkl index 5ee88f2f..a8c0283e 100644 Binary files a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2018_templates.pkl and b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/2018_templates.pkl differ diff --git a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/systematics.json b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/systematics.json index 
6c8691a5..7cf72086 100644 --- a/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/systematics.json +++ b/src/HHbbVV/postprocessing/templates/24Apr10NonresCombined/systematics.json @@ -1,5 +1,5 @@ { - "pass_ggf": { + "passggf": { "HHbbVV": { "lp_sf": 1.0662593442284205, "lp_sf_unc": 0.2047565290328658, @@ -101,7 +101,7 @@ } } }, - "pass_vbf": { + "passvbf": { "qqHH_CV_1_C2V_0_kl_1_HHbbVV": { "lp_sf": 0.9319151497178899, "lp_sf_unc": 0.2875145732655031, @@ -204,7 +204,7 @@ } }, "2016": { - "pass_ggf": { + "passggf": { "trig_total": 35428.46861688447, "trig_total_err": 169.70344916739498 }, @@ -212,13 +212,13 @@ "trig_total": 2191129.51425094, "trig_total_err": 3393.9771949494093 }, - "pass_vbf": { + "passvbf": { "trig_total": 154861.2273932935, "trig_total_err": 3894.1821702776856 } }, "2016APV": { - "pass_ggf": { + "passggf": { "trig_total": 38500.12454948732, "trig_total_err": 145.32670570366125 }, @@ -226,13 +226,13 @@ "trig_total": 2578476.2790518906, "trig_total_err": 3693.285504726755 }, - "pass_vbf": { + "passvbf": { "trig_total": 175190.49526402235, "trig_total_err": 4318.649481220402 } }, "2017": { - "pass_ggf": { + "passggf": { "trig_total": 65678.89161826814, "trig_total_err": 207.86047980915436 }, @@ -240,13 +240,13 @@ "trig_total": 2460502.005127463, "trig_total_err": 2704.983599867493 }, - "pass_vbf": { + "passvbf": { "trig_total": 389745.69783708075, "trig_total_err": 7746.5100600706855 } }, "2018": { - "pass_ggf": { + "passggf": { "trig_total": 46677.8895927491, "trig_total_err": 137.1808889147677 }, @@ -254,7 +254,7 @@ "trig_total": 1876156.0255485063, "trig_total_err": 1795.8409437101016 }, - "pass_vbf": { + "passvbf": { "trig_total": 276742.9830432943, "trig_total_err": 5028.429129818956 }