From 59fe24dfa20a6aa58908d2b03c704a307d33b7ed Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 Sep 2023 19:14:30 +0000 Subject: [PATCH] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/HHbbVV/VBF_binder/VBFgenselection.ipynb | 695 +++++++++++++----- src/HHbbVV/VBF_binder/plot1dhistscuts.ipynb | 400 ++++++---- .../postprocessing/PostProcessVBF.ipynb | 11 +- .../PostProcessVBFtesting.ipynb | 174 +++-- 4 files changed, 859 insertions(+), 421 deletions(-) diff --git a/src/HHbbVV/VBF_binder/VBFgenselection.ipynb b/src/HHbbVV/VBF_binder/VBFgenselection.ipynb index e0260c54..949f0e2d 100644 --- a/src/HHbbVV/VBF_binder/VBFgenselection.ipynb +++ b/src/HHbbVV/VBF_binder/VBFgenselection.ipynb @@ -80,13 +80,12 @@ " # \"/eos/uscms//store/user/lpcpfnano/rkansal/v2_3/2016/XHY/NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-190_TuneCP5_13TeV-madgraph-pythia8/NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-190/230323_193051/0000/nano_mc2016post_1-3.root\",\n", " \"nano_mc2018_1-1.root\",\n", " schemaclass=nanoevents.NanoAODSchema,\n", - ").events() \n", + ").events()\n", "\n", "Z_PDGID = 23\n", "W_PDGID = 24\n", "HIGGS_PDGID = 25\n", - "GEN_FLAGS = [\"fromHardProcess\", \"isLastCopy\"]\n", - " " + "GEN_FLAGS = [\"fromHardProcess\", \"isLastCopy\"]" ] }, { @@ -114,26 +113,21 @@ "source": [ "events.GenJet[0]\n", "higgs = events.GenPart[\n", - " (abs(events.GenPart.pdgId) == HIGGS_PDGID) * events.GenPart.hasFlags(GEN_FLAGS)\n", - " ]\n", + " (abs(events.GenPart.pdgId) == HIGGS_PDGID) * events.GenPart.hasFlags(GEN_FLAGS)\n", + "]\n", "\n", - "vbfs = events.GenPart[\n", - " ((abs(events.GenPart.pdgId) == 24) ) * events.GenPart.hasFlags(GEN_FLAGS)\n", - " ]\n", + "vbfs = events.GenPart[((abs(events.GenPart.pdgId) == 24)) * events.GenPart.hasFlags(GEN_FLAGS)]\n", "\n", "\n", + "print(ak.sum(ak.num(events.GenPart, axis=1)))\n", + "print(ak.num(events.GenPart, axis=1))\n", "\n", - "\n", - "\n", - "print(ak.sum(ak.num(events.GenPart, axis=1) ))\n", - "print( ak.num(events.GenPart, axis=1))\n", - "\n", - "print(ak.sum(ak.num(higgs, axis=1) ))\n", - "print( ak.num(higgs, axis=1))\n", + "print(ak.sum(ak.num(higgs, axis=1)))\n", + "print(ak.num(higgs, axis=1))\n", "print(higgs.mass)\n", "\n", - "print(ak.sum(ak.num(vbfs, axis=1) ))\n", - "print( ak.num(vbfs, axis=1))\n", + "print(ak.sum(ak.num(vbfs, axis=1)))\n", + "print(ak.num(vbfs, axis=1))\n", "print(vbfs.pt)\n", "print(vbfs)" ] @@ -158,11 +152,11 @@ "source": [ "n = 1244\n", "\n", - "print(' '.join(map(str, es[n].pdgId)))\n", - "print(' '.join(map(str, es[n].mass)))\n", + "print(\" \".join(map(str, es[n].pdgId)))\n", + "print(\" \".join(map(str, es[n].mass)))\n", "\n", - "print(' '.join(map(str, es2[n].pdgId)))\n", - "print(' '.join(map(str, es2[n].mass)))" + "print(\" \".join(map(str, es2[n].pdgId)))\n", + "print(\" \".join(map(str, es2[n].mass)))" ] }, { @@ -194,7 +188,7 @@ " count_dict[value_6] = count_dict.get(value_6, 0) + 1\n", "\n", "# Print the count dictionary\n", - "print(count_dict)\n" + "print(count_dict)" ] }, { @@ -215,8 +209,8 @@ } ], "source": [ - "print(ak.sum(ak.num(vbfs.children, axis=1) ))\n", - "print( ak.num(VBFs.children, axis=1))\n", + "print(ak.sum(ak.num(vbfs.children, axis=1)))\n", + "print(ak.num(VBFs.children, axis=1))\n", "print(VBFs.children.pdgId)\n", "print(VBFs.children.mass)" ] @@ -266,8 +260,8 @@ "# Select the daughter particles from the GenPart branch\n", "ds = events.GenPart[daughter_indices]\n", "\n", - 
"print(ak.sum(ak.num(ds, axis=1) ))\n", - "print( ak.num(ds, axis=1))\n", + "print(ak.sum(ak.num(ds, axis=1)))\n", + "print(ak.num(ds, axis=1))\n", "print(ds.pdgId)\n", "print(ds.mass)" ] @@ -294,11 +288,11 @@ "print(all.status, len(all))\n", "print(all.pdgId, len(all))\n", "\n", - "output = [all.status,all.pdgId]\n", + "output = [all.status, all.pdgId]\n", "\n", - "print(' '.join(map(str, all.status)))\n", - "print(' '.join(map(str, all.pdgId)))\n", - "#print('\\n'.join(map(str, output)))" + "print(\" \".join(map(str, all.status)))\n", + "print(\" \".join(map(str, all.pdgId)))\n", + "# print('\\n'.join(map(str, output)))" ] }, { @@ -323,10 +317,10 @@ "print(all.status, len(all))\n", "print(all.pdgId, len(all))\n", "\n", - "output = [all.status,all.pdgId]\n", + "output = [all.status, all.pdgId]\n", "\n", - "print(' '.join(map(str, all.status)))\n", - "print(' '.join(map(str, all.pdgId)))" + "print(\" \".join(map(str, all.status)))\n", + "print(\" \".join(map(str, all.pdgId)))" ] }, { @@ -347,35 +341,85 @@ } ], "source": [ - "\n", - "\n", "import matplotlib.pyplot as plt\n", "\n", "# Replace with your actual data\n", - "unsorted = [(0.10, 2.95), (0.13, 4.24), (0.17, 6.21), (0.22, 9.57), \n", - " (0.29, 14.02), (0.37, 20.02), (0.49, 26.73), (0.63, 34.21), \n", - " (0.82, 41.90), (1.07, 48.98), (1.39, 56.94), (1.81, 64.62), \n", - " (2.36, 73.29), (3.07, 83.35), (4.00, 89.80)]\n", + "unsorted = [\n", + " (0.10, 2.95),\n", + " (0.13, 4.24),\n", + " (0.17, 6.21),\n", + " (0.22, 9.57),\n", + " (0.29, 14.02),\n", + " (0.37, 20.02),\n", + " (0.49, 26.73),\n", + " (0.63, 34.21),\n", + " (0.82, 41.90),\n", + " (1.07, 48.98),\n", + " (1.39, 56.94),\n", + " (1.81, 64.62),\n", + " (2.36, 73.29),\n", + " (3.07, 83.35),\n", + " (4.00, 89.80),\n", + "]\n", "\n", "\n", - "pt_sorted = [(0.10, 3.08), (0.13, 4.48), (0.17, 6.56), (0.22, 9.82), \n", - " (0.29, 14.23), (0.37, 19.94), (0.49, 27.05), (0.63, 34.29), \n", - " (0.82, 39.69), (1.07, 44.64), (1.39, 49.32), (1.81, 56.12), \n", - " (2.36, 65.91), (3.07, 80.29), (4.00, 88.84)]\n", + "pt_sorted = [\n", + " (0.10, 3.08),\n", + " (0.13, 4.48),\n", + " (0.17, 6.56),\n", + " (0.22, 9.82),\n", + " (0.29, 14.23),\n", + " (0.37, 19.94),\n", + " (0.49, 27.05),\n", + " (0.63, 34.29),\n", + " (0.82, 39.69),\n", + " (1.07, 44.64),\n", + " (1.39, 49.32),\n", + " (1.81, 56.12),\n", + " (2.36, 65.91),\n", + " (3.07, 80.29),\n", + " (4.00, 88.84),\n", + "]\n", "\n", - "mass_sorted = [(0.10, 1.61), (0.13, 2.33), (0.17, 3.47), (0.22, 5.17), \n", - " (0.29, 7.58), (0.37, 10.53), (0.49, 14.77), (0.63, 19.75), \n", - " (0.82, 24.26), (1.07, 28.86), (1.39, 34.64), (1.81, 42.96), \n", - " (2.36, 56.19), (3.07, 77.39), (4.00, 88.42)]\n", + "mass_sorted = [\n", + " (0.10, 1.61),\n", + " (0.13, 2.33),\n", + " (0.17, 3.47),\n", + " (0.22, 5.17),\n", + " (0.29, 7.58),\n", + " (0.37, 10.53),\n", + " (0.49, 14.77),\n", + " (0.63, 19.75),\n", + " (0.82, 24.26),\n", + " (1.07, 28.86),\n", + " (1.39, 34.64),\n", + " (1.81, 42.96),\n", + " (2.36, 56.19),\n", + " (3.07, 77.39),\n", + " (4.00, 88.42),\n", + "]\n", "\n", - "eta_sorted = [(0.10, 1.15), (0.13, 1.95), (0.17, 3.10), (0.22, 4.55), \n", - " (0.29, 6.80), (0.37, 9.82), (0.49, 13.35), (0.63, 17.57), \n", - " (0.82, 22.83), (1.07, 27.98), (1.39, 35.02), (1.81, 43.58), \n", - " (2.36, 57.00), (3.07, 77.69), (4.00, 88.57)]\n", + "eta_sorted = [\n", + " (0.10, 1.15),\n", + " (0.13, 1.95),\n", + " (0.17, 3.10),\n", + " (0.22, 4.55),\n", + " (0.29, 6.80),\n", + " (0.37, 9.82),\n", + " (0.49, 13.35),\n", + " (0.63, 17.57),\n", + " (0.82, 
22.83),\n", + " (1.07, 27.98),\n", + " (1.39, 35.02),\n", + " (1.81, 43.58),\n", + " (2.36, 57.00),\n", + " (3.07, 77.69),\n", + " (4.00, 88.57),\n", + "]\n", "\n", "datasets = [unsorted, pt_sorted, mass_sorted, eta_sorted]\n", - "colors = ['blue', 'red', 'green', 'purple']\n", - "titles = ['Unsorted', 'Pt Sorted', 'Mass Sorted', 'Eta Sorted']\n", + "colors = [\"blue\", \"red\", \"green\", \"purple\"]\n", + "titles = [\"Unsorted\", \"Pt Sorted\", \"Mass Sorted\", \"Eta Sorted\"]\n", "\n", "plt.figure(figsize=(10, 7))\n", "\n", @@ -383,11 +427,11 @@ " deltaR_values, percentages = zip(*dataset)\n", " plt.plot(deltaR_values, percentages, color=color, label=title)\n", " plt.scatter(deltaR_values, percentages, color=color)\n", - "plt.title('Number of events with >= 1 correctly reconstructed jet')\n", - "plt.xlabel('Delta R')\n", - "plt.ylabel('Percentage of signal events with any (1 or 2) matching jet (%)')\n", + "plt.title(\"Number of events with >= 1 correctly reconstructed jet\")\n", + "plt.xlabel(\"Delta R\")\n", + "plt.ylabel(\"Percentage of signal events with any (1 or 2) matching jet (%)\")\n", "plt.legend()\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -408,32 +452,102 @@ } ], "source": [ - "unsorted_new = [(0.10, 7.64), (0.13, 7.64), (0.17, 7.65), (0.22, 7.65), (0.29, 7.65), \n", - " (0.37, 7.66), (0.49, 7.70), (0.63, 7.97), (0.82, 8.82), (1.07, 10.54), \n", - " (1.39, 12.97), (1.81, 16.34), (2.36, 22.48), (3.07, 39.14), (4.00, 67.98)]\n", + "unsorted_new = [\n", + " (0.10, 7.64),\n", + " (0.13, 7.64),\n", + " (0.17, 7.65),\n", + " (0.22, 7.65),\n", + " (0.29, 7.65),\n", + " (0.37, 7.66),\n", + " (0.49, 7.70),\n", + " (0.63, 7.97),\n", + " (0.82, 8.82),\n", + " (1.07, 10.54),\n", + " (1.39, 12.97),\n", + " (1.81, 16.34),\n", + " (2.36, 22.48),\n", + " (3.07, 39.14),\n", + " (4.00, 67.98),\n", + "]\n", "\n", - "pt_sorted_new = [(0.10, 7.85), (0.13, 7.85), (0.17, 7.85), (0.22, 7.85), (0.29, 7.85),\n", - " (0.37, 7.86), (0.49, 7.94), (0.63, 8.03), (0.82, 8.30), (1.07, 8.82),\n", - " (1.39, 9.47), (1.81, 10.61), (2.36, 13.48), (3.07, 26.26), (4.00, 57.30)]\n", + "pt_sorted_new = [\n", + " (0.10, 7.85),\n", + " (0.13, 7.85),\n", + " (0.17, 7.85),\n", + " (0.22, 7.85),\n", + " (0.29, 7.85),\n", + " (0.37, 7.86),\n", + " (0.49, 7.94),\n", + " (0.63, 8.03),\n", + " (0.82, 8.30),\n", + " (1.07, 8.82),\n", + " (1.39, 9.47),\n", + " (1.81, 10.61),\n", + " (2.36, 13.48),\n", + " (3.07, 26.26),\n", + " (4.00, 57.30),\n", + "]\n", "\n", - "mass_sorted_new = [(0.10, 7.19), (0.13, 7.19), (0.17, 7.19), (0.22, 7.19), (0.29, 7.19), \n", - " (0.37, 7.19), (0.49, 7.19), (0.63, 7.19), (0.82, 7.24), (1.07, 7.32),\n", - " (1.39, 7.43), (1.81, 7.82), (2.36, 8.80), (3.07, 14.18), (4.00, 35.35)]\n", + "mass_sorted_new = [\n", + " (0.10, 7.19),\n", + " (0.13, 7.19),\n", + " (0.17, 7.19),\n", + " (0.22, 7.19),\n", + " (0.29, 7.19),\n", + " (0.37, 7.19),\n", + " (0.49, 7.19),\n", + " (0.63, 7.19),\n", + " (0.82, 7.24),\n", + " (1.07, 7.32),\n", + " (1.39, 7.43),\n", + " (1.81, 7.82),\n", + " (2.36, 8.80),\n", + " (3.07, 14.18),\n", + " (4.00, 35.35),\n", + "]\n", "\n", - "eta_sorted_new = [(0.10, 7.54), (0.13, 7.54), (0.17, 7.54), (0.22, 7.54), (0.29, 7.54), \n", - " (0.37, 7.54), (0.49, 7.56), (0.63, 7.58), (0.82, 7.65), (1.07, 7.81),\n", - " (1.39, 8.02), (1.81, 8.40), (2.36, 9.48), (3.07, 14.83), (4.00, 35.49)]\n", + "eta_sorted_new = [\n", + " (0.10, 7.54),\n", + " (0.13, 7.54),\n", + " (0.17, 7.54),\n", + " (0.22, 7.54),\n", + " (0.29, 7.54),\n", + " (0.37, 7.54),\n", + " (0.49, 7.56),\n", + " (0.63, 
7.58),\n", + " (0.82, 7.65),\n", + " (1.07, 7.81),\n", + " (1.39, 8.02),\n", + " (1.81, 8.40),\n", + " (2.36, 9.48),\n", + " (3.07, 14.83),\n", + " (4.00, 35.49),\n", + "]\n", "\n", "\n", + "datasets = [\n", + " unsorted,\n", + " pt_sorted,\n", + " mass_sorted,\n", + " eta_sorted,\n", + " unsorted_new,\n", + " pt_sorted_new,\n", + " mass_sorted_new,\n", + " eta_sorted_new,\n", + "]\n", "\n", - "datasets = [unsorted, pt_sorted, mass_sorted, eta_sorted, \n", - " unsorted_new, pt_sorted_new, mass_sorted_new, eta_sorted_new]\n", + "colors = [\"blue\", \"red\", \"green\", \"purple\", \"lightblue\", \"lightcoral\", \"lightgreen\", \"violet\"]\n", "\n", - "colors = ['blue', 'red', 'green', 'purple', \n", - " 'lightblue', 'lightcoral', 'lightgreen', 'violet']\n", - "\n", - "titles = ['Unsorted', 'Pt Sorted', 'Mass Sorted', 'Eta Sorted', \n", - " 'Unsorted (=2)', 'Pt Sorted (=2)', 'Mass Sorted (=2)', 'Eta Sorted (=2)']\n", + "titles = [\n", + " \"Unsorted\",\n", + " \"Pt Sorted\",\n", + " \"Mass Sorted\",\n", + " \"Eta Sorted\",\n", + " \"Unsorted (=2)\",\n", + " \"Pt Sorted (=2)\",\n", + " \"Mass Sorted (=2)\",\n", + " \"Eta Sorted (=2)\",\n", + "]\n", "\n", "plt.figure(figsize=(10, 7))\n", "\n", @@ -441,12 +555,12 @@ " deltaR_values, percentages = zip(*dataset)\n", " plt.plot(deltaR_values, percentages, color=color, label=title)\n", " plt.scatter(deltaR_values, percentages, color=color)\n", - " \n", - "plt.title('Number of events with >= 1 correctly reconstructed jet')\n", - "plt.xlabel('Delta R')\n", - "plt.ylabel('Percentage of signal events with any (1 or 2) matching jet (%)')\n", - "#plt.xscale('log')\n", - "#plt.yscale('log')\n", + "\n", + "plt.title(\"Number of events with >= 1 correctly reconstructed jet\")\n", + "plt.xlabel(\"Delta R\")\n", + "plt.ylabel(\"Percentage of signal events with any (1 or 2) matching jet (%)\")\n", + "# plt.xscale('log')\n", + "# plt.yscale('log')\n", "plt.legend()\n", "plt.grid(True)\n", "plt.show()" @@ -481,7 +595,7 @@ " \"gen_std_phi\": None,\n", " \"gen_average_eta\": None,\n", " \"gen_std_eta\": None,\n", - " \"number_in_category\": 3701\n", + " \"number_in_category\": 3701,\n", " },\n", " \"1_true_values_stats\": {\n", " \"reco_average_mass\": 9.8,\n", @@ -500,7 +614,7 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": -0.0,\n", " \"gen_std_eta\": 1.7,\n", - " \"number_in_category\": 3355\n", + " \"number_in_category\": 3355,\n", " },\n", " \"2_true_values_stats\": {\n", " \"reco_average_mass\": 15.2,\n", @@ -519,7 +633,7 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": 0.0,\n", " \"gen_std_eta\": 1.5,\n", - " \"number_in_category\": 1993\n", + " \"number_in_category\": 1993,\n", " },\n", " },\n", " \"fatjet_lepton_ak4_veto\": {\n", @@ -543,7 +657,7 @@ " \"gen_std_phi\": None,\n", " \"gen_average_eta\": None,\n", " \"gen_std_eta\": None,\n", - " \"number_in_category\": 5812\n", + " \"number_in_category\": 5812,\n", " },\n", " \"1_true_values_stats\": {\n", " \"reco_average_mass\": 10.6,\n", @@ -562,7 +676,7 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": -0.0,\n", " \"gen_std_eta\": 1.6,\n", - " \"number_in_category\": 2762\n", + " \"number_in_category\": 2762,\n", " },\n", " \"2_true_values_stats\": {\n", " \"reco_average_mass\": 11.5,\n", @@ -581,10 +695,9 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": 0.0,\n", " \"gen_std_eta\": 1.6,\n", - " \"number_in_category\": 939\n", + " \"number_in_category\": 939,\n", " },\n", " },\n", - "\n", " \"random_sorted\": {\n", " \"0_true_values_percentage\": 86.12,\n", " 
\"1_true_values_percentage\": 12.14,\n", @@ -606,7 +719,7 @@ " \"gen_std_phi\": None,\n", " \"gen_average_eta\": None,\n", " \"gen_std_eta\": None,\n", - " \"number_in_category\": 8268\n", + " \"number_in_category\": 8268,\n", " },\n", " \"1_true_values_stats\": {\n", " \"reco_average_mass\": 10.8,\n", @@ -625,7 +738,7 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": -0.0,\n", " \"gen_std_eta\": 1.6,\n", - " \"number_in_category\": 1165\n", + " \"number_in_category\": 1165,\n", " },\n", " \"2_true_values_stats\": {\n", " \"reco_average_mass\": 15.0,\n", @@ -644,7 +757,7 @@ " \"gen_std_phi\": 1.7,\n", " \"gen_average_eta\": -0.1,\n", " \"gen_std_eta\": 1.5,\n", - " \"number_in_category\": 161\n", + " \"number_in_category\": 161,\n", " },\n", " },\n", " \"pt_sorted\": {\n", @@ -668,7 +781,7 @@ " \"gen_std_phi\": None,\n", " \"gen_average_eta\": None,\n", " \"gen_std_eta\": None,\n", - " \"number_in_category\": 7500\n", + " \"number_in_category\": 7500,\n", " },\n", " \"1_true_values_stats\": {\n", " \"reco_average_mass\": 13.2,\n", @@ -687,7 +800,7 @@ " \"gen_std_phi\": 1.9,\n", " \"gen_average_eta\": 0.0,\n", " \"gen_std_eta\": 1.6,\n", - " \"number_in_category\": 1820\n", + " \"number_in_category\": 1820,\n", " },\n", " \"2_true_values_stats\": {\n", " \"reco_average_mass\": 16.9,\n", @@ -706,7 +819,7 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": -0.0,\n", " \"gen_std_eta\": 1.6,\n", - " \"number_in_category\": 277\n", + " \"number_in_category\": 277,\n", " },\n", " },\n", " \"mass_sorted\": {\n", @@ -730,7 +843,7 @@ " \"gen_std_phi\": None,\n", " \"gen_average_eta\": None,\n", " \"gen_std_eta\": None,\n", - " \"number_in_category\": 8189\n", + " \"number_in_category\": 8189,\n", " },\n", " \"1_true_values_stats\": {\n", " \"reco_average_mass\": 12.1,\n", @@ -749,7 +862,7 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": 0.0,\n", " \"gen_std_eta\": 1.8,\n", - " \"number_in_category\": 975\n", + " \"number_in_category\": 975,\n", " },\n", " \"2_true_values_stats\": {\n", " \"reco_average_mass\": 17.3,\n", @@ -768,7 +881,7 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": -0.1,\n", " \"gen_std_eta\": 1.8,\n", - " \"number_in_category\": 140\n", + " \"number_in_category\": 140,\n", " },\n", " },\n", " \"eta_sorted\": {\n", @@ -792,7 +905,7 @@ " \"gen_std_phi\": None,\n", " \"gen_average_eta\": None,\n", " \"gen_std_eta\": None,\n", - " \"number_in_category\": 8308\n", + " \"number_in_category\": 8308,\n", " },\n", " \"1_true_values_stats\": {\n", " \"reco_average_mass\": 10.5,\n", @@ -811,7 +924,7 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": 0.0,\n", " \"gen_std_eta\": 1.9,\n", - " \"number_in_category\": 886\n", + " \"number_in_category\": 886,\n", " },\n", " \"2_true_values_stats\": {\n", " \"reco_average_mass\": 15.7,\n", @@ -830,10 +943,10 @@ " \"gen_std_phi\": 1.8,\n", " \"gen_average_eta\": -0.3,\n", " \"gen_std_eta\": 1.9,\n", - " \"number_in_category\": 110\n", + " \"number_in_category\": 110,\n", " },\n", " },\n", - "}\n" + "}" ] }, { @@ -1470,7 +1583,6 @@ } ], "source": [ - "\n", "# Prepare lists to populate DataFrame\n", "sort_types = []\n", "matches = []\n", @@ -1480,17 +1592,17 @@ "percentages = []\n", "\n", "for sort_key, sort_value in output.items():\n", - " for match in ['0', '1', '2']:\n", + " for match in [\"0\", \"1\", \"2\"]:\n", " percentage_key = f\"{match}_true_values_percentage\"\n", " stats_key = f\"{match}_true_values_stats\"\n", - " \n", + "\n", " if percentage_key in sort_value:\n", " percentage = sort_value[percentage_key]\n", " 
else:\n", " percentage = None\n", - " \n", + "\n", " if stats_key in sort_value:\n", - " for row in ['mass', 'pt', 'eta']:\n", + " for row in [\"mass\", \"pt\", \"eta\"]:\n", " reco_mean_key = f\"reco_average_{row}\"\n", " reco_std_key = f\"reco_std_{row}\"\n", " gen_mean_key = f\"gen_average_{row}\"\n", @@ -1498,29 +1610,40 @@ "\n", " sort_types.extend([sort_key, sort_key])\n", " matches.extend([match, match])\n", - " rows.extend([f'reco_{row}', f'gen_{row}'])\n", - " means.extend([sort_value[stats_key].get(reco_mean_key, None), sort_value[stats_key].get(gen_mean_key, None)])\n", - " std_devs.extend([sort_value[stats_key].get(reco_std_key, None), sort_value[stats_key].get(gen_std_key, None)])\n", - " percentages.extend([percentage]*2)\n", + " rows.extend([f\"reco_{row}\", f\"gen_{row}\"])\n", + " means.extend(\n", + " [\n", + " sort_value[stats_key].get(reco_mean_key, None),\n", + " sort_value[stats_key].get(gen_mean_key, None),\n", + " ]\n", + " )\n", + " std_devs.extend(\n", + " [\n", + " sort_value[stats_key].get(reco_std_key, None),\n", + " sort_value[stats_key].get(gen_std_key, None),\n", + " ]\n", + " )\n", + " percentages.extend([percentage] * 2)\n", "\n", "# Create DataFrame\n", "df = pd.DataFrame(\n", - " {'Sort_Type': sort_types,\n", - " 'Match': matches,\n", - " 'Row': rows,\n", - " 'Mean': means,\n", - " 'Standard_Deviation': std_devs,\n", - " 'Percentage': percentages\n", - " })\n", + " {\n", + " \"Sort_Type\": sort_types,\n", + " \"Match\": matches,\n", + " \"Row\": rows,\n", + " \"Mean\": means,\n", + " \"Standard_Deviation\": std_devs,\n", + " \"Percentage\": percentages,\n", + " }\n", + ")\n", "\n", "# Pivot DataFrame to create multi-index columns\n", - "df = df.pivot(index=['Sort_Type', 'Row'], columns='Match')\n", + "df = df.pivot(index=[\"Sort_Type\", \"Row\"], columns=\"Match\")\n", "\n", "# Swap column levels\n", "df = df.swaplevel(0, 1, axis=1).sort_index(axis=1)\n", "\n", - "df\n", - "\n" + "df" ] }, { @@ -1902,7 +2025,6 @@ "import pandas as pd\n", "\n", "\n", - "\n", "# Prepare lists to populate DataFrame\n", "sort_types = []\n", "matches = []\n", @@ -1912,17 +2034,17 @@ "percentages = []\n", "\n", "for sort_key, sort_value in output.items():\n", - " for match in ['0', '1', '2']:\n", + " for match in [\"0\", \"1\", \"2\"]:\n", " percentage_key = f\"{match}_true_values_percentage\"\n", " stats_key = f\"{match}_true_values_stats\"\n", - " \n", + "\n", " if percentage_key in sort_value:\n", " percentage = sort_value[percentage_key]\n", " else:\n", " percentage = None\n", - " \n", + "\n", " if stats_key in sort_value:\n", - " for row in ['mass', 'pt', 'eta']:\n", + " for row in [\"mass\", \"pt\", \"eta\"]:\n", " reco_mean_key = f\"reco_average_{row}\"\n", " reco_std_key = f\"reco_std_{row}\"\n", " gen_mean_key = f\"gen_average_{row}\"\n", @@ -1930,23 +2052,35 @@ "\n", " sort_types.extend([sort_key, sort_key])\n", " matches.extend([match, match])\n", - " rows.extend([f'reco_{row}', f'gen_{row}'])\n", - " means.extend([sort_value[stats_key].get(reco_mean_key, None), sort_value[stats_key].get(gen_mean_key, None)])\n", - " std_devs.extend([sort_value[stats_key].get(reco_std_key, None), sort_value[stats_key].get(gen_std_key, None)])\n", - " percentages.extend([percentage]*2)\n", + " rows.extend([f\"reco_{row}\", f\"gen_{row}\"])\n", + " means.extend(\n", + " [\n", + " sort_value[stats_key].get(reco_mean_key, None),\n", + " sort_value[stats_key].get(gen_mean_key, None),\n", + " ]\n", + " )\n", + " std_devs.extend(\n", + " [\n", + " sort_value[stats_key].get(reco_std_key, 
None),\n", + " sort_value[stats_key].get(gen_std_key, None),\n", + " ]\n", + " )\n", + " percentages.extend([percentage] * 2)\n", "\n", "# Create DataFrame\n", "df = pd.DataFrame(\n", - " {'Sort_Type': sort_types,\n", - " 'Match': matches,\n", - " 'Row': rows,\n", - " 'Mean': means,\n", - " 'Standard_Deviation': std_devs,\n", - " 'Percentage': percentages\n", - " })\n", + " {\n", + " \"Sort_Type\": sort_types,\n", + " \"Match\": matches,\n", + " \"Row\": rows,\n", + " \"Mean\": means,\n", + " \"Standard_Deviation\": std_devs,\n", + " \"Percentage\": percentages,\n", + " }\n", + ")\n", "\n", "# Pivot DataFrame to create multi-index columns\n", - "df = df.pivot_table(index='Sort_Type', columns=['Match', 'Row'])\n", + "df = df.pivot_table(index=\"Sort_Type\", columns=[\"Match\", \"Row\"])\n", "\n", "# Swap column levels\n", "df = df.swaplevel(0, 2, axis=1).sort_index(axis=1)\n", @@ -2253,17 +2387,17 @@ "percentages = []\n", "\n", "for sort_key, sort_value in output.items():\n", - " for match in ['0', '1', '2']:\n", + " for match in [\"0\", \"1\", \"2\"]:\n", " percentage_key = f\"{match}_true_values_percentage\"\n", " stats_key = f\"{match}_true_values_stats\"\n", - " \n", + "\n", " if percentage_key in sort_value:\n", " percentage = sort_value[percentage_key]\n", " else:\n", " percentage = None\n", - " \n", + "\n", " if stats_key in sort_value:\n", - " for row in ['mass', 'pt', 'eta']:\n", + " for row in [\"mass\", \"pt\", \"eta\"]:\n", " reco_mean_key = f\"reco_average_{row}\"\n", " reco_std_key = f\"reco_std_{row}\"\n", " gen_mean_key = f\"gen_average_{row}\"\n", @@ -2271,29 +2405,40 @@ "\n", " sort_types.extend([sort_key, sort_key])\n", " matches.extend([match, match])\n", - " rows.extend([f'reco_{row}', f'gen_{row}'])\n", - " means.extend([sort_value[stats_key].get(reco_mean_key, None), sort_value[stats_key].get(gen_mean_key, None)])\n", - " std_devs.extend([sort_value[stats_key].get(reco_std_key, None), sort_value[stats_key].get(gen_std_key, None)])\n", - " percentages.extend([percentage]*2)\n", + " rows.extend([f\"reco_{row}\", f\"gen_{row}\"])\n", + " means.extend(\n", + " [\n", + " sort_value[stats_key].get(reco_mean_key, None),\n", + " sort_value[stats_key].get(gen_mean_key, None),\n", + " ]\n", + " )\n", + " std_devs.extend(\n", + " [\n", + " sort_value[stats_key].get(reco_std_key, None),\n", + " sort_value[stats_key].get(gen_std_key, None),\n", + " ]\n", + " )\n", + " percentages.extend([percentage] * 2)\n", "\n", "# Create DataFrame\n", "df = pd.DataFrame(\n", - " {'Sort_Type': sort_types,\n", - " 'Match': matches,\n", - " 'Row': rows,\n", - " 'Mean': means,\n", - " 'Standard_Deviation': std_devs,\n", - " 'Percentage': percentages\n", - " })\n", + " {\n", + " \"Sort_Type\": sort_types,\n", + " \"Match\": matches,\n", + " \"Row\": rows,\n", + " \"Mean\": means,\n", + " \"Standard_Deviation\": std_devs,\n", + " \"Percentage\": percentages,\n", + " }\n", + ")\n", "\n", "# Pivot DataFrame to create multi-index columns\n", - "df = df.pivot(index='Sort_Type', columns=['Row', 'Match'])\n", + "df = df.pivot(index=\"Sort_Type\", columns=[\"Row\", \"Match\"])\n", "\n", "# Swap column levels\n", "df = df.swaplevel(0, 1, axis=1).swaplevel(1, 0, axis=1).sort_index(axis=1)\n", "\n", - "df\n", - "\n" + "df" ] }, { @@ -3100,9 +3245,9 @@ "from IPython.display import display, HTML\n", "\n", "# Set the display options\n", - "pd.set_option('display.max_columns', None)\n", - "pd.set_option('display.expand_frame_repr', False)\n", - "pd.set_option('display.max_colwidth', None)\n", + 
"pd.set_option(\"display.max_columns\", None)\n", + "pd.set_option(\"display.expand_frame_repr\", False)\n", + "pd.set_option(\"display.max_colwidth\", None)\n", "\n", "# Display the DataFrame as HTML\n", "display(HTML(df.to_html()))" @@ -3118,59 +3263,215 @@ "output_dict = {\n", " \"unfiltered\": {\n", " \"true_0\": {\"percentage\": \"5.72%\", \"event_count\": 549, \"reco_jet\": None, \"gen_jet\": None},\n", - " \"true_1\": {\"percentage\": \"38.82%\", \"event_count\": 3727, \n", - " \"reco_jet\": {\"mass\": {\"average\": 11.2, \"std\": 6.8}, \"pt\": {\"average\": 79.1, \"std\": 64.7}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 3.0}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 80.3, \"std\": 65.6}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 3.0}}},\n", - " \"true_2\": {\"percentage\": \"53.31%\", \"event_count\": 5118,\n", - " \"reco_jet\": {\"mass\": {\"average\": 10.8, \"std\": 6.5}, \"pt\": {\"average\": 75.9, \"std\": 65.5}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.9}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 77.5, \"std\": 65.9}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.9}}}\n", + " \"true_1\": {\n", + " \"percentage\": \"38.82%\",\n", + " \"event_count\": 3727,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 11.2, \"std\": 6.8},\n", + " \"pt\": {\"average\": 79.1, \"std\": 64.7},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 3.0},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 80.3, \"std\": 65.6},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 3.0},\n", + " },\n", + " },\n", + " \"true_2\": {\n", + " \"percentage\": \"53.31%\",\n", + " \"event_count\": 5118,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 10.8, \"std\": 6.5},\n", + " \"pt\": {\"average\": 75.9, \"std\": 65.5},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.9},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 77.5, \"std\": 65.9},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.9},\n", + " },\n", + " },\n", " },\n", " \"fatjet_lepton_ak4_veto\": {\n", " \"true_0\": {\"percentage\": \"19.08%\", \"event_count\": 1832, \"reco_jet\": None, \"gen_jet\": None},\n", - " \"true_1\": {\"percentage\": \"50.12%\", \"event_count\": 4812, \n", - " \"reco_jet\": {\"mass\": {\"average\": 11.0, \"std\": 4.6}, \"pt\": {\"average\": 73.3, \"std\": 37.1}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.9}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 73.9, \"std\": 38.1}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.9}}},\n", - " \"true_2\": {\"percentage\": \"30.36%\", \"event_count\": 2915,\n", - " \"reco_jet\": {\"mass\": {\"average\": 10.4, \"std\": 4.1}, \"pt\": {\"average\": 68.9, \"std\": 33.4}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.9}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 69.3, \"std\": 34.3}, \"phi\": {\"average\": 0.0, \"std\": 
1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.9}}}\n", + " \"true_1\": {\n", + " \"percentage\": \"50.12%\",\n", + " \"event_count\": 4812,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 11.0, \"std\": 4.6},\n", + " \"pt\": {\"average\": 73.3, \"std\": 37.1},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.9},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 73.9, \"std\": 38.1},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.9},\n", + " },\n", + " },\n", + " \"true_2\": {\n", + " \"percentage\": \"30.36%\",\n", + " \"event_count\": 2915,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 10.4, \"std\": 4.1},\n", + " \"pt\": {\"average\": 68.9, \"std\": 33.4},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.9},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 69.3, \"std\": 34.3},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.9},\n", + " },\n", + " },\n", " },\n", " \"random_sorted\": {\n", " \"true_0\": {\"percentage\": \"69.83%\", \"event_count\": 6704, \"reco_jet\": None, \"gen_jet\": None},\n", - " \"true_1\": {\"percentage\": \"28.07%\", \"event_count\": 2695,\n", - " \"reco_jet\": {\"mass\": {\"average\": 10.9, \"std\": 4.8}, \"pt\": {\"average\": 73.8, \"std\": 38.9}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.8}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 74.7, \"std\": 40.7}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.8}}},\n", - " \"true_2\": {\"percentage\": \"2.09%\", \"event_count\": 201,\n", - " \"reco_jet\": {\"mass\": {\"average\": 11.3, \"std\": 4.9}, \"pt\": {\"average\": 78.5, \"std\": 44.4}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.1, \"std\": 2.9}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 79.6, \"std\": 49.6}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.1, \"std\": 2.9}}}\n", + " \"true_1\": {\n", + " \"percentage\": \"28.07%\",\n", + " \"event_count\": 2695,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 10.9, \"std\": 4.8},\n", + " \"pt\": {\"average\": 73.8, \"std\": 38.9},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.8},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 74.7, \"std\": 40.7},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.8},\n", + " },\n", + " },\n", + " \"true_2\": {\n", + " \"percentage\": \"2.09%\",\n", + " \"event_count\": 201,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 11.3, \"std\": 4.9},\n", + " \"pt\": {\"average\": 78.5, \"std\": 44.4},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.1, \"std\": 2.9},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 79.6, \"std\": 49.6},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.1, \"std\": 2.9},\n", + " },\n", + " },\n", " },\n", " \"pt_sorted\": {\n", " \"true_0\": {\"percentage\": \"42.99%\", \"event_count\": 
4127, \"reco_jet\": None, \"gen_jet\": None},\n", - " \"true_1\": {\"percentage\": \"51.66%\", \"event_count\": 4959,\n", - " \"reco_jet\": {\"mass\": {\"average\": 12.7, \"std\": 4.5}, \"pt\": {\"average\": 89.3, \"std\": 34.4}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.8}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 87.9, \"std\": 36.4}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.8}}},\n", - " \"true_2\": {\"percentage\": \"5.35%\", \"event_count\": 514,\n", - " \"reco_jet\": {\"mass\": {\"average\": 12.7, \"std\": 4.5}, \"pt\": {\"average\": 89.4, \"std\": 36.2}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.8}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 87.5, \"std\": 40.0}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 2.8}}}\n", + " \"true_1\": {\n", + " \"percentage\": \"51.66%\",\n", + " \"event_count\": 4959,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 12.7, \"std\": 4.5},\n", + " \"pt\": {\"average\": 89.3, \"std\": 34.4},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.8},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 87.9, \"std\": 36.4},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.8},\n", + " },\n", + " },\n", + " \"true_2\": {\n", + " \"percentage\": \"5.35%\",\n", + " \"event_count\": 514,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 12.7, \"std\": 4.5},\n", + " \"pt\": {\"average\": 89.4, \"std\": 36.2},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.8},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 87.5, \"std\": 40.0},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 2.8},\n", + " },\n", + " },\n", " },\n", " \"mass_sorted\": {\n", " \"true_0\": {\"percentage\": \"23.02%\", \"event_count\": 2210, \"reco_jet\": None, \"gen_jet\": None},\n", - " \"true_1\": {\"percentage\": \"50.26%\", \"event_count\": 4825,\n", - " \"reco_jet\": {\"mass\": {\"average\": 11.1, \"std\": 4.6}, \"pt\": {\"average\": 74.2, \"std\": 36.5}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 3.0}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 74.7, \"std\": 37.4}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 3.0}}},\n", - " \"true_2\": {\"percentage\": \"23.65%\", \"event_count\": 2270,\n", - " \"reco_jet\": {\"mass\": {\"average\": 10.6, \"std\": 4.2}, \"pt\": {\"average\": 70.4, \"std\": 33.7}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 3.0}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 69.9, \"std\": 34.6}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 3.0}}}\n", + " \"true_1\": {\n", + " \"percentage\": \"50.26%\",\n", + " \"event_count\": 4825,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 11.1, \"std\": 4.6},\n", + " \"pt\": {\"average\": 74.2, \"std\": 36.5},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 3.0},\n", + " 
},\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 74.7, \"std\": 37.4},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 3.0},\n", + " },\n", + " },\n", + " \"true_2\": {\n", + " \"percentage\": \"23.65%\",\n", + " \"event_count\": 2270,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 10.6, \"std\": 4.2},\n", + " \"pt\": {\"average\": 70.4, \"std\": 33.7},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 3.0},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 69.9, \"std\": 34.6},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 3.0},\n", + " },\n", + " },\n", " },\n", " \"eta_sorted\": {\n", " \"true_0\": {\"percentage\": \"24.30%\", \"event_count\": 2333, \"reco_jet\": None, \"gen_jet\": None},\n", - " \"true_1\": {\"percentage\": \"49.50%\", \"event_count\": 4752,\n", - " \"reco_jet\": {\"mass\": {\"average\": 10.8, \"std\": 4.5}, \"pt\": {\"average\": 71.5, \"std\": 36.0}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": -0.1, \"std\": 3.0}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 72.1, \"std\": 36.9}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": -0.1, \"std\": 3.0}}},\n", - " \"true_2\": {\"percentage\": \"23.12%\", \"event_count\": 2220,\n", - " \"reco_jet\": {\"mass\": {\"average\": 10.3, \"std\": 4.1}, \"pt\": {\"average\": 67.6, \"std\": 32.7}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 3.1}},\n", - " \"gen_jet\": {\"mass\": {\"average\": 0.0, \"std\": 0.0}, \"pt\": {\"average\": 67.6, \"std\": 33.3}, \"phi\": {\"average\": 0.0, \"std\": 1.8}, \"eta\": {\"average\": 0.0, \"std\": 3.0}}}\n", + " \"true_1\": {\n", + " \"percentage\": \"49.50%\",\n", + " \"event_count\": 4752,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 10.8, \"std\": 4.5},\n", + " \"pt\": {\"average\": 71.5, \"std\": 36.0},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": -0.1, \"std\": 3.0},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 72.1, \"std\": 36.9},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": -0.1, \"std\": 3.0},\n", + " },\n", + " },\n", + " \"true_2\": {\n", + " \"percentage\": \"23.12%\",\n", + " \"event_count\": 2220,\n", + " \"reco_jet\": {\n", + " \"mass\": {\"average\": 10.3, \"std\": 4.1},\n", + " \"pt\": {\"average\": 67.6, \"std\": 32.7},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 3.1},\n", + " },\n", + " \"gen_jet\": {\n", + " \"mass\": {\"average\": 0.0, \"std\": 0.0},\n", + " \"pt\": {\"average\": 67.6, \"std\": 33.3},\n", + " \"phi\": {\"average\": 0.0, \"std\": 1.8},\n", + " \"eta\": {\"average\": 0.0, \"std\": 3.0},\n", + " },\n", + " },\n", " },\n", - "}\n" + "}" ] }, { @@ -4219,7 +4520,7 @@ " \"sorttype\": sorttype,\n", " \"true_val\": true_val,\n", " \"percentage\": truedata[\"percentage\"],\n", - " \"event_count\": truedata[\"event_count\"]\n", + " \"event_count\": truedata[\"event_count\"],\n", " }\n", " for jettype in [\"reco_jet\", \"gen_jet\"]:\n", " if truedata[jettype] is not None:\n", diff --git a/src/HHbbVV/VBF_binder/plot1dhistscuts.ipynb b/src/HHbbVV/VBF_binder/plot1dhistscuts.ipynb 
index 446b327a..ef3d6d98 100644 --- a/src/HHbbVV/VBF_binder/plot1dhistscuts.ipynb +++ b/src/HHbbVV/VBF_binder/plot1dhistscuts.ipynb @@ -8,7 +8,8 @@ "source": [ "import sys\n", "import os\n", - "sys.path.append('/home/users/annava/projects/HHbbVV/src/HHbbVV/postprocessing/')\n", + "\n", + "sys.path.append(\"/home/users/annava/projects/HHbbVV/src/HHbbVV/postprocessing/\")\n", "from collections import OrderedDict\n", "import utils\n", "import postprocessing\n", @@ -105,8 +106,6 @@ } ], "source": [ - "\n", - "\n", "nonres_samples = OrderedDict(\n", " [\n", " (\"HHbbVV\", \"GluGluToHHTobbVV_node_cHHH1\"),\n", @@ -164,8 +163,12 @@ "\n", "\n", "single_dataset_test = {\"VBFHHbbVV\": \"VBF_HHTobbVV_CV_1_C2V_1_C3_1\"}\n", - "events_dict = utils.load_samples( data_dir=\"/home/users/annava/projects/HHbbVV/src/HHbbVV/VBF_binder/data/\", samples=samples2 ,year='2017')\n", - "postprocessing.apply_weights(events_dict,year= '2017',cutflow= None,qcd_sf= False)" + "events_dict = utils.load_samples(\n", + " data_dir=\"/home/users/annava/projects/HHbbVV/src/HHbbVV/VBF_binder/data/\",\n", + " samples=samples2,\n", + " year=\"2017\",\n", + ")\n", + "postprocessing.apply_weights(events_dict, year=\"2017\", cutflow=None, qcd_sf=False)" ] }, { @@ -195,9 +198,9 @@ } ], "source": [ - "print(events_dict['QCD'].columns.tolist())\n", + "print(events_dict[\"QCD\"].columns.tolist())\n", "print(events_dict.keys())\n", - "print(events_dict['QCD'][('ak8FatJetParticleNetMass', 0)])" + "print(events_dict[\"QCD\"][(\"ak8FatJetParticleNetMass\", 0)])" ] }, { @@ -209,27 +212,27 @@ "# Computes appropriate scale factor for each signal file\n", "def compute_individual_scale_factors(events_dict, S_list):\n", " total_background_weight = 0\n", - " \n", + "\n", " # Calculate the total background weight\n", " for dataset, dataframe in events_dict.items():\n", " if dataset not in S_list:\n", - " total_weight = np.sum(dataframe['finalWeight'])\n", + " total_weight = np.sum(dataframe[\"finalWeight\"])\n", " total_background_weight += total_weight\n", - " \n", + "\n", " # Calculate individual scale factors for each signal dataset\n", " scale_factors = []\n", " for dataset in S_list:\n", - " total_signal_weight = np.sum(events_dict[dataset]['finalWeight'])\n", - " \n", + " total_signal_weight = np.sum(events_dict[dataset][\"finalWeight\"])\n", + "\n", " if total_signal_weight == 0:\n", " scale_factor = 0\n", " else:\n", - " scale_factor = total_background_weight / total_signal_weight/10\n", + " scale_factor = total_background_weight / total_signal_weight / 10\n", " n = np.floor(np.log10(scale_factor)) # Find the order of magnitude\n", - " scale_factor = 10 ** (n + 1) if scale_factor >= 10 ** n * 5 else 10 ** n\n", - " \n", + " scale_factor = 10 ** (n + 1) if scale_factor >= 10**n * 5 else 10**n\n", + "\n", " scale_factors.append(scale_factor)\n", - " \n", + "\n", " return scale_factors" ] }, @@ -287,62 +290,79 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", - "def plot_cut_histogram(combined_data, S_list, signal_sf, xlabel, column_name, xrange,title = 'Histogram MC signal and background',cuts = {}):\n", - " jj_mass_cut = cuts.get('jj_mass_cut', 500) \n", - " jj_eta_cut = cuts.get('jj_eta_cut', 4.0) \n", - " j_eta_cut = cuts.get('j_eta_cut', 1.5) \n", - " Hbb_Txbb_cut = cuts.get('Hbb_Txbb_cut', 0.8) \n", - " HVV_Th4q_cut = cuts.get('HVV_Th4q_cut', 0.0) \n", - " \n", - " \n", + "\n", + "def plot_cut_histogram(\n", + " combined_data,\n", + " S_list,\n", + " signal_sf,\n", + " xlabel,\n", + " column_name,\n", + " xrange,\n", 
+ " title=\"Histogram MC signal and background\",\n", + " cuts={},\n", + "):\n", + " jj_mass_cut = cuts.get(\"jj_mass_cut\", 500)\n", + " jj_eta_cut = cuts.get(\"jj_eta_cut\", 4.0)\n", + " j_eta_cut = cuts.get(\"j_eta_cut\", 1.5)\n", + " Hbb_Txbb_cut = cuts.get(\"Hbb_Txbb_cut\", 0.8)\n", + " HVV_Th4q_cut = cuts.get(\"HVV_Th4q_cut\", 0.0)\n", + "\n", " signal_data = []\n", " signal_weights = []\n", " background_data = []\n", " background_weights = []\n", " background_labels = []\n", - " \n", + "\n", " for dataset, dataframe in combined_data.items():\n", " # Computing variables of interest and masks based on them\n", - " Hbb_mask = dataframe[(\"ak8FatJetParticleNetMD_Txbb\", 0)] > dataframe[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + " Hbb_mask = (\n", + " dataframe[(\"ak8FatJetParticleNetMD_Txbb\", 0)]\n", + " > dataframe[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + " )\n", " Hbb_Txbb_values = np.where(\n", - " Hbb_mask, dataframe[(\"ak8FatJetParticleNetMD_Txbb\", 0)], dataframe[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + " Hbb_mask,\n", + " dataframe[(\"ak8FatJetParticleNetMD_Txbb\", 0)],\n", + " dataframe[(\"ak8FatJetParticleNetMD_Txbb\", 1)],\n", " )\n", " HVV_Th4q_values = np.where(\n", - " ~Hbb_mask, dataframe[(\"ak8FatJetParticleNet_Th4q\", 0)], dataframe[(\"ak8FatJetParticleNet_Th4q\", 1)]\n", + " ~Hbb_mask,\n", + " dataframe[(\"ak8FatJetParticleNet_Th4q\", 0)],\n", + " dataframe[(\"ak8FatJetParticleNet_Th4q\", 1)],\n", " )\n", - " \n", - " \n", - " mask = ((dataframe [('nGoodMuons', 0)] == 0) & \n", - " (dataframe[('nGoodElectrons', 0)] == 0) & \n", - " (dataframe[('nGoodVBFJets', 0)] >= 2) & \n", - " (dataframe[('nGoodJets', 0)] == 0) &\n", - " (dataframe[('vbf_Mass_jj', 0)] > jj_mass_cut) &\n", - " (dataframe[('vbf_dEta_jj', 0)] > jj_eta_cut) &\n", - " (np.abs(dataframe[('vbfeta', 0)]) > j_eta_cut) &\n", - " (np.abs(dataframe[('vbfeta', 1)]) > j_eta_cut) &\n", - " (Hbb_Txbb_values >= Hbb_Txbb_cut) &\n", - " (HVV_Th4q_values >= HVV_Th4q_cut) \n", - " )\n", - " #column_data = df[('bbFatJetPtOverDijetPt', 0)].values * df[('DijetPt', 0)].values\n", + "\n", + " mask = (\n", + " (dataframe[(\"nGoodMuons\", 0)] == 0)\n", + " & (dataframe[(\"nGoodElectrons\", 0)] == 0)\n", + " & (dataframe[(\"nGoodVBFJets\", 0)] >= 2)\n", + " & (dataframe[(\"nGoodJets\", 0)] == 0)\n", + " & (dataframe[(\"vbf_Mass_jj\", 0)] > jj_mass_cut)\n", + " & (dataframe[(\"vbf_dEta_jj\", 0)] > jj_eta_cut)\n", + " & (np.abs(dataframe[(\"vbfeta\", 0)]) > j_eta_cut)\n", + " & (np.abs(dataframe[(\"vbfeta\", 1)]) > j_eta_cut)\n", + " & (Hbb_Txbb_values >= Hbb_Txbb_cut)\n", + " & (HVV_Th4q_values >= HVV_Th4q_cut)\n", + " )\n", + " # column_data = df[('bbFatJetPtOverDijetPt', 0)].values * df[('DijetPt', 0)].values\n", " # Mass: ('ak8FatJetMass', 0), ('ak8FatJetMass', 1) or ('ak8FatJetParticleNetMass', 0), ('ak8FatJetParticleNetMass', 1)\n", " df = dataframe[mask]\n", - " if column_name == 'mass':\n", + " if column_name == \"mass\":\n", " column_data = np.where(\n", - " Hbb_mask, dataframe[('ak8FatJetParticleNetMass', 0)], dataframe[('ak8FatJetParticleNetMass', 1)] \n", + " Hbb_mask,\n", + " dataframe[(\"ak8FatJetParticleNetMass\", 0)],\n", + " dataframe[(\"ak8FatJetParticleNetMass\", 1)],\n", " )\n", " column_data = column_data[mask]\n", - " elif column_name == 'Hbb_Txbb':\n", + " elif column_name == \"Hbb_Txbb\":\n", " column_data = Hbb_Txbb_values\n", " column_data = column_data[mask]\n", - " elif column_name == 'HVV_Th4q':\n", - " column_data = HVV_Th4q_values\n", + " elif column_name == \"HVV_Th4q\":\n", + " column_data 
= HVV_Th4q_values\n", " column_data = column_data[mask]\n", " else:\n", " column_data = df[column_name].values\n", - " \n", - " \n", - " total_weight = df['finalWeight']\n", - " \n", + "\n", + " total_weight = df[\"finalWeight\"]\n", + "\n", " if dataset in S_list:\n", " i = S_list.index(dataset)\n", " column_weights = signal_sf[i] * total_weight\n", @@ -353,80 +373,104 @@ " background_data.append(column_data)\n", " background_weights.append(column_weights)\n", " background_labels.append(dataset)\n", - " \n", - " fig, ax = plt.subplots(2, 1, figsize=(12, 10), gridspec_kw={'height_ratios': [3, 1]}, sharex=True)\n", + "\n", + " fig, ax = plt.subplots(\n", + " 2, 1, figsize=(12, 10), gridspec_kw={\"height_ratios\": [3, 1]}, sharex=True\n", + " )\n", " bins = np.linspace(xrange[0], xrange[1], 100)\n", - " \n", - " ax[0].hist(background_data, bins=bins, weights=background_weights, stacked=True, label=background_labels)\n", - " \n", "\n", - " for data, weight, label,sf in zip(signal_data, signal_weights, S_list,signal_sf):\n", + " ax[0].hist(\n", + " background_data,\n", + " bins=bins,\n", + " weights=background_weights,\n", + " stacked=True,\n", + " label=background_labels,\n", + " )\n", + "\n", + " for data, weight, label, sf in zip(signal_data, signal_weights, S_list, signal_sf):\n", " if np.sum(weight) > 0:\n", " mean_value = np.average(data, weights=weight)\n", - " ax[0].axvline(mean_value, color='grey', linestyle='--', linewidth=1)\n", - " ax[0].hist(data, bins=bins, weights=weight, histtype='step', label=f\"{sf}x {label}\", linewidth=1.5)\n", - " \n", - " ax[0].set_ylabel('Expected # events')\n", + " ax[0].axvline(mean_value, color=\"grey\", linestyle=\"--\", linewidth=1)\n", + " ax[0].hist(\n", + " data,\n", + " bins=bins,\n", + " weights=weight,\n", + " histtype=\"step\",\n", + " label=f\"{sf}x {label}\",\n", + " linewidth=1.5,\n", + " )\n", + "\n", + " ax[0].set_ylabel(\"Expected # events\")\n", " ax[0].set_title(title)\n", " ax[0].legend()\n", - " \n", + "\n", " # Calculating and plotting significance for each signal dataset\n", - " for data, weight, label,sf in zip(signal_data, signal_weights, S_list,signal_sf):\n", + " for data, weight, label, sf in zip(signal_data, signal_weights, S_list, signal_sf):\n", " hist_signal, _ = np.histogram(data, bins=bins, weights=weight)\n", - " hist_background, bins = np.histogram(np.concatenate(background_data), bins=bins, weights=np.concatenate(background_weights))\n", + " hist_background, bins = np.histogram(\n", + " np.concatenate(background_data), bins=bins, weights=np.concatenate(background_weights)\n", + " )\n", " significance = np.where(hist_background > 0, hist_signal / np.sqrt(hist_background) / sf, 0)\n", " # Calculate overall significance and round to two significant figures\n", - " overall_significance = np.sum(weight) / sf / np.sqrt(np.sum(np.concatenate(background_weights)))\n", + " overall_significance = (\n", + " np.sum(weight) / sf / np.sqrt(np.sum(np.concatenate(background_weights)))\n", + " )\n", " n = np.floor(np.log10(overall_significance))\n", - " overall_significance = np.round(overall_significance, -int(n-1))\n", - " \n", + " overall_significance = np.round(overall_significance, -int(n - 1))\n", + "\n", " # Convert to scientific notation\n", " overall_significance_sci = \"{:e}\".format(overall_significance)\n", - " ax[1].step(bins[:-1], sf*significance, where='mid', label=f\"Significance {sf}x{label}: {overall_significance_sci}\")\n", - "\n", + " ax[1].step(\n", + " bins[:-1],\n", + " sf * significance,\n", + " 
where=\"mid\",\n", + " label=f\"Significance {sf}x{label}: {overall_significance_sci}\",\n", + " )\n", "\n", " ax[1].set_xlabel(xlabel)\n", - " ax[1].set_ylabel('$S / \\sqrt{B}$')\n", - " ax[1].set_title('Significance')\n", + " ax[1].set_ylabel(\"$S / \\sqrt{B}$\")\n", + " ax[1].set_title(\"Significance\")\n", " ax[1].legend()\n", - " \n", + "\n", " ax[0].set_xlim(xrange)\n", " ax[1].set_xlim(xrange)\n", - " \n", + "\n", " plt.tight_layout()\n", " plt.show()\n", - " \n", - " \n", - " \n", + "\n", " # Print out the individual significance\n", " significances = []\n", - " for data, weight, label,sf in zip(signal_data, signal_weights, S_list,signal_sf):\n", + " for data, weight, label, sf in zip(signal_data, signal_weights, S_list, signal_sf):\n", " S = np.sum(weight) / sf\n", " B = np.sum(np.concatenate(background_weights))\n", " print(f\"Significance for {label}: {S/np.sqrt(B)} {S} {B}\")\n", - " significances.append(S/np.sqrt(B))\n", + " significances.append(S / np.sqrt(B))\n", "\n", " return significances\n", "\n", "\n", - "S_list=['VBFHHbbVV','qqHH_CV_1_C2V_0_kl_1_HHbbVV','qqHH_CV_1_C2V_2_kl_1_HHbbVV']\n", - "scale_factors = compute_individual_scale_factors(events_dict, S_list) # notice that scale factor is based on pre-cuts (thus we can still compare visually)\n", - "xlabel = 'Hbb ParNet $M$ (GeV)'\n", - "cuts = {\n", - " 'jj_mass_cut': 0,\n", - " 'jj_eta_cut': 3.5,\n", - " 'j_eta_cut': 1,\n", - " 'Hbb_Txbb_cut': 0.95,\n", - " 'HVV_Th4q_cut': 0.9\n", - "}\n", + "S_list = [\"VBFHHbbVV\", \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\", \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\"]\n", + "scale_factors = compute_individual_scale_factors(\n", + " events_dict, S_list\n", + ") # notice that scale factor is based on pre-cuts (thus we can still compare visually)\n", + "xlabel = \"Hbb ParNet $M$ (GeV)\"\n", "cuts = {\n", - " 'jj_mass_cut': 0,\n", - " 'jj_eta_cut': 0,\n", - " 'j_eta_cut': 0,\n", - " 'Hbb_Txbb_cut': 0,\n", - " 'HVV_Th4q_cut': 0\n", + " \"jj_mass_cut\": 0,\n", + " \"jj_eta_cut\": 3.5,\n", + " \"j_eta_cut\": 1,\n", + " \"Hbb_Txbb_cut\": 0.95,\n", + " \"HVV_Th4q_cut\": 0.9,\n", "}\n", - "plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel='HVV_Th4q', column_name='mass',xrange = [0,300],cuts= cuts)\n" + "cuts = {\"jj_mass_cut\": 0, \"jj_eta_cut\": 0, \"j_eta_cut\": 0, \"Hbb_Txbb_cut\": 0, \"HVV_Th4q_cut\": 0}\n", + "plot_cut_histogram(\n", + " events_dict,\n", + " S_list=S_list,\n", + " signal_sf=scale_factors,\n", + " xlabel=\"HVV_Th4q\",\n", + " column_name=\"mass\",\n", + " xrange=[0, 300],\n", + " cuts=cuts,\n", + ")" ] }, { @@ -549,46 +593,81 @@ } ], "source": [ - "S_list=['VBFHHbbVV','qqHH_CV_1_C2V_0_kl_1_HHbbVV','qqHH_CV_1_C2V_2_kl_1_HHbbVV']\n", - "xlabel = 'VBF-Jet $p_T$ (GeV)'\n", - "xlabel = 'Hbb Jet $p_T$ (GeV)'\n", - "xlabel = 'Hbb ParNet $M$ (GeV)'\n", - "col_name = ('vbfpt', 0)\n", - "scale_factors = compute_individual_scale_factors(events_dict, S_list) # notice that scale factor is based on pre-cuts (thus we can still compare visually)\n", + "S_list = [\"VBFHHbbVV\", \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\", \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\"]\n", + "xlabel = \"VBF-Jet $p_T$ (GeV)\"\n", + "xlabel = \"Hbb Jet $p_T$ (GeV)\"\n", + "xlabel = \"Hbb ParNet $M$ (GeV)\"\n", + "col_name = (\"vbfpt\", 0)\n", + "scale_factors = compute_individual_scale_factors(\n", + " events_dict, S_list\n", + ") # notice that scale factor is based on pre-cuts (thus we can still compare visually)\n", "cuts = {\n", - " 'jj_mass_cut': 500,\n", - " 'jj_eta_cut': 4.0,\n", - " 'j_eta_cut': 1.5,\n", - " 
'Hbb_Txbb_cut': 0.95,\n", - " 'HVV_Th4q_cut': 0.6\n", + " \"jj_mass_cut\": 500,\n", + " \"jj_eta_cut\": 4.0,\n", + " \"j_eta_cut\": 1.5,\n", + " \"Hbb_Txbb_cut\": 0.95,\n", + " \"HVV_Th4q_cut\": 0.6,\n", "}\n", - "#plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel=xlabel, column_name=col_name,xrange = [0,300],cuts= cuts)\n", + "# plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel=xlabel, column_name=col_name,xrange = [0,300],cuts= cuts)\n", "cuts = {\n", - " 'jj_mass_cut': 500,\n", - " 'jj_eta_cut': 6.0,\n", - " 'j_eta_cut': 2.5,\n", - " 'Hbb_Txbb_cut': 0.95,\n", - " 'HVV_Th4q_cut': 0.6\n", - "}\n", - "\n", - "#plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel=xlabel, column_name=col_name,xrange = [0,300],cuts= cuts)\n", - "cuts = {\n", - " 'jj_mass_cut': 0,\n", - " 'jj_eta_cut': 0,\n", - " 'j_eta_cut': 0,\n", - " 'Hbb_Txbb_cut': 0.,\n", - " 'HVV_Th4q_cut': 0.\n", + " \"jj_mass_cut\": 500,\n", + " \"jj_eta_cut\": 6.0,\n", + " \"j_eta_cut\": 2.5,\n", + " \"Hbb_Txbb_cut\": 0.95,\n", + " \"HVV_Th4q_cut\": 0.6,\n", "}\n", "\n", + "# plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel=xlabel, column_name=col_name,xrange = [0,300],cuts= cuts)\n", + "cuts = {\"jj_mass_cut\": 0, \"jj_eta_cut\": 0, \"j_eta_cut\": 0, \"Hbb_Txbb_cut\": 0.0, \"HVV_Th4q_cut\": 0.0}\n", "\n", "\n", - "# plot different cutting variables distributions before cuts to find reasonable values \n", + "# plot different cutting variables distributions before cuts to find reasonable values\n", "# (jj_mass_cut = 0 (+ 500) j_eta_cut = 1 (pm 1) jj_eta_cut = 3.5 (pm 1.5) Hbb_Txbb = 0.95 (+ 0.045) Hbb_Txbb = 0.9 (pm 0.09 ) # test out a grid of these in condor_outputs_tests notebook\n", - "plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel='(vbf_Mass_jj, 0)', column_name=('vbf_Mass_jj', 0),xrange = [0,1000],cuts= cuts)\n", - "plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel='(vbfeta, 0)', column_name=('vbfeta', 0),xrange = [-4.5,4.5],cuts= cuts)\n", - "plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel='(vbf_dEta_jj, 0)', column_name=('vbf_dEta_jj', 0),xrange = [0,8],cuts= cuts)\n", - "plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel='Hbb_Txbb', column_name='Hbb_Txbb',xrange = [0.8,1],cuts= cuts)\n", - "plot_cut_histogram(events_dict, S_list=S_list, signal_sf=scale_factors, xlabel='HVV_Th4q', column_name='HVV_Th4q',xrange = [0.6,1],cuts= cuts)" + "plot_cut_histogram(\n", + " events_dict,\n", + " S_list=S_list,\n", + " signal_sf=scale_factors,\n", + " xlabel=\"(vbf_Mass_jj, 0)\",\n", + " column_name=(\"vbf_Mass_jj\", 0),\n", + " xrange=[0, 1000],\n", + " cuts=cuts,\n", + ")\n", + "plot_cut_histogram(\n", + " events_dict,\n", + " S_list=S_list,\n", + " signal_sf=scale_factors,\n", + " xlabel=\"(vbfeta, 0)\",\n", + " column_name=(\"vbfeta\", 0),\n", + " xrange=[-4.5, 4.5],\n", + " cuts=cuts,\n", + ")\n", + "plot_cut_histogram(\n", + " events_dict,\n", + " S_list=S_list,\n", + " signal_sf=scale_factors,\n", + " xlabel=\"(vbf_dEta_jj, 0)\",\n", + " column_name=(\"vbf_dEta_jj\", 0),\n", + " xrange=[0, 8],\n", + " cuts=cuts,\n", + ")\n", + "plot_cut_histogram(\n", + " events_dict,\n", + " S_list=S_list,\n", + " signal_sf=scale_factors,\n", + " xlabel=\"Hbb_Txbb\",\n", + " column_name=\"Hbb_Txbb\",\n", + " xrange=[0.8, 1],\n", + " cuts=cuts,\n", + ")\n", + "plot_cut_histogram(\n", + " events_dict,\n", + " 
S_list=S_list,\n", + " signal_sf=scale_factors,\n", + " xlabel=\"HVV_Th4q\",\n", + " column_name=\"HVV_Th4q\",\n", + " xrange=[0.6, 1],\n", + " cuts=cuts,\n", + ")" ] }, { @@ -604,19 +683,19 @@ "# def plot_cut_histogram(combined_data, S_list, signal_sf, xlabel, column_name, xrange, title='Histogram MC signal and background', cuts={}):\n", "\n", "# Dummy combined_data, S_list, signal_sf, xlabel, column_name, and xrange for the example\n", - "S_list=['VBFHHbbVV','qqHH_CV_1_C2V_0_kl_1_HHbbVV','qqHH_CV_1_C2V_2_kl_1_HHbbVV']\n", - "xlabel = 'Hbb ParNet $M$ (GeV)'\n", - "column_name = ('vbfpt', 0)\n", - "signal_sf = compute_individual_scale_factors(events_dict, S_list) #\n", + "S_list = [\"VBFHHbbVV\", \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\", \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\"]\n", + "xlabel = \"Hbb ParNet $M$ (GeV)\"\n", + "column_name = (\"vbfpt\", 0)\n", + "signal_sf = compute_individual_scale_factors(events_dict, S_list) #\n", "xrange = [0, 300] # Replace with your actual x-range\n", "\n", "# Define the cut parameters and their linspace ranges\n", "cut_parameters = {\n", - " 'jj_mass_cut': np.linspace(0, 600, 5),\n", - " 'jj_eta_cut': np.linspace(0, 5.0, 5),\n", - " 'j_eta_cut': np.linspace(0, 4.0, 5),\n", - " 'Hbb_Txbb_cut': np.linspace(0, 0.99, 5),\n", - " 'HVV_Th4q_cut': np.linspace(0, 0.99, 5)\n", + " \"jj_mass_cut\": np.linspace(0, 600, 5),\n", + " \"jj_eta_cut\": np.linspace(0, 5.0, 5),\n", + " \"j_eta_cut\": np.linspace(0, 4.0, 5),\n", + " \"Hbb_Txbb_cut\": np.linspace(0, 0.99, 5),\n", + " \"HVV_Th4q_cut\": np.linspace(0, 0.99, 5),\n", "}\n", "\n", "# Loop through each cut parameter and its linspace range\n", @@ -625,10 +704,10 @@ " # Set only the current cut parameter to the linspace value, others to zero\n", " cuts = {k: 0 for k in cut_parameters.keys()}\n", " cuts[param] = value\n", - " \n", + "\n", " # Call the plot function\n", " title = f\"Histogram MC Signal and Background, Varying {param} at {value}\"\n", - " plot_cut_histogram(events_dict, S_list, signal_sf, xlabel, column_name, xrange, title, cuts)\n" + " plot_cut_histogram(events_dict, S_list, signal_sf, xlabel, column_name, xrange, title, cuts)" ] }, { @@ -639,11 +718,11 @@ "source": [ "# Define the cut parameters and their linspace ranges\n", "cut_parameters = {\n", - " 'jj_mass_cut': np.linspace(0, 1000, 20),\n", - " 'jj_eta_cut': np.linspace(0, 8.0, 20),\n", - " 'j_eta_cut': np.linspace(0, 4.0, 10),\n", - " 'Hbb_Txbb_cut': np.linspace(0, .99, 20)**0.5,\n", - " 'HVV_Th4q_cut': np.linspace(0, 0.99, 20) ** 0.5\n", + " \"jj_mass_cut\": np.linspace(0, 1000, 20),\n", + " \"jj_eta_cut\": np.linspace(0, 8.0, 20),\n", + " \"j_eta_cut\": np.linspace(0, 4.0, 10),\n", + " \"Hbb_Txbb_cut\": np.linspace(0, 0.99, 20) ** 0.5,\n", + " \"HVV_Th4q_cut\": np.linspace(0, 0.99, 20) ** 0.5,\n", "}\n", "\n", "# To store the significances\n", @@ -652,23 +731,24 @@ "# Loop through each cut parameter and its linspace range\n", "for param, values in cut_parameters.items():\n", " all_significances[param] = {}\n", - " \n", + "\n", " for signal in S_list:\n", " all_significances[param][signal] = []\n", - " \n", + "\n", " for value in values:\n", " # Set only the current cut parameter to the linspace value, others to zero\n", " cuts = {k: 0 for k in cut_parameters.keys()}\n", " cuts[param] = value\n", - " \n", + "\n", " # Call the plot function and get significances\n", " title = f\"Histogram MC Signal and Background, Varying {param} at {value}\"\n", - " significances = plot_cut_histogram(events_dict, S_list, signal_sf, xlabel, column_name, xrange, title, 
cuts)\n", - " \n", + " significances = plot_cut_histogram(\n", + " events_dict, S_list, signal_sf, xlabel, column_name, xrange, title, cuts\n", + " )\n", + "\n", " # Store the significances\n", " for signal, sig in zip(S_list, significances):\n", - " all_significances[param][signal].append(sig)\n", - "\n" + " all_significances[param][signal].append(sig)" ] }, { @@ -731,7 +811,8 @@ "# Function to round to the nearest power of 10\n", "def round_to_nearest_power_of_10(x):\n", " n = np.floor(np.log10(x)) # Find the order of magnitude\n", - " return 10 ** (n + 1) if x >= 10 ** n * 5 else 10 ** n\n", + " return 10 ** (n + 1) if x >= 10**n * 5 else 10**n\n", + "\n", "\n", "# Assuming the original scale factors are in signal_sf\n", "min_sf = min(signal_sf)\n", @@ -744,21 +825,24 @@ " # Scale the significances using the appropriate adjusted scale factor\n", " sf = adjusted_sf[i] # Using the adjusted scale factors\n", " scaled_sig_values = [sig * sf for sig in sig_values]\n", - " \n", + "\n", " # Check to make sure the dimensions match before plotting\n", " if len(cut_parameters[param]) != len(scaled_sig_values):\n", " print(f\"Dimension mismatch for {signal}. Skipping.\")\n", " continue\n", - " \n", - " plt.plot(cut_parameters[param], scaled_sig_values, label=f'Scaled significance of {signal} (Scale factor: {sf})')\n", - " \n", + "\n", + " plt.plot(\n", + " cut_parameters[param],\n", + " scaled_sig_values,\n", + " label=f\"Scaled significance of {signal} (Scale factor: {sf})\",\n", + " )\n", + "\n", " plt.xlabel(param)\n", - " plt.ylabel('Scaled Significance')\n", - " plt.title(f'Scaled Significance vs {param}')\n", + " plt.ylabel(\"Scaled Significance\")\n", + " plt.title(f\"Scaled Significance vs {param}\")\n", " plt.legend()\n", " plt.grid(True)\n", - " plt.show()\n", - "\n" + " plt.show()" ] } ], diff --git a/src/HHbbVV/postprocessing/PostProcessVBF.ipynb b/src/HHbbVV/postprocessing/PostProcessVBF.ipynb index 80da5519..643cca60 100644 --- a/src/HHbbVV/postprocessing/PostProcessVBF.ipynb +++ b/src/HHbbVV/postprocessing/PostProcessVBF.ipynb @@ -156,8 +156,15 @@ " # ShapeVar(var=\"DijetPt\", label=r\"$p_T^{jj}$ (GeV)\", bins=[30, 0, 750]),\n", " # ShapeVar(var=\"DijetMass\", label=r\"$m^{jj}$ (GeV)\", bins=[30, 600, 4000]),\n", " # ShapeVar(var=\"bbFatJetEta\", label=r\"$\\eta^{bb}$\", bins=[30, -2.4, 2.4]),\n", - " ShapeVar(var=\"bbFatJetPt\", label=r\"$p^{bb}_T$ (GeV)\", bins=[30, 300, 1500], significance_dir=\"right\"),\n", - " ShapeVar(var=\"bbFatJetParticleNetMass\", label=r\"$m^{bb}_{reg}$ (GeV)\", bins=[20, 50, 250], significance_dir=\"bin\"),\n", + " ShapeVar(\n", + " var=\"bbFatJetPt\", label=r\"$p^{bb}_T$ (GeV)\", bins=[30, 300, 1500], significance_dir=\"right\"\n", + " ),\n", + " ShapeVar(\n", + " var=\"bbFatJetParticleNetMass\",\n", + " label=r\"$m^{bb}_{reg}$ (GeV)\",\n", + " bins=[20, 50, 250],\n", + " significance_dir=\"bin\",\n", + " ),\n", " # ShapeVar(var=\"bbFatJetMsd\", label=r\"$m^{bb}_{msd}$ (GeV)\", bins=[50, 0, 300]),\n", " # ShapeVar(var=\"bbFatJetParticleNetMD_Txbb\", label=r\"$T^{bb}_{Xbb}$\", bins=[50, 0.8, 1]),\n", " # ShapeVar(var=\"VVFatJetEta\", label=r\"$\\eta^{VV}$\", bins=[30, -2.4, 2.4]),\n", diff --git a/src/HHbbVV/postprocessing/PostProcessVBFtesting.ipynb b/src/HHbbVV/postprocessing/PostProcessVBFtesting.ipynb index c0bb7eed..3ccad1ab 100644 --- a/src/HHbbVV/postprocessing/PostProcessVBFtesting.ipynb +++ b/src/HHbbVV/postprocessing/PostProcessVBFtesting.ipynb @@ -78,13 +78,13 @@ "source": [ "MAIN_DIR = \"../../../\"\n", "samples_dir = 
f\"{MAIN_DIR}/../data/skimmer/anava/Test\"\n", - "samples_dir = '/home/users/annava/projects/HHbbVV/src/HHbbVV/VBF_binder/data'\n", + "samples_dir = \"/home/users/annava/projects/HHbbVV/src/HHbbVV/VBF_binder/data\"\n", "# samples_dir = \"/eos/uscms/store/user/anava/bbVV/skimmer/Test/\"\n", "year = \"2017\"\n", "\n", "date = \"23Sep2\"\n", "plot_dir = f\"../../../plots/PostProcessing/{date}/\"\n", - "plot_dir = '/home/users/annava/projects/HHbbVV/src/HHbbVV/VBF_binder/vbf_tests_output'\n", + "plot_dir = \"/home/users/annava/projects/HHbbVV/src/HHbbVV/VBF_binder/vbf_tests_output\"\n", "templates_dir = f\"templates/{date}\"\n", "_ = os.system(f\"mkdir -p {plot_dir}\")\n", "_ = os.system(f\"mkdir -p {plot_dir}/cutflows/\")\n", @@ -166,8 +166,15 @@ " # ShapeVar(var=\"DijetPt\", label=r\"$p_T^{jj}$ (GeV)\", bins=[30, 0, 750]),\n", " ShapeVar(var=\"DijetMass\", label=r\"$m^{jj}$ (GeV)\", bins=[30, 600, 4000]),\n", " # ShapeVar(var=\"bbFatJetEta\", label=r\"$\\eta^{bb}$\", bins=[30, -2.4, 2.4]),\n", - " ShapeVar(var=\"bbFatJetPt\", label=r\"$p^{bb}_T$ (GeV)\", bins=[30, 300, 1500], significance_dir=\"right\"),\n", - " ShapeVar(var=\"bbFatJetParticleNetMass\", label=r\"$m^{bb}_{reg}$ (GeV)\", bins=[20, 50, 250], significance_dir=\"bin\"),\n", + " ShapeVar(\n", + " var=\"bbFatJetPt\", label=r\"$p^{bb}_T$ (GeV)\", bins=[30, 300, 1500], significance_dir=\"right\"\n", + " ),\n", + " ShapeVar(\n", + " var=\"bbFatJetParticleNetMass\",\n", + " label=r\"$m^{bb}_{reg}$ (GeV)\",\n", + " bins=[20, 50, 250],\n", + " significance_dir=\"bin\",\n", + " ),\n", " # ShapeVar(var=\"bbFatJetMsd\", label=r\"$m^{bb}_{msd}$ (GeV)\", bins=[50, 0, 300]),\n", " # ShapeVar(var=\"bbFatJetParticleNetMD_Txbb\", label=r\"$T^{bb}_{Xbb}$\", bins=[50, 0.8, 1]),\n", " # ShapeVar(var=\"VVFatJetEta\", label=r\"$\\eta^{VV}$\", bins=[30, -2.4, 2.4]),\n", @@ -227,21 +234,20 @@ " \"pass\": postprocessing.Region(\n", " cuts={\n", " \"bbFatJetParticleNetMD_Txbb\": [0.985, CUT_MAX_VAL],\n", - " ('nGoodVBFJets', 0): [2,40],\n", - " ('vbf_Mass_jj', 0): [500,10000],\n", - " ('vbf_dEta_jj', 0): [4,10000],\n", - " 'VVFatJetParticleNet_Th4q': [0.9,1]\n", - " \n", + " (\"nGoodVBFJets\", 0): [2, 40],\n", + " (\"vbf_Mass_jj\", 0): [500, 10000],\n", + " (\"vbf_dEta_jj\", 0): [4, 10000],\n", + " \"VVFatJetParticleNet_Th4q\": [0.9, 1],\n", " },\n", " label=\"Pass\",\n", " ),\n", " \"fail\": postprocessing.Region(\n", " cuts={\n", " \"bbFatJetParticleNetMD_Txbb\": [-CUT_MAX_VAL, 0.985],\n", - " ('nGoodVBFJets', 0): [2,40],\n", - " ('vbf_Mass_jj', 0): [500,10000],\n", - " ('vbf_dEta_jj', 0): [4,10000],\n", - " 'VVFatJetParticleNet_Th4q': [0.9,1]\n", + " (\"nGoodVBFJets\", 0): [2, 40],\n", + " (\"vbf_Mass_jj\", 0): [500, 10000],\n", + " (\"vbf_dEta_jj\", 0): [4, 10000],\n", + " \"VVFatJetParticleNet_Th4q\": [0.9, 1],\n", " },\n", " label=\"Fail\",\n", " ),\n", @@ -259,7 +265,6 @@ "]\n", "\n", "\n", - "\n", "t = postprocessing.get_templates(\n", " events_dict,\n", " bb_masks,\n", @@ -315,7 +320,7 @@ " blind_window=[100, 150],\n", " ),\n", "]\n", - "# A \n", + "# A\n", "\n", "\n", "# first we will do 0 to 115 mass cut pass and fail region. then we do 145 to 100000 pass and fail. 
the sums give (C,D)\n", @@ -324,22 +329,22 @@ " \"pass\": postprocessing.Region(\n", " cuts={\n", " \"bbFatJetParticleNetMD_Txbb\": [0.985, CUT_MAX_VAL],\n", - " ('nGoodVBFJets', 0): [2,40],\n", - " ('vbf_Mass_jj', 0): [400,10000],\n", - " ('vbf_dEta_jj', 0): [3,10000],\n", - " 'VVFatJetParticleNet_Th4q': [0.9,1],\n", - " \"bbFatJetParticleNetMass\": [145,100000],\n", + " (\"nGoodVBFJets\", 0): [2, 40],\n", + " (\"vbf_Mass_jj\", 0): [400, 10000],\n", + " (\"vbf_dEta_jj\", 0): [3, 10000],\n", + " \"VVFatJetParticleNet_Th4q\": [0.9, 1],\n", + " \"bbFatJetParticleNetMass\": [145, 100000],\n", " },\n", " label=\"Pass\",\n", " ),\n", " \"fail\": postprocessing.Region(\n", " cuts={\n", " \"bbFatJetParticleNetMD_Txbb\": [-CUT_MAX_VAL, 0.985],\n", - " ('nGoodVBFJets', 0): [2,40],\n", - " ('vbf_Mass_jj', 0): [400,10000],\n", - " ('vbf_dEta_jj', 0): [3,10000],\n", - " 'VVFatJetParticleNet_Th4q': [0.9,1],\n", - " \"bbFatJetParticleNetMass\": [145,100000],\n", + " (\"nGoodVBFJets\", 0): [2, 40],\n", + " (\"vbf_Mass_jj\", 0): [400, 10000],\n", + " (\"vbf_dEta_jj\", 0): [3, 10000],\n", + " \"VVFatJetParticleNet_Th4q\": [0.9, 1],\n", + " \"bbFatJetParticleNetMass\": [145, 100000],\n", " },\n", " label=\"Fail\",\n", " ),\n", @@ -349,22 +354,22 @@ " \"pass\": postprocessing.Region(\n", " cuts={\n", " \"bbFatJetParticleNetMD_Txbb\": [0.985, CUT_MAX_VAL],\n", - " ('nGoodVBFJets', 0): [2,40],\n", - " ('vbf_Mass_jj', 0): [400,10000],\n", - " ('vbf_dEta_jj', 0): [3,10000],\n", - " 'VVFatJetParticleNet_Th4q': [0.9,1],\n", - " \"bbFatJetParticleNetMass\": [0,115],\n", + " (\"nGoodVBFJets\", 0): [2, 40],\n", + " (\"vbf_Mass_jj\", 0): [400, 10000],\n", + " (\"vbf_dEta_jj\", 0): [3, 10000],\n", + " \"VVFatJetParticleNet_Th4q\": [0.9, 1],\n", + " \"bbFatJetParticleNetMass\": [0, 115],\n", " },\n", " label=\"Pass\",\n", " ),\n", " \"fail\": postprocessing.Region(\n", " cuts={\n", " \"bbFatJetParticleNetMD_Txbb\": [-CUT_MAX_VAL, 0.985],\n", - " ('nGoodVBFJets', 0): [2,40],\n", - " ('vbf_Mass_jj', 0): [400,10000],\n", - " ('vbf_dEta_jj', 0): [3,10000],\n", - " 'VVFatJetParticleNet_Th4q': [0.9,1],\n", - " \"bbFatJetParticleNetMass\": [0,115],\n", + " (\"nGoodVBFJets\", 0): [2, 40],\n", + " (\"vbf_Mass_jj\", 0): [400, 10000],\n", + " (\"vbf_dEta_jj\", 0): [3, 10000],\n", + " \"VVFatJetParticleNet_Th4q\": [0.9, 1],\n", + " \"bbFatJetParticleNetMass\": [0, 115],\n", " },\n", " label=\"Fail\",\n", " ),\n", @@ -374,22 +379,22 @@ " \"pass\": postprocessing.Region(\n", " cuts={\n", " \"bbFatJetParticleNetMD_Txbb\": [0.985, CUT_MAX_VAL],\n", - " ('nGoodVBFJets', 0): [2,40],\n", - " ('vbf_Mass_jj', 0): [400,10000],\n", - " ('vbf_dEta_jj', 0): [3,10000],\n", - " 'VVFatJetParticleNet_Th4q': [0.9,1],\n", - " \"bbFatJetParticleNetMass\": [115,145],\n", + " (\"nGoodVBFJets\", 0): [2, 40],\n", + " (\"vbf_Mass_jj\", 0): [400, 10000],\n", + " (\"vbf_dEta_jj\", 0): [3, 10000],\n", + " \"VVFatJetParticleNet_Th4q\": [0.9, 1],\n", + " \"bbFatJetParticleNetMass\": [115, 145],\n", " },\n", " label=\"Pass\",\n", " ),\n", " \"fail\": postprocessing.Region(\n", " cuts={\n", " \"bbFatJetParticleNetMD_Txbb\": [-CUT_MAX_VAL, 0.985],\n", - " ('nGoodVBFJets', 0): [2,40],\n", - " ('vbf_Mass_jj', 0): [400,10000],\n", - " ('vbf_dEta_jj', 0): [3,10000],\n", - " 'VVFatJetParticleNet_Th4q': [0.9,1],\n", - " \"bbFatJetParticleNetMass\": [115,145],\n", + " (\"nGoodVBFJets\", 0): [2, 40],\n", + " (\"vbf_Mass_jj\", 0): [400, 10000],\n", + " (\"vbf_dEta_jj\", 0): [3, 10000],\n", + " \"VVFatJetParticleNet_Th4q\": [0.9, 1],\n", + " \"bbFatJetParticleNetMass\": 
[115, 145],\n", " },\n", " label=\"Fail\",\n", " ),\n", @@ -424,9 +429,7 @@ " plot_shifts=False,\n", " lpsfs=False,\n", " show=True,\n", - ")\n", - "\n", - "\n" + ")" ] }, { @@ -444,14 +447,59 @@ "metadata": {}, "outputs": [], "source": [ - "categories = ['HHbbVV', 'ggHH_kl_2p45_kt_1_HHbbVV', 'ggHH_kl_5_kt_1_HHbbVV', 'ggHH_kl_0_kt_1_HHbbVV', 'VBFHHbbVV', 'qqHH_CV_1_C2V_0_kl_1_HHbbVV', 'qqHH_CV_1p5_C2V_1_kl_1_HHbbVV', 'qqHH_CV_1_C2V_1_kl_2_HHbbVV', 'qqHH_CV_1_C2V_2_kl_1_HHbbVV', 'qqHH_CV_1_C2V_1_kl_0_HHbbVV', 'qqHH_CV_0p5_C2V_1_kl_1_HHbbVV', 'QCD', 'TT', 'ST', 'V+Jets', 'Diboson', 'ggFHbb', 'VBFHbb', 'ZHbb', 'WHbb', 'ggZHbb', 'ttHbb', 'Data', 'HHbbVV_txbb_down', 'ggHH_kl_2p45_kt_1_HHbbVV_txbb_down', 'ggHH_kl_5_kt_1_HHbbVV_txbb_down', 'ggHH_kl_0_kt_1_HHbbVV_txbb_down', 'VBFHHbbVV_txbb_down', 'qqHH_CV_1_C2V_0_kl_1_HHbbVV_txbb_down', 'qqHH_CV_1p5_C2V_1_kl_1_HHbbVV_txbb_down', 'qqHH_CV_1_C2V_1_kl_2_HHbbVV_txbb_down', 'qqHH_CV_1_C2V_2_kl_1_HHbbVV_txbb_down', 'qqHH_CV_1_C2V_1_kl_0_HHbbVV_txbb_down', 'qqHH_CV_0p5_C2V_1_kl_1_HHbbVV_txbb_down', 'HHbbVV_txbb_up', 'ggHH_kl_2p45_kt_1_HHbbVV_txbb_up', 'ggHH_kl_5_kt_1_HHbbVV_txbb_up', 'ggHH_kl_0_kt_1_HHbbVV_txbb_up', 'VBFHHbbVV_txbb_up', 'qqHH_CV_1_C2V_0_kl_1_HHbbVV_txbb_up', 'qqHH_CV_1p5_C2V_1_kl_1_HHbbVV_txbb_up', 'qqHH_CV_1_C2V_1_kl_2_HHbbVV_txbb_up', 'qqHH_CV_1_C2V_2_kl_1_HHbbVV_txbb_up', 'qqHH_CV_1_C2V_1_kl_0_HHbbVV_txbb_up', 'qqHH_CV_0p5_C2V_1_kl_1_HHbbVV_txbb_up']\n", - "hhbbvv_index = categories.index('HHbbVV')\n", - "\n", + "categories = [\n", + " \"HHbbVV\",\n", + " \"ggHH_kl_2p45_kt_1_HHbbVV\",\n", + " \"ggHH_kl_5_kt_1_HHbbVV\",\n", + " \"ggHH_kl_0_kt_1_HHbbVV\",\n", + " \"VBFHHbbVV\",\n", + " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\",\n", + " \"qqHH_CV_1p5_C2V_1_kl_1_HHbbVV\",\n", + " \"qqHH_CV_1_C2V_1_kl_2_HHbbVV\",\n", + " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\",\n", + " \"qqHH_CV_1_C2V_1_kl_0_HHbbVV\",\n", + " \"qqHH_CV_0p5_C2V_1_kl_1_HHbbVV\",\n", + " \"QCD\",\n", + " \"TT\",\n", + " \"ST\",\n", + " \"V+Jets\",\n", + " \"Diboson\",\n", + " \"ggFHbb\",\n", + " \"VBFHbb\",\n", + " \"ZHbb\",\n", + " \"WHbb\",\n", + " \"ggZHbb\",\n", + " \"ttHbb\",\n", + " \"Data\",\n", + " \"HHbbVV_txbb_down\",\n", + " \"ggHH_kl_2p45_kt_1_HHbbVV_txbb_down\",\n", + " \"ggHH_kl_5_kt_1_HHbbVV_txbb_down\",\n", + " \"ggHH_kl_0_kt_1_HHbbVV_txbb_down\",\n", + " \"VBFHHbbVV_txbb_down\",\n", + " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV_txbb_down\",\n", + " \"qqHH_CV_1p5_C2V_1_kl_1_HHbbVV_txbb_down\",\n", + " \"qqHH_CV_1_C2V_1_kl_2_HHbbVV_txbb_down\",\n", + " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV_txbb_down\",\n", + " \"qqHH_CV_1_C2V_1_kl_0_HHbbVV_txbb_down\",\n", + " \"qqHH_CV_0p5_C2V_1_kl_1_HHbbVV_txbb_down\",\n", + " \"HHbbVV_txbb_up\",\n", + " \"ggHH_kl_2p45_kt_1_HHbbVV_txbb_up\",\n", + " \"ggHH_kl_5_kt_1_HHbbVV_txbb_up\",\n", + " \"ggHH_kl_0_kt_1_HHbbVV_txbb_up\",\n", + " \"VBFHHbbVV_txbb_up\",\n", + " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV_txbb_up\",\n", + " \"qqHH_CV_1p5_C2V_1_kl_1_HHbbVV_txbb_up\",\n", + " \"qqHH_CV_1_C2V_1_kl_2_HHbbVV_txbb_up\",\n", + " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV_txbb_up\",\n", + " \"qqHH_CV_1_C2V_1_kl_0_HHbbVV_txbb_up\",\n", + " \"qqHH_CV_0p5_C2V_1_kl_1_HHbbVV_txbb_up\",\n", + "]\n", + "hhbbvv_index = categories.index(\"HHbbVV\")\n", "\n", "\n", "# Assuming you have the Hist objects as hist_pass and hist_fail\n", - "hist_pass = t['pass']\n", - "hist_fail = t['fail']\n", + "hist_pass = t[\"pass\"]\n", + "hist_fail = t[\"fail\"]\n", "\n", "hist_pass_values = hist_pass.values()\n", "hist_fail_values = hist_fail.values()\n", @@ -462,16 +510,14 @@ "\n", "\n", 
"print(type(hist_pass.values()))\n", - "#print(hist_pass.values())\n", + "# print(hist_pass.values())\n", "print(dir(hist_pass))\n", "\n", "total_events_pass = np.sum(hhbbvv_values_pass)\n", "total_events_fail = np.sum(hhbbvv_values_fail)\n", "\n", - "print(f\"Total number of 'HHbbVV' events in 'pass' histogram: {total_events_pass}\") # 0.023298 \n", - "print(f\"Total number of 'HHbbVV' events in 'fail' histogram: {total_events_fail}\") # 0.019126\n", - "\n", - "\n" + "print(f\"Total number of 'HHbbVV' events in 'pass' histogram: {total_events_pass}\") # 0.023298\n", + "print(f\"Total number of 'HHbbVV' events in 'fail' histogram: {total_events_fail}\") # 0.019126" ] }, { @@ -480,13 +526,13 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", - "C = (0.380591+0.705142 )/(1752.5063309999998 + 3264.734414) #(0.440593 + 0.222956) / (122.0 + 68.0)\n", + "C = (0.380591 + 0.705142) / (\n", + " 1752.5063309999998 + 3264.734414\n", + ") # (0.440593 + 0.222956) / (122.0 + 68.0)\n", "# fail control D\n", "\n", - "D1 = (0.295384+0.817020)/(3690.896753 + 2894.3845) # (0.484652 + 0.162842)/(1285.0 + 502.0)\n", - "print(C,D1)\n" + "D1 = (0.295384 + 0.817020) / (3690.896753 + 2894.3845) # (0.484652 + 0.162842)/(1285.0 + 502.0)\n", + "print(C, D1)" ] }, { @@ -496,7 +542,7 @@ "outputs": [], "source": [ "0.440593 + 0.222956, (122.0 + 68.0)\n", - "(0.484652 + 0.162842),(1285.0 + 502.0)" + "(0.484652 + 0.162842), (1285.0 + 502.0)" ] } ],