From 3be0f167f01dc2c743bbee6b95f620cee9d12aba Mon Sep 17 00:00:00 2001
From: Juraj Smiesko <juraj.smiesko@cern.ch>
Date: Mon, 16 Sep 2024 11:32:07 +0200
Subject: [PATCH] Before scaling the histograms in plots stage check final
 stage results

---
 .../higgs/mH-recoil/mumu/analysis_plots.py    |  37 +--
 man/man7/fccanalysis-plots-script.7           | 114 +++++++++
 python/do_plots.py                            | 230 ++++++++++++++----
 python/parsers.py                             |   7 +-
 4 files changed, 325 insertions(+), 63 deletions(-)
 create mode 100644 man/man7/fccanalysis-plots-script.7

diff --git a/examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py b/examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py
index 0f6aca0212..cf77fcdeb7 100644
--- a/examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py
+++ b/examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py
@@ -1,25 +1,30 @@
 import ROOT
 
-# global parameters
-intLumi        = 5.0e+06 #in pb-1
+# Global parameters
+intLumi        = 5.0e+06  # in pb-1
 ana_tex        = 'e^{+}e^{-} #rightarrow ZH #rightarrow #mu^{+}#mu^{-} + X'
 delphesVersion = '3.4.2'
 energy         = 240.0
 collider       = 'FCC-ee'
 inputDir       = 'outputs/FCCee/higgs/mH-recoil/mumu/final/'
-formats        = ['png','pdf']
-yaxis          = ['lin','log']
-stacksig       = ['stack','nostack']
+formats        = ['png', 'pdf']
+yaxis          = ['lin', 'log']
+stacksig       = ['stack', 'nostack']
 outdir         = 'outputs/FCCee/higgs/mH-recoil/mumu/plots/'
 plotStatUnc    = True
 
-variables = ['mz','mz_zoom','leptonic_recoil_m','leptonic_recoil_m_zoom','leptonic_recoil_m_zoom2']
-rebin = [1, 1, 1, 1, 2] # uniform rebin per variable (optional)
+variables = ['mz',
+             'mz_zoom',
+             'leptonic_recoil_m',
+             'leptonic_recoil_m_zoom',
+             'leptonic_recoil_m_zoom2']
+rebin = [1, 1, 1, 1, 2]  # uniform rebin per variable (optional)
 
-###Dictonnary with the analysis name as a key, and the list of selections to be plotted for this analysis. The name of the selections should be the same than in the final selection
+# Dictionary with the list of selections to be plotted for this analysis. The
+# names of the selections should be the same as in the final selection
 selections = {}
-selections['ZH']   = ["sel0","sel1"]
-selections['ZH_2'] = ["sel0","sel1"]
+selections['ZH'] = ["sel0", "sel1"]
+selections['ZH_2'] = ["sel0", "sel1"]
 
 extralabel = {}
 extralabel['sel0'] = "Selection: N_{Z} = 1"
@@ -32,15 +37,13 @@
 colors['VV'] = ROOT.kGreen+3
 
 plots = {}
-plots['ZH'] = {'signal':{'ZH':['MySample_p8_ee_ZH_ecm240']},
-               'backgrounds':{'WW':['p8_ee_WW_ecm240'],
-                              'ZZ':['p8_ee_ZZ_ecm240']}
-           }
+plots['ZH'] = {'signal': {'ZH': ['MySample_p8_ee_ZH_ecm240']},
+               'backgrounds': {'WW': ['p8_ee_WW_ecm240'],
+                               'ZZ': ['p8_ee_ZZ_ecm240']}}
 
 
-plots['ZH_2'] = {'signal':{'ZH':['MySample_p8_ee_ZH_ecm240']},
-                 'backgrounds':{'VV':['p8_ee_WW_ecm240','p8_ee_ZZ_ecm240']}
-             }
+plots['ZH_2'] = {'signal': {'ZH': ['MySample_p8_ee_ZH_ecm240']},
+                 'backgrounds': {'VV': ['p8_ee_WW_ecm240', 'p8_ee_ZZ_ecm240']}}
 
 legend = {}
 legend['ZH'] = 'ZH'
diff --git a/man/man7/fccanalysis-plots-script.7 b/man/man7/fccanalysis-plots-script.7
new file mode 100644
index 0000000000..a0732a6071
--- /dev/null
+++ b/man/man7/fccanalysis-plots-script.7
@@ -0,0 +1,114 @@
+.\" Manpage for fccanalysis-plots-script
+.\" Contact FCC-PED-SoftwareAndComputing-Analysis@cern.ch to correct errors or typos.
+.TH FCCANALYSIS\-PLOTS\-SCRIPT 7 "16 Sep 2024" "0.9.0" "fccanalysis-plots-script man page"
+.SH NAME
+\fBfccanalysis\-plots\-script\fR \(en analysis script for the plots stage of the
+analysis
+.SH SYNOPSIS
+.IP
+*
+.SH DESCRIPTION
+.PP
+The analysis script for the plots stage of the analysis is expected to be a
+valid Python script containing definitions of the plots to be created from the
+histograms obtained in the final stage of the analysis or from a histmaker run\&.
+To run the plots stage do
+.IP
+fccanalysis plots \fIanalysis_script.py\fR
+
+.RE
+.SH ATTRIBUTES
+User can use the following global attributes to control the behavior of the
+plots stage\&.
+.TP
+\fBinputDir\fR or \fBindir\fR (mandatory)
+User has to specify the directory where the input files coming from the previous
+analysis stage are stored\&.
+.TP
+\fBoutputDir\fR or \fBoutdir\fR (optional)
+User can specify the directory for the output files\&.
+.br
+Default value: \&. (current working directory)
+.TP
+\fBdoScale\fR (optional)
+Whether to scale the histograms to the expected integrated luminosity\&. It is
+recommended to scale your histograms already in the \fBfinal\fR stage of the
+analysis\&.
+.br
+Default value: False
+.TP
+\fBintLumi\fR (optional)
+Expected integrated luminosity in pb^{-1}\&. The value will be used in the
+figures, but also to scale histograms if \fBdoScale\fR is set to True\&.
+.br
+Default value: 1.0
+.TP
+\fBscaleSig\fR (optional)
+Additional scale factor to be applied on the signal histograms\&.
+.br
+Default value: 1.0
+.TP
+\fBscaleBkg\fR (optional)
+Additional scale factor to be applied on the background histograms\&.
+.br
+Default value: 1.0
+.TP
+\fBplots\fR (mandatory)
+Dictionary of plots to be created\&.
+.br
+Default value: {} (empty dictionary)
+.TP
+\fBsplitLeg\fR (optional)
+Whether to split legend into two columns\&.
+.br
+Default value: False
+.TP
+\fBlegendCoord\fR (optional)
+Adjusts position of the legend\&. Takes a list of four values\&.
+.br
+Default value: List of four None
+.TP
+\fBplotStatUnc\fR (optional)
+Whether to plot statistical uncertainty\&.
+.br
+Default value: False
+.TP
+\fBlegendTextSize\fR (optional)
+Adjusts size of the text in the plot's legend\&.
+.br
+Default value: 0.035
+.PP
+This section is under construction. You are invited to help :)
+.SH SEE ALSO
+fccanalysis(1), fccanalysis-plots(1)
+.SH BUGS
+Many
+.SH AUTHORS
+There are many contributors to the FCCAnalyses framework, but the principal
+authors are:
+.in +4
+Clement Helsens
+.br
+Valentin Volkl
+.br
+Gerardo Ganis
+.SH FCCANALYSES
+Part of the FCCAnalyses framework\&.
+.SH LINKS
+.PP
+.UR https://hep-fcc\&.github\&.io/FCCAnalyses/
+FCCAnalyses webpage
+.UE
+.PP
+.UR https://github\&.com/HEP\-FCC/FCCAnalyses/
+FCCAnalyses GitHub repository
+.UE
+.PP
+.UR https://fccsw\-forum\&.web\&.cern\&.ch/
+FCCSW Forum
+.UE
+.SH CONTACT
+.PP
+.MT FCC-PED-SoftwareAndComputing-Analysis@cern.ch
+FCC-PED-SoftwareAndComputing-Analysis
+.ME
diff --git a/python/do_plots.py b/python/do_plots.py
index 7a2f4048dc..138738db2b 100644
--- a/python/do_plots.py
+++ b/python/do_plots.py
@@ -50,61 +50,135 @@ def formatStatUncHist(hists, name, hstyle=3254):
 
 
 # _____________________________________________________________________________
-def mapHistos(var, label, sel, param, rebin):
-    LOGGER.info('Run plots for var:%s     label:%s     selection:%s',
-                var, label, sel)
-    signal = param.plots[label]['signal']
-    backgrounds = param.plots[label]['backgrounds']
+def load_hists(var: str,
+               label: str,
+               sel: str,
+               config: dict[str, any],
+               rebin: int) -> tuple[dict[str, any], dict[str: any]]:
+    '''
+    Load all histograms needed for the plot
+    '''
+
+    signal = config['plots'][label]['signal']
+    backgrounds = config['plots'][label]['backgrounds']
 
     hsignal = {}
     for s in signal:
         hsignal[s] = []
-        for f in signal[s]:
-            fin = param.inputDir+f+'_'+sel+'_histo.root'
-            if not os.path.isfile(fin):
-                LOGGER.info('File "%s" not found!\nSkipping it...', fin)
+        for filepathstem in signal[s]:
+            infilepath = config['input_dir'] + filepathstem + '_' + sel + \
+                         '_histo.root'
+            if not os.path.isfile(infilepath):
+                LOGGER.info('File "%s" not found!\nSkipping it...', infilepath)
                 continue
 
-            with ROOT.TFile(fin, 'READ') as tf:
-                h = tf.Get(var)
-                hh = copy.deepcopy(h)
-                hh.SetDirectory(0)
-            scaleSig = 1.
-            try:
-                scaleSig = param.scaleSig
-            except AttributeError:
-                LOGGER.debug('No scale signal, using 1.')
-                param.scaleSig = scaleSig
-            LOGGER.info('ScaleSig: %g', scaleSig)
-            hh.Scale(param.intLumi*scaleSig)
-            hh.Rebin(rebin)
+            with ROOT.TFile(infilepath, 'READ') as infile:
+                hist = copy.deepcopy(infile.Get(var))
+                hist.SetDirectory(0)
+
+                scale = config['scale_sig']
+
+                # Check if histograms were already scaled to lumi
+                try:
+                    scaled = infile.scaled
+                except AttributeError:
+                    LOGGER.error('Input file does not contain scaling '
+                                 'information!\n  %s\nAborting...', infilepath)
+                    sys.exit(3)
+
+                if scaled:
+                    try:
+                        int_lumi_in_file = infile.intLumi.GetVal()
+                    except AttributeError:
+                        LOGGER.error('Can not load integrated luminosity '
+                                     'value from the input file!\n  %s\n'
+                                     'Aborting...', infilepath)
+
+                    if config['int_lumi'] != int_lumi_in_file:
+                        LOGGER.warning(
+                            'Histograms are already scaled to different '
+                            'luminosity value!\n'
+                            'Luminosity from the input file is %s pb-1 and '
+                            'luminosity in plots script is %s pb-1.',
+                            config['int_lumi'], int_lumi_in_file)
+                        if config['do_scale']:
+                            LOGGER.warning(
+                                'Rescaling from %s pb-1 to %s pb-1...',
+                                config['int_lumi'], int_lumi_in_file)
+                            scale *= config['int_lumi'] / int_lumi_in_file
+
+                else:
+                    if config['do_scale']:
+                        scale = scale * config['int_lumi']
+
+            hist.Scale(scale)
+
+            hist.Rebin(rebin)
 
             if len(hsignal[s]) == 0:
-                hsignal[s].append(hh)
+                hsignal[s].append(hist)
             else:
-                hh.Add(hsignal[s][0])
-                hsignal[s][0] = hh
+                hist.Add(hsignal[s][0])
+                hsignal[s][0] = hist
 
     hbackgrounds = {}
     for b in backgrounds:
         hbackgrounds[b] = []
-        for f in backgrounds[b]:
-            fin = param.inputDir+f+'_'+sel+'_histo.root'
-            if not os.path.isfile(fin):
-                LOGGER.info('File "%s" not found!\nSkipping it...', fin)
+        for filepathstem in backgrounds[b]:
+            infilepath = config['input_dir'] + filepathstem + '_' + sel + \
+                         '_histo.root'
+            if not os.path.isfile(infilepath):
+                LOGGER.info('File "%s" not found!\nSkipping it...', infilepath)
                 continue
 
-            with ROOT.TFile(fin) as tf:
-                h = tf.Get(var)
-                hh = copy.deepcopy(h)
-                hh.SetDirectory(0)
-            hh.Scale(param.intLumi)
-            hh.Rebin(rebin)
+            with ROOT.TFile(infilepath) as infile:
+                hist = copy.deepcopy(infile.Get(var))
+                hist.SetDirectory(0)
+
+                scale = config['scale_bkg']
+
+                # Check if histograms were already scaled to lumi
+                try:
+                    scaled = infile.scaled
+                except AttributeError:
+                    LOGGER.error('Input file does not contain scaling '
+                                 'information!\n  %s\nAborting...', infilepath)
+                    sys.exit(3)
+
+                if scaled:
+                    try:
+                        int_lumi_in_file = infile.intLumi.GetVal()
+                    except AttributeError:
+                        LOGGER.error('Can not load integrated luminosity '
+                                     'value from the input file!\n  %s\n'
+                                     'Aborting...', infilepath)
+
+                    if config['int_lumi'] != int_lumi_in_file:
+                        LOGGER.warning(
+                            'Histograms are already scaled to different '
+                            'luminosity value!\n'
+                            'Luminosity from the input file is %s pb-1 and '
+                            'luminosity in plots script is %s pb-1.',
+                            config['int_lumi'], int_lumi_in_file)
+                        if config['do_scale']:
+                            LOGGER.warning(
+                                'Rescaling from %s pb-1 to %s pb-1...',
+                                config['int_lumi'], int_lumi_in_file)
+                            scale *= config['int_lumi'] / int_lumi_in_file
+
+                else:
+                    if config['do_scale']:
+                        scale = scale * config['int_lumi']
+
+            hist.Scale(scale)
+
+            hist.Rebin(rebin)
+
             if len(hbackgrounds[b]) == 0:
-                hbackgrounds[b].append(hh)
+                hbackgrounds[b].append(hist)
             else:
-                hh.Add(hbackgrounds[b][0])
-                hbackgrounds[b][0] = hh
+                hist.Add(hbackgrounds[b][0])
+                hbackgrounds[b][0] = hist
 
     for s in hsignal:
         if len(hsignal[s]) == 0:
@@ -801,8 +875,71 @@ def run(args):
     script_module = importlib.import_module(base_name)
 
     # Merge script and command line arguments into one configuration object
-    config = {}
+    # Also check the script attributes
+    config: dict[str, any] = {}
+
+    # Input directory
+    config['input_dir'] = os.getcwd()
+    if hasattr(script_module, 'indir'):
+        config['input_dir'] = script_module.indir
+    if hasattr(script_module, 'inputDir'):
+        config['input_dir'] = script_module.inputDir
+    if args.input_dir is not None:
+        config['input_dir'] = args.input_dir
+
+    # Output directory
+    config['output_dir'] = os.getcwd()
+    if hasattr(script_module, 'outdir'):
+        config['output_dir'] = script_module.outdir
+    if hasattr(script_module, 'outputDir'):
+        config['output_dir'] = script_module.outputDir
+    if args.output_dir is not None:
+        config['output_dir'] = args.output_dir
+
+    # Integrated luminosity
+    config['int_lumi'] = 1.
+    if hasattr(script_module, 'intLumi'):
+        config['int_lumi'] = script_module.intLumi
+    else:
+        LOGGER.debug('No integrated luminosity provided, using 1.0 pb-1.')
+    LOGGER.info('Integrated luminosity: %g pb-1', config['int_lumi'])
+
+    # Whether to scale histograms to luminosity
+    config['do_scale'] = 1.0
+    if hasattr(script_module, 'doScale'):
+        config['do_scale'] = script_module.doScale
+    else:
+        LOGGER.debug('No scaling to luminosity requested, scaling won\'t be '
+                     'done.')
+        config['do_scale'] = False
+    if config['do_scale']:
+        LOGGER.info('Histograms will be scaled to luminosity.')
+
+    # Scale factor to apply to all signal histograms
+    config['scale_sig'] = 1.0
+    if hasattr(script_module, 'scaleSig'):
+        config['scale_sig'] = script_module.scaleSig
+    else:
+        LOGGER.debug('No scale factor for signal provided, using 1.0.')
+    LOGGER.info('Scale factor for signal: %g', config['scale_sig'])
 
+    # Scale factor to apply to all background histograms
+    config['scale_bkg'] = 1.0
+    if hasattr(script_module, 'scaleBkg'):
+        config['scale_bkg'] = script_module.scaleBkg
+    else:
+        LOGGER.debug('No scale factor for background provided, using 1.0.')
+    LOGGER.info('Scale factor for background: %g', config['scale_sig'])
+
+    # Plots list
+    config['plots']: dict[str, any] = {}
+    if hasattr(script_module, 'plots'):
+        config['plots'] = script_module.plots
+    else:
+        LOGGER.debug('List of plots not provided!\nAborting...')
+        sys.exit(3)
+
+    # Splitting legend into two columns
     config['split_leg'] = False
     if hasattr(script_module, 'splitLeg'):
         config['split_leg'] = script_module.splitLeg
@@ -836,6 +973,7 @@ def run(args):
         sys.exit()
 
     counter = 0
+    LOGGER.info('Plotting:')
     for var_index, var in enumerate(script_module.variables):
         for label, sels in script_module.selections.items():
             for sel in sels:
@@ -844,11 +982,15 @@ def run(args):
                     if len(script_module.rebin) == \
                             len(script_module.variables):
                         rebin_tmp = script_module.rebin[var_index]
-                hsignal, hbackgrounds = mapHistos(var,
-                                                  label,
-                                                  sel,
-                                                  script_module,
-                                                  rebin=rebin_tmp)
+
+                LOGGER.info('  var: %s     label: %s     selection: %s',
+                            var, label, sel)
+
+                hsignal, hbackgrounds = load_hists(var,
+                                                   label,
+                                                   sel,
+                                                   config,
+                                                   rebin=rebin_tmp)
                 runPlots(config,
                          args,
                          var + "_" + label,
diff --git a/python/parsers.py b/python/parsers.py
index c0915ef777..c2fc092eac 100644
--- a/python/parsers.py
+++ b/python/parsers.py
@@ -146,7 +146,11 @@ def setup_run_parser_plots(parser):
     '''
     Define command line arguments for the plots sub-command.
     '''
-    parser.add_argument('script_path', help="path to the plots script")
+    parser.add_argument('script_path', help='path to the plots script')
+    parser.add_argument('--input-dir', type=str, default=None,
+                        help='input directory location')
+    parser.add_argument('--output-dir', type=str, default=None,
+                        help='output directory location')
     parser.add_argument('--legend-text-size', type=float, default=None,
                         help='text size for the legend elements')
     parser.add_argument('--legend-x-min', type=float, default=None,
@@ -159,7 +163,6 @@ def setup_run_parser_plots(parser):
                         help='maximal y position of the legend')
 
 
-
 def setup_run_parser_combine(parser):
     '''
     Define command line arguments for the combine sub-command.