diff --git a/.github/scripts/man2html.sh b/.github/scripts/man2html.sh
index 683e988df8..db72d29b87 100755
--- a/.github/scripts/man2html.sh
+++ b/.github/scripts/man2html.sh
@@ -3,9 +3,9 @@
mkdir -p ./man/html
cd ./man/man1
for MANFILE in *; do
- mandoc -Thtml -Ostyle=/css/mandoc.css "${MANFILE}" > "../html/${MANFILE::-1}html"
+ mandoc -Thtml -Ostyle=../../css/mandoc.css "${MANFILE}" > "../html/${MANFILE::-1}html"
done
cd ../man7
for MANFILE in *; do
- mandoc -Thtml -Ostyle=/css/mandoc.css "${MANFILE}" > "../html/${MANFILE::-1}html"
+ mandoc -Thtml -Ostyle=../../css/mandoc.css "${MANFILE}" > "../html/${MANFILE::-1}html"
done
diff --git a/bin/fccanalysis b/bin/fccanalysis
index cf466e0c2a..64d51bd743 100755
--- a/bin/fccanalysis
+++ b/bin/fccanalysis
@@ -1,8 +1,20 @@
#!/usr/bin/env python3
+'''
+Starting point (executable) for fccanalysis command
+'''
import argparse
import logging
+from parsers import setup_subparsers
+from init_analysis import init_analysis
+from build_analysis import build_analysis
+from test_fccanalyses import test_fccanalyses
+from pin_analysis import PinAnalysis
+from run_analysis import run
+from run_final_analysis import run_final
+from do_plots import do_plots
+
class MultiLineFormatter(logging.Formatter):
'''
@@ -30,9 +42,12 @@ class MultiLineFormatter(logging.Formatter):
def main():
+ '''
+ Starting point for fccanalysis command
+ '''
parser = argparse.ArgumentParser(description='FCCAnalyses v0.8.0')
- # Verbosity settings
+ # Verbosity arguments
verbosity_argument_group = parser.add_mutually_exclusive_group()
verbosity_argument_group.add_argument('-v', '--verbose',
action='store_true',
@@ -42,42 +57,16 @@ def main():
help='make output more verbose')
verbosity_argument_group.add_argument('-vvv', '--most-verbose',
action='store_true',
- help='make output most verbose')
-
- # Create sub-parsers
- subparsers = parser.add_subparsers(help='types of running modes',
- dest='command')
- parser_init = subparsers.add_parser(
- 'init',
- help="generate a RDataFrame based FCC analysis")
- parser_build = subparsers.add_parser(
- 'build',
- help='build and install local analysis')
- parser_test = subparsers.add_parser(
- 'test',
- help='test whole or a part of the analysis framework')
- parser_pin = subparsers.add_parser(
- 'pin',
- help='pin fccanalyses to the current version of Key4hep stack')
- parser_run = subparsers.add_parser(
- 'run',
- help="run a RDataFrame based FCC analysis")
- parser_run_final = subparsers.add_parser(
- 'final',
- help="run a RDataFrame based FCC analysis final configuration")
- parser_run_plots = subparsers.add_parser(
- 'plots',
- help="run a RDataFrame based FCC analysis plot configuration")
-
- # Setup sub-parsers
- import Parsers as fccpars
- fccpars.setup_init_parser(parser_init)
- fccpars.setup_build_parser(parser_build)
- fccpars.setup_test_parser(parser_test)
- fccpars.setup_pin_parser(parser_pin)
- fccpars.setup_run_parser(parser_run)
- fccpars.setup_run_parser_final(parser_run_final)
- fccpars.setup_run_parser_plots(parser_run_plots)
+ help='make output even more verbose')
+
+ # Sub-parsers
+ subparsers = parser.add_subparsers(
+ title='sub-commands',
+ metavar='sub-command',
+ dest='command',
+ help='one of the available sub-commands')
+
+ setup_subparsers(subparsers)
args = parser.parse_args()
@@ -94,19 +83,18 @@ def main():
logger.addHandler(stream_handler)
if args.command == 'init':
- from FCCAnalysisSetup import setup
- setup(parser)
+ init_analysis(parser)
elif args.command == 'build':
- from build_analysis import build_analysis
build_analysis(parser)
elif args.command == 'test':
- from test import test_fccanalyses
test_fccanalyses(parser)
elif args.command == 'pin':
- from pin_analysis import PinAnalysis
PinAnalysis(parser)
+ elif args.command == 'final':
+ run_final(parser)
+ elif args.command == 'plots':
+ do_plots(parser)
else:
- from FCCAnalysisRun import run
run(parser)
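
The sub-parser setup that used to live inline above now comes from the new `parsers` module, which is not included in this excerpt. A minimal sketch of what `setup_subparsers` is expected to do, reconstructed from the removed code (the per-command helper names are taken from the old `Parsers` module and are assumptions here):

    # parsers.py -- illustrative sketch only, reconstructed from the code
    # removed above; the real module may structure this differently.
    def setup_subparsers(subparsers):
        '''Register one parser per fccanalysis sub-command.'''
        setup_init_parser(subparsers.add_parser(
            'init', help='generate a RDataFrame based FCC analysis'))
        setup_build_parser(subparsers.add_parser(
            'build', help='build and install local analysis'))
        setup_test_parser(subparsers.add_parser(
            'test', help='test whole or a part of the analysis framework'))
        setup_pin_parser(subparsers.add_parser(
            'pin', help='pin fccanalyses to the current version of Key4hep stack'))
        setup_run_parser(subparsers.add_parser(
            'run', help='run a RDataFrame based FCC analysis'))
        setup_run_parser_final(subparsers.add_parser(
            'final', help='run a RDataFrame based FCC analysis final configuration'))
        setup_run_parser_plots(subparsers.add_parser(
            'plots', help='run a RDataFrame based FCC analysis plot configuration'))

This keeps `main` a thin dispatcher over `args.command` while all argument definitions stay in one place.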
diff --git a/examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py b/examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py
index e9c3b1e3f4..e1075a43c3 100644
--- a/examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py
+++ b/examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py
@@ -22,6 +22,8 @@
#produces ROOT TTrees, default is False
doTree = False
+saveTabular = True
+
 ###Dictionary of the list of cuts. The key is the name of the selection that will be added to the output file
cutList = {"sel0":"Zcand_q == 0",
"sel1":"Zcand_q == -1 || Zcand_q == 1",
diff --git a/man/man1/fccanalysis-build.1 b/man/man1/fccanalysis-build.1
new file mode 100644
index 0000000000..62e418ffcc
--- /dev/null
+++ b/man/man1/fccanalysis-build.1
@@ -0,0 +1,60 @@
+.\" Manpage for fccanalysis-build
+.\" Contact FCC-PED-SoftwareAndComputing-Analysis@cern.ch to correct errors or typos.
+.TH FCCANALYSIS\-BUILD 1 "17 Jan 2024" "0.8.0" "fccanalysis-build man page"
+.SH NAME
+\fBfccanalysis\-build\fR \(en helper to build FCCAnalyses framework locally
+.SH SYNOPSIS
+.B fccanalysis build
+[\fB\-h\fR | \fB\-\-help\fR]
+[\fB\-c\fR]
+[\fB\-j\fR \fIBUILD_THREADS\fR]
+.SH DESCRIPTION
+.B fccanalysis\-build
+will build the whole FCCAnalyses framework\&.
+
+This sub-command is not intended as a replacement for the CMake build procedure,
+it just makes this task a little easier\&. The helper builds the framework in
+the \fIbuild\fR directory and installs it into the \fIinstall\fR directory\&.
+.SH OPTIONS
+.TP
+.BR \-h ", " \-\-help
+Prints short help message and exits\&.
+.TP
+.BR \-c ", " \-\-clean\-build
+Completely clean build directory before compilation\&.
+.TP
+\fB\-j\fR \fIBUILD_THREADS\fR, \fB\-\-build\-threads\fR \fIBUILD_THREADS\fR
+Specify the number of threads used when compiling (equivalent to `make -j`)\&.
+.SH SEE ALSO
+fccanalysis(1), fccanalysis\-test(1), cmake(1)
+.SH BUGS
+Many
+.SH AUTHORS
+There are many contributors to the FCCAnalyses framework, but the principal
+authors are:
+.in +4
+Clement Helsens
+.br
+Valentin Volk
+.br
+Gerardo Ganis
+.SH FCCANALYSES
+Part of the FCCAnalyses framework\&.
+.SH LINKS
+.PP
+.UR https://hep-fcc\&.github\&.io/FCCAnalyses/
+FCCAnalyses webpage
+.UE
+.PP
+.UR https://github\&.com/HEP\-FCC/FCCAnalyses/
+FCCAnalyses GitHub repository
+.UE
+.PP
+.UR https://fccsw\-forum\&.web\&.cern\&.ch/
+FCCSW Forum
+.UE
+.SH CONTACT
+.PP
+.MT FCC-PED-SoftwareAndComputing-Analysis@cern.ch
+FCC-PED-SoftwareAndComputing-Analysis
+.ME
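
In terms of plain CMake commands, the behaviour promised above (build in build/, install into install/, \-j forwarded to the build tool, \-c wiping the build tree first) amounts to roughly the following sketch; the actual implementation lives in build_analysis.py and may differ:

    # Rough Python equivalent of `fccanalysis build -c -j 8` (illustrative)
    import shutil
    import subprocess

    def build(clean: bool = False, threads: int = 1) -> None:
        if clean:
            shutil.rmtree('build', ignore_errors=True)   # -c / --clean-build
        subprocess.check_call(['cmake', '-S', '.', '-B', 'build',
                               '-DCMAKE_INSTALL_PREFIX=install'])
        subprocess.check_call(['cmake', '--build', 'build',
                               '-j', str(threads), '--target', 'install'])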
diff --git a/man/man1/fccanalysis-run.1 b/man/man1/fccanalysis-run.1
index 1b7dfad0ca..f6b416be1f 100644
--- a/man/man1/fccanalysis-run.1
+++ b/man/man1/fccanalysis-run.1
@@ -1,125 +1,89 @@
-.\" Manpage for fccanalysis
-.\" Contact fcc-experiments-sw-dev@cern.ch to correct errors or typos.
-.TH FCCANALYSIS\-RUN 1 "24 May 2023" "0.8.0" "fccanalysis-run man page"
+.\" Manpage for fccanalysis-run
+.\" Contact FCC-PED-SoftwareAndComputing-Analysis@cern.ch to correct errors or typos.
+.TH FCCANALYSIS\-RUN 1 "17 Jan 2024" "0.8.0" "fccanalysis-run man page"
.SH NAME
-fccanalysis\-run \- run FCC analysis
+\fBfccanalysis\-run\fR \- run FCC analysis
.SH SYNOPSIS
-.sp
-.nf
-\fIfccanalysis run\fR [\-h | \-\-help] [\-\-files\-list FILES_LIST [FILES_LIST ...]]
- [\-\-output OUTPUT] [\-\-nevents NEVENTS] [\-\-test] [\-\-bench]
- [\-\-ncpus NCPUS] [\-\-preprocess] [\-\-validate] [\-\-rerunfailed]
- [\-\-jobdir JOBDIR]
-.fi
-.sp
+.B fccanalysis run
+[\fB\-h\fR | \fB\-\-help\fR]
+[\fB\-\-files\-list\fR \fIFILES_LIST\fR [\fIFILES_LIST\fR ...]]
+[\fB\-\-output\fR \fIOUTPUT\fR]
+[\fB\-\-nevents\fR \fINEVENTS\fR]
+[\fB\-\-test\fR]
+[\fB\-\-bench\fR]
+[\fB\-\-ncpus\fR \fINCPUS\fR]
+.I analysis-script
.SH DESCRIPTION
-\fIfccanalysis-run\fR will run analysis provided in the analysis file\&. The
+.B fccanalysis-run
+will run the analysis provided in the analysis script\&. The
analysis itself can be divided into several stages if desired\&. For all those
stages \fIfccanalysis-run\fR is used\&.
-.sp
-When using \fIfccanalysis-run\fR the analysis is running in the managed mode,
-where the RDataFrame provided is steered by the framework and users can control
-some aspects of the running with additional global attributes, see
-\fBfccanalysis-file\fR(8).
+
+When using \fBfccanalysis-run\fR the analysis runs in the managed mode,
+where the RDataFrame is steered by the framework and users can control some
+aspects of the running with additional global attributes, see
+\fIfccanalysis-script\fR(7).
.SH OPTIONS
-.PP
-
-.RS 4
-Path to analysis file\&.
-.RE
-.PP
-\-\-help
-.RS 4
+.TP
+.I analysis-script
+Path to analysis script\&.
+.TP
+.BR \-h ", " \-\-help
Prints short help message and exits\&.
-.RE
-.PP
-\-\-files\-list FILES_LIST [FILES_LIST ...]
-.RS 4
+.TP
+\fB\-\-files\-list\fR \fIFILES_LIST\fR [\fIFILES_LIST\fR ...]
Specify input file to bypass the processList\&.
-.RE
-.PP
-\-\-output OUTPUT
-.RS 4
+.TP
+\fB\-\-output\fR \fIOUTPUT\fR
 Specify output file name to bypass the processList and/or outputList, default
\fIoutput.root\fR\&.
-.RE
-.PP
-\-\-nevents NEVENTS
-.RS 4
+.TP
+\fB\-\-nevents\fR \fINEVENTS\fR
Specify max number of events to process\&.
-.RE
-.PP
-\-\-test
-.RS 4
+.TP
+.B \-\-test
Run over the test file\&.
-.RE
-.PP
-\-\-bench
-.RS 4
+.TP
+.B \-\-bench
Output benchmark results to a JSON file\&.
-.RE
-.PP
-\-\-ncpus NCPUS
-.RS 4
+.TP
+\fB\-\-ncpus\fR \fINCPUS\fR
Set number of threads\&.
-.RE
-.PP
-\-\-preprocess
-.RS 4
-Run preprocessing\&.
-.RE
-.PP
-\-\-validate
-.RS 4
-Validate a given production\&.
-.RE
-.PP
-\-\-rerunfailed
-.RS 4
-Rerun failed jobs\&.
-.RE
-.PP
-\-\-jobdir JOBDIR
-.RS 4
-Specify the batch job directory\&.
-.RE
.SH ENVIRONMENT VARIABLES
-.PP
-\fBFCCDICTSDIR\fR
-.RS 4
+.TP
+.B FCCDICTSDIR
Controls search path for the process dictionaries. The default value is
\fI/cvmfs/fcc.cern.ch/FCCDicts/\fR\&.
-.RE
.SH SEE ALSO
-fccanalysis(1), fccanalysis-file(7)
+fccanalysis(1), fccanalysis-script(7)
.SH BUGS
Many
.SH AUTHORS
There are many contributors to the FCCAnalyses framework, but the principal
authors are:
-.br
-.RS 4
+.in +4
Clement Helsens
.br
Valentin Volk
.br
Gerardo Ganis
-.RE
.SH FCCANALYSES
Part of the FCCAnalyses framework\&.
.SH LINKS
.PP
-1\&. FCCAnalyses webpage
-.RS 4
-https://hep-fcc\&.github\&.io/FCCAnalyses/
-.RE
-.PP
-2\&. FCCAnalysises GitHub repository
-.RS
-https://github\&.com/HEP\-FCC/FCCAnalyses/
-.RE
-.PP
-3\&. FCCSW Forum
-.RS
-https://fccsw\-forum\&.web\&.cern\&.ch/
-.RE
+.UR https://hep-fcc\&.github\&.io/FCCAnalyses/
+FCCAnalyses webpage
+.UE
+.PP
+.UR https://github\&.com/HEP\-FCC/FCCAnalyses/
+FCCAnalyses GitHub repository
+.UE
+.PP
+.UR https://fccsw\-forum\&.web\&.cern\&.ch/
+FCCSW Forum
+.UE
+.SH CONTACT
+.PP
+.MT FCC-PED-SoftwareAndComputing-Analysis@cern.ch
+FCC-PED-SoftwareAndComputing-Analysis
+.ME
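
Condensed, the managed mode described above does what the removed runRDF helper (see the FCCAnalysisRun.py deletion further down) did; a stripped-down sketch, with error handling omitted:

    # Sketch of the managed mode around the user's RDFanalysis class
    import ROOT

    def run_managed(rdf_module, input_files, out_file, nevents=-1):
        df = ROOT.RDataFrame('events', input_files)
        if nevents > 0:
            df = df.Range(0, nevents)                  # honours --nevents
        df2 = rdf_module.RDFanalysis.analysers(df)     # user-defined graph
        branches = ROOT.vector('string')()
        for name in rdf_module.RDFanalysis.output():   # user-selected branches
            branches.push_back(name)
        df2.Snapshot('events', out_file, branches)     # writes --output file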
diff --git a/man/man1/fccanalysis-test.1 b/man/man1/fccanalysis-test.1
index 049fa80011..e7921c348f 100644
--- a/man/man1/fccanalysis-test.1
+++ b/man/man1/fccanalysis-test.1
@@ -1,30 +1,33 @@
.\" Manpage for fccanalysis-test
-.\" Contact fcc-experiments-sw-dev@cern.ch to correct errors or typos.
-.TH FCCANALYSIS\-TEST 1 "17 June 2023" "0.8.0" "fccanalysis-test man page"
+.\" Contact FCC-PED-SoftwareAndComputing-Analysis@cern.ch to correct errors or typos.
+.TH FCCANALYSIS\-TEST 1 "17 Jan 2024" "0.8.0" "fccanalysis-test man page"
.SH NAME
-fccanalysis\-test \- test FCCAnalyses framework
+\fBfccanalysis\-test\fR \(en test FCCAnalyses framework
.SH SYNOPSIS
-.sp
-.nf
-\fIfccanalysis test\fR [\-h | \-\-help] [-R TESTS_REGEX] [-E EXCLUDE_REGEX]
-.fi
-.sp
+.B fccanalysis test
+[\fB\-h\fR | \fB\-\-help\fR]
+[\fB\-R\fR \fITESTS_REGEX\fR]
+[\fB\-E\fR \fIEXCLUDE_REGEX\fR]
+[\fB\-j\fR \fIPARALLEL\fR]
.SH DESCRIPTION
-\fIfccanalysis\-test\fR will test the whole or a part of the FCC analysis
-framework\&.
+.B fccanalysis\-test
+will test the whole or a part of the FCC analysis framework\&.
+
It is not intended as a replacement for running \fIctest\fR command directly in
 the build directory, it is just a convenience shortcut.
.SH OPTIONS
-.PP
-\-h, \-\-help
-.RS 4
+.TP
+.BR \-h ", " \-\-help
Prints short help message and exits\&.
-.RE
-.PP
-[test_regex]
-.RS 4
+.TP
+\fB\-R\fR \fITESTS_REGEX\fR
Specify subset of tests to be run\&.
-.RE
+.TP
+\fB\-E\fR \fIEXCLUDE_REGEX\fR
+Specify subset of tests to ignore\&.
+.TP
+\fB\-j\fR \fIPARALLEL\fR, \fB\-\-parallel\fR \fIPARALLEL\fR
+Number of tests running in parallel (equivalent to `ctest -j`)\&.
.SH SEE ALSO
 fccanalysis(1), fccanalysis\-build(1), ctest(1)
.SH BUGS
@@ -32,29 +35,29 @@ Many
.SH AUTHORS
There are many contributors to the FCCAnalyses framework, but the principal
authors are:
-.br
-.RS 4
+.in +4
Clement Helsens
.br
Valentin Volk
.br
Gerardo Ganis
-.RE
.SH FCCANALYSES
Part of the FCCAnalyses framework\&.
.SH LINKS
.PP
-1\&. FCCAnalyses webpage
-.RS 4
-https://hep-fcc\&.github\&.io/FCCAnalyses/
-.RE
+.UR https://hep-fcc\&.github\&.io/FCCAnalyses/
+FCCAnalyses webpage
+.UE
.PP
-2\&. FCCAnalysises GitHub repository
-.RS
-https://github\&.com/HEP\-FCC/FCCAnalyses/
-.RE
+.UR https://github\&.com/HEP\-FCC/FCCAnalyses/
+FCCAnalyses GitHub repository
+.UE
.PP
-3\&. FCCSW Forum
-.RS
-https://fccsw\-forum\&.web\&.cern\&.ch/
-.RE
+.UR https://fccsw\-forum\&.web\&.cern\&.ch/
+FCCSW Forum
+.UE
+.SH CONTACT
+.PP
+.MT FCC-PED-SoftwareAndComputing-Analysis@cern.ch
+FCC-PED-SoftwareAndComputing-Analysis
+.ME
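
As the DESCRIPTION says, this sub-command is only a convenience shortcut around ctest; the options documented above plausibly map onto a ctest invocation like this (a sketch, not the actual test_fccanalyses implementation):

    # Sketch: mapping -R/-E/-j onto ctest (needs CMake >= 3.20 for --test-dir)
    import subprocess

    def run_tests(tests_regex=None, exclude_regex=None, parallel=None,
                  build_dir='build'):
        cmd = ['ctest', '--test-dir', build_dir]
        if tests_regex:
            cmd += ['-R', tests_regex]     # subset of tests to run
        if exclude_regex:
            cmd += ['-E', exclude_regex]   # subset of tests to ignore
        if parallel:
            cmd += ['-j', str(parallel)]   # tests running in parallel
        subprocess.check_call(cmd)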
diff --git a/man/man1/fccanalysis.1 b/man/man1/fccanalysis.1
index 07261a3c39..ece72b5cfb 100644
--- a/man/man1/fccanalysis.1
+++ b/man/man1/fccanalysis.1
@@ -1,102 +1,86 @@
.\" Manpage for fccanalysis
-.\" Contact fcc-experiments-sw-dev@cern.ch to correct errors or typos.
-.TH FCCANALYSIS 1 "19 Oct 2023" "0.8.0" "fccanalysis man page"
+.\" Contact FCC-PED-SoftwareAndComputing-Analysis@cern.ch to correct errors or typos.
+.TH FCCANALYSIS 1 "17 Jan 2024" "0.8.0" "fccanalysis man page"
.SH NAME
-fccanalysis \- build and run your FCC analysis
+\fBfccanalysis\fR \(en write, build and run your FCC analysis
.SH SYNOPSIS
-.sp
-.nf
-\fIfccanalysis\fR [\-h] [\-v | \-vv | \-vvv] []
-.fi
-.sp
+.B fccanalysis
+[\fB\-h\fR]
+[\fB\-v\fR | \fB\-vv\fR | \fB\-vvv\fR]
+<\fIsub-command\fR>
+[<\fIargs\fR>]
.SH DESCRIPTION
-\fIfccanalysis\fR is the manager program for the FCC analysis, which can build,
+.B fccanalysis
+is the manager program for the FCC analysis, which can build,
run or submit analysis to the batch\&.
.SH OPTIONS
-.PP
-\-h, \-\-help
-.RS 4
+.TP
+.BR \-h ", " \-\-help
Prints short help message and exits\&.
-.RE
-.PP
-\-v, \-\-verbose
-.RS 4
+.TP
+.BR \-v ", " \-\-verbose
Makes output verbose. RDataFrame verbosity level is set to
ROOT.Experimental.ELogLevel.kInfo\&.
-.RE
-.PP
-\-vv, \-\-more\-verbose
-.RS 4
+.TP
+.BR \-vv ", " \-\-more\-verbose
Makes output more verbose. RDataFrame verbosity level is set to
ROOT.Experimental.ELogLevel.kDebug\&.
-.RE
-.PP
-\-vvv, \-\-most\-verbose
-.RS 4
+.TP
+.BR \-vvv ", " \-\-most\-verbose
Makes output even more verbose. RDataFrame verbosity level is set to
ROOT.Experimental.ELogLevel.kDebug+10 \-\-\- outputs also generated code\&.
-.RE
-.SH COMMANDS
-.PP
-\fBfccanalysis-init\fR
-.RS 4
-[Experimental] Generate a RDataFrame based FCC analysis\&.
-.RE
-.PP
-\fBfccanalysis-build\fR
-.RS 4
-Builds and installs the FCCAnalyses framework\&.
-.RE
-.PP
-\fBfccanalysis-pin\fR
-.RS 4
+.SH SUB-COMMANDS
+.TP
+.B fccanalysis-init
+[Experimental] Generate an RDataFrame based FCC analysis package\&.
+.TP
+.B fccanalysis-build
+Helper to build and install FCCAnalyses framework\&.
+.TP
+.B fccanalysis-pin
Pin analysis to the currently sourced version of the Key4hep stack\&.
-.RE
-.PP
-\fBfccanalysis-run\fR(1)
-.RS 4
-Runs the stage analysis file provided\&.
-.RE
-.PP
-\fBfccanalysis-final\fR
-.RS 4
-Runs the finalization analysis file provided\&.
-.RE
-.PP
-\fBfccanalysis-plots\fR
-.RS 4
+.TP
+.B fccanalysis-run
+Runs the analysis script provided\&.
+.TP
+.B fccanalysis-final
+Runs the finalization analysis script provided\&.
+.TP
+.B fccanalysis-plots
Generate plots based on the plots analysis file provided\&.
-.RE
+.TP
+.B fccanalysis-test
+Helper to run tests of the full FCCAnalyses framework\&.
.SH SEE ALSO
-fccanalysis-run(1), fccanalysis-file(7)
+fccanalysis\-run(1), fccanalysis\-script(7)
.SH BUGS
Many
.SH AUTHORS
There are many contributors to the FCCAnalyses framework, but the principal
authors are:
-.br
-.RS 4
+.in +4
Clement Helsens
.br
Valentin Volk
.br
Gerardo Ganis
-.RE
.SH FCCANALYSES
Part of the FCCAnalyses framework\&.
.SH LINKS
.PP
-1\&. FCCAnalyses webpage
-.RS 4
-https://hep\-fcc\&.github\&.io/FCCAnalyses/
-.RE
+.UR https://hep-fcc\&.github\&.io/FCCAnalyses/
+FCCAnalyses webpage
+.UE
.PP
-2\&. FCCAnalysises GitHub repository
-.RS
-https://github\&.com/HEP-FCC/FCCAnalyses/
-.RE
+.UR https://github\&.com/HEP\-FCC/FCCAnalyses/
+FCCAnalyses GitHub repository
+.UE
.PP
-3\&. FCCSW Forum
-.RS
-https://fccsw\-forum\&.web\&.cern\&.ch/
-.RE
+.UR https://fccsw\-forum\&.web\&.cern\&.ch/
+FCCSW Forum
+.UE
+.SH CONTACT
+.PP
+.MT FCC-PED-SoftwareAndComputing-Analysis@cern.ch
+FCC-PED-SoftwareAndComputing-Analysis
+.ME
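
The three verbosity flags correspond one-to-one to RDataFrame log levels; reproducing the same effect in a standalone script looks roughly like this (ROOT's logging API is experimental and may change):

    # Sketch: enabling the RDataFrame verbosity documented in OPTIONS above
    import ROOT

    # Keep the returned handle alive for as long as verbose logging is wanted.
    # Use kInfo for -v, kDebug for -vv, kDebug+10 for -vvv (also prints
    # generated code).
    verbosity = ROOT.Experimental.RLogScopedVerbosity(
        ROOT.Detail.RDF.RDFLogChannel(),
        ROOT.Experimental.ELogLevel.kInfo)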
diff --git a/man/man7/fccanalysis-file.7 b/man/man7/fccanalysis-file.7
deleted file mode 100644
index 1c535081e9..0000000000
--- a/man/man7/fccanalysis-file.7
+++ /dev/null
@@ -1,122 +0,0 @@
-.\" Manpage for fccanalysis-file
-.\" Contact fcc-experiments-sw-dev@cern.ch to correct errors or typos.
-.TH FCCANALYSIS\-FILE 7 "24 May 2023" "0.8.0" "fccanalysis-file man page"
-.SH NAME
-fccanalysis\-file \- analysis file specification
-.SH SYNOPSIS
-.sp
-*
-.sp
-.SH DESCRIPTION
-The analysis file is expected to be a valid Python script containing either
-part of or the full analysis. There are two basic modes how to run a FCC
-analysis, one is to run in the managed mode like so:
-.PP
-.RS 4
-fcc run
-.RE
-.PP
-or
-.PP
-.RS 4
-fcc final
-.RE
-.PP
-where user needs to provide minimal number of variables and settings. In this
-mode the RDataFrame is managed for the user and it can be controlled by defining
-several global attributes in the analysis file. The other mode is to run the
-analysis file as regular python script:
-.PP
-.RS 4
-python
-.RE
-.PP
-here user has full control over the RDataFrame, but has to create all necessary
-scaffolding\&.
-.SH ATTRIBUTES
-In case of running the FCCAnalysis in the managed mode user can use the
-following global attributes to control the behavior of the analysis.
-.PP
-\fBprocDict\fR
-.RS 4
-This variable controls which process dictionary will be used. It can be either
-simple file name, absolute path or url. In the case of simple filename, the file
-is being searched for first in the working directory and then at the locations
-indicated in the $FCCDICTSDIR environment variable.
-.RE
-.PP
-\fBprodTag\fR
-.RS 4
-Provides information where to find input files. There are several way how to
-find the information, one of them uses YAML file which is being searched for in
-the subfolders of $FCCDICTSDIR.
-.RE
-.PP
-\fBprocessList\fR
-.RS 4
-Dictionary of process samples to be run over. Each process can have several
-parameters:
-.br
-\fIfraction\fR
-.RS 4
-The analysis will run over reduced number of input files roughly corresponding
-to the fraction of total events specified\&.
-.br
-Default value: 1 (full process sample)
-.RE
-\fIoutput\fR
-.RS 4
-Specifies the stem for the output file(s)\&. The stem will be used to create
-output directory if there is more than one chunk or as a filename if there is
-only one\&.
-.br
-Default value: output\&.root
-.RE
-\fIchunks\fR
-.RS 4
-The analysis RDataFrame can be split into several chunks\&.
-.br
-Default value: 1
-.RE
-.RE
-.PP
-\fBoutputDir\fR
-.RS 4
-User can specify the directory for the output files. The output directory can be
-overwriten by specifiing absolute path with `\-\-output` commandline argument\&.
-.RE
-.PP
-This section is under construction. You are invited to help :)
-.SH SEE ALSO
-fccanalysis(1), fccanalysis-run(1)
-.SH BUGS
-Many
-.SH AUTHORS
-There are many contributors to the FCCAnalyses framework, but the principal
-authors are:
-.br
-.RS 4
-Clement Helsens
-.br
-Valentin Volk
-.br
-Gerardo Ganis
-.RE
-.SH FCCANALYSES
-Part of the FCCAnalyses framework\&.
-.SH LINKS
-.PP
-1\&. FCCAnalyses webpage
-.RS 4
-https://hep-fcc\&.github\&.io/FCCAnalyses/
-.RE
-.PP
-2\&. FCCAnalysises GitHub repository
-.RS
-https://github\&.com/HEP\-FCC/FCCAnalyses/
-.RE
-.PP
-3\&. FCCSW Forum
-.RS
-https://fccsw\-forum\&.web\&.cern\&.ch/
-.RE
diff --git a/man/man7/fccanalysis-script.7 b/man/man7/fccanalysis-script.7
new file mode 100644
index 0000000000..eb68ef1707
--- /dev/null
+++ b/man/man7/fccanalysis-script.7
@@ -0,0 +1,202 @@
+.\" Manpage for fccanalysis-script
+.\" Contact FCC-PED-SoftwareAndComputing-Analysis@cern.ch to correct errors or typos.
+.TH FCCANALYSIS\-SCRIPT 7 "17 Jan 2024" "0.8.0" "fccanalysis-script man page"
+.SH NAME
+\fBfccanalysis\-script\fR \(en analysis steering script specification
+.SH SYNOPSIS
+.IP
+*
+.SH DESCRIPTION
+.PP
+The analysis script is expected to be a valid Python script containing either
+part of or the full analysis. There are two basic modes of running an
+analysis\&. One is to run in the managed mode like so:
+.IP
+fccanalysis run \fIanalysis_script.py\fR
+
+.RE
+or
+.IP
+fccanalysis final \fIanalysis_script.py\fR
+
+.RE
+.PP
+where the user needs to provide a minimal number of variables and settings. In
+this mode the RDataFrame is managed for the user and can be controlled by
+defining several global attributes in the analysis script. The other mode is to
+run the analysis script as a regular Python script:
+.IP
+python \fIanalysis_script.py\fR
+.RE
+.PP
+here the user has full control over the RDataFrame, but has to create all the
+necessary scaffolding\&.
+.PP
+It is expected that the whole analysis will be split into several stages, which
+can be done in one of two styles:
+.IP
+anascript_stage1.py \-> anascript_stage2.py \-> ... \-> anascript_stage_final.py \-> plots.py
+
+.RE
+or
+.IP
+analysis_histmaker.py \-> plots.py
+
+.RE
+In the case of the first style at least three stages are required
+(anascript_stage1.py, anascript_stage_final.py, plots.py) and there is no upper
+limit on the number of stages. In the case of the second style only two stages
+are required. The first style is called "staged analysis" and the second
+"histmaker analysis".
+.TP
+\fBstaged analysis\fR
+The analysis script needs to contain an \fIRDFanalysis\fR class of the following
+structure:
+.IP
+class RDFanalysis():
+ def analysers(df):
+ df2 = (
+ df
+ # define the muon collection
+ .Define("muons", "ReconstructedParticle::get(Muon0, ReconstructedParticles)")
+ ...
+ )
+ return df2
+ def output():
+ return ["muons", "muon_mass"]
+.TP
+\fBhistmaker analysis\fR
+The analysis script needs to contain a \fIbuild_graph\fR function of the following
+structure:
+.IP
+def build_graph(df, dataset):
+ results = []
+ df = df.Define("weight", "1.0")
+ weightsum = df.Sum("weight")
+ df = df.Define("muons", "FCCAnalyses::ReconstructedParticle::sel_p(20)(muons_all)")
+ ...
+ results.append(df.Histo1D(("muons_p_cut0", "", *bins_p_mu), "muons_p"))
+ return results, weightsum
+.TP
+\fBplots script\fR
+This stage requires neither the \fIRDFanalysis\fR class nor the
+\fIbuild_graph\fR function, it has its own set of attributes, please see the
+examples in the \fIexamples\fR directory.
+.SH ATTRIBUTES
+In case of running FCCAnalyses in the managed mode the user can use the
+following global attributes to control the behavior of the analysis.
+.TP
+\fBprocessList\fR (mandatory)
+Dictionary of process samples to be run over. Each process can have several
+parameters:
+\fIfraction\fR
+.in +4
+The analysis will run over a reduced number of input files roughly
+corresponding to the fraction of total events specified\&.
+.br
+Default value: 1 (full process sample)
+.in -4
+\fIoutput\fR
+.in +4
+Specifies the stem for the output file(s)\&. The stem will be used to create an
+output directory if there is more than one chunk or as a filename if there is
+only one\&.
+.br
+Default value: output\&.root
+.in -4
+\fIchunks\fR
+.in +4
+The analysis RDataFrame can be split into several chunks\&.
+.br
+Default value: 1
+.TP
+\fBprodTag\fR (mandatory)
+Provides information on where to find input files. There are several ways to
+find the information, one of them uses a YAML file which is searched for in
+the subfolders of $FCCDICTSDIR\&.
+.TP
+\fBoutputDir\fR (mandatory)
+User can specify the directory for the output files. The output directory can be
+overridden by specifying an absolute path with the `\-\-output` command-line argument\&.
+.TP
+\fBanalysisName\fR (optional)
+Optional name for the analysis\&.
+.br
+Default value: empty string
+.TP
+\fBnCPUS\fR (optional)
+Number of threads the RDataFrame will use\&.
+.br
+Default value: 4
+.TP
+\fBrunBatch\fR (optional)
+Run the analysis on the HTCondor batch system.
+.br
+Default value: False
+.TP
+\fBbatchQueue\fR (optional)
+Batch queue name when running on HTCondor.
+.br
+Default value: "longlunch"
+.TP
+\fBcompGroup\fR (optional)
+Computing account when running on HTCondor.
+.br
+Default value: "group_u_FCC.local_gen"
+.TP
+\fBoutputDirEos\fR (optional)
+Output directory on EOS\&. If specified, files will be copied there once the
+batch job is done\&.
+.br
+Default value: empty string
+.TP
+\fBeosType\fR (mandatory if \fIoutputDirEos\fR is used)
+Type of the EOS proxy to be used.
+.br
+Default value: empty string
+.TP
+\fBtestFile\fR (optional)
+Location of the test file.
+.br
+Default value: empty string
+.TP
+.B procDict
+This variable controls which process dictionary will be used. It can be either
+a simple file name, an absolute path or a URL. In the case of a simple filename,
+the file is searched for first in the working directory and then at the
+locations indicated in the $FCCDICTSDIR environment variable.
+.PP
+This section is under construction. You are invited to help :)
+.SH SEE ALSO
+fccanalysis(1), fccanalysis-run(1)
+.SH BUGS
+Many
+.SH AUTHORS
+There are many contributors to the FCCAnalyses framework, but the principal
+authors are:
+.in +4
+Clement Helsens
+.br
+Valentin Volk
+.br
+Gerardo Ganis
+.SH FCCANALYSES
+Part of the FCCAnalyses framework\&.
+.SH LINKS
+.PP
+.UR https://hep-fcc\&.github\&.io/FCCAnalyses/
+FCCAnalyses webpage
+.UE
+.PP
+.UR https://github\&.com/HEP\-FCC/FCCAnalyses/
+FCCAnalyses GitHub repository
+.UE
+.PP
+.UR https://fccsw\-forum\&.web\&.cern\&.ch/
+FCCSW Forum
+.UE
+.SH CONTACT
+.PP
+.MT FCC-PED-SoftwareAndComputing-Analysis@cern.ch
+FCC-PED-SoftwareAndComputing-Analysis
+.ME
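
Putting the ATTRIBUTES section together, a minimal "staged analysis" script accepted by fccanalysis run could look like the sketch below; the sample name, production tag and collections are placeholders, not prescriptions:

    # analysis_stage1.py -- minimal managed-mode script (illustrative)
    processList = {
        # placeholder sample; fraction/output/chunks as documented above
        'p8_ee_ZZ_ecm240': {'fraction': 0.5, 'chunks': 2,
                            'output': 'ZZ_stage1'},
    }
    prodTag = 'FCCee/winter2023/IDEA/'   # placeholder production tag
    outputDir = 'outputs/stage1'
    nCPUS = 4                            # optional, defaults to 4

    class RDFanalysis:
        '''Analysers and output branches for the managed RDataFrame.'''
        def analysers(df):
            # define the muon collection, as in the staged-analysis example
            return df.Define(
                'muons',
                'ReconstructedParticle::get(Muon0, ReconstructedParticles)')
        def output():
            return ['muons']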
diff --git a/python/FCCAnalysisRun.py b/python/FCCAnalysisRun.py
deleted file mode 100644
index b1f7072497..0000000000
--- a/python/FCCAnalysisRun.py
+++ /dev/null
@@ -1,1133 +0,0 @@
-import os
-import sys
-import time
-import yaml
-import glob
-import json
-import logging
-import subprocess
-import importlib.util
-import datetime
-import numpy as np
-
-import ROOT
-from anafile import getElement, getElementDict
-from process import getProcessInfo, get_process_dict
-
-LOGGER = logging.getLogger('FCCAnalyses.run')
-
-ROOT.gROOT.SetBatch(True)
-
-# __________________________________________________________
-def get_entries(infilepath):
- '''
- Get number of original entries and number of actual entries in the file
- '''
- infile = ROOT.TFile.Open(infilepath)
- infile.cd()
-
- processEvents = 0
- try:
- processEvents = infile.Get('eventsProcessed').GetVal()
- except AttributeError:
- LOGGER.warning('Input file is missing information about '
- 'original number of events!')
-
- eventsTTree = 0
- try:
- eventsTTree = infile.Get("events").GetEntries()
- except AttributeError:
- LOGGER.error('Input file is missing "events" TTree!\nAborting...')
- infile.Close()
- sys.exit(3)
-
- infile.Close()
-
- return processEvents, eventsTTree
-
-
-# __________________________________________________________
-def getsubfileList(in_file_list, event_list, fraction):
- nevts_total = sum(event_list)
- nevts_target = int(nevts_total * fraction)
-
- if nevts_target <= 0:
- LOGGER.error('The reduction fraction %f too stringent, no events '
- 'left!\nAborting...', fraction)
- sys.exit(3)
-
- nevts_real = 0
- out_file_list = []
- for i in range(len(event_list)):
- if nevts_real >= nevts_target:
- break
- nevts_real += event_list[i]
- out_file_list.append(in_file_list[i])
-
- info_msg = f'Reducing the input file list by fraction "{fraction}" of '
- info_msg += 'total events:\n\t'
- info_msg += f'- total number of events: {nevts_total:,}\n\t'
- info_msg += f'- targeted number of events: {nevts_target:,}\n\t'
- info_msg += '- number of events in the resulting file list: '
- info_msg += f'{nevts_real:,}\n\t'
- info_msg += '- number of files after reduction: '
- info_msg += str((len(out_file_list)))
- LOGGER.info(info_msg)
-
- return out_file_list
-
-
-# __________________________________________________________
-def getchunkList(fileList, chunks):
- chunk_list = list(np.array_split(fileList, chunks))
- chunk_list = [chunk for chunk in chunk_list if chunk.size > 0]
-
- return chunk_list
-
-
-# __________________________________________________________
-def saveBenchmark(outfile, benchmark):
- benchmarks = []
- try:
- with open(outfile, 'r') as benchin:
- benchmarks = json.load(benchin)
- except OSError:
- pass
-
- benchmarks = [b for b in benchmarks if b['name'] != benchmark['name']]
- benchmarks.append(benchmark)
-
- with open(outfile, 'w') as benchout:
- json.dump(benchmarks, benchout, indent=2)
-
-
-#__________________________________________________________
-def getCommandOutput(command):
- p = subprocess.Popen(command, shell = True, stdout = subprocess.PIPE, stderr = subprocess.PIPE,universal_newlines=True)
- (stdout,stderr) = p.communicate()
- return {"stdout":stdout, "stderr":stderr, "returncode":p.returncode}
-
-
-# __________________________________________________________
-def SubmitToCondor(cmd,nbtrials):
- submissionStatus=0
- cmd=cmd.replace('//','/') # -> dav : is it needed?
- for i in range(nbtrials):
- outputCMD = getCommandOutput(cmd)
- stderr=outputCMD["stderr"].split('\n')
- stdout=outputCMD["stdout"].split('\n') # -> dav : is it needed?
-
- if len(stderr)==1 and stderr[0]=='' :
- LOGGER.info('GOOD SUBMISSION')
- submissionStatus=1
- else:
- LOGGER.warning('Error while submitting, retrying...\n\t'
- 'Trial: %i / %i\n\tError: %s', i, nbtrials, stderr)
- time.sleep(10)
-
- if submissionStatus == 1:
- return 1
-
- if i == nbtrials-1:
- LOGGER.error('Failed submitting after: %i trials, stop trying to '
- 'submit', nbtrials)
- return 0
-
-# __________________________________________________________
-def initialize(args, rdfModule, analysisFile):
-
- # for convenience and compatibility with user code
- ROOT.gInterpreter.Declare("using namespace FCCAnalyses;")
- geometryFile = getElement(rdfModule, "geometryFile")
- readoutName = getElement(rdfModule, "readoutName")
- if geometryFile!="" and readoutName!="":
- ROOT.CaloNtupleizer.loadGeometry(geometryFile, readoutName)
-
- # set multithreading (no MT if number of events is specified)
- ncpus = 1
- if args.nevents < 0:
- if isinstance(args.ncpus, int) and args.ncpus >= 1:
- ncpus = args.ncpus
- else:
- ncpus = getElement(rdfModule, "nCPUS")
- if ncpus < 0: # use all available threads
- ROOT.EnableImplicitMT()
- ncpus = ROOT.GetThreadPoolSize()
- ROOT.ROOT.EnableImplicitMT(ncpus)
- ROOT.EnableThreadSafety()
-
- if ROOT.IsImplicitMTEnabled():
- LOGGER.info('Multithreading enabled. Running over %i threads',
- ROOT.GetThreadPoolSize())
- else:
- LOGGER.info('No multithreading enabled. Running in single thread...')
-
- # custom header files
- includePaths = getElement(rdfModule, "includePaths")
- if includePaths:
- ROOT.gInterpreter.ProcessLine(".O3")
- basepath = os.path.dirname(os.path.abspath(analysisFile))+"/"
- for path in includePaths:
- LOGGER.info('Loading %s...', path)
- ROOT.gInterpreter.Declare(f'#include "{basepath}/{path}"')
-
- # check if analyses plugins need to be loaded before anything
- # still in use?
- analysesList = getElement(rdfModule, "analysesList")
- if analysesList and len(analysesList) > 0:
- _ana = []
- for analysis in analysesList:
- LOGGER.info('Load cxx analyzers from %s...', analysis)
- if analysis.startswith('libFCCAnalysis_'):
- ROOT.gSystem.Load(analysis)
- else:
- ROOT.gSystem.Load(f'libFCCAnalysis_{analysis}')
- if not hasattr(ROOT, analysis):
- ROOT.error('Analysis %s not properly loaded!\nAborting...',
- analysis)
- sys.exit(4)
- _ana.append(getattr(ROOT, analysis).dictionary)
-
-# __________________________________________________________
-def runRDF(rdfModule, inputlist, outFile, nevt, args):
- df = ROOT.RDataFrame("events", inputlist)
-
- # limit number of events processed
- if args.nevents > 0:
- df = df.Range(0, args.nevents)
-
- try:
- df2 = getElement(rdfModule.RDFanalysis, "analysers")(df)
-
- branch_list = ROOT.vector('string')()
- blist = getElement(rdfModule.RDFanalysis, "output")()
- for bname in blist:
- branch_list.push_back(bname)
-
- df2.Snapshot("events", outFile, branch_list)
- except Exception as excp:
- LOGGER.error('During the execution of the analysis file exception '
- 'occurred:\n%s', excp)
- sys.exit(3)
-
- return df2.Count()
-
-
-# __________________________________________________________
-def sendToBatch(rdfModule, chunkList, process, analysisFile):
- localDir = os.environ["LOCAL_DIR"]
- current_date = datetime.datetime.fromtimestamp(
- datetime.datetime.now().timestamp()).strftime('%Y-%m-%d_%H-%M-%S')
- logDir = localDir + "/BatchOutputs/{}/{}".format(current_date, process)
- if not os.path.exists(logDir):
- os.system("mkdir -p {}".format(logDir))
-
- # Making sure the FCCAnalyses libraries are compiled and installed
- try:
- subprocess.check_output(['make', 'install'],
- cwd=localDir+'/build',
- stderr=subprocess.DEVNULL
- )
- except subprocess.CalledProcessError as e:
- LOGGER.error('The FCCanalyses libraries are not properly build and '
- 'installed!\nAborting job submission...')
- sys.exit(3)
-
- outputDir = getElement(rdfModule, "outputDir")
- outputDirEos = getElement(rdfModule, "outputDirEos")
- eosType = getElement(rdfModule, "eosType")
- userBatchConfig = getElement(rdfModule, "userBatchConfig")
-
- if outputDir!="" and outputDir[-1]!="/": outputDir+="/"
-
- condor_file_str = ''
- for ch in range(len(chunkList)):
- frunname = '{}/job{}_chunk{}.sh'.format(logDir, process, ch)
- LOGGER.info('Script to run %s: ', frunname)
- condor_file_str += frunname + " "
-
- frun = None
- try:
- frun = open(frunname, 'w')
- except IOError as e:
- LOGGER.warning('I/O error(%i): %s', e.errno, e.strerror)
- time.sleep(10)
- frun = open(frunname, 'w')
-
- subprocess.getstatusoutput('chmod 777 %s'%(frunname))
- frun.write('#!/bin/bash\n')
- frun.write('source ' + localDir + '/setup.sh\n')
-
- # add userBatchConfig if any
- if userBatchConfig != "":
- if not os.path.isfile(userBatchConfig):
- LOGGER.warning('userBatchConfig file does not exist, will not '
- 'add it to default config, please check')
- else:
- configFile=open(userBatchConfig)
- for line in configFile:
- frun.write(line+'\n')
-
- frun.write('mkdir job{}_chunk{}\n'.format(process,ch))
- frun.write('cd job{}_chunk{}\n'.format(process,ch))
-
- if not os.path.isabs(outputDir):
- frun.write(localDir + '/bin/fccanalysis run {} --batch --output {}chunk{}.root --files-list '.format(analysisFile, outputDir, ch))
- else:
- frun.write(localDir + '/bin/fccanalysis run {} --batch --output {}{}/chunk{}.root --files-list '.format(analysisFile, outputDir, process,ch))
-
- for ff in range(len(chunkList[ch])):
- frun.write(' %s'%(chunkList[ch][ff]))
- frun.write('\n')
- if not os.path.isabs(outputDir):
- if outputDirEos=="":
- frun.write('cp {}chunk{}.root {}/{}/{}/chunk{}.root\n'.format(outputDir,ch,localDir,outputDir,process,ch))
- else:
- frun.write('xrdcp {}chunk{}.root root://{}.cern.ch/{}/{}/chunk{}.root\n'.format(outputDir,ch,eosType,outputDirEos,process,ch))
- else:
- if outputDirEos!="":
- frun.write('xrdcp {}chunk{}.root root://{}.cern.ch/{}/{}/chunk{}.root\n'.format(outputDir,ch,eosType,outputDirEos,process,ch))
-
- frun.close()
-
-
- condor_file_str=condor_file_str.replace("//","/")
- frunname_condor = 'job_desc_{}.cfg'.format(process)
- frunfull_condor = '%s/%s'%(logDir,frunname_condor)
- frun_condor = None
- try:
- frun_condor = open(frunfull_condor, 'w')
- except IOError as e:
- LOGGER.warning('I/O error(%i): %s', e.errno, e.strerror)
- time.sleep(10)
- frun_condor = open(frunfull_condor, 'w')
- sysVer_str = ''
- try:
- f_make = open(localDir+'/build/CMakeFiles/CMakeConfigureLog.yaml', 'r')
- except IOError as e:
- LOGGER.warning('I/O error(%i): %s', e.errno, e.strerror)
- LOGGER.warning('File not found: ' + localDir+'/build/CMakeFiles/CMakeConfigureLog.yaml')
- else:
- with open(localDir+'/build/CMakeFiles/CMakeConfigureLog.yaml', 'r') as makeConfig:
- make_content = makeConfig.read()
- if 'centos7' in make_content:
- sysVer_str = '(OpSysAndVer =?= "CentOS7")' + ' &&'
- if 'almalinux9' in make_content:
- sysVer_str = '(OpSysAndVer =?= "AlmaLinux9")' + ' &&'
- if sysVer_str == '':
- LOGGER.warning('FCCAnalysis was compiled in an environment not available in lxplus HTcondor. Please check.'
- 'Submitting jobs to default operating system. There may be compatibility issues.')
- subprocess.getstatusoutput('chmod 777 {}'.format(frunfull_condor))
- frun_condor.write('executable = $(filename)\n')
- frun_condor.write('Log = {}/condor_job.{}.$(ClusterId).$(ProcId).log\n'.format(logDir,process))
- frun_condor.write('Output = {}/condor_job.{}.$(ClusterId).$(ProcId).out\n'.format(logDir,process))
- frun_condor.write('Error = {}/condor_job.{}.$(ClusterId).$(ProcId).error\n'.format(logDir,process))
- frun_condor.write('getenv = False\n')
- frun_condor.write('environment = "LS_SUBCWD={}"\n'.format(logDir)) # not sure
- frun_condor.write('requirements = ({} (Machine =!= LastRemoteHost) && (TARGET.has_avx2 =?= True) )\n'.format(sysVer_str))
- frun_condor.write('on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)\n')
- frun_condor.write('max_retries = 3\n')
- frun_condor.write('+JobFlavour = "{}"\n'.format(getElement(rdfModule, "batchQueue")))
- frun_condor.write('+AccountingGroup = "{}"\n'.format(getElement(rdfModule, "compGroup")))
- frun_condor.write('RequestCpus = {}\n'.format(getElement(rdfModule, "nCPUS")))
- frun_condor.write('queue filename matching files {}\n'.format(condor_file_str))
- frun_condor.close()
-
- cmdBatch="condor_submit {}".format(frunfull_condor)
- LOGGER.info('Batch command: %s', cmdBatch)
- job=SubmitToCondor(cmdBatch, 10)
-
-
-#__________________________________________________________
-def apply_filepath_rewrites(filepath):
- '''
- Apply path rewrites if applicable.
- '''
- # Stripping leading and trailing white spaces
- filepath_stripped = filepath.strip()
- # Stripping leading and trailing slashes
- filepath_stripped = filepath_stripped.strip('/')
-
- # Splitting the path along slashes
- filepath_splitted = filepath_stripped.split('/')
-
- if len(filepath_splitted) > 1 and filepath_splitted[0] == 'eos':
- if filepath_splitted[1] == 'experiment':
- filepath = 'root://eospublic.cern.ch//' + filepath_stripped
- elif filepath_splitted[1] == 'user':
- filepath = 'root://eosuser.cern.ch//' + filepath_stripped
- elif 'home-' in filepath_splitted[1]:
- filepath = 'root://eosuser.cern.ch//eos/user/' + \
- filepath_stripped.replace('eos/home-', '')
- else:
- LOGGER.warning('Unknown EOS path type!\nPlease check with the '
- 'developers as this might impact performance of '
- 'the analysis.')
- return filepath
-
-
-# __________________________________________________________
-def runLocal(rdfModule, infile_list, args):
- # Create list of files to be processed
- info_msg = 'Creating dataframe object from files:\n\t'
- file_list = ROOT.vector('string')()
- nevents_orig = 0 # Amount of events processed in previous stage (= 0 if it is the first stage)
- nevents_local = 0 # The amount of events in the input file(s)
- for filepath in infile_list:
-
- filepath = apply_filepath_rewrites(filepath)
-
- file_list.push_back(filepath)
- info_msg += f'- {filepath}\t\n'
- infile = ROOT.TFile.Open(filepath, 'READ')
- try:
- nevents_orig += infile.Get('eventsProcessed').GetVal()
- except AttributeError:
- pass
-
- try:
- nevents_local += infile.Get("events").GetEntries()
- except AttributeError:
- LOGGER.error('Input file:\n%s\nis missing events TTree!\n'
- 'Aborting...', filepath)
- infile.Close()
- sys.exit(3)
-
- LOGGER.info(info_msg)
-
- # Adjust number of events in case --nevents was specified
- if args.nevents > 0 and args.nevents < nevents_local:
- nevents_local = args.nevents
-
- if nevents_orig > 0:
- LOGGER.info('Number of events:\n\t- original: %s\n\t- local: %s',
- f'{nevents_orig:,}', f'{nevents_local:,}')
- else:
- LOGGER.info('Number of local events: %s', f'{nevents_local:,}')
-
- output_dir = getElement(rdfModule, "outputDir")
- if not args.batch:
- if os.path.isabs(args.output):
- LOGGER.warning('Provided output path is absolute, "outputDir" '
- 'from analysis script will be ignored!')
- outfile_path = os.path.join(output_dir, args.output)
- else:
- outfile_path = args.output
- LOGGER.info('Output file path:\n%s', outfile_path)
-
- # Run RDF
- start_time = time.time()
- outn = runRDF(rdfModule, file_list, outfile_path, nevents_local, args)
- outn = outn.GetValue()
-
- outfile = ROOT.TFile(outfile_path, 'update')
- param = ROOT.TParameter(int)(
- 'eventsProcessed',
- nevents_orig if nevents_orig != 0 else nevents_local)
- param.Write()
- outfile.Write()
- outfile.Close()
-
- elapsed_time = time.time() - start_time
- info_msg = f"{' SUMMARY ':=^80}\n"
- info_msg += 'Elapsed time (H:M:S): '
- info_msg += time.strftime('%H:%M:%S', time.gmtime(elapsed_time))
- info_msg += '\nEvents processed/second: '
- info_msg += f'{int(nevents_local/elapsed_time):,}'
- info_msg += f'\nTotal events processed: {int(nevents_local):,}'
- info_msg += f'\nNo. result events: {int(outn):,}'
- if nevents_local > 0:
- info_msg += f'\nReduction factor local: {outn/nevents_local}'
- if nevents_orig > 0:
- info_msg += f'\nReduction factor total: {outn/nevents_orig}'
- info_msg += '\n'
- info_msg += 80 * '='
- info_msg += '\n'
- LOGGER.info(info_msg)
-
- if args.bench:
- analysis_name = getElement(rdfModule, 'analysisName')
- if not analysis_name:
- analysis_name = args.anafile_path
-
- bench_time = {}
- bench_time['name'] = 'Time spent running the analysis: '
- bench_time['name'] += analysis_name
- bench_time['unit'] = 'Seconds'
- bench_time['value'] = elapsed_time
- bench_time['range'] = 10
- bench_time['extra'] = 'Analysis path: ' + args.anafile_path
- saveBenchmark('benchmarks_smaller_better.json', bench_time)
-
- bench_evt_per_sec = {}
- bench_evt_per_sec['name'] = 'Events processed per second: '
- bench_evt_per_sec['name'] += analysis_name
- bench_evt_per_sec['unit'] = 'Evt/s'
- bench_evt_per_sec['value'] = nevents_local / elapsed_time
- bench_time['range'] = 1000
- bench_time['extra'] = 'Analysis path: ' + args.anafile_path
- saveBenchmark('benchmarks_bigger_better.json', bench_evt_per_sec)
-
-
-#__________________________________________________________
-def runStages(args, rdfModule, preprocess, analysisFile):
- '''
- Run regular stage.
- '''
-
- # Set ncpus, load header files, custom dicts, ...
- initialize(args, rdfModule, analysisFile)
-
- # Check if outputDir exist and if not create it
- outputDir = getElement(rdfModule, "outputDir")
- if not os.path.exists(outputDir) and outputDir:
- os.system("mkdir -p {}".format(outputDir))
-
- # Check if outputDir exist and if not create it
- outputDirEos = getElement(rdfModule,"outputDirEos")
- if not os.path.exists(outputDirEos) and outputDirEos:
- os.system("mkdir -p {}".format(outputDirEos))
-
- # Check if test mode is specified, and if so run the analysis on it (this
- # will exit after)
- if args.test:
- LOGGER.info('Running over test file...')
- testfile_path = getElement(rdfModule, "testFile")
- directory, _ = os.path.split(args.output)
- if directory:
- os.system("mkdir -p {}".format(directory))
- runLocal(rdfModule, [testfile_path], args)
- sys.exit(0)
-
- # Check if files are specified, and if so run the analysis on it/them (this
- # will exit after)
- if len(args.files_list) > 0:
- LOGGER.info('Running over files provided in command line argument...')
- directory, _ = os.path.split(args.output)
- if directory:
- os.system(f'mkdir -p {directory}')
- runLocal(rdfModule, args.files_list, args)
- sys.exit(0)
-
- # Check if batch mode and set start and end file from original list
- run_batch = getElement(rdfModule, "runBatch")
-
- # Check if the process list is specified
- process_list = getElement(rdfModule, "processList")
-
- for process_name in process_list:
- file_list, event_list = getProcessInfo(
- process_name,
- getElement(rdfModule, "prodTag"),
- getElement(rdfModule, "inputDir"))
-
- if len(file_list) <= 0:
- LOGGER.error('No files to process!\nAborting...')
- sys.exit(3)
-
- # Determine the fraction of the input to be processed
- fraction = 1
- if getElementDict(process_list[process_name], 'fraction'):
- fraction = getElementDict(process_list[process_name], 'fraction')
- # Put together output path
- output_stem = process_name
- if getElementDict(process_list[process_name], 'output'):
- output_stem = getElementDict(process_list[process_name], 'output')
- # Determine the number of chunks the output will be split into
- chunks = 1
- if getElementDict(process_list[process_name], 'chunks'):
- chunks = getElementDict(process_list[process_name], 'chunks')
-
- info_msg = f'Adding process "{process_name}" with:'
- if fraction < 1:
- info_msg += f'\n\t- fraction: {fraction}'
- info_msg += f'\n\t- number of files: {len(file_list):,}'
- info_msg += f'\n\t- output stem: {output_stem}'
- if chunks > 1:
- info_msg += f'\n\t- number of chunks: {chunks}'
-
- if fraction < 1:
- file_list = getsubfileList(file_list, event_list, fraction)
-
- chunk_list = [file_list]
- if chunks > 1:
- chunk_list = getchunkList(file_list, chunks)
- LOGGER.info('Number of the output files: %s', f'{len(chunk_list):,}')
-
- # Create directory if more than 1 chunk
- if chunks > 1:
- output_directory = os.path.join(outputDir, output_stem)
-
- if not os.path.exists(output_directory):
- os.system("mkdir -p {}".format(output_directory))
-
- if run_batch:
- # Sending to the batch system
- LOGGER.info('Running on the batch...')
- if len(chunk_list) == 1:
- LOGGER.warning('\033[4m\033[1m\033[91mRunning on batch with '
- 'only one chunk might not be optimal\033[0m')
-
- sendToBatch(rdfModule, chunk_list, process_name, analysisFile)
-
- else:
- # Running locally
- LOGGER.info('Running locally...')
- if len(chunk_list) == 1:
- args.output = '{}.root'.format(output_stem)
- runLocal(rdfModule, chunk_list[0], args)
- else:
- for index, chunk in enumerate(chunk_list):
- args.output = '{}/chunk{}.root'.format(output_stem, index)
- runLocal(rdfModule, chunk, args)
-
-
-# __________________________________________________________
-def testfile(f):
- tf=ROOT.TFile.Open(f)
- tt=None
- try:
- tt=tf.Get("events")
- if tt == None:
- LOGGER.warning('File does not contains events, selection was too '
- 'tight, skipping it: %s', f)
- return False
- except IOError as e:
- LOGGER.warning('I/O error(%i): %s', e.errno, e.strerror)
- return False
- except ValueError:
- LOGGER.warning('Could read the file')
- return False
- except:
- LOGGER.warning('Unexpected error: %s\nfile ===%s=== must be deleted',
- sys.exc_info()[0], f)
- return False
- return True
-
-
-# __________________________________________________________
-def runFinal(rdfModule):
- proc_dict_location = getElement(rdfModule, "procDict", True)
- if not proc_dict_location:
- LOGGER.error('Location of the procDict not provided!\nAborting...')
- sys.exit(3)
-
- procDict = get_process_dict(proc_dict_location)
-
- procDictAdd = getElement(rdfModule, "procDictAdd", True)
- for procAdd in procDictAdd:
- if getElementDict(procDict, procAdd) == None:
- procDict[procAdd] = procDictAdd[procAdd]
-
- ROOT.ROOT.EnableImplicitMT(getElement(rdfModule, "nCPUS", True))
-
- nevents_real = 0
- start_time = time.time()
-
- processEvents={}
- eventsTTree={}
- processList={}
- saveTab=[]
- efficiencyList=[]
-
- inputDir = getElement(rdfModule,"inputDir", True)
- if not inputDir:
- LOGGER.error('The inputDir variable is mandatory for the final stage '
- 'of the analysis!\nAborting...')
- sys.exit(3)
-
- if inputDir[-1]!="/":inputDir+="/"
-
- outputDir = getElement(rdfModule,"outputDir", True)
- if outputDir!="":
- if outputDir[-1]!="/":outputDir+="/"
-
- if not os.path.exists(outputDir) and outputDir!='':
- os.system("mkdir -p {}".format(outputDir))
-
- cutList = getElement(rdfModule,"cutList", True)
- length_cuts_names = max([len(cut) for cut in cutList])
- cutLabels = getElement(rdfModule,"cutLabels", True)
-
- # save a table in a separate tex file
- saveTabular = getElement(rdfModule,"saveTabular", True)
- if saveTabular:
- # option to rewrite the cuts in a better way for the table. otherwise, take them from the cutList
- if cutLabels:
- cutNames = list(cutLabels.values())
- else:
- cutNames = [cut for cut in cutList]
-
- cutNames.insert(0, ' ')
- saveTab.append(cutNames)
- efficiencyList.append(cutNames)
-
- for process_id in getElement(rdfModule, "processList", True):
- processEvents[process_id] = 0
- eventsTTree[process_id] = 0
-
- fileListRoot = ROOT.vector('string')()
- infilepath = inputDir + process_id + '.root' # input file
- if not os.path.isfile(infilepath):
- LOGGER.warning('File %s does not exist!\nTrying if it is a '
- 'directory as it was processed with batch.',
- infilepath)
- else:
- LOGGER.info('Open file:\n\t%s', infilepath)
- processEvents[process_id], eventsTTree[process_id] = get_entries(infilepath)
- fileListRoot.push_back(infilepath)
-
- indirpath = inputDir + process_id
- if os.path.isdir(indirpath):
- info_msg = f'Open directory {indirpath}'
- flist = glob.glob(indirpath + '/chunk*.root')
- for filepath in flist:
- info_msg += '\n\t' + filepath
- chunkProcessEvents, chunkEventsTTree = get_entries(filepath)
- processEvents[process_id] += chunkProcessEvents
- eventsTTree[process_id] += chunkEventsTTree
- fileListRoot.push_back(filepath)
- LOGGER.info(info_msg)
- processList[process_id] = fileListRoot
-
- info_msg = 'Processed events:'
- for process_id, n_events in processEvents.items():
- info_msg += f'\n\t- {process_id}: {n_events:,}'
- LOGGER.info(info_msg)
- info_msg = 'Events in the TTree:'
- for process_id, n_events in eventsTTree.items():
- info_msg += f'\n\t- {process_id}: {n_events:,}'
- LOGGER.info(info_msg)
-
- histoList = getElement(rdfModule, "histoList", True)
- doScale = getElement(rdfModule, "doScale", True)
- intLumi = getElement(rdfModule, "intLumi", True)
-
- doTree = getElement(rdfModule, "doTree", True)
- for pr in getElement(rdfModule, "processList", True):
- LOGGER.info('Running over process: %s', pr)
-
- if processEvents[pr] == 0:
- LOGGER.error('Can\'t scale histograms, the number of processed '
- 'events for the process "%s" seems to be zero!', pr)
- sys.exit(3)
-
- RDF = ROOT.ROOT.RDataFrame
- df = RDF("events", processList[pr])
- defineList = getElement(rdfModule,"defineList", True)
- if len(defineList) > 0:
- LOGGER.info('Registering extra DataFrame defines...')
- for define in defineList:
- df = df.Define(define, defineList[define])
-
- fout_list = []
- histos_list = []
- tdf_list = []
- count_list = []
- cuts_list = []
- cuts_list.append(pr)
- eff_list = []
- eff_list.append(pr)
-
- # Define all histos, snapshots, etc...
- LOGGER.info('Defining snapshots and histograms')
- for cut in cutList:
- fout = outputDir+pr+'_'+cut+'.root' #output file for tree
- fout_list.append(fout)
-
- df_cut = df.Filter(cutList[cut])
- count_list.append(df_cut.Count())
-
- histos = []
-
- for v in histoList:
- if "name" in histoList[v]: # default 1D histogram
- model = ROOT.RDF.TH1DModel(v, ";{};".format(histoList[v]["title"]), histoList[v]["bin"], histoList[v]["xmin"], histoList[v]["xmax"])
- histos.append(df_cut.Histo1D(model,histoList[v]["name"]))
- elif "cols" in histoList[v]: # multi dim histogram (1, 2 or 3D)
- cols = histoList[v]['cols']
- bins = histoList[v]['bins']
- bins_unpacked = tuple([i for sub in bins for i in sub])
- if len(bins) != len(cols):
- LOGGER.error('Amount of columns should be equal to '
- 'the amount of bin configs!\nAborting...')
- sys.exit(3)
- if len(cols) == 1:
- histos.append(df_cut.Histo1D((v, "", *bins_unpacked), cols[0]))
- elif len(cols) == 2:
- histos.append(df_cut.Histo2D((v, "", *bins_unpacked), cols[0], cols[1]))
- elif len(cols) == 3:
- histos.append(df_cut.Histo3D((v, "", *bins_unpacked), cols[0], cols[1], cols[2]))
- else:
- LOGGER.error('Only 1, 2 or 3D histograms supported.')
- sys.exit(3)
- else:
- LOGGER.error('Error parsing the histogram config. Provide '
- 'either name or cols.')
- sys.exit(3)
- histos_list.append(histos)
-
- if doTree:
- opts = ROOT.RDF.RSnapshotOptions()
- opts.fLazy = True
- try:
- snapshot_tdf = df_cut.Snapshot("events", fout, "", opts)
- except Exception as excp:
- LOGGER.error('During the execution of the final stage '
- 'exception occurred:\n%s', excp)
- sys.exit(3)
-
- # Needed to avoid python garbage collector messing around with
- # the snapshot
- tdf_list.append(snapshot_tdf)
-
- # Now perform the loop and evaluate everything at once.
- LOGGER.info('Evaluating...')
- all_events = df.Count().GetValue()
- LOGGER.info('Done')
-
- nevents_real += all_events
- uncertainty = ROOT.Math.sqrt(all_events)
-
- if doScale:
- all_events = all_events*1.*procDict[pr]["crossSection"]*procDict[pr]["kfactor"]*procDict[pr]["matchingEfficiency"]*intLumi/processEvents[pr]
- uncertainty = ROOT.Math.sqrt(all_events)*procDict[pr]["crossSection"]*procDict[pr]["kfactor"]*procDict[pr]["matchingEfficiency"]*intLumi/processEvents[pr]
- LOGGER.info('Printing scaled number of events!!!')
-
- cfn_width = 16 + length_cuts_names # Cutflow name width
- info_msg = 'Cutflow:'
- info_msg += f'\n\t{"All events":{cfn_width}} : {all_events:,}'
-
- if saveTabular:
- cuts_list.append('{nevents:.2e} $\\pm$ {uncertainty:.2e}'.format(nevents=all_events,uncertainty=uncertainty)) # scientific notation - recomended for backgrounds
- # cuts_list.append('{nevents:.3f} $\\pm$ {uncertainty:.3f}'.format(nevents=all_events,uncertainty=uncertainty)) # float notation - recomended for signals with few events
- eff_list.append(1.) #start with 100% efficiency
-
- for i, cut in enumerate(cutList):
- neventsThisCut = count_list[i].GetValue()
- neventsThisCut_raw = neventsThisCut
- uncertainty = ROOT.Math.sqrt(neventsThisCut_raw)
- if doScale:
- neventsThisCut = neventsThisCut*1.*procDict[pr]["crossSection"]*procDict[pr]["kfactor"]*procDict[pr]["matchingEfficiency"]*intLumi/processEvents[pr]
- uncertainty = ROOT.Math.sqrt(neventsThisCut_raw)*procDict[pr]["crossSection"]*procDict[pr]["kfactor"]*procDict[pr]["matchingEfficiency"]*intLumi/processEvents[pr]
- info_msg += f'\n\t{"After selection " + cut:{cfn_width}} : '
- info_msg += f'{neventsThisCut:,}'
-
- # Saving the number of events, uncertainty and efficiency for the output-file
- if saveTabular and cut != 'selNone':
- if neventsThisCut != 0:
- cuts_list.append('{nevents:.2e} $\\pm$ {uncertainty:.2e}'.format(nevents=neventsThisCut,uncertainty=uncertainty)) # scientific notation - recomended for backgrounds
- # cuts_list.append('{nevents:.3f} $\\pm$ {uncertainty:.3f}'.format(nevents=neventsThisCut,uncertainty=uncertainty)) # # float notation - recomended for signals with few events
- prevNevents = cuts_list[-2].split()
- eff_list.append('{eff:.3f}'.format(eff=1.*neventsThisCut/all_events))
- # if number of events is zero, the previous uncertainty is saved instead:
- elif '$\\pm$' in cuts_list[-1]:
- cut = (cuts_list[-1]).split()
- cuts_list.append('$\\leq$ {uncertainty}'.format(uncertainty=cut[2]))
- eff_list.append('0.')
- else:
- cuts_list.append(cuts_list[-1])
- eff_list.append('0.')
-
- LOGGER.info(info_msg)
-
- # And save everything
- LOGGER.info('Saving the outputs...')
- for i, cut in enumerate(cutList):
- fhisto = outputDir+pr+'_'+cut+'_histo.root' #output file for histograms
- tf = ROOT.TFile.Open(fhisto,'RECREATE')
- for h in histos_list[i]:
- try :
- h.Scale(1.*procDict[pr]["crossSection"]*procDict[pr]["kfactor"]*procDict[pr]["matchingEfficiency"]/processEvents[pr])
- except KeyError:
- LOGGER.warning(
- 'No value defined for process %s in dictionary!', pr)
- if h.Integral(0,-1)>0:h.Scale(1./h.Integral(0,-1))
- h.Write()
- tf.Close()
-
- if doTree:
- # test that the snapshot worked well
- validfile = testfile(fout_list[i])
- if not validfile: continue
-
- if saveTabular and cut != 'selNone':
- saveTab.append(cuts_list)
- efficiencyList.append(eff_list)
-
- if saveTabular:
- f = open(outputDir+"outputTabular.txt","w")
- # Printing the number of events in format of a LaTeX table
- print('\\begin{table}[H] \n \\centering \n \\resizebox{\\textwidth}{!}{ \n \\begin{tabular}{|l||',end='',file=f)
- print('c|' * (len(cuts_list)-1),end='',file=f)
- print('} \hline',file=f)
- for i, row in enumerate(saveTab):
- print(' ', end='', file=f)
- print(*row, sep = ' & ', end='', file=f)
- print(' \\\\ ', file=f)
- if (i == 0):
- print(' \\hline',file=f)
- print(' \\hline \n \\end{tabular}} \n \\caption{Caption} \n \\label{tab:my_label} \n\\end{table}', file=f)
-
- # Efficiency:
- print('\n\nEfficiency: ', file=f)
- print('\\begin{table}[H] \n \\centering \n \\resizebox{\\textwidth}{!}{ \n \\begin{tabular}{|l||',end='',file=f)
- print('c|' * (len(cuts_list)-1),end='',file=f)
- print('} \hline',file=f)
- for i in range(len(eff_list)):
- print(' ', end='', file=f)
- v = [row[i] for row in efficiencyList]
- print(*v, sep = ' & ', end='', file=f)
- print(' \\\\ ', file=f)
- if (i == 0):
- print(' \\hline',file=f)
- print(' \\hline \n \\end{tabular}} \n \\caption{Caption} \n \\label{tab:my_label} \n\\end{table}', file=f)
- f.close()
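
For orientation, the tabular block written above expands to a skeleton like the following (the cut values are illustrative; the real columns come from `cuts_list`):

```latex
\begin{table}[H]
 \centering
 \resizebox{\textwidth}{!}{
 \begin{tabular}{|l||c|c|}
  \hline
  sample & 1.00e+04 $\pm$ 1.00e+02 & 5.00e+03 $\pm$ 7.07e+01 \\
  \hline
  \hline
 \end{tabular}}
 \caption{Caption}
 \label{tab:my_label}
\end{table}
```
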
-
- elapsed_time = time.time() - start_time
-
- info_msg = f"{' SUMMARY ':=^80}\n"
- info_msg += 'Elapsed time (H:M:S): '
- info_msg += time.strftime('%H:%M:%S', time.gmtime(elapsed_time))
- info_msg += '\nEvents processed/second: '
- info_msg += f'{int(nevents_real/elapsed_time):,}'
- info_msg += f'\nTotal events processed: {nevents_real:,}'
- info_msg += '\n'
- info_msg += 80 * '='
- info_msg += '\n'
- LOGGER.info(info_msg)
-
-
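
The normalisation used throughout the cutflow above is crossSection * kfactor * matchingEfficiency * intLumi / processEvents, applied to both the count and its Poisson error. A minimal standalone sketch of that arithmetic (names and example numbers are illustrative):

```python
import math

def scale_yield(raw: float, xsec: float, kfactor: float, match_eff: float,
                int_lumi: float, n_generated: float) -> tuple:
    '''Scale a raw MC count to the target luminosity; return (yield, error).'''
    norm = xsec * kfactor * match_eff * int_lumi / n_generated
    return raw * norm, math.sqrt(raw) * norm

# 10k events selected out of 1M generated, with example normalisation inputs
print(scale_yield(1.0e4, 5.0, 1.0, 1.0, 1.0e6, 1.0e6))  # (50000.0, 500.0)
```
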
-def runHistmaker(args, rdfModule, analysisFile):
-
- # set ncpus, load header files, custom dicts, ...
- initialize(args, rdfModule, analysisFile)
-
- # load process dictionary
- proc_dict_location = getElement(rdfModule, "procDict", True)
- if not proc_dict_location:
- LOGGER.error('Location of the procDict not provided.\nAborting...')
- sys.exit(3)
-
- procDict = get_process_dict(proc_dict_location)
-
- # check if outputDir exist and if not create it
- outputDir = getElement(rdfModule,"outputDir")
- if not os.path.exists(outputDir) and outputDir!='':
- os.system("mkdir -p {}".format(outputDir))
-
- doScale = getElement(rdfModule,"doScale", True)
- intLumi = getElement(rdfModule,"intLumi", True)
-
- # check if the process list is specified, and create graphs for them
- processList = getElement(rdfModule,"processList")
- graph_function = getattr(rdfModule, "build_graph")
- results = [] # all the histograms
- hweights = [] # all the weights
- evtcounts = [] # event count of the input file
- eventsProcessedDict = {} # number of events processed per process, in a potential previous step
- for process in processList:
- fileList, eventList = getProcessInfo(process, getElement(rdfModule,"prodTag"), getElement(rdfModule, "inputDir"))
- if len(fileList) == 0:
- LOGGER.error('No files to process!\nAborting...')
- sys.exit(3)
- processDict={}
- fraction = 1
- output = process
- chunks = 1
- try:
- processDict = processList[process]
- if getElementDict(processList[process], 'fraction') != None: fraction = getElementDict(processList[process], 'fraction')
- if getElementDict(processList[process], 'output') != None: output = getElementDict(processList[process], 'output')
- if getElementDict(processList[process], 'chunks') != None: chunks = getElementDict(processList[process], 'chunks')
- except TypeError:
- LOGGER.warning('No values set for process %s, using default '
- 'values!', process)
- if fraction < 1:fileList = getsubfileList(fileList, eventList, fraction)
-
- # get the number of events processed, in a potential previous step
- fileListRoot = ROOT.vector('string')()
- nevents_meta = 0 # number of events processed in the previous stage (0 if this is the first stage)
- for fileName in fileList:
- fileName = apply_filepath_rewrites(fileName)
- fileListRoot.push_back(fileName)
- if getElement(rdfModule,"prodTag") == None: # skip check for processed events in case of first stage
- tf=ROOT.TFile.Open(str(fileName),"READ")
- tf.cd()
- for key in tf.GetListOfKeys():
- if 'eventsProcessed' == key.GetName():
- nevents_meta += tf.eventsProcessed.GetVal()
- break
- if args.test:
- break
- eventsProcessedDict[process] = nevents_meta
- info_msg = f'Add process "{process}" with:'
- info_msg += f'\n\tfraction = {fraction}'
- info_msg += f'\n\tnFiles = {len(fileListRoot):,}'
- info_msg += f'\n\toutput = {output}\n\tchunks = {chunks}'
- LOGGER.info(info_msg)
-
- df = ROOT.ROOT.RDataFrame("events", fileListRoot)
- evtcount = df.Count()
- res, hweight = graph_function(df, process)
- results.append(res)
- hweights.append(hweight)
- evtcounts.append(evtcount)
-
- LOGGER.info('Starting the event loop...')
- start_time = time.time()
- ROOT.ROOT.RDF.RunGraphs(evtcounts)
- LOGGER.info('Event loop done!')
- elapsed_time = time.time() - start_time
-
- LOGGER.info('Writing out output files...')
- nevents_tot = 0
- for process, res, hweight, evtcount in zip(processList, results, hweights, evtcounts):
- LOGGER.info('Writing out process %s, nEvents processed %s',
- process, f'{evtcount.GetValue():,}')
- fOut = ROOT.TFile(f"{outputDir}/{process}.root", "RECREATE")
-
- # get the cross-sections etc. First try locally, then the procDict
- if 'crossSection' in processList[process]:
- crossSection = processList[process]['crossSection']
- elif process in procDict and 'crossSection' in procDict[process]:
- crossSection = procDict[process]['crossSection']
- else:
- LOGGER.warning('Can\'t find cross-section for process %s in '
- 'processList or procDict!\nUsing default value of 1'
- , process)
- crossSection = 1
-
- if 'kfactor' in processList[process]:
- kfactor = processList[process]['kfactor']
- elif process in procDict and 'kfactor' in procDict[process]:
- kfactor = procDict[process]['kfactor']
- else:
- kfactor = 1
-
- if 'matchingEfficiency' in processList[process]:
- matchingEfficiency = processList[process]['matchingEfficiency']
- elif process in procDict and 'matchingEfficiency' in procDict[process]:
- matchingEfficiency = procDict[process]['matchingEfficiency']
- else:
- matchingEfficiency = 1
-
- eventsProcessed = eventsProcessedDict[process] if eventsProcessedDict[process] != 0 else evtcount.GetValue()
- scale = crossSection*kfactor*matchingEfficiency/eventsProcessed
-
- histsToWrite = {}
- for r in res:
- hist = r.GetValue()
- hName = hist.GetName()
- if hist.GetName() in histsToWrite: # merge if a histogram with the same name already exists
- histsToWrite[hName].Add(hist)
- else:
- histsToWrite[hName] = hist
-
- for hist in histsToWrite.values():
- if doScale:
- hist.Scale(scale*intLumi)
- hist.Write()
-
- # write all meta info to the output file
- p = ROOT.TParameter(int)("eventsProcessed", eventsProcessed)
- p.Write()
- p = ROOT.TParameter(float)("sumOfWeights", hweight.GetValue())
- p.Write()
- p = ROOT.TParameter(float)("intLumi", intLumi)
- p.Write()
- p = ROOT.TParameter(float)("crossSection", crossSection)
- p.Write()
- p = ROOT.TParameter(float)("kfactor", kfactor)
- p.Write()
- p = ROOT.TParameter(float)("matchingEfficiency", matchingEfficiency)
- p.Write()
- nevents_tot += evtcount.GetValue()
-
- info_msg = f"{' SUMMARY ':=^80}\n"
- info_msg += 'Elapsed time (H:M:S): '
- info_msg += time.strftime('%H:%M:%S', time.gmtime(elapsed_time))
- info_msg += '\nEvents processed/second: '
- info_msg += f'{int(nevents_tot/elapsed_time):,}'
- info_msg += f'\nTotal events processed: {nevents_tot:,}'
- info_msg += '\n'
- info_msg += 80 * '='
- info_msg += '\n'
- LOGGER.info(info_msg)
-
-
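
runHistmaker persists its normalisation inputs as TParameter objects, so a later stage can recover them without re-deriving anything. A sketch of reading them back (the file name is hypothetical):

```python
import ROOT

with ROOT.TFile.Open('outputs/p8_ee_ZZ_ecm240.root', 'READ') as tf:
    # the keys written above by runHistmaker
    n_processed = tf.Get('eventsProcessed').GetVal()
    sum_weights = tf.Get('sumOfWeights').GetVal()
    xsec = tf.Get('crossSection').GetVal()
    print(f'{n_processed:,} events, sum of weights {sum_weights}, xsec {xsec}')
```
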
-# __________________________________________________________
-def runPlots(analysisFile):
- import doPlots as dp
- dp.run(analysisFile)
-
-
-def run(parser):
- '''
- Set things in motion.
- '''
-
- args, _ = parser.parse_known_args()
-
- # Check that the analysis file exists
- analysisFile = args.anafile_path
- if not os.path.isfile(analysisFile):
- LOGGER.error('Analysis script %s not found!\nAborting...',
- analysisFile)
- sys.exit(3)
-
- # Load pre-compiled analyzers
- LOGGER.info('Loading analyzers from libFCCAnalyses...')
- ROOT.gSystem.Load("libFCCAnalyses")
- # Is this still needed? As of 01/04/2022 it still seems to be.
- _fcc = ROOT.dummyLoader
-
- # Set verbosity level
- if args.verbose:
- # The ROOT.Experimental.ELogLevel.kInfo verbosity level is roughly
- # equivalent to DEBUG in other logging systems
- LOGGER.debug('Setting verbosity level "kInfo" for RDataFrame...')
- verbosity = ROOT.Experimental.RLogScopedVerbosity(
- ROOT.Detail.RDF.RDFLogChannel(),
- ROOT.Experimental.ELogLevel.kInfo)
- if args.more_verbose:
- LOGGER.debug('Setting verbosity level "kDebug" for RDataFrame...')
- verbosity = ROOT.Experimental.RLogScopedVerbosity(
- ROOT.Detail.RDF.RDFLogChannel(),
- ROOT.Experimental.ELogLevel.kDebug)
- if args.most_verbose:
- LOGGER.debug('Setting verbosity level "kDebug+10" for '
- 'RDataFrame...')
- verbosity = ROOT.Experimental.RLogScopedVerbosity(
- ROOT.Detail.RDF.RDFLogChannel(),
- ROOT.Experimental.ELogLevel.kDebug+10)
-
- # Load the analysis
- analysisFile = os.path.abspath(analysisFile)
- LOGGER.info('Loading analysis file:\n%s', analysisFile)
- rdfSpec = importlib.util.spec_from_file_location("rdfanalysis", analysisFile)
- rdfModule = importlib.util.module_from_spec(rdfSpec)
- rdfSpec.loader.exec_module(rdfModule)
-
- if not hasattr(args, 'command'):
- LOGGER.error('Unknown sub-command "%s"!\nAborting...')
- sys.exit(3)
-
- if args.command == "run":
- if hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
- LOGGER.error('Analysis file ambiguous!\nBoth "RDFanalysis" '
- 'class and "build_graph" function are defined.')
- sys.exit(3)
- elif hasattr(rdfModule, "build_graph") and not hasattr(rdfModule, "RDFanalysis"):
- runHistmaker(args, rdfModule, analysisFile)
- elif not hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
- runStages(args, rdfModule, args.preprocess, analysisFile)
- else:
- LOGGER.error('Analysis file does not contain required '
- 'objects!\nProvide either "RDFanalysis" class or '
- '"build_graph" function.')
- sys.exit(3)
- elif args.command == "final":
- runFinal(rdfModule)
- elif args.command == "plots":
- runPlots(analysisFile)
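
The dispatch above means an analysis file must define exactly one entry point: an RDFanalysis class for the staged runner or a build_graph function for the histmaker. A minimal histmaker-flavoured sketch (process name and histogram are illustrative):

```python
# analysis_histmaker.py -- consumed by `fccanalysis run` via runHistmaker
processList = {'p8_ee_ZZ_ecm240': {'fraction': 0.1}}
outputDir = 'outputs/histmaker'

def build_graph(df, dataset):
    '''Book histograms on the RDataFrame; return (results, sum of weights).'''
    df = df.Define('weight', '1.0')
    weightsum = df.Sum('weight')  # stored as "sumOfWeights" in the output
    results = [df.Histo1D(('h_nevents', '', 1, 0.0, 1.0), 'weight')]
    return results, weightsum
```
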
diff --git a/python/FCCAnalysisSetup.py b/python/FCCAnalysisSetup.py
deleted file mode 100644
index cfcebc5318..0000000000
--- a/python/FCCAnalysisSetup.py
+++ /dev/null
@@ -1,12 +0,0 @@
-#__________________________________________________________
-def setup(mainparser):
- from analysis_builder import setup_analysis
-
- args, _ = mainparser.parse_known_args()
- if args.command == 'init':
- setup_analysis(package=args.package,
- name=args.name,
- author=args.author,
- description=args.description,
- standalone=args.standalone,
- output_dir=args.output_dir)
diff --git a/python/Parsers.py b/python/Parsers.py
deleted file mode 100644
index ae7257a408..0000000000
--- a/python/Parsers.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import argparse
-
-def setup_init_parser(parser):
- publicOptions = parser.add_argument_group('User options')
- publicOptions.add_argument('package', help='name of the analysis package to be built')
- publicOptions.add_argument('--name', help='name of the main analysis utility', default='DummyAnalysis')
- publicOptions.add_argument('--author', help="author's \"name\" (will use git-config if not specified)")
- publicOptions.add_argument('--description', help='analysis package description')
- publicOptions.add_argument('--standalone', action='store_true', help="also add CMake directive to build standalone package", default=False)
- publicOptions.add_argument('--output-dir', help='output directory where the analysis package will be written')
-
-def setup_build_parser(parser):
- publicOptions = parser.add_argument_group('User build options')
- publicOptions.add_argument('-c', '--clean-build',
- action='store_true',
- default=False,
- help='do a clean build')
- publicOptions.add_argument(
- '-j', '--build-threads',
- type=int,
- default=1,
- help='number of threads when building (equivalent to `make -j`)'
- )
-
-
-def setup_test_parser(parser):
- '''
- Adds test options
- '''
- test_args = parser.add_argument_group('Test options')
- test_args.add_argument(
- '-R', '--tests-regex',
- type=str,
- help='Run tests matching regular expression (e.g. run only unit tests '
- 'with "^UT")'
- )
- test_args.add_argument(
- '-E', '--exclude-regex',
- type=str,
- help='Exclude tests matching regular expression'
- )
- test_args.add_argument(
- '-j', '--parallel',
- type=int,
- default=-1,
- help='number of tests running in parallel (equivalent to `ctest -j`)'
- )
-
-
-def setup_pin_parser(parser):
- publicOptions = parser.add_argument_group('User pin options')
- publicOptions.add_argument('-c', '--clear',
- action='store_true',
- default=False,
- help='clear analysis pin')
- publicOptions.add_argument('-f', '--force',
- action='store_true',
- default=False,
- help='force recreate analysis pin')
- publicOptions.add_argument('-s', '--show',
- action='store_true',
- default=False,
- help='show pinned stack')
-
-
-def setup_run_parser(parser):
- '''
- Define command line arguments for the run subcommand.
- '''
- parser.add_argument('anafile_path',
- help='path to analysis file')
- parser.add_argument('--files-list', default=[], nargs='+',
- help='specify input file to bypass the processList')
- parser.add_argument('--output', type=str, default='output.root',
- help='specify output file name to bypass the processList and/or outputList')
- parser.add_argument('--nevents', type=int, default=-1,
- help='specify max number of events to process')
- parser.add_argument('--test', action='store_true', default=False,
- help='run over the test file')
- parser.add_argument('--bench', action='store_true', default=False,
- help='output benchmark results to a JSON file')
- parser.add_argument('--ncpus', type=int, default=-1,
- help='set number of threads')
- parser.add_argument('--final', action='store_true', default=False,
- help='run final analysis (produces final histograms and trees)')
- parser.add_argument('--plots', action='store_true', default=False,
- help='run analysis plots')
- parser.add_argument('--preprocess', action='store_true', default=False,
- help='run preprocessing')
- parser.add_argument('--validate', action='store_true', default=False,
- help='validate a given production')
- parser.add_argument('--rerunfailed', action='store_true', default=False,
- help='rerun failed jobs')
- parser.add_argument('--jobdir', type=str, default='output.root',
- help='specify the batch job directory')
-
- # Internal argument, not to be used by the users
- parser.add_argument('--batch', action='store_true', default=False,
- help=argparse.SUPPRESS)
-
-
-def setup_run_parser_final(parser):
- '''
- Define command line arguments for the final subcommand.
- '''
- parser.add_argument('anafile_path',
- help='path to analysis_final script')
-
-
-def setup_run_parser_plots(parser):
- '''
- Define command line arguments for the plots subcommand.
- '''
- parser.add_argument('anafile_path', help="path to analysis_plots script")
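
Each setup_* helper only decorates the (sub)parser it is handed, so they can be exercised in isolation; a small sketch (assuming the module is importable from sys.path):

```python
import argparse
from Parsers import setup_run_parser  # the module deleted above

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest='command')
setup_run_parser(subparsers.add_parser('run'))

args = parser.parse_args(['run', 'analysis.py', '--nevents', '100'])
print(args.anafile_path, args.nevents)  # analysis.py 100
```
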
diff --git a/python/analysis_builder.py b/python/analysis_builder.py
deleted file mode 100644
index 689c84c403..0000000000
--- a/python/analysis_builder.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import logging
-
-LOGGER = logging.getLogger('FCCAnalyses.analysis_builder')
-
-def find_author():
- from subprocess import getoutput
- return getoutput('git config --global --get user.name') + ' <' + getoutput('git config --global --get user.email') + '>'
-
-def replace_all(input: str, repl) -> str:
- output = input
- for a, b in repl.items():
- output = output.replace(a, b)
- return output
-
-def setup_analysis(package: str,
- author: str='',
- description: str='',
- name: str='',
- standalone: bool=False,
- output_dir: str=''):
- if not author:
- author = find_author()
- if not description:
- description = '[...]'
- elif '\n' in description:
- raise RuntimeError('Multiline description is not supported. Please edit the output analysis header file directly.')
- from subprocess import getoutput
- fccanalyses_path = getoutput('git rev-parse --show-toplevel')
- replacement_dict = {
- '__pkgname__': package,
- '__pkgdesc__': description,
- '__name__': name,
- '__author__': author,
- '__fccpath__': fccanalyses_path
- }
-
- if not output_dir:
- path = f'{fccanalyses_path}/case-studies/{package}'
- else:
- path = output_dir
-
- import os
- for p in [path, f'{path}/src', f'{path}/include', f'{path}/scripts']:
- try:
- os.mkdir(p)
- except FileExistsError:
- LOGGER.warning('FCCAnalysis package "%s" already exists.', package)
- try:
- tmpl_dir = os.path.join(fccanalyses_path, 'templates')
- with open(f'{path}/src/classes.h', 'w') as f:
- f.write(replace_all(open(f'{tmpl_dir}/classes.h', 'r').read(), replacement_dict))
- with open(f'{path}/src/classes_def.xml', 'w') as f:
- f.write(replace_all(open(f'{tmpl_dir}/classes_def.xml', 'r').read(), replacement_dict))
- with open(f'{path}/src/{name}.cc', 'w') as f:
- f.write(replace_all(open(f'{tmpl_dir}/Package.cc', 'r').read(), replacement_dict))
- with open(f'{path}/include/{name}.h', 'w') as f:
- f.write(replace_all(open(f'{tmpl_dir}/Package.h', 'r').read(), replacement_dict))
- with open(f'{path}/scripts/analysis_cfg.py', 'w') as f:
- f.write(replace_all(open(f'{tmpl_dir}/analysis_cfg.py', 'r').read(), replacement_dict))
- if standalone:
- with open(f'{path}/CMakeLists.txt', 'w') as f:
- f.write(replace_all(open(f'{tmpl_dir}/CMakeLists.txt', 'r').read(), replacement_dict))
- except OSError as error:
- LOGGER.error('FCCAnalysis package "%s" creation error:\n%s',
- package, error)
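
replace_all above is a plain sequential str.replace over the placeholder map, for example:

```python
template = 'Package __pkgname__ by __author__: __pkgdesc__'
replacements = {'__pkgname__': 'MyAnalysis',
                '__author__': 'A. Physicist',
                '__pkgdesc__': '[...]'}

output = template
for old, new in replacements.items():  # same loop as replace_all above
    output = output.replace(old, new)
print(output)  # Package MyAnalysis by A. Physicist: [...]
```
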
diff --git a/python/anafile.py b/python/anascript.py
similarity index 92%
rename from python/anafile.py
rename to python/anascript.py
index 34b319069b..dc7ed2cbd3 100644
--- a/python/anafile.py
+++ b/python/anascript.py
@@ -7,15 +7,15 @@
import logging
-LOGGER = logging.getLogger('FCCAnalyses.run')
+LOGGER: logging.Logger = logging.getLogger('FCCAnalyses.run')
-def getElement(rdfModule, element, isFinal=False):
+def get_element(rdf_module, element: str, is_final: bool = False):
'''
Pick up the attribute from the analysis file.
'''
try:
- return getattr(rdfModule, element)
+ return getattr(rdf_module, element)
except AttributeError:
# return default values or crash if mandatory
@@ -27,7 +27,7 @@ def getElement(rdfModule, element, isFinal=False):
elif element == 'analysers':
LOGGER.error('The function <%s> is mandatory in your analysis '
'script!\nAborting...', element)
- if isFinal:
+ if is_final:
LOGGER.error('The function <%s> is not part of the final '
'stage of the analysis!', element)
sys.exit(3)
@@ -35,7 +35,7 @@ def getElement(rdfModule, element, isFinal=False):
elif element == 'output':
LOGGER.error('The function <%s> is mandatory in your analysis '
'script.\nAborting...', element)
- if isFinal:
+ if is_final:
LOGGER.error('The function <%s> is not part of the final '
'stage of the analysis!', element)
sys.exit(3)
@@ -53,7 +53,7 @@ def getElement(rdfModule, element, isFinal=False):
elif element == 'runBatch':
LOGGER.debug('The variable <%s> is optional in your analysis '
'script.\nReturning default value: False', element)
- if isFinal:
+ if is_final:
LOGGER.debug('The option <%s> is not available in the final '
'stage of the analysis.', element)
return False
@@ -68,7 +68,7 @@ def getElement(rdfModule, element, isFinal=False):
LOGGER.debug('The variable <%s> is optional in your analysis '
'script.\nReturning default value: "workday"',
element)
- if isFinal:
+ if is_final:
LOGGER.debug('The option <%s> is not available in the final '
'stage of the analysis.', element)
return 'workday'
@@ -77,7 +77,7 @@ def getElement(rdfModule, element, isFinal=False):
LOGGER.debug('The variable <%s> is optional in your analysis '
'script.\nReturning default value: '
'"group_u_FCC.local_gen"', element)
- if isFinal:
+ if is_final:
LOGGER.debug('The option <%s> is not available in the final '
'stage of the analysis.', element)
return 'group_u_FCC.local_gen'
@@ -85,7 +85,7 @@ def getElement(rdfModule, element, isFinal=False):
elif element == 'outputDirEos':
LOGGER.debug('The variable <%s> is optional in your analysis '
'script.\nReturning empty string.', element)
- if isFinal:
+ if is_final:
LOGGER.debug('The option <%s> is not available in the final '
'stage of the analysis.', element)
return ''
@@ -94,7 +94,7 @@ def getElement(rdfModule, element, isFinal=False):
LOGGER.debug('The variable <%s> is optional in your analysis '
'script.\nReturning default value: "eospublic"',
element)
- if isFinal:
+ if is_final:
LOGGER.debug('The option <%s> is not available in the final '
'stage of the analysis.', element)
return 'eospublic'
@@ -102,7 +102,7 @@ def getElement(rdfModule, element, isFinal=False):
elif element == 'userBatchConfig':
LOGGER.debug('The variable <%s> is optional in your analysis '
'script.\nReturning empty string.', element)
- if isFinal:
+ if is_final:
LOGGER.debug('The option <%s> is not available in the final '
'stage of the analysis.', element)
return ''
@@ -115,13 +115,13 @@ def getElement(rdfModule, element, isFinal=False):
LOGGER.debug('The variable <%s> is optional in your analysis '
'script.\nReturning default test file:\n\t%s',
element, test_file_path)
- if isFinal:
+ if is_final:
LOGGER.debug('The option <%s> is not available in the final '
'stage of the analysis.', element)
return test_file_path
elif element == 'procDict':
- if isFinal:
+ if is_final:
LOGGER.error('The variable <%s> is mandatory in the final '
'stage of the analysis.\nAborting...', element)
sys.exit(3)
@@ -129,7 +129,7 @@ def getElement(rdfModule, element, isFinal=False):
'stages of the analysis', element)
elif element == 'cutList':
- if isFinal:
+ if is_final:
LOGGER.debug('The variable <%s> is optional in your final '
'analysis script.\nReturning empty dictionary.',
element)
@@ -138,7 +138,7 @@ def getElement(rdfModule, element, isFinal=False):
'stages of the analysis', element)
elif element == 'defineList':
- if isFinal:
+ if is_final:
LOGGER.debug('The variable <%s> is optional in your final '
'analysis script.\nReturning empty dictionary.',
element)
@@ -147,7 +147,7 @@ def getElement(rdfModule, element, isFinal=False):
'stages of the analysis', element)
elif element == 'histoList':
- if isFinal:
+ if is_final:
LOGGER.error('The variable <%s> is mandatory in the final '
'stage of the analysis.\nAborting...', element)
sys.exit(3)
@@ -155,7 +155,7 @@ def getElement(rdfModule, element, isFinal=False):
'stages of the analysis', element)
elif element == 'doTree':
- if isFinal:
+ if is_final:
LOGGER.debug('The variable <%s> is optional in your final '
'analysis script.\nReturning default value: '
'False',
@@ -165,7 +165,7 @@ def getElement(rdfModule, element, isFinal=False):
'stages of the analysis', element)
elif element == 'procDictAdd':
- if isFinal:
+ if is_final:
LOGGER.debug('The variable <%s> is optional in your final '
'analysis script.\nReturning empty dictionary.',
element)
@@ -174,7 +174,7 @@ def getElement(rdfModule, element, isFinal=False):
'stages of the analysis', element)
elif element == 'doScale':
- if isFinal:
+ if is_final:
LOGGER.debug('The variable <%s> is optional in the analysis '
'final step/histmaker.\nBy default no scaling is '
'applied.', element)
@@ -183,7 +183,7 @@ def getElement(rdfModule, element, isFinal=False):
'stages of the analysis', element)
elif element == 'intLumi':
- if isFinal:
+ if is_final:
LOGGER.debug('The variable <%s> is optional in the analysis '
'final step/histmaker.\nUsing the default value: '
'1', element)
@@ -192,7 +192,7 @@ def getElement(rdfModule, element, isFinal=False):
'stages of the analysis', element)
elif element == 'saveTabular':
- if isFinal:
+ if is_final:
LOGGER.debug('The variable <%s> is optional in your final '
'analysis script.\nReturning empty dictionary.',
element)
@@ -201,7 +201,7 @@ def getElement(rdfModule, element, isFinal=False):
'stages of the analysis', element)
elif element == 'cutLabels':
- if isFinal:
+ if is_final:
LOGGER.debug('The variable <%s> is optional in your final '
'analysis script.\nReturning empty dictionary.',
element)
@@ -212,7 +212,7 @@ def getElement(rdfModule, element, isFinal=False):
elif element == 'geometryFile':
LOGGER.debug('The variable <%s> is optional in your analysis '
'script.\nReturning empty string.', element)
- if isFinal:
+ if is_final:
LOGGER.debug('The option <%s> is not available in the final '
'stage of the analysis.', element)
return ''
@@ -220,7 +220,7 @@ def getElement(rdfModule, element, isFinal=False):
elif element == 'readoutName':
LOGGER.debug('The variable <%s> is optional in your analysis '
'script.\nReturning empty string.', element)
- if isFinal:
+ if is_final:
LOGGER.debug('The option <%s> is not available in the final '
'stage of the analysis.', element)
return ''
@@ -228,7 +228,7 @@ def getElement(rdfModule, element, isFinal=False):
return None
-def getElementDict(_dict, element):
+def get_element_dict(_dict, element: str):
'''
Returns None if the key is not found in the dictionary.
'''
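
In short: get_element returns the attribute if the analysis script defines it, falls back to a per-attribute default otherwise, and aborts for mandatory ones; get_element_dict simply returns None for a missing key. A sketch of the call-site contract (DummyModule stands in for a loaded analysis script):

```python
from anascript import get_element, get_element_dict

class DummyModule:
    batchQueue = 'longlunch'

# a defined attribute is returned as-is; an absent optional one gets its default
assert get_element(DummyModule, 'batchQueue') == 'longlunch'
assert get_element(DummyModule, 'outputDirEos') == ''

# a missing dictionary key yields None rather than a KeyError
assert get_element_dict({'fraction': 0.5}, 'chunks') is None
```
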
diff --git a/python/anascript.pyi b/python/anascript.pyi
new file mode 100644
index 0000000000..befa2f0a28
--- /dev/null
+++ b/python/anascript.pyi
@@ -0,0 +1,8 @@
+# generated with `stubgen anascript.py`
+
+import logging
+
+LOGGER: logging.Logger
+
+def get_element(rdf_module, element: str, is_final: bool = False): ...
+def get_element_dict(_dict, element: str): ...
diff --git a/python/build_analysis.py b/python/build_analysis.py
index 98498463b5..9eefaf0bce 100644
--- a/python/build_analysis.py
+++ b/python/build_analysis.py
@@ -13,19 +13,19 @@
LOGGER = logging.getLogger('FCCAnalyses.build')
-def run_subprocess(command, run_dir):
+def run_subprocess(command: str, run_dir: str):
'''
Run subprocess in specified directory.
Check only the return value, otherwise keep the subprocess connected to
stdin/stout/stderr.
'''
try:
- proc = subprocess.Popen(command, cwd=run_dir)
- status = proc.wait()
+ with subprocess.Popen(command, cwd=run_dir) as proc:
+ status: int = proc.wait()
- if status != 0:
- LOGGER.error('Error encountered!\nAborting...')
- sys.exit(3)
+ if status != 0:
+ LOGGER.error('Error encountered!\nAborting...')
+ sys.exit(3)
except KeyboardInterrupt:
LOGGER.error('Aborting...')
@@ -67,5 +67,5 @@ def build_analysis(mainparser):
LOGGER.info('Creating install directory...')
os.makedirs(install_path)
- run_subprocess(['make', '-j{}'.format(args.build_threads), 'install'],
+ run_subprocess(['make', f'-j{args.build_threads}', 'install'],
local_dir + '/build')
diff --git a/python/doPlots.py b/python/doPlots.py
deleted file mode 100644
index ee4052431f..0000000000
--- a/python/doPlots.py
+++ /dev/null
@@ -1,712 +0,0 @@
-#!/usr/bin/env python
-'''
-Create plots out of the histograms produced in previous stages
-'''
-import sys
-import os
-import os.path
-import ntpath
-import importlib
-import copy
-import re
-import logging
-import ROOT
-
-ROOT.gROOT.SetBatch(True)
-ROOT.gStyle.SetOptStat(0)
-ROOT.gStyle.SetOptTitle(0)
-
-
-LOGGER = logging.getLogger('FCCAnalyses.plot')
-
-# __________________________________________________________
-def removekey(d, key):
- r = dict(d)
- del r[key]
- return r
-
-def sortedDictValues(dic):
- keys = sorted(dic)
- return [dic[key] for key in keys]
-
-def formatStatUncHist(hists, name, hstyle=3254):
- hTot = hists[0].Clone(name + "_unc")
- for h in hists[1:]:
- hTot.Add(h)
- hTot.SetFillColor(ROOT.kBlack)
- hTot.SetMarkerSize(0)
- hTot.SetLineWidth(0)
- hTot.SetFillStyle(hstyle)
- return hTot
-
-#__________________________________________________________
-def mapHistos(var, label, sel, param, rebin):
- LOGGER.info('Run plots for var:%s label:%s selection:%s',
- var, label, sel)
- signal=param.plots[label]['signal']
- backgrounds=param.plots[label]['backgrounds']
-
- hsignal = {}
- for s in signal:
- hsignal[s]=[]
- for f in signal[s]:
- fin=param.inputDir+f+'_'+sel+'_histo.root'
- if not os.path.isfile(fin):
- LOGGER.info('File %s does not exist!\nSkipping it...', fin)
- else:
- tf=ROOT.TFile(fin)
- h=tf.Get(var)
- hh = copy.deepcopy(h)
- scaleSig=1.
- try:
- scaleSig=param.scaleSig
- except AttributeError:
- LOGGER.info('No scale signal, using 1.')
- param.scaleSig=scaleSig
- LOGGER.info('ScaleSig: %g', scaleSig)
- hh.Scale(param.intLumi*scaleSig)
- hh.Rebin(rebin)
-
- if len(hsignal[s])==0:
- hsignal[s].append(hh)
- else:
- hh.Add(hsignal[s][0])
- hsignal[s][0]=hh
-
-
- hbackgrounds = {}
- for b in backgrounds:
- hbackgrounds[b]=[]
- for f in backgrounds[b]:
- fin=param.inputDir+f+'_'+sel+'_histo.root'
- if not os.path.isfile(fin):
- LOGGER.info('File %s does not exist!\nSkipping it...', fin)
- else:
- tf=ROOT.TFile(fin)
- h=tf.Get(var)
- hh = copy.deepcopy(h)
- hh.Scale(param.intLumi)
- hh.Rebin(rebin)
- if len(hbackgrounds[b])==0:
- hbackgrounds[b].append(hh)
- else:
- hh.Add(hbackgrounds[b][0])
- hbackgrounds[b][0]=hh
-
- for s in hsignal:
- if len(hsignal[s])==0:
- hsignal=removekey(hsignal,s)
-
- for b in hbackgrounds:
- if len(hbackgrounds[b])==0:
- hbackgrounds=removekey(hbackgrounds,b)
-
- return hsignal,hbackgrounds
-
-
-#__________________________________________________________
-def mapHistosFromHistmaker(hName, param, plotCfg):
- rebin = plotCfg['rebin'] if 'rebin' in plotCfg else 1
- LOGGER('Get histograms for %s', hName)
- signal=param.procs['signal']
- backgrounds=param.procs['backgrounds']
- scaleSig = plotCfg['scaleSig'] if 'scaleSig' in plotCfg else 1
-
- hsignal = {}
- for s in signal:
- hsignal[s]=[]
- for f in signal[s]:
- fin=f"{param.inputDir}/{f}.root"
- if not os.path.isfile(fin):
- LOGGER.info('File %s does not exist!\nSkipping it...', fin)
- else:
- tf=ROOT.TFile(fin)
- h=tf.Get(hName)
- hh = copy.deepcopy(h)
- LOGGER.info('ScaleSig: ', scaleSig)
- hh.Scale(param.intLumi*scaleSig)
- hh.Rebin(rebin)
- if len(hsignal[s])==0:
- hsignal[s].append(hh)
- else:
- hh.Add(hsignal[s][0])
- hsignal[s][0]=hh
-
-
- hbackgrounds = {}
- for b in backgrounds:
- hbackgrounds[b]=[]
- for f in backgrounds[b]:
- fin=f"{param.inputDir}/{f}.root"
- if not os.path.isfile(fin):
- LOGGER.info('File %s does not exist!\nSkipping it...', fin)
- else:
- tf=ROOT.TFile(fin)
- h=tf.Get(hName)
- hh = copy.deepcopy(h)
- hh.Scale(param.intLumi)
- hh.Rebin(rebin)
- if len(hbackgrounds[b])==0:
- hbackgrounds[b].append(hh)
- else:
- hh.Add(hbackgrounds[b][0])
- hbackgrounds[b][0]=hh
-
- for s in hsignal:
- if len(hsignal[s])==0:
- hsignal=removekey(hsignal,s)
-
- for b in hbackgrounds:
- if len(hbackgrounds[b])==0:
- hbackgrounds=removekey(hbackgrounds,b)
-
- return hsignal,hbackgrounds
-
-#__________________________________________________________
-def runPlots(var,sel,param,hsignal,hbackgrounds,extralab,splitLeg,plotStatUnc):
-
- ###Below are settings for separate signal and background legends
- if(splitLeg):
- legsize = 0.04*(len(hsignal))
- legsize2 = 0.04*(len(hbackgrounds))
- legCoord = [0.15,0.60 - legsize,0.50,0.62]
- leg2 = ROOT.TLegend(0.60,0.60 - legsize2,0.88,0.62)
- leg2.SetFillColor(0)
- leg2.SetFillStyle(0)
- leg2.SetLineColor(0)
- leg2.SetShadowColor(10)
- leg2.SetTextSize(0.035)
- leg2.SetTextFont(42)
- else:
- legsize = 0.04*(len(hbackgrounds)+len(hsignal))
- legCoord=[0.68, 0.86-legsize, 0.96, 0.88]
- try:
- legCoord=param.legendCoord
- except AttributeError:
- LOGGER.info('No legCoord, using default one...')
- legCoord=[0.68, 0.86-legsize, 0.96, 0.88]
- leg2 = None
-
- leg = ROOT.TLegend(legCoord[0],legCoord[1],legCoord[2],legCoord[3])
- leg.SetFillColor(0)
- leg.SetFillStyle(0)
- leg.SetLineColor(0)
- leg.SetShadowColor(10)
- leg.SetTextSize(0.035)
- leg.SetTextFont(42)
-
- for b in hbackgrounds:
- if(splitLeg):
- leg2.AddEntry(hbackgrounds[b][0],param.legend[b],"f")
- else:
- leg.AddEntry(hbackgrounds[b][0],param.legend[b],"f")
- for s in hsignal:
- leg.AddEntry(hsignal[s][0],param.legend[s],"l")
-
-
- yields={}
- for s in hsignal:
- yields[s]=[param.legend[s],hsignal[s][0].Integral(0,-1), hsignal[s][0].GetEntries()]
- for b in hbackgrounds:
- yields[b]=[param.legend[b],hbackgrounds[b][0].Integral(0,-1), hbackgrounds[b][0].GetEntries()]
-
- histos=[]
- colors=[]
-
- nsig=len(hsignal)
- nbkg=len(hbackgrounds)
-
- for s in hsignal:
- histos.append(hsignal[s][0])
- colors.append(param.colors[s])
-
- for b in hbackgrounds:
- histos.append(hbackgrounds[b][0])
- colors.append(param.colors[b])
-
- intLumiab = param.intLumi/1e+06
- intLumi = f'L = {intLumiab:.0f} ab^{{-1}}'
- if hasattr(param, "intLumiLabel"):
- intLumi = getattr(param, "intLumiLabel")
-
- lt = "FCCAnalyses: FCC-hh Simulation (Delphes)"
- rt = "#sqrt{{s}} = {:.1f} TeV, L = {}".format(param.energy,intLumi)
-
- if 'ee' in param.collider:
- lt = "FCCAnalyses: FCC-ee Simulation (Delphes)"
- rt = "#sqrt{{s}} = {:.1f} GeV, {}".format(param.energy,intLumi)
-
-
-
- customLabel=""
- try:
- customLabel=param.customLabel
- except AttributeError:
- LOGGER.info('No customLabel, using nothing...')
-
- scaleSig=1.
- try:
- scaleSig=param.scaleSig
- except AttributeError:
- LOGGER('No scale signal, using 1.')
- param.scaleSig=scaleSig
-
- if 'AAAyields' in var:
- drawStack(var, 'events', leg, lt, rt, param.formats, param.outdir+"/"+sel, False , True , histos, colors, param.ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, leg2, yields, plotStatUnc)
- return
-
- if 'stack' in param.stacksig:
- if 'lin' in param.yaxis:
- drawStack(var+"_stack_lin", 'events', leg, lt, rt, param.formats, param.outdir+"/"+sel, False , True , histos, colors, param.ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, leg2, yields, plotStatUnc)
- if 'log' in param.yaxis:
- drawStack(var+"_stack_log", 'events', leg, lt, rt, param.formats, param.outdir+"/"+sel, True , True , histos, colors, param.ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, leg2, yields, plotStatUnc)
- if 'lin' not in param.yaxis and 'log' not in param.yaxis:
- LOGGER.info('Unrecognised option in formats, should be '
- '[\'lin\',\'log\']')
-
- if 'nostack' in param.stacksig:
- if 'lin' in param.yaxis:
- drawStack(var+"_nostack_lin", 'events', leg, lt, rt, param.formats, param.outdir+"/"+sel, False , False , histos, colors, param.ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, leg2, yields, plotStatUnc)
- if 'log' in param.yaxis:
- drawStack(var+"_nostack_log", 'events', leg, lt, rt, param.formats, param.outdir+"/"+sel, True , False , histos, colors, param.ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, leg2, yields, plotStatUnc)
- if 'lin' not in param.yaxis and 'log' not in param.yaxis:
- LOGGER.info('Unrecognised option in formats, should be '
- '[\'lin\',\'log\']')
- if 'stack' not in param.stacksig and 'nostack' not in param.stacksig:
- LOGGER.info('Unrecognised option in stacksig, should be '
- '[\'stack\',\'nostack\']')
-
-#__________________________________________________________
-def runPlotsHistmaker(hName, param, plotCfg):
-
- output = plotCfg['output']
- hsignal,hbackgrounds=mapHistosFromHistmaker(hName, param, plotCfg)
-
- if hasattr(param, "splitLeg"):
- splitLeg = param.splitLeg
- else:
- splitLeg = False
-
- if hasattr(param, "plotStatUnc"):
- plotStatUnc = param.plotStatUnc
- else:
- plotStatUnc = False
-
- ###Below are settings for separate signal and background legends
- if(splitLeg):
- legsize = 0.04*(len(hsignal))
- legsize2 = 0.04*(len(hbackgrounds))
- legCoord = [0.15,0.60 - legsize,0.50,0.62]
- leg2 = ROOT.TLegend(0.60,0.60 - legsize2,0.88,0.62)
- leg2.SetFillColor(0)
- leg2.SetFillStyle(0)
- leg2.SetLineColor(0)
- leg2.SetShadowColor(10)
- leg2.SetTextSize(0.035)
- leg2.SetTextFont(42)
- else:
- legsize = 0.04*(len(hbackgrounds)+len(hsignal))
- legCoord=[0.68, 0.86-legsize, 0.96, 0.88]
- try:
- legCoord=param.legendCoord
- except AttributeError:
- LOGGER.info('No legCoord, using default one...')
- legCoord=[0.68, 0.86-legsize, 0.96, 0.88]
- leg2 = None
-
- leg = ROOT.TLegend(legCoord[0],legCoord[1],legCoord[2],legCoord[3])
- leg.SetFillColor(0)
- leg.SetFillStyle(0)
- leg.SetLineColor(0)
- leg.SetShadowColor(10)
- leg.SetTextSize(0.035)
- leg.SetTextFont(42)
-
- for b in hbackgrounds:
- if(splitLeg):
- leg2.AddEntry(hbackgrounds[b][0],param.legend[b],"f")
- else:
- leg.AddEntry(hbackgrounds[b][0],param.legend[b],"f")
- for s in hsignal:
- leg.AddEntry(hsignal[s][0],param.legend[s],"l")
-
-
- yields={}
- for s in hsignal:
- yields[s]=[param.legend[s],hsignal[s][0].Integral(0,-1), hsignal[s][0].GetEntries()]
- for b in hbackgrounds:
- yields[b]=[param.legend[b],hbackgrounds[b][0].Integral(0,-1), hbackgrounds[b][0].GetEntries()]
-
- histos=[]
- colors=[]
-
- nsig=len(hsignal)
- nbkg=len(hbackgrounds)
-
- for s in hsignal:
- histos.append(hsignal[s][0])
- colors.append(param.colors[s])
-
- for b in hbackgrounds:
- histos.append(hbackgrounds[b][0])
- colors.append(param.colors[b])
-
- xtitle = plotCfg['xtitle'] if 'xtitle' in plotCfg else ""
- ytitle = plotCfg['ytitle'] if 'ytitle' in plotCfg else "Events"
- xmin = plotCfg['xmin'] if 'xmin' in plotCfg else -1
- xmax = plotCfg['xmax'] if 'xmax' in plotCfg else -1
- ymin = plotCfg['ymin'] if 'ymin' in plotCfg else -1
- ymax = plotCfg['ymax'] if 'ymax' in plotCfg else -1
- stack = plotCfg['stack'] if 'stack' in plotCfg else False
- logy = plotCfg['logy'] if 'logy' in plotCfg else False
- extralab = plotCfg['extralab'] if 'extralab' in plotCfg else ""
- scaleSig = plotCfg['scaleSig'] if 'scaleSig' in plotCfg else 1
-
-
- intLumiab = param.intLumi/1e+06
- intLumi = f'L = {intLumiab:.0f} ab^{{-1}}'
- if hasattr(param, "intLumiLabel"):
- intLumi = getattr(param, "intLumiLabel")
-
- lt = "FCCAnalyses: FCC-hh Simulation (Delphes)"
- rt = "#sqrt{{s}} = {:.1f} TeV, L = {}".format(param.energy,intLumi)
-
- if 'ee' in param.collider:
- lt = "FCCAnalyses: FCC-ee Simulation (Delphes)"
- rt = "#sqrt{{s}} = {:.1f} GeV, {}".format(param.energy,intLumi)
-
- customLabel=""
- try:
- customLabel=param.customLabel
- except AttributeError:
- LOGGER.info('No customLabel, using nothing...')
-
-
-
- if stack:
- if logy:
- drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir, True , True , histos, colors, param.ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, leg2, yields, plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, xtitle=xtitle)
- else:
- drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir, False , True , histos, colors, param.ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, leg2, yields, plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, xtitle=xtitle)
-
- else:
- if logy:
- drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir, True , False , histos, colors, param.ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, leg2, yields, plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, xtitle=xtitle)
- else:
- drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir, False , False , histos, colors, param.ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, leg2, yields, plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, xtitle=xtitle)
-
-
-
-
-#_____________________________________________________________________________________________________________
-def drawStack(name, ylabel, legend, leftText, rightText, formats, directory, logY, stacksig, histos, colors, ana_tex, extralab, scaleSig, customLabel, nsig, nbkg, legend2=None, yields=None, plotStatUnc=False, xmin=-1, xmax=-1, ymin=-1, ymax=-1, xtitle="", ytitle=""):
-
- canvas = ROOT.TCanvas(name, name, 800, 800)
- canvas.SetLogy(logY)
- canvas.SetTicks(1,1)
- canvas.SetLeftMargin(0.14)
- canvas.SetRightMargin(0.08)
-
- sumhistos = histos[0].Clone()
- iterh = iter(histos)
- next(iterh)
-
- unit = 'GeV'
- if 'TeV' in str(histos[0].GetXaxis().GetTitle()):
- unit = 'TeV'
-
- if unit in str(histos[0].GetXaxis().GetTitle()):
- bwidth=sumhistos.GetBinWidth(1)
- if bwidth.is_integer():
- ylabel+=' / {} {}'.format(int(bwidth), unit)
- else:
- ylabel+=' / {:.2f} {}'.format(bwidth, unit)
-
-
- nbins = 1 if not isinstance(xtitle, list) else len(xtitle)
- h_dummy = ROOT.TH1D("h_dummy", "", nbins, 0, nbins)
- if nbins == 1:
- h_dummy.GetXaxis().SetTitle(histos[0].GetXaxis().GetTitle() if xtitle == "" else xtitle)
- h_dummy.GetYaxis().SetTitleOffset(1.95)
- h_dummy.GetXaxis().SetTitleOffset(1.2*h_dummy.GetXaxis().GetTitleOffset())
- else: # for cutflow plots
- for i,label in enumerate(xtitle): h_dummy.GetXaxis().SetBinLabel(i+1, label)
- h_dummy.GetXaxis().LabelsOption("u")
- h_dummy.GetXaxis().SetLabelSize(1.1*h_dummy.GetXaxis().GetLabelSize())
- h_dummy.GetXaxis().SetLabelOffset(1.5*h_dummy.GetXaxis().GetLabelOffset())
- h_dummy.GetYaxis().SetTitle(ylabel)
-
-
- for h in iterh:
- sumhistos.Add(h)
-
- maxh = sumhistos.GetMaximum()
- minh = sumhistos.GetMinimum()
-
- if logY:
- canvas.SetLogy(1)
-
- # define stacked histo
- hStack = ROOT.THStack("hstack", "")
- hStackBkg = ROOT.THStack("hstackbkg", "")
- hStackSig = ROOT.THStack("hstacksig","")
- BgMCHistYieldsDic = {}
-
- # first plot backgrounds (sorted by the yields)
- for i in range(nsig, nsig+nbkg):
- h = histos[i]
- h.SetLineWidth(1)
- h.SetLineColor(ROOT.kBlack)
- h.SetFillColor(colors[i])
- if h.Integral() > 0:
- BgMCHistYieldsDic[h.Integral()] = h
- else:
- BgMCHistYieldsDic[-1*nbkg] = h
- # sort stack by yields (smallest to largest)
- BgMCHistYieldsDic = sortedDictValues(BgMCHistYieldsDic)
- for h in BgMCHistYieldsDic:
- hStack.Add(h)
- hStackBkg.Add(h)
-
- # add the signal histograms
- for i in range(nsig):
- h = histos[i]
- h.SetLineWidth(3)
- h.SetLineColor(colors[i])
- hStack.Add(h)
- hStackSig.Add(h)
-
-
- if xmin != -1 and xmax != -1:
- h_dummy.GetXaxis().SetLimits(xmin, xmax)
-
- h_dummy.Draw("HIST")
- if stacksig:
- hStack.Draw("HIST SAME")
- if plotStatUnc:
- hUnc_sig_bkg = formatStatUncHist(hStack.GetHists(), "sig_bkg") # sig+bkg uncertainty
- hUnc_sig_bkg.Draw("E2 SAME")
- else:
- hStackBkg.Draw("HIST SAME")
- hStackSig.Draw("HIST SAME NOSTACK")
- if plotStatUnc:
- hUnc_bkg = formatStatUncHist(hStackBkg.GetHists(), "bkg_only") # bkg-only uncertainty
- hUnc_bkg.Draw("E2 SAME")
- for sHist in hStackSig.GetHists():
- hUnc_sig = formatStatUncHist([sHist], "sig", 3245) # sigs uncertainty
- hUnc_sig.Draw("E2 SAME")
-
- # x limits
- if xmin == -1:
- h_tmp = hStack.GetStack().Last()
- xmin = h_tmp.GetBinLowEdge(1)
- if xmax == -1:
- h_tmp = hStack.GetStack().Last()
- xmax = h_tmp.GetBinLowEdge(h_tmp.GetNbinsX()+1)
- h_dummy.GetXaxis().SetLimits(xmin, xmax)
-
- # y limits
- def getMinMaxRange(hists, xmin, xmax):
- hTot = hists[0].Clone(name + "_unc")
- for h in hists[1:]: hTot.Add(h)
- vals = []
- for i in range(0, hTot.GetNbinsX()+1):
- if hTot.GetBinLowEdge(i) > xmin or hTot.GetBinLowEdge(i+1) < xmax:
- if hTot.GetBinContent(i) != 0:
- vals.append(hTot.GetBinContent(i))
- if len(vals) == 0:
- return 1e-5, 1
- return min(vals), max(vals)
-
- if stacksig:
- ymin_, ymax_ = getMinMaxRange(hStack.GetHists(), xmin, xmax)
- else:
- yminSig, ymaxSig = getMinMaxRange(hStackSig.GetHists(), xmin, xmax)
- yminBkg, ymaxBkg = getMinMaxRange(hStackBkg.GetHists(), xmin, xmax)
- ymin_ = min(yminSig, yminBkg)
- ymax_ = max(ymaxSig, ymaxBkg)
- if ymin == -1:
- ymin = ymin_*0.1 if logY else 0
- if ymax == -1:
- ymax = ymax_*1000. if logY else 1.4*ymax_
- if ymin <= 0 and logY:
- LOGGER.error('Log scale can\'t start at: %i', ymin)
- sys.exit(3)
- h_dummy.SetMaximum(ymax)
- h_dummy.SetMinimum(ymin)
-
- legend.Draw()
- if legend2 != None:
- legend2.Draw()
-
- Text = ROOT.TLatex()
- Text.SetNDC()
- Text.SetTextAlign(31);
- Text.SetTextSize(0.04)
-
- text = '#it{' + leftText +'}'
- Text.DrawLatex(0.90, 0.94, text)
-
- text = '#it{'+customLabel+'}'
- Text.SetTextAlign(12);
- Text.SetNDC(ROOT.kTRUE)
- Text.SetTextSize(0.04)
- Text.DrawLatex(0.18, 0.85, text)
-
- rightText = re.split(",", rightText)
- text = '#bf{#it{' + rightText[0] +'}}'
-
- Text.SetTextAlign(12);
- Text.SetNDC(ROOT.kTRUE)
- Text.SetTextSize(0.04)
- Text.DrawLatex(0.18, 0.81, text)
-
- rightText[1]=rightText[1].replace(" ","")
- text = '#bf{#it{' + rightText[1] +'}}'
- Text.SetTextSize(0.035)
- Text.DrawLatex(0.18, 0.76, text)
-
- text = '#bf{#it{' + ana_tex +'}}'
- Text.SetTextSize(0.04)
- Text.DrawLatex(0.18, 0.71, text)
-
- text = '#bf{#it{' + extralab +'}}'
- Text.SetTextSize(0.025)
- Text.DrawLatex(0.18, 0.66, text)
-
- text = '#bf{#it{' + 'Signal scale=' + str(scaleSig)+'}}'
- Text.SetTextSize(0.025)
- if scaleSig!=1:Text.DrawLatex(0.18, 0.63, text)
-
- canvas.RedrawAxis()
- canvas.GetFrame().SetBorderSize( 12 )
- canvas.Modified()
- canvas.Update()
-
- if 'AAAyields' in name:
- dummyh=ROOT.TH1F("","",1,0,1)
- dummyh.SetStats(0)
- dummyh.GetXaxis().SetLabelOffset(999)
- dummyh.GetXaxis().SetLabelSize(0)
- dummyh.GetYaxis().SetLabelOffset(999)
- dummyh.GetYaxis().SetLabelSize(0)
- dummyh.Draw("AH")
- legend.Draw()
-
- Text.SetNDC()
- Text.SetTextAlign(31);
- Text.SetTextSize(0.04)
-
- text = '#it{' + leftText +'}'
- Text.DrawLatex(0.90, 0.92, text)
-
- text = '#bf{#it{' + rightText[0] +'}}'
- Text.SetTextAlign(12);
- Text.SetNDC(ROOT.kTRUE)
- Text.SetTextSize(0.04)
- Text.DrawLatex(0.18, 0.83, text)
-
- text = '#bf{#it{' + rightText[1] +'}}'
- Text.SetTextSize(0.035)
- Text.DrawLatex(0.18, 0.78, text)
-
- text = '#bf{#it{' + ana_tex +'}}'
- Text.SetTextSize(0.04)
- Text.DrawLatex(0.18, 0.73, text)
-
- text = '#bf{#it{' + extralab +'}}'
- Text.SetTextSize(0.025)
- Text.DrawLatex(0.18, 0.68, text)
-
- text = '#bf{#it{' + 'Signal scale=' + str(scaleSig)+'}}'
- Text.SetTextSize(0.04)
- Text.DrawLatex(0.18, 0.55, text)
-
- dy=0
- text = '#bf{#it{' + 'Process' +'}}'
- Text.SetTextSize(0.035)
- Text.DrawLatex(0.18, 0.45, text)
-
- text = '#bf{#it{' + 'Yields' +'}}'
- Text.SetTextSize(0.035)
- Text.DrawLatex(0.5, 0.45, text)
-
- text = '#bf{#it{' + 'Raw MC' +'}}'
- Text.SetTextSize(0.035)
- Text.DrawLatex(0.75, 0.45, text)
-
- for y in yields:
- text = '#bf{#it{' + yields[y][0] +'}}'
- Text.SetTextSize(0.035)
- Text.DrawLatex(0.18, 0.4-dy*0.05, text)
-
- stry=str(yields[y][1])
- stry=stry.split('.')[0]
- text = '#bf{#it{' + stry +'}}'
- Text.SetTextSize(0.035)
- Text.DrawLatex(0.5, 0.4-dy*0.05, text)
-
- stry=str(yields[y][2])
- stry=stry.split('.')[0]
- text = '#bf{#it{' + stry +'}}'
- Text.SetTextSize(0.035)
- Text.DrawLatex(0.75, 0.4-dy*0.05, text)
-
-
- dy+=1
- #canvas.Modified()
- #canvas.Update()
-
-
- printCanvas(canvas, name, formats, directory)
-
-
-
-
-#____________________________________________________
-def printCanvas(canvas, name, formats, directory):
-
- if format != "":
- if not os.path.exists(directory) :
- os.system("mkdir -p "+directory)
- for f in formats:
- outFile = os.path.join(directory, name) + "." + f
- canvas.SaveAs(outFile)
-
-
-
-#__________________________________________________________
-def run(paramFile):
- ROOT.gROOT.SetBatch(True)
- ROOT.gErrorIgnoreLevel = ROOT.kWarning
-
- module_path = os.path.abspath(paramFile)
- module_dir = os.path.dirname(module_path)
- base_name = os.path.splitext(ntpath.basename(paramFile))[0]
-
- sys.path.insert(0, module_dir)
- param = importlib.import_module(base_name)
-
- if hasattr(param, "splitLeg"):
- splitLeg = param.splitLeg
- else:
- splitLeg = False
-
- if hasattr(param, "plotStatUnc"):
- plotStatUnc = param.plotStatUnc
- else:
- plotStatUnc = False
-
- if hasattr(param, "hists"):
- for hName,plotCfg in param.hists.items():
- runPlotsHistmaker(hName, param, plotCfg)
- quit()
-
- counter=0
- for iVar,var in enumerate(param.variables):
- for label, sels in param.selections.items():
- for sel in sels:
- hsignal,hbackgrounds=mapHistos(var,label,sel, param, rebin=param.rebin[iVar] if hasattr(param, "rebin") and len(param.rebin) == len(param.variables) else 1)
- runPlots(var+"_"+label,sel,param,hsignal,hbackgrounds,param.extralabel[sel],splitLeg,plotStatUnc)
- if counter==0: runPlots("AAAyields_"+label,sel,param,hsignal,hbackgrounds,param.extralabel[sel],splitLeg,plotStatUnc)
- counter+=1
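
Both the deleted doPlots.py and its replacement below drive everything from a plain Python parameter file; a minimal sketch of the attributes the run() loop reads (process names and values are illustrative):

```python
# plots_cfg.py -- illustrative values only
import ROOT

intLumi  = 5.0e+06                 # integrated luminosity
collider = 'FCC-ee'                # 'ee' selects the FCC-ee labels
energy   = 240.0
formats  = ['png', 'pdf']
inputDir = 'outputs/final/'
outdir   = 'outputs/plots/'

variables  = ['mz']
rebin      = [2]                   # optional, one entry per variable
selections = {'ZH': ['sel0']}
extralabel = {'sel0': 'No selection'}

plots  = {'ZH': {'signal':      {'ZH': ['p8_ee_ZH_ecm240']},
                 'backgrounds': {'ZZ': ['p8_ee_ZZ_ecm240']}}}
legend = {'ZH': 'ZH', 'ZZ': 'ZZ'}
colors = {'ZH': ROOT.kRed, 'ZZ': ROOT.kGreen + 2}

stacksig = ['stack']
yaxis    = ['lin', 'log']
ana_tex  = 'e^{+}e^{-} #rightarrow ZH'
```
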
diff --git a/python/do_plots.py b/python/do_plots.py
new file mode 100644
index 0000000000..927d2280c0
--- /dev/null
+++ b/python/do_plots.py
@@ -0,0 +1,807 @@
+#!/usr/bin/env python
+'''
+Create plots out of the histograms produced in previous stages
+'''
+import sys
+import os
+import os.path
+import ntpath
+import importlib
+import copy
+import re
+import logging
+import ROOT # type: ignore
+
+ROOT.gROOT.SetBatch(True)
+ROOT.gStyle.SetOptStat(0)
+ROOT.gStyle.SetOptTitle(0)
+
+
+LOGGER = logging.getLogger('FCCAnalyses.plot')
+
+
+# _____________________________________________________________________________
+def removekey(d: dict, key: str) -> dict:
+ '''
+ Remove dictionary element.
+ '''
+ r = dict(d)
+ del r[key]
+ return r
+
+
+def sorted_dict_values(dic: dict) -> list:
+ '''
+ Return the dictionary values sorted by key.
+ '''
+ keys = sorted(dic)
+ return [dic[key] for key in keys]
+
+
+def formatStatUncHist(hists, name, hstyle=3254):
+ hist_tot = hists[0].Clone(name + "_unc")
+ for h in hists[1:]:
+ hist_tot.Add(h)
+ hist_tot.SetFillColor(ROOT.kBlack)
+ hist_tot.SetMarkerSize(0)
+ hist_tot.SetLineWidth(0)
+ hist_tot.SetFillStyle(hstyle)
+ return hist_tot
+
+
+# _____________________________________________________________________________
+def mapHistos(var, label, sel, param, rebin):
+ LOGGER.info('Run plots for var:%s label:%s selection:%s',
+ var, label, sel)
+ signal = param.plots[label]['signal']
+ backgrounds = param.plots[label]['backgrounds']
+
+ hsignal = {}
+ for s in signal:
+ hsignal[s] = []
+ for f in signal[s]:
+ fin = param.inputDir+f+'_'+sel+'_histo.root'
+ if not os.path.isfile(fin):
+ LOGGER.info('File "%s" not found!\nSkipping it...', fin)
+ continue
+
+ with ROOT.TFile(fin, 'READ') as tf:
+ h = tf.Get(var)
+ hh = copy.deepcopy(h)
+ hh.SetDirectory(0)
+ scaleSig = 1.
+ try:
+ scaleSig = param.scaleSig
+ except AttributeError:
+ LOGGER.debug('No scale signal, using 1.')
+ param.scaleSig = scaleSig
+ LOGGER.info('ScaleSig: %g', scaleSig)
+ hh.Scale(param.intLumi*scaleSig)
+ hh.Rebin(rebin)
+
+ if len(hsignal[s]) == 0:
+ hsignal[s].append(hh)
+ else:
+ hh.Add(hsignal[s][0])
+ hsignal[s][0] = hh
+
+ hbackgrounds = {}
+ for b in backgrounds:
+ hbackgrounds[b] = []
+ for f in backgrounds[b]:
+ fin = param.inputDir+f+'_'+sel+'_histo.root'
+ if not os.path.isfile(fin):
+ LOGGER.info('File "%s" not found!\nSkipping it...', fin)
+ continue
+
+ with ROOT.TFile(fin) as tf:
+ h = tf.Get(var)
+ hh = copy.deepcopy(h)
+ hh.SetDirectory(0)
+ hh.Scale(param.intLumi)
+ hh.Rebin(rebin)
+ if len(hbackgrounds[b]) == 0:
+ hbackgrounds[b].append(hh)
+ else:
+ hh.Add(hbackgrounds[b][0])
+ hbackgrounds[b][0] = hh
+
+ for s in hsignal:
+ if len(hsignal[s]) == 0:
+ hsignal = removekey(hsignal, s)
+
+ for b in hbackgrounds:
+ if len(hbackgrounds[b]) == 0:
+ hbackgrounds = removekey(hbackgrounds, b)
+
+ if not hsignal:
+ LOGGER.error('No signal input files found!\nAborting...')
+ sys.exit(3)
+
+ return hsignal, hbackgrounds
+
+
+# _____________________________________________________________________________
+def mapHistosFromHistmaker(hName, param, plotCfg):
+ rebin = plotCfg['rebin'] if 'rebin' in plotCfg else 1
+ LOGGER.info('Get histograms for %s', hName)
+ signal = param.procs['signal']
+ backgrounds = param.procs['backgrounds']
+ scaleSig = plotCfg['scaleSig'] if 'scaleSig' in plotCfg else 1
+
+ hsignal = {}
+ for s in signal:
+ hsignal[s] = []
+ for f in signal[s]:
+ fin = f"{param.inputDir}/{f}.root"
+ if not os.path.isfile(fin):
+ LOGGER.info('File "%s" not found!\nSkipping it...', fin)
+ continue
+
+ with ROOT.TFile(fin) as tf:
+ h = tf.Get(hName)
+ hh = copy.deepcopy(h)
+ hh.SetDirectory(0)
+ LOGGER.info('ScaleSig: %g', scaleSig)
+ hh.Scale(param.intLumi*scaleSig)
+ hh.Rebin(rebin)
+ if len(hsignal[s]) == 0:
+ hsignal[s].append(hh)
+ else:
+ hh.Add(hsignal[s][0])
+ hsignal[s][0] = hh
+
+ hbackgrounds = {}
+ for b in backgrounds:
+ hbackgrounds[b] = []
+ for f in backgrounds[b]:
+ fin = f"{param.inputDir}/{f}.root"
+ if not os.path.isfile(fin):
+ LOGGER.info('File "%s" not found!\nSkipping it...', fin)
+ continue
+
+ with ROOT.TFile(fin) as tf:
+ h = tf.Get(hName)
+ hh = copy.deepcopy(h)
+ hh.SetDirectory(0)
+ hh.Scale(param.intLumi)
+ hh.Rebin(rebin)
+ if len(hbackgrounds[b]) == 0:
+ hbackgrounds[b].append(hh)
+ else:
+ hh.Add(hbackgrounds[b][0])
+ hbackgrounds[b][0] = hh
+
+ for s in hsignal:
+ if len(hsignal[s]) == 0:
+ hsignal = removekey(hsignal, s)
+
+ for b in hbackgrounds:
+ if len(hbackgrounds[b]) == 0:
+ hbackgrounds = removekey(hbackgrounds, b)
+
+ if not hsignal:
+ LOGGER.error('No signal input files found!\nAborting...')
+ sys.exit(3)
+
+ return hsignal, hbackgrounds
+
+
+# _____________________________________________________________________________
+def runPlots(var, sel, param, hsignal, hbackgrounds, extralab, splitLeg,
+ plotStatUnc):
+
+ # Below are settings for separate signal and background legends
+ if splitLeg:
+ legsize = 0.04*(len(hsignal))
+ legsize2 = 0.04*(len(hbackgrounds))
+ legCoord = [0.15, 0.60 - legsize, 0.50, 0.62]
+ leg2 = ROOT.TLegend(0.60, 0.60 - legsize2, 0.88, 0.62)
+ leg2.SetFillColor(0)
+ leg2.SetFillStyle(0)
+ leg2.SetLineColor(0)
+ leg2.SetShadowColor(10)
+ leg2.SetTextSize(0.035)
+ leg2.SetTextFont(42)
+ else:
+ legsize = 0.04*(len(hbackgrounds)+len(hsignal))
+ legCoord = [0.68, 0.86-legsize, 0.96, 0.88]
+ try:
+ legCoord = param.legendCoord
+ except AttributeError:
+ LOGGER.debug('No legCoord, using default one...')
+ legCoord = [0.68, 0.86-legsize, 0.96, 0.88]
+ leg2 = None
+
+ leg = ROOT.TLegend(legCoord[0], legCoord[1], legCoord[2], legCoord[3])
+ leg.SetFillColor(0)
+ leg.SetFillStyle(0)
+ leg.SetLineColor(0)
+ leg.SetShadowColor(10)
+ leg.SetTextSize(0.035)
+ leg.SetTextFont(42)
+
+ for b in hbackgrounds:
+ if splitLeg:
+ leg2.AddEntry(hbackgrounds[b][0], param.legend[b], "f")
+ else:
+ leg.AddEntry(hbackgrounds[b][0], param.legend[b], "f")
+ for s in hsignal:
+ leg.AddEntry(hsignal[s][0], param.legend[s], "l")
+
+ yields = {}
+ for s in hsignal:
+ yields[s] = [param.legend[s],
+ hsignal[s][0].Integral(0, -1),
+ hsignal[s][0].GetEntries()]
+ for b in hbackgrounds:
+ yields[b] = [param.legend[b],
+ hbackgrounds[b][0].Integral(0, -1),
+ hbackgrounds[b][0].GetEntries()]
+
+ histos = []
+ colors = []
+
+ nsig = len(hsignal)
+ nbkg = len(hbackgrounds)
+
+ for s in hsignal:
+ histos.append(hsignal[s][0])
+ colors.append(param.colors[s])
+
+ for b in hbackgrounds:
+ histos.append(hbackgrounds[b][0])
+ colors.append(param.colors[b])
+
+ intLumiab = param.intLumi/1e+06
+ intLumi = f'L = {intLumiab:.0f} ab^{{-1}}'
+ if hasattr(param, "intLumiLabel"):
+ intLumi = getattr(param, "intLumiLabel")
+
+ lt = 'FCCAnalyses: FCC-hh Simulation (Delphes)'
+ rt = f'#sqrt{{s}} = {param.energy:.1f} TeV, L = {intLumi}'
+
+ if 'ee' in param.collider:
+ lt = 'FCCAnalyses: FCC-ee Simulation (Delphes)'
+ rt = f'#sqrt{{s}} = {param.energy:.1f} GeV, {intLumi}'
+
+ customLabel = ""
+ try:
+ customLabel = param.customLabel
+ except AttributeError:
+ LOGGER.debug('No customLabel, using nothing...')
+
+ scaleSig = 1.
+ try:
+ scaleSig = param.scaleSig
+ except AttributeError:
+ LOGGER.debug('No scale signal, using 1.')
+ param.scaleSig = scaleSig
+
+ if 'AAAyields' in var:
+ drawStack(var, 'events', leg, lt, rt, param.formats,
+ param.outdir+"/"+sel, False, True, histos, colors,
+ param.ana_tex, extralab, scaleSig, customLabel, nsig,
+ nbkg, leg2, yields, plotStatUnc)
+ return
+
+ if 'stack' in param.stacksig:
+ if 'lin' in param.yaxis:
+ drawStack(var+"_stack_lin", 'events', leg, lt, rt, param.formats,
+ param.outdir+"/"+sel, False, True, histos, colors,
+ param.ana_tex, extralab, scaleSig, customLabel, nsig,
+ nbkg, leg2, yields, plotStatUnc)
+ if 'log' in param.yaxis:
+ drawStack(var+"_stack_log", 'events', leg, lt, rt, param.formats,
+ param.outdir+"/"+sel, True, True, histos, colors,
+ param.ana_tex, extralab, scaleSig, customLabel, nsig,
+ nbkg, leg2, yields, plotStatUnc)
+ if 'lin' not in param.yaxis and 'log' not in param.yaxis:
+ LOGGER.info('Unrecognised option in yaxis, should be '
+ '[\'lin\',\'log\']')
+
+ if 'nostack' in param.stacksig:
+ if 'lin' in param.yaxis:
+ drawStack(var+"_nostack_lin", 'events', leg, lt, rt, param.formats,
+ param.outdir+"/"+sel, False, False, histos, colors,
+ param.ana_tex, extralab, scaleSig, customLabel, nsig,
+ nbkg, leg2, yields, plotStatUnc)
+ if 'log' in param.yaxis:
+ drawStack(var+"_nostack_log", 'events', leg, lt, rt, param.formats,
+ param.outdir+"/"+sel, True, False, histos, colors,
+ param.ana_tex, extralab, scaleSig, customLabel, nsig,
+ nbkg, leg2, yields, plotStatUnc)
+ if 'lin' not in param.yaxis and 'log' not in param.yaxis:
+ LOGGER.info('Unrecognised option in yaxis, should be '
+ '[\'lin\',\'log\']')
+ if 'stack' not in param.stacksig and 'nostack' not in param.stacksig:
+ LOGGER.info('Unrecognised option in stacksig, should be '
+ '[\'stack\',\'nostack\']')
+
+
+# _____________________________________________________________________________
+def runPlotsHistmaker(hName, param, plotCfg):
+
+ output = plotCfg['output']
+ hsignal, hbackgrounds = mapHistosFromHistmaker(hName, param, plotCfg)
+
+ if hasattr(param, "splitLeg"):
+ splitLeg = param.splitLeg
+ else:
+ splitLeg = False
+
+ if hasattr(param, "plotStatUnc"):
+ plotStatUnc = param.plotStatUnc
+ else:
+ plotStatUnc = False
+
+ # Below are settings for separate signal and background legends
+ if splitLeg:
+ legsize = 0.04*(len(hsignal))
+ legsize2 = 0.04*(len(hbackgrounds))
+ legCoord = [0.15, 0.60 - legsize, 0.50, 0.62]
+ leg2 = ROOT.TLegend(0.60, 0.60 - legsize2, 0.88, 0.62)
+ leg2.SetFillColor(0)
+ leg2.SetFillStyle(0)
+ leg2.SetLineColor(0)
+ leg2.SetShadowColor(10)
+ leg2.SetTextSize(0.035)
+ leg2.SetTextFont(42)
+ else:
+ legsize = 0.04*(len(hbackgrounds)+len(hsignal))
+ legCoord = [0.68, 0.86-legsize, 0.96, 0.88]
+ try:
+ legCoord = param.legendCoord
+ except AttributeError:
+ LOGGER.debug('No legCoord, using default one...')
+ legCoord = [0.68, 0.86-legsize, 0.96, 0.88]
+ leg2 = None
+
+ leg = ROOT.TLegend(legCoord[0], legCoord[1], legCoord[2], legCoord[3])
+ leg.SetFillColor(0)
+ leg.SetFillStyle(0)
+ leg.SetLineColor(0)
+ leg.SetShadowColor(10)
+ leg.SetTextSize(0.035)
+ leg.SetTextFont(42)
+
+ for b in hbackgrounds:
+ if splitLeg:
+ leg2.AddEntry(hbackgrounds[b][0], param.legend[b], "f")
+ else:
+ leg.AddEntry(hbackgrounds[b][0], param.legend[b], "f")
+ for s in hsignal:
+ leg.AddEntry(hsignal[s][0], param.legend[s], "l")
+
+ yields = {}
+ for s in hsignal:
+ yields[s] = [param.legend[s],
+ hsignal[s][0].Integral(0, -1),
+ hsignal[s][0].GetEntries()]
+ for b in hbackgrounds:
+ yields[b] = [param.legend[b],
+ hbackgrounds[b][0].Integral(0, -1),
+ hbackgrounds[b][0].GetEntries()]
+
+ histos = []
+ colors = []
+
+ nsig = len(hsignal)
+ nbkg = len(hbackgrounds)
+
+ for s in hsignal:
+ histos.append(hsignal[s][0])
+ colors.append(param.colors[s])
+
+ for b in hbackgrounds:
+ histos.append(hbackgrounds[b][0])
+ colors.append(param.colors[b])
+
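+ # Optional plot settings read from the plot configuration, with defaults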
+ xtitle = plotCfg['xtitle'] if 'xtitle' in plotCfg else ""
+ ytitle = plotCfg['ytitle'] if 'ytitle' in plotCfg else "Events"
+ xmin = plotCfg['xmin'] if 'xmin' in plotCfg else -1
+ xmax = plotCfg['xmax'] if 'xmax' in plotCfg else -1
+ ymin = plotCfg['ymin'] if 'ymin' in plotCfg else -1
+ ymax = plotCfg['ymax'] if 'ymax' in plotCfg else -1
+ stack = plotCfg['stack'] if 'stack' in plotCfg else False
+ logy = plotCfg['logy'] if 'logy' in plotCfg else False
+ extralab = plotCfg['extralab'] if 'extralab' in plotCfg else ""
+ scaleSig = plotCfg['scaleSig'] if 'scaleSig' in plotCfg else 1
+
+ intLumiab = param.intLumi/1e+06
+ intLumi = f'L = {intLumiab:.0f} ab^{{-1}}'
+ if hasattr(param, "intLumiLabel"):
+ intLumi = param.intLumiLabel
+
+ lt = 'FCCAnalyses: FCC-hh Simulation (Delphes)'
+ rt = f'#sqrt{{s}} = {param.energy:.1f} TeV, {intLumi}'
+
+ if 'ee' in param.collider:
+ lt = 'FCCAnalyses: FCC-ee Simulation (Delphes)'
+ rt = f'#sqrt{{s}} = {param.energy:.1f} GeV, {intLumi}'
+
+ customLabel = ""
+ try:
+ customLabel = param.customLabel
+ except AttributeError:
+ LOGGER.debug('No customLabel, using nothing...')
+
+ if stack:
+ if logy:
+ drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir,
+ True, True, histos, colors, param.ana_tex, extralab,
+ scaleSig, customLabel, nsig, nbkg, leg2, yields,
+ plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
+ xtitle=xtitle)
+ else:
+ drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir,
+ False, True, histos, colors, param.ana_tex, extralab,
+ scaleSig, customLabel, nsig, nbkg, leg2, yields,
+ plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
+ xtitle=xtitle)
+
+ else:
+ if logy:
+ drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir,
+ True, False, histos, colors, param.ana_tex, extralab,
+ scaleSig, customLabel, nsig, nbkg, leg2, yields,
+ plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
+ xtitle=xtitle)
+ else:
+ drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir,
+ False, False, histos, colors, param.ana_tex, extralab,
+ scaleSig, customLabel, nsig, nbkg, leg2, yields,
+ plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax,
+ xtitle=xtitle)
+
+
+# _____________________________________________________________________________
+def drawStack(name, ylabel, legend, leftText, rightText, formats, directory,
+ logY, stacksig, histos, colors, ana_tex, extralab, scaleSig,
+ customLabel, nsig, nbkg, legend2=None, yields=None,
+ plotStatUnc=False, xmin=-1, xmax=-1, ymin=-1, ymax=-1,
+ xtitle=""):
+
+ canvas = ROOT.TCanvas(name, name, 800, 800)
+ canvas.SetLogy(logY)
+ canvas.SetTicks(1, 1)
+ canvas.SetLeftMargin(0.14)
+ canvas.SetRightMargin(0.08)
+
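+ # Sum of all histograms, used below to extract the bin width appended
+ # to the y-axis label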
+ sumhistos = histos[0].Clone()
+ iterh = iter(histos)
+ next(iterh)
+
+ unit = 'GeV'
+ if 'TeV' in str(histos[0].GetXaxis().GetTitle()):
+ unit = 'TeV'
+
+ if unit in str(histos[0].GetXaxis().GetTitle()):
+ bwidth = sumhistos.GetBinWidth(1)
+ if bwidth.is_integer():
+ ylabel += f' / {int(bwidth)} {unit}'
+ else:
+ ylabel += f' / {bwidth:.2f} {unit}'
+
+ nbins = 1 if not isinstance(xtitle, list) else len(xtitle)
+ h_dummy = ROOT.TH1D("h_dummy", "", nbins, 0, nbins)
+ if nbins == 1:
+ h_dummy.GetXaxis().SetTitle(
+ histos[0].GetXaxis().GetTitle() if xtitle == "" else xtitle)
+ h_dummy.GetYaxis().SetTitleOffset(1.95)
+ h_dummy.GetXaxis().SetTitleOffset(
+ 1.2*h_dummy.GetXaxis().GetTitleOffset())
+ else: # for cutflow plots
+ for i, label in enumerate(xtitle):
+ h_dummy.GetXaxis().SetBinLabel(i+1, label)
+ h_dummy.GetXaxis().LabelsOption("u")
+ h_dummy.GetXaxis().SetLabelSize(1.1*h_dummy.GetXaxis().GetLabelSize())
+ h_dummy.GetXaxis().SetLabelOffset(
+ 1.5*h_dummy.GetXaxis().GetLabelOffset())
+ h_dummy.GetYaxis().SetTitle(ylabel)
+
+ for h in iterh:
+ sumhistos.Add(h)
+
+ if logY:
+ canvas.SetLogy(1)
+
+ # define stacked histo
+ hStack = ROOT.THStack("hstack", "")
+ hStackBkg = ROOT.THStack("hstackbkg", "")
+ hStackSig = ROOT.THStack("hstacksig", "")
+ BgMCHistYieldsDic = {}
+
+ # first plot backgrounds (sorted by the yields)
+ for i in range(nsig, nsig+nbkg):
+ h = histos[i]
+ h.SetLineWidth(1)
+ h.SetLineColor(ROOT.kBlack)
+ h.SetFillColor(colors[i])
+ if h.Integral() > 0:
+ BgMCHistYieldsDic[h.Integral()] = h
+ else:
+ BgMCHistYieldsDic[-1*nbkg] = h
+ # sort stack by yields (smallest to largest)
+ BgMCHistYieldsDic = sorted_dict_values(BgMCHistYieldsDic)
+ for h in BgMCHistYieldsDic:
+ hStack.Add(h)
+ hStackBkg.Add(h)
+
+ # add the signal histograms
+ for i in range(nsig):
+ h = histos[i]
+ h.SetLineWidth(3)
+ h.SetLineColor(colors[i])
+ hStack.Add(h)
+ hStackSig.Add(h)
+
+ if xmin != -1 and xmax != -1:
+ h_dummy.GetXaxis().SetLimits(xmin, xmax)
+
+ h_dummy.Draw("HIST")
+ if stacksig:
+ hStack.Draw("HIST SAME")
+ if plotStatUnc:
+ # sig+bkg uncertainty
+ hUnc_sig_bkg = formatStatUncHist(hStack.GetHists(), "sig_bkg")
+ hUnc_sig_bkg.Draw("E2 SAME")
+ else:
+ hStackBkg.Draw("HIST SAME")
+ hStackSig.Draw("HIST SAME NOSTACK")
+ if plotStatUnc:
+ # bkg-only uncertainty
+ hUnc_bkg = formatStatUncHist(hStackBkg.GetHists(), "bkg_only")
+ hUnc_bkg.Draw("E2 SAME")
+ for sHist in hStackSig.GetHists():
+ # sigs uncertainty
+ hUnc_sig = formatStatUncHist([sHist], "sig", 3245)
+ hUnc_sig.Draw("E2 SAME")
+
+ # x limits
+ if xmin == -1:
+ h_tmp = hStack.GetStack().Last()
+ xmin = h_tmp.GetBinLowEdge(1)
+ if xmax == -1:
+ h_tmp = hStack.GetStack().Last()
+ xmax = h_tmp.GetBinLowEdge(h_tmp.GetNbinsX()+1)
+ h_dummy.GetXaxis().SetLimits(xmin, xmax)
+
+ # y limits
+ def get_minmax_range(hists, xmin, xmax):
+ hist_tot = hists[0].Clone(name + "_unc")
+ for h in hists[1:]:
+ hist_tot.Add(h)
+ vals = []
+ for i in range(0, hist_tot.GetNbinsX()+1):
+ if hist_tot.GetBinLowEdge(i) > xmin or \
+ hist_tot.GetBinLowEdge(i+1) < xmax:
+ if hist_tot.GetBinContent(i) != 0:
+ vals.append(hist_tot.GetBinContent(i))
+ if len(vals) == 0:
+ return 1e-5, 1
+ return min(vals), max(vals)
+
+ if stacksig:
+ ymin_, ymax_ = get_minmax_range(hStack.GetHists(), xmin, xmax)
+ else:
+ ymin_sig, ymax_sig = get_minmax_range(hStackSig.GetHists(), xmin, xmax)
+ ymin_bkg, ymax_bkg = get_minmax_range(hStackBkg.GetHists(), xmin, xmax)
+ ymin_ = min(ymin_sig, ymin_bkg)
+ ymax_ = max(ymax_sig, ymax_bkg)
+ if ymin == -1:
+ ymin = ymin_*0.1 if logY else 0
+ if ymax == -1:
+ ymax = ymax_*1000. if logY else 1.4*ymax_
+ if ymin <= 0 and logY:
+ LOGGER.error('Log scale can\'t start at: %i', ymin)
+ sys.exit(3)
+ h_dummy.SetMaximum(ymax)
+ h_dummy.SetMinimum(ymin)
+
+ legend.Draw()
+ if legend2 is not None:
+ legend2.Draw()
+
+ latex = ROOT.TLatex()
+ latex.SetNDC()
+ latex.SetTextAlign(31)
+ latex.SetTextSize(0.04)
+
+ text = '#it{' + leftText + '}'
+ latex.DrawLatex(0.90, 0.94, text)
+
+ text = '#it{'+customLabel+'}'
+ latex.SetTextAlign(12)
+ latex.SetNDC(ROOT.kTRUE)
+ latex.SetTextSize(0.04)
+ latex.DrawLatex(0.18, 0.85, text)
+
+ rightText = re.split(",", rightText)
+ text = '#bf{#it{' + rightText[0] + '}}'
+
+ latex.SetTextAlign(12)
+ latex.SetNDC(ROOT.kTRUE)
+ latex.SetTextSize(0.04)
+ latex.DrawLatex(0.18, 0.81, text)
+
+ rightText[1] = rightText[1].replace(" ", "")
+ text = '#bf{#it{' + rightText[1] + '}}'
+ latex.SetTextSize(0.035)
+ latex.DrawLatex(0.18, 0.76, text)
+
+ text = '#bf{#it{' + ana_tex + '}}'
+ latex.SetTextSize(0.04)
+ latex.DrawLatex(0.18, 0.71, text)
+
+ text = '#bf{#it{' + extralab + '}}'
+ latex.SetTextSize(0.025)
+ latex.DrawLatex(0.18, 0.66, text)
+
+ text = '#bf{#it{' + 'Signal scale=' + str(scaleSig)+'}}'
+ latex.SetTextSize(0.025)
+ if scaleSig != 1:
+ latex.DrawLatex(0.18, 0.63, text)
+
+ canvas.RedrawAxis()
+ canvas.GetFrame().SetBorderSize(12)
+ canvas.Modified()
+ canvas.Update()
+
+ if 'AAAyields' in name:
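+ # Special "yields" canvas: replaces the distribution with a table of
+ # per-process yields and raw MC statistics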
+ dummyh = ROOT.TH1F("", "", 1, 0, 1)
+ dummyh.SetStats(0)
+ dummyh.GetXaxis().SetLabelOffset(999)
+ dummyh.GetXaxis().SetLabelSize(0)
+ dummyh.GetYaxis().SetLabelOffset(999)
+ dummyh.GetYaxis().SetLabelSize(0)
+ dummyh.Draw("AH")
+ legend.Draw()
+
+ latex.SetNDC()
+ latex.SetTextAlign(31)
+ latex.SetTextSize(0.04)
+
+ text = '#it{' + leftText + '}'
+ latex.DrawLatex(0.90, 0.92, text)
+
+ text = '#bf{#it{' + rightText[0] + '}}'
+ latex.SetTextAlign(12)
+ latex.SetNDC(ROOT.kTRUE)
+ latex.SetTextSize(0.04)
+ latex.DrawLatex(0.18, 0.83, text)
+
+ text = '#bf{#it{' + rightText[1] + '}}'
+ latex.SetTextSize(0.035)
+ latex.DrawLatex(0.18, 0.78, text)
+
+ text = '#bf{#it{' + ana_tex + '}}'
+ latex.SetTextSize(0.04)
+ latex.DrawLatex(0.18, 0.73, text)
+
+ text = '#bf{#it{' + extralab + '}}'
+ latex.SetTextSize(0.025)
+ latex.DrawLatex(0.18, 0.68, text)
+
+ text = '#bf{#it{' + 'Signal scale=' + str(scaleSig) + '}}'
+ latex.SetTextSize(0.04)
+ latex.DrawLatex(0.18, 0.55, text)
+
+ dy = 0
+ text = '#bf{#it{' + 'Process' + '}}'
+ latex.SetTextSize(0.035)
+ latex.DrawLatex(0.18, 0.45, text)
+
+ text = '#bf{#it{' + 'Yields' + '}}'
+ latex.SetTextSize(0.035)
+ latex.DrawLatex(0.5, 0.45, text)
+
+ text = '#bf{#it{' + 'Raw MC' + '}}'
+ latex.SetTextSize(0.035)
+ latex.DrawLatex(0.75, 0.45, text)
+
+ for y in yields:
+ text = '#bf{#it{' + yields[y][0] + '}}'
+ latex.SetTextSize(0.035)
+ latex.DrawLatex(0.18, 0.4-dy*0.05, text)
+
+ stry = str(yields[y][1])
+ stry = stry.split('.', maxsplit=1)[0]
+ text = '#bf{#it{' + stry + '}}'
+ latex.SetTextSize(0.035)
+ latex.DrawLatex(0.5, 0.4-dy*0.05, text)
+
+ stry = str(yields[y][2])
+ stry = stry.split('.', maxsplit=1)[0]
+ text = '#bf{#it{' + stry + '}}'
+ latex.SetTextSize(0.035)
+ latex.DrawLatex(0.75, 0.4-dy*0.05, text)
+
+ dy += 1
+ # canvas.Modified()
+ # canvas.Update()
+
+ print_canvas(canvas, name, formats, directory)
+
+
+# _____________________________________________________________________________
+def print_canvas(canvas, name, formats, directory):
+ '''
+ Saving canvas in multiple formats.
+ '''
+
+ if not formats:
+ LOGGER.error('No output formats specified!\nAborting...')
+ sys.exit(3)
+
+ if not os.path.exists(directory):
+ os.system("mkdir -p " + directory)
+
+ for f in formats:
+ out_file = os.path.join(directory, name) + "." + f
+ canvas.SaveAs(out_file)
+
+
+# _____________________________________________________________________________
+def run(script_path):
+ '''
+ Run over all the plots.
+ '''
+ ROOT.gROOT.SetBatch(True)
+ ROOT.gErrorIgnoreLevel = ROOT.kWarning
+
+ module_path = os.path.abspath(script_path)
+ module_dir = os.path.dirname(module_path)
+ base_name = os.path.splitext(ntpath.basename(script_path))[0]
+
+ sys.path.insert(0, module_dir)
+ param = importlib.import_module(base_name)
+
+ split_leg = False
+ if hasattr(param, "splitLeg"):
+ split_leg = param.splitLeg
+
+ plot_stat_unc = False
+ if hasattr(param, "plotStatUnc"):
+ plot_stat_unc = param.plotStatUnc
+
+ if hasattr(param, "hists"):
+ for hist_name, plot_cfg in param.hists.items():
+ runPlotsHistmaker(hist_name, param, plot_cfg)
+ sys.exit()
+
+ counter = 0
+ for var_index, var in enumerate(param.variables):
+ for label, sels in param.selections.items():
+ for sel in sels:
+ rebin_tmp = 1
+ if (hasattr(param, "rebin") and
+ len(param.rebin) == len(param.variables)):
+ rebin_tmp = param.rebin[var_index]
+ hsignal, hbackgrounds = mapHistos(var, label, sel, param,
+ rebin=rebin_tmp)
+ runPlots(var+"_"+label, sel, param, hsignal, hbackgrounds,
+ param.extralabel[sel], split_leg, plot_stat_unc)
+ if counter == 0:
+ runPlots("AAAyields_"+label, sel, param, hsignal,
+ hbackgrounds, param.extralabel[sel], split_leg,
+ plot_stat_unc)
+ counter += 1
+
+
+def do_plots(parser):
+ '''
+ Run plots generation
+ '''
+
+ args, _ = parser.parse_known_args()
+
+ if args.command != 'plots':
+ LOGGER.error('Wrong sub-command!\nAborting...')
+ sys.exit(3)
+
+ if not os.path.isfile(args.script_path):
+ LOGGER.error('Plots script "%s" not found!\nAborting...',
+ args.script_path)
+ sys.exit(3)
+
+ run(args.script_path)
diff --git a/python/init_analysis.py b/python/init_analysis.py
new file mode 100644
index 0000000000..14794164d5
--- /dev/null
+++ b/python/init_analysis.py
@@ -0,0 +1,121 @@
+'''
+Initialize local analysis
+'''
+
+import logging
+import os
+import sys
+from subprocess import getoutput
+
+
+LOGGER = logging.getLogger('FCCAnalyses.init_analysis')
+
+
+def find_author():
+ '''
+ Retrieve the author of the package from the git configuration
+ '''
+ return getoutput('git config --global --get user.name') + \
+ ' <' + getoutput('git config --global --get user.email') + '>'
+
+
+def replace_all(_input: str, repl: dict) -> str:
+ '''
+ Replace all elements of repl in the provided string.
+ '''
+ output = _input
+ for a, b in repl.items():
+ output = output.replace(a, b)
+ return output
+
+
+def create_file(dest_path: str,
+ template_path: str,
+ replacements: dict) -> bool:
+ '''
+ Create file from a template.
+ TODO: exceptions
+ '''
+ with open(template_path, 'r', encoding='utf-8') as template_file:
+ template: str = template_file.read()
+
+ with open(dest_path, 'w', encoding='utf-8') as dest_file:
+ dest_file.write(replace_all(template, replacements))
+
+ return True
+
+
+def setup_analysis(package: str,
+ author: str = '',
+ description: str = '',
+ name: str = '',
+ standalone: bool = False,
+ output_dir: str = ''):
+ '''
+ Generates the analysis package.
+ '''
+ if not author:
+ author = find_author()
+ if not description:
+ description = '[...]'
+ elif '\n' in description:
+ raise RuntimeError('Multiline description is not supported. Please '
+ 'edit the output analysis header file directly.')
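+ # Top-level directory of the FCCAnalyses repository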
+ fccanalyses_path = getoutput('git rev-parse --show-toplevel')
+ replacement_dict = {
+ '__pkgname__': package,
+ '__pkgdesc__': description,
+ '__name__': name,
+ '__author__': author,
+ '__fccpath__': fccanalyses_path
+ }
+
+ if not output_dir:
+ path = f'{fccanalyses_path}/case-studies/{package}'
+ else:
+ path = output_dir
+
+ for p in [path, f'{path}/src', f'{path}/include', f'{path}/scripts']:
+ try:
+ os.mkdir(p)
+ except FileExistsError:
+ LOGGER.warning('FCCAnalysis package "%s" already exists.', package)
+ try:
+ tmpl_dir = os.path.join(fccanalyses_path, 'templates')
+ create_file(f'{path}/src/classes.h', f'{tmpl_dir}/classes.h',
+ replacement_dict)
+ create_file(f'{path}/src/classes_def.xml',
+ f'{tmpl_dir}/classes_def.xml',
+ replacement_dict)
+ create_file(f'{path}/src/{name}.cc', f'{tmpl_dir}/Package.cc',
+ replacement_dict)
+ create_file(f'{path}/include/{name}.h', f'{tmpl_dir}/Package.h',
+ replacement_dict)
+ create_file(f'{path}/scripts/analysis_cfg.py',
+ f'{tmpl_dir}/analysis_cfg.py',
+ replacement_dict)
+ if standalone:
+ create_file(f'{path}/CMakeLists.txt', f'{tmpl_dir}/CMakeLists.txt',
+ replacement_dict)
+ except OSError as error:
+ LOGGER.error('FCCAnalysis package "%s" creation error:\n%s',
+ package, error)
+ sys.exit(3)
+
+
+def init_analysis(mainparser):
+ '''
+ Initialize analysis package
+ '''
+
+ args, _ = mainparser.parse_known_args()
+
+ if args.command != 'init':
+ LOGGER.error('Wrong sub-command!\nAborting...')
+ sys.exit(3)
+
+ setup_analysis(package=args.package,
+ name=args.name,
+ author=args.author,
+ description=args.description,
+ standalone=args.standalone,
+ output_dir=args.output_dir)
diff --git a/python/parsers.py b/python/parsers.py
new file mode 100644
index 0000000000..ce10776ef2
--- /dev/null
+++ b/python/parsers.py
@@ -0,0 +1,174 @@
+'''
+Parsers for the fccanalysis sub-commands
+'''
+
+import argparse
+
+
+def setup_init_parser(parser):
+ '''
+ Arguments for the init sub-command
+ '''
+ init_args = parser.add_argument_group('Init arguments')
+
+ init_args.add_argument('package',
+ help='name of the analysis package to be built')
+ init_args.add_argument('--name',
+ default='DummyAnalysis',
+ help='name of the main analysis utility')
+ init_args.add_argument(
+ '--author',
+ help="author's \"name \" (will use git-config if not "
+ "specified)")
+ init_args.add_argument('--description',
+ help='analysis package description')
+ init_args.add_argument(
+ '--standalone',
+ action='store_true',
+ default=False,
+ help='also add CMake directive to build standalone package')
+ init_args.add_argument(
+ '--output-dir',
+ help='output directory where the analysis package will be written')
+
+
+def setup_build_parser(parser):
+ '''
+ Arguments for the build sub-command
+ '''
+ build_args = parser.add_argument_group('Build arguments')
+ build_args.add_argument('-c', '--clean-build',
+ action='store_true',
+ default=False,
+ help='do a clean build')
+ build_args.add_argument(
+ '-j', '--build-threads',
+ type=int,
+ default=1,
+ help='number of threads when building (equivalent to `make -j`)'
+ )
+
+
+def setup_test_parser(parser):
+ '''
+ Arguments for the test sub-command
+ '''
+ test_args = parser.add_argument_group('Test arguments')
+ test_args.add_argument(
+ '-R', '--tests-regex',
+ type=str,
+ help='Run tests matching regular expression (e.g. run only unit tests '
+ 'with "^UT")'
+ )
+ test_args.add_argument(
+ '-E', '--exclude-regex',
+ type=str,
+ help='Exclude tests matching regular expression'
+ )
+ test_args.add_argument(
+ '-j', '--parallel',
+ type=int,
+ default=-1,
+ help='number of tests running in parallel (equivalent to `ctest -j`)'
+ )
+
+
+def setup_pin_parser(parser):
+ '''
+ Arguments for the pin sub-command
+ '''
+ pin_args = parser.add_argument_group('Pin arguments')
+ pin_args.add_argument('-c', '--clear',
+ action='store_true',
+ default=False,
+ help='clear analysis pin')
+ pin_args.add_argument('-f', '--force',
+ action='store_true',
+ default=False,
+ help='force recreate analysis pin')
+ pin_args.add_argument('-s', '--show',
+ action='store_true',
+ default=False,
+ help='show pinned stack')
+
+
+def setup_run_parser(parser):
+ '''
+ Define command line arguments for the run sub-command.
+ '''
+ parser.add_argument('anascript_path',
+ help='path to analysis script')
+ parser.add_argument('--files-list', default=[], nargs='+',
+ help='specify input file(s) to bypass the processList')
+ parser.add_argument(
+ '--output',
+ type=str,
+ default='output.root',
+ help='specify output file name to bypass the processList and/or '
+ 'outputList')
+ parser.add_argument('--nevents', type=int, default=-1,
+ help='specify max number of events to process')
+ parser.add_argument('--test', action='store_true', default=False,
+ help='run over the test input file')
+ parser.add_argument('--bench', action='store_true', default=False,
+ help='output benchmark results to a JSON file')
+ parser.add_argument('--ncpus', type=int, default=-1,
+ help='set number of threads')
+
+ # Internal argument, not to be used by the users
+ parser.add_argument('--batch', action='store_true', default=False,
+ help=argparse.SUPPRESS)
+
+
+def setup_run_parser_final(parser):
+ '''
+ Define command line arguments for the final sub-command.
+ '''
+ parser.add_argument('anascript_path',
+ help='path to analysis_final script')
+
+
+def setup_run_parser_plots(parser):
+ '''
+ Define command line arguments for the plots sub-command.
+ '''
+ parser.add_argument('script_path', help="path to the plots script")
+
+
+# _____________________________________________________________________________
+def setup_subparsers(subparsers):
+ '''
+ Sets all sub-parsers for all sub-commands
+ '''
+
+ # Create sub-parsers
+ parser_init = subparsers.add_parser(
+ 'init',
+ help="generate a RDataFrame based FCC analysis")
+ parser_build = subparsers.add_parser(
+ 'build',
+ help='build and install local analysis')
+ parser_test = subparsers.add_parser(
+ 'test',
+ help='test whole or a part of the analysis framework')
+ parser_pin = subparsers.add_parser(
+ 'pin',
+ help='pin fccanalyses to the current version of Key4hep stack')
+ parser_run = subparsers.add_parser(
+ 'run',
+ help="run a RDataFrame based FCC analysis")
+ parser_run_final = subparsers.add_parser(
+ 'final',
+ help="run a RDataFrame based FCC analysis final configuration")
+ parser_run_plots = subparsers.add_parser(
+ 'plots',
+ help="run a RDataFrame based FCC analysis plot configuration")
+
+ # Register sub-parsers
+ setup_init_parser(parser_init)
+ setup_build_parser(parser_build)
+ setup_test_parser(parser_test)
+ setup_pin_parser(parser_pin)
+ setup_run_parser(parser_run)
+ setup_run_parser_final(parser_run_final)
+ setup_run_parser_plots(parser_run_plots)
diff --git a/python/process.py b/python/process.py
index 6b5d144e8d..cecbeab115 100644
--- a/python/process.py
+++ b/python/process.py
@@ -7,53 +7,57 @@
import json
import glob
import logging
-import yaml
-import ROOT
+import urllib.request
+import yaml # type: ignore
+import ROOT # type: ignore
ROOT.gROOT.SetBatch(True)
-LOGGER = logging.getLogger('FCCAnalyses.process_info')
+LOGGER: logging.Logger = logging.getLogger('FCCAnalyses.process_info')
-def getEntries(f):
- tf=ROOT.TFile.Open(f,"READ")
- tf.cd()
- tt=tf.Get("events")
- nevents=tt.GetEntries()
- tf.Close()
+
+def get_entries(inpath: str) -> int:
+ '''
+ Get number of entries in the TTree named "events".
+ '''
+ nevents = None
+ with ROOT.TFile(inpath, 'READ') as infile:
+ tt = infile.Get("events")
+ nevents = tt.GetEntries()
return nevents
-def getProcessInfo(process, prodTag, inputDir):
+def get_process_info(process: str,
+ prod_tag: str,
+ input_dir: str) -> tuple[list[str], list[int]]:
'''
Decide where to look for the filelist and eventlist.
'''
- if prodTag is None and inputDir is None:
+ if prod_tag is None and input_dir is None:
LOGGER.error('The variable <prodTag> or <inputDir> is mandatory in '
'your analysis script!\nAborting...')
sys.exit(3)
- elif prodTag is not None and inputDir is not None:
+ elif prod_tag is not None and input_dir is not None:
LOGGER.error('The variables <prodTag> and <inputDir> can\'t be set '
'both at the same time in your analysis script!\n'
'Aborting...')
sys.exit(3)
- if prodTag!=None:
- return getProcessInfoYaml(process, prodTag)
- elif inputDir!=None:
- return getProcessInfoFiles(process, inputDir)
- else:
- LOGGER.error('Problem, why are you here???\nAborting...')
- sys.exit(3)
+ if prod_tag is not None:
+ return get_process_info_yaml(process, prod_tag)
+
+ return get_process_info_files(process, input_dir)
-def getProcessInfoFiles(process, inputDir):
+def get_process_info_files(process: str, input_dir: str) -> tuple[list[str],
+ list[int]]:
'''
Get list of files and events from the specified location
'''
- filelist=[]
- eventlist=[]
- filetest='{}/{}.root'.format(inputDir, process)
- dirtest='{}/{}'.format(inputDir, process)
+ filelist = []
+ eventlist = []
+ filetest = f'{input_dir}/{process}.root'
+ dirtest = f'{input_dir}/{process}'
if os.path.isfile(filetest) and os.path.isdir(dirtest):
LOGGER.error('For process "%s" both a file %s and a directory %s '
@@ -69,19 +73,19 @@ def getProcessInfoFiles(process, inputDir):
if os.path.isfile(filetest):
filelist.append(filetest)
- eventlist.append(getEntries(filetest))
+ eventlist.append(get_entries(filetest))
if os.path.isdir(dirtest):
- flist=glob.glob(dirtest+"/*.root")
+ flist = glob.glob(dirtest+"/*.root")
for f in flist:
filelist.append(f)
- eventlist.append(getEntries(f))
-
+ eventlist.append(get_entries(f))
return filelist, eventlist
-def getProcessInfoYaml(process, prodTag):
+def get_process_info_yaml(process: str, prod_tag: str) -> tuple[list[str],
+ list[int]]:
'''
Get list of files and events from the YAML file
'''
@@ -89,7 +93,8 @@ def getProcessInfoYaml(process, prodTag):
proc_dict_dirs = get_process_dict_dirs()
yamlfilepath = None
for path in proc_dict_dirs:
- yamlfilepath = os.path.join(path, 'yaml', prodTag, process, 'merge.yaml')
+ yamlfilepath = os.path.join(path, 'yaml', prod_tag, process,
+ 'merge.yaml')
if not os.path.isfile(yamlfilepath):
continue
if not yamlfilepath:
@@ -97,7 +102,7 @@ def getProcessInfoYaml(process, prodTag):
'Aborting...')
sys.exit(3)
- with open(yamlfilepath) as ftmp:
+ with open(yamlfilepath, 'r', encoding='utf-8') as ftmp:
try:
doc = yaml.load(ftmp, Loader=yaml.FullLoader)
except yaml.YAMLError as exc:
@@ -113,24 +118,24 @@ def getProcessInfoYaml(process, prodTag):
filelist = [doc['merge']['outdir']+f[0] for f in doc['merge']['outfiles']]
eventlist = [f[1] for f in doc['merge']['outfiles']]
+
return filelist, eventlist
-def get_process_dict(proc_dict_location):
+def get_process_dict(proc_dict_location: str) -> dict:
'''
Pick up the dictionary with process information
'''
if 'http://' in proc_dict_location or 'https://' in proc_dict_location:
LOGGER.info('Getting process dictionary from the web:\n%s',
proc_dict_location)
- import urllib.request
- req = urllib.request.urlopen(proc_dict_location).read()
- try:
- proc_dict = json.loads(req.decode('utf-8'))
- except json.decoder.JSONDecodeError:
- LOGGER.error('Failed to parse process dictionary correctly!\n'
- 'Aborting...')
- sys.exit(3)
+ with urllib.request.urlopen(proc_dict_location) as response:
+ try:
+ proc_dict = json.loads(response.read())
+ except json.decoder.JSONDecodeError:
+ LOGGER.error('Failed to parse process dictionary correctly!\n'
+ 'Aborting...')
+ sys.exit(3)
else:
proc_dict_dirs = get_process_dict_dirs()
@@ -144,7 +149,7 @@ def get_process_dict(proc_dict_location):
LOGGER.info('Loading process dictionary from:\n%s', proc_dict_path)
- with open(proc_dict_path, 'r') as infile:
+ with open(proc_dict_path, 'r', encoding='utf-8') as infile:
try:
proc_dict = json.load(infile)
except json.decoder.JSONDecodeError:
@@ -152,6 +157,9 @@ def get_process_dict(proc_dict_location):
'correctly!\nAborting...')
sys.exit(3)
+ if proc_dict:
+ break
+
if not proc_dict:
LOGGER.error('Process dictionary not found!\nAborting...')
sys.exit(3)
@@ -159,17 +167,17 @@ def get_process_dict(proc_dict_location):
return proc_dict
-def get_process_dict_dirs():
+def get_process_dict_dirs() -> list[str]:
'''
Get search directories for the process dictionaries
'''
- dirs = os.getenv('FCCDICTSDIR')
- if not dirs:
+ dirs_var = os.getenv('FCCDICTSDIR')
+ if dirs_var is None:
LOGGER.error('Environment variable FCCDICTSDIR not defined!\n'
'Was the setup.sh file sourced properly?\n'
'Aborting...')
sys.exit(3)
- dirs = dirs.split(':')
- dirs = [d for d in dirs if d]
+ dirs = dirs_var.split(':')
+ dirs[:] = [d for d in dirs if d]
return dirs
diff --git a/python/process.pyi b/python/process.pyi
new file mode 100644
index 0000000000..b3b12a976b
--- /dev/null
+++ b/python/process.pyi
@@ -0,0 +1,12 @@
+# generated with `stubgen process.py`
+
+import logging
+
+LOGGER: logging.Logger
+
+def get_entries(inpath: str) -> int: ...
+def get_process_info(process: str, prod_tag: str, input_dir: str) -> tuple[list[str], list[int]]: ...
+def get_process_info_files(process: str, input_dir: str) -> tuple[list[str], list[int]]: ...
+def get_process_info_yaml(process: str, prod_tag: str) -> tuple[list[str], list[int]]: ...
+def get_process_dict(proc_dict_location: str) -> dict: ...
+def get_process_dict_dirs() -> list[str]: ...
diff --git a/python/run_analysis.py b/python/run_analysis.py
new file mode 100644
index 0000000000..9ac5355249
--- /dev/null
+++ b/python/run_analysis.py
@@ -0,0 +1,924 @@
+'''
+Run regular stage of an analysis
+'''
+
+import os
+import sys
+import time
+import json
+import shutil
+import logging
+import subprocess
+import importlib.util
+import datetime
+import numpy as np
+
+import ROOT # type: ignore
+from anascript import get_element, get_element_dict
+from process import get_process_info, get_process_dict
+
+LOGGER = logging.getLogger('FCCAnalyses.run')
+
+ROOT.gROOT.SetBatch(True)
+
+
+# _____________________________________________________________________________
+def determine_os(local_dir: str) -> str | None:
+ '''
+ Determines platform on which FCCAnalyses was compiled
+ '''
+ cmake_config_path = local_dir + '/build/CMakeFiles/CMakeConfigureLog.yaml'
+ if not os.path.isfile(cmake_config_path):
+ LOGGER.warning('CMake configuration file was not found!\n'
+ 'Was FCCAnalyses properly built?')
+ return None
+
+ with open(cmake_config_path, 'r', encoding='utf-8') as cmake_config_file:
+ cmake_config = cmake_config_file.read()
+ if 'centos7' in cmake_config:
+ return 'centos7'
+ if 'almalinux9' in cmake_config:
+ return 'almalinux9'
+
+ return None
+
+
+# _____________________________________________________________________________
+def create_condor_config(log_dir: str,
+ process_name: str,
+ build_os: str | None,
+ rdf_module,
+ subjob_scripts: list[str]) -> str:
+ '''
+ Creates contents of condor configuration file.
+ '''
+ cfg = 'executable = $(filename)\n'
+
+ cfg += f'Log = {log_dir}/condor_job.{process_name}.'
+ cfg += '$(ClusterId).$(ProcId).log\n'
+
+ cfg += f'Output = {log_dir}/condor_job.{process_name}.'
+ cfg += '$(ClusterId).$(ProcId).out\n'
+
+ cfg += f'Error = {log_dir}/condor_job.{process_name}.'
+ cfg += '$(ClusterId).$(ProcId).error\n'
+
+ cfg += 'getenv = False\n'
+
+ cfg += 'environment = "LS_SUBCWD={log_dir}"\n' # not sure
+
+ cfg += 'requirements = ( '
+ if build_os == 'centos7':
+ cfg += '(OpSysAndVer =?= "CentOS7") && '
+ if build_os == 'almalinux9':
+ cfg += '(OpSysAndVer =?= "AlmaLinux9") && '
+ if build_os is None:
+ LOGGER.warning('Submitting jobs to default operating system. There '
+ 'may be compatibility issues.')
+ cfg += '(Machine =!= LastRemoteHost) && (TARGET.has_avx2 =?= True) )\n'
+
+ cfg += 'on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)\n'
+
+ cfg += 'max_retries = 3\n'
+
+ cfg += '+JobFlavour = "%s"\n' % get_element(rdf_module, 'batchQueue')
+
+ cfg += '+AccountingGroup = "%s"\n' % get_element(rdf_module, 'compGroup')
+
+ cfg += 'RequestCpus = %i\n' % get_element(rdf_module, "nCPUS")
+
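+ # One job per sub-job script: HTCondor expands the "queue filename
+ # matching files <list>" statement into a job for every listed script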
+ cfg += 'queue filename matching files'
+ for script in subjob_scripts:
+ cfg += ' ' + script
+ cfg += '\n'
+
+ return cfg
+
+
+# _____________________________________________________________________________
+def create_subjob_script(local_dir: str,
+ rdf_module,
+ process_name: str,
+ chunk_num: int,
+ chunk_list: list[list[str]],
+ anapath: str) -> str:
+ '''
+ Creates sub-job script to be run.
+ '''
+
+ output_dir = get_element(rdf_module, "outputDir")
+ output_dir_eos = get_element(rdf_module, "outputDirEos")
+ eos_type = get_element(rdf_module, "eosType")
+ user_batch_config = get_element(rdf_module, "userBatchConfig")
+
+ scr = '#!/bin/bash\n\n'
+ scr += 'source ' + local_dir + '/setup.sh\n\n'
+
+ # add userBatchConfig if any
+ if user_batch_config != '':
+ if not os.path.isfile(user_batch_config):
+ LOGGER.warning('userBatchConfig file can\'t be found! Will not '
+ 'add it to the default config.')
+ else:
+ with open(user_batch_config, 'r', encoding='utf-8') as cfgfile:
+ for line in cfgfile:
+ scr += line + '\n'
+ scr += '\n\n'
+
+ scr += f'mkdir job_{process_name}_chunk_{chunk_num}\n'
+ scr += f'cd job_{process_name}_chunk_{chunk_num}\n\n'
+
+ if not os.path.isabs(output_dir):
+ output_path = os.path.join(output_dir, f'chunk_{chunk_num}.root')
+ else:
+ output_path = os.path.join(output_dir, process_name,
+ f'chunk_{chunk_num}.root')
+
+ scr += local_dir
+ scr += f'/bin/fccanalysis run {anapath} --batch '
+ scr += f'--output {output_path} '
+ scr += '--files-list'
+ for file_path in chunk_list[chunk_num]:
+ scr += f' {file_path}'
+ scr += '\n\n'
+
+ if not os.path.isabs(output_dir) and output_dir_eos == '':
+ final_dest = os.path.join(local_dir, output_dir, process_name,
+ f'chunk_{chunk_num}.root')
+ scr += f'cp {output_path} {final_dest}\n'
+
+ if output_dir_eos != '':
+ final_dest = os.path.join(output_dir_eos,
+ process_name,
+ f'chunk_{chunk_num}.root')
+ final_dest = f'root://{eos_type}.cern.ch/' + final_dest
+ scr += f'xrdcp {output_path} {final_dest}\n'
+
+ return scr
+
+
+# _____________________________________________________________________________
+def get_subfile_list(in_file_list, event_list, fraction):
+ '''
+ Obtain list of files roughly containing the requested fraction of events.
+ '''
+ nevts_total = sum(event_list)
+ nevts_target = int(nevts_total * fraction)
+
+ if nevts_target <= 0:
+ LOGGER.error('The reduction fraction %f is too stringent, no events '
+ 'left!\nAborting...', fraction)
+ sys.exit(3)
+
+ nevts_real = 0
+ out_file_list = []
+ for i, nevts in enumerate(event_list):
+ if nevts_real >= nevts_target:
+ break
+ nevts_real += nevts
+ out_file_list.append(in_file_list[i])
+
+ info_msg = f'Reducing the input file list by fraction "{fraction}" of '
+ info_msg += 'total events:\n\t'
+ info_msg += f'- total number of events: {nevts_total:,}\n\t'
+ info_msg += f'- targeted number of events: {nevts_target:,}\n\t'
+ info_msg += '- number of events in the resulting file list: '
+ info_msg += f'{nevts_real:,}\n\t'
+ info_msg += '- number of files after reduction: '
+ info_msg += str((len(out_file_list)))
+ LOGGER.info(info_msg)
+
+ return out_file_list
+
+
+# _____________________________________________________________________________
+def get_chunk_list(file_list: list[str], chunks: int):
+ '''
+ Get list of input file paths arranged into chunks.
+ '''
+ chunk_list = list(np.array_split(file_list, chunks))
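+ # np.array_split may produce empty chunks when chunks > number of files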
+ return [chunk for chunk in chunk_list if chunk.size > 0]
+
+
+# _____________________________________________________________________________
+def save_benchmark(outfile, benchmark):
+ '''
+ Save benchmark results to a JSON file.
+ '''
+ benchmarks = []
+ try:
+ with open(outfile, 'r', encoding='utf-8') as benchin:
+ benchmarks = json.load(benchin)
+ except OSError:
+ pass
+
+ benchmarks = [b for b in benchmarks if b['name'] != benchmark['name']]
+ benchmarks.append(benchmark)
+
+ with open(outfile, 'w', encoding='utf-8') as benchout:
+ json.dump(benchmarks, benchout, indent=2)
+
+
+# _____________________________________________________________________________
+def submit_job(cmd: str, max_trials: int) -> bool:
+ '''
+ Submit job to condor, retry `max_trials` times.
+ '''
+ for i in range(max_trials):
+ with subprocess.Popen(cmd, shell=True,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines=True) as proc:
+ (stdout, stderr) = proc.communicate()
+
+ if proc.returncode == 0 and len(stderr) == 0:
+ LOGGER.info(stdout)
+ LOGGER.info('GOOD SUBMISSION')
+ return True
+
+ LOGGER.warning('Error while submitting, retrying...\n '
+ 'Trial: %i / %i\n Error: %s',
+ i + 1, max_trials, stderr)
+ time.sleep(10)
+
+ LOGGER.error('Failed submitting after: %i trials!', max_trials)
+ return False
+
+
+# _____________________________________________________________________________
+def initialize(args, rdf_module, anapath: str):
+ '''
+ Common initialization steps.
+ '''
+
+ # for convenience and compatibility with user code
+ ROOT.gInterpreter.Declare("using namespace FCCAnalyses;")
+ geometry_file = get_element(rdf_module, "geometryFile")
+ readout_name = get_element(rdf_module, "readoutName")
+ if geometry_file != "" and readout_name != "":
+ ROOT.CaloNtupleizer.loadGeometry(geometry_file, readout_name)
+
+ # set multithreading (no MT if number of events is specified)
+ ncpus = 1
+ if args.nevents < 0:
+ if isinstance(args.ncpus, int) and args.ncpus >= 1:
+ ncpus = args.ncpus
+ else:
+ ncpus = get_element(rdf_module, "nCPUS")
+ if ncpus < 0: # use all available threads
+ ROOT.EnableImplicitMT()
+ ncpus = ROOT.GetThreadPoolSize()
+ ROOT.ROOT.EnableImplicitMT(ncpus)
+ ROOT.EnableThreadSafety()
+
+ if ROOT.IsImplicitMTEnabled():
+ LOGGER.info('Multithreading enabled. Running over %i threads',
+ ROOT.GetThreadPoolSize())
+ else:
+ LOGGER.info('No multithreading enabled. Running in single thread...')
+
+ # custom header files
+ include_paths = get_element(rdf_module, "includePaths")
+ if include_paths:
+ ROOT.gInterpreter.ProcessLine(".O3")
+ basepath = os.path.dirname(os.path.abspath(anapath)) + "/"
+ for path in include_paths:
+ LOGGER.info('Loading %s...', path)
+ ROOT.gInterpreter.Declare(f'#include "{basepath}/{path}"')
+
+ # check if analyses plugins need to be loaded before anything
+ # still in use?
+ analyses_list = get_element(rdf_module, "analysesList")
+ if analyses_list:
+ _ana = []
+ for analysis in analyses_list:
+ LOGGER.info('Load cxx analyzers from %s...', analysis)
+ if analysis.startswith('libFCCAnalysis_'):
+ ROOT.gSystem.Load(analysis)
+ else:
+ ROOT.gSystem.Load(f'libFCCAnalysis_{analysis}')
+ if not hasattr(ROOT, analysis):
+ LOGGER.error('Analysis %s not properly loaded!\nAborting...',
+ analysis)
+ sys.exit(3)
+ _ana.append(getattr(ROOT, analysis).dictionary)
+
+
+# _____________________________________________________________________________
+def run_rdf(rdf_module,
+ input_list: list[str],
+ out_file: str,
+ args) -> int:
+ '''
+ Create RDataFrame and snapshot it.
+ '''
+ df = ROOT.RDataFrame("events", input_list)
+
+ # limit number of events processed
+ if args.nevents > 0:
+ df = df.Range(0, args.nevents)
+
+ try:
+ df2 = get_element(rdf_module.RDFanalysis, "analysers")(df)
+
+ branch_list = ROOT.vector('string')()
+ blist = get_element(rdf_module.RDFanalysis, "output")()
+ for bname in blist:
+ branch_list.push_back(bname)
+
+ df2.Snapshot("events", out_file, branch_list)
+ except Exception as excp:
+ LOGGER.error('During the execution of the analysis file exception '
+ 'occurred:\n%s', excp)
+ sys.exit(3)
+
+ return df2.Count()
+
+
+# _____________________________________________________________________________
+def send_to_batch(rdf_module, chunk_list, process, anapath: str):
+ '''
+ Send jobs to HTCondor batch system.
+ '''
+ local_dir = os.environ['LOCAL_DIR']
+ current_date = datetime.datetime.fromtimestamp(
+ datetime.datetime.now().timestamp()).strftime('%Y-%m-%d_%H-%M-%S')
+ log_dir = os.path.join(local_dir, 'BatchOutputs', current_date, process)
+ if not os.path.exists(log_dir):
+ os.system(f'mkdir -p {log_dir}')
+
+ # Making sure the FCCAnalyses libraries are compiled and installed
+ try:
+ subprocess.check_output(['make', 'install'],
+ cwd=local_dir+'/build',
+ stderr=subprocess.DEVNULL
+ )
+ except subprocess.CalledProcessError:
+ LOGGER.error('The FCCAnalyses libraries are not properly built and '
+ 'installed!\nAborting job submission...')
+ sys.exit(3)
+
+ subjob_scripts = []
+ for ch in range(len(chunk_list)):
+ subjob_script_path = os.path.join(log_dir,
+ f'job_{process}_chunk_{ch}.sh')
+ subjob_scripts.append(subjob_script_path)
+
+ for i in range(3):
+ try:
+ with open(subjob_script_path, 'w', encoding='utf-8') as ofile:
+ subjob_script = create_subjob_script(local_dir,
+ rdf_module,
+ process,
+ ch,
+ chunk_list,
+ anapath)
+ ofile.write(subjob_script)
+ except IOError as e:
+ if i < 2:
+ LOGGER.warning('I/O error(%i): %s', e.errno, e.strerror)
+ else:
+ LOGGER.error('I/O error(%i): %s', e.errno, e.strerror)
+ sys.exit(3)
+ else:
+ break
+ time.sleep(10)
+ subprocess.getstatusoutput(f'chmod 777 {subjob_script_path}')
+
+ LOGGER.debug('Sub-job scripts to be run:\n - %s',
+ '\n - '.join(subjob_scripts))
+
+ condor_config_path = f'{log_dir}/job_desc_{process}.cfg'
+
+ for i in range(3):
+ try:
+ with open(condor_config_path, 'w', encoding='utf-8') as cfgfile:
+ condor_config = create_condor_config(log_dir,
+ process,
+ determine_os(local_dir),
+ rdf_module,
+ subjob_scripts)
+ cfgfile.write(condor_config)
+ except IOError as e:
+ LOGGER.warning('I/O error(%i): %s', e.errno, e.strerror)
+ if i == 2:
+ sys.exit(3)
+ else:
+ break
+ time.sleep(10)
+ subprocess.getstatusoutput(f'chmod 777 {condor_config_path}')
+
+ batch_cmd = f'condor_submit {condor_config_path}'
+ LOGGER.info('Batch command:\n %s', batch_cmd)
+ success = submit_job(batch_cmd, 10)
+ if not success:
+ sys.exit(3)
+
+
+# _____________________________________________________________________________
+def apply_filepath_rewrites(filepath: str) -> str:
+ '''
+ Apply path rewrites if applicable.
+ '''
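+ # Examples of the rewrites applied below:
+ # /eos/experiment/... -> root://eospublic.cern.ch//eos/experiment/...
+ # /eos/user/... -> root://eosuser.cern.ch//eos/user/...
+ # /eos/home-j/... -> root://eosuser.cern.ch//eos/user/j/...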
+ # Stripping leading and trailing white spaces
+ filepath_stripped = filepath.strip()
+ # Stripping leading and trailing slashes
+ filepath_stripped = filepath_stripped.strip('/')
+
+ # Splitting the path along slashes
+ filepath_splitted = filepath_stripped.split('/')
+
+ if len(filepath_splitted) > 1 and filepath_splitted[0] == 'eos':
+ if filepath_splitted[1] == 'experiment':
+ filepath = 'root://eospublic.cern.ch//' + filepath_stripped
+ elif filepath_splitted[1] == 'user':
+ filepath = 'root://eosuser.cern.ch//' + filepath_stripped
+ elif 'home-' in filepath_splitted[1]:
+ filepath = 'root://eosuser.cern.ch//eos/user/' + \
+ filepath_stripped.replace('eos/home-', '')
+ else:
+ LOGGER.warning('Unknown EOS path type!\nPlease check with the '
+ 'developers as this might impact performance of '
+ 'the analysis.')
+ return filepath
+
+
+# _____________________________________________________________________________
+def run_local(rdf_module, infile_list, args):
+ '''
+ Run analysis locally.
+ '''
+ # Create list of files to be processed
+ info_msg = 'Creating dataframe object from files:\n\t'
+ file_list = ROOT.vector('string')()
+ # Amount of events processed in previous stage (= 0 if it is the first
+ # stage)
+ nevents_orig = 0
+ # The amount of events in the input file(s)
+ nevents_local = 0
+ for filepath in infile_list:
+
+ filepath = apply_filepath_rewrites(filepath)
+
+ file_list.push_back(filepath)
+ info_msg += f'- {filepath}\t\n'
+ infile = ROOT.TFile.Open(filepath, 'READ')
+ try:
+ nevents_orig += infile.Get('eventsProcessed').GetVal()
+ except AttributeError:
+ pass
+
+ try:
+ nevents_local += infile.Get("events").GetEntries()
+ except AttributeError:
+ LOGGER.error('Input file:\n%s\nis missing events TTree!\n'
+ 'Aborting...', filepath)
+ infile.Close()
+ sys.exit(3)
+ infile.Close()
+
+ LOGGER.info(info_msg)
+
+ # Adjust number of events in case --nevents was specified
+ if args.nevents > 0 and args.nevents < nevents_local:
+ nevents_local = args.nevents
+
+ if nevents_orig > 0:
+ LOGGER.info('Number of events:\n\t- original: %s\n\t- local: %s',
+ f'{nevents_orig:,}', f'{nevents_local:,}')
+ else:
+ LOGGER.info('Number of local events: %s', f'{nevents_local:,}')
+
+ output_dir = get_element(rdf_module, "outputDir")
+ if not args.batch:
+ if os.path.isabs(args.output):
+ LOGGER.warning('Provided output path is absolute, "outputDir" '
+ 'from analysis script will be ignored!')
+ outfile_path = os.path.join(output_dir, args.output)
+ else:
+ outfile_path = args.output
+ LOGGER.info('Output file path:\n%s', outfile_path)
+
+ # Run RDF
+ start_time = time.time()
+ outn = run_rdf(rdf_module, file_list, outfile_path, args)
+ outn = outn.GetValue()
+
+ with ROOT.TFile(outfile_path, 'update') as outfile:
+ param = ROOT.TParameter(int)(
+ 'eventsProcessed',
+ nevents_orig if nevents_orig != 0 else nevents_local)
+ param.Write()
+ outfile.Write()
+
+ elapsed_time = time.time() - start_time
+ info_msg = f"{' SUMMARY ':=^80}\n"
+ info_msg += 'Elapsed time (H:M:S): '
+ info_msg += time.strftime('%H:%M:%S', time.gmtime(elapsed_time))
+ info_msg += '\nEvents processed/second: '
+ info_msg += f'{int(nevents_local/elapsed_time):,}'
+ info_msg += f'\nTotal events processed: {int(nevents_local):,}'
+ info_msg += f'\nNo. result events: {int(outn):,}'
+ if nevents_local > 0:
+ info_msg += f'\nReduction factor local: {outn/nevents_local}'
+ if nevents_orig > 0:
+ info_msg += f'\nReduction factor total: {outn/nevents_orig}'
+ info_msg += '\n'
+ info_msg += 80 * '='
+ info_msg += '\n'
+ LOGGER.info(info_msg)
+
+ if args.bench:
+ analysis_name = get_element(rdf_module, 'analysisName')
+ if not analysis_name:
+ analysis_name = args.anascript_path
+
+ bench_time = {}
+ bench_time['name'] = 'Time spent running the analysis: '
+ bench_time['name'] += analysis_name
+ bench_time['unit'] = 'Seconds'
+ bench_time['value'] = elapsed_time
+ bench_time['range'] = 10
+ bench_time['extra'] = 'Analysis path: ' + args.anascript_path
+ save_benchmark('benchmarks_smaller_better.json', bench_time)
+
+ bench_evt_per_sec = {}
+ bench_evt_per_sec['name'] = 'Events processed per second: '
+ bench_evt_per_sec['name'] += analysis_name
+ bench_evt_per_sec['unit'] = 'Evt/s'
+ bench_evt_per_sec['value'] = nevents_local / elapsed_time
+ bench_evt_per_sec['range'] = 1000
+ bench_evt_per_sec['extra'] = 'Analysis path: ' + args.anascript_path
+ save_benchmark('benchmarks_bigger_better.json', bench_evt_per_sec)
+
+
+# _____________________________________________________________________________
+def run_stages(args, rdf_module, anapath):
+ '''
+ Run regular stage.
+ '''
+
+ # Set ncpus, load header files, custom dicts, ...
+ initialize(args, rdf_module, anapath)
+
+ # Check if outputDir exists and if not create it
+ output_dir = get_element(rdf_module, "outputDir")
+ if not os.path.exists(output_dir) and output_dir:
+ os.system(f'mkdir -p {output_dir}')
+
+ # Check if outputDirEos exists and if not create it
+ output_dir_eos = get_element(rdf_module, "outputDirEos")
+ if not os.path.exists(output_dir_eos) and output_dir_eos:
+ os.system(f'mkdir -p {output_dir_eos}')
+
+ # Check if test mode is specified, and if so run the analysis on it (this
+ # will exit after)
+ if args.test:
+ LOGGER.info('Running over test file...')
+ testfile_path = get_element(rdf_module, "testFile")
+ directory, _ = os.path.split(args.output)
+ if directory:
+ os.system(f'mkdir -p {directory}')
+ run_local(rdf_module, [testfile_path], args)
+ sys.exit(0)
+
+ # Check if files are specified, and if so run the analysis on it/them (this
+ # will exit after)
+ if len(args.files_list) > 0:
+ LOGGER.info('Running over files provided in command line argument...')
+ directory, _ = os.path.split(args.output)
+ if directory:
+ os.system(f'mkdir -p {directory}')
+ run_local(rdf_module, args.files_list, args)
+ sys.exit(0)
+
+ # Check if batch mode is available
+ run_batch = get_element(rdf_module, 'runBatch')
+ if run_batch and shutil.which('condor_q') is None:
+ LOGGER.error('HTCondor tools can\'t be found!\nAborting...')
+ sys.exit(3)
+
+ # Check if the process list is specified
+ process_list = get_element(rdf_module, 'processList')
+
+ for process_name in process_list:
+ file_list, event_list = get_process_info(
+ process_name,
+ get_element(rdf_module, "prodTag"),
+ get_element(rdf_module, "inputDir"))
+
+ if len(file_list) <= 0:
+ LOGGER.error('No files to process!\nAborting...')
+ sys.exit(3)
+
+ # Determine the fraction of the input to be processed
+ fraction = 1
+ if get_element_dict(process_list[process_name], 'fraction'):
+ fraction = get_element_dict(process_list[process_name], 'fraction')
+ # Put together output path
+ output_stem = process_name
+ if get_element_dict(process_list[process_name], 'output'):
+ output_stem = get_element_dict(process_list[process_name],
+ 'output')
+ # Determine the number of chunks the output will be split into
+ chunks = 1
+ if get_element_dict(process_list[process_name], 'chunks'):
+ chunks = get_element_dict(process_list[process_name], 'chunks')
+
+ info_msg = f'Adding process "{process_name}" with:'
+ if fraction < 1:
+ info_msg += f'\n\t- fraction: {fraction}'
+ info_msg += f'\n\t- number of files: {len(file_list):,}'
+ info_msg += f'\n\t- output stem: {output_stem}'
+ if chunks > 1:
+ info_msg += f'\n\t- number of chunks: {chunks}'
+
+ if fraction < 1:
+ file_list = get_subfile_list(file_list, event_list, fraction)
+
+ chunk_list = [file_list]
+ if chunks > 1:
+ chunk_list = get_chunk_list(file_list, chunks)
+ LOGGER.info('Number of output files: %s', f'{len(chunk_list):,}')
+
+ # Create directory if more than 1 chunk
+ if chunks > 1:
+ output_directory = os.path.join(output_dir, output_stem)
+
+ if not os.path.exists(output_directory):
+ os.system(f'mkdir -p {output_directory}')
+
+ if run_batch:
+ # Sending to the batch system
+ LOGGER.info('Running on the batch...')
+ if len(chunk_list) == 1:
+ LOGGER.warning('\033[4m\033[1m\033[91mRunning on batch with '
+ 'only one chunk might not be optimal\033[0m')
+
+ send_to_batch(rdf_module, chunk_list, process_name, anapath)
+
+ else:
+ # Running locally
+ LOGGER.info('Running locally...')
+ if len(chunk_list) == 1:
+ args.output = f'{output_stem}.root'
+ run_local(rdf_module, chunk_list[0], args)
+ else:
+ for index, chunk in enumerate(chunk_list):
+ args.output = f'{output_stem}/chunk{index}.root'
+ run_local(rdf_module, chunk, args)
+
+
+def run_histmaker(args, rdf_module, anapath):
+ '''
+ Run the analysis using histmaker (all stages integrated into one).
+ '''
+
+ # set ncpus, load header files, custom dicts, ...
+ initialize(args, rdf_module, anapath)
+
+ # load process dictionary
+ proc_dict_location = get_element(rdf_module, "procDict", True)
+ if not proc_dict_location:
+ LOGGER.error('Location of the procDict not provided.\nAborting...')
+ sys.exit(3)
+
+ proc_dict = get_process_dict(proc_dict_location)
+
+ # check if outputDir exists and if not create it
+ output_dir = get_element(rdf_module, "outputDir")
+ if not os.path.exists(output_dir) and output_dir != '':
+ os.system(f'mkdir -p {output_dir}')
+
+ do_scale = get_element(rdf_module, "doScale", True)
+ int_lumi = get_element(rdf_module, "intLumi", True)
+
+ # check if the process list is specified, and create graphs for them
+ process_list = get_element(rdf_module, "processList")
+ graph_function = getattr(rdf_module, "build_graph")
+ results = [] # all the histograms
+ hweights = [] # all the weights
+ evtcounts = [] # event count of the input file
+ # number of events processed per process, in a potential previous step
+ events_processed_dict = {}
+ for process in process_list:
+ file_list, event_list = get_process_info(
+ process,
+ get_element(rdf_module, "prodTag"),
+ get_element(rdf_module, "inputDir"))
+ if len(file_list) == 0:
+ LOGGER.error('No files to process!\nAborting...')
+ sys.exit(3)
+ fraction = 1
+ output = process
+ chunks = 1
+ try:
+ if get_element_dict(process_list[process], 'fraction') is not None:
+ fraction = get_element_dict(process_list[process], 'fraction')
+ if get_element_dict(process_list[process], 'output') is not None:
+ output = get_element_dict(process_list[process], 'output')
+ if get_element_dict(process_list[process], 'chunks') is not None:
+ chunks = get_element_dict(process_list[process], 'chunks')
+ except TypeError:
+ LOGGER.warning('No values set for process %s, will use default '
+ 'values!', process)
+ if fraction < 1:
+ file_list = get_subfile_list(file_list, event_list, fraction)
+
+ # get the number of events processed, in a potential previous step
+ file_list_root = ROOT.vector('string')()
+ # amount of events processed in previous stage (= 0 if it is the first
+ # stage)
+ nevents_meta = 0
+ for file_name in file_list:
+ file_name = apply_filepath_rewrites(file_name)
+ file_list_root.push_back(file_name)
+ # Skip check for processed events in case of first stage
+ if get_element(rdf_module, "prodTag") is None:
+ infile = ROOT.TFile(str(file_name), 'READ')
+ for key in infile.GetListOfKeys():
+ if 'eventsProcessed' == key.GetName():
+ nevents_meta += infile.eventsProcessed.GetVal()
+ break
+ infile.Close()
+ if args.test:
+ break
+ events_processed_dict[process] = nevents_meta
+ info_msg = f'Add process "{process}" with:'
+ info_msg += f'\n\tfraction = {fraction}'
+ info_msg += f'\n\tnFiles = {len(file_list_root):,}'
+ info_msg += f'\n\toutput = {output}\n\tchunks = {chunks}'
+ LOGGER.info(info_msg)
+
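+ # Book the computation graph lazily; the event loops are triggered all
+ # at once by ROOT.RDF.RunGraphs() below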
+ df = ROOT.ROOT.RDataFrame("events", file_list_root)
+ evtcount = df.Count()
+ res, hweight = graph_function(df, process)
+ results.append(res)
+ hweights.append(hweight)
+ evtcounts.append(evtcount)
+
+ LOGGER.info('Starting the event loop...')
+ start_time = time.time()
+ ROOT.ROOT.RDF.RunGraphs(evtcounts)
+ LOGGER.info('Event loop done!')
+ elapsed_time = time.time() - start_time
+
+ LOGGER.info('Writing out output files...')
+ nevents_tot = 0
+ for process, res, hweight, evtcount in zip(process_list,
+ results,
+ hweights,
+ evtcounts):
+ # get the cross-sections etc. First try locally, then the procDict
+ if 'crossSection' in process_list[process]:
+ cross_section = process_list[process]['crossSection']
+ elif process in proc_dict and 'crossSection' in proc_dict[process]:
+ cross_section = proc_dict[process]['crossSection']
+ else:
+ LOGGER.warning('Can\'t find cross-section for process %s in '
+ 'processList or procDict!\nUsing default value '
+ 'of 1', process)
+ cross_section = 1
+
+ if 'kfactor' in process_list[process]:
+ kfactor = process_list[process]['kfactor']
+ elif process in proc_dict and 'kfactor' in proc_dict[process]:
+ kfactor = proc_dict[process]['kfactor']
+ else:
+ kfactor = 1
+
+ if 'matchingEfficiency' in process_list[process]:
+ matching_efficiency = process_list[process]['matchingEfficiency']
+ elif process in proc_dict \
+ and 'matchingEfficiency' in proc_dict[process]:
+ matching_efficiency = proc_dict[process]['matchingEfficiency']
+ else:
+ matching_efficiency = 1
+
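+ # Per-event weight: cross-section x k-factor x matching efficiency,
+ # divided by the number of processed events; multiplied by the
+ # integrated luminosity below when doScale is set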
+ events_processed = events_processed_dict[process] \
+ if events_processed_dict[process] != 0 else evtcount.GetValue()
+ scale = cross_section*kfactor*matching_efficiency/events_processed
+
+ nevents_tot += evtcount.GetValue()
+
+ hists_to_write = {}
+ for r in res:
+ hist = r.GetValue()
+ hname = hist.GetName()
+ # merge histograms in case histogram exists
+ if hist.GetName() in hists_to_write:
+ hists_to_write[hname].Add(hist)
+ else:
+ hists_to_write[hname] = hist
+
+ LOGGER.info('Writing out process %s, nEvents processed %s',
+ process, f'{evtcount.GetValue():,}')
+ with ROOT.TFile(f'{output_dir}/{process}.root', 'RECREATE'):
+ for hist in hists_to_write.values():
+ if do_scale:
+ hist.Scale(scale * int_lumi)
+ hist.Write()
+
+ # write all meta info to the output file
+ p = ROOT.TParameter(int)("eventsProcessed", events_processed)
+ p.Write()
+ p = ROOT.TParameter(float)("sumOfWeights", hweight.GetValue())
+ p.Write()
+ p = ROOT.TParameter(float)("intLumi", int_lumi)
+ p.Write()
+ p = ROOT.TParameter(float)("crossSection", cross_section)
+ p.Write()
+ p = ROOT.TParameter(float)("kfactor", kfactor)
+ p.Write()
+ p = ROOT.TParameter(float)("matchingEfficiency",
+ matching_efficiency)
+ p.Write()
+
+ info_msg = f"{' SUMMARY ':=^80}\n"
+ info_msg += 'Elapsed time (H:M:S): '
+ info_msg += time.strftime('%H:%M:%S', time.gmtime(elapsed_time))
+ info_msg += '\nEvents processed/second: '
+ info_msg += f'{int(nevents_tot/elapsed_time):,}'
+ info_msg += f'\nTotal events processed: {nevents_tot:,}'
+ info_msg += '\n'
+ info_msg += 80 * '='
+ info_msg += '\n'
+ LOGGER.info(info_msg)
+
+
+def run(parser):
+ '''
+ Set things in motion.
+ '''
+
+ args, _ = parser.parse_known_args()
+
+ if not hasattr(args, 'command'):
+ LOGGER.error('Error occurred during subcommand routing!\nAborting...')
+ sys.exit(3)
+
+ if args.command != 'run':
+ LOGGER.error('Unknown sub-command "%s"!\nAborting...', args.command)
+ sys.exit(3)
+
+ # Check that the analysis file exists
+ anapath = args.anascript_path
+ if not os.path.isfile(anapath):
+ LOGGER.error('Analysis script %s not found!\nAborting...',
+ anapath)
+ sys.exit(3)
+
+ # Set verbosity level of the RDataFrame
+ if args.verbose:
+ # ROOT.Experimental.ELogLevel.kInfo verbosity level is more
+ # equivalent to DEBUG in other log systems
+ LOGGER.debug('Setting verbosity level "kInfo" for RDataFrame...')
+ verbosity = ROOT.Experimental.RLogScopedVerbosity(
+ ROOT.Detail.RDF.RDFLogChannel(),
+ ROOT.Experimental.ELogLevel.kInfo)
+ LOGGER.debug(verbosity)
+ if args.more_verbose:
+ LOGGER.debug('Setting verbosity level "kDebug" for RDataFrame...')
+ verbosity = ROOT.Experimental.RLogScopedVerbosity(
+ ROOT.Detail.RDF.RDFLogChannel(),
+ ROOT.Experimental.ELogLevel.kDebug)
+ LOGGER.debug(verbosity)
+ if args.most_verbose:
+ LOGGER.debug('Setting verbosity level "kDebug+10" for '
+ 'RDataFrame...')
+ verbosity = ROOT.Experimental.RLogScopedVerbosity(
+ ROOT.Detail.RDF.RDFLogChannel(),
+ ROOT.Experimental.ELogLevel.kDebug+10)
+ LOGGER.debug(verbosity)
+
+ # Load pre-compiled analyzers
+ LOGGER.info('Loading analyzers from libFCCAnalyses...')
+ ROOT.gSystem.Load("libFCCAnalyses")
+ # Is this still needed? As of 01/04/2022 it still seems to be the case
+ fcc_loaded = ROOT.dummyLoader()
+ if fcc_loaded:
+ LOGGER.debug('Successfully loaded main FCCAnalyses analyzers.')
+
+ # Load the analysis script as a module
+ anapath = os.path.abspath(anapath)
+ LOGGER.info('Loading analysis file:\n%s', anapath)
+ rdf_spec = importlib.util.spec_from_file_location("rdfanalysis",
+ anapath)
+ rdf_module = importlib.util.module_from_spec(rdf_spec)
+ rdf_spec.loader.exec_module(rdf_module)
+
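+ # Dispatch on the analysis flavour: a histmaker-style script defines
+ # a "build_graph" function, a staged one defines an "RDFanalysis"
+ # class; exactly one of the two must be present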
+ if hasattr(rdf_module, "build_graph") and \
+ hasattr(rdf_module, "RDFanalysis"):
+ LOGGER.error('Analysis file ambiguous!\nBoth "RDFanalysis" '
+ 'class and "build_graph" function are defined.')
+ sys.exit(3)
+ elif hasattr(rdf_module, "build_graph") and \
+ not hasattr(rdf_module, "RDFanalysis"):
+ run_histmaker(args, rdf_module, anapath)
+ elif not hasattr(rdf_module, "build_graph") and \
+ hasattr(rdf_module, "RDFanalysis"):
+ run_stages(args, rdf_module, anapath)
+ else:
+ LOGGER.error('Analysis file does not contain required '
+ 'objects!\nProvide either "RDFanalysis" class or '
+ '"build_graph" function.')
+ sys.exit(3)
diff --git a/python/run_final_analysis.py b/python/run_final_analysis.py
new file mode 100644
index 0000000000..d3874eeb7b
--- /dev/null
+++ b/python/run_final_analysis.py
@@ -0,0 +1,496 @@
+'''
+Run final stage of an analysis
+'''
+
+import os
+import sys
+import time
+import glob
+import logging
+import importlib.util
+
+import ROOT # type: ignore
+from anascript import get_element, get_element_dict
+from process import get_process_dict
+
+LOGGER = logging.getLogger('FCCAnalyses.run_final')
+
+ROOT.gROOT.SetBatch(True)
+
+
+# _____________________________________________________________________________
+def get_entries(infilepath: str) -> tuple[int, int]:
+ '''
+ Get number of original entries and number of actual entries in the file
+ '''
+ events_processed = 0
+ events_in_ttree = 0
+
+ with ROOT.TFile(infilepath, 'READ') as infile:
+ try:
+ events_processed = infile.Get('eventsProcessed').GetVal()
+ except AttributeError:
+ LOGGER.warning('Input file is missing information about '
+ 'original number of events!')
+
+ try:
+ events_in_ttree = infile.Get("events").GetEntries()
+ except AttributeError:
+ LOGGER.error('Input file is missing "events" TTree!\nAborting...')
+ sys.exit(3)
+
+ return events_processed, events_in_ttree
+
+
+# _____________________________________________________________________________
+def testfile(f: str) -> bool:
+ '''
+ Test input file from previous stages
+ '''
+ with ROOT.TFile(f, 'READ') as infile:
+ tt = None
+ try:
+ tt = infile.Get("events")
+ if tt is None:
+ LOGGER.warning('File does not contain any events, the '
+ 'selection was probably too tight. Skipping it: %s', f)
+ return False
+ except IOError as e:
+ LOGGER.warning('I/O error(%i): %s', e.errno, e.strerror)
+ return False
+ except ValueError:
+ LOGGER.warning('Could not read the file')
+ return False
+ except Exception:
+ LOGGER.warning('Unexpected error: %s\nfile ===%s=== must be '
+ 'deleted',
+ sys.exc_info()[0], f)
+ return False
+ return True
+
+
+# __________________________________________________________
+def run(rdf_module):
+ '''
+ Main loop.
+ '''
+ proc_dict_location = get_element(rdf_module, "procDict", True)
+ if not proc_dict_location:
+ LOGGER.error(
+ 'Location of the process dictionary not provided!\nAborting...')
+ sys.exit(3)
+
+ process_dict = get_process_dict(proc_dict_location)
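+ # Each process-dictionary entry is expected to provide the
+ # normalisation inputs read further below, e.g. (illustrative
+ # process name and values):
+ #   "p8_ee_ZZ_ecm240": {"crossSection": 1.0, "kfactor": 1.0,
+ #                       "matchingEfficiency": 1.0}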
+
+ process_dict_additions = get_element(rdf_module, "procDictAdd", True)
+ for addition in process_dict_additions:
+ if get_element_dict(process_dict, addition) is None:
+ process_dict[addition] = process_dict_additions[addition]
+ else:
+ LOGGER.debug('Process already in the dictionary. Skipping it...')
+
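+ # Enable RDataFrame implicit multi-threading with the number of
+ # threads requested in the analysis script ("nCPUS")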
+ ROOT.ROOT.EnableImplicitMT(get_element(rdf_module, "nCPUS", True))
+
+ nevents_real = 0
+ start_time = time.time()
+
+ process_events = {}
+ events_ttree = {}
+ file_list = {}
+ save_tab = []
+ efficiency_list = []
+
+ input_dir = get_element(rdf_module, "inputDir", True)
+ if not input_dir:
+ LOGGER.error('The inputDir variable is mandatory for the final stage '
+ 'of the analysis!\nAborting...')
+ sys.exit(3)
+
+ if input_dir[-1] != "/":
+ input_dir += "/"
+
+ output_dir = get_element(rdf_module, "outputDir", True)
+ if output_dir != "":
+ if output_dir[-1] != "/":
+ output_dir += "/"
+
+ if output_dir != '' and not os.path.exists(output_dir):
+ os.makedirs(output_dir, exist_ok=True)
+
+ cut_list = get_element(rdf_module, "cutList", True)
+ length_cuts_names = max(len(cut) for cut in cut_list)
+ cut_labels = get_element(rdf_module, "cutLabels", True)
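+ # cutList maps a cut name to an RDataFrame filter expression and
+ # cutLabels may map the same names to nicer labels for the LaTeX
+ # table, e.g. (illustrative): {"sel0": "Zcand_q == 0"}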
+
+ # save a table in a separate tex file
+ save_tabular = get_element(rdf_module, "saveTabular", True)
+ if save_tabular:
+ # Option to give the cuts nicer labels in the table; otherwise
+ # they are taken from the cutList
+ if cut_labels:
+ cut_names = list(cut_labels.values())
+ else:
+ cut_names = list(cut_list)
+
+ cut_names.insert(0, ' ')
+ save_tab.append(cut_names)
+ efficiency_list.append(cut_names)
+
+ process_list = get_element(rdf_module, "processList", True)
+ for process_name in process_list:
+ process_events[process_name] = 0
+ events_ttree[process_name] = 0
+ file_list[process_name] = ROOT.vector('string')()
+
+ infilepath = input_dir + process_name + '.root' # input file
+ if not os.path.isfile(infilepath):
+ LOGGER.debug('File %s does not exist!\nChecking whether it is '
+ 'a directory, as the sample might have been '
+ 'processed in batch mode.', infilepath)
+ else:
+ LOGGER.info('Open file:\n\t%s', infilepath)
+ process_events[process_name], events_ttree[process_name] = \
+ get_entries(infilepath)
+ file_list[process_name].push_back(infilepath)
+
+ indirpath = input_dir + process_name
+ if os.path.isdir(indirpath):
+ info_msg = f'Open directory {indirpath}'
+ flist = glob.glob(indirpath + '/chunk*.root')
+ for filepath in flist:
+ info_msg += '\n\t' + filepath
+ chunk_process_events, chunk_events_ttree = \
+ get_entries(filepath)
+ process_events[process_name] += chunk_process_events
+ events_ttree[process_name] += chunk_events_ttree
+ file_list[process_name].push_back(filepath)
+ LOGGER.info(info_msg)
+
+ info_msg = 'Processed events:'
+ for process_name, n_events in process_events.items():
+ info_msg += f'\n\t- {process_name}: {n_events:,}'
+ LOGGER.info(info_msg)
+ info_msg = 'Events in the TTree:'
+ for process_name, n_events in events_ttree.items():
+ info_msg += f'\n\t- {process_name}: {n_events:,}'
+ LOGGER.info(info_msg)
+
+ histo_list = get_element(rdf_module, "histoList", True)
+ do_scale = get_element(rdf_module, "doScale", True)
+ int_lumi = get_element(rdf_module, "intLumi", True)
+
+ do_tree = get_element(rdf_module, "doTree", True)
+ for process_name in process_list:
+ LOGGER.info('Running over process: %s', process_name)
+
+ if process_events[process_name] == 0:
+ LOGGER.error('Can\'t scale histograms, the number of processed '
+ 'events for the process "%s" seems to be zero!',
+ process_name)
+ sys.exit(3)
+
+ df = ROOT.ROOT.RDataFrame("events", file_list[process_name])
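+ # defineList maps new column names to expressions that RDataFrame
+ # JIT-compiles, e.g. (illustrative): {"m2": "Zcand_m*Zcand_m"}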
+ define_list = get_element(rdf_module, "defineList", True)
+ if len(define_list) > 0:
+ LOGGER.info('Registering extra DataFrame defines...')
+ for define in define_list:
+ df = df.Define(define, define_list[define])
+
+ fout_list = []
+ histos_list = []
+ tdf_list = []
+ count_list = []
+ cuts_list = []
+ cuts_list.append(process_name)
+ eff_list = []
+ eff_list.append(process_name)
+
+ # Define all histos, snapshots, etc...
+ LOGGER.info('Defining snapshots and histograms')
+ for cut in cut_list:
+ # output file for tree
+ fout = output_dir + process_name + '_' + cut + '.root'
+ fout_list.append(fout)
+
+ df_cut = df.Filter(cut_list[cut])
+ count_list.append(df_cut.Count())
+
+ histos = []
+
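+ # Two histogram configurations are supported (keys as read below,
+ # values illustrative):
+ #   1D with explicit name: {"name": "zed_m",
+ #                           "title": "m_{Z} [GeV]",
+ #                           "bin": 100, "xmin": 0, "xmax": 200}
+ #   1-3D via column list:  {"cols": ["zed_m", "zed_p"],
+ #                           "bins": [(100, 0, 200), (50, 0, 100)]}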
+ for v in histo_list:
+ # default 1D histogram
+ if "name" in histo_list[v]:
+ model = ROOT.RDF.TH1DModel(
+ v,
+ f';{histo_list[v]["title"]};',
+ histo_list[v]["bin"],
+ histo_list[v]["xmin"],
+ histo_list[v]["xmax"])
+ histos.append(df_cut.Histo1D(model, histo_list[v]["name"]))
+ # multi dim histogram (1, 2 or 3D)
+ elif "cols" in histo_list[v]:
+ cols = histo_list[v]['cols']
+ bins = histo_list[v]['bins']
+ bins_unpacked = tuple(i for sub in bins for i in sub)
+ if len(bins) != len(cols):
+ LOGGER.error('Number of columns must be equal to the '
+ 'number of bin configs!\nAborting...')
+ sys.exit(3)
+ if len(cols) == 1:
+ histos.append(df_cut.Histo1D((v, "", *bins_unpacked),
+ cols[0]))
+ elif len(cols) == 2:
+ histos.append(df_cut.Histo2D((v, "", *bins_unpacked),
+ cols[0],
+ cols[1]))
+ elif len(cols) == 3:
+ histos.append(df_cut.Histo3D((v, "", *bins_unpacked),
+ cols[0],
+ cols[1],
+ cols[2]))
+ else:
+ LOGGER.error('Only 1, 2 or 3D histograms supported.')
+ sys.exit(3)
+ else:
+ LOGGER.error('Error parsing the histogram config!\nProvide '
+ 'either "name" or "cols".')
+ sys.exit(3)
+ histos_list.append(histos)
+
+ if do_tree:
+ opts = ROOT.RDF.RSnapshotOptions()
+ opts.fLazy = True
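+ # Lazy snapshot: nothing is written until the single event loop
+ # below is triggered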
+ try:
+ snapshot_tdf = df_cut.Snapshot("events", fout, "", opts)
+ except Exception as excp:
+ LOGGER.error('During the execution of the final stage '
+ 'exception occurred:\n%s', excp)
+ sys.exit(3)
+
+ # Keep a reference to the lazy snapshot so the Python garbage
+ # collector does not delete it before it is evaluated
+ tdf_list.append(snapshot_tdf)
+
+ # Now perform the loop and evaluate everything at once.
+ LOGGER.info('Evaluating...')
+ all_events = df.Count().GetValue()
+ LOGGER.info('Done')
+
+ nevents_real += all_events
+ uncertainty = ROOT.Math.sqrt(all_events)
+
+ if do_scale:
+ # Scale the raw Poisson uncertainty first, with the same factor
+ # as the event count (mirrors the per-cut treatment below)
+ uncertainty = ROOT.Math.sqrt(all_events) * \
+ process_dict[process_name]["crossSection"] * \
+ process_dict[process_name]["kfactor"] * \
+ process_dict[process_name]["matchingEfficiency"] * \
+ int_lumi / process_events[process_name]
+ all_events = all_events * 1. * \
+ process_dict[process_name]["crossSection"] * \
+ process_dict[process_name]["kfactor"] * \
+ process_dict[process_name]["matchingEfficiency"] * \
+ int_lumi / process_events[process_name]
+ LOGGER.info('Reporting scaled numbers of events')
+
+ cfn_width = 16 + length_cuts_names # Cutflow name width
+ info_msg = 'Cutflow:'
+ info_msg += f'\n\t{"All events":{cfn_width}} : {all_events:,}'
+
+ if save_tabular:
+ # scientific notation - recommended for backgrounds
+ cuts_list.append(f'{all_events:.2e} $\\pm$ {uncertainty:.2e}')
+ # float notation - recommended for signals with few events
+ # cuts_list.append(f'{all_events:.3f} $\\pm$ {uncertainty:.3f}')
+ # eff_list.append(1.)  # start with 100% efficiency
+
+ for i, cut in enumerate(cut_list):
+ nevents_this_cut = count_list[i].GetValue()
+ nevents_this_cut_raw = nevents_this_cut
+ uncertainty = ROOT.Math.sqrt(nevents_this_cut_raw)
+ if do_scale:
+ nevents_this_cut = \
+ nevents_this_cut * 1. * \
+ process_dict[process_name]["crossSection"] * \
+ process_dict[process_name]["kfactor"] * \
+ process_dict[process_name]["matchingEfficiency"] * \
+ int_lumi / process_events[process_name]
+ uncertainty = \
+ ROOT.Math.sqrt(nevents_this_cut_raw) * \
+ process_dict[process_name]["crossSection"] * \
+ process_dict[process_name]["kfactor"] * \
+ process_dict[process_name]["matchingEfficiency"] * \
+ int_lumi / process_events[process_name]
+ info_msg += f'\n\t{"After selection " + cut:{cfn_width}} : '
+ info_msg += f'{nevents_this_cut:,}'
+
+ # Save the number of events, uncertainty and efficiency for
+ # the output file
+ if save_tabular and cut != 'selNone':
+ if nevents_this_cut != 0:
+ # scientific notation - recommended for backgrounds
+ cuts_list.append(
+ f'{nevents_this_cut:.2e} $\\pm$ {uncertainty:.2e}')
+ # float notation - recommended for signals with few events
+ # cuts_list.append(
+ #     f'{nevents_this_cut:.3f} $\\pm$ {uncertainty:.3f}')
+ eff_list.append(f'{1.*nevents_this_cut/all_events:.3f}')
+ # if number of events is zero, the previous uncertainty is
+ # saved instead:
+ elif '$\\pm$' in cuts_list[-1]:
+ prev_entry = cuts_list[-1].split()
+ cuts_list.append(f'$\\leq$ {prev_entry[2]}')
+ eff_list.append('0.')
+ else:
+ cuts_list.append(cuts_list[-1])
+ eff_list.append('0.')
+
+ LOGGER.info(info_msg)
+
+ # And save everything
+ LOGGER.info('Saving the outputs...')
+ for i, cut in enumerate(cut_list):
+ # output file for histograms
+ fhisto = output_dir + process_name + '_' + cut + '_histo.root'
+ with ROOT.TFile(fhisto, 'RECREATE'):
+ for h in histos_list[i]:
+ try:
+ h.Scale(
+ 1. * process_dict[process_name]["crossSection"] *
+ process_dict[process_name]["kfactor"] *
+ process_dict[process_name]["matchingEfficiency"] /
+ process_events[process_name])
+ except KeyError:
+ LOGGER.warning(
+ 'No value defined for process %s in dictionary!',
+ process_name)
+ if h.Integral(0, -1) > 0:
+ h.Scale(1./h.Integral(0, -1))
+ h.Write()
+
+ if do_tree:
+ # test that the snapshot worked well
+ validfile = testfile(fout_list[i])
+ if not validfile:
+ continue
+
+ if save_tabular and cut != 'selNone':
+ save_tab.append(cuts_list)
+ efficiency_list.append(eff_list)
+
+ if save_tabular:
+ tabular_path = output_dir + 'outputTabular.txt'
+ LOGGER.info('Saving tabular to:\n%s', tabular_path)
+ with open(tabular_path, 'w', encoding='utf-8') as outfile:
+ # Printing the number of events in format of a LaTeX table
+ outfile.write('\\begin{table}[H]\n'
+ ' \\centering\n'
+ ' \\resizebox{\\textwidth}{!}{\n'
+ ' \\begin{tabular}{|l||')
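+ # one centred column per recorded cut, after the leading
+ # process-name column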
+ outfile.write('c|' * (len(cuts_list)-1))
+ outfile.write('} \\hline\n')
+ for i, row in enumerate(save_tab):
+ outfile.write(' ')
+ outfile.write(' & '.join(row))
+ outfile.write(' \\\\\n')
+ if i == 0:
+ outfile.write(' \\hline\n')
+ outfile.write(' \\hline \n'
+ ' \\end{tabular}} \n'
+ ' \\caption{Caption} \n'
+ ' \\label{tab:my_label} \n'
+ '\\end{table}\n')
+
+ # Efficiency:
+ outfile.write('\n\nEfficiency:\n')
+ outfile.write('\\begin{table}[H] \n'
+ ' \\centering \n'
+ ' \\resizebox{\\textwidth}{!}{ \n'
+ ' \\begin{tabular}{|l||')
+ outfile.write('c|' * (len(cuts_list)-1))
+ outfile.write('} \\hline\n')
+ for i in range(len(eff_list)):
+ outfile.write(' ')
+ column = [row[i] for row in efficiency_list]
+ outfile.write(' & '.join(str(value) for value in column))
+ outfile.write(' \\\\\n')
+ if i == 0:
+ outfile.write(' \\hline\n')
+ outfile.write(' \\hline \n'
+ ' \\end{tabular}} \n'
+ ' \\caption{Caption} \n'
+ ' \\label{tab:my_label} \n'
+ '\\end{table}\n')
+
+ elapsed_time = time.time() - start_time
+
+ info_msg = f"{' SUMMARY ':=^80}\n"
+ info_msg += 'Elapsed time (H:M:S): '
+ info_msg += time.strftime('%H:%M:%S', time.gmtime(elapsed_time))
+ info_msg += '\nEvents processed/second: '
+ info_msg += f'{int(nevents_real/elapsed_time):,}'
+ info_msg += f'\nTotal events processed: {nevents_real:,}'
+ info_msg += '\n'
+ info_msg += 80 * '='
+ info_msg += '\n'
+ LOGGER.info(info_msg)
+
+
+def run_final(parser):
+ '''
+ Run final stage of the analysis.
+ '''
+
+ args, _ = parser.parse_known_args()
+
+ if args.command != 'final':
+ LOGGER.error('Unknown sub-command "%s"!\nAborting...', args.command)
+ sys.exit(3)
+
+ # Check that the analysis file exists
+ anapath = args.anascript_path
+ if not os.path.isfile(anapath):
+ LOGGER.error('Analysis script "%s" not found!\nAborting...',
+ anapath)
+ sys.exit(3)
+
+ # Load pre-compiled analyzers
+ LOGGER.info('Loading analyzers from libFCCAnalyses...')
+ ROOT.gSystem.Load("libFCCAnalyses")
+ # Is this still needed? As of 01/04/2022 it still seems to be the case
+ _fcc = ROOT.dummyLoader()
+ LOGGER.debug(_fcc)
+
+ # Set verbosity level
+ if args.verbose:
+ # The ROOT.Experimental.ELogLevel.kInfo verbosity level roughly
+ # corresponds to DEBUG in other logging systems
+ LOGGER.debug('Setting verbosity level "kInfo" for RDataFrame...')
+ verbosity = ROOT.Experimental.RLogScopedVerbosity(
+ ROOT.Detail.RDF.RDFLogChannel(),
+ ROOT.Experimental.ELogLevel.kInfo)
+ LOGGER.debug(verbosity)
+ if args.more_verbose:
+ LOGGER.debug('Setting verbosity level "kDebug" for RDataFrame...')
+ verbosity = ROOT.Experimental.RLogScopedVerbosity(
+ ROOT.Detail.RDF.RDFLogChannel(),
+ ROOT.Experimental.ELogLevel.kDebug)
+ LOGGER.debug(verbosity)
+ if args.most_verbose:
+ LOGGER.debug('Setting verbosity level "kDebug+10" for '
+ 'RDataFrame...')
+ verbosity = ROOT.Experimental.RLogScopedVerbosity(
+ ROOT.Detail.RDF.RDFLogChannel(),
+ ROOT.Experimental.ELogLevel.kDebug+10)
+ LOGGER.debug(verbosity)
+
+ # Load the analysis
+ anapath_abs = os.path.abspath(anapath)
+ LOGGER.info('Loading analysis script:\n%s', anapath_abs)
+ rdf_spec = importlib.util.spec_from_file_location('rdfanalysis',
+ anapath_abs)
+ rdf_module = importlib.util.module_from_spec(rdf_spec)
+ rdf_spec.loader.exec_module(rdf_module)
+
+ run(rdf_module)
diff --git a/python/test.py b/python/test_fccanalyses.py
similarity index 100%
rename from python/test.py
rename to python/test_fccanalyses.py