Commit

Removing legacy style to run an analysis (#327)
* Removing legacy style to run an analysis

* Fix batch print

* Batch mode for root

* Remove runValidate
kjvbrt authored Dec 14, 2023
1 parent 1d9183e commit 09d25ef
Showing 5 changed files with 66 additions and 134 deletions.
1 change: 1 addition & 0 deletions addons/FastJet/python/jetClusteringHelper.py
@@ -1,6 +1,7 @@
import json
import ROOT

ROOT.gROOT.SetBatch(True)

class ExclusiveJetClusteringHelper:
def __init__(self, coll, njets, tag=""):
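The new ROOT.gROOT.SetBatch(True) call switches ROOT to batch mode at import time, so nothing attempts to open a graphics window when the helpers are used on batch nodes or machines without a display. A minimal sketch of the effect, not part of this commit and assuming only a working ROOT installation:

import ROOT

# Enable batch mode before any canvas is created, so ROOT never tries
# to connect to a display.
ROOT.gROOT.SetBatch(True)

canvas = ROOT.TCanvas('c', 'c', 800, 600)   # created off-screen
hist = ROOT.TH1F('h', 'h', 100, -3.0, 3.0)
hist.FillRandom('gaus', 1000)
hist.Draw()
canvas.SaveAs('hist.png')                   # file is written, no window appears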
3 changes: 2 additions & 1 deletion addons/ONNXRuntime/python/jetFlavourHelper.py
@@ -1,7 +1,8 @@
import sys
import json
import ROOT
import sys

ROOT.gROOT.SetBatch(True)

class JetFlavourHelper:
def __init__(self, coll, jet, jetc, tag=""):
34 changes: 26 additions & 8 deletions bin/fccanalysis
@@ -31,6 +31,8 @@ class MultiLineFormatter(logging.Formatter):

def main():
parser = argparse.ArgumentParser(description='FCCAnalyses v0.8.0')

# Verbosity settings
verbosity_argument_group = parser.add_mutually_exclusive_group()
verbosity_argument_group.add_argument('-v', '--verbose',
action='store_true',
@@ -42,16 +44,32 @@ def main():
action='store_true',
help='make output most verbose')
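For context, the verbosity flags sit in a mutually exclusive group, so argparse itself rejects passing two verbosity levels at once. A tiny illustration of that mechanism, with hypothetical flag names rather than the exact fccanalysis ones:

import argparse

parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group()
group.add_argument('-v', '--verbose', action='store_true',
                   help='make output verbose')
group.add_argument('-vv', '--more-verbose', action='store_true',
                   help='make output more verbose')

print(parser.parse_args(['-v']))
# Namespace(verbose=True, more_verbose=False)
# parser.parse_args(['-v', '-vv']) would exit with a
# "not allowed with argument" error from argparse.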

subparsers = parser.add_subparsers(help='types of running modes', dest='command')
parser_init = subparsers.add_parser('init', help="generate a RDataFrame based FCC analysis")
parser_build = subparsers.add_parser('build', help='build and install local analysis')
# Create sub-parsers
subparsers = parser.add_subparsers(help='types of running modes',
dest='command')
parser_init = subparsers.add_parser(
'init',
help="generate a RDataFrame based FCC analysis")
parser_build = subparsers.add_parser(
'build',
help='build and install local analysis')
parser_test = subparsers.add_parser(
'test', help='test whole or a part of the analysis framework')
parser_pin = subparsers.add_parser('pin', help='pin fccanalyses to the current version of Key4hep stack')
parser_run = subparsers.add_parser('run', help="run a RDataFrame based FCC analysis")
parser_run_final = subparsers.add_parser('final', help="run a RDataFrame based FCC analysis final configuration")
parser_run_plots = subparsers.add_parser('plots', help="run a RDataFrame based FCC analysis plot configuration")
'test',
help='test whole or a part of the analysis framework')
parser_pin = subparsers.add_parser(
'pin',
help='pin fccanalyses to the current version of Key4hep stack')
parser_run = subparsers.add_parser(
'run',
help="run a RDataFrame based FCC analysis")
parser_run_final = subparsers.add_parser(
'final',
help="run a RDataFrame based FCC analysis final configuration")
parser_run_plots = subparsers.add_parser(
'plots',
help="run a RDataFrame based FCC analysis plot configuration")

# Setup sub-parsers
import Parsers as fccpars
fccpars.setup_init_parser(parser_init)
fccpars.setup_build_parser(parser_build)
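The reformatted block above follows the standard argparse sub-command pattern: add_subparsers(dest='command') records which sub-command was chosen, each add_parser call returns a parser of its own, and the setup_*_parser helpers from Parsers fill in the per-command options. A stripped-down sketch of the same pattern, with hypothetical sub-commands that are not part of fccanalysis:

import argparse

parser = argparse.ArgumentParser(description='demo')
subparsers = parser.add_subparsers(help='types of running modes',
                                   dest='command')

parser_greet = subparsers.add_parser('greet', help='print a greeting')
parser_greet.add_argument('name')

parser_count = subparsers.add_parser('count', help='count up to a number')
parser_count.add_argument('limit', type=int)

args = parser.parse_args(['greet', 'World'])
if args.command == 'greet':
    print('Hello,', args.name)
elif args.command == 'count':
    print(list(range(args.limit)))
else:
    # No sub-command given: args.command is None
    parser.print_help()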
@@ -8,7 +8,7 @@
#Mandatory: Production tag when running over EDM4Hep centrally produced events, this points to the yaml files for getting sample statistics
prodTag = "FCCee/spring2021/IDEA/"

#Optional: output directory, default is local dir
#Optional: output directory, default is local running dir
outputDir = "ZH_mumu_recoil_batch/stage1"

#Optional: ncpus, default is 4
160 changes: 36 additions & 124 deletions python/FCCAnalysisRun.py
@@ -1,22 +1,22 @@
import ROOT
import os, sys
import os
import sys
import time
import yaml
import glob
import json
import logging
import subprocess
import importlib.util
from array import array
import datetime
import numpy as np

import ROOT
from anafile import getElement, getElementDict
from process import getProcessInfo, get_process_dict


LOGGER = logging.getLogger('FCCAnalyses.run')

ROOT.gROOT.SetBatch(True)

# __________________________________________________________
def get_entries(infilepath):
@@ -308,7 +308,7 @@ def sendToBatch(rdfModule, chunkList, process, analysisFile):
frun_condor.write('Error = {}/condor_job.{}.$(ClusterId).$(ProcId).error\n'.format(logDir,process))
frun_condor.write('getenv = False\n')
frun_condor.write('environment = "LS_SUBCWD={}"\n'.format(logDir)) # not sure
frun_condor.write('requirements = ( (OpSysAndVer =?= "CentOS7") && (Machine =!= LastRemoteHost) && (TARGET.has_avx2 =?= True) )\n')
frun_condor.write('requirements = ( (Machine =!= LastRemoteHost) && (TARGET.has_avx2 =?= True) )\n')
frun_condor.write('on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)\n')
frun_condor.write('max_retries = 3\n')
frun_condor.write('+JobFlavour = "{}"\n'.format(getElement(rdfModule, "batchQueue")))
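For context, sendToBatch assembles an HTCondor submit description line by line; the change above only drops the CentOS7 operating-system constraint while keeping the different-host and AVX2 requirements. A hypothetical, self-contained sketch of writing a submit file of the same shape (the directory, process name and job flavour are placeholders, not values used by FCCAnalyses):

log_dir = '/tmp/fcc_logs'        # placeholder
process = 'p8_ee_ZH_ecm240'      # placeholder

with open('run.sub', 'w') as sub:
    sub.write('executable = {}/run_{}.sh\n'.format(log_dir, process))
    sub.write('Log = {}/condor_job.{}.$(ClusterId).log\n'.format(log_dir, process))
    sub.write('Output = {}/condor_job.{}.$(ClusterId).$(ProcId).out\n'.format(log_dir, process))
    sub.write('Error = {}/condor_job.{}.$(ClusterId).$(ProcId).error\n'.format(log_dir, process))
    sub.write('getenv = False\n')
    # Require a different host than the last attempt and AVX2 support;
    # no operating-system constraint any more.
    sub.write('requirements = ( (Machine =!= LastRemoteHost) && (TARGET.has_avx2 =?= True) )\n')
    sub.write('on_exit_remove = (ExitBySignal == False) && (ExitCode == 0)\n')
    sub.write('max_retries = 3\n')
    sub.write('+JobFlavour = "longlunch"\n')
    sub.write('queue\n')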
@@ -318,8 +318,8 @@ def sendToBatch(rdfModule, chunkList, process, analysisFile):
frun_condor.close()

cmdBatch="condor_submit {}".format(frunfull_condor)
LOGGER.info('Batch command: ', cmdBatch)
job=SubmitToCondor(cmdBatch,10)
LOGGER.info('Batch command: %s', cmdBatch)
job=SubmitToCondor(cmdBatch, 10)


#__________________________________________________________
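The logging change above fixes a real bug rather than style: logging methods take a %-style format string plus arguments, so the old call had no placeholder for cmdBatch and the logging machinery would report a formatting error instead of printing the submit command. A small illustration:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('FCCAnalyses.run')

cmd = 'condor_submit run.sub'

# Correct: the argument is substituted lazily into the %s placeholder.
logger.info('Batch command: %s', cmd)

# Broken (the old form): no placeholder for the extra argument, so the
# handler prints "--- Logging error ---" instead of the command.
logger.info('Batch command: ', cmd)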
@@ -1040,71 +1040,33 @@ def runHistmaker(args, rdfModule, analysisFile):
LOGGER.info(info_msg)


#__________________________________________________________
# __________________________________________________________
def runPlots(analysisFile):
import doPlots as dp
dp.run(analysisFile)

#__________________________________________________________
def runValidate(jobdir):
listdir=os.listdir(jobdir)
if jobdir[-1]!="/":jobdir+="/"
for dir in listdir:
if not os.path.isdir(jobdir+dir): continue
listfile=glob.glob(jobdir+dir+"/*.sh")
for file in listfile:
with open(file) as f:
for line in f:
pass
lastLine = line
LOGGER.info(line)


#__________________________________________________________
def setup_run_parser(parser):
publicOptions = parser.add_argument_group('User options')
publicOptions.add_argument('anafile_path', help="path to analysis script")
publicOptions.add_argument("--files-list", help="Specify input file to bypass the processList", default=[], nargs='+')
publicOptions.add_argument("--output", help="Specify output file name to bypass the processList and or outputList, default output.root", type=str, default="output.root")
publicOptions.add_argument("--nevents", help="Specify max number of events to process", type=int, default=-1)
publicOptions.add_argument("--test", action='store_true', help="Run over the test file", default=False)
publicOptions.add_argument('--bench', action='store_true', help='Output benchmark results to a JSON file', default=False)
publicOptions.add_argument("--ncpus", help="Set number of threads", type=int)
publicOptions.add_argument("--final", action='store_true', help="Run final analysis (produces final histograms and trees)", default=False)
publicOptions.add_argument("--plots", action='store_true', help="Run analysis plots", default=False)
publicOptions.add_argument("--preprocess", action='store_true', help="Run preprocessing", default=False)
publicOptions.add_argument("--validate", action='store_true', help="Validate a given production", default=False)
publicOptions.add_argument("--rerunfailed", action='store_true', help="Rerun failed jobs", default=False)
publicOptions.add_argument("--jobdir", help="Specify the batch job directory", type=str, default="output.root")

internalOptions = parser.add_argument_group('\033[4m\033[1m\033[91m Internal options, NOT FOR USERS\033[0m')
internalOptions.add_argument("--batch", action='store_true', help="Submit on batch", default=False)
def run(parser):
'''
Set things in motion.
'''

args, _ = parser.parse_known_args()

#__________________________________________________________
def run(mainparser, subparser=None):
"""
Set things in motion.
The two parser arguments are a hack to allow running this
both as `fccanalysis run` and `python config/FCCAnalysisRun.py`
For the latter case, both are the same (see below).
"""

if subparser:
setup_run_parser(subparser)
args, _ = mainparser.parse_known_args()
#check that the analysis file exists
# Check that the analysis file exists
analysisFile = args.anafile_path
if not os.path.isfile(analysisFile):
LOGGER.error('Script %s does not exist!\nSpecify a valid analysis '
'script in the command line arguments', analysisFile)
LOGGER.error('Analysis script %s not found!\nAborting...',
analysisFile)
sys.exit(3)

# Load pre compiled analyzers
LOGGER.info('Loading analyzers from libFCCAnalyses...')
ROOT.gSystem.Load("libFCCAnalyses")
# Is this still needed?? 01/04/2022 still to be the case
_fcc = ROOT.dummyLoader

# Set verbosity level
if args.verbose:
# ROOT.Experimental.ELogLevel.kInfo verbosity level is more
# equivalent to DEBUG in other log systems
@@ -1131,75 +1093,25 @@ def run(mainparser, subparser=None):
rdfModule = importlib.util.module_from_spec(rdfSpec)
rdfSpec.loader.exec_module(rdfModule)
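The two lines above are the tail end of the usual importlib recipe for loading the user's analysis script as a Python module. A self-contained sketch of that recipe, with a placeholder file name:

import importlib.util

analysis_path = 'analysis_stage1.py'   # placeholder path to the analysis script

# Build a spec from the file, create the module object and execute it.
spec = importlib.util.spec_from_file_location('rdfanalysis', analysis_path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

# The loaded module can now be inspected for the hooks the runner expects.
print(hasattr(module, 'RDFanalysis'), hasattr(module, 'build_graph'))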

if hasattr(args, 'command'):
if args.command == "run":
if hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
LOGGER.error('Analysis file ambiguous!\nBoth "RDFanalysis" '
'class and "build_graph" function are defined.')
sys.exit(3)
elif hasattr(rdfModule, "build_graph") and not hasattr(rdfModule, "RDFanalysis"):
runHistmaker(args, rdfModule, analysisFile)
elif not hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
runStages(args, rdfModule, args.preprocess, analysisFile)
else:
LOGGER.error('Analysis file does not contain required '
'objects!\nProvide either "RDFanalysis" class or '
'"build_graph" function.')
sys.exit(3)
elif args.command == "final":
runFinal(rdfModule)
elif args.command == "plots":
runPlots(analysisFile)
return

LOGGER.warning('Running the old way...\nThis way of running the analysis '
'is deprecated and will be removed in the next release!\n'
'The FCCAnalyses release 0.8.0 is the last one to support '
'this stile of running!')

# below is legacy using the old way of runnig with options in
# "python config/FCCAnalysisRun.py analysis.py --options check if this is
# final analysis
if args.final:
if args.plots:
LOGGER.error('Can not have --plots with --final, exit')
sys.exit(3)
if args.preprocess:
LOGGER.error('Can not have --preprocess with --final, exit')
sys.exit(3)
runFinal(rdfModule)
if not hasattr(args, 'command'):
LOGGER.error('Unknow sub-command "%s"!\nAborting...')
sys.exit(3)

elif args.plots:
if args.final:
LOGGER.error('Can not have --final with --plots, exit')
if args.command == "run":
if hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
LOGGER.error('Analysis file ambiguous!\nBoth "RDFanalysis" '
'class and "build_graph" function are defined.')
sys.exit(3)
if args.preprocess:
LOGGER.error('Can not have --preprocess with --plots, exit')
elif hasattr(rdfModule, "build_graph") and not hasattr(rdfModule, "RDFanalysis"):
runHistmaker(args, rdfModule, analysisFile)
elif not hasattr(rdfModule, "build_graph") and hasattr(rdfModule, "RDFanalysis"):
runStages(args, rdfModule, args.preprocess, analysisFile)
else:
LOGGER.error('Analysis file does not contain required '
'objects!\nProvide either "RDFanalysis" class or '
'"build_graph" function.')
sys.exit(3)
elif args.command == "final":
runFinal(rdfModule)
elif args.command == "plots":
runPlots(analysisFile)
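The dispatch above keys purely on which hooks the loaded analysis script exposes: build_graph alone routes to runHistmaker, RDFanalysis alone routes to runStages, and defining both or neither is rejected. A compact sketch of the same hasattr-based selection, with stand-in return values instead of the actual runner calls:

import sys
import logging

LOGGER = logging.getLogger('FCCAnalyses.run')

def select_runner(module):
    '''Pick a runner based on the hooks the analysis module defines.'''
    has_graph = hasattr(module, 'build_graph')
    has_class = hasattr(module, 'RDFanalysis')
    if has_graph and has_class:
        LOGGER.error('Analysis file ambiguous: both "RDFanalysis" and '
                     '"build_graph" are defined.')
        sys.exit(3)
    if has_graph:
        return 'histmaker'   # runHistmaker(...) in the real code
    if has_class:
        return 'stages'      # runStages(...) in the real code
    LOGGER.error('Analysis file defines neither "RDFanalysis" nor '
                 '"build_graph".')
    sys.exit(3)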

elif args.validate:
runValidate(args.jobdir)

else:
if args.preprocess:
if args.plots:
LOGGER.error('Can not have --plots with --preprocess, exit')
sys.exit(3)
if args.final:
LOGGER.error('Can not have --final with --preprocess, exit')
sys.exit(3)
runStages(args, rdfModule, args.preprocess, analysisFile)


# __________________________________________________________
if __name__ == "__main__":
LOGGER.warning('Running this script directly is deprecated, use '
'`fccanalysis run` instead. FCCAnalyses release 0.8.0 is '
'the last one to support this style of running!')
# legacy behavior: allow running this script directly
# with python config/FCCAnalysis.py
# and the same behavior as `fccanalysis run`
import argparse
parser = argparse.ArgumentParser()
run(parser, parser)
