diff --git a/Configuration/data/electronFiducialMap_2022F_data.root b/Configuration/data/electronFiducialMap_2022F_data.root
index d0565876..113539ff 100644
Binary files a/Configuration/data/electronFiducialMap_2022F_data.root and b/Configuration/data/electronFiducialMap_2022F_data.root differ
diff --git a/Configuration/data/muonFiducialMap_2022F_data.root b/Configuration/data/muonFiducialMap_2022F_data.root
index 5516bac2..bfa6dee0 100644
Binary files a/Configuration/data/muonFiducialMap_2022F_data.root and b/Configuration/data/muonFiducialMap_2022F_data.root differ
diff --git a/Configuration/python/configurationOptions.py b/Configuration/python/configurationOptions.py
index 4ed0d282..3d9f7331 100644
--- a/Configuration/python/configurationOptions.py
+++ b/Configuration/python/configurationOptions.py
@@ -1811,7 +1811,7 @@
     'EGamma_2022A' : 6,
     'EGamma_2022B' : 112,
-    'EGamma_2022C' : 200,
+    'EGamma_2022C' : 3354,
     'EGamma_2022D' : 33,
     'EGamma_2022E' : 800,
     'EGamma_2022F' : 1000,
diff --git a/Configuration/python/histogramUtilities.py b/Configuration/python/histogramUtilities.py
index fcea205a..a7f34347 100644
--- a/Configuration/python/histogramUtilities.py
+++ b/Configuration/python/histogramUtilities.py
@@ -102,7 +102,7 @@ def getHistIntegral(sample,condor_dir,channel,hist,xlo,xhi):
 #        print "xhi is outside the range of the histogram, will include all the overflow instead"
     xhiBin = histogram.GetXaxis().FindBin(float(xhi))
     #intError = Double (0.0)
-    intError = 0.0
+    intError = c_double(0.0)
     integral = histogram.IntegralAndError(xloBin, xhiBin, intError)
     inputFile.Close()
diff --git a/DBTools/python/condorSubArgumentsSet.py b/DBTools/python/condorSubArgumentsSet.py
index f10a25a4..17cc380d 100644
--- a/DBTools/python/condorSubArgumentsSet.py
+++ b/DBTools/python/condorSubArgumentsSet.py
@@ -10,7 +10,7 @@ CondorSubArgumentsSet = {
     1 : {'Executable' : ''},
     2 : {'Universe' : 'vanilla'},
-    3 : {'request_memory ' : '2048MB'},
+    3 : {'request_memory ' : '4092MB'},
     4 : {'Arguments' : ''},
     5 : {'Output' : 'condor_$(Process).out'},
     6 : {'Error' : 'condor_$(Process).err'},
@@ -23,7 +23,7 @@
     1 : {'Executable' : ''},
     2 : {'Universe' : 'vanilla'},
     3 : {'Getenv' : 'True'},
-    4 : {'request_memory ' : '2048MB'},
+    4 : {'request_memory ' : '4092MB'},
     5 : {'Arguments' : ''},
     6 : {'Output' : 'condor_$(Process).out'},
     7 : {'Error' : 'condor_$(Process).err'},
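Reviewer note on the `histogramUtilities.py` hunk above: since ROOT 6.22 the PyROOT bindings no longer accept a plain Python float (or the removed `ROOT.Double`, visible in the commented-out line) for a C++ `Double_t&` out-parameter, so `intError` must be a `ctypes.c_double`. This assumes `from ctypes import c_double` is already imported elsewhere in the file, which this hunk does not show. A minimal standalone sketch of the pattern:

```python
# Minimal sketch (not part of the patch): why IntegralAndError needs c_double.
# A Python float is immutable, so it cannot receive the error written through
# the Double_t& reference; ctypes.c_double provides a writable slot.
from ctypes import c_double
import ROOT

hist = ROOT.TH1D("h", "h", 10, 0.0, 10.0)
hist.FillRandom("gaus", 1000)

intError = c_double(0.0)                        # writable out-parameter
integral = hist.IntegralAndError(1, 10, intError)
print(integral, intError.value)                 # .value holds the propagated error
```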
diff --git a/DBTools/python/createEventLists.py b/DBTools/python/createEventLists.py
new file mode 100755
index 00000000..22048fac
--- /dev/null
+++ b/DBTools/python/createEventLists.py
@@ -0,0 +1,57 @@
+#!/usr/bin/python3
+
+import json
+import ROOT as r
+import numpy as np
+import sys
+import os
+
+r.gInterpreter.Declare(
+    '''
+    #include "OSUT3Analysis/DBTools/interface/getEvents.h"
+
+    void dummyWrapper(){
+        getEvents();
+    }
+    '''
+)
+r.gSystem.Load('libOSUT3AnalysisDBTools.so')
+
+if __name__ == "__main__":
+
+    if len(sys.argv) < 3:
+        print("Error: Need to provide the input json file and the job number")
+        sys.exit(1)
+
+    outputDir = '/data/users/mcarrigan/condor/EventLists/'
+    if len(sys.argv) >= 4:
+        outputDir = sys.argv[3]
+
+    f_json = sys.argv[1]
+    job = int(sys.argv[2])
+
+    dataset = f_json.split('/')[-1].split('-')[0]
+    outputPath = outputDir + dataset + '/'
+
+    if not os.path.exists(outputPath):
+        os.mkdir(outputPath)
+
+    with open(f_json) as secondary_fin:
+        secondary_dict = json.load(secondary_fin)
+    filename = list(secondary_dict.keys())[job]
+
+    if not filename.startswith('root://'): filename = 'root://cmsxrootd.fnal.gov:/' + filename
+    print("Getting events for", filename)
+    events = r.getEventsInFile(filename)
+    events = sorted(events, key=lambda x: (x.runNum, x.lumiBlock, x.event))
+    tmpEvents = np.array([str(x.runNum)+':'+str(x.lumiBlock)+':'+str(x.event) for x in events])
+    print(tmpEvents, len(tmpEvents))
+    fileStr = filename.split('/')[-1]
+    outputFile = outputPath + fileStr
+    np.savez(outputFile, eventList=tmpEvents)
+    print("saved file {} with {} events".format(outputFile, len(tmpEvents)))
\ No newline at end of file
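For reference, the `.npz` files written by `createEventLists.py` can be read back as below. Note that `np.savez` appends `.npz` to the file name, so a sibling named `somefile.root` is stored as `somefile.root.npz`, which matches the `fileStr + '.npz'` lookup added to `getSiblings.py` further down. The path in this sketch is hypothetical:

```python
# Illustrative reader (not in the patch). Events are stored as
# "run:lumi:event" strings under the 'eventList' key of the archive.
import numpy as np

data = np.load('/data/users/mcarrigan/condor/EventLists/Muon/somefile.root.npz')  # hypothetical path
events = data['eventList']
print(len(events), events[:3])   # e.g. ['362154:12:3456789' ...]
```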
diff --git a/DBTools/python/getSiblings.py b/DBTools/python/getSiblings.py
index 68262f18..00f7543b 100644
--- a/DBTools/python/getSiblings.py
+++ b/DBTools/python/getSiblings.py
@@ -47,6 +47,7 @@ def __init__(self, inputJSON, secondaryJSON, label):
         self.nJobs = -1
         self.jobNumber = -1
         self.eventsPerJob = -1
+        self.redirector = None
         self.local=False

@@ -59,6 +60,7 @@ def getSiblings(self):
             self.getFilesFromList(args.jobNumber, args.totalJobs)
             self.findMatches()
         else:
+            print("Running default option...")
             self.getFilesFromList(args.jobNumber, args.totalJobs)
             self.findMatches()

@@ -72,12 +74,14 @@ def getDASInfo(dataset, jsonName=None):
         test = ast.literal_eval(miniaod)

         files = [x['file'][0]['name'] for x in test]
+        events = [len(x['events'][0]['number']) if 'events' in list(x.keys()) else 0 for x in test]
         lumis = [[x['lumi'][0]['number']] if isinstance(x['lumi'][0]['number'], int) else x['lumi'][0]['number'] for x in test]
         runs = [[x['run'][0]['run_number']] if isinstance(x['run'][0]['run_number'], int) else x['run'][0]['run_number'] for x in test]

         miniDict = {}
-        for f, l, r in zip(files, lumis, runs):
+        for f, l, r, e in zip(files, lumis, runs, events):
+            if e == 0: continue
             miniDict[f] = {'lumis': l, 'runs': r}

         if jsonName:
@@ -125,20 +129,32 @@ def findMatches(self, jsonName='default.json'):
         with open(self.secondaryJSON) as secondary_fin:
             secondary_dict = json.load(secondary_fin)

+        secondary_dataset = self.secondaryJSON.split('/')[-1].split('-')[0]
+        eventsDir = '/data/users/mcarrigan/condor/EventLists/' + secondary_dataset
+
         for inputFile in self.inputFiles:
-            if inputFile not in primary_dict.keys():
-                continue
             p_file = inputFile
+            if 'root://' in inputFile:
+                self.redirector = 'root://' + inputFile.split('://')[1]
+                p_file = '/' + inputFile.split('://')[-1]
+                print("saving redirector as", self.redirector)
+            if p_file not in primary_dict.keys():
+                continue
             primary_info = primary_dict[p_file]
-            if p_file not in self.inputFiles: continue
+            if p_file not in self.inputFiles and inputFile not in self.inputFiles: continue
             sibs = []
             for s_file, secondary_info in secondary_dict.items():
                 if len(np.intersect1d(primary_info['runs'], secondary_info['runs'])) == 0:
                     continue
                 if len(np.intersect1d(primary_info['lumis'], secondary_info['lumis'])) != 0:
                     sibs.append(s_file)

             siblings[p_file] = sibs
-            self.getEventList(p_file, sibs)
+
+            self.getEventList(inputFile, sibs, eventsDir)

             if self.eventsPerJob != -1 and len(self.sharedEvents) > self.eventsPerJob:
                 break
@@ -152,7 +168,7 @@

         self.siblingDict = siblings

-    def getEventList(self, primaryFile, siblings):
+    def getEventList(self, primaryFile, siblings, eventsDir):
         primaryEvents = np.array([])

         if not primaryFile in self.inputFiles:
@@ -160,16 +176,25 @@
             return

         if not primaryFile.startswith("root://") and not self.local:
             primaryFile = 'root://cms-xrd-global.cern.ch:/' + primaryFile
+        print("getting primary file events")
         events = r.getEventsInFile(primaryFile)
         tmpEvents = np.array([str(x.runNum)+':'+str(x.lumiBlock)+':'+str(x.event) for x in events])
         primaryEvents = np.concatenate((primaryEvents, tmpEvents))

         secondaryEvents = np.array([])
-        for filename in siblings:
-            if not filename.startswith("root://") and not self.local:
-                filename = 'root://cms-xrd-global.cern.ch:/' + filename
-            events = r.getEventsInFile(filename)
-            tmpEvents = np.array([str(x.runNum)+':'+str(x.lumiBlock)+':'+str(x.event) for x in events])
+        for ifile, filename in enumerate(siblings):
+            fileStr = filename.split('/')[-1]
+            filePath = '/'.join([eventsDir, fileStr + '.npz'])
+            print("Looking for event list for file", filePath, filename)
+            if os.path.exists(filePath):
+                print("found event list!")
+                tmpEvents = np.load(filePath)['eventList']
+            else:
+                print("getting secondary file events", filename)
+                if not filename.startswith("root://") and not self.local:
+                    filename = 'root://cms-xrd-global.cern.ch:/' + filename
+                events = r.getEventsInFile(filename)
+                tmpEvents = np.array([str(x.runNum)+':'+str(x.lumiBlock)+':'+str(x.event) for x in events])
             secondaryEvents = np.concatenate((secondaryEvents, tmpEvents))

         this_sharedEvents = np.intersect1d(primaryEvents, secondaryEvents)
@@ -180,6 +205,7 @@

     def getFilesFromList(self, jobNumber, nJobs):
+        print("getting files from list")
         sys.path.append(os.getcwd())

         self.jobNumber = jobNumber
@@ -191,6 +217,7 @@
         #If no job number or number of jobs is passed use the full file list
         if jobNumber == -1 or nJobs == -1:
             self.inputFiles = datasetInfo.listOfFiles
+            print("Using full file list")
            return

         filesPerJob = int (math.floor (len (datasetInfo.listOfFiles) / nJobs))
@@ -204,7 +231,7 @@
         if runList[0].startswith('file:'):
             runList = [x.split('file:')[1] for x in runList]

-        #print("This is the run list:\n",runList)
+        print("This is the run list:\n", runList)

         self.inputFiles = runList
diff --git a/DBTools/python/osusub_cfg.py b/DBTools/python/osusub_cfg.py
index 071e6e70..9cccac8f 100644
--- a/DBTools/python/osusub_cfg.py
+++ b/DBTools/python/osusub_cfg.py
@@ -198,7 +198,7 @@ def skimListExists(dataset):

 def getSiblingList(sibList, runList, siblingDataset):
-    print("Trying to get sibling list")
+    print("Trying to get sibling list from", sibList)
     siblings = []

     fin = open(sibList, 'r')
diff --git a/DBTools/scripts/eventListWrapper.sh b/DBTools/scripts/eventListWrapper.sh
new file mode 100755
index 00000000..8c3c2c07
--- /dev/null
+++ b/DBTools/scripts/eventListWrapper.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/bash
+
+(>&2 echo "Starting job on " `date`)                    # Date/time of start of job
+(>&2 echo "Running on: `uname -a`")                     # Condor job is running on this node
+(>&2 echo "System software: `cat /etc/redhat-release`") # Operating system on that node
+(>&2 echo "Arguments passed to this script are: $@")    # Print all arguments
+
+export CMSSW_VERSION_LOCAL=$1
+source /cvmfs/cms.cern.ch/cmsset_default.sh
+export SCRAM_ARCH=slc7_amd64_gcc820
+#eval `scramv1 runtime -sh`
+
+python3 createEventLists.py $2 $3
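The matching rule in `findMatches` above is worth spelling out: a secondary file qualifies as a sibling when it shares at least one run number and at least one luminosity-block number with the primary file. A toy illustration of that rule (all file names and numbers here are made up):

```python
# Toy demo (not in the patch) of the run/lumi intersection test in findMatches.
import numpy as np

primary_info = {'runs': [362154], 'lumis': [10, 11, 12]}
secondary_dict = {
    'sib_a.root': {'runs': [362154], 'lumis': [12, 13]},  # shares run and lumi -> sibling
    'sib_b.root': {'runs': [362154], 'lumis': [40, 41]},  # shares run only -> not a sibling
    'sib_c.root': {'runs': [362155], 'lumis': [10]},      # no shared run -> skipped early
}

sibs = []
for s_file, secondary_info in secondary_dict.items():
    if len(np.intersect1d(primary_info['runs'], secondary_info['runs'])) == 0:
        continue
    if len(np.intersect1d(primary_info['lumis'], secondary_info['lumis'])) != 0:
        sibs.append(s_file)
print(sibs)  # ['sib_a.root']
```

Because runs and lumis are intersected independently, a file that shares a lumi number from a different run can in principle pass this filter; the event-level intersection performed afterwards in `getEventList` is what makes the final shared-event list exact.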
diff --git a/DBTools/scripts/osusub.py b/DBTools/scripts/osusub.py
index 5413781c..b7ab95dd 100755
--- a/DBTools/scripts/osusub.py
+++ b/DBTools/scripts/osusub.py
@@ -907,6 +907,8 @@ def MakeSpecificConfig(Dataset, Directory, SkimDirectory, Label, SkimChannelName
             ConfigFile.write("    print(\"No valid grid proxy. Not adding sibling files.\")\n")
         if arguments.localSkim != None:
             ConfigFile.write("siblings = ['file:{0}{1}'.format(" + arguments.localSkim + ", sib.split('/')[-1]) for sib in siblings] \n")
+        elif arguments.Redirector != "":
+            ConfigFile.write("siblings = ['root://{0}:/{1}'.format(\'" + RedirectorDic[arguments.Redirector] + "\', sib) for sib in siblings] \n")
         ConfigFile.write("pset.process.source.secondaryFileNames.extend(siblings)\n\n")

     # If the dataset has a Run3 skim sibling defined and not run over skim, add the corresponding files to the secondary file names
@@ -923,6 +925,8 @@ def MakeSpecificConfig(Dataset, Directory, SkimDirectory, Label, SkimChannelName
             ConfigFile.write("    print( \"No valid grid proxy. Not adding sibling files.\")\n" )
         if arguments.localSkim != None:
             ConfigFile.write("siblings = ['file:{0}{1}'.format(\'" + arguments.localSkim + "\', sib.split('/')[-1]) for sib in siblings] \n")
+        elif arguments.Redirector != "":
+            ConfigFile.write("siblings = ['root://{0}:/{1}'.format(\'" + RedirectorDic[arguments.Redirector] + "\', sib) for sib in siblings] \n")
         ConfigFile.write("pset.process.source.secondaryFileNames.extend(siblings)\n\n")

     #if ...: make this an if statement for running over no cuts
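The new `elif` branches write a redirector-prefixed sibling list into the generated config when no local skim is given. Assuming a hypothetical `RedirectorDic` entry mapping `FNAL` to `cmsxrootd.fnal.gov` (the dictionary and `arguments.Redirector` are defined elsewhere in `osusub.py` and not shown in this hunk), the fragment emitted into the config would read:

```python
# Hypothetical expansion of the ConfigFile.write() call above, for
# --Redirector FNAL with RedirectorDic = {'FNAL': 'cmsxrootd.fnal.gov'} (assumed):
siblings = ['root://{0}:/{1}'.format('cmsxrootd.fnal.gov', sib) for sib in siblings]
pset.process.source.secondaryFileNames.extend(siblings)
```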
diff --git a/DBTools/scripts/submitCreateEventLists.py b/DBTools/scripts/submitCreateEventLists.py
new file mode 100644
index 00000000..d50fefbd
--- /dev/null
+++ b/DBTools/scripts/submitCreateEventLists.py
@@ -0,0 +1,157 @@
+import os
+import argparse
+import json
+import shutil
+import sys
+import numpy as np
+from OSUT3Analysis.DBTools.getSiblings import *
+
+def writeCondorSub(exe, nJobs, outDir, jsonFile, requirements, wrapper, cmssw):
+    exe = exe.split('/')[-1]
+    jsonFile = jsonFile.split('/')[-1]
+    wrapper = wrapper.split('/')[-1]
+    f = open('run.sub', 'w')
+    uid = os.getuid()
+    submitLines = """
+    Universe = vanilla
+    Rank = TARGET.IsLocalSlot
+    request_disk = {6}
+    request_memory = {5}
+    request_cpus = {4}
+    executable = {7}
+    arguments = {8} {2} $(PROCESS)
+    log = {3}/log_$(PROCESS).log
+    output = {3}/out_$(PROCESS).out
+    error = {3}/error_$(PROCESS).err
+    should_transfer_files = Yes
+    when_to_transfer_output = ON_EXIT
+    transfer_input_files = {3}/{1}, {3}/{2}, {3}/{7}
+    transfer_output_files = ""
+    getenv = true
+    x509userproxy = /tmp/x509up_u{9}
+    queue {0}
+    """.format(nJobs, exe, jsonFile, outDir, requirements[0], requirements[1], requirements[2], wrapper, cmssw, uid)
+
+    f.write(submitLines)
+    f.close()
+
+def checkEmpty(filename):
+    data = np.load(filename)
+    return len(data[data.files[0]]) == 0
+
+def checkFailures(jsonFile, logDir):
+    resubmitJobs = []
+    outputDir = jsonFile.split('/')[-1].split('-')[0]
+
+    print("Checking for failures")
+    print(jsonFile, logDir, outputDir)
+
+    # Open the json file to check file names against job numbers
+    f_json = open('/'.join([logDir, jsonFile]), 'r')
+    a_json = json.load(f_json)
+    files = [x.split('/')[-1] for x in list(a_json.keys())]
+
+    # Loop over output files to check for issues
+    for filename in os.listdir(logDir):
+        if not filename.endswith('.out'): continue
+
+        # Get the job number
+        runNum = filename.split('_')[-1].split('.')[0]
+
+        # Check that the output event list exists
+        thisFileName = files[int(runNum)]
+        eventListDir = '/data/users/mcarrigan/condor/EventLists/{}/{}.npz'.format(outputDir, thisFileName)
+        if not os.path.exists(eventListDir):
+            resubmitJobs.append(runNum)
+            print("Output file {} does not exist".format(eventListDir), runNum)
+            continue
+
+        if checkEmpty(eventListDir):
+            resubmitJobs.append(runNum)
+            print("Output file {} is empty".format(eventListDir), runNum)
+            continue
+
+        # Check for file-open failures in the job output
+        with open('/'.join([logDir, filename]), 'r') as fin:
+            for line in fin:
+                if 'Failed to open file after 5 attempts' in line:
+                    resubmitJobs.append(runNum)
+                    print("File {} had trouble opening root files".format(filename), runNum)
+                    break
+
+    resubmitScript = logDir + '/resubmit.sub'
+    shutil.copy(logDir + '/run.sub', resubmitScript)
+
+    with open(resubmitScript, 'r') as file:
+        lines = file.readlines()
+
+    # Replace the queue line so only the failed jobs are resubmitted
+    for i, line in enumerate(lines):
+        if line.strip().startswith('queue'):
+            lines[i] = '    queue Process in {}\n'.format(' '.join(map(str, resubmitJobs)))
+            break
+
+    # Write the modified lines back to the file
+    with open(resubmitScript, 'w') as file:
+        file.writelines(lines)
+
+    cmd = 'condor_submit {}'.format(resubmitScript)
+    os.system(cmd)
+    print(cmd)
+
+    print("Jobs that need reprocessing", resubmitJobs)
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-j", "--json", type=str, help="Input json of dataset to get events from")
+    parser.add_argument('-d', '--dataset', type=str, help="Dataset to get json from")
+    parser.add_argument('-r', '--resubmit', action='store_true', help='Option to check for failed jobs and resubmit')
+    args = parser.parse_args()
+
+    jsonFile = '/home/mcarrigan/scratch0/disTracksML/CMSSW_13_0_13/src/DisappTrks/BackgroundEstimation/test/debugMuonSkim/Muon_Run2022E-EXODisappTrk-27Jun2023-v1_AOD.json'
+    wrapper = os.environ['CMSSW_BASE'] + '/src/OSUT3Analysis/DBTools/scripts/eventListWrapper.sh'
+    script = os.environ['CMSSW_BASE'] + '/src/OSUT3Analysis/DBTools/python/createEventLists.py'
+    requirements = ['2', '2000MB', '3000MB'] # CPUs, memory, disk
+    cmssw = os.environ['CMSSW_VERSION']
+
+    if args.json:
+        jsonFile = args.json
+
+    if args.dataset:
+        jsonFile = args.dataset.replace('/', '_')[1:]
+        if not args.resubmit:
+            getSiblings.getDASInfo(args.dataset, jsonName='{}.json'.format(jsonFile))
+        jsonFile = '{}.json'.format(jsonFile)
+
+    outputDir = '/abyss/users/mcarrigan/log/DisappTrks/EventLists_{}'.format(jsonFile.split('/')[-1].split('.')[0])
+
+    if args.resubmit:
+        checkFailures(jsonFile, outputDir)
+        sys.exit(0)
+
+    nJobs = 0
+    with open(jsonFile) as fin:
+        f_json = json.load(fin)
+        nJobs = len(f_json.keys())
+
+    if not os.path.exists(outputDir):
+        os.mkdir(outputDir)
+
+    writeCondorSub(script, nJobs, outputDir, jsonFile, requirements, wrapper, cmssw)
+
+    if not args.resubmit:
+        shutil.copy(script, outputDir)
+        shutil.copy('run.sub', outputDir)
+        shutil.copy(jsonFile, outputDir)
+        shutil.copy('eventListWrapper.sh', outputDir)
+
+    os.system('condor_submit run.sub')
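One detail of the resubmission logic above worth verifying: `checkFailures` treats an event list that exists but is empty the same as a missing one, via the `checkEmpty` helper. A self-contained check of that behavior (the `/tmp` path is illustrative only):

```python
# Quick demo (not in the patch): an empty .npz event list is flagged for resubmission.
import numpy as np

def checkEmpty(filename):
    data = np.load(filename)
    return len(data[data.files[0]]) == 0

np.savez('/tmp/empty_list.npz', eventList=np.array([]))
print(checkEmpty('/tmp/empty_list.npz'))   # True -> the job would be queued again
```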