diff --git a/python/process.py b/python/process.py
index e521eccd19..c510eb440e 100644
--- a/python/process.py
+++ b/python/process.py
@@ -29,7 +29,8 @@ def get_entries(inpath: str) -> int | None:
     try:
         nevents = infile.Get("events").GetEntries()
     except AttributeError:
-        LOGGER.error('Input file is missing "events" TTree!\nAborting...')
+        LOGGER.error('Input file is missing "events" TTree!\n - %s'
+                     '\nAborting...', inpath)
         sys.exit(3)
 
     return nevents
@@ -63,7 +64,7 @@ def get_entries_sow(infilepath: str, nevents_max: Optional[int] = None, get_loca
     sumOfWeightsTTree = 0.
 
     # check for empty chunk (can this be improved? exception from RDF cannot be caught it seems?)
-    tree =infile.Get("events")
+    tree = infile.Get("events")
     if not tree:
         print("Tree not found in file", infilepath, " possibly empty chunk - continuing with next one.")
         infile.Close()
@@ -71,7 +72,7 @@ def get_entries_sow(infilepath: str, nevents_max: Optional[int] = None, get_loca
 
 
     try:
-        #use a RDF here too so the nevents restriction option can be imposed easily for the local events
+        # use a RDF here too so the nevents restriction option can be imposed easily for the local events
         rdf_tmp = ROOT.ROOT.RDataFrame("events", infilepath)
 
         if nevents_max:
@@ -85,7 +86,7 @@ def get_entries_sow(infilepath: str, nevents_max: Optional[int] = None, get_loca
         # infile.Get("events").Draw('EventHeader.weight[0]>>histo')
         # histo=ROOT.gDirectory.Get('histo')
         histo = rdf_tmp.Histo1D(weight_name)
-        sumOfWeightsTTree=float(eventsTTree)*histo.GetMean()
+        sumOfWeightsTTree = float(eventsTTree) * histo.GetMean()
     except cppyy.gbl.std.runtime_error:
         LOGGER.error('Error: Event weights requested with do_weighted,'
                      'but input file does not contain weight column. Aborting.')
diff --git a/python/run_analysis.py b/python/run_analysis.py
index b1bfe51f84..52e3a9b660 100644
--- a/python/run_analysis.py
+++ b/python/run_analysis.py
@@ -886,7 +886,8 @@ def run_histmaker(args, rdf_module, anapath):
         LOGGER.info('Writing out process %s, nEvents processed %s',
                     process,
                     f'{evtcount.GetValue():,}')
-        with ROOT.TFile(f'{output_dir}/{process}.root', 'RECREATE'):
+        with ROOT.TFile(os.path.join(output_dir, f'{process}.root'),
+                        'RECREATE'):
             for hist in hists_to_write.values():
                 if do_scale:
                     hist.Scale(scale * int_lumi)
diff --git a/python/run_final_analysis.py b/python/run_final_analysis.py
index f439210830..8b8a90c40b 100644
--- a/python/run_final_analysis.py
+++ b/python/run_final_analysis.py
@@ -41,7 +41,8 @@ def get_entries(infilepath: str) -> tuple[int, int]:
     try:
         events_in_ttree = infile.Get("events").GetEntries()
     except AttributeError:
-        LOGGER.error('Input file is missing "events" TTree!\nAborting...')
+        LOGGER.error('Input file is missing "events" TTree!\n - %s'
+                     '\nAborting...', infilepath)
         sys.exit(3)
 
     return events_processed, events_in_ttree
@@ -67,6 +68,42 @@ def get_processes(rdf_module: object) -> list[str]:
     return process_list
 
 
+# _____________________________________________________________________________
+def find_sample_files(input_dir: str,
+                      sample_name: str) -> list[str]:
+    '''
+    Find input files for the specified sample name.
+    '''
+    result: list[str] = []
+
+    full_input_path = os.path.abspath(os.path.join(input_dir, sample_name))
+
+    # Find all input files ending with .root
+    if os.path.isdir(full_input_path):
+        all_files = os.listdir(full_input_path)
+        # Remove files not ending with `.root`
+        all_files = [f for f in all_files if f.endswith('.root')]
+        # Remove directories
+        all_files = [f for f in all_files
+                     if os.path.isfile(os.path.join(full_input_path, f))]
+        result = [os.path.join(full_input_path, f) for f in all_files]
+
+    # Handle case when there is just one input file
+    if len(result) < 1:
+        if os.path.isfile(full_input_path + '.root'):
+            result.append(full_input_path + '.root')
+        else:
+            LOGGER.debug('Input file "%s" does not exist!',
+                         full_input_path + '.root')
+
+    if len(result) < 1:
+        LOGGER.error('Can not find input files for "%s" sample!\nAborting...',
+                     sample_name)
+        sys.exit(3)
+
+    return result
+
+
 # _____________________________________________________________________________
 def save_results(results: dict[str, dict[str, any]],
                  rdf_module: object) -> None:
@@ -279,47 +316,34 @@ def run(rdf_module, args) -> None:
 
         file_list[process_name] = ROOT.vector('string')()
 
-        infilepath = input_dir + process_name + '.root'  # input file
-
-        if not os.path.isfile(infilepath):
-            LOGGER.debug('File %s does not exist!\nTrying if it is a '
-                         'directory as it might have been processed in batch.',
-                         infilepath)
-        else:
-            LOGGER.info('Open file:\n %s', infilepath)
+        flist = find_sample_files(input_dir, process_name)
+        for filepath in flist:
+            # TODO: check in `get_entries()` if file is valid and remove it
+            # from the input list if it is not
             if do_weighted:
-                process_events[process_name], events_ttree[process_name], \
-                    sow_process[process_name], sow_ttree[process_name] = \
-                    get_entries_sow(infilepath, weight_name="weight")
-            else:
-                process_events[process_name], events_ttree[process_name] = \
-                    get_entries(infilepath)
-            file_list[process_name].push_back(infilepath)
-
-        indirpath = input_dir + process_name
-        if os.path.isdir(indirpath):
-            #reset the nevts/sow counters to avoid wrong counting in case a single file of same name (e.g. local test output) also exists in the same directory
-            process_events[process_name] = 0
-            events_ttree[process_name] = 0
-            sow_process[process_name] = 0.
-            sow_ttree[process_name] = 0.
-
-            info_msg = f'Open directory {indirpath}'
-            flist = glob.glob(indirpath + '/chunk*.root')
-            for filepath in flist:
-                info_msg += '\n\t' + filepath
-                if do_weighted:
-                    chunk_process_events, chunk_events_ttree, \
+                chunk_process_events, chunk_events_ttree, \
                         chunk_sow_process, chunk_sow_ttree = \
                         get_entries_sow(filepath, weight_name="weight")
-                    sow_process[process_name] += chunk_sow_process
-                    sow_ttree[process_name] += chunk_sow_ttree
-                else:
-                    chunk_process_events, chunk_events_ttree = \
-                        get_entries(filepath)
-                process_events[process_name] += chunk_process_events
-                events_ttree[process_name] += chunk_events_ttree
-                file_list[process_name].push_back(filepath)
+                sow_process[process_name] += chunk_sow_process
+                sow_ttree[process_name] += chunk_sow_ttree
+            else:
+                chunk_process_events, chunk_events_ttree = \
+                    get_entries(filepath)
+            process_events[process_name] += chunk_process_events
+            events_ttree[process_name] += chunk_events_ttree
+            file_list[process_name].push_back(filepath)
+        if len(file_list[process_name]) < 1:
+            LOGGER.error('No valid input files for sample "%s" '
+                         'found!\nAborting...', process_name)
+            sys.exit(3)
+        if len(file_list[process_name]) == 1:
+            LOGGER.info('Loading events for sample "%s" from file:\n - %s',
+                        process_name, file_list[process_name][0])
+        else:
+            info_msg = f'Loading events for sample "{process_name}"'
+            info_msg += ' from files:'
+            for filepath in file_list[process_name]:
+                info_msg += f'\n - {filepath}'
         LOGGER.info(info_msg)
 
         info_msg = 'Processed events:'
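Note on the new find_sample_files() helper in run_final_analysis.py: the snippet below is a minimal, self-contained sketch of the lookup behaviour it introduces, not code from the patch. The function resolve_sample(), the sample names sample_a/sample_b and the temporary directory layout are invented for illustration; the real helper additionally logs via LOGGER and calls sys.exit(3) when nothing is found. The point of the refactor is that both the single-file output (<input_dir>/<sample>.root) and the batch output directory (<input_dir>/<sample>/*.root) collapse into one flat file list, so run() no longer needs two separate code paths.

import os
import tempfile


def resolve_sample(input_dir: str, sample_name: str) -> list[str]:
    # Simplified stand-in for find_sample_files(): same lookup order,
    # without the LOGGER calls and sys.exit(3) of the real helper.
    full = os.path.abspath(os.path.join(input_dir, sample_name))
    result: list[str] = []
    if os.path.isdir(full):
        # All plain .root files inside <input_dir>/<sample_name>/
        result = [os.path.join(full, f) for f in os.listdir(full)
                  if f.endswith('.root')
                  and os.path.isfile(os.path.join(full, f))]
    if not result and os.path.isfile(full + '.root'):
        # Fall back to a single <input_dir>/<sample_name>.root file
        result = [full + '.root']
    return result


with tempfile.TemporaryDirectory() as tmp:
    # Case 1: a single file  <input_dir>/<sample>.root
    open(os.path.join(tmp, 'sample_a.root'), 'w').close()
    # Case 2: a directory of chunks  <input_dir>/<sample>/chunk*.root
    os.mkdir(os.path.join(tmp, 'sample_b'))
    for chunk in ('chunk0.root', 'chunk1.root'):
        open(os.path.join(tmp, 'sample_b', chunk), 'w').close()

    print(resolve_sample(tmp, 'sample_a'))  # the single sample_a.root file
    print(resolve_sample(tmp, 'sample_b'))  # both chunk files inside sample_b/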