From 93188c1421d2c463585e87ad83929d61d51a4057 Mon Sep 17 00:00:00 2001
From: Juraj Smiesko
Date: Wed, 18 Sep 2024 16:47:52 +0200
Subject: [PATCH] Adjusting stages source example

---
 examples/data_source/stages_source.py | 81 +++++++++++++++------------
 1 file changed, 45 insertions(+), 36 deletions(-)

diff --git a/examples/data_source/stages_source.py b/examples/data_source/stages_source.py
index ad42225fd1..70dbe1a082 100644
--- a/examples/data_source/stages_source.py
+++ b/examples/data_source/stages_source.py
@@ -1,34 +1,45 @@
-# list of processes (mandatory)
-processList = {
-    'p8_ee_WW_ecm240': {'output': 'p8_ee_WW_ecm240_out'}
-}
-
-# Production tag when running over EDM4Hep centrally produced events, this
-# points to the yaml files for getting sample statistics (mandatory)
-prodTag = "FCCee/winter2023/IDEA/"
-
-# Optional: output directory, default is local running directory
-outputDir = "."
-
-# Ncpus, default is 4, -1 uses all cores available
-# nCPUS = -1
-
-# How to read input files
-useDataSource = True
-
-testFile = 'https://fccsw.web.cern.ch/fccsw/testsamples/' \
-           'edm4hep1/p8_ee_WW_ecm240_edm4hep.root'
-
-# RDFanalysis class where the use defines the operations on the TTree
-# (mandatory)
-class RDFanalysis():
-
-    # analysis function to define the analyzers to process, please make sure
-    # you return the last dataframe, in this example it is df2
-    def analysers(df):
-
-        df2 = (
-            df
+'''
+Analysis example using PODIO ROOT DataSource for reading input files.
+'''
+
+
+class Analysis():
+    '''
+    Mandatory class, with three mandatory methods:
+      * __init__
+      * analyzers
+      * output
+    '''
+    def __init__(self, _):
+        # list of processes (mandatory)
+        self.process_list = {
+            'p8_ee_WW_ecm240': {'output': 'p8_ee_WW_ecm240_out'}
+        }
+
+        # Production tag when running over EDM4Hep centrally produced events,
+        # this points to the yaml files for getting sample statistics
+        # (mandatory)
+        self.prod_tag = 'FCCee/winter2023/IDEA/'
+
+        # Optional: output directory, default is local running directory
+        self.output_dir = "."
+
+        # Ncpus, default is 4, -1 uses all cores available
+        # self.n_threads = -1
+
+        # How to read input files
+        self.use_data_source = True
+
+        self.test_file = 'https://fccsw.web.cern.ch/fccsw/testsamples/' \
+                         'edm4hep1/p8_ee_WW_ecm240_edm4hep.root'
+
+    def analyzers(self, dframe):
+        '''
+        Analysis function to define the analyzers to process, please make sure
+        you return the last dataframe, in this example it is dframe2
+        '''
+        dframe2 = (
+            dframe
             .Define(
                 "electron_truth",
                 "recoParticle::selPDG(11)(MCRecoAssociations)")
@@ -38,12 +49,10 @@ def analysers(df):
                 "recoParticle::getPt(electron_truth)")
         )
 
-        return df2
+        return dframe2
 
-    def output():
-        branchList = [
+    def output(self) -> list[str]:
+        return [
             # "electron_truth",
             "electron_truth_pt"
         ]
-
-        return branchList