From eef66b3426237a80ea54902a5afa429c2afd5a72 Mon Sep 17 00:00:00 2001 From: Juraj Smiesko <34742917+kjvbrt@users.noreply.github.com> Date: Tue, 1 Oct 2024 16:05:44 +0200 Subject: [PATCH] podio::DataSource (#309) * Integrating EDM4hep RDataSource * Add legacy reader * Split off legacy reader * Protecting collection retrieval by mutex * Add source test * Updated man pages * Renaming Source na DataSource * Adding source test for run stages * Add tests for the standalone mode * Adding test files per process for histmaker * Adding selector by size and sorter by pT * Few more analyzers * Putting back things omitted from run_analysis.py * Adding e4hsource in LD_LIBRARY_PATH for managed tests * Making building of the source optional * Removed legacy support * Separated analyzers using Collections into independent headers * Reimplementing analyzers needed for the stage1 of the example analysis * Adjusitng test input files * Moving recoParticle definition * Changes to use podio::ROOTDataSource * using podio::DataSource * Adjusting stages source example * Adding C++ analysis tests * Updating examples * Formatting * Removing e4hsource * Adjust clang-format check * Formatting * Using PodioSource namespace * Adding .cache to .gitignore * Formatting --- .github/workflows/format.yml | 7 +- .gitignore | 1 + CMakeLists.txt | 3 + analyzers/dataframe/CMakeLists.txt | 11 + analyzers/dataframe/FCCAnalyses/Defines.h | 6 - analyzers/dataframe/FCCAnalyses/LinkDef.h | 35 +- analyzers/dataframe/FCCAnalyses/LinkSource.h | 92 ++++++ .../FCCAnalyses/ReconstructedParticle.h | 10 +- .../FCCAnalyses/ReconstructedParticleSource.h | 242 ++++++++++++++ analyzers/dataframe/FCCAnalyses/TrackSource.h | 45 +++ .../dataframe/src/ReconstructedParticle.cc | 8 +- .../src/ReconstructedParticleSource.cc | 300 ++++++++++++++++++ analyzers/dataframe/src/TrackSource.cc | 49 +++ cmake/FCCAnalysesFunctions.cmake | 2 +- .../higgs/mH-recoil/mumu/analysis_stage1.py | 54 ++-- examples/FCChh/HH_bbtautau/analysis.cc | 3 - examples/data_source/analysis_stage1.py | 134 ++++++++ examples/data_source/histmaker_source.py | 49 +++ examples/data_source/stages_source.py | 61 ++++ examples/data_source/standalone.py | 50 +++ man/man1/fccanalysis-build.1 | 6 +- man/man1/fccanalysis-run.1 | 6 +- man/man7/fccanalysis-script.7 | 14 +- python/anascript.py | 2 +- python/build_analysis.py | 11 +- python/parsers.py | 9 + python/process.py | 7 +- python/run_analysis.py | 143 ++++++--- python/run_fccanalysis.py | 71 ++++- setup.sh | 1 + tests/CMakeLists.txt | 9 + tests/integration/CMakeLists.txt | 59 ++++ tests/integration/data_source.cxx | 148 +++++++++ .../integration/data_source_associations.cxx | 269 ++++++++++++++++ tests/integration/low_level_associations.cxx | 70 ++++ tests/integration/low_level_edm4hep.cxx | 105 ++++++ tests/unittest/CMakeLists.txt | 6 +- 37 files changed, 1966 insertions(+), 132 deletions(-) create mode 100644 analyzers/dataframe/FCCAnalyses/LinkSource.h create mode 100644 analyzers/dataframe/FCCAnalyses/ReconstructedParticleSource.h create mode 100644 analyzers/dataframe/FCCAnalyses/TrackSource.h create mode 100644 analyzers/dataframe/src/ReconstructedParticleSource.cc create mode 100644 analyzers/dataframe/src/TrackSource.cc create mode 100644 examples/data_source/analysis_stage1.py create mode 100644 examples/data_source/histmaker_source.py create mode 100644 examples/data_source/stages_source.py create mode 100644 examples/data_source/standalone.py create mode 100644 tests/integration/CMakeLists.txt create mode 100644 tests/integration/data_source.cxx create mode 100644 tests/integration/data_source_associations.cxx create mode 100644 tests/integration/low_level_associations.cxx create mode 100644 tests/integration/low_level_edm4hep.cxx diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 6884af8ae7..e3a889861b 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -27,8 +27,5 @@ jobs: run: | docker exec CI_container /bin/bash -c 'cd ./Package; \ source /cvmfs/sw.hsf.org/key4hep/setup.sh;\ - git clang-format --style=file $(git merge-base upstream/master HEAD)' - - name: Check cleanliness - run: | - docker exec CI_container /bin/bash -c 'cd ./Package; \ - git diff' + git clang-format --diff --style=file $(git merge-base upstream/master HEAD) + git clang-format --diffstat --style=file $(git merge-base upstream/master HEAD)' diff --git a/.gitignore b/.gitignore index 28858293fb..82fda36413 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ __pycache__/ # Editors *~ .vimlocal +.cache/ # Distribution / packaging .Python diff --git a/CMakeLists.txt b/CMakeLists.txt index a220f1f363..dd403b566e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,9 @@ option(USE_EXTERNAL_CATCH2 "Link against an external Catch2 v3 static library, o option(FCCANALYSES_DOCUMENTATION "Whether or not to create doxygen doc target." ON) +set(WITH_PODIO_DATASOURCE ON CACHE STRING "Enable PODIO DataSource") +set_property(CACHE WITH_PODIO_DATASOURCE PROPERTY STRINGS ON OFF) + #--- Export compile commands -------------------------------------------------- set(CMAKE_EXPORT_COMPILE_COMMANDS ON) diff --git a/analyzers/dataframe/CMakeLists.txt b/analyzers/dataframe/CMakeLists.txt index c82e11cd46..6b633c1d27 100644 --- a/analyzers/dataframe/CMakeLists.txt +++ b/analyzers/dataframe/CMakeLists.txt @@ -32,6 +32,14 @@ if(NOT WITH_ACTS) list(FILTER sources EXCLUDE REGEX "VertexFinderActs.cc") endif() +if(NOT WITH_PODIO_DATASOURCE) + list(FILTER headers EXCLUDE REGEX "LinkSource.h") + list(FILTER headers EXCLUDE REGEX "ReconstructedParticleSource.h") + list(FILTER sources EXCLUDE REGEX "ReconstructedParticleSource.cc") + list(FILTER headers EXCLUDE REGEX "TrackSource.h") + list(FILTER sources EXCLUDE REGEX "TrackSource.cc") +endif() + message(STATUS "FCCAnalyses headers:\n ${headers}") message(STATUS "FCCAnalyses sources:\n ${sources}") @@ -59,7 +67,10 @@ target_link_libraries(FCCAnalyses PUBLIC ROOT::ROOTDataFrame EDM4HEP::edm4hep EDM4HEP::edm4hepDict + EDM4HEP::utils podio::podio + podio::podioRootIO + podio::podioDataSource ${ADDONS_LIBRARIES} ${DELPHES_LIBRARY} gfortran # todo: why necessary? diff --git a/analyzers/dataframe/FCCAnalyses/Defines.h b/analyzers/dataframe/FCCAnalyses/Defines.h index e537dd536e..7e8f510c2c 100644 --- a/analyzers/dataframe/FCCAnalyses/Defines.h +++ b/analyzers/dataframe/FCCAnalyses/Defines.h @@ -1,18 +1,12 @@ #ifndef DEFINES_ANALYZERS_H #define DEFINES_ANALYZERS_H -// std -#include -#include -#include - // ROOT #include "ROOT/RVec.hxx" #include "TLorentzVector.h" // EDM4hep #include "edm4hep/MCParticleData.h" -#include "edm4hep/ParticleIDData.h" #include "edm4hep/ReconstructedParticleData.h" /** diff --git a/analyzers/dataframe/FCCAnalyses/LinkDef.h b/analyzers/dataframe/FCCAnalyses/LinkDef.h index 23b891b186..a1eee99d85 100644 --- a/analyzers/dataframe/FCCAnalyses/LinkDef.h +++ b/analyzers/dataframe/FCCAnalyses/LinkDef.h @@ -8,31 +8,42 @@ //Dictionaries for output objects #pragma link C++ class std::vector+; +#pragma link C++ class ROOT::VecOps::RVec < TLorentzVector> + ; +#pragma link C++ class ROOT::VecOps::RVec < ROOT::VecOps::RVec < \ + TLorentzVector>> + \ + ; +#pragma link C++ class ROOT::VecOps::RVec < TVector3> + ; +#pragma link C++ class ROOT::VecOps::RVec < ROOT::VecOps::RVec < TVector3>> + ; + #pragma link C++ class std::vector+; -#pragma link C++ class ROOT::VecOps::RVec+; -#pragma link C++ class std::vector>+; -#pragma link C++ class std::vector>+; -#pragma link C++ class std::vector>+; +// EDM4hep #pragma link C++ class ROOT::VecOps::RVec+; #pragma link C++ class ROOT::VecOps::RVec+; #pragma link C++ class ROOT::VecOps::RVec+; #pragma link C++ class ROOT::VecOps::RVec+; -#pragma link C++ class ROOT::VecOps::RVec+; -#pragma link C++ class ROOT::VecOps::RVec+; +#pragma link C++ class ROOT::VecOps::RVec < edm4hep::ClusterData> + ; #pragma link C++ class ROOT::VecOps::RVec+; #pragma link C++ class ROOT::VecOps::RVec+; -#pragma link C++ class ROOT::VecOps::RVec+; +#pragma link C++ class ROOT::VecOps::RVec < ROOT::VecOps::RVec < \ + edm4hep::ReconstructedParticleData>> + \ + ; + +// Vectors +#pragma link C++ class ROOT::VecOps::RVec < std::vector < int>> + ; +#pragma link C++ class ROOT::VecOps::RVec < std::vector < float>> + ; +#pragma link C++ class ROOT::VecOps::RVec < std::vector < double>> + ; #pragma link C++ class ROOT::VecOps::RVec>+; #pragma link C++ class ROOT::VecOps::RVec>+; #pragma link C++ class ROOT::VecOps::RVec>+; -#pragma link C++ class ROOT::VecOps::RVec>+; -#pragma link C++ class ROOT::VecOps::RVec>+; +#pragma link C++ class std::vector < std::vector < int>> + ; +#pragma link C++ class std::vector < std::vector < float>> + ; +#pragma link C++ class std::vector < std::vector < double>> + ; #pragma link C++ class ROOT::VecOps::RVec+; -#pragma link C++ class ROOT::VecOps::RVec>+; -#pragma link C++ class ROOT::VecOps::RVec>+; -#pragma link C++ class ROOT::VecOps::RVec>+; +#pragma link C++ class ROOT::VecOps::RVec < ROOT::VecOps::RVec < \ + FCCAnalyses::VertexingUtils::FCCAnalysesVertex>> + \ + ; //to load all other functions #pragma link C++ function dummyLoader; diff --git a/analyzers/dataframe/FCCAnalyses/LinkSource.h b/analyzers/dataframe/FCCAnalyses/LinkSource.h new file mode 100644 index 0000000000..1237923f90 --- /dev/null +++ b/analyzers/dataframe/FCCAnalyses/LinkSource.h @@ -0,0 +1,92 @@ +#ifndef SOURCE_LINK_ANALYZERS_H +#define SOURCE_LINK_ANALYZERS_H + +// EDM4hep +#include "edm4hep/RecoMCParticleLinkCollection.h" + +namespace FCCAnalyses ::PodioSource ::Link { +/** + * \brief Analyzer to select links where the MC particle has a specified PDG + * ID. + * + * \param[in] pdg Desired PDG ID of the MC particle. + */ +struct selPDG { + const int m_pdg; + + explicit selPDG(const int pdg) : m_pdg(pdg){}; + + template T operator()(const T &inLinkColl) { + T result; + result.setSubsetCollection(); + + for (const auto &link : inLinkColl) { + const auto &particle = link.getTo(); + if (particle.getPDG() == m_pdg) { + result.push_back(link); + } + } + + return result; + } +}; + +/** + * \brief Analyzer to select links where MC particle has a specified absolute + * value of PDG ID. + * + * \param pdg[in] Desired absolute value of PDG ID of the MC particle. + */ +struct selAbsPDG { + const int m_pdg; + + explicit selAbsPDG(const int pdg) : m_pdg(pdg) { + if (m_pdg < 0) { + throw std::invalid_argument("FCCAnalyses::PodioSource::Link::sel_absPDG: " + "Received negative value!"); + } + }; + + template auto operator()(const T &inLinkColl) { + T result; + result.setSubsetCollection(); + + for (const auto &link : inLinkColl) { + const auto &particle = link.getTo(); + if (std::abs(particle.getPDG()) == m_pdg) { + result.push_back(link); + } + } + + return result; + }; +}; + +/** + * \brief Analyzer to select links where the MC particle has a specified + * generator status. + * + * \param status[in] Desired generator status of the particle. + */ +struct selGenStatus { + const int m_status; + + explicit selGenStatus(const int status) : m_status(status){}; + + template T operator()(const T &inLinkColl) { + T result; + result.setSubsetCollection(); + + for (const auto &link : inLinkColl) { + const auto &particle = link.getTo(); + if (particle.getGeneratorStatus() == m_status) { + result.push_back(link); + } + } + + return result; + } +}; +} // namespace FCCAnalyses::PodioSource::Link + +#endif /* SOURCE_LINK_ANALYZERS_H */ diff --git a/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h b/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h index 3321a2b680..80599d58f8 100644 --- a/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h +++ b/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h @@ -2,11 +2,15 @@ #ifndef RECONSTRUCTEDPARTICLE_ANALYZERS_H #define RECONSTRUCTEDPARTICLE_ANALYZERS_H +// STL #include #include +// ROOT #include "TLorentzVector.h" #include "ROOT/RVec.hxx" + +// EDM4hep #include "edm4hep/ReconstructedParticleData.h" #include "edm4hep/ParticleIDData.h" @@ -97,9 +101,6 @@ namespace ReconstructedParticle{ ROOT::VecOps::RVec operator() (ROOT::VecOps::RVec tags, ROOT::VecOps::RVec in); }; - - - /// return reconstructed particles ROOT::VecOps::RVec get(ROOT::VecOps::RVec index, ROOT::VecOps::RVec in); @@ -151,7 +152,7 @@ namespace ReconstructedParticle{ /// return the TlorentzVector of the one input ReconstructedParticle TLorentzVector get_tlv(edm4hep::ReconstructedParticleData in); - /// return visible 4-momentum vector + /// return visible 4-momentum vector TLorentzVector get_P4vis(ROOT::VecOps::RVec in); /// concatenate both input vectors and return the resulting vector @@ -168,7 +169,6 @@ namespace ReconstructedParticle{ /// get number of b-jets int getJet_ntags(ROOT::VecOps::RVec in); - }//end NS ReconstructedParticle }//end NS FCCAnalyses diff --git a/analyzers/dataframe/FCCAnalyses/ReconstructedParticleSource.h b/analyzers/dataframe/FCCAnalyses/ReconstructedParticleSource.h new file mode 100644 index 0000000000..2d61a0dab6 --- /dev/null +++ b/analyzers/dataframe/FCCAnalyses/ReconstructedParticleSource.h @@ -0,0 +1,242 @@ +#ifndef ANALYZERS_SOURCE_RECONSTRUCTED_PARTICLE_H +#define ANALYZERS_SOURCE_RECONSTRUCTED_PARTICLE_H + +// ROOT +#include "ROOT/RVec.hxx" + +// EDM4hep +#include "edm4hep/RecoMCParticleLinkCollection.h" +#include "edm4hep/ReconstructedParticleCollection.h" + +namespace FCCAnalyses ::PodioSource ::ReconstructedParticle { +// -------------------- Selectors ----------------------------------------- + +/** + * \brief Analyzer to select reconstructed particles associated with MC + * particle of the specified PDG ID. + */ +struct selPDG { + const int m_pdg; + /** + * \brief Constructor. + * + * \param[in] pdgID Desired value of the PDG ID. + */ + explicit selPDG(const int pdgID); + /** + * \brief Operator over the input link collection. + * + * \param[in] inLinkColl Input collection of the reco-MC links. + * \param[out] result Collection of the reconstructed particles associated + * with the MC particle of the desired PDG ID. + */ + edm4hep::ReconstructedParticleCollection + operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl); +}; + +/** + * \brief Analyzer to select reconstructed particles associated with MC + * particle with the specified absolute value of the PDG ID. + */ +struct selAbsPDG { + const int m_absPdg; + /** + * \brief Constructor. + * + * \param[in] pdgID Desired absolute value of the PDG ID. + */ + explicit selAbsPDG(const int pdgID); + /** + * \brief Operator over the input link collection. + * + * \param[in] inLinkColl Input collection of the MC-reco links. + * \param[out] result Collection of the reconstructed particles associated + * with the MC particle with the desired absolute value + * of the PDG ID. + */ + edm4hep::ReconstructedParticleCollection + operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl); +}; + +/** + * \brief Select reconstructed particles with transverse momentum greater + * than a minimum value [GeV]. + */ +struct selPt { + const float m_minPt; + /** + * \brief Constructor. + * + * \param[in] minPt Transverse momentum threshold [GeV]. + */ + explicit selPt(float minPt); + /** + * \brief Operator over input link collection. + * + * \param[in] inColl Input collection of the reconstructed particles. + * \param[out] result Collection of the selected reconstructed particles. + */ + edm4hep::ReconstructedParticleCollection + operator()(const edm4hep::ReconstructedParticleCollection &inColl); +}; + +/** + * \brief Analyzer to select specified number of reconstructed particles. + */ +struct selUpTo { + const size_t m_size; + /** + * \brief Constructor. + * + * \param[in] size Desired number of reconstructed particles. + */ + explicit selUpTo(const size_t size); + /** + * \brief Operator over input link collection. + * + * \param[out] inColl Input collection of reconstructed particles. + * \param[out] result Output collection of reconstructed particles. + */ + edm4hep::ReconstructedParticleCollection + operator()(const edm4hep::ReconstructedParticleCollection &inColl); +}; + +/** + * \brief Analyzer to select reconstructed particles associated with the MC + * particle of the desired generator status. + */ +struct selGenStatus { + const int m_status; + /** + * \brief Constructor. + * + * \param[in] status Desired generator status of the MC particle. + */ + explicit selGenStatus(const int status); + /** + * \brief Operator over input link collection. + * + * \param[in] inColl Input collection of the reco-MC particle links. + * \param[out] result Output collection of the reconstructed particles. + */ + edm4hep::ReconstructedParticleCollection + operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl); +}; + +// -------------------- Getters ------------------------------------------- + +/** + * \brief Get momenta of the input reconstructed particles. + * + * \param[in] inColl Input particle collection. + * \param[out] result Vector of particle momenta. + */ +ROOT::VecOps::RVec +getP(const edm4hep::ReconstructedParticleCollection &inColl); + +/** + * \brief Get transverse momenta (pT) of the input particles. + * + * \param[in] inColl Input particle collection. + * \param[out] result Vector of particle pTs. + */ +ROOT::VecOps::RVec +getPt(const edm4hep::ReconstructedParticleCollection &inColl); + +/** + * \brief Get rapidity (y) of the input reconstructed particles. + * + * \param[in] inColl Input particle collection. + * \param[out] result Vector of particle rapidities. + */ +ROOT::VecOps::RVec +getY(const edm4hep::ReconstructedParticleCollection &inColl); + +/** + * \brief Get energy (E) of the input reconstructed particles. + * + * \param[in] inColl Input particle collection. + * \param[out] result Vector of particle energies. + */ +ROOT::VecOps::RVec +getE(const edm4hep::ReconstructedParticleCollection &inColl); + +/** + * \brief Get mass of the input reconstructed particles. + * + * \param[in] inColl Input particle collection. + * \param[out] result Vector of particle masses. + */ +ROOT::VecOps::RVec +getMass(const edm4hep::ReconstructedParticleCollection &inColl); + +/** + * \brief Get charge of the input reconstructed particles. + * + * \param[in] inColl Input particle collection. + * \param[out] result Vector of particle charges. + */ +ROOT::VecOps::RVec +getCharge(const edm4hep::ReconstructedParticleCollection &inColl); + +// -------------------- Transformers -------------------------------------- + +/** + * \brief Sort input particles by pT. + * + * \param[in] inColl Input particles. + * \param[out] result Sorted collection of particles. + */ +edm4hep::ReconstructedParticleCollection +sortByPt(const edm4hep::ReconstructedParticleCollection &inColl); + +/** + * \brief Build two particle resonances from an arbitrary list of input + * reconstructed particles. + * Select the one closest to the pre-defined mass. + */ +struct resonanceBuilder { + const float m_resonanceMass; + /** + * \brief Constructor. + * + * \param[in] resonanceMass Desired value of the resonance mass. + */ + explicit resonanceBuilder(float resonanceMass); + /** + * \brief Operator over input particle collection. + * + * \param[in] inColl Input collection of the reconstructed particles. + * \param[out] result Selected reconstructed particle resonance. + */ + edm4hep::ReconstructedParticleCollection + operator()(const edm4hep::ReconstructedParticleCollection &inColl); +}; + +/** + * \brief Build the recoil from an arbitrary list of input resonances at the + * specified center of mass energy. + */ +struct recoilBuilder { + const float m_sqrts; + /** + * \brief Constructor. + * + * \param[in] sqrts Center of mass energy. + */ + explicit recoilBuilder(float sqrts); + /** + * \brief Operator over input collection of resonances. + * + * \param[in] inColl Input collection of resonances. + * \param[out] result Resulting recoil. + */ + edm4hep::ReconstructedParticleCollection + operator()(const edm4hep::ReconstructedParticleCollection &inColl); +}; + +} // namespace FCCAnalyses::PodioSource::ReconstructedParticle + +// namespace recoParticle = FCCAnalyses::PodioSource::ReconstructedParticle; + +#endif /* ANALYZERS_SOURCE_RECONSTRUCTED_PARTICLE_H */ diff --git a/analyzers/dataframe/FCCAnalyses/TrackSource.h b/analyzers/dataframe/FCCAnalyses/TrackSource.h new file mode 100644 index 0000000000..93ffb580d0 --- /dev/null +++ b/analyzers/dataframe/FCCAnalyses/TrackSource.h @@ -0,0 +1,45 @@ +#ifndef ANALYZERS_SOURCE_TRACK_H +#define ANALYZERS_SOURCE_TRACK_H + +// std +#include + +// ROOT +#include "ROOT/RVec.hxx" + +// EDM4hep +#include "edm4hep/TrackCollection.h" +#include "edm4hep/TrackMCParticleLinkCollection.h" + +namespace FCCAnalyses ::PodioSource ::Track { +/** + * \brief Analyzer to select tracks associated with a MC particle of a + * specified PDG ID. + * + * \param pdgID Desired PDG ID of the MC particle. + * \param chargeConjugateAllowed Whether to allow also charge conjugate + * PDG ID. Default value false --- charge conjugate not allowed. + */ +struct selPDG { + explicit selPDG(const int pdgID, const bool chargeConjugateAllowed = false); + const int m_pdg; + const bool m_chargeConjugateAllowed; + edm4hep::TrackCollection + operator()(const edm4hep::TrackMCParticleLinkCollection &inLinkColl); +}; + +/** + * \brief Analyzer to get number of track states + * + */ +ROOT::VecOps::RVec +getNstates(const edm4hep::TrackCollection &inColl); + +/** + * \brief Analyzer to get track D0. + * + */ +ROOT::VecOps::RVec getD0(const edm4hep::TrackCollection &inColl); +} // namespace FCCAnalyses::PodioSource::Track + +#endif /* ANALYZERS_SOURCE_TRACK_H */ diff --git a/analyzers/dataframe/src/ReconstructedParticle.cc b/analyzers/dataframe/src/ReconstructedParticle.cc index 65dd9cdd3a..fb01ebfa36 100644 --- a/analyzers/dataframe/src/ReconstructedParticle.cc +++ b/analyzers/dataframe/src/ReconstructedParticle.cc @@ -2,9 +2,12 @@ // std #include -#include #include +// ROOT +#include +#include + // EDM4hep #include "edm4hep/EDM4hepVersion.h" @@ -52,6 +55,7 @@ ROOT::VecOps::RVec sel_absType::operator()( result.emplace_back(in[i]); } } + return result; } @@ -115,8 +119,6 @@ ROOT::VecOps::RVec sel_charge::operator() ( return result; } - - resonanceBuilder::resonanceBuilder(float arg_resonance_mass) {m_resonance_mass = arg_resonance_mass;} ROOT::VecOps::RVec resonanceBuilder::operator()(ROOT::VecOps::RVec legs) { ROOT::VecOps::RVec result; diff --git a/analyzers/dataframe/src/ReconstructedParticleSource.cc b/analyzers/dataframe/src/ReconstructedParticleSource.cc new file mode 100644 index 0000000000..947b686bb8 --- /dev/null +++ b/analyzers/dataframe/src/ReconstructedParticleSource.cc @@ -0,0 +1,300 @@ +#include "FCCAnalyses/ReconstructedParticleSource.h" +#include "FCCAnalyses/Logger.h" + +// std +#include +#include +#include +#include + +// ROOT +#include +#include +#include + +// EDM4hep +#include + +namespace FCCAnalyses ::PodioSource ::ReconstructedParticle { +selPDG::selPDG(const int pdg) : m_pdg(pdg){}; + +edm4hep::ReconstructedParticleCollection +selPDG::operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl) { + edm4hep::ReconstructedParticleCollection result; + result.setSubsetCollection(); + + rdfDebug << "Link collection size: " << inLinkColl.size(); + + for (const auto &link : inLinkColl) { + const auto &particle = link.getTo(); + if (particle.getPDG() == m_pdg) { + result.push_back(link.getFrom()); + } + } + + return result; +} + +// -------------------------------------------------------------------------- +selAbsPDG::selAbsPDG(const int pdg) : m_absPdg(pdg) { + if (m_absPdg < 0) { + throw std::invalid_argument("ReconstructedParticle::selAbsPDG: Provided " + "PDG ID is negative!"); + } +}; + +edm4hep::ReconstructedParticleCollection +selAbsPDG::operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl) { + edm4hep::ReconstructedParticleCollection result; + result.setSubsetCollection(); + + for (const auto &link : inLinkColl) { + const auto &particle = link.getTo(); + if (std::abs(particle.getPDG()) == m_absPdg) { + result.push_back(link.getFrom()); + } + } + + return result; +} + +// -------------------------------------------------------------------------- +selPt::selPt(float minPt) : m_minPt(minPt) { + if (m_minPt < 0) { + throw std::invalid_argument("ReconstructedParticle::selPt: Provided " + "pT threshold is negative!"); + } +}; + +edm4hep::ReconstructedParticleCollection +selPt::operator()(const edm4hep::ReconstructedParticleCollection &inColl) { + edm4hep::ReconstructedParticleCollection result; + result.setSubsetCollection(); + + for (const auto &particle : inColl) { + if (edm4hep::utils::pt(particle) > m_minPt) { + result.push_back(particle); + } + } + + return result; +} + +// -------------------------------------------------------------------------- +selUpTo::selUpTo(const size_t size) : m_size(size){}; + +edm4hep::ReconstructedParticleCollection +selUpTo::operator()(const edm4hep::ReconstructedParticleCollection &inColl) { + edm4hep::ReconstructedParticleCollection result; + result.setSubsetCollection(); + + for (const auto &particle : inColl) { + if (result.size() >= m_size) { + break; + } + result.push_back(particle); + } + + return result; +} + +// -------------------------------------------------------------------------- +selGenStatus::selGenStatus(int status) : m_status(status){}; + +edm4hep::ReconstructedParticleCollection selGenStatus::operator()( + const edm4hep::RecoMCParticleLinkCollection &inLinkColl) { + edm4hep::ReconstructedParticleCollection result; + result.setSubsetCollection(); + + for (const auto &link : inLinkColl) { + const auto &mcParticle = link.getTo(); + const auto &recoParticle = link.getFrom(); + if (mcParticle.getGeneratorStatus() == m_status) { + result.push_back(link.getFrom()); + } + } + + return result; +} + +// -------------------------------------------------------------------------- +ROOT::VecOps::RVec +getP(const edm4hep::ReconstructedParticleCollection &inColl) { + ROOT::VecOps::RVec result; + + for (const auto &particle : inColl) { + auto lVec = edm4hep::utils::p4(particle, edm4hep::utils::UseMass); + result.push_back(static_cast(lVec.P())); + } + return result; +} + +// -------------------------------------------------------------------------- +ROOT::VecOps::RVec +getPt(const edm4hep::ReconstructedParticleCollection &inColl) { + ROOT::VecOps::RVec result; + result.reserve(inColl.size()); + + std::transform( + inColl.begin(), inColl.end(), std::back_inserter(result), + [](edm4hep::ReconstructedParticle p) { return edm4hep::utils::pt(p); }); + + return result; +} + +// -------------------------------------------------------------------------- +ROOT::VecOps::RVec +getY(const edm4hep::ReconstructedParticleCollection &inColl) { + ROOT::VecOps::RVec result; + + for (const auto &particle : inColl) { + auto lVec = edm4hep::utils::p4(particle, edm4hep::utils::UseMass); + result.push_back(static_cast(lVec.Rapidity())); + } + + return result; +} + +// -------------------------------------------------------------------------- +ROOT::VecOps::RVec +getE(const edm4hep::ReconstructedParticleCollection &inColl) { + ROOT::VecOps::RVec result; + + std::transform( + inColl.begin(), inColl.end(), std::back_inserter(result), + [](edm4hep::ReconstructedParticle p) { return p.getEnergy(); }); + + return result; +} + +// -------------------------------------------------------------------------- +ROOT::VecOps::RVec +getMass(const edm4hep::ReconstructedParticleCollection &inColl) { + ROOT::VecOps::RVec result; + + std::transform(inColl.begin(), inColl.end(), std::back_inserter(result), + [](edm4hep::ReconstructedParticle p) { return p.getMass(); }); + + return result; +} + +// -------------------------------------------------------------------------- +ROOT::VecOps::RVec +getCharge(const edm4hep::ReconstructedParticleCollection &inColl) { + ROOT::VecOps::RVec result; + + std::transform( + inColl.begin(), inColl.end(), std::back_inserter(result), + [](edm4hep::ReconstructedParticle p) { return p.getCharge(); }); + + return result; +} + +// -------------------------------------------------------------------------- +edm4hep::ReconstructedParticleCollection +sortByPt(const edm4hep::ReconstructedParticleCollection &inColl) { + edm4hep::ReconstructedParticleCollection outColl; + outColl.setSubsetCollection(); + + std::vector rpVec; + rpVec.reserve(inColl.size()); + for (const auto &particle : inColl) { + rpVec.emplace_back(particle); + } + + std::sort(rpVec.begin(), rpVec.end(), [](const auto &lhs, const auto &rhs) { + return edm4hep::utils::pt(lhs) > edm4hep::utils::pt(rhs); + }); + + for (const auto &particle : rpVec) { + outColl.push_back(particle); + } + + return outColl; +} + +// -------------------------------------------------------------------------- +resonanceBuilder::resonanceBuilder(float resonanceMass) + : m_resonanceMass(resonanceMass) { + if (m_resonanceMass < 0) { + throw std::invalid_argument("ReconstructedParticle::resonanceBuilder: " + "Provided resonance mass is negative!"); + } +} + +edm4hep::ReconstructedParticleCollection resonanceBuilder::operator()( + const edm4hep::ReconstructedParticleCollection &inColl) { + edm4hep::ReconstructedParticleCollection result; + + if (inColl.size() < 2) { + return result; + } + + // Convert collection into std::vector + std::vector rpVec; + rpVec.reserve(inColl.size()); + for (const auto &particle : inColl) { + rpVec.emplace_back(particle); + } + + // Loop over all possible combinations + std::vector resonanceVec; + for (auto p1 = rpVec.begin(); p1 != rpVec.end(); ++p1) { + for (auto p2 = p1 + 1; p2 != rpVec.end(); ++p2) { + edm4hep::MutableReconstructedParticle resonance; + resonance.setCharge(p1->getCharge() + p2->getCharge()); + + auto lVec1 = edm4hep::utils::p4(*p1, edm4hep::utils::UseMass); + auto lVec2 = edm4hep::utils::p4(*p2, edm4hep::utils::UseMass); + + auto lVec = lVec1 + lVec2; + resonance.setMomentum({static_cast(lVec.Px()), + static_cast(lVec.Py()), + static_cast(lVec.Pz())}); + resonance.setMass(static_cast(lVec.M())); + resonanceVec.emplace_back(resonance); + } + } + + // Sort by the distance from desired mass value + auto resonanceSort = [&](edm4hep::ReconstructedParticle i, + edm4hep::ReconstructedParticle j) { + return std::fabs(m_resonanceMass - i.getMass()) < + std::fabs(m_resonanceMass - j.getMass()); + }; + std::sort(resonanceVec.begin(), resonanceVec.end(), resonanceSort); + result.push_back(resonanceVec.front().clone()); + + return result; +} + +// -------------------------------------------------------------------------- +recoilBuilder::recoilBuilder(float sqrts) : m_sqrts(sqrts) { + if (m_sqrts < 0) { + throw std::invalid_argument("ReconstructedParticle::recoilBuilder: " + "Provided center-of-mass is negative!"); + } +} + +edm4hep::ReconstructedParticleCollection recoilBuilder::operator()( + const edm4hep::ReconstructedParticleCollection &inColl) { + edm4hep::ReconstructedParticleCollection result; + + auto recoilVec = TLorentzVector(0, 0, 0, m_sqrts); + for (const auto &r : inColl) { + auto lVec = TLorentzVector(r.getMomentum().x, r.getMomentum().y, + r.getMomentum().z, r.getMass()); + recoilVec -= lVec; + } + + edm4hep::MutableReconstructedParticle recoil; + recoil.setMomentum({static_cast(recoilVec.Px()), + static_cast(recoilVec.Py()), + static_cast(recoilVec.Pz())}); + recoil.setMass(static_cast(recoilVec.M())); + result.push_back(recoil); + + return result; +}; + +} // namespace FCCAnalyses::PodioSource::ReconstructedParticle diff --git a/analyzers/dataframe/src/TrackSource.cc b/analyzers/dataframe/src/TrackSource.cc new file mode 100644 index 0000000000..6ea774161c --- /dev/null +++ b/analyzers/dataframe/src/TrackSource.cc @@ -0,0 +1,49 @@ +#include "FCCAnalyses/TrackSource.h" + +namespace FCCAnalyses ::PodioSource ::Track { +selPDG::selPDG(const int pdg, const bool chargeConjugateAllowed) + : m_pdg(pdg), m_chargeConjugateAllowed(chargeConjugateAllowed){}; + +edm4hep::TrackCollection +selPDG::operator()(const edm4hep::TrackMCParticleLinkCollection &inLinkColl) { + edm4hep::TrackCollection result; + result.setSubsetCollection(); + + for (const auto &assoc : inLinkColl) { + const auto &particle = assoc.getTo(); + if (m_chargeConjugateAllowed) { + if (std::abs(particle.getPDG()) == std::abs(m_pdg)) { + result.push_back(assoc.getFrom()); + } + } else { + if (particle.getPDG() == m_pdg) { + result.push_back(assoc.getFrom()); + } + } + } + + return result; +} + +ROOT::VecOps::RVec +getNstates(const edm4hep::TrackCollection &inColl) { + ROOT::VecOps::RVec result; + for (const auto &track : inColl) { + result.push_back(track.getTrackStates().size()); + } + + return result; +} + +ROOT::VecOps::RVec getD0(const edm4hep::TrackCollection &inColl) { + ROOT::VecOps::RVec result; + for (const auto &track : inColl) { + for (const auto &trackState : track.getTrackStates()) { + result.push_back(trackState.D0); + break; + } + } + + return result; +} +} // namespace FCCAnalyses::PodioSource::Track diff --git a/cmake/FCCAnalysesFunctions.cmake b/cmake/FCCAnalysesFunctions.cmake index bba689564c..46d6c02223 100644 --- a/cmake/FCCAnalysesFunctions.cmake +++ b/cmake/FCCAnalysesFunctions.cmake @@ -47,7 +47,7 @@ endfunction() function(add_generic_test _testname _testcmd) add_test(NAME ${_testname} - COMMAND ${_testcmd} + COMMAND sh -c ${_testcmd} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) set_property(TEST ${_testname} APPEND PROPERTY ENVIRONMENT diff --git a/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py b/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py index fd1d9f6949..b9f8dc93f1 100644 --- a/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py +++ b/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py @@ -21,31 +21,34 @@ def __init__(self, cmdline_args): # `cmdline_arg` dictionary. self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown']) - # Mandatory: List of processes to run over + # Mandatory: List of samples (processes) used in the analysis self.process_list = { - # # Run the full statistics in one output file named - # # /p8_ee_ZZ_ecm240.root + # Run over the full statistics and save it to one output file named + # /.root 'p8_ee_ZZ_ecm240': {'fraction': 1.}, - # # Run 50% of the statistics with output into two files named - # # /p8_ee_WW_ecm240/chunk.root - 'p8_ee_WW_ecm240': {'fraction': 0.5, 'chunks': 2}, #this doesn't work? - # # Run 20% of the statistics in one file named - # # /p8_ee_ZH_ecm240_out_f02.root (example on how to change - # # the output name) + # Run over 50% of the statistics and save output into two files + # named /p8_ee_WW_ecm240/chunk.root + # Number of input files needs to be larger that number of chunks + 'p8_ee_WW_ecm240': {'fraction': 0.5, 'chunks': 2}, + # Run over 20% of the statistics and save output into one file named + # /p8_ee_ZH_ecm240_out_f02.root 'p8_ee_ZH_ecm240': {'fraction': 0.2, 'output': 'p8_ee_ZH_ecm240_out_f02'} } # Mandatory: Production tag when running over the centrally produced - # samples, this points to the yaml files for getting sample statistics - self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/' + # samples (this points to the yaml file for getting sample statistics) # self.prod_tag = 'FCCee/spring2021/IDEA/' + # or Input directory when not running over the centrally produced + # samples. + self.input_dir = '/eos/experiment/fcc/hh/tutorials/' \ + 'edm4hep_tutorial_data/' # Optional: output directory, default is local running directory self.output_dir = 'outputs/FCCee/higgs/mH-recoil/mumu/' \ f'stage1_{self.ana_args.muon_pt}' - # Optional: analysisName, default is '' + # Optional: analysis name, default is '' # self.analysis_name = 'My Analysis' # Optional: number of threads to run on, default is 'all available' @@ -55,10 +58,8 @@ def __init__(self, cmdline_args): # self.run_batch = False # Optional: test file - self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \ - 'tutorials/edm4hep_tutorial_data/' \ - 'p8_ee_ZH_ecm240.root' - + self.test_file = 'https://fccsw.web.cern.ch/fccsw/testsamples/' \ + 'edm4hep1/p8_ee_WW_ecm240_edm4hep.root' # Mandatory: analyzers function to define the analysis graph, please make # sure you return the dataframe, in this example it is dframe2 @@ -80,40 +81,41 @@ def analyzers(self, dframe): # select muons on pT .Define('selected_muons', f'ReconstructedParticle::sel_pt({muon_pt})(muons)') - # create branch with muon transverse momentum + # create column with muon transverse momentum .Define('selected_muons_pt', 'ReconstructedParticle::get_pt(selected_muons)') - # create branch with muon rapidity + # create column with muon rapidity .Define('selected_muons_y', 'ReconstructedParticle::get_y(selected_muons)') - # create branch with muon total momentum + # create column with muon total momentum .Define('selected_muons_p', 'ReconstructedParticle::get_p(selected_muons)') - # create branch with muon energy + # create column with muon energy .Define('selected_muons_e', 'ReconstructedParticle::get_e(selected_muons)') # find zed candidates from di-muon resonances .Define( 'zed_leptonic', 'ReconstructedParticle::resonanceBuilder(91)(selected_muons)') - # create branch with zed mass + # create column with zed mass .Define('zed_leptonic_m', 'ReconstructedParticle::get_mass(zed_leptonic)') - # create branch with zed transverse momenta + # create column with zed transverse momenta .Define('zed_leptonic_pt', 'ReconstructedParticle::get_pt(zed_leptonic)') # calculate recoil of zed_leptonic .Define('zed_leptonic_recoil', 'ReconstructedParticle::recoilBuilder(240)(zed_leptonic)') - # create branch with recoil mass + # create column with recoil mass .Define('zed_leptonic_recoil_m', 'ReconstructedParticle::get_mass(zed_leptonic_recoil)') - # create branch with leptonic charge + # create column with leptonic charge .Define('zed_leptonic_charge', 'ReconstructedParticle::get_charge(zed_leptonic)') - # Filter at least one candidate - .Filter('zed_leptonic_recoil_m.size()>0') + # Filter on at least one candidate + .Filter('zed_leptonic_recoil_m.size() > 0') ) + return dframe2 # Mandatory: output function, please make sure you return the branch list diff --git a/examples/FCChh/HH_bbtautau/analysis.cc b/examples/FCChh/HH_bbtautau/analysis.cc index c03f5f46bc..e1fb1feb24 100644 --- a/examples/FCChh/HH_bbtautau/analysis.cc +++ b/examples/FCChh/HH_bbtautau/analysis.cc @@ -16,10 +16,7 @@ #include "ReconstructedParticle2Track.h" #include "MCParticle.h" - auto _m = edm4hep::ReconstructedParticleData(); -auto _pod = podio::ObjectID(); - // Reproduce Heppy analysis int main(int argc, char* argv[]){ diff --git a/examples/data_source/analysis_stage1.py b/examples/data_source/analysis_stage1.py new file mode 100644 index 0000000000..352fd94cf0 --- /dev/null +++ b/examples/data_source/analysis_stage1.py @@ -0,0 +1,134 @@ +''' +Analysis example, measure Higgs mass in the Z(mumu)H recoil measurement. +''' +from argparse import ArgumentParser + + +# Mandatory: Analysis class where the user defines the operations on the +# dataframe. +class Analysis(): + ''' + Higgs mass recoil analysis in Z(mumu)H. + ''' + def __init__(self, cmdline_args): + parser = ArgumentParser( + description='Additional analysis arguments', + usage='Provide additional arguments after analysis script path') + parser.add_argument('--muon-pt', default='10.', type=float, + help='Minimal pT of the mouns.') + # Parse additional arguments not known to the FCCAnalyses parsers + # All command line arguments know to fccanalysis are provided in the + # `cmdline_arg` dictionary. + self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown']) + + # Mandatory: List of samples (processes) used in the analysis + self.process_list = { + # Run over the full statistics and save it to one output file named + # /.root + 'p8_ee_ZZ_ecm240': {'fraction': 1.}, + # Run over 50% of the statistics and save output into two files + # named /p8_ee_WW_ecm240/chunk.root + # Number of input files needs to be larger that number of chunks + 'p8_ee_WW_ecm240': {'fraction': 0.5, 'chunks': 2}, + # Run over 20% of the statistics and save output into one file + # named /p8_ee_ZH_ecm240_out_f02.root + 'p8_ee_ZH_ecm240': {'fraction': 0.2, + 'output': 'p8_ee_ZH_ecm240_out_f02'} + } + + # Mandatory: Production tag when running over the centrally produced + # samples (this points to the yaml file for getting sample statistics) + # self.prod_tag = 'FCCee/spring2021/IDEA/' + # or Input directory when not running over the centrally produced + # samples. + self.input_dir = '/eos/experiment/fcc/hh/tutorials/' \ + 'edm4hep_tutorial_data/' + + # Optional: output directory, default is local running directory + self.output_dir = 'outputs/FCCee/higgs/mH-recoil/mumu/' \ + f'stage1_{self.ana_args.muon_pt}' + + # Optional: analysis name, default is '' + # self.analysis_name = 'My Analysis' + + # Optional: number of threads to run on, default is 'all available' + # self.n_threads = 4 + + # Optional: running on HTCondor, default is False + # self.run_batch = False + + # Optional: test file + self.test_file = 'https://fccsw.web.cern.ch/fccsw/testsamples/' \ + 'edm4hep1/p8_ee_WW_ecm240_edm4hep.root' + + # Optional: read the input files with podio::DataSource + self.use_data_source = True + + # Mandatory: analyzers function to define the analysis graph, please make + # sure you return the dataframe, in this example it is dframe2 + def analyzers(self, dframe): + ''' + Analysis graph. + ''' + + muon_pt = self.ana_args.muon_pt + + dframe2 = ( + dframe + # select muons on pT + .Define('selected_muons', + f'ReconstructedParticle::selPt({muon_pt})(Muon)') + # create column with muon transverse momentum + .Define('selected_muons_pt', + 'ReconstructedParticle::getPt(selected_muons)') + # create column with muon rapidity + .Define('selected_muons_y', + 'ReconstructedParticle::getY(selected_muons)') + # create column with muon total momentum + .Define('selected_muons_p', + 'ReconstructedParticle::getP(selected_muons)') + # create column with muon energy + .Define('selected_muons_e', + 'ReconstructedParticle::getE(selected_muons)') + # find zed candidates from di-muon resonances + .Define( + 'zed_leptonic', + 'ReconstructedParticle::resonanceBuilder(91)(selected_muons)') + # create column with zed mass + .Define('zed_leptonic_m', + 'ReconstructedParticle::getMass(zed_leptonic)') + # create column with zed transverse momenta + .Define('zed_leptonic_pt', + 'ReconstructedParticle::getPt(zed_leptonic)') + # calculate recoil of zed_leptonic + .Define('zed_leptonic_recoil', + 'ReconstructedParticle::recoilBuilder(240)(zed_leptonic)') + # create column with recoil mass + .Define('zed_leptonic_recoil_m', + 'ReconstructedParticle::getMass(zed_leptonic_recoil)') + # create column with leptonic charge + .Define('zed_leptonic_charge', + 'ReconstructedParticle::getCharge(zed_leptonic)') + # Filter on at least one candidate + .Filter('zed_leptonic_recoil_m.size() > 0') + ) + + return dframe2 + + # Mandatory: output function, please make sure you return the branch list + # as a python list + def output(self): + ''' + Output variables which will be saved to output root file. + ''' + branch_list = [ + 'selected_muons_pt', + 'selected_muons_y', + 'selected_muons_p', + 'selected_muons_e', + 'zed_leptonic_pt', + 'zed_leptonic_m', + 'zed_leptonic_charge', + 'zed_leptonic_recoil_m' + ] + return branch_list diff --git a/examples/data_source/histmaker_source.py b/examples/data_source/histmaker_source.py new file mode 100644 index 0000000000..4ca111c251 --- /dev/null +++ b/examples/data_source/histmaker_source.py @@ -0,0 +1,49 @@ +# list of processes (mandatory) +processList = { + 'p8_ee_WW_ecm240': { + 'output': 'p8_ee_WW_ecm240_out', + 'testfile': 'https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/' + 'p8_ee_WW_ecm240_edm4hep.root'} +} + +# Production tag when running over EDM4Hep centrally produced events, this +# points to the yaml files for getting sample statistics (mandatory) +prodTag = "FCCee/winter2023/IDEA/" + +# Link to the dictonary that contains all the cross section informations +# etc... (mandatory) +procDict = "FCCee_procDict_winter2023_IDEA.json" + +# Optional: output directory, default is local running directory +outputDir = "." + +# Ncpus, default is 4, -1 uses all cores available +nCPUS = -1 + +# scale the histograms with the cross-section and integrated luminosity +# define some binning for various histograms +bins_pt = (20, 0, 200) + +# How to read input files +useDataSource = True + +# build_graph function that contains the analysis logic, cuts and histograms +# (mandatory) +def build_graph(df, dataset): + + results = [] + df = df.Define("weight", "1.0") + weightsum = df.Sum("weight") + + df = df.Define( + "electron_truth", + "ReconstructedParticle::selAbsPDG(11)(MCRecoAssociations)") + + df = df.Define( + "electron_truth_pt", + "ReconstructedParticle::getPt(electron_truth)") + + results.append(df.Histo1D(("h_electron_truth_pt", "", *bins_pt), + "electron_truth_pt")) + + return results, weightsum diff --git a/examples/data_source/stages_source.py b/examples/data_source/stages_source.py new file mode 100644 index 0000000000..e3400b2e92 --- /dev/null +++ b/examples/data_source/stages_source.py @@ -0,0 +1,61 @@ +''' +Analysis example using PODIO ROOT DataSource for reading input files. +''' + + +class Analysis(): + ''' + Mandatory class, with three mandatory methods: + * __init__ + * analyzers + * output + ''' + def __init__(self, _): + # list of processes (mandatory) + self.process_list = { + 'p8_ee_WW_ecm240': {'output': 'p8_ee_WW_ecm240_out'} + } + + # Production tag when running over EDM4Hep centrally produced events, + # this points to the yaml files for getting sample statistics + # (mandatory) + self.prod_tag = 'FCCee/winter2023/IDEA/' + + # Optional: output directory, default is local running directory + self.output_dir = "." + + # Ncpus, default is 4, -1 uses all cores available + # self.n_threads = -1 + + # How to read input files + self.use_data_source = True + + self.test_file = 'https://fccsw.web.cern.ch/fccsw/testsamples/' \ + 'edm4hep1/p8_ee_WW_ecm240_edm4hep.root' + + def analyzers(self, dframe): + ''' + Analysis function to define the analyzers to process, please make sure + you return the last dataframe, in this example it is dframe2 + ''' + dframe2 = ( + dframe + .Define( + "electron_truth", + "ReconstructedParticle::selPDG(11)(MCRecoAssociations)") + + .Define( + "electron_truth_pt", + "ReconstructedParticle::getPt(electron_truth)") + ) + + return dframe2 + + def output(self) -> list[str]: + ''' + List of columns to cave into output file. + ''' + return [ + # "electron_truth", + "electron_truth_pt" + ] diff --git a/examples/data_source/standalone.py b/examples/data_source/standalone.py new file mode 100644 index 0000000000..5c28a1541c --- /dev/null +++ b/examples/data_source/standalone.py @@ -0,0 +1,50 @@ +''' +Example of standalone FCCAnalyses script using podio::DataSource. +''' + +import ROOT +ROOT.gROOT.SetBatch(True) + + +def main(): + ''' + Main entry point for the standalone analysis. + ''' + input_list = ['https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/' + 'p8_ee_WW_ecm240_edm4hep.root'] + + ROOT.gSystem.Load('libFCCAnalyses') + if ROOT.dummyLoader: + print('----> DEBUG: Found FCCAnalyses library.') + ROOT.gInterpreter.Declare("using namespace FCCAnalyses::PodioSource;") + print('----> INFO: Loading analyzers from libFCCAnalyses... ',) + + if ROOT.podio.DataSource: + print('----> DEBUG: Found Podio ROOT DataSource.') + print('----> INFO: Loading events through podio::DataSource...') + + try: + dframe = ROOT.podio.CreateDataFrame(input_list) + except TypeError as excp: + print('----> ERROR: Unable to build dataframe!') + print(excp) + + dframe2 = dframe.Define( + 'electron_truth', + 'ReconstructedParticle::selPDG(11)(MCRecoAssociations)') + + dframe3 = dframe2.Define( + 'electron_truth_pt', + 'ReconstructedParticle::getPt(electron_truth)') + + dframe4 = dframe3.Filter('electron_truth_pt.size() < 3') + + count = dframe4.Count() + dframe4.Snapshot('events', 'output.root', ['electron_truth_pt']) + + print('---------------------') + print('Number of events: ', count.GetValue()) + + +if __name__ == '__main__': + main() diff --git a/man/man1/fccanalysis-build.1 b/man/man1/fccanalysis-build.1 index 31ff552b43..5247bee9e8 100644 --- a/man/man1/fccanalysis-build.1 +++ b/man/man1/fccanalysis-build.1 @@ -9,6 +9,7 @@ [\fB\-c\fR] [\fB\-j\fR \fIBUILD_THREADS\fR] [\fB\-\-acts\-on\fR] +[\fB\-\-no\-source\fR] .SH DESCRIPTION .B fccanalysis\-build will build the whole FCCAnalyses framework\&. @@ -25,10 +26,13 @@ Prints short help message and exits\&. Completely clean build directory before compilation\&. .TP \fB\-j\fR \fIBUILD_THREADS\fR, \fB\-\-build\-threads\fR \fIBUILD_THREADS\fR -Specify number of threads used when compiling (equivalent to `make -j`) +Specify number of threads used when compiling (equivalent to `make -j`)\&. .TP \fB\-\-acts\-on\fR Enable also ACTS based analyzers to be build\&. +.TP +.B \-\-no\-source +Do not build PODIO Data Source\&. .SH SEE ALSO fccanalysis(1), fccanalysis\-test(1), cmake(1) .SH BUGS diff --git a/man/man1/fccanalysis-run.1 b/man/man1/fccanalysis-run.1 index 06f6f01bdb..3ef54a0873 100644 --- a/man/man1/fccanalysis-run.1 +++ b/man/man1/fccanalysis-run.1 @@ -9,6 +9,7 @@ [\fB\-\-files\-list\fR \fIFILES_LIST\fR [\fIFILES_LIST\fR ...]] [\fB\-\-output\fR \fIOUTPUT\fR] [\fB\-\-nevents\fR \fINEVENTS\fR] +[\fB\-\-use-data-source\fR] [\fB\-\-test\fR] [\fB\-\-bench\fR] [\fB\-\-ncpus\fR \fINCPUS\fR] @@ -24,7 +25,7 @@ stages \fIfccanalysis-run\fR is used\&. When using \fBfccanalysis-run\fR the analysis is running in the managed mode, where the RDataFrame is steered by the framework and users can control some aspects of the running with additional global attributes, see -\fIfccanalysis-script\fR(8). +\fBfccanalysis-script\fR(7). .SH OPTIONS .TP .I analysis-script @@ -46,6 +47,9 @@ Specify max number of events to process\&. .B \-\-test Run over the test file\&. .TP +.B \-\-use-data-source +Load events through podio::DataSource\&. +.TP .B \-\-bench Output benchmark results to a JSON file\&. .TP diff --git a/man/man7/fccanalysis-script.7 b/man/man7/fccanalysis-script.7 index 2f1b22c4d7..a4e6bfda00 100644 --- a/man/man7/fccanalysis-script.7 +++ b/man/man7/fccanalysis-script.7 @@ -13,13 +13,12 @@ part of or the full analysis. There are two basic modes how to run an analysis, one is to run in the managed mode like so: .IP fccanalysis run \fIanalysis_script.py\fR - .RE + or .IP fccanalysis final \fIanalysis_script.py\fR -.RE .PP where user needs to provide minimal number of variables and settings. In this mode the RDataFrame is managed for the user and it can be controlled by defining @@ -27,7 +26,6 @@ several global attributes in the analysis script. The other mode is to run the analysis script as a regular python script: .IP python \fIanalysis_script.py\fR -.RE .PP here user has full control over the RDataFrame, but has to create all necessary scaffolding\&. @@ -36,13 +34,13 @@ It is expected that the whole analysis will be split into several stages and it can be done in one of the two styles: .IP anascript_stage1.py \-> anascript_stage2.py \-> ... \-> anascript_stage_final.py \-> plots.py - .RE + or .IP analysis_histmaker.py \-> plots.py -.RE +.PP In the case of the first style there are at least three stages required (anascript_stage1.py, anascript_stage_final.py, plots.py) and there is no upper limit on the number of stages. In the case of the second style only two stages @@ -171,6 +169,12 @@ paths with \fI.dot\fR and \fI.png\fR extensions are accepted. .br Default value: empty string .TP +\fBuseDataSource\fR (optional) +User can specify how to provide physical object collections to the analyzers\&. +If \fITrue\fR the events will be loaded through the EDM4hep RDataSource\&. +.br +Default value: False +.TP .B procDict This variable controls which process dictionary will be used. It can be either simple file name, absolute path or url. In the case of simple filename, the file diff --git a/python/anascript.py b/python/anascript.py index 6248993591..69344539fc 100644 --- a/python/anascript.py +++ b/python/anascript.py @@ -26,7 +26,7 @@ def get_element(rdf_module, element: str, is_final: bool = False): elif element == 'analysers': LOGGER.error('The function <%s> is mandatory in your analysis ' - 'script!.\nAborting...', element) + 'script!\nAborting...', element) if is_final: LOGGER.error('The function <%s> is not part of the final ' 'stage of the analysis!', element) diff --git a/python/build_analysis.py b/python/build_analysis.py index ef2e4cbb3d..a01d462a1a 100644 --- a/python/build_analysis.py +++ b/python/build_analysis.py @@ -15,8 +15,8 @@ def run_subprocess(command: str, run_dir: str) -> None: ''' - Run subprocess in specified directory. - Check only the return value, otherwise keep the subprocess connected to + Run sub-process in specified directory. + Check only the return value, otherwise keep the sub-process connected to stdin/stout/stderr. ''' try: @@ -25,6 +25,7 @@ def run_subprocess(command: str, run_dir: str) -> None: if status != 0: LOGGER.error('Error encountered!\nAborting...') + os.system('hash -d fccanalysis') sys.exit(3) except KeyboardInterrupt: @@ -46,7 +47,8 @@ def build_analysis(mainparser) -> None: local_dir = os.environ.get('LOCAL_DIR') build_path = pathlib.Path(local_dir + '/build') install_path = pathlib.Path(local_dir + '/install') - cmake_args = ['-DCMAKE_INSTALL_PREFIX=../install'] + cmake_args: list[str] = ['-DCMAKE_INSTALL_PREFIX=../install', + '-DCMAKE_EXPORT_COMPILE_COMMANDS=ON'] LOGGER.info('Building analysis located in:\n%s', local_dir) @@ -54,6 +56,9 @@ def build_analysis(mainparser) -> None: LOGGER.info('Building also ACTS based analyzers...') cmake_args += ['-DWITH_ACTS=ON'] + if args.no_source: + cmake_args += ['-DWITH_PODIO_DATASOURCE=OFF'] + if args.clean_build: LOGGER.info('Clearing build and install directories...') if build_path.is_dir(): diff --git a/python/parsers.py b/python/parsers.py index c2fc092eac..75db7975af 100644 --- a/python/parsers.py +++ b/python/parsers.py @@ -51,6 +51,12 @@ def setup_build_parser(parser): action='store_true', default=False, help='enable ACTS based analyzers') + build_args.add_argument( + '--no-source', + action='store_true', + default=False, + help='do not build EDM4hep RDataSource' + ) def setup_test_parser(parser): @@ -123,6 +129,9 @@ def setup_run_parser(parser): parser.add_argument('--graph-path', type=str, default='', help='analysis graph save path, should end with ' '\'.dot\' or \'.png\'') + parser.add_argument( + '--use-data-source', action='store_true', default=False, + help='use EDM4hep RDataSource to construct dataframe') # Internal argument, not to be used by the users parser.add_argument('--batch', action='store_true', default=False, diff --git a/python/process.py b/python/process.py index 2b74c5c771..b1512a8ce6 100644 --- a/python/process.py +++ b/python/process.py @@ -13,12 +13,14 @@ ROOT.gROOT.SetBatch(True) + LOGGER: logging.Logger = logging.getLogger('FCCAnalyses.process_info') -def get_entries(inpath: str) -> int: +def get_entries(inpath: str) -> int | None: ''' - Get number of entries in the TTree named "events". + Retrieves number of entries in the "events" TTree from the provided ROOT + file. ''' nevents = None with ROOT.TFile(inpath, 'READ') as infile: @@ -27,6 +29,7 @@ def get_entries(inpath: str) -> int: except AttributeError: LOGGER.error('Input file is missing "events" TTree!\nAborting...') sys.exit(3) + return nevents diff --git a/python/run_analysis.py b/python/run_analysis.py index 39412bc3e9..e4445421d3 100644 --- a/python/run_analysis.py +++ b/python/run_analysis.py @@ -14,6 +14,7 @@ import numpy as np import ROOT # type: ignore +import cppyy from anascript import get_element, get_element_dict from process import get_process_info, get_process_dict from frame import generate_graph @@ -254,7 +255,11 @@ def initialize(args, rdf_module, anapath: str): ''' # for convenience and compatibility with user code - ROOT.gInterpreter.Declare("using namespace FCCAnalyses;") + if args.use_data_source: + ROOT.gInterpreter.Declare("using namespace FCCAnalyses::PodioSource;") + else: + ROOT.gInterpreter.Declare("using namespace FCCAnalyses;") + geometry_file = get_element(rdf_module, "geometryFile") readout_name = get_element(rdf_module, "readoutName") if geometry_file != "" and readout_name != "": @@ -331,6 +336,7 @@ def run_rdf(rdf_module, for bname in blist: branch_list.push_back(bname) + # Registering Count before Snapshot to avoid additional event loops evtcount_final = dframe3.Count() # Generate computational graph of the analysis @@ -338,9 +344,9 @@ def run_rdf(rdf_module, generate_graph(dframe, args) dframe3.Snapshot("events", out_file, branch_list) - except Exception as excp: - LOGGER.error('During the execution of the analysis file exception ' - 'occurred:\n%s', excp) + except cppyy.gbl.std.runtime_error as err: + LOGGER.error('%s\nDuring the execution of the analysis script an ' + 'exception occurred!\nAborting...', err) sys.exit(3) return evtcount_init.GetValue(), evtcount_final.GetValue() @@ -365,7 +371,7 @@ def send_to_batch(rdf_module, chunk_list, process, anapath: str): stderr=subprocess.DEVNULL ) except subprocess.CalledProcessError: - LOGGER.error('The FCCanalyses libraries are not properly build and ' + LOGGER.error('The FCCAnalyses libraries are not properly build and ' 'installed!\nAborting job submission...') sys.exit(3) @@ -472,8 +478,14 @@ def run_local(rdf_module, infile_list, args): filepath = apply_filepath_rewrites(filepath) file_list.push_back(filepath) - info_msg += f'- {filepath}\t\n' - infile = ROOT.TFile.Open(filepath, 'READ') + info_msg += f'\t- {filepath}\n' + try: + infile = ROOT.TFile.Open(filepath, 'READ') + except OSError as excp: + LOGGER.error('While opening input file:\n%s\nan error ' + 'occurred:\n%s\nAborting...', filepath, excp) + sys.exit(3) + try: nevents_orig += infile.Get('eventsProcessed').GetVal() except AttributeError: @@ -514,7 +526,7 @@ def run_local(rdf_module, infile_list, args): start_time = time.time() inn, outn = run_rdf(rdf_module, file_list, outfile_path, args) elapsed_time = time.time() - start_time - + # replace nevents_local by inn = the amount of processed events info_msg = f"{' SUMMARY ':=^80}\n" @@ -690,6 +702,10 @@ def run_histmaker(args, rdf_module, anapath): Run the analysis using histmaker (all stages integrated into one). ''' + # Check whether to use PODIO ROOT DataSource to load the events + if get_element(rdf_module, "useDataSource", False): + args.use_data_source = True + # set ncpus, load header files, custom dicts, ... initialize(args, rdf_module, anapath) @@ -717,29 +733,41 @@ def run_histmaker(args, rdf_module, anapath): evtcounts = [] # event count of the input file # number of events processed per process, in a potential previous step events_processed_dict = {} - for process in process_list: - file_list, event_list = get_process_info( - process, - get_element(rdf_module, "prodTag"), - get_element(rdf_module, "inputDir")) - if len(file_list) == 0: - LOGGER.error('No files to process!\nAborting...') - sys.exit(3) - fraction = 1 - output = process - chunks = 1 - try: - if get_element_dict(process_list[process], 'fraction') is not None: - fraction = get_element_dict(process_list[process], 'fraction') - if get_element_dict(process_list[process], 'output') is not None: - output = get_element_dict(process_list[process], 'output') - if get_element_dict(process_list[process], 'chunks') is not None: - chunks = get_element_dict(process_list[process], 'chunks') - except TypeError: - LOGGER.warning('No values set for process %s will use default ' - 'values!', process) - if fraction < 1: - file_list = get_subfile_list(file_list, event_list, fraction) + for process_name, process_dict in process_list.items(): + if args.test: + try: + if get_element_dict(process_dict, 'testfile') is not None: + file_list = [get_element_dict(process_dict, 'testfile')] + except TypeError: + LOGGER.warning('No test file for process %s found!\n' + 'Aborting...') + sys.exit(3) + fraction = 1 + output = process_name + chunks = 1 + else: + file_list, event_list = get_process_info( + process_name, + get_element(rdf_module, "prodTag"), + get_element(rdf_module, "inputDir")) + if len(file_list) == 0: + LOGGER.error('No files to process!\nAborting...') + sys.exit(3) + fraction = 1 + output = process_name + chunks = 1 + try: + if get_element_dict(process_dict, 'fraction') is not None: + fraction = get_element_dict(process_dict, 'fraction') + if get_element_dict(process_dict, 'output') is not None: + output = get_element_dict(process_dict, 'output') + if get_element_dict(process_dict, 'chunks') is not None: + chunks = get_element_dict(process_dict, 'chunks') + except TypeError: + LOGGER.warning('No values set for process %s will use default ' + 'values!', process_name) + if fraction < 1: + file_list = get_subfile_list(file_list, event_list, fraction) # get the number of events processed, in a potential previous step file_list_root = ROOT.vector('string')() @@ -747,7 +775,8 @@ def run_histmaker(args, rdf_module, anapath): # stage) nevents_meta = 0 for file_name in file_list: - file_name = apply_filepath_rewrites(file_name) + if not args.use_data_source: + file_name = apply_filepath_rewrites(file_name) file_list_root.push_back(file_name) # Skip check for processed events in case of first stage if get_element(rdf_module, "prodTag") is None: @@ -759,17 +788,39 @@ def run_histmaker(args, rdf_module, anapath): infile.Close() if args.test: break - events_processed_dict[process] = nevents_meta - info_msg = f'Add process "{process}" with:' + events_processed_dict[process_name] = nevents_meta + info_msg = f'Add process "{process_name}" with:' info_msg += f'\n\tfraction = {fraction}' info_msg += f'\n\tnFiles = {len(file_list_root):,}' info_msg += f'\n\toutput = {output}\n\tchunks = {chunks}' LOGGER.info(info_msg) - dframe = ROOT.ROOT.RDataFrame("events", file_list_root) + if args.use_data_source: + if ROOT.podio.DataSource: + LOGGER.debug('Found Podio ROOT DataSource.') + else: + LOGGER.error('Podio ROOT DataSource library not found!' + '\nAborting...') + sys.exit(3) + LOGGER.info('Loading events through podio::DataSource...') + + try: + dframe = ROOT.podio.CreateDataFrame(file_list_root) + except TypeError as excp: + LOGGER.error('Unable to build dataframe using ' + 'podio::DataSource!\n%s', excp) + sys.exit(3) + else: + dframe = ROOT.ROOT.RDataFrame("events", file_list_root) evtcount = dframe.Count() - res, hweight = graph_function(dframe, process) + try: + res, hweight = graph_function(dframe, process_name) + except cppyy.gbl.std.runtime_error: + LOGGER.error('During loading of the analysis an error occurred!' + '\nAborting...') + sys.exit(3) + results.append(res) hweights.append(hweight) evtcounts.append(evtcount) @@ -886,6 +937,9 @@ def run(parser): LOGGER.error('Unknow sub-command "%s"!\nAborting...') sys.exit(3) + # Work with absolute path of the analysis script + anapath = os.path.abspath(args.anascript_path) + # Check that the analysis file exists anapath = args.anascript_path if not os.path.isfile(anapath): @@ -926,14 +980,19 @@ def run(parser): # Load the analysis script as a module anapath = os.path.abspath(anapath) - LOGGER.info('Loading analysis file:\n%s', anapath) - rdf_spec = importlib.util.spec_from_file_location("fcc_analysis_module", - anapath) - rdf_module = importlib.util.module_from_spec(rdf_spec) - rdf_spec.loader.exec_module(rdf_module) + LOGGER.info('Loading analysis script:\n%s', anapath) + try: + rdf_spec = importlib.util.spec_from_file_location('rdfanalysis', + anapath) + rdf_module = importlib.util.module_from_spec(rdf_spec) + rdf_spec.loader.exec_module(rdf_module) + except SyntaxError as err: + LOGGER.error('Syntax error encountered in the analysis script:\n%s', + err) + sys.exit(3) # Merge configuration from analysis script file with command line arguments - if get_element(rdf_module, 'graph'): + if get_element(rdf_module, 'graph', False): args.graph = True if get_element(rdf_module, 'graphPath') != '': diff --git a/python/run_fccanalysis.py b/python/run_fccanalysis.py index e6f4f86c0c..5946f12d1c 100644 --- a/python/run_fccanalysis.py +++ b/python/run_fccanalysis.py @@ -253,13 +253,33 @@ def submit_job(cmd: str, max_trials: int) -> bool: # _____________________________________________________________________________ -def initialize(args, analysis): +def merge_config(args: object, analysis: object) -> dict[str, any]: + ''' + Merge configuration from command line arguments and analysis class. + ''' + config: dict[str, any] = {} + + # Check whether to use PODIO DataSource to load the events + config['use_data_source'] = False + if args.use_data_source: + config['use_data_source'] = True + if get_attribute(analysis, 'use_data_source', False): + config['use_data_source'] = True + + return config + + +# _____________________________________________________________________________ +def initialize(config, args, analysis): ''' Common initialization steps. ''' - # for convenience and compatibility with user code - ROOT.gInterpreter.Declare("using namespace FCCAnalyses;") + # For convenience and compatibility with user code + if config['use_data_source']: + ROOT.gInterpreter.Declare("using namespace FCCAnalyses::PodioSource;") + else: + ROOT.gInterpreter.Declare("using namespace FCCAnalyses;") # Load geometry, needed for the CaloNtupleizer analyzers geometry_file = get_attribute(analysis, 'geometry_path', None) @@ -293,7 +313,7 @@ def initialize(args, analysis): # custom header files include_paths = get_attribute(analysis, 'include_paths', None) if include_paths is not None: - ROOT.gInterpreter.ProcessLine(".O3") + ROOT.gInterpreter.ProcessLine(".O2") basepath = os.path.dirname(os.path.abspath(args.anascript_path)) + "/" for path in include_paths: LOGGER.info('Loading %s...', path) @@ -301,7 +321,8 @@ def initialize(args, analysis): # _____________________________________________________________________________ -def run_rdf(args, +def run_rdf(config: dict[str, any], + args, analysis, input_list: list[str], out_file: str) -> int: @@ -309,7 +330,22 @@ def run_rdf(args, Run the analysis ROOTDataFrame and snapshot it. ''' # Create initial dataframe - dframe = ROOT.RDataFrame("events", input_list) + if config['use_data_source']: + if ROOT.podio.DataSource: + LOGGER.debug('Found podio::DataSource.') + else: + LOGGER.error('podio::DataSource library not found!\nAborting...') + sys.exit(3) + LOGGER.info('Loading events through podio::DataSource...') + + try: + dframe = ROOT.podio.CreateDataFrame(input_list) + except TypeError as excp: + LOGGER.error('Unable to build dataframe using ' + 'podio::DataSource!\n%s', excp) + sys.exit(3) + else: + dframe = ROOT.RDataFrame("events", input_list) # Limit number of events processed if args.nevents > 0: @@ -455,7 +491,10 @@ def apply_filepath_rewrites(filepath: str) -> str: # _____________________________________________________________________________ -def run_local(args, analysis, infile_list): +def run_local(config: dict[str, any], + args: object, + analysis: object, + infile_list): ''' Run analysis locally. ''' @@ -469,7 +508,8 @@ def run_local(args, analysis, infile_list): nevents_local = 0 for filepath in infile_list: - filepath = apply_filepath_rewrites(filepath) + if not config['use_data_source']: + filepath = apply_filepath_rewrites(filepath) file_list.push_back(filepath) info_msg += f'- {filepath}\t\n' @@ -512,7 +552,7 @@ def run_local(args, analysis, infile_list): # Run RDF start_time = time.time() - inn, outn = run_rdf(args, analysis, file_list, outfile_path) + inn, outn = run_rdf(config, args, analysis, file_list, outfile_path) elapsed_time = time.time() - start_time # replace nevents_local by inn = the amount of processed events @@ -577,8 +617,11 @@ def run_fccanalysis(args, analysis_module): analysis_args = vars(args) analysis = analysis_module.Analysis(analysis_args) + # Merge configuration from command line arguments and analysis class + config: dict[str, any] = merge_config(args, analysis) + # Set number of threads, load header files, custom dicts, ... - initialize(args, analysis_module) + initialize(config, args, analysis_module) # Check if output directory exist and if not create it output_dir = get_attribute(analysis, 'output_dir', None) @@ -598,7 +641,7 @@ def run_fccanalysis(args, analysis_module): directory, _ = os.path.split(args.output) if directory: os.system(f'mkdir -p {directory}') - run_local(args, analysis, [testfile_path]) + run_local(config, args, analysis, [testfile_path]) sys.exit(0) # Check if files are specified, and if so run the analysis on it/them (this @@ -608,7 +651,7 @@ def run_fccanalysis(args, analysis_module): directory, _ = os.path.split(args.output) if directory: os.system(f'mkdir -p {directory}') - run_local(args, analysis, args.files_list) + run_local(config, args, analysis, args.files_list) sys.exit(0) # Check if batch mode is available @@ -693,11 +736,11 @@ def run_fccanalysis(args, analysis_module): LOGGER.info('Running locally...') if len(chunk_list) == 1: args.output = f'{output_stem}.root' - run_local(args, analysis, chunk_list[0]) + run_local(config, args, analysis, chunk_list[0]) else: for index, chunk in enumerate(chunk_list): args.output = f'{output_stem}/chunk{index}.root' - run_local(args, analysis, chunk) + run_local(config, args, analysis, chunk) if len(process_list) == 0: LOGGER.warning('No files processed (process_list not found)!\n' diff --git a/setup.sh b/setup.sh index 5f680924be..2326229229 100644 --- a/setup.sh +++ b/setup.sh @@ -26,6 +26,7 @@ if [ "${0}" != "${BASH_SOURCE}" ]; then export PYTHONPATH=${LOCAL_DIR}/install/share/examples:${PYTHONPATH} export PATH=${LOCAL_DIR}/bin:${PATH} export PATH=${LOCAL_DIR}/install/bin:${PATH} + export LD_LIBRARY_PATH=${LOCAL_DIR}/install/lib64:${LD_LIBRARY_PATH} export LD_LIBRARY_PATH=${LOCAL_DIR}/install/lib:${LD_LIBRARY_PATH} export CMAKE_PREFIX_PATH=${LOCAL_DIR}/install:${CMAKE_PREFIX_PATH} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 45361f115a..9f93d9f0fb 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,6 +2,8 @@ add_subdirectory(unittest) add_subdirectory(benchmark) +add_subdirectory(integration) + add_integration_test("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py") add_integration_test("examples/FCCee/flavour/Bc2TauNu/analysis_B2TauNu_truth.py") add_integration_test("examples/FCCee/test/jet_constituents.py") @@ -9,5 +11,12 @@ add_integration_test("examples/FCCee/vertex_lcfiplus/analysis_V0.py") add_standalone_test("examples/FCCee/fullSim/caloNtupleizer/analysis.py") +if(WITH_PODIO_DATASOURCE) + add_standalone_test("examples/data_source/standalone.py") + add_integration_test("examples/data_source/histmaker_source.py") + add_integration_test("examples/data_source/stages_source.py") + add_integration_test("examples/data_source/analysis_stage1.py") +endif() + # TODO: make this test run in the spack build environment #add_generic_test(build_new_case_study "tests/build_new_case_study.sh") diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt new file mode 100644 index 0000000000..42884f7ace --- /dev/null +++ b/tests/integration/CMakeLists.txt @@ -0,0 +1,59 @@ +# Low level EDM4hep access +add_executable(low_level_edm4hep + ${CMAKE_SOURCE_DIR}/tests/integration/low_level_edm4hep.cxx +) +target_link_libraries(low_level_edm4hep ROOT::ROOTDataFrame + podio::podioDataSource + EDM4HEP::edm4hep + EDM4HEP::edm4hepDict +) +add_generic_test( + low_level_edm4hep + "${CMAKE_CURRENT_BINARY_DIR}/low_level_edm4hep 1" + "https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/p8_ee_WW_ecm240_edm4hep.root" +) + +add_executable( + low_level_associations + ${CMAKE_SOURCE_DIR}/tests/integration/low_level_associations.cxx) +target_link_libraries(low_level_associations ROOT::ROOTDataFrame + podio::podioDataSource + EDM4HEP::edm4hep + EDM4HEP::edm4hepDict + FCCAnalyses +) +add_generic_test( + low_level_associations + "${CMAKE_CURRENT_BINARY_DIR}/low_level_associations 1" + "https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/p8_ee_WW_ecm240_edm4hep.root" +) + +if(WITH_PODIO_DATASOURCE) + add_executable(data_source + ${CMAKE_SOURCE_DIR}/tests/integration/data_source.cxx) + target_include_directories(data_source PRIVATE ${CMAKE_SOURCE_DIR}/include) + target_link_libraries(data_source podio::podioDataSource + EDM4HEP::edm4hep + ) + add_generic_test( + data_source + "${CMAKE_CURRENT_BINARY_DIR}/data_source 1" + "https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/p8_ee_WW_ecm240_edm4hep.root" + ) + + add_executable( + data_source_associations + ${CMAKE_SOURCE_DIR}/tests/integration/data_source_associations.cxx + ) + target_include_directories(data_source_associations PRIVATE + ${CMAKE_SOURCE_DIR}/include) + target_link_libraries(data_source_associations podio::podioDataSource + EDM4HEP::edm4hep + FCCAnalyses + ) + add_generic_test( + data_source_associations + "${CMAKE_CURRENT_BINARY_DIR}/data_source_associations 1" + "https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/p8_ee_WW_ecm240_edm4hep.root" + ) +endif() diff --git a/tests/integration/data_source.cxx b/tests/integration/data_source.cxx new file mode 100644 index 0000000000..b977580792 --- /dev/null +++ b/tests/integration/data_source.cxx @@ -0,0 +1,148 @@ +// ROOT +#include +#include +#include +#include + +// PODIO +#include + +// EDM4hep +#include +#include +#include +#include +#include + +edm4hep::MCParticleCollection +selElectrons(const edm4hep::MCParticleCollection &inParticles) { + edm4hep::MCParticleCollection electrons; + electrons.setSubsetCollection(); + for (auto particle : inParticles) { + if (particle.getPDG() == 11) { + auto electron = edm4hep::MCParticle(particle); + electrons.push_back(electron); + } + } + + return electrons; +} + +struct selPDG { + selPDG(int pdg, bool chargeConjugateAllowed); + const int m_pdg; + const bool m_chargeConjugateAllowed; + edm4hep::MCParticleCollection + operator()(const edm4hep::MCParticleCollection &inParticles); +}; + +selPDG::selPDG(int pdg, bool chargeConjugateAllowed) + : m_pdg(pdg), m_chargeConjugateAllowed(chargeConjugateAllowed){}; + +edm4hep::MCParticleCollection +selPDG::operator()(const edm4hep::MCParticleCollection &inParticles) { + edm4hep::MCParticleCollection result; + result.setSubsetCollection(); + for (auto particle : inParticles) { + if (m_chargeConjugateAllowed) { + if (std::abs(particle.getPDG()) == std::abs(m_pdg)) { + result.push_back(particle); + } + } else { + if (particle.getPDG() == m_pdg) { + result.push_back(particle); + } + } + } + + return result; +} + +ROOT::VecOps::RVec +getPx(const edm4hep::MCParticleCollection &inParticles) { + ROOT::VecOps::RVec result; + for (auto particle : inParticles) { + result.push_back(particle.getMomentum().x); + } + + return result; +} + +edm4hep::MCParticleCollection +get_stable_particles_from_decay(edm4hep::MCParticle in) { + edm4hep::MCParticleCollection result; + result.setSubsetCollection(); + + auto daughters = in.getDaughters(); + if (daughters.size() != 0) { // particle is unstable + for (const auto &daughter : daughters) { + auto stable_daughters = get_stable_particles_from_decay(daughter); + for (const auto &stable_daughter : stable_daughters) { + result.push_back(stable_daughter); + } + } + } else { + result.push_back(in); + } + + return result; +} + +edm4hep::MCParticle +get_mcParticle(const edm4hep::ReconstructedParticle &recoParticle, + const edm4hep::RecoMCParticleLinkCollection &assocColl) { + edm4hep::MCParticle no_result; + + for (const auto &assoc : assocColl) { + if (assoc.getFrom() == recoParticle) { + return assoc.getTo(); + } + } + + return no_result; +} + +int main(int argc, const char *argv[]) { + auto verbosity = ROOT::Experimental::RLogScopedVerbosity( + ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo); + + int nThreads = 1; + if (argc > 1) { + nThreads = atoi(argv[1]); + } + + if (nThreads > 1) { + ROOT::EnableImplicitMT(nThreads); + } + + std::string filePath = "https://fccsw.web.cern.ch/fccsw/testsamples/" + "edm4hep1/p8_ee_WW_ecm240_edm4hep.root"; + if (argc > 2) { + filePath = argv[2]; + } + + ROOT::RDataFrame rdf(std::make_unique(filePath)); + + // rdf.Describe().Print(); + // std::cout << std::endl; + + std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl; + + auto rdf2 = rdf.Define("particles_px", getPx, {"Particle"}); + // auto rdf3 = rdf2.Define("electrons", selElectrons, {"MCParticles"}); + auto rdf3 = rdf2.Define("electrons", selPDG(11, false), {"Particle"}); + auto rdf4 = rdf3.Define("electrons_px", getPx, {"electrons"}); + auto h_particles_px = rdf4.Histo1D("particles_px"); + auto h_electrons_px = rdf4.Histo1D("electrons_px"); + + h_particles_px->Print(); + h_electrons_px->Print(); + + auto canvas = std::make_unique("canvas", "Canvas", 450, 450); + h_particles_px->Draw(); + canvas->Print("/tmp/source_particles_px.pdf"); + h_electrons_px->Draw(); + canvas->Print("/tmp/source_electrons_px.pdf"); + + return EXIT_SUCCESS; +} diff --git a/tests/integration/data_source_associations.cxx b/tests/integration/data_source_associations.cxx new file mode 100644 index 0000000000..fec2649ed3 --- /dev/null +++ b/tests/integration/data_source_associations.cxx @@ -0,0 +1,269 @@ +// std +#include +#include +#include + +// ROOT +#include +#include +#include +#include +#include +#include +#include +#include + +// PODIO +#include + +// EDM4hep +#include "edm4hep/RecoMCParticleLinkCollection.h" +#include "edm4hep/ReconstructedParticleCollection.h" +#include "edm4hep/TrackCollection.h" +#include "edm4hep/TrackState.h" +#include +#include +#include + +// FCCAnalyses +#include "FCCAnalyses/SmearObjects.h" +#include "FCCAnalyses/VertexingUtils.h" + +/// for a given MC particle, returns a "track state", i.e. a vector of 5 helix +/// parameters, in Delphes convention +TVectorD TrackParamFromMC_DelphesConv(edm4hep::MCParticle aMCParticle) { + + TVector3 p(aMCParticle.getMomentum().x, aMCParticle.getMomentum().y, + aMCParticle.getMomentum().z); + TVector3 x(1e-3 * aMCParticle.getVertex().x, 1e-3 * aMCParticle.getVertex().y, + 1e-3 * aMCParticle.getVertex().z); // mm to m + float Q = aMCParticle.getCharge(); + TVectorD param = + FCCAnalyses::VertexingUtils::XPtoPar(x, p, Q); // convention Franco + + return param; +} + +TMatrixDSym get_trackCov(const edm4hep::TrackState &atrack, bool Units_mm) { + auto covMatrix = atrack.covMatrix; + + TMatrixDSym covM = + FCCAnalyses::VertexingUtils::Edm4hep2Delphes_TrackCovMatrix(covMatrix, + Units_mm); + + return covM; +} + +/** + * \brief Generates new track states, by rescaling the covariance matrix of the + * tracks. + */ +struct SmearedTracks { + bool m_debug; + TRandom m_random; + float m_smear_parameters[5]; + SmearedTracks(float smear_d0, float smear_phi, float smear_omega, + float smear_z0, float smear_tlambda, bool debug); + ROOT::VecOps::RVec + operator()(const edm4hep::RecoMCParticleLinkCollection &mcRecoLink); +}; + +SmearedTracks::SmearedTracks(float smear_d0, float smear_phi, float smear_omega, + float smear_z0, float smear_tlambda, + bool debug = false) { + + m_smear_parameters[0] = smear_d0; + m_smear_parameters[1] = smear_phi; + m_smear_parameters[2] = smear_omega; + m_smear_parameters[3] = smear_z0; + m_smear_parameters[4] = smear_tlambda; + + m_debug = debug; +} + +ROOT::VecOps::RVec SmearedTracks::operator()( + const edm4hep::RecoMCParticleLinkCollection &mcRecoLink) { + + // returns a vector of TrackStates that is parallel to the collection of full + // tracks (alltracks), i.e. same number of entries, same order. The method + // retrieve the MC particle that is associated to a track, and builds a "track + // state" out of the MC particle (i.e. it determines the corresponding (d0, + // phi, omega, z0, tanlambda). From this vector, and from the covariance + // matrix of the track, which is scaled by the user, new track states are + // generated. + + ROOT::VecOps::RVec result; + + TVectorD zero(5); + for (int k = 0; k < 5; k++) { + zero(k) = 0.; + } + + for (const auto &assoc : mcRecoLink) { + const auto &recoParticle = assoc.getFrom(); + const auto &mcParticle = assoc.getTo(); + + if (recoParticle.getTracks().size() < 1) { + continue; + } + + if (recoParticle.getTracks().size() > 1) { + std::cout << "More than one track for one reco particle encountered!" + << std::endl; + } + + const edm4hep::Track &track = recoParticle.getTracks().at(0); + + // the MC-truth track parameters, in Delphes's comvention + TVectorD mcTrackParam = TrackParamFromMC_DelphesConv(mcParticle); + // and in edm4hep convention + TVectorD mcTrackParam_edm4hep = + FCCAnalyses::VertexingUtils::Delphes2Edm4hep_TrackParam(mcTrackParam, + false); + + if (track.getTrackStates().size() < 1) { + continue; + } + + if (track.getTrackStates().size() > 1) { + std::cout << "More than one track state for one track encountered!" + << std::endl; + } + + const edm4hep::TrackState &trackState = track.getTrackStates().at(0); + + // the covariance matrix of the track, in Delphes's convention + TMatrixDSym Cov = get_trackCov(trackState, false); + + // if the covMat of the track is pathological (numerical precision issue, + // fraction of tracks = 5e-6): return original track + if (Cov.Determinant() <= 0) { + result.emplace_back(trackState); + continue; + } + + // scale the covariance matrix + for (int j = 0; j < 5; j++) { + for (int k = 0; k < 5; k++) { + Cov[j][k] = Cov[j][k] * (m_smear_parameters[j] * m_smear_parameters[k]); + } + } + // if (m_debug) { + // Cov.Print(); + //} + + // generate a new track state (in Delphes's convention) + TVectorD smeared_param_delphes = FCCAnalyses::SmearObjects::CovSmear( + mcTrackParam, Cov, &m_random, m_debug); + + if (smeared_param_delphes == zero) { // Cholesky decomposition failed + result.emplace_back(trackState); + continue; + } + + // back to the edm4hep conventions.. + TVectorD smeared_param = + FCCAnalyses::VertexingUtils::Delphes2Edm4hep_TrackParam( + smeared_param_delphes, false); + + auto smearedTrackState = trackState; + if (smeared_param.GetNoElements() > 4) { + smearedTrackState.D0 = smeared_param[0]; + smearedTrackState.phi = smeared_param[1]; + smearedTrackState.omega = smeared_param[2]; + smearedTrackState.Z0 = smeared_param[3]; + smearedTrackState.tanLambda = smeared_param[4]; + } else { + result.emplace_back(trackState); + continue; + } + + // transform rescaled cov matrix from Delphes convention to EDM4hep + // convention + std::array covMatrix_edm4hep = + FCCAnalyses::VertexingUtils::Delphes2Edm4hep_TrackCovMatrix(Cov, false); + + smearedTrackState.covMatrix = covMatrix_edm4hep; + + if (m_debug) { + std::cout << std::endl + << "Original track " << trackState.D0 << " " << trackState.phi + << " " << trackState.omega << " " << trackState.Z0 << " " + << trackState.tanLambda << std::endl; + std::cout << "Smeared track " << smearedTrackState.D0 << " " + << smearedTrackState.phi << " " << smearedTrackState.omega + << " " << smearedTrackState.Z0 << " " + << smearedTrackState.tanLambda << std::endl; + std::cout << "MC particle " << mcTrackParam_edm4hep[0] << " " + << mcTrackParam_edm4hep[1] << " " << mcTrackParam_edm4hep[2] + << " " << mcTrackParam_edm4hep[3] << " " + << mcTrackParam_edm4hep[4] << std::endl; + for (int j = 0; j < 15; j++) + std::cout << "smeared cov matrix(" << j + << "): " << smearedTrackState.covMatrix[j] + << ", scale factor: " + << smearedTrackState.covMatrix[j] / trackState.covMatrix[j] + << std::endl; + } + /* + unsigned int recoIndex = assoc.getRec().getObjectID().index; + unsigned int mcIndex = assoc.getSim().getObjectID().index; + std::cout << "Reco: " << recoIndex << " ---> MC: " << mcIndex << std::endl; + */ + + result.emplace_back(smearedTrackState); + } + + return result; +} + +int main(int argc, char *argv[]) { + auto verbosity = ROOT::Experimental::RLogScopedVerbosity( + ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo); + + bool success = gInterpreter->Declare("#include \"edm4hep/TrackState.h\""); + if (!success) { + std::cerr << "ERROR: Unable to find edm4hep::TrackState header file!" + << std::endl; + return EXIT_FAILURE; + } + + int nThreads = 1; + if (argc > 1) { + nThreads = atoi(argv[1]); + } + + if (nThreads > 1) { + ROOT::EnableImplicitMT(nThreads); + } + + std::string filePath = "https://fccsw.web.cern.ch/fccsw/testsamples/" + "edm4hep1/p8_ee_WW_ecm240_edm4hep.root"; + if (argc > 2) { + filePath = argv[2]; + } + + ROOT::RDataFrame rdf(std::make_unique(filePath)); + + // rdf.Describe().Print(); + // std::cout << std::endl; + + std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl; + + auto rdf2 = + rdf.Define("SmearedTracks", SmearedTracks(2.0, 2.0, 2.0, 2.0, 2.0, false), + {"MCRecoAssociations"}); + + auto rdf3 = rdf2.Define( + "smearTrack_omega", + "return SmearedTracks.empty() ? -1 : SmearedTracks.at(0).omega;"); + auto h_smeared_tracks_omega = rdf3.Histo1D("smearTrack_omega"); + + h_smeared_tracks_omega->Print(); + + auto canvas = std::make_unique("canvas", "Canvas", 450, 450); + h_smeared_tracks_omega->Draw(); + canvas->Print("/tmp/source_smeared_tracks_omega.pdf"); + + return EXIT_SUCCESS; +} diff --git a/tests/integration/low_level_associations.cxx b/tests/integration/low_level_associations.cxx new file mode 100644 index 0000000000..70bdf047ba --- /dev/null +++ b/tests/integration/low_level_associations.cxx @@ -0,0 +1,70 @@ +// ROOT +#include "ROOT/RDataFrame.hxx" +#include "ROOT/RLogger.hxx" +#include "ROOT/RVec.hxx" +#include "TCanvas.h" +// EDM4hep +#include "edm4hep/MCParticleData.h" +#include "edm4hep/SimCalorimeterHitData.h" +// FCCAnalyses +#include "FCCAnalyses/ReconstructedParticle2MC.h" +#include "FCCAnalyses/SmearObjects.h" + +int main(int argc, const char *argv[]) { + auto verbosity = ROOT::Experimental::RLogScopedVerbosity( + ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo); + + bool success = gInterpreter->Declare("#include \"edm4hep/TrackState.h\""); + if (!success) { + std::cerr << "ERROR: Unable to find edm4hep::TrackState header file!" + << std::endl; + return EXIT_FAILURE; + } + + int nThreads = 1; + if (argc > 1) { + nThreads = atoi(argv[1]); + } + + if (nThreads > 1) { + ROOT::EnableImplicitMT(nThreads); + } + + std::string filePath = "https://fccsw.web.cern.ch/fccsw/testsamples/" + "edm4hep1/p8_ee_WW_ecm240_edm4hep.root"; + if (argc > 2) { + filePath = argv[2]; + } + + ROOT::RDataFrame rdf("events", filePath); + + // rdf.Describe().Print(); + // std::cout << std::endl; + + std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl; + + auto rdf2 = + rdf.Alias("MCRecoAssociations0", "_MCRecoAssociations_from.index"); + auto rdf3 = rdf2.Alias("MCRecoAssociations1", "_MCRecoAssociations_to.index"); + auto rdf4 = rdf3.Define( + "RP_MC_index", FCCAnalyses::ReconstructedParticle2MC::getRP2MC_index, + {"MCRecoAssociations0", "MCRecoAssociations1", "ReconstructedParticles"}); + auto rdf5 = rdf4.Define( + "SmearedTracks", + FCCAnalyses::SmearObjects::SmearedTracks(2.0, 2.0, 2.0, 2.0, 2.0, true), + {"ReconstructedParticles", "_EFlowTrack_trackStates", "RP_MC_index", + "Particle"}); + auto rdf6 = rdf5.Define( + "smearTrack_omega", + "return SmearedTracks.empty() ? -1 : SmearedTracks.at(0).omega;"); + // auto rdf6 = rdf5.Define("smearTrack_omega", "return RP_MC_index[0];"); + auto h_smeared_tracks_omega = rdf6.Histo1D("smearTrack_omega"); + + h_smeared_tracks_omega->Print(); + + auto canvas = std::make_unique("canvas", "Canvas", 450, 450); + h_smeared_tracks_omega->Draw(); + canvas->Print("/tmp/low_level_smeared_tracks_omega.pdf"); + + return EXIT_SUCCESS; +} diff --git a/tests/integration/low_level_edm4hep.cxx b/tests/integration/low_level_edm4hep.cxx new file mode 100644 index 0000000000..9d4d25b19d --- /dev/null +++ b/tests/integration/low_level_edm4hep.cxx @@ -0,0 +1,105 @@ +// ROOT +#include +#include +#include +#include +// EDM4hep +#include +#include + +ROOT::VecOps::RVec +selElectrons(ROOT::VecOps::RVec &inParticles) { + ROOT::VecOps::RVec electrons; + for (size_t i = 0; i < inParticles.size(); ++i) { + if (inParticles[i].PDG == 11) { + electrons.emplace_back(inParticles[i]); + } + } + + return electrons; +} + +struct selPDG { + selPDG(int pdg, bool chargeConjugateAllowed); + const int m_pdg; + const bool m_chargeConjugateAllowed; + ROOT::VecOps::RVec + operator()(ROOT::VecOps::RVec &inParticles); +}; + +selPDG::selPDG(int pdg, bool chargeConjugateAllowed) + : m_pdg(pdg), m_chargeConjugateAllowed(chargeConjugateAllowed){}; + +ROOT::VecOps::RVec +selPDG::operator()(ROOT::VecOps::RVec &inParticles) { + ROOT::VecOps::RVec result; + for (size_t i = 0; i < inParticles.size(); ++i) { + auto &particle = inParticles[i]; + if (m_chargeConjugateAllowed) { + if (std::abs(particle.PDG) == std::abs(m_pdg)) { + result.emplace_back(particle); + } + } else { + if (particle.PDG == m_pdg) { + result.emplace_back(particle); + } + } + } + + return result; +} + +ROOT::VecOps::RVec +getPx(ROOT::VecOps::RVec inParticles) { + ROOT::VecOps::RVec result; + for (auto &p : inParticles) { + result.push_back(p.momentum.x); + } + + return result; +} + +int main(int argc, const char *argv[]) { + auto verbosity = ROOT::Experimental::RLogScopedVerbosity( + ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo); + + int nThreads = 1; + if (argc > 1) { + nThreads = atoi(argv[1]); + } + + if (nThreads > 1) { + ROOT::EnableImplicitMT(nThreads); + } + + std::string filePath = "https://fccsw.web.cern.ch/fccsw/testsamples/" + "edm4hep1/p8_ee_WW_ecm240_edm4hep.root"; + if (argc > 2) { + filePath = argv[2]; + } + + ROOT::RDataFrame rdf("events", filePath); + + // rdf.Describe().Print(); + // std::cout << std::endl; + + std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl; + + auto rdf2 = rdf.Define("particles_px", getPx, {"Particle"}); + // auto rdf3 = rdf2.Define("electrons", selElectrons, {"MCParticles"}); + auto rdf3 = rdf2.Define("electrons", selPDG(11, false), {"Particle"}); + auto rdf4 = rdf3.Define("electrons_px", getPx, {"electrons"}); + auto h_particles_px = rdf4.Histo1D("particles_px"); + auto h_electrons_px = rdf4.Histo1D("electrons_px"); + + h_particles_px->Print(); + h_electrons_px->Print(); + + auto canvas = std::make_unique("canvas", "Canvas", 450, 450); + h_particles_px->Draw(); + canvas->Print("/tmp/low_level_particles_px.pdf"); + h_electrons_px->Draw(); + canvas->Print("/tmp/low_level_electrons_px.pdf"); + + return EXIT_SUCCESS; +} diff --git a/tests/unittest/CMakeLists.txt b/tests/unittest/CMakeLists.txt index 485ff7e46b..4f364b052b 100644 --- a/tests/unittest/CMakeLists.txt +++ b/tests/unittest/CMakeLists.txt @@ -1,6 +1,6 @@ find_catch_instance() -# The unittests are a bit better and they are labelled so we can put together a +# The unittests are a bit better and they are labeled so we can put together a # list of labels that we want to ignore set(filter_tests "") @@ -9,7 +9,9 @@ add_executable(unittest unittest.cpp algorithms.cpp ReconstructedParticle.cpp ) -target_link_libraries(unittest PUBLIC FCCAnalyses gfortran PRIVATE Catch2::Catch2WithMain) +target_link_libraries(unittest PUBLIC FCCAnalyses + gfortran + PRIVATE Catch2::Catch2WithMain) target_include_directories(unittest PUBLIC ${VDT_INCLUDE_DIR}) target_compile_definitions(unittest PUBLIC "-DTEST_INPUT_DATA_DIR=${TEST_INPUT_DATA_DIR}") include(Catch)