From eef66b3426237a80ea54902a5afa429c2afd5a72 Mon Sep 17 00:00:00 2001
From: Juraj Smiesko <34742917+kjvbrt@users.noreply.github.com>
Date: Tue, 1 Oct 2024 16:05:44 +0200
Subject: [PATCH] podio::DataSource (#309)

* Integrating EDM4hep RDataSource

* Add legacy reader

* Split off legacy reader

* Protecting collection retrieval by mutex

* Add source test

* Updated man pages

* Renaming Source na DataSource

* Adding source test for run stages

* Add tests for the standalone mode

* Adding test files per process for histmaker

* Adding selector by size and sorter by pT

* Few more analyzers

* Putting back things omitted from run_analysis.py

* Adding e4hsource in LD_LIBRARY_PATH for managed tests

* Making building of the source optional

* Removed legacy support

* Separated analyzers using Collections into independent headers

* Reimplementing analyzers needed for the stage1 of the example analysis

* Adjusitng test input files

* Moving recoParticle definition

* Changes to use podio::ROOTDataSource

* using podio::DataSource

* Adjusting stages source example

* Adding C++ analysis tests

* Updating examples

* Formatting

* Removing e4hsource

* Adjust clang-format check

* Formatting

* Using PodioSource namespace

* Adding .cache to .gitignore

* Formatting
---
 .github/workflows/format.yml                  |   7 +-
 .gitignore                                    |   1 +
 CMakeLists.txt                                |   3 +
 analyzers/dataframe/CMakeLists.txt            |  11 +
 analyzers/dataframe/FCCAnalyses/Defines.h     |   6 -
 analyzers/dataframe/FCCAnalyses/LinkDef.h     |  35 +-
 analyzers/dataframe/FCCAnalyses/LinkSource.h  |  92 ++++++
 .../FCCAnalyses/ReconstructedParticle.h       |  10 +-
 .../FCCAnalyses/ReconstructedParticleSource.h | 242 ++++++++++++++
 analyzers/dataframe/FCCAnalyses/TrackSource.h |  45 +++
 .../dataframe/src/ReconstructedParticle.cc    |   8 +-
 .../src/ReconstructedParticleSource.cc        | 300 ++++++++++++++++++
 analyzers/dataframe/src/TrackSource.cc        |  49 +++
 cmake/FCCAnalysesFunctions.cmake              |   2 +-
 .../higgs/mH-recoil/mumu/analysis_stage1.py   |  54 ++--
 examples/FCChh/HH_bbtautau/analysis.cc        |   3 -
 examples/data_source/analysis_stage1.py       | 134 ++++++++
 examples/data_source/histmaker_source.py      |  49 +++
 examples/data_source/stages_source.py         |  61 ++++
 examples/data_source/standalone.py            |  50 +++
 man/man1/fccanalysis-build.1                  |   6 +-
 man/man1/fccanalysis-run.1                    |   6 +-
 man/man7/fccanalysis-script.7                 |  14 +-
 python/anascript.py                           |   2 +-
 python/build_analysis.py                      |  11 +-
 python/parsers.py                             |   9 +
 python/process.py                             |   7 +-
 python/run_analysis.py                        | 143 ++++++---
 python/run_fccanalysis.py                     |  71 ++++-
 setup.sh                                      |   1 +
 tests/CMakeLists.txt                          |   9 +
 tests/integration/CMakeLists.txt              |  59 ++++
 tests/integration/data_source.cxx             | 148 +++++++++
 .../integration/data_source_associations.cxx  | 269 ++++++++++++++++
 tests/integration/low_level_associations.cxx  |  70 ++++
 tests/integration/low_level_edm4hep.cxx       | 105 ++++++
 tests/unittest/CMakeLists.txt                 |   6 +-
 37 files changed, 1966 insertions(+), 132 deletions(-)
 create mode 100644 analyzers/dataframe/FCCAnalyses/LinkSource.h
 create mode 100644 analyzers/dataframe/FCCAnalyses/ReconstructedParticleSource.h
 create mode 100644 analyzers/dataframe/FCCAnalyses/TrackSource.h
 create mode 100644 analyzers/dataframe/src/ReconstructedParticleSource.cc
 create mode 100644 analyzers/dataframe/src/TrackSource.cc
 create mode 100644 examples/data_source/analysis_stage1.py
 create mode 100644 examples/data_source/histmaker_source.py
 create mode 100644 examples/data_source/stages_source.py
 create mode 100644 examples/data_source/standalone.py
 create mode 100644 tests/integration/CMakeLists.txt
 create mode 100644 tests/integration/data_source.cxx
 create mode 100644 tests/integration/data_source_associations.cxx
 create mode 100644 tests/integration/low_level_associations.cxx
 create mode 100644 tests/integration/low_level_edm4hep.cxx

diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
index 6884af8ae7..e3a889861b 100644
--- a/.github/workflows/format.yml
+++ b/.github/workflows/format.yml
@@ -27,8 +27,5 @@ jobs:
       run: |
         docker exec CI_container /bin/bash -c 'cd ./Package; \
           source /cvmfs/sw.hsf.org/key4hep/setup.sh;\
-          git clang-format --style=file $(git merge-base upstream/master HEAD)'
-    - name: Check cleanliness
-      run: |
-        docker exec CI_container /bin/bash -c 'cd ./Package; \
-          git diff'
+          git clang-format --diff --style=file $(git merge-base upstream/master HEAD)
+          git clang-format --diffstat --style=file $(git merge-base upstream/master HEAD)'
diff --git a/.gitignore b/.gitignore
index 28858293fb..82fda36413 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,7 @@ __pycache__/
 # Editors
 *~
 .vimlocal
+.cache/
 
 # Distribution / packaging
 .Python
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a220f1f363..dd403b566e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -31,6 +31,9 @@ option(USE_EXTERNAL_CATCH2 "Link against an external Catch2 v3 static library, o
 
 option(FCCANALYSES_DOCUMENTATION "Whether or not to create doxygen doc target." ON)
 
+set(WITH_PODIO_DATASOURCE ON CACHE STRING "Enable PODIO DataSource")
+set_property(CACHE WITH_PODIO_DATASOURCE PROPERTY STRINGS ON OFF)
+
 #--- Export compile commands --------------------------------------------------
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
diff --git a/analyzers/dataframe/CMakeLists.txt b/analyzers/dataframe/CMakeLists.txt
index c82e11cd46..6b633c1d27 100644
--- a/analyzers/dataframe/CMakeLists.txt
+++ b/analyzers/dataframe/CMakeLists.txt
@@ -32,6 +32,14 @@ if(NOT WITH_ACTS)
   list(FILTER sources EXCLUDE REGEX "VertexFinderActs.cc")
 endif()
 
+if(NOT WITH_PODIO_DATASOURCE)
+  list(FILTER headers EXCLUDE REGEX "LinkSource.h")
+  list(FILTER headers EXCLUDE REGEX "ReconstructedParticleSource.h")
+  list(FILTER sources EXCLUDE REGEX "ReconstructedParticleSource.cc")
+  list(FILTER headers EXCLUDE REGEX "TrackSource.h")
+  list(FILTER sources EXCLUDE REGEX "TrackSource.cc")
+endif()
+
 message(STATUS "FCCAnalyses headers:\n   ${headers}")
 message(STATUS "FCCAnalyses sources:\n   ${sources}")
 
@@ -59,7 +67,10 @@ target_link_libraries(FCCAnalyses PUBLIC
                       ROOT::ROOTDataFrame
                       EDM4HEP::edm4hep
                       EDM4HEP::edm4hepDict
+                      EDM4HEP::utils
                       podio::podio
+                      podio::podioRootIO
+                      podio::podioDataSource
                       ${ADDONS_LIBRARIES}
                       ${DELPHES_LIBRARY}
                       gfortran # todo: why necessary?
diff --git a/analyzers/dataframe/FCCAnalyses/Defines.h b/analyzers/dataframe/FCCAnalyses/Defines.h
index e537dd536e..7e8f510c2c 100644
--- a/analyzers/dataframe/FCCAnalyses/Defines.h
+++ b/analyzers/dataframe/FCCAnalyses/Defines.h
@@ -1,18 +1,12 @@
 #ifndef DEFINES_ANALYZERS_H
 #define DEFINES_ANALYZERS_H
 
-// std
-#include <cmath>
-#include <math.h>
-#include <vector>
-
 // ROOT
 #include "ROOT/RVec.hxx"
 #include "TLorentzVector.h"
 
 // EDM4hep
 #include "edm4hep/MCParticleData.h"
-#include "edm4hep/ParticleIDData.h"
 #include "edm4hep/ReconstructedParticleData.h"
 
 /**
diff --git a/analyzers/dataframe/FCCAnalyses/LinkDef.h b/analyzers/dataframe/FCCAnalyses/LinkDef.h
index 23b891b186..a1eee99d85 100644
--- a/analyzers/dataframe/FCCAnalyses/LinkDef.h
+++ b/analyzers/dataframe/FCCAnalyses/LinkDef.h
@@ -8,31 +8,42 @@
 
 //Dictionaries for output objects
 #pragma link C++ class std::vector<TLorentzVector>+;
+#pragma link C++ class ROOT::VecOps::RVec < TLorentzVector> + ;
+#pragma link C++ class ROOT::VecOps::RVec < ROOT::VecOps::RVec <               \
+    TLorentzVector>> +                                                         \
+    ;
+#pragma link C++ class ROOT::VecOps::RVec < TVector3> + ;
+#pragma link C++ class ROOT::VecOps::RVec < ROOT::VecOps::RVec < TVector3>> + ;
+
 #pragma link C++ class std::vector<std::string>+;
 
-#pragma link C++ class ROOT::VecOps::RVec<TLorentzVector>+;
-#pragma link C++ class std::vector<std::vector<int>>+;
-#pragma link C++ class std::vector<std::vector<float>>+;
-#pragma link C++ class std::vector<std::vector<double>>+;
+// EDM4hep
 #pragma link C++ class ROOT::VecOps::RVec<edm4hep::TrackState>+;
 #pragma link C++ class ROOT::VecOps::RVec<edm4hep::VertexData>+;
 #pragma link C++ class ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData>+;
 #pragma link C++ class ROOT::VecOps::RVec<edm4hep::CalorimeterHitData>+;
-#pragma link C++ class ROOT::VecOps::RVec<edm4hep::ClusterData>+;
-#pragma link C++ class ROOT::VecOps::RVec<podio::ObjectID>+;
+#pragma link C++ class ROOT::VecOps::RVec < edm4hep::ClusterData> + ;
 #pragma link C++ class ROOT::VecOps::RVec<edm4hep::Vector3d>+;
 #pragma link C++ class ROOT::VecOps::RVec<edm4hep::MCParticleData>+;
-#pragma link C++ class ROOT::VecOps::RVec<TVector3>+;
+#pragma link C++ class ROOT::VecOps::RVec < ROOT::VecOps::RVec <               \
+    edm4hep::ReconstructedParticleData>> +                                     \
+    ;
+
+// Vectors
+#pragma link C++ class ROOT::VecOps::RVec < std::vector < int>> + ;
+#pragma link C++ class ROOT::VecOps::RVec < std::vector < float>> + ;
+#pragma link C++ class ROOT::VecOps::RVec < std::vector < double>> + ;
 #pragma link C++ class ROOT::VecOps::RVec<ROOT::VecOps::RVec<int>>+;
 #pragma link C++ class ROOT::VecOps::RVec<ROOT::VecOps::RVec<float>>+;
 #pragma link C++ class ROOT::VecOps::RVec<ROOT::VecOps::RVec<double>>+;
-#pragma link C++ class ROOT::VecOps::RVec<ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData>>+;
-#pragma link C++ class ROOT::VecOps::RVec<std::vector<float>>+;
+#pragma link C++ class std::vector < std::vector < int>> + ;
+#pragma link C++ class std::vector < std::vector < float>> + ;
+#pragma link C++ class std::vector < std::vector < double>> + ;
 
 #pragma link C++ class ROOT::VecOps::RVec<FCCAnalyses::VertexingUtils::FCCAnalysesVertex>+;
-#pragma link C++ class ROOT::VecOps::RVec<ROOT::VecOps::RVec<FCCAnalyses::VertexingUtils::FCCAnalysesVertex>>+;
-#pragma link C++ class ROOT::VecOps::RVec<ROOT::VecOps::RVec<TVector3>>+;
-#pragma link C++ class ROOT::VecOps::RVec<ROOT::VecOps::RVec<TLorentzVector>>+;
+#pragma link C++ class ROOT::VecOps::RVec < ROOT::VecOps::RVec <               \
+    FCCAnalyses::VertexingUtils::FCCAnalysesVertex>> +                         \
+    ;
 
 //to load all other functions
 #pragma link C++ function dummyLoader;
diff --git a/analyzers/dataframe/FCCAnalyses/LinkSource.h b/analyzers/dataframe/FCCAnalyses/LinkSource.h
new file mode 100644
index 0000000000..1237923f90
--- /dev/null
+++ b/analyzers/dataframe/FCCAnalyses/LinkSource.h
@@ -0,0 +1,92 @@
+#ifndef SOURCE_LINK_ANALYZERS_H
+#define SOURCE_LINK_ANALYZERS_H
+
+// EDM4hep
+#include "edm4hep/RecoMCParticleLinkCollection.h"
+
+namespace FCCAnalyses ::PodioSource ::Link {
+/**
+ * \brief Analyzer to select links where the MC particle has a specified PDG
+ *        ID.
+ *
+ * \param[in] pdg  Desired PDG ID of the MC particle.
+ */
+struct selPDG {
+  const int m_pdg;
+
+  explicit selPDG(const int pdg) : m_pdg(pdg){};
+
+  template <typename T> T operator()(const T &inLinkColl) {
+    T result;
+    result.setSubsetCollection();
+
+    for (const auto &link : inLinkColl) {
+      const auto &particle = link.getTo();
+      if (particle.getPDG() == m_pdg) {
+        result.push_back(link);
+      }
+    }
+
+    return result;
+  }
+};
+
+/**
+ * \brief Analyzer to select links where MC particle has a specified absolute
+ *        value of PDG ID.
+ *
+ * \param pdg[in]  Desired absolute value of PDG ID of the MC particle.
+ */
+struct selAbsPDG {
+  const int m_pdg;
+
+  explicit selAbsPDG(const int pdg) : m_pdg(pdg) {
+    if (m_pdg < 0) {
+      throw std::invalid_argument("FCCAnalyses::PodioSource::Link::sel_absPDG: "
+                                  "Received negative value!");
+    }
+  };
+
+  template <typename T> auto operator()(const T &inLinkColl) {
+    T result;
+    result.setSubsetCollection();
+
+    for (const auto &link : inLinkColl) {
+      const auto &particle = link.getTo();
+      if (std::abs(particle.getPDG()) == m_pdg) {
+        result.push_back(link);
+      }
+    }
+
+    return result;
+  };
+};
+
+/**
+ * \brief Analyzer to select links where the MC particle has a specified
+ *        generator status.
+ *
+ * \param status[in]  Desired generator status of the particle.
+ */
+struct selGenStatus {
+  const int m_status;
+
+  explicit selGenStatus(const int status) : m_status(status){};
+
+  template <typename T> T operator()(const T &inLinkColl) {
+    T result;
+    result.setSubsetCollection();
+
+    for (const auto &link : inLinkColl) {
+      const auto &particle = link.getTo();
+      if (particle.getGeneratorStatus() == m_status) {
+        result.push_back(link);
+      }
+    }
+
+    return result;
+  }
+};
+} // namespace FCCAnalyses::PodioSource::Link
+
+#endif /* SOURCE_LINK_ANALYZERS_H */
diff --git a/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h b/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h
index 3321a2b680..80599d58f8 100644
--- a/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h
+++ b/analyzers/dataframe/FCCAnalyses/ReconstructedParticle.h
@@ -2,11 +2,15 @@
 #ifndef  RECONSTRUCTEDPARTICLE_ANALYZERS_H
 #define  RECONSTRUCTEDPARTICLE_ANALYZERS_H
 
+// STL
 #include <cmath>
 #include <vector>
 
+// ROOT
 #include "TLorentzVector.h"
 #include "ROOT/RVec.hxx"
+
+// EDM4hep
 #include "edm4hep/ReconstructedParticleData.h"
 #include "edm4hep/ParticleIDData.h"
 
@@ -97,9 +101,6 @@ namespace ReconstructedParticle{
     ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData>  operator() (ROOT::VecOps::RVec<bool> tags, ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData> in);
   };
 
-
-
-
   /// return reconstructed particles
   ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData> get(ROOT::VecOps::RVec<int> index, ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData> in);
 
@@ -151,7 +152,7 @@ namespace ReconstructedParticle{
   /// return the TlorentzVector of the one input ReconstructedParticle
   TLorentzVector get_tlv(edm4hep::ReconstructedParticleData in);
 
-	/// return visible 4-momentum vector
+  /// return visible 4-momentum vector
   TLorentzVector get_P4vis(ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData> in);
 
   /// concatenate both input vectors and return the resulting vector
@@ -168,7 +169,6 @@ namespace ReconstructedParticle{
 
   /// get number of b-jets
   int getJet_ntags(ROOT::VecOps::RVec<bool> in);
-
 }//end NS ReconstructedParticle
 
 }//end NS FCCAnalyses
diff --git a/analyzers/dataframe/FCCAnalyses/ReconstructedParticleSource.h b/analyzers/dataframe/FCCAnalyses/ReconstructedParticleSource.h
new file mode 100644
index 0000000000..2d61a0dab6
--- /dev/null
+++ b/analyzers/dataframe/FCCAnalyses/ReconstructedParticleSource.h
@@ -0,0 +1,242 @@
+#ifndef ANALYZERS_SOURCE_RECONSTRUCTED_PARTICLE_H
+#define ANALYZERS_SOURCE_RECONSTRUCTED_PARTICLE_H
+
+// ROOT
+#include "ROOT/RVec.hxx"
+
+// EDM4hep
+#include "edm4hep/RecoMCParticleLinkCollection.h"
+#include "edm4hep/ReconstructedParticleCollection.h"
+
+namespace FCCAnalyses ::PodioSource ::ReconstructedParticle {
+// --------------------  Selectors  -----------------------------------------
+
+/**
+ * \brief Analyzer to select reconstructed particles associated with MC
+ *        particle of the specified PDG ID.
+ */
+struct selPDG {
+  const int m_pdg;
+  /**
+   * \brief Constructor.
+   *
+   * \param[in] pdgID  Desired value of the PDG ID.
+   */
+  explicit selPDG(const int pdgID);
+  /**
+   * \brief Operator over the input link collection.
+   *
+   * \param[in] inLinkColl  Input collection of the reco-MC links.
+   * \param[out] result  Collection of the reconstructed particles associated
+   *                     with the MC particle of the desired PDG ID.
+   */
+  edm4hep::ReconstructedParticleCollection
+  operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl);
+};
+
+/**
+ * \brief Analyzer to select reconstructed particles associated with MC
+ *        particle with the specified absolute value of the PDG ID.
+ */
+struct selAbsPDG {
+  const int m_absPdg;
+  /**
+   * \brief Constructor.
+   *
+   * \param[in] pdgID  Desired absolute value of the PDG ID.
+   */
+  explicit selAbsPDG(const int pdgID);
+  /**
+   * \brief Operator over the input link collection.
+   *
+   * \param[in] inLinkColl  Input collection of the MC-reco links.
+   * \param[out] result  Collection of the reconstructed particles associated
+   *                     with the MC particle with the desired absolute value
+   *                     of the PDG ID.
+   */
+  edm4hep::ReconstructedParticleCollection
+  operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl);
+};
+
+/**
+ * \brief Select reconstructed particles with transverse momentum greater
+ *        than a minimum value [GeV].
+ */
+struct selPt {
+  const float m_minPt;
+  /**
+   * \brief Constructor.
+   *
+   * \param[in] minPt  Transverse momentum threshold [GeV].
+   */
+  explicit selPt(float minPt);
+  /**
+   * \brief Operator over input link collection.
+   *
+   * \param[in] inColl  Input collection of the reconstructed particles.
+   * \param[out] result  Collection of the selected reconstructed particles.
+   */
+  edm4hep::ReconstructedParticleCollection
+  operator()(const edm4hep::ReconstructedParticleCollection &inColl);
+};
+
+/**
+ * \brief Analyzer to select specified number of reconstructed particles.
+ */
+struct selUpTo {
+  const size_t m_size;
+  /**
+   * \brief Constructor.
+   *
+   * \param[in] size  Desired number of reconstructed particles.
+   */
+  explicit selUpTo(const size_t size);
+  /**
+   * \brief Operator over input link collection.
+   *
+   * \param[out] inColl  Input collection of reconstructed particles.
+   * \param[out] result  Output collection of reconstructed particles.
+   */
+  edm4hep::ReconstructedParticleCollection
+  operator()(const edm4hep::ReconstructedParticleCollection &inColl);
+};
+
+/**
+ * \brief Analyzer to select reconstructed particles associated with the MC
+ *        particle of the desired generator status.
+ */
+struct selGenStatus {
+  const int m_status;
+  /**
+   * \brief Constructor.
+   *
+   * \param[in] status  Desired generator status of the MC particle.
+   */
+  explicit selGenStatus(const int status);
+  /**
+   * \brief Operator over input link collection.
+   *
+   * \param[in] inColl  Input collection of the reco-MC particle links.
+   * \param[out] result  Output collection of the reconstructed particles.
+   */
+  edm4hep::ReconstructedParticleCollection
+  operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl);
+};
+
+// --------------------  Getters  -------------------------------------------
+
+/**
+ * \brief Get momenta of the input reconstructed particles.
+ *
+ * \param[in] inColl  Input particle collection.
+ * \param[out] result  Vector of particle momenta.
+ */
+ROOT::VecOps::RVec<float>
+getP(const edm4hep::ReconstructedParticleCollection &inColl);
+
+/**
+ * \brief Get transverse momenta (pT) of the input particles.
+ *
+ * \param[in] inColl  Input particle collection.
+ * \param[out] result  Vector of particle pTs.
+ */
+ROOT::VecOps::RVec<float>
+getPt(const edm4hep::ReconstructedParticleCollection &inColl);
+
+/**
+ * \brief Get rapidity (y) of the input reconstructed particles.
+ *
+ * \param[in] inColl  Input particle collection.
+ * \param[out] result  Vector of particle rapidities.
+ */
+ROOT::VecOps::RVec<float>
+getY(const edm4hep::ReconstructedParticleCollection &inColl);
+
+/**
+ * \brief Get energy (E) of the input reconstructed particles.
+ *
+ * \param[in] inColl  Input particle collection.
+ * \param[out] result  Vector of particle energies.
+ */
+ROOT::VecOps::RVec<float>
+getE(const edm4hep::ReconstructedParticleCollection &inColl);
+
+/**
+ * \brief Get mass of the input reconstructed particles.
+ *
+ * \param[in] inColl  Input particle collection.
+ * \param[out] result  Vector of particle masses.
+ */
+ROOT::VecOps::RVec<float>
+getMass(const edm4hep::ReconstructedParticleCollection &inColl);
+
+/**
+ * \brief Get charge of the input reconstructed particles.
+ *
+ * \param[in] inColl  Input particle collection.
+ * \param[out] result  Vector of particle charges.
+ */
+ROOT::VecOps::RVec<float>
+getCharge(const edm4hep::ReconstructedParticleCollection &inColl);
+
+// --------------------  Transformers  --------------------------------------
+
+/**
+ * \brief Sort input particles by pT.
+ *
+ * \param[in] inColl  Input particles.
+ * \param[out] result  Sorted collection of particles.
+ */
+edm4hep::ReconstructedParticleCollection
+sortByPt(const edm4hep::ReconstructedParticleCollection &inColl);
+
+/**
+ * \brief Build two particle resonances from an arbitrary list of input
+ *        reconstructed particles.
+ *        Select the one closest to the pre-defined mass.
+ */
+struct resonanceBuilder {
+  const float m_resonanceMass;
+  /**
+   * \brief Constructor.
+   *
+   * \param[in] resonanceMass  Desired value of the resonance mass.
+   */
+  explicit resonanceBuilder(float resonanceMass);
+  /**
+   * \brief Operator over input particle collection.
+   *
+   * \param[in] inColl  Input collection of the reconstructed particles.
+   * \param[out] result  Selected reconstructed particle resonance.
+   */
+  edm4hep::ReconstructedParticleCollection
+  operator()(const edm4hep::ReconstructedParticleCollection &inColl);
+};
+
+/**
+ * \brief Build the recoil from an arbitrary list of input resonances at the
+ *        specified center of mass energy.
+ */
+struct recoilBuilder {
+  const float m_sqrts;
+  /**
+   * \brief Constructor.
+   *
+   * \param[in] sqrts  Center of mass energy.
+   */
+  explicit recoilBuilder(float sqrts);
+  /**
+   * \brief Operator over input collection of resonances.
+   *
+   * \param[in] inColl  Input collection of resonances.
+   * \param[out] result  Resulting recoil.
+   */
+  edm4hep::ReconstructedParticleCollection
+  operator()(const edm4hep::ReconstructedParticleCollection &inColl);
+};
+
+} // namespace FCCAnalyses::PodioSource::ReconstructedParticle
+
+// namespace recoParticle = FCCAnalyses::PodioSource::ReconstructedParticle;
+
+#endif /* ANALYZERS_SOURCE_RECONSTRUCTED_PARTICLE_H */
diff --git a/analyzers/dataframe/FCCAnalyses/TrackSource.h b/analyzers/dataframe/FCCAnalyses/TrackSource.h
new file mode 100644
index 0000000000..93ffb580d0
--- /dev/null
+++ b/analyzers/dataframe/FCCAnalyses/TrackSource.h
@@ -0,0 +1,45 @@
+#ifndef ANALYZERS_SOURCE_TRACK_H
+#define ANALYZERS_SOURCE_TRACK_H
+
+// std
+#include <cstddef>
+
+// ROOT
+#include "ROOT/RVec.hxx"
+
+// EDM4hep
+#include "edm4hep/TrackCollection.h"
+#include "edm4hep/TrackMCParticleLinkCollection.h"
+
+namespace FCCAnalyses ::PodioSource ::Track {
+/**
+ * \brief Analyzer to select tracks associated with a MC particle of a
+ *        specified PDG ID.
+ *
+ * \param pdgID  Desired PDG ID of the MC particle.
+ * \param chargeConjugateAllowed  Whether to allow also charge conjugate
+ *        PDG ID. Default value false --- charge conjugate not allowed.
+ */
+struct selPDG {
+  explicit selPDG(const int pdgID, const bool chargeConjugateAllowed = false);
+  const int m_pdg;
+  const bool m_chargeConjugateAllowed;
+  edm4hep::TrackCollection
+  operator()(const edm4hep::TrackMCParticleLinkCollection &inLinkColl);
+};
+
+/**
+ * \brief Analyzer to get number of track states
+ *
+ */
+ROOT::VecOps::RVec<std::size_t>
+getNstates(const edm4hep::TrackCollection &inColl);
+
+/**
+ * \brief Analyzer to get track D0.
+ *
+ */
+ROOT::VecOps::RVec<float> getD0(const edm4hep::TrackCollection &inColl);
+} // namespace FCCAnalyses::PodioSource::Track
+
+#endif /* ANALYZERS_SOURCE_TRACK_H */
diff --git a/analyzers/dataframe/src/ReconstructedParticle.cc b/analyzers/dataframe/src/ReconstructedParticle.cc
index 65dd9cdd3a..fb01ebfa36 100644
--- a/analyzers/dataframe/src/ReconstructedParticle.cc
+++ b/analyzers/dataframe/src/ReconstructedParticle.cc
@@ -2,9 +2,12 @@
 
 // std
 #include <cstdlib>
-#include <iostream>
 #include <stdexcept>
 
+// ROOT
+#include <ROOT/RDataFrame.hxx>
+#include <ROOT/RLogger.hxx>
+
 // EDM4hep
 #include "edm4hep/EDM4hepVersion.h"
 
@@ -52,6 +55,7 @@ ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData> sel_absType::operator()(
       result.emplace_back(in[i]);
     }
   }
+
   return result;
 }
 
@@ -115,8 +119,6 @@ ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData>  sel_charge::operator() (
   return result;
 }
 
-
-
 resonanceBuilder::resonanceBuilder(float arg_resonance_mass) {m_resonance_mass = arg_resonance_mass;}
 ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData> resonanceBuilder::operator()(ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData> legs) {
   ROOT::VecOps::RVec<edm4hep::ReconstructedParticleData> result;
diff --git a/analyzers/dataframe/src/ReconstructedParticleSource.cc b/analyzers/dataframe/src/ReconstructedParticleSource.cc
new file mode 100644
index 0000000000..947b686bb8
--- /dev/null
+++ b/analyzers/dataframe/src/ReconstructedParticleSource.cc
@@ -0,0 +1,300 @@
+#include "FCCAnalyses/ReconstructedParticleSource.h"
+#include "FCCAnalyses/Logger.h"
+
+// std
+#include <cmath>
+#include <cstdlib>
+#include <stdexcept>
+#include <vector>
+
+// ROOT
+#include <ROOT/RDataFrame.hxx>
+#include <ROOT/RLogger.hxx>
+#include <TLorentzVector.h>
+
+// EDM4hep
+#include <edm4hep/utils/kinematics.h>
+
+namespace FCCAnalyses ::PodioSource ::ReconstructedParticle {
+selPDG::selPDG(const int pdg) : m_pdg(pdg){};
+
+edm4hep::ReconstructedParticleCollection
+selPDG::operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl) {
+  edm4hep::ReconstructedParticleCollection result;
+  result.setSubsetCollection();
+
+  rdfDebug << "Link collection size: " << inLinkColl.size();
+
+  for (const auto &link : inLinkColl) {
+    const auto &particle = link.getTo();
+    if (particle.getPDG() == m_pdg) {
+      result.push_back(link.getFrom());
+    }
+  }
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+selAbsPDG::selAbsPDG(const int pdg) : m_absPdg(pdg) {
+  if (m_absPdg < 0) {
+    throw std::invalid_argument("ReconstructedParticle::selAbsPDG: Provided "
+                                "PDG ID is negative!");
+  }
+};
+
+edm4hep::ReconstructedParticleCollection
+selAbsPDG::operator()(const edm4hep::RecoMCParticleLinkCollection &inLinkColl) {
+  edm4hep::ReconstructedParticleCollection result;
+  result.setSubsetCollection();
+
+  for (const auto &link : inLinkColl) {
+    const auto &particle = link.getTo();
+    if (std::abs(particle.getPDG()) == m_absPdg) {
+      result.push_back(link.getFrom());
+    }
+  }
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+selPt::selPt(float minPt) : m_minPt(minPt) {
+  if (m_minPt < 0) {
+    throw std::invalid_argument("ReconstructedParticle::selPt: Provided "
+                                "pT threshold is negative!");
+  }
+};
+
+edm4hep::ReconstructedParticleCollection
+selPt::operator()(const edm4hep::ReconstructedParticleCollection &inColl) {
+  edm4hep::ReconstructedParticleCollection result;
+  result.setSubsetCollection();
+
+  for (const auto &particle : inColl) {
+    if (edm4hep::utils::pt(particle) > m_minPt) {
+      result.push_back(particle);
+    }
+  }
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+selUpTo::selUpTo(const size_t size) : m_size(size){};
+
+edm4hep::ReconstructedParticleCollection
+selUpTo::operator()(const edm4hep::ReconstructedParticleCollection &inColl) {
+  edm4hep::ReconstructedParticleCollection result;
+  result.setSubsetCollection();
+
+  for (const auto &particle : inColl) {
+    if (result.size() >= m_size) {
+      break;
+    }
+    result.push_back(particle);
+  }
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+selGenStatus::selGenStatus(int status) : m_status(status){};
+
+edm4hep::ReconstructedParticleCollection selGenStatus::operator()(
+    const edm4hep::RecoMCParticleLinkCollection &inLinkColl) {
+  edm4hep::ReconstructedParticleCollection result;
+  result.setSubsetCollection();
+
+  for (const auto &link : inLinkColl) {
+    const auto &mcParticle = link.getTo();
+    const auto &recoParticle = link.getFrom();
+    if (mcParticle.getGeneratorStatus() == m_status) {
+      result.push_back(link.getFrom());
+    }
+  }
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+ROOT::VecOps::RVec<float>
+getP(const edm4hep::ReconstructedParticleCollection &inColl) {
+  ROOT::VecOps::RVec<float> result;
+
+  for (const auto &particle : inColl) {
+    auto lVec = edm4hep::utils::p4(particle, edm4hep::utils::UseMass);
+    result.push_back(static_cast<float>(lVec.P()));
+  }
+  return result;
+}
+
+// --------------------------------------------------------------------------
+ROOT::VecOps::RVec<float>
+getPt(const edm4hep::ReconstructedParticleCollection &inColl) {
+  ROOT::VecOps::RVec<float> result;
+  result.reserve(inColl.size());
+
+  std::transform(
+      inColl.begin(), inColl.end(), std::back_inserter(result),
+      [](edm4hep::ReconstructedParticle p) { return edm4hep::utils::pt(p); });
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+ROOT::VecOps::RVec<float>
+getY(const edm4hep::ReconstructedParticleCollection &inColl) {
+  ROOT::VecOps::RVec<float> result;
+
+  for (const auto &particle : inColl) {
+    auto lVec = edm4hep::utils::p4(particle, edm4hep::utils::UseMass);
+    result.push_back(static_cast<float>(lVec.Rapidity()));
+  }
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+ROOT::VecOps::RVec<float>
+getE(const edm4hep::ReconstructedParticleCollection &inColl) {
+  ROOT::VecOps::RVec<float> result;
+
+  std::transform(
+      inColl.begin(), inColl.end(), std::back_inserter(result),
+      [](edm4hep::ReconstructedParticle p) { return p.getEnergy(); });
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+ROOT::VecOps::RVec<float>
+getMass(const edm4hep::ReconstructedParticleCollection &inColl) {
+  ROOT::VecOps::RVec<float> result;
+
+  std::transform(inColl.begin(), inColl.end(), std::back_inserter(result),
+                 [](edm4hep::ReconstructedParticle p) { return p.getMass(); });
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+ROOT::VecOps::RVec<float>
+getCharge(const edm4hep::ReconstructedParticleCollection &inColl) {
+  ROOT::VecOps::RVec<float> result;
+
+  std::transform(
+      inColl.begin(), inColl.end(), std::back_inserter(result),
+      [](edm4hep::ReconstructedParticle p) { return p.getCharge(); });
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+edm4hep::ReconstructedParticleCollection
+sortByPt(const edm4hep::ReconstructedParticleCollection &inColl) {
+  edm4hep::ReconstructedParticleCollection outColl;
+  outColl.setSubsetCollection();
+
+  std::vector<edm4hep::ReconstructedParticle> rpVec;
+  rpVec.reserve(inColl.size());
+  for (const auto &particle : inColl) {
+    rpVec.emplace_back(particle);
+  }
+
+  std::sort(rpVec.begin(), rpVec.end(), [](const auto &lhs, const auto &rhs) {
+    return edm4hep::utils::pt(lhs) > edm4hep::utils::pt(rhs);
+  });
+
+  for (const auto &particle : rpVec) {
+    outColl.push_back(particle);
+  }
+
+  return outColl;
+}
+
+// --------------------------------------------------------------------------
+resonanceBuilder::resonanceBuilder(float resonanceMass)
+    : m_resonanceMass(resonanceMass) {
+  if (m_resonanceMass < 0) {
+    throw std::invalid_argument("ReconstructedParticle::resonanceBuilder: "
+                                "Provided resonance mass is negative!");
+  }
+}
+
+edm4hep::ReconstructedParticleCollection resonanceBuilder::operator()(
+    const edm4hep::ReconstructedParticleCollection &inColl) {
+  edm4hep::ReconstructedParticleCollection result;
+
+  if (inColl.size() < 2) {
+    return result;
+  }
+
+  // Convert collection into std::vector
+  std::vector<edm4hep::ReconstructedParticle> rpVec;
+  rpVec.reserve(inColl.size());
+  for (const auto &particle : inColl) {
+    rpVec.emplace_back(particle);
+  }
+
+  // Loop over all possible combinations
+  std::vector<edm4hep::ReconstructedParticle> resonanceVec;
+  for (auto p1 = rpVec.begin(); p1 != rpVec.end(); ++p1) {
+    for (auto p2 = p1 + 1; p2 != rpVec.end(); ++p2) {
+      edm4hep::MutableReconstructedParticle resonance;
+      resonance.setCharge(p1->getCharge() + p2->getCharge());
+
+      auto lVec1 = edm4hep::utils::p4(*p1, edm4hep::utils::UseMass);
+      auto lVec2 = edm4hep::utils::p4(*p2, edm4hep::utils::UseMass);
+
+      auto lVec = lVec1 + lVec2;
+      resonance.setMomentum({static_cast<float>(lVec.Px()),
+                             static_cast<float>(lVec.Py()),
+                             static_cast<float>(lVec.Pz())});
+      resonance.setMass(static_cast<float>(lVec.M()));
+      resonanceVec.emplace_back(resonance);
+    }
+  }
+
+  // Sort by the distance from desired mass value
+  auto resonanceSort = [&](edm4hep::ReconstructedParticle i,
+                           edm4hep::ReconstructedParticle j) {
+    return std::fabs(m_resonanceMass - i.getMass()) <
+           std::fabs(m_resonanceMass - j.getMass());
+  };
+  std::sort(resonanceVec.begin(), resonanceVec.end(), resonanceSort);
+  result.push_back(resonanceVec.front().clone());
+
+  return result;
+}
+
+// --------------------------------------------------------------------------
+recoilBuilder::recoilBuilder(float sqrts) : m_sqrts(sqrts) {
+  if (m_sqrts < 0) {
+    throw std::invalid_argument("ReconstructedParticle::recoilBuilder: "
+                                "Provided center-of-mass is negative!");
+  }
+}
+
+edm4hep::ReconstructedParticleCollection recoilBuilder::operator()(
+    const edm4hep::ReconstructedParticleCollection &inColl) {
+  edm4hep::ReconstructedParticleCollection result;
+
+  auto recoilVec = TLorentzVector(0, 0, 0, m_sqrts);
+  for (const auto &r : inColl) {
+    auto lVec = TLorentzVector(r.getMomentum().x, r.getMomentum().y,
+                               r.getMomentum().z, r.getMass());
+    recoilVec -= lVec;
+  }
+
+  edm4hep::MutableReconstructedParticle recoil;
+  recoil.setMomentum({static_cast<float>(recoilVec.Px()),
+                      static_cast<float>(recoilVec.Py()),
+                      static_cast<float>(recoilVec.Pz())});
+  recoil.setMass(static_cast<float>(recoilVec.M()));
+  result.push_back(recoil);
+
+  return result;
+};
+
+} // namespace FCCAnalyses::PodioSource::ReconstructedParticle
diff --git a/analyzers/dataframe/src/TrackSource.cc b/analyzers/dataframe/src/TrackSource.cc
new file mode 100644
index 0000000000..6ea774161c
--- /dev/null
+++ b/analyzers/dataframe/src/TrackSource.cc
@@ -0,0 +1,49 @@
+#include "FCCAnalyses/TrackSource.h"
+
+namespace FCCAnalyses ::PodioSource ::Track {
+selPDG::selPDG(const int pdg, const bool chargeConjugateAllowed)
+    : m_pdg(pdg), m_chargeConjugateAllowed(chargeConjugateAllowed){};
+
+edm4hep::TrackCollection
+selPDG::operator()(const edm4hep::TrackMCParticleLinkCollection &inLinkColl) {
+  edm4hep::TrackCollection result;
+  result.setSubsetCollection();
+
+  for (const auto &assoc : inLinkColl) {
+    const auto &particle = assoc.getTo();
+    if (m_chargeConjugateAllowed) {
+      if (std::abs(particle.getPDG()) == std::abs(m_pdg)) {
+        result.push_back(assoc.getFrom());
+      }
+    } else {
+      if (particle.getPDG() == m_pdg) {
+        result.push_back(assoc.getFrom());
+      }
+    }
+  }
+
+  return result;
+}
+
+ROOT::VecOps::RVec<std::size_t>
+getNstates(const edm4hep::TrackCollection &inColl) {
+  ROOT::VecOps::RVec<std::size_t> result;
+  for (const auto &track : inColl) {
+    result.push_back(track.getTrackStates().size());
+  }
+
+  return result;
+}
+
+ROOT::VecOps::RVec<float> getD0(const edm4hep::TrackCollection &inColl) {
+  ROOT::VecOps::RVec<std::size_t> result;
+  for (const auto &track : inColl) {
+    for (const auto &trackState : track.getTrackStates()) {
+      result.push_back(trackState.D0);
+      break;
+    }
+  }
+
+  return result;
+}
+} // namespace FCCAnalyses::PodioSource::Track
diff --git a/cmake/FCCAnalysesFunctions.cmake b/cmake/FCCAnalysesFunctions.cmake
index bba689564c..46d6c02223 100644
--- a/cmake/FCCAnalysesFunctions.cmake
+++ b/cmake/FCCAnalysesFunctions.cmake
@@ -47,7 +47,7 @@ endfunction()
 
 function(add_generic_test _testname _testcmd)
   add_test(NAME ${_testname}
-           COMMAND ${_testcmd}
+           COMMAND sh -c ${_testcmd}
            WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
   set_property(TEST ${_testname}
                APPEND PROPERTY ENVIRONMENT
diff --git a/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py b/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py
index fd1d9f6949..b9f8dc93f1 100644
--- a/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py
+++ b/examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py
@@ -21,31 +21,34 @@ def __init__(self, cmdline_args):
         # `cmdline_arg` dictionary.
         self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown'])
 
-        # Mandatory: List of processes to run over
+        # Mandatory: List of samples (processes) used in the analysis
         self.process_list = {
-            # # Run the full statistics in one output file named
-            # # <outputDir>/p8_ee_ZZ_ecm240.root
+            # Run over the full statistics and save it to one output file named
+            # <outputDir>/<process_name>.root
             'p8_ee_ZZ_ecm240': {'fraction': 1.},
-            # # Run 50% of the statistics with output into two files named
-            # # <outputDir>/p8_ee_WW_ecm240/chunk<N>.root
-            'p8_ee_WW_ecm240': {'fraction': 0.5, 'chunks': 2}, #this doesn't work? 
-            # # Run 20% of the statistics in one file named
-            # # <outputDir>/p8_ee_ZH_ecm240_out_f02.root (example on how to change
-            # # the output name)
+            # Run over 50% of the statistics and save output into two files
+            # named <outputDir>/p8_ee_WW_ecm240/chunk<N>.root
+            # Number of input files needs to be larger that number of chunks
+            'p8_ee_WW_ecm240': {'fraction': 0.5, 'chunks': 2},
+            # Run over 20% of the statistics and save output into one file named
+            # <outputDir>/p8_ee_ZH_ecm240_out_f02.root
             'p8_ee_ZH_ecm240': {'fraction': 0.2,
                                 'output': 'p8_ee_ZH_ecm240_out_f02'}
         }
 
         # Mandatory: Production tag when running over the centrally produced
-        # samples, this points to the yaml files for getting sample statistics
-        self.input_dir = '/eos/experiment/fcc/hh/tutorials/edm4hep_tutorial_data/'
+        # samples (this points to the yaml file for getting sample statistics)
         # self.prod_tag = 'FCCee/spring2021/IDEA/'
+        # or Input directory when not running over the centrally produced
+        # samples.
+        self.input_dir = '/eos/experiment/fcc/hh/tutorials/' \
+                         'edm4hep_tutorial_data/'
 
         # Optional: output directory, default is local running directory
         self.output_dir = 'outputs/FCCee/higgs/mH-recoil/mumu/' \
                           f'stage1_{self.ana_args.muon_pt}'
 
-        # Optional: analysisName, default is ''
+        # Optional: analysis name, default is ''
         # self.analysis_name = 'My Analysis'
 
         # Optional: number of threads to run on, default is 'all available'
@@ -55,10 +58,8 @@ def __init__(self, cmdline_args):
         # self.run_batch = False
 
         # Optional: test file
-        self.test_file = 'root://eospublic.cern.ch//eos/experiment/fcc/hh/' \
-                         'tutorials/edm4hep_tutorial_data/' \
-                         'p8_ee_ZH_ecm240.root'
-
+        self.test_file = 'https://fccsw.web.cern.ch/fccsw/testsamples/' \
+                         'edm4hep1/p8_ee_WW_ecm240_edm4hep.root'
 
     # Mandatory: analyzers function to define the analysis graph, please make
     # sure you return the dataframe, in this example it is dframe2
@@ -80,40 +81,41 @@ def analyzers(self, dframe):
             # select muons on pT
             .Define('selected_muons',
                     f'ReconstructedParticle::sel_pt({muon_pt})(muons)')
-            # create branch with muon transverse momentum
+            # create column with muon transverse momentum
             .Define('selected_muons_pt',
                     'ReconstructedParticle::get_pt(selected_muons)')
-            # create branch with muon rapidity
+            # create column with muon rapidity
             .Define('selected_muons_y',
                     'ReconstructedParticle::get_y(selected_muons)')
-            # create branch with muon total momentum
+            # create column with muon total momentum
             .Define('selected_muons_p',
                     'ReconstructedParticle::get_p(selected_muons)')
-            # create branch with muon energy
+            # create column with muon energy
             .Define('selected_muons_e',
                     'ReconstructedParticle::get_e(selected_muons)')
             # find zed candidates from  di-muon resonances
             .Define(
                 'zed_leptonic',
                 'ReconstructedParticle::resonanceBuilder(91)(selected_muons)')
-            # create branch with zed mass
+            # create column with zed mass
             .Define('zed_leptonic_m',
                     'ReconstructedParticle::get_mass(zed_leptonic)')
-            # create branch with zed transverse momenta
+            # create column with zed transverse momenta
             .Define('zed_leptonic_pt',
                     'ReconstructedParticle::get_pt(zed_leptonic)')
             # calculate recoil of zed_leptonic
             .Define('zed_leptonic_recoil',
                     'ReconstructedParticle::recoilBuilder(240)(zed_leptonic)')
-            # create branch with recoil mass
+            # create column with recoil mass
             .Define('zed_leptonic_recoil_m',
                     'ReconstructedParticle::get_mass(zed_leptonic_recoil)')
-            # create branch with leptonic charge
+            # create column with leptonic charge
             .Define('zed_leptonic_charge',
                     'ReconstructedParticle::get_charge(zed_leptonic)')
-            # Filter at least one candidate
-            .Filter('zed_leptonic_recoil_m.size()>0')
+            # Filter on at least one candidate
+            .Filter('zed_leptonic_recoil_m.size() > 0')
         )
+
         return dframe2
 
     # Mandatory: output function, please make sure you return the branch list
diff --git a/examples/FCChh/HH_bbtautau/analysis.cc b/examples/FCChh/HH_bbtautau/analysis.cc
index c03f5f46bc..e1fb1feb24 100644
--- a/examples/FCChh/HH_bbtautau/analysis.cc
+++ b/examples/FCChh/HH_bbtautau/analysis.cc
@@ -16,10 +16,7 @@
 #include "ReconstructedParticle2Track.h"
 #include "MCParticle.h"
 
-
 auto _m = edm4hep::ReconstructedParticleData();
-auto _pod  = podio::ObjectID();
-
 
 // Reproduce Heppy analysis
 int main(int argc, char* argv[]){
diff --git a/examples/data_source/analysis_stage1.py b/examples/data_source/analysis_stage1.py
new file mode 100644
index 0000000000..352fd94cf0
--- /dev/null
+++ b/examples/data_source/analysis_stage1.py
@@ -0,0 +1,134 @@
+'''
+Analysis example, measure Higgs mass in the Z(mumu)H recoil measurement.
+'''
+from argparse import ArgumentParser
+
+
+# Mandatory: Analysis class where the user defines the operations on the
+# dataframe.
+class Analysis():
+    '''
+    Higgs mass recoil analysis in Z(mumu)H.
+    '''
+    def __init__(self, cmdline_args):
+        parser = ArgumentParser(
+            description='Additional analysis arguments',
+            usage='Provide additional arguments after analysis script path')
+        parser.add_argument('--muon-pt', default='10.', type=float,
+                            help='Minimal pT of the mouns.')
+        # Parse additional arguments not known to the FCCAnalyses parsers
+        # All command line arguments know to fccanalysis are provided in the
+        # `cmdline_arg` dictionary.
+        self.ana_args, _ = parser.parse_known_args(cmdline_args['unknown'])
+
+        # Mandatory: List of samples (processes) used in the analysis
+        self.process_list = {
+            # Run over the full statistics and save it to one output file named
+            # <outputDir>/<process_name>.root
+            'p8_ee_ZZ_ecm240': {'fraction': 1.},
+            # Run over 50% of the statistics and save output into two files
+            # named <outputDir>/p8_ee_WW_ecm240/chunk<N>.root
+            # Number of input files needs to be larger that number of chunks
+            'p8_ee_WW_ecm240': {'fraction': 0.5, 'chunks': 2},
+            # Run over 20% of the statistics and save output into one file
+            # named <outputDir>/p8_ee_ZH_ecm240_out_f02.root
+            'p8_ee_ZH_ecm240': {'fraction': 0.2,
+                                'output': 'p8_ee_ZH_ecm240_out_f02'}
+        }
+
+        # Mandatory: Production tag when running over the centrally produced
+        # samples (this points to the yaml file for getting sample statistics)
+        # self.prod_tag = 'FCCee/spring2021/IDEA/'
+        # or Input directory when not running over the centrally produced
+        # samples.
+        self.input_dir = '/eos/experiment/fcc/hh/tutorials/' \
+                         'edm4hep_tutorial_data/'
+
+        # Optional: output directory, default is local running directory
+        self.output_dir = 'outputs/FCCee/higgs/mH-recoil/mumu/' \
+                          f'stage1_{self.ana_args.muon_pt}'
+
+        # Optional: analysis name, default is ''
+        # self.analysis_name = 'My Analysis'
+
+        # Optional: number of threads to run on, default is 'all available'
+        # self.n_threads = 4
+
+        # Optional: running on HTCondor, default is False
+        # self.run_batch = False
+
+        # Optional: test file
+        self.test_file = 'https://fccsw.web.cern.ch/fccsw/testsamples/' \
+                         'edm4hep1/p8_ee_WW_ecm240_edm4hep.root'
+
+        # Optional: read the input files with podio::DataSource
+        self.use_data_source = True
+
+    # Mandatory: analyzers function to define the analysis graph, please make
+    # sure you return the dataframe, in this example it is dframe2
+    def analyzers(self, dframe):
+        '''
+        Analysis graph.
+        '''
+
+        muon_pt = self.ana_args.muon_pt
+
+        dframe2 = (
+            dframe
+            # select muons on pT
+            .Define('selected_muons',
+                    f'ReconstructedParticle::selPt({muon_pt})(Muon)')
+            # create column with muon transverse momentum
+            .Define('selected_muons_pt',
+                    'ReconstructedParticle::getPt(selected_muons)')
+            # create column with muon rapidity
+            .Define('selected_muons_y',
+                    'ReconstructedParticle::getY(selected_muons)')
+            # create column with muon total momentum
+            .Define('selected_muons_p',
+                    'ReconstructedParticle::getP(selected_muons)')
+            # create column with muon energy
+            .Define('selected_muons_e',
+                    'ReconstructedParticle::getE(selected_muons)')
+            # find zed candidates from  di-muon resonances
+            .Define(
+                'zed_leptonic',
+                'ReconstructedParticle::resonanceBuilder(91)(selected_muons)')
+            # create column with zed mass
+            .Define('zed_leptonic_m',
+                    'ReconstructedParticle::getMass(zed_leptonic)')
+            # create column with zed transverse momenta
+            .Define('zed_leptonic_pt',
+                    'ReconstructedParticle::getPt(zed_leptonic)')
+            # calculate recoil of zed_leptonic
+            .Define('zed_leptonic_recoil',
+                    'ReconstructedParticle::recoilBuilder(240)(zed_leptonic)')
+            # create column with recoil mass
+            .Define('zed_leptonic_recoil_m',
+                    'ReconstructedParticle::getMass(zed_leptonic_recoil)')
+            # create column with leptonic charge
+            .Define('zed_leptonic_charge',
+                    'ReconstructedParticle::getCharge(zed_leptonic)')
+            # Filter on at least one candidate
+            .Filter('zed_leptonic_recoil_m.size() > 0')
+        )
+
+        return dframe2
+
+    # Mandatory: output function, please make sure you return the branch list
+    # as a python list
+    def output(self):
+        '''
+        Output variables which will be saved to output root file.
+        '''
+        branch_list = [
+            'selected_muons_pt',
+            'selected_muons_y',
+            'selected_muons_p',
+            'selected_muons_e',
+            'zed_leptonic_pt',
+            'zed_leptonic_m',
+            'zed_leptonic_charge',
+            'zed_leptonic_recoil_m'
+        ]
+        return branch_list
diff --git a/examples/data_source/histmaker_source.py b/examples/data_source/histmaker_source.py
new file mode 100644
index 0000000000..4ca111c251
--- /dev/null
+++ b/examples/data_source/histmaker_source.py
@@ -0,0 +1,49 @@
+# list of processes (mandatory)
+processList = {
+    'p8_ee_WW_ecm240': {
+        'output': 'p8_ee_WW_ecm240_out',
+        'testfile': 'https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/'
+                    'p8_ee_WW_ecm240_edm4hep.root'}
+}
+
+# Production tag when running over EDM4Hep centrally produced events, this
+# points to the yaml files for getting sample statistics (mandatory)
+prodTag = "FCCee/winter2023/IDEA/"
+
+# Link to the dictonary that contains all the cross section informations
+# etc... (mandatory)
+procDict = "FCCee_procDict_winter2023_IDEA.json"
+
+# Optional: output directory, default is local running directory
+outputDir = "."
+
+# Ncpus, default is 4, -1 uses all cores available
+nCPUS = -1
+
+# scale the histograms with the cross-section and integrated luminosity
+# define some binning for various histograms
+bins_pt = (20, 0, 200)
+
+# How to read input files
+useDataSource = True
+
+# build_graph function that contains the analysis logic, cuts and histograms
+# (mandatory)
+def build_graph(df, dataset):
+
+    results = []
+    df = df.Define("weight", "1.0")
+    weightsum = df.Sum("weight")
+
+    df = df.Define(
+        "electron_truth",
+        "ReconstructedParticle::selAbsPDG(11)(MCRecoAssociations)")
+
+    df = df.Define(
+        "electron_truth_pt",
+        "ReconstructedParticle::getPt(electron_truth)")
+
+    results.append(df.Histo1D(("h_electron_truth_pt", "", *bins_pt),
+                              "electron_truth_pt"))
+
+    return results, weightsum
diff --git a/examples/data_source/stages_source.py b/examples/data_source/stages_source.py
new file mode 100644
index 0000000000..e3400b2e92
--- /dev/null
+++ b/examples/data_source/stages_source.py
@@ -0,0 +1,61 @@
+'''
+Analysis example using PODIO ROOT DataSource for reading input files.
+'''
+
+
+class Analysis():
+    '''
+    Mandatory class, with three mandatory methods:
+      * __init__
+      * analyzers
+      * output
+    '''
+    def __init__(self, _):
+        # list of processes (mandatory)
+        self.process_list = {
+            'p8_ee_WW_ecm240': {'output': 'p8_ee_WW_ecm240_out'}
+        }
+
+        # Production tag when running over EDM4Hep centrally produced events,
+        # this points to the yaml files for getting sample statistics
+        # (mandatory)
+        self.prod_tag = 'FCCee/winter2023/IDEA/'
+
+        # Optional: output directory, default is local running directory
+        self.output_dir = "."
+
+        # Ncpus, default is 4, -1 uses all cores available
+        # self.n_threads = -1
+
+        # How to read input files
+        self.use_data_source = True
+
+        self.test_file = 'https://fccsw.web.cern.ch/fccsw/testsamples/' \
+                         'edm4hep1/p8_ee_WW_ecm240_edm4hep.root'
+
+    def analyzers(self, dframe):
+        '''
+        Analysis function to define the analyzers to process, please make sure
+        you return the last dataframe, in this example it is dframe2
+        '''
+        dframe2 = (
+            dframe
+            .Define(
+              "electron_truth",
+              "ReconstructedParticle::selPDG(11)(MCRecoAssociations)")
+
+            .Define(
+              "electron_truth_pt",
+              "ReconstructedParticle::getPt(electron_truth)")
+        )
+
+        return dframe2
+
+    def output(self) -> list[str]:
+        '''
+        List of columns to cave into output file.
+        '''
+        return [
+                # "electron_truth",
+                "electron_truth_pt"
+        ]
diff --git a/examples/data_source/standalone.py b/examples/data_source/standalone.py
new file mode 100644
index 0000000000..5c28a1541c
--- /dev/null
+++ b/examples/data_source/standalone.py
@@ -0,0 +1,50 @@
+'''
+Example of standalone FCCAnalyses script using podio::DataSource.
+'''
+
+import ROOT
+ROOT.gROOT.SetBatch(True)
+
+
+def main():
+    '''
+    Main entry point for the standalone analysis.
+    '''
+    input_list = ['https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/'
+                  'p8_ee_WW_ecm240_edm4hep.root']
+
+    ROOT.gSystem.Load('libFCCAnalyses')
+    if ROOT.dummyLoader:
+        print('----> DEBUG: Found FCCAnalyses library.')
+        ROOT.gInterpreter.Declare("using namespace FCCAnalyses::PodioSource;")
+    print('----> INFO: Loading analyzers from libFCCAnalyses... ',)
+
+    if ROOT.podio.DataSource:
+        print('----> DEBUG: Found Podio ROOT DataSource.')
+    print('----> INFO: Loading events through podio::DataSource...')
+
+    try:
+        dframe = ROOT.podio.CreateDataFrame(input_list)
+    except TypeError as excp:
+        print('----> ERROR: Unable to build dataframe!')
+        print(excp)
+
+    dframe2 = dframe.Define(
+        'electron_truth',
+        'ReconstructedParticle::selPDG(11)(MCRecoAssociations)')
+
+    dframe3 = dframe2.Define(
+        'electron_truth_pt',
+        'ReconstructedParticle::getPt(electron_truth)')
+
+    dframe4 = dframe3.Filter('electron_truth_pt.size() < 3')
+
+    count = dframe4.Count()
+    dframe4.Snapshot('events', 'output.root', ['electron_truth_pt'])
+
+    print('---------------------')
+    print('Number of events: ', count.GetValue())
+
+
+if __name__ == '__main__':
+    main()
diff --git a/man/man1/fccanalysis-build.1 b/man/man1/fccanalysis-build.1
index 31ff552b43..5247bee9e8 100644
--- a/man/man1/fccanalysis-build.1
+++ b/man/man1/fccanalysis-build.1
@@ -9,6 +9,7 @@
 [\fB\-c\fR]
 [\fB\-j\fR \fIBUILD_THREADS\fR]
 [\fB\-\-acts\-on\fR]
+[\fB\-\-no\-source\fR]
 .SH DESCRIPTION
 .B fccanalysis\-build
 will build the whole FCCAnalyses framework\&.
@@ -25,10 +26,13 @@ Prints short help message and exits\&.
 Completely clean build directory before compilation\&.
 .TP
 \fB\-j\fR \fIBUILD_THREADS\fR, \fB\-\-build\-threads\fR \fIBUILD_THREADS\fR
-Specify number of threads used when compiling (equivalent to `make -j`)
+Specify number of threads used when compiling (equivalent to `make -j`)\&.
 .TP
 \fB\-\-acts\-on\fR
 Enable also ACTS based analyzers to be build\&.
+.TP
+.B \-\-no\-source
+Do not build PODIO Data Source\&.
 .SH SEE ALSO
 fccanalysis(1), fccanalysis\-test(1), cmake(1)
 .SH BUGS
diff --git a/man/man1/fccanalysis-run.1 b/man/man1/fccanalysis-run.1
index 06f6f01bdb..3ef54a0873 100644
--- a/man/man1/fccanalysis-run.1
+++ b/man/man1/fccanalysis-run.1
@@ -9,6 +9,7 @@
 [\fB\-\-files\-list\fR \fIFILES_LIST\fR [\fIFILES_LIST\fR ...]]
 [\fB\-\-output\fR \fIOUTPUT\fR]
 [\fB\-\-nevents\fR \fINEVENTS\fR]
+[\fB\-\-use-data-source\fR]
 [\fB\-\-test\fR]
 [\fB\-\-bench\fR]
 [\fB\-\-ncpus\fR \fINCPUS\fR]
@@ -24,7 +25,7 @@ stages \fIfccanalysis-run\fR is used\&.
 When using \fBfccanalysis-run\fR the analysis is running in the managed mode,
 where the RDataFrame is steered by the framework and users can control some
 aspects of the running with additional global attributes, see
-\fIfccanalysis-script\fR(8).
+\fBfccanalysis-script\fR(7).
 .SH OPTIONS
 .TP
 .I analysis-script
@@ -46,6 +47,9 @@ Specify max number of events to process\&.
 .B \-\-test
 Run over the test file\&.
 .TP
+.B \-\-use-data-source
+Load events through podio::DataSource\&.
+.TP
 .B \-\-bench
 Output benchmark results to a JSON file\&.
 .TP
diff --git a/man/man7/fccanalysis-script.7 b/man/man7/fccanalysis-script.7
index 2f1b22c4d7..a4e6bfda00 100644
--- a/man/man7/fccanalysis-script.7
+++ b/man/man7/fccanalysis-script.7
@@ -13,13 +13,12 @@ part of or the full analysis. There are two basic modes how to run an
 analysis, one is to run in the managed mode like so:
 .IP
 fccanalysis run \fIanalysis_script.py\fR
-
 .RE
+
 or
 .IP
 fccanalysis final \fIanalysis_script.py\fR
 
-.RE
 .PP
 where user needs to provide minimal number of variables and settings. In this
 mode the RDataFrame is managed for the user and it can be controlled by defining
@@ -27,7 +26,6 @@ several global attributes in the analysis script. The other mode is to run the
 analysis script as a regular python script:
 .IP
 python \fIanalysis_script.py\fR
-.RE
 .PP
 here user has full control over the RDataFrame, but has to create all necessary
 scaffolding\&.
@@ -36,13 +34,13 @@ It is expected that the whole analysis will be split into several stages and
 it can be done in one of the two styles:
 .IP
 anascript_stage1.py \-> anascript_stage2.py \-> ... \-> anascript_stage_final.py \-> plots.py
-
 .RE
+
 or
 .IP
 analysis_histmaker.py \-> plots.py
 
-.RE
+.PP
 In the case of the first style there are at least three stages required
 (anascript_stage1.py, anascript_stage_final.py, plots.py) and there is no upper
 limit on the number of stages. In the case of the second style only two stages
@@ -171,6 +169,12 @@ paths with \fI.dot\fR and \fI.png\fR extensions are accepted.
 .br
 Default value: empty string
 .TP
+\fBuseDataSource\fR (optional)
+User can specify how to provide physical object collections to the analyzers\&.
+If \fITrue\fR the events will be loaded through the EDM4hep RDataSource\&.
+.br
+Default value: False
+.TP
 .B procDict
 This variable controls which process dictionary will be used. It can be either
 simple file name, absolute path or url. In the case of simple filename, the file
diff --git a/python/anascript.py b/python/anascript.py
index 6248993591..69344539fc 100644
--- a/python/anascript.py
+++ b/python/anascript.py
@@ -26,7 +26,7 @@ def get_element(rdf_module, element: str, is_final: bool = False):
 
         elif element == 'analysers':
             LOGGER.error('The function <%s> is mandatory in your analysis '
-                         'script!.\nAborting...', element)
+                         'script!\nAborting...', element)
             if is_final:
                 LOGGER.error('The function <%s> is not part of the final '
                              'stage of the analysis!', element)
diff --git a/python/build_analysis.py b/python/build_analysis.py
index ef2e4cbb3d..a01d462a1a 100644
--- a/python/build_analysis.py
+++ b/python/build_analysis.py
@@ -15,8 +15,8 @@
 
 def run_subprocess(command: str, run_dir: str) -> None:
     '''
-    Run subprocess in specified directory.
-    Check only the return value, otherwise keep the subprocess connected to
+    Run sub-process in specified directory.
+    Check only the return value, otherwise keep the sub-process connected to
     stdin/stout/stderr.
     '''
     try:
@@ -25,6 +25,7 @@ def run_subprocess(command: str, run_dir: str) -> None:
 
             if status != 0:
                 LOGGER.error('Error encountered!\nAborting...')
+                os.system('hash -d fccanalysis')
                 sys.exit(3)
 
     except KeyboardInterrupt:
@@ -46,7 +47,8 @@ def build_analysis(mainparser) -> None:
     local_dir = os.environ.get('LOCAL_DIR')
     build_path = pathlib.Path(local_dir + '/build')
     install_path = pathlib.Path(local_dir + '/install')
-    cmake_args = ['-DCMAKE_INSTALL_PREFIX=../install']
+    cmake_args: list[str] = ['-DCMAKE_INSTALL_PREFIX=../install',
+                             '-DCMAKE_EXPORT_COMPILE_COMMANDS=ON']
 
     LOGGER.info('Building analysis located in:\n%s', local_dir)
 
@@ -54,6 +56,9 @@ def build_analysis(mainparser) -> None:
         LOGGER.info('Building also ACTS based analyzers...')
         cmake_args += ['-DWITH_ACTS=ON']
 
+    if args.no_source:
+        cmake_args += ['-DWITH_PODIO_DATASOURCE=OFF']
+
     if args.clean_build:
         LOGGER.info('Clearing build and install directories...')
         if build_path.is_dir():
diff --git a/python/parsers.py b/python/parsers.py
index c2fc092eac..75db7975af 100644
--- a/python/parsers.py
+++ b/python/parsers.py
@@ -51,6 +51,12 @@ def setup_build_parser(parser):
                             action='store_true',
                             default=False,
                             help='enable ACTS based analyzers')
+    build_args.add_argument(
+        '--no-source',
+        action='store_true',
+        default=False,
+        help='do not build EDM4hep RDataSource'
+    )
 
 
 def setup_test_parser(parser):
@@ -123,6 +129,9 @@ def setup_run_parser(parser):
     parser.add_argument('--graph-path', type=str, default='',
                         help='analysis graph save path, should end with '
                         '\'.dot\' or \'.png\'')
+    parser.add_argument(
+        '--use-data-source', action='store_true', default=False,
+        help='use EDM4hep RDataSource to construct dataframe')
 
     # Internal argument, not to be used by the users
     parser.add_argument('--batch', action='store_true', default=False,
diff --git a/python/process.py b/python/process.py
index 2b74c5c771..b1512a8ce6 100644
--- a/python/process.py
+++ b/python/process.py
@@ -13,12 +13,14 @@
 
 ROOT.gROOT.SetBatch(True)
 
+
 LOGGER: logging.Logger = logging.getLogger('FCCAnalyses.process_info')
 
 
-def get_entries(inpath: str) -> int:
+def get_entries(inpath: str) -> int | None:
     '''
-    Get number of entries in the TTree named "events".
+    Retrieves number of entries in the "events" TTree from the provided ROOT
+    file.
     '''
     nevents = None
     with ROOT.TFile(inpath, 'READ') as infile:
@@ -27,6 +29,7 @@ def get_entries(inpath: str) -> int:
         except AttributeError:
             LOGGER.error('Input file is missing "events" TTree!\nAborting...')
             sys.exit(3)
+
     return nevents
 
 
diff --git a/python/run_analysis.py b/python/run_analysis.py
index 39412bc3e9..e4445421d3 100644
--- a/python/run_analysis.py
+++ b/python/run_analysis.py
@@ -14,6 +14,7 @@
 import numpy as np
 
 import ROOT  # type: ignore
+import cppyy
 from anascript import get_element, get_element_dict
 from process import get_process_info, get_process_dict
 from frame import generate_graph
@@ -254,7 +255,11 @@ def initialize(args, rdf_module, anapath: str):
     '''
 
     # for convenience and compatibility with user code
-    ROOT.gInterpreter.Declare("using namespace FCCAnalyses;")
+    if args.use_data_source:
+        ROOT.gInterpreter.Declare("using namespace FCCAnalyses::PodioSource;")
+    else:
+        ROOT.gInterpreter.Declare("using namespace FCCAnalyses;")
+
     geometry_file = get_element(rdf_module, "geometryFile")
     readout_name = get_element(rdf_module, "readoutName")
     if geometry_file != "" and readout_name != "":
@@ -331,6 +336,7 @@ def run_rdf(rdf_module,
         for bname in blist:
             branch_list.push_back(bname)
 
+        # Registering Count before Snapshot to avoid additional event loops
         evtcount_final = dframe3.Count()
 
         # Generate computational graph of the analysis
@@ -338,9 +344,9 @@ def run_rdf(rdf_module,
             generate_graph(dframe, args)
 
         dframe3.Snapshot("events", out_file, branch_list)
-    except Exception as excp:
-        LOGGER.error('During the execution of the analysis file exception '
-                     'occurred:\n%s', excp)
+    except cppyy.gbl.std.runtime_error as err:
+        LOGGER.error('%s\nDuring the execution of the analysis script an '
+                     'exception occurred!\nAborting...', err)
         sys.exit(3)
 
     return evtcount_init.GetValue(), evtcount_final.GetValue()
@@ -365,7 +371,7 @@ def send_to_batch(rdf_module, chunk_list, process, anapath: str):
                                 stderr=subprocess.DEVNULL
                                 )
     except subprocess.CalledProcessError:
-        LOGGER.error('The FCCanalyses libraries are not properly build and '
+        LOGGER.error('The FCCAnalyses libraries are not properly build and '
                      'installed!\nAborting job submission...')
         sys.exit(3)
 
@@ -472,8 +478,14 @@ def run_local(rdf_module, infile_list, args):
         filepath = apply_filepath_rewrites(filepath)
 
         file_list.push_back(filepath)
-        info_msg += f'- {filepath}\t\n'
-        infile = ROOT.TFile.Open(filepath, 'READ')
+        info_msg += f'\t- {filepath}\n'
+        try:
+            infile = ROOT.TFile.Open(filepath, 'READ')
+        except OSError as excp:
+            LOGGER.error('While opening input file:\n%s\nan error '
+                         'occurred:\n%s\nAborting...', filepath, excp)
+            sys.exit(3)
+
         try:
             nevents_orig += infile.Get('eventsProcessed').GetVal()
         except AttributeError:
@@ -514,7 +526,7 @@ def run_local(rdf_module, infile_list, args):
     start_time = time.time()
     inn, outn = run_rdf(rdf_module, file_list, outfile_path, args)
     elapsed_time = time.time() - start_time
-    
+
     # replace nevents_local by inn = the amount of processed events
 
     info_msg = f"{' SUMMARY ':=^80}\n"
@@ -690,6 +702,10 @@ def run_histmaker(args, rdf_module, anapath):
     Run the analysis using histmaker (all stages integrated into one).
     '''
 
+    # Check whether to use PODIO ROOT DataSource to load the events
+    if get_element(rdf_module, "useDataSource", False):
+        args.use_data_source = True
+
     # set ncpus, load header files, custom dicts, ...
     initialize(args, rdf_module, anapath)
 
@@ -717,29 +733,41 @@ def run_histmaker(args, rdf_module, anapath):
     evtcounts = []  # event count of the input file
     # number of events processed per process, in a potential previous step
     events_processed_dict = {}
-    for process in process_list:
-        file_list, event_list = get_process_info(
-            process,
-            get_element(rdf_module, "prodTag"),
-            get_element(rdf_module, "inputDir"))
-        if len(file_list) == 0:
-            LOGGER.error('No files to process!\nAborting...')
-            sys.exit(3)
-        fraction = 1
-        output = process
-        chunks = 1
-        try:
-            if get_element_dict(process_list[process], 'fraction') is not None:
-                fraction = get_element_dict(process_list[process], 'fraction')
-            if get_element_dict(process_list[process], 'output') is not None:
-                output = get_element_dict(process_list[process], 'output')
-            if get_element_dict(process_list[process], 'chunks') is not None:
-                chunks = get_element_dict(process_list[process], 'chunks')
-        except TypeError:
-            LOGGER.warning('No values set for process %s will use default '
-                           'values!', process)
-        if fraction < 1:
-            file_list = get_subfile_list(file_list, event_list, fraction)
+    for process_name, process_dict in process_list.items():
+        if args.test:
+            try:
+                if get_element_dict(process_dict, 'testfile') is not None:
+                    file_list = [get_element_dict(process_dict, 'testfile')]
+            except TypeError:
+                LOGGER.warning('No test file for process %s found!\n'
+                               'Aborting...')
+                sys.exit(3)
+            fraction = 1
+            output = process_name
+            chunks = 1
+        else:
+            file_list, event_list = get_process_info(
+                process_name,
+                get_element(rdf_module, "prodTag"),
+                get_element(rdf_module, "inputDir"))
+            if len(file_list) == 0:
+                LOGGER.error('No files to process!\nAborting...')
+                sys.exit(3)
+            fraction = 1
+            output = process_name
+            chunks = 1
+            try:
+                if get_element_dict(process_dict, 'fraction') is not None:
+                    fraction = get_element_dict(process_dict, 'fraction')
+                if get_element_dict(process_dict, 'output') is not None:
+                    output = get_element_dict(process_dict, 'output')
+                if get_element_dict(process_dict, 'chunks') is not None:
+                    chunks = get_element_dict(process_dict, 'chunks')
+            except TypeError:
+                LOGGER.warning('No values set for process %s will use default '
+                               'values!', process_name)
+            if fraction < 1:
+                file_list = get_subfile_list(file_list, event_list, fraction)
 
         # get the number of events processed, in a potential previous step
         file_list_root = ROOT.vector('string')()
@@ -747,7 +775,8 @@ def run_histmaker(args, rdf_module, anapath):
         # stage)
         nevents_meta = 0
         for file_name in file_list:
-            file_name = apply_filepath_rewrites(file_name)
+            if not args.use_data_source:
+                file_name = apply_filepath_rewrites(file_name)
             file_list_root.push_back(file_name)
             # Skip check for processed events in case of first stage
             if get_element(rdf_module, "prodTag") is None:
@@ -759,17 +788,39 @@ def run_histmaker(args, rdf_module, anapath):
                 infile.Close()
             if args.test:
                 break
-        events_processed_dict[process] = nevents_meta
-        info_msg = f'Add process "{process}" with:'
+        events_processed_dict[process_name] = nevents_meta
+        info_msg = f'Add process "{process_name}" with:'
         info_msg += f'\n\tfraction = {fraction}'
         info_msg += f'\n\tnFiles = {len(file_list_root):,}'
         info_msg += f'\n\toutput = {output}\n\tchunks = {chunks}'
         LOGGER.info(info_msg)
 
-        dframe = ROOT.ROOT.RDataFrame("events", file_list_root)
+        if args.use_data_source:
+            if ROOT.podio.DataSource:
+                LOGGER.debug('Found Podio ROOT DataSource.')
+            else:
+                LOGGER.error('Podio ROOT DataSource library not found!'
+                             '\nAborting...')
+                sys.exit(3)
+            LOGGER.info('Loading events through podio::DataSource...')
+
+            try:
+                dframe = ROOT.podio.CreateDataFrame(file_list_root)
+            except TypeError as excp:
+                LOGGER.error('Unable to build dataframe using '
+                             'podio::DataSource!\n%s', excp)
+                sys.exit(3)
+        else:
+            dframe = ROOT.ROOT.RDataFrame("events", file_list_root)
         evtcount = dframe.Count()
 
-        res, hweight = graph_function(dframe, process)
+        try:
+            res, hweight = graph_function(dframe, process_name)
+        except cppyy.gbl.std.runtime_error:
+            LOGGER.error('During loading of the analysis an error occurred!'
+                         '\nAborting...')
+            sys.exit(3)
+
         results.append(res)
         hweights.append(hweight)
         evtcounts.append(evtcount)
@@ -886,6 +937,9 @@ def run(parser):
         LOGGER.error('Unknow sub-command "%s"!\nAborting...')
         sys.exit(3)
 
+    # Work with absolute path of the analysis script
+    anapath = os.path.abspath(args.anascript_path)
+
     # Check that the analysis file exists
     anapath = args.anascript_path
     if not os.path.isfile(anapath):
@@ -926,14 +980,19 @@ def run(parser):
 
     # Load the analysis script as a module
     anapath = os.path.abspath(anapath)
-    LOGGER.info('Loading analysis file:\n%s', anapath)
-    rdf_spec = importlib.util.spec_from_file_location("fcc_analysis_module",
-                                                      anapath)
-    rdf_module = importlib.util.module_from_spec(rdf_spec)
-    rdf_spec.loader.exec_module(rdf_module)
+    LOGGER.info('Loading analysis script:\n%s', anapath)
+    try:
+        rdf_spec = importlib.util.spec_from_file_location('rdfanalysis',
+                                                          anapath)
+        rdf_module = importlib.util.module_from_spec(rdf_spec)
+        rdf_spec.loader.exec_module(rdf_module)
+    except SyntaxError as err:
+        LOGGER.error('Syntax error encountered in the analysis script:\n%s',
+                     err)
+        sys.exit(3)
 
     # Merge configuration from analysis script file with command line arguments
-    if get_element(rdf_module, 'graph'):
+    if get_element(rdf_module, 'graph', False):
         args.graph = True
 
     if get_element(rdf_module, 'graphPath') != '':
diff --git a/python/run_fccanalysis.py b/python/run_fccanalysis.py
index e6f4f86c0c..5946f12d1c 100644
--- a/python/run_fccanalysis.py
+++ b/python/run_fccanalysis.py
@@ -253,13 +253,33 @@ def submit_job(cmd: str, max_trials: int) -> bool:
 
 
 # _____________________________________________________________________________
-def initialize(args, analysis):
+def merge_config(args: object, analysis: object) -> dict[str, any]:
+    '''
+    Merge configuration from command line arguments and analysis class.
+    '''
+    config: dict[str, any] = {}
+
+    # Check whether to use PODIO DataSource to load the events
+    config['use_data_source'] = False
+    if args.use_data_source:
+        config['use_data_source'] = True
+    if get_attribute(analysis, 'use_data_source', False):
+        config['use_data_source'] = True
+
+    return config
+
+
+# _____________________________________________________________________________
+def initialize(config, args, analysis):
     '''
     Common initialization steps.
     '''
 
-    # for convenience and compatibility with user code
-    ROOT.gInterpreter.Declare("using namespace FCCAnalyses;")
+    # For convenience and compatibility with user code
+    if config['use_data_source']:
+        ROOT.gInterpreter.Declare("using namespace FCCAnalyses::PodioSource;")
+    else:
+        ROOT.gInterpreter.Declare("using namespace FCCAnalyses;")
 
     # Load geometry, needed for the CaloNtupleizer analyzers
     geometry_file = get_attribute(analysis, 'geometry_path', None)
@@ -293,7 +313,7 @@ def initialize(args, analysis):
     # custom header files
     include_paths = get_attribute(analysis, 'include_paths', None)
     if include_paths is not None:
-        ROOT.gInterpreter.ProcessLine(".O3")
+        ROOT.gInterpreter.ProcessLine(".O2")
         basepath = os.path.dirname(os.path.abspath(args.anascript_path)) + "/"
         for path in include_paths:
             LOGGER.info('Loading %s...', path)
@@ -301,7 +321,8 @@ def initialize(args, analysis):
 
 
 # _____________________________________________________________________________
-def run_rdf(args,
+def run_rdf(config: dict[str, any],
+            args,
             analysis,
             input_list: list[str],
             out_file: str) -> int:
@@ -309,7 +330,22 @@ def run_rdf(args,
     Run the analysis ROOTDataFrame and snapshot it.
     '''
     # Create initial dataframe
-    dframe = ROOT.RDataFrame("events", input_list)
+    if config['use_data_source']:
+        if ROOT.podio.DataSource:
+            LOGGER.debug('Found podio::DataSource.')
+        else:
+            LOGGER.error('podio::DataSource library not found!\nAborting...')
+            sys.exit(3)
+        LOGGER.info('Loading events through podio::DataSource...')
+
+        try:
+            dframe = ROOT.podio.CreateDataFrame(input_list)
+        except TypeError as excp:
+            LOGGER.error('Unable to build dataframe using '
+                         'podio::DataSource!\n%s', excp)
+            sys.exit(3)
+    else:
+        dframe = ROOT.RDataFrame("events", input_list)
 
     # Limit number of events processed
     if args.nevents > 0:
@@ -455,7 +491,10 @@ def apply_filepath_rewrites(filepath: str) -> str:
 
 
 # _____________________________________________________________________________
-def run_local(args, analysis, infile_list):
+def run_local(config: dict[str, any],
+              args: object,
+              analysis: object,
+              infile_list):
     '''
     Run analysis locally.
     '''
@@ -469,7 +508,8 @@ def run_local(args, analysis, infile_list):
     nevents_local = 0
     for filepath in infile_list:
 
-        filepath = apply_filepath_rewrites(filepath)
+        if not config['use_data_source']:
+            filepath = apply_filepath_rewrites(filepath)
 
         file_list.push_back(filepath)
         info_msg += f'- {filepath}\t\n'
@@ -512,7 +552,7 @@ def run_local(args, analysis, infile_list):
 
     # Run RDF
     start_time = time.time()
-    inn, outn = run_rdf(args, analysis, file_list, outfile_path)
+    inn, outn = run_rdf(config, args, analysis, file_list, outfile_path)
     elapsed_time = time.time() - start_time
 
     # replace nevents_local by inn = the amount of processed events
@@ -577,8 +617,11 @@ def run_fccanalysis(args, analysis_module):
     analysis_args = vars(args)
     analysis = analysis_module.Analysis(analysis_args)
 
+    # Merge configuration from command line arguments and analysis class
+    config: dict[str, any] = merge_config(args, analysis)
+
     # Set number of threads, load header files, custom dicts, ...
-    initialize(args, analysis_module)
+    initialize(config, args, analysis_module)
 
     # Check if output directory exist and if not create it
     output_dir = get_attribute(analysis, 'output_dir', None)
@@ -598,7 +641,7 @@ def run_fccanalysis(args, analysis_module):
         directory, _ = os.path.split(args.output)
         if directory:
             os.system(f'mkdir -p {directory}')
-        run_local(args, analysis, [testfile_path])
+        run_local(config, args, analysis, [testfile_path])
         sys.exit(0)
 
     # Check if files are specified, and if so run the analysis on it/them (this
@@ -608,7 +651,7 @@ def run_fccanalysis(args, analysis_module):
         directory, _ = os.path.split(args.output)
         if directory:
             os.system(f'mkdir -p {directory}')
-        run_local(args, analysis, args.files_list)
+        run_local(config, args, analysis, args.files_list)
         sys.exit(0)
 
     # Check if batch mode is available
@@ -693,11 +736,11 @@ def run_fccanalysis(args, analysis_module):
             LOGGER.info('Running locally...')
             if len(chunk_list) == 1:
                 args.output = f'{output_stem}.root'
-                run_local(args, analysis, chunk_list[0])
+                run_local(config, args, analysis, chunk_list[0])
             else:
                 for index, chunk in enumerate(chunk_list):
                     args.output = f'{output_stem}/chunk{index}.root'
-                    run_local(args, analysis, chunk)
+                    run_local(config, args, analysis, chunk)
 
     if len(process_list) == 0:
         LOGGER.warning('No files processed (process_list not found)!\n'
diff --git a/setup.sh b/setup.sh
index 5f680924be..2326229229 100644
--- a/setup.sh
+++ b/setup.sh
@@ -26,6 +26,7 @@ if [ "${0}" != "${BASH_SOURCE}" ]; then
   export PYTHONPATH=${LOCAL_DIR}/install/share/examples:${PYTHONPATH}
   export PATH=${LOCAL_DIR}/bin:${PATH}
   export PATH=${LOCAL_DIR}/install/bin:${PATH}
+  export LD_LIBRARY_PATH=${LOCAL_DIR}/install/lib64:${LD_LIBRARY_PATH}
   export LD_LIBRARY_PATH=${LOCAL_DIR}/install/lib:${LD_LIBRARY_PATH}
   export CMAKE_PREFIX_PATH=${LOCAL_DIR}/install:${CMAKE_PREFIX_PATH}
 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 45361f115a..9f93d9f0fb 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -2,6 +2,8 @@ add_subdirectory(unittest)
 
 add_subdirectory(benchmark)
 
+add_subdirectory(integration)
+
 add_integration_test("examples/FCCee/higgs/mH-recoil/mumu/analysis_stage1.py")
 add_integration_test("examples/FCCee/flavour/Bc2TauNu/analysis_B2TauNu_truth.py")
 add_integration_test("examples/FCCee/test/jet_constituents.py")
@@ -9,5 +11,12 @@ add_integration_test("examples/FCCee/vertex_lcfiplus/analysis_V0.py")
 
 add_standalone_test("examples/FCCee/fullSim/caloNtupleizer/analysis.py")
 
+if(WITH_PODIO_DATASOURCE)
+  add_standalone_test("examples/data_source/standalone.py")
+  add_integration_test("examples/data_source/histmaker_source.py")
+  add_integration_test("examples/data_source/stages_source.py")
+  add_integration_test("examples/data_source/analysis_stage1.py")
+endif()
+
 # TODO: make this test run in the spack build environment
 #add_generic_test(build_new_case_study "tests/build_new_case_study.sh")
diff --git a/tests/integration/CMakeLists.txt b/tests/integration/CMakeLists.txt
new file mode 100644
index 0000000000..42884f7ace
--- /dev/null
+++ b/tests/integration/CMakeLists.txt
@@ -0,0 +1,59 @@
+# Low level EDM4hep access
+add_executable(low_level_edm4hep
+               ${CMAKE_SOURCE_DIR}/tests/integration/low_level_edm4hep.cxx
+)
+target_link_libraries(low_level_edm4hep ROOT::ROOTDataFrame
+                                        podio::podioDataSource
+                                        EDM4HEP::edm4hep
+                                        EDM4HEP::edm4hepDict
+)
+add_generic_test(
+  low_level_edm4hep
+  "${CMAKE_CURRENT_BINARY_DIR}/low_level_edm4hep 1"
+  "https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/p8_ee_WW_ecm240_edm4hep.root"
+)
+
+add_executable(
+  low_level_associations
+  ${CMAKE_SOURCE_DIR}/tests/integration/low_level_associations.cxx)
+target_link_libraries(low_level_associations ROOT::ROOTDataFrame
+                                             podio::podioDataSource
+                                             EDM4HEP::edm4hep
+                                             EDM4HEP::edm4hepDict
+                                             FCCAnalyses
+)
+add_generic_test(
+  low_level_associations
+  "${CMAKE_CURRENT_BINARY_DIR}/low_level_associations 1"
+  "https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/p8_ee_WW_ecm240_edm4hep.root"
+)
+
+if(WITH_PODIO_DATASOURCE)
+  add_executable(data_source
+                 ${CMAKE_SOURCE_DIR}/tests/integration/data_source.cxx)
+  target_include_directories(data_source PRIVATE ${CMAKE_SOURCE_DIR}/include)
+  target_link_libraries(data_source podio::podioDataSource
+                                    EDM4HEP::edm4hep
+  )
+  add_generic_test(
+    data_source
+    "${CMAKE_CURRENT_BINARY_DIR}/data_source 1"
+    "https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/p8_ee_WW_ecm240_edm4hep.root"
+  )
+
+  add_executable(
+    data_source_associations
+    ${CMAKE_SOURCE_DIR}/tests/integration/data_source_associations.cxx
+  )
+  target_include_directories(data_source_associations PRIVATE
+                             ${CMAKE_SOURCE_DIR}/include)
+  target_link_libraries(data_source_associations podio::podioDataSource
+                                                 EDM4HEP::edm4hep
+                                                 FCCAnalyses
+  )
+  add_generic_test(
+    data_source_associations
+    "${CMAKE_CURRENT_BINARY_DIR}/data_source_associations 1"
+    "https://fccsw.web.cern.ch/fccsw/testsamples/edm4hep1/p8_ee_WW_ecm240_edm4hep.root"
+  )
+endif()
diff --git a/tests/integration/data_source.cxx b/tests/integration/data_source.cxx
new file mode 100644
index 0000000000..b977580792
--- /dev/null
+++ b/tests/integration/data_source.cxx
@@ -0,0 +1,148 @@
+// ROOT
+#include <ROOT/RDataFrame.hxx>
+#include <ROOT/RLogger.hxx>
+#include <ROOT/RVec.hxx>
+#include <TCanvas.h>
+
+// PODIO
+#include <podio/DataSource.h>
+
+// EDM4hep
+#include <edm4hep/MCParticle.h>
+#include <edm4hep/MCParticleCollection.h>
+#include <edm4hep/RecoMCParticleLinkCollection.h>
+#include <edm4hep/ReconstructedParticle.h>
+#include <edm4hep/SimCalorimeterHitCollection.h>
+
+edm4hep::MCParticleCollection
+selElectrons(const edm4hep::MCParticleCollection &inParticles) {
+  edm4hep::MCParticleCollection electrons;
+  electrons.setSubsetCollection();
+  for (auto particle : inParticles) {
+    if (particle.getPDG() == 11) {
+      auto electron = edm4hep::MCParticle(particle);
+      electrons.push_back(electron);
+    }
+  }
+
+  return electrons;
+}
+
+struct selPDG {
+  selPDG(int pdg, bool chargeConjugateAllowed);
+  const int m_pdg;
+  const bool m_chargeConjugateAllowed;
+  edm4hep::MCParticleCollection
+  operator()(const edm4hep::MCParticleCollection &inParticles);
+};
+
+selPDG::selPDG(int pdg, bool chargeConjugateAllowed)
+    : m_pdg(pdg), m_chargeConjugateAllowed(chargeConjugateAllowed){};
+
+edm4hep::MCParticleCollection
+selPDG::operator()(const edm4hep::MCParticleCollection &inParticles) {
+  edm4hep::MCParticleCollection result;
+  result.setSubsetCollection();
+  for (auto particle : inParticles) {
+    if (m_chargeConjugateAllowed) {
+      if (std::abs(particle.getPDG()) == std::abs(m_pdg)) {
+        result.push_back(particle);
+      }
+    } else {
+      if (particle.getPDG() == m_pdg) {
+        result.push_back(particle);
+      }
+    }
+  }
+
+  return result;
+}
+
+ROOT::VecOps::RVec<float>
+getPx(const edm4hep::MCParticleCollection &inParticles) {
+  ROOT::VecOps::RVec<float> result;
+  for (auto particle : inParticles) {
+    result.push_back(particle.getMomentum().x);
+  }
+
+  return result;
+}
+
+edm4hep::MCParticleCollection
+get_stable_particles_from_decay(edm4hep::MCParticle in) {
+  edm4hep::MCParticleCollection result;
+  result.setSubsetCollection();
+
+  auto daughters = in.getDaughters();
+  if (daughters.size() != 0) { // particle is unstable
+    for (const auto &daughter : daughters) {
+      auto stable_daughters = get_stable_particles_from_decay(daughter);
+      for (const auto &stable_daughter : stable_daughters) {
+        result.push_back(stable_daughter);
+      }
+    }
+  } else {
+    result.push_back(in);
+  }
+
+  return result;
+}
+
+edm4hep::MCParticle
+get_mcParticle(const edm4hep::ReconstructedParticle &recoParticle,
+               const edm4hep::RecoMCParticleLinkCollection &assocColl) {
+  edm4hep::MCParticle no_result;
+
+  for (const auto &assoc : assocColl) {
+    if (assoc.getFrom() == recoParticle) {
+      return assoc.getTo();
+    }
+  }
+
+  return no_result;
+}
+
+int main(int argc, const char *argv[]) {
+  auto verbosity = ROOT::Experimental::RLogScopedVerbosity(
+      ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo);
+
+  int nThreads = 1;
+  if (argc > 1) {
+    nThreads = atoi(argv[1]);
+  }
+
+  if (nThreads > 1) {
+    ROOT::EnableImplicitMT(nThreads);
+  }
+
+  std::string filePath = "https://fccsw.web.cern.ch/fccsw/testsamples/"
+                         "edm4hep1/p8_ee_WW_ecm240_edm4hep.root";
+  if (argc > 2) {
+    filePath = argv[2];
+  }
+
+  ROOT::RDataFrame rdf(std::make_unique<podio::DataSource>(filePath));
+
+  // rdf.Describe().Print();
+  // std::cout << std::endl;
+
+  std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl;
+
+  auto rdf2 = rdf.Define("particles_px", getPx, {"Particle"});
+  // auto rdf3 = rdf2.Define("electrons", selElectrons, {"MCParticles"});
+  auto rdf3 = rdf2.Define("electrons", selPDG(11, false), {"Particle"});
+  auto rdf4 = rdf3.Define("electrons_px", getPx, {"electrons"});
+  auto h_particles_px = rdf4.Histo1D("particles_px");
+  auto h_electrons_px = rdf4.Histo1D("electrons_px");
+
+  h_particles_px->Print();
+  h_electrons_px->Print();
+
+  auto canvas = std::make_unique<TCanvas>("canvas", "Canvas", 450, 450);
+  h_particles_px->Draw();
+  canvas->Print("/tmp/source_particles_px.pdf");
+  h_electrons_px->Draw();
+  canvas->Print("/tmp/source_electrons_px.pdf");
+
+  return EXIT_SUCCESS;
+}
diff --git a/tests/integration/data_source_associations.cxx b/tests/integration/data_source_associations.cxx
new file mode 100644
index 0000000000..fec2649ed3
--- /dev/null
+++ b/tests/integration/data_source_associations.cxx
@@ -0,0 +1,269 @@
+// std
+#include <cstddef>
+#include <iostream>
+#include <ostream>
+
+// ROOT
+#include <ROOT/RDataFrame.hxx>
+#include <ROOT/RLogger.hxx>
+#include <ROOT/RVec.hxx>
+#include <TCanvas.h>
+#include <TMatrixDSym.h>
+#include <TRandom.h>
+#include <TVector3.h>
+#include <TVectorD.h>
+
+// PODIO
+#include <podio/DataSource.h>
+
+// EDM4hep
+#include "edm4hep/RecoMCParticleLinkCollection.h"
+#include "edm4hep/ReconstructedParticleCollection.h"
+#include "edm4hep/TrackCollection.h"
+#include "edm4hep/TrackState.h"
+#include <edm4hep/MCParticle.h>
+#include <edm4hep/MCParticleCollection.h>
+#include <edm4hep/SimCalorimeterHitCollection.h>
+
+// FCCAnalyses
+#include "FCCAnalyses/SmearObjects.h"
+#include "FCCAnalyses/VertexingUtils.h"
+
+/// for a given MC particle, returns a "track state", i.e. a vector of 5 helix
+/// parameters, in Delphes convention
+TVectorD TrackParamFromMC_DelphesConv(edm4hep::MCParticle aMCParticle) {
+
+  TVector3 p(aMCParticle.getMomentum().x, aMCParticle.getMomentum().y,
+             aMCParticle.getMomentum().z);
+  TVector3 x(1e-3 * aMCParticle.getVertex().x, 1e-3 * aMCParticle.getVertex().y,
+             1e-3 * aMCParticle.getVertex().z); // mm to m
+  float Q = aMCParticle.getCharge();
+  TVectorD param =
+      FCCAnalyses::VertexingUtils::XPtoPar(x, p, Q); // convention Franco
+
+  return param;
+}
+
+TMatrixDSym get_trackCov(const edm4hep::TrackState &atrack, bool Units_mm) {
+  auto covMatrix = atrack.covMatrix;
+
+  TMatrixDSym covM =
+      FCCAnalyses::VertexingUtils::Edm4hep2Delphes_TrackCovMatrix(covMatrix,
+                                                                  Units_mm);
+
+  return covM;
+}
+
+/**
+ * \brief Generates new track states, by rescaling the covariance matrix of the
+ *        tracks.
+ */
+struct SmearedTracks {
+  bool m_debug;
+  TRandom m_random;
+  float m_smear_parameters[5];
+  SmearedTracks(float smear_d0, float smear_phi, float smear_omega,
+                float smear_z0, float smear_tlambda, bool debug);
+  ROOT::VecOps::RVec<edm4hep::TrackState>
+  operator()(const edm4hep::RecoMCParticleLinkCollection &mcRecoLink);
+};
+
+SmearedTracks::SmearedTracks(float smear_d0, float smear_phi, float smear_omega,
+                             float smear_z0, float smear_tlambda,
+                             bool debug = false) {
+
+  m_smear_parameters[0] = smear_d0;
+  m_smear_parameters[1] = smear_phi;
+  m_smear_parameters[2] = smear_omega;
+  m_smear_parameters[3] = smear_z0;
+  m_smear_parameters[4] = smear_tlambda;
+
+  m_debug = debug;
+}
+
+ROOT::VecOps::RVec<edm4hep::TrackState> SmearedTracks::operator()(
+    const edm4hep::RecoMCParticleLinkCollection &mcRecoLink) {
+
+  // returns a vector of TrackStates that is parallel to the collection of full
+  // tracks (alltracks), i.e. same number of entries, same order. The method
+  // retrieve the MC particle that is associated to a track, and builds a "track
+  // state" out of the MC particle (i.e. it determines the corresponding (d0,
+  // phi, omega, z0, tanlambda). From this vector, and from the covariance
+  // matrix of the track, which is scaled by the user, new track states are
+  // generated.
+
+  ROOT::VecOps::RVec<edm4hep::TrackState> result;
+
+  TVectorD zero(5);
+  for (int k = 0; k < 5; k++) {
+    zero(k) = 0.;
+  }
+
+  for (const auto &assoc : mcRecoLink) {
+    const auto &recoParticle = assoc.getFrom();
+    const auto &mcParticle = assoc.getTo();
+
+    if (recoParticle.getTracks().size() < 1) {
+      continue;
+    }
+
+    if (recoParticle.getTracks().size() > 1) {
+      std::cout << "More than one track for one reco particle encountered!"
+                << std::endl;
+    }
+
+    const edm4hep::Track &track = recoParticle.getTracks().at(0);
+
+    // the MC-truth track parameters, in Delphes's comvention
+    TVectorD mcTrackParam = TrackParamFromMC_DelphesConv(mcParticle);
+    // and in edm4hep convention
+    TVectorD mcTrackParam_edm4hep =
+        FCCAnalyses::VertexingUtils::Delphes2Edm4hep_TrackParam(mcTrackParam,
+                                                                false);
+
+    if (track.getTrackStates().size() < 1) {
+      continue;
+    }
+
+    if (track.getTrackStates().size() > 1) {
+      std::cout << "More than one track state for one track encountered!"
+                << std::endl;
+    }
+
+    const edm4hep::TrackState &trackState = track.getTrackStates().at(0);
+
+    // the covariance matrix of the track, in Delphes's convention
+    TMatrixDSym Cov = get_trackCov(trackState, false);
+
+    // if the covMat of the track is pathological (numerical precision issue,
+    // fraction of tracks = 5e-6): return original track
+    if (Cov.Determinant() <= 0) {
+      result.emplace_back(trackState);
+      continue;
+    }
+
+    // scale the covariance matrix
+    for (int j = 0; j < 5; j++) {
+      for (int k = 0; k < 5; k++) {
+        Cov[j][k] = Cov[j][k] * (m_smear_parameters[j] * m_smear_parameters[k]);
+      }
+    }
+    // if (m_debug) {
+    // Cov.Print();
+    //}
+
+    // generate a new track state (in Delphes's convention)
+    TVectorD smeared_param_delphes = FCCAnalyses::SmearObjects::CovSmear(
+        mcTrackParam, Cov, &m_random, m_debug);
+
+    if (smeared_param_delphes == zero) { // Cholesky decomposition failed
+      result.emplace_back(trackState);
+      continue;
+    }
+
+    // back to the edm4hep conventions..
+    TVectorD smeared_param =
+        FCCAnalyses::VertexingUtils::Delphes2Edm4hep_TrackParam(
+            smeared_param_delphes, false);
+
+    auto smearedTrackState = trackState;
+    if (smeared_param.GetNoElements() > 4) {
+      smearedTrackState.D0 = smeared_param[0];
+      smearedTrackState.phi = smeared_param[1];
+      smearedTrackState.omega = smeared_param[2];
+      smearedTrackState.Z0 = smeared_param[3];
+      smearedTrackState.tanLambda = smeared_param[4];
+    } else {
+      result.emplace_back(trackState);
+      continue;
+    }
+
+    // transform rescaled cov matrix from Delphes convention to EDM4hep
+    // convention
+    std::array<float, 21> covMatrix_edm4hep =
+        FCCAnalyses::VertexingUtils::Delphes2Edm4hep_TrackCovMatrix(Cov, false);
+
+    smearedTrackState.covMatrix = covMatrix_edm4hep;
+
+    if (m_debug) {
+      std::cout << std::endl
+                << "Original track " << trackState.D0 << " " << trackState.phi
+                << " " << trackState.omega << " " << trackState.Z0 << " "
+                << trackState.tanLambda << std::endl;
+      std::cout << "Smeared track " << smearedTrackState.D0 << " "
+                << smearedTrackState.phi << " " << smearedTrackState.omega
+                << " " << smearedTrackState.Z0 << " "
+                << smearedTrackState.tanLambda << std::endl;
+      std::cout << "MC particle " << mcTrackParam_edm4hep[0] << " "
+                << mcTrackParam_edm4hep[1] << " " << mcTrackParam_edm4hep[2]
+                << " " << mcTrackParam_edm4hep[3] << " "
+                << mcTrackParam_edm4hep[4] << std::endl;
+      for (int j = 0; j < 15; j++)
+        std::cout << "smeared cov matrix(" << j
+                  << "): " << smearedTrackState.covMatrix[j]
+                  << ", scale factor: "
+                  << smearedTrackState.covMatrix[j] / trackState.covMatrix[j]
+                  << std::endl;
+    }
+    /*
+    unsigned int recoIndex = assoc.getRec().getObjectID().index;
+    unsigned int mcIndex = assoc.getSim().getObjectID().index;
+    std::cout << "Reco: " << recoIndex << " ---> MC: " << mcIndex << std::endl;
+    */
+
+    result.emplace_back(smearedTrackState);
+  }
+
+  return result;
+}
+
+int main(int argc, char *argv[]) {
+  auto verbosity = ROOT::Experimental::RLogScopedVerbosity(
+      ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo);
+
+  bool success = gInterpreter->Declare("#include \"edm4hep/TrackState.h\"");
+  if (!success) {
+    std::cerr << "ERROR: Unable to find edm4hep::TrackState header file!"
+              << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  int nThreads = 1;
+  if (argc > 1) {
+    nThreads = atoi(argv[1]);
+  }
+
+  if (nThreads > 1) {
+    ROOT::EnableImplicitMT(nThreads);
+  }
+
+  std::string filePath = "https://fccsw.web.cern.ch/fccsw/testsamples/"
+                         "edm4hep1/p8_ee_WW_ecm240_edm4hep.root";
+  if (argc > 2) {
+    filePath = argv[2];
+  }
+
+  ROOT::RDataFrame rdf(std::make_unique<podio::DataSource>(filePath));
+
+  // rdf.Describe().Print();
+  // std::cout << std::endl;
+
+  std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl;
+
+  auto rdf2 =
+      rdf.Define("SmearedTracks", SmearedTracks(2.0, 2.0, 2.0, 2.0, 2.0, false),
+                 {"MCRecoAssociations"});
+
+  auto rdf3 = rdf2.Define(
+      "smearTrack_omega",
+      "return SmearedTracks.empty() ? -1 : SmearedTracks.at(0).omega;");
+  auto h_smeared_tracks_omega = rdf3.Histo1D("smearTrack_omega");
+
+  h_smeared_tracks_omega->Print();
+
+  auto canvas = std::make_unique<TCanvas>("canvas", "Canvas", 450, 450);
+  h_smeared_tracks_omega->Draw();
+  canvas->Print("/tmp/source_smeared_tracks_omega.pdf");
+
+  return EXIT_SUCCESS;
+}
diff --git a/tests/integration/low_level_associations.cxx b/tests/integration/low_level_associations.cxx
new file mode 100644
index 0000000000..70bdf047ba
--- /dev/null
+++ b/tests/integration/low_level_associations.cxx
@@ -0,0 +1,70 @@
+// ROOT
+#include "ROOT/RDataFrame.hxx"
+#include "ROOT/RLogger.hxx"
+#include "ROOT/RVec.hxx"
+#include "TCanvas.h"
+// EDM4hep
+#include "edm4hep/MCParticleData.h"
+#include "edm4hep/SimCalorimeterHitData.h"
+// FCCAnalyses
+#include "FCCAnalyses/ReconstructedParticle2MC.h"
+#include "FCCAnalyses/SmearObjects.h"
+
+int main(int argc, const char *argv[]) {
+  auto verbosity = ROOT::Experimental::RLogScopedVerbosity(
+      ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo);
+
+  bool success = gInterpreter->Declare("#include \"edm4hep/TrackState.h\"");
+  if (!success) {
+    std::cerr << "ERROR: Unable to find edm4hep::TrackState header file!"
+              << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  int nThreads = 1;
+  if (argc > 1) {
+    nThreads = atoi(argv[1]);
+  }
+
+  if (nThreads > 1) {
+    ROOT::EnableImplicitMT(nThreads);
+  }
+
+  std::string filePath = "https://fccsw.web.cern.ch/fccsw/testsamples/"
+                         "edm4hep1/p8_ee_WW_ecm240_edm4hep.root";
+  if (argc > 2) {
+    filePath = argv[2];
+  }
+
+  ROOT::RDataFrame rdf("events", filePath);
+
+  // rdf.Describe().Print();
+  // std::cout << std::endl;
+
+  std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl;
+
+  auto rdf2 =
+      rdf.Alias("MCRecoAssociations0", "_MCRecoAssociations_from.index");
+  auto rdf3 = rdf2.Alias("MCRecoAssociations1", "_MCRecoAssociations_to.index");
+  auto rdf4 = rdf3.Define(
+      "RP_MC_index", FCCAnalyses::ReconstructedParticle2MC::getRP2MC_index,
+      {"MCRecoAssociations0", "MCRecoAssociations1", "ReconstructedParticles"});
+  auto rdf5 = rdf4.Define(
+      "SmearedTracks",
+      FCCAnalyses::SmearObjects::SmearedTracks(2.0, 2.0, 2.0, 2.0, 2.0, true),
+      {"ReconstructedParticles", "_EFlowTrack_trackStates", "RP_MC_index",
+       "Particle"});
+  auto rdf6 = rdf5.Define(
+      "smearTrack_omega",
+      "return SmearedTracks.empty() ? -1 : SmearedTracks.at(0).omega;");
+  // auto rdf6 = rdf5.Define("smearTrack_omega", "return RP_MC_index[0];");
+  auto h_smeared_tracks_omega = rdf6.Histo1D("smearTrack_omega");
+
+  h_smeared_tracks_omega->Print();
+
+  auto canvas = std::make_unique<TCanvas>("canvas", "Canvas", 450, 450);
+  h_smeared_tracks_omega->Draw();
+  canvas->Print("/tmp/low_level_smeared_tracks_omega.pdf");
+
+  return EXIT_SUCCESS;
+}
diff --git a/tests/integration/low_level_edm4hep.cxx b/tests/integration/low_level_edm4hep.cxx
new file mode 100644
index 0000000000..9d4d25b19d
--- /dev/null
+++ b/tests/integration/low_level_edm4hep.cxx
@@ -0,0 +1,105 @@
+// ROOT
+#include <ROOT/RDataFrame.hxx>
+#include <ROOT/RLogger.hxx>
+#include <ROOT/RVec.hxx>
+#include <TCanvas.h>
+// EDM4hep
+#include <edm4hep/MCParticleData.h>
+#include <edm4hep/SimCalorimeterHitData.h>
+
+ROOT::VecOps::RVec<edm4hep::MCParticleData>
+selElectrons(ROOT::VecOps::RVec<edm4hep::MCParticleData> &inParticles) {
+  ROOT::VecOps::RVec<edm4hep::MCParticleData> electrons;
+  for (size_t i = 0; i < inParticles.size(); ++i) {
+    if (inParticles[i].PDG == 11) {
+      electrons.emplace_back(inParticles[i]);
+    }
+  }
+
+  return electrons;
+}
+
+struct selPDG {
+  selPDG(int pdg, bool chargeConjugateAllowed);
+  const int m_pdg;
+  const bool m_chargeConjugateAllowed;
+  ROOT::VecOps::RVec<edm4hep::MCParticleData>
+  operator()(ROOT::VecOps::RVec<edm4hep::MCParticleData> &inParticles);
+};
+
+selPDG::selPDG(int pdg, bool chargeConjugateAllowed)
+    : m_pdg(pdg), m_chargeConjugateAllowed(chargeConjugateAllowed){};
+
+ROOT::VecOps::RVec<edm4hep::MCParticleData>
+selPDG::operator()(ROOT::VecOps::RVec<edm4hep::MCParticleData> &inParticles) {
+  ROOT::VecOps::RVec<edm4hep::MCParticleData> result;
+  for (size_t i = 0; i < inParticles.size(); ++i) {
+    auto &particle = inParticles[i];
+    if (m_chargeConjugateAllowed) {
+      if (std::abs(particle.PDG) == std::abs(m_pdg)) {
+        result.emplace_back(particle);
+      }
+    } else {
+      if (particle.PDG == m_pdg) {
+        result.emplace_back(particle);
+      }
+    }
+  }
+
+  return result;
+}
+
+ROOT::VecOps::RVec<float>
+getPx(ROOT::VecOps::RVec<edm4hep::MCParticleData> inParticles) {
+  ROOT::VecOps::RVec<float> result;
+  for (auto &p : inParticles) {
+    result.push_back(p.momentum.x);
+  }
+
+  return result;
+}
+
+int main(int argc, const char *argv[]) {
+  auto verbosity = ROOT::Experimental::RLogScopedVerbosity(
+      ROOT::Detail::RDF::RDFLogChannel(), ROOT::Experimental::ELogLevel::kInfo);
+
+  int nThreads = 1;
+  if (argc > 1) {
+    nThreads = atoi(argv[1]);
+  }
+
+  if (nThreads > 1) {
+    ROOT::EnableImplicitMT(nThreads);
+  }
+
+  std::string filePath = "https://fccsw.web.cern.ch/fccsw/testsamples/"
+                         "edm4hep1/p8_ee_WW_ecm240_edm4hep.root";
+  if (argc > 2) {
+    filePath = argv[2];
+  }
+
+  ROOT::RDataFrame rdf("events", filePath);
+
+  // rdf.Describe().Print();
+  // std::cout << std::endl;
+
+  std::cout << "Info: Num. of slots: " << rdf.GetNSlots() << std::endl;
+
+  auto rdf2 = rdf.Define("particles_px", getPx, {"Particle"});
+  // auto rdf3 = rdf2.Define("electrons", selElectrons, {"MCParticles"});
+  auto rdf3 = rdf2.Define("electrons", selPDG(11, false), {"Particle"});
+  auto rdf4 = rdf3.Define("electrons_px", getPx, {"electrons"});
+  auto h_particles_px = rdf4.Histo1D("particles_px");
+  auto h_electrons_px = rdf4.Histo1D("electrons_px");
+
+  h_particles_px->Print();
+  h_electrons_px->Print();
+
+  auto canvas = std::make_unique<TCanvas>("canvas", "Canvas", 450, 450);
+  h_particles_px->Draw();
+  canvas->Print("/tmp/low_level_particles_px.pdf");
+  h_electrons_px->Draw();
+  canvas->Print("/tmp/low_level_electrons_px.pdf");
+
+  return EXIT_SUCCESS;
+}
diff --git a/tests/unittest/CMakeLists.txt b/tests/unittest/CMakeLists.txt
index 485ff7e46b..4f364b052b 100644
--- a/tests/unittest/CMakeLists.txt
+++ b/tests/unittest/CMakeLists.txt
@@ -1,6 +1,6 @@
 find_catch_instance()
 
-# The unittests are a bit better and they are labelled so we can put together a
+# The unittests are a bit better and they are labeled so we can put together a
 # list of labels that we want to ignore
 set(filter_tests "")
 
@@ -9,7 +9,9 @@ add_executable(unittest unittest.cpp
                         algorithms.cpp
                         ReconstructedParticle.cpp
 )
-target_link_libraries(unittest PUBLIC FCCAnalyses gfortran PRIVATE Catch2::Catch2WithMain)
+target_link_libraries(unittest PUBLIC FCCAnalyses
+                                      gfortran
+                               PRIVATE Catch2::Catch2WithMain)
 target_include_directories(unittest PUBLIC ${VDT_INCLUDE_DIR})
 target_compile_definitions(unittest PUBLIC "-DTEST_INPUT_DATA_DIR=${TEST_INPUT_DATA_DIR}")
 include(Catch)