Skip to content

Commit

Permalink
edm4hep2json: All EDM4hep collections + ROOT legacy reader (#227)
Browse files Browse the repository at this point in the history
* All EDM4hep collections + ROOT legacy reader

* Add EDM4hep version to the output

* Adding test which checks for all the collections
  • Loading branch information
kjvbrt authored Nov 1, 2023
1 parent 69268de commit eba6e61
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 102 deletions.
7 changes: 1 addition & 6 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,5 @@ if(HepMC3_FOUND AND HepPDT_FOUND )
)
endif()

if (nlohmann_json_FOUND)
add_test(NAME convert_events COMMAND edm4hep2json edm4hep_events.root)
set_property(TEST convert_events PROPERTY DEPENDS write_events)
set_test_env(convert_events)
endif()

add_subdirectory(utils)
add_subdirectory(tools)
12 changes: 12 additions & 0 deletions test/tools/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Tests for the edm4hep2json tool. They are only registered when the
# nlohmann_json_FOUND variable is set.
if (nlohmann_json_FOUND)
# Run edm4hep2json on edm4hep_events.root, which is looked up one directory
# above the current binary directory.
add_test(NAME convert_events
COMMAND edm4hep2json ${CMAKE_CURRENT_BINARY_DIR}/../edm4hep_events.root)
# Declare a dependency on the write_events test (presumably the test that
# produces edm4hep_events.root -- confirm against the parent CMakeLists.txt).
set_property(TEST convert_events PROPERTY DEPENDS write_events)
# set_test_env is defined elsewhere in the project; see its definition for
# the environment it applies to the test.
set_test_env(convert_events)

# Run test_all_collections.py, passing it the datamodel definition
# (edm4hep.yaml) and the converter header (edm4hep2json.hxx).
add_test(NAME test_convert_all_collections
COMMAND python ${CMAKE_CURRENT_LIST_DIR}/test_all_collections.py
${PROJECT_SOURCE_DIR}/edm4hep.yaml
${PROJECT_SOURCE_DIR}/tools/include/edm4hep2json.hxx)
set_test_env(test_convert_all_collections)
endif()
49 changes: 49 additions & 0 deletions test/tools/test_all_collections.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
'''
Tests if all datatypes are used in the cxx file.
'''

import sys
import re
import argparse
import yaml


def test(yamlfile_path, cxxfile_path):
'''
Test itself.
Takes two parameters, Podio YAML file location and cxx file to be checked.
'''

with open(yamlfile_path, mode='r', encoding="utf-8") as yamlfile:
datamodel = yaml.safe_load(yamlfile)

# List stores lines of cxx code on which `insertToJson<CollType>` is used
datatypes_found = []

with open(cxxfile_path, mode='r', encoding="utf-8") as cxxfile:
for cxxline in cxxfile:
cxxline = cxxfile.readline()
result = re.search('insertIntoJson<edm4hep::(.+?)Collection>',
cxxline)
if result:
datatypes_found += ['edm4hep::' + result.group(1)]

datatypes_found = set(datatypes_found)

datatypes = set(datamodel['datatypes'])

if not datatypes.issubset(datatypes_found):
missing_datatypes = datatypes - datatypes_found
print('ERROR: One or more datatypes are not being converted:')
for datatype in missing_datatypes:
print(' ' + datatype)
sys.exit(2)


if __name__ == "__main__":
    # Command-line entry point: both file locations are positional arguments,
    # the YAML datamodel first and the cxx file second.
    arg_parser = argparse.ArgumentParser(description='Test all collections')
    for positional in ('yamlfile', 'cxxfile'):
        arg_parser.add_argument(positional)
    cli_args = arg_parser.parse_args()

    test(cli_args.yamlfile, cli_args.cxxfile)
206 changes: 133 additions & 73 deletions tools/include/edm4hep2json.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,38 @@
#define EDM4HEP_TO_JSON_H

// EDM4hep event data model
#include "edm4hep/CaloHitContributionCollection.h"
#include "edm4hep/CalorimeterHitCollection.h"
#include "edm4hep/ClusterCollection.h"
#include "edm4hep/EventHeaderCollection.h"
#include "edm4hep/MCParticleCollection.h"
#include "edm4hep/MCRecoParticleAssociationCollection.h"
#include "edm4hep/ParticleIDCollection.h"
#include "edm4hep/RawCalorimeterHitCollection.h"
#include "edm4hep/RawTimeSeriesCollection.h"
#include "edm4hep/RecDqdxCollection.h"
#include "edm4hep/RecIonizationClusterCollection.h"
#include "edm4hep/ReconstructedParticleCollection.h"
#include "edm4hep/SimCalorimeterHitCollection.h"
#include "edm4hep/SimPrimaryIonizationClusterCollection.h"
#include "edm4hep/SimTrackerHitCollection.h"
#include "edm4hep/TimeSeriesCollection.h"
#include "edm4hep/TrackCollection.h"
#include "edm4hep/TrackerHitCollection.h"
#include "edm4hep/TrackerHitPlaneCollection.h"
#include "edm4hep/TrackerPulseCollection.h"
#include "edm4hep/VertexCollection.h"

#include "edm4hep/MCRecoCaloAssociationCollection.h"
#include "edm4hep/MCRecoCaloParticleAssociationCollection.h"
#include "edm4hep/MCRecoClusterParticleAssociationCollection.h"
#include "edm4hep/MCRecoParticleAssociationCollection.h"
#include "edm4hep/MCRecoTrackParticleAssociationCollection.h"
#include "edm4hep/MCRecoTrackerAssociationCollection.h"
#include "edm4hep/MCRecoTrackerHitPlaneAssociationCollection.h"
#include "edm4hep/RecoParticleVertexAssociationCollection.h"

#include "edm4hep/EDM4hepVersion.h"

// podio specific includes
#include "podio/Frame.h"
#include "podio/UserDataCollection.h"
Expand All @@ -25,95 +44,116 @@

// STL
#include <cassert>
#include <cstdint>
#include <exception>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>

nlohmann::json processEvent(const podio::Frame& frame, std::vector<std::string>& collList, bool verboser,
// Adds one collection to the JSON dictionary of an event.
//
// The collection is stored under the key `name` as an object holding the
// serialized collection ("collection"), its ID ("collID") and its type name
// ("collType").
//
// CollT    concrete collection type that `coll` is cast to
// jsonDict JSON object receiving the new entry
// coll     collection to insert; the caller has to make sure that it really
//          is a CollT, no check is done here
// name     key under which the collection is stored
template <typename CollT>
void insertIntoJson(nlohmann::json& jsonDict, const podio::CollectionBase* coll, const std::string& name) {
  // Unchecked downcast: the caller has already queried the type
  const CollT& concreteColl = *static_cast<const CollT*>(coll);
  const nlohmann::json entry{
      {name, {{"collection", concreteColl}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
  jsonDict.insert(entry.begin(), entry.end());
}

nlohmann::json processEvent(const podio::Frame& frame, std::vector<std::string>& collList,
podio::version::Version podioVersion) {
std::stringstream podioVersionStr;
podioVersionStr << podioVersion;
nlohmann::json jsonDict = {{"podioVersion", podioVersionStr.str()}};
std::stringstream e4hVersionStr;
e4hVersionStr << edm4hep::version::build_version;
nlohmann::json jsonDict = {{"podioVersion", podioVersionStr.str()}, {"edm4hepVersion", e4hVersionStr.str()}};

for (unsigned i = 0; i < collList.size(); ++i) {
auto coll = frame.get(collList[i]);
if (!coll) {
continue;
}

// Datatypes
if (coll->getTypeName() == "edm4hep::EventHeaderCollection") {
auto& eventCollection = frame.get<edm4hep::EventHeaderCollection>(collList[i]);
nlohmann::json jsonColl{{
collList[i],
{{"collection", eventCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}},
}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
insertIntoJson<edm4hep::EventHeaderCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::MCParticleCollection") {
auto& particleCollection = frame.get<edm4hep::MCParticleCollection>(collList[i]);
nlohmann::json jsonColl{{
collList[i],
{{"collection", particleCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}},
}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
} else if (coll->getTypeName() == "edm4hep::MCRecoParticleAssociationCollection") {
auto& assocCollection = frame.get<edm4hep::MCRecoParticleAssociationCollection>(collList[i]);
nlohmann::json jsonColl{{
collList[i],
{{"collection", assocCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}},
}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
} else if (coll->getTypeName() == "edm4hep::VertexCollection") {
auto& vertexCollection = frame.get<edm4hep::VertexCollection>(collList[i]);
nlohmann::json jsonColl{
{collList[i],
{{"collection", vertexCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
} else if (coll->getTypeName() == "edm4hep::TrackCollection") {
auto& trackCollection = frame.get<edm4hep::TrackCollection>(collList[i]);
nlohmann::json jsonColl{
{collList[i],
{{"collection", trackCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
} else if (coll->getTypeName() == "edm4hep::TrackerHitCollection") {
auto& hitCollection = frame.get<edm4hep::TrackerHitCollection>(collList[i]);
nlohmann::json jsonColl{
{collList[i], {{"collection", hitCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
insertIntoJson<edm4hep::MCParticleCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::SimTrackerHitCollection") {
auto& hitCollection = frame.get<edm4hep::SimTrackerHitCollection>(collList[i]);
nlohmann::json jsonColl{
{collList[i], {{"collection", hitCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
} else if (coll->getTypeName() == "edm4hep::CalorimeterHitCollection") {
auto& hitCollection = frame.get<edm4hep::CalorimeterHitCollection>(collList[i]);
nlohmann::json jsonColl{
{collList[i], {{"collection", hitCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
insertIntoJson<edm4hep::SimTrackerHitCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::CaloHitContributionCollection") {
insertIntoJson<edm4hep::CaloHitContributionCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::SimCalorimeterHitCollection") {
auto& hitCollection = frame.get<edm4hep::SimCalorimeterHitCollection>(collList[i]);
nlohmann::json jsonColl{
{collList[i], {{"collection", hitCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
insertIntoJson<edm4hep::SimCalorimeterHitCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::RawCalorimeterHitCollection") {
insertIntoJson<edm4hep::RawCalorimeterHitCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::CalorimeterHitCollection") {
insertIntoJson<edm4hep::CalorimeterHitCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::ParticleIDCollection") {
insertIntoJson<edm4hep::ParticleIDCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::ClusterCollection") {
auto& clusterCollection = frame.get<edm4hep::ClusterCollection>(collList[i]);
nlohmann::json jsonColl{
{collList[i],
{{"collection", clusterCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
insertIntoJson<edm4hep::ClusterCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::TrackerHitCollection") {
insertIntoJson<edm4hep::TrackerHitCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::TrackerHitPlaneCollection") {
insertIntoJson<edm4hep::TrackerHitPlaneCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::RawTimeSeriesCollection") {
insertIntoJson<edm4hep::RawTimeSeriesCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::TrackCollection") {
insertIntoJson<edm4hep::TrackCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::VertexCollection") {
insertIntoJson<edm4hep::VertexCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::ReconstructedParticleCollection") {
auto& recoParticleCollection = frame.get<edm4hep::ReconstructedParticleCollection>(collList[i]);
nlohmann::json jsonColl{
{collList[i],
{{"collection", recoParticleCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
} else if (coll->getTypeName() == "edm4hep::ParticleIDCollection") {
auto& particleIDCollection = frame.get<edm4hep::ParticleIDCollection>(collList[i]);
nlohmann::json jsonColl{
{collList[i],
{{"collection", particleIDCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}};
jsonDict.insert(jsonColl.begin(), jsonColl.end());
insertIntoJson<edm4hep::ReconstructedParticleCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::SimPrimaryIonizationClusterCollection") {
insertIntoJson<edm4hep::SimPrimaryIonizationClusterCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::TrackerPulseCollection") {
insertIntoJson<edm4hep::TrackerPulseCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::RecIonizationClusterCollection") {
insertIntoJson<edm4hep::RecIonizationClusterCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::TimeSeriesCollection") {
insertIntoJson<edm4hep::TimeSeriesCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::RecDqdxCollection") {
insertIntoJson<edm4hep::RecDqdxCollection>(jsonDict, coll, collList[i]);
}
// Associations
else if (coll->getTypeName() == "edm4hep::MCRecoParticleAssociationCollection") {
insertIntoJson<edm4hep::MCRecoParticleAssociationCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::MCRecoCaloAssociationCollection") {
insertIntoJson<edm4hep::MCRecoCaloAssociationCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::MCRecoTrackerAssociationCollection") {
insertIntoJson<edm4hep::MCRecoTrackerAssociationCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::MCRecoTrackerHitPlaneAssociationCollection") {
insertIntoJson<edm4hep::MCRecoTrackerHitPlaneAssociationCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::MCRecoCaloParticleAssociationCollection") {
insertIntoJson<edm4hep::MCRecoCaloParticleAssociationCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::MCRecoClusterParticleAssociationCollection") {
insertIntoJson<edm4hep::MCRecoClusterParticleAssociationCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::MCRecoTrackParticleAssociationCollection") {
insertIntoJson<edm4hep::MCRecoTrackParticleAssociationCollection>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "edm4hep::RecoParticleVertexAssociationCollection") {
insertIntoJson<edm4hep::RecoParticleVertexAssociationCollection>(jsonDict, coll, collList[i]);
}
// Podio user data
else if (coll->getTypeName() == "podio::UserDataCollection<float>") {
insertIntoJson<podio::UserDataCollection<float>>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "podio::UserDataCollection<double>") {
insertIntoJson<podio::UserDataCollection<double>>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "podio::UserDataCollection<int8_t>") {
insertIntoJson<podio::UserDataCollection<int8_t>>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "podio::UserDataCollection<int16_t>") {
insertIntoJson<podio::UserDataCollection<int16_t>>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "podio::UserDataCollection<int32_t>") {
insertIntoJson<podio::UserDataCollection<int32_t>>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "podio::UserDataCollection<int64_t>") {
insertIntoJson<podio::UserDataCollection<int64_t>>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "podio::UserDataCollection<uint8_t>") {
insertIntoJson<podio::UserDataCollection<uint8_t>>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "podio::UserDataCollection<uint16_t>") {
insertIntoJson<podio::UserDataCollection<uint16_t>>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "podio::UserDataCollection<uint32_t>") {
insertIntoJson<podio::UserDataCollection<uint32_t>>(jsonDict, coll, collList[i]);
} else if (coll->getTypeName() == "podio::UserDataCollection<uint64_t>") {
insertIntoJson<podio::UserDataCollection<uint64_t>>(jsonDict, coll, collList[i]);
} else {
std::cout << "WARNING: Collection type not recognized!\n"
<< " " << coll->getTypeName() << "\n";
Expand All @@ -130,7 +170,9 @@ std::vector<std::string> splitString(const std::string& inString) {
while (sStream.good()) {
std::string subString;
getline(sStream, subString, ',');
outString.emplace_back(subString);
if (!subString.empty()) {
outString.emplace_back(subString);
}
}

return outString;
Expand All @@ -146,13 +188,31 @@ int read_frames(const std::string& filename, const std::string& jsonFile, const
nlohmann::json allEventsDict;

unsigned nEvents = reader.getEntries(frameName);
if (nEvents < 1) {
std::cout << "WARNING: Input file contains no events!" << std::endl;
return EXIT_SUCCESS;
}
if (nEventsMax > 0) {
if ((unsigned)nEventsMax < nEvents) {
nEvents = nEventsMax;
}
}

auto collList = splitString(requestedCollections);
if (collList.empty()) {
auto frame = podio::Frame(reader.readEntry(frameName, 0));
collList = frame.getAvailableCollections();
}
if (collList.empty()) {
std::cout << "WARNING: Input file does not contain any collections!" << std::endl;
return EXIT_SUCCESS;
}
if (verboser) {
std::cout << "INFO: Following collections will be converted:" << std::endl;
for (const auto& collName : collList) {
std::cout << " * " << collName << std::endl;
}
}

std::vector<int> eventVec;
if (!requestedEvents.empty()) {
Expand Down Expand Up @@ -210,20 +270,20 @@ int read_frames(const std::string& filename, const std::string& jsonFile, const
}

if (eventVec.empty()) {
unsigned modInfo = nEvents / 10;
const unsigned modInfo = nEvents > 10 ? nEvents / 10 : 1;
for (unsigned i = 0; i < nEvents; ++i) {
if (verboser && i % modInfo == 0) {
std::cout << "INFO: Reading event " << i << std::endl;
}

auto frame = podio::Frame(reader.readNextEntry(frameName));
auto eventDict = processEvent(frame, collList, verboser, reader.currentFileVersion());
auto frame = podio::Frame(reader.readEntry(frameName, i));
auto eventDict = processEvent(frame, collList, reader.currentFileVersion());
allEventsDict["Event " + std::to_string(i)] = eventDict;
}
} else {
for (auto& i : eventVec) {
auto frame = podio::Frame(reader.readEntry(frameName, i));
auto eventDict = processEvent(frame, collList, verboser, reader.currentFileVersion());
auto eventDict = processEvent(frame, collList, reader.currentFileVersion());
allEventsDict["Event " + std::to_string(i)] = eventDict;
}
}
Expand Down
Loading

0 comments on commit eba6e61

Please sign in to comment.