From eba6e610ce172315532a677d653da034dabf852b Mon Sep 17 00:00:00 2001 From: Juraj Smiesko <34742917+kjvbrt@users.noreply.github.com> Date: Wed, 1 Nov 2023 19:58:37 +0100 Subject: [PATCH] edm4hep2json: All EDM4hep collections + ROOT legacy reader (#227) * All EDM4hep collections + ROOT legacy reader * Add EDM4hep version to the output * Adding test which checks for all the collections --- test/CMakeLists.txt | 7 +- test/tools/CMakeLists.txt | 12 ++ test/tools/test_all_collections.py | 49 +++++++ tools/include/edm4hep2json.hxx | 206 +++++++++++++++++++---------- tools/src/edm4hep2json.cxx | 55 ++++---- 5 files changed, 227 insertions(+), 102 deletions(-) create mode 100644 test/tools/CMakeLists.txt create mode 100644 test/tools/test_all_collections.py diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1ee3c3c41..11eec0b14 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -79,10 +79,5 @@ if(HepMC3_FOUND AND HepPDT_FOUND ) ) endif() -if (nlohmann_json_FOUND) - add_test(NAME convert_events COMMAND edm4hep2json edm4hep_events.root) - set_property(TEST convert_events PROPERTY DEPENDS write_events) - set_test_env(convert_events) -endif() - add_subdirectory(utils) +add_subdirectory(tools) diff --git a/test/tools/CMakeLists.txt b/test/tools/CMakeLists.txt new file mode 100644 index 000000000..2d29513f2 --- /dev/null +++ b/test/tools/CMakeLists.txt @@ -0,0 +1,12 @@ +if (nlohmann_json_FOUND) + add_test(NAME convert_events + COMMAND edm4hep2json ${CMAKE_CURRENT_BINARY_DIR}/../edm4hep_events.root) + set_property(TEST convert_events PROPERTY DEPENDS write_events) + set_test_env(convert_events) + + add_test(NAME test_convert_all_collections + COMMAND python ${CMAKE_CURRENT_LIST_DIR}/test_all_collections.py + ${PROJECT_SOURCE_DIR}/edm4hep.yaml + ${PROJECT_SOURCE_DIR}/tools/include/edm4hep2json.hxx) + set_test_env(test_convert_all_collections) +endif() diff --git a/test/tools/test_all_collections.py b/test/tools/test_all_collections.py new file mode 100644 
index 000000000..58ff2fc0e
--- /dev/null
+++ b/test/tools/test_all_collections.py
@@ -0,0 +1,50 @@
+'''
+Tests if all datatypes are used in the cxx file.
+'''
+
+import sys
+import re
+import argparse
+import yaml
+
+
+def test(yamlfile_path, cxxfile_path):
+    '''
+    Test itself.
+    Takes two parameters, Podio YAML file location and cxx file to be checked.
+    Exits with status 2 if a datatype from the YAML file has no matching
+    `insertIntoJson<edm4hep::...Collection>` call in the cxx file.
+    '''
+
+    with open(yamlfile_path, mode='r', encoding="utf-8") as yamlfile:
+        datamodel = yaml.safe_load(yamlfile)
+
+    # Names of the datatypes for which `insertIntoJson` is called in the cxx file
+    datatypes_found = set()
+
+    with open(cxxfile_path, mode='r', encoding="utf-8") as cxxfile:
+        # Iterate over the file object only; an extra `readline()` call inside
+        # the loop would silently skip every other line.
+        for cxxline in cxxfile:
+            result = re.search('insertIntoJson<edm4hep::(.+?)Collection>',
+                               cxxline)
+            if result:
+                datatypes_found.add('edm4hep::' + result.group(1))
+
+    datatypes = set(datamodel['datatypes'])
+
+    if not datatypes.issubset(datatypes_found):
+        missing_datatypes = datatypes - datatypes_found
+        print('ERROR: One or more datatypes are not being converted:')
+        for datatype in missing_datatypes:
+            print(' ' + datatype)
+        sys.exit(2)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='Test all collections')
+    parser.add_argument('yamlfile')
+    parser.add_argument('cxxfile')
+    args = parser.parse_args()
+
+    test(args.yamlfile, args.cxxfile)
diff --git a/tools/include/edm4hep2json.hxx b/tools/include/edm4hep2json.hxx
index a90fad2a6..50dbe0f3f 100644
--- a/tools/include/edm4hep2json.hxx
+++ b/tools/include/edm4hep2json.hxx
@@ -2,19 +2,38 @@
 #define EDM4HEP_TO_JSON_H
 
 // EDM4hep event data model
+#include "edm4hep/CaloHitContributionCollection.h"
 #include "edm4hep/CalorimeterHitCollection.h"
 #include "edm4hep/ClusterCollection.h"
 #include "edm4hep/EventHeaderCollection.h"
 #include "edm4hep/MCParticleCollection.h"
-#include "edm4hep/MCRecoParticleAssociationCollection.h"
 #include "edm4hep/ParticleIDCollection.h"
+#include "edm4hep/RawCalorimeterHitCollection.h"
+#include "edm4hep/RawTimeSeriesCollection.h" +#include "edm4hep/RecDqdxCollection.h" +#include "edm4hep/RecIonizationClusterCollection.h" #include "edm4hep/ReconstructedParticleCollection.h" #include "edm4hep/SimCalorimeterHitCollection.h" +#include "edm4hep/SimPrimaryIonizationClusterCollection.h" #include "edm4hep/SimTrackerHitCollection.h" +#include "edm4hep/TimeSeriesCollection.h" #include "edm4hep/TrackCollection.h" #include "edm4hep/TrackerHitCollection.h" +#include "edm4hep/TrackerHitPlaneCollection.h" +#include "edm4hep/TrackerPulseCollection.h" #include "edm4hep/VertexCollection.h" +#include "edm4hep/MCRecoCaloAssociationCollection.h" +#include "edm4hep/MCRecoCaloParticleAssociationCollection.h" +#include "edm4hep/MCRecoClusterParticleAssociationCollection.h" +#include "edm4hep/MCRecoParticleAssociationCollection.h" +#include "edm4hep/MCRecoTrackParticleAssociationCollection.h" +#include "edm4hep/MCRecoTrackerAssociationCollection.h" +#include "edm4hep/MCRecoTrackerHitPlaneAssociationCollection.h" +#include "edm4hep/RecoParticleVertexAssociationCollection.h" + +#include "edm4hep/EDM4hepVersion.h" + // podio specific includes #include "podio/Frame.h" #include "podio/UserDataCollection.h" @@ -25,17 +44,28 @@ // STL #include +#include #include #include #include #include #include -nlohmann::json processEvent(const podio::Frame& frame, std::vector& collList, bool verboser, +template +void insertIntoJson(nlohmann::json& jsonDict, const podio::CollectionBase* coll, const std::string& name) { + const auto* typedColl = static_cast(coll); // safe to cast, since we have queried the type before + nlohmann::json jsonColl{ + {name, {{"collection", *typedColl}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; + jsonDict.insert(jsonColl.begin(), jsonColl.end()); +} + +nlohmann::json processEvent(const podio::Frame& frame, std::vector& collList, podio::version::Version podioVersion) { std::stringstream podioVersionStr; podioVersionStr << podioVersion; - 
nlohmann::json jsonDict = {{"podioVersion", podioVersionStr.str()}}; + std::stringstream e4hVersionStr; + e4hVersionStr << edm4hep::version::build_version; + nlohmann::json jsonDict = {{"podioVersion", podioVersionStr.str()}, {"edm4hepVersion", e4hVersionStr.str()}}; for (unsigned i = 0; i < collList.size(); ++i) { auto coll = frame.get(collList[i]); @@ -43,77 +73,87 @@ nlohmann::json processEvent(const podio::Frame& frame, std::vector& continue; } + // Datatypes if (coll->getTypeName() == "edm4hep::EventHeaderCollection") { - auto& eventCollection = frame.get(collList[i]); - nlohmann::json jsonColl{{ - collList[i], - {{"collection", eventCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}, - }}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); + insertIntoJson(jsonDict, coll, collList[i]); } else if (coll->getTypeName() == "edm4hep::MCParticleCollection") { - auto& particleCollection = frame.get(collList[i]); - nlohmann::json jsonColl{{ - collList[i], - {{"collection", particleCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}, - }}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); - } else if (coll->getTypeName() == "edm4hep::MCRecoParticleAssociationCollection") { - auto& assocCollection = frame.get(collList[i]); - nlohmann::json jsonColl{{ - collList[i], - {{"collection", assocCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}, - }}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); - } else if (coll->getTypeName() == "edm4hep::VertexCollection") { - auto& vertexCollection = frame.get(collList[i]); - nlohmann::json jsonColl{ - {collList[i], - {{"collection", vertexCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); - } else if (coll->getTypeName() == "edm4hep::TrackCollection") { - auto& trackCollection = frame.get(collList[i]); - nlohmann::json jsonColl{ - {collList[i], - {{"collection", 
trackCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); - } else if (coll->getTypeName() == "edm4hep::TrackerHitCollection") { - auto& hitCollection = frame.get(collList[i]); - nlohmann::json jsonColl{ - {collList[i], {{"collection", hitCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); + insertIntoJson(jsonDict, coll, collList[i]); } else if (coll->getTypeName() == "edm4hep::SimTrackerHitCollection") { - auto& hitCollection = frame.get(collList[i]); - nlohmann::json jsonColl{ - {collList[i], {{"collection", hitCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); - } else if (coll->getTypeName() == "edm4hep::CalorimeterHitCollection") { - auto& hitCollection = frame.get(collList[i]); - nlohmann::json jsonColl{ - {collList[i], {{"collection", hitCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::CaloHitContributionCollection") { + insertIntoJson(jsonDict, coll, collList[i]); } else if (coll->getTypeName() == "edm4hep::SimCalorimeterHitCollection") { - auto& hitCollection = frame.get(collList[i]); - nlohmann::json jsonColl{ - {collList[i], {{"collection", hitCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::RawCalorimeterHitCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::CalorimeterHitCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::ParticleIDCollection") { + 
insertIntoJson(jsonDict, coll, collList[i]); } else if (coll->getTypeName() == "edm4hep::ClusterCollection") { - auto& clusterCollection = frame.get(collList[i]); - nlohmann::json jsonColl{ - {collList[i], - {{"collection", clusterCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::TrackerHitCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::TrackerHitPlaneCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::RawTimeSeriesCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::TrackCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::VertexCollection") { + insertIntoJson(jsonDict, coll, collList[i]); } else if (coll->getTypeName() == "edm4hep::ReconstructedParticleCollection") { - auto& recoParticleCollection = frame.get(collList[i]); - nlohmann::json jsonColl{ - {collList[i], - {{"collection", recoParticleCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); - } else if (coll->getTypeName() == "edm4hep::ParticleIDCollection") { - auto& particleIDCollection = frame.get(collList[i]); - nlohmann::json jsonColl{ - {collList[i], - {{"collection", particleIDCollection}, {"collID", coll->getID()}, {"collType", coll->getTypeName()}}}}; - jsonDict.insert(jsonColl.begin(), jsonColl.end()); + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::SimPrimaryIonizationClusterCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::TrackerPulseCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if 
(coll->getTypeName() == "edm4hep::RecIonizationClusterCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::TimeSeriesCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::RecDqdxCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } + // Associations + else if (coll->getTypeName() == "edm4hep::MCRecoParticleAssociationCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::MCRecoCaloAssociationCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::MCRecoTrackerAssociationCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::MCRecoTrackerHitPlaneAssociationCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::MCRecoCaloParticleAssociationCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::MCRecoClusterParticleAssociationCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::MCRecoTrackParticleAssociationCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "edm4hep::RecoParticleVertexAssociationCollection") { + insertIntoJson(jsonDict, coll, collList[i]); + } + // Podio user data + else if (coll->getTypeName() == "podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == 
"podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); + } else if (coll->getTypeName() == "podio::UserDataCollection") { + insertIntoJson>(jsonDict, coll, collList[i]); } else { std::cout << "WARNING: Collection type not recognized!\n" << " " << coll->getTypeName() << "\n"; @@ -130,7 +170,9 @@ std::vector splitString(const std::string& inString) { while (sStream.good()) { std::string subString; getline(sStream, subString, ','); - outString.emplace_back(subString); + if (!subString.empty()) { + outString.emplace_back(subString); + } } return outString; @@ -146,6 +188,10 @@ int read_frames(const std::string& filename, const std::string& jsonFile, const nlohmann::json allEventsDict; unsigned nEvents = reader.getEntries(frameName); + if (nEvents < 1) { + std::cout << "WARNING: Input file contains no events!" << std::endl; + return EXIT_SUCCESS; + } if (nEventsMax > 0) { if ((unsigned)nEventsMax < nEvents) { nEvents = nEventsMax; @@ -153,6 +199,20 @@ int read_frames(const std::string& filename, const std::string& jsonFile, const } auto collList = splitString(requestedCollections); + if (collList.empty()) { + auto frame = podio::Frame(reader.readEntry(frameName, 0)); + collList = frame.getAvailableCollections(); + } + if (collList.empty()) { + std::cout << "WARNING: Input file does not contain any collections!" 
<< std::endl; + return EXIT_SUCCESS; + } + if (verboser) { + std::cout << "INFO: Following collections will be converted:" << std::endl; + for (const auto& collName : collList) { + std::cout << " * " << collName << std::endl; + } + } std::vector eventVec; if (!requestedEvents.empty()) { @@ -210,20 +270,20 @@ int read_frames(const std::string& filename, const std::string& jsonFile, const } if (eventVec.empty()) { - unsigned modInfo = nEvents / 10; + const unsigned modInfo = nEvents > 10 ? nEvents / 10 : 1; for (unsigned i = 0; i < nEvents; ++i) { if (verboser && i % modInfo == 0) { std::cout << "INFO: Reading event " << i << std::endl; } - auto frame = podio::Frame(reader.readNextEntry(frameName)); - auto eventDict = processEvent(frame, collList, verboser, reader.currentFileVersion()); + auto frame = podio::Frame(reader.readEntry(frameName, i)); + auto eventDict = processEvent(frame, collList, reader.currentFileVersion()); allEventsDict["Event " + std::to_string(i)] = eventDict; } } else { for (auto& i : eventVec) { auto frame = podio::Frame(reader.readEntry(frameName, i)); - auto eventDict = processEvent(frame, collList, verboser, reader.currentFileVersion()); + auto eventDict = processEvent(frame, collList, reader.currentFileVersion()); allEventsDict["Event " + std::to_string(i)] = eventDict; } } diff --git a/tools/src/edm4hep2json.cxx b/tools/src/edm4hep2json.cxx index cdd3e6460..affbf2b4a 100644 --- a/tools/src/edm4hep2json.cxx +++ b/tools/src/edm4hep2json.cxx @@ -2,7 +2,11 @@ #include "edm4hep2json.hxx" // ROOT +#include "TFile.h" + +// podio #include "podio/ROOTFrameReader.h" +#include "podio/ROOTLegacyReader.h" // std #include @@ -27,8 +31,8 @@ void printHelp() { } int main(int argc, char** argv) { - std::filesystem::path inFile; - std::filesystem::path outFile; + std::filesystem::path inFilePath; + std::filesystem::path outFilePath; std::string requestedCollections; std::string requestedEvents; std::string frameName = "events"; @@ -54,10 +58,10 @@ int 
main(int argc, char** argv) { switch (opt) { case 'i': - inFile = std::filesystem::path(optarg); + inFilePath = std::filesystem::path(optarg); break; case 'o': - outFile = std::filesystem::path(optarg); + outFilePath = std::filesystem::path(optarg); break; case 'l': requestedCollections = std::string(optarg); @@ -92,39 +96,44 @@ int main(int argc, char** argv) { } for (int i = optind; i < argc; ++i) { - inFile = std::string(argv[i]); + inFilePath = std::string(argv[i]); } - if (inFile.empty()) { - std::cout << "ERROR: Input .root file not provided!" << std::endl; + if (inFilePath.empty()) { + std::cerr << "ERROR: Input .root file not provided!" << std::endl; return EXIT_FAILURE; } - if (!std::filesystem::exists(inFile)) { - std::cout << "ERROR: Input .root file can't be read!" << std::endl; + if (!std::filesystem::exists(inFilePath)) { + std::cerr << "ERROR: Input .root file can't be read!\n " << inFilePath << std::endl; return EXIT_FAILURE; } - if (requestedCollections.empty()) { - requestedCollections = "GenParticles,BuildUpVertices,SiTracks," - "PandoraClusters,VertexJets,EventHeader"; - if (verboser) { - std::cout << "DEBUG: Using default collection to convert:\n" - << " " << requestedCollections << std::endl; - } - } - - if (outFile.empty()) { - std::string outFileStr = inFile.stem().string(); + if (outFilePath.empty()) { + std::string outFileStr = inFilePath.stem().string(); if (outFileStr.find(".edm4hep") != std::string::npos) { outFileStr = outFileStr.erase(outFileStr.find(".edm4hep"), 8); } if (outFileStr.find("_edm4hep") != std::string::npos) { outFileStr = outFileStr.erase(outFileStr.find("_edm4hep"), 8); } - outFile = std::filesystem::path(outFileStr + ".edm4hep.json"); + outFilePath = std::filesystem::path(outFileStr + ".edm4hep.json"); + } + + bool legacyReader = false; + { + std::unique_ptr inFile(TFile::Open(inFilePath.c_str(), "READ")); + legacyReader = !inFile->GetListOfKeys()->FindObject("podio_metadata"); + } + + if (legacyReader) { + std::cout 
<< "WARNING: Reading legacy file, some collections might not be recognized!" << std::endl; + return read_frames(inFilePath, outFilePath, requestedCollections, requestedEvents, + frameName, nEventsMax, verboser); + } else { + return read_frames(inFilePath, outFilePath, requestedCollections, requestedEvents, + frameName, nEventsMax, verboser); } - return read_frames(inFile, outFile, requestedCollections, requestedEvents, frameName, - nEventsMax, verboser); + return EXIT_SUCCESS; }