From 50bfe41d7d8350bb5e828bc767c15726addd0f47 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 11 Jun 2024 16:11:57 +0200 Subject: [PATCH] [WIP] Make the TTree based ROOT backend store the GenericParameters the same way as the RNTuple based one (#615) * Refactor root utilities and reduce code duplication - Lift common type defs into one header to not redefine them - Keep all root related utilitie classes in one header * Rename CollectionInfo to CategoryInfo to better convey intention * Remove unnecessary member variables * Add functionality to get all values for a type to GenericParameters * Move some global state into category state * Avoid unnecessary copy * Fix include guards to conform to style guide * Make sure to take lock long enough * Switch to tuple for consistency * Remove obsolete reset * Make the ROOTWriter write split GenericParameters * Make the ROOTReader read the files again * Add tests to make sure that v00-99 files stay readable * Make older compilers happy with in place construction * Make sure to only run SIO legacy tests when input data is available * Make tests work by hiding forward declaration from CLING * Add comment for reasoning about excluding forward decl for CLING --- include/podio/CollectionBranches.h | 23 ---- include/podio/GenericParameters.h | 27 ++++ include/podio/RNTupleReader.h | 2 - include/podio/RNTupleWriter.h | 56 ++++---- include/podio/ROOTLegacyReader.h | 5 +- include/podio/ROOTReader.h | 7 +- include/podio/ROOTWriter.h | 34 ++--- include/podio/utilities/RootHelpers.h | 81 ++++++++++++ src/CMakeLists.txt | 1 + src/RNTupleWriter.cc | 113 ++++++++-------- src/ROOTLegacyReader.cc | 6 +- src/ROOTReader.cc | 122 ++++++++++++++---- src/ROOTWriter.cc | 61 ++++++--- src/rootUtils.h | 15 +-- src/selection.xml | 8 ++ tests/CMakeLists.txt | 2 + .../input_files/v00-99-example_frame.root.md5 | 1 + .../input_files/v00-99-example_frame.sio.md5 | 1 + tests/root_io/CMakeLists.txt | 4 +- tests/sio_io/CMakeLists.txt | 4 +- 20 files changed, 380 insertions(+), 193 deletions(-) delete mode 100644 include/podio/CollectionBranches.h create mode 100644 include/podio/utilities/RootHelpers.h create mode 100644 tests/input_files/v00-99-example_frame.root.md5 create mode 100644 tests/input_files/v00-99-example_frame.sio.md5 diff --git a/include/podio/CollectionBranches.h b/include/podio/CollectionBranches.h deleted file mode 100644 index 3ab3e8129..000000000 --- a/include/podio/CollectionBranches.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef PODIO_COLLECTIONBRANCHES_H -#define PODIO_COLLECTIONBRANCHES_H - -#include "TBranch.h" - -#include -#include - -namespace podio::root_utils { -/// Small helper struct to collect all branches that are necessary to read or -/// write a collection. Needed to cache the branch pointers and avoid having to -/// get them from a TTree/TChain for every event. -struct CollectionBranches { - TBranch* data{nullptr}; - std::vector refs{}; - std::vector vecs{}; - std::vector refNames{}; ///< The names of the relation branches - std::vector vecNames{}; ///< The names of the vector member branches -}; - -} // namespace podio::root_utils - -#endif diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index 55119a2e8..88d8953f4 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -30,6 +31,12 @@ class RNTupleWriter; namespace podio { +#if !defined(__CLING__) +// cling doesn't really deal well (i.e. at all in this case) with the forward +// declaration here and errors out, breaking e.g. python bindings. +class ROOTReader; +#endif + /// The types which are supported in the GenericParameters using SupportedGenericDataTypes = std::tuple; @@ -105,6 +112,10 @@ class GenericParameters { template > std::vector getKeys() const; + /// Get all the available values for a given type + template > + std::vector> getValues() const; + /// erase all elements void clear() { _intMap.clear(); @@ -129,6 +140,10 @@ class GenericParameters { friend RNTupleWriter; #endif +#if !defined(__CLING__) + friend ROOTReader; +#endif + /// Get a reference to the internal map for a given type template const MapType>& getMap() const { @@ -243,5 +258,17 @@ std::vector GenericParameters::getKeys() const { return keys; } +template +std::vector> GenericParameters::getValues() const { + std::vector> values; + { + auto& mtx = getMutex(); + const auto& map = getMap(); + std::lock_guard lock{mtx}; + values.reserve(map.size()); + std::transform(map.begin(), map.end(), std::back_inserter(values), [](const auto& pair) { return pair.second; }); + } + return values; +} } // namespace podio #endif diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index 1cffa149f..057b9fcef 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -1,8 +1,6 @@ #ifndef PODIO_RNTUPLEREADER_H #define PODIO_RNTUPLEREADER_H -#include "podio/CollectionBranches.h" -#include "podio/ICollectionProvider.h" #include "podio/ROOTFrameData.h" #include "podio/SchemaEvolution.h" #include "podio/podioVersion.h" diff --git a/include/podio/RNTupleWriter.h b/include/podio/RNTupleWriter.h index ed0f3012c..d8a53ba43 100644 --- a/include/podio/RNTupleWriter.h +++ b/include/podio/RNTupleWriter.h @@ -1,11 +1,11 @@ #ifndef PODIO_RNTUPLEWRITER_H #define PODIO_RNTUPLEWRITER_H -#include "podio/CollectionBase.h" #include "podio/Frame.h" #include "podio/GenericParameters.h" #include "podio/SchemaEvolution.h" #include "podio/utilities/DatamodelRegistryIOHelpers.h" +#include "podio/utilities/RootHelpers.h" #include "TFile.h" #include @@ -102,42 +102,42 @@ class RNTupleWriter { checkConsistency(const std::vector& collsToWrite, const std::string& category) const; private: - template - void fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry); + std::unique_ptr + createModels(const std::vector& collections); + + /// Helper struct to group all the necessary information for one category. + struct CategoryInfo { + std::unique_ptr writer{nullptr}; ///< The RNTupleWriter for this category + + // The following are assumed to run in parallel! + std::vector ids{}; ///< The ids of all collections + std::vector names{}; ///< The names of all collections + std::vector types{}; ///< The types of all collections + std::vector subsetCollections{}; ///< The flags identifying the subcollections + std::vector schemaVersions{}; ///< The schema versions of all collections + + // Storage for the keys & values of all the parameters of this category + // (resp. at least the current entry) + root_utils::ParamStorage intParams{}; + root_utils::ParamStorage floatParams{}; + root_utils::ParamStorage doubleParams{}; + root_utils::ParamStorage stringParams{}; + }; + CategoryInfo& getCategoryInfo(const std::string& category); - using StoreCollection = std::pair; - std::unique_ptr createModels(const std::vector& collections); + template + void fillParams(const GenericParameters& params, CategoryInfo& catInfo, ROOT::Experimental::REntry* entry); - std::unique_ptr m_metadata{}; - std::unique_ptr m_metadataWriter{}; + template + root_utils::ParamStorage& getParamStorage(CategoryInfo& catInfo); std::unique_ptr m_file{}; DatamodelDefinitionCollector m_datamodelCollector{}; - struct CollectionInfo { - std::vector id{}; - std::vector name{}; - std::vector type{}; - std::vector isSubsetCollection{}; - std::vector schemaVersion{}; - std::unique_ptr writer{nullptr}; - }; - CollectionInfo& getCategoryInfo(const std::string& category); - - std::unordered_map m_categories{}; + std::unordered_map m_categories{}; bool m_finished{false}; - - std::vector m_intkeys{}, m_floatkeys{}, m_doublekeys{}, m_stringkeys{}; - - std::vector> m_intvalues{}; - std::vector> m_floatvalues{}; - std::vector> m_doublevalues{}; - std::vector> m_stringvalues{}; - - template - std::pair&, std::vector>&> getKeyValueVectors(); }; } // namespace podio diff --git a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h index c379eea1c..9383ca51b 100644 --- a/include/podio/ROOTLegacyReader.h +++ b/include/podio/ROOTLegacyReader.h @@ -1,13 +1,12 @@ #ifndef PODIO_ROOTLEGACYREADER_H #define PODIO_ROOTLEGACYREADER_H -#include "podio/CollectionBranches.h" #include "podio/ROOTFrameData.h" #include "podio/podioVersion.h" +#include "podio/utilities/RootHelpers.h" #include "TChain.h" -#include #include #include #include @@ -114,7 +113,7 @@ class ROOTLegacyReader { private: std::pair getLocalTreeAndEntry(const std::string& treename); - void createCollectionBranches(const std::vector>& collInfo); + void createCollectionBranches(const std::vector& collInfo); podio::GenericParameters readEventMetaData(); diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index b14a08d17..e6ecbd0e0 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -1,14 +1,13 @@ #ifndef PODIO_ROOTREADER_H #define PODIO_ROOTREADER_H -#include "podio/CollectionBranches.h" #include "podio/ROOTFrameData.h" #include "podio/podioVersion.h" #include "podio/utilities/DatamodelRegistryIOHelpers.h" +#include "podio/utilities/RootHelpers.h" #include "TChain.h" -#include #include #include #include @@ -157,6 +156,10 @@ class ROOTReader { /// Read the parameters for the entry specified in the passed CategoryInfo GenericParameters readEntryParameters(CategoryInfo& catInfo, bool reloadBranches, unsigned int localEntry); + template + static void readParams(CategoryInfo& catInfo, podio::GenericParameters& params, bool reloadBranches, + unsigned int localEntry); + /// Read the data entry specified in the passed CategoryInfo, and increase the /// counter afterwards. In case the requested entry is larger than the /// available number of entries, return a nullptr. diff --git a/include/podio/ROOTWriter.h b/include/podio/ROOTWriter.h index 06aa61a46..5ee23835f 100644 --- a/include/podio/ROOTWriter.h +++ b/include/podio/ROOTWriter.h @@ -1,9 +1,9 @@ #ifndef PODIO_ROOTWRITER_H #define PODIO_ROOTWRITER_H -#include "podio/CollectionBranches.h" #include "podio/CollectionIDTable.h" #include "podio/utilities/DatamodelRegistryIOHelpers.h" +#include "podio/utilities/RootHelpers.h" #include "TFile.h" @@ -100,32 +100,34 @@ class ROOTWriter { checkConsistency(const std::vector& collsToWrite, const std::string& category) const; private: - using StoreCollection = std::pair; - - // collectionID, collectionType, subsetCollection - // @note same as in rootUtils.h private header! - using CollectionInfoT = std::tuple; - /// Helper struct to group together all necessary state to write / process a /// given category. Created during the first writing of a category struct CategoryInfo { - TTree* tree{nullptr}; ///< The TTree to which this category is written - std::vector branches{}; ///< The branches for this category - std::vector collInfo{}; ///< Collection info for this category - podio::CollectionIDTable idTable{}; ///< The collection id table for this category - std::vector collsToWrite{}; ///< The collections to write for this category + TTree* tree{nullptr}; ///< The TTree to which this category is written + std::vector branches{}; ///< The branches for this category + std::vector collInfo{}; ///< Collection info for this category + podio::CollectionIDTable idTable{}; ///< The collection id table for this category + std::vector collsToWrite{}; ///< The collections to write for this category + + // Storage for the keys & values of all the parameters of this category + // (resp. at least the current entry) + root_utils::ParamStorage intParams{}; + root_utils::ParamStorage floatParams{}; + root_utils::ParamStorage doubleParams{}; + root_utils::ParamStorage stringParams{}; }; /// Initialize the branches for this category - void initBranches(CategoryInfo& catInfo, const std::vector& collections, + void initBranches(CategoryInfo& catInfo, const std::vector& collections, /*const*/ podio::GenericParameters& parameters); /// Get the (potentially uninitialized category information for this category) CategoryInfo& getCategoryInfo(const std::string& category); - static void resetBranches(std::vector& branches, - const std::vector& collections, - /*const*/ podio::GenericParameters* parameters); + static void resetBranches(CategoryInfo& categoryInfo, const std::vector& collections); + + /// Fill the parameter keys and values into the CategoryInfo storage + static void fillParams(CategoryInfo& catInfo, const GenericParameters& params); std::unique_ptr m_file{nullptr}; ///< The storage file std::unordered_map m_categories{}; ///< All categories diff --git a/include/podio/utilities/RootHelpers.h b/include/podio/utilities/RootHelpers.h new file mode 100644 index 000000000..8e3118b4d --- /dev/null +++ b/include/podio/utilities/RootHelpers.h @@ -0,0 +1,81 @@ +#ifndef PODIO_UTILITIES_ROOTHELPERS_H +#define PODIO_UTILITIES_ROOTHELPERS_H + +#include "TBranch.h" + +#include +#include +#include + +namespace podio { +class CollectionBase; + +namespace root_utils { + + // A collection of additional information that describes the collection: the + // collectionID, the collection (data) type, whether it is a subset + // collection, and its schema version + using CollectionWriteInfoT = std::tuple; + // for backwards compatibility + using CollectionInfoWithoutSchemaT = std::tuple; + + /// A collection name and a base pointer grouped together for writing + using StoreCollection = std::tuple; + + /// Small helper struct to collect all branches that are necessary to read or + /// write a collection. Needed to cache the branch pointers and avoid having to + /// get them from a TTree/TChain for every event. + struct CollectionBranches { + CollectionBranches() = default; + ~CollectionBranches() = default; + CollectionBranches(const CollectionBranches&) = delete; + CollectionBranches& operator=(const CollectionBranches&) = delete; + CollectionBranches(CollectionBranches&&) = default; + CollectionBranches& operator=(CollectionBranches&&) = default; + + CollectionBranches(TBranch* dataBranch) : data(dataBranch) { + } + + TBranch* data{nullptr}; + std::vector refs{}; + std::vector vecs{}; + std::vector refNames{}; ///< The names of the relation branches + std::vector vecNames{}; ///< The names of the vector member branches + }; + + /// Pair of keys and values for one type of the ones that can be stored in + /// GenericParameters + template + struct ParamStorage { + ParamStorage() = default; + ~ParamStorage() = default; + ParamStorage(const ParamStorage&) = delete; + ParamStorage& operator=(const ParamStorage&) = delete; + ParamStorage(ParamStorage&&) = default; + ParamStorage& operator=(ParamStorage&&) = default; + + ParamStorage(const std::vector& ks, const std::vector>& vs) : keys(ks), values(vs) { + } + + auto keysPtr() { + m_keysPtr = &keys; + return &m_keysPtr; + } + + auto valuesPtr() { + m_valuesPtr = &values; + return &m_valuesPtr; + } + + std::vector keys{}; + std::vector> values{}; + + private: + std::vector* m_keysPtr{nullptr}; + std::vector>* m_valuesPtr{nullptr}; + }; + +} // namespace root_utils +} // namespace podio + +#endif // PODIO_UTILITIES_ROOTHELPERS_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 393e93383..561e2a9b9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -94,6 +94,7 @@ SET(root_headers ${PROJECT_SOURCE_DIR}/include/podio/ROOTLegacyReader.h ${PROJECT_SOURCE_DIR}/include/podio/ROOTWriter.h ${PROJECT_SOURCE_DIR}/include/podio/ROOTFrameData.h + ${PROJECT_SOURCE_DIR}/include/podio/utilities/RootHelpers.h ) if(ENABLE_RNTUPLE) list(APPEND root_headers diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index 38231cc18..55d23167f 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -17,7 +17,6 @@ namespace podio { RNTupleWriter::RNTupleWriter(const std::string& filename) : - m_metadata(ROOT::Experimental::RNTupleModel::Create()), m_file(new TFile(filename.c_str(), "RECREATE", "data file")) { } @@ -28,40 +27,35 @@ RNTupleWriter::~RNTupleWriter() { } template -std::pair&, std::vector>&> RNTupleWriter::getKeyValueVectors() { +root_utils::ParamStorage& RNTupleWriter::getParamStorage(CategoryInfo& catInfo) { if constexpr (std::is_same_v) { - return {m_intkeys, m_intvalues}; + return catInfo.intParams; } else if constexpr (std::is_same_v) { - return {m_floatkeys, m_floatvalues}; + return catInfo.floatParams; } else if constexpr (std::is_same_v) { - return {m_doublekeys, m_doublevalues}; + return catInfo.doubleParams; } else if constexpr (std::is_same_v) { - return {m_stringkeys, m_stringvalues}; + return catInfo.stringParams; } else { throw std::runtime_error("Unknown type"); } } template -void RNTupleWriter::fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry) { - auto [key, value] = getKeyValueVectors(); +void RNTupleWriter::fillParams(const GenericParameters& params, CategoryInfo& catInfo, + ROOT::Experimental::REntry* entry) { + auto& paramStorage = getParamStorage(catInfo); + auto& keys = paramStorage.keys; + auto& values = paramStorage.values; + keys = params.getKeys(); + values = params.getValues(); #if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) - entry->BindRawPtr(root_utils::getGPKeyName(), &key); - entry->BindRawPtr(root_utils::getGPValueName(), &value); + entry->BindRawPtr(root_utils::getGPKeyName(), &keys); + entry->BindRawPtr(root_utils::getGPValueName(), &values); #else - entry->CaptureValueUnsafe(root_utils::getGPKeyName(), &key); - entry->CaptureValueUnsafe(root_utils::getGPValueName(), &value); + entry->CaptureValueUnsafe(root_utils::getGPKeyName(), &keys); + entry->CaptureValueUnsafe(root_utils::getGPValueName(), &values); #endif - - key.clear(); - key.reserve(params.getMap().size()); - value.clear(); - value.reserve(params.getMap().size()); - - for (auto& [kk, vv] : params.getMap()) { - key.emplace_back(kk); - value.emplace_back(vv); - } } void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& category) { @@ -77,14 +71,14 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat const bool new_category = (catInfo.writer == nullptr); if (new_category) { // This is the minimal information that we need for now - catInfo.name = root_utils::sortAlphabeticaly(collsToWrite); + catInfo.names = root_utils::sortAlphabeticaly(collsToWrite); } - std::vector collections; - collections.reserve(catInfo.name.size()); + std::vector collections; + collections.reserve(catInfo.names.size()); // Only loop over the collections that were requested in the first Frame of // this category - for (const auto& name : catInfo.name) { + for (const auto& name : catInfo.names) { auto* coll = frame.getCollectionForWrite(name); if (!coll) { // Make sure all collections that we want to write are actually available @@ -101,15 +95,15 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat catInfo.writer = ROOT::Experimental::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {}); for (const auto& [name, coll] : collections) { - catInfo.id.emplace_back(coll->getID()); - catInfo.type.emplace_back(coll->getTypeName()); - catInfo.isSubsetCollection.emplace_back(coll->isSubsetCollection()); - catInfo.schemaVersion.emplace_back(coll->getSchemaVersion()); + catInfo.ids.emplace_back(coll->getID()); + catInfo.types.emplace_back(coll->getTypeName()); + catInfo.subsetCollections.emplace_back(coll->isSubsetCollection()); + catInfo.schemaVersions.emplace_back(coll->getSchemaVersion()); } } else { - if (!root_utils::checkConsistentColls(catInfo.name, collsToWrite)) { + if (!root_utils::checkConsistentColls(catInfo.names, collsToWrite)) { throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content. " + - root_utils::getInconsistentCollsMsg(catInfo.name, collsToWrite)); + root_utils::getInconsistentCollsMsg(catInfo.names, collsToWrite)); } } @@ -178,17 +172,17 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat // &const_cast(frame.getParameters())); } - auto params = frame.getParameters(); - fillParams(params, entry.get()); - fillParams(params, entry.get()); - fillParams(params, entry.get()); - fillParams(params, entry.get()); + const auto& params = frame.getParameters(); + fillParams(params, catInfo, entry.get()); + fillParams(params, catInfo, entry.get()); + fillParams(params, catInfo, entry.get()); + fillParams(params, catInfo, entry.get()); m_categories[category].writer->Fill(*entry); } std::unique_ptr -RNTupleWriter::createModels(const std::vector& collections) { +RNTupleWriter::createModels(const std::vector& collections) { auto model = ROOT::Experimental::RNTupleModel::CreateBare(); #if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) @@ -260,49 +254,49 @@ RNTupleWriter::createModels(const std::vector& collections) { return model; } -RNTupleWriter::CollectionInfo& RNTupleWriter::getCategoryInfo(const std::string& category) { +RNTupleWriter::CategoryInfo& RNTupleWriter::getCategoryInfo(const std::string& category) { if (auto it = m_categories.find(category); it != m_categories.end()) { return it->second; } - auto [it, _] = m_categories.try_emplace(category, CollectionInfo{}); + auto [it, _] = m_categories.try_emplace(category, CategoryInfo{}); return it->second; } void RNTupleWriter::finish() { + auto metadata = ROOT::Experimental::RNTupleModel::Create(); auto podioVersion = podio::version::build_version; - auto versionField = m_metadata->MakeField>(root_utils::versionBranchName); + auto versionField = metadata->MakeField>(root_utils::versionBranchName); *versionField = {podioVersion.major, podioVersion.minor, podioVersion.patch}; auto edmDefinitions = m_datamodelCollector.getDatamodelDefinitionsToWrite(); - auto edmField = - m_metadata->MakeField>>(root_utils::edmDefBranchName); + auto edmField = metadata->MakeField>>(root_utils::edmDefBranchName); *edmField = std::move(edmDefinitions); - auto availableCategoriesField = m_metadata->MakeField>(root_utils::availableCategories); + auto availableCategoriesField = metadata->MakeField>(root_utils::availableCategories); for (auto& [c, _] : m_categories) { availableCategoriesField->push_back(c); } for (auto& [category, collInfo] : m_categories) { - auto idField = m_metadata->MakeField>({root_utils::idTableName(category)}); - *idField = collInfo.id; - auto collectionNameField = m_metadata->MakeField>({root_utils::collectionName(category)}); - *collectionNameField = collInfo.name; - auto collectionTypeField = m_metadata->MakeField>({root_utils::collInfoName(category)}); - *collectionTypeField = collInfo.type; - auto subsetCollectionField = m_metadata->MakeField>({root_utils::subsetCollection(category)}); - *subsetCollectionField = collInfo.isSubsetCollection; - auto schemaVersionField = m_metadata->MakeField>({"schemaVersion_" + category}); - *schemaVersionField = collInfo.schemaVersion; + auto idField = metadata->MakeField>({root_utils::idTableName(category)}); + *idField = collInfo.ids; + auto collectionNameField = metadata->MakeField>({root_utils::collectionName(category)}); + *collectionNameField = collInfo.names; + auto collectionTypeField = metadata->MakeField>({root_utils::collInfoName(category)}); + *collectionTypeField = collInfo.types; + auto subsetCollectionField = metadata->MakeField>({root_utils::subsetCollection(category)}); + *subsetCollectionField = collInfo.subsetCollections; + auto schemaVersionField = metadata->MakeField>({"schemaVersion_" + category}); + *schemaVersionField = collInfo.schemaVersions; } - m_metadata->Freeze(); - m_metadataWriter = - ROOT::Experimental::RNTupleWriter::Append(std::move(m_metadata), root_utils::metaTreeName, *m_file, {}); + metadata->Freeze(); + auto metadataWriter = + ROOT::Experimental::RNTupleWriter::Append(std::move(metadata), root_utils::metaTreeName, *m_file, {}); - m_metadataWriter->Fill(); + metadataWriter->Fill(); m_file->Write(); @@ -311,7 +305,6 @@ void RNTupleWriter::finish() { for (auto& [_, catInfo] : m_categories) { catInfo.writer.reset(); } - m_metadataWriter.reset(); m_finished = true; } @@ -319,7 +312,7 @@ void RNTupleWriter::finish() { std::tuple, std::vector> RNTupleWriter::checkConsistency(const std::vector& collsToWrite, const std::string& category) const { if (const auto it = m_categories.find(category); it != m_categories.end()) { - return root_utils::getInconsistentColls(it->second.name, collsToWrite); + return root_utils::getInconsistentColls(it->second.names, collsToWrite); } return {std::vector{}, collsToWrite}; diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index c5815e4e3..494ec4301 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -138,7 +138,7 @@ void ROOTLegacyReader::openFiles(const std::vector& filenames) { // Check if the CollectionTypeInfo branch is there and assume that the file // has been written with with podio pre #197 (<0.13.1) if that is not the case if (auto* collInfoBranch = root_utils::getBranch(metadatatree, "CollectionTypeInfo")) { - auto collectionInfo = new std::vector; + auto collectionInfo = new std::vector; if (m_fileVersion < podio::version::Version{0, 16, 4}) { auto oldCollInfo = new std::vector(); @@ -171,7 +171,7 @@ unsigned ROOTLegacyReader::getEntries(const std::string& name) const { return m_chain->GetEntries(); } -void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { +void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { size_t collectionIndex{0}; for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { @@ -207,7 +207,7 @@ void ROOTLegacyReader::createCollectionBranches(const std::vector @@ -21,28 +20,91 @@ namespace podio { std::tuple, std::vector>> createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo); + const std::vector& collInfo); std::tuple, std::vector>> createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo); + const std::vector& collInfo); + +/// Helper struct to get the negative offsets from the end of the branches +/// vector for the different types of generic parameters. +template +struct TypeToBranchIndexOffset; + +template <> +struct TypeToBranchIndexOffset { + constexpr static int keys = 8; + constexpr static int values = 7; +}; + +template <> +struct TypeToBranchIndexOffset { + constexpr static int keys = 6; + constexpr static int values = 5; +}; + +template <> +struct TypeToBranchIndexOffset { + constexpr static int keys = 4; + constexpr static int values = 3; +}; + +template <> +struct TypeToBranchIndexOffset { + constexpr static int keys = 2; + constexpr static int values = 1; +}; + +template +void ROOTReader::readParams(ROOTReader::CategoryInfo& catInfo, podio::GenericParameters& params, bool reloadBranches, + unsigned int localEntry) { + const auto nBranches = catInfo.branches.size(); + if (reloadBranches) { + auto& keyBranch = catInfo.branches[nBranches - TypeToBranchIndexOffset::keys].data; + keyBranch = root_utils::getBranch(catInfo.chain.get(), root_utils::getGPKeyName()); + auto& valueBranch = catInfo.branches[nBranches - TypeToBranchIndexOffset::values].data; + valueBranch = root_utils::getBranch(catInfo.chain.get(), root_utils::getGPValueName()); + } + + auto keyBranch = catInfo.branches[nBranches - TypeToBranchIndexOffset::keys].data; + auto valueBranch = catInfo.branches[nBranches - TypeToBranchIndexOffset::values].data; + + root_utils::ParamStorage storage; + keyBranch->SetAddress(storage.keysPtr()); + keyBranch->GetEntry(localEntry); + valueBranch->SetAddress(storage.valuesPtr()); + valueBranch->GetEntry(localEntry); + + for (size_t i = 0; i < storage.keys.size(); ++i) { + params.getMap().emplace(std::move(storage.keys[i]), std::move(storage.values[i])); + } +} GenericParameters ROOTReader::readEntryParameters(ROOTReader::CategoryInfo& catInfo, bool reloadBranches, unsigned int localEntry) { - // Parameter branch is always the last one - auto& paramBranches = catInfo.branches.back(); + GenericParameters params; - // Make sure to have a valid branch pointer after switching trees in the chain - // as well as on the first event - if (reloadBranches) { - paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName); + if (m_fileVersion < podio::version::Version{0, 99, 99}) { + // Parameter branch is always the last one + auto& paramBranches = catInfo.branches.back(); + + // Make sure to have a valid branch pointer after switching trees in the chain + // as well as on the first event + if (reloadBranches) { + paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName); + } + auto* branch = paramBranches.data; + + auto* emd = ¶ms; + branch->SetAddress(&emd); + branch->GetEntry(localEntry); + } else { + readParams(catInfo, params, reloadBranches, localEntry); + readParams(catInfo, params, reloadBranches, localEntry); + readParams(catInfo, params, reloadBranches, localEntry); + readParams(catInfo, params, reloadBranches, localEntry); } - auto* branch = paramBranches.data; - GenericParameters params; - auto* emd = ¶ms; - branch->SetAddress(&emd); - branch->GetEntry(localEntry); return params; } @@ -137,7 +199,7 @@ void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category auto* collInfoBranch = root_utils::getBranch(m_metaChain.get(), root_utils::collInfoName(category)); - auto collInfo = new std::vector(); + auto collInfo = new std::vector(); if (m_fileVersion < podio::version::Version{0, 16, 4}) { auto oldCollInfo = new std::vector(); collInfoBranch->SetAddress(&oldCollInfo); @@ -162,13 +224,25 @@ void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category std::tie(catInfo.branches, catInfo.storedClasses) = createCollectionBranches(catInfo.chain.get(), *catInfo.table, *collInfo); } - delete collInfo; // Finally set up the branches for the parameters - root_utils::CollectionBranches paramBranches{}; - paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName); - catInfo.branches.push_back(paramBranches); + if (m_fileVersion < podio::version::Version{0, 99, 99}) { + root_utils::CollectionBranches paramBranches{}; + catInfo.branches.emplace_back(root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName)); + } else { + catInfo.branches.emplace_back(root_utils::getBranch(catInfo.chain.get(), root_utils::intKeyName)); + catInfo.branches.emplace_back(root_utils::getBranch(catInfo.chain.get(), root_utils::intValueName)); + + catInfo.branches.emplace_back(root_utils::getBranch(catInfo.chain.get(), root_utils::floatKeyName)); + catInfo.branches.emplace_back(root_utils::getBranch(catInfo.chain.get(), root_utils::floatValueName)); + + catInfo.branches.emplace_back(root_utils::getBranch(catInfo.chain.get(), root_utils::doubleKeyName)); + catInfo.branches.emplace_back(root_utils::getBranch(catInfo.chain.get(), root_utils::doubleValueName)); + + catInfo.branches.emplace_back(root_utils::getBranch(catInfo.chain.get(), root_utils::stringKeyName)); + catInfo.branches.emplace_back(root_utils::getBranch(catInfo.chain.get(), root_utils::stringValueName)); + } } std::vector getAvailableCategories(TChain* metaChain) { @@ -253,7 +327,7 @@ std::vector ROOTReader::getAvailableCategories() const { std::tuple, std::vector>> createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo) { + const std::vector& collInfo) { size_t collectionIndex{0}; std::vector collBranches; @@ -300,12 +374,12 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable collBranches.emplace_back(std::move(branches)); } - return {collBranches, storedClasses}; + return {std::move(collBranches), storedClasses}; } std::tuple, std::vector>> createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo) { + const std::vector& collInfo) { size_t collectionIndex{0}; std::vector collBranches; @@ -346,7 +420,7 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, collBranches.emplace_back(std::move(branches)); } - return {collBranches, storedClasses}; + return {std::move(collBranches), storedClasses}; } } // namespace podio diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index b38bc145b..df8e810fd 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -8,6 +8,7 @@ #include "rootUtils.h" #include "TTree.h" +#include namespace podio { @@ -37,7 +38,7 @@ void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego catInfo.tree->SetDirectory(m_file.get()); } - std::vector collections; + std::vector collections; collections.reserve(catInfo.collsToWrite.size()); for (const auto& name : catInfo.collsToWrite) { auto* coll = frame.getCollectionForWrite(name); @@ -61,7 +62,8 @@ void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content. " + root_utils::getInconsistentCollsMsg(catInfo.collsToWrite, collsToWrite)); } - resetBranches(catInfo.branches, collections, &const_cast(frame.getParameters())); + fillParams(catInfo, frame.getParameters()); + resetBranches(catInfo, collections); } catInfo.tree->Fill(); @@ -76,9 +78,10 @@ ROOTWriter::CategoryInfo& ROOTWriter::getCategoryInfo(const std::string& categor return it->second; } -void ROOTWriter::initBranches(CategoryInfo& catInfo, const std::vector& collections, +void ROOTWriter::initBranches(CategoryInfo& catInfo, const std::vector& collections, /*const*/ podio::GenericParameters& parameters) { - catInfo.branches.reserve(collections.size() + 1); // collections + parameters + catInfo.branches.reserve(collections.size() + + std::tuple_size_v * 2); // collections + parameters // First collections for (auto& [name, coll] : collections) { @@ -117,28 +120,45 @@ void ROOTWriter::initBranches(CategoryInfo& catInfo, const std::vectorgetTypeName(), + catInfo.branches.emplace_back(std::move(branches)); + catInfo.collInfo.emplace_back(catInfo.idTable.collectionID(name).value(), std::string(coll->getTypeName()), coll->isSubsetCollection(), coll->getSchemaVersion()); } - // Also make branches for the parameters - root_utils::CollectionBranches branches; - branches.data = catInfo.tree->Branch(root_utils::paramBranchName, ¶meters); - catInfo.branches.push_back(branches); + fillParams(catInfo, parameters); + catInfo.branches.emplace_back(catInfo.tree->Branch(root_utils::intKeyName, &catInfo.intParams.keys)); + catInfo.branches.emplace_back(catInfo.tree->Branch(root_utils::intValueName, &catInfo.intParams.values)); + + catInfo.branches.emplace_back(catInfo.tree->Branch(root_utils::floatKeyName, &catInfo.floatParams.keys)); + catInfo.branches.emplace_back(catInfo.tree->Branch(root_utils::floatValueName, &catInfo.floatParams.values)); + + catInfo.branches.emplace_back(catInfo.tree->Branch(root_utils::doubleKeyName, &catInfo.doubleParams.keys)); + catInfo.branches.emplace_back(catInfo.tree->Branch(root_utils::doubleValueName, &catInfo.doubleParams.values)); + + catInfo.branches.emplace_back(catInfo.tree->Branch(root_utils::stringKeyName, &catInfo.stringParams.keys)); + catInfo.branches.emplace_back(catInfo.tree->Branch(root_utils::stringValueName, &catInfo.stringParams.values)); } -void ROOTWriter::resetBranches(std::vector& branches, - const std::vector& collections, - /*const*/ podio::GenericParameters* parameters) { +void ROOTWriter::resetBranches(CategoryInfo& categoryInfo, + const std::vector& collections) { size_t iColl = 0; - for (auto& coll : collections) { - const auto& collBranches = branches[iColl]; - root_utils::setCollectionAddresses(coll.second->getBuffers(), collBranches); + for (auto& [_, coll] : collections) { + const auto& collBranches = categoryInfo.branches[iColl]; + root_utils::setCollectionAddresses(coll->getBuffers(), collBranches); iColl++; } - branches.back().data->SetAddress(¶meters); + categoryInfo.branches[iColl].data->SetAddress(categoryInfo.intParams.keysPtr()); + categoryInfo.branches[iColl + 1].data->SetAddress(categoryInfo.intParams.valuesPtr()); + + categoryInfo.branches[iColl + 2].data->SetAddress(categoryInfo.floatParams.keysPtr()); + categoryInfo.branches[iColl + 3].data->SetAddress(categoryInfo.floatParams.valuesPtr()); + + categoryInfo.branches[iColl + 4].data->SetAddress(categoryInfo.doubleParams.keysPtr()); + categoryInfo.branches[iColl + 5].data->SetAddress(categoryInfo.doubleParams.valuesPtr()); + + categoryInfo.branches[iColl + 6].data->SetAddress(categoryInfo.stringParams.keysPtr()); + categoryInfo.branches[iColl + 7].data->SetAddress(categoryInfo.stringParams.valuesPtr()); } void ROOTWriter::finish() { @@ -175,4 +195,11 @@ ROOTWriter::checkConsistency(const std::vector& collsToWrite, const return {std::vector{}, collsToWrite}; } +void ROOTWriter::fillParams(CategoryInfo& catInfo, const GenericParameters& params) { + catInfo.intParams = {params.getKeys(), params.getValues()}; + catInfo.floatParams = {params.getKeys(), params.getValues()}; + catInfo.doubleParams = {params.getKeys(), params.getValues()}; + catInfo.stringParams = {params.getKeys(), params.getValues()}; +} + } // namespace podio diff --git a/src/rootUtils.h b/src/rootUtils.h index 5fb7aa7d3..c9e7c1238 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -1,14 +1,10 @@ #ifndef PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy #define PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy -#include "podio/CollectionBase.h" -#include "podio/CollectionBranches.h" -#include "podio/CollectionBuffers.h" #include "podio/CollectionIDTable.h" +#include "podio/utilities/RootHelpers.h" #include "TBranch.h" -#include "TChain.h" -#include "TClass.h" #include "TTree.h" #include @@ -203,13 +199,6 @@ inline void setCollectionAddresses(const BufferT& collBuffers, const CollectionB } } -// A collection of additional information that describes the collection: the -// collectionID, the collection (data) type, whether it is a subset -// collection, and its schema version -using CollectionInfoT = std::tuple; -// for backwards compatibility -using CollectionInfoWithoutSchemaT = std::tuple; - inline void readBranchesData(const CollectionBranches& branches, Long64_t entry) { // Read all data if (branches.data) { @@ -233,7 +222,7 @@ inline void readBranchesData(const CollectionBranches& branches, Long64_t entry) * collections */ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable const& idTable) { - std::vector collInfo; + std::vector collInfo; for (size_t iColl = 0; iColl < idTable.names().size(); ++iColl) { const auto collID = idTable.ids()[iColl]; diff --git a/src/selection.xml b/src/selection.xml index 03686376f..82acdc661 100644 --- a/src/selection.xml +++ b/src/selection.xml @@ -10,6 +10,7 @@ + @@ -17,6 +18,12 @@ + + + + + + @@ -24,6 +31,7 @@ + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 729248b91..aa417badd 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -59,6 +59,7 @@ set(root_legacy_test_versions v00-16-02 v00-16-05 v00-16-06 + v00-99 ) add_subdirectory(root_io) @@ -66,6 +67,7 @@ if (ENABLE_SIO) set(sio_legacy_test_versions v00-16-05 v00-16-06 + v00-99 ) add_subdirectory(sio_io) diff --git a/tests/input_files/v00-99-example_frame.root.md5 b/tests/input_files/v00-99-example_frame.root.md5 new file mode 100644 index 000000000..5b52257e6 --- /dev/null +++ b/tests/input_files/v00-99-example_frame.root.md5 @@ -0,0 +1 @@ +dbd91affe295c85c1838c40fb9da5013 diff --git a/tests/input_files/v00-99-example_frame.sio.md5 b/tests/input_files/v00-99-example_frame.sio.md5 new file mode 100644 index 000000000..c57b2a6bb --- /dev/null +++ b/tests/input_files/v00-99-example_frame.sio.md5 @@ -0,0 +1 @@ +ad1945557d6b5e02620294c7edac3f99 diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index 6a4069a48..fd9c5c0e9 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -45,7 +45,9 @@ target_link_libraries(read_frame_legacy_root PRIVATE "${root_libs}") foreach(version IN LISTS root_legacy_test_versions) ADD_PODIO_LEGACY_TEST(${version} read_frame_root ${version}-example_frame.root) - ADD_PODIO_LEGACY_TEST(${version} read_frame_legacy_root ${version}-example.root) + if (version MATCHES "^v00-16") + ADD_PODIO_LEGACY_TEST(${version} read_frame_legacy_root ${version}-example.root) + endif() endforeach() #--- Write via python and the ROOT backend and see if we can read it back in in diff --git a/tests/sio_io/CMakeLists.txt b/tests/sio_io/CMakeLists.txt index 7102c36c7..7f53917fe 100644 --- a/tests/sio_io/CMakeLists.txt +++ b/tests/sio_io/CMakeLists.txt @@ -33,5 +33,7 @@ target_link_libraries(read_frame_legacy_sio PRIVATE "${sio_libs}" TestDataModel) foreach(version IN LISTS sio_legacy_test_versions) ADD_PODIO_LEGACY_TEST(${version} read_frame_sio ${version}-example_frame.sio) - ADD_PODIO_LEGACY_TEST(${version} read_frame_legacy_sio ${version}-example.sio) + if (version MATCHES "^v00-16") + ADD_PODIO_LEGACY_TEST(${version} read_frame_legacy_sio ${version}-example.sio) + endif() endforeach()