diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index b14a08d17..7a70145e9 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -71,6 +71,17 @@ class ROOTReader { /// /// @param filenames The filenames of all input files that should be read void openFiles(const std::vector& filenames); + + /** + * Open trees for reading from the specified TDirectory. + * + * This can be used with a TMemFile for in-memory operation via streaming. + * The specified directory should contain all trees including metadata + * and category trees. + * + * @param dir The TDirectory to look for the podio trees in. + */ + void openTDirectory(TDirectory *dir); /// Read the next data entry for a given category. /// @@ -127,6 +138,9 @@ class ROOTReader { } private: + + void readMetaData(); + /// Helper struct to group together all the necessary state to read / process /// a given category. A "category" in this case describes all frames with the /// same name which are constrained by the ROOT file structure that we use to @@ -134,9 +148,15 @@ class ROOTReader { /// reading from a TTree / TChain (i.e. collection infos, branches, ...) struct CategoryInfo { /// constructor from chain for more convenient map insertion - CategoryInfo(std::unique_ptr&& c) : chain(std::move(c)) { - } - std::unique_ptr chain{nullptr}; ///< The TChain with the data + CategoryInfo() : chain("unused"){} + + // The copy constructor and assignment operators are explicitly deleted + // here since TChain has these declared private and therefore inaccessible. + CategoryInfo(const podio::ROOTReader::CategoryInfo&) = delete; + CategoryInfo& operator=(const podio::ROOTReader::CategoryInfo&) = delete; + + TChain chain; ///< The TChain with the data (if reading from files) + TTree *tree = {nullptr}; ///< The TTree with the data (use this, not chain!) unsigned entry{0}; ///< The next entry to read std::vector> storedClasses{}; ///< The stored collections in this ///< category @@ -166,7 +186,8 @@ class ROOTReader { podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl, bool reloadBranches, unsigned int localEntry); - std::unique_ptr m_metaChain{nullptr}; ///< The metadata tree + TTree* m_metaTree{nullptr}; ///< The metadata tree (use this to access) + TChain m_metaChain{"unused"}; ///< A TChain (only used if reading from files. m_metaTree will point to this if needed) std::unordered_map m_categories{}; ///< All categories std::vector m_availCategories{}; ///< All available categories from this file diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 928000c7a..06eeac5be 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -20,11 +20,11 @@ namespace podio { std::tuple, std::vector>> -createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, +createCollectionBranches(TTree* tree, const podio::CollectionIDTable& idTable, const std::vector& collInfo); std::tuple, std::vector>> -createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, +createCollectionBranchesIndexBased(TTree* tree, const podio::CollectionIDTable& idTable, const std::vector& collInfo); GenericParameters ROOTReader::readEntryParameters(ROOTReader::CategoryInfo& catInfo, bool reloadBranches, @@ -35,7 +35,7 @@ GenericParameters ROOTReader::readEntryParameters(ROOTReader::CategoryInfo& catI // Make sure to have a valid branch pointer after switching trees in the chain // as well as on the first event if (reloadBranches) { - paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName); + paramBranches.data = root_utils::getBranch(catInfo.tree, root_utils::paramBranchName); } auto* branch = paramBranches.data; @@ -58,22 +58,30 @@ std::unique_ptr ROOTReader::readEntry(const std::string& name, co } std::unique_ptr ROOTReader::readEntry(ROOTReader::CategoryInfo& catInfo) { - if (!catInfo.chain) { + if (!catInfo.tree) { return nullptr; } - if (catInfo.entry >= catInfo.chain->GetEntries()) { + if (catInfo.entry >= catInfo.tree->GetEntries()) { return nullptr; } - // After switching trees in the chain, branch pointers get invalidated so - // they need to be reassigned. - // NOTE: root 6.22/06 requires that we get completely new branches here, - // with 6.20/04 we could just re-set them - const auto preTreeNo = catInfo.chain->GetTreeNumber(); - const auto localEntry = catInfo.chain->LoadTree(catInfo.entry); - const auto treeChange = catInfo.chain->GetTreeNumber() != preTreeNo; - // Also need to make sure to handle the first event - const auto reloadBranches = treeChange || localEntry == 0; + // Initialize assuming catInfo.tree is a TTree and not a TChain + auto localEntry = catInfo.entry; + auto reloadBranches = (localEntry == 0); + + // Handle case when catInfo.tree actually points to a TChain + if(catInfo.tree->IsA() == TChain::Class()){ + // After switching trees in the chain, branch pointers get invalidated so + // they need to be reassigned. + // NOTE: root 6.22/06 requires that we get completely new branches here, + // with 6.20/04 we could just re-set them + auto chain = static_cast(catInfo.tree); + const auto preTreeNo = chain->GetTreeNumber(); + localEntry = chain->LoadTree(catInfo.entry); + const auto treeChange = chain->GetTreeNumber() != preTreeNo; + // Also need to make sure to handle the first event + reloadBranches = treeChange || localEntry == 0; + } ROOTFrameData::BufferMap buffers; for (size_t i = 0; i < catInfo.storedClasses.size(); ++i) { @@ -99,7 +107,7 @@ podio::CollectionReadBuffers ROOTReader::getCollectionBuffers(ROOTReader::Catego auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{}); if (reloadBranches) { - root_utils::resetBranches(catInfo.chain.get(), branches, name); + root_utils::resetBranches(catInfo.tree, branches, name); } // set the addresses and read the data @@ -121,9 +129,9 @@ ROOTReader::CategoryInfo& ROOTReader::getCategoryInfo(const std::string& categor return it->second; } - // Use a nullptr TChain to signify an invalid category request + // Create empty CategoryInfo to signify an invalid category request // TODO: Warn / log - static auto invalidCategory = CategoryInfo{nullptr}; + static auto invalidCategory = CategoryInfo(); return invalidCategory; } @@ -131,11 +139,11 @@ ROOTReader::CategoryInfo& ROOTReader::getCategoryInfo(const std::string& categor void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category) { catInfo.table = std::make_shared(); auto* table = catInfo.table.get(); - auto* tableBranch = root_utils::getBranch(m_metaChain.get(), root_utils::idTableName(category)); + auto* tableBranch = root_utils::getBranch(m_metaTree, root_utils::idTableName(category)); tableBranch->SetAddress(&table); tableBranch->GetEntry(0); - auto* collInfoBranch = root_utils::getBranch(m_metaChain.get(), root_utils::collInfoName(category)); + auto* collInfoBranch = root_utils::getBranch(m_metaTree, root_utils::collInfoName(category)); auto collInfo = new std::vector(); if (m_fileVersion < podio::version::Version{0, 16, 4}) { @@ -157,22 +165,22 @@ void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category // from older versions if (m_fileVersion < podio::version::Version{0, 16, 99}) { std::tie(catInfo.branches, catInfo.storedClasses) = - createCollectionBranchesIndexBased(catInfo.chain.get(), *catInfo.table, *collInfo); + createCollectionBranchesIndexBased(catInfo.tree, *catInfo.table, *collInfo); } else { std::tie(catInfo.branches, catInfo.storedClasses) = - createCollectionBranches(catInfo.chain.get(), *catInfo.table, *collInfo); + createCollectionBranches(catInfo.tree, *catInfo.table, *collInfo); } delete collInfo; // Finally set up the branches for the parameters root_utils::CollectionBranches paramBranches{}; - paramBranches.data = root_utils::getBranch(catInfo.chain.get(), root_utils::paramBranchName); + paramBranches.data = root_utils::getBranch(catInfo.tree, root_utils::paramBranchName); catInfo.branches.push_back(paramBranches); } -std::vector getAvailableCategories(TChain* metaChain) { - auto* branches = metaChain->GetListOfBranches(); +std::vector getAvailableCategories(TTree* metaTree) { + auto* branches = metaTree->GetListOfBranches(); std::vector brNames; brNames.reserve(branches->GetEntries()); @@ -189,54 +197,87 @@ std::vector getAvailableCategories(TChain* metaChain) { return brNames; } +/// @brief Read version and data model from the m_metaTree +void ROOTReader::readMetaData() { + podio::version::Version* versionPtr{nullptr}; + if (auto* versionBranch = root_utils::getBranch(m_metaTree, root_utils::versionBranchName)) { + versionBranch->SetAddress(&versionPtr); + versionBranch->GetEntry(0); + } + m_fileVersion = versionPtr ? *versionPtr : podio::version::Version{0, 0, 0}; + delete versionPtr; + + if (auto* edmDefBranch = root_utils::getBranch(m_metaTree, root_utils::edmDefBranchName)) { + auto* datamodelDefs = new DatamodelDefinitionHolder::MapType{}; + edmDefBranch->SetAddress(&datamodelDefs); + edmDefBranch->GetEntry(0); + m_datamodelHolder = DatamodelDefinitionHolder(std::move(*datamodelDefs)); + delete datamodelDefs; + } +} + void ROOTReader::openFile(const std::string& filename) { openFiles({filename}); } void ROOTReader::openFiles(const std::vector& filenames) { - m_metaChain = std::make_unique(root_utils::metaTreeName); + m_metaChain.SetName(root_utils::metaTreeName); // NOTE: We simply assume that the meta data doesn't change throughout the // chain! This essentially boils down to the assumption that all files that // are read this way were written with the same settings. // Reading all files is done to check that all file exists for (const auto& filename : filenames) { - if (!m_metaChain->Add(filename.c_str(), -1)) { + if (!m_metaChain.Add(filename.c_str(), -1)) { throw std::runtime_error("File " + filename + " couldn't be found or the \"" + root_utils::metaTreeName + "\" tree couldn't be read."); } } - podio::version::Version* versionPtr{nullptr}; - if (auto* versionBranch = root_utils::getBranch(m_metaChain.get(), root_utils::versionBranchName)) { - versionBranch->SetAddress(&versionPtr); - versionBranch->GetEntry(0); - } - m_fileVersion = versionPtr ? *versionPtr : podio::version::Version{0, 0, 0}; - delete versionPtr; + // Make m_metaTree point to m_metaChain. It is done this way in order + // to support both cases when files are used or a memory-resident TTree + // is used which cannot be part of a TChain. + m_metaTree = &m_metaChain; - if (auto* edmDefBranch = root_utils::getBranch(m_metaChain.get(), root_utils::edmDefBranchName)) { - auto* datamodelDefs = new DatamodelDefinitionHolder::MapType{}; - edmDefBranch->SetAddress(&datamodelDefs); - edmDefBranch->GetEntry(0); - m_datamodelHolder = DatamodelDefinitionHolder(std::move(*datamodelDefs)); - delete datamodelDefs; - } + // Read in version and data model info + readMetaData(); // Do some work up front for setting up categories and setup all the chains // and record the available categories. The rest of the setup follows on // demand when the category is first read - m_availCategories = ::podio::getAvailableCategories(m_metaChain.get()); + m_availCategories = ::podio::getAvailableCategories(m_metaTree); for (const auto& cat : m_availCategories) { - auto [it, _] = m_categories.try_emplace(cat, std::make_unique(cat.c_str())); + auto [it, _] = m_categories.try_emplace(cat); + it->second.chain.SetName(cat.c_str()); for (const auto& fn : filenames) { - it->second.chain->Add(fn.c_str()); + it->second.chain.Add(fn.c_str()); + } + it->second.tree = &it->second.chain; // Make the tree point to our internal chain + } +} + +void ROOTReader::openTDirectory(TDirectory *dir) { + + m_metaTree = dynamic_cast(dir->Get(root_utils::metaTreeName)); + + // Read in version and data model info + readMetaData(); + + // Do some work up front for setting up categories and setup all the chains + // and record the available categories. The rest of the setup follows on + // demand when the category is first read + m_availCategories = ::podio::getAvailableCategories(m_metaTree); + for (const auto& cat : m_availCategories) { + auto tree = dynamic_cast(dir->Get(cat.c_str())); + if( tree ){ + auto [it, _] = m_categories.try_emplace(cat); + it->second.tree = tree; } } } unsigned ROOTReader::getEntries(const std::string& name) const { if (auto it = m_categories.find(name); it != m_categories.end()) { - return it->second.chain->GetEntries(); + return it->second.tree->GetEntries(); } return 0; @@ -252,7 +293,7 @@ std::vector ROOTReader::getAvailableCategories() const { } std::tuple, std::vector>> -createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, +createCollectionBranchesIndexBased(TTree* tree, const podio::CollectionIDTable& idTable, const std::vector& collInfo) { size_t collectionIndex{0}; @@ -275,23 +316,23 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable if (isSubsetColl) { // Only one branch will exist and we can trivially get its name auto brName = root_utils::refBranch(name, 0); - branches.refs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.refs.push_back(root_utils::getBranch(tree, brName.c_str())); branches.refNames.emplace_back(std::move(brName)); } else { // This branch is guaranteed to exist since only collections that are // also written to file are in the info metadata that we work with here - branches.data = root_utils::getBranch(chain, name.c_str()); + branches.data = root_utils::getBranch(tree, name.c_str()); const auto buffers = collection->getBuffers(); for (size_t i = 0; i < buffers.references->size(); ++i) { auto brName = root_utils::refBranch(name, i); - branches.refs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.refs.push_back(root_utils::getBranch(tree, brName.c_str())); branches.refNames.emplace_back(std::move(brName)); } for (size_t i = 0; i < buffers.vectorMembers->size(); ++i) { auto brName = root_utils::vecBranch(name, i); - branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.vecs.push_back(root_utils::getBranch(tree, brName.c_str())); branches.vecNames.emplace_back(std::move(brName)); } } @@ -304,7 +345,7 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable } std::tuple, std::vector>> -createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, +createCollectionBranches(TTree* tree, const podio::CollectionIDTable& idTable, const std::vector& collInfo) { size_t collectionIndex{0}; @@ -322,22 +363,22 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, if (isSubsetColl) { // Only one branch will exist and we can trivially get its name auto brName = root_utils::subsetBranch(name); - branches.refs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.refs.push_back(root_utils::getBranch(tree, brName.c_str())); branches.refNames.emplace_back(std::move(brName)); } else { // This branch is guaranteed to exist since only collections that are // also written to file are in the info metadata that we work with here - branches.data = root_utils::getBranch(chain, name.c_str()); + branches.data = root_utils::getBranch(tree, name.c_str()); const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(collType); for (const auto& relName : relVecNames.relations) { auto brName = root_utils::refBranch(name, relName); - branches.refs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.refs.push_back(root_utils::getBranch(tree, brName.c_str())); branches.refNames.emplace_back(std::move(brName)); } for (const auto& vecName : relVecNames.vectorMembers) { auto brName = root_utils::refBranch(name, vecName); - branches.vecs.push_back(root_utils::getBranch(chain, brName.c_str())); + branches.vecs.push_back(root_utils::getBranch(tree, brName.c_str())); branches.vecNames.emplace_back(std::move(brName)); } } diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index 396c32b78..297fc7409 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -15,6 +15,7 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ check_benchmark_outputs read_frame_legacy_root read_frame_root_multiple + read_frame_root_tdirectory write_python_frame_root read_python_frame_root read_and_write_frame_root diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index 1bc906755..f30dcf915 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -5,6 +5,7 @@ set(root_dependent_tests write_frame_root.cpp read_python_frame_root.cpp read_frame_root_multiple.cpp + read_frame_root_tdirectory.cpp read_and_write_frame_root.cpp ) if(ENABLE_RNTUPLE) @@ -23,6 +24,7 @@ endforeach() set_tests_properties( read_frame_root read_frame_root_multiple + read_frame_root_tdirectory read_and_write_frame_root PROPERTIES diff --git a/tests/root_io/read_frame_root_tdirectory.cpp b/tests/root_io/read_frame_root_tdirectory.cpp new file mode 100644 index 000000000..de591ce43 --- /dev/null +++ b/tests/root_io/read_frame_root_tdirectory.cpp @@ -0,0 +1,93 @@ +// +// This duplicated from read_frame_root_multiple.cpp and modified to check +// ROOTReader::openTDirectory +// + +#include "read_frame.h" + +#include "podio/ROOTReader.h" + +#include "TFile.h" + +int read_frames(podio::ROOTReader& reader) { + if (reader.currentFileVersion() != podio::version::build_version) { + std::cerr << "The podio build version could not be read back correctly. " + << "(expected:" << podio::version::build_version << ", actual: " << reader.currentFileVersion() << ")" + << std::endl; + return 1; + } + + if (reader.getEntries("events") != 10) { + std::cerr << "Could not read back the number of events correctly. " + << "(expected:" << 10 << ", actual: " << reader.getEntries("events") << ")" << std::endl; + return 1; + } + + if (reader.getEntries("events") != reader.getEntries("other_events")) { + std::cerr << "Could not read back the number of events correctly. " + << "(expected:" << 10 << ", actual: " << reader.getEntries("other_events") << ")" << std::endl; + return 1; + } + + // Read the frames in a different order than when writing them here to make + // sure that the writing/reading order does not impose any usage requirements + for (size_t i = 0; i < reader.getEntries("events"); ++i) { + auto frame = podio::Frame(reader.readNextEntry("events")); + if (frame.get("emptySubsetColl") == nullptr) { + std::cerr << "Could not retrieve an empty subset collection" << std::endl; + return 1; + } + if (frame.get("emptyCollection") == nullptr) { + std::cerr << "Could not retrieve an empty collection" << std::endl; + return 1; + } + + processEvent(frame, (i % 10), reader.currentFileVersion()); + + auto otherFrame = podio::Frame(reader.readNextEntry("other_events")); + processEvent(otherFrame, (i % 10) + 100, reader.currentFileVersion()); + // The other_events category also holds external collections + processExtensions(otherFrame, (i % 10) + 100, reader.currentFileVersion()); + } + + if (reader.readNextEntry("events")) { + std::cerr << "Trying to read more frame data than is present should return a nullptr" << std::endl; + return 1; + } + + std::cout << "========================================================\n" << std::endl; + if (reader.readNextEntry("not_present")) { + std::cerr << "Trying to read non-existant frame data should return a nullptr" << std::endl; + return 1; + } + + // Reading specific (jumping to) entry + { + auto frame = podio::Frame(reader.readEntry("events", 4)); + processEvent(frame, 4, reader.currentFileVersion()); + // Reading the next entry after jump, continues from after the jump + auto nextFrame = podio::Frame(reader.readNextEntry("events")); + processEvent(nextFrame, 5, reader.currentFileVersion()); + + // Jumping back also works + auto previousFrame = podio::Frame(reader.readEntry("other_events", 2)); + processEvent(previousFrame, 2 + 100, reader.currentFileVersion()); + processExtensions(previousFrame, 2 + 100, reader.currentFileVersion()); + } + + // Trying to read a Frame that is not present returns a nullptr + if (reader.readEntry("events", 30)) { + std::cerr << "Trying to read a specific entry that does not exist should return a nullptr" << std::endl; + return 1; + } + + return 0; +} + +int main() { + std::cout << "- - - openTDirectory" << std::endl; + auto reader = podio::ROOTReader(); + TFile f("example_frame.root"); + reader.openTDirectory(&f); + return read_frames(reader); +}