From 364446880a8236fd6a480f020fc0109759866059 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Fri, 22 Sep 2023 17:03:07 +0200 Subject: [PATCH] Fix reading relations to unpersisted objects (#486) * Switch test case from EventStore to Frame based I/O * Default initialize ObjectIDs to untracked * Add check for existence before getting collections * Properly lock the collection ID table * Make CollectionIDTable return optional for some queries * Make sure that each category gets its own id table --- include/podio/CollectionIDTable.h | 8 ++- include/podio/Frame.h | 11 ++-- include/podio/ObjectID.h | 10 ++-- include/podio/ROOTNTupleReader.h | 2 +- python/templates/Collection.cc.jinja2 | 4 +- python/templates/Obj.cc.jinja2 | 4 +- .../templates/macros/implementations.jinja2 | 2 +- src/CollectionIDTable.cc | 31 +++++++---- src/EventStore.cc | 4 +- src/ROOTFrameReader.cc | 4 +- src/ROOTFrameWriter.cc | 4 +- src/ROOTLegacyReader.cc | 2 +- src/ROOTNTupleReader.cc | 8 +-- src/ROOTReader.cc | 4 +- src/ROOTWriter.cc | 2 +- src/SIOBlock.cc | 2 +- src/SIOFrameData.cc | 2 +- src/SIOReader.cc | 2 +- src/sioUtils.h | 2 +- tests/root_io/read_and_write_associated.cpp | 51 ++++++++----------- 20 files changed, 83 insertions(+), 76 deletions(-) diff --git a/include/podio/CollectionIDTable.h b/include/podio/CollectionIDTable.h index 39b947c2e..90d35c5ed 100644 --- a/include/podio/CollectionIDTable.h +++ b/include/podio/CollectionIDTable.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -26,14 +27,17 @@ class CollectionIDTable { CollectionIDTable(const std::vector& ids, const std::vector& names); /// return collection ID for given name - uint32_t collectionID(const std::string& name) const; + std::optional collectionID(const std::string& name) const; /// return name for given collection ID - const std::string name(uint32_t collectionID) const; + std::optional name(uint32_t collectionID) const; /// Check if collection name is known bool present(const std::string& name) const; + /// Check if collection ID is known + bool present(uint32_t collectionID) const; + /// return registered names const std::vector& names() const { return m_names; diff --git a/include/podio/Frame.h b/include/podio/Frame.h index 7b6f39c7a..121473e97 100644 --- a/include/podio/Frame.h +++ b/include/podio/Frame.h @@ -380,7 +380,7 @@ podio::CollectionBase* Frame::FrameModel::doGet(const std::string& n } coll->prepareAfterRead(); - coll->setID(m_idTable.collectionID(name)); + coll->setID(m_idTable.collectionID(name).value()); { std::lock_guard mapLock{*m_mapMtx}; auto [it, success] = m_collections.emplace(name, std::move(coll)); @@ -400,17 +400,20 @@ podio::CollectionBase* Frame::FrameModel::doGet(const std::string& n template bool Frame::FrameModel::get(uint32_t collectionID, CollectionBase*& collection) const { - const auto& name = m_idTable.name(collectionID); + const auto name = m_idTable.name(collectionID); + if (!name) { + return false; + } const auto& [_, inserted] = m_retrievedIDs.insert(collectionID); if (inserted) { - auto coll = doGet(name); + auto coll = doGet(name.value()); if (coll) { collection = coll; return true; } } else { - auto coll = doGet(name, false); + auto coll = doGet(name.value(), false); if (coll) { collection = coll; return true; diff --git a/include/podio/ObjectID.h b/include/podio/ObjectID.h index 4347a5ba9..7f179f9f6 100644 --- a/include/podio/ObjectID.h +++ b/include/podio/ObjectID.h @@ -8,16 +8,16 @@ namespace podio { class ObjectID { public: - /// index of object in collection - int index; - /// ID of the collection - uint32_t collectionID; - /// not part of a collection static const int untracked = -1; /// invalid or non-available object static const int invalid = -2; + /// index of object in collection + int index{untracked}; + /// ID of the collection + uint32_t collectionID{static_cast(untracked)}; + /// index and collectionID uniquely defines the object. /// this operator is necessary for meaningful comparisons in python bool operator==(const ObjectID& other) const { diff --git a/include/podio/ROOTNTupleReader.h b/include/podio/ROOTNTupleReader.h index a25f66f2a..672840587 100644 --- a/include/podio/ROOTNTupleReader.h +++ b/include/podio/ROOTNTupleReader.h @@ -96,7 +96,7 @@ class ROOTNTupleReader { std::vector m_availableCategories{}; - std::shared_ptr m_table{}; + std::unordered_map> m_idTables{}; }; } // namespace podio diff --git a/python/templates/Collection.cc.jinja2 b/python/templates/Collection.cc.jinja2 index 1eac8fd99..93f691b06 100644 --- a/python/templates/Collection.cc.jinja2 +++ b/python/templates/Collection.cc.jinja2 @@ -29,10 +29,10 @@ {% with collection_type = class.bare_type + 'Collection' %} {{ collection_type }}::{{ collection_type }}() : - m_isValid(false), m_isPrepared(false), m_isSubsetColl(false), m_collectionID(0), m_storageMtx(std::make_unique()), m_storage() {} + m_isValid(false), m_isPrepared(false), m_isSubsetColl(false), m_collectionID(podio::ObjectID::untracked), m_storageMtx(std::make_unique()), m_storage() {} {{ collection_type }}::{{ collection_type }}({{ collection_type }}Data&& data, bool isSubsetColl) : - m_isValid(false), m_isPrepared(false), m_isSubsetColl(isSubsetColl), m_collectionID(0), m_storageMtx(std::make_unique()), m_storage(std::move(data)) {} + m_isValid(false), m_isPrepared(false), m_isSubsetColl(isSubsetColl), m_collectionID(podio::ObjectID::untracked), m_storageMtx(std::make_unique()), m_storage(std::move(data)) {} {{ collection_type }}::~{{ collection_type }}() { // Need to tell the storage how to clean-up diff --git a/python/templates/Obj.cc.jinja2 b/python/templates/Obj.cc.jinja2 index 58574057d..d651c4ecf 100644 --- a/python/templates/Obj.cc.jinja2 +++ b/python/templates/Obj.cc.jinja2 @@ -16,7 +16,7 @@ {{ utils.namespace_open(class.namespace) }} {% with obj_type = class.bare_type + 'Obj' %} {{ obj_type }}::{{ obj_type }}() : -{% raw %} ObjBase{{podio::ObjectID::untracked, 0}, 0}{% endraw %}, +{% raw %} ObjBase{{}, 0}{% endraw %}, data(){{ single_relations_initialize(OneToOneRelations) }} {%- for relation in OneToManyRelations + VectorMembers %}, m_{{ relation.name }}(new std::vector<{{ relation.full_type }}>()) @@ -29,7 +29,7 @@ { } {{ obj_type }}::{{ obj_type }}(const {{ obj_type }}& other) : -{% raw %} ObjBase{{podio::ObjectID::untracked, 0}, 0}{% endraw %}, +{% raw %} ObjBase{{}, 0}{% endraw %}, data(other.data){{ single_relations_initialize(OneToOneRelations) }} {%- for relation in OneToManyRelations + VectorMembers %}, m_{{ relation.name }}(new std::vector<{{ relation.full_type }}>(*(other.m_{{ relation.name }}))) diff --git a/python/templates/macros/implementations.jinja2 b/python/templates/macros/implementations.jinja2 index 096a93ae5..987fb350c 100644 --- a/python/templates/macros/implementations.jinja2 +++ b/python/templates/macros/implementations.jinja2 @@ -164,7 +164,7 @@ const podio::ObjectID {{ full_type }}::getObjectID() const { if (m_obj) { return m_obj->id; } - return podio::ObjectID{podio::ObjectID::invalid, 0}; + return podio::ObjectID{}; } {% set inverse_type = class.bare_type if prefix else 'Mutable' + class.bare_type %} diff --git a/src/CollectionIDTable.cc b/src/CollectionIDTable.cc index fa97a4bbb..ace8a91fe 100644 --- a/src/CollectionIDTable.cc +++ b/src/CollectionIDTable.cc @@ -1,10 +1,11 @@ // podio specific includes #include "podio/CollectionIDTable.h" -#include -#include #include "MurmurHash3.h" +#include +#include + namespace podio { CollectionIDTable::CollectionIDTable() : m_mutex(std::make_unique()) { @@ -18,22 +19,28 @@ CollectionIDTable::CollectionIDTable(const std::vector& ids, const std m_collectionIDs(ids), m_names(names), m_mutex(std::make_unique()) { } -const std::string CollectionIDTable::name(uint32_t ID) const { - std::lock_guard lock(*m_mutex); +std::optional CollectionIDTable::name(uint32_t ID) const { + std::lock_guard lock{*m_mutex}; const auto result = std::find(begin(m_collectionIDs), end(m_collectionIDs), ID); const auto index = std::distance(m_collectionIDs.begin(), result); + if (index >= static_cast(m_names.size())) { + return std::nullopt; + } return m_names[index]; } -uint32_t CollectionIDTable::collectionID(const std::string& name) const { - std::lock_guard lock(*m_mutex); +std::optional CollectionIDTable::collectionID(const std::string& name) const { + std::lock_guard lock{*m_mutex}; const auto result = std::find(begin(m_names), end(m_names), name); const auto index = std::distance(m_names.begin(), result); + if (index >= static_cast(m_collectionIDs.size())) { + return std::nullopt; + } return m_collectionIDs[index]; } void CollectionIDTable::print() const { - std::lock_guard lock(*m_mutex); + std::lock_guard lock{*m_mutex}; std::cout << "CollectionIDTable" << std::endl; for (unsigned i = 0; i < m_names.size(); ++i) { std::cout << "\t" << m_names[i] << " : " << m_collectionIDs[i] << std::endl; @@ -41,13 +48,15 @@ void CollectionIDTable::print() const { } bool CollectionIDTable::present(const std::string& name) const { - std::lock_guard lock(*m_mutex); - const auto result = std::find(begin(m_names), end(m_names), name); - return result != end(m_names); + return collectionID(name).has_value(); +} + +bool CollectionIDTable::present(uint32_t collectionID) const { + return name(collectionID).has_value(); } uint32_t CollectionIDTable::add(const std::string& name) { - std::lock_guard lock(*m_mutex); + std::lock_guard lock{*m_mutex}; const auto result = std::find(begin(m_names), end(m_names), name); uint32_t ID = 0; if (result == m_names.end()) { diff --git a/src/EventStore.cc b/src/EventStore.cc index 831cc7030..dde8961f8 100644 --- a/src/EventStore.cc +++ b/src/EventStore.cc @@ -20,12 +20,12 @@ bool EventStore::get(uint32_t id, CollectionBase*& collection) const { bool success = false; if (val.second == true) { // collection not yet retrieved in recursive-call - auto name = m_table->name(id); + auto name = m_table->name(id).value(); success = doGet(name, collection, true); } else { // collection already requested in recursive call // do not set the references to break collection dependency-cycle - auto name = m_table->name(id); + auto name = m_table->name(id).value(); success = doGet(name, collection, false); } // fg: the set should only be cleared at the end of event (in clear() ) ... diff --git a/src/ROOTFrameReader.cc b/src/ROOTFrameReader.cc index 9490a099e..cb1c24aea 100644 --- a/src/ROOTFrameReader.cc +++ b/src/ROOTFrameReader.cc @@ -263,7 +263,7 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here - const auto name = idTable.name(collID); + const auto name = idTable.name(collID).value(); const auto collectionClass = TClass::GetClass(collType.c_str()); // Need the collection here to setup all the branches. Have to manage the @@ -315,7 +315,7 @@ createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { // We only write collections that are in the collectionIDTable, so no need // to check here - const auto name = idTable.name(collID); + const auto name = idTable.name(collID).value(); root_utils::CollectionBranches branches{}; if (isSubsetColl) { diff --git a/src/ROOTFrameWriter.cc b/src/ROOTFrameWriter.cc index af7126596..48a47b482 100644 --- a/src/ROOTFrameWriter.cc +++ b/src/ROOTFrameWriter.cc @@ -105,8 +105,8 @@ void ROOTFrameWriter::initBranches(CategoryInfo& catInfo, const std::vectorgetTypeName(), coll->isSubsetCollection(), - coll->getSchemaVersion()); + catInfo.collInfo.emplace_back(catInfo.idTable.collectionID(name).value(), coll->getTypeName(), + coll->isSubsetCollection(), coll->getSchemaVersion()); } // Also make branches for the parameters diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index 6a8b55a3b..5a11f8cdc 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -177,7 +177,7 @@ void ROOTLegacyReader::createCollectionBranches(const std::vectorname(collID); + const auto name = m_table->name(collID).value(); root_utils::CollectionBranches branches{}; const auto collectionClass = TClass::GetClass(collType.c_str()); diff --git a/src/ROOTNTupleReader.cc b/src/ROOTNTupleReader.cc index 299f88da3..4b020b314 100644 --- a/src/ROOTNTupleReader.cc +++ b/src/ROOTNTupleReader.cc @@ -59,6 +59,9 @@ bool ROOTNTupleReader::initCategory(const std::string& category) { auto schemaVersion = m_metadata_readers[filename]->GetView>("schemaVersion_" + category); m_collectionInfo[category].schemaVersion = schemaVersion(0); + m_idTables[category] = + std::make_shared(m_collectionInfo[category].id, m_collectionInfo[category].name); + return true; } @@ -176,11 +179,8 @@ std::unique_ptr ROOTNTupleReader::readEntry(const std::string& ca m_readers[category][0]->LoadEntry(entNum); auto parameters = readEventMetaData(category, entNum); - if (!m_table) { - m_table = std::make_shared(m_collectionInfo[category].id, m_collectionInfo[category].name); - } - return std::make_unique(std::move(buffers), m_table, std::move(parameters)); + return std::make_unique(std::move(buffers), m_idTables[category], std::move(parameters)); } } // namespace podio diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index a366962ac..5b91378fe 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -129,7 +129,7 @@ CollectionBase* ROOTReader::readCollectionData(const root_utils::CollectionBranc } // do the unpacking - const auto id = m_table->collectionID(name); + const auto id = m_table->collectionID(name).value(); collection->setID(id); collection->prepareAfterRead(); @@ -249,7 +249,7 @@ void ROOTReader::createCollectionBranches(const std::vectorname(collID); + const auto name = m_table->name(collID).value(); root_utils::CollectionBranches branches{}; const auto collectionClass = TClass::GetClass(collType.c_str()); diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index 6159c52f0..73e9f8e1f 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -106,7 +106,7 @@ void ROOTWriter::finish() { std::vector collectionInfo; collectionInfo.reserve(m_collectionsToWrite.size()); for (const auto& name : m_collectionsToWrite) { - const auto collID = collIDTable->collectionID(name); + const auto collID = collIDTable->collectionID(name).value(); const podio::CollectionBase* coll{nullptr}; // No check necessary, only registered collections possible m_store->get(name, coll); diff --git a/src/SIOBlock.cc b/src/SIOBlock.cc index d59dc5fae..efb26c867 100644 --- a/src/SIOBlock.cc +++ b/src/SIOBlock.cc @@ -24,7 +24,7 @@ SIOCollectionIDTableBlock::SIOCollectionIDTableBlock(podio::EventStore* store) : if (!store->get(id, tmp)) { std::cerr << "PODIO-ERROR cannot construct CollectionIDTableBlock because a collection is missing from the store (id: " - << id << ", name: " << table->name(id) << ")" << std::endl; + << id << ", name: " << table->name(id).value_or("") << ")" << std::endl; } _types.emplace_back(tmp->getValueTypeName()); diff --git a/src/SIOFrameData.cc b/src/SIOFrameData.cc index 1d2b08b39..e6170f7a0 100644 --- a/src/SIOFrameData.cc +++ b/src/SIOFrameData.cc @@ -42,7 +42,7 @@ std::vector SIOFrameData::getAvailableCollections() { // no guarantee that it coincides with the index in the blocks. // Additionally, collection indices start at 1 const auto collID = m_idTable.ids()[i - 1]; - collections.push_back(m_idTable.name(collID)); + collections.push_back(m_idTable.name(collID).value()); } } diff --git a/src/SIOReader.cc b/src/SIOReader.cc index 69c398e10..0ad0d9e15 100644 --- a/src/SIOReader.cc +++ b/src/SIOReader.cc @@ -30,7 +30,7 @@ CollectionBase* SIOReader::readCollection(const std::string& name) { std::find_if(begin(m_inputs), end(m_inputs), [&name](const SIOReader::Input& t) { return t.second == name; }); if (p != end(m_inputs)) { - p->first->setID(m_table->collectionID(name)); + p->first->setID(m_table->collectionID(name).value()); p->first->prepareAfterRead(); return p->first; } diff --git a/src/sioUtils.h b/src/sioUtils.h index 82297456b..352a338b6 100644 --- a/src/sioUtils.h +++ b/src/sioUtils.h @@ -51,7 +51,7 @@ namespace sio_utils { for (const auto& [name, coll] : collections) { names.emplace_back(name); - ids.emplace_back(collIdTable.collectionID(name)); + ids.emplace_back(collIdTable.collectionID(name).value()); types.emplace_back(coll->getValueTypeName()); subsetColl.emplace_back(coll->isSubsetCollection()); } diff --git a/tests/root_io/read_and_write_associated.cpp b/tests/root_io/read_and_write_associated.cpp index 412c06838..00388ea42 100644 --- a/tests/root_io/read_and_write_associated.cpp +++ b/tests/root_io/read_and_write_associated.cpp @@ -1,6 +1,6 @@ -#include "podio/EventStore.h" -#include "podio/ROOTReader.h" -#include "podio/ROOTWriter.h" +#include "podio/Frame.h" +#include "podio/ROOTFrameReader.h" +#include "podio/ROOTFrameWriter.h" #include "datamodel/EventInfoCollection.h" #include "datamodel/ExampleClusterCollection.h" @@ -10,34 +10,25 @@ #include void writeCollection() { - - auto store = podio::EventStore(); - podio::ROOTWriter writer("associations.root", &store); + podio::ROOTFrameWriter writer("associations.root"); std::cout << "start writting collections...\n"; - auto& info = store.create("info"); - auto& hits = store.create("hits"); - auto& clusters = store.create("clusters"); - auto& hits_subset = store.create("hits_subset"); - hits_subset.setSubsetCollection(true); - - writer.registerForWrite("clusters"); - // writer.registerForWrite("hits"); - writer.registerForWrite("hits_subset"); unsigned nevents = 2; for (unsigned i = 0; i < nevents; ++i) { + auto event = podio::Frame{}; + auto info = EventInfoCollection{}; auto item1 = MutableEventInfo(); item1.Number(i); info.push_back(item1); - auto& evtMD = store.getEventMetaData(); - evtMD.setValue("UserEventWeight", (float)100. * i); + event.putParameter("UserEventWeight", (float)100. * i); std::cout << " event number: " << i << std::endl; - evtMD.setValue("UserEventName", std::to_string(i)); + event.putParameter("UserEventName", std::to_string(i)); + auto hits = ExampleHitCollection{}; auto hit1 = ExampleHit(0xbad, 0., 0., 0., 23. + i); auto hit2 = ExampleHit(0xcaffee, 1., 0., 0., 12. + i); @@ -45,6 +36,7 @@ void writeCollection() { hits.push_back(hit2); // Clusters + auto clusters = ExampleClusterCollection{}; auto cluster = MutableExampleCluster(); auto clu0 = MutableExampleCluster(); auto clu1 = MutableExampleCluster(); @@ -60,6 +52,8 @@ void writeCollection() { cluster.addClusters(clu1); // Add tracked hits to subset hits collection + auto hits_subset = ExampleHitCollection{}; + hits_subset.setSubsetCollection(); hits_subset.push_back(hit1); hits_subset.push_back(hit2); @@ -68,24 +62,24 @@ void writeCollection() { clusters.push_back(clu1); clusters.push_back(cluster); - writer.writeEvent(); - store.clearCollections(); + // event.put(std::move(hits), "hits"); + event.put(std::move(info), "info"); + event.put(std::move(hits_subset), "hits_subset"); + event.put(std::move(clusters), "clusters"); + + writer.writeFrame(event, podio::Category::Event); } - writer.finish(); } void readCollection() { - // Start reading the input - auto reader = podio::ROOTReader(); + auto reader = podio::ROOTFrameReader(); reader.openFile("associations.root"); - auto store = podio::EventStore(); - store.setReader(&reader); - - const auto nEvents = reader.getEntries(); + const auto nEvents = reader.getEntries(podio::Category::Event); for (unsigned i = 0; i < nEvents; ++i) { + auto store = podio::Frame(reader.readNextEntry(podio::Category::Event)); auto& clusters = store.get("clusters"); if (clusters.isValid()) { @@ -121,9 +115,6 @@ void readCollection() { } else { throw std::runtime_error("Collection 'hits_subset' should be present"); } - - store.clear(); - reader.endOfEvent(); } }