From 024c0b15e5bf45e2cc6dd210abff4f35f114b96c Mon Sep 17 00:00:00 2001 From: tmadlener Date: Fri, 24 May 2024 16:21:05 +0200 Subject: [PATCH 01/11] Refactor root utilities and reduce code duplication - Lift common type defs into one header to not redefine them - Keep all root related utilitie classes in one header --- include/podio/CollectionBranches.h | 23 ---------------- include/podio/RNTupleReader.h | 2 -- include/podio/RNTupleWriter.h | 6 ++--- include/podio/ROOTLegacyReader.h | 5 ++-- include/podio/ROOTReader.h | 3 +-- include/podio/ROOTWriter.h | 22 ++++++--------- include/podio/utilities/RootHelpers.h | 39 +++++++++++++++++++++++++++ src/CMakeLists.txt | 1 + src/RNTupleWriter.cc | 4 +-- src/ROOTLegacyReader.cc | 4 +-- src/ROOTReader.cc | 10 +++---- src/ROOTWriter.cc | 8 +++--- src/rootUtils.h | 15 ++--------- 13 files changed, 69 insertions(+), 73 deletions(-) delete mode 100644 include/podio/CollectionBranches.h create mode 100644 include/podio/utilities/RootHelpers.h diff --git a/include/podio/CollectionBranches.h b/include/podio/CollectionBranches.h deleted file mode 100644 index 3ab3e8129..000000000 --- a/include/podio/CollectionBranches.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef PODIO_COLLECTIONBRANCHES_H -#define PODIO_COLLECTIONBRANCHES_H - -#include "TBranch.h" - -#include -#include - -namespace podio::root_utils { -/// Small helper struct to collect all branches that are necessary to read or -/// write a collection. Needed to cache the branch pointers and avoid having to -/// get them from a TTree/TChain for every event. -struct CollectionBranches { - TBranch* data{nullptr}; - std::vector refs{}; - std::vector vecs{}; - std::vector refNames{}; ///< The names of the relation branches - std::vector vecNames{}; ///< The names of the vector member branches -}; - -} // namespace podio::root_utils - -#endif diff --git a/include/podio/RNTupleReader.h b/include/podio/RNTupleReader.h index 1cffa149f..057b9fcef 100644 --- a/include/podio/RNTupleReader.h +++ b/include/podio/RNTupleReader.h @@ -1,8 +1,6 @@ #ifndef PODIO_RNTUPLEREADER_H #define PODIO_RNTUPLEREADER_H -#include "podio/CollectionBranches.h" -#include "podio/ICollectionProvider.h" #include "podio/ROOTFrameData.h" #include "podio/SchemaEvolution.h" #include "podio/podioVersion.h" diff --git a/include/podio/RNTupleWriter.h b/include/podio/RNTupleWriter.h index ed0f3012c..3aefe60db 100644 --- a/include/podio/RNTupleWriter.h +++ b/include/podio/RNTupleWriter.h @@ -1,11 +1,11 @@ #ifndef PODIO_RNTUPLEWRITER_H #define PODIO_RNTUPLEWRITER_H -#include "podio/CollectionBase.h" #include "podio/Frame.h" #include "podio/GenericParameters.h" #include "podio/SchemaEvolution.h" #include "podio/utilities/DatamodelRegistryIOHelpers.h" +#include "podio/utilities/RootHelpers.h" #include "TFile.h" #include @@ -105,8 +105,8 @@ class RNTupleWriter { template void fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry); - using StoreCollection = std::pair; - std::unique_ptr createModels(const std::vector& collections); + std::unique_ptr + createModels(const std::vector& collections); std::unique_ptr m_metadata{}; std::unique_ptr m_metadataWriter{}; diff --git a/include/podio/ROOTLegacyReader.h b/include/podio/ROOTLegacyReader.h index c379eea1c..9383ca51b 100644 --- a/include/podio/ROOTLegacyReader.h +++ b/include/podio/ROOTLegacyReader.h @@ -1,13 +1,12 @@ #ifndef PODIO_ROOTLEGACYREADER_H #define PODIO_ROOTLEGACYREADER_H -#include "podio/CollectionBranches.h" #include "podio/ROOTFrameData.h" #include "podio/podioVersion.h" +#include "podio/utilities/RootHelpers.h" #include "TChain.h" -#include #include #include #include @@ -114,7 +113,7 @@ class ROOTLegacyReader { private: std::pair getLocalTreeAndEntry(const std::string& treename); - void createCollectionBranches(const std::vector>& collInfo); + void createCollectionBranches(const std::vector& collInfo); podio::GenericParameters readEventMetaData(); diff --git a/include/podio/ROOTReader.h b/include/podio/ROOTReader.h index b14a08d17..bb6a1efdd 100644 --- a/include/podio/ROOTReader.h +++ b/include/podio/ROOTReader.h @@ -1,14 +1,13 @@ #ifndef PODIO_ROOTREADER_H #define PODIO_ROOTREADER_H -#include "podio/CollectionBranches.h" #include "podio/ROOTFrameData.h" #include "podio/podioVersion.h" #include "podio/utilities/DatamodelRegistryIOHelpers.h" +#include "podio/utilities/RootHelpers.h" #include "TChain.h" -#include #include #include #include diff --git a/include/podio/ROOTWriter.h b/include/podio/ROOTWriter.h index 06aa61a46..318f0f21f 100644 --- a/include/podio/ROOTWriter.h +++ b/include/podio/ROOTWriter.h @@ -1,9 +1,9 @@ #ifndef PODIO_ROOTWRITER_H #define PODIO_ROOTWRITER_H -#include "podio/CollectionBranches.h" #include "podio/CollectionIDTable.h" #include "podio/utilities/DatamodelRegistryIOHelpers.h" +#include "podio/utilities/RootHelpers.h" #include "TFile.h" @@ -100,31 +100,25 @@ class ROOTWriter { checkConsistency(const std::vector& collsToWrite, const std::string& category) const; private: - using StoreCollection = std::pair; - - // collectionID, collectionType, subsetCollection - // @note same as in rootUtils.h private header! - using CollectionInfoT = std::tuple; - /// Helper struct to group together all necessary state to write / process a /// given category. Created during the first writing of a category struct CategoryInfo { - TTree* tree{nullptr}; ///< The TTree to which this category is written - std::vector branches{}; ///< The branches for this category - std::vector collInfo{}; ///< Collection info for this category - podio::CollectionIDTable idTable{}; ///< The collection id table for this category - std::vector collsToWrite{}; ///< The collections to write for this category + TTree* tree{nullptr}; ///< The TTree to which this category is written + std::vector branches{}; ///< The branches for this category + std::vector collInfo{}; ///< Collection info for this category + podio::CollectionIDTable idTable{}; ///< The collection id table for this category + std::vector collsToWrite{}; ///< The collections to write for this category }; /// Initialize the branches for this category - void initBranches(CategoryInfo& catInfo, const std::vector& collections, + void initBranches(CategoryInfo& catInfo, const std::vector& collections, /*const*/ podio::GenericParameters& parameters); /// Get the (potentially uninitialized category information for this category) CategoryInfo& getCategoryInfo(const std::string& category); static void resetBranches(std::vector& branches, - const std::vector& collections, + const std::vector& collections, /*const*/ podio::GenericParameters* parameters); std::unique_ptr m_file{nullptr}; ///< The storage file diff --git a/include/podio/utilities/RootHelpers.h b/include/podio/utilities/RootHelpers.h new file mode 100644 index 000000000..d941dc49a --- /dev/null +++ b/include/podio/utilities/RootHelpers.h @@ -0,0 +1,39 @@ +#ifndef PODIO_UTILITIES_ROOT_HELPERS_H +#define PODIO_UTILITIES_ROOT_HELPERS_H + +#include "TBranch.h" + +#include +#include +#include + +namespace podio { +class CollectionBase; + +namespace root_utils { + + // A collection of additional information that describes the collection: the + // collectionID, the collection (data) type, whether it is a subset + // collection, and its schema version + using CollectionWriteInfoT = std::tuple; + // for backwards compatibility + using CollectionInfoWithoutSchemaT = std::tuple; + + /// A collection name and a base pointer grouped together for writing + using StoreCollection = std::pair; + + /// Small helper struct to collect all branches that are necessary to read or + /// write a collection. Needed to cache the branch pointers and avoid having to + /// get them from a TTree/TChain for every event. + struct CollectionBranches { + TBranch* data{nullptr}; + std::vector refs{}; + std::vector vecs{}; + std::vector refNames{}; ///< The names of the relation branches + std::vector vecNames{}; ///< The names of the vector member branches + }; + +} // namespace root_utils +} // namespace podio + +#endif // PODIO_UTILITIES_ROOT_HELPERS_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2e67cf5ba..a25704707 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -94,6 +94,7 @@ SET(root_headers ${PROJECT_SOURCE_DIR}/include/podio/ROOTLegacyReader.h ${PROJECT_SOURCE_DIR}/include/podio/ROOTWriter.h ${PROJECT_SOURCE_DIR}/include/podio/ROOTFrameData.h + ${PROJECT_SOURCE_DIR}/include/podio/utilities/RootHelpers.h ) if(ENABLE_RNTUPLE) list(APPEND root_headers diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index 38231cc18..daa6d6dc6 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -80,7 +80,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat catInfo.name = root_utils::sortAlphabeticaly(collsToWrite); } - std::vector collections; + std::vector collections; collections.reserve(catInfo.name.size()); // Only loop over the collections that were requested in the first Frame of // this category @@ -188,7 +188,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat } std::unique_ptr -RNTupleWriter::createModels(const std::vector& collections) { +RNTupleWriter::createModels(const std::vector& collections) { auto model = ROOT::Experimental::RNTupleModel::CreateBare(); #if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) diff --git a/src/ROOTLegacyReader.cc b/src/ROOTLegacyReader.cc index c5815e4e3..83f160615 100644 --- a/src/ROOTLegacyReader.cc +++ b/src/ROOTLegacyReader.cc @@ -138,7 +138,7 @@ void ROOTLegacyReader::openFiles(const std::vector& filenames) { // Check if the CollectionTypeInfo branch is there and assume that the file // has been written with with podio pre #197 (<0.13.1) if that is not the case if (auto* collInfoBranch = root_utils::getBranch(metadatatree, "CollectionTypeInfo")) { - auto collectionInfo = new std::vector; + auto collectionInfo = new std::vector; if (m_fileVersion < podio::version::Version{0, 16, 4}) { auto oldCollInfo = new std::vector(); @@ -171,7 +171,7 @@ unsigned ROOTLegacyReader::getEntries(const std::string& name) const { return m_chain->GetEntries(); } -void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { +void ROOTLegacyReader::createCollectionBranches(const std::vector& collInfo) { size_t collectionIndex{0}; for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) { diff --git a/src/ROOTReader.cc b/src/ROOTReader.cc index 928000c7a..fab05d112 100644 --- a/src/ROOTReader.cc +++ b/src/ROOTReader.cc @@ -21,11 +21,11 @@ namespace podio { std::tuple, std::vector>> createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo); + const std::vector& collInfo); std::tuple, std::vector>> createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo); + const std::vector& collInfo); GenericParameters ROOTReader::readEntryParameters(ROOTReader::CategoryInfo& catInfo, bool reloadBranches, unsigned int localEntry) { @@ -137,7 +137,7 @@ void ROOTReader::initCategory(CategoryInfo& catInfo, const std::string& category auto* collInfoBranch = root_utils::getBranch(m_metaChain.get(), root_utils::collInfoName(category)); - auto collInfo = new std::vector(); + auto collInfo = new std::vector(); if (m_fileVersion < podio::version::Version{0, 16, 4}) { auto oldCollInfo = new std::vector(); collInfoBranch->SetAddress(&oldCollInfo); @@ -253,7 +253,7 @@ std::vector ROOTReader::getAvailableCategories() const { std::tuple, std::vector>> createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo) { + const std::vector& collInfo) { size_t collectionIndex{0}; std::vector collBranches; @@ -305,7 +305,7 @@ createCollectionBranchesIndexBased(TChain* chain, const podio::CollectionIDTable std::tuple, std::vector>> createCollectionBranches(TChain* chain, const podio::CollectionIDTable& idTable, - const std::vector& collInfo) { + const std::vector& collInfo) { size_t collectionIndex{0}; std::vector collBranches; diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index b38bc145b..6d7d66663 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -37,7 +37,7 @@ void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego catInfo.tree->SetDirectory(m_file.get()); } - std::vector collections; + std::vector collections; collections.reserve(catInfo.collsToWrite.size()); for (const auto& name : catInfo.collsToWrite) { auto* coll = frame.getCollectionForWrite(name); @@ -76,7 +76,7 @@ ROOTWriter::CategoryInfo& ROOTWriter::getCategoryInfo(const std::string& categor return it->second; } -void ROOTWriter::initBranches(CategoryInfo& catInfo, const std::vector& collections, +void ROOTWriter::initBranches(CategoryInfo& catInfo, const std::vector& collections, /*const*/ podio::GenericParameters& parameters) { catInfo.branches.reserve(collections.size() + 1); // collections + parameters @@ -118,7 +118,7 @@ void ROOTWriter::initBranches(CategoryInfo& catInfo, const std::vectorgetTypeName(), + catInfo.collInfo.emplace_back(catInfo.idTable.collectionID(name).value(), std::string(coll->getTypeName()), coll->isSubsetCollection(), coll->getSchemaVersion()); } @@ -129,7 +129,7 @@ void ROOTWriter::initBranches(CategoryInfo& catInfo, const std::vector& branches, - const std::vector& collections, + const std::vector& collections, /*const*/ podio::GenericParameters* parameters) { size_t iColl = 0; for (auto& coll : collections) { diff --git a/src/rootUtils.h b/src/rootUtils.h index 5fb7aa7d3..c9e7c1238 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -1,14 +1,10 @@ #ifndef PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy #define PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy -#include "podio/CollectionBase.h" -#include "podio/CollectionBranches.h" -#include "podio/CollectionBuffers.h" #include "podio/CollectionIDTable.h" +#include "podio/utilities/RootHelpers.h" #include "TBranch.h" -#include "TChain.h" -#include "TClass.h" #include "TTree.h" #include @@ -203,13 +199,6 @@ inline void setCollectionAddresses(const BufferT& collBuffers, const CollectionB } } -// A collection of additional information that describes the collection: the -// collectionID, the collection (data) type, whether it is a subset -// collection, and its schema version -using CollectionInfoT = std::tuple; -// for backwards compatibility -using CollectionInfoWithoutSchemaT = std::tuple; - inline void readBranchesData(const CollectionBranches& branches, Long64_t entry) { // Read all data if (branches.data) { @@ -233,7 +222,7 @@ inline void readBranchesData(const CollectionBranches& branches, Long64_t entry) * collections */ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable const& idTable) { - std::vector collInfo; + std::vector collInfo; for (size_t iColl = 0; iColl < idTable.names().size(); ++iColl) { const auto collID = idTable.ids()[iColl]; From 2af5336ccb2de8b5d7b421334a10f85ead4a8a0b Mon Sep 17 00:00:00 2001 From: tmadlener Date: Fri, 24 May 2024 17:26:15 +0200 Subject: [PATCH 02/11] Rename CollectionInfo to CategoryInfo to better convey intention --- include/podio/RNTupleWriter.h | 21 ++++++++++++--------- src/RNTupleWriter.cc | 34 +++++++++++++++++----------------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/include/podio/RNTupleWriter.h b/include/podio/RNTupleWriter.h index 3aefe60db..ff3f7f77f 100644 --- a/include/podio/RNTupleWriter.h +++ b/include/podio/RNTupleWriter.h @@ -115,17 +115,20 @@ class RNTupleWriter { DatamodelDefinitionCollector m_datamodelCollector{}; - struct CollectionInfo { - std::vector id{}; - std::vector name{}; - std::vector type{}; - std::vector isSubsetCollection{}; - std::vector schemaVersion{}; - std::unique_ptr writer{nullptr}; + /// Helper struct to group all the necessary information for one category. + struct CategoryInfo { + std::unique_ptr writer{nullptr}; ///< The RNTupleWriter for this category + + // The following are assumed to run in parallel! + std::vector ids{}; ///< The ids of all collections + std::vector names{}; ///< The names of all collections + std::vector types{}; ///< The types of all collections + std::vector subsetCollections{}; ///< The flags identifying the subcollections + std::vector schemaVersions{}; ///< The schema versions of all collections }; - CollectionInfo& getCategoryInfo(const std::string& category); + CategoryInfo& getCategoryInfo(const std::string& category); - std::unordered_map m_categories{}; + std::unordered_map m_categories{}; bool m_finished{false}; diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index daa6d6dc6..d777c0fae 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -77,14 +77,14 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat const bool new_category = (catInfo.writer == nullptr); if (new_category) { // This is the minimal information that we need for now - catInfo.name = root_utils::sortAlphabeticaly(collsToWrite); + catInfo.names = root_utils::sortAlphabeticaly(collsToWrite); } std::vector collections; - collections.reserve(catInfo.name.size()); + collections.reserve(catInfo.names.size()); // Only loop over the collections that were requested in the first Frame of // this category - for (const auto& name : catInfo.name) { + for (const auto& name : catInfo.names) { auto* coll = frame.getCollectionForWrite(name); if (!coll) { // Make sure all collections that we want to write are actually available @@ -101,15 +101,15 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat catInfo.writer = ROOT::Experimental::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {}); for (const auto& [name, coll] : collections) { - catInfo.id.emplace_back(coll->getID()); - catInfo.type.emplace_back(coll->getTypeName()); - catInfo.isSubsetCollection.emplace_back(coll->isSubsetCollection()); - catInfo.schemaVersion.emplace_back(coll->getSchemaVersion()); + catInfo.ids.emplace_back(coll->getID()); + catInfo.types.emplace_back(coll->getTypeName()); + catInfo.subsetCollections.emplace_back(coll->isSubsetCollection()); + catInfo.schemaVersions.emplace_back(coll->getSchemaVersion()); } } else { - if (!root_utils::checkConsistentColls(catInfo.name, collsToWrite)) { + if (!root_utils::checkConsistentColls(catInfo.names, collsToWrite)) { throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content. " + - root_utils::getInconsistentCollsMsg(catInfo.name, collsToWrite)); + root_utils::getInconsistentCollsMsg(catInfo.names, collsToWrite)); } } @@ -260,12 +260,12 @@ RNTupleWriter::createModels(const std::vector& coll return model; } -RNTupleWriter::CollectionInfo& RNTupleWriter::getCategoryInfo(const std::string& category) { +RNTupleWriter::CategoryInfo& RNTupleWriter::getCategoryInfo(const std::string& category) { if (auto it = m_categories.find(category); it != m_categories.end()) { return it->second; } - auto [it, _] = m_categories.try_emplace(category, CollectionInfo{}); + auto [it, _] = m_categories.try_emplace(category, CategoryInfo{}); return it->second; } @@ -287,15 +287,15 @@ void RNTupleWriter::finish() { for (auto& [category, collInfo] : m_categories) { auto idField = m_metadata->MakeField>({root_utils::idTableName(category)}); - *idField = collInfo.id; + *idField = collInfo.ids; auto collectionNameField = m_metadata->MakeField>({root_utils::collectionName(category)}); - *collectionNameField = collInfo.name; + *collectionNameField = collInfo.names; auto collectionTypeField = m_metadata->MakeField>({root_utils::collInfoName(category)}); - *collectionTypeField = collInfo.type; + *collectionTypeField = collInfo.types; auto subsetCollectionField = m_metadata->MakeField>({root_utils::subsetCollection(category)}); - *subsetCollectionField = collInfo.isSubsetCollection; + *subsetCollectionField = collInfo.subsetCollections; auto schemaVersionField = m_metadata->MakeField>({"schemaVersion_" + category}); - *schemaVersionField = collInfo.schemaVersion; + *schemaVersionField = collInfo.schemaVersions; } m_metadata->Freeze(); @@ -319,7 +319,7 @@ void RNTupleWriter::finish() { std::tuple, std::vector> RNTupleWriter::checkConsistency(const std::vector& collsToWrite, const std::string& category) const { if (const auto it = m_categories.find(category); it != m_categories.end()) { - return root_utils::getInconsistentColls(it->second.name, collsToWrite); + return root_utils::getInconsistentColls(it->second.names, collsToWrite); } return {std::vector{}, collsToWrite}; From 283be1d0aef96a02d767851002a3db1c2a1ca4a2 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Fri, 24 May 2024 17:28:54 +0200 Subject: [PATCH 03/11] Remove unnecessary member variables --- include/podio/RNTupleWriter.h | 11 ++++------- src/RNTupleWriter.cc | 29 ++++++++++++++--------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/include/podio/RNTupleWriter.h b/include/podio/RNTupleWriter.h index ff3f7f77f..d5c3d64a8 100644 --- a/include/podio/RNTupleWriter.h +++ b/include/podio/RNTupleWriter.h @@ -108,13 +108,6 @@ class RNTupleWriter { std::unique_ptr createModels(const std::vector& collections); - std::unique_ptr m_metadata{}; - std::unique_ptr m_metadataWriter{}; - - std::unique_ptr m_file{}; - - DatamodelDefinitionCollector m_datamodelCollector{}; - /// Helper struct to group all the necessary information for one category. struct CategoryInfo { std::unique_ptr writer{nullptr}; ///< The RNTupleWriter for this category @@ -128,6 +121,10 @@ class RNTupleWriter { }; CategoryInfo& getCategoryInfo(const std::string& category); + std::unique_ptr m_file{}; + + DatamodelDefinitionCollector m_datamodelCollector{}; + std::unordered_map m_categories{}; bool m_finished{false}; diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index d777c0fae..9000e7358 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -17,7 +17,6 @@ namespace podio { RNTupleWriter::RNTupleWriter(const std::string& filename) : - m_metadata(ROOT::Experimental::RNTupleModel::Create()), m_file(new TFile(filename.c_str(), "RECREATE", "data file")) { } @@ -270,39 +269,39 @@ RNTupleWriter::CategoryInfo& RNTupleWriter::getCategoryInfo(const std::string& c } void RNTupleWriter::finish() { + auto metadata = ROOT::Experimental::RNTupleModel::Create(); auto podioVersion = podio::version::build_version; - auto versionField = m_metadata->MakeField>(root_utils::versionBranchName); + auto versionField = metadata->MakeField>(root_utils::versionBranchName); *versionField = {podioVersion.major, podioVersion.minor, podioVersion.patch}; auto edmDefinitions = m_datamodelCollector.getDatamodelDefinitionsToWrite(); - auto edmField = - m_metadata->MakeField>>(root_utils::edmDefBranchName); + auto edmField = metadata->MakeField>>(root_utils::edmDefBranchName); *edmField = std::move(edmDefinitions); - auto availableCategoriesField = m_metadata->MakeField>(root_utils::availableCategories); + auto availableCategoriesField = metadata->MakeField>(root_utils::availableCategories); for (auto& [c, _] : m_categories) { availableCategoriesField->push_back(c); } for (auto& [category, collInfo] : m_categories) { - auto idField = m_metadata->MakeField>({root_utils::idTableName(category)}); + auto idField = metadata->MakeField>({root_utils::idTableName(category)}); *idField = collInfo.ids; - auto collectionNameField = m_metadata->MakeField>({root_utils::collectionName(category)}); + auto collectionNameField = metadata->MakeField>({root_utils::collectionName(category)}); *collectionNameField = collInfo.names; - auto collectionTypeField = m_metadata->MakeField>({root_utils::collInfoName(category)}); + auto collectionTypeField = metadata->MakeField>({root_utils::collInfoName(category)}); *collectionTypeField = collInfo.types; - auto subsetCollectionField = m_metadata->MakeField>({root_utils::subsetCollection(category)}); + auto subsetCollectionField = metadata->MakeField>({root_utils::subsetCollection(category)}); *subsetCollectionField = collInfo.subsetCollections; - auto schemaVersionField = m_metadata->MakeField>({"schemaVersion_" + category}); + auto schemaVersionField = metadata->MakeField>({"schemaVersion_" + category}); *schemaVersionField = collInfo.schemaVersions; } - m_metadata->Freeze(); - m_metadataWriter = - ROOT::Experimental::RNTupleWriter::Append(std::move(m_metadata), root_utils::metaTreeName, *m_file, {}); + metadata->Freeze(); + auto metadataWriter = + ROOT::Experimental::RNTupleWriter::Append(std::move(metadata), root_utils::metaTreeName, *m_file, {}); - m_metadataWriter->Fill(); + metadataWriter->Fill(); m_file->Write(); @@ -311,7 +310,7 @@ void RNTupleWriter::finish() { for (auto& [_, catInfo] : m_categories) { catInfo.writer.reset(); } - m_metadataWriter.reset(); + metadataWriter.reset(); m_finished = true; } From 345058667818cc677dae7aa553c8e9b4b28d29cc Mon Sep 17 00:00:00 2001 From: tmadlener Date: Fri, 24 May 2024 16:21:21 +0200 Subject: [PATCH 04/11] Add functionality to get all values for a type to GenericParameters --- include/podio/GenericParameters.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index 55119a2e8..b4819f198 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -105,6 +106,10 @@ class GenericParameters { template > std::vector getKeys() const; + /// Get all the available values for a given type + template > + std::vector> getValues() const; + /// erase all elements void clear() { _intMap.clear(); @@ -243,5 +248,18 @@ std::vector GenericParameters::getKeys() const { return keys; } +template +std::vector> GenericParameters::getValues() const { + std::vector> values; + const auto& map = getMap(); + values.reserve(map.size()); + + { + auto& mtx = getMutex(); + std::lock_guard lock{mtx}; + std::transform(map.begin(), map.end(), std::back_inserter(values), [](const auto& pair) { return pair.second; }); + } + return values; +} } // namespace podio #endif From 80d267af770cd92c829f9f271482e2d8cfff3511 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Fri, 24 May 2024 17:46:12 +0200 Subject: [PATCH 05/11] Move some global state into category state --- include/podio/RNTupleWriter.h | 26 ++++++++-------- include/podio/utilities/RootHelpers.h | 5 ++++ src/RNTupleWriter.cc | 43 +++++++++++---------------- 3 files changed, 36 insertions(+), 38 deletions(-) diff --git a/include/podio/RNTupleWriter.h b/include/podio/RNTupleWriter.h index d5c3d64a8..d8a53ba43 100644 --- a/include/podio/RNTupleWriter.h +++ b/include/podio/RNTupleWriter.h @@ -102,9 +102,6 @@ class RNTupleWriter { checkConsistency(const std::vector& collsToWrite, const std::string& category) const; private: - template - void fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry); - std::unique_ptr createModels(const std::vector& collections); @@ -118,9 +115,22 @@ class RNTupleWriter { std::vector types{}; ///< The types of all collections std::vector subsetCollections{}; ///< The flags identifying the subcollections std::vector schemaVersions{}; ///< The schema versions of all collections + + // Storage for the keys & values of all the parameters of this category + // (resp. at least the current entry) + root_utils::ParamStorage intParams{}; + root_utils::ParamStorage floatParams{}; + root_utils::ParamStorage doubleParams{}; + root_utils::ParamStorage stringParams{}; }; CategoryInfo& getCategoryInfo(const std::string& category); + template + void fillParams(const GenericParameters& params, CategoryInfo& catInfo, ROOT::Experimental::REntry* entry); + + template + root_utils::ParamStorage& getParamStorage(CategoryInfo& catInfo); + std::unique_ptr m_file{}; DatamodelDefinitionCollector m_datamodelCollector{}; @@ -128,16 +138,6 @@ class RNTupleWriter { std::unordered_map m_categories{}; bool m_finished{false}; - - std::vector m_intkeys{}, m_floatkeys{}, m_doublekeys{}, m_stringkeys{}; - - std::vector> m_intvalues{}; - std::vector> m_floatvalues{}; - std::vector> m_doublevalues{}; - std::vector> m_stringvalues{}; - - template - std::pair&, std::vector>&> getKeyValueVectors(); }; } // namespace podio diff --git a/include/podio/utilities/RootHelpers.h b/include/podio/utilities/RootHelpers.h index d941dc49a..5921e5c99 100644 --- a/include/podio/utilities/RootHelpers.h +++ b/include/podio/utilities/RootHelpers.h @@ -33,6 +33,11 @@ namespace root_utils { std::vector vecNames{}; ///< The names of the vector member branches }; + /// Pair of keys and values for one type of the ones that can be stored in + /// GenericParameters + template + using ParamStorage = std::tuple, std::vector>>; + } // namespace root_utils } // namespace podio diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index 9000e7358..cf84ff0a4 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -27,40 +27,33 @@ RNTupleWriter::~RNTupleWriter() { } template -std::pair&, std::vector>&> RNTupleWriter::getKeyValueVectors() { +root_utils::ParamStorage& RNTupleWriter::getParamStorage(CategoryInfo& catInfo) { if constexpr (std::is_same_v) { - return {m_intkeys, m_intvalues}; + return catInfo.intParams; } else if constexpr (std::is_same_v) { - return {m_floatkeys, m_floatvalues}; + return catInfo.floatParams; } else if constexpr (std::is_same_v) { - return {m_doublekeys, m_doublevalues}; + return catInfo.doubleParams; } else if constexpr (std::is_same_v) { - return {m_stringkeys, m_stringvalues}; + return catInfo.stringParams; } else { throw std::runtime_error("Unknown type"); } } template -void RNTupleWriter::fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry) { - auto [key, value] = getKeyValueVectors(); +void RNTupleWriter::fillParams(const GenericParameters& params, CategoryInfo& catInfo, + ROOT::Experimental::REntry* entry) { + auto& [keys, values] = getParamStorage(catInfo); + keys = params.getKeys(); + values = params.getValues(); #if ROOT_VERSION_CODE >= ROOT_VERSION(6, 31, 0) - entry->BindRawPtr(root_utils::getGPKeyName(), &key); - entry->BindRawPtr(root_utils::getGPValueName(), &value); + entry->BindRawPtr(root_utils::getGPKeyName(), &keys); + entry->BindRawPtr(root_utils::getGPValueName(), &values); #else - entry->CaptureValueUnsafe(root_utils::getGPKeyName(), &key); - entry->CaptureValueUnsafe(root_utils::getGPValueName(), &value); + entry->CaptureValueUnsafe(root_utils::getGPKeyName(), &keys); + entry->CaptureValueUnsafe(root_utils::getGPValueName(), &values); #endif - - key.clear(); - key.reserve(params.getMap().size()); - value.clear(); - value.reserve(params.getMap().size()); - - for (auto& [kk, vv] : params.getMap()) { - key.emplace_back(kk); - value.emplace_back(vv); - } } void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& category) { @@ -178,10 +171,10 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat } auto params = frame.getParameters(); - fillParams(params, entry.get()); - fillParams(params, entry.get()); - fillParams(params, entry.get()); - fillParams(params, entry.get()); + fillParams(params, catInfo, entry.get()); + fillParams(params, catInfo, entry.get()); + fillParams(params, catInfo, entry.get()); + fillParams(params, catInfo, entry.get()); m_categories[category].writer->Fill(*entry); } From a882b519a236e3292425320789ad465b5ec3432f Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 5 Jun 2024 13:27:47 +0200 Subject: [PATCH 06/11] Avoid unnecessary copy --- src/RNTupleWriter.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index cf84ff0a4..e2ce337f6 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -170,7 +170,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat // &const_cast(frame.getParameters())); } - auto params = frame.getParameters(); + const auto& params = frame.getParameters(); fillParams(params, catInfo, entry.get()); fillParams(params, catInfo, entry.get()); fillParams(params, catInfo, entry.get()); From 10d1b3b76520d67a14ee20bc38b1ac862885005e Mon Sep 17 00:00:00 2001 From: tmadlener Date: Wed, 5 Jun 2024 16:07:52 +0200 Subject: [PATCH 07/11] Fix include guards to conform to style guide --- include/podio/utilities/RootHelpers.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/podio/utilities/RootHelpers.h b/include/podio/utilities/RootHelpers.h index 5921e5c99..5bc49915a 100644 --- a/include/podio/utilities/RootHelpers.h +++ b/include/podio/utilities/RootHelpers.h @@ -1,5 +1,5 @@ -#ifndef PODIO_UTILITIES_ROOT_HELPERS_H -#define PODIO_UTILITIES_ROOT_HELPERS_H +#ifndef PODIO_UTILITIES_ROOTHELPERS_H +#define PODIO_UTILITIES_ROOTHELPERS_H #include "TBranch.h" @@ -41,4 +41,4 @@ namespace root_utils { } // namespace root_utils } // namespace podio -#endif // PODIO_UTILITIES_ROOT_HELPERS_H +#endif // PODIO_UTILITIES_ROOTHELPERS_H From c4b48d4022ed03a146538acab699fe0eb3354858 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 11 Jun 2024 10:31:12 +0200 Subject: [PATCH 08/11] Make sure to take lock long enough --- include/podio/GenericParameters.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index b4819f198..779bef8ac 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -251,12 +251,11 @@ std::vector GenericParameters::getKeys() const { template std::vector> GenericParameters::getValues() const { std::vector> values; - const auto& map = getMap(); - values.reserve(map.size()); - { auto& mtx = getMutex(); + const auto& map = getMap(); std::lock_guard lock{mtx}; + values.reserve(map.size()); std::transform(map.begin(), map.end(), std::back_inserter(values), [](const auto& pair) { return pair.second; }); } return values; From 8fd68112814012f360929f2ead9987a2d7667363 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 11 Jun 2024 10:31:23 +0200 Subject: [PATCH 09/11] Switch to tuple for consistency --- include/podio/utilities/RootHelpers.h | 2 +- src/ROOTWriter.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/podio/utilities/RootHelpers.h b/include/podio/utilities/RootHelpers.h index 5bc49915a..55b97a74c 100644 --- a/include/podio/utilities/RootHelpers.h +++ b/include/podio/utilities/RootHelpers.h @@ -20,7 +20,7 @@ namespace root_utils { using CollectionInfoWithoutSchemaT = std::tuple; /// A collection name and a base pointer grouped together for writing - using StoreCollection = std::pair; + using StoreCollection = std::tuple; /// Small helper struct to collect all branches that are necessary to read or /// write a collection. Needed to cache the branch pointers and avoid having to diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index 6d7d66663..9e46bf942 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -132,9 +132,9 @@ void ROOTWriter::resetBranches(std::vector& bran const std::vector& collections, /*const*/ podio::GenericParameters* parameters) { size_t iColl = 0; - for (auto& coll : collections) { + for (auto& [_, coll] : collections) { const auto& collBranches = branches[iColl]; - root_utils::setCollectionAddresses(coll.second->getBuffers(), collBranches); + root_utils::setCollectionAddresses(coll->getBuffers(), collBranches); iColl++; } From ba9f0906c523ab66a0931ab898f6f2fdef542d9a Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 11 Jun 2024 10:53:46 +0200 Subject: [PATCH 10/11] Remove obsolete reset --- src/RNTupleWriter.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index e2ce337f6..decdac8e4 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -303,7 +303,6 @@ void RNTupleWriter::finish() { for (auto& [_, catInfo] : m_categories) { catInfo.writer.reset(); } - metadataWriter.reset(); m_finished = true; } From c48790b8207735db3eb4cab83aabbea073b7a5e8 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Tue, 11 Jun 2024 15:57:39 +0200 Subject: [PATCH 11/11] Comment usage of the mutex --- include/podio/GenericParameters.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/podio/GenericParameters.h b/include/podio/GenericParameters.h index 779bef8ac..60adf8825 100644 --- a/include/podio/GenericParameters.h +++ b/include/podio/GenericParameters.h @@ -251,9 +251,11 @@ std::vector GenericParameters::getKeys() const { template std::vector> GenericParameters::getValues() const { std::vector> values; + auto& mtx = getMutex(); + const auto& map = getMap(); { - auto& mtx = getMutex(); - const auto& map = getMap(); + // Lock to avoid concurrent changes to the map while we get the stored + // values std::lock_guard lock{mtx}; values.reserve(map.size()); std::transform(map.begin(), map.end(), std::back_inserter(values), [](const auto& pair) { return pair.second; });