Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor the RNTupleWriter internals and reduce some code duplication #614

Merged
merged 11 commits into from
Jun 11, 2024
23 changes: 0 additions & 23 deletions include/podio/CollectionBranches.h

This file was deleted.

19 changes: 19 additions & 0 deletions include/podio/GenericParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <algorithm>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <mutex>
Expand Down Expand Up @@ -105,6 +106,10 @@ class GenericParameters {
template <typename T, typename = EnableIfValidGenericDataType<T>>
std::vector<std::string> getKeys() const;

/// Get all the available values for a given type
template <typename T, typename = EnableIfValidGenericDataType<T>>
std::vector<std::vector<T>> getValues() const;

/// erase all elements
void clear() {
_intMap.clear();
Expand Down Expand Up @@ -243,5 +248,19 @@ std::vector<std::string> GenericParameters::getKeys() const {
return keys;
}

template <typename T, typename>
std::vector<std::vector<T>> GenericParameters::getValues() const {
std::vector<std::vector<T>> values;
auto& mtx = getMutex<T>();
const auto& map = getMap<T>();
{
// Lock to avoid concurrent changes to the map while we get the stored
// values
std::lock_guard lock{mtx};
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whenever locks are being used one should document where why and when they are needed

values.reserve(map.size());
std::transform(map.begin(), map.end(), std::back_inserter(values), [](const auto& pair) { return pair.second; });
}
return values;
}
} // namespace podio
#endif
2 changes: 0 additions & 2 deletions include/podio/RNTupleReader.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#ifndef PODIO_RNTUPLEREADER_H
#define PODIO_RNTUPLEREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ICollectionProvider.h"
#include "podio/ROOTFrameData.h"
#include "podio/SchemaEvolution.h"
#include "podio/podioVersion.h"
Expand Down
56 changes: 28 additions & 28 deletions include/podio/RNTupleWriter.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#ifndef PODIO_RNTUPLEWRITER_H
#define PODIO_RNTUPLEWRITER_H

#include "podio/CollectionBase.h"
#include "podio/Frame.h"
#include "podio/GenericParameters.h"
#include "podio/SchemaEvolution.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TFile.h"
#include <ROOT/RNTuple.hxx>
Expand Down Expand Up @@ -102,42 +102,42 @@ class RNTupleWriter {
checkConsistency(const std::vector<std::string>& collsToWrite, const std::string& category) const;

private:
template <typename T>
void fillParams(GenericParameters& params, ROOT::Experimental::REntry* entry);
std::unique_ptr<ROOT::Experimental::RNTupleModel>
createModels(const std::vector<root_utils::StoreCollection>& collections);

/// Helper struct to group all the necessary information for one category.
struct CategoryInfo {
std::unique_ptr<ROOT::Experimental::RNTupleWriter> writer{nullptr}; ///< The RNTupleWriter for this category

// The following are assumed to run in parallel!
std::vector<uint32_t> ids{}; ///< The ids of all collections
std::vector<std::string> names{}; ///< The names of all collections
std::vector<std::string> types{}; ///< The types of all collections
std::vector<short> subsetCollections{}; ///< The flags identifying the subcollections
std::vector<SchemaVersionT> schemaVersions{}; ///< The schema versions of all collections

// Storage for the keys & values of all the parameters of this category
// (resp. at least the current entry)
root_utils::ParamStorage<int> intParams{};
root_utils::ParamStorage<float> floatParams{};
root_utils::ParamStorage<double> doubleParams{};
root_utils::ParamStorage<std::string> stringParams{};
};
CategoryInfo& getCategoryInfo(const std::string& category);

using StoreCollection = std::pair<const std::string&, podio::CollectionBase*>;
std::unique_ptr<ROOT::Experimental::RNTupleModel> createModels(const std::vector<StoreCollection>& collections);
template <typename T>
void fillParams(const GenericParameters& params, CategoryInfo& catInfo, ROOT::Experimental::REntry* entry);

std::unique_ptr<ROOT::Experimental::RNTupleModel> m_metadata{};
std::unique_ptr<ROOT::Experimental::RNTupleWriter> m_metadataWriter{};
template <typename T>
root_utils::ParamStorage<T>& getParamStorage(CategoryInfo& catInfo);

std::unique_ptr<TFile> m_file{};

DatamodelDefinitionCollector m_datamodelCollector{};

struct CollectionInfo {
std::vector<uint32_t> id{};
std::vector<std::string> name{};
std::vector<std::string> type{};
std::vector<short> isSubsetCollection{};
std::vector<SchemaVersionT> schemaVersion{};
std::unique_ptr<ROOT::Experimental::RNTupleWriter> writer{nullptr};
};
CollectionInfo& getCategoryInfo(const std::string& category);

std::unordered_map<std::string, CollectionInfo> m_categories{};
std::unordered_map<std::string, CategoryInfo> m_categories{};

bool m_finished{false};

std::vector<std::string> m_intkeys{}, m_floatkeys{}, m_doublekeys{}, m_stringkeys{};

std::vector<std::vector<int>> m_intvalues{};
std::vector<std::vector<float>> m_floatvalues{};
std::vector<std::vector<double>> m_doublevalues{};
std::vector<std::vector<std::string>> m_stringvalues{};

template <typename T>
std::pair<std::vector<std::string>&, std::vector<std::vector<T>>&> getKeyValueVectors();
};

} // namespace podio
Expand Down
5 changes: 2 additions & 3 deletions include/podio/ROOTLegacyReader.h
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
#ifndef PODIO_ROOTLEGACYREADER_H
#define PODIO_ROOTLEGACYREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ROOTFrameData.h"
#include "podio/podioVersion.h"
#include "podio/utilities/RootHelpers.h"

#include "TChain.h"

#include <iostream>
#include <memory>
#include <string>
#include <tuple>
Expand Down Expand Up @@ -114,7 +113,7 @@ class ROOTLegacyReader {
private:
std::pair<TTree*, unsigned> getLocalTreeAndEntry(const std::string& treename);

void createCollectionBranches(const std::vector<std::tuple<uint32_t, std::string, bool, unsigned int>>& collInfo);
void createCollectionBranches(const std::vector<root_utils::CollectionWriteInfoT>& collInfo);

podio::GenericParameters readEventMetaData();

Expand Down
3 changes: 1 addition & 2 deletions include/podio/ROOTReader.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
#ifndef PODIO_ROOTREADER_H
#define PODIO_ROOTREADER_H

#include "podio/CollectionBranches.h"
#include "podio/ROOTFrameData.h"
#include "podio/podioVersion.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TChain.h"

#include <iostream>
#include <memory>
#include <string>
#include <string_view>
Expand Down
22 changes: 8 additions & 14 deletions include/podio/ROOTWriter.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#ifndef PODIO_ROOTWRITER_H
#define PODIO_ROOTWRITER_H

#include "podio/CollectionBranches.h"
#include "podio/CollectionIDTable.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include "TFile.h"

Expand Down Expand Up @@ -100,31 +100,25 @@ class ROOTWriter {
checkConsistency(const std::vector<std::string>& collsToWrite, const std::string& category) const;

private:
using StoreCollection = std::pair<const std::string&, podio::CollectionBase*>;

// collectionID, collectionType, subsetCollection
// @note same as in rootUtils.h private header!
using CollectionInfoT = std::tuple<uint32_t, std::string, bool, unsigned int>;

/// Helper struct to group together all necessary state to write / process a
/// given category. Created during the first writing of a category
struct CategoryInfo {
TTree* tree{nullptr}; ///< The TTree to which this category is written
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::vector<CollectionInfoT> collInfo{}; ///< Collection info for this category
podio::CollectionIDTable idTable{}; ///< The collection id table for this category
std::vector<std::string> collsToWrite{}; ///< The collections to write for this category
TTree* tree{nullptr}; ///< The TTree to which this category is written
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::vector<root_utils::CollectionWriteInfoT> collInfo{}; ///< Collection info for this category
podio::CollectionIDTable idTable{}; ///< The collection id table for this category
std::vector<std::string> collsToWrite{}; ///< The collections to write for this category
};

/// Initialize the branches for this category
void initBranches(CategoryInfo& catInfo, const std::vector<StoreCollection>& collections,
void initBranches(CategoryInfo& catInfo, const std::vector<root_utils::StoreCollection>& collections,
/*const*/ podio::GenericParameters& parameters);

/// Get the (potentially uninitialized category information for this category)
CategoryInfo& getCategoryInfo(const std::string& category);

static void resetBranches(std::vector<root_utils::CollectionBranches>& branches,
const std::vector<ROOTWriter::StoreCollection>& collections,
const std::vector<root_utils::StoreCollection>& collections,
/*const*/ podio::GenericParameters* parameters);

std::unique_ptr<TFile> m_file{nullptr}; ///< The storage file
Expand Down
44 changes: 44 additions & 0 deletions include/podio/utilities/RootHelpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#ifndef PODIO_UTILITIES_ROOTHELPERS_H
#define PODIO_UTILITIES_ROOTHELPERS_H

#include "TBranch.h"

#include <string>
#include <tuple>
#include <vector>

namespace podio {
class CollectionBase;

namespace root_utils {

// A collection of additional information that describes the collection: the
// collectionID, the collection (data) type, whether it is a subset
// collection, and its schema version
using CollectionWriteInfoT = std::tuple<uint32_t, std::string, bool, unsigned int>;
// for backwards compatibility
using CollectionInfoWithoutSchemaT = std::tuple<int, std::string, bool>;

/// A collection name and a base pointer grouped together for writing
using StoreCollection = std::tuple<const std::string&, podio::CollectionBase*>;

/// Small helper struct to collect all branches that are necessary to read or
/// write a collection. Needed to cache the branch pointers and avoid having to
/// get them from a TTree/TChain for every event.
struct CollectionBranches {
TBranch* data{nullptr};
std::vector<TBranch*> refs{};
std::vector<TBranch*> vecs{};
std::vector<std::string> refNames{}; ///< The names of the relation branches
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in principle one could do indices instead of strings here, but for the sake of simplicity we stay with this.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indices would be harder to work with here IMHO, because we don't maintain a list of all TBranches that we have (let alone their order). We simply use this to group the pointers that we get from the TTree and let that do all the management.

std::vector<std::string> vecNames{}; ///< The names of the vector member branches
};

/// Pair of keys and values for one type of the ones that can be stored in
/// GenericParameters
template <typename T>
using ParamStorage = std::tuple<std::vector<std::string>, std::vector<std::vector<T>>>;

} // namespace root_utils
} // namespace podio

#endif // PODIO_UTILITIES_ROOTHELPERS_H
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ SET(root_headers
${PROJECT_SOURCE_DIR}/include/podio/ROOTLegacyReader.h
${PROJECT_SOURCE_DIR}/include/podio/ROOTWriter.h
${PROJECT_SOURCE_DIR}/include/podio/ROOTFrameData.h
${PROJECT_SOURCE_DIR}/include/podio/utilities/RootHelpers.h
)
if(ENABLE_RNTUPLE)
list(APPEND root_headers
Expand Down
Loading