Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use MurmurHash3 to hash the algorithm name for the algorithm type in ParticleIDMeta #307

Merged
merged 4 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion test/utils/test_PIDHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ void checkHandlerValidReco(const edm4hep::utils::PIDHandler& handler, const edm4
}
}

// Check that constructing a ParticleIDMeta from only an algorithm name (plus
// parameter names) derives the algorithm type from a hash of that name.
// Tagged like the other test cases in this file so that tag-filtered runs
// pick it up as well.
TEST_CASE("ParticleIDMeta constructor", "[pid_utils]") {
  using namespace edm4hep::utils;

  ParticleIDMeta pidMeta{"name", {}};
  REQUIRE(pidMeta.algoName == "name");
  REQUIRE(pidMeta.algoType() == -609270800); // 32 bit MurmurHash3 of "name"
}

TEST_CASE("PIDHandler basics", "[pid_utils]") {
using namespace edm4hep;

Expand Down Expand Up @@ -188,7 +196,7 @@ TEST_CASE("PIDHandler from Frame w/ metadata", "[pid_utils]") {

const auto pidInfo = utils::PIDHandler::getAlgoInfo(metadata, "particleIds_1").value();
REQUIRE(pidInfo.algoName == "pidAlgo_1");
REQUIRE(pidInfo.algoType == 42);
REQUIRE(pidInfo.algoType() == 42);
REQUIRE(pidInfo.paramNames.size() == 2);
REQUIRE(pidInfo.paramNames[0] == "first_param");
REQUIRE(pidInfo.paramNames[1] == "second_param");
Expand Down
1 change: 1 addition & 0 deletions utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ target_compile_features(kinematics INTERFACE cxx_std_17)

set(utils_sources
src/ParticleIDUtils.cc
src/MurmurHash3.cpp
)

add_library(utils SHARED ${utils_sources})
Expand Down
18 changes: 17 additions & 1 deletion utils/include/edm4hep/utils/ParticleIDUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,25 @@ namespace edm4hep::utils {

/// A simple struct bundling relevant metadata for a ParticleID collection
///
/// The algorithm name and the parameter names are plain public members. The
/// algorithm type is stored privately and is only readable through algoType().
struct ParticleIDMeta {
  /// Construct from an algorithm name, an explicit algorithm type and the
  /// names of the parameters
  ParticleIDMeta(const std::string& algName, int32_t algType, const std::vector<std::string>& parNames);
  /// Construct from an algorithm name and the names of the parameters, without
  /// passing an algorithm type explicitly
  ParticleIDMeta(const std::string& algName, const std::vector<std::string>& parNames);

  ~ParticleIDMeta() = default;
  ParticleIDMeta() = default;
  ParticleIDMeta(const ParticleIDMeta&) = default;
  ParticleIDMeta& operator=(const ParticleIDMeta&) = default;
  ParticleIDMeta(ParticleIDMeta&&) = default;
  ParticleIDMeta& operator=(ParticleIDMeta&&) = default;

  std::string algoName{};                ///< The name of the algorithm
  std::vector<std::string> paramNames{}; ///< The names of the parameters

  /// Get the algorithm type (0 for a default constructed object)
  int32_t algoType() const {
    return m_algoType;
  }

private:
  int32_t m_algoType{0}; ///< The (user defined) algorithm type
};

/// Get the index of the parameter in the passed ParticleID meta info
Expand Down
118 changes: 118 additions & 0 deletions utils/src/MurmurHash3.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#include "MurmurHash3.h"

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio

#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline

#include <stdlib.h>

#define ROTL32(x, y) _rotl(x, y)

// Other compilers

#else // defined(_MSC_VER)

#define FORCE_INLINE inline __attribute__((always_inline))

// Rotate the 32 bit value x to the left by r bit positions.
//
// The rotation amount is reduced modulo 32 and the complementary right shift
// is masked as well, so that no shift count can reach the operand width.
// Shifting a 32 bit value by 32 is undefined behavior, which an unmasked
// (x >> (32 - r)) runs into for r == 0.
inline uint32_t rotl32(uint32_t x, int8_t r) {
  const unsigned shift = static_cast<unsigned>(r) & 31u;
  return (x << shift) | (x >> ((32u - shift) & 31u));
}

#define ROTL32(x, y) rotl32(x, y)

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

// Read the 32 bit block at (possibly negative) offset i relative to p. The
// value is returned exactly as stored, without any byte swapping.
FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) {
  const uint32_t* const block = p + i;
  return *block;
}

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

// Final mixing step: alternate xor-shifts and multiplications (all modulo
// 2^32) over the accumulated hash state and return the mixed value.
FORCE_INLINE uint32_t fmix32(uint32_t h) {
  h = (h ^ (h >> 16)) * 0x85ebca6b;
  h = (h ^ (h >> 13)) * 0xc2b2ae35;
  return h ^ (h >> 16);
}

//-----------------------------------------------------------------------------

// Compute the 32 bit MurmurHash3 (x86 variant) of the len bytes starting at
// key, using seed as the initial hash state. The result is written as a
// single uint32_t to the location out points to.
// NOTE(review): presumably len is expected to be non-negative - confirm with
// the callers.
void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out) {
  const auto* bytes = static_cast<const uint8_t*>(key);
  const int numBlocks = len / 4;

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  uint32_t hash = seed;

  //----------
  // body: mix in every complete 4 byte block, front to back

  const auto* words = reinterpret_cast<const uint32_t*>(bytes);
  for (int blk = 0; blk < numBlocks; ++blk) {
    uint32_t k = getblock32(words, blk);

    k *= c1;
    k = ROTL32(k, 15);
    k *= c2;

    hash ^= k;
    hash = ROTL32(hash, 13);
    hash = hash * 5 + 0xe6546b64;
  }

  //----------
  // tail: fold in the up to 3 bytes that do not fill a complete block

  const uint8_t* rest = bytes + numBlocks * 4;
  const int numRest = len & 3;

  if (numRest != 0) {
    uint32_t k = 0;
    if (numRest == 3) {
      k ^= rest[2] << 16;
    }
    if (numRest >= 2) {
      k ^= rest[1] << 8;
    }
    k ^= rest[0];
    k *= c1;
    k = ROTL32(k, 15);
    k *= c2;
    hash ^= k;
  }

  //----------
  // finalization: fold in the length and mix the hash state

  hash ^= len;
  hash = fmix32(hash);

  *static_cast<uint32_t*>(out) = hash;
}
33 changes: 33 additions & 0 deletions utils/src/MurmurHash3.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

#ifndef _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards
#define _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio

#if defined(_MSC_VER) && (_MSC_VER < 1600)

typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;

// Other compilers

#else // defined(_MSC_VER)

#include <cstdint>

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------

// Compute the 32 bit MurmurHash3 (x86 variant) of a sequence of bytes.
//
// key:  start of the data to hash
// len:  number of bytes to hash
// seed: initial value of the hash state
// out:  destination that receives the hash value as a single uint32_t
void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out);

//-----------------------------------------------------------------------------

#endif // _MURMURHASH3_H_
50 changes: 35 additions & 15 deletions utils/src/ParticleIDUtils.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include <edm4hep/utils/ParticleIDUtils.h>

#include "MurmurHash3.h"

#include "edm4hep/Constants.h"

#include <podio/FrameCategories.h>
Expand All @@ -11,6 +13,22 @@

namespace edm4hep::utils {

namespace {
/// Derive a 32 bit algorithm ID from the algorithm name by hashing the name
/// with the 32 bit MurmurHash3 and a fixed seed of 0
int32_t getAlgoID(const std::string& name) {
  int32_t ID = 0;
  // The hash function takes the length as an int, so make the narrowing from
  // size_t explicit
  MurmurHash3_x86_32(name.c_str(), static_cast<int>(name.size()), 0, &ID);
  return ID;
}
} // namespace

ParticleIDMeta::ParticleIDMeta(const std::string& algName, int32_t algType, const std::vector<std::string>& parNames) :
algoName(algName), paramNames(parNames), m_algoType(algType) {
}

ParticleIDMeta::ParticleIDMeta(const std::string& algName, const std::vector<std::string>& parNames) :
algoName(algName), paramNames(parNames), m_algoType(getAlgoID(algName)) {
}

std::optional<int> getParamIndex(const ParticleIDMeta& pidMetaInfo, const std::string& param) {
const auto nameIt = std::find(pidMetaInfo.paramNames.begin(), pidMetaInfo.paramNames.end(), param);
if (nameIt != pidMetaInfo.paramNames.end()) {
Expand All @@ -31,17 +49,17 @@ void PIDHandler::addColl(const edm4hep::ParticleIDCollection& coll, const edm4he
}

/// Register the passed meta information with this handler.
///
/// The algorithm name and the algorithm type both have to be unique among the
/// registered meta information. Throws a std::runtime_error if either of them
/// is already present; in that case the handler is left as it was before the
/// call.
void PIDHandler::addMetaInfo(const edm4hep::utils::ParticleIDMeta& pidInfo) {
  const auto [algoIt, inserted] = m_algoTypes.emplace(pidInfo.algoName, pidInfo.algoType());
  if (!inserted) {
    throw std::runtime_error("Cannot have duplicate algorithm names (" + pidInfo.algoName + " already exists)");
  }

  const bool metaInserted = m_algoPidMeta.emplace(pidInfo.algoType(), pidInfo).second;
  if (!metaInserted) {
    // The name has been newly registered just above (otherwise we would have
    // thrown already), so undo that before reporting the duplicate type
    m_algoTypes.erase(algoIt);
    throw std::runtime_error("Cannot have duplicate algorithm types (" + std::to_string(pidInfo.algoType()) +
                             " already exists)");
  }
}
Expand Down Expand Up @@ -106,7 +124,7 @@ std::optional<int32_t> PIDHandler::getAlgoType(const std::string& algoName) cons
void PIDHandler::setAlgoInfo(podio::Frame& metadata, edm4hep::ParticleIDCollection& pidColl,
const std::string& collName, const edm4hep::utils::ParticleIDMeta& pidMetaInfo) {
for (auto pid : pidColl) {
pid.setAlgorithmType(pidMetaInfo.algoType);
pid.setAlgorithmType(pidMetaInfo.algoType());
}

PIDHandler::setAlgoInfo(metadata, collName, pidMetaInfo);
Expand All @@ -115,38 +133,40 @@ void PIDHandler::setAlgoInfo(podio::Frame& metadata, edm4hep::ParticleIDCollecti
/// Store the algorithm name, the algorithm type and the parameter names of
/// the passed meta information as parameters of the metadata Frame, using the
/// collection metadata parameter names built from collName
void PIDHandler::setAlgoInfo(podio::Frame& metadata, const std::string& collName,
                             const edm4hep::utils::ParticleIDMeta& pidMetaInfo) {
  metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName), pidMetaInfo.algoName);
  metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType), pidMetaInfo.algoType());
  metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidParameterNames), pidMetaInfo.paramNames);
}

std::optional<edm4hep::utils::ParticleIDMeta> PIDHandler::getAlgoInfo(const podio::Frame& metadata,
const std::string& collName) {
ParticleIDMeta pidInfo{};

#if PODIO_BUILD_VERSION > PODIO_VERSION(0, 99, 0)
auto maybeAlgoName = metadata.getParameter<std::string>(podio::collMetadataParamName(collName, edm4hep::pidAlgoName));
if (!maybeAlgoName.has_value()) {
return std::nullopt;
}

pidInfo.algoName = std::move(maybeAlgoName.value());
pidInfo.algoType = metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)).value();
pidInfo.paramNames =
ParticleIDMeta pidInfo{
std::move(maybeAlgoName.value()),
metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)).value(),
metadata
.getParameter<std::vector<std::string>>(podio::collMetadataParamName(collName, edm4hep::pidParameterNames))
.value();
.value()};

#else
pidInfo.algoName = metadata.getParameter<std::string>(podio::collMetadataParamName(collName, edm4hep::pidAlgoName));

const auto& algoName =
metadata.getParameter<std::string>(podio::collMetadataParamName(collName, edm4hep::pidAlgoName));
// Use the algoName as proxy to see whether we could actually get the
// information from the metadata
if (pidInfo.algoName.empty()) {
if (algoName.empty()) {
return std::nullopt;
}

pidInfo.algoType = metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType));
pidInfo.paramNames = metadata.getParameter<std::vector<std::string>>(
podio::collMetadataParamName(collName, edm4hep::pidParameterNames));
ParticleIDMeta pidInfo{algoName,
metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)),
metadata.getParameter<std::vector<std::string>>(
podio::collMetadataParamName(collName, edm4hep::pidParameterNames))};
#endif

return pidInfo;
Expand Down
Loading