Skip to content

Commit

Permalink
Use MurmurHash3 to hash the algorithm name for the algorithm type in …
Browse files Browse the repository at this point in the history
…ParticleIDMeta (key4hep#307)
  • Loading branch information
tmadlener authored Jun 11, 2024
1 parent 0784e02 commit 2ee9646
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 17 deletions.
10 changes: 9 additions & 1 deletion test/utils/test_PIDHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ void checkHandlerValidReco(const edm4hep::utils::PIDHandler& handler, const edm4
}
}

// Checks the ParticleIDMeta constructor that takes only an algorithm name and
// the parameter names. Tagged like the other test cases in this file so that it
// is picked up when running with the [pid_utils] tag filter.
TEST_CASE("ParticleIDMeta constructor", "[pid_utils]") {
  using namespace edm4hep::utils;

  // No algorithm type is passed explicitly, it has to come from the name
  const ParticleIDMeta pidMeta{"name", {}};
  REQUIRE(pidMeta.algoName == "name");
  REQUIRE(pidMeta.algoType() == -609270800); // 32 bit MurmurHash3 of "name"
  REQUIRE(pidMeta.paramNames.empty());
}

TEST_CASE("PIDHandler basics", "[pid_utils]") {
using namespace edm4hep;

Expand Down Expand Up @@ -188,7 +196,7 @@ TEST_CASE("PIDHandler from Frame w/ metadata", "[pid_utils]") {

const auto pidInfo = utils::PIDHandler::getAlgoInfo(metadata, "particleIds_1").value();
REQUIRE(pidInfo.algoName == "pidAlgo_1");
REQUIRE(pidInfo.algoType == 42);
REQUIRE(pidInfo.algoType() == 42);
REQUIRE(pidInfo.paramNames.size() == 2);
REQUIRE(pidInfo.paramNames[0] == "first_param");
REQUIRE(pidInfo.paramNames[1] == "second_param");
Expand Down
1 change: 1 addition & 0 deletions utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ target_compile_features(kinematics INTERFACE cxx_std_17)

# Source files that are compiled into the utils library
set(utils_sources
src/ParticleIDUtils.cc
src/MurmurHash3.cpp
)

add_library(utils SHARED ${utils_sources})
Expand Down
18 changes: 17 additions & 1 deletion utils/include/edm4hep/utils/ParticleIDUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,25 @@ namespace edm4hep::utils {

/// A simple struct bundling relevant metadata for a ParticleID collection
/// A simple struct bundling relevant metadata for a ParticleID collection
///
/// The algorithm name and the parameter names are plain public members. The
/// algorithm type is fixed at construction and is only readable through
/// algoType(), so there is no public data member of that name.
struct ParticleIDMeta {
  /// Construct the meta information with an explicitly chosen algorithm type
  ///
  /// @param algName  the name of the algorithm
  /// @param algType  the (user defined) algorithm type
  /// @param parNames the names of the parameters
  ParticleIDMeta(const std::string& algName, int32_t algType, const std::vector<std::string>& parNames);

  /// Construct the meta information from the algorithm name and the parameter
  /// names only. The algorithm type is determined by the implementation from
  /// the algorithm name.
  ///
  /// @param algName  the name of the algorithm
  /// @param parNames the names of the parameters
  ParticleIDMeta(const std::string& algName, const std::vector<std::string>& parNames);

  ~ParticleIDMeta() = default;
  ParticleIDMeta() = default;
  ParticleIDMeta(const ParticleIDMeta&) = default;
  ParticleIDMeta& operator=(const ParticleIDMeta&) = default;
  ParticleIDMeta(ParticleIDMeta&&) = default;
  ParticleIDMeta& operator=(ParticleIDMeta&&) = default;

  std::string algoName{};                ///< The name of the algorithm
  std::vector<std::string> paramNames{}; ///< The names of the parameters

  /// Get the algorithm type (0 for a default constructed object)
  int32_t algoType() const {
    return m_algoType;
  }

private:
  int32_t m_algoType{0}; ///< The (user defined) algorithm type
};

/// Get the index of the parameter in the passed ParticleID meta info
Expand Down
118 changes: 118 additions & 0 deletions utils/src/MurmurHash3.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

#include "MurmurHash3.h"

#include <cstring>

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio

#if defined(_MSC_VER)

#define FORCE_INLINE __forceinline

#include <stdlib.h>

#define ROTL32(x, y) _rotl(x, y)

// Other compilers

#else // defined(_MSC_VER)

#define FORCE_INLINE inline __attribute__((always_inline))

/// Rotate the 32 bit value x to the left by r bits (r in [0, 31]).
///
/// The right shift count is masked so that r == 0 does not shift by the full
/// width of the type, which would be undefined behaviour.
inline uint32_t rotl32(uint32_t x, int8_t r) {
  return (x << r) | (x >> ((32 - r) & 31));
}

#define ROTL32(x, y) rotl32(x, y)

#endif // !defined(_MSC_VER)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

/// Read the i-th 4 byte block starting at p in native byte order.
///
/// The bytes are copied instead of being read through a uint32_t pointer,
/// because the key passed to the hash function is an arbitrary byte buffer
/// that does not have to be aligned for uint32_t.
FORCE_INLINE uint32_t getblock32(const uint8_t* p, int i) {
  uint32_t block = 0;
  std::memcpy(&block, p + static_cast<std::size_t>(i) * sizeof(block), sizeof(block));
  return block;
}

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

/// Apply the final xor-shift / multiply rounds to the hash state h.
FORCE_INLINE uint32_t fmix32(uint32_t h) {
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//-----------------------------------------------------------------------------

/// Compute the 32 bit MurmurHash3 (x86 variant) of a byte buffer.
///
/// Blocks are read in native byte order, so the result depends on the
/// endianness of the platform.
///
/// @param key  pointer to the first of len bytes to hash (no alignment
///             requirement)
/// @param len  number of bytes to hash, must not be negative
/// @param seed initial value of the hash state
/// @param out  pointer to at least 4 writable bytes that receive the hash (no
///             alignment requirement)
void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out) {
  const auto* data = static_cast<const uint8_t*>(key);
  const int nblocks = len / 4;

  uint32_t h1 = seed;

  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  //----------
  // body: mix in all complete 4 byte blocks, front to back

  for (int i = 0; i < nblocks; ++i) {
    uint32_t k1 = getblock32(data, i);

    k1 *= c1;
    k1 = ROTL32(k1, 15);
    k1 *= c2;

    h1 ^= k1;
    h1 = ROTL32(h1, 13);
    h1 = h1 * 5 + 0xe6546b64;
  }

  //----------
  // tail: mix in the up to 3 bytes that do not fill a complete block

  const uint8_t* tail = data + nblocks * 4;

  uint32_t k1 = 0;

  switch (len & 3) {
  case 3:
    k1 ^= static_cast<uint32_t>(tail[2]) << 16;
    [[fallthrough]];
  case 2:
    k1 ^= static_cast<uint32_t>(tail[1]) << 8;
    [[fallthrough]];
  case 1:
    k1 ^= tail[0];
    k1 *= c1;
    k1 = ROTL32(k1, 15);
    k1 *= c2;
    h1 ^= k1;
    break;
  default:
    // No trailing bytes, nothing to mix in
    break;
  }

  //----------
  // finalization

  h1 ^= static_cast<uint32_t>(len);

  h1 = fmix32(h1);

  // Copy the bytes instead of storing through a uint32_t pointer, since
  // nothing guarantees that out is suitably aligned
  std::memcpy(out, &h1, sizeof(h1));
}
33 changes: 33 additions & 0 deletions utils/src/MurmurHash3.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

#ifndef _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards
#define _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio

// Compilers that define _MSC_VER with a value below 1600 get hand written
// typedefs for the fixed width integer types used by the hash function
#if defined(_MSC_VER) && (_MSC_VER < 1600)

typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;

// Other compilers: take the fixed width integer types from the standard header

#else // defined(_MSC_VER) && (_MSC_VER < 1600)

#include <cstdint>

#endif // defined(_MSC_VER) && (_MSC_VER < 1600)

//-----------------------------------------------------------------------------

/// Compute the 32 bit MurmurHash3 (x86 variant) of a byte buffer.
///
/// @param key  pointer to the first of len bytes to hash
/// @param len  number of bytes to hash
/// @param seed initial value of the hash state
/// @param out  location that receives the 32 bit hash value
void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out);

//-----------------------------------------------------------------------------

#endif // _MURMURHASH3_H_
50 changes: 35 additions & 15 deletions utils/src/ParticleIDUtils.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include <edm4hep/utils/ParticleIDUtils.h>

#include "MurmurHash3.h"

#include "edm4hep/Constants.h"

#include <podio/FrameCategories.h>
Expand All @@ -11,6 +13,22 @@

namespace edm4hep::utils {

namespace {
  /// Turn an algorithm name into an algorithm ID by computing the 32 bit
  /// MurmurHash3 of the characters of the name with a seed of 0.
  int32_t getAlgoID(const std::string& name) {
    const auto nameLength = static_cast<int>(name.size());

    int32_t hashedName = 0;
    MurmurHash3_x86_32(name.data(), nameLength, 0, &hashedName);
    return hashedName;
  }
} // namespace

ParticleIDMeta::ParticleIDMeta(const std::string& algName, int32_t algType, const std::vector<std::string>& parNames) :
algoName(algName), paramNames(parNames), m_algoType(algType) {
}

/// Store the passed name and parameter names and use the hash of the name
/// (as computed by getAlgoID) as algorithm type.
ParticleIDMeta::ParticleIDMeta(const std::string& algName, const std::vector<std::string>& parNames) :
    ParticleIDMeta(algName, getAlgoID(algName), parNames) {
}

std::optional<int> getParamIndex(const ParticleIDMeta& pidMetaInfo, const std::string& param) {
const auto nameIt = std::find(pidMetaInfo.paramNames.begin(), pidMetaInfo.paramNames.end(), param);
if (nameIt != pidMetaInfo.paramNames.end()) {
Expand All @@ -31,17 +49,17 @@ void PIDHandler::addColl(const edm4hep::ParticleIDCollection& coll, const edm4he
}

/// Register the meta information of one ParticleID algorithm with this handler.
///
/// The algorithm name is mapped to the algorithm type and the algorithm type
/// is mapped to the complete meta information. Either both mappings are added
/// or none of them.
///
/// @param pidInfo the meta information to register
///
/// @throws std::runtime_error if the algorithm name or the algorithm type is
///         already registered
void PIDHandler::addMetaInfo(const edm4hep::utils::ParticleIDMeta& pidInfo) {
  const auto [algoIt, inserted] = m_algoTypes.emplace(pidInfo.algoName, pidInfo.algoType());
  if (!inserted) {
    throw std::runtime_error("Cannot have duplicate algorithm names (" + pidInfo.algoName + " already exists)");
  }

  const auto [metaIt, metaInserted] = m_algoPidMeta.emplace(pidInfo.algoType(), pidInfo);
  if (!metaInserted) {
    // The name has been registered just above, undo that so that a failed call
    // leaves the handler unchanged
    m_algoTypes.erase(algoIt);
    throw std::runtime_error("Cannot have duplicate algorithm types (" + std::to_string(pidInfo.algoType()) +
                             " already exists)");
  }
}
Expand Down Expand Up @@ -106,7 +124,7 @@ std::optional<int32_t> PIDHandler::getAlgoType(const std::string& algoName) cons
void PIDHandler::setAlgoInfo(podio::Frame& metadata, edm4hep::ParticleIDCollection& pidColl,
const std::string& collName, const edm4hep::utils::ParticleIDMeta& pidMetaInfo) {
for (auto pid : pidColl) {
pid.setAlgorithmType(pidMetaInfo.algoType);
pid.setAlgorithmType(pidMetaInfo.algoType());
}

PIDHandler::setAlgoInfo(metadata, collName, pidMetaInfo);
Expand All @@ -115,38 +133,40 @@ void PIDHandler::setAlgoInfo(podio::Frame& metadata, edm4hep::ParticleIDCollecti
/// Store the ParticleID meta information for a collection in a metadata Frame.
///
/// The algorithm name, the algorithm type and the parameter names are each put
/// into the Frame as one parameter, keyed by the collection name combined with
/// the corresponding edm4hep constant.
///
/// @param metadata    the Frame that receives the parameters
/// @param collName    the name of the collection the meta information belongs to
/// @param pidMetaInfo the meta information to store
void PIDHandler::setAlgoInfo(podio::Frame& metadata, const std::string& collName,
                             const edm4hep::utils::ParticleIDMeta& pidMetaInfo) {
  metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName), pidMetaInfo.algoName);
  metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType), pidMetaInfo.algoType());
  metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidParameterNames), pidMetaInfo.paramNames);
}

std::optional<edm4hep::utils::ParticleIDMeta> PIDHandler::getAlgoInfo(const podio::Frame& metadata,
const std::string& collName) {
ParticleIDMeta pidInfo{};

#if PODIO_BUILD_VERSION > PODIO_VERSION(0, 99, 0)
auto maybeAlgoName = metadata.getParameter<std::string>(podio::collMetadataParamName(collName, edm4hep::pidAlgoName));
if (!maybeAlgoName.has_value()) {
return std::nullopt;
}

pidInfo.algoName = std::move(maybeAlgoName.value());
pidInfo.algoType = metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)).value();
pidInfo.paramNames =
ParticleIDMeta pidInfo{
std::move(maybeAlgoName.value()),
metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)).value(),
metadata
.getParameter<std::vector<std::string>>(podio::collMetadataParamName(collName, edm4hep::pidParameterNames))
.value();
.value()};

#else
pidInfo.algoName = metadata.getParameter<std::string>(podio::collMetadataParamName(collName, edm4hep::pidAlgoName));

const auto& algoName =
metadata.getParameter<std::string>(podio::collMetadataParamName(collName, edm4hep::pidAlgoName));
// Use the algoName as proxy to see whether we could actually get the
// information from the metadata
if (pidInfo.algoName.empty()) {
if (algoName.empty()) {
return std::nullopt;
}

pidInfo.algoType = metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType));
pidInfo.paramNames = metadata.getParameter<std::vector<std::string>>(
podio::collMetadataParamName(collName, edm4hep::pidParameterNames));
ParticleIDMeta pidInfo{algoName,
metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)),
metadata.getParameter<std::vector<std::string>>(
podio::collMetadataParamName(collName, edm4hep::pidParameterNames))};
#endif

return pidInfo;
Expand Down

0 comments on commit 2ee9646

Please sign in to comment.