From f37b595d4bca43196effb56aef8840eb1f8ad4de Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 16 May 2024 16:42:14 +0200 Subject: [PATCH] Hash the algoName to get the algoID by default --- test/utils/test_PIDHandler.cpp | 2 +- utils/CMakeLists.txt | 1 + utils/include/edm4hep/utils/ParticleIDUtils.h | 18 +- utils/src/MurmurHash3.cpp | 442 ++++++++++++++++++ utils/src/MurmurHash3.h | 37 ++ utils/src/ParticleIDUtils.cc | 50 +- 6 files changed, 533 insertions(+), 17 deletions(-) create mode 100644 utils/src/MurmurHash3.cpp create mode 100644 utils/src/MurmurHash3.h diff --git a/test/utils/test_PIDHandler.cpp b/test/utils/test_PIDHandler.cpp index 12eaf3317..a38680f29 100644 --- a/test/utils/test_PIDHandler.cpp +++ b/test/utils/test_PIDHandler.cpp @@ -188,7 +188,7 @@ TEST_CASE("PIDHandler from Frame w/ metadata", "[pid_utils]") { const auto pidInfo = utils::PIDHandler::getAlgoInfo(metadata, "particleIds_1").value(); REQUIRE(pidInfo.algoName == "pidAlgo_1"); - REQUIRE(pidInfo.algoType == 42); + REQUIRE(pidInfo.algoType() == 42); REQUIRE(pidInfo.paramNames.size() == 2); REQUIRE(pidInfo.paramNames[0] == "first_param"); REQUIRE(pidInfo.paramNames[1] == "second_param"); diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 821938008..9c6a4654d 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -10,6 +10,7 @@ target_compile_features(kinematics INTERFACE cxx_std_17) set(utils_sources src/ParticleIDUtils.cc + src/MurmurHash3.cpp ) add_library(utils SHARED ${utils_sources}) diff --git a/utils/include/edm4hep/utils/ParticleIDUtils.h b/utils/include/edm4hep/utils/ParticleIDUtils.h index 204ee448c..390251b44 100644 --- a/utils/include/edm4hep/utils/ParticleIDUtils.h +++ b/utils/include/edm4hep/utils/ParticleIDUtils.h @@ -16,9 +16,25 @@ namespace edm4hep::utils { /// A simple struct bundling relevant metadata for a ParticleID collection struct ParticleIDMeta { + ParticleIDMeta(const std::string& algName, int32_t algType, const std::vector& parNames); + 
ParticleIDMeta(const std::string& algName, const std::vector& parNames); + + ~ParticleIDMeta() = default; + ParticleIDMeta() = default; + ParticleIDMeta(const ParticleIDMeta&) = default; + ParticleIDMeta& operator=(const ParticleIDMeta&) = default; + ParticleIDMeta(ParticleIDMeta&&) = default; + ParticleIDMeta& operator=(ParticleIDMeta&&) = default; + std::string algoName{}; ///< The name of the algorithm - int32_t algoType{0}; ///< The (user defined) algorithm type std::vector paramNames{}; ///< The names of the parameters + + int32_t algoType() const { + return m_algoType; + } + +private: + int32_t m_algoType{0}; ///< The (user defined) algorithm type }; /// Get the index of the parameter in the passed ParticleID meta info diff --git a/utils/src/MurmurHash3.cpp b/utils/src/MurmurHash3.cpp new file mode 100644 index 000000000..b3bc67064 --- /dev/null +++ b/utils/src/MurmurHash3.cpp @@ -0,0 +1,442 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. 
+ +#include "MurmurHash3.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define FORCE_INLINE __forceinline + +#include + +#define ROTL32(x, y) _rotl(x, y) +#define ROTL64(x, y) _rotl64(x, y) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define FORCE_INLINE inline __attribute__((always_inline)) + +inline uint32_t rotl32(uint32_t x, int8_t r) { + return (x << r) | (x >> (32 - r)); +} + +inline uint64_t rotl64(uint64_t x, int8_t r) { + return (x << r) | (x >> (64 - r)); +} + +#define ROTL32(x, y) rotl32(x, y) +#define ROTL64(x, y) rotl64(x, y) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { + return p[i]; +} + +FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) { + return p[i]; +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +FORCE_INLINE uint32_t fmix32(uint32_t h) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//---------- + +FORCE_INLINE uint64_t fmix64(uint64_t k) { + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 
0x1b873593; + + //---------- + // body + + const auto blocks = (const uint32_t*)(data + nblocks * 4); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = getblock32(blocks, i); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + [[fallthrough]]; + case 2: + k1 ^= tail[1] << 8; + [[fallthrough]]; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_128(const void* key, const int len, uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint32_t h1 = seed; + uint32_t h2 = seed; + uint32_t h3 = seed; + uint32_t h4 = seed; + + const uint32_t c1 = 0x239b961b; + const uint32_t c2 = 0xab0e9789; + const uint32_t c3 = 0x38b34ae5; + const uint32_t c4 = 0xa1e38b93; + + //---------- + // body + + const auto blocks = (const uint32_t*)(data + nblocks * 16); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = getblock32(blocks, i * 4 + 0); + uint32_t k2 = getblock32(blocks, i * 4 + 1); + uint32_t k3 = getblock32(blocks, i * 4 + 2); + uint32_t k4 = getblock32(blocks, i * 4 + 3); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + + h1 = ROTL32(h1, 19); + h1 += h2; + h1 = h1 * 5 + 0x561ccd1b; + + k2 *= c2; + k2 = ROTL32(k2, 16); + k2 *= c3; + h2 ^= k2; + + h2 = ROTL32(h2, 17); + h2 += h3; + h2 = h2 * 5 + 0x0bcaa747; + + k3 *= c3; + k3 = ROTL32(k3, 17); + k3 *= c4; + h3 ^= k3; + + h3 = ROTL32(h3, 15); + h3 += h4; + h3 = h3 * 5 + 0x96cd1c35; + + k4 *= c4; + k4 = ROTL32(k4, 18); + k4 *= c1; + h4 ^= k4; + + h4 = ROTL32(h4, 13); + h4 += h1; + h4 = h4 * 5 
+ 0x32ac3b17; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 16); + + uint32_t k1 = 0; + uint32_t k2 = 0; + uint32_t k3 = 0; + uint32_t k4 = 0; + + switch (len & 15) { + case 15: + k4 ^= tail[14] << 16; + [[fallthrough]]; + case 14: + k4 ^= tail[13] << 8; + [[fallthrough]]; + case 13: + k4 ^= tail[12] << 0; + k4 *= c4; + k4 = ROTL32(k4, 18); + k4 *= c1; + h4 ^= k4; + [[fallthrough]]; + + case 12: + k3 ^= tail[11] << 24; + [[fallthrough]]; + case 11: + k3 ^= tail[10] << 16; + [[fallthrough]]; + case 10: + k3 ^= tail[9] << 8; + [[fallthrough]]; + case 9: + k3 ^= tail[8] << 0; + k3 *= c3; + k3 = ROTL32(k3, 17); + k3 *= c4; + h3 ^= k3; + [[fallthrough]]; + + case 8: + k2 ^= tail[7] << 24; + [[fallthrough]]; + case 7: + k2 ^= tail[6] << 16; + [[fallthrough]]; + case 6: + k2 ^= tail[5] << 8; + [[fallthrough]]; + case 5: + k2 ^= tail[4] << 0; + k2 *= c2; + k2 = ROTL32(k2, 16); + k2 *= c3; + h2 ^= k2; + [[fallthrough]]; + + case 4: + k1 ^= tail[3] << 24; + [[fallthrough]]; + case 3: + k1 ^= tail[2] << 16; + [[fallthrough]]; + case 2: + k1 ^= tail[1] << 8; + [[fallthrough]]; + case 1: + k1 ^= tail[0] << 0; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + h2 ^= len; + h3 ^= len; + h4 ^= len; + + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; + + h1 = fmix32(h1); + h2 = fmix32(h2); + h3 = fmix32(h3); + h4 = fmix32(h4); + + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; + + ((uint32_t*)out)[0] = h1; + ((uint32_t*)out)[1] = h2; + ((uint32_t*)out)[2] = h3; + ((uint32_t*)out)[3] = h4; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x64_128(const void* key, const int len, const uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = 
BIG_CONSTANT(0x87c37b91114253d5); + const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + //---------- + // body + + const auto blocks = (const uint64_t*)(data); + + for (int i = 0; i < nblocks; i++) { + uint64_t k1 = getblock64(blocks, i * 2 + 0); + uint64_t k2 = getblock64(blocks, i * 2 + 1); + + k1 *= c1; + k1 = ROTL64(k1, 31); + k1 *= c2; + h1 ^= k1; + + h1 = ROTL64(h1, 27); + h1 += h2; + h1 = h1 * 5 + 0x52dce729; + + k2 *= c2; + k2 = ROTL64(k2, 33); + k2 *= c1; + h2 ^= k2; + + h2 = ROTL64(h2, 31); + h2 += h1; + h2 = h2 * 5 + 0x38495ab5; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch (len & 15) { + case 15: + k2 ^= ((uint64_t)tail[14]) << 48; + [[fallthrough]]; + case 14: + k2 ^= ((uint64_t)tail[13]) << 40; + [[fallthrough]]; + case 13: + k2 ^= ((uint64_t)tail[12]) << 32; + [[fallthrough]]; + case 12: + k2 ^= ((uint64_t)tail[11]) << 24; + [[fallthrough]]; + case 11: + k2 ^= ((uint64_t)tail[10]) << 16; + [[fallthrough]]; + case 10: + k2 ^= ((uint64_t)tail[9]) << 8; + [[fallthrough]]; + case 9: + k2 ^= ((uint64_t)tail[8]) << 0; + k2 *= c2; + k2 = ROTL64(k2, 33); + k2 *= c1; + h2 ^= k2; + [[fallthrough]]; + + case 8: + k1 ^= ((uint64_t)tail[7]) << 56; + [[fallthrough]]; + case 7: + k1 ^= ((uint64_t)tail[6]) << 48; + [[fallthrough]]; + case 6: + k1 ^= ((uint64_t)tail[5]) << 40; + [[fallthrough]]; + case 5: + k1 ^= ((uint64_t)tail[4]) << 32; + [[fallthrough]]; + case 4: + k1 ^= ((uint64_t)tail[3]) << 24; + [[fallthrough]]; + case 3: + k1 ^= ((uint64_t)tail[2]) << 16; + [[fallthrough]]; + case 2: + k1 ^= ((uint64_t)tail[1]) << 8; + [[fallthrough]]; + case 1: + k1 ^= ((uint64_t)tail[0]) << 0; + k1 *= c1; + k1 = ROTL64(k1, 31); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} + 
+//----------------------------------------------------------------------------- diff --git a/utils/src/MurmurHash3.h b/utils/src/MurmurHash3.h new file mode 100644 index 000000000..1729c4abe --- /dev/null +++ b/utils/src/MurmurHash3.h @@ -0,0 +1,37 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#ifndef _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards +#define _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) && (_MSC_VER < 1600) + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out); + +void MurmurHash3_x86_128(const void* key, int len, uint32_t seed, void* out); + +void MurmurHash3_x64_128(const void* key, int len, uint32_t seed, void* out); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH3_H_ diff --git a/utils/src/ParticleIDUtils.cc b/utils/src/ParticleIDUtils.cc index 8b6337642..64b741e60 100644 --- a/utils/src/ParticleIDUtils.cc +++ b/utils/src/ParticleIDUtils.cc @@ -1,5 +1,7 @@ #include +#include "MurmurHash3.h" + #include "edm4hep/Constants.h" #include @@ -11,6 +13,22 @@ namespace edm4hep::utils { +namespace { + int32_t getAlgoID(const std::string& name) { + int32_t ID = 0; + MurmurHash3_x86_32(name.c_str(), name.size(), 0, &ID); + return ID; + } +} // namespace + 
+ParticleIDMeta::ParticleIDMeta(const std::string& algName, int32_t algType, const std::vector& parNames) : + algoName(algName), paramNames(parNames), m_algoType(algType) { +} + +ParticleIDMeta::ParticleIDMeta(const std::string& algName, const std::vector& parNames) : + algoName(algName), paramNames(parNames), m_algoType(getAlgoID(algName)) { +} + std::optional getParamIndex(const ParticleIDMeta& pidMetaInfo, const std::string& param) { const auto nameIt = std::find(pidMetaInfo.paramNames.begin(), pidMetaInfo.paramNames.end(), param); if (nameIt != pidMetaInfo.paramNames.end()) { @@ -31,17 +49,17 @@ void PIDHandler::addColl(const edm4hep::ParticleIDCollection& coll, const edm4he } void PIDHandler::addMetaInfo(const edm4hep::utils::ParticleIDMeta& pidInfo) { - const auto [algoIt, inserted] = m_algoTypes.emplace(pidInfo.algoName, pidInfo.algoType); + const auto [algoIt, inserted] = m_algoTypes.emplace(pidInfo.algoName, pidInfo.algoType()); if (!inserted) { throw std::runtime_error("Cannot have duplicate algorithm names (" + pidInfo.algoName + " already exists)"); } - const auto [__, metaInserted] = m_algoPidMeta.emplace(pidInfo.algoType, pidInfo); + const auto [__, metaInserted] = m_algoPidMeta.emplace(pidInfo.algoType(), pidInfo); if (!metaInserted) { if (inserted) { m_algoTypes.erase(algoIt); } - throw std::runtime_error("Cannot have duplicate algorithm types (" + std::to_string(pidInfo.algoType) + + throw std::runtime_error("Cannot have duplicate algorithm types (" + std::to_string(pidInfo.algoType()) + " already exists)"); } } @@ -106,7 +124,7 @@ std::optional PIDHandler::getAlgoType(const std::string& algoName) cons void PIDHandler::setAlgoInfo(podio::Frame& metadata, edm4hep::ParticleIDCollection& pidColl, const std::string& collName, const edm4hep::utils::ParticleIDMeta& pidMetaInfo) { for (auto pid : pidColl) { - pid.setAlgorithmType(pidMetaInfo.algoType); + pid.setAlgorithmType(pidMetaInfo.algoType()); } PIDHandler::setAlgoInfo(metadata, collName, 
pidMetaInfo); @@ -115,13 +133,12 @@ void PIDHandler::setAlgoInfo(podio::Frame& metadata, edm4hep::ParticleIDCollecti void PIDHandler::setAlgoInfo(podio::Frame& metadata, const std::string& collName, const edm4hep::utils::ParticleIDMeta& pidMetaInfo) { metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName), pidMetaInfo.algoName); - metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType), pidMetaInfo.algoType); + metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType), pidMetaInfo.algoType()); metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidParameterNames), pidMetaInfo.paramNames); } std::optional PIDHandler::getAlgoInfo(const podio::Frame& metadata, const std::string& collName) { - ParticleIDMeta pidInfo{}; #if PODIO_BUILD_VERSION > PODIO_VERSION(0, 99, 0) auto maybeAlgoName = metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName)); @@ -129,24 +146,27 @@ std::optional PIDHandler::getAlgoInfo(const podi return std::nullopt; } - pidInfo.algoName = std::move(maybeAlgoName.value()); - pidInfo.algoType = metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)).value(); - pidInfo.paramNames = + ParticleIDMeta pidInfo{ + std::move(maybeAlgoName.value()), + metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)).value(), metadata .getParameter>(podio::collMetadataParamName(collName, edm4hep::pidParameterNames)) - .value(); + .value()}; #else - pidInfo.algoName = metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName)); + + const auto& algoName = + metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName)); // Use the algoName as proxy to see whether we could actually get the // information from the metadata - if (pidInfo.algoName.empty()) { + if (algoName.empty()) { return std::nullopt; } - pidInfo.algoType = 
metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)); - pidInfo.paramNames = metadata.getParameter<std::vector<std::string>>( - podio::collMetadataParamName(collName, edm4hep::pidParameterNames)); + ParticleIDMeta pidInfo{algoName, + metadata.getParameter<int>(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)), + metadata.getParameter<std::vector<std::string>>( + podio::collMetadataParamName(collName, edm4hep::pidParameterNames))}; #endif return pidInfo;