diff --git a/test/utils/test_PIDHandler.cpp b/test/utils/test_PIDHandler.cpp index 12eaf3317..9e60195d5 100644 --- a/test/utils/test_PIDHandler.cpp +++ b/test/utils/test_PIDHandler.cpp @@ -74,6 +74,14 @@ void checkHandlerValidReco(const edm4hep::utils::PIDHandler& handler, const edm4 } } +TEST_CASE("ParticleIDMeta constructor") { + using namespace edm4hep::utils; + + ParticleIDMeta pidMeta{"name", {}}; + REQUIRE(pidMeta.algoName == "name"); + REQUIRE(pidMeta.algoType() == -609270800); // 32 bit MurmurHash3 of "name" +} + TEST_CASE("PIDHandler basics", "[pid_utils]") { using namespace edm4hep; @@ -188,7 +196,7 @@ TEST_CASE("PIDHandler from Frame w/ metadata", "[pid_utils]") { const auto pidInfo = utils::PIDHandler::getAlgoInfo(metadata, "particleIds_1").value(); REQUIRE(pidInfo.algoName == "pidAlgo_1"); - REQUIRE(pidInfo.algoType == 42); + REQUIRE(pidInfo.algoType() == 42); REQUIRE(pidInfo.paramNames.size() == 2); REQUIRE(pidInfo.paramNames[0] == "first_param"); REQUIRE(pidInfo.paramNames[1] == "second_param"); diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 821938008..9c6a4654d 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -10,6 +10,7 @@ target_compile_features(kinematics INTERFACE cxx_std_17) set(utils_sources src/ParticleIDUtils.cc + src/MurmurHash3.cpp ) add_library(utils SHARED ${utils_sources}) diff --git a/utils/include/edm4hep/utils/ParticleIDUtils.h b/utils/include/edm4hep/utils/ParticleIDUtils.h index 204ee448c..390251b44 100644 --- a/utils/include/edm4hep/utils/ParticleIDUtils.h +++ b/utils/include/edm4hep/utils/ParticleIDUtils.h @@ -16,9 +16,25 @@ namespace edm4hep::utils { /// A simple struct bundling relevant metadata for a ParticleID collection struct ParticleIDMeta { + ParticleIDMeta(const std::string& algName, int32_t algType, const std::vector& parNames); + ParticleIDMeta(const std::string& algName, const std::vector& parNames); + + ~ParticleIDMeta() = default; + ParticleIDMeta() = default; + ParticleIDMeta(const ParticleIDMeta&) = default; + ParticleIDMeta& operator=(const ParticleIDMeta&) = default; + ParticleIDMeta(ParticleIDMeta&&) = default; + ParticleIDMeta& operator=(ParticleIDMeta&&) = default; + std::string algoName{}; ///< The name of the algorithm - int32_t algoType{0}; ///< The (user defined) algorithm type std::vector paramNames{}; ///< The names of the parameters + + int32_t algoType() const { + return m_algoType; + } + +private: + int32_t m_algoType{0}; ///< The (user defined) algorithm type }; /// Get the index of the parameter in the passed ParticleID meta info diff --git a/utils/src/MurmurHash3.cpp b/utils/src/MurmurHash3.cpp new file mode 100644 index 000000000..91975d660 --- /dev/null +++ b/utils/src/MurmurHash3.cpp @@ -0,0 +1,118 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "MurmurHash3.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define FORCE_INLINE __forceinline + +#include + +#define ROTL32(x, y) _rotl(x, y) + +// Other compilers + +#else // defined(_MSC_VER) + +#define FORCE_INLINE inline __attribute__((always_inline)) + +inline uint32_t rotl32(uint32_t x, int8_t r) { + return (x << r) | (x >> (32 - r)); +} + +#define ROTL32(x, y) rotl32(x, y) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) { + return p[i]; +} + +//----------------------------------------------------------------------------- +// Finalization mix - force all bits of a hash block to avalanche + +FORCE_INLINE uint32_t fmix32(uint32_t h) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out) { + const auto data = (const uint8_t*)key; + const int nblocks = len / 4; + + uint32_t h1 = seed; + + const uint32_t c1 = 0xcc9e2d51; + const uint32_t c2 = 0x1b873593; + + //---------- + // body + + const auto blocks = (const uint32_t*)(data + nblocks * 4); + + for (int i = -nblocks; i; i++) { + uint32_t k1 = getblock32(blocks, i); + + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + h1 ^= k1; + h1 = ROTL32(h1, 13); + h1 = h1 * 5 + 0xe6546b64; + } + + //---------- + // tail + + const auto tail = (const uint8_t*)(data + nblocks * 4); + + uint32_t k1 = 0; + + switch (len & 3) { + case 3: + k1 ^= tail[2] << 16; + [[fallthrough]]; + case 2: + k1 ^= tail[1] << 8; + [[fallthrough]]; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; + + h1 = fmix32(h1); + + *(uint32_t*)out = h1; +} diff --git a/utils/src/MurmurHash3.h b/utils/src/MurmurHash3.h new file mode 100644 index 000000000..61dac2dc8 --- /dev/null +++ b/utils/src/MurmurHash3.h @@ -0,0 +1,33 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#ifndef _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards +#define _MURMURHASH3_H_ // NOLINT(llvm-header-guard): Keep original header guards + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) && (_MSC_VER < 1600) + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +void MurmurHash3_x86_32(const void* key, int len, uint32_t seed, void* out); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH3_H_ diff --git a/utils/src/ParticleIDUtils.cc b/utils/src/ParticleIDUtils.cc index 8b6337642..154b6b61d 100644 --- a/utils/src/ParticleIDUtils.cc +++ b/utils/src/ParticleIDUtils.cc @@ -1,5 +1,7 @@ #include +#include "MurmurHash3.h" + #include "edm4hep/Constants.h" #include @@ -11,6 +13,22 @@ namespace edm4hep::utils { +namespace { + int32_t getAlgoID(const std::string& name) { + int32_t ID = 0; + MurmurHash3_x86_32(name.c_str(), name.size(), 0, &ID); + return ID; + } +} // namespace + +ParticleIDMeta::ParticleIDMeta(const std::string& algName, int32_t algType, const std::vector& parNames) : + algoName(algName), paramNames(parNames), m_algoType(algType) { +} + +ParticleIDMeta::ParticleIDMeta(const std::string& algName, const std::vector& parNames) : + algoName(algName), paramNames(parNames), m_algoType(getAlgoID(algName)) { +} + std::optional getParamIndex(const ParticleIDMeta& pidMetaInfo, const std::string& param) { const auto nameIt = std::find(pidMetaInfo.paramNames.begin(), pidMetaInfo.paramNames.end(), param); if (nameIt != pidMetaInfo.paramNames.end()) { @@ -31,17 +49,17 @@ void PIDHandler::addColl(const edm4hep::ParticleIDCollection& coll, const edm4he } void PIDHandler::addMetaInfo(const edm4hep::utils::ParticleIDMeta& pidInfo) { - const auto [algoIt, inserted] = m_algoTypes.emplace(pidInfo.algoName, pidInfo.algoType); + const auto [algoIt, inserted] = m_algoTypes.emplace(pidInfo.algoName, pidInfo.algoType()); if (!inserted) { throw std::runtime_error("Cannot have duplicate algorithm names (" + pidInfo.algoName + " already exists)"); } - const auto [__, metaInserted] = m_algoPidMeta.emplace(pidInfo.algoType, pidInfo); + const auto [__, metaInserted] = m_algoPidMeta.emplace(pidInfo.algoType(), pidInfo); if (!metaInserted) { if (inserted) { m_algoTypes.erase(algoIt); } - throw std::runtime_error("Cannot have duplicate algorithm types (" + std::to_string(pidInfo.algoType) + + throw std::runtime_error("Cannot have duplicate algorithm types (" + std::to_string(pidInfo.algoType()) + " already exists)"); } } @@ -106,7 +124,7 @@ std::optional PIDHandler::getAlgoType(const std::string& algoName) cons void PIDHandler::setAlgoInfo(podio::Frame& metadata, edm4hep::ParticleIDCollection& pidColl, const std::string& collName, const edm4hep::utils::ParticleIDMeta& pidMetaInfo) { for (auto pid : pidColl) { - pid.setAlgorithmType(pidMetaInfo.algoType); + pid.setAlgorithmType(pidMetaInfo.algoType()); } PIDHandler::setAlgoInfo(metadata, collName, pidMetaInfo); @@ -115,13 +133,12 @@ void PIDHandler::setAlgoInfo(podio::Frame& metadata, edm4hep::ParticleIDCollecti void PIDHandler::setAlgoInfo(podio::Frame& metadata, const std::string& collName, const edm4hep::utils::ParticleIDMeta& pidMetaInfo) { metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName), pidMetaInfo.algoName); - metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType), pidMetaInfo.algoType); + metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType), pidMetaInfo.algoType()); metadata.putParameter(podio::collMetadataParamName(collName, edm4hep::pidParameterNames), pidMetaInfo.paramNames); } std::optional PIDHandler::getAlgoInfo(const podio::Frame& metadata, const std::string& collName) { - ParticleIDMeta pidInfo{}; #if PODIO_BUILD_VERSION > PODIO_VERSION(0, 99, 0) auto maybeAlgoName = metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName)); @@ -129,24 +146,27 @@ std::optional PIDHandler::getAlgoInfo(const podi return std::nullopt; } - pidInfo.algoName = std::move(maybeAlgoName.value()); - pidInfo.algoType = metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)).value(); - pidInfo.paramNames = + ParticleIDMeta pidInfo{ + std::move(maybeAlgoName.value()), + metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)).value(), metadata .getParameter>(podio::collMetadataParamName(collName, edm4hep::pidParameterNames)) - .value(); + .value()}; #else - pidInfo.algoName = metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName)); + + const auto& algoName = + metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoName)); // Use the algoName as proxy to see whether we could actually get the // information from the metadata - if (pidInfo.algoName.empty()) { + if (algoName.empty()) { return std::nullopt; } - pidInfo.algoType = metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)); - pidInfo.paramNames = metadata.getParameter>( - podio::collMetadataParamName(collName, edm4hep::pidParameterNames)); + ParticleIDMeta pidInfo{algoName, + metadata.getParameter(podio::collMetadataParamName(collName, edm4hep::pidAlgoType)), + metadata.getParameter>( + podio::collMetadataParamName(collName, edm4hep::pidParameterNames))}; #endif return pidInfo;