From 374cc26feac87e541aa78ab73ad1df6f40e63000 Mon Sep 17 00:00:00 2001 From: Hoyt Koepke Date: Thu, 1 Oct 2020 18:54:21 -0700 Subject: [PATCH] Updated random number library to use c++11 random library instead of boost. --- src/core/random/random.cpp | 127 +++------------ src/core/random/random.hpp | 145 +++++++----------- .../storage/query_engine/algorithm/sort.cpp | 2 +- src/model_server/lib/image_util.cpp | 5 +- 4 files changed, 85 insertions(+), 194 deletions(-) diff --git a/src/core/random/random.cpp b/src/core/random/random.cpp index b49cb1d8ea..81fd2a67df 100644 --- a/src/core/random/random.cpp +++ b/src/core/random/random.cpp @@ -8,8 +8,6 @@ #include #include -#include -#include #include #include @@ -29,31 +27,6 @@ namespace turi { namespace random { - /** Get as close to a true source of randomness as possible. - * - * nanoseconds clock from program start. This should be pretty good. - * - * In case subsequent calls on a platform that does not support nanosecond resolution - * happen, also increment a base count to make sure that subsequent seeds - * are never the same. - * - * hash all these together to get a final seed hash. 
- */ - uint64_t pure_random_seed() { - static auto base_start_time = std::chrono::high_resolution_clock::now(); - static uint64_t base_seed = hash64(time(NULL)); - static atomic base_count = 0; - - - ++base_count; - - auto now = std::chrono::high_resolution_clock::now(); - - uint64_t cur_seed = std::chrono::duration_cast(now-base_start_time).count(); - - return hash64(base_seed, hash64(cur_seed, base_count)); - } - /** * A truely nondeterministic generator */ @@ -64,81 +37,33 @@ namespace turi { return global_gen; } - typedef size_t result_type; - BOOST_STATIC_CONSTANT(result_type, min_value = - boost::integer_traits::const_min); - BOOST_STATIC_CONSTANT(result_type, max_value = - boost::integer_traits::const_max); - result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return min_value; } - result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const { return max_value; } - - nondet_generator() { -#ifndef _WIN32 - rnd_dev.open("/dev/urandom", std::ios::binary | std::ios::in); - ASSERT_TRUE(rnd_dev.good()); -#else - auto rnd_dev_ret = CryptAcquireContext(&rnd_dev, - NULL, - NULL, - PROV_RSA_FULL, - 0); - if(!rnd_dev_ret) { - auto err_code = GetLastError(); - if(err_code == NTE_BAD_KEYSET) { - if(!CryptAcquireContext(&rnd_dev, - NULL, - NULL, - PROV_RSA_FULL, - CRYPT_NEWKEYSET)) { - } else { - err_code = GetLastError(); - log_and_throw(get_last_err_str(err_code)); - } - } else { - log_and_throw(get_last_err_str(err_code)); - } - } -#endif - } - // Close the random number generator -#ifndef _WIN32 - ~nondet_generator() { rnd_dev.close(); } -#else - ~nondet_generator() { CryptReleaseContext(rnd_dev, 0); } -#endif + nondet_generator() + : _random_devices(thread::cpu_count()) + {} + + typedef unsigned int result_type; + + static constexpr result_type min() { return std::numeric_limits::min(); } + static constexpr result_type max() { return std::numeric_limits::max(); } + // read a size_t from the source - result_type operator()() { - // read a machine word into result 
- result_type result(0); -#ifndef _WIN32 - mut.lock(); - ASSERT_TRUE(rnd_dev.good()); - rnd_dev.read(reinterpret_cast(&result), sizeof(result_type)); - ASSERT_TRUE(rnd_dev.good()); - mut.unlock(); - // std::cerr << result << std::endl; - return result; -#else - mut.lock(); - ASSERT_TRUE(CryptGenRandom(rnd_dev,8,(BYTE *)&result)); - mut.unlock(); - return result; -#endif + inline result_type operator()() { + int thread_id = thread::thread_id(); + + return _random_devices.at(thread_id)(); } - private: -#ifndef _WIN32 - std::ifstream rnd_dev; -#else - HCRYPTPROV rnd_dev; -#endif - mutex mut; + + std::vector _random_devices; }; //nondet_generator global_nondet_rng; - - - - + /** Use the C++11 standard generator to get as close to a true source of randomness as possible. + * + * Returns a 64-bit truly random seed. + */ + uint64_t pure_random_seed() { + return hash64_combine(hash64(nondet_generator::global()()), hash64(nondet_generator::global()())); + } /** * This class represents a master registery of all active random @@ -301,12 +226,10 @@ namespace turi { // Get the global nondeterministic random number generator. 
nondet_generator& nondet_rnd(nondet_generator::global()); mut.lock(); + // std::cerr << "initializing real rng" << std::endl; - real_rng.seed(static_cast(nondet_rnd())); - // std::cerr << "initializing discrete rng" << std::endl; - discrete_rng.seed(static_cast(nondet_rnd())); - // std::cerr << "initializing fast discrete rng" << std::endl; - fast_discrete_rng.seed(static_cast(nondet_rnd())); + m_rng.seed(static_cast(nondet_rnd())); + mut.unlock(); } diff --git a/src/core/random/random.hpp b/src/core/random/random.hpp index c9de7fb567..9b711c0c9d 100644 --- a/src/core/random/random.hpp +++ b/src/core/random/random.hpp @@ -13,8 +13,8 @@ #include #include #include +#include -#include #include #include @@ -51,30 +51,30 @@ namespace turi { */ template struct uniform { - typedef boost::uniform_int distribution_type; + typedef std::uniform_int_distribution distribution_type; template static inline IntType sample(RealRNG& real_rng, - DiscreteRNG& discrete_rng, + DiscreteRNG& m_rng, const IntType& min, const IntType& max) { - return distribution_type(min, max)(discrete_rng); + return distribution_type(min, max)(m_rng); } }; template<> struct uniform { - typedef boost::uniform_real distribution_type; + typedef std::uniform_real_distribution distribution_type; template static inline double sample(RealRNG& real_rng, - DiscreteRNG& discrete_rng, + DiscreteRNG& m_rng, const double& min, const double& max) { return distribution_type(min, max)(real_rng); } }; template<> struct uniform { - typedef boost::uniform_real distribution_type; + typedef std::uniform_real_distribution distribution_type; template static inline float sample(RealRNG& real_rng, - DiscreteRNG& discrete_rng, + DiscreteRNG& m_rng, const float& min, const float& max) { return distribution_type(min, max)(real_rng); } @@ -89,9 +89,6 @@ namespace turi { class generator { public: // base Generator types - typedef boost::lagged_fibonacci607 real_rng_type; - typedef boost::mt11213b discrete_rng_type; - typedef 
boost::rand48 fast_discrete_rng_type; generator() { time_seed(); @@ -100,9 +97,7 @@ namespace turi { //! Seed the generator using the default seed inline void seed() { mut.lock(); - real_rng.seed(); - discrete_rng.seed(); - fast_discrete_rng.seed(); + m_rng.seed(); mut.unlock(); } @@ -123,57 +118,60 @@ namespace turi { } mut.lock(); - fast_discrete_rng.seed(_seed); - real_rng.seed(fast_discrete_rng); - discrete_rng.seed(fast_discrete_rng); + m_rng.seed(_seed); mut.unlock(); } //! Seed the generator using another generator void seed(generator& other){ - mut.lock(); - real_rng.seed(other.real_rng); - discrete_rng.seed(other.discrete_rng); - fast_discrete_rng.seed(other.fast_discrete_rng()); - mut.unlock(); + uint32_t _seed = other.uniform(0, std::numeric_limits::max()); + seed(_seed); } /** * Generate a random number in the uniform real with range [min, * max) or [min, max] if the number type is discrete. */ - template - inline NumType uniform(const NumType min, const NumType max) { - mut.lock(); - const NumType result = distributions::uniform:: - sample(real_rng, discrete_rng, min, max); - mut.unlock(); - return result; + template + inline NumType uniform(const NumType min, const NumType max, + typename std::enable_if::value>::type* = nullptr) { + + std::uniform_int_distribution d(min, max); + std::lock_guard lg(mut); + return d(m_rng); } // end of uniform - + /** * Generate a random number in the uniform real with range [min, * max) or [min, max] if the number type is discrete. + * [Double overload] */ - template + template + inline NumType uniform(const NumType min, const NumType max, + typename std::enable_if::value>::type* = nullptr) { + + std::uniform_real_distribution d(min, max); + std::lock_guard lg(mut); + return d(m_rng); + } // end of uniform + + /** + * Generate a random number in the uniform real with range [min, + * max) or [min, max] if the number type is discrete. 
+ */ + template inline NumType fast_uniform(const NumType min, const NumType max) { - mut.lock(); - const NumType result = distributions::uniform:: - sample(real_rng, fast_discrete_rng, min, max); - mut.unlock(); - return result; - } // end of fast_uniform - + return uniform(min, max); + } /** * Generate a random number in the uniform real with range [min, * max); */ inline double gamma(const double alpha = double(1)) { - boost::gamma_distribution gamma_dist(alpha); - mut.lock(); - const double result = gamma_dist(real_rng); - mut.unlock(); + std::gamma_distribution gamma_dist(alpha); + std::lock_guard lg(mut); + const double result = gamma_dist(m_rng); return result; } // end of gamma @@ -184,9 +182,9 @@ namespace turi { */ inline double gaussian(const double mean = double(0), const double stdev = double(1)) { - boost::normal_distribution normal_dist(mean, stdev); + std::normal_distribution normal_dist(mean, stdev); mut.lock(); - const double result = normal_dist(real_rng); + const double result = normal_dist(m_rng); mut.unlock(); return result; } // end of gaussian @@ -206,30 +204,21 @@ namespace turi { */ inline double cauchy(const double location = double(0), const double scale = double(1)) { - boost::cauchy_distribution cauchy_dist(location, scale); + std::cauchy_distribution cauchy_dist(location, scale); mut.lock(); - const double result = cauchy_dist(real_rng); + const double result = cauchy_dist(m_rng); mut.unlock(); return result; } // end of cauchy inline bool bernoulli(const double p = double(0.5)) { - boost::bernoulli_distribution dist(p); + std::bernoulli_distribution dist(p); mut.lock(); - const double result(dist(discrete_rng)); + const double result(dist(m_rng)); mut.unlock(); return result; } // end of bernoulli - inline bool fast_bernoulli(const double p = double(0.5)) { - boost::bernoulli_distribution dist(p); - mut.lock(); - const double result(dist(fast_discrete_rng)); - mut.unlock(); - return result; - } // end of bernoulli - - /** * Draw a 
random number from a multinomial */ @@ -260,7 +249,7 @@ namespace turi { size_t multinomial(const VecType& prb, VType norm) { if(norm < 1e-20) { - return fast_uniform(0, prb.size() - 1); + return uniform(0, prb.size() - 1); } #ifndef NDEBUG @@ -273,7 +262,7 @@ namespace turi { ASSERT_LT(double(std::abs(norm - total)), std::max(1e-20, 1e-6 * norm)); #endif - VType rnd = fast_uniform(0,norm - (std::is_integral::value ? 1 : 0)); + VType rnd = uniform(0,norm - (std::is_integral::value ? 1 : 0)); for(size_t i = 0; i < size_t(prb.size()); ++i) { if(rnd <= prb[i]) { @@ -323,32 +312,15 @@ namespace turi { template void shuffle(Iterator begin, Iterator end) { mut.lock(); - shuffle_functor functor(*this); - std::random_shuffle(begin, end, functor); + std::shuffle(begin, end, m_rng); mut.unlock(); } // end of shuffle private: - ////////////////////////////////////////////////////// - /// Data members - struct shuffle_functor { - generator& gen; - inline shuffle_functor(generator& gen) : gen(gen) { } - inline std::ptrdiff_t operator()(std::ptrdiff_t end) { - return distributions::uniform:: - sample(gen.real_rng, gen.fast_discrete_rng, 0, end-1); - } - }; - - //! The real random number generator - real_rng_type real_rng; //! The discrete random number generator - discrete_rng_type discrete_rng; - //! The fast discrete random number generator - fast_discrete_rng_type fast_discrete_rng; - //! 
lock used to access local members - mutex mut; + std::mt19937_64 m_rng; + std::mutex mut; }; // end of class generator @@ -406,7 +378,7 @@ namespace turi { if (min == max) return min; return get_source().uniform(min, max); } // end of uniform - + /** * \ingroup random * Generate a random number in the uniform real with range [min, @@ -416,7 +388,8 @@ namespace turi { inline NumType fast_uniform(const NumType min, const NumType max) { if (min == max) return min; return get_source().fast_uniform(min, max); - } // end of fast_uniform + } // end of uniform + /** * \ingroup random @@ -428,7 +401,7 @@ namespace turi { * \ingroup random * Simulates the standard rand function as defined in cstdlib */ - inline int rand() { return fast_uniform(0, RAND_MAX); } + inline int rand() { return uniform(0, RAND_MAX); } /** @@ -479,14 +452,6 @@ namespace turi { return get_source().bernoulli(p); } - /** - * \ingroup random - * Draw a sample form a bernoulli distribution using the faster generator - */ - inline bool fast_bernoulli(const double p = double(0.5)) { - return get_source().fast_bernoulli(p); - } - /** * \ingroup random * Generate a draw from a multinomial. 
This function diff --git a/src/core/storage/query_engine/algorithm/sort.cpp b/src/core/storage/query_engine/algorithm/sort.cpp index 67aec51c6e..23dd25c0b6 100644 --- a/src/core/storage/query_engine/algorithm/sort.cpp +++ b/src/core/storage/query_engine/algorithm/sort.cpp @@ -64,7 +64,7 @@ create_quantile_sketch(std::shared_ptr& sframe_planner_node, if (num_sampled == num_to_sample) { return true; } - if (turi::random::fast_bernoulli(sample_ratio)) { + if (turi::random::bernoulli(sample_ratio)) { local_sketch.add(row); ++num_sampled; } diff --git a/src/model_server/lib/image_util.cpp b/src/model_server/lib/image_util.cpp index f852e42017..1022c7ee75 100644 --- a/src/model_server/lib/image_util.cpp +++ b/src/model_server/lib/image_util.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -376,7 +377,9 @@ std::shared_ptr load_images(std::string url, std::string format, b image_sarray->set_type(flex_type_enum::IMAGE); if (random_order) { - std::random_shuffle(all_files.begin(), all_files.end()); + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(all_files.begin(), all_files.end(), g); } else { std::sort(all_files.begin(), all_files.end()); }