Changed fast_uniform to uniform (as uniform was faster).

apple · Oct 2, 2020 · af22629 · af22629
1 parent 374cc26
commit af22629
Show file tree

Hide file tree

Showing 54 changed files with 177 additions and 180 deletions.
diff --git a/src/core/logging/table_printer/table_printer.hpp b/src/core/logging/table_printer/table_printer.hpp
@@ -156,7 +156,7 @@ struct progress_time {
  *
  *        for(size_t i = 0; i < 50000; ++i) {
  *          table.print_progress_row(proc, proc, progress_time(), i);
- *          proc += random::fast_uniform<size_t>(0, 100);
+ *          proc += random::uniform<size_t>(0, 100);
  *          usleep(100);  // sleep for 200 microseconds
  *        }
  *

diff --git a/src/core/logging/table_printer/table_printer_examples.cpp b/src/core/logging/table_printer/table_printer_examples.cpp
@@ -59,7 +59,7 @@ int main(int argc, char **argv) {
 
     for(size_t i = 0; i < 50000; ++i) {
       table.print_progress_row(proc, proc, progress_time(), i);
-      proc += random::fast_uniform<size_t>(0, 100);
+      proc += random::uniform<size_t>(0, 100);
       usleep(100);  // sleep for 200 microseconds
     }
 

diff --git a/src/core/random/alias.cpp b/src/core/random/alias.cpp
@@ -56,8 +56,8 @@ alias_sampler::alias_sampler(const std::vector<double>& p) {
 }
 
 size_t alias_sampler::sample() {
-  size_t k = random::fast_uniform<size_t>(0, K - 1);
-  if (q[k] > random::fast_uniform<double>(0, 1)) {
+  size_t k = random::uniform<size_t>(0, K - 1);
+  if (q[k] > random::uniform<double>(0, 1)) {
     return k;
   } else {
     return J[k];

diff --git a/src/core/random/random.hpp b/src/core/random/random.hpp
@@ -144,7 +144,7 @@ namespace turi {
       /**
        * Generate a random number in the uniform real with range [min,
        * max) or [min, max] if the number type is discrete.
-       * [Double overload]
+       * [Float overload]
        */
       template <typename NumType>
       inline NumType uniform(const NumType min, const NumType max, 
@@ -155,14 +155,6 @@ namespace turi {
         return d(m_rng);
       } // end of uniform 
 
-      /**
-       * Generate a random number in the uniform real with range [min,
-       * max) or [min, max] if the number type is discrete.
-       */
-      template <typename NumType>
-      inline NumType fast_uniform(const NumType min, const NumType max) {
-        return uniform<NumType>(min, max);
-      }
 
       /**
        * Generate a random number in the uniform real with range [min,
@@ -385,11 +377,11 @@ namespace turi {
      * max) or [min, max] if the number type is discrete.
      */
     template<typename NumType>
-    inline NumType fast_uniform(const NumType min, const NumType max) {
+    inline NumType uniform(const NumType min, const NumType max) {
       if (min == max) return min;
-      return get_source().fast_uniform<NumType>(min, max);
+      return get_source().uniform<NumType>(min, max);
     } // end of uniform
 
 
     /**
      * \ingroup random

diff --git a/src/core/storage/serialization/dir_archive.cpp b/src/core/storage/serialization/dir_archive.cpp
@@ -278,7 +278,7 @@ size_t get_next_random_number() {
     gen.nondet_seed();
     initialized = true;
   }
-  return gen.fast_uniform<size_t>(0, std::numeric_limits<size_t>::max());
+  return gen.uniform<size_t>(0, std::numeric_limits<size_t>::max());
 }
 
 

diff --git a/src/core/storage/sframe_data/sarray_file_format_v2.hpp b/src/core/storage/sframe_data/sarray_file_format_v2.hpp
@@ -314,7 +314,7 @@ class sarray_format_reader_v2: public sarray_format_reader<T> {
    * (looping around).
    */
   void try_evict_something_from_cache() {
-    size_t b = turi::random::fast_uniform<size_t>(0, m_cache.size() - 1);
+    size_t b = turi::random::uniform<size_t>(0, m_cache.size() - 1);
     /*
      * if the current bit is not 1, try to find the next one bit
      * if there is no bit after that, loop around, reset and 0 and try the bit

diff --git a/src/core/util/testing_utils.cpp b/src/core/util/testing_utils.cpp
@@ -56,7 +56,7 @@ std::string _get_unique_directory(const std::string& file, size_t line) {
 
   ss  << "t" << thread::thread_id() << "__";
 
-  ss << random::fast_uniform<size_t>(0, size_t(-1));
+  ss << random::uniform<size_t>(0, size_t(-1));
 
   return ss.str();
 }

diff --git a/src/core/util/testing_utils.hpp b/src/core/util/testing_utils.hpp
@@ -48,7 +48,7 @@ void _save_and_load_object(T& dest, const U& src, std::string dir) {
 
   std::string arc_name = dir + "/test_archive";
 
-  uint64_t random_number = hash64(random::fast_uniform<size_t>(0,size_t(-1)));
+  uint64_t random_number = hash64(random::uniform<size_t>(0,size_t(-1)));
 
   // Save it
   dir_archive archive_write;

diff --git a/src/ml/ml_data/ml_data.cpp b/src/ml/ml_data/ml_data.cpp
@@ -746,7 +746,7 @@ ml_data ml_data::create_subsampled_copy(size_t n_rows, size_t random_seed) const
   for(size_t i = 0; i < n_rows; ++i) {
     size_t lb = (i > 0) ? (samples[i - 1] + 1) : 0;
     size_t ub = (i < n_rows - 1) ? (samples[i + 1] - 1) : data_size - 1;
-    samples[i] = random::fast_uniform<size_t>(lb, ub);
+    samples[i] = random::uniform<size_t>(lb, ub);
   }
 
   // Break them up into groups

diff --git a/src/ml/sketches/countmin.hpp b/src/ml/sketches/countmin.hpp
@@ -69,7 +69,7 @@ class countmin {
       gen.seed(seed);
       // Initialize hash functions and count matrix
       for (size_t j = 0; j < num_hash; ++j) {
-        seeds.push_back(gen.fast_uniform<size_t>(0, std::numeric_limits<size_t>::max()));
+        seeds.push_back(gen.uniform<size_t>(0, std::numeric_limits<size_t>::max()));
         counts.push_back(std::vector<size_t>(num_bins));
       }
    }

diff --git a/src/ml/sketches/countsketch.hpp b/src/ml/sketches/countsketch.hpp
@@ -62,8 +62,8 @@ class countsketch {
       gen.seed(seed);
       // Initialize hash functions and count matrix
       for (size_t j = 0; j < num_hash; ++j) {
-        seeds.push_back(gen.fast_uniform<size_t>(0, std::numeric_limits<size_t>::max()));
-        seeds_binary.push_back(gen.fast_uniform<size_t>(0, std::numeric_limits<size_t>::max()));
+        seeds.push_back(gen.uniform<size_t>(0, std::numeric_limits<size_t>::max()));
+        seeds_binary.push_back(gen.uniform<size_t>(0, std::numeric_limits<size_t>::max()));
 
         counts.push_back(std::vector<counter_int>(num_bins));
       }

diff --git a/src/toolkits/clustering/kmeans.cpp b/src/toolkits/clustering/kmeans.cpp
@@ -722,7 +722,7 @@ void kmeans_model::choose_random_centers() {
     progress_table.print_header();
 
     // Choose the first center and set in the model.
-    size_t idx_center = turi::random::fast_uniform<size_t>(0, seeds.size() - 1);
+    size_t idx_center = turi::random::uniform<size_t>(0, seeds.size() - 1);
     progress_table.print_progress_row(0, 0, idx_center);
     clusters[0].center = seeds[idx_center];
 

diff --git a/src/toolkits/factorization/factorization_model_impl.hpp b/src/toolkits/factorization/factorization_model_impl.hpp
@@ -178,7 +178,7 @@ class factorization_model_impl final : public factorization_model {
           size_t end_w_idx = ((thread_idx + 1) * n_total_dimensions) / num_threads;
 
           for(size_t i = start_w_idx; i < end_w_idx; ++i)
-            w[i] = (sd > 0) ? random::fast_uniform<double>(-sd/2, sd/2) : 0;
+            w[i] = (sd > 0) ? random::uniform<double>(-sd/2, sd/2) : 0;
         } else {
           w.setZero();
         }
@@ -206,9 +206,9 @@ class factorization_model_impl final : public factorization_model {
               // observations, num_factors > 100), this gave good
               // starting values and didn't diverge on reset.
 
-              V(i, j) = (V_sd > 0) ? random::fast_uniform<double>(lb, ub) : 0;
+              V(i, j) = (V_sd > 0) ? random::uniform<double>(lb, ub) : 0;
 
-              if(random::fast_uniform<size_t>(0, num_factors()) > std::min<size_t>(4ULL, num_factors() / 2))
+              if(random::uniform<size_t>(0, num_factors()) > std::min<size_t>(4ULL, num_factors() / 2))
                 V(i, j) /= 1000;
             }
           }

diff --git a/src/toolkits/factorization/ranking_sgd_solver_base.hpp b/src/toolkits/factorization/ranking_sgd_solver_base.hpp
@@ -479,7 +479,7 @@ class ranking_sgd_solver_base : public sgd::sgd_solver_base {
         // Get num_sampled_negative_examples candidate points.
 
         for(size_t i = 0; i < num_sampled_negative_examples; ++i) {
-          size_t candidate_item = random::fast_uniform<size_t>(0, n_items - 1);
+          size_t candidate_item = random::uniform<size_t>(0, n_items - 1);
           item_observed.prefetch(candidate_item);
           candidate_negative_items[i] = candidate_item;
         }
@@ -554,7 +554,7 @@ class ranking_sgd_solver_base : public sgd::sgd_solver_base {
 
       proc_buf.available_item_list_chosen_indices.resize(num_sampled_negative_examples);
       for(size_t i = 0; i < num_sampled_negative_examples; ++i) {
-        size_t idx = random::fast_uniform<size_t>(0, proc_buf.available_item_list.size()-1);
+        size_t idx = random::uniform<size_t>(0, proc_buf.available_item_list.size()-1);
         chosen_negative_items[i] = proc_buf.available_item_list[idx];
         proc_buf.available_item_list_chosen_indices[i] = idx;
         DASSERT_FALSE(item_observed.get(chosen_negative_items[i]));

diff --git a/src/toolkits/ml_data_2/ml_data.cpp b/src/toolkits/ml_data_2/ml_data.cpp
@@ -639,7 +639,7 @@ ml_data ml_data::create_subsampled_copy(size_t n_rows, size_t random_seed) const
   for(size_t i = 0; i < n_rows; ++i) {
     size_t lb = (i > 0) ? (samples[i - 1] + 1) : 0;
     size_t ub = (i < n_rows - 1) ? (samples[i + 1] - 1) : data_size - 1;
-    samples[i] = random::fast_uniform<size_t>(lb, ub);
+    samples[i] = random::uniform<size_t>(lb, ub);
   }
 
   // Break them up into groups

diff --git a/src/toolkits/ml_data_2/ml_data_setup.cpp b/src/toolkits/ml_data_2/ml_data_setup.cpp
@@ -564,7 +564,7 @@ void ml_data::_fill_data_blocks(bool in_training_mode) {
             while(true) {
 
               // If it's a full block, write it to a random location.
-              size_t write_out_segment = random::fast_uniform<size_t>(0, output_iterators.size()-1);
+              size_t write_out_segment = random::uniform<size_t>(0, output_iterators.size()-1);
 
               if(output_iterator_locks[write_out_segment].try_lock()) {
                 auto& it_out = output_iterators[write_out_segment];

diff --git a/src/toolkits/nearest_neighbors/lsh_family.cpp b/src/toolkits/nearest_neighbors/lsh_family.cpp
@@ -92,7 +92,7 @@ void lsh_euclidean::pre_lsh(const v2::ml_data& mld_ref, bool is_sparse) {
     });
   }
   w = std::max(size_t(1), static_cast<size_t>(distance_matrix.mean()));
-  rand_vec = rand_vec.unaryExpr([&](double x) { return random::fast_uniform<double>(0., w); });
+  rand_vec = rand_vec.unaryExpr([&](double x) { return random::uniform<double>(0., w); });
 }
 
 void lsh_euclidean::init_model(size_t num_dimensions) {
@@ -101,7 +101,7 @@ void lsh_euclidean::init_model(size_t num_dimensions) {
   rand_mat.resize(num_projections, num_input_dimensions);
   rand_vec.resize(num_projections);
   rand_mat = rand_mat.unaryExpr([](double x) { return random::normal(0., 1.); });
-  rand_vec = rand_vec.unaryExpr([&](double x) { return random::fast_uniform<double>(0., w); });
+  rand_vec = rand_vec.unaryExpr([&](double x) { return random::uniform<double>(0., w); });
 }
 
 void lsh_euclidean::save(turi::oarchive& oarc) const {
@@ -172,7 +172,7 @@ void lsh_manhattan::pre_lsh(const v2::ml_data& mld_ref, bool is_sparse) {
   }
 
   w = std::max(size_t(1), static_cast<size_t>(distance_matrix.mean()));
-  rand_vec = rand_vec.unaryExpr([&](double x) { return random::fast_uniform<double>(0., w); });
+  rand_vec = rand_vec.unaryExpr([&](double x) { return random::uniform<double>(0., w); });
 }
 
 void lsh_manhattan::init_model(size_t num_dimensions) {
@@ -181,7 +181,7 @@ void lsh_manhattan::init_model(size_t num_dimensions) {
   rand_mat.resize(num_projections, num_input_dimensions);
   rand_vec.resize(num_projections);
   rand_mat = rand_mat.unaryExpr([](double x) { return random::cauchy(0., 1.); });
-  rand_vec = rand_vec.unaryExpr([&](double x) { return random::fast_uniform<double>(0., w); });
+  rand_vec = rand_vec.unaryExpr([&](double x) { return random::uniform<double>(0., w); });
 }
 
 void lsh_cosine::init_model(size_t num_dimensions) {
@@ -233,7 +233,7 @@ void lsh_jaccard::init_model(size_t num_dimensions) {
   rand_sign.assign(num_input_dimensions, 0);
   parallel_for (0, num_input_dimensions, [&](size_t idx) {
     rand_permutation[idx] = idx;
-    if (random::fast_uniform<double>(0., 1.) > 0.5) {
+    if (random::uniform<double>(0., 1.) > 0.5) {
       rand_sign[idx] = 1;
     }
   });

diff --git a/src/toolkits/sparse_similarity/sparse_similarity_lookup_impl.hpp b/src/toolkits/sparse_similarity/sparse_similarity_lookup_impl.hpp
@@ -582,8 +582,8 @@ class sparse_similarity_lookup_impl : public sparse_similarity_lookup {
 
           for(size_t s_idx = sample_start_idx; s_idx < sample_end_idx; ++s_idx) {
             auto& s = samples[s_idx];
-            s.i = random::fast_uniform<size_t>(0, num_items - 1);
-            s.j = random::fast_uniform<size_t>(0, num_items - 1);
+            s.i = random::uniform<size_t>(0, num_items - 1);
+            s.j = random::uniform<size_t>(0, num_items - 1);
             s.log_1_m_q = 0;
           }
 
@@ -598,7 +598,7 @@ class sparse_similarity_lookup_impl : public sparse_similarity_lookup {
             // Do an iid sample here.
             item_count_distribution.resize(user_count_dist_sample_size);
             for(size_t i = 0; i < user_count_dist_sample_size; ++i) {
-              size_t idx = random::fast_uniform<size_t>(0, items_per_user.size() - 1);
+              size_t idx = random::uniform<size_t>(0, items_per_user.size() - 1);
               item_count_distribution[i] = items_per_user[idx];
             }
             mult_factor = double(items_per_user.size()) / item_count_distribution.size();

diff --git a/src/toolkits/text/alias.cpp b/src/toolkits/text/alias.cpp
@@ -563,7 +563,7 @@ size_t alias_topic_model::sample_topic(size_t d, size_t w, size_t s,
   double prob_sparse_sample = Q(0, w) / (Pdw + Q(0, w));
 
   size_t t = static_cast<size_t>(-1);
-  if (random::fast_uniform<double>(0, 1) < prob_sparse_sample) {
+  if (random::uniform<double>(0, 1) < prob_sparse_sample) {
 
     // Use samples precomputed via Alias sampler
     t = word_samples[w].back();
@@ -576,7 +576,7 @@ size_t alias_topic_model::sample_topic(size_t d, size_t w, size_t s,
   } else {
 
     // Inverse CDF method on the sparse part
-    double cutoff = random::fast_uniform<double>(0, Pdw);
+    double cutoff = random::uniform<double>(0, Pdw);
     double current = 0.0;
 
     if (doc_topic_counts.get_row(d).size() == 0) {
@@ -609,7 +609,7 @@ size_t alias_topic_model::sample_topic(size_t d, size_t w, size_t s,
               (Pdw * pdwt + Q(0, w) * q(w, t));
 
   // Perform MH step
-  size_t chosen_topic = (random::fast_uniform<double>(0, 1) <
+  size_t chosen_topic = (random::uniform<double>(0, 1) <
                          std::min(1.0, pi)) ? t : s;
   // Reset probs to 0
   for (const auto& kv : doc_topic_counts.get_row(d)) {

diff --git a/src/toolkits/text/cgs.cpp b/src/toolkits/text/cgs.cpp
@@ -258,7 +258,7 @@ std::map<std::string, size_t> cgs_topic_model::sample_counts(
       // Iterate through each token
 
       // Choose a random spot in the document to try first.   This way we reduce biases.
-      size_t shift = random::fast_uniform<size_t>(0, x.size()-1);
+      size_t shift = random::uniform<size_t>(0, x.size()-1);
       for (size_t _j = 0; _j < x.size(); ++_j) {
         size_t j = (_j + shift) % x.size();
 

diff --git a/src/toolkits/text/scvb.hpp b/src/toolkits/text/scvb.hpp
@@ -84,7 +84,7 @@ class scvb0_solver {
   void initialize_N_theta_j(size_t C_j) {
      N_theta_j = Eigen::MatrixXd::Zero(model->num_topics, 1);
      for (size_t i = 0; i < C_j; ++i) {
-       size_t ix = random::fast_uniform<size_t>(0, model->num_topics-1);
+       size_t ix = random::uniform<size_t>(0, model->num_topics-1);
        N_theta_j(ix) += 1;
      }
   }

diff --git a/src/toolkits/text/topic_model.cpp b/src/toolkits/text/topic_model.cpp
@@ -350,7 +350,7 @@ topic_model::count_matrix_type topic_model::predict_counts(std::shared_ptr<sarra
           } else {
 
             num_words_in_doc += freq;
-            size_t topic = random::fast_uniform<size_t>(0, num_topics - 1);
+            size_t topic = random::uniform<size_t>(0, num_topics - 1);
             DASSERT_TRUE(topic < num_topics);
             topic_assignments.push_back(topic);
             doc_topic_counts(doc_id, topic) += freq;
@@ -373,7 +373,7 @@ topic_model::count_matrix_type topic_model::predict_counts(std::shared_ptr<sarra
 
       // Sample topics for this document
       for (size_t burnin = 0; burnin < num_burnin; ++burnin) {
-        size_t shift = random::fast_uniform<size_t>(0, x.size()-1);
+        size_t shift = random::uniform<size_t>(0, x.size()-1);
         for (size_t _j = 0; _j < x.size(); ++_j) {
           size_t j = (_j + shift) % x.size();
 

diff --git a/src/toolkits/util/data_generators.cpp b/src/toolkits/util/data_generators.cpp
@@ -139,7 +139,7 @@ sframe lm_data_generator::generate(size_t n_observations,
           if(n_categorical_values[j] == 0) {
             x[j] = random::normal(0, 1);
           } else {
-            x[j] = random::fast_uniform<size_t>(0, n_categorical_values[j] - 1);
+            x[j] = random::uniform<size_t>(0, n_categorical_values[j] - 1);
           }
         }
 

diff --git a/src/toolkits/util/random_sframe_generation.cpp b/src/toolkits/util/random_sframe_generation.cpp
@@ -160,7 +160,7 @@ gl_sframe _generate_random_classification_sframe(size_t n_rows, std::string colu
   }
 
   for(size_t i = num_classes; i < n_bins; ++i) {
-    bin_to_class_map[i] = random::fast_uniform<size_t>(0, num_classes - 1);
+    bin_to_class_map[i] = random::uniform<size_t>(0, num_classes - 1);
   }
 
   random::shuffle(bin_to_class_map);

diff --git a/test/fileio/block_cache_test.cxx b/test/fileio/block_cache_test.cxx
@@ -40,8 +40,8 @@ struct block_cache_test {
       for (size_t nprobes = 0; nprobes < num_probes; ++nprobes) {
         TS_ASSERT_EQUALS(cache.value_length(std::to_string(key)), 256);
         // generate a random start-end sequence to read
-        size_t start = random::fast_uniform<size_t>(0, 255);
-        size_t end = random::fast_uniform<size_t>(0, 256);
+        size_t start = random::uniform<size_t>(0, 255);
+        size_t end = random::uniform<size_t>(0, 256);
         std::string value;
         auto ret = cache.read(std::to_string(key), value, start, end);
         // make sure the values we read were valid