From 0a1a322fb1db4462c2342e4f9e7c73d12f2d6528 Mon Sep 17 00:00:00 2001 From: Robert Keith Date: Wed, 7 Oct 2020 17:50:56 -0700 Subject: [PATCH 1/3] WIP integrating ideal and real solition distributions --- src/distribution/ideal_soliton.rs | 424 +++++++++++++++++++++++++ src/distribution/mod.rs | 23 ++ src/distribution/robust_soliton.rs | 490 +++++++++++++++++++++++++++++ 3 files changed, 937 insertions(+) create mode 100644 src/distribution/ideal_soliton.rs create mode 100644 src/distribution/robust_soliton.rs diff --git a/src/distribution/ideal_soliton.rs b/src/distribution/ideal_soliton.rs new file mode 100644 index 00000000..c11ca88a --- /dev/null +++ b/src/distribution/ideal_soliton.rs @@ -0,0 +1,424 @@ +use crate::distribution::{Discrete, Univariate, Soliton}; +use crate::statistics::*; +use crate::{Result, StatsError}; +use rand::distributions::Distribution; +use rand::Rng; +use std::f64; + +/// Implements the [Discrete +/// Uniform](https://en.wikipedia.org/wiki/Discrete_uniform_distribution) +/// distribution +/// +/// # Examples +/// +/// ``` +/// use statrs::distribution::{DiscreteUniform, Discrete}; +/// use statrs::statistics::Mean; +/// +/// let n = DiscreteUniform::new(0, 5).unwrap(); +/// assert_eq!(n.mean(), 2.5); +/// assert_eq!(n.pmf(3), 1.0 / 6.0); +/// ``` + +#[derive(Debug, Clone, PartialEq)] +pub struct IdealSoliton { + min: i64, + max: i64, +} + +impl IdealSoliton { + /// Constructs a new discrete uniform distribution with a minimum value + /// of `min` and a maximum value of `max`. + /// + /// # Errors + /// + /// Returns an error if `max < min` + /// + /// # Examples + /// + /// ``` + /// use statrs::distribution::DiscreteUniform; + /// + /// let mut result = DiscreteUniform::new(0, 5); + /// assert!(result.is_ok()); + /// + /// result = DiscreteUniform::new(5, 0); + /// assert!(result.is_err()); + /// ``` + pub fn new(max: i64) -> Result { + if max < 1 { + Err(StatsError::BadParams) + } else { + Ok(IdealSoliton { min: 1, max }) + } + } +} + +impl Distribution for IdealSoliton { + fn sample(&self, r: &mut R) -> f64 { + r.gen_range(self.min, self.max + 1) as f64 + } +} + +impl Univariate for IdealSoliton { + /// Calculates the cumulative distribution function for the + /// discrete uniform distribution at `x` + /// + /// # Formula + /// + /// ```ignore + /// (floor(x) - min + 1) / (max - min + 1) + /// ``` + fn cdf(&self, x: f64) -> f64 { + if x < self.min as f64 { + 0.0 + } else if x >= self.max as f64 { + 1.0 + } else { + let lower = self.min as f64; + let upper = self.max as f64; + let ans = (x.floor() - lower + 1.0) / (upper - lower + 1.0); + if ans > 1.0 { + 1.0 + } else { + ans + } + } + } +} + +impl Min for IdealSoliton { + /// Returns the minimum value in the domain of the discrete uniform + /// distribution + /// + /// # Remarks + /// + /// This is the same value as the minimum passed into the constructor + fn min(&self) -> i64 { + self.min + } +} + +impl Max for IdealSoliton { + /// Returns the maximum value in the domain of the discrete uniform + /// distribution + /// + /// # Remarks + /// + /// This is the same value as the maximum passed into the constructor + fn max(&self) -> i64 { + self.max + } +} + +impl Mean for IdealSoliton { + /// Returns the mean of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// (min + max) / 2 + /// ``` + fn mean(&self) -> f64 { + (self.min + self.max) as f64 / 2.0 + } +} + +impl Variance for IdealSoliton { + /// Returns the variance of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// ((max - min + 1)^2 - 1) / 12 + /// ``` + fn variance(&self) -> f64 { + let diff = (self.max - self.min) as f64; + ((diff + 1.0) * (diff + 1.0) - 1.0) / 12.0 + } + + /// Returns the standard deviation of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// sqrt(((max - min + 1)^2 - 1) / 12) + /// ``` + fn std_dev(&self) -> f64 { + self.variance().sqrt() + } +} + +impl Entropy for IdealSoliton { + /// Returns the entropy of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// ln(max - min + 1) + /// ``` + fn entropy(&self) -> f64 { + let diff = (self.max - self.min) as f64; + (diff + 1.0).ln() + } +} + +impl Skewness for IdealSoliton { + /// Returns the skewness of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// 0 + /// ``` + fn skewness(&self) -> f64 { + 0.0 + } +} + +impl Median for IdealSoliton { + /// Returns the median of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// (max + min) / 2 + /// ``` + fn median(&self) -> f64 { + (self.min + self.max) as f64 / 2.0 + } +} + +impl Mode for IdealSoliton { + /// Returns the mode for the discrete uniform distribution + /// + /// # Remarks + /// + /// Since every element has an equal probability, mode simply + /// returns the middle element + /// + /// # Formula + /// + /// ```ignore + /// N/A // (max + min) / 2 for the middle element + /// ``` + fn mode(&self) -> i64 { + ((self.min + self.max) as f64 / 2.0).floor() as i64 + } +} + +impl Discrete for IdealSoliton { + /// Calculates the probability mass function for the discrete uniform + /// distribution at `x` + /// + /// # Remarks + /// + /// Returns `0.0` if `x` is not in `[min, max]` + /// + /// # Formula + /// + /// ```ignore + /// 1 / (max - min + 1) + /// ``` + fn pmf(&self, x: i64) -> f64 { + if x >= self.min && x <= self.max { + 1.0 / (self.max - self.min + 1) as f64 + } else { + 0.0 + } + } + + /// Calculates the log probability mass function for the discrete uniform + /// distribution at `x` + /// + /// # Remarks + /// + /// Returns `f64::NEG_INFINITY` if `x` is not in `[min, max]` + /// + /// # Formula + /// + /// ```ignore + /// ln(1 / (max - min + 1)) + /// ``` + fn ln_pmf(&self, x: i64) -> f64 { + if x >= self.min && x <= self.max { + -((self.max - self.min + 1) as f64).ln() + } else { + f64::NEG_INFINITY + } + } +} + +impl Soliton for IdealSoliton { + /// Calculates the ideal soliton for the + /// discrete uniform distribution at `x` + /// + /// # Remarks + /// + /// Returns `0.0` if `x` is not in `[min, max]` + /// + /// # Formula + /// + /// ```ignore + /// p(1) = 1 / (max) + /// p(x) = 1/(x(x-1)) + /// ``` + fn soliton(&self, x: i64) -> f64 { + if x > 1 && x <= self.max { + 1.0 / ((x as f64) * (x as f64 - 1.0)) + } else if x == 1 { + 1.0 / self.max as f64 + } else { + // Point must be in range (0, limit] + 0.0 + } + } +} + +#[cfg_attr(rustfmt, rustfmt_skip)] +#[cfg(test)] +mod test { + use std::fmt::Debug; + use std::f64; + use crate::statistics::*; + use crate::distribution::{Univariate, Discrete, DiscreteUniform}; + + fn try_create(min: i64, max: i64) -> DiscreteUniform { + let n = DiscreteUniform::new(min, max); + assert!(n.is_ok()); + n.unwrap() + } + + fn create_case(min: i64, max: i64) { + let n = try_create(min, max); + assert_eq!(min, n.min()); + assert_eq!(max, n.max()); + } + + fn bad_create_case(min: i64, max: i64) { + let n = DiscreteUniform::new(min, max); + assert!(n.is_err()); + } + + fn get_value(min: i64, max: i64, eval: F) -> T + where T: PartialEq + Debug, + F: Fn(DiscreteUniform) -> T + { + let n = try_create(min, max); + eval(n) + } + + fn test_case(min: i64, max: i64, expected: T, eval: F) + where T: PartialEq + Debug, + F: Fn(DiscreteUniform) -> T + { + let x = get_value(min, max, eval); + assert_eq!(expected, x); + } + + #[test] + fn test_create() { + create_case(-10, 10); + create_case(0, 4); + create_case(10, 20); + create_case(20, 20); + } + + #[test] + fn test_bad_create() { + bad_create_case(-1, -2); + bad_create_case(6, 5); + } + + #[test] + fn test_mean() { + test_case(-10, 10, 0.0, |x| x.mean()); + test_case(0, 4, 2.0, |x| x.mean()); + test_case(10, 20, 15.0, |x| x.mean()); + test_case(20, 20, 20.0, |x| x.mean()); + } + + #[test] + fn test_variance() { + test_case(-10, 10, 36.66666666666666666667, |x| x.variance()); + test_case(0, 4, 2.0, |x| x.variance()); + test_case(10, 20, 10.0, |x| x.variance()); + test_case(20, 20, 0.0, |x| x.variance()); + } + + #[test] + fn test_std_dev() { + test_case(-10, 10, (36.66666666666666666667f64).sqrt(), |x| x.std_dev()); + test_case(0, 4, (2.0f64).sqrt(), |x| x.std_dev()); + test_case(10, 20, (10.0f64).sqrt(), |x| x.std_dev()); + test_case(20, 20, 0.0, |x| x.std_dev()); + } + + #[test] + fn test_entropy() { + test_case(-10, 10, 3.0445224377234229965005979803657054342845752874046093, |x| x.entropy()); + test_case(0, 4, 1.6094379124341003746007593332261876395256013542685181, |x| x.entropy()); + test_case(10, 20, 2.3978952727983705440619435779651292998217068539374197, |x| x.entropy()); + test_case(20, 20, 0.0, |x| x.entropy()); + } + + #[test] + fn test_skewness() { + test_case(-10, 10, 0.0, |x| x.skewness()); + test_case(0, 4, 0.0, |x| x.skewness()); + test_case(10, 20, 0.0, |x| x.skewness()); + test_case(20, 20, 0.0, |x| x.skewness()); + } + + #[test] + fn test_median() { + test_case(-10, 10, 0.0, |x| x.median()); + test_case(0, 4, 2.0, |x| x.median()); + test_case(10, 20, 15.0, |x| x.median()); + test_case(20, 20, 20.0, |x| x.median()); + } + + #[test] + fn test_mode() { + test_case(-10, 10, 0, |x| x.mode()); + test_case(0, 4, 2, |x| x.mode()); + test_case(10, 20, 15, |x| x.mode()); + test_case(20, 20, 20, |x| x.mode()); + } + + #[test] + fn test_pmf() { + test_case(-10, 10, 0.04761904761904761904762, |x| x.pmf(-5)); + test_case(-10, 10, 0.04761904761904761904762, |x| x.pmf(1)); + test_case(-10, 10, 0.04761904761904761904762, |x| x.pmf(10)); + test_case(-10, -10, 0.0, |x| x.pmf(0)); + test_case(-10, -10, 1.0, |x| x.pmf(-10)); + } + + #[test] + fn test_ln_pmf() { + test_case(-10, 10, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(-5)); + test_case(-10, 10, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(1)); + test_case(-10, 10, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(10)); + test_case(-10, -10, f64::NEG_INFINITY, |x| x.ln_pmf(0)); + test_case(-10, -10, 0.0, |x| x.ln_pmf(-10)); + } + + #[test] + fn test_cdf() { + test_case(-10, 10, 0.2857142857142857142857, |x| x.cdf(-5.0)); + test_case(-10, 10, 0.5714285714285714285714, |x| x.cdf(1.0)); + test_case(-10, 10, 1.0, |x| x.cdf(10.0)); + test_case(-10, -10, 1.0, |x| x.cdf(-10.0)); + } + + #[test] + fn test_cdf_lower_bound() { + test_case(0, 3, 0.0, |x| x.cdf(-1.0)); + } + + #[test] + fn test_cdf_upper_bound() { + test_case(0, 3, 1.0, |x| x.cdf(5.0)); + } +} diff --git a/src/distribution/mod.rs b/src/distribution/mod.rs index f899faf0..bc41d657 100644 --- a/src/distribution/mod.rs +++ b/src/distribution/mod.rs @@ -30,6 +30,8 @@ pub use self::students_t::StudentsT; pub use self::triangular::Triangular; pub use self::uniform::Uniform; pub use self::weibull::Weibull; +pub use self::robust_soliton::RobustSoliton; +pub use self::ideal_soliton::IdealSoliton; use crate::statistics::{Max, Min}; mod bernoulli; @@ -48,6 +50,7 @@ mod fisher_snedecor; mod gamma; mod geometric; mod hypergeometric; +mod ideal_soliton; mod internal; mod inverse_gamma; mod log_normal; @@ -57,6 +60,7 @@ mod negative_binomial; mod normal; mod pareto; mod poisson; +mod robust_soliton; mod students_t; mod triangular; mod uniform; @@ -269,3 +273,22 @@ pub trait CheckedDiscrete { /// ``` fn checked_ln_pmf(&self, x: T) -> Result; } + +/// The 'IdealSoliton' trait provides an interface for interacting +/// with discrete statistical distributions from integers 1..N with +/// N as the single parameter for the distribution +pub trait Soliton { + /// Returns the probability mass function calculated at `x` for + /// a given distribution + /// + /// # Examples + /// + /// ``` + /// use statrs::distribution::{Soliton, Uniform}; + /// use statrs::prec; + /// + /// let n = Uniform::new(1, 5).unwrap(); + /// assert_eq!(n.soliton(1), 1/5); + /// ``` + fn soliton(&self, x: T) -> K; +} diff --git a/src/distribution/robust_soliton.rs b/src/distribution/robust_soliton.rs new file mode 100644 index 00000000..a9b04bca --- /dev/null +++ b/src/distribution/robust_soliton.rs @@ -0,0 +1,490 @@ +use crate::distribution::{Discrete, Univariate, Soliton}; +use crate::statistics::*; +use crate::{Result, StatsError}; +use rand::distributions::Distribution; +use rand::Rng; +use std::f64; + +/// Implements the [Discrete +/// Uniform](https://en.wikipedia.org/wiki/Discrete_uniform_distribution) +/// distribution +/// +/// # Examples +/// +/// ``` +/// use statrs::distribution::{DiscreteUniform, Discrete}; +/// use statrs::statistics::Mean; +/// +/// let n = DiscreteUniform::new(0, 5).unwrap(); +/// assert_eq!(n.mean(), 2.5); +/// assert_eq!(n.pmf(3), 1.0 / 6.0); +/// ``` + +#[derive(Debug, Clone, PartialEq)] +pub struct RobustSoliton { + min: i64, + max: i64, + spike: Option, + cumulative_probability_table: Vec, + ripple: Ripple, + fail_probability: f64, +} + +impl RobustSoliton { + /// Constructs a new discrete uniform distribution with a minimum value + /// of `min` and a maximum value of `max`. + /// + /// # Errors + /// + /// Returns an error if `max < min` + /// + /// # Examples + /// + /// ``` + /// use statrs::distribution::DiscreteUniform; + /// + /// let mut result = DiscreteUniform::new(0, 5); + /// assert!(result.is_ok()); + /// + /// result = DiscreteUniform::new(5, 0); + /// assert!(result.is_err()); + /// ``` + pub fn new(max: i64, heuristic: bool, ripple: f64, fail_probability: f64) -> Result { + if max < 1 { + Err(StatsError::BadParams) + } else { + let pmf_table: Vec = Vec::new(); + Ok(RobustSoliton { + min: 1, + max, + spike: None, + cumulative_probability_table: pmf_table, + ripple: match heuristic { + false => Ripple::Fixed(ripple), + true => Ripple::Heuristic(ripple), + }, + fail_probability, + }) + } + } + + fn normalization_factor(&self) -> f64 { + let mut normalization_factor: f64 = 0.0; + for i in 1..(self.max+1) { + normalization_factor += self.soliton(i); + normalization_factor += self.additive_probability(i); + } + normalization_factor + } + + fn additive_probability(&self, val: i64) -> f64 { + let ripple_size = self.ripple.size(self.max, self.fail_probability); + let swap = self.max as f64 / ripple_size; + if val == 0 || val > self.max { + panic!("Point must be in range (0,max]. Given {} - {}", val, self.max); + } else if (val as f64) < swap { + ripple_size / ((val * self.max) as f64) + } else if (val as f64) == swap { + (ripple_size * (ripple_size / self.fail_probability).ln()) / (self.max as f64) + } else { + 0.0 + } + } +} + +#[derive(PartialEq, Clone, Debug)] +enum Ripple { + Fixed(f64), + Heuristic(f64) +} + +impl Ripple { + fn size(&self, max: i64, fail_probability: f64) -> f64 { + match self { + &Ripple::Fixed(n) => n, + &Ripple::Heuristic(n) => { + n * (max as f64 / fail_probability).ln() * (max as f64).sqrt() + } + } + } +} + +impl Distribution for RobustSoliton { + fn sample(&self, r: &mut R) -> f64 { + r.gen_range(self.min, self.max + 1) as f64 + } +} + +impl Univariate for RobustSoliton { + /// Calculates the cumulative distribution function for the + /// discrete uniform distribution at `x` + /// + /// # Formula + /// + /// ```ignore + /// (floor(x) - min + 1) / (max - min + 1) + /// ``` + fn cdf(&self, x: f64) -> f64 { + if x < self.min as f64 { + 0.0 + } else if x >= self.max as f64 { + 1.0 + } else { + let lower = self.min as f64; + let upper = self.max as f64; + let ans = (x.floor() - lower + 1.0) / (upper - lower + 1.0); + if ans > 1.0 { + 1.0 + } else { + ans + } + } + } +} + +impl Min for RobustSoliton { + /// Returns the minimum value in the domain of the discrete uniform + /// distribution + /// + /// # Remarks + /// + /// This is the same value as the minimum passed into the constructor + fn min(&self) -> i64 { + self.min + } +} + +impl Max for RobustSoliton { + /// Returns the maximum value in the domain of the discrete uniform + /// distribution + /// + /// # Remarks + /// + /// This is the same value as the maximum passed into the constructor + fn max(&self) -> i64 { + self.max + } +} + +impl Mean for RobustSoliton { + /// Returns the mean of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// (min + max) / 2 + /// ``` + fn mean(&self) -> f64 { + (self.min + self.max) as f64 / 2.0 + } +} + +impl Variance for RobustSoliton { + /// Returns the variance of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// ((max - min + 1)^2 - 1) / 12 + /// ``` + fn variance(&self) -> f64 { + let diff = (self.max - self.min) as f64; + ((diff + 1.0) * (diff + 1.0) - 1.0) / 12.0 + } + + /// Returns the standard deviation of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// sqrt(((max - min + 1)^2 - 1) / 12) + /// ``` + fn std_dev(&self) -> f64 { + self.variance().sqrt() + } +} + +impl Entropy for RobustSoliton { + /// Returns the entropy of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// ln(max - min + 1) + /// ``` + fn entropy(&self) -> f64 { + let diff = (self.max - self.min) as f64; + (diff + 1.0).ln() + } +} + +impl Skewness for RobustSoliton { + /// Returns the skewness of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// 0 + /// ``` + fn skewness(&self) -> f64 { + 0.0 + } +} + +impl Median for RobustSoliton { + /// Returns the median of the discrete uniform distribution + /// + /// # Formula + /// + /// ```ignore + /// (max + min) / 2 + /// ``` + fn median(&self) -> f64 { + (self.min + self.max) as f64 / 2.0 + } +} + +impl Mode for RobustSoliton { + /// Returns the mode for the discrete uniform distribution + /// + /// # Remarks + /// + /// Since every element has an equal probability, mode simply + /// returns the middle element + /// + /// # Formula + /// + /// ```ignore + /// N/A // (max + min) / 2 for the middle element + /// ``` + fn mode(&self) -> i64 { + ((self.min + self.max) as f64 / 2.0).floor() as i64 + } +} + +impl Discrete for RobustSoliton { + /// Calculates the probability mass function for the discrete uniform + /// distribution at `x` + /// + /// # Remarks + /// + /// Returns `0.0` if `x` is not in `[min, max]` + /// + /// # Formula + /// + /// ```ignore + /// 1 / (max - min + 1) + /// ``` + fn pmf(&self, x: i64) -> f64 { + if x >= self.min && x <= self.max { + 1.0 / (self.max - self.min + 1) as f64 + } else { + 0.0 + } + } + + /// Calculates the log probability mass function for the discrete uniform + /// distribution at `x` + /// + /// # Remarks + /// + /// Returns `f64::NEG_INFINITY` if `x` is not in `[min, max]` + /// + /// # Formula + /// + /// ```ignore + /// ln(1 / (max - min + 1)) + /// ``` + fn ln_pmf(&self, x: i64) -> f64 { + if x >= self.min && x <= self.max { + -((self.max - self.min + 1) as f64).ln() + } else { + f64::NEG_INFINITY + } + } +} + +impl Soliton for RobustSoliton { + /// Calculates the ideal soliton for the + /// discrete uniform distribution at `x` + /// + /// # Remarks + /// + /// Returns `0.0` if `x` is not in `[min, max]` + /// + /// # Formula + /// + /// ```ignore + /// ``` + fn soliton(&self, x: i64) -> f64 { + if x > 1 && x < self.max { + let ideal_sol = { + if x > 1 && x <= self.max { + 1.0 / ((x as f64) * (x as f64 - 1.0)) + } else if x == 1 { + 1.0 / self.max as f64 + } else { + // Point must be in range (0, limit] + 0.0 + } + }; + (ideal_sol + self.additive_probability(x))/(self.normalization_factor()) + } else if x == 1 { + 1.0 + } else { + // Point must be in range (0, limit] + 0.0 + } + } +} + +#[cfg_attr(rustfmt, rustfmt_skip)] +#[cfg(test)] +mod test { + use std::fmt::Debug; + use std::f64; + use crate::statistics::*; + use crate::distribution::{Univariate, Discrete, Soliton, RobustSoliton}; + + fn try_create(max: i64, heuristic: bool, ripple: f64, fail_probability: f64) -> RobustSoliton { + let n = RobustSoliton::new(max, heuristic, ripple, fail_probability); + assert!(n.is_ok()); + n.unwrap() + } + + fn create_case(max: i64, heuristic: bool, ripple: f64, fail_probability: f64) { + let n = try_create(max, heuristic, ripple, fail_probability); + assert_eq!(1, n.min()); + assert_eq!(max, n.max()); + } + + fn bad_create_case(max: i64, heuristic: bool, ripple: f64, fail_probability: f64) { + let n = RobustSoliton::new(max, heuristic, ripple, fail_probability); + assert!(n.is_err()); + } + + fn get_value(max: i64, heuristic: bool, ripple: f64, fail_probability: f64, eval: F) -> T + where T: PartialEq + Debug, + F: Fn(RobustSoliton) -> T + { + let n = try_create(max, heuristic, ripple, fail_probability); + eval(n) + } + + fn test_case(max: i64, heuristic: bool, ripple: f64, fail_probability: f64, expected: T, eval: F) + where T: PartialEq + Debug, + F: Fn(RobustSoliton) -> T + { + let x = get_value(max, heuristic, ripple, fail_probability, eval); + assert_eq!(expected, x); + } + + #[test] + fn test_create() { + create_case(5, true, 0.1, 0.1); + create_case(10, true, 0.1, 0.1); + create_case(25, true, 0.1, 0.1); + create_case(50, true, 0.1, 0.1); + } + + #[test] + fn test_bad_create() { + bad_create_case(-1, true, 0.1, 0.1); + bad_create_case(5, true, 0.1, 0.1); + } + + #[test] + fn test_mean() { + test_case(10, true, 0.1, 0.1, 0.0, |x| x.mean()); + test_case(10, true, 0.1, 0.1, 2.0, |x| x.mean()); + test_case(10, true, 0.1, 0.1, 15.0, |x| x.mean()); + test_case(10, true, 0.1, 0.1, 20.0, |x| x.mean()); + } + + #[test] + fn test_variance() { + test_case(10, true, 0.1, 0.1, 36.66666666666666666667, |x| x.variance()); + test_case(4, true, 0.1, 0.1, 2.0, |x| x.variance()); + test_case(20, true, 0.1, 0.1, 10.0, |x| x.variance()); + test_case(20, true, 0.1, 0.1, 0.0, |x| x.variance()); + } + + #[test] + fn test_std_dev() { + test_case(10, true, 0.1, 0.1, (36.66666666666666666667f64).sqrt(), |x| x.std_dev()); + test_case(4, true, 0.1, 0.1, (2.0f64).sqrt(), |x| x.std_dev()); + test_case(20, true, 0.1, 0.1, (10.0f64).sqrt(), |x| x.std_dev()); + test_case(20, true, 0.1, 0.1, 0.0, |x| x.std_dev()); + } + + #[test] + fn test_entropy() { + test_case(10, true, 0.1, 0.1, 3.0445224377234229965005979803657054342845752874046093, |x| x.entropy()); + test_case(4, true, 0.1, 0.1, 1.6094379124341003746007593332261876395256013542685181, |x| x.entropy()); + test_case(20, true, 0.1, 0.1, 2.3978952727983705440619435779651292998217068539374197, |x| x.entropy()); + test_case(20, true, 0.1, 0.1, 0.0, |x| x.entropy()); + } + + #[test] + fn test_skewness() { + test_case(10, true, 0.1, 0.1, 0.0, |x| x.skewness()); + test_case(4, true, 0.1, 0.1, 0.0, |x| x.skewness()); + test_case(20, true, 0.1, 0.1, 0.0, |x| x.skewness()); + test_case(20, true, 0.1, 0.1, 0.0, |x| x.skewness()); + } + + #[test] + fn test_median() { + test_case(10, true, 0.1, 0.1, 0.0, |x| x.median()); + test_case(4, true, 0.1, 0.1, 2.0, |x| x.median()); + test_case(20, true, 0.1, 0.1, 15.0, |x| x.median()); + test_case(20, true, 0.1, 0.1, 20.0, |x| x.median()); + } + + #[test] + fn test_mode() { + test_case(10, true, 0.1, 0.1, 0, |x| x.mode()); + test_case(4, true, 0.1, 0.1, 2, |x| x.mode()); + test_case(20, true, 0.1, 0.1, 15, |x| x.mode()); + test_case(20, true, 0.1, 0.1, 20, |x| x.mode()); + } + + #[test] + fn test_pmf() { + test_case(10, true, 0.1, 0.1, 0.04761904761904761904762, |x| x.pmf(5)); + test_case(10, true, 0.1, 0.1, 0.04761904761904761904762, |x| x.pmf(1)); + test_case(10, true, 0.1, 0.1, 0.04761904761904761904762, |x| x.pmf(10)); + test_case(10, true, 0.1, 0.1, 0.0, |x| x.pmf(0)); + } + + #[test] + fn test_ln_pmf() { + test_case(10, true, 0.1, 0.1, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(1)); + test_case(10, true, 0.1, 0.1, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(10)); + test_case(10, true, 0.1, 0.1, f64::NEG_INFINITY, |x| x.ln_pmf(0)); + test_case(10, true, 0.1, 0.1, 0.0, |x| x.ln_pmf(10)); + } + + #[test] + fn test_cdf() { + test_case(10, true, 0.1, 0.1, 0.5714285714285714285714, |x| x.cdf(1.0)); + test_case(10, true, 0.1, 0.1, 1.0, |x| x.cdf(10.0)); + } + + #[test] + fn test_cdf_lower_bound() { + test_case(3, true, 0.1, 0.1, 0.0, |x| x.cdf(1.0)); + } + + #[test] + fn test_cdf_upper_bound() { + test_case(3, true, 0.1, 0.1, 1.0, |x| x.cdf(5.0)); + } + + #[test] + fn test_solition() { + test_case(10, true, 0.1, 0.1, 0.0, |x| x.soliton(-1)); + test_case(10, true, 0.1, 0.1, 0.0, |x| x.soliton(0)); + test_case(10, true, 0.1, 0.1, 1.0, |x| x.soliton(10)); + } +} From b0374b39ef6d4470e2e74a16ce57b69a05ce6e49 Mon Sep 17 00:00:00 2001 From: Robert Keith Date: Thu, 8 Oct 2020 17:40:02 -0700 Subject: [PATCH 2/3] Tests passing, must verify soliton is accurate in practice w/ lt crate --- src/distribution/ideal_soliton.rs | 273 ++++----------------- src/distribution/mod.rs | 8 +- src/distribution/robust_soliton.rs | 365 +++++++++-------------------- 3 files changed, 160 insertions(+), 486 deletions(-) diff --git a/src/distribution/ideal_soliton.rs b/src/distribution/ideal_soliton.rs index c11ca88a..15153d02 100644 --- a/src/distribution/ideal_soliton.rs +++ b/src/distribution/ideal_soliton.rs @@ -1,4 +1,4 @@ -use crate::distribution::{Discrete, Univariate, Soliton}; +use crate::distribution::Soliton; use crate::statistics::*; use crate::{Result, StatsError}; use rand::distributions::Distribution; @@ -56,34 +56,7 @@ impl IdealSoliton { impl Distribution for IdealSoliton { fn sample(&self, r: &mut R) -> f64 { - r.gen_range(self.min, self.max + 1) as f64 - } -} - -impl Univariate for IdealSoliton { - /// Calculates the cumulative distribution function for the - /// discrete uniform distribution at `x` - /// - /// # Formula - /// - /// ```ignore - /// (floor(x) - min + 1) / (max - min + 1) - /// ``` - fn cdf(&self, x: f64) -> f64 { - if x < self.min as f64 { - 0.0 - } else if x >= self.max as f64 { - 1.0 - } else { - let lower = self.min as f64; - let upper = self.max as f64; - let ans = (x.floor() - lower + 1.0) / (upper - lower + 1.0); - if ans > 1.0 { - 1.0 - } else { - ans - } - } + r.gen_range(0, 1) as f64 } } @@ -149,106 +122,6 @@ impl Variance for IdealSoliton { } } -impl Entropy for IdealSoliton { - /// Returns the entropy of the discrete uniform distribution - /// - /// # Formula - /// - /// ```ignore - /// ln(max - min + 1) - /// ``` - fn entropy(&self) -> f64 { - let diff = (self.max - self.min) as f64; - (diff + 1.0).ln() - } -} - -impl Skewness for IdealSoliton { - /// Returns the skewness of the discrete uniform distribution - /// - /// # Formula - /// - /// ```ignore - /// 0 - /// ``` - fn skewness(&self) -> f64 { - 0.0 - } -} - -impl Median for IdealSoliton { - /// Returns the median of the discrete uniform distribution - /// - /// # Formula - /// - /// ```ignore - /// (max + min) / 2 - /// ``` - fn median(&self) -> f64 { - (self.min + self.max) as f64 / 2.0 - } -} - -impl Mode for IdealSoliton { - /// Returns the mode for the discrete uniform distribution - /// - /// # Remarks - /// - /// Since every element has an equal probability, mode simply - /// returns the middle element - /// - /// # Formula - /// - /// ```ignore - /// N/A // (max + min) / 2 for the middle element - /// ``` - fn mode(&self) -> i64 { - ((self.min + self.max) as f64 / 2.0).floor() as i64 - } -} - -impl Discrete for IdealSoliton { - /// Calculates the probability mass function for the discrete uniform - /// distribution at `x` - /// - /// # Remarks - /// - /// Returns `0.0` if `x` is not in `[min, max]` - /// - /// # Formula - /// - /// ```ignore - /// 1 / (max - min + 1) - /// ``` - fn pmf(&self, x: i64) -> f64 { - if x >= self.min && x <= self.max { - 1.0 / (self.max - self.min + 1) as f64 - } else { - 0.0 - } - } - - /// Calculates the log probability mass function for the discrete uniform - /// distribution at `x` - /// - /// # Remarks - /// - /// Returns `f64::NEG_INFINITY` if `x` is not in `[min, max]` - /// - /// # Formula - /// - /// ```ignore - /// ln(1 / (max - min + 1)) - /// ``` - fn ln_pmf(&self, x: i64) -> f64 { - if x >= self.min && x <= self.max { - -((self.max - self.min + 1) as f64).ln() - } else { - f64::NEG_INFINITY - } - } -} - impl Soliton for IdealSoliton { /// Calculates the ideal soliton for the /// discrete uniform distribution at `x` @@ -264,7 +137,7 @@ impl Soliton for IdealSoliton { /// p(x) = 1/(x(x-1)) /// ``` fn soliton(&self, x: i64) -> f64 { - if x > 1 && x <= self.max { + if x > 1 && x < self.max { 1.0 / ((x as f64) * (x as f64 - 1.0)) } else if x == 1 { 1.0 / self.max as f64 @@ -273,6 +146,14 @@ impl Soliton for IdealSoliton { 0.0 } } + + fn normalization_factor(&self) -> f64 { + 0.0 + } + + fn additive_probability(&self, _x: i64) -> f64 { + 0.0 + } } #[cfg_attr(rustfmt, rustfmt_skip)] @@ -281,144 +162,76 @@ mod test { use std::fmt::Debug; use std::f64; use crate::statistics::*; - use crate::distribution::{Univariate, Discrete, DiscreteUniform}; + use crate::distribution::IdealSoliton; - fn try_create(min: i64, max: i64) -> DiscreteUniform { - let n = DiscreteUniform::new(min, max); + fn try_create(max: i64) -> IdealSoliton { + let n = IdealSoliton::new(max); assert!(n.is_ok()); n.unwrap() } - fn create_case(min: i64, max: i64) { - let n = try_create(min, max); - assert_eq!(min, n.min()); + fn create_case(max: i64) { + let n = try_create(max); + assert_eq!(1, n.min()); assert_eq!(max, n.max()); } - fn bad_create_case(min: i64, max: i64) { - let n = DiscreteUniform::new(min, max); + fn bad_create_case(max: i64) { + let n = IdealSoliton::new(max); assert!(n.is_err()); } - fn get_value(min: i64, max: i64, eval: F) -> T + fn get_value(max: i64, eval: F) -> T where T: PartialEq + Debug, - F: Fn(DiscreteUniform) -> T + F: Fn(IdealSoliton) -> T { - let n = try_create(min, max); + let n = try_create(max); eval(n) } - fn test_case(min: i64, max: i64, expected: T, eval: F) + fn test_case(max: i64, expected: T, eval: F) where T: PartialEq + Debug, - F: Fn(DiscreteUniform) -> T + F: Fn(IdealSoliton) -> T { - let x = get_value(min, max, eval); + let x = get_value(max, eval); assert_eq!(expected, x); } + fn test_case_greater(max: i64, expected: T, eval: F) + where T: PartialEq + Debug + Into, + F: Fn(IdealSoliton) -> T + { + let sol = get_value(max, eval); + let a: f64 = sol.into(); + let b = expected.into(); + assert!(a > b, "{} greater than {}", a, b); + } + #[test] fn test_create() { - create_case(-10, 10); - create_case(0, 4); - create_case(10, 20); - create_case(20, 20); + create_case(10); + create_case(4); + create_case(20); } #[test] fn test_bad_create() { - bad_create_case(-1, -2); - bad_create_case(6, 5); + bad_create_case(-2); + bad_create_case(0); } #[test] fn test_mean() { - test_case(-10, 10, 0.0, |x| x.mean()); - test_case(0, 4, 2.0, |x| x.mean()); - test_case(10, 20, 15.0, |x| x.mean()); - test_case(20, 20, 20.0, |x| x.mean()); + test_case_greater(10, 0.9, |x| x.mean()); } #[test] fn test_variance() { - test_case(-10, 10, 36.66666666666666666667, |x| x.variance()); - test_case(0, 4, 2.0, |x| x.variance()); - test_case(10, 20, 10.0, |x| x.variance()); - test_case(20, 20, 0.0, |x| x.variance()); + test_case(10, 8.25, |x| x.variance()); } #[test] fn test_std_dev() { - test_case(-10, 10, (36.66666666666666666667f64).sqrt(), |x| x.std_dev()); - test_case(0, 4, (2.0f64).sqrt(), |x| x.std_dev()); - test_case(10, 20, (10.0f64).sqrt(), |x| x.std_dev()); - test_case(20, 20, 0.0, |x| x.std_dev()); - } - - #[test] - fn test_entropy() { - test_case(-10, 10, 3.0445224377234229965005979803657054342845752874046093, |x| x.entropy()); - test_case(0, 4, 1.6094379124341003746007593332261876395256013542685181, |x| x.entropy()); - test_case(10, 20, 2.3978952727983705440619435779651292998217068539374197, |x| x.entropy()); - test_case(20, 20, 0.0, |x| x.entropy()); - } - - #[test] - fn test_skewness() { - test_case(-10, 10, 0.0, |x| x.skewness()); - test_case(0, 4, 0.0, |x| x.skewness()); - test_case(10, 20, 0.0, |x| x.skewness()); - test_case(20, 20, 0.0, |x| x.skewness()); - } - - #[test] - fn test_median() { - test_case(-10, 10, 0.0, |x| x.median()); - test_case(0, 4, 2.0, |x| x.median()); - test_case(10, 20, 15.0, |x| x.median()); - test_case(20, 20, 20.0, |x| x.median()); - } - - #[test] - fn test_mode() { - test_case(-10, 10, 0, |x| x.mode()); - test_case(0, 4, 2, |x| x.mode()); - test_case(10, 20, 15, |x| x.mode()); - test_case(20, 20, 20, |x| x.mode()); - } - - #[test] - fn test_pmf() { - test_case(-10, 10, 0.04761904761904761904762, |x| x.pmf(-5)); - test_case(-10, 10, 0.04761904761904761904762, |x| x.pmf(1)); - test_case(-10, 10, 0.04761904761904761904762, |x| x.pmf(10)); - test_case(-10, -10, 0.0, |x| x.pmf(0)); - test_case(-10, -10, 1.0, |x| x.pmf(-10)); - } - - #[test] - fn test_ln_pmf() { - test_case(-10, 10, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(-5)); - test_case(-10, 10, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(1)); - test_case(-10, 10, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(10)); - test_case(-10, -10, f64::NEG_INFINITY, |x| x.ln_pmf(0)); - test_case(-10, -10, 0.0, |x| x.ln_pmf(-10)); - } - - #[test] - fn test_cdf() { - test_case(-10, 10, 0.2857142857142857142857, |x| x.cdf(-5.0)); - test_case(-10, 10, 0.5714285714285714285714, |x| x.cdf(1.0)); - test_case(-10, 10, 1.0, |x| x.cdf(10.0)); - test_case(-10, -10, 1.0, |x| x.cdf(-10.0)); - } - - #[test] - fn test_cdf_lower_bound() { - test_case(0, 3, 0.0, |x| x.cdf(-1.0)); - } - - #[test] - fn test_cdf_upper_bound() { - test_case(0, 3, 1.0, |x| x.cdf(5.0)); + test_case(10, (8.25f64).sqrt(), |x| x.std_dev()); } } diff --git a/src/distribution/mod.rs b/src/distribution/mod.rs index bc41d657..54bf1660 100644 --- a/src/distribution/mod.rs +++ b/src/distribution/mod.rs @@ -284,11 +284,13 @@ pub trait Soliton { /// # Examples /// /// ``` - /// use statrs::distribution::{Soliton, Uniform}; + /// use statrs::distribution::{IdealSoliton, Soliton}; /// use statrs::prec; /// - /// let n = Uniform::new(1, 5).unwrap(); - /// assert_eq!(n.soliton(1), 1/5); + /// let n = IdealSoliton::new(5).unwrap(); + /// assert_eq!(n.soliton(1), 0.2); /// ``` fn soliton(&self, x: T) -> K; + fn normalization_factor(&self) -> K; + fn additive_probability(&self, x: T) -> K; } diff --git a/src/distribution/robust_soliton.rs b/src/distribution/robust_soliton.rs index a9b04bca..6be85d96 100644 --- a/src/distribution/robust_soliton.rs +++ b/src/distribution/robust_soliton.rs @@ -1,4 +1,4 @@ -use crate::distribution::{Discrete, Univariate, Soliton}; +use crate::distribution::{Soliton, IdealSoliton}; use crate::statistics::*; use crate::{Result, StatsError}; use rand::distributions::Distribution; @@ -30,6 +30,12 @@ pub struct RobustSoliton { fail_probability: f64, } +#[derive(Debug, Clone, PartialEq)] +pub struct RobustSolitonDistribution { + ripple: Ripple, + fail_probability: f64, +} + impl RobustSoliton { /// Constructs a new discrete uniform distribution with a minimum value /// of `min` and a maximum value of `max`. @@ -53,8 +59,8 @@ impl RobustSoliton { if max < 1 { Err(StatsError::BadParams) } else { - let pmf_table: Vec = Vec::new(); - Ok(RobustSoliton { + let pmf_table: Vec = Vec::with_capacity(max as usize); + let mut rs = RobustSoliton { min: 1, max, spike: None, @@ -64,32 +70,33 @@ impl RobustSoliton { true => Ripple::Heuristic(ripple), }, fail_probability, - }) + }; + + let mut cumulative_probability = 0.0; + for i in 1..(max + 1) { + cumulative_probability += rs.soliton(i); + rs.cumulative_probability_table.push(cumulative_probability); + } + Ok(rs) } } - fn normalization_factor(&self) -> f64 { - let mut normalization_factor: f64 = 0.0; + pub fn query_table(&self, r: &mut R) -> Result { + let n = self.sample(r); + for i in 1..(self.max+1) { - normalization_factor += self.soliton(i); - normalization_factor += self.additive_probability(i); + if n < self.cumulative_probability_table[i as usize] { + return Ok(i); + } } - normalization_factor + Err(StatsError::ContainerExpectedSum("Elements in probability table expected to sum to 1 but didn't! Limit is", self.max as f64)) } - fn additive_probability(&self, val: i64) -> f64 { - let ripple_size = self.ripple.size(self.max, self.fail_probability); - let swap = self.max as f64 / ripple_size; - if val == 0 || val > self.max { - panic!("Point must be in range (0,max]. Given {} - {}", val, self.max); - } else if (val as f64) < swap { - ripple_size / ((val * self.max) as f64) - } else if (val as f64) == swap { - (ripple_size * (ripple_size / self.fail_probability).ln()) / (self.max as f64) - } else { - 0.0 - } + pub fn query_table_int(&self, r: &mut R) -> Result { + let n = r.gen_range(self.min as usize, self.max as usize); + Ok(n) } + } #[derive(PartialEq, Clone, Debug)] @@ -111,39 +118,12 @@ impl Ripple { impl Distribution for RobustSoliton { fn sample(&self, r: &mut R) -> f64 { - r.gen_range(self.min, self.max + 1) as f64 - } -} - -impl Univariate for RobustSoliton { - /// Calculates the cumulative distribution function for the - /// discrete uniform distribution at `x` - /// - /// # Formula - /// - /// ```ignore - /// (floor(x) - min + 1) / (max - min + 1) - /// ``` - fn cdf(&self, x: f64) -> f64 { - if x < self.min as f64 { - 0.0 - } else if x >= self.max as f64 { - 1.0 - } else { - let lower = self.min as f64; - let upper = self.max as f64; - let ans = (x.floor() - lower + 1.0) / (upper - lower + 1.0); - if ans > 1.0 { - 1.0 - } else { - ans - } - } + r.gen_range(0, 1) as f64 } } impl Min for RobustSoliton { - /// Returns the minimum value in the domain of the discrete uniform + /// Returns the minimum value in the domain of the robust soliton /// distribution /// /// # Remarks @@ -155,7 +135,7 @@ impl Min for RobustSoliton { } impl Max for RobustSoliton { - /// Returns the maximum value in the domain of the discrete uniform + /// Returns the maximum value in the domain of the robust soliton /// distribution /// /// # Remarks @@ -167,143 +147,50 @@ impl Max for RobustSoliton { } impl Mean for RobustSoliton { - /// Returns the mean of the discrete uniform distribution + /// Returns the mean of the robust soliton distribution /// /// # Formula /// /// ```ignore - /// (min + max) / 2 + /// sum(Xn)/n /// ``` fn mean(&self) -> f64 { - (self.min + self.max) as f64 / 2.0 + let sum: f64 = Iterator::sum(self.cumulative_probability_table.iter()); + let mean = sum / self.cumulative_probability_table.len() as f64; + mean } } impl Variance for RobustSoliton { - /// Returns the variance of the discrete uniform distribution + /// Returns the variance of the robust soliton distribution /// /// # Formula /// /// ```ignore - /// ((max - min + 1)^2 - 1) / 12 + /// sum((Xn - mean)^2) / n n from 0..max /// ``` fn variance(&self) -> f64 { - let diff = (self.max - self.min) as f64; - ((diff + 1.0) * (diff + 1.0) - 1.0) / 12.0 + let mut sumsq = 0.0; + let mean = self.mean(); + for i in &self.cumulative_probability_table { + sumsq += (i - mean)*(i - mean); + } + let var = sumsq / self.cumulative_probability_table.len() as f64; + var } - /// Returns the standard deviation of the discrete uniform distribution + /// Returns the standard deviation of the robust soliton distribution /// /// # Formula /// /// ```ignore - /// sqrt(((max - min + 1)^2 - 1) / 12) + /// sqrt(variance) /// ``` fn std_dev(&self) -> f64 { self.variance().sqrt() } } -impl Entropy for RobustSoliton { - /// Returns the entropy of the discrete uniform distribution - /// - /// # Formula - /// - /// ```ignore - /// ln(max - min + 1) - /// ``` - fn entropy(&self) -> f64 { - let diff = (self.max - self.min) as f64; - (diff + 1.0).ln() - } -} - -impl Skewness for RobustSoliton { - /// Returns the skewness of the discrete uniform distribution - /// - /// # Formula - /// - /// ```ignore - /// 0 - /// ``` - fn skewness(&self) -> f64 { - 0.0 - } -} - -impl Median for RobustSoliton { - /// Returns the median of the discrete uniform distribution - /// - /// # Formula - /// - /// ```ignore - /// (max + min) / 2 - /// ``` - fn median(&self) -> f64 { - (self.min + self.max) as f64 / 2.0 - } -} - -impl Mode for RobustSoliton { - /// Returns the mode for the discrete uniform distribution - /// - /// # Remarks - /// - /// Since every element has an equal probability, mode simply - /// returns the middle element - /// - /// # Formula - /// - /// ```ignore - /// N/A // (max + min) / 2 for the middle element - /// ``` - fn mode(&self) -> i64 { - ((self.min + self.max) as f64 / 2.0).floor() as i64 - } -} - -impl Discrete for RobustSoliton { - /// Calculates the probability mass function for the discrete uniform - /// distribution at `x` - /// - /// # Remarks - /// - /// Returns `0.0` if `x` is not in `[min, max]` - /// - /// # Formula - /// - /// ```ignore - /// 1 / (max - min + 1) - /// ``` - fn pmf(&self, x: i64) -> f64 { - if x >= self.min && x <= self.max { - 1.0 / (self.max - self.min + 1) as f64 - } else { - 0.0 - } - } - - /// Calculates the log probability mass function for the discrete uniform - /// distribution at `x` - /// - /// # Remarks - /// - /// Returns `f64::NEG_INFINITY` if `x` is not in `[min, max]` - /// - /// # Formula - /// - /// ```ignore - /// ln(1 / (max - min + 1)) - /// ``` - fn ln_pmf(&self, x: i64) -> f64 { - if x >= self.min && x <= self.max { - -((self.max - self.min + 1) as f64).ln() - } else { - f64::NEG_INFINITY - } - } -} - impl Soliton for RobustSoliton { /// Calculates the ideal soliton for the /// discrete uniform distribution at `x` @@ -318,17 +205,10 @@ impl Soliton for RobustSoliton { /// ``` fn soliton(&self, x: i64) -> f64 { if x > 1 && x < self.max { - let ideal_sol = { - if x > 1 && x <= self.max { - 1.0 / ((x as f64) * (x as f64 - 1.0)) - } else if x == 1 { - 1.0 / self.max as f64 - } else { - // Point must be in range (0, limit] - 0.0 - } - }; - (ideal_sol + self.additive_probability(x))/(self.normalization_factor()) + let ideal_sol = IdealSoliton::new(self.max).unwrap(); + let ideal_val = ideal_sol.soliton(x); + //println!("I: {:?} | A: {:?} | N: {:?}", ideal_val, self.additive_probability(x), self.normalization_factor()); + (ideal_val + self.additive_probability(x))/(self.normalization_factor()) } else if x == 1 { 1.0 } else { @@ -336,6 +216,32 @@ impl Soliton for RobustSoliton { 0.0 } } + + fn normalization_factor(&self) -> f64 { + let mut normalization_factor: f64 = 0.0; + let ideal_sol = IdealSoliton::new(self.max).unwrap(); + for i in 1..(self.max+1) { + normalization_factor += ideal_sol.soliton(i); + normalization_factor += self.additive_probability(i); + } + normalization_factor + } + + fn additive_probability(&self, val: i64) -> f64 { + let ripple_size = self.ripple.size(self.max, self.fail_probability); + let swap = self.max as f64 / ripple_size; + if val == 0 || val > self.max { + panic!("Point must be in range (0,max]. Given {} - {}", val, self.max); + } else if (val as f64) < swap { + ripple_size / ((val * self.max) as f64) + } else if (val as f64) == swap { + (ripple_size * (ripple_size / self.fail_probability).ln()) / (self.max as f64) + } else { + 0.0 + } + } + + } #[cfg_attr(rustfmt, rustfmt_skip)] @@ -344,8 +250,7 @@ mod test { use std::fmt::Debug; use std::f64; use crate::statistics::*; - use crate::distribution::{Univariate, Discrete, Soliton, RobustSoliton}; - + use crate::distribution::{Soliton, RobustSoliton}; fn try_create(max: i64, heuristic: bool, ripple: f64, fail_probability: f64) -> RobustSoliton { let n = RobustSoliton::new(max, heuristic, ripple, fail_probability); assert!(n.is_ok()); @@ -375,8 +280,25 @@ mod test { where T: PartialEq + Debug, F: Fn(RobustSoliton) -> T { - let x = get_value(max, heuristic, ripple, fail_probability, eval); - assert_eq!(expected, x); + let sol = get_value(max, heuristic, ripple, fail_probability, eval); + assert_eq!(expected, sol); + } + + fn test_case_greater(max: i64, heuristic: bool, ripple: f64, fail_probability: f64, expected: T, eval: F) + where T: PartialEq + Debug + Into, + F: Fn(RobustSoliton) -> T + { + let sol = get_value(max, heuristic, ripple, fail_probability, eval); + let a: f64 = sol.into(); + let b = expected.into(); + assert!(a > b, "{} greater than {}", a, b); + } + + fn test_case_query(max: i64, heuristic: bool, ripple: f64, fail_probability: f64) + { + let mut rng = rand::thread_rng(); + let sol = try_create(max, heuristic, ripple, fail_probability); + assert!(sol.query_table(&mut rng).is_ok()); } #[test] @@ -390,101 +312,38 @@ mod test { #[test] fn test_bad_create() { bad_create_case(-1, true, 0.1, 0.1); - bad_create_case(5, true, 0.1, 0.1); + bad_create_case(0, true, 0.1, 0.1); } #[test] fn test_mean() { - test_case(10, true, 0.1, 0.1, 0.0, |x| x.mean()); - test_case(10, true, 0.1, 0.1, 2.0, |x| x.mean()); - test_case(10, true, 0.1, 0.1, 15.0, |x| x.mean()); - test_case(10, true, 0.1, 0.1, 20.0, |x| x.mean()); + test_case_greater(10, true, 0.1, 0.1, 0.9, |x| x.mean()); + test_case_greater(20, true, 0.1, 0.1, 0.9, |x| x.mean()); + test_case_greater(30, true, 0.1, 0.1, 0.9, |x| x.mean()); + test_case_greater(40, true, 0.1, 0.1, 0.9, |x| x.mean()); } #[test] fn test_variance() { - test_case(10, true, 0.1, 0.1, 36.66666666666666666667, |x| x.variance()); - test_case(4, true, 0.1, 0.1, 2.0, |x| x.variance()); - test_case(20, true, 0.1, 0.1, 10.0, |x| x.variance()); - test_case(20, true, 0.1, 0.1, 0.0, |x| x.variance()); + test_case(10, true, 0.1, 0.1, 0.0601467074373455, |x| x.variance()); } #[test] fn test_std_dev() { - test_case(10, true, 0.1, 0.1, (36.66666666666666666667f64).sqrt(), |x| x.std_dev()); - test_case(4, true, 0.1, 0.1, (2.0f64).sqrt(), |x| x.std_dev()); - test_case(20, true, 0.1, 0.1, (10.0f64).sqrt(), |x| x.std_dev()); - test_case(20, true, 0.1, 0.1, 0.0, |x| x.std_dev()); - } - - #[test] - fn test_entropy() { - test_case(10, true, 0.1, 0.1, 3.0445224377234229965005979803657054342845752874046093, |x| x.entropy()); - test_case(4, true, 0.1, 0.1, 1.6094379124341003746007593332261876395256013542685181, |x| x.entropy()); - test_case(20, true, 0.1, 0.1, 2.3978952727983705440619435779651292998217068539374197, |x| x.entropy()); - test_case(20, true, 0.1, 0.1, 0.0, |x| x.entropy()); + test_case(10, true, 0.1, 0.1, (0.0601467074373455f64).sqrt(), |x| x.std_dev()); } #[test] - fn test_skewness() { - test_case(10, true, 0.1, 0.1, 0.0, |x| x.skewness()); - test_case(4, true, 0.1, 0.1, 0.0, |x| x.skewness()); - test_case(20, true, 0.1, 0.1, 0.0, |x| x.skewness()); - test_case(20, true, 0.1, 0.1, 0.0, |x| x.skewness()); - } - - #[test] - fn test_median() { - test_case(10, true, 0.1, 0.1, 0.0, |x| x.median()); - test_case(4, true, 0.1, 0.1, 2.0, |x| x.median()); - test_case(20, true, 0.1, 0.1, 15.0, |x| x.median()); - test_case(20, true, 0.1, 0.1, 20.0, |x| x.median()); - } - - #[test] - fn test_mode() { - test_case(10, true, 0.1, 0.1, 0, |x| x.mode()); - test_case(4, true, 0.1, 0.1, 2, |x| x.mode()); - test_case(20, true, 0.1, 0.1, 15, |x| x.mode()); - test_case(20, true, 0.1, 0.1, 20, |x| x.mode()); - } - - #[test] - fn test_pmf() { - test_case(10, true, 0.1, 0.1, 0.04761904761904761904762, |x| x.pmf(5)); - test_case(10, true, 0.1, 0.1, 0.04761904761904761904762, |x| x.pmf(1)); - test_case(10, true, 0.1, 0.1, 0.04761904761904761904762, |x| x.pmf(10)); - test_case(10, true, 0.1, 0.1, 0.0, |x| x.pmf(0)); - } - - #[test] - fn test_ln_pmf() { - test_case(10, true, 0.1, 0.1, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(1)); - test_case(10, true, 0.1, 0.1, -3.0445224377234229965005979803657054342845752874046093, |x| x.ln_pmf(10)); - test_case(10, true, 0.1, 0.1, f64::NEG_INFINITY, |x| x.ln_pmf(0)); - test_case(10, true, 0.1, 0.1, 0.0, |x| x.ln_pmf(10)); - } - - #[test] - fn test_cdf() { - test_case(10, true, 0.1, 0.1, 0.5714285714285714285714, |x| x.cdf(1.0)); - test_case(10, true, 0.1, 0.1, 1.0, |x| x.cdf(10.0)); - } - - #[test] - fn test_cdf_lower_bound() { - test_case(3, true, 0.1, 0.1, 0.0, |x| x.cdf(1.0)); - } - - #[test] - fn test_cdf_upper_bound() { - test_case(3, true, 0.1, 0.1, 1.0, |x| x.cdf(5.0)); + fn test_solition() { + test_case(10, true, 0.1, 0.1, 0.0, |x| x.soliton(-1)); + test_case(10, true, 0.1, 0.1, 0.05879983550709325, |x| x.soliton(5)); + test_case(10, true, 0.1, 0.1, 0.0, |x| x.soliton(10)); + test_case(10, true, 0.1, 0.1, 1.0, |x| x.soliton(1)); } #[test] - fn test_solition() { - test_case(10, true, 0.1, 0.1, 0.0, |x| x.soliton(-1)); - test_case(10, true, 0.1, 0.1, 0.0, |x| x.soliton(0)); - test_case(10, true, 0.1, 0.1, 1.0, |x| x.soliton(10)); + fn test_query_table() { + test_case_query(10, true, 0.1, 0.1); + test_case_query(20, true, 0.1, 0.3); } } From 52603cb8afd263444ca8b2f8e4ca317322510928 Mon Sep 17 00:00:00 2001 From: Robert Keith Date: Wed, 18 Nov 2020 11:48:08 -0800 Subject: [PATCH 3/3] change examples to include soliton --- src/distribution/ideal_soliton.rs | 13 ++++++++++++- src/distribution/robust_soliton.rs | 24 ++++++++++-------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/distribution/ideal_soliton.rs b/src/distribution/ideal_soliton.rs index 15153d02..4a8897ca 100644 --- a/src/distribution/ideal_soliton.rs +++ b/src/distribution/ideal_soliton.rs @@ -7,17 +7,20 @@ use std::f64; /// Implements the [Discrete /// Uniform](https://en.wikipedia.org/wiki/Discrete_uniform_distribution) -/// distribution +/// distribution and an related ideal soliton of the discrete uniform distribuiton /// /// # Examples /// /// ``` /// use statrs::distribution::{DiscreteUniform, Discrete}; /// use statrs::statistics::Mean; +/// use statrs::distribution::IdealSoliton; /// +/// let sol = IdealSoliton::new(5).unwrap(); /// let n = DiscreteUniform::new(0, 5).unwrap(); /// assert_eq!(n.mean(), 2.5); /// assert_eq!(n.pmf(3), 1.0 / 6.0); +/// assert_eq!(sol.soliton(1), 1.0/5.0); /// ``` #[derive(Debug, Clone, PartialEq)] @@ -30,6 +33,9 @@ impl IdealSoliton { /// Constructs a new discrete uniform distribution with a minimum value /// of `min` and a maximum value of `max`. /// + /// Additionally construct the ideal soliton of the same max value of `max` + /// falling between (1, max) + /// /// # Errors /// /// Returns an error if `max < min` @@ -38,12 +44,17 @@ impl IdealSoliton { /// /// ``` /// use statrs::distribution::DiscreteUniform; + /// use statrs::distribution::IdealSoliton; /// + /// let mut sol = IdealSoliton::new(5); /// let mut result = DiscreteUniform::new(0, 5); /// assert!(result.is_ok()); + /// assert!(sol.is_ok()); /// /// result = DiscreteUniform::new(5, 0); + /// sol = IdealSoliton::new(-1); /// assert!(result.is_err()); + /// assert!(sol.is_err()); /// ``` pub fn new(max: i64) -> Result { if max < 1 { diff --git a/src/distribution/robust_soliton.rs b/src/distribution/robust_soliton.rs index 6be85d96..7e8f06f4 100644 --- a/src/distribution/robust_soliton.rs +++ b/src/distribution/robust_soliton.rs @@ -5,19 +5,16 @@ use rand::distributions::Distribution; use rand::Rng; use std::f64; -/// Implements the [Discrete -/// Uniform](https://en.wikipedia.org/wiki/Discrete_uniform_distribution) +/// Implements the [Robust Soliton] +/// (https://en.wikipedia.org/wiki/Soliton_distribution) /// distribution /// /// # Examples /// /// ``` -/// use statrs::distribution::{DiscreteUniform, Discrete}; -/// use statrs::statistics::Mean; -/// -/// let n = DiscreteUniform::new(0, 5).unwrap(); -/// assert_eq!(n.mean(), 2.5); -/// assert_eq!(n.pmf(3), 1.0 / 6.0); +/// use statrs::distribution::{Soliton, IdealSoliton}; +/// let n = RobustSoliton::new(10, true, 0.1, 0.1).unwrap(); +/// assert_eq!(n.soliton(5), 0.05879983550709325); /// ``` #[derive(Debug, Clone, PartialEq)] @@ -37,22 +34,21 @@ pub struct RobustSolitonDistribution { } impl RobustSoliton { - /// Constructs a new discrete uniform distribution with a minimum value - /// of `min` and a maximum value of `max`. + /// Constructs a new robust soliton distribution with a maximum value of `max`. /// /// # Errors /// - /// Returns an error if `max < min` + /// Returns an error if `max < 0` /// /// # Examples /// /// ``` - /// use statrs::distribution::DiscreteUniform; + /// use statrs::distribution::RobustSoliton; /// - /// let mut result = DiscreteUniform::new(0, 5); + /// let mut result = RobustSoliton::new(10, true, 0.1, 0.1); /// assert!(result.is_ok()); /// - /// result = DiscreteUniform::new(5, 0); + /// result = RobustSoliton::new(-1, true, 0.1, 0.1); /// assert!(result.is_err()); /// ``` pub fn new(max: i64, heuristic: bool, ripple: f64, fail_probability: f64) -> Result {