Skip to content

Commit

Permalink
Merge pull request DarkEnergySurvey#88 from DarkEnergySurvey/stats
Browse files Browse the repository at this point in the history
Updates to stats tests
  • Loading branch information
kadrlica authored Jul 14, 2021
2 parents 4860691 + 644f6a1 commit b350ce5
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 40 deletions.
107 changes: 77 additions & 30 deletions tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
"""
Tests of ugali stats
"""
import unittest
# Execute tests in order: https://stackoverflow.com/a/22317851/4075339
unittest.TestLoader.sortTestMethodsUsing = None

import numpy as np
import scipy.stats

Expand All @@ -17,6 +21,16 @@ def generate_distribution(p1=[1.0,0.1,10000],p2=[-1.0,0.6,30000]):
samples = np.concatenate([g1,g2])
return samples

def generate_gaussian():
p1=[0.0,1.0,int(1e5)]
p2=[0.0,0.0,0]
return generate_distribution(p1,p2)

def generate_bimodal():
p1=[1.0,0.1,10000]
p2=[-1.0,0.6,30000]
return generate_distribution(p1,p2)

def plot_intervals():
import pylab as plt

Expand All @@ -40,37 +54,70 @@ def plot_intervals():

plt.legend(loc='upper left',fontsize=12)

def test_peak():
np.random.seed(12345)
data = generate_distribution()
peak = ugali.utils.stats.kde_peak(data)
np.testing.assert_allclose(peak,1.0,atol=1e-2)

def test_median_interval():
np.random.seed(12345)
data = generate_distribution()

peak,[lo,hi] = ugali.utils.stats.median_interval(data,alpha=0.32)
np.testing.assert_allclose(peak , 1.00, atol=1e-2)
np.testing.assert_allclose(hi-lo, 2.29, atol=1e-2)

def test_peak_interval():
np.random.seed(12345)
data = generate_distribution()

peak,[lo,hi] = ugali.utils.stats.peak_interval(data,alpha=0.32)
np.testing.assert_array_less([lo,-hi],[peak,-peak])
np.testing.assert_allclose(peak , 1.00, atol=1e-2)
np.testing.assert_allclose(hi-lo, 2.29, atol=1e-2)

def test_min_interval():
np.random.seed(12345)
data = generate_distribution()

# Note that min_interval <= peak_interval
center,[lo,hi] = ugali.utils.stats.min_interval(data,alpha=0.32)
np.testing.assert_allclose(center, -0.99, atol=1e-2)
np.testing.assert_allclose(hi-lo, 2.02, atol=1e-2)
class TestStats(unittest.TestCase):
"""Test the stats """

def setUp(self):
np.random.seed(12345)
self.bimodal = generate_bimodal()
np.random.seed(12345)
self.gaussian = generate_gaussian()

def test_peak(self):
mean = self.gaussian.mean()
median = np.median(self.gaussian)
std = self.gaussian.std()
peak = ugali.utils.stats.kde_peak(self.gaussian)

np.testing.assert_allclose(mean,0.0,atol=1e-2)
np.testing.assert_allclose(median,0.0,atol=1e-2)
np.testing.assert_allclose(std ,1.0,atol=1e-2)
# Only accurate at the ~5% level...
np.testing.assert_allclose(peak,0.0,atol=1e-1)

# Bimodal distribution
peak = ugali.utils.stats.kde_peak(self.bimodal)
np.testing.assert_allclose(peak,1.0,atol=1e-1)

def test_mean_interval(self):
peak,[lo,hi] = ugali.utils.stats.mean_interval(self.gaussian,alpha=0.32)
np.testing.assert_allclose(peak , 0.00, atol=1e-2)
np.testing.assert_allclose(hi-lo, 2.00, atol=1e-2)

peak,[lo,hi] = ugali.utils.stats.mean_interval(self.bimodal,alpha=0.32)
np.testing.assert_allclose(peak ,-0.50, atol=1e-2)
np.testing.assert_allclose(hi-lo, 2.01, atol=1e-2)

def test_median_interval(self):
peak,[lo,hi] = ugali.utils.stats.median_interval(self.gaussian,alpha=0.32)
np.testing.assert_allclose(peak , 0.00, atol=1e-2)
np.testing.assert_allclose(hi-lo, 2.00, atol=1e-2)

peak,[lo,hi] = ugali.utils.stats.median_interval(self.bimodal,alpha=0.32)
np.testing.assert_allclose(peak ,-0.75, atol=1e-2)
np.testing.assert_allclose(hi-lo, 2.44, atol=1e-2)

def test_peak_interval(self):
peak,[lo,hi] = ugali.utils.stats.peak_interval(self.gaussian,alpha=0.32)
np.testing.assert_array_less([lo,-hi],[peak,-peak])
np.testing.assert_allclose(peak , 0.00, atol=1e-1)
np.testing.assert_allclose(hi-lo, 1.98, atol=1e-2)

peak,[lo,hi] = ugali.utils.stats.peak_interval(self.bimodal,alpha=0.32)
np.testing.assert_array_less([lo,-hi],[peak,-peak])
np.testing.assert_allclose(peak , 1.00, atol=1e-2)
np.testing.assert_allclose(hi-lo, 2.29, atol=1e-2)

def test_min_interval(self):
center,[lo,hi] = ugali.utils.stats.min_interval(self.gaussian,alpha=0.32)
np.testing.assert_allclose(center,-0.03, atol=1e-2)
np.testing.assert_allclose(hi-lo, 1.98, atol=1e-2)

# Note that min_interval <= peak_interval
center,[lo,hi] = ugali.utils.stats.min_interval(self.bimodal,alpha=0.32)
np.testing.assert_allclose(center, -0.99, atol=1e-2)
np.testing.assert_allclose(hi-lo, 2.02, atol=1e-2)

if __name__ == "__main__":
import argparse
Expand Down
54 changes: 44 additions & 10 deletions ugali/utils/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def mean_interval(data, alpha=_alpha):
"""
Interval assuming gaussian posterior.
"""
mean =np.mean(data)
mean = np.mean(data)
sigma = np.std(data)
scale = scipy.stats.norm.ppf(1-alpha/2.)
return interval(mean,mean-scale*sigma,mean+scale*sigma)
Expand All @@ -49,35 +49,69 @@ def median_interval(data, alpha=_alpha):
return interval(med,lo,hi)

def peak(data, bins=_nbins):
"""
Bin the distribution and find the mode
Parameters:
-----------
data : The 1d data sample
bins : Number of bins
Returns
-------
peak : peak of the kde
"""
num,edges = np.histogram(data,bins=bins)
centers = (edges[1:]+edges[:-1])/2.
return centers[np.argmax(num)]

def kde_peak(data, npoints=_npoints):
def kde_peak(data, npoints=_npoints, clip=5.0):
"""
Identify peak using Gaussian kernel density estimator.
Parameters:
-----------
data : The 1d data sample
npoints : The number of kde points to evaluate
clip : NMAD to clip
Returns
-------
peak : peak of the kde
"""
return kde(data,npoints)[0]
return kde(data,npoints,clip)[0]

def kde(data, npoints=_npoints):
def kde(data, npoints=_npoints, clip=5.0):
"""
Identify peak using Gaussian kernel density estimator.
Parameters:
-----------
data : The 1d data sample
data : The 1d data sample
npoints : The number of kde points to evaluate
clip : NMAD to clip
Returns
-------
peak : peak of the kde
"""
# Clipping of severe outliers to concentrate more KDE samples in the parameter range of interest

# Clipping of severe outliers to concentrate more KDE samples
# in the parameter range of interest
mad = np.median(np.fabs(np.median(data) - data))
cut = (data > np.median(data) - 5. * mad) & (data < np.median(data) + 5. * mad)
x = data[cut]
if clip > 0:
cut = (data > np.median(data) - clip * mad)
cut &= (data < np.median(data) + clip * mad)
x = data[cut]
else:
x = data
kde = scipy.stats.gaussian_kde(x)
# No penalty for using a finer sampling for KDE evaluation except computation time
# No penalty for using a finer sampling for KDE evaluation
# except computation time
values = np.linspace(np.min(x), np.max(x), npoints)
kde_values = kde.evaluate(values)
peak = values[np.argmax(kde_values)]
return values[np.argmax(kde_values)], kde.evaluate(peak)
return peak, kde.evaluate(peak)

def peak_interval(data, alpha=_alpha, npoints=_npoints):
"""Identify minimum interval containing the peak of the posterior as
Expand Down

0 comments on commit b350ce5

Please sign in to comment.