Skip to content

Commit

Permalink
Merge pull request #31 from aimalz/issue/23/histogram
Browse files Browse the repository at this point in the history
Issue/23/histogram
  • Loading branch information
drphilmarshall authored Dec 7, 2016
2 parents 0da3d83 + 4d30fc2 commit 93d4249
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 61 deletions.
64 changes: 47 additions & 17 deletions docs/notebooks/demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"source": [
"# `qp` Demo\n",
"\n",
"In this notebook we use the `qp` module to approximate some standard 1-D PDFs using sets of quantiles, and assess the accuracy of the quantile parametrization(TM).\n",
"In this notebook we use the `qp` module to approximate some standard 1-D PDFs using sets of quantiles, and assess the accuracy of the quantile parametrization(TM) in comparison to other popular parametrizations.\n",
"\n",
"### Requirements\n",
"\n",
Expand Down Expand Up @@ -79,7 +79,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, let's compute a set of evenly spaced quantiles. These will be carried by the `PDF` object as `p.quantiles`. We also demonstrate the initialization of a PDF object with quantiles and no truth function."
"Now, let's compute a set of evenly spaced quantiles. These will be carried by the `PDF` object as `p.quantiles`. We also demonstrate the initialization of a `PDF` object with quantiles and no truth function."
]
},
{
Expand All @@ -98,7 +98,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's test interpolation at a single point within the quantile range."
"Let's also compute a histogram representation that will be carried by the `PDF` object as `p.histogram`. We can similary initialize a `PDF` object with a histogram and no truth function."
]
},
{
Expand All @@ -109,7 +109,27 @@
},
"outputs": [],
"source": [
"P.approximate(0.314)"
"T = qp.PDF(truth=dist)\n",
"histogram = T.histogramize(nbins=10, binrange=[-2., 2.])\n",
"R = qp.PDF(histogram=histogram)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's test interpolation at a single point for the quantile parametrization."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(P.approximate(0.314, using='quantiles'))"
]
},
{
Expand All @@ -128,7 +148,7 @@
"outputs": [],
"source": [
"grid = np.linspace(-3., 3., 100)\n",
"P.approximate(grid)"
"P.approximate(grid, using='quantiles')"
]
},
{
Expand All @@ -147,7 +167,8 @@
"outputs": [],
"source": [
"bounds = (-3.0, 3.0)\n",
"P.plot(limits=bounds, points=grid)"
"P.plot(limits=bounds, points=grid)\n",
"T.plot(limits=bounds, points=grid)"
]
},
{
Expand All @@ -163,16 +184,21 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"D1 = qp.utils.calculate_kl_divergence(P, Q, limits=(-1.,1.))\n",
"D0 = qp.utils.calculate_kl_divergence(P, Q, limits=(min(quantiles), max(quantiles)))\n",
"D2 = qp.utils.calculate_kl_divergence(P, Q, limits=(-2.,2.))\n",
"D3 = qp.utils.calculate_kl_divergence(P, Q, limits=(-3.,3.))\n",
"qD1 = qp.utils.calculate_kl_divergence(P, Q, limits=(-1.,1.))\n",
"qD2 = qp.utils.calculate_kl_divergence(P, Q, limits=(-2.,2.))\n",
"qD3 = qp.utils.calculate_kl_divergence(P, Q, limits=(-3.,3.))\n",
"\n",
"print(D1, D0, D2, D3)"
"hD1 = qp.utils.calculate_kl_divergence(T, R, limits=(-1.,1.))\n",
"hD2 = qp.utils.calculate_kl_divergence(T, R, limits=(-2.,2.))\n",
"hD3 = qp.utils.calculate_kl_divergence(T, R, limits=(-3.,3.))\n",
"\n",
"print(qD1, qD2, qD3)\n",
"print(hD1, hD2, hD3)"
]
},
{
Expand All @@ -190,12 +216,16 @@
},
"outputs": [],
"source": [
"RMS1 = qp.utils.calculate_rms(P, Q, limits=(-1.,1.))\n",
"RMS0 = qp.utils.calculate_rms(P, Q, limits=(min(quantiles), max(quantiles)))\n",
"RMS2 = qp.utils.calculate_rms(P, Q, limits=(-2.,2.))\n",
"RMS3 = qp.utils.calculate_rms(P, Q, limits=(-3.,3.))\n",
"qRMS1 = qp.utils.calculate_rms(P, Q, limits=(-1.,1.))\n",
"qRMS2 = qp.utils.calculate_rms(P, Q, limits=(-2.,2.))\n",
"qRMS3 = qp.utils.calculate_rms(P, Q, limits=(-3.,3.))\n",
"\n",
"hRMS1 = qp.utils.calculate_rms(T, R, limits=(-1.,1.))\n",
"hRMS2 = qp.utils.calculate_rms(T, R, limits=(-2.,2.))\n",
"hRMS3 = qp.utils.calculate_rms(T, R, limits=(-3.,3.))\n",
"\n",
"print(RMS1, RMS0, RMS2, RMS3)"
"print(qRMS1, qRMS2, qRMS3)\n",
"print(hRMS1, hRMS2, hRMS3)"
]
},
{
Expand Down
176 changes: 132 additions & 44 deletions qp/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,41 @@

import qp


<<<<<<< HEAD
class PDF(object):
"""
A PDF object, with some representation of a distribution.
Parameters
----------
truth: scipy.stats.rv_continuous object, optional
continuous, parametric form of the PDF
quantiles: ndarray, optional
array of quantile values separated by
vb: boolean
report on progress to stdout?
"""
def __init__(self, truth=None, quantiles=None, vb=True):

def __init__(self, truth=None, quantiles=None, histogram=None,
vb=True):
"""
An object representing a probability density function in
various ways.
Parameters
----------
truth: scipy.stats.rv_continuous object, optional
Continuous, parametric form of the PDF
quantiles: ndarray, optional
Array of quantile values separated by 100./(len(quantiles)+1) percentiles
histogram: tuple of ndarrays, optional
Pair of arrays of lengths (nbins+1, nbins) containing endpoints of bins and values in bins
vb: boolean
report on progress to stdout?
"""
self.truth = truth
self.quantiles = quantiles
# Should make this a proper exception rather than just printing an advisory notice
if vb and self.truth is None and self.quantiles is None:
self.histogram = histogram
self.initialized = None

if self.truth is not None:
self.initialized = 'truth'
elif self.quantiles is not None:
self.initialized = 'quantiles'
elif self.histogram is not None:
self.initialized = 'histogram'
self.last = self.initialized

if vb and self.truth is None and self.quantiles is None
and self.histogram is None:
print 'Warning: initializing a PDF object without inputs'
self.difs = None
self.mids = None
Expand All @@ -32,7 +48,8 @@ def __init__(self, truth=None, quantiles=None, vb=True):

def evaluate(self, loc, vb=True):
"""
Evaluates the PDF (either the true version, or an approximation of it) at given location(s).
Evaluates the PDF (either the true version, or the most recent
approximation of it) at the given location(s).
Parameters
----------
Expand All @@ -53,11 +70,9 @@ def evaluate(self, loc, vb=True):
if self.truth is not None:
if vb: print('Evaluating the true distribution.')
val = self.truth.pdf(loc)
elif self.quantiles is not None:
if vb: print('Evaluating an interpolation of the quantile distribution.')
val = self.approximate(loc)[1]
else:
if vb: print('No representation provided for evaluation.')
if vb: print('Evaluating an interpolation of the '+self.last+' parametrization.')
val = self.approximate(loc, using=self.last)[1]

return(val)

Expand All @@ -83,13 +98,13 @@ def integrate(self, limits):

def quantize(self, percent=1., number=None, vb=True):
"""
Computes an array of evenly-spaced quantiles.
Computes an array of evenly-spaced quantiles from the truth.
Parameters
----------
percent: float
the separation of the requested quantiles, in percent
num_points: int
number: int
the number of quantiles to compute.
vb: boolean
report on progress to stdout?
Expand Down Expand Up @@ -123,16 +138,70 @@ def quantize(self, percent=1., number=None, vb=True):
self.quantiles = self.truth.ppf(points)
else:
print('New quantiles can only be computed from a truth distribution in this version.')
return

if vb: print("Result: ", self.quantiles)
self.last = 'quantiles'
return self.quantiles

def interpolate(self, vb=True):
def histogramize(self, binends=None, nbins=10, binrange=[0., 1.], vb=True):
"""
Constructs an `interpolator` function based on the quantiles.
Computes the histogram values from the truth.
Parameters
----------
binends: ndarray, float, optional
Array of N+1 endpoints of N bins
nbins: int, optional
Number of bins if no binends provided
range: tuple, float, optional
Pair of values of endpoints of total bin range
vb: boolean
Report on progress to stdout?
Returns
-------
self.histogram: tuple of ndarrays of floats
Pair of arrays of lengths (nbins+1, nbins) containing endpoints of bins and values in bins
Comments
--------
A histogram representation of a PDF is a popular approximate way to store it. This method computes some histogram bin heights from a truth distribution (other representations forthcoming)
and stores them in the `self.histogram` attribute.
Uses the `.cdf` method of the `rvs_continuous` distribution
object stored in `self.truth`. This calculates the CDF.
See `the Scipy docs <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.cdf.html#scipy.stats.rv_continuous.cdf>`_ for details.
"""
if binends is None:
step = float(binrange[1]-binrange[0])/nbins
binends = np.arange(binrange[0], binrange[1]+step, step)

nbins = len(binends)-1
self.histogram = np.zeros(nbins)
if vb: print("Calculating histogram: ", binends)
if self.truth is not None:
cdf = self.truth.cdf(binends)
for b in range(nbins):
self.histogram[b] = (cdf[b+1]-cdf[b])/(binends[b+1]-binends[b])
else:
print('New histograms can only be computed from a truth distribution in this version.')
return

self.histogram = (binends, self.histogram)
if vb: print("Result: ", self.histogram[1])
self.last = 'histogram'
return self.histogram

def interpolate(self, using=None, vb=True):
"""
Constructs an `interpolator` function based on the parametrization.
Parameters
----------
using: string
Parametrization on which to interpolate, currently supports 'quantiles', 'histogram'
vb: boolean
report on progress to stdout?
Expand All @@ -142,31 +211,47 @@ def interpolate(self, vb=True):
Notes
-----
The `self.interpolator` object is a function, that is used by the `approximate` method.
The `self.interpolator` object is a function that is used by the `approximate` method.
"""
# First find the quantiles if none exist:
if self.quantiles is None:
self.quantiles = self.quantize()
if using == 'truth':
print('The truth needs no interpolation. Try converting to an approximate parametrization first.')
return

self.difs = self.quantiles[1:]-self.quantiles[:-1]
self.mids = (self.quantiles[1:]+self.quantiles[:-1])/2.
self.quantvals = (1.0/(len(self.quantiles)+1))/self.difs
if using == 'quantiles':
# First find the quantiles if none exist:
if self.quantiles is None:
self.quantiles = self.quantize()

if vb: print("Creating interpolator")
self.interpolator = spi.interp1d(self.mids, self.quantvals, fill_value="extrapolate")
self.difs = self.quantiles[1:]-self.quantiles[:-1]
self.mids = (self.quantiles[1:]+self.quantiles[:-1])/2.
self.vals = (1.0/(len(self.quantiles)+1))/self.difs

if using == 'histogram':
# First find the histogram if none exists:
if self.histogram is None:
self.histogram = self.histogramize()

self.mids = (self.histogram[0][1:]+self.histogram[0][:-1])/2.
self.vals = self.histogram[1]

if vb: print("Creating interpolator for "+using+' parametrization.')
self.interpolator = spi.interp1d(self.mids, self.vals, fill_value="extrapolate")

return

def approximate(self, points, vb=True):
def approximate(self, points, using=None, vb=True):
"""
Interpolates between the quantiles to get an approximation to the density.
Interpolates the parametrization to get an approximation to the density.
Parameters
----------
number: int
the number of points over which to interpolate, bounded by the quantile value endpoints
points: ndarray
the value(s) at which to evaluate the interpolated function
using: string
approximation parametrization, currently either 'quantiles'
or 'histogram'
vb: boolean
report on progress to stdout?
Expand All @@ -186,9 +271,8 @@ def approximate(self, points, vb=True):
x, y = p.approximate(np.linspace(-1., 1., 100))
"""

# First construct interpolator function if it does not already exist.
if self.interpolator is None:
self.interpolate()
self.interpolate(using=using)
interpolated = self.interpolator(points)
interpolated[interpolated<0.] = 0.

Expand Down Expand Up @@ -216,12 +300,16 @@ def plot(self, limits, points=None):
plt.plot(x, self.truth.pdf(x), color='k', linestyle='-', lw=1.0, alpha=1.0, label='True PDF')

if self.quantiles is not None:
y = [0., 1.]
plt.vlines(self.quantiles, y[0], y[1], color='k', linestyle='--', lw=1.0, alpha=1., label='Quantiles')

if points is not None:
(grid, interpolated) = self.approximate(points)
plt.plot(grid, interpolated, color='r', linestyle=':', lw=2.0, alpha=1.0, label='Interpolated PDF')
plt.vlines(self.quantiles, 0., 1., color='b', linestyle='--', lw=1.0, alpha=1., label='Quantiles')
if points is not None:
(grid, qinterpolated) = self.approximate(points, using='quantiles')
plt.plot(grid, qinterpolated, color='b', linestyle=':', lw=2.0, alpha=1.0, label='Quantile Interpolated PDF')

if self.histogram is not None:
plt.hlines(self.histogram[1], self.histogram[0][:-1], self.histogram[0][1:], color='r', linestyle='--', lw=1.0, alpha=1., label='Histogram')
if points is not None:
(grid, hinterpolated) = self.approximate(points, using='histogram')
plt.plot(grid, hinterpolated, color='r', linestyle=':', lw=2.0, alpha=1.0, label='Histogram Interpolated PDF')

plt.legend()
plt.xlabel('x')
Expand Down

0 comments on commit 93d4249

Please sign in to comment.