Merge branch 'release/0.1.7'
morganjwilliams committed Mar 26, 2019
2 parents 5c64e48 + d203897 commit 047e983
Showing 13 changed files with 261 additions and 99 deletions.
1 change: 0 additions & 1 deletion .coveragerc
@@ -10,7 +10,6 @@ omit =
# Omit extensions with extra dependencies
pyrolite/util/wfs.py
pyrolite/util/database.py
pyrolite/util/skl.py

# Ignore _version.py
*/_version.py
44 changes: 42 additions & 2 deletions docs/examples/comp/log_transformations.py
@@ -1,6 +1,46 @@
import numpy as np
import pandas as pd
from pyrolite.comp.codata import ilr, inverse_ilr, close

from pyrolite.compositions import *
np.random.seed(82)

np.random.seed(110)

def random_compositional_trend(m1, m2, c1, c2, resolution=20, size=1000):
# generate means intermediate between m1 and m2
mv = np.vstack([ilr(close(m1)).reshape(1, -1), ilr(close(m2)).reshape(1, -1)])
ms = np.apply_along_axis(lambda x: np.linspace(*x, resolution), 0, mv)
# generate covariance matrices intermediate between c1 and c2
cv = np.vstack([c1.reshape(1, -1), c2.reshape(1, -1)])
cs = np.apply_along_axis(lambda x: np.linspace(*x, resolution), 0, cv)
cs = cs.reshape(cs.shape[0], *c1.shape)
# generate samples from each
samples = np.vstack(
[
np.random.multivariate_normal(m.flatten(), cs[ix], size=size // resolution)
for ix, m in enumerate(ms)
]
)
# map the combined samples back to compositional space
return inverse_ilr(samples)


m1, m2 = np.array([[0.3, 0.1, 2.1]]), np.array([[0.5, 2.5, 0.05]])
c1, c2 = np.eye(2) / 100, np.eye(2) / 100

trend = pd.DataFrame(
random_compositional_trend(m1, m2, c1, c2, resolution=100, size=5000)
)
ax = trend.pyroplot.density(mode="density", bins=100)
ax.tax.scatter(
inverse_ilr(np.nanmean(ilr(trend.values), axis=0)[np.newaxis, :]) * 100,
marker="D",
color="k",
label="LogMean",
)

ax.tax.scatter(
close(np.nanmean(trend.values, axis=0))[np.newaxis, :] * 100,
marker="o",
color="r",
label="LogMean",
)
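
For context, a minimal sketch (not part of this commit) of why the two markers in the example above land in different places: the log-ratio mean is taken in ILR space and mapped back to the simplex, while the arithmetic mean is taken on the raw compositions, so the two centres generally disagree for skewed compositional data. The toy compositions below are illustrative only.

import numpy as np
from pyrolite.comp.codata import ilr, inverse_ilr, close

comps = close(np.array([[0.10, 0.20, 0.70], [0.40, 0.40, 0.20], [0.05, 0.15, 0.80]]))
# mean in log-ratio (ILR) space, mapped back to the simplex
logratio_mean = inverse_ilr(np.mean(ilr(comps), axis=0)[np.newaxis, :])
# naive arithmetic mean on the raw compositions
arithmetic_mean = close(np.mean(comps, axis=0))[np.newaxis, :]
print(logratio_mean, arithmetic_mean)  # the two centres differ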
Binary file modified docs/source/_static/spider_modes.png
42 changes: 40 additions & 2 deletions pyrolite/plot/__init__.py
@@ -6,6 +6,7 @@
logging.getLogger(__name__).addHandler(logging.NullHandler())
logger = logging.getLogger(__name__)

from ..util.plot import plot_cooccurence
from ..util.pd import to_frame
from ..util.meta import get_additional_params
from ..geochem import common_elements, REE
@@ -19,6 +20,7 @@
import pandas as pd


# note that only some of these methods will be valid for series
@pd.api.extensions.register_series_accessor("pyroplot")
@pd.api.extensions.register_dataframe_accessor("pyroplot")
class pyroplot(object):
@@ -224,11 +226,47 @@ def REE(self, index="radii", ax=None, mode="plot", **kwargs):
obj = to_frame(self._obj)
ree = REE()

reedata = obj.loc[:, ree].values
ax = spider.REE_v_radii(reedata, ree=ree, mode=mode, ax=ax, **kwargs)
ax = spider.REE_v_radii(
obj.loc[:, ree].values, ree=ree, mode=mode, ax=ax, **kwargs
)
ax.set_ylabel(" $\mathrm{X / X_{Reference}}$")
return ax

def cooccurence(
self,
ax=None,
normalize=True,
log=False,
colorbar=False,
**kwargs
):
"""
Plot the co-occurence frequency matrix for a given input.
Parameters
-----------
ax : :class:`matplotlib.axes.Axes`, :code:`None`
The subplot to draw on.
normalize : :class:`bool`
Whether to normalize the cooccurence to compare disparate variables.
log : :class:`bool`
Whether to take the log of the cooccurence.
colorbar : :class:`bool`
Whether to append a colorbar.
Returns
--------
:class:`matplotlib.axes.Axes`
Axes on which the cooccurence plot is added.
"""
obj = to_frame(self._obj)
ax = plot_cooccurence(
obj.values, ax=ax, normalize=normalize, log=log, colorbar=colorbar, **kwargs
)
ax.set_xticklabels(obj.columns, minor=False, rotation=90)
ax.set_yticklabels(obj.columns, minor=False)
return ax


# ideally we would i) check for the same params and ii) aggregate all others across
# inherited or chained functions. This simply imports the params from another docstring
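For context, a hypothetical usage sketch of the `cooccurence` accessor method added above (the DataFrame, column names and threshold here are illustrative, not from this commit):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pyrolite.plot  # registers the pyroplot accessor

df = pd.DataFrame(np.random.rand(100, 4), columns=["SiO2", "MgO", "CaO", "Sr"])
df[df < 0.1] = np.nan  # introduce some missing values
ax = df.pyroplot.cooccurence(normalize=True, colorbar=True)
plt.show()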
6 changes: 3 additions & 3 deletions pyrolite/util/math.py
@@ -143,7 +143,7 @@ def flattengrid(grid):
--------
:class:`numpy.ndarray`
"""
return np.vstack([g.flatten() for g in grid])
return np.vstack([g.flatten() for g in grid]).T
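
For context, a small sketch of what the `.T` change above means for callers of `flattengrid` (the grid shapes here are illustrative): the flattened grid is now returned with one point per row.

import numpy as np
from pyrolite.util.math import flattengrid

xi, yi = np.meshgrid(np.linspace(0, 1, 3), np.linspace(0, 1, 4))
points = flattengrid([xi, yi])
print(points.shape)  # (12, 2): one (x, y) row per grid node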


def linspc_(_min, _max, step=0.0, bins=20):
@@ -191,8 +191,8 @@ def logspc_(_min, _max, step=1.0, bins=20):
:class:`numpy.ndarray`
Log-spaced array.
"""
if step < 1:
step = 1/step
if step < 1.:
step = 1./step
return np.logspace(np.log(_min / step), np.log(_max * step), bins, base=np.e)
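
A quick illustrative check of the step handling above (endpoints and bin count assumed for the example): a step below one is inverted, so the range is always padded outwards.

from pyrolite.util.math import logspc_

arr = logspc_(1.0, 10.0, step=0.5, bins=5)  # step=0.5 is treated as step=2.0
print(arr.min(), arr.max())  # approximately 0.5 and 20.0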


34 changes: 34 additions & 0 deletions pyrolite/util/missing.py
@@ -46,3 +46,37 @@ def md_pattern(Y):
for ID in np.unique(pID).astype(int):
pD[ID]["freq"] = np.sum(pID == ID)
return pID, pD


def cooccurence_pattern(Y, normalize=False, log=False):
"""
Get the co-occurence patterns from an array.
Parameters
------------
Y : :class:`numpy.ndarray`
Input dataset.
normalize : :class:`bool`
Whether to normalize the cooccurence to compare disparate variables.
log : :class:`bool`
Whether to take the log of the cooccurence.
Returns
---------
co_occur : :class:`numpy.ndarray`
Cooccurence frequency array.
"""
_Y = Y.copy()
_Y[~np.isfinite(_Y)] = 0
_Y[_Y > 0] = 1
_Y = _Y.astype(int)
co_occur = _Y.T @ _Y
d = co_occur.shape[0]
if normalize:
diags = np.diagonal(co_occur)
for i in range(d):
for j in range(d):
co_occur[i, j] = co_occur[i, j] / np.max([diags[i], diags[j]])
if log:
co_occur = np.log(co_occur)
return co_occur
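
A toy sketch of the calculation above (values are illustrative): rows are observations, columns are variables, and entry (i, j) counts the observations where variables i and j are both present (finite and positive).

import numpy as np
from pyrolite.util.missing import cooccurence_pattern

Y = np.array([[1.0, np.nan, 3.0],
              [2.0, 2.0, np.nan],
              [1.0, 4.0, 5.0]])
print(cooccurence_pattern(Y))
# diagonal: per-variable counts (3, 2, 2); off-diagonals: pairwise counts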
58 changes: 48 additions & 10 deletions pyrolite/util/plot.py
@@ -36,6 +36,7 @@
linspc_,
logspc_,
)
from ..util.missing import cooccurence_pattern
from ..comp.codata import close, alr, ilr, clr, inverse_alr, inverse_clr, inverse_ilr

logging.getLogger(__name__).addHandler(logging.NullHandler())
@@ -139,8 +140,9 @@ def bin_centres_to_edges(centres):
------
* This can be updated to unevenly spaced bins, just need to calculate outer bins.
"""
step = (centres[1] - centres[0]) / 2
return np.append(centres - step, centres[-1] + step)
sortcentres = np.sort(centres.flatten())
step = (sortcentres[1] - sortcentres[0]) / 2.0
return np.append(sortcentres - step, [sortcentres[-1] + step])
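
A quick sketch of the evenly-spaced case handled above (the centres are illustrative); the sort makes the result robust to unsorted input.

import numpy as np
from pyrolite.util.plot import bin_centres_to_edges

print(bin_centres_to_edges(np.array([2.5, 0.5, 1.5])))
# [0. 1. 2. 3.] - one more edge than there are centres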


def bin_edges_to_centres(edges):
@@ -411,7 +413,6 @@ def conditional_prob_density(
xy = interpolate_line(xy, n=resolution, logy=logy)
x, y = np.swapaxes(xy, 0, 1)


xx = np.sort(x[0])
ymin, ymax = np.nanmin(y), np.nanmax(y)
ystep = [(ymax - ymin) / ybins, (ymax / ymin) / ybins][logy]
@@ -431,7 +432,7 @@
else:
raise ImportError("Requires statsmodels.")
# statsmodels pdf takes values in reverse order
zi = dens_c.pdf(*flattengrid([yi, xi])).reshape(xi.shape)
zi = dens_c.pdf(yi.flatten(), xi.flatten()).reshape(xi.shape)
elif mode == "kde": # kde of dataset
try:
kde = gaussian_kde(np.vstack([x.flatten(), y.flatten()]))
@@ -441,18 +442,21 @@
kde = gaussian_kde(flattengrid([x, y]).T)

xkde = gaussian_kde(x[0])(x[0]) # marginal density along x
zi = kde(flattengrid([xi, yi])).T.reshape(xi.shape) / xkde[np.newaxis, :]
zi = kde(flattengrid([xi, yi]).T).reshape(xi.shape) / xkde[np.newaxis, :]
elif mode == "binkde": # calclate a kde per bin
zi = np.zeros(xi.shape)
for bin in range(x.shape[1]):
kde = gaussian_kde(y[:, bin])
zi[:, bin] = kde(yi[:, bin])
elif "hist" in mode.lower(): # simply compute the histogram
# histogram monotonically increasing bins
H, hedges = np.histogramdd(
flattengrid([x, y]).T,
bins=[bin_centres_to_edges(xx), bin_centres_to_edges(yy)],
)
# histogram requires monotonically increasing bins; log bins need to be transformed
# calculate histogram in logy if needed
if logy:
y, yy = np.log(y), np.log(yy)
bins = [bin_centres_to_edges(xx), bin_centres_to_edges(yy)]
H, xe, ye = np.histogram2d(x.flatten(), y.flatten(), bins=bins)
if logy:
y, yy, yedges = np.exp(y), np.exp(yy), np.exp(ye)
zi = H.T.reshape(xi.shape)
else:
raise NotImplementedError
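
For context, a standalone sketch of the log-y histogram approach used in the "hist" branch above (data and bin counts here are illustrative): bin edges are built in log space so that bins are even on a logarithmic axis, then mapped back for plotting.

import numpy as np

y = np.random.lognormal(mean=0.0, sigma=1.0, size=1000)
centres = np.linspace(np.log(y.min()), np.log(y.max()), 20)
halfstep = (centres[1] - centres[0]) / 2.0
log_edges = np.append(centres - halfstep, [centres[-1] + halfstep])
counts, edges = np.histogram(np.log(y), bins=log_edges)
edges = np.exp(edges)  # back to the original y units for plotting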
@@ -743,6 +747,40 @@ def plot_Z_percentiles(
return cs


def plot_cooccurence(arr, ax=None, normalize=True, log=False, colorbar=False, **kwargs):
"""
Plot the co-occurence frequency matrix for a given input.
Parameters
-----------
ax : :class:`matplotlib.axes.Axes`, :code:`None`
The subplot to draw on.
normalize : :class:`bool`
Whether to normalize the cooccurence to compare disparate variables.
log : :class:`bool`
Whether to take the log of the cooccurence.
colorbar : :class:`bool`
Whether to append a colorbar.
Returns
--------
:class:`matplotlib.axes.Axes`
Axes on which the cooccurence plot is added.
"""
arr = np.array(arr)
if ax is None:
fig, ax = plt.subplots(1, figsize=(4 + [0.0, 0.2][colorbar], 4))
co_occur = cooccurence_pattern(arr, normalize=normalize, log=log)
heatmap = ax.pcolor(co_occur, **kwargs)
ax.set_yticks(np.arange(co_occur.shape[0]) + 0.5, minor=False)
ax.set_xticks(np.arange(co_occur.shape[1]) + 0.5, minor=False)
ax.invert_yaxis()
ax.xaxis.tick_top()
if colorbar:
add_colorbar(heatmap, **kwargs)
return ax


def nan_scatter(xdata, ydata, ax=None, axes_width=0.2, **kwargs):
"""
Scatter plot with additional marginal axes to plot data for which data is partially
31 changes: 0 additions & 31 deletions pyrolite/util/skl.py
@@ -188,37 +188,6 @@ def plot_gs_results(gs, xvar=None, yvar=None):
return ax


def plot_cooccurence(
df,
ax=None,
normalize=True,
log=False,
norm=mplc.Normalize(vmin=0, vmax=1.0),
**kwargs
):
if ax is None:
fig, ax = plt.subplots(1, figsize=(4.2, 4))
co_occur = df.fillna(0)
co_occur[co_occur > 0] = 1
co_occur = co_occur.T.dot(co_occur).astype(int)
if normalize:
diags = np.diagonal(co_occur)
for i in range(diags.shape[0]):
for j in range(diags.shape[0]):
co_occur.iloc[i, j] = co_occur.iloc[i, j] / np.max([diags[i], diags[j]])
if log:
co_occur = co_occur.applymap(np.log)
heatmap = ax.pcolor(co_occur, norm=norm, **kwargs)
ax.set_yticks(np.arange(co_occur.shape[0]) + 0.5, minor=False)
ax.set_xticks(np.arange(co_occur.shape[1]) + 0.5, minor=False)
ax.invert_yaxis()
ax.xaxis.tick_top()
ax.set_xticklabels(df.columns, minor=False, rotation=90)
ax.set_yticklabels(df.columns, minor=False)
add_colorbar(heatmap)
return ax


class DropBelowZero(BaseEstimator, TransformerMixin):
"""
Transformer for scikit-learn like use.
5 changes: 5 additions & 0 deletions pyrolite/util/synthetic.py
@@ -18,9 +18,14 @@ def random_cov_matrix(dim, validate=False):
--------
:class:`numpy.ndarray`
Covariance matrix of shape :code:`(dim, dim)`.
Todo
-----
* Implement a characteristic scale for the covariance matrix.
"""
cov = np.random.randn(dim, dim)
cov = np.dot(cov, cov.T)

if validate:
try:
assert (cov == cov.T).all()
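A minimal sketch of the construction in the hunk above (dimension chosen for illustration): A @ A.T is symmetric and positive semi-definite, which is what the optional validation step checks.

import numpy as np

dim = 3
A = np.random.randn(dim, dim)
cov = A @ A.T
assert np.allclose(cov, cov.T)  # symmetric
assert (np.linalg.eigvalsh(cov) >= -1e-10).all()  # non-negative eigenvalues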
22 changes: 22 additions & 0 deletions test/plot/plot_plot.py
@@ -75,6 +75,28 @@ def test_density_with_more_components_specified(self):
def test_density_with_more_components_specified_ternary(self):
self.multidf.pyroplot.density(components=self.multidf.columns[:3])

def test_cooccurence_default(self):
self.multidf.pyroplot.cooccurence()

def test_cooccurence_normalize(self):
for normalize in [True, False]:
with self.subTest(normalize=normalize):
self.multidf.pyroplot.cooccurence(normalize=normalize)

def test_cooccurence_log(self):
for log in [True, False]:
with self.subTest(log=log):
self.multidf.pyroplot.cooccurence(log=log)

def test_cooccurence_colorbar(self):
for colorbar in [True, False]:
with self.subTest(colorbar=colorbar):
self.multidf.pyroplot.cooccurence(colorbar=colorbar)

def test_cooccurence_external_ax(self):
fig, ax = plt.subplots(1)
self.multidf.pyroplot.cooccurence(ax=ax)

def tearDown(self):
plt.close("all")
