diff --git a/README.md b/README.md
index f05de50..d4160fd 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-
bi_multi_variate_eva
+:bar_chart: bi_multi_variate_eva :chart_with_upwards_trend:
[![Downloads](https://img.shields.io/github/downloads/arfogg/bi_multi_variate_eva/total.svg)](#)
[![GitHub release](https://img.shields.io/github/v/release/arfogg/bi_multi_variate_eva)](#)
@@ -17,9 +17,6 @@ Python package to run bivariate and multivariate extreme value analysis on gener
**Support:** please [create an issue](https://github.com/arfogg/bi_multi_variate_eva/issues) or contact [arfogg](https://github.com/arfogg) directly. Any input on the code / issues found are greatly appreciated and will help to improve the software.
-## Ongoing tasks
-- [ ] Include Dáire acknowledgement statement
-
## Table of Contents
- [Required Packages](#required-packages)
- [Installing the code](#installing-the-code)
diff --git a/src/bivariate/calculate_return_periods_values.py b/src/bivariate/calculate_return_periods_values.py
index e833b31..1ea17df 100644
--- a/src/bivariate/calculate_return_periods_values.py
+++ b/src/bivariate/calculate_return_periods_values.py
@@ -25,9 +25,8 @@ def calculate_return_period(copula, sample_grid,
----------
copula : copulas copula
Copula that has been fit to some data
- sample_grid : pd.DataFrame
- Two columns with names same as copula, containing x and y values
- to find the return period for.
+ sample_grid : np.array
+ Grid containing x and y values to find the return period for.
block_size : pd.Timedelta, optional
Size over which block maxima have been found. The default
is pd.to_timedelta("365.2425D").
@@ -319,7 +318,7 @@ def plot_return_period_as_function_x_y(copula,
ci_percentiles=ci_percentiles)
# Initialise plot
- fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(24, 6))
+ fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(9, 21))
# ----- RETURN PERIOD -----
rp_cbar_norm = colors.LogNorm(vmin=np.quantile(shaped_return_period, 0.1),
diff --git a/src/bivariate/determine_AD_AI.py b/src/bivariate/determine_AD_AI.py
index 4dedfb8..5d092bb 100644
--- a/src/bivariate/determine_AD_AI.py
+++ b/src/bivariate/determine_AD_AI.py
@@ -160,7 +160,7 @@ def plot_extremal_dependence_coefficient(x_data, y_data, x_bs_um, y_bs_um,
# Formatting
ax_edc.set_xlabel("Quantiles", fontsize=fontsize)
- ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi$",
+ ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi_{u}$",
fontsize=fontsize)
for label in (ax_edc.get_xticklabels() + ax_edc.get_yticklabels()):
label.set_fontsize(fontsize)
diff --git a/src/bivariate/fit_copula_to_extremes.py b/src/bivariate/fit_copula_to_extremes.py
index 03b2bec..5681cdc 100644
--- a/src/bivariate/fit_copula_to_extremes.py
+++ b/src/bivariate/fit_copula_to_extremes.py
@@ -153,3 +153,56 @@ def qualitative_copula_fit_check_bivariate(x_extremes, y_extremes,
ax[2].set_ylim(ax[0].get_ylim())
return fig, ax
+
+
+def qualitative_copula_fit_check_bivariate_scatter(x_extremes, y_extremes,
+ x_sample, y_sample,
+ x_name, y_name):
+ """
+ Function to do a qualitative diagnostic plot for copula fit.
+
+ Parameters
+ ----------
+ x_extremes : np.array or pd.Series
+ Observed x extremes (magnitude).
+ y_extremes : np.array or pd.Series
+ Observed y extremes (magnitude).
+ x_sample : np.array or pd.Series
+ Random copula sample (x) in data scale.
+ y_sample : np.array or pd.Series
+ Random copula sample (y) in data scale.
+ x_name : string
+ Name for x.
+ y_name : string
+ Name for y.
+
+ Returns
+ -------
+ fig : matplotlib figure
+ Diagnostic figure.
+ ax : array of matplotlib axes
+ Axes from fig.
+
+ """
+
+ fontsize = 15
+ fig, ax = plt.subplots(figsize=(10, 10))
+
+ # Both both sets of component-wise maxima as a scatter plot
+ ax.plot(x_extremes, y_extremes, marker='^', color="indigo",
+ label=str(round(len(x_extremes))) + ' Observations',
+ fillstyle='none', linewidth=0., markersize=fontsize)
+ ax.plot(x_sample, y_sample, marker='*', color="darkgoldenrod",
+ label=str(round(len(x_sample))) + " Copula Samples",
+ fillstyle='none', linewidth=0., markersize=fontsize)
+
+ # Some decor
+ ax.set_xlabel(x_name, fontsize=fontsize)
+ ax.set_ylabel(y_name, fontsize=fontsize)
+ for label in (ax.get_xticklabels() + ax.get_yticklabels()):
+ label.set_fontsize(fontsize)
+
+ ax.set_title('Component-wise maxima', fontsize=fontsize)
+ ax.legend(fontsize=fontsize)
+
+ return fig, ax
diff --git a/src/bivariate/gevd_fitter.py b/src/bivariate/gevd_fitter.py
index 4dbd241..a0cc430 100644
--- a/src/bivariate/gevd_fitter.py
+++ b/src/bivariate/gevd_fitter.py
@@ -21,7 +21,8 @@ class gevd_fitter():
on input extrema.
"""
- def __init__(self, extremes, dist=None, fit_guess={}):
+ def __init__(self, extremes, dist=None, fit_guess={},
+ shape_threshold=0.005):
"""
Initialise the gevd_fitter class. Fits a GEVD or
Gumbel distribution.
@@ -40,6 +41,10 @@ def __init__(self, extremes, dist=None, fit_guess={}):
for fitting the distribution. Keys 'c' for shape,
'scale', and 'loc' for location. The default is
{}.
+ shape_threshold : float, optional
+ A genextreme distribution is fitted. If the absolute value of
+ the resulting shape parameter is less than or equal to this value,
+ a gumbel_r distribution is returned instead.
Returns
-------
@@ -68,7 +73,7 @@ def __init__(self, extremes, dist=None, fit_guess={}):
'scale': self.scale,
'loc': self.location}
- def fit_model(self, dist=None, fit_guess={}):
+ def fit_model(self, dist=None, fit_guess={}, shape_threshold=0.005):
"""
Fit a GEVD or Gumbel to the parsed
extrema.
@@ -83,8 +88,11 @@ def fit_model(self, dist=None, fit_guess={}):
fit_guess : dictionary, optional
Dictionary containing guess initial parameters
for fitting the distribution. Keys 'c' for shape,
- 'scale', and 'loc' for location. The default is
- {}.
+ 'scale', and 'loc' for location. The default is {}.
+ shape_threshold : float, optional
+ A genextreme distribution is fitted. If the absolute value of
+ the resulting shape parameter is less than or equal to this value,
+ a gumbel_r distribution is returned instead.
Returns
-------
@@ -114,6 +122,7 @@ def fit_model(self, dist=None, fit_guess={}):
elif self.distribution_name == 'gumbel_r':
fitted_params = self.distribution.fit(self.extremes,
**fit_guess)
+
# Freeze the fitted model
self.frozen_dist = self.distribution(*fitted_params)
@@ -133,14 +142,17 @@ def fit_model(self, dist=None, fit_guess={}):
# Assign AIC
self.aic = self.akaike_info_criterion(self.extremes, self.frozen_dist)
- def select_distribution(self):
+ def select_distribution(self, shape_threshold=0.005):
"""
Choose the best fitting distribution
based on which has the lowest AIC.
Parameters
----------
- None.
+ shape_threshold : float, optional
+ A genextreme distribution is fitted. If the absolute value of
+ the resulting shape parameter is less than or equal to this value,
+ a gumbel_r distribution is returned instead.
Returns
-------
@@ -148,28 +160,18 @@ def select_distribution(self):
"""
- # Define loop lists
- distributions = [genextreme, gumbel_r]
- names = ['genextreme', 'gumbel_r']
- aic_arr = []
-
- for dist in distributions:
- # Fit the model
- params = dist.fit(self.extremes)
+ # Fit GEVD, and see what the shape value is
+ shape_, location, scale = genextreme.fit(self.extremes)
- # Freeze distribution
- frozen = dist(*params)
-
- # Calculate AIC
- aic = self.akaike_info_criterion(self.extremes, frozen)
- aic_arr.append(aic)
-
- # Find model with lowest AIC
- min_index, = np.where(aic_arr == np.min(aic_arr))
-
- # Assign the selected distribution to the class
- self.distribution_name = names[min_index[0]]
- self.distribution = distributions[min_index[0]]
+ # Assess the magnitude of the shape parameter
+ if abs(shape_) > shape_threshold:
+ # Shape is large enough, genextreme is returned
+ self.distribution_name = 'genextreme'
+ self.distribution = genextreme
+ else:
+ # Shape is small, so a Gumbel is likely a better fit
+ self.distribution_name = 'gumbel_r'
+ self.distribution = gumbel_r
def akaike_info_criterion(self, data, model):
"""
diff --git a/src/bivariate/return_period_plot_1d.py b/src/bivariate/return_period_plot_1d.py
index cae9253..ccd4205 100644
--- a/src/bivariate/return_period_plot_1d.py
+++ b/src/bivariate/return_period_plot_1d.py
@@ -138,7 +138,9 @@ def return_period_plot(gevd_fitter, bootstrap_gevd_fit, block_size, data_tag,
' observed at least once\nper return period (' +
str(data_units_fm) + ')')
ax.set_xscale('log')
- ax.legend(loc='lower right')
+ # Best possible legend position, limiting to bottom 85% vertically to
+ # leave room for axes labels
+ ax.legend(loc='best', bbox_to_anchor=(0, 0, 1, 0.85))
ax.set_title('Return Period Plot')
return ax
@@ -270,11 +272,10 @@ def calculate_return_period_empirical(data, block_size):
Function to calculate the return period of provided
extrema based on exceedance probability.
- Pr_exceedance = 1 - (rank / (n + 1) )
- rank - the ranking of the ordered data
- n - number of data points
-
- tau = 1 / (Pr_exceedance * n_ex_per_year)
+ tau = ( (N + 1) / rank ) / n
+ rank - rank of data in descending order
+ N - number of data points
+ n - number of blocks per year
Parameters
----------
@@ -291,17 +292,14 @@ def calculate_return_period_empirical(data, block_size):
"""
- # Calculate the number of extrema per year based on block_size
- extrema_per_year = pd.to_timedelta("365.2425D")/block_size
+ # Calculate the number of blocks per year based on block_size
+ n = pd.to_timedelta("365.2425D")/block_size
# Rank the data
- rank = stats.rankdata(data)
-
- # Calculate exceedance probability
- exceedance_prob = 1. - ((rank)/(data.size + 1.0))
+ rank = (len(data) - stats.rankdata(data)) + 1
# Calculate Return Period
- tau = (1.0/(exceedance_prob*extrema_per_year))
+ tau = ((len(data) + 1) / (rank)) / n
return tau
diff --git a/src/bivariate/transform_uniform_margins.py b/src/bivariate/transform_uniform_margins.py
index e18d5c3..a63cd82 100644
--- a/src/bivariate/transform_uniform_margins.py
+++ b/src/bivariate/transform_uniform_margins.py
@@ -394,8 +394,8 @@ def plot_diagnostic(gevd_fitter, bootstrap_gevd_fit, data_tag, data_units_fm,
data_units_fm, bootstrap_gevd_fit)
# Some decor
- t = ax[1, 2].text(0.03, 0.90, '(f)', transform=ax[1, 2].transAxes,
- va='top', ha='left')
+ t = ax[1, 2].text(0.03, 0.96, '(f)', transform=ax[1, 2].transAxes,
+ va='bottom', ha='left')
t.set_bbox(dict(facecolor='white', alpha=0.5, edgecolor='grey'))
fig.tight_layout()