diff --git a/README.md b/README.md
index f05de50..d4160fd 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-
bi_multi_variate_eva
+:bar_chart: bi_multi_variate_eva :chart_with_upwards_trend:
[![Downloads](https://img.shields.io/github/downloads/arfogg/bi_multi_variate_eva/total.svg)](#)
[![GitHub release](https://img.shields.io/github/v/release/arfogg/bi_multi_variate_eva)](#)
@@ -17,9 +17,6 @@ Python package to run bivariate and multivariate extreme value analysis on gener
**Support:** please [create an issue](https://github.com/arfogg/bi_multi_variate_eva/issues) or contact [arfogg](https://github.com/arfogg) directly. Any input on the code / issues found are greatly appreciated and will help to improve the software.
-## Ongoing tasks
-- [ ] Include Dáire acknowledgement statement
-
## Table of Contents
- [Required Packages](#required-packages)
- [Installing the code](#installing-the-code)
diff --git a/src/bivariate/determine_AD_AI.py b/src/bivariate/determine_AD_AI.py
index 4dedfb8..5d092bb 100644
--- a/src/bivariate/determine_AD_AI.py
+++ b/src/bivariate/determine_AD_AI.py
@@ -160,7 +160,7 @@ def plot_extremal_dependence_coefficient(x_data, y_data, x_bs_um, y_bs_um,
# Formatting
ax_edc.set_xlabel("Quantiles", fontsize=fontsize)
- ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi$",
+ ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi_{u}$",
fontsize=fontsize)
for label in (ax_edc.get_xticklabels() + ax_edc.get_yticklabels()):
label.set_fontsize(fontsize)
diff --git a/src/bivariate/gevd_fitter.py b/src/bivariate/gevd_fitter.py
index 4dbd241..a0cc430 100644
--- a/src/bivariate/gevd_fitter.py
+++ b/src/bivariate/gevd_fitter.py
@@ -21,7 +21,8 @@ class gevd_fitter():
on input extrema.
"""
- def __init__(self, extremes, dist=None, fit_guess={}):
+ def __init__(self, extremes, dist=None, fit_guess={},
+ shape_threshold=0.005):
"""
Initialise the gevd_fitter class. Fits a GEVD or
Gumbel distribution.
@@ -40,6 +41,10 @@ def __init__(self, extremes, dist=None, fit_guess={}):
for fitting the distribution. Keys 'c' for shape,
'scale', and 'loc' for location. The default is
{}.
+ shape_threshold : float, optional
+ A genextreme distribution is fitted. If the absolute value of
+ the resulting shape parameter is less than or equal to this value,
+        a gumbel_r distribution is returned instead. The default is 0.005.
Returns
-------
@@ -68,7 +73,7 @@ def __init__(self, extremes, dist=None, fit_guess={}):
'scale': self.scale,
'loc': self.location}
- def fit_model(self, dist=None, fit_guess={}):
+ def fit_model(self, dist=None, fit_guess={}, shape_threshold=0.005):
"""
Fit a GEVD or Gumbel to the parsed
extrema.
@@ -83,8 +88,11 @@ def fit_model(self, dist=None, fit_guess={}):
fit_guess : dictionary, optional
Dictionary containing guess initial parameters
for fitting the distribution. Keys 'c' for shape,
- 'scale', and 'loc' for location. The default is
- {}.
+ 'scale', and 'loc' for location. The default is {}.
+ shape_threshold : float, optional
+ A genextreme distribution is fitted. If the absolute value of
+ the resulting shape parameter is less than or equal to this value,
+            a gumbel_r distribution is returned instead. The default is 0.005.
Returns
-------
@@ -114,6 +122,7 @@ def fit_model(self, dist=None, fit_guess={}):
elif self.distribution_name == 'gumbel_r':
fitted_params = self.distribution.fit(self.extremes,
**fit_guess)
+
# Freeze the fitted model
self.frozen_dist = self.distribution(*fitted_params)
@@ -133,14 +142,17 @@ def fit_model(self, dist=None, fit_guess={}):
# Assign AIC
self.aic = self.akaike_info_criterion(self.extremes, self.frozen_dist)
- def select_distribution(self):
+ def select_distribution(self, shape_threshold=0.005):
"""
Choose the best fitting distribution
based on which has the lowest AIC.
Parameters
----------
- None.
+ shape_threshold : float, optional
+ A genextreme distribution is fitted. If the absolute value of
+ the resulting shape parameter is less than or equal to this value,
+        a gumbel_r distribution is returned instead. The default is 0.005.
Returns
-------
@@ -148,28 +160,18 @@ def select_distribution(self):
"""
- # Define loop lists
- distributions = [genextreme, gumbel_r]
- names = ['genextreme', 'gumbel_r']
- aic_arr = []
-
- for dist in distributions:
- # Fit the model
- params = dist.fit(self.extremes)
+ # Fit GEVD, and see what the shape value is
+ shape_, location, scale = genextreme.fit(self.extremes)
- # Freeze distribution
- frozen = dist(*params)
-
- # Calculate AIC
- aic = self.akaike_info_criterion(self.extremes, frozen)
- aic_arr.append(aic)
-
- # Find model with lowest AIC
- min_index, = np.where(aic_arr == np.min(aic_arr))
-
- # Assign the selected distribution to the class
- self.distribution_name = names[min_index[0]]
- self.distribution = distributions[min_index[0]]
+ # Assess the magnitude of the shape parameter
+ if abs(shape_) > shape_threshold:
+ # Shape is large enough, genextreme is returned
+ self.distribution_name = 'genextreme'
+ self.distribution = genextreme
+ else:
+ # Shape is small, so a Gumbel is likely a better fit
+ self.distribution_name = 'gumbel_r'
+ self.distribution = gumbel_r
def akaike_info_criterion(self, data, model):
"""
diff --git a/src/bivariate/return_period_plot_1d.py b/src/bivariate/return_period_plot_1d.py
index 8d26fa6..ccd4205 100644
--- a/src/bivariate/return_period_plot_1d.py
+++ b/src/bivariate/return_period_plot_1d.py
@@ -272,11 +272,10 @@ def calculate_return_period_empirical(data, block_size):
Function to calculate the return period of provided
extrema based on exceedance probability.
- Pr_exceedance = 1 - (rank / (n + 1) )
- rank - the ranking of the ordered data
- n - number of data points
-
- tau = 1 / (Pr_exceedance * n_ex_per_year)
+ tau = ( (N + 1) / rank ) / n
+ rank - rank of data in descending order
+ N - number of data points
+ n - number of blocks per year
Parameters
----------
@@ -293,17 +292,14 @@ def calculate_return_period_empirical(data, block_size):
"""
- # Calculate the number of extrema per year based on block_size
- extrema_per_year = pd.to_timedelta("365.2425D")/block_size
+ # Calculate the number of blocks per year based on block_size
+ n = pd.to_timedelta("365.2425D")/block_size
# Rank the data
- rank = stats.rankdata(data)
-
- # Calculate exceedance probability
- exceedance_prob = 1. - ((rank)/(data.size + 1.0))
+ rank = (len(data) - stats.rankdata(data)) + 1
# Calculate Return Period
- tau = (1.0/(exceedance_prob*extrema_per_year))
+ tau = ((len(data) + 1) / (rank)) / n
return tau