From 5254684b53f79ced4dd3b9b691a35e665735b9f4 Mon Sep 17 00:00:00 2001 From: Alexandra Ruth Fogg Date: Wed, 28 Aug 2024 15:32:37 +0100 Subject: [PATCH] collaborator comments and README edits --- README.md | 5 +-- src/bivariate/determine_AD_AI.py | 2 +- src/bivariate/gevd_fitter.py | 56 +++++++++++++------------- src/bivariate/return_period_plot_1d.py | 20 ++++----- 4 files changed, 39 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index f05de50..d4160fd 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

bi_multi_variate_eva

+

:bar_chart: bi_multi_variate_eva :chart_with_upwards_trend:

[![Downloads](https://img.shields.io/github/downloads/arfogg/bi_multi_variate_eva/total.svg)](#) [![GitHub release](https://img.shields.io/github/v/release/arfogg/bi_multi_variate_eva)](#) @@ -17,9 +17,6 @@ Python package to run bivariate and multivariate extreme value analysis on gener **Support:** please [create an issue](https://github.com/arfogg/bi_multi_variate_eva/issues) or contact [arfogg](https://github.com/arfogg) directly. Any input on the code / issues found are greatly appreciated and will help to improve the software. -## Ongoing tasks -- [ ] Include Dáire acknowledgement statement - ## Table of Contents - [Required Packages](#required-packages) - [Installing the code](#installing-the-code) diff --git a/src/bivariate/determine_AD_AI.py b/src/bivariate/determine_AD_AI.py index 4dedfb8..5d092bb 100644 --- a/src/bivariate/determine_AD_AI.py +++ b/src/bivariate/determine_AD_AI.py @@ -160,7 +160,7 @@ def plot_extremal_dependence_coefficient(x_data, y_data, x_bs_um, y_bs_um, # Formatting ax_edc.set_xlabel("Quantiles", fontsize=fontsize) - ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi$", + ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi_{u}$", fontsize=fontsize) for label in (ax_edc.get_xticklabels() + ax_edc.get_yticklabels()): label.set_fontsize(fontsize) diff --git a/src/bivariate/gevd_fitter.py b/src/bivariate/gevd_fitter.py index 4dbd241..a0cc430 100644 --- a/src/bivariate/gevd_fitter.py +++ b/src/bivariate/gevd_fitter.py @@ -21,7 +21,8 @@ class gevd_fitter(): on input extrema. """ - def __init__(self, extremes, dist=None, fit_guess={}): + def __init__(self, extremes, dist=None, fit_guess={}, + shape_threshold=0.005): """ Initialise the gevd_fitter class. Fits a GEVD or Gumbel distribution. @@ -40,6 +41,10 @@ def __init__(self, extremes, dist=None, fit_guess={}): for fitting the distribution. Keys 'c' for shape, 'scale', and 'loc' for location. The default is {}. 
+ shape_threshold : float, optional + A genextreme distribution is fitted. If the absolute value of + the resulting shape parameter is less than or equal to this value, + a gumbel_r distribution is returned instead. Returns ------- @@ -68,7 +73,7 @@ def __init__(self, extremes, dist=None, fit_guess={}): 'scale': self.scale, 'loc': self.location} - def fit_model(self, dist=None, fit_guess={}): + def fit_model(self, dist=None, fit_guess={}, shape_threshold=0.005): """ Fit a GEVD or Gumbel to the parsed extrema. @@ -83,8 +88,11 @@ def fit_model(self, dist=None, fit_guess={}): fit_guess : dictionary, optional Dictionary containing guess initial parameters for fitting the distribution. Keys 'c' for shape, - 'scale', and 'loc' for location. The default is - {}. + 'scale', and 'loc' for location. The default is {}. + shape_threshold : float, optional + A genextreme distribution is fitted. If the absolute value of + the resulting shape parameter is less than or equal to this value, + a gumbel_r distribution is returned instead. Returns ------- @@ -114,6 +122,7 @@ def fit_model(self, dist=None, fit_guess={}): elif self.distribution_name == 'gumbel_r': fitted_params = self.distribution.fit(self.extremes, **fit_guess) + # Freeze the fitted model self.frozen_dist = self.distribution(*fitted_params) @@ -133,14 +142,17 @@ def fit_model(self, dist=None, fit_guess={}): # Assign AIC self.aic = self.akaike_info_criterion(self.extremes, self.frozen_dist) - def select_distribution(self): + def select_distribution(self, shape_threshold=0.005): """ Choose the best fitting distribution based on which has the lowest AIC. Parameters ---------- - None. + shape_threshold : float, optional + A genextreme distribution is fitted. If the absolute value of + the resulting shape parameter is less than or equal to this value, + a gumbel_r distribution is returned instead. 
Returns ------- @@ -148,28 +160,18 @@ def select_distribution(self): """ - # Define loop lists - distributions = [genextreme, gumbel_r] - names = ['genextreme', 'gumbel_r'] - aic_arr = [] - - for dist in distributions: - # Fit the model - params = dist.fit(self.extremes) + # Fit GEVD, and see what the shape value is + shape_, location, scale = genextreme.fit(self.extremes) - # Freeze distribution - frozen = dist(*params) - - # Calculate AIC - aic = self.akaike_info_criterion(self.extremes, frozen) - aic_arr.append(aic) - - # Find model with lowest AIC - min_index, = np.where(aic_arr == np.min(aic_arr)) - - # Assign the selected distribution to the class - self.distribution_name = names[min_index[0]] - self.distribution = distributions[min_index[0]] + # Assess the magnitude of the shape parameter + if abs(shape_) > shape_threshold: + # Shape is large enough, genextreme is returned + self.distribution_name = 'genextreme' + self.distribution = genextreme + else: + # Shape is small, so a Gumbel is likely a better fit + self.distribution_name = 'gumbel_r' + self.distribution = gumbel_r def akaike_info_criterion(self, data, model): """ diff --git a/src/bivariate/return_period_plot_1d.py b/src/bivariate/return_period_plot_1d.py index 8d26fa6..ccd4205 100644 --- a/src/bivariate/return_period_plot_1d.py +++ b/src/bivariate/return_period_plot_1d.py @@ -272,11 +272,10 @@ def calculate_return_period_empirical(data, block_size): Function to calculate the return period of provided extrema based on exceedance probability. 
- Pr_exceedance = 1 - (rank / (n + 1) ) - rank - the ranking of the ordered data - n - number of data points - - tau = 1 / (Pr_exceedance * n_ex_per_year) + tau = ( (N + 1) / rank ) / n + rank - rank of data in descending order + N - number of data points + n - number of blocks per year Parameters ---------- @@ -293,17 +292,14 @@ def calculate_return_period_empirical(data, block_size): """ - # Calculate the number of extrema per year based on block_size - extrema_per_year = pd.to_timedelta("365.2425D")/block_size + # Calculate the number of blocks per year based on block_size + n = pd.to_timedelta("365.2425D")/block_size # Rank the data - rank = stats.rankdata(data) - - # Calculate exceedance probability - exceedance_prob = 1. - ((rank)/(data.size + 1.0)) + rank = (len(data) - stats.rankdata(data)) + 1 # Calculate Return Period - tau = (1.0/(exceedance_prob*extrema_per_year)) + tau = ((len(data) + 1) / (rank)) / n return tau