diff --git a/README.md b/README.md index f05de50..d4160fd 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

bi_multi_variate_eva

+

:bar_chart: bi_multi_variate_eva :chart_with_upwards_trend:

[![Downloads](https://img.shields.io/github/downloads/arfogg/bi_multi_variate_eva/total.svg)](#) [![GitHub release](https://img.shields.io/github/v/release/arfogg/bi_multi_variate_eva)](#) @@ -17,9 +17,6 @@ Python package to run bivariate and multivariate extreme value analysis on gener **Support:** please [create an issue](https://github.com/arfogg/bi_multi_variate_eva/issues) or contact [arfogg](https://github.com/arfogg) directly. Any input on the code / issues found are greatly appreciated and will help to improve the software. -## Ongoing tasks -- [ ] Include Dáire acknowledgement statement - ## Table of Contents - [Required Packages](#required-packages) - [Installing the code](#installing-the-code) diff --git a/src/bivariate/calculate_return_periods_values.py b/src/bivariate/calculate_return_periods_values.py index e833b31..1ea17df 100644 --- a/src/bivariate/calculate_return_periods_values.py +++ b/src/bivariate/calculate_return_periods_values.py @@ -25,9 +25,8 @@ def calculate_return_period(copula, sample_grid, ---------- copula : copulas copula Copula that has been fit to some data - sample_grid : pd.DataFrame - Two columns with names same as copula, containing x and y values - to find the return period for. + sample_grid : np.array + Grid containing x and y values to find the return period for. block_size : pd.Timedelta, optional Size over which block maxima have been found. The default is pd.to_timedelta("365.2425D"). @@ -319,7 +318,7 @@ def plot_return_period_as_function_x_y(copula, ci_percentiles=ci_percentiles) # Initialise plot - fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(24, 6)) + fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(9, 21)) # ----- RETURN PERIOD ----- rp_cbar_norm = colors.LogNorm(vmin=np.quantile(shaped_return_period, 0.1), diff --git a/src/bivariate/determine_AD_AI.py b/src/bivariate/determine_AD_AI.py index 4dedfb8..5d092bb 100644 --- a/src/bivariate/determine_AD_AI.py +++ b/src/bivariate/determine_AD_AI.py @@ -160,7 +160,7 @@ def plot_extremal_dependence_coefficient(x_data, y_data, x_bs_um, y_bs_um, # Formatting ax_edc.set_xlabel("Quantiles", fontsize=fontsize) - ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi$", + ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi_{u}$", fontsize=fontsize) for label in (ax_edc.get_xticklabels() + ax_edc.get_yticklabels()): label.set_fontsize(fontsize) diff --git a/src/bivariate/fit_copula_to_extremes.py b/src/bivariate/fit_copula_to_extremes.py index 03b2bec..5681cdc 100644 --- a/src/bivariate/fit_copula_to_extremes.py +++ b/src/bivariate/fit_copula_to_extremes.py @@ -153,3 +153,56 @@ def qualitative_copula_fit_check_bivariate(x_extremes, y_extremes, ax[2].set_ylim(ax[0].get_ylim()) return fig, ax + + +def qualitative_copula_fit_check_bivariate_scatter(x_extremes, y_extremes, + x_sample, y_sample, + x_name, y_name): + """ + Function to do a qualitative diagnostic plot for copula fit. + + Parameters + ---------- + x_extremes : np.array or pd.Series + Observed x extremes (magnitude). + y_extremes : np.array or pd.Series + Observed y extremes (magnitude). + x_sample : np.array or pd.Series + Random copula sample (x) in data scale. + y_sample : np.array or pd.Series + Random copula sample (y) in data scale. + x_name : string + Name for x. + y_name : string + Name for y. + + Returns + ------- + fig : matplotlib figure + Diagnostic figure. + ax : array of matplotlib axes + Axes from fig. + + """ + + fontsize = 15 + fig, ax = plt.subplots(figsize=(10, 10)) + + # Both both sets of component-wise maxima as a scatter plot + ax.plot(x_extremes, y_extremes, marker='^', color="indigo", + label=str(round(len(x_extremes))) + ' Observations', + fillstyle='none', linewidth=0., markersize=fontsize) + ax.plot(x_sample, y_sample, marker='*', color="darkgoldenrod", + label=str(round(len(x_sample))) + " Copula Samples", + fillstyle='none', linewidth=0., markersize=fontsize) + + # Some decor + ax.set_xlabel(x_name, fontsize=fontsize) + ax.set_ylabel(y_name, fontsize=fontsize) + for label in (ax.get_xticklabels() + ax.get_yticklabels()): + label.set_fontsize(fontsize) + + ax.set_title('Component-wise maxima', fontsize=fontsize) + ax.legend(fontsize=fontsize) + + return fig, ax diff --git a/src/bivariate/gevd_fitter.py b/src/bivariate/gevd_fitter.py index 4dbd241..a0cc430 100644 --- a/src/bivariate/gevd_fitter.py +++ b/src/bivariate/gevd_fitter.py @@ -21,7 +21,8 @@ class gevd_fitter(): on input extrema. """ - def __init__(self, extremes, dist=None, fit_guess={}): + def __init__(self, extremes, dist=None, fit_guess={}, + shape_threshold=0.005): """ Initialise the gevd_fitter class. Fits a GEVD or Gumbel distribution. @@ -40,6 +41,10 @@ def __init__(self, extremes, dist=None, fit_guess={}): for fitting the distribution. Keys 'c' for shape, 'scale', and 'loc' for location. The default is {}. + shape_threshold : float, optional + A genextreme distribution is fitted. If the absolute value of + the resulting shape parameter is less than or equal to this value, + a gumbel_r distribution is returned instead. Returns ------- @@ -68,7 +73,7 @@ def __init__(self, extremes, dist=None, fit_guess={}): 'scale': self.scale, 'loc': self.location} - def fit_model(self, dist=None, fit_guess={}): + def fit_model(self, dist=None, fit_guess={}, shape_threshold=0.005): """ Fit a GEVD or Gumbel to the parsed extrema. @@ -83,8 +88,11 @@ def fit_model(self, dist=None, fit_guess={}): fit_guess : dictionary, optional Dictionary containing guess initial parameters for fitting the distribution. Keys 'c' for shape, - 'scale', and 'loc' for location. The default is - {}. + 'scale', and 'loc' for location. The default is {}. + shape_threshold : float, optional + A genextreme distribution is fitted. If the absolute value of + the resulting shape parameter is less than or equal to this value, + a gumbel_r distribution is returned instead. Returns ------- @@ -114,6 +122,7 @@ def fit_model(self, dist=None, fit_guess={}): elif self.distribution_name == 'gumbel_r': fitted_params = self.distribution.fit(self.extremes, **fit_guess) + # Freeze the fitted model self.frozen_dist = self.distribution(*fitted_params) @@ -133,14 +142,17 @@ def fit_model(self, dist=None, fit_guess={}): # Assign AIC self.aic = self.akaike_info_criterion(self.extremes, self.frozen_dist) - def select_distribution(self): + def select_distribution(self, shape_threshold=0.005): """ Choose the best fitting distribution based on which has the lowest AIC. Parameters ---------- - None. + shape_threshold : float, optional + A genextreme distribution is fitted. If the absolute value of + the resulting shape parameter is less than or equal to this value, + a gumbel_r distribution is returned instead. Returns ------- @@ -148,28 +160,18 @@ def select_distribution(self): """ - # Define loop lists - distributions = [genextreme, gumbel_r] - names = ['genextreme', 'gumbel_r'] - aic_arr = [] - - for dist in distributions: - # Fit the model - params = dist.fit(self.extremes) + # Fit GEVD, and see what the shape value is + shape_, location, scale = genextreme.fit(self.extremes) - # Freeze distribution - frozen = dist(*params) - - # Calculate AIC - aic = self.akaike_info_criterion(self.extremes, frozen) - aic_arr.append(aic) - - # Find model with lowest AIC - min_index, = np.where(aic_arr == np.min(aic_arr)) - - # Assign the selected distribution to the class - self.distribution_name = names[min_index[0]] - self.distribution = distributions[min_index[0]] + # Assess the magnitude of the shape parameter + if abs(shape_) > shape_threshold: + # Shape is large enough, genextreme is returned + self.distribution_name = 'genextreme' + self.distribution = genextreme + else: + # Shape is small, so a Gumbel is likely a better fit + self.distribution_name = 'gumbel_r' + self.distribution = gumbel_r def akaike_info_criterion(self, data, model): """ diff --git a/src/bivariate/return_period_plot_1d.py b/src/bivariate/return_period_plot_1d.py index cae9253..ccd4205 100644 --- a/src/bivariate/return_period_plot_1d.py +++ b/src/bivariate/return_period_plot_1d.py @@ -138,7 +138,9 @@ def return_period_plot(gevd_fitter, bootstrap_gevd_fit, block_size, data_tag, ' observed at least once\nper return period (' + str(data_units_fm) + ')') ax.set_xscale('log') - ax.legend(loc='lower right') + # Best possible legend position, limiting to bottom 85% vertically to + # leave room for axes labels + ax.legend(loc='best', bbox_to_anchor=(0, 0, 1, 0.85)) ax.set_title('Return Period Plot') return ax @@ -270,11 +272,10 @@ def calculate_return_period_empirical(data, block_size): Function to calculate the return period of provided extrema based on exceedance probability. - Pr_exceedance = 1 - (rank / (n + 1) ) - rank - the ranking of the ordered data - n - number of data points - - tau = 1 / (Pr_exceedance * n_ex_per_year) + tau = ( (N + 1) / rank ) / n + rank - rank of data in descending order + N - number of data points + n - number of blocks per year Parameters ---------- @@ -291,17 +292,14 @@ def calculate_return_period_empirical(data, block_size): """ - # Calculate the number of extrema per year based on block_size - extrema_per_year = pd.to_timedelta("365.2425D")/block_size + # Calculate the number of blocks per year based on block_size + n = pd.to_timedelta("365.2425D")/block_size # Rank the data - rank = stats.rankdata(data) - - # Calculate exceedance probability - exceedance_prob = 1. - ((rank)/(data.size + 1.0)) + rank = (len(data) - stats.rankdata(data)) + 1 # Calculate Return Period - tau = (1.0/(exceedance_prob*extrema_per_year)) + tau = ((len(data) + 1) / (rank)) / n return tau diff --git a/src/bivariate/transform_uniform_margins.py b/src/bivariate/transform_uniform_margins.py index e18d5c3..a63cd82 100644 --- a/src/bivariate/transform_uniform_margins.py +++ b/src/bivariate/transform_uniform_margins.py @@ -394,8 +394,8 @@ def plot_diagnostic(gevd_fitter, bootstrap_gevd_fit, data_tag, data_units_fm, data_units_fm, bootstrap_gevd_fit) # Some decor - t = ax[1, 2].text(0.03, 0.90, '(f)', transform=ax[1, 2].transAxes, - va='top', ha='left') + t = ax[1, 2].text(0.03, 0.96, '(f)', transform=ax[1, 2].transAxes, + va='bottom', ha='left') t.set_bbox(dict(facecolor='white', alpha=0.5, edgecolor='grey')) fig.tight_layout()