Skip to content

Commit

Permalink
Merge pull request #8 from arfogg/collaborator_edits
Browse files Browse the repository at this point in the history
Collaborator edits
  • Loading branch information
arfogg authored Aug 28, 2024
2 parents 7dafc93 + 5254684 commit aaee698
Show file tree
Hide file tree
Showing 7 changed files with 100 additions and 51 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<h1 align="center">bi_multi_variate_eva</h1>
<h1 align="center">:bar_chart: bi_multi_variate_eva :chart_with_upwards_trend:</h1>

[![Downloads](https://img.shields.io/github/downloads/arfogg/bi_multi_variate_eva/total.svg)](#)
[![GitHub release](https://img.shields.io/github/v/release/arfogg/bi_multi_variate_eva)](#)
Expand All @@ -17,9 +17,6 @@ Python package to run bivariate and multivariate extreme value analysis on gener

**Support:** please [create an issue](https://github.com/arfogg/bi_multi_variate_eva/issues) or contact [arfogg](https://github.com/arfogg) directly. Any input on the code / issues found are greatly appreciated and will help to improve the software.

## Ongoing tasks
- [ ] Include Dáire acknowledgement statement

## Table of Contents
- [Required Packages](#required-packages)
- [Installing the code](#installing-the-code)
Expand Down
7 changes: 3 additions & 4 deletions src/bivariate/calculate_return_periods_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@ def calculate_return_period(copula, sample_grid,
----------
copula : copulas copula
Copula that has been fit to some data
sample_grid : pd.DataFrame
Two columns with names same as copula, containing x and y values
to find the return period for.
sample_grid : np.array
Grid containing x and y values to find the return period for.
block_size : pd.Timedelta, optional
Size over which block maxima have been found. The default
is pd.to_timedelta("365.2425D").
Expand Down Expand Up @@ -319,7 +318,7 @@ def plot_return_period_as_function_x_y(copula,
ci_percentiles=ci_percentiles)

# Initialise plot
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(24, 6))
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(9, 21))

# ----- RETURN PERIOD -----
rp_cbar_norm = colors.LogNorm(vmin=np.quantile(shaped_return_period, 0.1),
Expand Down
2 changes: 1 addition & 1 deletion src/bivariate/determine_AD_AI.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def plot_extremal_dependence_coefficient(x_data, y_data, x_bs_um, y_bs_um,

# Formatting
ax_edc.set_xlabel("Quantiles", fontsize=fontsize)
ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi$",
ax_edc.set_ylabel("Extremal Dependence Coefficient, $\chi_{u}$",
fontsize=fontsize)
for label in (ax_edc.get_xticklabels() + ax_edc.get_yticklabels()):
label.set_fontsize(fontsize)
Expand Down
53 changes: 53 additions & 0 deletions src/bivariate/fit_copula_to_extremes.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,56 @@ def qualitative_copula_fit_check_bivariate(x_extremes, y_extremes,
ax[2].set_ylim(ax[0].get_ylim())

return fig, ax


def qualitative_copula_fit_check_bivariate_scatter(x_extremes, y_extremes,
x_sample, y_sample,
x_name, y_name):
"""
Function to do a qualitative diagnostic plot for copula fit.
Parameters
----------
x_extremes : np.array or pd.Series
Observed x extremes (magnitude).
y_extremes : np.array or pd.Series
Observed y extremes (magnitude).
x_sample : np.array or pd.Series
Random copula sample (x) in data scale.
y_sample : np.array or pd.Series
Random copula sample (y) in data scale.
x_name : string
Name for x.
y_name : string
Name for y.
Returns
-------
fig : matplotlib figure
Diagnostic figure.
ax : array of matplotlib axes
Axes from fig.
"""

fontsize = 15
fig, ax = plt.subplots(figsize=(10, 10))

# Both both sets of component-wise maxima as a scatter plot
ax.plot(x_extremes, y_extremes, marker='^', color="indigo",
label=str(round(len(x_extremes))) + ' Observations',
fillstyle='none', linewidth=0., markersize=fontsize)
ax.plot(x_sample, y_sample, marker='*', color="darkgoldenrod",
label=str(round(len(x_sample))) + " Copula Samples",
fillstyle='none', linewidth=0., markersize=fontsize)

# Some decor
ax.set_xlabel(x_name, fontsize=fontsize)
ax.set_ylabel(y_name, fontsize=fontsize)
for label in (ax.get_xticklabels() + ax.get_yticklabels()):
label.set_fontsize(fontsize)

ax.set_title('Component-wise maxima', fontsize=fontsize)
ax.legend(fontsize=fontsize)

return fig, ax
56 changes: 29 additions & 27 deletions src/bivariate/gevd_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ class gevd_fitter():
on input extrema.
"""

def __init__(self, extremes, dist=None, fit_guess={}):
def __init__(self, extremes, dist=None, fit_guess={},
shape_threshold=0.005):
"""
Initialise the gevd_fitter class. Fits a GEVD or
Gumbel distribution.
Expand All @@ -40,6 +41,10 @@ def __init__(self, extremes, dist=None, fit_guess={}):
for fitting the distribution. Keys 'c' for shape,
'scale', and 'loc' for location. The default is
{}.
shape_threshold : float, optional
A genextreme distribution is fitted. If the absolute value of
the resulting shape parameter is less than or equal to this value,
a gumbel_r distribution is returned instead.
Returns
-------
Expand Down Expand Up @@ -68,7 +73,7 @@ def __init__(self, extremes, dist=None, fit_guess={}):
'scale': self.scale,
'loc': self.location}

def fit_model(self, dist=None, fit_guess={}):
def fit_model(self, dist=None, fit_guess={}, shape_threshold=0.005):
"""
Fit a GEVD or Gumbel to the parsed
extrema.
Expand All @@ -83,8 +88,11 @@ def fit_model(self, dist=None, fit_guess={}):
fit_guess : dictionary, optional
Dictionary containing guess initial parameters
for fitting the distribution. Keys 'c' for shape,
'scale', and 'loc' for location. The default is
{}.
'scale', and 'loc' for location. The default is {}.
shape_threshold : float, optional
A genextreme distribution is fitted. If the absolute value of
the resulting shape parameter is less than or equal to this value,
a gumbel_r distribution is returned instead.
Returns
-------
Expand Down Expand Up @@ -114,6 +122,7 @@ def fit_model(self, dist=None, fit_guess={}):
elif self.distribution_name == 'gumbel_r':
fitted_params = self.distribution.fit(self.extremes,
**fit_guess)

# Freeze the fitted model
self.frozen_dist = self.distribution(*fitted_params)

Expand All @@ -133,43 +142,36 @@ def fit_model(self, dist=None, fit_guess={}):
# Assign AIC
self.aic = self.akaike_info_criterion(self.extremes, self.frozen_dist)

def select_distribution(self):
def select_distribution(self, shape_threshold=0.005):
"""
Choose the best fitting distribution
based on which has the lowest AIC.
Parameters
----------
None.
shape_threshold : float, optional
A genextreme distribution is fitted. If the absolute value of
the resulting shape parameter is less than or equal to this value,
a gumbel_r distribution is returned instead.
Returns
-------
None.
"""

# Define loop lists
distributions = [genextreme, gumbel_r]
names = ['genextreme', 'gumbel_r']
aic_arr = []

for dist in distributions:
# Fit the model
params = dist.fit(self.extremes)
# Fit GEVD, and see what the shape value is
shape_, location, scale = genextreme.fit(self.extremes)

# Freeze distribution
frozen = dist(*params)

# Calculate AIC
aic = self.akaike_info_criterion(self.extremes, frozen)
aic_arr.append(aic)

# Find model with lowest AIC
min_index, = np.where(aic_arr == np.min(aic_arr))

# Assign the selected distribution to the class
self.distribution_name = names[min_index[0]]
self.distribution = distributions[min_index[0]]
# Assess the magnitude of the shape parameter
if abs(shape_) > shape_threshold:
# Shape is large enough, genextreme is returned
self.distribution_name = 'genextreme'
self.distribution = genextreme
else:
# Shape is small, so a Gumbel is likely a better fit
self.distribution_name = 'gumbel_r'
self.distribution = gumbel_r

def akaike_info_criterion(self, data, model):
"""
Expand Down
24 changes: 11 additions & 13 deletions src/bivariate/return_period_plot_1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ def return_period_plot(gevd_fitter, bootstrap_gevd_fit, block_size, data_tag,
' observed at least once\nper return period (' +
str(data_units_fm) + ')')
ax.set_xscale('log')
ax.legend(loc='lower right')
# Best possible legend position, limiting to bottom 85% vertically to
# leave room for axes labels
ax.legend(loc='best', bbox_to_anchor=(0, 0, 1, 0.85))
ax.set_title('Return Period Plot')

return ax
Expand Down Expand Up @@ -270,11 +272,10 @@ def calculate_return_period_empirical(data, block_size):
Function to calculate the return period of provided
extrema based on exceedance probability.
Pr_exceedance = 1 - (rank / (n + 1) )
rank - the ranking of the ordered data
n - number of data points
tau = 1 / (Pr_exceedance * n_ex_per_year)
tau = ( (N + 1) / rank ) / n
rank - rank of data in descending order
N - number of data points
n - number of blocks per year
Parameters
----------
Expand All @@ -291,17 +292,14 @@ def calculate_return_period_empirical(data, block_size):
"""

# Calculate the number of extrema per year based on block_size
extrema_per_year = pd.to_timedelta("365.2425D")/block_size
# Calculate the number of blocks per year based on block_size
n = pd.to_timedelta("365.2425D")/block_size

# Rank the data
rank = stats.rankdata(data)

# Calculate exceedance probability
exceedance_prob = 1. - ((rank)/(data.size + 1.0))
rank = (len(data) - stats.rankdata(data)) + 1

# Calculate Return Period
tau = (1.0/(exceedance_prob*extrema_per_year))
tau = ((len(data) + 1) / (rank)) / n

return tau

Expand Down
4 changes: 2 additions & 2 deletions src/bivariate/transform_uniform_margins.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,8 @@ def plot_diagnostic(gevd_fitter, bootstrap_gevd_fit, data_tag, data_units_fm,
data_units_fm, bootstrap_gevd_fit)

# Some decor
t = ax[1, 2].text(0.03, 0.90, '(f)', transform=ax[1, 2].transAxes,
va='top', ha='left')
t = ax[1, 2].text(0.03, 0.96, '(f)', transform=ax[1, 2].transAxes,
va='bottom', ha='left')
t.set_bbox(dict(facecolor='white', alpha=0.5, edgecolor='grey'))

fig.tight_layout()
Expand Down

0 comments on commit aaee698

Please sign in to comment.