Skip to content

Commit

Permalink
Convert river and tidal modules to xarray (MHKiT-Software#285)
Browse files Browse the repository at this point in the history
* convert river.graphics to xarray

* convert mhkit.river.resource to xarray

* add pandas to xarray helper function

* mhkit.river.io.usgs to xarray

* convert mhkit.river.io.usgs to xarray

* convert river.io.d3d to xarray

* fix discharge_to_velocity

* move convert_to_dataset into mhkit.utils.data_utils

* update dataset labeling

* black formatting for previously changed river files

* black formatting for data_utils.py

* all xarray tests passing

* add helper function to convert input to xr.DataArray

* update river to use convert_to_dataArray

* update utils.convert_to_dataArray

* all river module tests passing

* black formatting

* fix typo in river.io.usgs

* update tidal.graphics to use utils.convert_to_dataArray

* tidal.performance to xarray

* remove tidal.performance dead code

* convert tidal.resource to xarray

* tidal tests passing

* add option to return noaa data as xarray, add xarray test

* add tidal.performance xarray tests, tests passing

* add parameter validation for to_pandas flag

* add tests for data_utils functions

* black formatting

* revert river.io.usgs._read_usgs_json to pandas for now, still allow xarray output

* fix cast to xarray in performance test

* fix create_points function after simplifying and converting to xarray

* black formatting

* fix output format for some river resource functions

* black formatting

* fix final call to exceedance_probability

* move convert_to_dataset, convert_to_dataarray to type_handling

* replace out variables with descriptive name

* correct test function name

* correct case on convert_to_dataArray

* update return variable name in velocity_profiles

* update handling of dataset to dataarray

* black formatting
  • Loading branch information
akeeste authored Mar 14, 2024
1 parent 09add83 commit 5d014f4
Show file tree
Hide file tree
Showing 18 changed files with 861 additions and 403 deletions.
74 changes: 6 additions & 68 deletions mhkit/power/characteristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import xarray as xr
import numpy as np
from scipy.signal import hilbert
from mhkit.utils import convert_to_dataset


def instantaneous_frequency(um, time_dimension="", to_pandas=True):
Expand Down Expand Up @@ -39,7 +40,7 @@ def instantaneous_frequency(um, time_dimension="", to_pandas=True):
)

# Convert input to xr.Dataset
um = _convert_to_dataset(um, "data")
um = convert_to_dataset(um, "data")

if time_dimension != "" and time_dimension not in um.coords:
raise ValueError(
Expand Down Expand Up @@ -114,8 +115,8 @@ def dc_power(voltage, current, to_pandas=True):
raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}")

# Convert inputs to xr.Dataset
voltage = _convert_to_dataset(voltage, "voltage")
current = _convert_to_dataset(current, "current")
voltage = convert_to_dataset(voltage, "voltage")
current = convert_to_dataset(current, "current")

# Check that sizes are the same
if not (
Expand Down Expand Up @@ -190,8 +191,8 @@ def ac_power_three_phase(
raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}")

# Convert inputs to xr.Dataset
voltage = _convert_to_dataset(voltage, "voltage")
current = _convert_to_dataset(current, "current")
voltage = convert_to_dataset(voltage, "voltage")
current = convert_to_dataset(current, "current")

# Check that sizes are the same
if not len(voltage.data_vars) == 3:
Expand All @@ -215,66 +216,3 @@ def ac_power_three_phase(
P = P.to_pandas()

return P


def _convert_to_dataset(data, name="data"):
    """
    Converts the given data to an xarray.Dataset.

    This function is designed to handle inputs that can be either a pandas
    DataFrame, a pandas Series, an xarray DataArray, or an xarray Dataset.
    It ensures that the output is consistently an xarray.Dataset. The input
    object itself is never modified.

    Parameters
    ----------
    data: pandas DataFrame, pandas Series, xarray DataArray, or xarray Dataset
        The data to be converted.

    name: str (Optional)
        The name to assign to the data variable when the intermediate
        xarray DataArray (either passed in directly or produced from a
        pandas Series) has no name. Default value is 'data'.

    Returns
    -------
    xarray.Dataset
        The input data converted to an xarray.Dataset. If the input is
        already an xarray.Dataset, it is returned as is.

    Raises
    ------
    TypeError
        If `data` is not one of the supported types, or if `name` is not
        a string.

    Examples
    --------
    >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
    >>> ds = _convert_to_dataset(df)
    >>> type(ds)
    <class 'xarray.core.dataset.Dataset'>

    >>> series = pd.Series([1, 2, 3], name='C')
    >>> ds = _convert_to_dataset(series)
    >>> type(ds)
    <class 'xarray.core.dataset.Dataset'>

    >>> data_array = xr.DataArray([1, 2, 3])
    >>> ds = _convert_to_dataset(data_array, name='D')
    >>> type(ds)
    <class 'xarray.core.dataset.Dataset'>
    """
    if not isinstance(data, (pd.DataFrame, pd.Series, xr.DataArray, xr.Dataset)):
        raise TypeError(
            "Input data must be of type pandas.DataFrame, pandas.Series, "
            "xarray.DataArray, or xarray.Dataset"
        )

    if not isinstance(name, str):
        raise TypeError("The 'name' parameter must be a string")

    # pandas objects are first moved into xarray; whatever comes back as a
    # DataArray is promoted to a Dataset by the branch below.
    if isinstance(data, (pd.DataFrame, pd.Series)):
        data = data.to_xarray()

    if isinstance(data, xr.DataArray):
        if data.name is None:
            # xr.DataArray.to_dataset() breaks if the data variable is
            # unnamed. Supply the fallback name through the `name` argument
            # rather than assigning `data.name`, so the caller's DataArray
            # is not renamed as a side effect.
            data = data.to_dataset(name=name)
        else:
            data = data.to_dataset()

    return data
10 changes: 5 additions & 5 deletions mhkit/power/quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
from scipy import fftpack
import xarray as xr
from .characteristics import _convert_to_dataset
from mhkit.utils import convert_to_dataset


# This group of functions are to be used for power quality assessments
Expand Down Expand Up @@ -46,7 +46,7 @@ def harmonics(x, freq, grid_freq, to_pandas=True):
raise TypeError(f"to_pandas must be of type bool. Got {type(to_pandas)}")

# Convert input to xr.Dataset
x = _convert_to_dataset(x, "data")
x = convert_to_dataset(x, "data")

sample_spacing = 1.0 / freq

Expand Down Expand Up @@ -120,7 +120,7 @@ def harmonic_subgroups(harmonics, grid_freq, frequency_dimension="", to_pandas=T
)

# Convert input to xr.Dataset
harmonics = _convert_to_dataset(harmonics, "harmonics")
harmonics = convert_to_dataset(harmonics, "harmonics")

if frequency_dimension != "" and frequency_dimension not in harmonics.coords:
raise ValueError(
Expand Down Expand Up @@ -200,7 +200,7 @@ def total_harmonic_current_distortion(
)

# Convert input to xr.Dataset
harmonics_subgroup = _convert_to_dataset(harmonics_subgroup, "harmonics")
harmonics_subgroup = convert_to_dataset(harmonics_subgroup, "harmonics")

if frequency_dimension != "" and frequency_dimension not in harmonics.coords:
raise ValueError(
Expand Down Expand Up @@ -263,7 +263,7 @@ def interharmonics(harmonics, grid_freq, frequency_dimension="", to_pandas=True)
raise TypeError(f"to_pandas must be of type bool. Got: {type(to_pandas)}")

# Convert input to xr.Dataset
harmonics = _convert_to_dataset(harmonics, "harmonics")
harmonics = convert_to_dataset(harmonics, "harmonics")

if frequency_dimension != "" and frequency_dimension not in harmonics.coords:
raise ValueError(
Expand Down
36 changes: 23 additions & 13 deletions mhkit/river/graphics.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from mhkit.utils import convert_to_dataarray


def _xy_plot(x, y, fmt=".", label=None, xlabel=None, ylabel=None, title=None, ax=None):
Expand Down Expand Up @@ -74,8 +75,8 @@ def plot_flow_duration_curve(D, F, label=None, ax=None):
"""
# Sort by F
temp = pd.DataFrame({"D": D, "F": F})
temp.sort_values("F", ascending=False, kind="mergesort", inplace=True)
temp = xr.Dataset(data_vars={"D": D, "F": F})
temp.sortby("F", ascending=False)

ax = _xy_plot(
temp["D"],
Expand Down Expand Up @@ -116,8 +117,8 @@ def plot_velocity_duration_curve(V, F, label=None, ax=None):
"""
# Sort by F
temp = pd.DataFrame({"V": V, "F": F})
temp.sort_values("F", ascending=False, kind="mergesort", inplace=True)
temp = xr.Dataset(data_vars={"V": V, "F": F})
temp.sortby("F", ascending=False)

ax = _xy_plot(
temp["V"],
Expand Down Expand Up @@ -157,8 +158,8 @@ def plot_power_duration_curve(P, F, label=None, ax=None):
"""
# Sort by F
temp = pd.DataFrame({"P": P, "F": F})
temp.sort_values("F", ascending=False, kind="mergesort", inplace=True)
temp = xr.Dataset(data_vars={"P": P, "F": F})
temp.sortby("F", ascending=False)

ax = _xy_plot(
temp["P"],
Expand All @@ -173,7 +174,7 @@ def plot_power_duration_curve(P, F, label=None, ax=None):
return ax


def plot_discharge_timeseries(Q, label=None, ax=None):
def plot_discharge_timeseries(Q, time_dimension="", label=None, ax=None):
"""
Plots discharge time-series
Expand All @@ -182,6 +183,10 @@ def plot_discharge_timeseries(Q, label=None, ax=None):
Q: array-like
Discharge [m3/s] indexed by time
time_dimension: string (optional)
Name of the xarray dimension corresponding to time. If not supplied,
defaults to the first dimension.
label: string
Label to use in the legend
Expand All @@ -194,8 +199,13 @@ def plot_discharge_timeseries(Q, label=None, ax=None):
ax : matplotlib pyplot axes
"""
Q = convert_to_dataarray(Q)

if time_dimension == "":
time_dimension = list(Q.coords)[0]

ax = _xy_plot(
Q.index,
Q.coords[time_dimension].values,
Q,
fmt="-",
label=label,
Expand All @@ -213,10 +223,10 @@ def plot_discharge_vs_velocity(D, V, polynomial_coeff=None, label=None, ax=None)
Parameters
------------
D : pandas Series
D : array-like
Discharge [m3/s] indexed by time
V : pandas Series
V : array-like
Velocity [m/s] indexed by time
polynomial_coeff: numpy polynomial
Expand Down Expand Up @@ -263,10 +273,10 @@ def plot_velocity_vs_power(V, P, polynomial_coeff=None, label=None, ax=None):
Parameters
------------
V : pandas Series
V : array-like
Velocity [m/s] indexed by time
P: pandas Series
P: array-like
Power [W] indexed by time
polynomial_coeff: numpy polynomial
Expand Down
Loading

0 comments on commit 5d014f4

Please sign in to comment.