Skip to content

Commit

Permalink
Fix backtest and residuals for multi series with different number of …
Browse files Browse the repository at this point in the history
…historical forecasts (#2604)

* fix backtest and residuals for multiple series with different number of forecasts

* update changelog
  • Loading branch information
dennisbader authored Nov 22, 2024
1 parent 31a7b36 commit d103a05
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 8 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co

**Fixed**

- Fixed a bug which raised an error when computing residuals (or backtest with "per time step" metrics) on multiple series with corresponding historical forecasts of different lengths. [#2604](https://github.com/unit8co/darts/pull/2604) by [Dennis Bader](https://github.com/dennisbader).
- Fixed a bug when using `darts.utils.data.tabularization.create_lagged_component_names()` with target `lags=None`, that did not return any lagged target label component names. [#2576](https://github.com/unit8co/darts/pull/2576) by [Dennis Bader](https://github.com/dennisbader).
- Fixed a bug when using `num_samples > 1` with a deterministic regression model and the optimized `historical_forecasts()` method, an exception was not raised. [#2588](https://github.com/unit8co/darts/pull/2588) by [Antoine Madrona](https://github.com/madtoinou).

Expand Down
15 changes: 13 additions & 2 deletions darts/models/forecasting/forecasting_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1504,13 +1504,24 @@ def __getitem__(self, index) -> TimeSeries:
kwargs["insample"] = series_gen

errors.append(metric_f(series_gen, forecasts_list, **kwargs))
errors = np.array(errors)
try:
# multiple series can result in different number of forecasts; try if we can run it efficiently
errors = np.array(errors)
is_arr = True
except ValueError:
# otherwise, compute array later
is_arr = False

# get errors for each input `series`
backtest_list = []
for i in range(len(cum_len) - 1):
# errors_series with shape `(n metrics, n series specific historical forecasts, *)`
errors_series = errors[:, cum_len[i] : cum_len[i + 1]]
if is_arr:
errors_series = errors[:, cum_len[i] : cum_len[i + 1]]
else:
errors_series = np.array([
errors_[cum_len[i] : cum_len[i + 1]] for errors_ in errors
])

if reduction is not None:
# shape `(n metrics, n forecasts, *)` -> `(n metrics, *)`
Expand Down
20 changes: 14 additions & 6 deletions darts/tests/models/forecasting/test_residuals.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,8 @@ def test_output_single_series_hfc_lpo_false(self, config):
@pytest.mark.parametrize(
"config",
itertools.product(
[True, False],
[True, False], # is univariate
[True, False], # same lengths
[
(metrics.err, ((0.0, 0.0), (-1.0, -2.0))),
(metrics.ape, ((0.0, 0.0), (100.0, 100.0))),
Expand All @@ -159,10 +160,12 @@ def test_output_single_series_hfc_lpo_false(self, config):
)
def test_output_multi_series_hfc_lpo_true(self, config):
"""Tests residuals based on historical forecasts generated on multiple `series` with last_points_only=True"""
is_univariate, (metric, score_exp) = config
is_univariate, same_lengths, (metric, score_exp) = config
n_ts = 10
y = ct(value=1.0, length=n_ts)
hfc = ct(value=2.0, length=n_ts)
if not same_lengths:
y = y.append_values([1.0])
if not is_univariate:
y = y.stack(y + 1.0)
hfc = hfc.stack(hfc + 2.0)
Expand All @@ -173,8 +176,9 @@ def test_output_multi_series_hfc_lpo_true(self, config):
# expected residuals values of shape (n time steps, n components, n samples=1) per forecast
scores_exp = []
for i in range(len(hfc)):
num_fcs = len(hfc[i])
scores_exp.append(
np.array([score_exp[i][:n_comps]] * 10).reshape(n_ts, -1, 1)
np.array([score_exp[i][:n_comps]] * num_fcs).reshape(num_fcs, -1, 1)
)

model = NaiveDrift()
Expand Down Expand Up @@ -208,7 +212,8 @@ def test_output_multi_series_hfc_lpo_true(self, config):
@pytest.mark.parametrize(
"config",
itertools.product(
[True, False],
[True, False], # is univariate
[True, False], # same lengths
[
(metrics.err, ((0.0, 0.0), (-1.0, -2.0))),
(metrics.ape, ((0.0, 0.0), (100.0, 100.0))),
Expand All @@ -219,10 +224,12 @@ def test_output_multi_series_hfc_lpo_false(self, config):
"""Tests residuals based on historical forecasts generated on multiple `series` with
last_points_only=False.
"""
is_univariate, (metric, score_exp) = config
is_univariate, same_lengths, (metric, score_exp) = config
n_ts = 10
y = ct(value=1.0, length=n_ts)
hfc = ct(value=2.0, length=n_ts)
if not same_lengths:
y = y.append_values([1.0])
if not is_univariate:
y = y.stack(y + 1.0)
hfc = hfc.stack(hfc + 2.0)
Expand All @@ -233,8 +240,9 @@ def test_output_multi_series_hfc_lpo_false(self, config):
# expected residuals values of shape (n time steps, n components, n samples=1) per forecast
scores_exp = []
for i in range(len(hfc)):
num_fcs = len(hfc[i][0])
scores_exp.append(
np.array([score_exp[i][:n_comps]] * 10).reshape(n_ts, -1, 1)
np.array([score_exp[i][:n_comps]] * num_fcs).reshape(num_fcs, -1, 1)
)

model = NaiveDrift()
Expand Down

0 comments on commit d103a05

Please sign in to comment.