From 184bd635e1aea6bd1dd0ac7fa2339257b9ca6bdb Mon Sep 17 00:00:00 2001 From: Grzegorz Skorupa Date: Mon, 17 Apr 2023 12:20:31 +0200 Subject: [PATCH] Fix numerical issues that resulted in failing box-cox transformation --- requirements.txt | 53 ++++++++++++++++------- requirements_stable.txt | 77 +++++++++++++++++----------------- tbats/__init__.py | 2 +- tbats/abstract/SeedFinder.py | 6 +++ tbats/transformation/BoxCox.py | 6 +-- test/tbats/TBATS_test.py | 17 ++++++++ 6 files changed, 104 insertions(+), 57 deletions(-) diff --git a/requirements.txt b/requirements.txt index e92055d..6b4f7ae 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,35 +1,60 @@ # -# This file is autogenerated by pip-compile with python 3.9 -# To update, run: +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: # # pip-compile --output-file=requirements.txt setup.py # -cython==0.29.2 +cython==0.29.34 # via pmdarima -numpy==1.15.4 +joblib==1.2.0 + # via + # pmdarima + # scikit-learn +numpy==1.24.2 # via # pandas + # patsy # pmdarima # scikit-learn + # scipy + # statsmodels # tbats (setup.py) -pandas==0.23.4 - # via pmdarima -pmdarima==1.0.0 +packaging==23.1 + # via statsmodels +pandas==2.0.0 + # via + # pmdarima + # statsmodels +patsy==0.5.3 + # via statsmodels +pmdarima==2.0.3 # via tbats (setup.py) -python-dateutil==2.7.5 +python-dateutil==2.8.2 # via pandas -pytz==2018.7 +pytz==2023.3 # via pandas -scikit-learn==0.20.2 +scikit-learn==1.2.2 # via # pmdarima # tbats (setup.py) -scipy==1.2.0 +scipy==1.10.1 # via # pmdarima # scikit-learn + # statsmodels # tbats (setup.py) -six==1.12.0 - # via python-dateutil -statsmodels==0.9.0 +six==1.16.0 + # via + # patsy + # python-dateutil +statsmodels==0.13.5 + # via pmdarima +threadpoolctl==3.1.0 + # via scikit-learn +tzdata==2023.3 + # via pandas +urllib3==1.26.15 # via pmdarima + +# The following packages are considered to be unsafe in a requirements file: +# setuptools diff --git a/requirements_stable.txt b/requirements_stable.txt index a197c8b..02abbf5 100644 --- a/requirements_stable.txt +++ b/requirements_stable.txt @@ -1,55 +1,54 @@ -attrs==22.1.0 -bleach==5.0.1 -build==0.8.0 -certifi==2022.9.24 +bleach==6.0.0 +build==0.10.0 +certifi==2022.12.7 cffi==1.15.1 -charset-normalizer==2.1.1 +charset-normalizer==3.1.0 click==8.1.3 -commonmark==0.9.1 -Cython==0.29.32 +Cython==0.29.34 docutils==0.19 +exceptiongroup==1.1.1 idna==3.4 -importlib-metadata==5.0.0 -iniconfig==1.1.1 +importlib-metadata==6.4.1 +iniconfig==2.0.0 jaraco.classes==3.2.3 Jinja2==3.1.2 joblib==1.2.0 -keyring==23.9.3 -MarkupSafe==2.1.1 -more-itertools==8.14.0 -numpy==1.23.3 -packaging==21.3 -pandas==1.5.0 -patsy==0.5.2 -pep517==0.13.0 -pip-tools==6.8.0 -pkginfo==1.8.3 +keyring==23.13.1 +markdown-it-py==2.2.0 +MarkupSafe==2.1.2 +mdurl==0.1.2 +more-itertools==9.1.0 +numpy==1.24.2 +packaging==23.1 +pandas==2.0.0 +patsy==0.5.3 +pip-tools==6.13.0 +pkginfo==1.9.6 pluggy==1.0.0 -pmdarima==2.0.1 -py==1.11.0 +pmdarima==2.0.3 pycparser==2.21 -Pygments==2.13.0 -pyparsing==3.0.9 -pytest==7.1.3 +Pygments==2.15.0 +pyproject_hooks==1.0.0 +pytest==7.3.1 python-dateutil==2.8.2 -pytz==2022.4 +pytz==2023.3 pytz-deprecation-shim==0.1.0.post0 -readme-renderer==37.2 -requests==2.28.1 -requests-toolbelt==0.9.1 +readme-renderer==37.3 +requests==2.28.2 +requests-toolbelt==0.10.1 rfc3986==2.0.0 -rich==12.6.0 -rpy2==3.5.4 -scikit-learn==1.1.2 -scipy==1.9.1 +rich==13.3.4 +rpy2==3.5.11 +scikit-learn==1.2.2 +scipy==1.10.1 six==1.16.0 -statsmodels==0.13.2 --e git+ssh://git@github.com/intive-DataScience/tbats.git@b654a0ebaa2d105f9b1aea410ddddd3e13045fb4#egg=tbats +statsmodels==0.13.5 +-e git+ssh://git@github.com/intive-DataScience/tbats.git@1e172cefdfa51e5f7e8f697b58203bc5bca86702#egg=tbats threadpoolctl==3.1.0 tomli==2.0.1 -twine==4.0.1 -tzdata==2022.4 -tzlocal==4.2 -urllib3==1.26.12 +twine==4.0.2 +tzdata==2023.3 +tzlocal==4.3 +urllib3==1.26.15 webencodings==0.5.1 -zipp==3.8.1 +zipp==3.15.0 diff --git a/tbats/__init__.py b/tbats/__init__.py index e4c196a..35a1310 100644 --- a/tbats/__init__.py +++ b/tbats/__init__.py @@ -1,4 +1,4 @@ -__version__ = '1.1.2' +__version__ = '1.1.3' import tbats.abstract as abstract import tbats.bats as bats diff --git a/tbats/abstract/SeedFinder.py b/tbats/abstract/SeedFinder.py index 493f5d9..aa88c4e 100644 --- a/tbats/abstract/SeedFinder.py +++ b/tbats/abstract/SeedFinder.py @@ -24,6 +24,12 @@ def from_linear_regression_coefs_to_x0(self, linear_regression_coefs): def find(self, w_tilda, residuals): w_for_lr = self.to_matrix_for_linear_regression(w_tilda) + # this makes sure that coefficient for all zeroes dimension will be zero + # without this calculated coefficient may be very large and unrealistic + for i in range(w_for_lr.shape[1]): + if np.allclose(w_for_lr[:, i], 0): + w_for_lr[:, i] = [0] * len(w_for_lr) + linear_regression = LinearRegression(fit_intercept=False) coefs = np.asarray(linear_regression.fit(w_for_lr, residuals).coef_) return self.from_linear_regression_coefs_to_x0(coefs) diff --git a/tbats/transformation/BoxCox.py b/tbats/transformation/BoxCox.py index 0e7eb69..6ea9360 100644 --- a/tbats/transformation/BoxCox.py +++ b/tbats/transformation/BoxCox.py @@ -18,7 +18,7 @@ def boxcox(y, lam=None, seasonal_periods=None, bounds=(-1, 2)): copy=False, dtype=np.float64)) # type: np.ndarray if lam is None: lam = find_box_cox_lambda(y, seasonal_periods=seasonal_periods, bounds=bounds) - if lam <= 0 and np.any(y <= 0): + if (lam <= 0 or np.isclose(lam, 0)) and np.any(y <= 0): raise error.InputArgsException('y must have only positive values for box-cox transformation.') if np.isclose(0.0, lam): return np.log(y) @@ -28,11 +28,11 @@ def boxcox(y, lam=None, seasonal_periods=None, bounds=(-1, 2)): def inv_boxcox(y, lam, force_valid=False): y = c1d(check_array(y, ensure_2d=False, force_all_finite=True, ensure_min_samples=1, copy=False, dtype=np.float64)) # type: np.ndarray + if np.isclose(0.0, lam): + return np.exp(y) if lam < 0 and force_valid: y[y > -1 / lam] = -1 / lam if lam < 0 and np.any(y > -1 / lam): raise error.InputArgsException('Not possible to transform back such y values.') - if np.isclose(0.0, lam): - return np.exp(y) yy = y * lam + 1 return np.sign(yy) * (np.abs(yy) ** (1 / lam)) \ No newline at end of file diff --git a/test/tbats/TBATS_test.py b/test/tbats/TBATS_test.py index 4988752..e1b6d95 100644 --- a/test/tbats/TBATS_test.py +++ b/test/tbats/TBATS_test.py @@ -171,3 +171,20 @@ def test_fit_predict_trigonometric_seasonal(self, seasonal_periods, seasonal_har # forecast should be close to actual y_predicted = fitted_model.forecast(steps=steps) assert np.allclose(y_to_predict, y_predicted, 0.2) + + def test_no_numeric_regression_issues(self): + """ + Test https://github.com/intive-DataScience/tbats/issues/40. + + The issue was caused by numeric precision issue for harmonics that start as 0 but are regressed + to absurd high values. + """ + y = [ + 4140, 4510, 4378, 5010, 5222, 6260, 5094, 5854, 6010, 6428, 5890, 6414, + 6346, 6034, 6770, 7450, 7266, 6788, 6162, 8476, 6480, 5872, 5810, 6522, + 6444, 6110, 5778, 6068, 6018, 6174, 5202, 4820, 5114, 5686, 4946, + ] + estimator = TBATS(seasonal_periods=[2], n_jobs=1, use_box_cox=True) + _ = estimator.fit(y) # should not fail + +