Skip to content

Commit

Permalink
Fix numerical issues that caused the Box-Cox transformation to fail
Browse files Browse the repository at this point in the history
  • Loading branch information
cotterpl committed Apr 17, 2023
1 parent 1e172ce commit 184bd63
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 57 deletions.
53 changes: 39 additions & 14 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,35 +1,60 @@
#
# This file is autogenerated by pip-compile with python 3.9
# To update, run:
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --output-file=requirements.txt setup.py
#
cython==0.29.2
cython==0.29.34
# via pmdarima
numpy==1.15.4
joblib==1.2.0
# via
# pmdarima
# scikit-learn
numpy==1.24.2
# via
# pandas
# patsy
# pmdarima
# scikit-learn
# scipy
# statsmodels
# tbats (setup.py)
pandas==0.23.4
# via pmdarima
pmdarima==1.0.0
packaging==23.1
# via statsmodels
pandas==2.0.0
# via
# pmdarima
# statsmodels
patsy==0.5.3
# via statsmodels
pmdarima==2.0.3
# via tbats (setup.py)
python-dateutil==2.7.5
python-dateutil==2.8.2
# via pandas
pytz==2018.7
pytz==2023.3
# via pandas
scikit-learn==0.20.2
scikit-learn==1.2.2
# via
# pmdarima
# tbats (setup.py)
scipy==1.2.0
scipy==1.10.1
# via
# pmdarima
# scikit-learn
# statsmodels
# tbats (setup.py)
six==1.12.0
# via python-dateutil
statsmodels==0.9.0
six==1.16.0
# via
# patsy
# python-dateutil
statsmodels==0.13.5
# via pmdarima
threadpoolctl==3.1.0
# via scikit-learn
tzdata==2023.3
# via pandas
urllib3==1.26.15
# via pmdarima

# The following packages are considered to be unsafe in a requirements file:
# setuptools
77 changes: 38 additions & 39 deletions requirements_stable.txt
Original file line number Diff line number Diff line change
@@ -1,55 +1,54 @@
attrs==22.1.0
bleach==5.0.1
build==0.8.0
certifi==2022.9.24
bleach==6.0.0
build==0.10.0
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==2.1.1
charset-normalizer==3.1.0
click==8.1.3
commonmark==0.9.1
Cython==0.29.32
Cython==0.29.34
docutils==0.19
exceptiongroup==1.1.1
idna==3.4
importlib-metadata==5.0.0
iniconfig==1.1.1
importlib-metadata==6.4.1
iniconfig==2.0.0
jaraco.classes==3.2.3
Jinja2==3.1.2
joblib==1.2.0
keyring==23.9.3
MarkupSafe==2.1.1
more-itertools==8.14.0
numpy==1.23.3
packaging==21.3
pandas==1.5.0
patsy==0.5.2
pep517==0.13.0
pip-tools==6.8.0
pkginfo==1.8.3
keyring==23.13.1
markdown-it-py==2.2.0
MarkupSafe==2.1.2
mdurl==0.1.2
more-itertools==9.1.0
numpy==1.24.2
packaging==23.1
pandas==2.0.0
patsy==0.5.3
pip-tools==6.13.0
pkginfo==1.9.6
pluggy==1.0.0
pmdarima==2.0.1
py==1.11.0
pmdarima==2.0.3
pycparser==2.21
Pygments==2.13.0
pyparsing==3.0.9
pytest==7.1.3
Pygments==2.15.0
pyproject_hooks==1.0.0
pytest==7.3.1
python-dateutil==2.8.2
pytz==2022.4
pytz==2023.3
pytz-deprecation-shim==0.1.0.post0
readme-renderer==37.2
requests==2.28.1
requests-toolbelt==0.9.1
readme-renderer==37.3
requests==2.28.2
requests-toolbelt==0.10.1
rfc3986==2.0.0
rich==12.6.0
rpy2==3.5.4
scikit-learn==1.1.2
scipy==1.9.1
rich==13.3.4
rpy2==3.5.11
scikit-learn==1.2.2
scipy==1.10.1
six==1.16.0
statsmodels==0.13.2
-e git+ssh://[email protected]/intive-DataScience/tbats.git@b654a0ebaa2d105f9b1aea410ddddd3e13045fb4#egg=tbats
statsmodels==0.13.5
-e git+ssh://[email protected]/intive-DataScience/tbats.git@1e172cefdfa51e5f7e8f697b58203bc5bca86702#egg=tbats
threadpoolctl==3.1.0
tomli==2.0.1
twine==4.0.1
tzdata==2022.4
tzlocal==4.2
urllib3==1.26.12
twine==4.0.2
tzdata==2023.3
tzlocal==4.3
urllib3==1.26.15
webencodings==0.5.1
zipp==3.8.1
zipp==3.15.0
2 changes: 1 addition & 1 deletion tbats/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '1.1.2'
__version__ = '1.1.3'

import tbats.abstract as abstract
import tbats.bats as bats
Expand Down
6 changes: 6 additions & 0 deletions tbats/abstract/SeedFinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ def from_linear_regression_coefs_to_x0(self, linear_regression_coefs):
def find(self, w_tilda, residuals):
    """Fit a no-intercept linear regression of ``residuals`` on ``w_tilda``.

    ``w_tilda`` is first converted with ``to_matrix_for_linear_regression``;
    the fitted coefficients are then passed through
    ``from_linear_regression_coefs_to_x0`` and that result is returned.
    """
    design = self.to_matrix_for_linear_regression(w_tilda)

    # Columns that are numerically all-zero are overwritten with exact zeros
    # so that their fitted coefficient is zero. Without this the regression
    # may assign such a column a very large, unrealistic coefficient.
    near_zero_columns = np.isclose(np.asarray(design), 0).all(axis=0)
    design[:, near_zero_columns] = 0

    model = LinearRegression(fit_intercept=False)
    model.fit(design, residuals)
    return self.from_linear_regression_coefs_to_x0(np.asarray(model.coef_))
6 changes: 3 additions & 3 deletions tbats/transformation/BoxCox.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def boxcox(y, lam=None, seasonal_periods=None, bounds=(-1, 2)):
copy=False, dtype=np.float64)) # type: np.ndarray
if lam is None:
lam = find_box_cox_lambda(y, seasonal_periods=seasonal_periods, bounds=bounds)
if lam <= 0 and np.any(y <= 0):
if (lam <= 0 or np.isclose(lam, 0)) and np.any(y <= 0):
raise error.InputArgsException('y must have only positive values for box-cox transformation.')
if np.isclose(0.0, lam):
return np.log(y)
Expand All @@ -28,11 +28,11 @@ def boxcox(y, lam=None, seasonal_periods=None, bounds=(-1, 2)):
# Inverse Box-Cox transformation of `y` for the parameter `lam`.
#
# y:           array-like input; validated via check_array (finite values,
#              at least one sample, float64) and then passed through c1d.
# lam:         Box-Cox lambda.
# force_valid: when True and lam < 0, values above -1 / lam are clipped to
#              -1 / lam instead of triggering the exception below.
#
# Returns exp(y) when lam is numerically zero, otherwise
# sign(y * lam + 1) * |y * lam + 1| ** (1 / lam).
# Raises error.InputArgsException when lam < 0 and some y exceeds -1 / lam.
def inv_boxcox(y, lam, force_valid=False):
y = c1d(check_array(y, ensure_2d=False, force_all_finite=True, ensure_min_samples=1,
copy=False, dtype=np.float64)) # type: np.ndarray
# lam numerically zero: the inverse transform is the plain exponential.
if np.isclose(0.0, lam):
return np.exp(y)
# Clip values above -1 / lam to that bound; this assigns into `y` itself.
# NOTE(review): copy=False is passed to check_array above, so this may also
# modify the caller's array - confirm.
if lam < 0 and force_valid:
y[y > -1 / lam] = -1 / lam
# Without clipping, any value above -1 / lam is rejected.
if lam < 0 and np.any(y > -1 / lam):
raise error.InputArgsException('Not possible to transform back such y values.')
# NOTE(review): this repeats the lam ~ 0 check at the top of the function and
# cannot be reached after it. It looks like both sides of a diff hunk rendered
# together - confirm which position is the intended one.
if np.isclose(0.0, lam):
return np.exp(y)
# General case: invert via yy = y * lam + 1, keeping the sign of yy.
yy = y * lam + 1
return np.sign(yy) * (np.abs(yy) ** (1 / lam))
17 changes: 17 additions & 0 deletions test/tbats/TBATS_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,20 @@ def test_fit_predict_trigonometric_seasonal(self, seasonal_periods, seasonal_har
# forecast should be close to actual
y_predicted = fitted_model.forecast(steps=steps)
assert np.allclose(y_to_predict, y_predicted, 0.2)

def test_no_numeric_regression_issues(self):
    """
    Regression test for https://github.com/intive-DataScience/tbats/issues/40.

    The failure was caused by a numeric precision problem: harmonics that
    start at 0 were regressed to absurdly high values.
    """
    observations = [
        4140, 4510, 4378, 5010, 5222, 6260, 5094, 5854, 6010, 6428, 5890, 6414,
        6346, 6034, 6770, 7450, 7266, 6788, 6162, 8476, 6480, 5872, 5810, 6522,
        6444, 6110, 5778, 6068, 6018, 6174, 5202, 4820, 5114, 5686, 4946,
    ]
    model = TBATS(seasonal_periods=[2], n_jobs=1, use_box_cox=True)
    # Fitting must complete without raising; the fitted model is not inspected.
    model.fit(observations)


0 comments on commit 184bd63

Please sign in to comment.