diff --git a/doc/releasenotes.rst b/doc/releasenotes.rst index f27c725bb..2b6f0deda 100644 --- a/doc/releasenotes.rst +++ b/doc/releasenotes.rst @@ -8,7 +8,7 @@ Release Notes .......... -0.9.25 (2020-01-07) +0.9.25 (2020-01-08) ------------------- * Add summary statistics callback plot function (#231). @@ -20,6 +20,7 @@ Release Notes * Log stopping reason in ABCSMC.run (all #236). * Implement Poisson (#237) and negative binomial (#239) stochastic kernels. * Enable password protection for Redis sampler (#238). +* Fix scipy deprecations (#234, #241). 0.9.24 (2019-11-19) diff --git a/pyabc/distance/distance.py b/pyabc/distance/distance.py index dd75a2fb1..b9cb65f58 100644 --- a/pyabc/distance/distance.py +++ b/pyabc/distance/distance.py @@ -1,5 +1,3 @@ -import scipy as sp - import numpy as np from scipy import linalg as la from typing import List, Callable, Union @@ -658,10 +656,10 @@ def __init__(self, measures_to_use='all'): self._whitening_transformation_matrix = None def _dict_to_vect(self, x): - return sp.asarray([x[key] for key in self.measures_to_use]) + return np.asarray([x[key] for key in self.measures_to_use]) def _calculate_whitening_transformation_matrix(self, sum_stats): - samples_vec = sp.asarray([self._dict_to_vect(x) + samples_vec = np.asarray([self._dict_to_vect(x) for x in sum_stats]) # samples_vec is an array of shape nr_samples x nr_features means = samples_vec.mean(axis=0) @@ -669,7 +667,7 @@ def _calculate_whitening_transformation_matrix(self, sum_stats): covariance = centered.T.dot(centered) w, v = la.eigh(covariance) self._whitening_transformation_matrix = ( - v.dot(sp.diag(1. / sp.sqrt(w))).dot(v.T)) + v.dot(np.diag(1. / np.sqrt(w))).dot(v.T)) def initialize(self, t: int, @@ -813,12 +811,12 @@ class PercentileDistance(RangeEstimatorDistance): @staticmethod def upper(parameter_list): - return sp.percentile(parameter_list, + return np.percentile(parameter_list, 100 - PercentileDistance.PERCENTILE) @staticmethod def lower(parameter_list): - return sp.percentile(parameter_list, + return np.percentile(parameter_list, PercentileDistance.PERCENTILE) def get_config(self): diff --git a/pyabc/epsilon/epsilon.py b/pyabc/epsilon/epsilon.py index e5a942740..1bca9fa34 100644 --- a/pyabc/epsilon/epsilon.py +++ b/pyabc/epsilon/epsilon.py @@ -1,4 +1,4 @@ -import scipy as sp +import numpy as np import pandas as pd import logging from typing import Callable, List, Union @@ -218,7 +218,7 @@ def _update(self, weights /= weights.sum() else: len_distances = len(distances) - weights = sp.ones(len_distances) / len_distances + weights = np.ones(len_distances) / len_distances # compute weighted quantile quantile = weighted_quantile( diff --git a/pyabc/pyabc_rand_choice.py b/pyabc/pyabc_rand_choice.py index ec6660e00..f8eca3756 100644 --- a/pyabc/pyabc_rand_choice.py +++ b/pyabc/pyabc_rand_choice.py @@ -1,4 +1,4 @@ -import scipy as sp +import numpy as np def fast_random_choice(weights): @@ -9,7 +9,7 @@ def fast_random_choice(weights): of a factor of 2 """ cs = 0 - u = sp.random.rand() + u = np.random.rand() for k in range(weights.size): cs += weights[k] if u <= cs: diff --git a/pyabc/storage/history.py b/pyabc/storage/history.py index c96f5824b..36c2a71e2 100644 --- a/pyabc/storage/history.py +++ b/pyabc/storage/history.py @@ -4,7 +4,6 @@ import json import numpy as np import pandas as pd -import scipy as sp from sqlalchemy import func from sqlalchemy.orm import subqueryload from functools import wraps @@ -911,7 +910,7 @@ def get_weighted_sum_stats_for_model(self, m: int = 0, t: int = None) \ for ss in sample.summary_statistics: sum_stats[ss.name] = ss.value results.append(sum_stats) - return sp.array(weights), results + return np.array(weights), results @with_session def get_weighted_sum_stats(self, t: int = None) \ diff --git a/pyabc/transition/local_transition.py b/pyabc/transition/local_transition.py index 1db34313b..1a005beb7 100644 --- a/pyabc/transition/local_transition.py +++ b/pyabc/transition/local_transition.py @@ -1,5 +1,5 @@ import numpy.linalg as la -import scipy as sp +import numpy as np import pandas as pd from .base import Transition from scipy.spatial import cKDTree @@ -84,29 +84,29 @@ def fit(self, X, w): covs, inv_covs, dets = list(zip(*[self._cov_and_inv(n, indices) for n in range(X.shape[0])])) - self.covs = sp.array(covs) - self.inv_covs = sp.array(inv_covs) - self.determinants = sp.array(dets) + self.covs = np.array(covs) + self.inv_covs = np.array(inv_covs) + self.determinants = np.array(dets) - self.normalization = sp.sqrt( - (2 * sp.pi) ** self.X_arr.shape[1] * self.determinants) + self.normalization = np.sqrt( + (2 * np.pi) ** self.X_arr.shape[1] * self.determinants) - if not sp.isreal(self.normalization).all(): + if not np.isreal(self.normalization).all(): raise Exception("Normalization not real") - self.normalization = sp.real(self.normalization) + self.normalization = np.real(self.normalization) def pdf(self, x): x = x[self.X.columns].values if len(x.shape) == 1: return self._pdf_single(x) else: - return sp.array([self._pdf_single(x) for x in x]) + return np.array([self._pdf_single(x) for x in x]) def _pdf_single(self, x): distance = self.X_arr - x - cov_distance = sp.einsum("ij,ijk,ik->i", + cov_distance = np.einsum("ij,ijk,ik->i", distance, self.inv_covs, distance) - return sp.average(sp.exp(-.5 * cov_distance) / self.normalization, + return np.average(np.exp(-.5 * cov_distance) / self.normalization, weights=self.w) def _cov_and_inv(self, n, indices): @@ -117,7 +117,7 @@ def _cov_and_inv(self, n, indices): cov = self._cov(indices, n) det = la.det(cov) while det <= 0: - cov += sp.identity(cov.shape[0]) * self.EPS + cov += np.identity(cov.shape[0]) * self.EPS det = la.det(cov) inv_cov = la.inv(cov) return cov, inv_cov, det @@ -129,17 +129,17 @@ def _cov(self, indices, n): - self.X_arr[n]) local_weights = self.w[surrounding_indices] else: - nearest_vector_deltas = sp.absolute(self.X_arr) - local_weights = sp.array([1]) + nearest_vector_deltas = np.absolute(self.X_arr) + local_weights = np.array([1]) cov = smart_cov(nearest_vector_deltas, local_weights / local_weights.sum()) - if sp.absolute(cov.sum()) == 0: + if np.absolute(cov.sum()) == 0: for k in range(cov.shape[0]): - cov[k, k] = sp.absolute(self.X_arr[0, k]) + cov[k, k] = np.absolute(self.X_arr[0, k]) return cov * self.scaling def rvs_single(self): - support_index = sp.random.choice(self.w.shape[0], p=self.w) - sample = sp.random.multivariate_normal(self.X_arr[support_index], + support_index = np.random.choice(self.w.shape[0], p=self.w) + sample = np.random.multivariate_normal(self.X_arr[support_index], self.covs[support_index]) return pd.Series(sample, index=self.X.columns) diff --git a/test/test_bytesstorage.py b/test/test_bytesstorage.py index 1af79db39..1b3c98897 100644 --- a/test/test_bytesstorage.py +++ b/test/test_bytesstorage.py @@ -3,7 +3,6 @@ from pyabc.storage.numpy_bytes_storage import _primitive_types import pandas as pd import numpy as np -import scipy as sp from rpy2.robjects import r import rpy2.robjects as robjects from rpy2.robjects import pandas2ri @@ -38,24 +37,24 @@ def object_(request): if par == "empty": return pd.DataFrame() if par == "df-int": - return pd.DataFrame({"a": sp.random.randint(-20, 20, 100), - "b": sp.random.randint(-20, 20, 100)}) + return pd.DataFrame({"a": np.random.randint(-20, 20, 100), + "b": np.random.randint(-20, 20, 100)}) if par == "df-float": - return pd.DataFrame({"a": sp.randn(100), - "b": sp.randn(100)}) + return pd.DataFrame({"a": np.random.randn(100), + "b": np.random.randn(100)}) if par == "df-non_numeric_str": return pd.DataFrame({"a": ["foo", "bar"], "b": ["bar", "foo"]}) if par == "df-numeric_str": - return pd.DataFrame({"a": list(map(str, sp.randn(100))), + return pd.DataFrame({"a": list(map(str, np.random.randn(100))), "b": list(map(str, - sp.random.randint(-20, 20, 100)))}) + np.random.randint(-20, 20, 100)))}) if par == "df-int-float-numeric_str": - return pd.DataFrame({"a": sp.random.randint(-20, 20, 100), - "b": sp.randn(100), + return pd.DataFrame({"a": np.random.randint(-20, 20, 100), + "b": np.random.randn(100), "c": list(map(str, - sp.random.randint(-20, 20, 100)))}) + np.random.randint(-20, 20, 100)))}) if par == "df-int-float-non_numeric_str-str_ind": return pd.DataFrame({"a": [1, 2], "b": [1.1, 2.2], @@ -69,7 +68,7 @@ def object_(request): if par == "series": return pd.Series({'a': 42, 'b': 3.8, 'c': 4.2}) if par == "series-no_ind": - return pd.Series(sp.randn(10)) + return pd.Series(np.random.randn(10)) if par == "py-int": return 42 if par == "py-float": @@ -77,9 +76,9 @@ def object_(request): if par == "py-str": return "foo bar" if par == "np-int": - return sp.random.randint(-20, 20, 100) + return np.random.randint(-20, 20, 100) if par == "np-float": - return sp.random.randn(100) + return np.random.randn(100) if par == "np-str": return np.array(["foo", "bar"]) if par == "np-single-int": @@ -151,7 +150,7 @@ def _check_type(object_, rebuilt): def test_reference_parameter(): def model(parameter): - return {"data": parameter["mean"] + 0.5 * sp.randn()} + return {"data": parameter["mean"] + 0.5 * np.random.randn()} prior = pyabc.Distribution(p0=pyabc.RV("uniform", 0, 5), p1=pyabc.RV("uniform", 0, 1)) diff --git a/test/test_dataframeserialization.py b/test/test_dataframeserialization.py index 17e4c1b49..db19438da 100644 --- a/test/test_dataframeserialization.py +++ b/test/test_dataframeserialization.py @@ -1,7 +1,7 @@ import pytest from pyabc.storage.dataframe_bytes_storage import df_to_bytes, df_from_bytes import pandas as pd -import scipy as sp +import numpy as np @pytest.fixture(params=["empty", "int", "float", "non_numeric_str", @@ -13,24 +13,24 @@ def df(request): if par == "empty": return pd.DataFrame() if par == "int": - return pd.DataFrame({"a": sp.random.randint(-20, 20, 100), - "b": sp.random.randint(-20, 20, 100)}) + return pd.DataFrame({"a": np.random.randint(-20, 20, 100), + "b": np.random.randint(-20, 20, 100)}) if par == "float": - return pd.DataFrame({"a": sp.randn(100), - "b": sp.randn(100)}) + return pd.DataFrame({"a": np.random.randn(100), + "b": np.random.randn(100)}) if par == "non_numeric_str": return pd.DataFrame({"a": ["foo", "bar"], "b": ["bar", "foo"]}) if par == "numeric_str": - return pd.DataFrame({"a": list(map(str, sp.randn(100))), + return pd.DataFrame({"a": list(map(str, np.random.randn(100))), "b": list(map(str, - sp.random.randint(-20, 20, 100)))}) + np.random.randint(-20, 20, 100)))}) if par == "int-float-numeric_str": - return pd.DataFrame({"a": sp.random.randint(-20, 20, 100), - "b": sp.randn(100), + return pd.DataFrame({"a": np.random.randint(-20, 20, 100), + "b": np.random.randn(100), "c": list(map(str, - sp.random.randint(-20, 20, 100)))}) + np.random.randint(-20, 20, 100)))}) if par == "int-float-non_numeric_str-str_ind": return pd.DataFrame({"a": [1, 2], "b": [1.1, 2.2], diff --git a/test/test_distance_function.py b/test/test_distance_function.py index a66f6fec4..d3f8f1f2b 100644 --- a/test/test_distance_function.py +++ b/test/test_distance_function.py @@ -67,7 +67,7 @@ def test_single_parameter_percentile(): dist_f.initialize(0, abc.sample_from_prior) d = dist_f({"a": 1}, {"a": 2}) expected = ( - 1 / (sp.percentile([-3, 3, 10], 80) - sp.percentile([-3, 3, 10], 20)) + 1 / (np.percentile([-3, 3, 10], 80) - np.percentile([-3, 3, 10], 20)) ) assert expected == d