Skip to content

Commit

Permalink
Scalers: Add test for MinMaxScaler when a feature is entirely null
Browse files Browse the repository at this point in the history
  • Loading branch information
lucianolorenti committed Jun 17, 2024
1 parent 33e8146 commit 1ad630c
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 3 deletions.
2 changes: 0 additions & 2 deletions ceruleo/transformation/features/scalers.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,6 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
try:
divisor = self.data_max - self.data_min



mask = np.abs((divisor)) > 1e-25
X = X.astype(float)
X.loc[:, mask] = (
Expand Down
53 changes: 52 additions & 1 deletion tests/test_scalers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,61 @@
import numpy as np
import pandas as pd

from ceruleo.transformation.features.scalers import (RobustMinMaxScaler)
from ceruleo.transformation.features.scalers import (MinMaxScaler, RobustMinMaxScaler)
from sklearn.preprocessing import RobustScaler

class TestImputers():
def test_MinMaxScaler_withNonde(self):
    """Check MinMaxScaler output range when one column holds only ``None``.

    The scaler is fitted incrementally (two ``partial_fit`` calls) on
    frames whose column ``'a'`` is entirely ``None`` and whose column
    ``'b'`` is normally distributed around two different means. Both
    frames are then transformed and column ``'b'`` must land inside the
    configured ``(-1, 1)`` range.

    Only column ``'b'`` is asserted; the expected output for the all-None
    column ``'a'`` is not checked here.
    """
    # Seeded generator so that a failing run can be reproduced exactly.
    rng = np.random.default_rng(0)
    n_rows = 6000

    scaler = MinMaxScaler(range=(-1, 1), clip=False)

    df1 = pd.DataFrame({
        'a': [None] * n_rows,
        'b': rng.standard_normal(n_rows) * 5 + 25,
    })
    scaler.partial_fit(df1)

    df2 = pd.DataFrame({
        'a': [None] * n_rows,
        'b': rng.standard_normal(n_rows) * 3 + 88,
    })
    scaler.partial_fit(df2)

    scaled_df1 = scaler.transform(df1)
    scaled_df2 = scaler.transform(df2)

    # Both batches were seen during fitting, so both must be in range.
    # Previously scaled_df2 was computed but never verified.
    for scaled in (scaled_df1, scaled_df2):
        assert np.all(scaled['b'] >= -1) and np.all(scaled['b'] <= 1)


def test_MinMaxScaler(self):
    """Fit MinMaxScaler on two batches and verify the scaled value range.

    Two frames with columns ``'a'`` and ``'b'`` are fed to the scaler via
    ``partial_fit``; afterwards every column of both transformed frames
    must lie within the requested ``(-1, 1)`` range.
    """
    low, high = -1, 1
    n_rows = 6000
    scaler = MinMaxScaler(range=(low, high), clip=False)

    first_batch = pd.DataFrame({
        'a': np.random.randn(n_rows) * 5 + 25,
        'b': np.random.randn(n_rows) * 5 + 25,
    })
    scaler.partial_fit(first_batch)

    second_batch = pd.DataFrame({
        'a': np.random.randn(n_rows) * 3 + 15,
        'b': np.random.randn(n_rows) * 3 + 88,
    })
    scaler.partial_fit(second_batch)

    # Transform both batches up front, then check them in the same order
    # as before: first batch ('a', 'b'), then second batch ('a', 'b').
    transformed = [
        scaler.transform(first_batch),
        scaler.transform(second_batch),
    ]
    for frame in transformed:
        for column in ('a', 'b'):
            values = frame[column]
            assert np.all(values >= low) and np.all(values <= high)






def test_RobustMinMaxScaler(self):

Expand Down

0 comments on commit 1ad630c

Please sign in to comment.