-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
# Conflicts: # dist/lightautoml-0.3.8b1-py3-none-any.whl
- Loading branch information
Showing
8 changed files
with
2,053 additions
and
440 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,4 +2,4 @@ | |
from .matcher import Matcher | ||
|
||
|
||
__all__ = ["Matcher"] | ||
__all__ = ["Matcher"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
from ..matcher import Matcher | ||
|
||
__all__ = ["Matcher"] | ||
__all__ = ["Matcher"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,78 +1,72 @@ | ||
import pandas as pd | ||
import pytest | ||
|
||
from lightautoml.addons.hypex.ABTesting.ab_tester import AATest | ||
from lightautoml.addons.hypex.utils.tutorial_data_creation import create_test_data | ||
|
||
|
||
def test_aa_simple(): | ||
data = create_test_data(rs=52) | ||
info_col = "user_id" | ||
iterations = 20 | ||
@pytest.fixture | ||
def data(): | ||
return create_test_data(rs=52) | ||
|
||
model = AATest( | ||
data=data, | ||
target_fields=["pre_spends", "post_spends"], | ||
info_cols=info_col | ||
) | ||
res, datas_dict = model.search_dist_uniform_sampling(iterations=iterations) | ||
|
||
@pytest.fixture | ||
def iterations(): | ||
return 20 | ||
|
||
|
||
@pytest.fixture | ||
def info_col(): | ||
return "user_id" | ||
|
||
|
||
def test_aa_simple(data, iterations, info_col): | ||
model = AATest(target_fields=["pre_spends", "post_spends"], info_cols=info_col) | ||
res, datas_dict = model.search_dist_uniform_sampling(data, iterations=iterations) | ||
|
||
assert isinstance(res, pd.DataFrame), "Metrics are not dataframes" | ||
assert res.shape[0] == iterations, "Metrics dataframe contains more or less rows with random states " \ | ||
"(#rows should be equal #of experiments" | ||
assert info_col not in model.data, "Info_col is take part in experiment, it should be deleted in preprocess" | ||
assert res.shape[0] == iterations, ( | ||
"Metrics dataframe contains more or less rows with random states " "(#rows should be equal #of experiments" | ||
) | ||
assert isinstance(datas_dict, dict), "Result is not dict" | ||
assert len(datas_dict) == iterations, "# of dataframes is not equal # of iterations" | ||
assert all(data.columns) == all(datas_dict[0].drop(columns=['group']).columns), \ | ||
"Columns in the result are not the same as columns in initial data " | ||
assert all(data.columns) == all( | ||
datas_dict[0].drop(columns=["group"]).columns | ||
), "Columns in the result are not the same as columns in initial data " | ||
|
||
|
||
def test_aa_group(): | ||
data = create_test_data(rs=52) | ||
info_col = "user_id" | ||
group_cols = 'industry' | ||
iterations = 20 | ||
def test_aa_group(data, iterations, info_col): | ||
group_cols = "industry" | ||
|
||
model = AATest( | ||
data=data, | ||
target_fields=["pre_spends", "post_spends"], | ||
info_cols=info_col, | ||
group_cols=group_cols | ||
) | ||
res, datas_dict = model.search_dist_uniform_sampling(iterations=iterations) | ||
model = AATest(target_fields=["pre_spends", "post_spends"], info_cols=info_col, group_cols=group_cols) | ||
res, datas_dict = model.search_dist_uniform_sampling(data, iterations=iterations) | ||
|
||
assert isinstance(res, pd.DataFrame), "Metrics are not dataframes" | ||
assert res.shape[0] == iterations, "Metrics dataframe contains more or less rows with random states " \ | ||
"(#rows should be equal #of experiments" | ||
assert info_col not in model.data, "Info_col is take part in experiment, it should be deleted in preprocess" | ||
assert res.shape[0] == iterations, ( | ||
"Metrics dataframe contains more or less rows with random states " "(#rows should be equal #of experiments" | ||
) | ||
assert isinstance(datas_dict, dict), "Result is not dict" | ||
assert len(datas_dict) == iterations, "# of dataframes is not equal # of iterations" | ||
assert all(data.columns) == all(datas_dict[0].drop(columns=['group']).columns), "Columns in the result are not " \ | ||
"the same as columns in initial " \ | ||
"data " | ||
assert all(data.columns) == all(datas_dict[0].drop(columns=["group"]).columns), ( | ||
"Columns in the result are not " "the same as columns in initial " "data " | ||
) | ||
|
||
|
||
def test_aa_quantfields(): | ||
data = create_test_data(rs=52) | ||
info_col = "user_id" | ||
group_cols = 'industry' | ||
quant_field = 'gender' | ||
iterations = 20 | ||
def test_aa_quantfields(data, iterations, info_col): | ||
group_cols = "industry" | ||
quant_field = "gender" | ||
|
||
model = AATest( | ||
data=data, | ||
target_fields=["pre_spends", "post_spends"], | ||
info_cols=info_col, | ||
group_cols=group_cols, | ||
quant_field=quant_field | ||
target_fields=["pre_spends", "post_spends"], info_cols=info_col, group_cols=group_cols, quant_field=quant_field | ||
) | ||
res, datas_dict = model.search_dist_uniform_sampling(iterations=iterations) | ||
res, datas_dict = model.search_dist_uniform_sampling(data, iterations=iterations) | ||
|
||
assert isinstance(res, pd.DataFrame), "Metrics are not dataframes" | ||
assert res.shape[0] == iterations, "Metrics dataframe contains more or less rows with random states " \ | ||
"(#rows should be equal #of experiments" | ||
assert info_col not in model.data, "Info_col is take part in experiment, it should be deleted in preprocess" | ||
assert res.shape[0] == iterations, ( | ||
"Metrics dataframe contains more or less rows with random states " "(#rows should be equal #of experiments" | ||
) | ||
assert isinstance(datas_dict, dict), "Result is not dict" | ||
assert len(datas_dict) == iterations, "# of dataframes is not equal # of iterations" | ||
assert all(data.columns) == all(datas_dict[0].drop(columns=['group']).columns), "Columns in the result are not " \ | ||
"the same as columns in initial " \ | ||
"data " | ||
|
||
assert all(data.columns) == all(datas_dict[0].drop(columns=["group"]).columns), ( | ||
"Columns in the result are not " "the same as columns in initial " "data " | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,69 +1,92 @@ | ||
from lightautoml.addons.hypex.ABTesting.ab_tester import ABTest | ||
from lightautoml.addons.hypex.utils.tutorial_data_creation import create_test_data | ||
|
||
|
||
# def test_split_ab(): | ||
# data = create_test_data() | ||
# half_data = int(data.shape[0] / 2) | ||
# data['group'] = ['test'] * half_data + ['control'] * half_data | ||
# | ||
# group_field = 'group' | ||
# | ||
# model = ABTest() | ||
# splitted_data = model.split_ab(data, group_field) | ||
# | ||
# assert isinstance(splitted_data, dict), "result of split_ab is not dict" | ||
# assert len(splitted_data) == 2, "split_ab contains not of 2 values" | ||
# assert list(splitted_data.keys()) == ['test', 'control'], "changed keys in result of split_ab" | ||
# | ||
# | ||
# def test_calc_difference(): | ||
# data = create_test_data() | ||
# half_data = int(data.shape[0] / 2) | ||
# data['group'] = ['test'] * half_data + ['control'] * half_data | ||
# | ||
# group_field = 'group' | ||
# target_field = 'post_spends' | ||
# | ||
# model = ABTest() | ||
# splitted_data = model.split_ab(data, group_field) | ||
# differences = model.calc_difference(splitted_data, target_field) | ||
# | ||
# assert isinstance(differences, dict), "result of calc_difference is not dict" | ||
|
||
|
||
def test_calc_p_value(): | ||
data = create_test_data() | ||
half_data = int(data.shape[0] / 2) | ||
data['group'] = ['test'] * half_data + ['control'] * half_data | ||
|
||
group_field = 'group' | ||
target_field = 'post_spends' | ||
|
||
model = ABTest() | ||
splitted_data = model.split_ab(data, group_field) | ||
pvalues = model.calc_p_value(splitted_data, target_field) | ||
|
||
assert isinstance(pvalues, dict), "result of calc_p_value is not dict" | ||
|
||
|
||
def test_execute(): | ||
data = create_test_data() | ||
half_data = int(data.shape[0] / 2) | ||
data['group'] = ['test'] * half_data + ['control'] * half_data | ||
|
||
target_field = 'post_spends' | ||
target_field_before = 'pre_spends' | ||
group_field = 'group' | ||
|
||
model = ABTest() | ||
result = model.execute( | ||
data=data, | ||
target_field=target_field, | ||
target_field_before=target_field_before, | ||
group_field=group_field | ||
|
||
import pytest | ||
import pandas as pd | ||
import numpy as np | ||
|
||
DATA_SIZE = 100 | ||
|
||
|
||
@pytest.fixture | ||
def ab_test(): | ||
return ABTest() | ||
|
||
|
||
@pytest.fixture | ||
def data(): | ||
# Generate synthetic data for group A | ||
group_a_data = np.random.normal(loc=10, scale=2, size=DATA_SIZE) | ||
# Generate synthetic data for group B | ||
group_b_data = np.random.normal(loc=12, scale=2, size=DATA_SIZE) | ||
group_bp_data = np.random.normal(loc=10, scale=2, size=DATA_SIZE * 2) | ||
return pd.DataFrame( | ||
{ | ||
"group": ["control"] * len(group_a_data) + ["test"] * len(group_b_data), | ||
"value": list(group_a_data) + list(group_b_data), | ||
"previous_value": group_bp_data, | ||
} | ||
) | ||
|
||
assert isinstance(result, dict), "result of func execution is not dict" | ||
assert len(result) == 3, "result of execution is changed, len of dict was 3" | ||
assert list(result.keys()) == ['size', 'difference', 'p_value'] | ||
|
||
@pytest.fixture | ||
def target_field(): | ||
return "value" | ||
|
||
|
||
@pytest.fixture | ||
def group_field(): | ||
return "group" | ||
|
||
|
||
@pytest.fixture | ||
def previous_value(): | ||
return "previous_value" | ||
|
||
|
||
@pytest.fixture | ||
def alpha(): | ||
return 0.05 | ||
|
||
|
||
def test_split_ab(ab_test, data, group_field): | ||
result = ab_test.split_ab(data, group_field) | ||
assert len(result["test"]) == DATA_SIZE | ||
assert len(result["control"]) == DATA_SIZE | ||
|
||
|
||
def test_calc_difference(ab_test, data, group_field, target_field, previous_value): | ||
splitted_data = ab_test.split_ab(data, group_field) | ||
result = ab_test.calc_difference(splitted_data, target_field, previous_value) | ||
assert 1 < result["ate"] < 3 | ||
assert 1 < result["cuped"] < 3 | ||
assert 1 < result["diff_in_diff"] < 3 | ||
|
||
|
||
def test_calc_difference_with_previous_value(ab_test, data, group_field, target_field, previous_value): | ||
ab_test.calc_difference_method = "ate" | ||
splitted_data = ab_test.split_ab(data, group_field) | ||
result = ab_test.calc_difference(splitted_data, previous_value) | ||
assert -1 < result["ate"] < 1 | ||
|
||
|
||
def test_calc_p_value(ab_test, data, group_field, target_field, previous_value, alpha): | ||
splitted_data = ab_test.split_ab(data, group_field) | ||
result = ab_test.calc_p_value(splitted_data, target_field) | ||
assert result["t_test"] < alpha | ||
assert result["mann_whitney"] < alpha | ||
|
||
result = ab_test.calc_p_value(splitted_data, previous_value) | ||
assert result["t_test"] > alpha | ||
assert result["mann_whitney"] > alpha | ||
|
||
|
||
def test_execute(ab_test, data, group_field, target_field, previous_value, alpha): | ||
result = ab_test.execute(data, target_field, group_field, previous_value) | ||
print(result) | ||
assert result["size"]["test"] == DATA_SIZE | ||
assert result["size"]["control"] == DATA_SIZE | ||
assert 1 < result["difference"]["ate"] < 3 | ||
assert 1 < result["difference"]["cuped"] < 3 | ||
assert 1 < result["difference"]["diff_in_diff"] < 3 | ||
assert result["p_value"]["t_test"] < alpha | ||
assert result["p_value"]["mann_whitney"] < alpha |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters