From e601c2dd116858e3173d845da4ae7157843364e4 Mon Sep 17 00:00:00 2001 From: Stefano Zamboni <39366866+SteZamboni@users.noreply.github.com> Date: Fri, 19 Jul 2024 09:01:35 +0200 Subject: [PATCH 1/2] fix: added psi to enum drift dto (#124) --- api/app/models/metrics/drift_dto.py | 1 + 1 file changed, 1 insertion(+) diff --git a/api/app/models/metrics/drift_dto.py b/api/app/models/metrics/drift_dto.py index 09ada9b2..cee2d971 100644 --- a/api/app/models/metrics/drift_dto.py +++ b/api/app/models/metrics/drift_dto.py @@ -10,6 +10,7 @@ class DriftAlgorithm(str, Enum): KS = 'KS' CHI2 = 'CHI2' + PSI = 'PSI' class FeatureDriftCalculation(BaseModel): From 62f3852cece909311e145fa6e1e5a16ee249e094 Mon Sep 17 00:00:00 2001 From: Stefano Zamboni <39366866+SteZamboni@users.noreply.github.com> Date: Fri, 19 Jul 2024 09:09:42 +0200 Subject: [PATCH 2/2] feat: added CHI2 drift to float columns when cardinality < 15 (#121) * feat: added CHI2 drift to float columns when cardinality < 15 * feat: fixed tests * feat: removed print --- spark/jobs/metrics/drift_calculator.py | 53 +++++++++++++++---- .../tests/results/drift_calculator_results.py | 48 ----------------- spark/tests/results/jobs_results.py | 8 +-- .../results/regression_current_results.py | 4 -- 4 files changed, 48 insertions(+), 65 deletions(-) diff --git a/spark/jobs/metrics/drift_calculator.py b/spark/jobs/metrics/drift_calculator.py index ad056477..1a7fadff 100644 --- a/spark/jobs/metrics/drift_calculator.py +++ b/spark/jobs/metrics/drift_calculator.py @@ -63,18 +63,53 @@ def calculate_drift( for column in float_features: feature_dict_to_append = { "feature_name": column, - "drift_calc": { - "type": "KS", - }, + "drift_calc": {}, } - result_tmp = ks.test(column, column) - feature_dict_to_append["drift_calc"]["value"] = float( - result_tmp["ks_statistic"] + unique_values_ref = ( + reference_dataset.reference.select(column) + .distinct() + .rdd.flatMap(lambda x: x) + .collect() ) - feature_dict_to_append["drift_calc"]["has_drift"] = bool( - result_tmp["ks_statistic"] > result_tmp["critical_value"] + unique_values_cur = ( + current_dataset.current.select(column) + .distinct() + .rdd.flatMap(lambda x: x) + .collect() ) - drift_result["feature_metrics"].append(feature_dict_to_append) + unique_values_refcur = unique_values_ref + unique_values_cur + lookup = set() + unique_values_tot = [ + x + for x in unique_values_refcur + if x is not None and x not in lookup and lookup.add(x) is None + ] + if len(unique_values_tot) < 15: + feature_dict_to_append["drift_calc"]["type"] = "CHI2" + if ( + reference_dataset.reference_count > 5 + and current_dataset.current_count > 5 + ): + result_tmp = chi2.test(column, column) + feature_dict_to_append["drift_calc"]["value"] = float( + result_tmp["pValue"] + ) + feature_dict_to_append["drift_calc"]["has_drift"] = bool( + result_tmp["pValue"] <= 0.05 + ) + else: + feature_dict_to_append["drift_calc"]["value"] = None + feature_dict_to_append["drift_calc"]["has_drift"] = False + else: + feature_dict_to_append["drift_calc"]["type"] = "KS" + result_tmp = ks.test(column, column) + feature_dict_to_append["drift_calc"]["value"] = float( + result_tmp["ks_statistic"] + ) + feature_dict_to_append["drift_calc"]["has_drift"] = bool( + result_tmp["ks_statistic"] > result_tmp["critical_value"] + ) + drift_result["feature_metrics"].append(feature_dict_to_append) int_features = [ int_f.name for int_f in reference_dataset.model.get_int_features() diff --git a/spark/tests/results/drift_calculator_results.py b/spark/tests/results/drift_calculator_results.py index 094914e0..f2ccdc46 100644 --- a/spark/tests/results/drift_calculator_results.py +++ b/spark/tests/results/drift_calculator_results.py @@ -16,14 +16,6 @@ "has_drift": False, }, }, - { - "feature_name": "num1", - "drift_calc": {"type": "KS", "value": 0.9, "has_drift": True}, - }, - { - "feature_name": "num2", - "drift_calc": {"type": "KS", "value": 0.3, "has_drift": False}, - }, ] } @@ -37,14 +29,6 @@ "feature_name": "cat2", "drift_calc": {"type": "CHI2", "value": None, "has_drift": False}, }, - { - "feature_name": "num1", - "drift_calc": {"type": "KS", "value": 0.75, "has_drift": False}, - }, - { - "feature_name": "num2", - "drift_calc": {"type": "KS", "value": 0.7, "has_drift": False}, - }, ] } @@ -66,14 +50,6 @@ "has_drift": True, }, }, - { - "feature_name": "num1", - "drift_calc": {"type": "KS", "value": 0.4, "has_drift": False}, - }, - { - "feature_name": "num2", - "drift_calc": {"type": "KS", "value": 0.3, "has_drift": False}, - }, ] } @@ -95,31 +71,11 @@ "has_drift": False, }, }, - { - "feature_name": "num1", - "drift_calc": { - "type": "KS", - "value": 0.9230769231, - "has_drift": True, - }, - }, - { - "feature_name": "num2", - "drift_calc": { - "type": "KS", - "value": 0.5384615385, - "has_drift": False, - }, - }, ] } test_drift_bike_res = { "feature_metrics": [ - { - "feature_name": "weathersit", - "drift_calc": {"type": "KS", "value": 0.6219091927, "has_drift": True}, - }, { "feature_name": "temp", "drift_calc": {"type": "KS", "value": 0.5259741552, "has_drift": True}, @@ -241,10 +197,6 @@ "feature_name": "primary_camera_front", "drift_calc": {"type": "KS", "value": 0.3139650146, "has_drift": True}, }, - { - "feature_name": "extended_upto", - "drift_calc": {"type": "KS", "value": 0.5237507289, "has_drift": True}, - }, { "feature_name": "price", "drift_calc": {"type": "PSI", "value": 0.0, "has_drift": False}, diff --git a/spark/tests/results/jobs_results.py b/spark/tests/results/jobs_results.py index ddf6bcfb..305b78bb 100644 --- a/spark/tests/results/jobs_results.py +++ b/spark/tests/results/jobs_results.py @@ -2,7 +2,7 @@ "MODEL_QUALITY": '{"global_metrics":{"f1":1.0,"accuracy":1.0,"weighted_precision":1.0,"weighted_recall":1.0,"weighted_true_positive_rate":1.0,"weighted_false_positive_rate":null,"weighted_f_measure":1.0,"true_positive_rate":1.0,"false_positive_rate":null,"precision":1.0,"recall":1.0,"f_measure":1.0,"true_positive_count":7,"false_positive_count":0,"true_negative_count":0,"false_negative_count":0,"area_under_roc":1.0,"area_under_pr":1.0},"grouped_metrics":{"f1":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"accuracy":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"weighted_precision":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"weighted_recall":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"weighted_true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"weighted_false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":null}],"weighted_f_measure":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":null}],"precision":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"recall":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"f_measure":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"area_under_roc":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"area_under_pr":[{"timestamp":"2024-06-16 00:00:00","value":1.0}]}}', "STATISTICS": '{"n_variables":8,"n_observations":7,"missing_cells":0,"missing_cells_perc":0.0,"duplicate_rows":0,"duplicate_rows_perc":0.0,"numeric":4,"categorical":3,"datetime":1}', "DATA_QUALITY": '{"n_observations":7,"class_metrics":[{"name":"1.0","count":7,"percentage":100.0},{"name":"0.0","count":0,"percentage":0.0}],"class_metrics_prediction":[{"name":"1.0","count":7,"percentage":100.0},{"name":"0.0","count":0,"percentage":0.0}],"feature_metrics":[{"feature_name":"num1","type":"numerical","missing_value":{"count":0,"percentage":0.0},"mean":1.0,"std":0.0,"min":1.0,"max":1.0,"median_metrics":{"perc_25":1.0,"median":1.0,"perc_75":1.0},"class_median_metrics":[],"histogram":{"buckets":[1.0,1.0],"reference_values":[7],"current_values":[7]}},{"feature_name":"num2","type":"numerical","missing_value":{"count":0,"percentage":0.0},"mean":100.0,"std":0.0,"min":100.0,"max":100.0,"median_metrics":{"perc_25":100.0,"median":100.0,"perc_75":100.0},"class_median_metrics":[],"histogram":{"buckets":[100.0,100.0],"reference_values":[7],"current_values":[7]}},{"feature_name":"cat1","type":"categorical","missing_value":{"count":0,"percentage":0.0},"category_frequency":[{"name":"F","count":1,"frequency":0.14285714285714285},{"name":"E","count":1,"frequency":0.14285714285714285},{"name":"B","count":1,"frequency":0.14285714285714285},{"name":"D","count":1,"frequency":0.14285714285714285},{"name":"C","count":1,"frequency":0.14285714285714285},{"name":"A","count":1,"frequency":0.14285714285714285},{"name":"G","count":1,"frequency":0.14285714285714285}],"distinct_value":7},{"feature_name":"cat2","type":"categorical","missing_value":{"count":0,"percentage":0.0},"category_frequency":[{"name":"F","count":1,"frequency":0.14285714285714285},{"name":"E","count":1,"frequency":0.14285714285714285},{"name":"B","count":1,"frequency":0.14285714285714285},{"name":"D","count":1,"frequency":0.14285714285714285},{"name":"C","count":1,"frequency":0.14285714285714285},{"name":"A","count":1,"frequency":0.14285714285714285},{"name":"G","count":1,"frequency":0.14285714285714285}],"distinct_value":7}]}', - "DRIFT": '{"feature_metrics":[{"feature_name":"cat1","drift_calc":{"type":"CHI2","value":0.22696283284780316,"has_drift":false}},{"feature_name":"cat2","drift_calc":{"type":"CHI2","value":0.22696283284780316,"has_drift":false}},{"feature_name":"num1","drift_calc":{"type":"KS","value":0.8571428571,"has_drift":true}},{"feature_name":"num2","drift_calc":{"type":"KS","value":0.8571428571,"has_drift":true}}]}', + "DRIFT": '{"feature_metrics":[{"feature_name":"cat1","drift_calc":{"type":"CHI2","value":0.22696283284780316,"has_drift":false}},{"feature_name":"cat2","drift_calc":{"type":"CHI2","value":0.22696283284780316,"has_drift":false}}]}', } test_bc_complete_reference_res = { @@ -28,7 +28,7 @@ "STATISTICS": '{"n_variables":7,"n_observations":10,"missing_cells":3,"missing_cells_perc":4.285714285714286,"duplicate_rows":0,"duplicate_rows_perc":0.0,"numeric":4,"categorical":2,"datetime":1}', "DATA_QUALITY": '{"n_observations":10,"class_metrics":[{"name":"0","count":3,"percentage":30.0},{"name":"1","count":3,"percentage":30.0},{"name":"2","count":3,"percentage":30.0},{"name":"3","count":1,"percentage":10.0}],"class_metrics_prediction":[{"name":"0","count":2,"percentage":20.0},{"name":"1","count":4,"percentage":40.0},{"name":"2","count":2,"percentage":20.0},{"name":"3","count":2,"percentage":20.0}],"feature_metrics":[{"feature_name":"num1","type":"numerical","missing_value":{"count":1,"percentage":10.0},"mean":1.1666666666666667,"std":0.75,"min":0.5,"max":3.0,"median_metrics":{"perc_25":1.0,"median":1.0,"perc_75":1.0},"class_median_metrics":[],"histogram":{"buckets":[0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.25,2.5,2.75,3.0],"reference_values":[2,0,5,0,1,0,0,0,0,1],"current_values":[2,0,5,0,1,0,0,0,0,1]}},{"feature_name":"num2","type":"numerical","missing_value":{"count":2,"percentage":20.0},"mean":277.675,"std":201.88635947695215,"min":1.4,"max":499.0,"median_metrics":{"perc_25":117.25,"median":250.0,"perc_75":499.0},"class_median_metrics":[],"histogram":{"buckets":[1.4,51.160000000000004,100.92000000000002,150.68000000000004,200.44000000000003,250.20000000000002,299.96000000000004,349.72,399.48,449.24,499.0],"reference_values":[1,1,1,1,0,0,1,0,0,3],"current_values":[1,1,1,1,0,0,1,0,0,3]}},{"feature_name":"cat1","type":"categorical","missing_value":{"count":0,"percentage":0.0},"category_frequency":[{"name":"B","count":4,"frequency":0.4},{"name":"C","count":1,"frequency":0.1},{"name":"A","count":5,"frequency":0.5}],"distinct_value":3},{"feature_name":"cat2","type":"categorical","missing_value":{"count":0,"percentage":0.0},"category_frequency":[{"name":"Y","count":1,"frequency":0.1},{"name":"X","count":9,"frequency":0.9}],"distinct_value":2}]}', "MODEL_QUALITY": '{"classes":["0","1","2","3"],"class_metrics":[{"class_name":"0","metrics":{"true_positive_rate":0.6666666666666666,"false_positive_rate":0.0,"precision":1.0,"recall":0.6666666666666666,"f_measure":0.8},"grouped_metrics":{"true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.6666666666666666}],"false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.0}],"precision":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"recall":[{"timestamp":"2024-06-16 00:00:00","value":0.6666666666666666}],"f_measure":[{"timestamp":"2024-06-16 00:00:00","value":0.8}]}},{"class_name":"1","metrics":{"true_positive_rate":1.0,"false_positive_rate":0.14285714285714285,"precision":0.75,"recall":1.0,"f_measure":0.8571428571428571},"grouped_metrics":{"true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.14285714285714285}],"precision":[{"timestamp":"2024-06-16 00:00:00","value":0.75}],"recall":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"f_measure":[{"timestamp":"2024-06-16 00:00:00","value":0.8571428571428571}]}},{"class_name":"2","metrics":{"true_positive_rate":0.3333333333333333,"false_positive_rate":0.14285714285714285,"precision":0.5,"recall":0.3333333333333333,"f_measure":0.4},"grouped_metrics":{"true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.3333333333333333}],"false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.14285714285714285}],"precision":[{"timestamp":"2024-06-16 00:00:00","value":0.5}],"recall":[{"timestamp":"2024-06-16 00:00:00","value":0.3333333333333333}],"f_measure":[{"timestamp":"2024-06-16 00:00:00","value":0.4}]}},{"class_name":"3","metrics":{"true_positive_rate":0.0,"false_positive_rate":0.2222222222222222,"precision":0.0,"recall":0.0,"f_measure":0.0},"grouped_metrics":{"true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.0}],"false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.2222222222222222}],"precision":[{"timestamp":"2024-06-16 00:00:00","value":0.0}],"recall":[{"timestamp":"2024-06-16 00:00:00","value":0.0}],"f_measure":[{"timestamp":"2024-06-16 00:00:00","value":0.0}]}}],"global_metrics":{"f1":0.6171428571428572,"accuracy":0.6,"weighted_precision":0.675,"weighted_recall":0.6000000000000001,"weighted_true_positive_rate":0.6000000000000001,"weighted_false_positive_rate":0.10793650793650794,"weighted_f_measure":0.6171428571428572,"confusion_matrix":[[2.0,0.0,1.0,0.0],[0.0,3.0,0.0,0.0],[0.0,0.0,1.0,2.0],[0.0,1.0,0.0,0.0]]}}', - "DRIFT": '{"feature_metrics":[{"feature_name":"cat1","drift_calc":{"type":"CHI2","value":0.0004993992273872871,"has_drift":true}},{"feature_name":"cat2","drift_calc":{"type":"CHI2","value":0.0015654022580025018,"has_drift":true}},{"feature_name":"num1","drift_calc":{"type":"KS","value":0.4,"has_drift":false}},{"feature_name":"num2","drift_calc":{"type":"KS","value":0.3,"has_drift":false}}]}', + "DRIFT": '{"feature_metrics":[{"feature_name":"cat1","drift_calc":{"type":"CHI2","value":0.0004993992273872871,"has_drift":true}},{"feature_name":"cat2","drift_calc":{"type":"CHI2","value":0.0015654022580025018,"has_drift":true}}]}', } test_mc_target_int_reference_res = { @@ -41,7 +41,7 @@ "STATISTICS": '{"n_variables":7,"n_observations":10,"missing_cells":3,"missing_cells_perc":4.285714285714286,"duplicate_rows":0,"duplicate_rows_perc":0.0,"numeric":2,"categorical":4,"datetime":1}', "DATA_QUALITY": '{"n_observations":10,"class_metrics":[{"name":"HEALTHY","count":3,"percentage":30.0},{"name":"ORPHAN","count":1,"percentage":10.0},{"name":"UNHEALTHY","count":3,"percentage":30.0},{"name":"UNKNOWN","count":3,"percentage":30.0}],"class_metrics_prediction":[{"name":"HEALTHY","count":4,"percentage":40.0},{"name":"ORPHAN","count":2,"percentage":20.0},{"name":"UNHEALTHY","count":2,"percentage":20.0},{"name":"UNKNOWN","count":2,"percentage":20.0}],"feature_metrics":[{"feature_name":"num1","type":"numerical","missing_value":{"count":1,"percentage":10.0},"mean":1.1666666666666667,"std":0.75,"min":0.5,"max":3.0,"median_metrics":{"perc_25":1.0,"median":1.0,"perc_75":1.0},"class_median_metrics":[],"histogram":{"buckets":[0.5,0.75,1.0,1.25,1.5,1.75,2.0,2.25,2.5,2.75,3.0],"reference_values":[2,0,5,0,1,0,0,0,0,1],"current_values":[2,0,5,0,1,0,0,0,0,1]}},{"feature_name":"num2","type":"numerical","missing_value":{"count":2,"percentage":20.0},"mean":277.675,"std":201.88635947695215,"min":1.4,"max":499.0,"median_metrics":{"perc_25":117.25,"median":250.0,"perc_75":499.0},"class_median_metrics":[],"histogram":{"buckets":[1.4,51.160000000000004,100.92000000000002,150.68000000000004,200.44000000000003,250.20000000000002,299.96000000000004,349.72,399.48,449.24,499.0],"reference_values":[1,1,1,1,0,0,1,0,0,3],"current_values":[1,1,1,1,0,0,1,0,0,3]}},{"feature_name":"cat1","type":"categorical","missing_value":{"count":0,"percentage":0.0},"category_frequency":[{"name":"B","count":4,"frequency":0.4},{"name":"C","count":1,"frequency":0.1},{"name":"A","count":5,"frequency":0.5}],"distinct_value":3},{"feature_name":"cat2","type":"categorical","missing_value":{"count":0,"percentage":0.0},"category_frequency":[{"name":"Y","count":1,"frequency":0.1},{"name":"X","count":9,"frequency":0.9}],"distinct_value":2}]}', "MODEL_QUALITY": '{"classes":["HEALTHY","ORPHAN","UNHEALTHY","UNKNOWN"],"class_metrics":[{"class_name":"HEALTHY","metrics":{"true_positive_rate":1.0,"false_positive_rate":0.14285714285714285,"precision":0.75,"recall":1.0,"f_measure":0.8571428571428571},"grouped_metrics":{"true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.14285714285714285}],"precision":[{"timestamp":"2024-06-16 00:00:00","value":0.75}],"recall":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"f_measure":[{"timestamp":"2024-06-16 00:00:00","value":0.8571428571428571}]}},{"class_name":"ORPHAN","metrics":{"true_positive_rate":0.0,"false_positive_rate":0.2222222222222222,"precision":0.0,"recall":0.0,"f_measure":0.0},"grouped_metrics":{"true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.0}],"false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.2222222222222222}],"precision":[{"timestamp":"2024-06-16 00:00:00","value":0.0}],"recall":[{"timestamp":"2024-06-16 00:00:00","value":0.0}],"f_measure":[{"timestamp":"2024-06-16 00:00:00","value":0.0}]}},{"class_name":"UNHEALTHY","metrics":{"true_positive_rate":0.6666666666666666,"false_positive_rate":0.0,"precision":1.0,"recall":0.6666666666666666,"f_measure":0.8},"grouped_metrics":{"true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.6666666666666666}],"false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.0}],"precision":[{"timestamp":"2024-06-16 00:00:00","value":1.0}],"recall":[{"timestamp":"2024-06-16 00:00:00","value":0.6666666666666666}],"f_measure":[{"timestamp":"2024-06-16 00:00:00","value":0.8}]}},{"class_name":"UNKNOWN","metrics":{"true_positive_rate":0.3333333333333333,"false_positive_rate":0.14285714285714285,"precision":0.5,"recall":0.3333333333333333,"f_measure":0.4},"grouped_metrics":{"true_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.3333333333333333}],"false_positive_rate":[{"timestamp":"2024-06-16 00:00:00","value":0.14285714285714285}],"precision":[{"timestamp":"2024-06-16 00:00:00","value":0.5}],"recall":[{"timestamp":"2024-06-16 00:00:00","value":0.3333333333333333}],"f_measure":[{"timestamp":"2024-06-16 00:00:00","value":0.4}]}}],"global_metrics":{"f1":0.6171428571428572,"accuracy":0.6,"weighted_precision":0.6749999999999999,"weighted_recall":0.6000000000000001,"weighted_true_positive_rate":0.6000000000000001,"weighted_false_positive_rate":0.10793650793650793,"weighted_f_measure":0.6171428571428572,"confusion_matrix":[[3.0,0.0,0.0,0.0],[1.0,0.0,0.0,0.0],[0.0,0.0,2.0,1.0],[0.0,2.0,0.0,1.0]]}}', - "DRIFT": '{"feature_metrics":[{"feature_name":"cat1","drift_calc":{"type":"CHI2","value":0.0004993992273872871,"has_drift":true}},{"feature_name":"cat2","drift_calc":{"type":"CHI2","value":0.0015654022580025018,"has_drift":true}},{"feature_name":"num1","drift_calc":{"type":"KS","value":0.4,"has_drift":false}},{"feature_name":"num2","drift_calc":{"type":"KS","value":0.3,"has_drift":false}}]}', + "DRIFT": '{"feature_metrics":[{"feature_name":"cat1","drift_calc":{"type":"CHI2","value":0.0004993992273872871,"has_drift":true}},{"feature_name":"cat2","drift_calc":{"type":"CHI2","value":0.0015654022580025018,"has_drift":true}}]}', } test_mc_target_string_reference_res = { @@ -67,7 +67,7 @@ "STATISTICS": '{"n_variables":14,"n_observations":100,"missing_cells":7,"missing_cells_perc":0.5,"duplicate_rows":2,"duplicate_rows_perc":2.0,"numeric":13,"categorical":0,"datetime":1}', "DATA_QUALITY": '{"n_observations":100,"target_metrics":{"feature_name":"ground_truth","type":"numerical","missing_value":{"count":0,"percentage":0.0},"mean":288.63,"std":317.1797010012979,"min":9.0,"max":1651.0,"median_metrics":{"perc_25":82.0,"median":167.5,"perc_75":354.0},"histogram":{"buckets":[2.0,342.8,683.6,1024.4,1365.2,1706.0,2046.8000000000002,2387.6,2728.4,3069.2000000000003,3410.0],"reference_values":[204,144,165,89,44,23,26,22,9,5],"current_values":[74,14,8,2,2,0,0,0,0,0]}},"feature_metrics":[{"feature_name":"season","type":"numerical","missing_value":{"count":0,"percentage":0.0},"mean":1.21,"std":0.40936018074033254,"min":1.0,"max":2.0,"median_metrics":{"perc_25":1.0,"median":1.0,"perc_75":1.0},"class_median_metrics":[],"histogram":{"buckets":[1.0,1.3,1.6,1.9,2.2,2.5,2.8,3.1,3.4,3.6999999999999997,4.0],"reference_values":[181,0,0,184,0,0,188,0,0,178],"current_values":[79,0,0,21,0,0,0,0,0,0]}},{"feature_name":"yr","type":"numerical","missing_value":{"count":0,"percentage":0.0},"mean":0.0,"std":0.0,"min":0.0,"max":0.0,"median_metrics":{"perc_25":0.0,"median":0.0,"perc_75":0.0},"class_median_metrics":[],"histogram":{"buckets":[0.0,0.1,0.2,0.30000000000000004,0.4,0.5,0.6000000000000001,0.7000000000000001,0.8,0.9,1.0],"reference_values":[365,0,0,0,0,0,0,0,0,366],"current_values":[100,0,0,0,0,0,0,0,0,0]}},{"feature_name":"mnth","type":"numerical","missing_value":{"count":1,"percentage":1.0},"mean":2.242424242424242,"std":0.9803259463254868,"min":1.0,"max":4.0,"median_metrics":{"perc_25":1.0,"median":2.0,"perc_75":3.0},"class_median_metrics":[],"histogram":{"buckets":[1.0,2.1,3.2,4.300000000000001,5.4,6.5,7.6000000000000005,8.700000000000001,9.8,10.9,12.0],"reference_values":[119,62,60,62,60,62,62,60,62,122],"current_values":[57,32,10,0,0,0,0,0,0,0]}},{"feature_name":"holiday","type":"numerical","missing_value":{"count":1,"percentage":1.0},"mean":0.020202020202020204,"std":0.14140677897022574,"min":0.0,"max":1.0,"median_metrics":{"perc_25":0.0,"median":0.0,"perc_75":0.0},"class_median_metrics":[],"histogram":{"buckets":[0.0,0.1,0.2,0.30000000000000004,0.4,0.5,0.6000000000000001,0.7000000000000001,0.8,0.9,1.0],"reference_values":[710,0,0,0,0,0,0,0,0,21],"current_values":[97,0,0,0,0,0,0,0,0,2]}},{"feature_name":"weekday","type":"numerical","missing_value":{"count":1,"percentage":1.0},"mean":2.95959595959596,"std":1.9893553048571038,"min":0.0,"max":6.0,"median_metrics":{"perc_25":1.0,"median":3.0,"perc_75":5.0},"class_median_metrics":[],"histogram":{"buckets":[0.0,0.6,1.2,1.7999999999999998,2.4,3.0,3.5999999999999996,4.2,4.8,5.3999999999999995,6.0],"reference_values":[105,105,0,104,0,104,104,0,104,105],"current_values":[14,15,0,14,0,14,15,0,14,13]}},{"feature_name":"workingday","type":"numerical","missing_value":{"count":0,"percentage":0.0},"mean":0.7,"std":0.46056618647183833,"min":0.0,"max":1.0,"median_metrics":{"perc_25":0.0,"median":1.0,"perc_75":1.0},"class_median_metrics":[],"histogram":{"buckets":[0.0,0.1,0.2,0.30000000000000004,0.4,0.5,0.6000000000000001,0.7000000000000001,0.8,0.9,1.0],"reference_values":[231,0,0,0,0,0,0,0,0,500],"current_values":[30,0,0,0,0,0,0,0,0,70]}},{"feature_name":"weathersit","type":"numerical","missing_value":{"count":0,"percentage":0.0},"mean":1.45,"std":0.5573204290227127,"min":1.0,"max":3.0,"median_metrics":{"perc_25":1.0,"median":1.0,"perc_75":2.0},"class_median_metrics":[],"histogram":{"buckets":[1.0,1.2,1.4,1.6,1.8,2.0,2.2,2.4000000000000004,2.6,2.8,3.0],"reference_values":[463,0,0,0,0,247,0,0,0,21],"current_values":[58,0,0,0,0,39,0,0,0,3]}},{"feature_name":"temp","type":"numerical","missing_value":{"count":2,"percentage":2.0},"mean":0.28181619795918367,"std":0.10183360371563194,"min":0.0591304,"max":0.573333,"median_metrics":{"perc_25":0.19874975,"median":0.26749999999999996,"perc_75":0.3432335},"class_median_metrics":[],"histogram":{"buckets":[0.0591304,0.13938405999999998,0.21963771999999998,0.29989137999999993,0.38014503999999993,0.46039869999999994,0.5406523599999999,0.62090602,0.70115968,0.78141334,0.861667],"reference_values":[7,36,90,104,93,80,93,101,103,24],"current_values":[5,27,26,21,15,3,1,0,0,0]}},{"feature_name":"atemp","type":"numerical","missing_value":{"count":1,"percentage":1.0},"mean":0.28198808787878793,"std":0.09538584350774348,"min":0.0790696,"max":0.542929,"median_metrics":{"perc_25":0.2166045,"median":0.263879,"perc_75":0.339734},"class_median_metrics":[],"histogram":{"buckets":[0.0790696,0.15525223999999999,0.23143488,0.30761752,0.38380016,0.45998279999999997,0.53616544,0.61234808,0.6885307199999999,0.7647133599999999,0.840896],"reference_values":[11,34,97,99,98,93,122,112,57,8],"current_values":[8,21,35,18,13,3,1,0,0,0]}},{"feature_name":"hum","type":"numerical","missing_value":{"count":0,"percentage":0.0},"mean":0.5767590300000002,"std":0.17338158044464802,"min":0.0,"max":0.948261,"median_metrics":{"perc_25":0.4671605,"median":0.538125,"perc_75":0.6866479999999999},"class_median_metrics":[],"histogram":{"buckets":[0.0,0.09725,0.1945,0.29175,0.389,0.48625,0.5835,0.68075,0.778,0.8752500000000001,0.9725],"reference_values":[1,1,3,18,95,173,164,169,73,34],"current_values":[1,1,0,7,22,24,18,12,9,6]}},{"feature_name":"windspeed","type":"numerical","missing_value":{"count":1,"percentage":1.0},"mean":0.22147813232323232,"std":0.0817791420054435,"min":0.0454083,"max":0.507463,"median_metrics":{"perc_25":0.165519,"median":0.22015,"perc_75":0.2636855},"class_median_metrics":[],"histogram":{"buckets":[0.0223917,0.07089883,0.11940595999999999,0.16791309,0.21642022,0.26492735,0.31343447999999996,0.36194160999999997,0.41044874,0.45895587,0.507463],"reference_values":[26,99,191,173,124,62,35,14,6,1],"current_values":[2,5,18,22,29,12,4,5,1,1]}}]}', "MODEL_QUALITY": '{"global_metrics":{"mae":71.82559791564941,"mape":64.05699022707124,"mse":17820.506660010054,"rmse":133.49347047706138,"r2":0.8210737408739541,"adj_r2":0.7987079584831984,"variance":118288.02759401732,"residuals":{"ks":{"p_value":-2.220446049250313e-16,"statistic":0.6999575270692112},"correlation_coefficient":0.9312137386402216,"histogram":{"buckets":[-691.5699999999999,-601.625,-511.67999999999995,-421.73499999999996,-331.78999999999996,-241.84499999999997,-151.89999999999998,-61.95499999999993,27.99000000000001,117.93499999999995,207.88],"values":[1,1,1,2,3,20,62,6,4]},"standardized_residuals":[0.4446159899234772,0.4991498291492462,0.21789957582950592,0.0040146526880562305,0.5322710871696472,0.13537704944610596,-0.46778324246406555,0.30202603340148926,0.3025874197483063,0.3220752477645874,0.3298543393611908,0.2805332839488983,0.11059622466564178,0.3782130181789398,0.2687443494796753,-0.7558501958847046,-0.2531765401363373,0.12960287928581238,0.38510993123054504,-0.5215151309967041,0.22142823040485382,0.4759729504585266,0.5447818040847778,0.32175445556640625,0.14580263197422028,-0.6692376732826233,0.239151731133461,-0.055330995470285416,0.33458593487739563,-0.6881641149520874,0.20859673619270325,0.2835005819797516,-0.34492170810699463,-0.41509392857551575,0.2987379729747772,-0.14009903371334076,0.23297657072544098,0.34469074010849,0.20298296213150024,0.5024378895759583,0.6636335253715515,-0.9259476661682129,-1.318992257118225,-0.24852512776851654,0.35623908042907715,-0.21764935553073883,-2.853398084640503,-3.805013418197632,1.6875865459442139,-0.3142865300178528,0.464023619890213,0.3198297321796417,0.13216917216777802,-0.6799840331077576,0.32648608088493347,-0.07842767983675003,0.10426067560911179,0.10426067560911179,0.19929391145706177,0.2117244154214859,0.21413032710552216,0.21413032710552216,0.5585756897926331,-0.10376987606287003,-3.4072372913360596,0.6151946187019348,0.4379596412181854,0.47204330563545227,-2.8373587131500244,0.2397933006286621,-0.4462102949619293,0.4345111846923828,0.52144455909729,0.6685255169868469,0.6502406597137451,0.7085437178611755,-5.151036739349365,-1.2634960412979126,-0.25462010502815247,1.2400882244110107,-0.019322622567415237,0.27876895666122437,0.3636171817779541,0.642541766166687,2.0622661113739014,0.43707749247550964,-0.19054283201694489,-0.05228351429104805,-0.007694082800298929,0.45030996203422546,0.43411019444465637,1.5360946655273438,1.5385807752609253,-0.3092341423034668,-0.12959325313568115,0.10827051848173141,0.46995818614959717,-0.37651926279067993,0.4154243469238281,0.020535197108983994],"predictions":[113.83,95.03,104.1,136.77,130.9,100.39,161.6,52.61,54.54,34.11,46.14,68.29,257.48,253.11,132.76,152.52,158.84,116.11,76.25,207.3,171.66,75.92,167.34,43.15,46.09,170.72,142.45,196.17,49.55,182.08,95.26,74.92,180.28,201.03,366.02,186.74,84.22,59.29,70.96,135.62,254.52,561.73,421.74,220.26,222.85,335.41,984.07,1055.73,477.84,283.46,65.41,148.39,132.79,254.06,432.56,753.05,117.27,117.27,161.42,253.87,145.57,145.57,193.62,702.21,588.13,216.56,310.66,181.41,449.07,266.37,828.91,977.09,343.25,254.91,289.19,384.92,1575.57,1630.82,1128.02,295.64,511.68,217.51,169.93,269.15,773.12,466.77,295.03,372.79,218.23,172.12,302.14,755.73,1508.42,821.83,232.43,448.77,561.67,268.22,876.47,1234.71],"targets":[120,108,82,88,148,68,54,41,43,25,38,54,222,251,117,9,78,83,75,93,150,86,186,34,15,38,123,140,42,47,72,61,88,100,354,120,64,53,47,149,288,397,208,140,218,259,579,532,639,195,74,139,100,120,424,694,81,81,137,231,123,123,214,640,114,244,316,191,46,247,724,982,359,289,321,424,884,1424,1047,401,460,203,166,300,981,472,222,317,168,179,307,898,1651,734,167,413,571,172,879,1188],"regression_line":{"coefficient":1.0043705635148725,"intercept":48.00912425270233}}},"grouped_metrics":{"mae":[{"timestamp":"2011-01-01 00:00:00","value":35.67896665375808},{"timestamp":"2011-02-01 00:00:00","value":89.13965238373855},{"timestamp":"2011-03-01 00:00:00","value":91.54030847549438},{"timestamp":"2011-04-01 00:00:00","value":63.352996826171875}],"mape":[{"timestamp":"2011-01-01 00:00:00","value":106.34668638669385},{"timestamp":"2011-02-01 00:00:00","value":50.266650033642435},{"timestamp":"2011-03-01 00:00:00","value":53.63275529139244},{"timestamp":"2011-04-01 00:00:00","value":14.766409719281478}],"mse":[{"timestamp":"2011-01-01 00:00:00","value":2848.1117152678507},{"timestamp":"2011-02-01 00:00:00","value":21631.812814960613},{"timestamp":"2011-03-01 00:00:00","value":31460.34954782362},{"timestamp":"2011-04-01 00:00:00","value":6540.166909402423}],"rmse":[{"timestamp":"2011-01-01 00:00:00","value":53.36770292290882},{"timestamp":"2011-02-01 00:00:00","value":147.07757414018158},{"timestamp":"2011-03-01 00:00:00","value":177.37065582509305},{"timestamp":"2011-04-01 00:00:00","value":80.87129842782556}],"r2":[{"timestamp":"2011-01-01 00:00:00","value":0.17834457710460982},{"timestamp":"2011-02-01 00:00:00","value":0.3895389519246505},{"timestamp":"2011-03-01 00:00:00","value":0.7043715304337479},{"timestamp":"2011-04-01 00:00:00","value":0.9678020649997567}],"adj_r2":[{"timestamp":"2011-01-01 00:00:00","value":-0.3533148141806426},{"timestamp":"2011-02-01 00:00:00","value":-0.005465255653516854},{"timestamp":"2011-03-01 00:00:00","value":0.5417758721723092},{"timestamp":"2011-04-01 00:00:00","value":1.1448907075010948}],"variance":[{"timestamp":"2011-01-01 00:00:00","value":4720.867246089001},{"timestamp":"2011-02-01 00:00:00","value":70942.48575413873},{"timestamp":"2011-03-01 00:00:00","value":150522.0080596708},{"timestamp":"2011-04-01 00:00:00","value":163422.9263027128}]}}', - "DRIFT": '{"feature_metrics":[{"feature_name":"weathersit","drift_calc":{"type":"KS","value":0.6219091927,"has_drift":true}},{"feature_name":"temp","drift_calc":{"type":"KS","value":0.5259741552,"has_drift":true}},{"feature_name":"atemp","drift_calc":{"type":"KS","value":0.5322880465,"has_drift":true}},{"feature_name":"hum","drift_calc":{"type":"KS","value":0.2230727748,"has_drift":true}},{"feature_name":"windspeed","drift_calc":{"type":"KS","value":0.2180156245,"has_drift":true}},{"feature_name":"season","drift_calc":{"type":"CHI2","value":0.7058231915368379,"has_drift":false}},{"feature_name":"yr","drift_calc":{"type":"CHI2","value":1.0,"has_drift":false}},{"feature_name":"mnth","drift_calc":{"type":"CHI2","value":0.9637289558298074,"has_drift":false}},{"feature_name":"holiday","drift_calc":{"type":"CHI2","value":0.8374533320041525,"has_drift":false}},{"feature_name":"weekday","drift_calc":{"type":"CHI2","value":0.5795400085655207,"has_drift":false}},{"feature_name":"workingday","drift_calc":{"type":"CHI2","value":0.09216569222802284,"has_drift":false}}]}', + "DRIFT": '{"feature_metrics":[{"feature_name":"temp","drift_calc":{"type":"KS","value":0.5259741552,"has_drift":true}},{"feature_name":"atemp","drift_calc":{"type":"KS","value":0.5322880465,"has_drift":true}},{"feature_name":"hum","drift_calc":{"type":"KS","value":0.2230727748,"has_drift":true}},{"feature_name":"windspeed","drift_calc":{"type":"KS","value":0.2180156245,"has_drift":true}},{"feature_name":"season","drift_calc":{"type":"CHI2","value":0.7058231915368379,"has_drift":false}},{"feature_name":"yr","drift_calc":{"type":"CHI2","value":1.0,"has_drift":false}},{"feature_name":"mnth","drift_calc":{"type":"CHI2","value":0.9637289558298074,"has_drift":false}},{"feature_name":"holiday","drift_calc":{"type":"CHI2","value":0.8374533320041525,"has_drift":false}},{"feature_name":"weekday","drift_calc":{"type":"CHI2","value":0.5795400085655207,"has_drift":false}},{"feature_name":"workingday","drift_calc":{"type":"CHI2","value":0.09216569222802284,"has_drift":false}}]}', } test_reg_bike_reference_res = { diff --git a/spark/tests/results/regression_current_results.py b/spark/tests/results/regression_current_results.py index 3b1a4477..544269fd 100644 --- a/spark/tests/results/regression_current_results.py +++ b/spark/tests/results/regression_current_results.py @@ -12104,10 +12104,6 @@ test_drift_regression_res = { "feature_metrics": [ - { - "feature_name": "weathersit", - "drift_calc": {"type": "KS", "value": 0.6219091927, "has_drift": True}, - }, { "feature_name": "temp", "drift_calc": {"type": "KS", "value": 0.5259741552, "has_drift": True},