From 49cf7be1eb07ebdb29b95c56799f473337e6c89d Mon Sep 17 00:00:00 2001
From: mike0sv
Date: Tue, 27 Aug 2024 02:23:53 +0300
Subject: [PATCH] fix recsys stuff #1233 #1231

---
 .../descriptors/semantic_similarity.py       |  2 +-
 src/evidently/metrics/recsys/base_top_k.py   | 26 ++++++++++++++++---
 src/evidently/metrics/recsys/f_beta_top_k.py |  4 +--
 src/evidently/metrics/recsys/hit_rate_k.py   |  4 +--
 tests/metrics/recsys/test_precision_top_k.py | 16 ++++++++++++
 5 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/src/evidently/descriptors/semantic_similarity.py b/src/evidently/descriptors/semantic_similarity.py
index 07f6b287bc..664dc41c1d 100644
--- a/src/evidently/descriptors/semantic_similarity.py
+++ b/src/evidently/descriptors/semantic_similarity.py
@@ -12,7 +12,7 @@ def feature(self, columns: List[str]) -> GeneratedFeature:
         return SemanticSimilarityFeature(columns=columns, display_name=self.display_name)
 
 
-class SemanticSimilatiryDescriptor(FeatureDescriptor):
+class SemanticSimilarityDescriptor(FeatureDescriptor):
     with_column: str
 
     def feature(self, column_name: str) -> GeneratedFeatures:
diff --git a/src/evidently/metrics/recsys/base_top_k.py b/src/evidently/metrics/recsys/base_top_k.py
index 4e31100ad9..ca66d31719 100644
--- a/src/evidently/metrics/recsys/base_top_k.py
+++ b/src/evidently/metrics/recsys/base_top_k.py
@@ -30,7 +30,25 @@ class Config:
 
     k: int
     current: pd.Series
+    current_value: float
     reference: Optional[pd.Series] = None
+    reference_value: Optional[float] = None
+
+    def __init__(
+        self,
+        k: int,
+        current: pd.Series,
+        current_value: Optional[float] = None,
+        reference: Optional[pd.Series] = None,
+        reference_value: Optional[float] = None,
+    ):
+        super().__init__(
+            k=k,
+            current=current,
+            current_value=current_value if current_value is not None else current[k - 1],
+            reference=reference,
+            reference_value=reference_value if reference_value is not None or reference is None else reference[k - 1],
+        )
 
 
 class TopKMetric(Metric[TopKMetricResult], abc.ABC):
@@ -56,11 +74,11 @@ def calculate(self, data: InputData) -> TopKMetricResult:
 
         if self.no_feedback_users:
             key = f"{self.key()}_include_no_feedback"
-        current = pd.Series(index=result.current["k"], data=result.current[key])
+        current = pd.Series(data=result.current[key])
         ref_data = result.reference
         reference: Optional[pd.Series] = None
         if ref_data is not None:
-            reference = pd.Series(index=ref_data["k"], data=ref_data[key])
+            reference = pd.Series(data=ref_data[key])
         return TopKMetricResult(k=self.k, reference=reference, current=current)
 
     @abc.abstractmethod
@@ -76,9 +94,9 @@ class TopKMetricRenderer(MetricRenderer):
     def render_html(self, obj: TopKMetric) -> List[BaseWidgetInfo]:
         metric_result = obj.get_result()
         k = metric_result.k
-        counters = [CounterData.float(label="current", value=metric_result.current[k], precision=3)]
+        counters = [CounterData.float(label="current", value=metric_result.current[k - 1], precision=3)]
         if metric_result.reference is not None:
-            counters.append(CounterData.float(label="reference", value=metric_result.reference[k], precision=3))
+            counters.append(CounterData.float(label="reference", value=metric_result.reference[k - 1], precision=3))
         fig = plot_metric_k(metric_result.current, metric_result.reference, self.yaxis_name)
         header_part = " No feedback users included."
         if not obj.no_feedback_users:
diff --git a/src/evidently/metrics/recsys/f_beta_top_k.py b/src/evidently/metrics/recsys/f_beta_top_k.py
index 59ff2c1249..28c99f29ea 100644
--- a/src/evidently/metrics/recsys/f_beta_top_k.py
+++ b/src/evidently/metrics/recsys/f_beta_top_k.py
@@ -47,11 +47,11 @@ def calculate(self, data: InputData) -> TopKMetricResult:
         pr_key = "precision"
         rc_key = "recall"
         result = self._precision_recall_calculation.get_result()
-        current = pd.Series(index=result.current["k"], data=self.fbeta(result.current[pr_key], result.current[rc_key]))
+        current = pd.Series(data=self.fbeta(result.current[pr_key], result.current[rc_key]))
         ref_data = result.reference
         reference: Optional[pd.Series] = None
         if ref_data is not None:
-            reference = pd.Series(index=ref_data["k"], data=self.fbeta(ref_data[pr_key], ref_data[rc_key]))
+            reference = pd.Series(data=self.fbeta(ref_data[pr_key], ref_data[rc_key]))
         return TopKMetricResult(k=self.k, reference=reference, current=current)
 
     def fbeta(self, precision, recall):
diff --git a/src/evidently/metrics/recsys/hit_rate_k.py b/src/evidently/metrics/recsys/hit_rate_k.py
index f605eb032c..e5a7292c61 100644
--- a/src/evidently/metrics/recsys/hit_rate_k.py
+++ b/src/evidently/metrics/recsys/hit_rate_k.py
@@ -44,7 +44,7 @@ def get_values(self, df, max_k):
         for k in range(1, max_k + 1):
             df_k = df[(df.target == 1) & (df.preds <= k)]
             res.append(df_k.users.nunique() / user_num)
-        return pd.Series(index=[x for x in range(1, max_k + 1)], data=res)
+        return pd.Series(data=res)
 
     def calculate(self, data: InputData) -> HitRateKMetricResult:
         curr, ref = get_curr_and_ref_df(data, self.min_rel_score, self.no_feedback_users, True)
@@ -57,6 +57,6 @@ def calculate(self, data: InputData) -> HitRateKMetricResult:
 
 
 @default_renderer(wrap_type=HitRateKMetric)
-class PrecisionTopKMetricRenderer(TopKMetricRenderer):
+class HitRateKMetricRenderer(TopKMetricRenderer):
     yaxis_name = "HitRate@k"
     header = "Hit Rate"
diff --git a/tests/metrics/recsys/test_precision_top_k.py b/tests/metrics/recsys/test_precision_top_k.py
index 56a71efc05..e6c15d8dda 100644
--- a/tests/metrics/recsys/test_precision_top_k.py
+++ b/tests/metrics/recsys/test_precision_top_k.py
@@ -1,10 +1,26 @@
+import json
+
 import numpy as np
 import pandas as pd
 
+from evidently._pydantic_compat import parse_obj_as
+from evidently.base_metric import MetricResult
 from evidently.metrics import PrecisionTopKMetric
+from evidently.metrics.recsys.base_top_k import TopKMetricResult
 from evidently.pipeline.column_mapping import ColumnMapping
 from evidently.pipeline.column_mapping import RecomType
 from evidently.report import Report
+from evidently.utils import NumpyEncoder
+
+
+def test_value():
+    result = TopKMetricResult(
+        k=2, current=pd.Series([0, 1]), current_value=1, reference=pd.Series([2, 3]), reference_value=3
+    )
+    payload = json.loads(json.dumps(result.dict(), cls=NumpyEncoder))
+    payload2 = {k: v for k, v in payload.items() if not k.endswith("_value")}
+    result2 = parse_obj_as(MetricResult, payload2)
+    assert json.loads(json.dumps(result2.dict(), cls=NumpyEncoder)) == payload
 
 
 def test_precision_value():
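
Not part of the patch: an illustrative sketch of the defaulting behavior the new TopKMetricResult.__init__ introduces, assuming a build with this patch applied. When current_value/reference_value are omitted, they are filled from position k - 1 of the corresponding series; a missing reference leaves reference_value as None.

    import pandas as pd

    from evidently.metrics.recsys.base_top_k import TopKMetricResult

    # current_value is not passed, so __init__ falls back to current[k - 1]
    result = TopKMetricResult(k=2, current=pd.Series([0.25, 0.5]))
    assert result.current_value == 0.5
    # no reference series was given, so reference_value stays None
    assert result.reference_value is None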