Commit

fix recsys stuff

mike0sv committed Aug 26, 2024
1 parent af0cd76 commit 49cf7be

Showing 5 changed files with 43 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/evidently/descriptors/semantic_similarity.py
@@ -12,7 +12,7 @@ def feature(self, columns: List[str]) -> GeneratedFeature:
return SemanticSimilarityFeature(columns=columns, display_name=self.display_name)


class SemanticSimilatiryDescriptor(FeatureDescriptor):
class SemanticSimilarityDescriptor(FeatureDescriptor):
with_column: str

def feature(self, column_name: str) -> GeneratedFeatures:
26 changes: 22 additions & 4 deletions src/evidently/metrics/recsys/base_top_k.py
@@ -30,7 +30,25 @@ class Config:

k: int
current: pd.Series
current_value: float
reference: Optional[pd.Series] = None
reference_value: Optional[float] = None

def __init__(
self,
k: int,
current: pd.Series,
current_value: Optional[float] = None,
reference: Optional[pd.Series] = None,
reference_value: Optional[float] = None,
):
super().__init__(
k=k,
current=current,
current_value=current_value if current_value is not None else current[k - 1],
reference=reference,
reference_value=reference_value if reference_value is not None or reference is None else reference[k - 1],
)


class TopKMetric(Metric[TopKMetricResult], abc.ABC):
@@ -56,11 +74,11 @@ def calculate(self, data: InputData) -> TopKMetricResult:
if self.no_feedback_users:
key = f"{self.key()}_include_no_feedback"

current = pd.Series(index=result.current["k"], data=result.current[key])
current = pd.Series(data=result.current[key])
ref_data = result.reference
reference: Optional[pd.Series] = None
if ref_data is not None:
reference = pd.Series(index=ref_data["k"], data=ref_data[key])
reference = pd.Series(data=ref_data[key])
return TopKMetricResult(k=self.k, reference=reference, current=current)

@abc.abstractmethod
@@ -76,9 +94,9 @@ class TopKMetricRenderer(MetricRenderer):
def render_html(self, obj: TopKMetric) -> List[BaseWidgetInfo]:
metric_result = obj.get_result()
k = metric_result.k
counters = [CounterData.float(label="current", value=metric_result.current[k], precision=3)]
counters = [CounterData.float(label="current", value=metric_result.current[k - 1], precision=3)]
if metric_result.reference is not None:
counters.append(CounterData.float(label="reference", value=metric_result.reference[k], precision=3))
counters.append(CounterData.float(label="reference", value=metric_result.reference[k - 1], precision=3))
fig = plot_metric_k(metric_result.current, metric_result.reference, self.yaxis_name)
header_part = " No feedback users included."
if not obj.no_feedback_users:
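With the new constructor above, the *_value fields can be omitted: when not passed, they default to the series entry at position k - 1. A toy illustration with made-up numbers (the import path for TopKMetricResult matches the test added further down):

import pandas as pd

from evidently.metrics.recsys.base_top_k import TopKMetricResult

# current_value / reference_value omitted: they fall back to the entry at position k - 1.
result = TopKMetricResult(
    k=2,
    current=pd.Series([0.10, 0.25, 0.40]),
    reference=pd.Series([0.12, 0.30, 0.45]),
)
assert result.current_value == 0.25
assert result.reference_value == 0.30
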
4 changes: 2 additions & 2 deletions src/evidently/metrics/recsys/f_beta_top_k.py
@@ -47,11 +47,11 @@ def calculate(self, data: InputData) -> TopKMetricResult:
pr_key = "precision"
rc_key = "recall"
result = self._precision_recall_calculation.get_result()
current = pd.Series(index=result.current["k"], data=self.fbeta(result.current[pr_key], result.current[rc_key]))
current = pd.Series(data=self.fbeta(result.current[pr_key], result.current[rc_key]))
ref_data = result.reference
reference: Optional[pd.Series] = None
if ref_data is not None:
reference = pd.Series(index=ref_data["k"], data=self.fbeta(ref_data[pr_key], ref_data[rc_key]))
reference = pd.Series(data=self.fbeta(ref_data[pr_key], ref_data[rc_key]))
return TopKMetricResult(k=self.k, reference=reference, current=current)

def fbeta(self, precision, recall):
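The body of fbeta is not visible in this hunk; for orientation only, here is a generic sketch of the standard element-wise F-beta it would typically compute (an assumption, not the library's actual implementation; the name fbeta_sketch is hypothetical):

import numpy as np


def fbeta_sketch(precision, recall, beta=1.0):
    """Element-wise F-beta of precision/recall arrays (generic sketch, not the collapsed method)."""
    precision = np.asarray(precision, dtype=float)
    recall = np.asarray(recall, dtype=float)
    numerator = (1 + beta**2) * precision * recall
    denominator = beta**2 * precision + recall
    # Define the score as 0 where precision and recall are both 0.
    return np.divide(numerator, denominator, out=np.zeros_like(denominator), where=denominator > 0)

With beta = 1 this reduces to the harmonic mean of precision and recall (F1).
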
4 changes: 2 additions & 2 deletions src/evidently/metrics/recsys/hit_rate_k.py
@@ -44,7 +44,7 @@ def get_values(self, df, max_k):
for k in range(1, max_k + 1):
df_k = df[(df.target == 1) & (df.preds <= k)]
res.append(df_k.users.nunique() / user_num)
return pd.Series(index=[x for x in range(1, max_k + 1)], data=res)
return pd.Series(data=res)

def calculate(self, data: InputData) -> HitRateKMetricResult:
curr, ref = get_curr_and_ref_df(data, self.min_rel_score, self.no_feedback_users, True)
@@ -57,6 +57,6 @@ def calculate(self, data: InputData) -> HitRateKMetricResult:


@default_renderer(wrap_type=HitRateKMetric)
class PrecisionTopKMetricRenderer(TopKMetricRenderer):
class HitRateKMetricRenderer(TopKMetricRenderer):
yaxis_name = "HitRate@k"
header = "Hit Rate"
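
To see what the get_values loop above produces, here is a toy run with two users (the column names users, target, and preds come from the snippet; treating preds as the 1-based recommendation rank is an assumption):

import pandas as pd

# Two users: u1 has a relevant item at rank 1, u2 only at rank 3.
df = pd.DataFrame(
    {
        "users": ["u1", "u1", "u2", "u2"],
        "target": [1, 0, 0, 1],
        "preds": [1, 2, 1, 3],
    }
)
user_num = df.users.nunique()

res = []
max_k = 3
for k in range(1, max_k + 1):
    df_k = df[(df.target == 1) & (df.preds <= k)]
    res.append(df_k.users.nunique() / user_num)

print(pd.Series(data=res))  # 0.5, 0.5, 1.0 -> HitRate@1, @2, @3 at positions 0, 1, 2

Because the returned series now carries a plain 0-based index, the renderer's current[k - 1] lookup in base_top_k.py reads off HitRate@k directly.
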
16 changes: 16 additions & 0 deletions tests/metrics/recsys/test_precision_top_k.py
@@ -1,10 +1,26 @@
import json

import numpy as np
import pandas as pd

from evidently._pydantic_compat import parse_obj_as
from evidently.base_metric import MetricResult
from evidently.metrics import PrecisionTopKMetric
from evidently.metrics.recsys.base_top_k import TopKMetricResult
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.pipeline.column_mapping import RecomType
from evidently.report import Report
from evidently.utils import NumpyEncoder


def test_value():
result = TopKMetricResult(
k=2, current=pd.Series([0, 1]), current_value=1, reference=pd.Series([2, 3]), reference_value=3
)
payload = json.loads(json.dumps(result.dict(), cls=NumpyEncoder))
payload2 = {k: v for k, v in payload.items() if not k.endswith("_value")}
result2 = parse_obj_as(MetricResult, payload2)
assert json.loads(json.dumps(result2.dict(), cls=NumpyEncoder)) == payload


def test_precision_value():
