facebook · esantorella · Dec 19, 2024 · Dec 19, 2024 · Dec 19, 2024
diff --git a/ax/benchmark/benchmark_metric.py b/ax/benchmark/benchmark_metric.py
@@ -11,13 +11,11 @@
 Metrics vary on two dimensions: Whether they are `MapMetric`s or not, and
 whether they are available while running or not.
 
-There are two Metric classes:
-- `BenchmarkMetric`: For when outputs should be `Data` (not `MapData`) and data
+There are four Metric classes:
+- `BenchmarkMetric`: A non-Map metric that
     is not available while running.
 - `BenchmarkMapMetric`: For when outputs should be `MapData` (not `Data`) and
     data is available while running.
-
-There are further benchmark classes that are not yet implemented:
 - `BenchmarkTimeVaryingMetric`: For when outputs should be `Data` and the metric
   is available while running.
 - `BenchmarkMapUnavailableWhileRunningMetric`: For when outputs should be
@@ -200,7 +198,7 @@ def fetch_trial_data(self, trial: BaseTrial, **kwargs: Any) -> MetricFetchResult
             available_data = df[df["virtual runtime"] <= max_t]
 
         if not self.observe_noise_sd:
-            available_data["sem"] = None
+            available_data.loc[:, "sem"] = None
         return self._df_to_result(df=available_data.drop(columns=["virtual runtime"]))
 
     @abstractmethod
@@ -214,8 +212,10 @@ def _df_to_result(self, df: DataFrame) -> MetricFetchResult:
 
 class BenchmarkMetric(BenchmarkMetricBase):
     """
-    Metric for benchmarking that produces `Data` and is not available while
-    running.
+    Non-map Metric for benchmarking that is not available while running.
+
+    It cannot process data with multiple time steps, as it would only return one
+    value -- the value it has at completion time -- regardless.
     """
 
     def _class_specific_metdata_validation(
@@ -234,12 +234,27 @@ def _df_to_result(self, df: DataFrame) -> MetricFetchResult:
         return Ok(value=Data(df=df.drop(columns=["step"])))
 
 
-class BenchmarkMapMetric(MapMetric, BenchmarkMetricBase):
+class BenchmarkTimeVaryingMetric(BenchmarkMetricBase):
     """
-    Metric for benchmarking that produces `Data` and is available while
-    running.
+    Non-Map Metric for benchmarking that is available while running.
+
+    It reports only the rows with the largest `step` available, so its value
+    depends on when it is called, using the `time` on a `BackendSimulator`.
     """
 
+    @classmethod
+    def is_available_while_running(cls) -> bool:
+        return True
+
+    def _df_to_result(self, df: DataFrame) -> MetricFetchResult:
+        return Ok(
+            value=Data(df=df[df["step"] == df["step"].max()].drop(columns=["step"]))
+        )
+
+
+class BenchmarkMapMetric(MapMetric, BenchmarkMetricBase):
+    """MapMetric for benchmarking. It is available while running."""
+
     # pyre-fixme: Inconsistent override [15]: `map_key_info` overrides attribute
     # defined in `MapMetric` inconsistently. Type `MapKeyInfo[int]` is not a
     # subtype of the overridden attribute `MapKeyInfo[float]`
@@ -253,3 +268,23 @@ def _df_to_result(self, df: DataFrame) -> MetricFetchResult:
         # Just in case the key was renamed by a subclass
         df = df.rename(columns={"step": self.map_key_info.key})
         return Ok(value=MapData(df=df, map_key_infos=[self.map_key_info]))
+
+
+class BenchmarkMapUnavailableWhileRunningMetric(MapMetric, BenchmarkMetricBase):
+    """
+    MapMetric for benchmarking that produces `MapData`.
+
+    It does not override `is_available_while_running`, so whether it is
+    available while running is inherited from its base classes (presumably
+    not available, per the class name; confirm against the base classes).
+    """
+
+    # pyre-fixme: Inconsistent override [15]: `map_key_info` overrides attribute
+    # defined in `MapMetric` inconsistently. Type `MapKeyInfo[int]` is not a
+    # subtype of the overridden attribute `MapKeyInfo[float]`
+    map_key_info: MapKeyInfo[int] = MapKeyInfo(key="step", default_value=0)
+
+    def _df_to_result(self, df: DataFrame) -> MetricFetchResult:
+        # Just in case the key was renamed by a subclass
+        df = df.rename(columns={"step": self.map_key_info.key})
+        return Ok(value=MapData(df=df, map_key_infos=[self.map_key_info]))
diff --git a/ax/benchmark/benchmark_trial_metadata.py b/ax/benchmark/benchmark_trial_metadata.py
@@ -20,7 +20,7 @@ class BenchmarkTrialMetadata:
 
     Args:
         df: A dict mapping each metric name to a Pandas DataFrame with columns
-            ["metric_name", "arm_name", "mean", "sem", and "t"]. The "sem" is
+            ["metric_name", "arm_name", "mean", "sem", and "step"]. The "sem" is
             always present in this df even if noise levels are unobserved;
             ``BenchmarkMetric`` and ``BenchmarkMapMetric`` hide that data if it
             should not be observed, and ``BenchmarkMapMetric``s drop data from