Move note computation for MALT0

IGNF · Dec 11, 2023 · 90300a6 · 90300a6
1 parent e5500de
commit 90300a6
Show file tree

Hide file tree

Showing 5 changed files with 133 additions and 64 deletions.
diff --git a/coclico/malt0/malt0.py b/coclico/malt0/malt0.py
@@ -2,8 +2,10 @@
 from pathlib import Path
 from typing import List
 
+import pandas as pd
 from gpao.job import Job
 
+from coclico.metrics.commons import bounded_affine_function
 from coclico.metrics.metric import Metric
 from coclico.version import __version__
 
@@ -40,7 +42,7 @@ class MALT0(Metric):
 
     # Pixel size for MNx
     pixel_size = 0.5
-    metric_name = "MALT0"
+    metric_name = "malt0"
 
     def create_metric_intrinsic_one_job(self, name: str, input: Path, output: Path, is_ref: bool):
         job_name = f"{self.metric_name}_intrinsic_{name}_{input.stem}"
@@ -95,3 +97,21 @@ def create_metric_relative_to_ref_jobs(
             job.add_dependency(ref_job)
 
         return [job]
+
+    @staticmethod
+    def compute_note(metric_df: pd.DataFrame):
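+        """Compute the malt0 note from the max_diff, mean_diff and std_diff columns.
+
+        Args:
+            metric_df (pd.DataFrame): table with max_diff, mean_diff and std_diff columns; they are replaced
+            in place by one "malt0" column = (max_note + mean_note + std_note) / 5, and the table is returned
+        """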
+        max_note = bounded_affine_function((0.1, 1), (4, 0), metric_df["max_diff"])  # 0 <= max_note <= 1
+        mean_note = bounded_affine_function((0.01, 2), (0.5, 0), metric_df["mean_diff"])  # 0 <= mean_note <= 2
+        std_note = bounded_affine_function((0.01, 2), (0.5, 0), metric_df["std_diff"])  # 0 <= std_note <= 2
+
+        metric_df[MALT0.metric_name] = (max_note + mean_note + std_note) / 5
+
+        metric_df.drop(columns=["max_diff", "mean_diff", "std_diff"], inplace=True)
+
+        return metric_df
diff --git a/coclico/malt0/malt0_relative.py b/coclico/malt0/malt0_relative.py
@@ -2,31 +2,14 @@
 import json
 import logging
 from pathlib import Path
-from typing import Dict, List
+from typing import Dict
 
 import numpy as np
 import numpy.ma as ma
 import pandas as pd
 import rasterio
 
 from coclico.config import csv_separator
-from coclico.metrics.commons import bounded_affine_function
-
-
-def compute_note(mean_diff: np.array, std_diff: np.array, max_diff: np.array, classes: List[int]) -> Dict:
-    def compute_one_note(mean_diff, std_diff, max_diff):
-        max_note = bounded_affine_function((0.1, 1), (4, 0), max_diff)  # 0 <= max_note <= 1
-        mean_note = bounded_affine_function((0.01, 2), (0.5, 0), mean_diff)  # 0 <= mean_note <= 2
-        std_note = bounded_affine_function((0.01, 2), (0.5, 0), std_diff)  # 0 <= std_note <= 2
-
-        # divide by 5 because weights are : max_note(1), mean_note(2), std_note(2)
-        note = (max_note + mean_note + std_note) / 5
-
-        return note
-
-    notes = {k: compute_one_note(mean_diff[ii], std_diff[ii], max_diff[ii]) for ii, k in enumerate(classes)}
-
-    return notes
 
 
 def compute_stats_single_raster(raster: np.array, occupancy_raster: np.array):
@@ -89,7 +72,6 @@ def compute_metric_relative(
     c1_dir: Path, ref_dir: Path, occupancy_dir: Path, class_weights: Dict, output_csv: Path, output_csv_tile: Path
 ):
     """TODO"""
-    metric = "malt0"
     classes = sorted(class_weights.keys())
     csv_data = []
 
@@ -124,9 +106,17 @@ def compute_metric_relative(
         max_diff, count, mean_diff, std_diff, m2_diff = compute_stats_single_raster(
             np.abs(c1_raster - ref_raster), occupancy_raster
         )
-        note = compute_note(mean_diff, std_diff, max_diff, classes)
 
-        new_line = [{"tile": ref_file.stem, "class": cl, metric: note[cl]} for cl in classes]
+        new_line = [
+            {
+                "tile": ref_file.stem,
+                "class": cl,
+                "max_diff": max_diff[ii],
+                "mean_diff": mean_diff[ii],
+                "std_diff": std_diff[ii],
+            }
+            for ii, cl in enumerate(classes)
+        ]
         csv_data.extend(new_line)
 
         total_max_diff, total_count, total_mean_diff, total_m2 = update_overall_stats(
@@ -140,9 +130,16 @@ def compute_metric_relative(
     df.to_csv(output_csv_tile, index=False, sep=csv_separator)
     logging.debug(df.to_markdown())
 
-    total_notes = compute_note(total_mean_diff, total_std_diff, total_max_diff, classes)
+    data = [
+        {
+            "class": cl,
+            "max_diff": total_max_diff[ii],
+            "mean_diff": total_mean_diff[ii],
+            "std_diff": total_std_diff[ii],
+        }
+        for ii, cl in enumerate(classes)
+    ]
 
-    data = [{"class": cl, metric: total_notes.get(cl, 0)} for cl in classes]
     df = pd.DataFrame(data)
     df.to_csv(output_csv, index=False, sep=csv_separator)
 

diff --git a/test/malt0/test_malt0.py b/test/malt0/test_malt0.py
@@ -0,0 +1,83 @@
+import shutil
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from coclico.malt0.malt0 import MALT0
+
+pytestmark = pytest.mark.docker
+
+TMP_PATH = Path("./tmp/malt0")
+
+
+def setup_module(module):
+    if TMP_PATH.is_dir():
+        shutil.rmtree(TMP_PATH)
+
+
+def generate_metric_dataframes():
+    # Cases:
+    # - tile a: Test with mean condition only
+    # - tile b: Test with standard deviation condition only
+    # - tile c: Test with maximum condition only
+    # - tile d: Test all conditions together
+    input_tile_a = pd.DataFrame(
+        {
+            "tile": ["a"] * 3,
+            "class": ["6", "0", "2_3"],
+            "max_diff": [0, 0, 0],
+            "mean_diff": [0.01, 0.5, 0.01 + (0.5 - 0.01) / 2],
+            "std_diff": [0, 0, 0],
+        }
+    )
+    expected_tile_a = pd.DataFrame({"tile": ["a"] * 3, "class": ["6", "0", "2_3"], "malt0": [1, 3 / 5, 4 / 5]})
+
+    input_tile_b = pd.DataFrame(
+        {
+            "tile": ["b"] * 4,
+            "class": ["6", "0", "2_3", "4"],
+            "max_diff": [0, 0, 0, 0],
+            "mean_diff": [0, 0, 0, 0],
+            "std_diff": [0.01, 0.5, 0.01 + (0.5 - 0.01) / 2, np.nan],
+        }
+    )
+    expected_tile_b = pd.DataFrame(
+        {"tile": ["b"] * 4, "class": ["6", "0", "2_3", "4"], "malt0": [1, 3 / 5, 4 / 5, np.nan]}
+    )
+
+    input_tile_c = pd.DataFrame(
+        {
+            "tile": ["c"] * 3,
+            "class": ["6", "0", "2_3"],
+            "max_diff": [0.1, 4, 0.1 + (4 - 0.1) / 2],
+            "mean_diff": [0, 0, 0],
+            "std_diff": [0, 0, 0],
+        }
+    )
+
+    expected_tile_c = pd.DataFrame({"tile": ["c"] * 3, "class": ["6", "0", "2_3"], "malt0": [1, 4 / 5, 4.5 / 5]})
+
+    input_tile_d = pd.DataFrame(
+        {
+            "tile": ["d"] * 3,
+            "class": ["6", "0", "2_3"],
+            "max_diff": [0.09, 4.0001, 0.1 + (4 - 0.1) / 2],
+            "mean_diff": [0.01, 0.5, 0.01 + (0.5 - 0.01) / 2],
+            "std_diff": [0.01, 0.5, 0.01 + (0.5 - 0.01) / 2],
+        }
+    )
+
+    expected_tile_d = pd.DataFrame({"tile": ["d"] * 3, "class": ["6", "0", "2_3"], "malt0": [1, 0, 0.5]})
+
+    input_df = pd.concat([input_tile_a, input_tile_b, input_tile_c, input_tile_d])
+    expected_out = pd.concat([expected_tile_a, expected_tile_b, expected_tile_c, expected_tile_d])
+
+    return input_df, expected_out
+
+
+def test_compute_note():
+    input_df, expected_out = generate_metric_dataframes()
+    out_df = MALT0.compute_note(input_df)
+    assert out_df.equals(expected_out)
diff --git a/test/malt0/test_malt0_intrinsic.py b/test/malt0/test_malt0_intrinsic.py
@@ -13,7 +13,7 @@
 
 pytestmark = pytest.mark.docker
 
-TMP_PATH = Path("./tmp/malt0")
+TMP_PATH = Path("./tmp/malt0_intrinsic")
 
 
 def setup_module(module):

diff --git a/test/malt0/test_malt0_relative.py b/test/malt0/test_malt0_relative.py
@@ -14,7 +14,7 @@
 
 pytestmark = pytest.mark.docker
 
-TMP_PATH = Path("./tmp/malt0")
+TMP_PATH = Path("./tmp/malt0_relative")
 
 
 def setup_module(module):
@@ -88,44 +88,6 @@ def test_update_overall_stats():
     # is ok as long as std is the same between the 2 methods
 
 
-note_mpla0_data = [
-    ({}, {}, {}, {}),  # limit case
-    (
-        [0.01, 0.5, 0.01 + (0.5 - 0.01) / 2],  # mean difference between MNXs
-        [0, 0, 0],  # standard deviation difference between MNXs
-        [0, 0, 0],  # maximum difference between MNXs
-        {"6": 1, "0": 3 / 5, "2_3": 4 / 5},  # expected score
-    ),  # Test with mean deviation condition only
-    (
-        [0, 0, 0, 0],  # mean difference between MNXs
-        [0.01, 0.5, 0.01 + (0.5 - 0.01) / 2, np.nan],  # standard deviation difference between MNXs
-        [0, 0, 0, 0],  # maximum difference between MNXs
-        {"6": 1, "0": 3 / 5, "2_3": 4 / 5, "4": np.nan},  # expected score
-    ),  # Test with standard deviation condition only
-    (
-        [0, 0, 0],  # mean difference between MNXs
-        [0, 0, 0],  # standard deviation difference between MNXs
-        [0.1, 4, 0.1 + (4 - 0.1) / 2],  # maximum difference between MNXs
-        {"6": 1, "0": 4 / 5, "2_3": 4.5 / 5},  # expected score
-    ),  # Test with maximum condition only
-    (
-        [0.01, 0.5, 0.01 + (0.5 - 0.01) / 2],  # mean difference between MNXs
-        [0.01, 0.5, 0.01 + (0.5 - 0.01) / 2],  # standard deviation difference between MNXs
-        [0.09, 4.0001, 0.1 + (4 - 0.1) / 2],  # maximum difference between MNXs
-        {"6": 1, "0": 0, "2_3": 0.5},  # expected score
-    ),  # Test all conditions together
-]
-
-
-@pytest.mark.parametrize("mean_diff,std_diff,max_diff,expected", note_mpla0_data)
-def test_compute_note(mean_diff, std_diff, max_diff, expected):
-    ret = malt0_relative.compute_note(mean_diff, std_diff, max_diff, expected.keys())
-    # Check that the dictionaries are equal for nan and non-nan values
-    assert ret.keys() == expected.keys()
-    for k in expected.keys():
-        assert (ret[k] == expected[k]) or (np.isnan(ret[k]) and np.isnan(expected[k]))
-
-
 def test_compute_metric_relative(ensure_malt0_data):
     c1_dir = Path("./data/malt0/c1/intrinsic/mnx")
     ref_dir = Path("./data/malt0/ref/intrinsic/mnx")
@@ -141,12 +103,19 @@ def test_compute_metric_relative(ensure_malt0_data):
     )
     output_csv = TMP_PATH / "relative" / "result.csv"
     output_csv_tile = TMP_PATH / "relative" / "result_tile.csv"
+    expected_cols = {"class", "max_diff", "mean_diff", "std_diff"}
 
     malt0_relative.compute_metric_relative(c1_dir, ref_dir, occupancy_dir, class_weights, output_csv, output_csv_tile)
 
+    df = pd.read_csv(output_csv_tile, sep=csv_separator)
+    assert set(df.columns) == expected_cols | {"tile"}
+
     expected_rows = 2 * 5  # 2 files * 5 classes
     assert utils.csv_num_rows(output_csv_tile) == expected_rows
 
+    df = pd.read_csv(output_csv, sep=csv_separator)
+    assert set(df.columns) == expected_cols
+
     expected_rows = 5  # 5 classes
     assert utils.csv_num_rows(output_csv) == expected_rows