Skip to content

Commit

Permalink
Move note computation for MALT0
Browse files Browse the repository at this point in the history
  • Loading branch information
leavauchier committed Dec 11, 2023
1 parent e5500de commit 90300a6
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 64 deletions.
22 changes: 21 additions & 1 deletion coclico/malt0/malt0.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
from pathlib import Path
from typing import List

import pandas as pd
from gpao.job import Job

from coclico.metrics.commons import bounded_affine_function
from coclico.metrics.metric import Metric
from coclico.version import __version__

Expand Down Expand Up @@ -40,7 +42,7 @@ class MALT0(Metric):

# Pixel size for MNx
pixel_size = 0.5
metric_name = "MALT0"
metric_name = "malt0"

def create_metric_intrinsic_one_job(self, name: str, input: Path, output: Path, is_ref: bool):
job_name = f"{self.metric_name}_intrinsic_{name}_{input.stem}"
Expand Down Expand Up @@ -95,3 +97,21 @@ def create_metric_relative_to_ref_jobs(
job.add_dependency(ref_job)

return [job]

@staticmethod
def compute_note(metric_df: pd.DataFrame):
    """Replace the MALT0 difference statistics of a dataframe by a single note.

    Each of the "max_diff", "mean_diff" and "std_diff" columns is converted to a
    sub-note with bounded_affine_function. The sub-notes are summed and divided by 5
    (the sum of the sub-note upper bounds: 1 + 2 + 2), and the result is stored in a
    column named after MALT0.metric_name.

    Args:
        metric_df (pd.DataFrame): dataframe containing the "max_diff", "mean_diff" and
            "std_diff" columns. It is modified in place: the note column is added
            and the three statistics columns are dropped.

    Returns:
        pd.DataFrame: the same dataframe object as the one passed in
    """
    # Control points given to bounded_affine_function for each statistic:
    # 0 <= max note <= 1, 0 <= mean note <= 2, 0 <= std note <= 2
    control_points = {
        "max_diff": ((0.1, 1), (4, 0)),
        "mean_diff": ((0.01, 2), (0.5, 0)),
        "std_diff": ((0.01, 2), (0.5, 0)),
    }
    sub_notes = {
        column: bounded_affine_function(first_point, second_point, metric_df[column])
        for column, (first_point, second_point) in control_points.items()
    }

    weighted_sum = sub_notes["max_diff"] + sub_notes["mean_diff"] + sub_notes["std_diff"]
    metric_df[MALT0.metric_name] = weighted_sum / 5

    metric_df.drop(columns=list(control_points), inplace=True)

    return metric_df
43 changes: 20 additions & 23 deletions coclico/malt0/malt0_relative.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,14 @@
import json
import logging
from pathlib import Path
from typing import Dict, List
from typing import Dict

import numpy as np
import numpy.ma as ma
import pandas as pd
import rasterio

from coclico.config import csv_separator
from coclico.metrics.commons import bounded_affine_function


def compute_note(mean_diff: np.array, std_diff: np.array, max_diff: np.array, classes: List[int]) -> Dict:
    """Compute one MALT0 note per class from the difference statistics.

    Args:
        mean_diff (np.array): mean difference, indexed in the same order as classes
        std_diff (np.array): standard deviation of the difference, same ordering
        max_diff (np.array): maximum difference, same ordering
        classes (List[int]): class identifiers, used as keys of the result

    Returns:
        Dict: note for each class, keyed by the class identifier
    """
    notes = {}
    for index, class_key in enumerate(classes):
        max_note = bounded_affine_function((0.1, 1), (4, 0), max_diff[index])  # 0 <= max_note <= 1
        mean_note = bounded_affine_function((0.01, 2), (0.5, 0), mean_diff[index])  # 0 <= mean_note <= 2
        std_note = bounded_affine_function((0.01, 2), (0.5, 0), std_diff[index])  # 0 <= std_note <= 2

        # divide by 5 because weights are : max_note(1), mean_note(2), std_note(2)
        notes[class_key] = (max_note + mean_note + std_note) / 5

    return notes


def compute_stats_single_raster(raster: np.array, occupancy_raster: np.array):
Expand Down Expand Up @@ -89,7 +72,6 @@ def compute_metric_relative(
c1_dir: Path, ref_dir: Path, occupancy_dir: Path, class_weights: Dict, output_csv: Path, output_csv_tile: Path
):
"""TODO"""
metric = "malt0"
classes = sorted(class_weights.keys())
csv_data = []

Expand Down Expand Up @@ -124,9 +106,17 @@ def compute_metric_relative(
max_diff, count, mean_diff, std_diff, m2_diff = compute_stats_single_raster(
np.abs(c1_raster - ref_raster), occupancy_raster
)
note = compute_note(mean_diff, std_diff, max_diff, classes)

new_line = [{"tile": ref_file.stem, "class": cl, metric: note[cl]} for cl in classes]
new_line = [
{
"tile": ref_file.stem,
"class": cl,
"max_diff": max_diff[ii],
"mean_diff": mean_diff[ii],
"std_diff": std_diff[ii],
}
for ii, cl in enumerate(classes)
]
csv_data.extend(new_line)

total_max_diff, total_count, total_mean_diff, total_m2 = update_overall_stats(
Expand All @@ -140,9 +130,16 @@ def compute_metric_relative(
df.to_csv(output_csv_tile, index=False, sep=csv_separator)
logging.debug(df.to_markdown())

total_notes = compute_note(total_mean_diff, total_std_diff, total_max_diff, classes)
data = [
{
"class": cl,
"max_diff": total_max_diff[ii],
"mean_diff": total_mean_diff[ii],
"std_diff": total_std_diff[ii],
}
for ii, cl in enumerate(classes)
]

data = [{"class": cl, metric: total_notes.get(cl, 0)} for cl in classes]
df = pd.DataFrame(data)
df.to_csv(output_csv, index=False, sep=csv_separator)

Expand Down
83 changes: 83 additions & 0 deletions test/malt0/test_malt0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import shutil
from pathlib import Path

import numpy as np
import pandas as pd
import pytest

from coclico.malt0.malt0 import MALT0

pytestmark = pytest.mark.docker

TMP_PATH = Path("./tmp/malt0")


def setup_module(module):
    """Delete the temporary output directory of this test module if it already exists."""
    if not TMP_PATH.is_dir():
        return
    shutil.rmtree(TMP_PATH)


def generate_metric_dataframes():
    """Build the input statistics and the expected notes used to test the note computation.

    One dataframe is built per tile and the tiles are concatenated in order a, b, c, d
    (the per-tile row indices are kept as they are by the concatenation).

    Returns:
        tuple: (input_df, expected_out) where input_df has the columns "tile", "class",
        "max_diff", "mean_diff", "std_diff" and expected_out has the columns "tile",
        "class", "malt0"
    """
    # Values located half-way between the two control points of each condition
    half_way_mean_std = 0.01 + (0.5 - 0.01) / 2
    half_way_max = 0.1 + (4 - 0.1) / 2
    usual_classes = ["6", "0", "2_3"]

    # Each case: (tile, classes, max_diff, mean_diff, std_diff, expected malt0 note)
    cases = [
        # tile a: Test with mean condition only
        ("a", usual_classes, [0, 0, 0], [0.01, 0.5, half_way_mean_std], [0, 0, 0], [1, 3 / 5, 4 / 5]),
        # tile b: Test with standard deviation condition only
        (
            "b",
            ["6", "0", "2_3", "4"],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0.01, 0.5, half_way_mean_std, np.nan],
            [1, 3 / 5, 4 / 5, np.nan],
        ),
        # tile c: Test with maximum condition only
        ("c", usual_classes, [0.1, 4, half_way_max], [0, 0, 0], [0, 0, 0], [1, 4 / 5, 4.5 / 5]),
        # tile d: Test all conditions together
        (
            "d",
            usual_classes,
            [0.09, 4.0001, half_way_max],
            [0.01, 0.5, half_way_mean_std],
            [0.01, 0.5, half_way_mean_std],
            [1, 0, 0.5],
        ),
    ]

    input_tiles = []
    expected_tiles = []
    for tile, classes, max_diff, mean_diff, std_diff, notes in cases:
        tile_column = [tile] * len(classes)
        input_tiles.append(
            pd.DataFrame(
                {
                    "tile": tile_column,
                    "class": list(classes),
                    "max_diff": max_diff,
                    "mean_diff": mean_diff,
                    "std_diff": std_diff,
                }
            )
        )
        expected_tiles.append(pd.DataFrame({"tile": tile_column, "class": list(classes), "malt0": notes}))

    return pd.concat(input_tiles), pd.concat(expected_tiles)


def test_compute_note():
    """Check that MALT0.compute_note returns the expected notes for the generated statistics."""
    input_df, expected_out = generate_metric_dataframes()
    out_df = MALT0.compute_note(input_df)
    # assert_frame_equal is preferred to DataFrame.equals here: the notes are computed floats
    # compared to hand-written fractions, so they are compared with a tolerance instead of
    # bit-for-bit, and a failure reports which column/values differ instead of a bare False.
    # NaN values located at the same position are still considered equal.
    pd.testing.assert_frame_equal(out_df, expected_out)
2 changes: 1 addition & 1 deletion test/malt0/test_malt0_intrinsic.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

pytestmark = pytest.mark.docker

TMP_PATH = Path("./tmp/malt0")
TMP_PATH = Path("./tmp/malt0_intrinsic")


def setup_module(module):
Expand Down
47 changes: 8 additions & 39 deletions test/malt0/test_malt0_relative.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

pytestmark = pytest.mark.docker

TMP_PATH = Path("./tmp/malt0")
TMP_PATH = Path("./tmp/malt0_relative")


def setup_module(module):
Expand Down Expand Up @@ -88,44 +88,6 @@ def test_update_overall_stats():
# is ok as long as std is the same between the 2 methods


# Parameter sets for test_compute_note. Each entry is a tuple
# (mean_diff, std_diff, max_diff, expected) where the first three items hold one value per class
# and expected maps each class to its expected note. The classes are the keys of expected, in
# the same order as the values.
note_mpla0_data = [
({}, {}, {}, {}), # limit case
(
[0.01, 0.5, 0.01 + (0.5 - 0.01) / 2], # mean difference between MNXs
[0, 0, 0], # standard deviation difference between MNXs
[0, 0, 0], # maximum difference between MNXs
{"6": 1, "0": 3 / 5, "2_3": 4 / 5}, # expected score
), # Test with mean deviation condition only
(
[0, 0, 0, 0], # mean difference between MNXs
[0.01, 0.5, 0.01 + (0.5 - 0.01) / 2, np.nan], # standard deviation difference between MNXs
[0, 0, 0, 0], # maximum difference between MNXs
{"6": 1, "0": 3 / 5, "2_3": 4 / 5, "4": np.nan}, # expected score
), # Test with standard deviation condition only
(
[0, 0, 0], # mean difference between MNXs
[0, 0, 0], # standard deviation difference between MNXs
[0.1, 4, 0.1 + (4 - 0.1) / 2], # maximum difference between MNXs
{"6": 1, "0": 4 / 5, "2_3": 4.5 / 5}, # expected score
), # Test with maximum condition only
(
[0.01, 0.5, 0.01 + (0.5 - 0.01) / 2], # mean difference between MNXs
[0.01, 0.5, 0.01 + (0.5 - 0.01) / 2], # standard deviation difference between MNXs
[0.09, 4.0001, 0.1 + (4 - 0.1) / 2], # maximum difference between MNXs
{"6": 1, "0": 0, "2_3": 0.5}, # expected score
), # Test all conditions together
]


@pytest.mark.parametrize("mean_diff,std_diff,max_diff,expected", note_mpla0_data)
def test_compute_note(mean_diff, std_diff, max_diff, expected):
    """Check the note computed for each class against the expected one (NaN matches NaN)."""
    computed = malt0_relative.compute_note(mean_diff, std_diff, max_diff, expected.keys())
    # Same set of classes in both dictionaries
    assert computed.keys() == expected.keys()
    for class_key in expected.keys():
        if computed[class_key] == expected[class_key]:
            continue
        # NaN never compares equal to itself: a mismatch is only accepted when both notes are NaN
        assert np.isnan(computed[class_key]) and np.isnan(expected[class_key])


def test_compute_metric_relative(ensure_malt0_data):
c1_dir = Path("./data/malt0/c1/intrinsic/mnx")
ref_dir = Path("./data/malt0/ref/intrinsic/mnx")
Expand All @@ -141,12 +103,19 @@ def test_compute_metric_relative(ensure_malt0_data):
)
output_csv = TMP_PATH / "relative" / "result.csv"
output_csv_tile = TMP_PATH / "relative" / "result_tile.csv"
expected_cols = {"class", "max_diff", "mean_diff", "std_diff"}

malt0_relative.compute_metric_relative(c1_dir, ref_dir, occupancy_dir, class_weights, output_csv, output_csv_tile)

df = pd.read_csv(output_csv_tile, sep=csv_separator)
assert set(df.columns) == expected_cols | {"tile"}

expected_rows = 2 * 5 # 2 files * 5 classes
assert utils.csv_num_rows(output_csv_tile) == expected_rows

df = pd.read_csv(output_csv, sep=csv_separator)
assert set(df.columns) == expected_cols

expected_rows = 5 # 5 classes
assert utils.csv_num_rows(output_csv) == expected_rows

Expand Down

0 comments on commit 90300a6

Please sign in to comment.