From 97c8d6bd49ea5f4170ec733b4a23f50b5f2c3f1f Mon Sep 17 00:00:00 2001 From: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Date: Mon, 15 Jul 2024 11:08:35 +0200 Subject: [PATCH] Add scripts to publish results on PaperWithCode (#561) * Add scripts to publish results on paperwithcode * Create tasks manually instead (API error 403 forbidden) * Try to create the evaluation first and other things, not working... * Update whats_new.rst * Fix example from API's README; still results in a "403 Forbidden" * Fix task_id * Allow passing multiple results files * Save commands used as comments * Add comment --------- Signed-off-by: PierreGtch <25532709+PierreGtch@users.noreply.github.com> Signed-off-by: Bru Co-authored-by: Bru --- docs/source/whats_new.rst | 2 + .../create_datasets_and_tasks.py | 155 +++++++++++++++ scripts/paperswithcode/upload_results.py | 182 ++++++++++++++++++ 3 files changed, 339 insertions(+) create mode 100644 scripts/paperswithcode/create_datasets_and_tasks.py create mode 100644 scripts/paperswithcode/upload_results.py diff --git a/docs/source/whats_new.rst b/docs/source/whats_new.rst index a6268315d..8c9453563 100644 --- a/docs/source/whats_new.rst +++ b/docs/source/whats_new.rst @@ -17,9 +17,11 @@ Develop branch Enhancements ~~~~~~~~~~~~ +- Add scripts to upload results on PapersWithCode (:gh:`561` by `Pierre Guetschel`_) - Centralize dataset summary tables in CSV files (:gh:`635` by `Pierre Guetschel`_) - Add new dataset :class:`moabb.datasets.Liu2024` dataset (:gh:`619` by `Taha Habib`_) + Bugs ~~~~ - Fix caching in the workflows (:gh:`632` by `Pierre Guetschel`_) diff --git a/scripts/paperswithcode/create_datasets_and_tasks.py b/scripts/paperswithcode/create_datasets_and_tasks.py new file mode 100644 index 000000000..935a93ad2 --- /dev/null +++ b/scripts/paperswithcode/create_datasets_and_tasks.py @@ -0,0 +1,155 @@ +import pickle +import re +from argparse import ArgumentParser +from dataclasses import dataclass + +from paperswithcode 
import PapersWithCodeClient
from paperswithcode.models import DatasetCreateRequest


def dataset_name(dataset):
    """Return the display name used for a MOABB dataset on PapersWithCode."""
    return f"{dataset.code} MOABB"


def dataset_full_name(dataset):
    """Return the first paragraph of the dataset docstring as a one-line summary.

    Citation markers (e.g. " [1]_") are removed and all whitespace runs are
    collapsed to single spaces.
    """
    s = dataset.__doc__.split("\n\n")[0]
    s = re.sub(r" \[\d+\]_", "", s)  # drop reST citation references
    s = re.sub(r"\s+", " ", s)  # collapse newlines/indentation
    return s


def dataset_url(dataset):
    """Return the URL of the dataset's page in the MOABB documentation."""
    return f"http://moabb.neurotechx.com/docs/generated/moabb.datasets.{dataset.__class__.__name__}.html"


def valid_datasets():
    """Instantiate every MOABB dataset that is neither deprecated nor fake."""
    from moabb.datasets.utils import dataset_list
    from moabb.utils import aliases_list

    deprecated_names = [n[0] for n in aliases_list]
    return [
        d()
        for d in dataset_list
        if (d.__name__ not in deprecated_names) and ("Fake" not in d.__name__)
    ]


# paradigm class name -> (PWC task name, optional sub-paradigms, full paradigm name)
_paradigms = {
    "MotorImagery": (
        "Motor Imagery",
        ["all classes", "left hand vs. right hand", "right hand vs. feet"],
        "Motor Imagery",
    ),
    "P300": ("ERP", None, "Event-Related Potential (ERP)"),
    "SSVEP": ("SSVEP", None, "Steady-State Visually Evoked Potential (SSVEP)"),
    "CVEP": ("c-VEP", None, "Code-Modulated Visual Evoked Potential (c-VEP)"),
}
# evaluation class name (without the "Evaluation" suffix) -> display name
_evaluations = {
    "WithinSession": "Within-Session",
    "CrossSession": "Cross-Session",
    "CrossSubject": "Cross-Subject",
}


@dataclass
class Task:
    """Local stand-in for a PapersWithCode task.

    Tasks are built locally because the task-creation API endpoint returned
    "403 Forbidden" (see commented ``client.task_add`` calls below); the
    instances are pickled for later use by upload_results.py.
    """

    id: str
    name: str
    description: str
    area: str
    parent_task: str

    @classmethod
    def make(cls, name, description, area, parent_task):
        # Slugify the display name (kebab-case, parentheses stripped).
        # NOTE: split(".")[0] truncates at the first period, so e.g.
        # "... vs. right hand" yields an id ending in "-vs" — presumably
        # intentional (see "Fix task_id" in the commit log); verify on PWC.
        task_id = (
            name.lower().replace(" ", "-").replace("(", "").replace(")", "").split(".")[0]
        )
        return cls(task_id, name, description, area, parent_task)


def create_tasks(client: PapersWithCodeClient):
    """Build the hierarchy of PapersWithCode tasks for MOABB results.

    One parent task per paradigm, one sub-task per (paradigm, evaluation)
    pair and, when sub-paradigms exist (Motor Imagery), one sub-sub-task per
    (paradigm, evaluation, sub-paradigm) triple.

    Returns a dict mapping those keys to :class:`Task` objects.
    """
    tasks = {}
    for paradigm_class, (
        paradigm_name,
        subparadigms,
        paradigm_fullname,
    ) in _paradigms.items():
        description = f"Classification of examples recorded under the {paradigm_fullname} paradigm, as part of Brain-Computer Interfaces (BCI)."
        d = dict(
            name=paradigm_name,
            description=description,
            area="Medical",
            parent_task="Brain Computer Interface",
        )
        # task = client.task_add(TaskCreateRequest(**d))  # 403 Forbidden
        task = Task.make(**d)
        tasks[paradigm_class] = task
        for evaluation_class, evaluation in _evaluations.items():
            eval_url = f'http://moabb.neurotechx.com/docs/generated/moabb.evaluations.{evaluation.replace("-", "")}Evaluation.html'
            d = dict(
                name=f"{evaluation} {paradigm_name}",
                description=f"""MOABB's {evaluation} evaluation for the {paradigm_name} paradigm.

Evaluation details: [{eval_url}]({eval_url})""",
                # Fix: was "medical" here but "Medical" on the parent task;
                # use one consistent capitalization everywhere.
                area="Medical",
                parent_task=task.id,
            )
            # subtask = client.task_add(TaskCreateRequest(**d))  # 403 Forbidden
            subtask = Task.make(**d)
            tasks[(paradigm_class, evaluation_class)] = subtask
            if subparadigms is not None:
                for subparadigm in subparadigms:
                    d = dict(
                        name=f"{evaluation} {paradigm_name} ({subparadigm})",
                        description=f"""MOABB's {evaluation} evaluation for the {paradigm_name} paradigm ({subparadigm}).

Evaluation details: [{eval_url}]({eval_url})""",
                        area="Medical",  # fix: unified capitalization (was "medical")
                        parent_task=subtask.id,
                    )
                    # subsubtask = client.task_add(TaskCreateRequest(**d))  # 403 Forbidden
                    subsubtask = Task.make(**d)
                    tasks[(paradigm_class, evaluation_class, subparadigm)] = subsubtask
    return tasks


def create_datasets(client):
    """Register every valid MOABB dataset on PapersWithCode.

    Returns a dict mapping the MOABB dataset code to the created PWC dataset.
    """
    datasets = valid_datasets()
    pwc_datasets = {}
    for dataset in datasets:
        pwc_dataset = client.dataset_add(
            DatasetCreateRequest(
                name=dataset_name(dataset),
                full_name=dataset_full_name(dataset),
                url=dataset_url(dataset),
            )
        )
        pwc_datasets[dataset.code] = pwc_dataset
    return pwc_datasets


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("token", type=str, help="PapersWithCode API token")
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="Pickle output file",
        default="paperswithcode_datasets_and_tasks.pickle",
    )
    args = parser.parse_args()

    client = PapersWithCodeClient(token=args.token)

    # Tasks are created locally (API returned 403), datasets via the API.
    tasks = create_tasks(client)
    datasets = create_datasets(client)
    obj = {"datasets": datasets, "tasks": tasks}

    with open(args.output, "wb") as f:
        pickle.dump(obj, f)
    print(f"Datasets and tasks saved to {args.output}")


# =========================================================================
# scripts/paperswithcode/upload_results.py
# =========================================================================
import pickle
from argparse import ArgumentParser
from dataclasses import dataclass
from math import isnan

import pandas as pd
from paperswithcode import PapersWithCodeClient
from paperswithcode.models import (
    EvaluationTableSyncRequest,
    MetricSyncRequest,
    ResultSyncRequest,
)


@dataclass
class Task:
    """Mirror of the Task dataclass pickled by create_datasets_and_tasks.py.

    Both scripts run as ``__main__``, so redefining the class here lets
    ``pickle.load`` resolve ``__main__.Task`` — presumably why it is
    duplicated instead of imported; confirm before refactoring.
    """

    id: str
    name: str
    description: str
    area: str
    parent_task: str


# results-CSV column name -> PapersWithCode metric name
_metrics = {
    "time": "training time (s)",
    "carbon_emission": "CO2 Emission (g)",
}
def make_table(results_csv_list: list[str], metric: str) -> pd.DataFrame:
    """Load and aggregate MOABB result CSVs into one summary table.

    Each CSV is averaged per (dataset, paradigm, evaluation, pipeline),
    scores are converted from fractions to percentages, and metric columns
    are renamed to their PapersWithCode names (``score`` -> *metric*).
    Sub-paradigm labels are folded into the generic "MotorImagery" paradigm.
    """
    df_list = []
    for results_csv in results_csv_list:
        df = pd.read_csv(results_csv)
        columns = ["score"]
        # time/carbon columns are optional depending on how the benchmark ran
        if "time" in df.columns:
            columns.append("time")
        if "carbon_emission" in df.columns:
            columns.append("carbon_emission")
        df = (
            df.groupby(
                ["dataset", "paradigm", "evaluation", "pipeline"],
            )[columns]
            .mean()
            .reset_index()
        )
        df.score = df.score * 100  # fraction -> percentage
        df.rename(columns=dict(**_metrics, score=metric), inplace=True)
        df.paradigm = df.paradigm.replace(
            {"FilterBankMotorImagery": "MotorImagery", "LeftRightImagery": "MotorImagery"}
        )
        print(df.head())
        df_list.append(df)
    return pd.concat(df_list)


def upload_subtable(client, df, dataset, task, paper, evaluated_on):
    """Push one (dataset, task) leaderboard to PapersWithCode.

    ``df`` must be indexed by pipeline name with one column per metric.
    Returns the id of the synchronized leaderboard.
    """
    kwargs = dict(
        task=task.id,
        dataset=dataset.id,
        description=task.description,
        external_id=f"{dataset.id}-{task.id}",
        mirror_url="http://moabb.neurotechx.com/docs/benchmark_summary.html",
    )
    print(f"Uploading {kwargs=}")
    # client.evaluation_create(EvaluationTableCreateRequest(**kwargs))

    r = EvaluationTableSyncRequest(
        **kwargs,
        metrics=[
            # time/CO2 are "losses" (lower is better); the score metric is not
            MetricSyncRequest(name=metric, is_loss=metric in _metrics.values())
            for metric in df.columns
        ],
        results=[
            ResultSyncRequest(
                # NaN = metric absent from this CSV after concat: skip it
                metrics={k: str(v) for k, v in row.to_dict().items() if not isnan(v)},
                paper=paper,
                methodology=pipeline,
                external_id=f"{dataset.id}-{task.id}-{pipeline}",
                evaluated_on=evaluated_on,
                # external_source_url="http://moabb.neurotechx.com/docs/benchmark_summary.html",
                # TODO: maybe update url with the exact row of the result
            )
            for pipeline, row in df.iterrows()
        ],
    )
    print(r)
    leaderboard_id = client.evaluation_synchronize(r)
    print(f"{leaderboard_id=}")
    return leaderboard_id


def upload_table(client, df, datasets, tasks, paper, evaluated_on, subsubtask):
    """Split the summary table per (dataset, paradigm, evaluation) and upload
    each sub-table to its PapersWithCode task.

    ``subsubtask``, when given, selects the sub-paradigm task (e.g.
    "left hand vs. right hand") instead of the generic evaluation task.
    Returns the list of created leaderboard ids.
    """
    gp_cols = ["dataset", "paradigm", "evaluation"]
    leaderboard_ids = []
    for (dataset_name, paradigm_name, evaluation_name), sub_df in df.groupby(gp_cols):
        dataset = datasets[dataset_name]
        task_key = (paradigm_name, evaluation_name)
        if subsubtask is not None:
            task_key += (subsubtask,)
        task = tasks[task_key]
        leaderboard_ids.append(
            upload_subtable(
                client,
                sub_df.set_index("pipeline").drop(columns=gp_cols),
                dataset,
                task,
                paper,
                evaluated_on,
            )
        )
    return leaderboard_ids


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("token", type=str, help="PapersWithCode API token")
    parser.add_argument(
        "metric",
        type=str,
        help="Metric used in the results CSV (see PapersWithCode metrics)",
    )
    parser.add_argument(
        "results_csv", type=str, help="CSV file with results to upload", nargs="+"
    )

    parser.add_argument(
        "-s",
        "--subsubtask",
        type=str,
        default=None,
        help="If relevant, the type of motor imagery task (see create_datasets_and_tasks.py)",
    )
    parser.add_argument(
        "-d",
        "--datasets",
        type=str,
        help="Pickle file created by create_datasets_and_tasks.py",
        default="paperswithcode_datasets_and_tasks.pickle",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="Pickle output file",
        default="paperswithcode_results.pickle",
    )
    parser.add_argument("-p", "--paper", type=str, help="Paper URL", default="")
    parser.add_argument(
        "-e",
        "--evaluated_on",
        type=str,
        help="Results date YYYY-MM-DD",
        default="2024-04-09",
    )
    args = parser.parse_args()

    with open(args.datasets, "rb") as f:
        datasets = pickle.load(f)
    summary_table = make_table(args.results_csv, metric=args.metric)

    client = PapersWithCodeClient(token=args.token)

    leaderboard_ids = upload_table(
        client,
        summary_table,
        datasets["datasets"],
        datasets["tasks"],
        args.paper,
        args.evaluated_on,
        args.subsubtask,
    )

    # Fix: --output was parsed but never used; persist the created
    # leaderboard ids so uploads can be audited later.
    with open(args.output, "wb") as f:
        pickle.dump(leaderboard_ids, f)
    print(f"Leaderboard ids saved to {args.output}")

# Commands used to upload the results of the benchmark paper:
# (generate a new API token, this one is expired)
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_rf_Optuna.csv -s="right hand vs. feet" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_lhrh_Optuna.csv -s="left hand vs. right hand" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b Accuracy ../moabb_paper_plots/DATA/results_All_Optuna.csv -s="all classes" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b Accuracy ../moabb_paper_plots/DATA/results_SSVEP.csv ../moabb_paper_plots/DATA/results_SSVEP_DL.csv -d paperswithcode_datasets_and_tasks2.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_P300.csv ../moabb_paper_plots/DATA/results_P300_DL.csv -d paperswithcode_datasets_and_tasks2.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03