Add scripts to publish results on PapersWithCode (#561)
* Add scripts to publish results on paperwithcode

* Create tasks manually instead (API error: 403 Forbidden)

* Try creating the evaluation first, among other things; still not working

* Update whats_new.rst

* Fix example from API's README; still results in a "403 Forbidden"

* Fix task_id

* Allow passing multiple results files

* Save commands used as comments

* Add comment

---------

Signed-off-by: PierreGtch <[email protected]>
Signed-off-by: Bru <[email protected]>
Co-authored-by: Bru <[email protected]>
PierreGtch and bruAristimunha authored Jul 15, 2024
1 parent bb944ba commit 97c8d6b
Showing 3 changed files with 339 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/source/whats_new.rst
@@ -17,9 +17,11 @@ Develop branch

Enhancements
~~~~~~~~~~~~
- Add scripts to upload results on PapersWithCode (:gh:`561` by `Pierre Guetschel`_)
- Centralize dataset summary tables in CSV files (:gh:`635` by `Pierre Guetschel`_)
- Add new dataset :class:`moabb.datasets.Liu2024` (:gh:`619` by `Taha Habib`_)


Bugs
~~~~
- Fix caching in the workflows (:gh:`632` by `Pierre Guetschel`_)
155 changes: 155 additions & 0 deletions scripts/paperswithcode/create_datasets_and_tasks.py
@@ -0,0 +1,155 @@
import pickle
import re
from argparse import ArgumentParser
from dataclasses import dataclass

from paperswithcode import PapersWithCodeClient
from paperswithcode.models import DatasetCreateRequest


def dataset_name(dataset):
return f"{dataset.code} MOABB"


def dataset_full_name(dataset):
s = dataset.__doc__.split("\n\n")[0]
s = re.sub(r" \[\d+\]_", "", s)
s = re.sub(r"\s+", " ", s)
return s
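
# For example, for a (hypothetical) dataset whose docstring starts with
# "Motor imagery dataset from ABC et al. 2020. [1]_", dataset_full_name
# returns "Motor imagery dataset from ABC et al. 2020.".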


def dataset_url(dataset):
return f"http://moabb.neurotechx.com/docs/generated/moabb.datasets.{dataset.__class__.__name__}.html"


def valid_datasets():
from moabb.datasets.utils import dataset_list
from moabb.utils import aliases_list

deprecated_names = [n[0] for n in aliases_list]
return [
d()
for d in dataset_list
if (d.__name__ not in deprecated_names) and ("Fake" not in d.__name__)
]


# Maps each MOABB paradigm class to a (PapersWithCode task name,
# optional sub-paradigm list, full paradigm name) triple:
_paradigms = {
"MotorImagery": (
"Motor Imagery",
["all classes", "left hand vs. right hand", "right hand vs. feet"],
"Motor Imagery",
),
"P300": ("ERP", None, "Event-Related Potential (ERP)"),
"SSVEP": ("SSVEP", None, "Steady-State Visually Evoked Potential (SSVEP)"),
"CVEP": ("c-VEP", None, "Code-Modulated Visual Evoked Potential (c-VEP)"),
}
# Maps MOABB evaluation class names to their display names:
_evaluations = {
"WithinSession": "Within-Session",
"CrossSession": "Cross-Session",
"CrossSubject": "Cross-Subject",
}


@dataclass
class Task:
id: str
name: str
description: str
area: str
parent_task: str

@classmethod
def make(cls, name, description, area, parent_task):
        # Slugify the name: lowercase, hyphenate spaces, drop parentheses,
        # and truncate at the first period
task_id = (
name.lower().replace(" ", "-").replace("(", "").replace(")", "").split(".")[0]
)
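        # e.g. "Within-Session ERP" -> "within-session-erp"; note that the
        # final split(".") truncates names at the first period (as in "vs.").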
return cls(task_id, name, description, area, parent_task)


def create_tasks(client: PapersWithCodeClient):
tasks = {}
for paradigm_class, (
paradigm_name,
subparadigms,
paradigm_fullname,
) in _paradigms.items():
description = f"Classification of examples recorded under the {paradigm_fullname} paradigm, as part of Brain-Computer Interfaces (BCI)."
d = dict(
name=paradigm_name,
description=description,
area="Medical",
parent_task="Brain Computer Interface",
)
# task = client.task_add(TaskCreateRequest(**d))
task = Task.make(**d)
tasks[paradigm_class] = task
for evaluation_class, evaluation in _evaluations.items():
eval_url = f'http://moabb.neurotechx.com/docs/generated/moabb.evaluations.{evaluation.replace("-", "")}Evaluation.html'
d = dict(
name=f"{evaluation} {paradigm_name}",
description=f"""MOABB's {evaluation} evaluation for the {paradigm_name} paradigm.
Evaluation details: [{eval_url}]({eval_url})""",
area="medical",
parent_task=task.id,
)
# subtask = client.task_add(TaskCreateRequest(**d))
subtask = Task.make(**d)
tasks[(paradigm_class, evaluation_class)] = subtask
if subparadigms is not None:
for subparadigm in subparadigms:
d = dict(
name=f"{evaluation} {paradigm_name} ({subparadigm})",
description=f"""MOABB's {evaluation} evaluation for the {paradigm_name} paradigm ({subparadigm}).
Evaluation details: [{eval_url}]({eval_url})""",
area="medical",
parent_task=subtask.id,
)
# subsubtask = client.task_add(TaskCreateRequest(**d))
subsubtask = Task.make(**d)
tasks[(paradigm_class, evaluation_class, subparadigm)] = subsubtask
return tasks
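
# The returned mapping is keyed by paradigm class, by (paradigm, evaluation)
# pairs, and by (paradigm, evaluation, subparadigm) triples; for instance,
# tasks[("P300", "WithinSession")] holds the task named "Within-Session ERP".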


def create_datasets(client):
datasets = valid_datasets()
pwc_datasets = {}
for dataset in datasets:
pwc_dataset = client.dataset_add(
DatasetCreateRequest(
name=dataset_name(dataset),
full_name=dataset_full_name(dataset),
url=dataset_url(dataset),
)
)
pwc_datasets[dataset.code] = pwc_dataset
return pwc_datasets


if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("token", type=str, help="PapersWithCode API token")
parser.add_argument(
"-o",
"--output",
type=str,
help="Pickle output file",
default="paperswithcode_datasets_and_tasks.pickle",
)
args = parser.parse_args()

client = PapersWithCodeClient(token=args.token)

# create tasks
tasks = create_tasks(client)

# create datasets
datasets = create_datasets(client)
obj = {"datasets": datasets, "tasks": tasks}

with open(args.output, "wb") as f:
pickle.dump(obj, f)
print(f"Datasets and tasks saved to {args.output}")
182 changes: 182 additions & 0 deletions scripts/paperswithcode/upload_results.py
@@ -0,0 +1,182 @@
import pickle
from argparse import ArgumentParser
from dataclasses import dataclass
from math import isnan

import pandas as pd
from paperswithcode import PapersWithCodeClient
from paperswithcode.models import (
EvaluationTableSyncRequest,
MetricSyncRequest,
ResultSyncRequest,
)


@dataclass
class Task:
id: str
name: str
description: str
area: str
parent_task: str


_metrics = {
"time": "training time (s)",
"carbon_emission": "CO2 Emission (g)",
}


def make_table(results_csv_list: list[str], metric: str):
df_list = []
for results_csv in results_csv_list:
df = pd.read_csv(results_csv)
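        # Assumed input schema: columns "dataset", "paradigm", "evaluation",
        # "pipeline" and "score", plus optional "time" and "carbon_emission";
        # scores are averaged per (dataset, paradigm, evaluation, pipeline).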
columns = ["score"]
if "time" in df.columns:
columns.append("time")
if "carbon_emission" in df.columns:
columns.append("carbon_emission")
df = (
df.groupby(
["dataset", "paradigm", "evaluation", "pipeline"],
)[columns]
.mean()
.reset_index()
)
        df.score = df.score * 100  # express scores as percentages
        rename_map = dict(**_metrics, score=metric)
        df.rename(columns=rename_map, inplace=True)
df.paradigm = df.paradigm.replace(
{"FilterBankMotorImagery": "MotorImagery", "LeftRightImagery": "MotorImagery"}
)
print(df.head())
df_list.append(df)
return pd.concat(df_list)


def upload_subtable(client, df, dataset, task, paper, evaluated_on):
kwargs = dict(
task=task.id,
dataset=dataset.id,
description=task.description,
external_id=f"{dataset.id}-{task.id}",
mirror_url="http://moabb.neurotechx.com/docs/benchmark_summary.html",
)
print(f"Uploading {kwargs=}")
# client.evaluation_create(EvaluationTableCreateRequest(**kwargs))

r = EvaluationTableSyncRequest(
**kwargs,
metrics=[
MetricSyncRequest(name=metric, is_loss=metric in _metrics.values())
for metric in df.columns
],
results=[
ResultSyncRequest(
metrics={k: str(v) for k, v in row.to_dict().items() if not isnan(v)},
paper=paper,
methodology=pipeline,
external_id=f"{dataset.id}-{task.id}-{pipeline}",
evaluated_on=evaluated_on,
# external_source_url="http://moabb.neurotechx.com/docs/benchmark_summary.html",
# TODO: maybe update url with the exact row of the result
)
for pipeline, row in df.iterrows()
],
)
print(r)
leaderboard_id = client.evaluation_synchronize(r)
print(f"{leaderboard_id=}")
return leaderboard_id
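
# Note: evaluation_synchronize is expected to create the evaluation table if it
# does not exist yet and to upsert results matched by external_id (assumption
# based on the paperswithcode-client sync API; not verified here).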


def upload_table(client, df, datasets, tasks, paper, evaluated_on, subsubtask):
gp_cols = ["dataset", "paradigm", "evaluation"]
df_gp = df.groupby(gp_cols)
ids = []
for (dataset_name, paradigm_name, evaluation_name), sub_df in df_gp:
dataset = datasets[dataset_name]
task_key = (paradigm_name, evaluation_name)
if subsubtask is not None:
task_key += (subsubtask,)
task = tasks[task_key]
        leaderboard_id = upload_subtable(
client,
sub_df.set_index("pipeline").drop(
columns=gp_cols
), # + list(_metrics.values())),
dataset,
task,
paper,
evaluated_on,
)
        ids.append(leaderboard_id)
return ids


if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("token", type=str, help="PapersWithCode API token")
parser.add_argument(
"metric",
type=str,
help="Metric used in the results CSV (see PapersWithCode metrics)",
)
parser.add_argument(
"results_csv", type=str, help="CSV file with results to upload", nargs="+"
)

parser.add_argument(
"-s",
"--subsubtask",
type=str,
default=None,
help="If relevant, the type of motor imagery task (see create_datasets_and_tasks.py)",
)
parser.add_argument(
"-d",
"--datasets",
type=str,
help="Pickle file created by create_datasets_and_tasks.py",
default="paperswithcode_datasets_and_tasks.pickle",
)
parser.add_argument(
"-o",
"--output",
type=str,
help="Pickle output file",
default="paperswithcode_results.pickle",
)
parser.add_argument("-p", "--paper", type=str, help="Paper URL", default="")
parser.add_argument(
"-e",
"--evaluated_on",
type=str,
help="Results date YYYY-MM-DD",
default="2024-04-09",
)
args = parser.parse_args()

with open(args.datasets, "rb") as f:
datasets = pickle.load(f)
summary_table = make_table(args.results_csv, metric=args.metric)

client = PapersWithCodeClient(token=args.token)

upload_table(
client,
summary_table,
datasets["datasets"],
datasets["tasks"],
args.paper,
args.evaluated_on,
args.subsubtask,
)

# Commands used to upload the results of the benchmark paper:
# (generate a new API token, this one is expired)
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_rf_Optuna.csv -s="right hand vs. feet" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_lhrh_Optuna.csv -s="left hand vs. right hand" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b Accuracy ../moabb_paper_plots/DATA/results_All_Optuna.csv -s="all classes" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b Accuracy ../moabb_paper_plots/DATA/results_SSVEP.csv ../moabb_paper_plots/DATA/results_SSVEP_DL.csv -d paperswithcode_datasets_and_tasks2.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_P300.csv ../moabb_paper_plots/DATA/results_P300_DL.csv -d paperswithcode_datasets_and_tasks2.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
