Add scripts to publish results on PapersWithCode (#561)
* Add scripts to publish results on paperswithcode
* Create tasks manually instead (API error 403 forbidden)
* Try to create the evaluation first and other things, not working...
* Update whats_new.rst
* Fix example from API's README; still results in a "403 Forbidden"
* Fix task_id
* Allow passing multiple results files
* Save commands used as comments
* Add comment

---------

Signed-off-by: PierreGtch <[email protected]>
Signed-off-by: Bru <[email protected]>
Co-authored-by: Bru <[email protected]>
1 parent: bb944ba
Commit: 97c8d6b
Showing 3 changed files with 339 additions and 0 deletions.
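The two scripts below are meant to be run in sequence: create_datasets_and_tasks.py registers the MOABB datasets and the paradigm/evaluation task hierarchy on PapersWithCode and pickles the created objects, and upload_results.py reads that pickle plus one or more results CSVs and pushes the scores as leaderboard entries. A rough sketch of that workflow, assuming both files are importable from scripts/paperswithcode/ (the token, CSV path, and metric name are placeholders, not values from this commit):

from paperswithcode import PapersWithCodeClient

from create_datasets_and_tasks import create_datasets, create_tasks
from upload_results import make_table, upload_table

client = PapersWithCodeClient(token="<your API token>")  # placeholder token

# Step 1: register datasets and the paradigm/evaluation task hierarchy.
tasks = create_tasks(client)        # keyed by paradigm / (paradigm, evaluation) / (..., subparadigm)
datasets = create_datasets(client)  # keyed by MOABB dataset code

# Step 2: aggregate results CSVs and push them as leaderboard entries.
table = make_table(["results.csv"], metric="Accuracy")  # placeholder CSV and metric
upload_table(client, table, datasets, tasks,
             paper="", evaluated_on="2024-04-09", subsubtask=None)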
scripts/paperswithcode/create_datasets_and_tasks.py
@@ -0,0 +1,155 @@
import pickle
import re
from argparse import ArgumentParser
from dataclasses import dataclass

from paperswithcode import PapersWithCodeClient
from paperswithcode.models import DatasetCreateRequest


def dataset_name(dataset):
    return f"{dataset.code} MOABB"


def dataset_full_name(dataset):
    s = dataset.__doc__.split("\n\n")[0]
    s = re.sub(r" \[\d+\]_", "", s)
    s = re.sub(r"\s+", " ", s)
    return s


def dataset_url(dataset):
    return f"http://moabb.neurotechx.com/docs/generated/moabb.datasets.{dataset.__class__.__name__}.html"


def valid_datasets():
    from moabb.datasets.utils import dataset_list
    from moabb.utils import aliases_list

    deprecated_names = [n[0] for n in aliases_list]
    return [
        d()
        for d in dataset_list
        if (d.__name__ not in deprecated_names) and ("Fake" not in d.__name__)
    ]


_paradigms = {
    "MotorImagery": (
        "Motor Imagery",
        ["all classes", "left hand vs. right hand", "right hand vs. feet"],
        "Motor Imagery",
    ),
    "P300": ("ERP", None, "Event-Related Potential (ERP)"),
    "SSVEP": ("SSVEP", None, "Steady-State Visually Evoked Potential (SSVEP)"),
    "CVEP": ("c-VEP", None, "Code-Modulated Visual Evoked Potential (c-VEP)"),
}
_evaluations = {
    "WithinSession": "Within-Session",
    "CrossSession": "Cross-Session",
    "CrossSubject": "Cross-Subject",
}


@dataclass
class Task:
    id: str
    name: str
    description: str
    area: str
    parent_task: str

    @classmethod
    def make(cls, name, description, area, parent_task):
        # build a slug id: lowercase, hyphens instead of spaces, no parentheses
        task_id = (
            name.lower().replace(" ", "-").replace("(", "").replace(")", "").split(".")[0]
        )
        return cls(task_id, name, description, area, parent_task)


def create_tasks(client: PapersWithCodeClient):
    tasks = {}
    for paradigm_class, (
        paradigm_name,
        subparadigms,
        paradigm_fullname,
    ) in _paradigms.items():
        description = f"Classification of examples recorded under the {paradigm_fullname} paradigm, as part of Brain-Computer Interfaces (BCI)."
        d = dict(
            name=paradigm_name,
            description=description,
            area="Medical",
            parent_task="Brain Computer Interface",
        )
        # task = client.task_add(TaskCreateRequest(**d))
        task = Task.make(**d)
        tasks[paradigm_class] = task
        for evaluation_class, evaluation in _evaluations.items():
            eval_url = f'http://moabb.neurotechx.com/docs/generated/moabb.evaluations.{evaluation.replace("-", "")}Evaluation.html'
            d = dict(
                name=f"{evaluation} {paradigm_name}",
                description=f"""MOABB's {evaluation} evaluation for the {paradigm_name} paradigm.
Evaluation details: [{eval_url}]({eval_url})""",
                area="medical",
                parent_task=task.id,
            )
            # subtask = client.task_add(TaskCreateRequest(**d))
            subtask = Task.make(**d)
            tasks[(paradigm_class, evaluation_class)] = subtask
            if subparadigms is not None:
                for subparadigm in subparadigms:
                    d = dict(
                        name=f"{evaluation} {paradigm_name} ({subparadigm})",
                        description=f"""MOABB's {evaluation} evaluation for the {paradigm_name} paradigm ({subparadigm}).
Evaluation details: [{eval_url}]({eval_url})""",
                        area="medical",
                        parent_task=subtask.id,
                    )
                    # subsubtask = client.task_add(TaskCreateRequest(**d))
                    subsubtask = Task.make(**d)
                    tasks[(paradigm_class, evaluation_class, subparadigm)] = subsubtask
    return tasks


def create_datasets(client):
    datasets = valid_datasets()
    pwc_datasets = {}
    for dataset in datasets:
        pwc_dataset = client.dataset_add(
            DatasetCreateRequest(
                name=dataset_name(dataset),
                full_name=dataset_full_name(dataset),
                url=dataset_url(dataset),
            )
        )
        pwc_datasets[dataset.code] = pwc_dataset
    return pwc_datasets


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("token", type=str, help="PapersWithCode API token")
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="Pickle output file",
        default="paperswithcode_datasets_and_tasks.pickle",
    )
    args = parser.parse_args()

    client = PapersWithCodeClient(token=args.token)

    # create tasks
    tasks = create_tasks(client)

    # create datasets
    datasets = create_datasets(client)
    obj = {"datasets": datasets, "tasks": tasks}

    with open(args.output, "wb") as f:
        pickle.dump(obj, f)
    print(f"Datasets and tasks saved to {args.output}")
scripts/paperswithcode/upload_results.py
@@ -0,0 +1,182 @@
import pickle
from argparse import ArgumentParser
from dataclasses import dataclass
from math import isnan

import pandas as pd
from paperswithcode import PapersWithCodeClient
from paperswithcode.models import (
    EvaluationTableSyncRequest,
    MetricSyncRequest,
    ResultSyncRequest,
)


@dataclass
class Task:
    id: str
    name: str
    description: str
    area: str
    parent_task: str


_metrics = {
    "time": "training time (s)",
    "carbon_emission": "CO2 Emission (g)",
}


def make_table(results_csv_list: list[str], metric: str):
    df_list = []
    for results_csv in results_csv_list:
        df = pd.read_csv(results_csv)
        columns = ["score"]
        if "time" in df.columns:
            columns.append("time")
        if "carbon_emission" in df.columns:
            columns.append("carbon_emission")
        df = (
            df.groupby(
                ["dataset", "paradigm", "evaluation", "pipeline"],
            )[columns]
            .mean()
            .reset_index()
        )
        df.score = df.score * 100
        columns = dict(**_metrics, score=metric)
        df.rename(columns=columns, inplace=True)
        df.paradigm = df.paradigm.replace(
            {"FilterBankMotorImagery": "MotorImagery", "LeftRightImagery": "MotorImagery"}
        )
        print(df.head())
        df_list.append(df)
    return pd.concat(df_list)


def upload_subtable(client, df, dataset, task, paper, evaluated_on):
    kwargs = dict(
        task=task.id,
        dataset=dataset.id,
        description=task.description,
        external_id=f"{dataset.id}-{task.id}",
        mirror_url="http://moabb.neurotechx.com/docs/benchmark_summary.html",
    )
    print(f"Uploading {kwargs=}")
    # client.evaluation_create(EvaluationTableCreateRequest(**kwargs))

    r = EvaluationTableSyncRequest(
        **kwargs,
        metrics=[
            MetricSyncRequest(name=metric, is_loss=metric in _metrics.values())
            for metric in df.columns
        ],
        results=[
            ResultSyncRequest(
                metrics={k: str(v) for k, v in row.to_dict().items() if not isnan(v)},
                paper=paper,
                methodology=pipeline,
                external_id=f"{dataset.id}-{task.id}-{pipeline}",
                evaluated_on=evaluated_on,
                # external_source_url="http://moabb.neurotechx.com/docs/benchmark_summary.html",
                # TODO: maybe update url with the exact row of the result
            )
            for pipeline, row in df.iterrows()
        ],
    )
    print(r)
    leaderboard_id = client.evaluation_synchronize(r)
    print(f"{leaderboard_id=}")
    return leaderboard_id


def upload_table(client, df, datasets, tasks, paper, evaluated_on, subsubtask):
    gp_cols = ["dataset", "paradigm", "evaluation"]
    df_gp = df.groupby(gp_cols)
    ids = []
    for (dataset_name, paradigm_name, evaluation_name), sub_df in df_gp:
        dataset = datasets[dataset_name]
        task_key = (paradigm_name, evaluation_name)
        if subsubtask is not None:
            task_key += (subsubtask,)
        task = tasks[task_key]
        id = upload_subtable(
            client,
            sub_df.set_index("pipeline").drop(
                columns=gp_cols
            ),  # + list(_metrics.values())),
            dataset,
            task,
            paper,
            evaluated_on,
        )
        ids.append(id)
    return ids


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("token", type=str, help="PapersWithCode API token")
    parser.add_argument(
        "metric",
        type=str,
        help="Metric used in the results CSV (see PapersWithCode metrics)",
    )
    parser.add_argument(
        "results_csv", type=str, help="CSV file with results to upload", nargs="+"
    )

    parser.add_argument(
        "-s",
        "--subsubtask",
        type=str,
        default=None,
        help="If relevant, the type of motor imagery task (see create_datasets_and_tasks.py)",
    )
    parser.add_argument(
        "-d",
        "--datasets",
        type=str,
        help="Pickle file created by create_datasets_and_tasks.py",
        default="paperswithcode_datasets_and_tasks.pickle",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        help="Pickle output file",
        default="paperswithcode_results.pickle",
    )
    parser.add_argument("-p", "--paper", type=str, help="Paper URL", default="")
    parser.add_argument(
        "-e",
        "--evaluated_on",
        type=str,
        help="Results date YYYY-MM-DD",
        default="2024-04-09",
    )
    args = parser.parse_args()

    with open(args.datasets, "rb") as f:
        datasets = pickle.load(f)
    summary_table = make_table(args.results_csv, metric=args.metric)

    client = PapersWithCodeClient(token=args.token)

    upload_table(
        client,
        summary_table,
        datasets["datasets"],
        datasets["tasks"],
        args.paper,
        args.evaluated_on,
        args.subsubtask,
    )


# Commands used to upload the results of the benchmark paper:
# (generate a new API token, this one is expired)
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_rf_Optuna.csv -s="right hand vs. feet" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_lhrh_Optuna.csv -s="left hand vs. right hand" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b Accuracy ../moabb_paper_plots/DATA/results_All_Optuna.csv -s="all classes" -d paperswithcode_datasets_and_tasks2.pickle -o test_out.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b Accuracy ../moabb_paper_plots/DATA/results_SSVEP.csv ../moabb_paper_plots/DATA/results_SSVEP_DL.csv -d paperswithcode_datasets_and_tasks2.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
# python scripts/paperswithcode/upload_results.py 5a4bd76b2b66908f0b8f28fb45dd41b918d3440b AUC-ROC ../moabb_paper_plots/DATA/results_P300.csv ../moabb_paper_plots/DATA/results_P300_DL.csv -d paperswithcode_datasets_and_tasks2.pickle -p "https://arxiv.org/abs/2404.15319v1" -e=2024-04-03
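For context on what these commands feed into PapersWithCode, here is a toy illustration of the aggregation make_table() performs on a results CSV. The scores, times, and pipeline names are made up for the example; only the column names and the processing steps follow the script above.

import pandas as pd

raw = pd.DataFrame(
    {
        "dataset": ["BNCI2014-001"] * 4,
        "paradigm": ["LeftRightImagery"] * 4,
        "evaluation": ["WithinSession"] * 4,
        "pipeline": ["CSP+LDA", "CSP+LDA", "TS+LR", "TS+LR"],
        "score": [0.80, 0.90, 0.85, 0.95],  # made-up per-fold scores
        "time": [1.0, 2.0, 1.5, 2.5],       # made-up training times (s)
    }
)

# Same steps as make_table(): average the selected metrics within each
# (dataset, paradigm, evaluation, pipeline) group, convert the score to a
# percentage, and rename columns to the PapersWithCode metric names.
agg = (
    raw.groupby(["dataset", "paradigm", "evaluation", "pipeline"])[["score", "time"]]
    .mean()
    .reset_index()
)
agg["score"] = agg["score"] * 100
agg = agg.rename(columns={"score": "Accuracy", "time": "training time (s)"})
agg["paradigm"] = agg["paradigm"].replace({"LeftRightImagery": "MotorImagery"})
print(agg)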