From f88bc9a212770c373e1f2e7937fac13ae0722e82 Mon Sep 17 00:00:00 2001 From: alvinn Date: Thu, 18 Apr 2024 01:25:48 +0300 Subject: [PATCH] publish blind dev-set-2 and revealed dev-set-1 --- README.md | 6 +++--- run_inference.py | 6 +++--- utils/azure_storage.py | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 1312c51..4ed05a2 100644 --- a/README.md +++ b/README.md @@ -240,7 +240,7 @@ calculate the loss when training. # NOTSOFAR-1 Datasets - Download Instructions This section is for those specifically interested in downloading the NOTSOFAR datasets.
The NOTSOFAR-1 Challenge provides two datasets: a recorded meeting dataset and a simulated training dataset.
-The datasets are stored in Azure Blob Storage, to download them, you will need to setup [AzCopy](https://aka.ms/downloadazcopy-v10-linux-arm64) +The datasets are stored in Azure Blob Storage. To download them, you will need to set up [AzCopy](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10#download-azcopy). You can use either the python utilities in `utils/azure_storage.py` or the `AzCopy` command to download the datasets as described below. @@ -260,7 +260,7 @@ Alternatively, using AzCopy CLI, set these arguments and run the following comma - `version`: version to download (`240103g` / etc.). Use the latest version. - `datasets_path` - path to the directory where you want to download the benchmarking dataset (destination directory must exist).
-Train, dev, and eval sets are released for the NOTSOFAR challenge are released in stages. +Train, dev, and eval sets for the NOTSOFAR challenge are released in stages. See release timeline on the [NOTSOFAR page](https://www.chimechallenge.org/current/task2/index#dates). See doc in `download_meeting_subset` function in [utils/azure_storage.py](https://github.com/microsoft/NOTSOFAR1-Challenge/blob/main/utils/azure_storage.py#L109) @@ -272,7 +272,7 @@ azcopy copy https://notsofarsa.blob.core.windows.net/benchmark-datasets/ InferenceCfg: elif config_name == 'dev_set_1_mc_debug': # for quick debug: 'tiny' Whisper, one MC (multi-channel) session conf_file = project_root / 'configs/inference/debug_inference.yaml' - session_query = 'device_name == "plaza_0" and is_mc == True and meeting_id == "MTG_30860"' + session_query = 'device_name == "plaza_0" and is_mc == True and meeting_id == "MTG_30500"' else: raise ValueError(f'unknown config name: {config_name}') @@ -54,8 +54,8 @@ def main(config_name: ConfigName = 'dev_set_1_mc_debug', output_dir: str = ""): # download the entire dev-set (all sessions, multi-channel and single-channel) meetings_root = project_root / 'artifacts' / 'meeting_data' - dev_meetings_dir = download_meeting_subset(subset_name='dev_set', # dev-set is without GT for now - version='240208.2_dev', + dev_meetings_dir = download_meeting_subset(subset_name='dev_set', # dev-set-2 is without GT for now + version='240415.2_dev', destination_dir=str(meetings_root)) if dev_meetings_dir is None: diff --git a/utils/azure_storage.py b/utils/azure_storage.py index d0c1652..77ec348 100644 --- a/utils/azure_storage.py +++ b/utils/azure_storage.py @@ -125,16 +125,23 @@ def download_meeting_subset(subset_name: Literal['train_set', 'dev_set', 'eval_s Latest available versions: - # dev_set, no GT available. submit your systems to leaderboard to measure WER. - res_dir = download_meeting_subset(subset_name='dev_set', version='240208.2_dev', destination_dir=...) 
- - # first and second train-set batches combined, with GT for training models. - res_dir = download_meeting_subset(subset_name='train_set', version='240229.1_train', destination_dir=...) + # dev-set-2, no GT available. Submit your systems to leaderboard to measure WER. + # dev-set-2 includes mostly new participants compared to the training sets and dev-set-1. + res_dir = download_meeting_subset(subset_name='dev_set', version='240415.2_dev', destination_dir=...) + # first and second train-set batches and dev-set-1 (GT unveiled) combined, with GT for training models. + # dev-set-1 and the training sets have significant participant overlap. Use dev-set-2 for development. + res_dir = download_meeting_subset(subset_name='train_set', version='240415.1_train', destination_dir=...) Previous versions: + # dev-set-1, no GT available. Previous leaderboard was used to measure WER. + res_dir = download_meeting_subset(subset_name='dev_set', version='240208.2_dev', destination_dir=...) + + # first and second train-set batches combined, with GT for training models. + res_dir = download_meeting_subset(subset_name='train_set', version='240229.1_train', destination_dir=...) + # first train-set batch, with GT for training models. res_dir = download_meeting_subset(subset_name='train_set', version='240208.2_train', destination_dir=...)