Merge pull request #37 from microsoft/alvinn/dev-set-2_release

Release dev-set-2 and reveal dev-set-1
microsoft · Apr 17, 2024 · 2f348a9 · 2f348a9
2 parents 0993574 + f88bc9a
commit 2f348a9
Show file tree

Hide file tree

Showing 3 changed files with 18 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -240,7 +240,7 @@ calculate the loss when training.
 # NOTSOFAR-1 Datasets - Download Instructions
 This section is for those specifically interested in downloading the NOTSOFAR datasets.<br>
 The NOTSOFAR-1 Challenge provides two datasets: a recorded meeting dataset and a simulated training dataset. <br>
-The datasets are stored in Azure Blob Storage, to download them, you will need to setup [AzCopy](https://aka.ms/downloadazcopy-v10-linux-arm64)
+The datasets are stored in Azure Blob Storage; to download them, you will need to set up [AzCopy](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-v10#download-azcopy).
 
 You can use either the python utilities in `utils/azure_storage.py` or the `AzCopy` command to download the datasets as described below.
 
@@ -260,7 +260,7 @@ Alternatively, using AzCopy CLI, set these arguments and run the following comma
 - `version`: version to download (`240103g` / etc.). Use the latest version. 
 - `datasets_path` - path to the directory where you want to download the benchmarking dataset (destination directory must exist). <br>
 
-Train, dev, and eval sets are released for the NOTSOFAR challenge are released in stages. 
+Train, dev, and eval sets for the NOTSOFAR challenge are released in stages. 
 See release timeline on the [NOTSOFAR page](https://www.chimechallenge.org/current/task2/index#dates).
 See doc in `download_meeting_subset` function in 
 [utils/azure_storage.py](https://github.com/microsoft/NOTSOFAR1-Challenge/blob/main/utils/azure_storage.py#L109) 
@@ -272,7 +272,7 @@ azcopy copy https://notsofarsa.blob.core.windows.net/benchmark-datasets/<subset_
 
 Example:
 ```bash
-azcopy copy https://notsofarsa.blob.core.windows.net/benchmark-datasets/dev_set/240208.2_dev/MTG . --recursive
+azcopy copy https://notsofarsa.blob.core.windows.net/benchmark-datasets/dev_set/240415.2_dev/MTG . --recursive
 ````
 
 

diff --git a/run_inference.py b/run_inference.py
@@ -33,7 +33,7 @@ def load_config(config_name: ConfigName) -> InferenceCfg:
     elif config_name == 'dev_set_1_mc_debug':
         # for quick debug: 'tiny' Whisper, one MC (multi-channel) session
         conf_file = project_root / 'configs/inference/debug_inference.yaml'
-        session_query = 'device_name == "plaza_0" and is_mc == True and meeting_id == "MTG_30860"'
+        session_query = 'device_name == "plaza_0" and is_mc == True and meeting_id == "MTG_30500"'
 
     else:
         raise ValueError(f'unknown config name: {config_name}')
@@ -54,8 +54,8 @@ def main(config_name: ConfigName = 'dev_set_1_mc_debug', output_dir: str = ""):
 
     # download the entire dev-set (all sessions, multi-channel and single-channel)
     meetings_root = project_root / 'artifacts' / 'meeting_data'
-    dev_meetings_dir = download_meeting_subset(subset_name='dev_set',  # dev-set is without GT for now
-                                               version='240208.2_dev',
+    dev_meetings_dir = download_meeting_subset(subset_name='dev_set',  # dev-set-2 is without GT for now
+                                               version='240415.2_dev',
                                                destination_dir=str(meetings_root))
 
     if dev_meetings_dir is None:

diff --git a/utils/azure_storage.py b/utils/azure_storage.py
@@ -125,16 +125,23 @@ def download_meeting_subset(subset_name: Literal['train_set', 'dev_set', 'eval_s
 
     Latest available versions:
 
-    # dev_set, no GT available. submit your systems to leaderboard to measure WER.
-    res_dir = download_meeting_subset(subset_name='dev_set', version='240208.2_dev', destination_dir=...)
-
-    # first and second train-set batches combined, with GT for training models.
-    res_dir = download_meeting_subset(subset_name='train_set', version='240229.1_train', destination_dir=...)
+    # dev-set-2, no GT available. Submit your systems to leaderboard to measure WER.
+    # dev-set-2 includes mostly new participants compared to the training sets and dev-set-1.
+    res_dir = download_meeting_subset(subset_name='dev_set', version='240415.2_dev', destination_dir=...)
 
+    # first and second train-set batches and dev-set-1 (GT unveiled) combined, with GT for training models.
+    # dev-set-1 and the training sets have significant participant overlap. Use dev-set-2 for development.
+    res_dir = download_meeting_subset(subset_name='train_set', version='240415.1_train', destination_dir=...)
 
 
     Previous versions:
 
+    # dev-set-1, no GT available. Previous leaderboard was used to measure WER.
+    res_dir = download_meeting_subset(subset_name='dev_set', version='240208.2_dev', destination_dir=...)
+
+    # first and second train-set batches combined, with GT for training models.
+    res_dir = download_meeting_subset(subset_name='train_set', version='240229.1_train', destination_dir=...)
+
     # first train-set batch, with GT for training models.
     res_dir = download_meeting_subset(subset_name='train_set', version='240208.2_train', destination_dir=...)