Merge pull request #35 from microsoft/ilyagu/conformer1.0_config_files

Added conformer1.0 SC/MC configuration files.
microsoft · Mar 18, 2024 · 3d80ac4 · 3d80ac4
2 parents db797d2 + ef49ce7
commit 3d80ac4
Show file tree

Hide file tree

Showing 2 changed files with 88 additions and 0 deletions.
diff --git a/configs/train_css/local/conformer_v1.0_mc.yaml b/configs/train_css/local/conformer_v1.0_mc.yaml
@@ -0,0 +1,42 @@
+# Note: this model is trained with a mask-based loss, unlike conformer_v0.5, which used a masked magnitude loss.
+train_dir: ./v1.5/200hrs/train
+val_dir: ./v1.5/200hrs/val
+out_dir: ./
+
+train_set_cfg:
+  sample_frac: 1.0
+  max_urls: null  # null means no limit
+val_set_cfg:
+  sample_frac: 1.0
+  max_urls: null  # null means no limit
+
+calc_side_info: true
+log_params_mlflow: true
+log_metrics_mlflow: true
+
+scheduler_step_every: [1, iterations]
+scheduler_name: step_lr
+scheduler_step_lr_cfg:
+  # Fixed LR (gamma of 1.0 means no decay)
+  step_size: 1
+  gamma: 1.0
+
+stop_after: [520000, iterations]
+eval_every: [1000, iterations]
+save_every: [1000, iterations]
+
+loss_name: 'mask'
+base_loss_name: 'l1'
+
+global_batch_size: 256
+learning_rate: 1.0e-4  # decimal point kept so YAML 1.1 loaders (e.g. PyYAML) read a float, not a string
+weight_decay: 1.0e-2  # value taken from the paper; decimal point kept for the same reason
+
+# Large model per CSS with Conformer definition
+conformer_css_cfg:
+  nnet_conf:
+    conformer_conf:
+      attention_dim: 512  # default 256
+      attention_heads: 8  # default 4
+      num_blocks: 18  # default 16
+      dropout_rate: 0.0  # New! The default was 0.1.
diff --git a/configs/train_css/local/conformer_v1.0_sc.yaml b/configs/train_css/local/conformer_v1.0_sc.yaml
@@ -0,0 +1,46 @@
+# Note: this model is trained with a mask-based loss, unlike conformer_v0.5, which used a masked magnitude loss.
+train_dir: ./v1.5/1000hrs/train
+val_dir: ./v1.5/200hrs/val  # enough for validation
+out_dir: ./
+
+single_channel: true
+
+train_set_cfg:
+  sample_frac: 1.0
+  max_urls: 640  # out of a total of 800. Subsample to fit local storage and avoid cache misses.
+val_set_cfg:
+  sample_frac: 1.0
+  max_urls: null  # null means no limit
+
+calc_side_info: true
+log_params_mlflow: true
+log_metrics_mlflow: true
+
+scheduler_step_every: [1, iterations]
+scheduler_name: linear_warmup_decay
+scheduler_linear_warmup_decay_cfg:
+  warmup: 10000
+  decay: 520000  # same number as the stop_after iteration count below
+
+stop_after: [520000, iterations]
+eval_every: [1000, iterations]
+save_every: [1000, iterations]
+
+loss_name: 'mask'
+base_loss_name: 'l1'
+
+global_batch_size: 256
+learning_rate: 1.0e-4  # decimal point kept so YAML 1.1 loaders (e.g. PyYAML) read a float, not a string
+weight_decay: 1.0e-2  # value taken from the paper; decimal point kept for the same reason
+
+# Large model per CSS with Conformer definition
+conformer_css_cfg:
+  extractor_conf:
+    ipd_index: ''  # For MC '1,0;2,0;3,0;4,0;5,0;6,0'. For SC ''.
+  nnet_conf:
+    conformer_conf:
+      attention_dim: 512  # default 256
+      attention_heads: 8  # default 4
+      num_blocks: 18  # default 16
+      dropout_rate: 0.0  # New! The default was 0.1.
+    in_features: 257  # For MC 1799. For SC 257.