Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tedlium Hybrid Baseline #151

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ce3fb26
initial hybrid baseline commit
Atticus1806 Jul 12, 2023
5c8f69a
update tedlium2/data.py
Atticus1806 Jul 12, 2023
fc23342
update default tools
Atticus1806 Jul 12, 2023
2d2f39a
updates to gmm args
Atticus1806 Jul 12, 2023
e38010d
update baseline config
Atticus1806 Jul 12, 2023
ddf6155
updates to baseline config
Atticus1806 Jul 12, 2023
780625c
update hybrid data
Atticus1806 Jul 12, 2023
f58e2ff
update nn_config
Atticus1806 Jul 12, 2023
c71e88c
update hybrid decoder
Atticus1806 Jul 12, 2023
d6b8feb
update hybrid system
Atticus1806 Jul 12, 2023
ca73e1b
update nn_system
Atticus1806 Jul 12, 2023
f5e2cfc
updates to nn
Atticus1806 Jul 12, 2023
7d27d3a
updates to sctk
Atticus1806 Jul 12, 2023
a7d5792
update sctk
Atticus1806 Jul 12, 2023
2799615
updates now finished
Atticus1806 Aug 29, 2023
fd1cd09
revert forced align
Atticus1806 Sep 13, 2023
6be293d
fix setting of TrainingDataInputs
Atticus1806 Sep 13, 2023
4b5e42e
make use of nn prior optional
Atticus1806 Sep 13, 2023
008d230
black
Atticus1806 Sep 13, 2023
8c9a3d8
updates from main
Atticus1806 Sep 13, 2023
f6c514b
Merge branch 'main' into bene_tedlium_hybrid_baseline
Atticus1806 Dec 13, 2023
b7ed5fd
delete nnpy
Atticus1806 Dec 13, 2023
d4b5bad
black
Atticus1806 Dec 13, 2023
93c563f
Update common/baselines/tedlium2/gmm/baseline_args.py
christophmluscher Jan 23, 2024
3b1aca1
Merge branch 'main' into bene_tedlium_hybrid_baseline
Atticus1806 Jan 23, 2024
d08d4de
possible fix
Atticus1806 Jan 23, 2024
a556253
fix dict name and fix fsa
christophmluscher Jan 23, 2024
fe0e8f2
black
christophmluscher Jan 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions common/baselines/tedlium2/default_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
version listed here. Nevertheless, the most recent "head" should be safe to be used as well.
"""
from sisyphus import tk
from i6_experiments.common.tools.audio import compile_ffmpeg_binary
from i6_experiments.common.tools.rasr import compile_rasr_binaries_i6mode
from i6_experiments.common.tools.sctk import compile_sctk
from i6_core.tools.git import CloneGitRepositoryJob

PACKAGE = __package__

RASR_BINARY_PATH = compile_rasr_binaries_i6mode(
branch="apptainer_tf_2_8", configure_options=["--apptainer-patch=2023-05-08_tensorflow-2.8_v1"]
configure_options=["--apptainer-patch=2023-05-08_tensorflow-2.8_v1"]
) # use most recent RASR
# RASR_BINARY_PATH = tk.Path("/work/asr4/rossenbach/neon_test/rasr_versions/rasr_no_tf/arch/linux-x86_64-standard/")
assert RASR_BINARY_PATH, "Please set a specific RASR_BINARY_PATH before running the pipeline"
RASR_BINARY_PATH.hash_overwrite = "TEDLIUM2_DEFAULT_RASR_BINARY_PATH"

Expand All @@ -25,3 +26,14 @@

SRILM_PATH = tk.Path("/work/tools/users/luescher/srilm-1.7.3/bin/i686-m64/")
SRILM_PATH.hash_overwrite = "TEDLIUM2_DEFAULT_SRILM_PATH"

# Python interpreter used as the RETURNN launcher.
# NOTE(review): the generic hash_overwrite presumably keeps job hashes independent of the
# concrete interpreter path — confirm against the Sisyphus Path hashing rules.
RETURNN_EXE = tk.Path(
"/usr/bin/python3",
hash_overwrite="GENERIC_RETURNN_LAUNCHER",
)

# RETURNN repository cloned at one fixed commit; the job's `out_repository` output is used as root.
RETURNN_RC_ROOT = CloneGitRepositoryJob(
"https://github.com/rwth-i6/returnn",
commit="11d33468ad56a6c254168560c29e77e65eb45b7c",
).out_repository
# Same hash_overwrite pattern as the other TEDLIUM2_DEFAULT_* tool paths in this file.
RETURNN_RC_ROOT.hash_overwrite = "TEDLIUM2_DEFAULT_RETURNN_RC_ROOT"
28 changes: 15 additions & 13 deletions common/baselines/tedlium2/gmm/baseline_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

from i6_experiments.common.setups.rasr import util
from i6_experiments.common.datasets.tedlium2.cart import CartQuestions
from i6_experiments.common.baselines.librispeech.default_tools import SCTK_BINARY_PATH
from i6_experiments.common.baselines.tedlium2.default_tools import SCTK_BINARY_PATH

USE_CORRECTED_APPLICATOR = True


def get_init_args():
Expand Down Expand Up @@ -86,7 +88,7 @@ def get_monophone_args():
"extra_merge_args": None,
"extra_config": None,
"extra_post_config": None,
"use_corrected_applicator": False,
"use_corrected_applicator": USE_CORRECTED_APPLICATOR,
}

monophone_training_args = {
Expand All @@ -97,7 +99,7 @@ def get_monophone_args():
"splits": 10,
"accs_per_split": 2,
"dump_alignment_score_report": True,
"use_corrected_applicator": False,
"use_corrected_applicator": USE_CORRECTED_APPLICATOR,
}

monophone_recognition_args = {
Expand Down Expand Up @@ -184,7 +186,7 @@ def get_triphone_args():
"align_extra_rqmt": {"mem": 8},
"accumulate_extra_rqmt": {"mem": 8},
"split_extra_rqmt": {"mem": 8},
"use_corrected_applicator": False,
"use_corrected_applicator": USE_CORRECTED_APPLICATOR,
}

triphone_recognition_args = {
Expand Down Expand Up @@ -250,7 +252,7 @@ def get_vtln_args():
"align_extra_rqmt": {"mem": 8},
"accumulate_extra_rqmt": {"mem": 8},
"split_extra_rqmt": {"mem": 8},
"use_corrected_applicator": False,
"use_corrected_applicator": USE_CORRECTED_APPLICATOR,
},
}

Expand Down Expand Up @@ -306,7 +308,7 @@ def get_sat_args():
"align_extra_rqmt": {"mem": 8},
"accumulate_extra_rqmt": {"mem": 8},
"split_extra_rqmt": {"mem": 8},
"use_corrected_applicator": False,
"use_corrected_applicator": USE_CORRECTED_APPLICATOR,
}

sat_recognition_args = {
Expand All @@ -320,10 +322,10 @@ def get_sat_args():
"feature_cache": "mfcc",
"cache_regex": "^mfcc.*$",
"cmllr_mixtures": "estimate_mixtures_sdm.tri",
"iters": [8, 10],
"iters": [8, 9, 10],
"feature_flow": "uncached_mfcc+context+lda",
"pronunciation_scales": [1.0],
"lm_scales": [25],
"pronunciation_scales": [0.0],
christophmluscher marked this conversation as resolved.
Show resolved Hide resolved
"lm_scales": [8.0, 20.0, 25.0],
"lm_lookahead": True,
"lookahead_options": None,
"create_lattice": True,
Expand Down Expand Up @@ -371,7 +373,7 @@ def get_vtln_sat_args():
"align_extra_rqmt": {"mem": 8},
"accumulate_extra_rqmt": {"mem": 8},
"split_extra_rqmt": {"mem": 8},
"use_corrected_applicator": False,
"use_corrected_applicator": USE_CORRECTED_APPLICATOR,
}

vtln_sat_recognition_args = {
Expand All @@ -385,10 +387,10 @@ def get_vtln_sat_args():
"feature_cache": "mfcc",
"cache_regex": "^mfcc.*$",
"cmllr_mixtures": "estimate_mixtures_sdm.vtln",
"iters": [8, 10],
"iters": [8, 9, 10],
"feature_flow": "uncached_mfcc+context+lda+vtln",
"pronunciation_scales": [1.0],
"lm_scales": [25],
"pronunciation_scales": [0.0],
"lm_scales": [25, 20, 8.0],
"lm_lookahead": True,
"lookahead_options": None,
"create_lattice": True,
Expand Down
Empty file.
51 changes: 51 additions & 0 deletions common/baselines/tedlium2/hybrid/baseline_args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from i6_core.features import filter_width_from_channels


def get_gammatone_feature_extraction_args():
    """
    Build the gammatone feature extraction arguments.

    A fresh dict (including fresh nested dicts) is created on every call, so callers may
    modify the result without affecting later calls.

    :return: dict with the single key "gt_options" mapping to the gammatone settings
    :rtype: dict
    """
    sample_settings = {
        "audio_format": "wav",
        "dc_detection": False,
    }

    # dict(...) keeps the keyword order, so the key order matches the literal layout.
    gammatone_settings = dict(
        minfreq=100,
        maxfreq=7500,
        channels=50,
        tempint_type="hanning",
        tempint_shift=0.01,
        tempint_length=0.025,
        flush_before_gap=True,
        do_specint=False,
        specint_type="hanning",
        specint_shift=4,
        specint_length=9,
        normalize=True,
        preemphasis=True,
        legacy_scaling=False,
        without_samples=False,
        samples_options=sample_settings,
        normalization_options={},
    )

    return {"gt_options": gammatone_settings}


def get_log_mel_feature_extraction_args():
    """
    Build the log-mel filterbank feature extraction arguments.

    The filter width is obtained from ``filter_width_from_channels`` for 80 channels with
    mel warping and ``f_max=8000``.

    :return: dict with the single key "fb", which holds the "filterbank_options"
    :rtype: dict
    """
    mel_filter_width = filter_width_from_channels(channels=80, warping_function="mel", f_max=8000)

    sample_settings = {
        "audio_format": "wav",
        "dc_detection": False,
    }

    filterbank_settings = {
        "warping_function": "mel",
        "filter_width": mel_filter_width,
        "normalize": True,
        "normalization_options": None,
        "without_samples": False,
        "samples_options": sample_settings,
        "fft_options": None,
        "add_features_output": True,
        "apply_log": True,
        "add_epsilon": True,
    }

    return {"fb": {"filterbank_options": filterbank_settings}}
71 changes: 71 additions & 0 deletions common/baselines/tedlium2/hybrid/baseline_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import copy
from sisyphus import gs, tk

from i6_core.features import FilterbankJob

from i6_experiments.common.setups.rasr.util import RasrSteps
from i6_experiments.common.setups.rasr.hybrid_system import HybridSystem
from i6_experiments.common.baselines.tedlium2.default_tools import RETURNN_RC_ROOT, RASR_BINARY_PATH

from .data import get_corpus_data_inputs
from .baseline_args import get_log_mel_feature_extraction_args
from .nn_config.nn_args import get_nn_args


def run_gmm_system():
    """
    Run the TED-LIUM 2 common GMM baseline and hand back the resulting system.

    :return: the object returned by ``run_tedlium2_common_baseline``
    """
    # NOTE(review): the PR discussion asked whether the FSA bug correction is included in
    # this GMM system. It initially was not; the last reply states "now it should be".
    # Confirm against the GMM baseline before relying on it.

    # Imported inside the function, so the GMM baseline module is only loaded when needed.
    from i6_experiments.common.baselines.tedlium2.gmm.baseline_config import (
        run_tedlium2_common_baseline,
    )

    return run_tedlium2_common_baseline()


def run_tedlium2_hybrid_baseline():
    """
    Set up and run the TED-LIUM 2 hybrid NN baseline on top of the GMM baseline.

    Steps:
      1. run the GMM baseline system,
      2. build the NN data inputs from it using log-mel filterbank features,
      3. run the "extract" step on the GMM system with the log-mel feature arguments,
      4. create a ``HybridSystem`` and run its "nn" step with 160 epochs.

    While this function runs, ``gs.ALIAS_AND_OUTPUT_SUBDIR`` is set to
    "baselines/tedlium2/hybrid/baseline". The value it had before the call is restored
    afterwards, also when an exception is raised, so a caller's own sub-directory setting
    is no longer overwritten with an empty string.
    """
    previous_alias_subdir = gs.ALIAS_AND_OUTPUT_SUBDIR
    gs.ALIAS_AND_OUTPUT_SUBDIR = "baselines/tedlium2/hybrid/baseline"
    try:
        gmm_system = run_gmm_system()

        # Work on a copy so the GMM system's own init args keep their feature settings.
        rasr_init_args = copy.deepcopy(gmm_system.rasr_init_args)
        rasr_init_args.feature_extraction_args = get_log_mel_feature_extraction_args()
        (
            nn_train_data_inputs,
            nn_cv_data_inputs,
            nn_devtrain_data_inputs,
            nn_dev_data_inputs,
            nn_test_data_inputs,
        ) = get_corpus_data_inputs(
            gmm_system,
            rasr_init_args.feature_extraction_args["fb"],
            FilterbankJob,
            # NOTE(review): this prefix differs from the output sub-directory set above and
            # looks like a leftover from another setup; kept unchanged to preserve aliases.
            alias_prefix="experiments/tedlium2/hybrid/wei_baseline",
        )

        # image only, so just python3
        returnn_exe = tk.Path("/usr/bin/python3", hash_overwrite="GENERIC_RETURNN_LAUNCHER")
        blas_lib = tk.Path("/lib/x86_64-linux-gnu/liblapack.so.3")
        # NOTE(review): this is the same hash_overwrite string that default_tools assigns to
        # RASR_BINARY_PATH, which looks like a copy-paste slip. It is left unchanged because
        # a different string would change the hashes of existing jobs; confirm and rename
        # deliberately.
        blas_lib.hash_overwrite = "TEDLIUM2_DEFAULT_RASR_BINARY_PATH"

        steps = RasrSteps()
        steps.add_step("extract", rasr_init_args.feature_extraction_args)
        gmm_system.run(steps)

        nn_args = get_nn_args(num_epochs=160)
        nn_steps = RasrSteps()
        nn_steps.add_step("nn", nn_args)

        tedlium_nn_system = HybridSystem(
            returnn_root=RETURNN_RC_ROOT,
            returnn_python_exe=returnn_exe,
            blas_lib=blas_lib,
            rasr_arch="linux-x86_64-standard",
            rasr_binary_path=RASR_BINARY_PATH,
        )
        tedlium_nn_system.init_system(
            rasr_init_args=rasr_init_args,
            train_data=nn_train_data_inputs,
            cv_data=nn_cv_data_inputs,
            devtrain_data=nn_devtrain_data_inputs,
            dev_data=nn_dev_data_inputs,
            test_data=nn_test_data_inputs,
            train_cv_pairing=[("train.train", "dev.cv")],
        )
        tedlium_nn_system.run(nn_steps)
    finally:
        # Restore the caller's setting instead of hard-resetting the global to "".
        gs.ALIAS_AND_OUTPUT_SUBDIR = previous_alias_subdir
Loading
Loading