diff --git a/modules/whisper/base_transcription_pipeline.py b/modules/whisper/base_transcription_pipeline.py index bf3b445..35605f5 100644 --- a/modules/whisper/base_transcription_pipeline.py +++ b/modules/whisper/base_transcription_pipeline.py @@ -492,7 +492,7 @@ def validate_gradio_values(params: TranscriptionPipelineParams): params.whisper.lang = None else: language_code_dict = {value: key for key, value in whisper.tokenizer.LANGUAGES.items()} - params.whisper.lang = language_code_dict[params.lang] + params.whisper.lang = language_code_dict[params.whisper.lang] if params.whisper.initial_prompt == GRADIO_NONE_STR: params.whisper.initial_prompt = None @@ -528,6 +528,9 @@ def cache_parameters( if cached_yaml["whisper"].get("lang", None) is None: cached_yaml["whisper"]["lang"] = AUTOMATIC_DETECTION.unwrap() + else: + language_dict = whisper.tokenizer.LANGUAGES + cached_yaml["whisper"]["lang"] = language_dict[cached_yaml["whisper"]["lang"]] if cached_yaml["vad"].get("max_speech_duration_s", float('inf')) == float('inf'): cached_yaml["vad"]["max_speech_duration_s"] = GRADIO_NONE_NUMBER_MAX