diff --git a/livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/tts.py b/livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/tts.py
index 686213535..c69043146 100644
--- a/livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/tts.py
+++ b/livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/tts.py
@@ -87,6 +87,7 @@ class _TTSOptions:
     api_key: str
     voice: Voice
     model: TTSModels | str
+    language: str | None
     base_url: str
     encoding: TTSEncoding
     sample_rate: int
@@ -114,6 +115,7 @@ def __init__(
         http_session: aiohttp.ClientSession | None = None,
         # deprecated
         model_id: TTSModels | str | None = None,
+        language: str | None = None,
     ) -> None:
         """
         Create a new instance of ElevenLabs TTS.
@@ -129,6 +131,7 @@ def __init__(
             enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
             chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
             http_session (aiohttp.ClientSession | None): Custom HTTP session for API requests. Optional.
+            language (str | None): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.
         """

         super().__init__(
@@ -162,6 +165,7 @@ def __init__(
             word_tokenizer=word_tokenizer,
             chunk_length_schedule=chunk_length_schedule,
             enable_ssml_parsing=enable_ssml_parsing,
+            language=language,
         )
         self._session = http_session

@@ -523,8 +527,12 @@ def _stream_url(opts: _TTSOptions) -> str:
     output_format = opts.encoding
     latency = opts.streaming_latency
     enable_ssml = str(opts.enable_ssml_parsing).lower()
-    return (
+    language = opts.language
+    url = (
         f"{base_url}/text-to-speech/{voice_id}/stream-input?"
         f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}&"
         f"enable_ssml_parsing={enable_ssml}"
     )
+    if language is not None:
+        url += f"&language_code={language}"
+    return url
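
For reference, a minimal usage sketch of the new parameter from the caller's side. This is illustrative only: it assumes the constructor also exposes a non-deprecated `model` keyword (the diff itself only shows the deprecated `model_id`) and that the ElevenLabs API key is supplied through the plugin's usual `api_key`/environment handling.

```python
from livekit.plugins import elevenlabs

# Sketch only: `model` is assumed as the non-deprecated counterpart of the
# `model_id` kwarg visible in the diff; `language` is the parameter this patch adds.
tts = elevenlabs.TTS(
    model="eleven_turbo_v2_5",  # per the new docstring, the only model honoring `language` as of 10/24/24
    language="es",              # appended by _stream_url as &language_code=es
)
```

When `language` is left as `None`, `_stream_url` builds the same URL as before, so existing callers see no behavior change.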