Skip to content

Commit

Permalink
fix CI & add missing changesets (#914)
Browse files Browse the repository at this point in the history
  • Loading branch information
theomonnom authored Oct 15, 2024
1 parent 5a0f994 commit c1f2674
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 20 deletions.
5 changes: 5 additions & 0 deletions .changeset/light-tools-jump.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"livekit-plugins-azure": minor
---

Azure TTS Prosody SSML support #912
2 changes: 1 addition & 1 deletion livekit-agents/livekit/agents/pipeline/pipeline_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ def _commit_user_question_if_needed() -> None:

if tool_calls:
extra_tools_messages.append(
ChatMessage.create_tool_calls(tool_calls, content=collected_text)
ChatMessage.create_tool_calls(tool_calls, text=collected_text)
)
extra_tools_messages.extend(tool_calls_results_msg)

Expand Down
42 changes: 23 additions & 19 deletions livekit-plugins/livekit-plugins-azure/livekit/plugins/azure/tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
from dataclasses import dataclass
from typing import Literal

import azure.cognitiveservices.speech as speechsdk # type: ignore
from livekit.agents import tts, utils

import azure.cognitiveservices.speech as speechsdk # type: ignore

AZURE_SAMPLE_RATE: int = 16000
AZURE_BITS_PER_SAMPLE: int = 16
AZURE_NUM_CHANNELS: int = 1
Expand Down Expand Up @@ -65,7 +66,13 @@ def validate(self) -> None:
"Prosody volume must be one of 'silent', 'x-soft', 'soft', 'medium', 'loud', 'x-loud'"
)

if self.pitch and self.pitch not in ["x-low", "low", "medium", "high", "x-high"]:
if self.pitch and self.pitch not in [
"x-low",
"low",
"medium",
"high",
"x-high",
]:
raise ValueError(
"Prosody pitch must be one of 'x-low', 'low', 'medium', 'high', 'x-high'"
)
Expand Down Expand Up @@ -153,25 +160,22 @@ async def _main_task(self):
stream=stream_callback,
)

def _create_ssml_text(text: str, opts: _TTSOptions) -> str:
ssml = f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{opts.language or "en-US"}">'
prosody_ssml = "<prosody"
if opts.prosody.rate:
prosody_ssml += f' rate="{opts.prosody.rate}"'
if opts.prosody.volume:
prosody_ssml += f' volume="{opts.prosody.volume}"'
if opts.prosody.pitch:
prosody_ssml += f' pitch="{opts.prosody.pitch}"'
prosody_ssml += ">"
ssml += prosody_ssml
ssml += text
ssml += "</prosody></speak>"
return ssml

def _synthesize() -> speechsdk.SpeechSynthesisResult:
if self._opts.prosody:
ssml_text = _create_ssml_text(self._text, self._opts)
return synthesizer.speak_ssml_async(ssml_text).get()
ssml = f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{self._opts.language or "en-US"}">'
prosody_ssml = "<prosody"
if self._opts.prosody.rate:
prosody_ssml += f' rate="{self._opts.prosody.rate}"'
if self._opts.prosody.volume:
prosody_ssml += f' volume="{self._opts.prosody.volume}"'
if self._opts.prosody.pitch:
prosody_ssml += f' pitch="{self._opts.prosody.pitch}"'
prosody_ssml += ">"
ssml += prosody_ssml
ssml += self._text
ssml += "</prosody></speak>"
return synthesizer.speak_ssml_async(ssml).get() # type: ignore

return synthesizer.speak_text_async(self._text).get() # type: ignore

result = None
Expand Down

0 comments on commit c1f2674

Please sign in to comment.