Skip to content

Commit

Permalink
add metrics utilities & use dataclasses instead of TypedDict (#1009)
Browse files Browse the repository at this point in the history
Co-authored-by: Théo Monnom <[email protected]>
Co-authored-by: Théo Monnom <[email protected]>
  • Loading branch information
3 people authored Oct 30, 2024
1 parent dbf9d1d commit 5376f08
Show file tree
Hide file tree
Showing 19 changed files with 438 additions and 349 deletions.
5 changes: 5 additions & 0 deletions .changeset/gold-eagles-flash.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"livekit-agents": patch
---

Reorganized metrics, added the `log_metrics` utility
80 changes: 80 additions & 0 deletions examples/voice-pipeline-agent/cost_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import logging

from dotenv import load_dotenv
from livekit.agents import (
AutoSubscribe,
JobContext,
JobProcess,
WorkerOptions,
cli,
llm,
metrics,
)
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import deepgram, openai, silero

load_dotenv()
logger = logging.getLogger("metrics-example")

# This example logs pipeline metrics and computes cost of the session

OPENAI_LLM_INPUT_PRICE = 2.50 / (10**6) # $2.50 per million tokens
OPENAI_LLM_OUTPUT_PRICE = 10 / (10**6) # $10 per million tokens
OPENAI_TTS_PRICE = 15 / (10**6) # $15 per million characters
DEEPGRAM_STT_PRICE = 0.0043 # $0.0043 per minute


def prewarm(proc: JobProcess):
proc.userdata["vad"] = silero.VAD.load()


async def entrypoint(ctx: JobContext):
initial_ctx = llm.ChatContext().append(
role="system",
text=(
"You are a voice assistant created by LiveKit. Your interface with users will be voice. "
"You should use short and concise responses, and avoiding usage of unpronouncable punctuation."
),
)

await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

participant = await ctx.wait_for_participant()
agent = VoicePipelineAgent(
vad=ctx.proc.userdata["vad"],
stt=deepgram.STT(),
llm=openai.LLM(),
tts=openai.TTS(),
chat_ctx=initial_ctx,
)

usage_collector = metrics.UsageCollector()

@agent.on("metrics_collected")
def _on_metrics_collected(metrics: metrics.AgentMetrics):
metrics.log_metrics(metrics)
usage_collector.add_usage(metrics)

async def log_session_cost():
summary = usage_collector.get_summary()
llm_cost = (
summary.llm_prompt_tokens * OPENAI_LLM_INPUT_PRICE
+ summary.llm_completion_tokens * OPENAI_LLM_OUTPUT_PRICE
)
tts_cost = summary.tts_characters_count * OPENAI_TTS_PRICE
stt_cost = summary.stt_audio_duration * DEEPGRAM_STT_PRICE / 60

total_cost = llm_cost + tts_cost + stt_cost

logger.info(
f"Total cost: ${total_cost:.4f} (LLM: ${llm_cost:.4f}, TTS: ${tts_cost:.4f}, STT: ${stt_cost:.4f})"
)

ctx.add_shutdown_callback(log_session_cost)

agent.start(ctx.room, participant)
await agent.say("Hey, how can I help you today?", allow_interruptions=True)


if __name__ == "__main__":
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm))
14 changes: 14 additions & 0 deletions examples/voice-pipeline-agent/minimal_assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
WorkerOptions,
cli,
llm,
metrics,
)
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import deepgram, openai, silero
Expand Down Expand Up @@ -53,6 +54,19 @@ async def entrypoint(ctx: JobContext):

agent.start(ctx.room, participant)

usage_collector = metrics.UsageCollector()

@agent.on("metrics_collected")
def _on_metrics_collected(metrics: metrics.AgentMetrics):
metrics.log_metrics(metrics)
usage_collector.add_usage(metrics)

async def log_usage():
summary = usage_collector.get_summary()
logger.info(f"Usage: ${summary}")

ctx.add_shutdown_callback(log_usage)

# listen to incoming chat messages, only required if you'd like the agent to
# answer incoming messages from Chat
chat = rtc.ChatManager(ctx.room)
Expand Down
122 changes: 0 additions & 122 deletions examples/voice-pipeline-agent/pipeline_metrics.py

This file was deleted.

2 changes: 2 additions & 0 deletions livekit-agents/livekit/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
cli,
ipc,
llm,
metrics,
multimodal,
pipeline,
stt,
Expand Down Expand Up @@ -60,6 +61,7 @@
"tts",
"tokenize",
"llm",
"metrics",
"transcription",
"pipeline",
"multimodal",
Expand Down
2 changes: 0 additions & 2 deletions livekit-agents/livekit/agents/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@
Choice,
ChoiceDelta,
CompletionUsage,
LLMMetrics,
LLMStream,
)

__all__ = [
"LLM",
"LLMMetrics",
"LLMStream",
"ChatContext",
"ChatRole",
Expand Down
41 changes: 15 additions & 26 deletions livekit-agents/livekit/agents/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,17 @@
import time
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, AsyncIterable, AsyncIterator, Literal, TypedDict
from typing import Any, AsyncIterable, AsyncIterator, Literal

from livekit import rtc

from .. import utils
from ..metrics import LLMMetrics
from ..utils import aio
from . import function_context
from .chat_context import ChatContext, ChatRole


class LLMMetrics(TypedDict):
request_id: str
timestamp: float
ttft: float
duration: float
label: str
cancelled: bool
completion_tokens: int
prompt_tokens: int
total_tokens: int
tokens_per_second: float


@dataclass
class ChoiceDelta:
role: ChatRole
Expand Down Expand Up @@ -115,18 +103,19 @@ async def _metrics_monitor_task(
usage = ev.usage

duration = time.perf_counter() - start_time
metrics: LLMMetrics = {
"timestamp": time.time(),
"request_id": request_id,
"ttft": ttft,
"duration": duration,
"cancelled": self._task.cancelled(),
"label": self._llm._label,
"completion_tokens": usage.completion_tokens if usage else 0,
"prompt_tokens": usage.prompt_tokens if usage else 0,
"total_tokens": usage.total_tokens if usage else 0,
"tokens_per_second": usage.completion_tokens / duration if usage else 0.0,
}
metrics = LLMMetrics(
timestamp=time.time(),
request_id=request_id,
ttft=ttft,
duration=duration,
cancelled=self._task.cancelled(),
label=self._llm._label,
completion_tokens=usage.completion_tokens if usage else 0,
prompt_tokens=usage.prompt_tokens if usage else 0,
total_tokens=usage.total_tokens if usage else 0,
tokens_per_second=usage.completion_tokens / duration if usage else 0.0,
error=None,
)
self._llm.emit("metrics_collected", metrics)

@property
Expand Down
30 changes: 30 additions & 0 deletions livekit-agents/livekit/agents/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from .base import (
AgentMetrics,
LLMMetrics,
PipelineEOUMetrics,
PipelineLLMMetrics,
PipelineSTTMetrics,
PipelineTTSMetrics,
PipelineVADMetrics,
STTMetrics,
TTSMetrics,
VADMetrics,
)
from .usage_collector import UsageCollector, UsageSummary
from .utils import log_metrics

__all__ = [
"LLMMetrics",
"AgentMetrics",
"PipelineEOUMetrics",
"PipelineSTTMetrics",
"PipelineTTSMetrics",
"PipelineVADMetrics",
"PipelineLLMMetrics",
"VADMetrics",
"STTMetrics",
"TTSMetrics",
"UsageSummary",
"UsageCollector",
"log_metrics",
]
Loading

0 comments on commit 5376f08

Please sign in to comment.