Commit
Merge pull request #32 from mobiusml/asr_streaming_transcribe
Streaming output for whisper transcribe
Jiltseb authored Dec 20, 2023
2 parents af968bf + 10cd9b4 commit 54a0a1b
Showing 4 changed files with 160 additions and 38 deletions.
19 changes: 12 additions & 7 deletions aana/configs/endpoints.py
@@ -87,20 +87,25 @@
             summary="Transcribe a video using Whisper Medium",
             outputs=[
                 EndpointOutput(
-                    name="transcription", output="videos_transcriptions_whisper_medium"
+                    name="transcription",
+                    output="video_transcriptions_whisper_medium",
+                    streaming=True,
                 ),
                 EndpointOutput(
                     name="segments",
-                    output="videos_transcriptions_segments_whisper_medium",
-                ),
-                EndpointOutput(
-                    name="info", output="videos_transcriptions_info_whisper_medium"
+                    output="video_transcriptions_segments_whisper_medium",
+                    streaming=True,
                 ),
                 EndpointOutput(
-                    name="transcription_ids", output="videos_transcription_ids"
+                    name="info",
+                    output="video_transcriptions_info_whisper_medium",
+                    streaming=True,
                 ),
+                EndpointOutput(name="transcription_id", output="transcription_id"),
+                EndpointOutput(name="transcription_path", output="transcription_path"),
             ],
-        )
+            streaming=True,
+        ),
     ],
     "chat_with_video": [
         Endpoint(
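With streaming=True set on the transcription, segments, and info outputs, the endpoint can deliver partial results while the model is still running instead of a single response at the end. The sketch below shows one way a client might consume such a stream over HTTP. It is illustrative only: the endpoint URL, the request payload, and the assumption that chunks arrive as newline-delimited JSON are not specified by this diff.

import json

import requests

ENDPOINT_URL = "http://localhost:8000/video/transcribe"  # assumed path, not taken from this diff


def stream_transcription(video_url: str) -> str:
    """Accumulate streamed transcription chunks into a full transcript."""
    transcript = ""
    payload = {"body": json.dumps({"video": {"url": video_url}})}  # assumed payload shape
    with requests.post(ENDPOINT_URL, data=payload, stream=True) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                continue
            chunk = json.loads(line)  # assumed framing: one JSON object per line
            transcript += chunk.get("transcription", {}).get("text", "")
    return transcript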
24 changes: 18 additions & 6 deletions aana/deployments/whisper_deployment.py
@@ -179,9 +179,6 @@ async def transcribe_stream(
     ) -> AsyncGenerator[WhisperOutput, None]:
         """Transcribe the media with the whisper model in a streaming fashion.
 
-        Right now this is the same as transcribe, but we will add support for
-        streaming in the future to support larger media and to make the ASR more responsive.
-
         Args:
             media (Video): The media to transcribe.
             params (WhisperParams): The parameters for the whisper model.
@@ -192,9 +189,24 @@
             transcription_info (AsrTranscriptionInfo): The ASR transcription info.
             transcription (AsrTranscription): The ASR transcription.
         """
-        # TODO: add streaming support
-        output = await self.transcribe(media, params)
-        yield output
+        if not params:
+            params = WhisperParams()
+        media_path: str = str(media.path)
+        try:
+            segments, info = self.model.transcribe(media_path, **params.dict())
+        except Exception as e:
+            raise InferenceException(self.model_name) from e
+
+        asr_transcription_info = AsrTranscriptionInfo.from_whisper(info)
+        for segment in segments:
+            asr_segments = [AsrSegment.from_whisper(segment)]
+            asr_transcription = AsrTranscription(text=segment.text)
+
+            yield WhisperOutput(
+                segments=asr_segments,
+                transcription_info=asr_transcription_info,
+                transcription=asr_transcription,
+            )
 
     async def transcribe_batch(
         self, media_batch: list[Video], params: WhisperParams = None
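The rewritten transcribe_stream calls the underlying model's transcribe() directly and yields one WhisperOutput per decoded segment rather than a single result for the whole file. If the model returns segments lazily (as faster-whisper's segment iterator does), each chunk can be emitted as soon as it is decoded, which is what makes the endpoint more responsive for long media. Below is a minimal consumption sketch, not part of the PR: how the deployment, media, and params objects are obtained is left out, and dict-style access to WhisperOutput is an assumption; in the project the method is normally reached through a Ray Serve handle, as the updated test shows.

async def collect_transcript(deployment, media, params) -> str:
    """Concatenate the per-segment transcription text from the stream."""
    transcript = ""
    async for chunk in deployment.transcribe_stream(media=media, params=params):
        # Each chunk carries one segment, the shared transcription info,
        # and the text of that segment.
        transcript += chunk["transcription"].text  # assumes mapping-style WhisperOutput
    return transcript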
14 changes: 11 additions & 3 deletions aana/tests/deployments/test_whisper_deployment.py
@@ -2,6 +2,7 @@
 import json
 from importlib import resources
 from pathlib import Path
+from collections import defaultdict
 
 import pytest
 import ray
@@ -94,12 +95,19 @@ async def test_whisper_deployment(video_file):
         stream = handle.options(stream=True).transcribe_stream.remote(
             media=video, params=WhisperParams(word_timestamps=True)
         )
-        # We only have one chunk now
-        # TODO: test multiple chunks when steaming is implemented properly
+
+        # Combine individual segments and compare with the final dict
+        grouped_dict = defaultdict(list)
+        transcript = ""
         async for chunk in stream:
             chunk = await chunk
             output = pydantic_to_dict(chunk)
-            compare_transcriptions(expected_output, output)
+            transcript += output["transcription"]["text"]
+            grouped_dict["segments"].append(output.get("segments")[0])
+
+        grouped_dict["transcription"] = {"text": transcript}
+        grouped_dict["transcription_info"] = output.get("transcription_info")
+        compare_transcriptions(expected_output, dict(grouped_dict))
 
         # Test transcribe_batch method
         videos = [video, video]
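The updated test no longer compares each chunk to the full expected output; it rebuilds the complete result from the stream and compares once at the end. The helper below is a standalone sketch of that same aggregation (not part of the PR), assuming the chunks have already been converted to plain dicts, e.g. with pydantic_to_dict.

from collections import defaultdict


def merge_chunks(chunks: list[dict]) -> dict:
    """Merge per-segment streaming chunks into one transcribe-shaped dict."""
    merged = defaultdict(list)
    transcript = ""
    info = None
    for chunk in chunks:
        transcript += chunk["transcription"]["text"]
        merged["segments"].extend(chunk["segments"])
        info = chunk["transcription_info"]  # the same info is repeated on every chunk
    merged["transcription"] = {"text": transcript}
    merged["transcription_info"] = info
    return dict(merged)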
(Diff for the fourth changed file not shown: 119 additions and 22 deletions.)
