Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpenAI STT fixed #466

Merged
merged 1 commit into from
Feb 10, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions ayushma/utils/speech_to_text.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
 import os

-import openai
 import requests
 from django.conf import settings
 from google.cloud import speech
+from openai import OpenAI

 from ayushma.models.enums import STTEngine

Expand All @@ -14,19 +14,14 @@ def __init__(self, api_key, language_code):
         self.language_code = language_code

     def recognize(self, audio):
-        # workaround for setting api version ( https://github.com/openai/openai-python/pull/491 )
-        current_api_version = openai.api_version
-        openai.api_version = "2020-11-07"
-        transcription = openai.Audio.transcribe(
-            "whisper-1",
-            file=audio,
+        client = OpenAI(api_key=self.api_key)
+        transcription = client.audio.transcriptions.create(
+            model="whisper-1",
+            # https://github.com/openai/openai-python/tree/main#file-uploads
+            file=(audio.name, audio.read()),
             language=self.language_code.replace("-IN", ""),
-            api_key=self.api_key,
-            api_base="https://api.openai.com/v1",
-            api_type="open_ai",
-            api_version="2020-11-07", # Bug in openai package, this parameter is ignored
+            # api_version="2020-11-07",
         )
-        openai.api_version = current_api_version
         return transcription.text


Expand Down Expand Up @@ -94,6 +89,7 @@ def speech_to_text(engine_id, audio, language_code):
         recognized_text = engine.recognize(audio)
         if not recognized_text:
             raise ValueError("Failed to detect any speech in provided audio")
+        return recognized_text
     except Exception as e:
         print(f"Failed to recognize speech with {engine_name} engine: {e}")
         raise e
Loading