Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(error): added more specific errors #491

Merged
merged 1 commit into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ayushma/utils/converse.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ def converse_api(
# store time to complete request
stats["start_time"] = time.time()
if converse_type == "audio" and not audio:
return Exception("Please provide audio to generate transcript")
return Exception("[Converse] Please provide audio to generate transcript")
if converse_type == "text" and not text:
return Exception("Please provide text to generate transcript")
return Exception("[Converse] Please provide text to generate transcript")

if converse_type == "audio":
stats["transcript_start_time"] = time.time()
Expand Down Expand Up @@ -195,7 +195,8 @@ def converse_api(
response_message = list(response_message)[0]

return Response(
ChatMessageSerializer(response_message).data, status=status.HTTP_200_OK
ChatMessageSerializer(response_message).data,
status=status.HTTP_200_OK,
)

return response
13 changes: 7 additions & 6 deletions ayushma/utils/language_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def translate_text(target, text):
result = translate_client.translate(text, target_language=target)
return result["translatedText"]
except Exception as e:
print(e)
raise APIException("Translation failed")
print(f"Translation failed: {e}")
raise APIException("[Translation] Failed to translate the text")


language_code_voice_map = {
Expand Down Expand Up @@ -55,7 +55,8 @@ def text_to_speech(text, language_code, service):
synthesis_input = texttospeech.SynthesisInput(text=text)

voice = texttospeech.VoiceSelectionParams(
language_code=language_code, name=language_code_voice_map[language_code]
language_code=language_code,
name=language_code_voice_map[language_code],
)
audio_config = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.MP3
Expand All @@ -77,7 +78,7 @@ def text_to_speech(text, language_code, service):
)
return response.read()
else:
raise APIException("Service not supported")
raise APIException("[Text to Speech] Service not supported.")
except Exception as e:
print(e)
return None
print(f"Failed to convert text to speech: {e}")
raise APIException("[Text to Speech] Failed to convert text to speech.")
36 changes: 24 additions & 12 deletions ayushma/utils/openaiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def get_sanitized_reference(pinecone_references: List[QueryResponse]) -> str:
else:
sanitized_reference[document_id] = text
except Exception as e:
print(e)
print(f"Error extracting reference: {e}")
pass

return json.dumps(sanitized_reference)
Expand Down Expand Up @@ -143,17 +143,17 @@ def get_reference(text, openai_key, namespace, top_k):
try:
embeddings.append(get_embedding(text=[text], openai_api_key=openai_key))
except Exception as e:
return Exception(
e.__str__(),
)
print(f"Error generating embeddings: {e}")
return Exception("[Reference] Error generating embeddings")
else:
parts = split_text(text)
for part in parts:
try:
embeddings.append(get_embedding(text=[part], openai_api_key=openai_key))
except Exception as e:
print(f"Error generating embeddings: {e}")
raise Exception(
e.__str__(),
"[Reference] Error generating embeddings for split text"
)
# find similar embeddings from pinecone index for each embedding
pinecone_references: List[QueryResponse] = []
Expand Down Expand Up @@ -187,7 +187,7 @@ def add_reference_documents(chat_message):
except Document.DoesNotExist:
pass
except Exception as e:
print("Error adding reference documents: ", e)
print(f"Error adding reference documents: {e}")

chat_message.original_message = chat_text[
:ref_start_idx
Expand Down Expand Up @@ -297,10 +297,13 @@ def converse(
elif fetch_references and chat.project and chat.project.external_id:
try:
reference = get_reference(
english_text, openai_key, str(chat.project.external_id), match_number
english_text,
openai_key,
str(chat.project.external_id),
match_number,
)
except Exception as e:
print(e)
print(f"Error fetching references: {e}")
reference = ""
else:
reference = ""
Expand Down Expand Up @@ -438,8 +441,10 @@ def converse(
None,
)
except Exception as e:
print(e)
error_text = str(e)
print(f"Error in streaming response: {e}")
error_text = (
"[Streaming] Something went wrong in getting response, stream stopped"
)
translated_error_text = error_text
if user_language != "en-IN":
translated_error_text = translate_text(user_language, error_text)
Expand All @@ -464,7 +469,13 @@ def converse(
},
)
yield create_json_response(
local_translated_text, chat.external_id, "", str(e), True, True, None
local_translated_text,
chat.external_id,
"",
str(e),
True,
True,
None,
)


Expand Down Expand Up @@ -499,7 +510,8 @@ def converse_thread(
if status == "completed":
break
else:
raise Exception("Thread timed out")
print("[Thread] Run did not complete, timed out")
raise Exception("[Thread] Run did not complete, timed out")

response = (
client.beta.threads.messages.list(thread_id=thread.thread_id)
Expand Down
101 changes: 60 additions & 41 deletions ayushma/utils/speech_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,21 @@ def __init__(self, api_key, language_code):
self.language_code = language_code

def recognize(self, audio):
    """Transcribe *audio* using OpenAI's Whisper API.

    Parameters:
        audio: an uploaded-file-like object exposing ``.name`` and ``.read()``
            (e.g. a Django UploadedFile) — TODO confirm against callers.

    Returns:
        str: the transcription text.

    Raises:
        ValueError: if the Whisper API call fails for any reason.
    """
    try:
        client = OpenAI(api_key=self.api_key)
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            # Pass (filename, bytes) tuple per the SDK's file-upload contract:
            # https://github.com/openai/openai-python/tree/main#file-uploads
            file=(audio.name, audio.read()),
            # Whisper expects a bare ISO language code (e.g. "hi", not "hi-IN").
            language=self.language_code.replace("-IN", ""),
            # api_version="2020-11-07",
        )
        return transcription.text
    except Exception as e:
        print(f"Failed to recognize speech with whisper engine: {e}")
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(
            "[Speech to Text] Failed to recognize speech with Whisper engine"
        ) from e


class GoogleEngine:
Expand All @@ -31,41 +37,52 @@ def __init__(self, api_key, language_code):
self.language_code = language_code

def recognize(self, audio):
    """Transcribe *audio* using Google Cloud Speech-to-Text.

    Parameters:
        audio: an uploaded-file-like object exposing ``.file.read()``
            — presumably a Django UploadedFile; verify against callers.

    Returns:
        str: the top transcript alternative, or "" when the API returns
        no results (e.g. silence).

    Raises:
        ValueError: if the Speech API call fails for any reason.
    """
    try:
        client = speech.SpeechClient()
        audio_content = audio.file.read()
        audio_data = speech.RecognitionAudio(content=audio_content)

        config = speech.RecognitionConfig(
            # Let the API sniff the container/encoding from the bytes.
            encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
            language_code=self.language_code,
        )

        response = client.recognize(config=config, audio=audio_data)
        if not response.results:
            # No speech detected is not an error: return an empty transcript.
            return ""
        return response.results[0].alternatives[0].transcript
    except Exception as e:
        print(f"Failed to recognize speech with google engine: {e}")
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(
            "[Speech to Text] Failed to recognize speech with Google engine"
        ) from e


class SelfHostedEngine:
    """Speech-to-text engine backed by a self-hosted Whisper endpoint.

    The endpoint URL comes from ``settings.SELF_HOSTED_ENDPOINT``; no API
    key is needed, so the ``api_key`` argument is accepted for interface
    parity with the other engines but ignored.
    """

    def __init__(self, api_key, language_code):
        # api_key intentionally unused: the self-hosted service is unauthenticated
        # (interface kept identical to the other engine classes).
        self.language_code = language_code

    def recognize(self, audio):
        """POST *audio* to the self-hosted endpoint and return its transcript.

        Returns:
            str: the stripped transcription, or "" on a non-2xx response
            (best-effort behavior preserved from the original).

        Raises:
            ValueError: if the request itself fails (network error,
            malformed JSON, missing keys, ...).
        """
        try:
            response = requests.post(
                settings.SELF_HOSTED_ENDPOINT,
                files={"audio": audio},
                data={
                    # change this model to get faster results see: https://github.com/coronasafe/care-whisper
                    "model": "small",
                    # The service expects a bare ISO language code (e.g. "hi", not "hi-IN").
                    "language": self.language_code.replace("-IN", ""),
                },
            )

            if not response.ok:
                print("Failed to recognize speech with self hosted engine")
                return ""
            response = response.json()
            return response["data"]["transcription"].strip()
        except Exception as e:
            # Log and chain the cause instead of silently discarding it.
            print(f"Failed to recognize speech with self hosted engine: {e}")
            raise ValueError(
                "[Speech to Text] Failed to recognize speech with Self Hosted engine"
            ) from e


engines = {
Expand All @@ -82,14 +99,16 @@ def speech_to_text(engine_id, audio, language_code):
engine_class = engines.get(engine_name)

if not engine_class:
raise ValueError(f"Invalid STT engine ID: {engine_id}")
raise ValueError(f"[Speech to Text] Engine with ID {engine_id} not found")

try:
engine = engine_class(api_key, language_code)
recognized_text = engine.recognize(audio)
if not recognized_text:
raise ValueError("Failed to detect any speech in provided audio")
raise ValueError("[Speech to Text] No text recognized")
return recognized_text
except Exception as e:
print(f"Failed to recognize speech with {engine_name} engine: {e}")
raise e
print(f"Failed to transcribe speech with {engine_name} engine: {e}")
raise ValueError(
f"[Speech to Text] Failed to transcribe speech with {engine_name} engine"
)
7 changes: 5 additions & 2 deletions ayushma/utils/upsert.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def upsert(
None
"""
pinecone.init(
api_key=settings.PINECONE_API_KEY, environment=settings.PINECONE_ENVIRONMENT
api_key=settings.PINECONE_API_KEY,
environment=settings.PINECONE_ENVIRONMENT,
)
print("Initialized Pinecone and OpenAI")

Expand All @@ -80,7 +81,9 @@ def upsert(
raise Exception("Either filepath, url or text must be provided")

if len(document_lines) == 0:
raise Exception("No text found in document")
raise Exception(
"[Upsert] No text found in the document. Please check the document."
)
print(document_lines)

batch_size = (
Expand Down
8 changes: 5 additions & 3 deletions ayushma/views/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ def speech_to_text(self, *args, **kwarg):
stt_engine = Project.objects.get(external_id=project_id).stt_engine
except Project.DoesNotExist:
return Response(
{"error": "Project not found"}, status=status.HTTP_400_BAD_REQUEST
{"error": "Project not found"},
status=status.HTTP_400_BAD_REQUEST,
)
try:
stats["transcript_start_time"] = time.time()
Expand All @@ -133,13 +134,14 @@ def speech_to_text(self, *args, **kwarg):
print(f"Failed to transcribe speech with {stt_engine} engine: {e}")
return Response(
{
"error": "Something went wrong in getting transcription, please try again later"
"error": "[Transcribing] Something went wrong in getting transcription, please try again later"
},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

return Response(
{"transcript": translated_text, "stats": stats}, status=status.HTTP_200_OK
{"transcript": translated_text, "stats": stats},
status=status.HTTP_200_OK,
)

@extend_schema(
Expand Down
Loading