Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(error): added more specific errors #491

Merged
merged 1 commit into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ayushma/utils/converse.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ def converse_api(
# store time to complete request
stats["start_time"] = time.time()
if converse_type == "audio" and not audio:
return Exception("Please provide audio to generate transcript")
return Exception("[Converse] Please provide audio to generate transcript")
if converse_type == "text" and not text:
return Exception("Please provide text to generate transcript")
return Exception("[Converse] Please provide text to generate transcript")

if converse_type == "audio":
stats["transcript_start_time"] = time.time()
Expand Down Expand Up @@ -195,7 +195,8 @@ def converse_api(
response_message = list(response_message)[0]

return Response(
ChatMessageSerializer(response_message).data, status=status.HTTP_200_OK
ChatMessageSerializer(response_message).data,
status=status.HTTP_200_OK,
)

return response
13 changes: 7 additions & 6 deletions ayushma/utils/language_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def translate_text(target, text):
result = translate_client.translate(text, target_language=target)
return result["translatedText"]
except Exception as e:
print(e)
raise APIException("Translation failed")
print(f"Translation failed: {e}")
raise APIException("[Translation] Failed to translate the text")


language_code_voice_map = {
Expand Down Expand Up @@ -55,7 +55,8 @@ def text_to_speech(text, language_code, service):
synthesis_input = texttospeech.SynthesisInput(text=text)

voice = texttospeech.VoiceSelectionParams(
language_code=language_code, name=language_code_voice_map[language_code]
language_code=language_code,
name=language_code_voice_map[language_code],
)
audio_config = texttospeech.AudioConfig(
audio_encoding=texttospeech.AudioEncoding.MP3
Expand All @@ -77,7 +78,7 @@ def text_to_speech(text, language_code, service):
)
return response.read()
else:
raise APIException("Service not supported")
raise APIException("[Text to Speech] Service not supported.")
except Exception as e:
print(e)
return None
print(f"Failed to convert text to speech: {e}")
raise APIException("[Text to Speech] Failed to convert text to speech.")
36 changes: 24 additions & 12 deletions ayushma/utils/openaiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def get_sanitized_reference(pinecone_references: List[QueryResponse]) -> str:
else:
sanitized_reference[document_id] = text
except Exception as e:
print(e)
print(f"Error extracting reference: {e}")
pass

return json.dumps(sanitized_reference)
Expand Down Expand Up @@ -143,17 +143,17 @@ def get_reference(text, openai_key, namespace, top_k):
try:
embeddings.append(get_embedding(text=[text], openai_api_key=openai_key))
except Exception as e:
return Exception(
e.__str__(),
)
print(f"Error generating embeddings: {e}")
return Exception("[Reference] Error generating embeddings")
else:
parts = split_text(text)
for part in parts:
try:
embeddings.append(get_embedding(text=[part], openai_api_key=openai_key))
except Exception as e:
print(f"Error generating embeddings: {e}")
raise Exception(
e.__str__(),
"[Reference] Error generating embeddings for split text"
)
# find similar embeddings from pinecone index for each embedding
pinecone_references: List[QueryResponse] = []
Expand Down Expand Up @@ -187,7 +187,7 @@ def add_reference_documents(chat_message):
except Document.DoesNotExist:
pass
except Exception as e:
print("Error adding reference documents: ", e)
print(f"Error adding reference documents: {e}")

chat_message.original_message = chat_text[
:ref_start_idx
Expand Down Expand Up @@ -297,10 +297,13 @@ def converse(
elif fetch_references and chat.project and chat.project.external_id:
try:
reference = get_reference(
english_text, openai_key, str(chat.project.external_id), match_number
english_text,
openai_key,
str(chat.project.external_id),
match_number,
)
except Exception as e:
print(e)
print(f"Error fetching references: {e}")
reference = ""
else:
reference = ""
Expand Down Expand Up @@ -438,8 +441,10 @@ def converse(
None,
)
except Exception as e:
print(e)
error_text = str(e)
print(f"Error in streaming response: {e}")
error_text = (
"[Streaming] Something went wrong in getting response, stream stopped"
)
translated_error_text = error_text
if user_language != "en-IN":
translated_error_text = translate_text(user_language, error_text)
Expand All @@ -464,7 +469,13 @@ def converse(
},
)
yield create_json_response(
local_translated_text, chat.external_id, "", str(e), True, True, None
local_translated_text,
chat.external_id,
"",
str(e),
True,
True,
None,
)


Expand Down Expand Up @@ -499,7 +510,8 @@ def converse_thread(
if status == "completed":
break
else:
raise Exception("Thread timed out")
print("[Thread] Run did not complete, timed out")
raise Exception("[Thread] Run did not complete, timed out")

response = (
client.beta.threads.messages.list(thread_id=thread.thread_id)
Expand Down
101 changes: 60 additions & 41 deletions ayushma/utils/speech_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,21 @@ def __init__(self, api_key, language_code):
self.language_code = language_code

def recognize(self, audio):
    """Transcribe *audio* using OpenAI's Whisper API.

    Parameters:
        audio: an uploaded-file-like object exposing ``.name`` and ``.read()``
            (e.g. a Django UploadedFile) — TODO confirm against callers.

    Returns:
        str: the transcription text.

    Raises:
        ValueError: if the Whisper API call fails for any reason.
    """
    try:
        client = OpenAI(api_key=self.api_key)
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            # Pass (filename, bytes) tuple per the SDK's file-upload contract:
            # https://github.com/openai/openai-python/tree/main#file-uploads
            file=(audio.name, audio.read()),
            # Whisper expects a bare ISO language code (e.g. "hi", not "hi-IN").
            language=self.language_code.replace("-IN", ""),
            # api_version="2020-11-07",
        )
        return transcription.text
    except Exception as e:
        print(f"Failed to recognize speech with whisper engine: {e}")
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(
            "[Speech to Text] Failed to recognize speech with Whisper engine"
        ) from e


class GoogleEngine:
Expand All @@ -31,41 +37,52 @@ def __init__(self, api_key, language_code):
self.language_code = language_code

def recognize(self, audio):
    """Transcribe *audio* using Google Cloud Speech-to-Text.

    Parameters:
        audio: an uploaded-file-like object exposing ``.file.read()``
            — presumably a Django UploadedFile; verify against callers.

    Returns:
        str: the top transcript alternative, or "" when the API returns
        no results (e.g. silence).

    Raises:
        ValueError: if the Speech API call fails for any reason.
    """
    try:
        client = speech.SpeechClient()
        audio_content = audio.file.read()
        audio_data = speech.RecognitionAudio(content=audio_content)

        config = speech.RecognitionConfig(
            # Let the API sniff the container/encoding from the bytes.
            encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
            language_code=self.language_code,
        )

        response = client.recognize(config=config, audio=audio_data)
        if not response.results:
            # No speech detected is not an error: return an empty transcript.
            return ""
        return response.results[0].alternatives[0].transcript
    except Exception as e:
        print(f"Failed to recognize speech with google engine: {e}")
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(
            "[Speech to Text] Failed to recognize speech with Google engine"
        ) from e


class SelfHostedEngine:
    """Speech-to-text engine backed by a self-hosted Whisper endpoint.

    The endpoint URL comes from ``settings.SELF_HOSTED_ENDPOINT``; no API
    key is needed, so the ``api_key`` argument is accepted for interface
    parity with the other engines but ignored.
    """

    def __init__(self, api_key, language_code):
        # api_key intentionally unused: the self-hosted service is unauthenticated
        # (interface kept identical to the other engine classes).
        self.language_code = language_code

    def recognize(self, audio):
        """POST *audio* to the self-hosted endpoint and return its transcript.

        Returns:
            str: the stripped transcription, or "" on a non-2xx response
            (best-effort behavior preserved from the original).

        Raises:
            ValueError: if the request itself fails (network error,
            malformed JSON, missing keys, ...).
        """
        try:
            response = requests.post(
                settings.SELF_HOSTED_ENDPOINT,
                files={"audio": audio},
                data={
                    # change this model to get faster results see: https://github.com/coronasafe/care-whisper
                    "model": "small",
                    # The service expects a bare ISO language code (e.g. "hi", not "hi-IN").
                    "language": self.language_code.replace("-IN", ""),
                },
            )

            if not response.ok:
                print("Failed to recognize speech with self hosted engine")
                return ""
            response = response.json()
            return response["data"]["transcription"].strip()
        except Exception as e:
            # Log and chain the cause instead of silently discarding it.
            print(f"Failed to recognize speech with self hosted engine: {e}")
            raise ValueError(
                "[Speech to Text] Failed to recognize speech with Self Hosted engine"
            ) from e


engines = {
Expand All @@ -82,14 +99,16 @@ def speech_to_text(engine_id, audio, language_code):
engine_class = engines.get(engine_name)

if not engine_class:
raise ValueError(f"Invalid STT engine ID: {engine_id}")
raise ValueError(f"[Speech to Text] Engine with ID {engine_id} not found")

try:
engine = engine_class(api_key, language_code)
recognized_text = engine.recognize(audio)
if not recognized_text:
raise ValueError("Failed to detect any speech in provided audio")
raise ValueError("[Speech to Text] No text recognized")
return recognized_text
except Exception as e:
print(f"Failed to recognize speech with {engine_name} engine: {e}")
raise e
print(f"Failed to transcribe speech with {engine_name} engine: {e}")
raise ValueError(
f"[Speech to Text] Failed to transcribe speech with {engine_name} engine"
)
7 changes: 5 additions & 2 deletions ayushma/utils/upsert.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def upsert(
None
"""
pinecone.init(
api_key=settings.PINECONE_API_KEY, environment=settings.PINECONE_ENVIRONMENT
api_key=settings.PINECONE_API_KEY,
environment=settings.PINECONE_ENVIRONMENT,
)
print("Initialized Pinecone and OpenAI")

Expand All @@ -80,7 +81,9 @@ def upsert(
raise Exception("Either filepath, url or text must be provided")

if len(document_lines) == 0:
raise Exception("No text found in document")
raise Exception(
"[Upsert] No text found in the document. Please check the document."
)
print(document_lines)

batch_size = (
Expand Down
8 changes: 5 additions & 3 deletions ayushma/views/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ def speech_to_text(self, *args, **kwarg):
stt_engine = Project.objects.get(external_id=project_id).stt_engine
except Project.DoesNotExist:
return Response(
{"error": "Project not found"}, status=status.HTTP_400_BAD_REQUEST
{"error": "Project not found"},
status=status.HTTP_400_BAD_REQUEST,
)
try:
stats["transcript_start_time"] = time.time()
Expand All @@ -133,13 +134,14 @@ def speech_to_text(self, *args, **kwarg):
print(f"Failed to transcribe speech with {stt_engine} engine: {e}")
return Response(
{
"error": "Something went wrong in getting transcription, please try again later"
"error": "[Transcribing] Something went wrong in getting transcription, please try again later"
},
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

return Response(
{"transcript": translated_text, "stats": stats}, status=status.HTTP_200_OK
{"transcript": translated_text, "stats": stats},
status=status.HTTP_200_OK,
)

@extend_schema(
Expand Down
Loading