kom-senapati · kom-senapati · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024
diff --git a/.gitignore b/.gitignore
@@ -6,4 +6,6 @@
 \myenv
 
 /tailwind/*
-*.log
+*.log
+/temp_audio/*
+app/temp_audio/*
diff --git a/app/ai.py b/app/ai.py
@@ -6,18 +6,25 @@
 from openai import OpenAI
 import google.generativeai as genai
 from anthropic import Anthropic
+from gtts import gTTS
+import uuid
+from bs4 import BeautifulSoup
+import markdown
 
 load_dotenv()
 
 # Set up logging
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
 
+
 def chat_with_chatbot(messages: List[Dict[str, str]], apiKey: str, engine: str) -> str:
     if not apiKey:
         logger.error("API key is missing.")
         raise ValueError("API key is required for making API requests.")
-    
+
     try:
         if engine == "groq":
             content = chat_with_groq(messages, apiKey)
@@ -36,6 +43,7 @@ def chat_with_chatbot(messages: List[Dict[str, str]], apiKey: str, engine: str)
         logger.error(f"Error in chat_with_chatbot function with engine {engine}: {e}")
         raise
 
+
 def chat_with_groq(messages: List[Dict[str, str]], apiKey: str) -> str:
     try:
         client = Groq(api_key=apiKey)
@@ -48,6 +56,7 @@ def chat_with_groq(messages: List[Dict[str, str]], apiKey: str) -> str:
         logger.error(f"Error in chat_with_groq: {e}")
         raise
 
+
 def chat_with_openai(messages: List[Dict[str, str]], apiKey: str) -> str:
     try:
         client = OpenAI(api_key=apiKey)
@@ -60,6 +69,7 @@ def chat_with_openai(messages: List[Dict[str, str]], apiKey: str) -> str:
         logger.error(f"Error in chat_with_openai: {e}")
         raise
 
+
 def chat_with_anthropic(messages: List[Dict[str, str]], apiKey: str) -> str:
     try:
         client = Anthropic(api_key=apiKey)
@@ -73,6 +83,7 @@ def chat_with_anthropic(messages: List[Dict[str, str]], apiKey: str) -> str:
         logger.error(f"Error in chat_with_anthropic: {e}")
         raise
 
+
 def chat_with_gemini(messages: List[Dict[str, str]], apiKey: str) -> str:
     try:
         genai.configure(api_key=apiKey)
@@ -89,3 +100,29 @@ def chat_with_gemini(messages: List[Dict[str, str]], apiKey: str) -> str:
     except Exception as e:
         logger.error(f"Error in chat_with_gemini: {e}")
         raise
+
+
+def markdown_to_text(markdown_text: str) -> str:
+    # Convert Markdown to HTML
+    html = markdown.markdown(markdown_text)
+    # Use BeautifulSoup to extract text
+    soup = BeautifulSoup(html, "html.parser")
+    return soup.get_text()
+
+
+def text_to_mp3(text: str):
+    base_path = os.path.dirname(
+        os.path.abspath(__file__)
+    )  # Get the absolute path of the script
+    temp_audio_dir = os.path.join(base_path, "temp_audio")
+    os.makedirs(temp_audio_dir, exist_ok=True)
+    plain_text = markdown_to_text(text)
+    filename = f"{uuid.uuid4()}.mp3"
+    filepath = os.path.join(temp_audio_dir, filename)
+    # print(filepath)
+
+    # Generate speech audio file
+    tts = gTTS(text=plain_text, lang="en")
+    tts.save(filepath)
+
+    return filepath
diff --git a/app/api_routes.py b/app/api_routes.py
@@ -1,19 +1,12 @@
-from flask import (
-    Flask,
-    Blueprint,
-    request,
-    jsonify,
-    session,
-    Response,
-)
+from flask import Flask, Blueprint, request, jsonify, session, Response, send_file
 import re
-
+import os
 from sqlalchemy import func
 from .models import User, Chatbot, Chat, Image, Comment
 from sqlalchemy.exc import IntegrityError
 from flask_login import login_user
 from typing import Union, List, Optional, Dict
-from .ai import chat_with_chatbot
+from .ai import chat_with_chatbot, text_to_mp3
 from .constants import BOT_AVATAR_API, USER_AVATAR_API
 from .helpers import create_default_chatbots
 from .data_fetcher import fetch_contribution_data
@@ -690,3 +683,27 @@ def api_comment_chatbot():
 
     except Exception as e:
         return jsonify({"success": False, "message": str(e)}), 500
+
+
+@api_bp.route("/api/tts", methods=["POST"])
+# @jwt_required()
+def api_tts():
+    try:
+        data = request.get_json()
+        text = data.get("text")
+        if not text:
+            return jsonify({"success": False, "message": "Text not found"}), 400
+
+        filepath = text_to_mp3(text)
+        print(filepath)
+
+        response = send_file(filepath, as_attachment=True)
+
+        @response.call_on_close
+        def remove_file():
+            os.remove(filepath)
+
+        return response
+
+    except Exception as e:
+        return jsonify({"success": False, "message": str(e)}), 500
diff --git a/client/bun.lockb b/client/bun.lockb
diff --git a/client/src/App.tsx b/client/src/App.tsx
@@ -21,6 +21,7 @@ import ScrollToTop from "react-scroll-to-top";
 import { ArrowBigUpDash } from "lucide-react";
 import LeaderboardPage from "./pages/Leaderboard";
 import ChatbotViewPage from "./pages/ChatbotView";
+import TextToSpeechDownload from "./pages/Test";
 
 const queryClient = new QueryClient();
 function App() {
@@ -39,6 +40,7 @@ function App() {
             <Routes>
               <Route path="*" element={<NotFound />} />
               <Route path="/" element={<LandingPage />} />
+              <Route path="/test" element={<TextToSpeechDownload />} />
               <Route path="/login" element={<LoginPage />} />
               <Route path="/signup" element={<SignupPage />} />
               <Route path="/anonymous" element={<AnonymousPage />} />

diff --git a/client/src/components/modals/Tts-magic-modal.tsx b/client/src/components/modals/Tts-magic-modal.tsx
@@ -0,0 +1,138 @@
+import {
+  AlertDialog,
+  AlertDialogContent,
+  AlertDialogDescription,
+  AlertDialogFooter,
+  AlertDialogHeader,
+  AlertDialogTitle,
+} from "@/components/ui/alert-dialog";
+import { SERVER_URL } from "@/lib/utils";
+import { useTtsMagicModal } from "@/stores/modal-store";
+import axios from "axios";
+import { useEffect, useState } from "react";
+import { Button } from "../ui/button";
+import toast from "react-hot-toast";
+import { Textarea } from "../ui/textarea";
+
+import { AudioLines, Download, X } from "lucide-react";
+
+/**
+ * Strip common Markdown markup (emphasis, strikethrough, inline code,
+ * headings, blockquotes) from a string and drop blank lines.
+ *
+ * This is a lightweight regex-based cleanup, not a full Markdown parser.
+ * Known limitation: the emphasis patterns also match `*` / `_` pairs inside
+ * ordinary text (e.g. snake_case identifiers).
+ *
+ * @param markdown - Markdown source text.
+ * @returns The text with the markup removed and surrounding whitespace trimmed.
+ */
+const markdownToPlainText = (markdown: string) => {
+  return markdown
+    .replace(/(\*\*|__)(.*?)\1/g, "$2") // Bold
+    .replace(/(\*|_)(.*?)\1/g, "$2") // Italics
+    .replace(/~~(.*?)~~/g, "$1") // Strikethrough
+    .replace(/`{1,2}(.*?)`{1,2}/g, "$1") // Inline code
+    // Headings H1-H6: anchored to line start so text such as "C# is fun" is
+    // left alone, and a heading on the last line (no trailing newline) works
+    .replace(/^#{1,6}[ \t]+/gm, "")
+    // Blockquote markers (including nested ">>"): only at the start of a
+    // line, so a ">" inside a sentence ("a > b") is preserved
+    .replace(/^[ \t]{0,3}(?:>[ \t]?)+/gm, "")
+    // Collapse blank or whitespace-only lines into a single newline
+    .replace(/\n\s*\n/g, "\n")
+    .trim(); // Trim whitespace
+};
+
+/**
+ * Hand a Blob to the browser as a file download via a temporary anchor
+ * element, then release the object URL so the Blob can be garbage-collected.
+ */
+const triggerDownload = (blob: Blob, filename: string) => {
+  const url = window.URL.createObjectURL(blob);
+  const link = document.createElement("a");
+  link.href = url;
+  link.setAttribute("download", filename);
+  document.body.appendChild(link);
+  link.click();
+  link.remove();
+  // Revoke on the next tick so the click has been processed first
+  setTimeout(() => window.URL.revokeObjectURL(url), 0);
+};
+
+/**
+ * Modal that lets the user edit a piece of text and download it either as
+ * an MP3 generated by the `/api/tts` endpoint or as a plain-text file.
+ * The textarea is pre-filled from the modal's `extras.text` when it opens.
+ */
+export default function TtsMagicModal() {
+  const modal = useTtsMagicModal();
+  const { text: initialText } = modal.extras;
+
+  const [text, setText] = useState("");
+  const [loading, setLoading] = useState(false);
+
+  // Set initial text when the modal opens
+  useEffect(() => {
+    if (modal.isOpen) {
+      // Fall back to "" so the textarea always stays a controlled input
+      // (assumes extras.text may be absent - TODO confirm against the store)
+      setText(initialText ?? "");
+    }
+  }, [modal.isOpen, initialText]); // Depend on modal open state and initial text
+
+  // Request the MP3 from the server and download it as "speech.mp3"
+  const downloadAudio = async () => {
+    const plainText = markdownToPlainText(text);
+    if (!plainText) {
+      // Nothing to speak: skip the request the server would reject anyway
+      toast.error("Please enter some text first");
+      return;
+    }
+
+    setLoading(true);
+    try {
+      const response = await axios.post(
+        `${SERVER_URL}/api/tts`,
+        { text: plainText },
+        { responseType: "blob" }
+      );
+      triggerDownload(
+        new Blob([response.data], { type: "audio/mpeg" }),
+        "speech.mp3"
+      );
+      modal.onClose();
+    } catch (error) {
+      toast.error("Error generating audio");
+      console.error("Error generating audio", error);
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  // Download the current text, with Markdown markup stripped, as "text.txt"
+  const downloadTextFile = () => {
+    const plainText = markdownToPlainText(text);
+    triggerDownload(new Blob([plainText], { type: "text/plain" }), "text.txt");
+  };
+
+  return (
+    <AlertDialog open={modal.isOpen} onOpenChange={() => modal.onClose()}>
+      <AlertDialogContent>
+        <AlertDialogHeader>
+          <AlertDialogTitle>
+            <div className="flex items-center justify-between">
+              <p>Convert Text to speech and download</p>
+              <Button
+                variant={"outline"}
+                size={"icon"}
+                className="rounded-full"
+                onClick={() => modal.onClose()}
+              >
+                <X />
+              </Button>
+            </div>
+          </AlertDialogTitle>
+          <AlertDialogDescription>
+            text is converted to audio file in mp3 format that will be
+            downloaded automatically.
+          </AlertDialogDescription>
+          <div className="my-4">
+            <Textarea
+              disabled={loading}
+              value={text}
+              onChange={(e) => setText(e.target.value)} // Update state on change
+              rows={5}
+              className="w-full p-2 border rounded"
+              placeholder="Enter text here..."
+            />
+          </div>
+        </AlertDialogHeader>
+        <AlertDialogFooter>
+          <Button
+            disabled={loading}
+            onClick={downloadTextFile}
+            className="btn btn-secondary ml-2"
+            variant={"secondary"}
+          >
+            <Download /> Text
+          </Button>
+          <Button
+            disabled={loading}
+            onClick={downloadAudio}
+            className="btn btn-primary"
+          >
+            <AudioLines />
+            {loading ? "Generating..." : "Generate"}
+          </Button>
+        </AlertDialogFooter>
+      </AlertDialogContent>
+    </AlertDialog>
+  );
+}
diff --git a/client/src/contexts/modals.tsx b/client/src/contexts/modals.tsx
@@ -2,6 +2,7 @@ import CreateChatbotModal from "@/components/modals/create-chatbot-modal";
 import DeleteChatbotModal from "@/components/modals/delete-chatbot-modal";
 import SettingsModal from "@/components/modals/settings-modal";
 import ShareModal from "@/components/modals/share-modal";
+import TtsMagicModal from "@/components/modals/Tts-magic-modal";
 import UpdateChatbotModal from "@/components/modals/update-chatbot-modal";
 import UpdateProfileModal from "@/components/modals/update-profile-modal";
 
@@ -13,6 +14,7 @@ export default function Modals() {
       <UpdateProfileModal />
       <SettingsModal />
       <ShareModal />
+      <TtsMagicModal />
       <DeleteChatbotModal />
     </>
   );

diff --git a/client/src/pages/Chatbot.tsx b/client/src/pages/Chatbot.tsx
@@ -14,7 +14,7 @@ import { SERVER_URL } from "@/lib/utils";
 import { zodResolver } from "@hookform/resolvers/zod";
 import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
 import axios from "axios";
-import { ArrowLeft, Loader2, Menu, SendIcon } from "lucide-react";
+import { ArrowLeft, Loader2, Menu, SendIcon, Sparkles } from "lucide-react";
 import { useCallback, useEffect, useRef, useState } from "react";
 import { useForm } from "react-hook-form";
 import toast from "react-hot-toast";
@@ -23,7 +23,7 @@ import { z } from "zod";
 import Markdown from "react-markdown";
 import { Skeleton } from "@/components/ui/skeleton";
 import { useSettings } from "@/contexts/settings-context";
-import { useSettingsModal } from "@/stores/modal-store";
+import { useSettingsModal, useTtsMagicModal } from "@/stores/modal-store";
 import {
   DropdownMenu,
   DropdownMenuContent,
@@ -43,6 +43,7 @@ export default function ChatbotPage() {
   const messageEl = useRef(null);
   const singleClickTimeout = useRef<NodeJS.Timeout | null>(null);
   const settingsModal = useSettingsModal();
+  const ttsMagicModal = useTtsMagicModal();
   const { currentConfig } = useSettings();
   const [loading, setLoading] = useState(false); // Loading state for request
   const rq = useQueryClient();
@@ -189,11 +190,26 @@ export default function ChatbotPage() {
                     <p className="text-sm">{chat.user_query}</p>
                   </div>
                 </div>
-                <div className="flex justify-start items-center space-x-2 mb-2">
+                <div className="flex justify-start items-center space-x-2">
                   <div className="max-w-md bg-white dark:bg-dark dark:text-dark/90 text-gray-900 rounded-xl p-4 drop-shadow-md shadow border border-gray-100 dark:border-darker flex flex-col">
                     <p className="text-sm flex-1">
                       <Markdown>{chat.response}</Markdown>
                     </p>
+                    <div className="flex justify-end">
+                      <Button
+                        className="rounded-full hover:bg-primary/10"
+                        variant={"ghost"}
+                        onClick={() =>
+                          ttsMagicModal.onOpen({
+                            text: chat.response,
+                          })
+                        }
+                        size={"icon"}
+                      >
+                        <Sparkles className="text-primary-foreground" />
+                        <span className="sr-only">Action</span>
+                      </Button>
+                    </div>
                   </div>
                 </div>
               </>