diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index 4dc2482..81c35f5 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -123,6 +123,14 @@ flask db migrate -m "Describe migration"
 flask db upgrade
 ```
+### (Optional) OCR support
+
+Install `tesseract` to enable the OCR text-extraction endpoint:
+
+```
+sudo apt install tesseract-ocr
+```
+
 ---
 ## Running the Project
diff --git a/app/api_routes.py b/app/api_routes.py
index cb2c836..d7aba91 100644
--- a/app/api_routes.py
+++ b/app/api_routes.py
@@ -12,6 +12,8 @@ from .helpers import create_default_chatbots
 from .data_fetcher import fetch_contribution_data
 from datetime import datetime
+import PIL.Image
+import pytesseract
 import re
 from flask_jwt_extended import (
     create_access_token,
@@ -316,7 +318,9 @@ def api_chatbot(chatbot_id: int) -> Union[Response, tuple[Response, int]]:
     query: str = data.get("query")
     apikey = request.headers["apikey"]
     engine = request.headers["engine"]
-    chat_to_pass: List[Dict[str, str]] = [{"role": "system", "content": chatbot.latest_version.prompt}]
+    chat_to_pass: List[Dict[str, str]] = [
+        {"role": "system", "content": chatbot.latest_version.prompt}
+    ]
     for chat in chats:
         chat_to_pass.append({"role": "user", "content": chat.user_query})
         chat_to_pass.append({"role": "assistant", "content": chat.response})
@@ -822,3 +826,35 @@ def api_translate():
     except Exception as e:
         return jsonify({"success": False, "message": str(e)}), 500
+
+
+@api_bp.route("/api/ocr", methods=["POST"])
+@jwt_required()
+def api_ocr():
+    """Extract text from an uploaded image using Tesseract OCR.
+
+    Expects a multipart/form-data POST carrying the image in the ``file``
+    field.
+
+    Returns:
+        ``({"success": True, "text": ...}, 200)`` on success,
+        ``({"success": False, ...}, 400)`` when no file was uploaded, and
+        ``({"success": False, "message": ...}, 500)`` when decoding or OCR
+        fails.
+    """
+    uploaded = request.files.get("file")
+    # Browsers send an empty, nameless part when no file was chosen.
+    if uploaded is None or not uploaded.filename:
+        error = "No file provided"
+        return jsonify({"success": False, "error": error, "message": error}), 400
+
+    try:
+        # Decode straight from the upload stream: the client-controlled
+        # filename never reaches the filesystem (no path traversal) and no
+        # temporary file is left behind when OCR raises.
+        with PIL.Image.open(uploaded.stream) as image:
+            text = pytesseract.image_to_string(image)
+    except Exception as e:
+        return jsonify({"success": False, "message": str(e)}), 500
+
+    return jsonify({"success": True, "text": text}), 200
diff --git a/client/src/components/modals/command-modal.tsx b/client/src/components/modals/command-modal.tsx
index 6d70dab..3793722 100644
--- a/client/src/components/modals/command-modal.tsx
+++ b/client/src/components/modals/command-modal.tsx
@@ -20,10 +20,12 @@ import {
   Languages,
   PanelTopInactive,
   Plus,
+  TextCursorInput,
 } from "lucide-react";
 import {
   useCreateChatbotModal,
   useImagineModal,
+  useOcrMagic,
   useSettingsModal,
   useTranslateMagicModal,
   useTtsMagicModal,
@@ -38,6 +40,7 @@ export function CommandModal() {
   const settingsModal = useSettingsModal();
   const imagineModal = useImagineModal();
   const ttsModal = useTtsMagicModal();
+  const ocrModal = useOcrMagic();
   const translateModal = useTranslateMagicModal();
   const navigate = useNavigate();
@@ -72,6 +75,10 @@ export function CommandModal() {
             {t("commandbox.translate")}
+          ocrModal.onOpen()}>
+
+            Text Extractor (OCR)
+
           imagineModal.onOpen()}>
             {t("commandbox.image_generation")}
diff --git a/client/src/components/modals/ocr-magic-modal.tsx b/client/src/components/modals/ocr-magic-modal.tsx
new file mode 100644
index 0000000..d126ca7
--- /dev/null
+++ b/client/src/components/modals/ocr-magic-modal.tsx
@@ -0,0 +1,131 @@
+import {
+  AlertDialog,
+  AlertDialogContent,
+  AlertDialogDescription,
+  AlertDialogFooter,
+  AlertDialogHeader,
+  AlertDialogTitle,
+} from "@/components/ui/alert-dialog";
+import { SERVER_URL } from "@/lib/utils";
+import { useOcrMagic } from "@/stores/modal-store";
+import axios from "axios";
+import { useState } from "react";
+import { Button } from "../ui/button";
+import toast from "react-hot-toast";
+
+import { X } from "lucide-react";
+
+import { Input } from "../ui/input";
+import { Skeleton } from "../ui/skeleton";
+
+export default function OcrMagicModal() {
+  const modal = useOcrMagic();
+  const [loading, setLoading] = useState(false);
+  const [ocrText, setOcrText] = useState("");
+
+  const
[selectedFile, setSelectedFile] = useState(null); + const [imagePreview, setImagePreview] = useState(null); + const handleFileChange = (event: React.ChangeEvent) => { + const file = event.target.files ? event.target.files[0] : null; + if (file) { + setSelectedFile(file); + setImagePreview(URL.createObjectURL(file)); + } + }; + + const handleOcrSubmit = async (event: any) => { + event.preventDefault(); + setOcrText(""); + if (!selectedFile) return toast.error("Please select a file!"); + + const formData = new FormData(); + formData.append("file", selectedFile); + setLoading(true); + try { + const token = localStorage.getItem("token"); + + const authHeaders = { + Authorization: `Bearer ${token || ""}`, + }; + const response = await axios.post(`${SERVER_URL}/api/ocr`, formData, { + headers: authHeaders, + }); + setOcrText(response.data.text); + } catch (error) { + console.error("Error fetching OCR text:", error); + } finally { + setLoading(false); + } + }; + + return ( + modal.onClose()}> + + + + + OCR Magic Tool + modal.onClose()} + > + + + + + + Extract Text from Image. + + + {imagePreview && ( + + + + )} + {loading && ( + + + + + + )} + {ocrText && ( + + {ocrText} + + )} + + + + + {loading ? "Extracting..." 
: "Extract"}
+
+
+
+
+
+
+
+
+  );
+}
diff --git a/client/src/contexts/modals.tsx b/client/src/contexts/modals.tsx
index d666ba4..ef37f89 100644
--- a/client/src/contexts/modals.tsx
+++ b/client/src/contexts/modals.tsx
@@ -1,6 +1,7 @@
 import CreateChatbotModal from "@/components/modals/create-chatbot-modal";
 import DeleteChatbotModal from "@/components/modals/delete-chatbot-modal";
 import ImagineModal from "@/components/modals/imgine-modal";
+import OcrMagicModal from "@/components/modals/ocr-magic-modal";
 import SettingsModal from "@/components/modals/settings-modal";
 import ShareModal from "@/components/modals/share-modal";
 import TranslateMagicModal from "@/components/modals/translate-magic-modal";
@@ -16,6 +17,7 @@ export default function Modals() {
+
diff --git a/client/src/stores/modal-store.ts b/client/src/stores/modal-store.ts
index debc8ff..f2ab298 100644
--- a/client/src/stores/modal-store.ts
+++ b/client/src/stores/modal-store.ts
@@ -10,3 +10,4 @@ export const useShareModal = create(defaultModalValues);
 export const useTtsMagicModal = create(defaultModalValues);
 export const useTranslateMagicModal = create(defaultModalValues);
 export const useImagineModal = create(defaultModalValues);
+export const useOcrMagic = create(defaultModalValues);
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 020d4c9..564c4bc 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -17,4 +17,6 @@ anthropic
 gTTS
 beautifulsoup4
 Markdown
-translate
\ No newline at end of file
+translate
+pytesseract
+pillow
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 47948e2..43fcb59 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,4 +15,6 @@ anthropic
 gTTS
 beautifulsoup4
 Markdown
-translate
\ No newline at end of file
+translate
+pytesseract
+pillow
\ No newline at end of file
OCR Magic Tool
{ocrText}