Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: OCR Magic Tool added #272

Merged
merged 1 commit into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,14 @@ flask db migrate -m "Describe migration"
flask db upgrade
```

### (Optional) OCR support

Install the `tesseract` binary, which the OCR endpoint (`/api/ocr`) calls through `pytesseract`. On Debian/Ubuntu:

```
sudo apt install tesseract-ocr
```

---

## Running the Project
Expand Down
30 changes: 29 additions & 1 deletion app/api_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from .helpers import create_default_chatbots
from .data_fetcher import fetch_contribution_data
from datetime import datetime
import PIL
import pytesseract
import re
from flask_jwt_extended import (
create_access_token,
Expand Down Expand Up @@ -316,7 +318,9 @@ def api_chatbot(chatbot_id: int) -> Union[Response, tuple[Response, int]]:
query: str = data.get("query")
apikey = request.headers["apikey"]
engine = request.headers["engine"]
chat_to_pass: List[Dict[str, str]] = [{"role": "system", "content": chatbot.latest_version.prompt}]
chat_to_pass: List[Dict[str, str]] = [
{"role": "system", "content": chatbot.latest_version.prompt}
]
for chat in chats:
chat_to_pass.append({"role": "user", "content": chat.user_query})
chat_to_pass.append({"role": "assistant", "content": chat.response})
Expand Down Expand Up @@ -822,3 +826,27 @@ def api_translate():

except Exception as e:
return jsonify({"success": False, "message": str(e)}), 500


@api_bp.route("/api/ocr", methods=["POST"])
@jwt_required()
def api_ocr():
    """Extract text from an uploaded image using Tesseract OCR.

    Expects a ``multipart/form-data`` POST with the image in the ``file`` field.

    Returns:
        200 ``{"success": True, "text": <str>}`` when OCR succeeds.
        400 ``{"success": False, ...}`` when no file is supplied or the upload
            cannot be decoded as an image.
        500 ``{"success": False, "message": <str>}`` on any other failure.
    """
    # Imported locally: a bare ``import PIL`` does not guarantee that the
    # ``PIL.Image`` submodule has been loaded.
    from PIL import Image, UnidentifiedImageError

    try:
        file = request.files.get("file")
        # Browsers submit an empty filename when the file input is left blank.
        if file is None or not file.filename:
            # "error" is kept for existing clients; "message" matches the key
            # used by the other failure responses.
            return (
                jsonify(
                    {
                        "success": False,
                        "error": "No file provided",
                        "message": "No file provided",
                    }
                ),
                400,
            )

        try:
            # Decode straight from the upload stream instead of saving to disk.
            # The client-controlled filename is never used to build a path
            # (no path traversal / overwrite), and no temporary file can be
            # left behind if OCR raises.
            with Image.open(file.stream) as image:
                text = pytesseract.image_to_string(image)
        except UnidentifiedImageError:
            return (
                jsonify(
                    {
                        "success": False,
                        "message": "Uploaded file is not a valid image",
                    }
                ),
                400,
            )

        return jsonify({"success": True, "text": text}), 200

    except Exception as e:
        return jsonify({"success": False, "message": str(e)}), 500
7 changes: 7 additions & 0 deletions client/src/components/modals/command-modal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ import {
Languages,
PanelTopInactive,
Plus,
TextCursorInput,
} from "lucide-react";
import {
useCreateChatbotModal,
useImagineModal,
useOcrMagic,
useSettingsModal,
useTranslateMagicModal,
useTtsMagicModal,
Expand All @@ -38,6 +40,7 @@ export function CommandModal() {
const settingsModal = useSettingsModal();
const imagineModal = useImagineModal();
const ttsModal = useTtsMagicModal();
const ocrModal = useOcrMagic();
const translateModal = useTranslateMagicModal();
const navigate = useNavigate();

Expand Down Expand Up @@ -72,6 +75,10 @@ export function CommandModal() {
<Languages />
<span>{t("commandbox.translate")}</span>
</CommandItem>
<CommandItem onSelect={() => ocrModal.onOpen()}>
<TextCursorInput />
<span>Text Extractor (OCR)</span>
</CommandItem>
<CommandItem onSelect={() => imagineModal.onOpen()}>
<Image />
<span>{t("commandbox.image_generation")}</span>
Expand Down
131 changes: 131 additions & 0 deletions client/src/components/modals/ocr-magic-modal.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import {
AlertDialog,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
} from "@/components/ui/alert-dialog";
import { SERVER_URL } from "@/lib/utils";
import { useOcrMagic } from "@/stores/modal-store";
import axios from "axios";
import { useState } from "react";
import { Button } from "../ui/button";
import toast from "react-hot-toast";

import { X } from "lucide-react";

import { Input } from "../ui/input";
import { Skeleton } from "../ui/skeleton";

/**
 * Modal that lets the user pick an image, uploads it to `/api/ocr`, and
 * displays the text returned by the server.
 *
 * Visibility is driven by the `useOcrMagic` store. The request is sent with
 * the bearer token read from `localStorage` under the `token` key.
 */
export default function OcrMagicModal() {
  const modal = useOcrMagic();
  const [loading, setLoading] = useState(false);
  const [ocrText, setOcrText] = useState("");

  const [selectedFile, setSelectedFile] = useState<File | null>(null);
  const [imagePreview, setImagePreview] = useState<string | null>(null);

  /** Stores the chosen file and swaps the preview to a fresh blob URL. */
  const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0] ?? null;
    if (!file) return;

    // Release the previous blob URL; otherwise every newly selected image
    // stays pinned in memory until the page is unloaded.
    if (imagePreview) URL.revokeObjectURL(imagePreview);

    setSelectedFile(file);
    setImagePreview(URL.createObjectURL(file));
  };

  /** Uploads the selected image and stores the extracted text. */
  const handleOcrSubmit = async (event: React.FormEvent<HTMLFormElement>) => {
    event.preventDefault();
    setOcrText("");
    if (!selectedFile) {
      toast.error("Please select a file!");
      return;
    }

    const formData = new FormData();
    formData.append("file", selectedFile);
    setLoading(true);
    try {
      const token = localStorage.getItem("token");

      const authHeaders = {
        Authorization: `Bearer ${token ?? ""}`,
      };
      const response = await axios.post(`${SERVER_URL}/api/ocr`, formData, {
        headers: authHeaders,
      });
      // Guard against a response without a `text` field so state stays a string.
      const text: unknown = response.data?.text;
      setOcrText(typeof text === "string" ? text : "");
    } catch (error: unknown) {
      console.error("Error fetching OCR text:", error);
      // Surface the failure; a console-only log leaves the user with no feedback.
      toast.error("Failed to extract text from the image.");
    } finally {
      setLoading(false);
    }
  };

  return (
    <AlertDialog open={modal.isOpen} onOpenChange={() => modal.onClose()}>
      <AlertDialogContent>
        <AlertDialogHeader>
          <AlertDialogTitle>
            <div className="flex items-center justify-between">
              <p>OCR Magic Tool</p>
              <Button
                variant={"outline"}
                size={"icon"}
                className="rounded-full"
                onClick={() => modal.onClose()}
              >
                <X />
              </Button>
            </div>
          </AlertDialogTitle>
          <AlertDialogDescription>
            Extract Text from Image.
          </AlertDialogDescription>
          <div className="grid gap-4 w-full">
            {imagePreview && (
              <div className="relative aspect-video">
                <img
                  src={imagePreview}
                  alt="Uploaded image preview"
                  className="object-contain w-full h-full"
                />
              </div>
            )}
            {loading && (
              <div className="space-y-2">
                <Skeleton className="h-4 w-full" />
                <Skeleton className="h-4 w-[90%]" />
                <Skeleton className="h-4 w-[75%]" />
              </div>
            )}
            {ocrText && (
              <div className="p-4 bg-muted rounded-md">
                {/* OCR output is multi-line; keep its line breaks when rendering. */}
                <p className="text-sm whitespace-pre-wrap break-words">
                  {ocrText}
                </p>
              </div>
            )}
            <div className="flex items-center gap-4">
              <form
                onSubmit={handleOcrSubmit}
                className="w-full flex items-center flex-col gap-4"
              >
                <Input
                  disabled={loading}
                  type="file"
                  onChange={handleFileChange}
                  accept="image/*"
                  className="cursor-pointer"
                />
                <Button
                  disabled={loading}
                  className="w-full"
                  variant={"outline"}
                  type="submit"
                >
                  {loading ? "Extracting..." : "Extract"}
                </Button>
              </form>
            </div>
          </div>
        </AlertDialogHeader>
        <AlertDialogFooter></AlertDialogFooter>
      </AlertDialogContent>
    </AlertDialog>
  );
}
2 changes: 2 additions & 0 deletions client/src/contexts/modals.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import CreateChatbotModal from "@/components/modals/create-chatbot-modal";
import DeleteChatbotModal from "@/components/modals/delete-chatbot-modal";
import ImagineModal from "@/components/modals/imgine-modal";
import OcrMagicModal from "@/components/modals/ocr-magic-modal";
import SettingsModal from "@/components/modals/settings-modal";
import ShareModal from "@/components/modals/share-modal";
import TranslateMagicModal from "@/components/modals/translate-magic-modal";
Expand All @@ -16,6 +17,7 @@ export default function Modals() {
<UpdateProfileModal />
<SettingsModal />
<ShareModal />
<OcrMagicModal />
<TtsMagicModal />
<TranslateMagicModal />
<ImagineModal />
Expand Down
1 change: 1 addition & 0 deletions client/src/stores/modal-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ export const useShareModal = create<DefaultModal>(defaultModalValues);
// One store per modal, each created from the same `defaultModalValues`.
export const useTtsMagicModal = create<DefaultModal>(defaultModalValues);
export const useTranslateMagicModal = create<DefaultModal>(defaultModalValues);
export const useImagineModal = create<DefaultModal>(defaultModalValues);
// Store for the OCR modal. NOTE(review): the name lacks the `Modal` suffix its
// siblings use; renaming would require updating every importer.
export const useOcrMagic = create<DefaultModal>(defaultModalValues);
4 changes: 3 additions & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ anthropic
gTTS
beautifulsoup4
Markdown
translate
translate
pytesseract
pillow
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ anthropic
gTTS
beautifulsoup4
Markdown
translate
pytesseract
pillow