Skip to content

Commit

Permalink
Merge pull request #272 from priyanshuverma-dev/feat-ocr-magic
Browse files Browse the repository at this point in the history
feat: OCR Magic Tool added
  • Loading branch information
kom-senapati authored Nov 5, 2024
2 parents 48f05f6 + 7b61876 commit 02a7803
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 3 deletions.
8 changes: 8 additions & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,14 @@ flask db migrate -m "Describe migration"
flask db upgrade
```

### Optional: OCR support

Install `tesseract` (the OCR engine used by `pytesseract`) to enable text extraction from images:

```
sudo apt install tesseract-ocr
```

---

## Running the Project
Expand Down
30 changes: 29 additions & 1 deletion app/api_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from .helpers import create_default_chatbots
from .data_fetcher import fetch_contribution_data
from datetime import datetime
import PIL
import pytesseract
import re
from flask_jwt_extended import (
create_access_token,
Expand Down Expand Up @@ -316,7 +318,9 @@ def api_chatbot(chatbot_id: int) -> Union[Response, tuple[Response, int]]:
query: str = data.get("query")
apikey = request.headers["apikey"]
engine = request.headers["engine"]
chat_to_pass: List[Dict[str, str]] = [{"role": "system", "content": chatbot.latest_version.prompt}]
chat_to_pass: List[Dict[str, str]] = [
{"role": "system", "content": chatbot.latest_version.prompt}
]
for chat in chats:
chat_to_pass.append({"role": "user", "content": chat.user_query})
chat_to_pass.append({"role": "assistant", "content": chat.response})
Expand Down Expand Up @@ -822,3 +826,27 @@ def api_translate():

except Exception as e:
return jsonify({"success": False, "message": str(e)}), 500


@api_bp.route("/api/ocr", methods=["POST"])
@jwt_required()
def api_ocr():
    """Extract text from an uploaded image using Tesseract OCR.

    Expects a ``multipart/form-data`` POST whose ``file`` part holds the image.

    Returns:
        * 200 with ``{"success": True, "text": <extracted text>}`` on success.
        * 400 with ``{"success": False, "error": "No file provided"}`` when the
          ``file`` part is missing or empty.
        * 500 with ``{"success": False, "message": <error>}`` when decoding the
          image or running OCR fails.
    """
    # Import the submodule explicitly: a bare ``import PIL`` does not guarantee
    # that ``PIL.Image`` has been loaded.
    from PIL import Image

    try:
        upload = request.files.get("file")
        # Browsers submit an empty filename when no file was selected.
        if upload is None or not upload.filename:
            return jsonify({"success": False, "error": "No file provided"}), 400

        # Decode straight from the upload stream instead of saving it under the
        # client-supplied filename. That filename is untrusted (e.g.
        # "../../app/api_routes.py") and must never be joined into a server
        # path. Avoiding the temp file also means nothing is left on disk when
        # decoding or OCR raises.
        with Image.open(upload.stream) as image:
            text = pytesseract.image_to_string(image)

        return jsonify({"success": True, "text": text}), 200

    except Exception as e:
        return jsonify({"success": False, "message": str(e)}), 500
7 changes: 7 additions & 0 deletions client/src/components/modals/command-modal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ import {
Languages,
PanelTopInactive,
Plus,
TextCursorInput,
} from "lucide-react";
import {
useCreateChatbotModal,
useImagineModal,
useOcrMagic,
useSettingsModal,
useTranslateMagicModal,
useTtsMagicModal,
Expand All @@ -38,6 +40,7 @@ export function CommandModal() {
const settingsModal = useSettingsModal();
const imagineModal = useImagineModal();
const ttsModal = useTtsMagicModal();
const ocrModal = useOcrMagic();
const translateModal = useTranslateMagicModal();
const navigate = useNavigate();

Expand Down Expand Up @@ -72,6 +75,10 @@ export function CommandModal() {
<Languages />
<span>{t("commandbox.translate")}</span>
</CommandItem>
<CommandItem onSelect={() => ocrModal.onOpen()}>
<TextCursorInput />
<span>Text Extractor (OCR)</span>
</CommandItem>
<CommandItem onSelect={() => imagineModal.onOpen()}>
<Image />
<span>{t("commandbox.image_generation")}</span>
Expand Down
131 changes: 131 additions & 0 deletions client/src/components/modals/ocr-magic-modal.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import {
AlertDialog,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
} from "@/components/ui/alert-dialog";
import { SERVER_URL } from "@/lib/utils";
import { useOcrMagic } from "@/stores/modal-store";
import axios from "axios";
import { useState } from "react";
import { Button } from "../ui/button";
import toast from "react-hot-toast";

import { X } from "lucide-react";

import { Input } from "../ui/input";
import { Skeleton } from "../ui/skeleton";

/**
 * Modal that lets the user pick an image, uploads it to the `/api/ocr`
 * endpoint and displays the text extracted from it.
 *
 * Visibility is driven by the `useOcrMagic` store (`isOpen` / `onClose`).
 */
export default function OcrMagicModal() {
  const modal = useOcrMagic();
  const [loading, setLoading] = useState(false);
  const [ocrText, setOcrText] = useState("");

  const [selectedFile, setSelectedFile] = useState<File | null>(null);
  const [imagePreview, setImagePreview] = useState<string | null>(null);

  /** Remembers the chosen file and shows a local preview of it. */
  const handleFileChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0] ?? null;
    if (!file) return;

    // Blob URLs stay alive until revoked; release the previous preview so
    // repeatedly picking files does not leak memory.
    if (imagePreview) URL.revokeObjectURL(imagePreview);

    setSelectedFile(file);
    setImagePreview(URL.createObjectURL(file));
  };

  /** Uploads the selected file and stores the extracted text. */
  const handleOcrSubmit = async (event: React.FormEvent<HTMLFormElement>) => {
    event.preventDefault();
    setOcrText("");
    if (!selectedFile) {
      toast.error("Please select a file!");
      return;
    }

    const formData = new FormData();
    formData.append("file", selectedFile);
    setLoading(true);
    try {
      const token = localStorage.getItem("token");

      const authHeaders = {
        Authorization: `Bearer ${token ?? ""}`,
      };
      const response = await axios.post(`${SERVER_URL}/api/ocr`, formData, {
        headers: authHeaders,
      });
      // The response body is untyped; only accept a string payload.
      const text: unknown = response.data?.text;
      setOcrText(typeof text === "string" ? text : "");
    } catch (error: unknown) {
      console.error("Error fetching OCR text:", error);
      // Surface the failure to the user instead of only logging it.
      toast.error("Failed to extract text from the image.");
    } finally {
      setLoading(false);
    }
  };

  return (
    <AlertDialog open={modal.isOpen} onOpenChange={() => modal.onClose()}>
      <AlertDialogContent>
        <AlertDialogHeader>
          <AlertDialogTitle>
            <div className="flex items-center justify-between">
              <p>OCR Magic Tool</p>
              <Button
                variant={"outline"}
                size={"icon"}
                className="rounded-full"
                onClick={() => modal.onClose()}
              >
                <X />
              </Button>
            </div>
          </AlertDialogTitle>
          <AlertDialogDescription>
            Extract Text from Image.
          </AlertDialogDescription>
          <div className="grid gap-4 w-full">
            {imagePreview && (
              <div className="relative aspect-video">
                <img
                  src={imagePreview}
                  alt="Uploaded image preview"
                  className="object-contain w-full h-full"
                />
              </div>
            )}
            {loading && (
              <div className="space-y-2">
                <Skeleton className="h-4 w-full" />
                <Skeleton className="h-4 w-[90%]" />
                <Skeleton className="h-4 w-[75%]" />
              </div>
            )}
            {ocrText && (
              <div className="p-4 bg-muted rounded-md">
                <p className="text-sm">{ocrText}</p>
              </div>
            )}
            <div className="flex items-center gap-4">
              <form
                onSubmit={handleOcrSubmit}
                className="w-full flex items-center flex-col gap-4"
              >
                <Input
                  disabled={loading}
                  type="file"
                  onChange={handleFileChange}
                  accept="image/*"
                  className="cursor-pointer"
                />
                <Button
                  disabled={loading}
                  className="w-full"
                  variant={"outline"}
                  type="submit"
                >
                  {loading ? "Extracting..." : "Extract"}
                </Button>
              </form>
            </div>
          </div>
        </AlertDialogHeader>
        <AlertDialogFooter></AlertDialogFooter>
      </AlertDialogContent>
    </AlertDialog>
  );
}
2 changes: 2 additions & 0 deletions client/src/contexts/modals.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import CreateChatbotModal from "@/components/modals/create-chatbot-modal";
import DeleteChatbotModal from "@/components/modals/delete-chatbot-modal";
import ImagineModal from "@/components/modals/imgine-modal";
import OcrMagicModal from "@/components/modals/ocr-magic-modal";
import SettingsModal from "@/components/modals/settings-modal";
import ShareModal from "@/components/modals/share-modal";
import TranslateMagicModal from "@/components/modals/translate-magic-modal";
Expand All @@ -16,6 +17,7 @@ export default function Modals() {
<UpdateProfileModal />
<SettingsModal />
<ShareModal />
<OcrMagicModal />
<TtsMagicModal />
<TranslateMagicModal />
<ImagineModal />
Expand Down
1 change: 1 addition & 0 deletions client/src/stores/modal-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ export const useShareModal = create<DefaultModal>(defaultModalValues);
// One independent store hook per modal, each created from the shared
// `defaultModalValues`, so opening or closing one modal does not affect the others.
export const useTtsMagicModal = create<DefaultModal>(defaultModalValues);
export const useTranslateMagicModal = create<DefaultModal>(defaultModalValues);
export const useImagineModal = create<DefaultModal>(defaultModalValues);
// Store hook for the OCR modal.
// NOTE(review): unlike its siblings this name has no `Modal` suffix — consider aligning.
export const useOcrMagic = create<DefaultModal>(defaultModalValues);
4 changes: 3 additions & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ anthropic
gTTS
beautifulsoup4
Markdown
translate
translate
pytesseract
pillow
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ anthropic
gTTS
beautifulsoup4
Markdown
translate
pytesseract
pillow

0 comments on commit 02a7803

Please sign in to comment.