Commit

Merge pull request #128 from BhavyaFattania/fix-issue-113
add image-to-text model
abhisek247767 authored Oct 30, 2024
2 parents 43a3a7f + f1569e1 commit 7d9a997
Showing 5 changed files with 185 additions and 0 deletions.
55 changes: 55 additions & 0 deletions image-to-text model/model.py
@@ -0,0 +1,55 @@
import requests
from PIL import Image
import streamlit as st
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# Initialize processor and model
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cuda" if torch.cuda.is_available() else "cpu")

# Function to process and caption an image from a URL
def caption_image(image_url):
    try:
        # Load image from the provided URL
        raw_image = Image.open(requests.get(image_url, stream=True).raw).convert('RGB')

        # Conditional image captioning
        text = "a photography of"
        inputs = processor(raw_image, text, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
        out = model.generate(**inputs)
        conditional_caption = processor.decode(out[0], skip_special_tokens=True)

        # Unconditional image captioning
        inputs = processor(raw_image, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
        out = model.generate(**inputs)
        unconditional_caption = processor.decode(out[0], skip_special_tokens=True)

        return raw_image, conditional_caption, unconditional_caption

    except Exception as e:
        st.error(f"Error occurred: {e}")
        return None, None, None

# Streamlit App
st.title("Image captioning model")
# Input field for image URL
image_url = st.text_input("Enter the image URL:", "")


# Process and display captions when the user submits an image URL
if st.button("Generate Captions"):
    if image_url:
        with st.spinner("Processing..."):
            raw_image, conditional_caption, unconditional_caption = caption_image(image_url)

        if raw_image:
            # Display the image
            st.image(raw_image, caption="Uploaded Image", use_column_width=True)

            # Display captions
            st.subheader("Generated Captions:")
            st.write(f"**Conditional Caption:** {conditional_caption}")
            st.write(f"**Unconditional Caption:** {unconditional_caption}")
    else:
        st.error("Please enter a valid image URL.")
50 changes: 50 additions & 0 deletions image-to-text model/model_implementation.txt
@@ -0,0 +1,50 @@
Title: Image Captioning Model

Overview:
This implementation provides a Streamlit application that generates captions for images using the BLIP model.

Dependencies:
- Streamlit
- PyTorch
- Hugging Face Transformers
- Pillow
- Requests

Installation Instructions:
1. Clone the repository:
   git clone https://github.com/UppuluriKalyani/ML-Nexus
2. Navigate to the project directory:
   cd "Computer Vision"
3. Create a virtual environment (optional but recommended):
   python -m venv venv
   source venv/bin/activate  # On Windows use `venv\Scripts\activate`
4. Install the required packages:
   pip install -r requirements.txt

Usage Instructions:
1. Run the Streamlit application:
   streamlit run model.py
2. Open a web browser and go to the provided local URL.
3. Enter an image URL and click "Generate Captions" to see the output.

Features:
- Generates conditional and unconditional captions for images.
- Supports various image formats.

Example Input and Output:
Input: https://example.com/image.jpg
Output:
- Conditional Caption: "A photography of..."
- Unconditional Caption: "A beautiful scenery..."

Limitations:
- The model's performance may vary with the quality of the input image.

Future Work:
- Implement functionality for uploading local images.
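
  A possible starting point for local uploads (a sketch only, not part of the current implementation; it assumes the processor and model objects already defined in model.py and uses Streamlit's st.file_uploader):

    uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
    if uploaded_file is not None:
        # Pillow can open the uploaded file object directly
        raw_image = Image.open(uploaded_file).convert("RGB")
        inputs = processor(raw_image, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
        out = model.generate(**inputs)
        st.write(processor.decode(out[0], skip_special_tokens=True))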

Conclusion:
This project demonstrates the capability of the BLIP model in generating image captions, paving the way for future developments in image processing and NLP.

Acknowledgments:
Special thanks to the authors of the BLIP model and the Hugging Face Transformers library for providing the tools used in this implementation.
75 changes: 75 additions & 0 deletions image-to-text model/model_readme.md
@@ -0,0 +1,75 @@
Image-to-Text Model

This project implements an image-to-text model using BLIP (Bootstrapped Language-Image Pre-training). It allows users to input an image via URL, and the model generates both conditional and unconditional captions describing the content of the image.

Table of Contents
1) Introduction
2) Features
3) Installation
4) Usage
5) License

1) Introduction

The Image-to-Text Model leverages the BLIP model to generate captions for images. The model can generate captions in two modes:

- Conditional Captions: a description generated with an initial prompt.
- Unconditional Captions: a description generated without any prompt.

The model is useful for various tasks, such as:

- Automatic image annotation.
- Assisting visually impaired individuals by describing images.
- Image-based content generation.
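
The only difference between the two modes is whether a text prompt is passed to the processor. A minimal sketch of both (not part of the app itself; it assumes the same `Salesforce/blip-image-captioning-large` checkpoint used in `model.py`, and `example.jpg` is a placeholder path):

```python
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

image = Image.open("example.jpg").convert("RGB")  # placeholder image path

# Conditional caption: the prompt steers how the caption begins
inputs = processor(image, "a photography of", return_tensors="pt")
print(processor.decode(model.generate(**inputs)[0], skip_special_tokens=True))

# Unconditional caption: no prompt, the model describes the image freely
inputs = processor(image, return_tensors="pt")
print(processor.decode(model.generate(**inputs)[0], skip_special_tokens=True))
```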

2) Features

- Conditional Captioning: generates a caption with the context of a prompt (e.g., "a photography of ...").
- Unconditional Captioning: generates a general caption for the image without a prompt.
- Streamlit Web App: easy-to-use web interface for submitting an image URL and generating captions.
- Hugging Face Transformers: uses the Salesforce BLIP model for robust image-caption generation.

3) Installation

Clone the repository:

```bash
git clone https://github.com/your-username/ML-Nexus.git
cd ML-Nexus
```

Create and activate a virtual environment (optional but recommended):

```bash
python -m venv venv
source venv/bin/activate  # On Windows, use: venv\Scripts\activate
```

Install the required dependencies:

```bash
pip install -r requirements.txt
```

Install Streamlit separately if it is not already covered by the requirements file:

```bash
pip install streamlit
```

4) Usage

Running the Streamlit App

Start the Streamlit web app:

```bash
streamlit run model.py
```

Access the app at http://localhost:8501. Input an image URL to generate captions.

Running the Script from the Command Line

You can run the model directly from the command line using:

```bash
python Generative\ Models/image-to-text\ model/image_to_text_model.py
```

Enter the URL of an image when prompted, and it will generate and print the captions for you.

5) License

This project is licensed under the MIT License.

Result

See result.png for a sample of the app's output.
5 changes: 5 additions & 0 deletions image-to-text model/requirements.txt
@@ -0,0 +1,5 @@
streamlit
torch
transformers
Pillow
requests
Binary file added image-to-text model/result.png
