From 6f359bf2767244404877158b44d33c01e5a8ec2f Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 20 Apr 2024 04:37:07 +0200 Subject: [PATCH 01/13] streamlit Llava instead of gemini --- LICENSE | 22 +-------- app.py | 120 ++++++++++++++++++++++------------------------- requirements.txt | 4 +- 3 files changed, 60 insertions(+), 86 deletions(-) diff --git a/LICENSE b/LICENSE index b922f41..fd9e641 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1 @@ -MIT License - -Copyright (c) 2024 Kamalakar Satapathi - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +tbd \ No newline at end of file diff --git a/app.py b/app.py index a92c17c..81cf5bd 100644 --- a/app.py +++ b/app.py @@ -1,72 +1,66 @@ import streamlit as st -import os from PIL import Image -import google.generativeai as genai +import base64 +import requests +from io import BytesIO -st.title('Image Captioning and Tagging') +# Setting up the title of the application +st.title('Image Inference with DeepInfra') -uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"]) +# Input for the API key +API_KEY = st.text_input("Enter your DeepInfra API Key:", type="password") -API_KEY = st.text_input("Enter your API Key: Get your Google Studio API key from [here](https://makersuite.google.com/app/apikey)", type="password") -if uploaded_file is not None: - if st.button('Upload'): - if API_KEY.strip() == '': - st.error('Enter a valid API key') - else: - file_path = os.path.join("temp", uploaded_file.name) - with open(file_path, "wb") as f: - f.write(uploaded_file.getvalue()) - img = Image.open(file_path) - try: - genai.configure(api_key=API_KEY) - model = genai.GenerativeModel('gemini-pro-vision') - caption = model.generate_content(["Write a caption for the image in english",img]) - tags=model.generate_content(["Generate 5 hash tags for the image in a line in english",img]) - st.image(img, caption=f"Caption: {caption.text}") - st.write(f"Tags: {tags.text}") - except Exception as e: - error_msg = str(e) - if "API_KEY_INVALID" in error_msg: - st.error("Invalid API Key. Please enter a valid API Key.") - else: - st.error(f"Failed to configure API due to {error_msg}") -footer=""" - +if uploaded_file is not None and API_KEY: + if st.button('Analyze Image'): + try: + img = Image.open(uploaded_file) + img = img.convert("RGB") # Convert image to RGB + encoded_image = encode_image_to_base64(img) # Encode the image to base64 + result = call_deepinfra_api(encoded_image, API_KEY) # Send the encoded image for inference + inference_result = result.get('choices', [{}])[0].get('message', {}).get('content', 'No inference result available') + st.write(f"Inference Result: {inference_result}") # Display the inference result as plain text + except Exception as e: + st.error(f"Failed to process image due to: {str(e)}") -
-""" -st.markdown(footer,unsafe_allow_html=True) +# Add footer or additional UI elements below if necessary diff --git a/requirements.txt b/requirements.txt index 74eb0c0..f7b9518 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -google-generativeai==0.3.1 -streamlit==1.29.0 +deepinfra==1.0.0 +streamlit==1.29.0 \ No newline at end of file From cdc3a7f15ce2fc560b71ccd296f7614ce29d21a5 Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 20 Apr 2024 04:44:45 +0200 Subject: [PATCH 02/13] Update requirements.txt --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f7b9518..9f8e9b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -deepinfra==1.0.0 streamlit==1.29.0 \ No newline at end of file From 2176429ad859078afca2b0e73349b31a017a3aa3 Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 20 Apr 2024 05:09:23 +0200 Subject: [PATCH 03/13] image url instead --- app.py | 53 +++++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/app.py b/app.py index 81cf5bd..9136d27 100644 --- a/app.py +++ b/app.py @@ -10,14 +10,16 @@ # Input for the API key API_KEY = st.text_input("Enter your DeepInfra API Key:", type="password") -# File uploader allows the user to upload an image -uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"]) +# Input for the image URL +image_url = st.text_input("Enter the URL of the image:") -def encode_image_to_base64(image): - """Encode image to base64.""" - buffered = BytesIO() - image.save(buffered, format="JPEG") - return base64.b64encode(buffered.getvalue()).decode("utf-8") + + +#def encode_image_to_base64(image): + #"""Encode image to base64.""" + #buffered = BytesIO() + #image.save(buffered, format="JPEG") + #return base64.b64encode(buffered.getvalue()).decode("utf-8") def call_deepinfra_api(base64_image, api_key): """Send the base64 encoded image to DeepInfra for inference.""" @@ -25,23 +27,24 @@ def call_deepinfra_api(base64_image, api_key): headers = { "Content-Type": "application/json", "Authorization": f"Bearer {api_key}" - } + } data = { "model": "llava-hf/llava-1.5-7b-hf", "messages": [ { "role": "user", - "content": { - "type": "image_base64", - "image_base64": base64_image - } - }, - { - "role": "system", - "content": { - "type": "text", - "text": "Analyze this image." - } + "content": [ + { + "type": "image_url", + "image_url": { + "url": image_url + } + }, + { + "type": "text", + "text": "use json format to describe the image. 1. colours, 2. art types, 3. objects in the image, 4. artistic style, 5. image_url" + } + ] } ] } @@ -51,15 +54,13 @@ def call_deepinfra_api(base64_image, api_key): else: raise Exception("API request failed with status code: " + str(response.status_code)) -if uploaded_file is not None and API_KEY: +if image_url and API_KEY: if st.button('Analyze Image'): try: - img = Image.open(uploaded_file) - img = img.convert("RGB") # Convert image to RGB - encoded_image = encode_image_to_base64(img) # Encode the image to base64 - result = call_deepinfra_api(encoded_image, API_KEY) # Send the encoded image for inference - inference_result = result.get('choices', [{}])[0].get('message', {}).get('content', 'No inference result available') - st.write(f"Inference Result: {inference_result}") # Display the inference result as plain text + result = call_deepinfra_api(image_url, API_KEY) + if result: + inference_result = result.get('choices', [{}])[0].get('message', {}).get('content', 'No inference result available') + st.write(f"Inference Result: {inference_result}") except Exception as e: st.error(f"Failed to process image due to: {str(e)}") From 946273a26b05e3579db6fe4d6b676e1a8a19731d Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 20 Apr 2024 05:11:51 +0200 Subject: [PATCH 04/13] Update app.py --- app.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/app.py b/app.py index 9136d27..31f021d 100644 --- a/app.py +++ b/app.py @@ -13,13 +13,14 @@ # Input for the image URL image_url = st.text_input("Enter the URL of the image:") +# File uploader allows the user to upload an image +# uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"]) - -#def encode_image_to_base64(image): - #"""Encode image to base64.""" - #buffered = BytesIO() - #image.save(buffered, format="JPEG") - #return base64.b64encode(buffered.getvalue()).decode("utf-8") +#d ef encode_image_to_base64(image): + # """Encode image to base64.""" + # buffered = BytesIO() + # image.save(buffered, format="JPEG") + # return base64.b64encode(buffered.getvalue()).decode("utf-8") def call_deepinfra_api(base64_image, api_key): """Send the base64 encoded image to DeepInfra for inference.""" From cc849ab38dbd50b11f905a2f2343af0ac0316bde Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 20 Apr 2024 05:27:53 +0200 Subject: [PATCH 05/13] Update app.py --- app.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app.py b/app.py index 31f021d..af0a660 100644 --- a/app.py +++ b/app.py @@ -2,10 +2,10 @@ from PIL import Image import base64 import requests -from io import BytesIO +# from io import BytesIO # Setting up the title of the application -st.title('Image Inference with DeepInfra') +st.title('Art Style Explorer') # Input for the API key API_KEY = st.text_input("Enter your DeepInfra API Key:", type="password") @@ -24,7 +24,7 @@ def call_deepinfra_api(base64_image, api_key): """Send the base64 encoded image to DeepInfra for inference.""" - url = "https://api.deepinfra.com/v1/inference/llava-hf/llava-1.5-7b-hf" + url = "https://api.deepinfra.com/v1/openai/chat/completions" headers = { "Content-Type": "application/json", "Authorization": f"Bearer {api_key}" From c118dc34ea8b7d5348eb171550c32b6f8747d59c Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 20 Apr 2024 05:41:12 +0200 Subject: [PATCH 06/13] Custom prompt, json format --- app.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/app.py b/app.py index af0a660..d11b456 100644 --- a/app.py +++ b/app.py @@ -13,6 +13,9 @@ # Input for the image URL image_url = st.text_input("Enter the URL of the image:") +# Input for the prompt sent to the API +prompt = st.text_input("What would you like the model to tell you from this image?") + # File uploader allows the user to upload an image # uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png", "jpeg"]) @@ -22,8 +25,8 @@ # image.save(buffered, format="JPEG") # return base64.b64encode(buffered.getvalue()).decode("utf-8") -def call_deepinfra_api(base64_image, api_key): - """Send the base64 encoded image to DeepInfra for inference.""" +def call_deepinfra_api(image_url, prompt, api_key): + """Send the image URL and prompt to DeepInfra for inference.""" url = "https://api.deepinfra.com/v1/openai/chat/completions" headers = { "Content-Type": "application/json", @@ -43,7 +46,7 @@ def call_deepinfra_api(base64_image, api_key): }, { "type": "text", - "text": "use json format to describe the image. 1. colours, 2. art types, 3. objects in the image, 4. artistic style, 5. image_url" + "text": prompt } ] } @@ -53,16 +56,17 @@ def call_deepinfra_api(base64_image, api_key): if response.status_code == 200: return response.json() else: - raise Exception("API request failed with status code: " + str(response.status_code)) + st.error(f"API request failed with status code: {response.status_code} and message: {response.text}") + return None -if image_url and API_KEY: +if image_url and API_KEY and prompt: if st.button('Analyze Image'): try: - result = call_deepinfra_api(image_url, API_KEY) + result = call_deepinfra_api(image_url, API_KEY, prompt) if result: - inference_result = result.get('choices', [{}])[0].get('message', {}).get('content', 'No inference result available') - st.write(f"Inference Result: {inference_result}") + # Convert the dictionary to JSON formatted string and display it + json_result = json.dumps(result, indent=2) # Beautify the JSON response + st.json(json_result) # Use st.json to render the JSON in the UI except Exception as e: st.error(f"Failed to process image due to: {str(e)}") - -# Add footer or additional UI elements below if necessary + \ No newline at end of file From aa41ac9bafdda295242e25975b55cc7479ddf3da Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 20 Apr 2024 05:58:56 +0200 Subject: [PATCH 07/13] api url change --- app.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app.py b/app.py index d11b456..592e976 100644 --- a/app.py +++ b/app.py @@ -27,7 +27,7 @@ def call_deepinfra_api(image_url, prompt, api_key): """Send the image URL and prompt to DeepInfra for inference.""" - url = "https://api.deepinfra.com/v1/openai/chat/completions" + url = "https://api.deepinfra.com/v1/inference/" headers = { "Content-Type": "application/json", "Authorization": f"Bearer {api_key}" @@ -69,4 +69,3 @@ def call_deepinfra_api(image_url, prompt, api_key): st.json(json_result) # Use st.json to render the JSON in the UI except Exception as e: st.error(f"Failed to process image due to: {str(e)}") - \ No newline at end of file From 5762633362e736c57c4685e35c18eaa7b67f3001 Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 27 Apr 2024 20:29:03 +0200 Subject: [PATCH 08/13] Update app.py --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index 592e976..4522f32 100644 --- a/app.py +++ b/app.py @@ -68,4 +68,4 @@ def call_deepinfra_api(image_url, prompt, api_key): json_result = json.dumps(result, indent=2) # Beautify the JSON response st.json(json_result) # Use st.json to render the JSON in the UI except Exception as e: - st.error(f"Failed to process image due to: {str(e)}") + st.error(f"Failed to process image due to: {str(e)}") \ No newline at end of file From b7a6251e207bc7c677a02269a46b9532b2d23f0b Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 27 Apr 2024 21:51:52 +0200 Subject: [PATCH 09/13] remove deepinfra key --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index 4522f32..3bbeab3 100644 --- a/app.py +++ b/app.py @@ -8,7 +8,7 @@ st.title('Art Style Explorer') # Input for the API key -API_KEY = st.text_input("Enter your DeepInfra API Key:", type="password") +# API_KEY = st.text_input("Enter your DeepInfra API Key:", type="password") # Input for the image URL image_url = st.text_input("Enter the URL of the image:") From 0763029900f6e2b03790d97c5c567a9df693a0c7 Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 27 Apr 2024 21:53:56 +0200 Subject: [PATCH 10/13] added the key again --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index 3bbeab3..4522f32 100644 --- a/app.py +++ b/app.py @@ -8,7 +8,7 @@ st.title('Art Style Explorer') # Input for the API key -# API_KEY = st.text_input("Enter your DeepInfra API Key:", type="password") +API_KEY = st.text_input("Enter your DeepInfra API Key:", type="password") # Input for the image URL image_url = st.text_input("Enter the URL of the image:") From 495a423b4fe5e1a386fa7199e9be34425d904994 Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 27 Apr 2024 21:56:52 +0200 Subject: [PATCH 11/13] changed url for inference --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index 4522f32..018c3f1 100644 --- a/app.py +++ b/app.py @@ -27,7 +27,7 @@ def call_deepinfra_api(image_url, prompt, api_key): """Send the image URL and prompt to DeepInfra for inference.""" - url = "https://api.deepinfra.com/v1/inference/" + url = "https://api.deepinfra.com/v1/openai/chat/completions" headers = { "Content-Type": "application/json", "Authorization": f"Bearer {api_key}" From b9710724a2810ca4920341ecff3a7395b7407b33 Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 27 Apr 2024 22:08:02 +0200 Subject: [PATCH 12/13] casse --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index 018c3f1..014d40c 100644 --- a/app.py +++ b/app.py @@ -8,7 +8,7 @@ st.title('Art Style Explorer') # Input for the API key -API_KEY = st.text_input("Enter your DeepInfra API Key:", type="password") +api_key = st.text_input("Enter your DeepInfra API Key:", type="password") # Input for the image URL image_url = st.text_input("Enter the URL of the image:") From 5661ed1ded3cf99264651f5051b7ded066f04a65 Mon Sep 17 00:00:00 2001 From: Gabs <40191213+GabZoFar@users.noreply.github.com> Date: Sat, 27 Apr 2024 22:09:54 +0200 Subject: [PATCH 13/13] casse --- app.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index 014d40c..8c30bca 100644 --- a/app.py +++ b/app.py @@ -2,13 +2,14 @@ from PIL import Image import base64 import requests +import json # from io import BytesIO # Setting up the title of the application st.title('Art Style Explorer') # Input for the API key -api_key = st.text_input("Enter your DeepInfra API Key:", type="password") +API_KEY = st.text_input("Enter your DeepInfra API Key:", type="password") # Input for the image URL image_url = st.text_input("Enter the URL of the image:")