Vision docs #31

Merged · 6 commits · Aug 2, 2024
4 changes: 4 additions & 0 deletions fern/docs.yml
@@ -47,6 +47,10 @@ navigation:
path: ./docs/pages/usingllms/agents.mdx
- page: Streaming
path: ./docs/pages/usingllms/streaming.mdx
- page: Chat Vision
path: ./docs/pages/usingllms/chat_vision.mdx
- page: Embeddings
path: ./docs/pages/usingllms/embeddings.mdx
- section: Process LLM Input
contents:
- page: PII
30 changes: 16 additions & 14 deletions fern/docs/pages/reference/embeddings.mdx
@@ -207,20 -207,22 @@ The output will look something like this.

```json
 {
-  "id":"emb-oM1AChyqjQKqT6XKiUqY0bnqIUikK",
-  "object":"embedding_batch",
-  "created":1717780716,
-  "model":"bridgetower-large-itm-mlm-itc",
-  "data":[
-    {
-      "status":"error: could not call model, contact support",
-      "index":0,
-      "object":"embedding",
-      "embedding":[
-
-      ]
-    }
-  ]
+  "created": 1722607307,
+  "data": [
+    {
+      "embedding": [
+        0.0073536476120352745,
+        -0.08882588893175125,
+        ...
+      ],
+      "index": 0,
+      "object": "embedding",
+      "status": "success"
+    }
+  ],
+  "id": "emb-weuRKl9D1kN4rzbM3mX29cqtdH3Cj",
+  "model": "bridgetower-large-itm-mlm-itc",
+  "object": "embedding_batch"
 }
```

202 changes: 202 additions & 0 deletions fern/docs/pages/usingllms/chat_vision.mdx
@@ -0,0 +1,202 @@

When sending a request to the Vision models, Prediction Guard offers several ways to provide your image. You can pass the image as a URL, a local image file, a data URI, or a base64 encoded string.
Here is an example of how to use an image from a URL:

```Python
import os
import json
from predictionguard import PredictionGuard

# Set your Prediction Guard API key as an environment variable.
os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"

client = PredictionGuard()

messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What's in this image?"
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                }
            }
        ]
    },
]

result = client.chat.completions.create(
    model="llava-1.5-7b-hf",
    messages=messages
)

print(json.dumps(
    result,
    sort_keys=True,
    indent=4,
    separators=(',', ': ')
))
```

This example shows how you can upload the image from a local file:

```Python
import os
import json
from predictionguard import PredictionGuard

# Set your Prediction Guard API key as an environment variable.
os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"

client = PredictionGuard()

messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What's in this image?"
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": "image_data/Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
                }
            }
        ]
    },
]

result = client.chat.completions.create(
    model="llava-1.5-7b-hf",
    messages=messages
)

print(json.dumps(
    result,
    sort_keys=True,
    indent=4,
    separators=(',', ': ')
))
```

When using base64 encoded image inputs or data URIs, you first need to encode the image.

Here is how to convert an image to a base64 encoded string:

```Python
import base64

def encode_image_to_base64(image_path):
    with open(image_path, 'rb') as image_file:
        image_data = image_file.read()
    base64_encoded_data = base64.b64encode(image_data)
    base64_message = base64_encoded_data.decode('utf-8')
    return base64_message

image_path = 'image_data/Gfp-wisconsin-madison-the-nature-boardwalk.jpg'
encoded_image = encode_image_to_base64(image_path)
```

This example shows how to pass just the base64 encoded image string:

```Python
messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What's in this image?"
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": encoded_image,
                }
            }
        ]
    },
]

result = client.chat.completions.create(
    model="llava-1.5-7b-hf",
    messages=messages
)

print(json.dumps(
    result,
    sort_keys=True,
    indent=4,
    separators=(',', ': ')
))
```

And this example shows how to use a data URI:

```Python
# Build a data URI from the base64 string; the MIME type should match
# the image format (the example image is a JPEG).
data_uri = "data:image/jpeg;base64," + encoded_image

messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "What's in this image?"
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": data_uri,
                }
            }
        ]
    },
]

result = client.chat.completions.create(
    model="llava-1.5-7b-hf",
    messages=messages
)

print(json.dumps(
    result,
    sort_keys=True,
    indent=4,
    separators=(',', ': ')
))
```

The output of these examples will look similar to this:

```json
{
    "choices": [
        {
            "index": 0,
            "message": {
                "content": "The image features a beautiful wooden path lined with green grass and a blue sky overhead. The pathway leads towards a body of water, creating a serene atmosphere. Along the path, there is a bench overlooking the pond, inviting to sit and relax. The scene also includes trees in the background, adding to the picturesque scenery.\nWith the combination of the peaceful atmosphere, the sunny blue sky, and the presence of water nearby, this image",
                "output": null,
                "role": "assistant"
            },
            "status": "success"
        }
    ],
    "created": 1722545890,
    "id": "chat-xX9FDkWknG8G0ZHQjCgNdp47uBQZy",
    "model": "llava-1.5-7b-hf",
    "object": "chat_completion"
}
```
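
If you only need the generated text rather than the full response object, you can index into the response dictionary. This is a minimal sketch based on the structure of the output shown above:

```Python
# Pull just the model's description out of the chat completion response.
answer = result["choices"][0]["message"]["content"]
print(answer)
```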
142 changes: 142 additions & 0 deletions fern/docs/pages/usingllms/embeddings.mdx
@@ -0,0 +1,142 @@
# Embeddings endpoint

At Prediction Guard, we offer an embeddings endpoint capable of generating embeddings for both text and images. This is particularly useful when you want to load embeddings into a vector database and perform semantic similarity searches.

The BridgeTower model is a cross-modal encoder that handles both images and text. Below is a simple illustration of how to call the embeddings endpoint with both image and text inputs. The endpoint accepts image URLs, local image files, data URIs, and base64 encoded image strings as input.

## Embeddings for text and image

```Python
import os
import json

from predictionguard import PredictionGuard

# Set your Prediction Guard API key as an environment variable.
os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"

client = PredictionGuard()

response = client.embeddings.create(
    model="bridgetower-large-itm-mlm-itc",
    input=[
        {
            "text": "Cool skateboarding tricks you can try this summer",
            "image": "https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg"
        }
    ]
)

print(json.dumps(
    response,
    sort_keys=True,
    indent=4,
    separators=(',', ': ')
))
```

This will yield a JSON object containing the embedding.
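
To work with the vector itself, you can pull it out of the response. This is a minimal sketch, assuming the batch response structure shown in the embeddings reference above:

```Python
# Grab the embedding vector (a list of floats) from the first item in the batch.
embedding = response["data"][0]["embedding"]
print(len(embedding), embedding[:5])
```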

## Embeddings for text only

```Python
import os
import json

from predictionguard import PredictionGuard

# Set your Prediction Guard API key as an environment variable.
os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"

client = PredictionGuard()

response = client.embeddings.create(
    model="bridgetower-large-itm-mlm-itc",
    input=[
        {
            "text": "Tell me a joke.",
        }
    ]
)

print(json.dumps(
    response,
    sort_keys=True,
    indent=4,
    separators=(',', ': ')
))
```

## Embeddings for image only

```Python
import os
import json

from predictionguard import PredictionGuard

# Set your Prediction Guard API key as an environment variable.
os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"

client = PredictionGuard()

response = client.embeddings.create(
    model="bridgetower-large-itm-mlm-itc",
    input=[
        {
            "image": "https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg",
        }
    ]
)

print(json.dumps(
    response,
    sort_keys=True,
    indent=4,
    separators=(',', ': ')
))
```

Once we have computed embeddings, we can use them to calculate the similarity between two inputs. First, we compute the embeddings using the Prediction Guard API. Then, we convert them into tensors and pass them to a function that calculates the cosine similarity between the two images.

```Python
import os
import json

from predictionguard import PredictionGuard
import torch

# Set your Prediction Guard API key as an environment variable.
os.environ["PREDICTIONGUARD_API_KEY"] = "<api key>"

client = PredictionGuard()

response1 = client.embeddings.create(
    model="bridgetower-large-itm-mlm-itc",
    input=[
        {
            "image": "https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg",
        }
    ]
)

response2 = client.embeddings.create(
    model="bridgetower-large-itm-mlm-itc",
    input=[
        {
            "image": "https://ichef.bbci.co.uk/news/976/cpsprodpb/10A6B/production/_133130286_gettyimages-1446849679.jpg",
        }
    ]
)

embedding1 = response1['data'][0]['embedding']
embedding2 = response2['data'][0]['embedding']

tensor1 = torch.tensor(embedding1)
tensor2 = torch.tensor(embedding2)

def compute_scores(emb_one, emb_two):
    """Computes cosine similarity between two vectors."""
    scores = torch.nn.functional.cosine_similarity(emb_one.unsqueeze(0), emb_two.unsqueeze(0))
    return scores.numpy().tolist()

similarity_score = compute_scores(tensor1, tensor2)
print("Cosine Similarity Score:", similarity_score)
```
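
If you prefer not to depend on PyTorch, the same score can be computed directly with NumPy. This is a minimal sketch that reuses the `embedding1` and `embedding2` lists from the example above:

```Python
import numpy as np

def cosine_similarity(vec_one, vec_two):
    """Computes cosine similarity between two embedding lists using NumPy."""
    a = np.array(vec_one)
    b = np.array(vec_two)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

print("Cosine Similarity Score:", cosine_similarity(embedding1, embedding2))
```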