From 68b4f33ad79e9b91d632516c7d817f83695966f0 Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Fri, 8 Nov 2024 10:41:45 -0500 Subject: [PATCH 1/5] adding support for embeddings using predictionguard --- .../text_embedding/predictionguard.ipynb | 428 ++++++++++++++++++ .../embeddings/__init__.py | 3 + .../embeddings/predictionguard.py | 159 +++++++ .../embeddings/test_predictionguard.py | 85 ++++ 4 files changed, 675 insertions(+) create mode 100644 docs/docs/integrations/text_embedding/predictionguard.ipynb create mode 100644 libs/community/langchain_community/embeddings/predictionguard.py create mode 100644 libs/community/tests/integration_tests/embeddings/test_predictionguard.py diff --git a/docs/docs/integrations/text_embedding/predictionguard.ipynb b/docs/docs/integrations/text_embedding/predictionguard.ipynb new file mode 100644 index 0000000000000..cbb8a2f8f3173 --- /dev/null +++ b/docs/docs/integrations/text_embedding/predictionguard.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "# PredictionGuardEmbeddings" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": ">[Prediction Guard](https://predictionguard.com) is a secure, scalable GenAI platform that safeguards sensitive data, prevents common AI malfunctions, and runs on affordable hardware." + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Overview" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Integration details\n", + "This integration shows how to use the Prediction Guard embeddings integration with Langchain. This integration supports text and images, separately or together in matched pairs." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Setup\n", + "To access Prediction Guard models, contact us [here](https://predictionguard.com/get-started) to get a Prediction Guard API key and get started. \n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Credentials\n", + "Once you have a key, you can set it with \n" + ] + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:59:10.422135Z", + "start_time": "2024-10-08T18:59:10.419563Z" + } + }, + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "os.environ[\"PREDICTIONGUARD_API_KEY\"] = \"\"" + ], + "outputs": [], + "execution_count": 21 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Installation" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "%pip install --upgrade --quiet predictionguard langchain" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Instantiation" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "First, install the Prediction Guard and LangChain packages. Then, set the required env vars and set up package imports." 
+ }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:13:40.463622Z", + "start_time": "2024-10-08T18:13:40.240249Z" + } + }, + "source": "from langchain_community.embeddings.predictionguard import PredictionGuardEmbeddings", + "outputs": [], + "execution_count": 1 + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:14:14.324100Z", + "start_time": "2024-10-08T18:14:13.997521Z" + } + }, + "source": [ + "embeddings = PredictionGuardEmbeddings(model=\"bridgetower-large-itm-mlm-itc\")" + ], + "outputs": [], + "execution_count": 4 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prediction Guard embeddings generation supports both text and images. This integration includes that support spread across various functions." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Indexing and Retrieval" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:14:29.307881Z", + "start_time": "2024-10-08T18:14:28.405099Z" + } + }, + "cell_type": "code", + "source": [ + "# Create a vector store with a sample text\n", + "from langchain_core.vectorstores import InMemoryVectorStore\n", + "\n", + "text = \"LangChain is the framework for building context-aware reasoning applications.\"\n", + "\n", + "vectorstore = InMemoryVectorStore.from_texts(\n", + " [text],\n", + " embedding=embeddings,\n", + ")\n", + "\n", + "# Use the vectorstore as a retriever\n", + "retriever = vectorstore.as_retriever()\n", + "\n", + "# Retrieve the most similar text\n", + "retrieved_documents = retriever.invoke(\"What is LangChain?\")\n", + "\n", + "# Show the retrieved document's content\n", + "retrieved_documents[0].page_content" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "'LangChain is the framework for building context-aware reasoning applications.'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Direct Usage\n", + "The vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings from the texts used in the `from_texts` and retrieval `invoke` operations.\n", + "\n", + "These methods can be directly called with the following commands." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "### Embed single texts" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:16:00.824334Z", + "start_time": "2024-10-08T18:16:00.368665Z" + } + }, + "source": [ + "# Embedding a single string\n", + "text = \"This is an embedding example.\"\n", + "single_vector = embeddings.embed_query(text)\n", + "\n", + "single_vector[:5]" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "[0.01456777285784483,\n", + " -0.08131945133209229,\n", + " -0.013045587576925755,\n", + " -0.09488929063081741,\n", + " -0.003087474964559078]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 14 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Embed multiple texts" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:16:11.076843Z", + "start_time": "2024-10-08T18:16:10.655925Z" + } + }, + "source": [ + "# Embedding multiple strings\n", + "docs = [\n", + " \"This is an embedding example.\",\n", + " \"This is another embedding example.\",\n", + "]\n", + "\n", + "two_vectors = embeddings.embed_documents(docs)\n", + "\n", + "for vector in two_vectors:\n", + " print(vector[:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.01456777285784483, -0.08131945133209229, -0.013045587576925755, -0.09488929063081741, -0.003087474964559078]\n", + "[-0.0015021917643025517, -0.08883760124444962, -0.0025286630261689425, -0.1052245944738388, 0.014225339516997337]\n" + ] + } + ], + "execution_count": 15 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "### Embed single images" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:16:44.853569Z", + "start_time": "2024-10-08T18:16:43.457282Z" + } + }, + "source": [ + "# Embedding a single image. 
These functions accept image URLs, image files, data URIs, and base64 encoded strings.\n", + "image = [\n", + " \"https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png\",\n", + "]\n", + "single_vector = embeddings.embed_images(image)\n", + "\n", + "print(single_vector[0][:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.06482088565826416, -0.026690427213907242, 0.07683052867650986, -0.060580912977457047, 0.0001994583144551143]\n" + ] + } + ], + "execution_count": 17 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Embed multiple images" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:17:02.165077Z", + "start_time": "2024-10-08T18:17:00.612485Z" + } + }, + "source": [ + "# Embedding multiple images\n", + "images = [\n", + " \"https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png\",\n", + " \"https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg\",\n", + "]\n", + "\n", + "two_vectors = embeddings.embed_images(images)\n", + "\n", + "for vector in two_vectors:\n", + " print(vector[:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.06482088565826416, -0.026690427213907242, 0.07683052867650986, -0.060580912977457047, 0.0001994583144551143]\n", + "[0.0911610797047615, -0.034427884966135025, 0.007927080616354942, -0.03500846028327942, 0.022317267954349518]\n" + ] + } + ], + "execution_count": 18 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "### Embed single text-image pairs" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:17:17.113169Z", + "start_time": "2024-10-08T18:17:15.669474Z" + } + }, + "source": [ + "# Embedding a single text-image pair\n", + "inputs = [\n", + " {\n", + " \"text\": \"This is an embedding example.\",\n", + " \"image\": \"https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png\",\n", + " },\n", + "]\n", + "single_vector = embeddings.embed_image_text(inputs)\n", + "\n", + "print(single_vector[0][:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.025471875444054604, -0.07661919295787811, 0.06256384402513504, -0.06042419373989105, 0.016889123246073723]\n" + ] + } + ], + "execution_count": 19 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Embed multiple text-image pairs" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:17:31.948434Z", + "start_time": "2024-10-08T18:17:30.393415Z" + } + }, + "source": [ + "# Embedding multiple text-image pairs\n", + "inputs = [\n", + " {\n", + " \"text\": \"This is an embedding example.\",\n", + " \"image\": \"https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png\",\n", + " },\n", + " {\n", + " \"text\": \"This is another embedding example.\",\n", + " \"image\": \"https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg\",\n", + " },\n", + "]\n", + "two_vectors = embeddings.embed_image_text(inputs)\n", + "\n", + "for vector in two_vectors:\n", + " print(vector[:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.025471875444054604, -0.07661919295787811, 0.06256384402513504, -0.06042419373989105, 0.016889123246073723]\n", + "[0.026654226705431938, -0.10080841928720474, -0.012732953764498234, -0.04365091398358345, 0.036743905395269394]\n" + ] + } + ], + "execution_count": 20 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## API Reference\n", + "For 
detailed documentation of all PredictionGuardEmbeddings features and configurations, check out the API reference: https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.predictionguard.PredictionGuardEmbeddings.html"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py
index 38c7d5a76bc1d..9f957f2f6fcb2 100644
--- a/libs/community/langchain_community/embeddings/__init__.py
+++ b/libs/community/langchain_community/embeddings/__init__.py
@@ -188,6 +188,9 @@
     from langchain_community.embeddings.premai import (
         PremAIEmbeddings,
     )
+    from langchain_community.embeddings.predictionguard import (
+        PredictionGuardEmbeddings,
+    )
     from langchain_community.embeddings.sagemaker_endpoint import (
         SagemakerEndpointEmbeddings,
     )
diff --git a/libs/community/langchain_community/embeddings/predictionguard.py b/libs/community/langchain_community/embeddings/predictionguard.py
new file mode 100644
index 0000000000000..04ff8d56c12a0
--- /dev/null
+++ b/libs/community/langchain_community/embeddings/predictionguard.py
@@ -0,0 +1,159 @@
+import logging
+from typing import Any, Dict, List, Optional
+
+from langchain_core.embeddings import Embeddings
+from langchain_core.utils import get_from_dict_or_env
+from pydantic import BaseModel, ConfigDict, model_validator
+
+logger = logging.getLogger(__name__)
+
+
+class PredictionGuardEmbeddings(BaseModel, Embeddings):
+    """Prediction Guard embedding models.
+
+    To use, you should have the ``predictionguard`` python package installed, and the
+    environment variable ``PREDICTIONGUARD_API_KEY`` set with your API key, or pass
+    it as a named parameter to the constructor.
+
+    Example:
+        .. code-block:: python
+
+            embeddings = PredictionGuardEmbeddings(
+                model="bridgetower-large-itm-mlm-itc",
+                api_key="my-api-key"
+            )
+    """
+
+    client: Any = None  #: :meta private:
+    """Prediction Guard Client"""
+
+    model: Optional[str] = "bridgetower-large-itm-mlm-itc"
+    """Model name to use."""
+
+    predictionguard_api_key: Optional[str] = None
+    """Prediction Guard API key."""
+
+    model_config = ConfigDict(
+        extra="forbid",
+    )
+
+    @model_validator(mode="before")
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the API key and Python package exist in the environment."""
+        pg_api_key = get_from_dict_or_env(
+            values, "predictionguard_api_key", "PREDICTIONGUARD_API_KEY"
+        )
+
+        try:
+            from predictionguard import PredictionGuard
+
+            values["client"] = PredictionGuard(
+                api_key=pg_api_key,
+            )
+
+        except ImportError:
+            raise ImportError(
+                "Could not import predictionguard python package. "
+                "Please install it with `pip install predictionguard`."
+            )
+
+        return values
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Call out to Prediction Guard's embedding endpoint for embedding documents.
+
+        Args:
+            texts:
+                The list of texts to embed.
+
+        Returns:
+            Embeddings for the texts.
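+
+        Example (a minimal usage sketch; assumes a valid API key is set):
+            .. code-block:: python
+
+                embeddings = PredictionGuardEmbeddings(
+                    model="bridgetower-large-itm-mlm-itc"
+                )
+                vectors = embeddings.embed_documents(
+                    ["first text to embed", "second text to embed"]
+                )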
+        """
+
+        inputs = []
+        for text in texts:
+            input = {"text": text}
+            inputs.append(input)
+
+        response = self.client.embeddings.create(model=self.model, input=inputs)
+
+        res = []
+        indx = 0
+        for re in response["data"]:
+            if re["index"] == indx:
+                res.append(re["embedding"])
+                indx += 1
+            else:
+                continue
+
+        return res
+
+    def embed_query(self, text: str) -> List[float]:
+        """Call out to Prediction Guard's embedding endpoint for embedding query text.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+
+        inputs = [{"text": text}]
+        response = self.client.embeddings.create(model=self.model, input=inputs)
+
+        return response["data"][0]["embedding"]
+
+    def embed_images(self, images: List[str]) -> List[List[float]]:
+        """
+        Call out to Prediction Guard's embedding endpoint for embedding multiple images.
+
+        Args:
+            images: A list of images to embed.
+                Supports image file paths, image URLs, data URIs,
+                and base64 encoded images.
+
+        Returns:
+            Embeddings for the images.
+        """
+
+        inputs = []
+        for image in images:
+            input = {"image": image}
+
+            inputs.append(input)
+
+        response = self.client.embeddings.create(model=self.model, input=inputs)
+
+        res = []
+        indx = 0
+        for re in response["data"]:
+            if re["index"] == indx:
+                res.append(re["embedding"])
+                indx += 1
+            else:
+                continue
+
+        return res
+
+    def embed_image_text(self, inputs: List[Dict[str, str]]) -> List[float]:
+        """
+        Call out to Prediction Guard embeddings for embedding an image and text.
+
+        Args:
+            inputs: A list of dictionaries containing the text and images to embed.
+
+        Returns:
+            Embeddings for the text and images.
+        """
+
+        response = self.client.embeddings.create(model=self.model, input=inputs)
+
+        res = []
+        indx = 0
+        for re in response["data"]:
+            if re["index"] == indx:
+                res.append(re["embedding"])
+                indx += 1
+            else:
+                continue
+
+        return res
\ No newline at end of file
diff --git a/libs/community/tests/integration_tests/embeddings/test_predictionguard.py b/libs/community/tests/integration_tests/embeddings/test_predictionguard.py
new file mode 100644
index 0000000000000..2c43898a65b29
--- /dev/null
+++ b/libs/community/tests/integration_tests/embeddings/test_predictionguard.py
@@ -0,0 +1,85 @@
+"""Test Prediction Guard API wrapper"""
+
+from langchain_community.embeddings.predictionguard import PredictionGuardEmbeddings
+
+
+def test_predictionguard_embeddings_documents() -> None:
+    """Test Prediction Guard embeddings."""
+    embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
+    documents = [
+        "embed this",
+    ]
+    output = embeddings.embed_documents(documents)
+    assert len(output) == 1
+    assert len(output[0]) > 2
+
+
+def test_predictionguard_embeddings_documents_multiple() -> None:
+    """Test Prediction Guard embeddings."""
+    embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
+    documents = [
+        "embed me",
+        "embed this",
+    ]
+    output = embeddings.embed_documents(documents)
+    assert len(output[0]) > 2
+    assert len(output[1]) > 2
+
+
+def test_predictionguard_embeddings_query() -> None:
+    """Test Prediction Guard embeddings."""
+    embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
+    document = "embed this"
+    output = embeddings.embed_query(document)
+    assert len(output) > 2
+
+
+def test_predictionguard_embeddings_images() -> None:
+    """Test Prediction Guard embeddings."""
+    embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
+    image = [
+        "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png",
+    ]
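+    # One embedding vector is expected per input image; URLs, file paths,
+    # data URIs, and base64 encoded images are accepted.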
+ output = embeddings.embed_images(image) + assert len(output) == 1 + + +def test_predictionguard_embeddings_images_multiple() -> None: + """Test Prediction Guard embeddings.""" + embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") + images = [ + "https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg", + "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + ] + output = embeddings.embed_images(images) + assert len(output) == 2 + + +def test_predictionguard_embeddings_image_text() -> None: + """Test Prediction Guard Embeddings""" + embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") + inputs = [ + { + "text": "embed me", + "image": "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + }, + ] + output = embeddings.embed_image_text(inputs) + assert len(output) == 1 + + +def test_predictionguard_embeddings_image_text_multiple() -> None: + """Test Prediction Guard Embeddings""" + embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") + inputs = [ + { + "text": "embed me", + "image": "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + }, + { + "text": "embed this", + "image": "https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg", + }, + ] + output = embeddings.embed_image_text(inputs) + assert len(output) == 2 \ No newline at end of file From 27947903210c7ad0c4a9b8a136b6ee8a42059802 Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Fri, 8 Nov 2024 11:01:37 -0500 Subject: [PATCH 2/5] fixing linting errors --- .../langchain_community/embeddings/__init__.py | 8 +++++--- .../langchain_community/embeddings/predictionguard.py | 2 +- .../embeddings/test_predictionguard.py | 10 +++++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py index 9f957f2f6fcb2..d786a2fc1e7b8 100644 --- a/libs/community/langchain_community/embeddings/__init__.py +++ b/libs/community/langchain_community/embeddings/__init__.py @@ -185,12 +185,12 @@ from langchain_community.embeddings.ovhcloud import ( OVHCloudEmbeddings, ) - from langchain_community.embeddings.premai import ( - PremAIEmbeddings, - ) from langchain_community.embeddings.predictionguard import ( PredictionGuardEmbeddings, ) + from langchain_community.embeddings.premai import ( + PremAIEmbeddings, + ) from langchain_community.embeddings.sagemaker_endpoint import ( SagemakerEndpointEmbeddings, ) @@ -304,6 +304,7 @@ "OpenVINOEmbeddings", "OracleEmbeddings", "OVHCloudEmbeddings", + "PredictionGuardEmbeddings", "PremAIEmbeddings", "QianfanEmbeddingsEndpoint", "QuantizedBgeEmbeddings", @@ -390,6 +391,7 @@ "QuantizedBiEncoderEmbeddings": "langchain_community.embeddings.optimum_intel", "OracleEmbeddings": "langchain_community.embeddings.oracleai", "OVHCloudEmbeddings": "langchain_community.embeddings.ovhcloud", + "PredictionGuardEmbeddings": "langchain_community.embeddings.predictionguard", "SagemakerEndpointEmbeddings": "langchain_community.embeddings.sagemaker_endpoint", "SambaStudioEmbeddings": "langchain_community.embeddings.sambanova", "SelfHostedEmbeddings": "langchain_community.embeddings.self_hosted", diff --git a/libs/community/langchain_community/embeddings/predictionguard.py b/libs/community/langchain_community/embeddings/predictionguard.py index 04ff8d56c12a0..58edecb9adff8 100644 --- a/libs/community/langchain_community/embeddings/predictionguard.py +++ b/libs/community/langchain_community/embeddings/predictionguard.py @@ -156,4 +156,4 
@@ def embed_image_text(self, inputs: List[Dict[str, str]]) -> List[float]: else: continue - return res \ No newline at end of file + return res diff --git a/libs/community/tests/integration_tests/embeddings/test_predictionguard.py b/libs/community/tests/integration_tests/embeddings/test_predictionguard.py index 2c43898a65b29..419923a975d26 100644 --- a/libs/community/tests/integration_tests/embeddings/test_predictionguard.py +++ b/libs/community/tests/integration_tests/embeddings/test_predictionguard.py @@ -38,7 +38,7 @@ def test_predictionguard_embeddings_images() -> None: """Test Prediction Guard embeddings.""" embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") image = [ - "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + "https://fastly.picsum.photos/id/866/200/300.jpg?hmac=rcadCENKh4rD6MAp6V_ma-AyWv641M4iiOpe1RyFHeI", ] output = embeddings.embed_images(image) assert len(output) == 1 @@ -49,7 +49,7 @@ def test_predictionguard_embeddings_images_multiple() -> None: embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") images = [ "https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg", - "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + "https://fastly.picsum.photos/id/866/200/300.jpg?hmac=rcadCENKh4rD6MAp6V_ma-AyWv641M4iiOpe1RyFHeI", ] output = embeddings.embed_images(images) assert len(output) == 2 @@ -61,7 +61,7 @@ def test_predictionguard_embeddings_image_text() -> None: inputs = [ { "text": "embed me", - "image": "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + "image": "https://fastly.picsum.photos/id/866/200/300.jpg?hmac=rcadCENKh4rD6MAp6V_ma-AyWv641M4iiOpe1RyFHeI", }, ] output = embeddings.embed_image_text(inputs) @@ -74,7 +74,7 @@ def test_predictionguard_embeddings_image_text_multiple() -> None: inputs = [ { "text": "embed me", - "image": "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + "image": "https://fastly.picsum.photos/id/866/200/300.jpg?hmac=rcadCENKh4rD6MAp6V_ma-AyWv641M4iiOpe1RyFHeI", }, { "text": "embed this", @@ -82,4 +82,4 @@ def test_predictionguard_embeddings_image_text_multiple() -> None: }, ] output = embeddings.embed_image_text(inputs) - assert len(output) == 2 \ No newline at end of file + assert len(output) == 2 From 3cb093139a1218c4b4fb5dc8feeef6ccd1ab9d2c Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Fri, 8 Nov 2024 11:06:12 -0500 Subject: [PATCH 3/5] adding new import to import tests --- libs/community/tests/unit_tests/embeddings/test_imports.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/community/tests/unit_tests/embeddings/test_imports.py b/libs/community/tests/unit_tests/embeddings/test_imports.py index a6f26ce0c3fa6..1642f70ab1769 100644 --- a/libs/community/tests/unit_tests/embeddings/test_imports.py +++ b/libs/community/tests/unit_tests/embeddings/test_imports.py @@ -82,6 +82,7 @@ "AscendEmbeddings", "ZhipuAIEmbeddings", "TextEmbedEmbeddings", + "PredictionGuardEmbeddings", ] From aa040a00eae18b9f9f0b19b6955316ed3f234e7c Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Fri, 8 Nov 2024 11:22:13 -0500 Subject: [PATCH 4/5] fixing images in embeddings notebook --- .../text_embedding/predictionguard.ipynb | 82 +++++++++---------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/docs/docs/integrations/text_embedding/predictionguard.ipynb b/docs/docs/integrations/text_embedding/predictionguard.ipynb index cbb8a2f8f3173..b09d5bd924c1b 100644 --- a/docs/docs/integrations/text_embedding/predictionguard.ipynb +++ 
b/docs/docs/integrations/text_embedding/predictionguard.ipynb
@@ -42,18 +42,18 @@
 {
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2024-10-08T18:59:10.422135Z",
-     "start_time": "2024-10-08T18:59:10.419563Z"
+     "end_time": "2024-11-08T16:20:01.598574Z",
+     "start_time": "2024-11-08T16:20:01.595887Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import os\n",
    "\n",
-    "os.environ[\"PREDICTIONGUARD_API_KEY\"] = \"\""
+    "os.environ[\"PREDICTIONGUARD_API_KEY\"] = \"
Date: Fri, 8 Nov 2024 11:25:56 -0500
Subject: [PATCH 5/5] updating provider doc

---
 .../providers/predictionguard.mdx | 109 ++++++------------
 1 file changed, 33 insertions(+), 76 deletions(-)

diff --git a/docs/docs/integrations/providers/predictionguard.mdx b/docs/docs/integrations/providers/predictionguard.mdx
index 5e01eeef14dbe..542c20d077e42 100644
--- a/docs/docs/integrations/providers/predictionguard.mdx
+++ b/docs/docs/integrations/providers/predictionguard.mdx
@@ -4,99 +4,56 @@ This page covers how to use the Prediction Guard ecosystem within LangChain.
 It is broken into two parts: installation and setup, and then references to specific Prediction Guard wrappers.

 ## Installation and Setup
-- Install the Python SDK with `pip install predictionguard`
-- Get a Prediction Guard access token (as described [here](https://docs.predictionguard.com/)) and set it as an environment variable (`PREDICTIONGUARD_TOKEN`)
-## LLM Wrapper
-
-There exists a Prediction Guard LLM wrapper, which you can access with
-```python
-from langchain_community.llms import PredictionGuard
+- Install the Python SDK:
 ```
-
-You can provide the name of the Prediction Guard model as an argument when initializing the LLM:
-```python
-pgllm = PredictionGuard(model="MPT-7B-Instruct")
+pip install predictionguard
 ```
-You can also provide your access token directly as an argument:
-```python
-pgllm = PredictionGuard(model="MPT-7B-Instruct", token="")
-```
+- Get a Prediction Guard API key (as described [here](https://docs.predictionguard.com/)) and set it as an environment variable (`PREDICTIONGUARD_API_KEY`)
-Finally, you can provide an "output" argument that is used to structure/ control the output of the LLM:
-```python
-pgllm = PredictionGuard(model="MPT-7B-Instruct", output={"type": "boolean"})
-```
+## Prediction Guard LangChain Integrations
+|API|Description|Endpoint Docs|Import|Example Usage|
+|---|---|---|---|---|
+|Completions|Generate Text|[Completions](https://docs.predictionguard.com/api-reference/api-reference/completions)|`from langchain_community.llms.predictionguard import PredictionGuard`|[predictionguard.ipynb](/docs/integrations/llms/predictionguard)|
+|Text Embedding|Embed Strings to Vectors|[Embeddings](https://docs.predictionguard.com/api-reference/api-reference/embeddings)|`from langchain_community.embeddings.predictionguard import PredictionGuardEmbeddings`|[predictionguard.ipynb](/docs/integrations/text_embedding/predictionguard)|
-## Example usage
+## Getting Started
-Basic usage of the controlled or guarded LLM wrapper:
-```python
-import os
+## Embedding Models
-import predictionguard as pg
-from langchain_community.llms import PredictionGuard
-from langchain_core.prompts import PromptTemplate
-from langchain.chains import LLMChain
-
-# Your Prediction Guard API key. Get one at predictionguard.com
-os.environ["PREDICTIONGUARD_TOKEN"] = ""
-
-# Define a prompt template
-template = """Respond to the following query based on the context.
-
-Context: EVERY comment, DM + email suggestion has led us to this EXCITING announcement! 🎉 We have officially added TWO new candle subscription box options! 📦
-Exclusive Candle Box - $80
-Monthly Candle Box - $45 (NEW!)
-Scent of The Month Box - $28 (NEW!)
-Head to stories to get ALL the deets on each box! 👆 BONUS: Save 50% on your first box with code 50OFF! 🎉
-
-Query: {query}
-
-Result: """
-prompt = PromptTemplate.from_template(template)
-
-# With "guarding" or controlling the output of the LLM. See the
-# Prediction Guard docs (https://docs.predictionguard.com) to learn how to
-# control the output with integer, float, boolean, JSON, and other types and
-# structures.
-pgllm = PredictionGuard(model="MPT-7B-Instruct",
-                        output={
-                            "type": "categorical",
-                            "categories": [
-                                "product announcement",
-                                "apology",
-                                "relational"
-                            ]
-                        })
-pgllm(prompt.format(query="What kind of post is this?"))
+### Prediction Guard Embeddings
+
+See a [usage example](/docs/integrations/text_embedding/predictionguard)
+
+```python
+from langchain_community.embeddings.predictionguard import PredictionGuardEmbeddings
 ```
-Basic LLM Chaining with the Prediction Guard wrapper:
+#### Usage
 ```python
-import os
+# If predictionguard_api_key is not passed, default behavior is to use the `PREDICTIONGUARD_API_KEY` environment variable.
+embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
-from langchain_core.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from langchain_community.llms import PredictionGuard
+text = "This is an embedding example."
+output = embeddings.embed_query(text)
+```
-# Optional, add your OpenAI API Key. This is optional, as Prediction Guard allows
-# you to access all the latest open access models (see https://docs.predictionguard.com)
-os.environ["OPENAI_API_KEY"] = ""
-# Your Prediction Guard API key. Get one at predictionguard.com
-os.environ["PREDICTIONGUARD_TOKEN"] = ""
-pgllm = PredictionGuard(model="OpenAI-gpt-3.5-turbo-instruct")
+## LLMs
+### Prediction Guard LLM
-template = """Question: {question}
+See a [usage example](/docs/integrations/llms/predictionguard)
-Answer: Let's think step by step."""
-prompt = PromptTemplate.from_template(template)
-llm_chain = LLMChain(prompt=prompt, llm=pgllm, verbose=True)
+```python
+from langchain_community.llms import PredictionGuard
+```
-question = "What NFL team won the Super Bowl in the year Justin Beiber was born?"
+#### Usage
+```python
+# If predictionguard_api_key is not passed, default behavior is to use the `PREDICTIONGUARD_API_KEY` environment variable.
+llm = PredictionGuard(model="Hermes-2-Pro-Llama-3-8B")
-llm_chain.predict(question=question)
+llm.invoke("Tell me a joke about bears")
 ```