From 68b4f33ad79e9b91d632516c7d817f83695966f0 Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Fri, 8 Nov 2024 10:41:45 -0500 Subject: [PATCH 1/5] adding support for embeddings using predictionguard --- .../text_embedding/predictionguard.ipynb | 428 ++++++++++++++++++ .../embeddings/__init__.py | 3 + .../embeddings/predictionguard.py | 159 +++++++ .../embeddings/test_predictionguard.py | 85 ++++ 4 files changed, 675 insertions(+) create mode 100644 docs/docs/integrations/text_embedding/predictionguard.ipynb create mode 100644 libs/community/langchain_community/embeddings/predictionguard.py create mode 100644 libs/community/tests/integration_tests/embeddings/test_predictionguard.py diff --git a/docs/docs/integrations/text_embedding/predictionguard.ipynb b/docs/docs/integrations/text_embedding/predictionguard.ipynb new file mode 100644 index 0000000000000..cbb8a2f8f3173 --- /dev/null +++ b/docs/docs/integrations/text_embedding/predictionguard.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": "# PredictionGuardEmbeddings" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": ">[Prediction Guard](https://predictionguard.com) is a secure, scalable GenAI platform that safeguards sensitive data, prevents common AI malfunctions, and runs on affordable hardware." + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Overview" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Integration details\n", + "This integration shows how to use the Prediction Guard embeddings integration with Langchain. This integration supports text and images, separately or together in matched pairs." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Setup\n", + "To access Prediction Guard models, contact us [here](https://predictionguard.com/get-started) to get a Prediction Guard API key and get started. \n" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Credentials\n", + "Once you have a key, you can set it with \n" + ] + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:59:10.422135Z", + "start_time": "2024-10-08T18:59:10.419563Z" + } + }, + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "os.environ[\"PREDICTIONGUARD_API_KEY\"] = \"\"" + ], + "outputs": [], + "execution_count": 21 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Installation" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "%pip install --upgrade --quiet predictionguard langchain" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Instantiation" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "First, install the Prediction Guard and LangChain packages. Then, set the required env vars and set up package imports." 
+ }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:13:40.463622Z", + "start_time": "2024-10-08T18:13:40.240249Z" + } + }, + "source": "from langchain_community.embeddings.predictionguard import PredictionGuardEmbeddings", + "outputs": [], + "execution_count": 1 + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:14:14.324100Z", + "start_time": "2024-10-08T18:14:13.997521Z" + } + }, + "source": [ + "embeddings = PredictionGuardEmbeddings(model=\"bridgetower-large-itm-mlm-itc\")" + ], + "outputs": [], + "execution_count": 4 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "" + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prediction Guard embeddings generation supports both text and images. This integration includes that support spread across various functions." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Indexing and Retrieval" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:14:29.307881Z", + "start_time": "2024-10-08T18:14:28.405099Z" + } + }, + "cell_type": "code", + "source": [ + "# Create a vector store with a sample text\n", + "from langchain_core.vectorstores import InMemoryVectorStore\n", + "\n", + "text = \"LangChain is the framework for building context-aware reasoning applications.\"\n", + "\n", + "vectorstore = InMemoryVectorStore.from_texts(\n", + " [text],\n", + " embedding=embeddings,\n", + ")\n", + "\n", + "# Use the vectorstore as a retriever\n", + "retriever = vectorstore.as_retriever()\n", + "\n", + "# Retrieve the most similar text\n", + "retrieved_documents = retriever.invoke(\"What is LangChain?\")\n", + "\n", + "# Show the retrieved document's content\n", + "retrieved_documents[0].page_content" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "'LangChain is the framework for building context-aware reasoning applications.'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Direct Usage\n", + "The vectorstore and retriever implementations are calling `embeddings.embed_documents(...)` and `embeddings.embed_query(...)` to create embeddings from the texts used in the `from_texts` and retrieval `invoke` operations.\n", + "\n", + "These methods can be directly called with the following commands." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "### Embed single texts" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:16:00.824334Z", + "start_time": "2024-10-08T18:16:00.368665Z" + } + }, + "source": [ + "# Embedding a single string\n", + "text = \"This is an embedding example.\"\n", + "single_vector = embeddings.embed_query(text)\n", + "\n", + "single_vector[:5]" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "[0.01456777285784483,\n", + " -0.08131945133209229,\n", + " -0.013045587576925755,\n", + " -0.09488929063081741,\n", + " -0.003087474964559078]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 14 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Embed multiple texts" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:16:11.076843Z", + "start_time": "2024-10-08T18:16:10.655925Z" + } + }, + "source": [ + "# Embedding multiple strings\n", + "docs = [\n", + " \"This is an embedding example.\",\n", + " \"This is another embedding example.\",\n", + "]\n", + "\n", + "two_vectors = embeddings.embed_documents(docs)\n", + "\n", + "for vector in two_vectors:\n", + " print(vector[:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.01456777285784483, -0.08131945133209229, -0.013045587576925755, -0.09488929063081741, -0.003087474964559078]\n", + "[-0.0015021917643025517, -0.08883760124444962, -0.0025286630261689425, -0.1052245944738388, 0.014225339516997337]\n" + ] + } + ], + "execution_count": 15 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "### Embed single images" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:16:44.853569Z", + "start_time": "2024-10-08T18:16:43.457282Z" + } + }, + "source": [ + "# Embedding a single image. 
These functions accept image URLs, image files, data URIs, and base64 encoded strings.\n", + "image = [\n", + " \"https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png\",\n", + "]\n", + "single_vector = embeddings.embed_images(image)\n", + "\n", + "print(single_vector[0][:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.06482088565826416, -0.026690427213907242, 0.07683052867650986, -0.060580912977457047, 0.0001994583144551143]\n" + ] + } + ], + "execution_count": 17 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Embed multiple images" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:17:02.165077Z", + "start_time": "2024-10-08T18:17:00.612485Z" + } + }, + "source": [ + "# Embedding multiple images\n", + "images = [\n", + " \"https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png\",\n", + " \"https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg\",\n", + "]\n", + "\n", + "two_vectors = embeddings.embed_images(images)\n", + "\n", + "for vector in two_vectors:\n", + " print(vector[:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.06482088565826416, -0.026690427213907242, 0.07683052867650986, -0.060580912977457047, 0.0001994583144551143]\n", + "[0.0911610797047615, -0.034427884966135025, 0.007927080616354942, -0.03500846028327942, 0.022317267954349518]\n" + ] + } + ], + "execution_count": 18 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "### Embed single text-image pairs" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:17:17.113169Z", + "start_time": "2024-10-08T18:17:15.669474Z" + } + }, + "source": [ + "# Embedding a single text-image pair\n", + "inputs = [\n", + " {\n", + " \"text\": \"This is an embedding example.\",\n", + " \"image\": \"https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png\",\n", + " },\n", + "]\n", + "single_vector = embeddings.embed_image_text(inputs)\n", + "\n", + "print(single_vector[0][:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.025471875444054604, -0.07661919295787811, 0.06256384402513504, -0.06042419373989105, 0.016889123246073723]\n" + ] + } + ], + "execution_count": 19 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Embed multiple text-image pairs" + }, + { + "cell_type": "code", + "metadata": { + "ExecuteTime": { + "end_time": "2024-10-08T18:17:31.948434Z", + "start_time": "2024-10-08T18:17:30.393415Z" + } + }, + "source": [ + "# Embedding multiple text-image pairs\n", + "inputs = [\n", + " {\n", + " \"text\": \"This is an embedding example.\",\n", + " \"image\": \"https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png\",\n", + " },\n", + " {\n", + " \"text\": \"This is another embedding example.\",\n", + " \"image\": \"https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg\",\n", + " },\n", + "]\n", + "two_vectors = embeddings.embed_image_text(inputs)\n", + "\n", + "for vector in two_vectors:\n", + " print(vector[:5])" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.025471875444054604, -0.07661919295787811, 0.06256384402513504, -0.06042419373989105, 0.016889123246073723]\n", + "[0.026654226705431938, -0.10080841928720474, -0.012732953764498234, -0.04365091398358345, 0.036743905395269394]\n" + ] + } + ], + "execution_count": 20 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## API Reference\n", + "For 
detailed documentation of all PredictionGuardEmbeddings features and configurations, check out the API reference: https://python.langchain.com/api_reference/community/embeddings/langchain_community.embeddings.predictionguard.PredictionGuardEmbeddings.html"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py
index 38c7d5a76bc1d..9f957f2f6fcb2 100644
--- a/libs/community/langchain_community/embeddings/__init__.py
+++ b/libs/community/langchain_community/embeddings/__init__.py
@@ -188,6 +188,9 @@
     from langchain_community.embeddings.premai import (
         PremAIEmbeddings,
     )
+    from langchain_community.embeddings.predictionguard import (
+        PredictionGuardEmbeddings,
+    )
     from langchain_community.embeddings.sagemaker_endpoint import (
         SagemakerEndpointEmbeddings,
     )
diff --git a/libs/community/langchain_community/embeddings/predictionguard.py b/libs/community/langchain_community/embeddings/predictionguard.py
new file mode 100644
index 0000000000000..04ff8d56c12a0
--- /dev/null
+++ b/libs/community/langchain_community/embeddings/predictionguard.py
@@ -0,0 +1,159 @@
+import logging
+from typing import Any, Dict, List, Optional
+
+from langchain_core.embeddings import Embeddings
+from langchain_core.utils import get_from_dict_or_env
+from pydantic import BaseModel, ConfigDict, model_validator
+
+logger = logging.getLogger(__name__)
+
+
+class PredictionGuardEmbeddings(BaseModel, Embeddings):
+    """Prediction Guard embedding models.
+
+    To use, you should have the ``predictionguard`` python package installed, and the
+    environment variable ``PREDICTIONGUARD_API_KEY`` set with your API key, or pass
+    it as a named parameter to the constructor.
+
+    Example:
+        .. code-block:: python
+
+            embeddings = PredictionGuardEmbeddings(
+                model="bridgetower-large-itm-mlm-itc",
+                api_key="my-api-key"
+            )
+    """
+
+    client: Any = None  #: :meta private:
+    """Prediction Guard Client"""
+
+    model: Optional[str] = "bridgetower-large-itm-mlm-itc"
+    """Model name to use."""
+
+    predictionguard_api_key: Optional[str] = None
+    """Prediction Guard API key."""
+
+    model_config = ConfigDict(
+        extra="forbid",
+    )
+
+    @model_validator(mode="before")
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the API key and Python package exist in the environment."""
+        pg_api_key = get_from_dict_or_env(
+            values, "predictionguard_api_key", "PREDICTIONGUARD_API_KEY"
+        )
+
+        try:
+            from predictionguard import PredictionGuard
+
+            values["client"] = PredictionGuard(
+                api_key=pg_api_key,
+            )
+
+        except ImportError:
+            raise ImportError(
+                "Could not import predictionguard python package. "
+                "Please install it with `pip install predictionguard`."
+            )
+
+        return values
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Call out to Prediction Guard's embedding endpoint for embedding documents.
+
+        Args:
+            texts:
+                The list of texts to embed.
+
+        Returns:
+            Embeddings for the texts.
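+
+        Example (a minimal usage sketch; assumes a valid API key is set):
+            .. code-block:: python
+
+                embeddings = PredictionGuardEmbeddings(
+                    model="bridgetower-large-itm-mlm-itc"
+                )
+                vectors = embeddings.embed_documents(
+                    ["first text to embed", "second text to embed"]
+                )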
+        """
+
+        inputs = []
+        for text in texts:
+            input = {"text": text}
+            inputs.append(input)
+
+        response = self.client.embeddings.create(model=self.model, input=inputs)
+
+        res = []
+        indx = 0
+        for re in response["data"]:
+            if re["index"] == indx:
+                res.append(re["embedding"])
+                indx += 1
+            else:
+                continue
+
+        return res
+
+    def embed_query(self, text: str) -> List[float]:
+        """Call out to Prediction Guard's embedding endpoint for embedding query text.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+
+        inputs = [{"text": text}]
+        response = self.client.embeddings.create(model=self.model, input=inputs)
+
+        return response["data"][0]["embedding"]
+
+    def embed_images(self, images: List[str]) -> List[List[float]]:
+        """
+        Call out to Prediction Guard's embedding endpoint for embedding multiple images.
+
+        Args:
+            images: A list of images to embed.
+                Supports image file paths, image URLs, data URIs,
+                and base64 encoded images.
+
+        Returns:
+            Embeddings for the images.
+        """
+
+        inputs = []
+        for image in images:
+            input = {"image": image}
+
+            inputs.append(input)
+
+        response = self.client.embeddings.create(model=self.model, input=inputs)
+
+        res = []
+        indx = 0
+        for re in response["data"]:
+            if re["index"] == indx:
+                res.append(re["embedding"])
+                indx += 1
+            else:
+                continue
+
+        return res
+
+    def embed_image_text(self, inputs: List[Dict[str, str]]) -> List[float]:
+        """
+        Call out to Prediction Guard embeddings for embedding an image and text.
+
+        Args:
+            inputs: A list of dictionaries containing the text and images to embed.
+
+        Returns:
+            Embeddings for the text and images.
+        """
+
+        response = self.client.embeddings.create(model=self.model, input=inputs)
+
+        res = []
+        indx = 0
+        for re in response["data"]:
+            if re["index"] == indx:
+                res.append(re["embedding"])
+                indx += 1
+            else:
+                continue
+
+        return res
\ No newline at end of file
diff --git a/libs/community/tests/integration_tests/embeddings/test_predictionguard.py b/libs/community/tests/integration_tests/embeddings/test_predictionguard.py
new file mode 100644
index 0000000000000..2c43898a65b29
--- /dev/null
+++ b/libs/community/tests/integration_tests/embeddings/test_predictionguard.py
@@ -0,0 +1,85 @@
+"""Test Prediction Guard API wrapper"""
+
+from langchain_community.embeddings.predictionguard import PredictionGuardEmbeddings
+
+
+def test_predictionguard_embeddings_documents() -> None:
+    """Test Prediction Guard embeddings."""
+    embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
+    documents = [
+        "embed this",
+    ]
+    output = embeddings.embed_documents(documents)
+    assert len(output) == 1
+    assert len(output[0]) > 2
+
+
+def test_predictionguard_embeddings_documents_multiple() -> None:
+    """Test Prediction Guard embeddings."""
+    embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
+    documents = [
+        "embed me",
+        "embed this",
+    ]
+    output = embeddings.embed_documents(documents)
+    assert len(output[0]) > 2
+    assert len(output[1]) > 2
+
+
+def test_predictionguard_embeddings_query() -> None:
+    """Test Prediction Guard embeddings."""
+    embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
+    document = "embed this"
+    output = embeddings.embed_query(document)
+    assert len(output) > 2
+
+
+def test_predictionguard_embeddings_images() -> None:
+    """Test Prediction Guard embeddings."""
+    embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
+    image = [
+        "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png",
+    ]
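+    # One embedding vector is expected per input image; URLs, file paths,
+    # data URIs, and base64 encoded images are accepted.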
+ output = embeddings.embed_images(image) + assert len(output) == 1 + + +def test_predictionguard_embeddings_images_multiple() -> None: + """Test Prediction Guard embeddings.""" + embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") + images = [ + "https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg", + "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + ] + output = embeddings.embed_images(images) + assert len(output) == 2 + + +def test_predictionguard_embeddings_image_text() -> None: + """Test Prediction Guard Embeddings""" + embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") + inputs = [ + { + "text": "embed me", + "image": "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + }, + ] + output = embeddings.embed_image_text(inputs) + assert len(output) == 1 + + +def test_predictionguard_embeddings_image_text_multiple() -> None: + """Test Prediction Guard Embeddings""" + embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") + inputs = [ + { + "text": "embed me", + "image": "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + }, + { + "text": "embed this", + "image": "https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg", + }, + ] + output = embeddings.embed_image_text(inputs) + assert len(output) == 2 \ No newline at end of file From 27947903210c7ad0c4a9b8a136b6ee8a42059802 Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Fri, 8 Nov 2024 11:01:37 -0500 Subject: [PATCH 2/5] fixing linting errors --- .../langchain_community/embeddings/__init__.py | 8 +++++--- .../langchain_community/embeddings/predictionguard.py | 2 +- .../embeddings/test_predictionguard.py | 10 +++++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py index 9f957f2f6fcb2..d786a2fc1e7b8 100644 --- a/libs/community/langchain_community/embeddings/__init__.py +++ b/libs/community/langchain_community/embeddings/__init__.py @@ -185,12 +185,12 @@ from langchain_community.embeddings.ovhcloud import ( OVHCloudEmbeddings, ) - from langchain_community.embeddings.premai import ( - PremAIEmbeddings, - ) from langchain_community.embeddings.predictionguard import ( PredictionGuardEmbeddings, ) + from langchain_community.embeddings.premai import ( + PremAIEmbeddings, + ) from langchain_community.embeddings.sagemaker_endpoint import ( SagemakerEndpointEmbeddings, ) @@ -304,6 +304,7 @@ "OpenVINOEmbeddings", "OracleEmbeddings", "OVHCloudEmbeddings", + "PredictionGuardEmbeddings", "PremAIEmbeddings", "QianfanEmbeddingsEndpoint", "QuantizedBgeEmbeddings", @@ -390,6 +391,7 @@ "QuantizedBiEncoderEmbeddings": "langchain_community.embeddings.optimum_intel", "OracleEmbeddings": "langchain_community.embeddings.oracleai", "OVHCloudEmbeddings": "langchain_community.embeddings.ovhcloud", + "PredictionGuardEmbeddings": "langchain_community.embeddings.predictionguard", "SagemakerEndpointEmbeddings": "langchain_community.embeddings.sagemaker_endpoint", "SambaStudioEmbeddings": "langchain_community.embeddings.sambanova", "SelfHostedEmbeddings": "langchain_community.embeddings.self_hosted", diff --git a/libs/community/langchain_community/embeddings/predictionguard.py b/libs/community/langchain_community/embeddings/predictionguard.py index 04ff8d56c12a0..58edecb9adff8 100644 --- a/libs/community/langchain_community/embeddings/predictionguard.py +++ b/libs/community/langchain_community/embeddings/predictionguard.py @@ -156,4 +156,4 
@@ def embed_image_text(self, inputs: List[Dict[str, str]]) -> List[float]: else: continue - return res \ No newline at end of file + return res diff --git a/libs/community/tests/integration_tests/embeddings/test_predictionguard.py b/libs/community/tests/integration_tests/embeddings/test_predictionguard.py index 2c43898a65b29..419923a975d26 100644 --- a/libs/community/tests/integration_tests/embeddings/test_predictionguard.py +++ b/libs/community/tests/integration_tests/embeddings/test_predictionguard.py @@ -38,7 +38,7 @@ def test_predictionguard_embeddings_images() -> None: """Test Prediction Guard embeddings.""" embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") image = [ - "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + "https://fastly.picsum.photos/id/866/200/300.jpg?hmac=rcadCENKh4rD6MAp6V_ma-AyWv641M4iiOpe1RyFHeI", ] output = embeddings.embed_images(image) assert len(output) == 1 @@ -49,7 +49,7 @@ def test_predictionguard_embeddings_images_multiple() -> None: embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc") images = [ "https://farm4.staticflickr.com/3300/3497460990_11dfb95dd1_z.jpg", - "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + "https://fastly.picsum.photos/id/866/200/300.jpg?hmac=rcadCENKh4rD6MAp6V_ma-AyWv641M4iiOpe1RyFHeI", ] output = embeddings.embed_images(images) assert len(output) == 2 @@ -61,7 +61,7 @@ def test_predictionguard_embeddings_image_text() -> None: inputs = [ { "text": "embed me", - "image": "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + "image": "https://fastly.picsum.photos/id/866/200/300.jpg?hmac=rcadCENKh4rD6MAp6V_ma-AyWv641M4iiOpe1RyFHeI", }, ] output = embeddings.embed_image_text(inputs) @@ -74,7 +74,7 @@ def test_predictionguard_embeddings_image_text_multiple() -> None: inputs = [ { "text": "embed me", - "image": "https://pbs.twimg.com/media/GKLN4qPXEAArqoK.png", + "image": "https://fastly.picsum.photos/id/866/200/300.jpg?hmac=rcadCENKh4rD6MAp6V_ma-AyWv641M4iiOpe1RyFHeI", }, { "text": "embed this", @@ -82,4 +82,4 @@ def test_predictionguard_embeddings_image_text_multiple() -> None: }, ] output = embeddings.embed_image_text(inputs) - assert len(output) == 2 \ No newline at end of file + assert len(output) == 2 From 3cb093139a1218c4b4fb5dc8feeef6ccd1ab9d2c Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Fri, 8 Nov 2024 11:06:12 -0500 Subject: [PATCH 3/5] adding new import to import tests --- libs/community/tests/unit_tests/embeddings/test_imports.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/community/tests/unit_tests/embeddings/test_imports.py b/libs/community/tests/unit_tests/embeddings/test_imports.py index a6f26ce0c3fa6..1642f70ab1769 100644 --- a/libs/community/tests/unit_tests/embeddings/test_imports.py +++ b/libs/community/tests/unit_tests/embeddings/test_imports.py @@ -82,6 +82,7 @@ "AscendEmbeddings", "ZhipuAIEmbeddings", "TextEmbedEmbeddings", + "PredictionGuardEmbeddings", ] From aa040a00eae18b9f9f0b19b6955316ed3f234e7c Mon Sep 17 00:00:00 2001 From: jmansdorfer Date: Fri, 8 Nov 2024 11:22:13 -0500 Subject: [PATCH 4/5] fixing images in embeddings notebook --- .../text_embedding/predictionguard.ipynb | 82 +++++++++---------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/docs/docs/integrations/text_embedding/predictionguard.ipynb b/docs/docs/integrations/text_embedding/predictionguard.ipynb index cbb8a2f8f3173..b09d5bd924c1b 100644 --- a/docs/docs/integrations/text_embedding/predictionguard.ipynb +++ 
b/docs/docs/integrations/text_embedding/predictionguard.ipynb
@@ -42,18 +42,18 @@
 {
   "metadata": {
    "ExecuteTime": {
-     "end_time": "2024-10-08T18:59:10.422135Z",
-     "start_time": "2024-10-08T18:59:10.419563Z"
+     "end_time": "2024-11-08T16:20:01.598574Z",
+     "start_time": "2024-11-08T16:20:01.595887Z"
    }
   },
   "cell_type": "code",
   "source": [
    "import os\n",
    "\n",
-    "os.environ[\"PREDICTIONGUARD_API_KEY\"] = \"\""
+    "os.environ[\"PREDICTIONGUARD_API_KEY\"] = \"
Date: Fri, 8 Nov 2024 11:25:56 -0500
Subject: [PATCH 5/5] updating provider doc

---
 .../providers/predictionguard.mdx | 109 ++++++------------
 1 file changed, 33 insertions(+), 76 deletions(-)

diff --git a/docs/docs/integrations/providers/predictionguard.mdx b/docs/docs/integrations/providers/predictionguard.mdx
index 5e01eeef14dbe..542c20d077e42 100644
--- a/docs/docs/integrations/providers/predictionguard.mdx
+++ b/docs/docs/integrations/providers/predictionguard.mdx
@@ -4,99 +4,56 @@ This page covers how to use the Prediction Guard ecosystem within LangChain.
 It is broken into two parts: installation and setup, and then references to specific Prediction Guard wrappers.

 ## Installation and Setup
-- Install the Python SDK with `pip install predictionguard`
-- Get a Prediction Guard access token (as described [here](https://docs.predictionguard.com/)) and set it as an environment variable (`PREDICTIONGUARD_TOKEN`)
-## LLM Wrapper
-
-There exists a Prediction Guard LLM wrapper, which you can access with
-```python
-from langchain_community.llms import PredictionGuard
+- Install the Python SDK:
 ```
-
-You can provide the name of the Prediction Guard model as an argument when initializing the LLM:
-```python
-pgllm = PredictionGuard(model="MPT-7B-Instruct")
+pip install predictionguard
 ```
-You can also provide your access token directly as an argument:
-```python
-pgllm = PredictionGuard(model="MPT-7B-Instruct", token="")
-```
+- Get a Prediction Guard API key (as described [here](https://docs.predictionguard.com/)) and set it as an environment variable (`PREDICTIONGUARD_API_KEY`)
-Finally, you can provide an "output" argument that is used to structure/ control the output of the LLM:
-```python
-pgllm = PredictionGuard(model="MPT-7B-Instruct", output={"type": "boolean"})
-```
+## Prediction Guard LangChain Integrations
+|API|Description|Endpoint Docs|Import|Example Usage|
+|---|---|---|---|---|
+|Completions|Generate Text|[Completions](https://docs.predictionguard.com/api-reference/api-reference/completions)|`from langchain_community.llms.predictionguard import PredictionGuard`|[predictionguard.ipynb](/docs/integrations/llms/predictionguard)|
+|Text Embedding|Embed Strings to Vectors|[Embeddings](https://docs.predictionguard.com/api-reference/api-reference/embeddings)|`from langchain_community.embeddings.predictionguard import PredictionGuardEmbeddings`|[predictionguard.ipynb](/docs/integrations/text_embedding/predictionguard)|
-## Example usage
+## Getting Started
-Basic usage of the controlled or guarded LLM wrapper:
-```python
-import os
+## Embedding Models
-import predictionguard as pg
-from langchain_community.llms import PredictionGuard
-from langchain_core.prompts import PromptTemplate
-from langchain.chains import LLMChain
-
-# Your Prediction Guard API key. Get one at predictionguard.com
-os.environ["PREDICTIONGUARD_TOKEN"] = ""
-
-# Define a prompt template
-template = """Respond to the following query based on the context.
-
-Context: EVERY comment, DM + email suggestion has led us to this EXCITING announcement! 🎉 We have officially added TWO new candle subscription box options! 📦
-Exclusive Candle Box - $80
-Monthly Candle Box - $45 (NEW!)
-Scent of The Month Box - $28 (NEW!)
-Head to stories to get ALL the deets on each box! 👆 BONUS: Save 50% on your first box with code 50OFF! 🎉
-
-Query: {query}
-
-Result: """
-prompt = PromptTemplate.from_template(template)
-
-# With "guarding" or controlling the output of the LLM. See the
-# Prediction Guard docs (https://docs.predictionguard.com) to learn how to
-# control the output with integer, float, boolean, JSON, and other types and
-# structures.
-pgllm = PredictionGuard(model="MPT-7B-Instruct",
-                        output={
-                            "type": "categorical",
-                            "categories": [
-                                "product announcement",
-                                "apology",
-                                "relational"
-                            ]
-                        })
-pgllm(prompt.format(query="What kind of post is this?"))
+### Prediction Guard Embeddings
+
+See a [usage example](/docs/integrations/text_embedding/predictionguard)
+
+```python
+from langchain_community.embeddings.predictionguard import PredictionGuardEmbeddings
 ```
-Basic LLM Chaining with the Prediction Guard wrapper:
+#### Usage
 ```python
-import os
+# If predictionguard_api_key is not passed, default behavior is to use the `PREDICTIONGUARD_API_KEY` environment variable.
+embeddings = PredictionGuardEmbeddings(model="bridgetower-large-itm-mlm-itc")
-from langchain_core.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from langchain_community.llms import PredictionGuard
+text = "This is an embedding example."
+output = embeddings.embed_query(text)
+```
-# Optional, add your OpenAI API Key. This is optional, as Prediction Guard allows
-# you to access all the latest open access models (see https://docs.predictionguard.com)
-os.environ["OPENAI_API_KEY"] = ""
-# Your Prediction Guard API key. Get one at predictionguard.com
-os.environ["PREDICTIONGUARD_TOKEN"] = ""
-pgllm = PredictionGuard(model="OpenAI-gpt-3.5-turbo-instruct")
+## LLMs
+### Prediction Guard LLM
-template = """Question: {question}
+See a [usage example](/docs/integrations/llms/predictionguard)
-Answer: Let's think step by step."""
-prompt = PromptTemplate.from_template(template)
-llm_chain = LLMChain(prompt=prompt, llm=pgllm, verbose=True)
+```python
+from langchain_community.llms import PredictionGuard
+```
-question = "What NFL team won the Super Bowl in the year Justin Beiber was born?"
+#### Usage
+```python
+# If predictionguard_api_key is not passed, default behavior is to use the `PREDICTIONGUARD_API_KEY` environment variable.
+llm = PredictionGuard(model="Hermes-2-Pro-Llama-3-8B")
-llm_chain.predict(question=question)
+llm.invoke("Tell me a joke about bears")
 ```