From 43dc5d34163829720ba46d2268db61909e7e5c6c Mon Sep 17 00:00:00 2001
From: Mohammad Mohtashim <45242107+keenborder786@users.noreply.github.com>
Date: Mon, 19 Feb 2024 23:09:11 +0500
Subject: [PATCH 01/31] community[patch]: OpenLLM Client Fixes + Added Timeout
 Parameter (#17478)

- OpenLLM was using an outdated method to get the final text output from the
  openllm client invocation, which raised an error. Corrected that.
- OpenLLM `_identifying_params` was reading the openllm client configuration
  through outdated attributes, which also raised an error; corrected that as
  well.
- Updated the docstring for OpenLLM.
- Added a timeout parameter that is passed through to the underlying openllm
  client.
---
 .../langchain_community/llms/openllm.py       | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/libs/community/langchain_community/llms/openllm.py b/libs/community/langchain_community/llms/openllm.py
index afb5a18f9ba45..fa3b03e1f98d5 100644
--- a/libs/community/langchain_community/llms/openllm.py
+++ b/libs/community/langchain_community/llms/openllm.py
@@ -72,7 +72,7 @@ class OpenLLM(LLM):
             from langchain_community.llms import OpenLLM

             llm = OpenLLM(server_url='http://localhost:3000')
-            llm("What is the difference between a duck and a goose?")
+            llm.invoke("What is the difference between a duck and a goose?")
     """

     model_name: Optional[str] = None
@@ -82,6 +82,8 @@ class OpenLLM(LLM):
     See 'openllm models' for all available model variants."""
     server_url: Optional[str] = None
     """Optional server URL that currently runs a LLMServer with 'openllm start'."""
+    timeout: int = 30
+    """Timeout for the openllm client."""
     server_type: ServerType = "http"
     """Optional server type. Either 'http' or 'grpc'."""
     embedded: bool = True
@@ -125,6 +127,7 @@ def __init__(
         *,
         model_id: Optional[str] = None,
         server_url: Optional[str] = None,
+        timeout: int = 30,
         server_type: Literal["grpc", "http"] = "http",
         embedded: bool = True,
         **llm_kwargs: Any,
@@ -149,11 +152,12 @@ def __init__(
                 if server_type == "http"
                 else openllm.client.GrpcClient
             )
-            client = client_cls(server_url)
+            client = client_cls(server_url, timeout)

             super().__init__(
                 **{
                     "server_url": server_url,
+                    "timeout": timeout,
                     "server_type": server_type,
                     "llm_kwargs": llm_kwargs,
                 }
@@ -217,9 +221,9 @@ def chat(input_text: str):
     def _identifying_params(self) -> IdentifyingParams:
         """Get the identifying parameters."""
         if self._client is not None:
-            self.llm_kwargs.update(self._client._config())
-            model_name = self._client._metadata()["model_name"]
-            model_id = self._client._metadata()["model_id"]
+            self.llm_kwargs.update(self._client._config)
+            model_name = self._client._metadata.model_dump()["model_name"]
+            model_id = self._client._metadata.model_dump()["model_id"]
         else:
             if self._runner is None:
                 raise ValueError("Runner must be initialized.")
@@ -265,9 +269,11 @@ def _call(
             self._identifying_params["model_name"], **copied
         )
         if self._client:
-            res = self._client.generate(
-                prompt, **config.model_dump(flatten=True)
-            ).responses[0]
+            res = (
+                self._client.generate(prompt, **config.model_dump(flatten=True))
+                .outputs[0]
+                .text
+            )
         else:
             assert self._runner is not None
             res = self._runner(prompt, **config.model_dump(flatten=True))

From e92e96193fc3fb263a3379b685ab365123e23ebc Mon Sep 17 00:00:00 2001
From: Christophe Bornet
Date: Mon, 19 Feb 2024 19:11:49 +0100
Subject: [PATCH 02/31] community[minor]: Add async methods to the AstraDB
 BaseStore (#16872)

---------

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
---
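
A minimal usage sketch of the new async API (a sketch only: the token,
endpoint, and collection name below are illustrative placeholders, and a
reachable Astra DB instance is assumed):

    import asyncio

    from langchain_community.storage.astradb import AstraDBStore
    from langchain_community.utilities.astradb import SetupMode

    async def main() -> None:
        # With SetupMode.ASYNC, collection setup is deferred and awaited
        # by the async methods themselves instead of blocking __init__.
        store = AstraDBStore(
            collection_name="my_kv_store",  # illustrative name
            token="AstraCS:...",  # placeholder credential
            api_endpoint="https://my-db-region.apps.astra.datastax.com",  # placeholder
            setup_mode=SetupMode.ASYNC,
        )
        await store.amset([("key1", [0.1, 0.2]), ("key2", "value2")])
        print(await store.amget(["key1", "key2"]))  # [[0.1, 0.2], 'value2']
        await store.amdelete(["key1", "key2"])

    asyncio.run(main())
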
.../langchain_community/storage/astradb.py | 59 ++++++++++-- .../integration_tests/storage/test_astradb.py | 95 ++++++++++++++++--- 2 files changed, 136 insertions(+), 18 deletions(-) diff --git a/libs/community/langchain_community/storage/astradb.py b/libs/community/langchain_community/storage/astradb.py index 0cb2ea310aad2..959ef374124c7 100644 --- a/libs/community/langchain_community/storage/astradb.py +++ b/libs/community/langchain_community/storage/astradb.py @@ -5,6 +5,7 @@ from typing import ( TYPE_CHECKING, Any, + AsyncIterator, Generic, Iterator, List, @@ -16,10 +17,13 @@ from langchain_core.stores import BaseStore, ByteStore -from langchain_community.utilities.astradb import _AstraDBEnvironment +from langchain_community.utilities.astradb import ( + SetupMode, + _AstraDBCollectionEnvironment, +) if TYPE_CHECKING: - from astrapy.db import AstraDB + from astrapy.db import AstraDB, AsyncAstraDB V = TypeVar("V") @@ -34,17 +38,23 @@ def __init__( api_endpoint: Optional[str] = None, astra_db_client: Optional[AstraDB] = None, namespace: Optional[str] = None, + *, + async_astra_db_client: Optional[AsyncAstraDB] = None, + pre_delete_collection: bool = False, + setup_mode: SetupMode = SetupMode.SYNC, ) -> None: - astra_env = _AstraDBEnvironment( + self.astra_env = _AstraDBCollectionEnvironment( + collection_name=collection_name, token=token, api_endpoint=api_endpoint, astra_db_client=astra_db_client, + async_astra_db_client=async_astra_db_client, namespace=namespace, + setup_mode=setup_mode, + pre_delete_collection=pre_delete_collection, ) - self.astra_db = astra_env.astra_db - self.collection = self.astra_db.create_collection( - collection_name=collection_name, - ) + self.collection = self.astra_env.collection + self.async_collection = self.astra_env.async_collection @abstractmethod def decode_value(self, value: Any) -> Optional[V]: @@ -56,28 +66,63 @@ def encode_value(self, value: Optional[V]) -> Any: def mget(self, keys: Sequence[str]) -> List[Optional[V]]: """Get the values associated with the given keys.""" + self.astra_env.ensure_db_setup() docs_dict = {} for doc in self.collection.paginated_find(filter={"_id": {"$in": list(keys)}}): docs_dict[doc["_id"]] = doc.get("value") return [self.decode_value(docs_dict.get(key)) for key in keys] + async def amget(self, keys: Sequence[str]) -> List[Optional[V]]: + """Get the values associated with the given keys.""" + await self.astra_env.aensure_db_setup() + docs_dict = {} + async for doc in self.async_collection.paginated_find( + filter={"_id": {"$in": list(keys)}} + ): + docs_dict[doc["_id"]] = doc.get("value") + return [self.decode_value(docs_dict.get(key)) for key in keys] + def mset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: """Set the given key-value pairs.""" + self.astra_env.ensure_db_setup() for k, v in key_value_pairs: self.collection.upsert({"_id": k, "value": self.encode_value(v)}) + async def amset(self, key_value_pairs: Sequence[Tuple[str, V]]) -> None: + """Set the given key-value pairs.""" + await self.astra_env.aensure_db_setup() + for k, v in key_value_pairs: + await self.async_collection.upsert( + {"_id": k, "value": self.encode_value(v)} + ) + def mdelete(self, keys: Sequence[str]) -> None: """Delete the given keys.""" + self.astra_env.ensure_db_setup() self.collection.delete_many(filter={"_id": {"$in": list(keys)}}) + async def amdelete(self, keys: Sequence[str]) -> None: + """Delete the given keys.""" + await self.astra_env.aensure_db_setup() + await self.async_collection.delete_many(filter={"_id": {"$in": 
list(keys)}}) + def yield_keys(self, *, prefix: Optional[str] = None) -> Iterator[str]: """Yield keys in the store.""" + self.astra_env.ensure_db_setup() docs = self.collection.paginated_find() for doc in docs: key = doc["_id"] if not prefix or key.startswith(prefix): yield key + async def ayield_keys(self, *, prefix: Optional[str] = None) -> AsyncIterator[str]: + """Yield keys in the store.""" + await self.astra_env.aensure_db_setup() + async for doc in self.async_collection.paginated_find(): + key = doc["_id"] + if not prefix or key.startswith(prefix): + yield key + class AstraDBStore(AstraDBBaseStore[Any]): """BaseStore implementation using DataStax AstraDB as the underlying store. diff --git a/libs/community/tests/integration_tests/storage/test_astradb.py b/libs/community/tests/integration_tests/storage/test_astradb.py index 643b4e93a3185..63108ef0c84a7 100644 --- a/libs/community/tests/integration_tests/storage/test_astradb.py +++ b/libs/community/tests/integration_tests/storage/test_astradb.py @@ -1,9 +1,16 @@ """Implement integration tests for AstraDB storage.""" +from __future__ import annotations + import os +from typing import TYPE_CHECKING import pytest from langchain_community.storage.astradb import AstraDBByteStore, AstraDBStore +from langchain_community.utilities.astradb import SetupMode + +if TYPE_CHECKING: + from astrapy.db import AstraDB, AsyncAstraDB def _has_env_vars() -> bool: @@ -16,7 +23,7 @@ def _has_env_vars() -> bool: @pytest.fixture -def astra_db(): # type: ignore[no-untyped-def] +def astra_db() -> AstraDB: from astrapy.db import AstraDB return AstraDB( @@ -26,24 +33,45 @@ def astra_db(): # type: ignore[no-untyped-def] ) -def init_store(astra_db, collection_name: str): # type: ignore[no-untyped-def, no-untyped-def] - astra_db.create_collection(collection_name) +@pytest.fixture +def async_astra_db() -> AsyncAstraDB: + from astrapy.db import AsyncAstraDB + + return AsyncAstraDB( + token=os.environ["ASTRA_DB_APPLICATION_TOKEN"], + api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"], + namespace=os.environ.get("ASTRA_DB_KEYSPACE"), + ) + + +def init_store(astra_db: AstraDB, collection_name: str) -> AstraDBStore: store = AstraDBStore(collection_name=collection_name, astra_db_client=astra_db) store.mset([("key1", [0.1, 0.2]), ("key2", "value2")]) return store -def init_bytestore(astra_db, collection_name: str): # type: ignore[no-untyped-def, no-untyped-def] - astra_db.create_collection(collection_name) +def init_bytestore(astra_db: AstraDB, collection_name: str) -> AstraDBByteStore: store = AstraDBByteStore(collection_name=collection_name, astra_db_client=astra_db) store.mset([("key1", b"value1"), ("key2", b"value2")]) return store +async def init_async_store( + async_astra_db: AsyncAstraDB, collection_name: str +) -> AstraDBStore: + store = AstraDBStore( + collection_name=collection_name, + async_astra_db_client=async_astra_db, + setup_mode=SetupMode.ASYNC, + ) + await store.amset([("key1", [0.1, 0.2]), ("key2", "value2")]) + return store + + @pytest.mark.requires("astrapy") @pytest.mark.skipif(not _has_env_vars(), reason="Missing Astra DB env. 
vars") class TestAstraDBStore: - def test_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] + def test_mget(self, astra_db: AstraDB) -> None: """Test AstraDBStore mget method.""" collection_name = "lc_test_store_mget" try: @@ -52,7 +80,16 @@ def test_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_mset(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_amget(self, async_astra_db: AsyncAstraDB) -> None: + """Test AstraDBStore amget method.""" + collection_name = "lc_test_store_mget" + try: + store = await init_async_store(async_astra_db, collection_name) + assert await store.amget(["key1", "key2"]) == [[0.1, 0.2], "value2"] + finally: + await async_astra_db.delete_collection(collection_name) + + def test_mset(self, astra_db: AstraDB) -> None: """Test that multiple keys can be set with AstraDBStore.""" collection_name = "lc_test_store_mset" try: @@ -64,7 +101,19 @@ def test_mset(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_mdelete(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_amset(self, async_astra_db: AsyncAstraDB) -> None: + """Test that multiple keys can be set with AstraDBStore.""" + collection_name = "lc_test_store_mset" + try: + store = await init_async_store(async_astra_db, collection_name) + result = await store.async_collection.find_one({"_id": "key1"}) + assert result["data"]["document"]["value"] == [0.1, 0.2] + result = await store.async_collection.find_one({"_id": "key2"}) + assert result["data"]["document"]["value"] == "value2" + finally: + await async_astra_db.delete_collection(collection_name) + + def test_mdelete(self, astra_db: AstraDB) -> None: """Test that deletion works as expected.""" collection_name = "lc_test_store_mdelete" try: @@ -75,7 +124,18 @@ def test_mdelete(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_yield_keys(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_amdelete(self, async_astra_db: AsyncAstraDB) -> None: + """Test that deletion works as expected.""" + collection_name = "lc_test_store_mdelete" + try: + store = await init_async_store(async_astra_db, collection_name) + await store.amdelete(["key1", "key2"]) + result = await store.amget(["key1", "key2"]) + assert result == [None, None] + finally: + await async_astra_db.delete_collection(collection_name) + + def test_yield_keys(self, astra_db: AstraDB) -> None: collection_name = "lc_test_store_yield_keys" try: store = init_store(astra_db, collection_name) @@ -85,7 +145,20 @@ def test_yield_keys(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_bytestore_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] + async def test_ayield_keys(self, async_astra_db: AsyncAstraDB) -> None: + collection_name = "lc_test_store_yield_keys" + try: + store = await init_async_store(async_astra_db, collection_name) + assert {key async for key in store.ayield_keys()} == {"key1", "key2"} + assert {key async for key in store.ayield_keys(prefix="key")} == { + "key1", + "key2", + } + assert {key async for key in store.ayield_keys(prefix="lang")} == set() + finally: + await async_astra_db.delete_collection(collection_name) + + def test_bytestore_mget(self, astra_db: AstraDB) -> None: """Test AstraDBByteStore mget method.""" collection_name = 
"lc_test_bytestore_mget" try: @@ -94,7 +167,7 @@ def test_bytestore_mget(self, astra_db) -> None: # type: ignore[no-untyped-def] finally: astra_db.delete_collection(collection_name) - def test_bytestore_mset(self, astra_db) -> None: # type: ignore[no-untyped-def] + def test_bytestore_mset(self, astra_db: AstraDB) -> None: """Test that multiple keys can be set with AstraDBByteStore.""" collection_name = "lc_test_bytestore_mset" try: From 6c18f73ca56bb72cb964aaa668c3f8ac14237619 Mon Sep 17 00:00:00 2001 From: Raghav Dixit <34462078+raghavdixit99@users.noreply.github.com> Date: Mon, 19 Feb 2024 13:22:02 -0500 Subject: [PATCH 03/31] community[patch]: LanceDB integration improvements/fixes (#16173) Hi, I'm from the LanceDB team. Improves LanceDB integration by making it easier to use - now you aren't required to create tables manually and pass them in the constructor, although that is still backward compatible. Bug fix - pandas was being used even though it's not a dependency for LanceDB or langchain PS - this issue was raised a few months ago but lost traction. It is a feature improvement for our users kindly review this , Thanks ! --- .../integrations/vectorstores/lancedb.ipynb | 178 ++++++++++++++---- .../data_connection/vectorstores/index.mdx | 2 +- .../vectorstores/lancedb.py | 84 +++++++-- .../vectorstores/test_lancedb.py | 34 ++-- 4 files changed, 225 insertions(+), 73 deletions(-) diff --git a/docs/docs/integrations/vectorstores/lancedb.ipynb b/docs/docs/integrations/vectorstores/lancedb.ipynb index ab5c56eb8f3cd..18eb519eecd3b 100644 --- a/docs/docs/integrations/vectorstores/lancedb.ipynb +++ b/docs/docs/integrations/vectorstores/lancedb.ipynb @@ -14,14 +14,50 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "bfcf346a", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: lancedb in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (0.4.4)\n", + "Requirement already satisfied: deprecation in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (2.1.0)\n", + "Requirement already satisfied: pylance==0.9.6 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (0.9.6)\n", + "Requirement already satisfied: ratelimiter~=1.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (1.2.0.post0)\n", + "Requirement already satisfied: retry>=0.9.2 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (0.9.2)\n", + "Requirement already satisfied: tqdm>=4.27.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (4.66.1)\n", + "Requirement already satisfied: pydantic>=1.10 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (2.4.2)\n", + "Requirement already satisfied: attrs>=21.3.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (23.1.0)\n", + "Requirement already satisfied: semver>=3.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (3.0.2)\n", + "Requirement already satisfied: cachetools in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (5.3.2)\n", + "Requirement already satisfied: pyyaml>=6.0 in 
/Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (6.0.1)\n", + "Requirement already satisfied: click>=8.1.7 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (8.1.7)\n", + "Requirement already satisfied: requests>=2.31.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (2.31.0)\n", + "Requirement already satisfied: overrides>=0.7 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from lancedb) (7.4.0)\n", + "Requirement already satisfied: pyarrow>=12 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pylance==0.9.6->lancedb) (14.0.2)\n", + "Requirement already satisfied: numpy>=1.22 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pylance==0.9.6->lancedb) (1.24.4)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pydantic>=1.10->lancedb) (0.5.0)\n", + "Requirement already satisfied: pydantic-core==2.10.1 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pydantic>=1.10->lancedb) (2.10.1)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from pydantic>=1.10->lancedb) (4.8.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (3.3.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (2.0.6)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from requests>=2.31.0->lancedb) (2023.7.22)\n", + "Requirement already satisfied: decorator>=3.4.2 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from retry>=0.9.2->lancedb) (5.1.1)\n", + "Requirement already satisfied: py<2.0.0,>=1.4.26 in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from retry>=0.9.2->lancedb) (1.11.0)\n", + "Requirement already satisfied: packaging in /Users/raghavdixit/Desktop/langchain_lance/.dev_env/lib/python3.11/site-packages (from deprecation->lancedb) (23.2)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], "source": [ - "%pip install --upgrade --quiet lancedb" + "! 
pip install lancedb" ] }, { @@ -34,20 +70,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "a0361f5c-e6f4-45f4-b829-11680cf03cec", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "OpenAI API Key: ········\n" - ] - } - ], + "outputs": [], "source": [ "import getpass\n", "import os\n", @@ -57,15 +85,15 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "aac9563e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "from langchain_community.vectorstores import LanceDB\n", - "from langchain_openai import OpenAIEmbeddings" + "from langchain.embeddings import OpenAIEmbeddings\n", + "from langchain.vectorstores import LanceDB" ] }, { @@ -75,14 +103,13 @@ "metadata": {}, "outputs": [], "source": [ + "from langchain.document_loaders import TextLoader\n", "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain_community.document_loaders import TextLoader\n", "\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "\n", "documents = CharacterTextSplitter().split_documents(documents)\n", - "\n", "embeddings = OpenAIEmbeddings()" ] }, @@ -93,22 +120,7 @@ "metadata": {}, "outputs": [], "source": [ - "import lancedb\n", - "\n", - "db = lancedb.connect(\"/tmp/lancedb\")\n", - "table = db.create_table(\n", - " \"my_table\",\n", - " data=[\n", - " {\n", - " \"vector\": embeddings.embed_query(\"Hello World\"),\n", - " \"text\": \"Hello World\",\n", - " \"id\": \"1\",\n", - " }\n", - " ],\n", - " mode=\"overwrite\",\n", - ")\n", - "\n", - "docsearch = LanceDB.from_documents(documents, embeddings, connection=table)\n", + "docsearch = LanceDB.from_documents(documents, embeddings)\n", "\n", "query = \"What did the president say about Ketanji Brown Jackson\"\n", "docs = docsearch.similarity_search(query)" @@ -116,7 +128,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "9c608226", "metadata": {}, "outputs": [ @@ -136,7 +148,7 @@ "\n", "I’ve worked on these issues a long time. \n", "\n", - "I know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", + "I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", "\n", "So let’s not abandon our streets. Or choose between safety and equal justice. \n", "\n", @@ -192,11 +204,97 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "a359ed74", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n", + "\n", + "Officer Mora was 27 years old. \n", + "\n", + "Officer Rivera was 22. \n", + "\n", + "Both Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n", + "\n", + "I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n", + "\n", + "I’ve worked on these issues a long time. 
\n", + "\n", + "I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", + "\n", + "So let’s not abandon our streets. Or choose between safety and equal justice. \n", + "\n", + "Let’s come together to protect our communities, restore trust, and hold law enforcement accountable. \n", + "\n", + "That’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \n", + "\n", + "That’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. \n", + "\n", + "We should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \n", + "\n", + "I ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe. \n", + "\n", + "And I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. \n", + "\n", + "And I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \n", + "\n", + "Ban assault weapons and high-capacity magazines. \n", + "\n", + "Repeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued. \n", + "\n", + "These laws don’t infringe on the Second Amendment. They save lives. \n", + "\n", + "The most fundamental right in America is the right to vote – and to have it counted. And it’s under assault. \n", + "\n", + "In state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \n", + "\n", + "We cannot let this happen. \n", + "\n", + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "\n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n", + "\n", + "A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", + "\n", + "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n", + "\n", + "We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. 
\n", + "\n", + "We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n", + "\n", + "We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster.\n" + ] + } + ], + "source": [ + "print(docs[0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "12ca9ea8-3d09-49fb-922e-47c64ba90f28", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'vector': [-0.005863776430487633, -0.0019847142975777388, -0.004525014664977789, -0.002664136001840234, -0.0007940530776977539, 0.01969318464398384, 0.01712276227772236, 0.008474362082779408, -0.01931833289563656, -0.016988886520266533, 0.01086405199021101, 0.010763644240796566, -0.0004455566522665322, -0.007537228986620903, -0.003405475290492177, -0.0009003172744996846, 0.03338871896266937, -0.009672553278505802, 0.007657717447727919, -0.03087184764444828, -0.014016835950314999, 0.003234783187508583, 0.014552340842783451, 0.0068009099923074245, 0.0008007469004951417, 0.010261609219014645, 0.03170187771320343, -0.010013937950134277, 0.011004622094333172, -0.018608788028359413, -0.01729680225253105, 0.0061917733401060104, -0.036789171397686005, -0.018448136746883392, -0.02779269404709339, -0.0061415694653987885, 0.0002734002482611686, -0.011084947735071182, 0.018943479284644127, -0.014217650517821312, 0.036173343658447266, -0.02574438974261284, 0.002319404622539878, -0.01838119886815548, -0.019104130566120148, 0.017952794209122658, -0.00919059943407774, -0.020764194428920746, -0.026052303612232208, 0.025610512122511864, 0.044580765068531036, 0.0020282240584492683, -0.029211781919002533, -0.024994682520627975, 0.011586982756853104, -0.013735695742070675, -0.013327373191714287, 0.009378026239573956, -0.01097115222364664, -0.011607064865529537, 0.013882959261536598, 0.0014149037888273597, -0.02219666913151741, 0.01697549782693386, -0.009411495178937912, -0.01838119886815548, 0.0012860479764640331, 0.02172810398042202, -0.003882409306243062, 0.015797387808561325, 0.054246626794338226, 0.0028314811643213034, 0.026186181232333183, -0.0068678478710353374, 0.031621553003787994, -0.019719960168004036, -0.005365087650716305, -0.004725828766822815, -0.0011948448373004794, -0.017725205048918724, 0.022451035678386688, -0.01289896946400404, -0.02246442250907421, 0.015917876735329628, 0.013206885196268559, -0.014579115435481071, -0.002242425922304392, -0.0010567849967628717, 0.002655768534168601, 0.0006116467993706465, 0.013006070628762245, 0.024378851056098938, -0.003266578773036599, 0.006626870948821306, -0.009639084339141846, 0.015261884778738022, -0.02694927528500557, 0.02162100188434124, 0.008112896233797073, -0.026386994868516922, 0.016881786286830902, -0.02089807018637657, -0.026453932747244835, -0.011473188176751137, -0.028970805928111076, -0.02961341105401516, -0.006188426166772842, 0.002182181691750884, 0.004344281740486622, 0.011011315509676933, -0.006827685050666332, 0.009029948152601719, 0.0015763919800519943, 0.0075706979259848595, -0.011533432640135288, -0.02203601785004139, -0.018314260989427567, -0.025583738461136818, 0.022330546751618385, -0.03890441730618477, 0.019037192687392235, 0.014445239678025246, 0.0022390789818018675, -0.027953345328569412, 0.01969318464398384, -0.019974324852228165, -0.014164099469780922, 0.008199915289878845, 0.0008442566613666713, 0.003725104732438922, -0.011553513817489147, -0.011473188176751137, 0.023334616795182228, -0.008400729857385159, 
0.011406250298023224, 0.007885306142270565, -0.02093823440372944, 0.01755116693675518, -0.01376247126609087, -0.01838119886815548, 0.01917106844484806, -0.01279856264591217, -0.02579793892800808, -0.01538237277418375, 0.01271823700517416, 0.021272923797369003, 0.0005706471856683493, 0.005903939250856638, 0.014552340842783451, 0.015810776501893997, 0.014766542240977287, -0.01603836566209793, -0.0003526800428517163, -0.007845143787562847, 0.004970152862370014, -0.002126957755535841, -0.024539504200220108, 0.0015303720720112324, 0.008969703689217567, 0.0027461349964141846, 0.006509729195386171, -0.01994754932820797, -0.009331169538199902, 0.03649464622139931, 0.02314719185233116, 0.016426606103777885, -0.014498789794743061, 0.02684217318892479, -0.0007497065817005932, 0.02554357424378395, 0.01915767975151539, 0.017899245023727417, -0.015288659371435642, 0.02773914486169815, 0.00015939632430672646, 0.007778205908834934, 0.018407974392175674, -0.008748807944357395, -0.02694927528500557, 0.01713615097105503, 0.01801973208785057, 0.0008266853983514011, 0.012222895398736, 0.04380428418517113, -0.023120416328310966, -0.009337862953543663, 0.017939407378435135, 0.0074836784042418, -0.023334616795182228, -0.007443515583872795, -0.0010659890249371529, 0.020871296525001526, 0.011138497851788998, -0.012832031585276127, -0.6456044912338257, -0.014552340842783451, 0.017484229058027267, -0.012115794233977795, -0.0034573522862046957, 0.010121039114892483, -0.0011714164866134524, 0.01785908080637455, -0.016426606103777885, 0.01538237277418375, -0.013534881174564362, 0.012805256061255932, 0.0006769114406779408, -0.022852662950754166, -0.026092467829585075, -0.027926571667194366, -0.013039539568126202, -0.00830701645463705, 0.031139599159359932, -0.006164997816085815, -0.02611924149096012, 0.004387791734188795, -0.006108100526034832, 0.0072493948973715305, 0.008353873156011105, 0.015676898881793022, 0.020509829744696617, -0.016105303540825844, -0.015650125220417976, 0.010515973903238773, -0.030175691470503807, 0.03204995393753052, -0.0017805531388148665, 0.0056227995082736015, 0.040136076509952545, -0.0022223445121198893, 0.0030105405021458864, 0.022866051644086838, 0.013668757863342762, 0.021808428689837456, -0.012336689978837967, 0.024378851056098938, 0.03954702243208885, -0.0028113997541368008, 0.025664063170552254, -0.00548222940415144, 0.021768266335129738, -0.010094263590872288, 0.0003871950029861182, 0.0241780374199152, -0.005867123603820801, 0.019559308886528015, -0.000377781834686175, 0.001261782948859036, -0.015730449929833412, -0.002237405627965927, -0.007162375375628471, -0.02146035060286522, 0.0009747859439812601, 0.0026674827095121145, -0.0057165129110217094, 0.008655094541609287, -0.022544747218489647, -0.011131804436445236, -0.01958608441054821, 0.02856917679309845, 0.012336689978837967, 0.011801185086369514, 0.018916703760623932, -0.0066201770678162575, 0.014659442007541656, 0.004689013119786978, -0.01013442687690258, -0.03515588492155075, 0.010054100304841995, -0.004340935032814741, 0.026025528088212013, -0.013019458390772343, -0.005673002917319536, 0.011312536895275116, 0.0013747409684583545, -0.00547218881547451, 7.080794603098184e-05, -0.0010944376699626446, 0.01607852801680565, 0.008929540403187275, -0.02172810398042202, 0.00571985961869359, 0.003490821458399296, 0.012283138930797577, 0.025463249534368515, 0.0025536881294101477, 0.011185354553163052, -0.017992958426475525, 0.010930989868938923, 0.02230377122759819, -0.023321229964494705, 0.0025202189572155476, 0.012390240095555782, 
-0.03170187771320343, -0.003520943457260728, -0.011841347441077232, 0.02370947040617466, 0.007282864302396774, 0.01650693267583847, 0.013193497434258461, -0.013949898071587086, -0.010355322621762753, 0.036066241562366486, -0.03818148374557495, -0.015275271609425545, 0.005187701899558306, -0.018889928236603737, -0.017618104815483093, 0.006600095424801111, -0.01665419526398182, 0.00783175602555275, 0.018622176721692085, -0.015061070211231709, -0.019974324852228165, 0.005164273548871279, -2.9782220735796727e-05, 0.013012764044106007, -0.03906506672501564, 0.015502861700952053, 0.005204436369240284, 0.006499688606709242, -0.003090866142883897, -0.0010735195828601718, -0.01049589179456234, 0.0033569452352821827, -0.0045149740763008595, 0.020978396758437157, 0.009210680611431599, 0.014846867881715298, -0.005047131795436144, 0.013802633620798588, -0.010904214344918728, 0.016774684190750122, -0.011325924657285213, -0.0029034395702183247, -0.001386455143801868, -0.006041162647306919, -0.003771961433812976, -0.02480725571513176, -0.02579793892800808, -0.0007149824523366988, -0.002642381004989147, -0.030041813850402832, -0.027498167008161545, 0.009779654443264008, -0.0185418501496315, -0.021607615053653717, -0.005960837006568909, 0.0074836784042418, -0.0010919275227934122, -0.01571706309914589, 0.01543592382222414, -0.004866398870944977, -0.02208956889808178, 0.01602497696876526, 0.0035744940396398306, -0.02779269404709339, -0.01475315447896719, 0.009833205491304398, -0.010268302634358406, 0.04099288582801819, 0.013461249880492687, 0.006600095424801111, -0.027873020619153976, 0.0012266404228284955, -0.013949898071587086, -0.0015337190125137568, -0.0008810725994408131, 0.03740500286221504, 0.017015662044286728, -0.02878337912261486, 0.01376247126609087, 0.016627419739961624, 0.011607064865529537, -0.007389965001493692, -0.013166721910238266, -0.02532937191426754, -0.007021805737167597, 0.018394585698843002, 0.016105303540825844, 0.004120039287954569, 0.014994132332503796, -0.02423158846795559, 0.020871296525001526, -0.0006279629305936396, -0.007784899789839983, -0.01801973208785057, 0.009813123382627964, 0.012597748078405857, 0.030148915946483612, 0.0088559091091156, 0.00596753042191267, 0.0033619655296206474, 0.02862272784113884, 0.011265680193901062, 0.011138497851788998, 0.02214311994612217, -0.010455729439854622, -0.01828748546540737, -0.03842246159911156, 0.009752878919243813, -0.031621553003787994, 0.0212996993213892, 0.0025720959529280663, -0.005709819030016661, -0.027350902557373047, -0.02632005698978901, -0.03234448283910751, 0.009712716564536095, 0.018394585698843002, -0.009732797741889954, 0.030363118276000023, -0.010054100304841995, -0.016828235238790512, 0.011613758280873299, 0.016855010762810707, 0.017216475680470467, -0.008273547515273094, 0.004893174394965172, 0.0032967007718980312, -0.0019311638316139579, 0.011680696159601212, 0.010857357643544674, -0.0015220048371702433, 8.377720223506913e-05, 0.01875605247914791, 0.015368985012173653, 0.031353797763586044, -0.01013442687690258, -0.02167455293238163, 0.0024649950210005045, -0.0015939632430672646, 0.04184969142079353, 0.004638809245079756, 0.02615940570831299, 0.020228689536452293, 0.016373055055737495, -0.001106151845306158, 0.02574438974261284, -0.031675104051828384, 0.0442059151828289, 0.00973949208855629, 0.030416667461395264, 0.013695533387362957, 0.00031586410477757454, 0.002749481936916709, -0.0013362516183406115, 0.008153058588504791, 0.01760471612215042, -0.03510233387351036, -0.0022072833962738514, 0.02083113230764866, 
0.014659442007541656, 0.02575777657330036, 0.033549368381500244, 0.03060409426689148, 0.01654709503054619, -0.017511002719402313, -0.007543922867625952, 0.0015379026299342513, -0.010462422855198383, 0.007677799090743065, -0.0044681173749268055, -0.01812683418393135, 0.0018374505452811718, -0.017926020547747612, 0.009993856772780418, 0.00771796191111207, 0.031675104051828384, 0.022892825305461884, -0.004879786632955074, 0.015181559138000011, 0.0022223445121198893, 0.003467393107712269, -0.00917051825672388, -0.03413842245936394, 0.02721702679991722, 0.0240307729691267, -0.014900418929755688, -0.003497515106573701, -0.010462422855198383, -0.021594226360321045, -0.021085496991872787, 0.019452208653092384, -0.01739051565527916, -0.007624248508363962, -0.008688563480973244, 0.029800837859511375, -0.004983540624380112, -0.016051752492785454, 0.030684420838952065, -0.01376247126609087, 0.017899245023727417, -0.0014584135496988893, 0.005458801053464413, -0.001113682403229177, -0.022999927401542664, -0.0038388995453715324, 0.008782276883721352, -0.0030590705573558807, 0.012624523602426052, -0.011807878501713276, 0.023200741037726402, -0.017939407378435135, 0.01827409863471985, -0.009839898906648159, -0.013461249880492687, 0.010382097214460373, 0.002767889993265271, -0.003795389784500003, -0.02741784043610096, -0.014378301799297333, 0.004387791734188795, -0.012082325294613838, -0.002431526081636548, -0.024419015273451805, -0.04466109350323677, -0.016573870554566383, 0.13719630241394043, 0.02590504102408886, -0.00403301976621151, 0.007021805737167597, -0.006486300844699144, 0.0037083702627569437, -0.003395434468984604, -0.004461423493921757, 0.011332618072628975, -0.018461523577570915, 0.002367934910580516, 0.009324475191533566, -0.0032833132427185774, -0.003731798380613327, 0.012517422437667847, 0.003226415952667594, 0.018822990357875824, -0.025677450001239777, -0.010060794651508331, -0.013990060426294804, -0.01472637988626957, -0.005027050152420998, 0.021821817383170128, 0.032826438546180725, -0.02428513765335083, -0.01634628139436245, 0.031246699392795563, 0.026306668296456337, 0.012691461481153965, 0.003889102954417467, -0.002913480391725898, 0.014980744570493698, 0.02241087146103382, -0.0004777706053573638, -0.02302670292556286, -0.002781277522444725, 0.017162924632430077, -0.033817119896411896, 0.023227516561746597, 0.016413219273090363, 0.013153334148228168, 9.360873082187027e-05, 0.004320853389799595, -0.01154012605547905, 0.029907938092947006, -0.01634628139436245, 0.009157130494713783, 0.0020901416428387165, 0.01021475251764059, 0.0007053600857034326, 0.016948724165558815, -0.006663686595857143, -0.0106498496606946, -0.012939132750034332, 0.0024951172526925802, 0.012544197961688042, -0.0002017555816564709, -0.005006968975067139, -0.019238006323575974, 0.02329445444047451, -0.026909111067652702, -0.03411164879798889, 0.002063366584479809, -0.01650693267583847, 0.005686390679329634, -0.019666410982608795, -0.0056930845603346825, 0.003350251354277134, -0.0167612973600626, -0.013220272958278656, -0.006221895571798086, -0.008420811034739017, -0.03834213688969612, 0.02459305338561535, 0.009444964118301868, 0.011004622094333172, 0.03293353691697121, 0.0016491871792823076, 0.005070560146123171, -0.0012902315938845277, 0.006767440587282181, -0.042278096079826355, -0.0022859356831759214, 0.004946724511682987, -0.0013019457692280412, 0.00628213956952095, -0.01822054758667946, -0.00854129996150732, -0.02433868870139122, 0.037726305425167084, -0.00562949338927865, 0.0016885133227333426, 
0.014619278721511364, -0.02183520421385765, -0.002321078209206462, 0.01796618290245533, 0.024218199774622917, 0.018033120781183243, -0.002704298822209239, -0.006185079459100962, 0.015904489904642105, -0.030041813850402832, -0.016908559948205948, -0.0203224029392004, -0.005957489833235741, -0.016373055055737495, 0.0074769845232367516, 0.02590504102408886, -0.01289896946400404, -0.011098334565758705, -0.005438719876110554, -0.011607064865529537, 0.0039058374240994453, 0.017484229058027267, -0.004863052163273096, 0.0024716889020055532, 0.01947898417711258, 0.007222619839012623, 0.001441679080016911, -0.02365592122077942, 0.0056897373870015144, -0.018367810174822807, 0.035798490047454834, 0.02194230444729328, -0.0063256495632231236, -0.008661787956953049, 0.006837725639343262, -0.021487126126885414, 0.018207158893346786, 0.0043978323228657246, 0.002235732041299343, 0.020603543147444725, -0.012269752100110054, -0.022009244188666344, -0.011238904669880867, -0.01645338162779808, -0.014445239678025246, 0.021540677174925804, 0.009913531132042408, 0.008159752935171127, -0.014485402964055538, -0.011707471683621407, -0.00022989050194155425, -0.04701731353998184, 0.014405076391994953, -0.014699604362249374, 0.006265405099838972, 0.000786940916441381, -0.01755116693675518, 0.0030791519675403833, -0.030577318742871284, -0.007256088778376579, -0.024834031239151955, -0.0010777032002806664, -0.0423048697412014, -0.021179210394620895, -0.0007501249783672392, -0.026547646149992943, 0.03692304715514183, 0.02684217318892479, 0.019345106557011604, 0.0041702426970005035, -0.012055549770593643, 0.0120890187099576, 0.01522172149270773, 0.01645338162779808, -0.007008417975157499, 0.023588981479406357, -0.009953693486750126, 0.04289392754435539, 0.031996406614780426, 0.018247323110699654, -0.028488850221037865, 0.008869296871125698, 0.008581462316215038, 0.02084452100098133, -0.028194323182106018, -0.004401179030537605, -0.011198742315173149, -0.022076182067394257, -0.023856734856963158, -0.008835827000439167, -0.002734420821070671, -0.0035811876878142357, -0.014284588396549225, 7.746252776996698e-06, 0.04931998252868652, -0.012450484558939934, 0.029185006394982338, -0.011894898489117622, 0.02167455293238163, -0.015047682449221611, -0.004223793279379606, -0.008849214762449265, -0.014927193522453308, -0.02057676762342453, -0.04626760631799698, 0.0051709674298763275, 0.03373679518699646, -0.013320679776370525, 0.009023253805935383, -0.0013772511156275868, -0.010382097214460373, -0.015168171375989914, 0.013521494343876839, 0.010669930838048458, -0.018608788028359413, -0.018501687794923782, 0.016828235238790512, -0.019974324852228165, -0.00033385370625182986, -0.00965916644781828, -0.027190251275897026, -0.029907938092947006, 0.0012400280684232712, 0.0006639421335421503, 0.01015450805425644, 0.010837276466190815, -0.007597472984343767, -0.015128008089959621, -0.027297353371977806, -0.014364914037287235, 0.008782276883721352, -0.005820266902446747, 0.011272373609244823, 0.007543922867625952, 0.00016619471716694534, -0.013789246790111065, 0.02172810398042202, 0.033549368381500244, 0.004357669502496719, 0.005398556590080261, 0.02700282447040081, -0.013775859028100967, -0.0007513800519518554, 0.00041815388249233365, 0.006379199679940939, -0.016774684190750122, -0.03071119636297226, 0.024271750822663307, 0.018836377188563347, -0.012992682866752148, -0.017002273350954056, -0.0008354710298590362, -0.018140221014618874, -0.010254914872348309, -0.01480670552700758, 0.02518210932612419, -0.001659227884374559, 
-0.010984539985656738, -0.020282240584492683, -0.004571871366351843, -0.006262058392167091, 0.005890551954507828, 0.02255813591182232, -0.01587771438062191, 0.011098334565758705, -0.0019261435372754931, 0.00572990020737052, 0.00644948473200202, -0.01433813851326704, 0.03164832666516304, -0.01827409863471985, 0.0040397136472165585, 0.0010484177619218826, 0.020697256550192833, -0.031086048111319542, 0.0005011989269405603, 0.024820642545819283, 0.024298526346683502, 0.0009639085037633777, 0.004568524658679962, -0.012343383394181728, -0.0011270700488239527, -0.01728341355919838, -0.007938857190310955, -0.026239730417728424, -0.020483054220676422, 0.00014914642088115215, 0.0016567177372053266, 0.007851837202906609, -0.0022240178659558296, -0.034754253923892975, -0.0017253292025998235, -0.003218048717826605, -0.019438819959759712, -0.016279341652989388, -0.018582012504339218, 0.025396311655640602, -0.0009371332707814872, -0.017484229058027267, -0.02178165316581726, -0.0014542299322783947, 0.027444615960121155, -0.004106651525944471, 0.009578839875757694, 0.021072110161185265, 0.003062417497858405, -0.027042988687753677, 0.01522172149270773, -0.038877639919519424, 0.007851837202906609, -0.03547718748450279, -0.005974224302917719, -0.03279966115951538, -0.013909734785556793, 0.00917051825672388, -0.002953643212094903, -0.025918427854776382, -0.020857907831668854, -0.007577391806989908, 0.0018910010112449527, 0.0018290833104401827, -0.017403902485966682, -0.006459525786340237, -0.003008867148309946, -0.00241646496579051, -0.013963285833597183, -0.01980028674006462, 0.05140845105051994, -0.016640808433294296, -0.005783450789749622, 0.0005053825443610549, -0.02532937191426754, -0.009799735620617867, 0.00089613365707919, 0.010763644240796566, 0.012537503615021706, -0.01013442687690258, -0.02266523614525795, -0.010623074136674404, 0.022705400362610817, -0.036949824541807175, -0.03055054321885109, -0.0149673568084836, 0.004394485615193844, -0.02037595398724079, 0.004702400416135788, 0.008547993376851082, -0.012932438403367996, 0.020014489069581032, 0.01303284615278244, 0.01488703116774559, -0.012517422437667847, -0.010040713474154472, -0.01602497696876526, 0.004357669502496719, -0.015342210419476032, -0.013073008507490158, -0.03306741639971733, -0.017939407378435135, 0.027096537873148918, -8.273129060398787e-05, -0.014458627440035343, -0.009726104326546192, -0.020242078229784966, -0.023776408284902573, -0.00950520858168602, -0.03175542876124382, 0.002734420821070671, 0.031166374683380127, 0.02356220781803131, 0.004628768656402826, 0.024164650589227676, -0.011714165098965168, 0.023120416328310966, -0.00443799514323473, -0.0036749010905623436, 0.01927816867828369, -0.037056926637887955, 0.036066241562366486, 0.0077514308504760265, -0.0211524348706007, -0.0005325761740095913, 0.009304394014179707, -0.0036347382701933384, 0.029238557443022728, 0.01613207906484604, -0.0362536683678627, 0.0003723431145772338, 0.0048965211026370525, 0.0051709674298763275, 0.011680696159601212, 0.006784175522625446, 0.0164935439825058, -0.0384492389857769, -0.023388167843222618, -0.0013287210604175925, -0.0023545471485704184, -0.008574768900871277, -0.01755116693675518, 0.01281864382326603, 0.0014215976698324084, 5.653130938299e-05, -0.015757225453853607, -0.001877613365650177, 0.03665529564023018, -0.01921123079955578, 0.028087222948670387, 0.015636736527085304, -0.009257537312805653, 0.018582012504339218, 0.02725718915462494, -0.016640808433294296, -0.005117416847497225, -0.005201089195907116, 0.015061070211231709, 
0.012537503615021706, -0.0033569452352821827, 0.00042484767618589103, 0.036173343658447266, -0.02093823440372944, -0.005298149771988392, -0.012477260082960129, 0.02277233824133873, -0.01008087582886219, -0.005455454345792532, -0.002896745689213276, 0.00771796191111207, 0.0073230271227657795, -0.016587257385253906, -0.008688563480973244, 0.013467943295836449, -0.02575777657330036, 0.0033318432979285717, -0.019653022289276123, -0.014953969046473503, -0.010261609219014645, -0.010870745405554771, -0.0026055651251226664, -0.006968255154788494, -0.02282588742673397, -0.0021236108150333166, -0.012631217017769814, -0.007637635804712772, 0.021955693140625954, 0.23198063671588898, 0.003340210532769561, 0.005271374247968197, 0.016252567991614342, -0.013260435312986374, 0.030577318742871284, 0.010141120292246342, 0.011801185086369514, -0.003544371807947755, 0.021018559113144875, -0.01392312254756689, -0.010917602106928825, -0.021594226360321045, 0.004434648435562849, 0.0007823389023542404, -0.008869296871125698, -0.035798490047454834, -0.02345510572195053, -0.007938857190310955, 0.002749481936916709, -0.01917106844484806, 0.00942488294094801, -0.0058938986621797085, -0.014538953080773354, 0.015810776501893997, 0.016051752492785454, 0.0073698838241398335, 0.014980744570493698, 0.00692139845341444, -0.002874990925192833, -0.022892825305461884, -0.006335690151900053, 0.012390240095555782, -0.000747614772990346, -0.0023311187978833914, -0.011787797324359417, -0.024941131472587585, -0.012336689978837967, -0.0055993711575865746, 0.015556411817669868, -0.020616931840777397, 0.03245158493518829, 0.0018876540707424283, 0.007242701482027769, -0.004287384450435638, 0.041448064148426056, -0.00667372765019536, -0.013039539568126202, 0.0083806486800313, 0.006014387123286724, -0.03175542876124382, 0.011707471683621407, 0.01791263185441494, 0.02565067633986473, 0.0006677074125036597, -0.015569799579679966, 0.0005300659686326981, 0.003358618589118123, -0.018394585698843002, -0.013675451278686523, -0.015757225453853607, 0.00861493218690157, -0.013635288923978806, 0.039921876043081284, -0.013882959261536598, 0.04053770750761032, 0.020871296525001526, 0.009250843897461891, 0.007952244952321053, -0.013019458390772343, -0.030068589374423027, 0.011841347441077232, -0.01151335146278143, -0.004846317693591118, -0.017564553767442703, -0.01733696460723877, 0.012537503615021706, 0.01135939359664917, 0.014016835950314999, -0.0024348730221390724, 0.003607962979003787, -0.01692194864153862, 0.010562830604612827, 0.004247221630066633, -0.00266246241517365, -0.035075556486845016, 0.022384095937013626, -0.0034874745178967714, -0.007490372285246849, 0.004682319238781929, 0.0035477187484502792, -0.015810776501893997, -0.014873643405735493, -0.00848774891346693, -0.0013136599445715547, -0.00976626668125391, 0.010362016037106514, 0.035022005438804626, -0.020094813778996468, 0.01859540119767189, -0.031407348811626434, 0.02172810398042202, 0.033442266285419464, -0.011064865626394749, -0.004893174394965172, -0.0010484177619218826, -0.001434985315427184, 0.039975427091121674, 0.020710645243525505, -0.026360219344496727, -0.0004292404919397086, -0.021607615053653717, -0.004451382905244827, -0.006914704572409391, -0.0019964284729212523, 0.018193772062659264, 0.02282588742673397, -0.021433575078845024, 0.02569083869457245, 0.0027327474672347307, -0.004769338760524988, -0.035691387951374054, -0.031166374683380127, -0.002039938233792782, 0.0015805755974724889, -0.020175140351057053, -0.0075706979259848595, -0.005197742488235235, 
... embedding vector values elided for readability ...], 'id': '0c906ab3-3786-477f-b13a-5a98367ceee6', '_distance': 0.4137815535068512}\n" + ] + } + ], + "source": [ + "print(docs[0].metadata)" + ] } ], "metadata": { @@ -215,7 +313,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/docs/docs/modules/data_connection/vectorstores/index.mdx index
b1242021a27ca..3b6d12699b8c5 100644 --- a/docs/docs/modules/data_connection/vectorstores/index.mdx +++ b/docs/docs/modules/data_connection/vectorstores/index.mdx @@ -131,7 +131,7 @@ table = db.create_table( raw_documents = TextLoader('../../../state_of_the_union.txt').load() text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) documents = text_splitter.split_documents(raw_documents) -db = LanceDB.from_documents(documents, OpenAIEmbeddings(), connection=table) +db = LanceDB.from_documents(documents, OpenAIEmbeddings()) ``` diff --git a/libs/community/langchain_community/vectorstores/lancedb.py b/libs/community/langchain_community/vectorstores/lancedb.py index 4ca68c92ca66b..414517793ee44 100644 --- a/libs/community/langchain_community/vectorstores/lancedb.py +++ b/libs/community/langchain_community/vectorstores/lancedb.py @@ -12,6 +12,18 @@ class LanceDB(VectorStore): """`LanceDB` vector store. To use, you should have ``lancedb`` python package installed. + You can install it with ``pip install lancedb``. + + Args: + connection: LanceDB connection to use. If not provided, a new connection + will be created. + embedding: Embedding to use for the vectorstore. + vector_key: Key to use for the vector in the database. Defaults to ``vector``. + id_key: Key to use for the id in the database. Defaults to ``id``. + text_key: Key to use for the text in the database. Defaults to ``text``. + table_name: Name of the table to use. Defaults to ``vectorstore``. + + Example: .. code-block:: python @@ -25,13 +37,14 @@ class LanceDB(VectorStore): def __init__( self, - connection: Any, - embedding: Embeddings, + connection: Optional[Any] = None, + embedding: Optional[Embeddings] = None, vector_key: Optional[str] = "vector", id_key: Optional[str] = "id", text_key: Optional[str] = "text", + table_name: Optional[str] = "vectorstore", ): - """Initialize with Lance DB connection""" + """Initialize with Lance DB vectorstore""" try: import lancedb except ImportError: @@ -39,19 +52,28 @@ def __init__( "Could not import lancedb python package. " "Please install it with `pip install lancedb`." ) - if not isinstance(connection, lancedb.db.LanceTable): - raise ValueError( - "connection should be an instance of lancedb.db.LanceTable, ", - f"got {type(connection)}", - ) - self._connection = connection + self.lancedb = lancedb self._embedding = embedding self._vector_key = vector_key self._id_key = id_key self._text_key = text_key + self._table_name = table_name + + if self._embedding is None: + raise ValueError("embedding should be provided") + + if connection is not None: + if not isinstance(connection, lancedb.db.LanceTable): + raise ValueError( + "connection should be an instance of lancedb.db.LanceTable, ", + f"got {type(connection)}", + ) + self._connection = connection + else: + self._connection = self._init_table() @property - def embeddings(self) -> Embeddings: + def embeddings(self) -> Optional[Embeddings]: return self._embedding def add_texts( @@ -74,7 +96,7 @@ def add_texts( # Embed texts and create documents docs = [] ids = ids or [str(uuid.uuid4()) for _ in texts] - embeddings = self._embedding.embed_documents(list(texts)) + embeddings = self._embedding.embed_documents(list(texts)) # type: ignore for idx, text in enumerate(texts): embedding = embeddings[idx] metadata = metadatas[idx] if metadatas else {} @@ -86,7 +108,6 @@ def add_texts( **metadata, } ) - self._connection.add(docs) return ids @@ -102,14 +123,23 @@ def similarity_search( Returns: List of documents most similar to the query. 
""" - embedding = self._embedding.embed_query(query) - docs = self._connection.search(embedding).limit(k).to_df() + embedding = self._embedding.embed_query(query) # type: ignore + docs = ( + self._connection.search(embedding, vector_column_name=self._vector_key) + .limit(k) + .to_arrow() + ) + columns = docs.schema.names return [ Document( - page_content=row[self._text_key], - metadata=row[docs.columns != self._text_key], + page_content=docs[self._text_key][idx].as_py(), + metadata={ + col: docs[col][idx].as_py() + for col in columns + if col != self._text_key + }, ) - for _, row in docs.iterrows() + for idx in range(len(docs)) ] @classmethod @@ -134,3 +164,23 @@ def from_texts( instance.add_texts(texts, metadatas=metadatas, **kwargs) return instance + + def _init_table(self) -> Any: + import pyarrow as pa + + schema = pa.schema( + [ + pa.field( + self._vector_key, + pa.list_( + pa.float32(), + len(self.embeddings.embed_query("test")), # type: ignore + ), + ), + pa.field(self._id_key, pa.string()), + pa.field(self._text_key, pa.string()), + ] + ) + db = self.lancedb.connect("/tmp/lancedb") + tbl = db.create_table(self._table_name, schema=schema, mode="overwrite") + return tbl diff --git a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py index 37098e221141d..bde46e800e116 100644 --- a/libs/community/tests/integration_tests/vectorstores/test_lancedb.py +++ b/libs/community/tests/integration_tests/vectorstores/test_lancedb.py @@ -1,8 +1,11 @@ +import pytest + from langchain_community.vectorstores import LanceDB from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings -def test_lancedb() -> None: +@pytest.mark.requires("lancedb") +def test_lancedb_with_connection() -> None: import lancedb embeddings = FakeEmbeddings() @@ -23,22 +26,23 @@ def test_lancedb() -> None: assert "text 1" in result_texts -def test_lancedb_add_texts() -> None: - import lancedb +@pytest.mark.requires("lancedb") +def test_lancedb_without_connection() -> None: + embeddings = FakeEmbeddings() + texts = ["text 1", "text 2", "item 3"] + + store = LanceDB(embedding=embeddings) + store.add_texts(texts) + result = store.similarity_search("text 1") + result_texts = [doc.page_content for doc in result] + assert "text 1" in result_texts + +@pytest.mark.requires("lancedb") +def test_lancedb_add_texts() -> None: embeddings = FakeEmbeddings() - db = lancedb.connect("/tmp/lancedb") - texts = ["text 1"] - vectors = embeddings.embed_documents(texts) - table = db.create_table( - "my_table", - data=[ - {"vector": vectors[idx], "id": text, "text": text} - for idx, text in enumerate(texts) - ], - mode="overwrite", - ) - store = LanceDB(table, embeddings) + + store = LanceDB(embedding=embeddings) store.add_texts(["text 2"]) result = store.similarity_search("text 2") result_texts = [doc.page_content for doc in result] From 8009be862ee2f3bd6728ce359ae78f9e58709d09 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:27:26 -0700 Subject: [PATCH 04/31] core[patch]: Release 0.1.24 (#17744) --- libs/core/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 7aeb592eca11a..b87a95dd7dd64 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-core" -version = "0.1.23" +version = "0.1.24" description = "Building applications with LLMs 
through composability" authors = [] license = "MIT" From 0d294760e742e0707a71afc7aad22e4d00b54ae5 Mon Sep 17 00:00:00 2001 From: Aymeric Roucher <69208727+aymeric-roucher@users.noreply.github.com> Date: Mon, 19 Feb 2024 19:33:15 +0100 Subject: [PATCH 05/31] Community: Fuse HuggingFace Endpoint-related classes into one (#17254) ## Description Fuse HuggingFace Endpoint-related classes into one: - [HuggingFaceHub](https://github.com/langchain-ai/langchain/blob/5ceaf784f324064b868a3cfed3fab7554173e7b3/libs/community/langchain_community/llms/huggingface_hub.py) - [HuggingFaceTextGenInference](https://github.com/langchain-ai/langchain/blob/5ceaf784f324064b868a3cfed3fab7554173e7b3/libs/community/langchain_community/llms/huggingface_text_gen_inference.py) - and [HuggingFaceEndpoint](https://github.com/langchain-ai/langchain/blob/5ceaf784f324064b868a3cfed3fab7554173e7b3/libs/community/langchain_community/llms/huggingface_endpoint.py) Are fused into - HuggingFaceEndpoint ## Issue The duplication of classes created a lack of clarity, and the additional effort needed to develop classes led to issues like [this hack](https://github.com/langchain-ai/langchain/blob/5ceaf784f324064b868a3cfed3fab7554173e7b3/libs/community/langchain_community/llms/huggingface_endpoint.py#L159). ## Dependencies None, this removes dependencies. ## Twitter handle If you want to post about this: @AymericRoucher --------- Co-authored-by: Bagatur --- .../llms/huggingface_endpoint.ipynb | 238 +++++++++ .../integrations/llms/huggingface_hub.ipynb | 466 ------------------ .../llms/huggingface_textgen_inference.ipynb | 108 ---- docs/vercel.json | 8 + .../chat_models/huggingface.py | 27 +- .../llms/huggingface_endpoint.py | 389 +++++++++++---- .../llms/huggingface_hub.py | 3 + .../llms/huggingface_pipeline.py | 30 +- .../llms/huggingface_text_gen_inference.py | 3 + .../llms/test_huggingface_endpoint.py | 69 ++- 10 files changed, 614 insertions(+), 727 deletions(-) create mode 100644 docs/docs/integrations/llms/huggingface_endpoint.ipynb delete mode 100644 docs/docs/integrations/llms/huggingface_hub.ipynb delete mode 100644 docs/docs/integrations/llms/huggingface_textgen_inference.ipynb diff --git a/docs/docs/integrations/llms/huggingface_endpoint.ipynb b/docs/docs/integrations/llms/huggingface_endpoint.ipynb new file mode 100644 index 0000000000000..a71a987bac101 --- /dev/null +++ b/docs/docs/integrations/llms/huggingface_endpoint.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Huggingface Endpoints\n", + "\n", + ">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n", + "\n", + "The `Hugging Face Hub` also offers various endpoints to build ML applications.\n", + "This example showcases how to connect to the different Endpoint types.\n", + "\n", + "In particular, text generation inference is powered by [Text Generation Inference](https://github.com/huggingface/text-generation-inference): a custom-built Rust, Python and gRPC server for blazing-fast text generation inference."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.llms import HuggingFaceEndpoint" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation and Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use, you should have the ``huggingface_hub`` python [package installed](https://huggingface.co/docs/huggingface_hub/installation)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet huggingface_hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token\n", + "\n", + "from getpass import getpass\n", + "\n", + "HUGGINGFACEHUB_API_TOKEN = getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = HUGGINGFACEHUB_API_TOKEN" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare Examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.llms import HuggingFaceEndpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import LLMChain\n", + "from langchain.prompts import PromptTemplate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"Who won the FIFA World Cup in the year 1994? \"\n", + "\n", + "template = \"\"\"Question: {question}\n", + "\n", + "Answer: Let's think step by step.\"\"\"\n", + "\n", + "prompt = PromptTemplate.from_template(template)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Examples\n", + "\n", + "Here is an example of how you can access the `HuggingFaceEndpoint` integration of the free [Serverless Endpoints](https://huggingface.co/inference-endpoints/serverless) API." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "repo_id = \"mistralai/Mistral-7B-Instruct-v0.2\"\n", + "\n", + "llm = HuggingFaceEndpoint(\n", + " repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN\n", + ")\n", + "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", + "print(llm_chain.run(question))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dedicated Endpoint\n", + "\n", + "\n", + "The free serverless API lets you implement solutions and iterate in no time, but it may be rate limited for heavy use cases, since the loads are shared with other requests.\n", + "\n", + "For enterprise workloads, the best option is to use [Inference Endpoints - Dedicated](https://huggingface.co/inference-endpoints/dedicated).\n", + "This gives access to a fully managed infrastructure that offers more flexibility and speed.
These resources come with continuous support and uptime guarantees, as well as options like AutoScaling.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set the URL to your Inference Endpoint below\n", + "your_endpoint_url = \"https://fayjubiy2xqn36z0.us-east-1.aws.endpoints.huggingface.cloud\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = HuggingFaceEndpoint(\n", + " endpoint_url=f\"{your_endpoint_url}\",\n", + " max_new_tokens=512,\n", + " top_k=10,\n", + " top_p=0.95,\n", + " typical_p=0.95,\n", + " temperature=0.01,\n", + " repetition_penalty=1.03,\n", + ")\n", + "llm(\"What did foo say about bar?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "from langchain_community.llms import HuggingFaceEndpoint\n", + "\n", + "llm = HuggingFaceEndpoint(\n", + " endpoint_url=f\"{your_endpoint_url}\",\n", + " max_new_tokens=512,\n", + " top_k=10,\n", + " top_p=0.95,\n", + " typical_p=0.95,\n", + " temperature=0.01,\n", + " repetition_penalty=1.03,\n", + " streaming=True,\n", + ")\n", + "llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "agents", + "language": "python", + "name": "agents" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + }, + "vscode": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/llms/huggingface_hub.ipynb b/docs/docs/integrations/llms/huggingface_hub.ipynb deleted file mode 100644 index 67dbe3c41f385..0000000000000 --- a/docs/docs/integrations/llms/huggingface_hub.ipynb +++ /dev/null @@ -1,466 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "959300d4", - "metadata": {}, - "source": [ - "# Hugging Face Hub\n", - "\n", - ">The [Hugging Face Hub](https://huggingface.co/docs/hub/index) is a platform with over 120k models, 20k datasets, and 50k demo apps (Spaces), all open source and publicly available, in an online platform where people can easily collaborate and build ML together.\n", - "\n", - "This example showcases how to connect to the `Hugging Face Hub` and use different models." - ] - }, - { - "cell_type": "markdown", - "id": "1ddafc6d-7d7c-48fa-838f-0e7f50895ce3", - "metadata": {}, - "source": [ - "## Installation and Setup" - ] - }, - { - "cell_type": "markdown", - "id": "4c1b8450-5eaf-4d34-8341-2d785448a1ff", - "metadata": { - "tags": [] - }, - "source": [ - "To use, you should have the ``huggingface_hub`` python [package installed](https://huggingface.co/docs/huggingface_hub/installation)."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d772b637-de00-4663-bd77-9bc96d798db2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%pip install --upgrade --quiet huggingface_hub" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d597a792-354c-4ca5-b483-5965eec5d63d", - "metadata": {}, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - " ········\n" - ] - } - ], - "source": [ - "# get a token: https://huggingface.co/docs/api-inference/quicktour#get-your-api-token\n", - "\n", - "from getpass import getpass\n", - "\n", - "HUGGINGFACEHUB_API_TOKEN = getpass()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "b8c5b88c-e4b8-4d0d-9a35-6e8f106452c2", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"HUGGINGFACEHUB_API_TOKEN\"] = HUGGINGFACEHUB_API_TOKEN" - ] - }, - { - "cell_type": "markdown", - "id": "84dd44c1-c428-41f3-a911-520281386c94", - "metadata": {}, - "source": [ - "## Prepare Examples" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3fe7d1d1-241d-426a-acff-e208f1088871", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.llms import HuggingFaceHub" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "6620f39b-3d32-4840-8931-ff7d2c3e47e8", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.chains import LLMChain\n", - "from langchain.prompts import PromptTemplate" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "44adc1a0-9c0a-4f1e-af5a-fe04222e78d7", - "metadata": {}, - "outputs": [], - "source": [ - "question = \"Who won the FIFA World Cup in the year 1994? \"\n", - "\n", - "template = \"\"\"Question: {question}\n", - "\n", - "Answer: Let's think step by step.\"\"\"\n", - "\n", - "prompt = PromptTemplate.from_template(template)" - ] - }, - { - "cell_type": "markdown", - "id": "ddaa06cf-95ec-48ce-b0ab-d892a7909693", - "metadata": {}, - "source": [ - "## Examples\n", - "\n", - "Below are some examples of models you can access through the `Hugging Face Hub` integration." - ] - }, - { - "cell_type": "markdown", - "id": "4c16fded-70d1-42af-8bfa-6ddda9f0bc63", - "metadata": {}, - "source": [ - "### `Flan`, by `Google`" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "39c7eeac-01c4-486b-9480-e828a9e73e78", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "repo_id = \"google/flan-t5-xxl\" # See https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads for some other options" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "3acf0069", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The FIFA World Cup was held in the year 1994. West Germany won the FIFA World Cup in 1994\n" - ] - } - ], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "1a5c97af-89bc-4e59-95c1-223742a9160b", - "metadata": {}, - "source": [ - "### `Dolly`, by `Databricks`\n", - "\n", - "See [Databricks](https://huggingface.co/databricks) organization page for a list of available models." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "521fcd2b-8e38-4920-b407-5c7d330411c9", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"databricks/dolly-v2-3b\"" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "9907ec3a-fe0c-4543-81c4-d42f9453f16c", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " First of all, the world cup was won by the Germany. Then the Argentina won the world cup in 2022. So, the Argentina won the world cup in 1994.\n", - "\n", - "\n", - "Question: Who\n" - ] - } - ], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "03f6ae52-b5f9-4de6-832c-551cb3fa11ae", - "metadata": {}, - "source": [ - "### `Camel`, by `Writer`\n", - "\n", - "See [Writer's](https://huggingface.co/Writer) organization page for a list of available models." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "257a091d-750b-4910-ac08-fe1c7b3fd98b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "repo_id = \"Writer/camel-5b-hf\" # See https://huggingface.co/Writer for other options" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b06f6838-a11a-4d6a-88e3-91fa1747a2b3", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "2bf838eb-1083-402f-b099-b07c452418c8", - "metadata": {}, - "source": [ - "### `XGen`, by `Salesforce`\n", - "\n", - "See [more information](https://github.com/salesforce/xgen)." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "18c78880-65d7-41d0-9722-18090efb60e9", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"Salesforce/xgen-7b-8k-base\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b1150b4-ec30-4674-849e-6a41b085aa2b", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "0aca9f9e-f333-449c-97b2-10d1dbf17e75", - "metadata": {}, - "source": [ - "### `Falcon`, by `Technology Innovation Institute (TII)`\n", - "\n", - "See [more information](https://huggingface.co/tiiuae/falcon-40b)." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "496b35ac-5ee2-4b68-a6ce-232608f56c03", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"tiiuae/falcon-40b\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ff2541ad-e394-4179-93c2-7ae9c4ca2a25", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"temperature\": 0.5, \"max_length\": 64}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "7e15849b-5561-4bb9-86ec-6412ca10196a", - "metadata": {}, - "source": [ - "### `InternLM-Chat`, by `Shanghai AI Laboratory`\n", - "\n", - "See [more information](https://huggingface.co/internlm/internlm-7b)." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "3b533461-59f8-406e-907b-000841fa60a7", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"internlm/internlm-chat-7b\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c71210b9-5895-41a2-889a-f430d22fa1aa", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.8}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "4f2e5132-1713-42d7-919a-8c313744ce95", - "metadata": {}, - "source": [ - "### `Qwen`, by `Alibaba Cloud`\n", - "\n", - ">`Tongyi Qianwen-7B` (`Qwen-7B`) is a model with a scale of 7 billion parameters in the `Tongyi Qianwen` large model series developed by `Alibaba Cloud`. `Qwen-7B` is a large language model based on Transformer, which is trained on ultra-large-scale pre-training data.\n", - "\n", - "See [more information on HuggingFace](https://huggingface.co/Qwen/Qwen-7B) of on [GitHub](https://github.com/QwenLM/Qwen-7B).\n", - "\n", - "See here a [big example for LangChain integration and Qwen](https://github.com/QwenLM/Qwen-7B/blob/main/examples/langchain_tooluse.ipynb)." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "f598b1ca-77c7-40f1-a83f-c21ea9910c88", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"Qwen/Qwen-7B\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2c97f4e2-d401-44fb-9da7-b60b2e2cc663", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.5}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "markdown", - "id": "e3871376-ed0e-49a8-8d9b-7e60dbbd2b35", - "metadata": {}, - "source": [ - "### `Yi` series models, by `01.ai`\n", - "\n", - ">The `Yi` series models are large language models trained from scratch by developers at [01.ai](https://01.ai/). The first public release contains two bilingual(English/Chinese) base models with the parameter sizes of 6B(`Yi-6B`) and 34B(`Yi-34B`). Both of them are trained with 4K sequence length and can be extended to 32K during inference time. The `Yi-6B-200K` and `Yi-34B-200K` are base model with 200K context length.\n", - "\n", - "Here we test the [Yi-34B](https://huggingface.co/01-ai/Yi-34B) model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "1c9d3125-3f50-48b8-93b6-b50847207afa", - "metadata": {}, - "outputs": [], - "source": [ - "repo_id = \"01-ai/Yi-34B\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8b661069-8229-4850-9f13-c4ca28c0c96b", - "metadata": {}, - "outputs": [], - "source": [ - "llm = HuggingFaceHub(\n", - " repo_id=repo_id, model_kwargs={\"max_length\": 128, \"temperature\": 0.5}\n", - ")\n", - "llm_chain = LLMChain(prompt=prompt, llm=llm)\n", - "print(llm_chain.run(question))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd6f3edc-9f97-47a6-ab2c-116756babbe6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/docs/integrations/llms/huggingface_textgen_inference.ipynb b/docs/docs/integrations/llms/huggingface_textgen_inference.ipynb deleted file mode 100644 index e9b5e31c38600..0000000000000 --- a/docs/docs/integrations/llms/huggingface_textgen_inference.ipynb +++ /dev/null @@ -1,108 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Huggingface TextGen Inference\n", - "\n", - "[Text Generation Inference](https://github.com/huggingface/text-generation-inference) is a Rust, Python and gRPC server for text generation inference. Used in production at [HuggingFace](https://huggingface.co/) to power LLMs api-inference widgets.\n", - "\n", - "This notebooks goes over how to use a self hosted LLM using `Text Generation Inference`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To use, you should have the `text_generation` python package installed." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# !pip3 install text_generation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain_community.llms import HuggingFaceTextGenInference\n", - "\n", - "llm = HuggingFaceTextGenInference(\n", - " inference_server_url=\"http://localhost:8010/\",\n", - " max_new_tokens=512,\n", - " top_k=10,\n", - " top_p=0.95,\n", - " typical_p=0.95,\n", - " temperature=0.01,\n", - " repetition_penalty=1.03,\n", - ")\n", - "llm(\"What did foo say about bar?\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Streaming" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", - "from langchain_community.llms import HuggingFaceTextGenInference\n", - "\n", - "llm = HuggingFaceTextGenInference(\n", - " inference_server_url=\"http://localhost:8010/\",\n", - " max_new_tokens=512,\n", - " top_k=10,\n", - " top_p=0.95,\n", - " typical_p=0.95,\n", - " temperature=0.01,\n", - " repetition_penalty=1.03,\n", - " streaming=True,\n", - ")\n", - "llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - }, - "vscode": { - "interpreter": { - "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" - } - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/vercel.json b/docs/vercel.json index 65a9861a4d531..9a1ab9d8abdc4 100644 --- a/docs/vercel.json +++ b/docs/vercel.json @@ -1,5 +1,13 @@ { "redirects": [ + { + "source": "/docs/integrations/llms/huggingface_textgen_inference", + "destination": "/docs/integrations/llms/huggingface_endpoint" + }, + { + "source": "/docs/integrations/llms/huggingface_hub", + "destination": "/docs/integrations/llms/huggingface_endpoint" + }, { "source": "/docs/integrations/llms/watsonxllm", "destination": "/docs/integrations/llms/ibm_watsonx" diff --git a/libs/community/langchain_community/chat_models/huggingface.py b/libs/community/langchain_community/chat_models/huggingface.py index 0af34a8cf04b2..143aff07172d1 100644 --- a/libs/community/langchain_community/chat_models/huggingface.py +++ b/libs/community/langchain_community/chat_models/huggingface.py @@ -1,4 +1,5 @@ """Hugging Face Chat Wrapper.""" + from typing import Any, List, Optional, Union from langchain_core.callbacks.manager import ( @@ -52,6 +53,7 @@ def __init__(self, **kwargs: Any): from transformers import AutoTokenizer self._resolve_model_id() + self.tokenizer = ( AutoTokenizer.from_pretrained(self.model_id) if self.tokenizer is None @@ -90,10 +92,10 @@ def _to_chat_prompt( ) -> str: """Convert a list of messages into a prompt format expected by wrapped LLM.""" if not messages: - raise ValueError("at least one HumanMessage must be provided") + raise ValueError("At least one HumanMessage must be provided!") if not isinstance(messages[-1], HumanMessage): - raise ValueError("last message must be a HumanMessage") + raise 
ValueError("Last message must be a HumanMessage!") messages_dicts = [self._to_chatml_format(m) for m in messages] @@ -135,20 +137,15 @@ def _resolve_model_id(self) -> None: from huggingface_hub import list_inference_endpoints available_endpoints = list_inference_endpoints("*") - - if isinstance(self.llm, HuggingFaceTextGenInference): - endpoint_url = self.llm.inference_server_url - - elif isinstance(self.llm, HuggingFaceEndpoint): - endpoint_url = self.llm.endpoint_url - - elif isinstance(self.llm, HuggingFaceHub): - # no need to look up model_id for HuggingFaceHub LLM + if isinstance(self.llm, HuggingFaceHub) or ( + hasattr(self.llm, "repo_id") and self.llm.repo_id + ): self.model_id = self.llm.repo_id return - + elif isinstance(self.llm, HuggingFaceTextGenInference): + endpoint_url: Optional[str] = self.llm.inference_server_url else: - raise ValueError(f"Unknown LLM type: {type(self.llm)}") + endpoint_url = self.llm.endpoint_url for endpoint in available_endpoints: if endpoint.url == endpoint_url: @@ -156,8 +153,8 @@ def _resolve_model_id(self) -> None: if not self.model_id: raise ValueError( - "Failed to resolve model_id" - f"Could not find model id for inference server provided: {endpoint_url}" + "Failed to resolve model_id: " + f"Could not find model id for inference server: {endpoint_url}. " + "Make sure that your Hugging Face token has access to the endpoint." ) diff --git a/libs/community/langchain_community/llms/huggingface_endpoint.py b/libs/community/langchain_community/llms/huggingface_endpoint.py index c14b2e24a8050..df25bf367e8b4 100644 --- a/libs/community/langchain_community/llms/huggingface_endpoint.py +++ b/libs/community/langchain_community/llms/huggingface_endpoint.py @@ -1,12 +1,17 @@ -from typing import Any, Dict, List, Mapping, Optional +import json +import logging +from typing import Any, AsyncIterator, Dict, Iterator, List, Mapping, Optional -import requests -from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.callbacks import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) from langchain_core.language_models.llms import LLM -from langchain_core.pydantic_v1 import Extra, root_validator -from langchain_core.utils import get_from_dict_or_env +from langchain_core.outputs import GenerationChunk +from langchain_core.pydantic_v1 import Extra, Field, root_validator +from langchain_core.utils import get_from_dict_or_env, get_pydantic_field_names -from langchain_community.llms.utils import enforce_stop_tokens +logger = logging.getLogger(__name__) VALID_TASKS = ( "text2text-generation", @@ -17,70 +22,198 @@ class HuggingFaceEndpoint(LLM): - """HuggingFace Endpoint models. - - To use, you should have the ``huggingface_hub`` python package installed, and the - environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass - it as a named parameter to the constructor. + """ + HuggingFace Endpoint. - Only supports `text-generation` and `text2text-generation` for now. + To use this class, you should have installed the ``huggingface_hub`` package, and + the environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, + or given as a named parameter to the constructor. Example: ..
code-block:: python - from langchain_community.llms import HuggingFaceEndpoint - endpoint_url = ( - "https://abcdefghijklmnop.us-east-1.aws.endpoints.huggingface.cloud" + # Basic Example (no streaming) + llm = HuggingFaceEndpoint( + endpoint_url="http://localhost:8010/", + max_new_tokens=512, + top_k=10, + top_p=0.95, + typical_p=0.95, + temperature=0.01, + repetition_penalty=1.03, + huggingfacehub_api_token="my-api-key" ) - hf = HuggingFaceEndpoint( - endpoint_url=endpoint_url, + print(llm("What is Deep Learning?")) + + # Streaming response example + from langchain_community.callbacks import streaming_stdout + + callbacks = [streaming_stdout.StreamingStdOutCallbackHandler()] + llm = HuggingFaceEndpoint( + endpoint_url="http://localhost:8010/", + max_new_tokens=512, + top_k=10, + top_p=0.95, + typical_p=0.95, + temperature=0.01, + repetition_penalty=1.03, + callbacks=callbacks, + streaming=True, huggingfacehub_api_token="my-api-key" ) + print(llm("What is Deep Learning?")) + """ - endpoint_url: str = "" + endpoint_url: Optional[str] = None """Endpoint URL to use.""" + repo_id: Optional[str] = None + """Repo to use.""" + huggingfacehub_api_token: Optional[str] = None + max_new_tokens: int = 512 + """Maximum number of generated tokens""" + top_k: Optional[int] = None + """The number of highest probability vocabulary tokens to keep for + top-k-filtering.""" + top_p: Optional[float] = 0.95 + """If set to < 1, only the smallest set of most probable tokens with probabilities + that add up to `top_p` or higher are kept for generation.""" + typical_p: Optional[float] = 0.95 + """Typical Decoding mass. See [Typical Decoding for Natural Language + Generation](https://arxiv.org/abs/2202.00666) for more information.""" + temperature: Optional[float] = 0.8 + """The value used to modulate the logits distribution.""" + repetition_penalty: Optional[float] = None + """The parameter for repetition penalty. 1.0 means no penalty. + See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.""" + return_full_text: bool = False + """Whether to prepend the prompt to the generated text""" + truncate: Optional[int] = None + """Truncate input tokens to the given size""" + stop_sequences: List[str] = Field(default_factory=list) + """Stop generating tokens if a member of `stop_sequences` is generated""" + seed: Optional[int] = None + """Random sampling seed""" + inference_server_url: str = "" + """text-generation-inference instance base URL""" + timeout: int = 120 + """Timeout in seconds""" + streaming: bool = False + """Whether to generate a stream of tokens asynchronously""" + do_sample: bool = False + """Activate logits sampling""" + watermark: bool = False + """Watermarking with [A Watermark for Large Language Models] + (https://arxiv.org/abs/2301.10226)""" + server_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any text-generation-inference server parameters not explicitly specified""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `call` not explicitly specified""" + model: str + client: Any + async_client: Any task: Optional[str] = None """Task to call the model with.
Should be a task that returns `generated_text` or `summary_text`.""" - model_kwargs: Optional[dict] = None - """Keyword arguments to pass to the model.""" - - huggingfacehub_api_token: Optional[str] = None class Config: """Configuration for this pydantic object.""" extra = Extra.forbid + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = get_pydantic_field_names(cls) + extra = values.get("model_kwargs", {}) + for field_name in list(values): + if field_name in extra: + raise ValueError(f"Found {field_name} supplied twice.") + if field_name not in all_required_field_names: + logger.warning( + f"""WARNING! {field_name} is not a default parameter. + {field_name} was transferred to model_kwargs. + Please make sure that {field_name} is what you intended.""" + ) + extra[field_name] = values.pop(field_name) + + invalid_model_kwargs = all_required_field_names.intersection(extra.keys()) + if invalid_model_kwargs: + raise ValueError( + f"Parameters {invalid_model_kwargs} should be specified explicitly. " + f"Instead they were passed in as part of the `model_kwargs` parameter." + ) + + values["model_kwargs"] = extra + if "endpoint_url" not in values and "repo_id" not in values: + raise ValueError( + "Please specify an `endpoint_url` or `repo_id` for the model." + ) + if "endpoint_url" in values and "repo_id" in values: + raise ValueError( + "Please specify either an `endpoint_url` OR a `repo_id`, not both." + ) + values["model"] = values.get("endpoint_url") or values.get("repo_id") + return values + @root_validator() def validate_environment(cls, values: Dict) -> Dict: - """Validate that api key and python package exists in environment.""" - huggingfacehub_api_token = get_from_dict_or_env( - values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" - ) + """Validate that package is installed and that the API token is valid.""" try: - from huggingface_hub.hf_api import HfApi - - try: - HfApi( - endpoint="https://huggingface.co", # Can be a Private Hub endpoint. - token=huggingfacehub_api_token, - ).whoami() - except Exception as e: - raise ValueError( - "Could not authenticate with huggingface_hub. " - "Please check your API token." - ) from e + from huggingface_hub import login except ImportError: raise ImportError( "Could not import huggingface_hub python package. " "Please install it with `pip install huggingface_hub`." ) - values["huggingfacehub_api_token"] = huggingfacehub_api_token + try: + huggingfacehub_api_token = get_from_dict_or_env( + values, "huggingfacehub_api_token", "HUGGINGFACEHUB_API_TOKEN" + ) + login(token=huggingfacehub_api_token) + except Exception as e: + raise ValueError( + "Could not authenticate with huggingface_hub. " + "Please check your API token."
+ ) from e + + from huggingface_hub import AsyncInferenceClient, InferenceClient + + values["client"] = InferenceClient( + model=values["model"], + timeout=values["timeout"], + token=huggingfacehub_api_token, + **values["server_kwargs"], + ) + values["async_client"] = AsyncInferenceClient( + model=values["model"], + timeout=values["timeout"], + token=huggingfacehub_api_token, + **values["server_kwargs"], + ) + return values + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling text generation inference API.""" + return { + "max_new_tokens": self.max_new_tokens, + "top_k": self.top_k, + "top_p": self.top_p, + "typical_p": self.typical_p, + "temperature": self.temperature, + "repetition_penalty": self.repetition_penalty, + "return_full_text": self.return_full_text, + "truncate": self.truncate, + "stop_sequences": self.stop_sequences, + "seed": self.seed, + "do_sample": self.do_sample, + "watermark": self.watermark, + **self.model_kwargs, + } + @property def _identifying_params(self) -> Mapping[str, Any]: """Get the identifying parameters.""" @@ -95,6 +228,13 @@ def _llm_type(self) -> str: """Return type of llm.""" return "huggingface_endpoint" + def _invocation_params( + self, runtime_stop: Optional[List[str]], **kwargs: Any + ) -> Dict[str, Any]: + params = {**self._default_params, **kwargs} + params["stop_sequences"] = params["stop_sequences"] + (runtime_stop or []) + return params + def _call( self, prompt: str, @@ -102,62 +242,129 @@ def _call( run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> str: - """Call out to HuggingFace Hub's inference endpoint. + """Call out to HuggingFace Hub's inference endpoint.""" + invocation_params = self._invocation_params(stop, **kwargs) + if self.streaming: + completion = "" + for chunk in self._stream(prompt, stop, run_manager, **invocation_params): + completion += chunk.text + return completion + else: + invocation_params["stop"] = invocation_params[ + "stop_sequences" + ] # porting 'stop_sequences' into the 'stop' argument + response = self.client.post( + json={"inputs": prompt, "parameters": invocation_params}, + stream=False, + task=self.task, + ) + response_text = json.loads(response.decode())[0]["generated_text"] - Args: - prompt: The prompt to pass into the model. - stop: Optional list of stop words to use when generating. + # Maybe the generation has stopped at one of the stop sequences: + # then we remove this stop sequence from the end of the generated text + for stop_seq in invocation_params["stop_sequences"]: + if response_text[-len(stop_seq) :] == stop_seq: + response_text = response_text[: -len(stop_seq)] + return response_text - Returns: - The string generated by the model. + async def _acall( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + invocation_params = self._invocation_params(stop, **kwargs) + if self.streaming: + completion = "" + async for chunk in self._astream( + prompt, stop, run_manager, **invocation_params + ): + completion += chunk.text + return completion + else: + invocation_params["stop"] = invocation_params["stop_sequences"] + response = await self.async_client.post( + json={"inputs": prompt, "parameters": invocation_params}, + stream=False, + task=self.task, + ) + response_text = json.loads(response.decode())[0]["generated_text"] - Example: - .. 
code-block:: python + # Maybe the generation has stopped at one of the stop sequences: + # then remove this stop sequence from the end of the generated text + for stop_seq in invocation_params["stop_sequences"]: + if response_text[-len(stop_seq) :] == stop_seq: + response_text = response_text[: -len(stop_seq)] + return response_text - response = hf("Tell me a joke.") - """ - _model_kwargs = self.model_kwargs or {} + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + invocation_params = self._invocation_params(stop, **kwargs) - # payload samples - params = {**_model_kwargs, **kwargs} - parameter_payload = {"inputs": prompt, "parameters": params} + for response in self.client.text_generation( + prompt, **invocation_params, stream=True + ): + # identify stop sequence in generated text, if any + stop_seq_found: Optional[str] = None + for stop_seq in invocation_params["stop_sequences"]: + if stop_seq in response: + stop_seq_found = stop_seq - # HTTP headers for authorization - headers = { - "Authorization": f"Bearer {self.huggingfacehub_api_token}", - "Content-Type": "application/json", - } + # identify text to yield + text: Optional[str] = None + if stop_seq_found: + text = response[: response.index(stop_seq_found)] + else: + text = response - # send request - try: - response = requests.post( - self.endpoint_url, headers=headers, json=parameter_payload - ) - except requests.exceptions.RequestException as e: # This is the correct syntax - raise ValueError(f"Error raised by inference endpoint: {e}") - generated_text = response.json() - if "error" in generated_text: - raise ValueError( - f"Error raised by inference API: {generated_text['error']}" - ) - if self.task == "text-generation": - text = generated_text[0]["generated_text"] - # Remove prompt if included in generated text. - if text.startswith(prompt): - text = text[len(prompt) :] - elif self.task == "text2text-generation": - text = generated_text[0]["generated_text"] - elif self.task == "summarization": - text = generated_text[0]["summary_text"] - elif self.task == "conversational": - text = generated_text["response"][1] - else: - raise ValueError( - f"Got invalid task {self.task}, " - f"currently only {VALID_TASKS} are supported" - ) - if stop is not None: - # This is a bit hacky, but I can't figure out a better way to enforce - # stop tokens when making calls to huggingface_hub. 
- text = enforce_stop_tokens(text, stop) - return text + # yield text, if any + if text: + chunk = GenerationChunk(text=text) + yield chunk + if run_manager: + run_manager.on_llm_new_token(chunk.text) + + # break if stop sequence found + if stop_seq_found: + break + + async def _astream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[GenerationChunk]: + invocation_params = self._invocation_params(stop, **kwargs) + async for response in await self.async_client.text_generation( + prompt, **invocation_params, stream=True + ): + # identify stop sequence in generated text, if any + stop_seq_found: Optional[str] = None + for stop_seq in invocation_params["stop_sequences"]: + if stop_seq in response: + stop_seq_found = stop_seq + + # identify text to yield + text: Optional[str] = None + if stop_seq_found: + text = response[: response.index(stop_seq_found)] + else: + text = response + + # yield text, if any + if text: + chunk = GenerationChunk(text=text) + yield chunk + if run_manager: + await run_manager.on_llm_new_token(chunk.text) + + # break if stop sequence found + if stop_seq_found: + break diff --git a/libs/community/langchain_community/llms/huggingface_hub.py b/libs/community/langchain_community/llms/huggingface_hub.py index 2d91231775231..2a5deaf766d61 100644 --- a/libs/community/langchain_community/llms/huggingface_hub.py +++ b/libs/community/langchain_community/llms/huggingface_hub.py @@ -1,6 +1,7 @@ import json from typing import Any, Dict, List, Mapping, Optional +from langchain_core._api.deprecation import deprecated from langchain_core.callbacks import CallbackManagerForLLMRun from langchain_core.language_models.llms import LLM from langchain_core.pydantic_v1 import Extra, root_validator @@ -19,8 +20,10 @@ } +@deprecated("0.0.21", removal="0.2.0", alternative="HuggingFaceEndpoint") class HuggingFaceHub(LLM): """HuggingFaceHub models. + ! This class is deprecated, you should use HuggingFaceEndpoint instead. 
To use, you should have the ``huggingface_hub`` python package installed, and the environment variable ``HUGGINGFACEHUB_API_TOKEN`` set with your API token, or pass diff --git a/libs/community/langchain_community/llms/huggingface_pipeline.py b/libs/community/langchain_community/llms/huggingface_pipeline.py index 388ba117c25ec..7a2b915054fb5 100644 --- a/libs/community/langchain_community/llms/huggingface_pipeline.py +++ b/libs/community/langchain_community/llms/huggingface_pipeline.py @@ -9,8 +9,6 @@ from langchain_core.outputs import Generation, LLMResult from langchain_core.pydantic_v1 import Extra -from langchain_community.llms.utils import enforce_stop_tokens - DEFAULT_MODEL_ID = "gpt2" DEFAULT_TASK = "text-generation" VALID_TASKS = ("text2text-generation", "text-generation", "summarization") @@ -201,7 +199,12 @@ def _generate( batch_prompts = prompts[i : i + self.batch_size] # Process batch of prompts - responses = self.pipeline(batch_prompts, **pipeline_kwargs) + responses = self.pipeline( + batch_prompts, + stop_sequence=stop, + return_full_text=False, + **pipeline_kwargs, + ) # Process each response in the batch for j, response in enumerate(responses): @@ -210,23 +213,7 @@ def _generate( response = response[0] if self.pipeline.task == "text-generation": - try: - from transformers.pipelines.text_generation import ReturnType - - remove_prompt = ( - self.pipeline._postprocess_params.get("return_type") - != ReturnType.NEW_TEXT - ) - except Exception as e: - logger.warning( - f"Unable to extract pipeline return_type. " - f"Received error:\n\n{e}" - ) - remove_prompt = True - if remove_prompt: - text = response["generated_text"][len(batch_prompts[j]) :] - else: - text = response["generated_text"] + text = response["generated_text"] elif self.pipeline.task == "text2text-generation": text = response["generated_text"] elif self.pipeline.task == "summarization": @@ -236,9 +223,6 @@ def _generate( f"Got invalid task {self.pipeline.task}, " f"currently only {VALID_TASKS} are supported" ) - if stop: - # Enforce stop tokens - text = enforce_stop_tokens(text, stop) # Append the processed text to results text_generations.append(text) diff --git a/libs/community/langchain_community/llms/huggingface_text_gen_inference.py b/libs/community/langchain_community/llms/huggingface_text_gen_inference.py index e03b6f7adcf83..9f56a949c6b94 100644 --- a/libs/community/langchain_community/llms/huggingface_text_gen_inference.py +++ b/libs/community/langchain_community/llms/huggingface_text_gen_inference.py @@ -1,6 +1,7 @@ import logging from typing import Any, AsyncIterator, Dict, Iterator, List, Optional +from langchain_core._api.deprecation import deprecated from langchain_core.callbacks import ( AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun, @@ -13,9 +14,11 @@ logger = logging.getLogger(__name__) +@deprecated("0.0.21", removal="0.2.0", alternative="HuggingFaceEndpoint") class HuggingFaceTextGenInference(LLM): """ HuggingFace text generation API. + ! This class is deprecated, you should use HuggingFaceEndpoint instead ! To use, you should have the `text-generation` python package installed and a text-generation server running. 
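With both `HuggingFaceHub` and `HuggingFaceTextGenInference` now deprecated in favor of `HuggingFaceEndpoint`, a minimal migration sketch looks roughly as follows. This is an illustration, not part of the patch: it assumes a local text-generation-inference server at `http://localhost:8010/` and a valid `HUGGINGFACEHUB_API_TOKEN` in the environment, and the parameter values simply mirror the docstring example above.

```python
# Migration sketch (assumptions as noted above): the deprecated
# HuggingFaceTextGenInference(inference_server_url=...) call maps onto the
# unified HuggingFaceEndpoint(endpoint_url=...) constructor.
from langchain_community.llms import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    endpoint_url="http://localhost:8010/",  # assumed local TGI server
    max_new_tokens=512,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
)
print(llm("What is Deep Learning?"))
```

Per the `build_extra` validator added above, the same constructor accepts `repo_id=` instead of `endpoint_url=` (one or the other, never both) when targeting the serverless API rather than a dedicated server.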
diff --git a/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py b/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py index ca89d54cde775..11af7df374269 100644 --- a/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py +++ b/libs/community/tests/integration_tests/llms/test_huggingface_endpoint.py @@ -1,6 +1,5 @@ -"""Test HuggingFace API wrapper.""" +"""Test HuggingFace Endpoints.""" -import unittest from pathlib import Path import pytest @@ -10,51 +9,73 @@ from tests.integration_tests.llms.utils import assert_llm_equality -@unittest.skip( - "This test requires an inference endpoint. Tested with Hugging Face endpoints" -) -def test_huggingface_endpoint_text_generation() -> None: - """Test valid call to HuggingFace text generation model.""" +def test_huggingface_endpoint_call_error() -> None: + """Test valid call to HuggingFace that errors.""" + llm = HuggingFaceEndpoint(endpoint_url="", model_kwargs={"max_new_tokens": -1}) + with pytest.raises(ValueError): + llm("Say foo:") + + +def test_saving_loading_endpoint_llm(tmp_path: Path) -> None: + """Test saving/loading a HuggingFaceEndpoint LLM.""" llm = HuggingFaceEndpoint( endpoint_url="", task="text-generation", model_kwargs={"max_new_tokens": 10} ) + llm.save(file_path=tmp_path / "hf.yaml") + loaded_llm = load_llm(tmp_path / "hf.yaml") + assert_llm_equality(llm, loaded_llm) + + +def test_huggingface_text_generation() -> None: + """Test valid call to HuggingFace text generation model.""" + llm = HuggingFaceEndpoint(repo_id="gpt2", model_kwargs={"max_new_tokens": 10}) output = llm("Say foo:") print(output) # noqa: T201 assert isinstance(output, str) -@unittest.skip( - "This test requires an inference endpoint. Tested with Hugging Face endpoints" -) -def test_huggingface_endpoint_text2text_generation() -> None: +def test_huggingface_text2text_generation() -> None: """Test valid call to HuggingFace text2text model.""" - llm = HuggingFaceEndpoint(endpoint_url="", task="text2text-generation") + llm = HuggingFaceEndpoint(repo_id="google/flan-t5-xl") output = llm("The capital of New York is") assert output == "Albany" -@unittest.skip( - "This test requires an inference endpoint.
Tested with Hugging Face endpoints" -) -def test_huggingface_endpoint_summarization() -> None: +def test_huggingface_summarization() -> None: """Test valid call to HuggingFace summarization model.""" - llm = HuggingFaceEndpoint(endpoint_url="", task="summarization") + llm = HuggingFaceEndpoint(repo_id="facebook/bart-large-cnn") output = llm("Say foo:") assert isinstance(output, str) -def test_huggingface_endpoint_call_error() -> None: +def test_huggingface_call_error() -> None: """Test valid call to HuggingFace that errors.""" - llm = HuggingFaceEndpoint(model_kwargs={"max_new_tokens": -1}) + llm = HuggingFaceEndpoint(repo_id="gpt2", model_kwargs={"max_new_tokens": -1}) with pytest.raises(ValueError): llm("Say foo:") -def test_saving_loading_endpoint_llm(tmp_path: Path) -> None: - """Test saving/loading an HuggingFaceHub LLM.""" - llm = HuggingFaceEndpoint( - endpoint_url="", task="text-generation", model_kwargs={"max_new_tokens": 10} - ) +def test_saving_loading_llm(tmp_path: Path) -> None: + """Test saving/loading an HuggingFaceEndpoint LLM.""" + llm = HuggingFaceEndpoint(repo_id="gpt2", model_kwargs={"max_new_tokens": 10}) llm.save(file_path=tmp_path / "hf.yaml") loaded_llm = load_llm(tmp_path / "hf.yaml") assert_llm_equality(llm, loaded_llm) + + +def test_invocation_params_stop_sequences() -> None: + llm = HuggingFaceEndpoint() + assert llm._default_params["stop_sequences"] == [] + + runtime_stop = None + assert llm._invocation_params(runtime_stop)["stop_sequences"] == [] + assert llm._default_params["stop_sequences"] == [] + + runtime_stop = ["stop"] + assert llm._invocation_params(runtime_stop)["stop_sequences"] == ["stop"] + assert llm._default_params["stop_sequences"] == [] + + llm = HuggingFaceEndpoint(stop_sequences=["."]) + runtime_stop = ["stop"] + assert llm._invocation_params(runtime_stop)["stop_sequences"] == [".", "stop"] + assert llm._default_params["stop_sequences"] == ["."] From 86ae48b78152d9240d7c67e60a197cc3a99fc12f Mon Sep 17 00:00:00 2001 From: Pranav Agarwal <119924780+pranava-amzn@users.noreply.github.com> Date: Mon, 19 Feb 2024 10:36:37 -0800 Subject: [PATCH 06/31] experimental[minor]: Amazon Personalize support (#17436) ## Amazon Personalize support on Langchain This PR is a successor to this PR - https://github.com/langchain-ai/langchain/pull/13216 This PR introduces an integration with [Amazon Personalize](https://aws.amazon.com/personalize/) to help you to retrieve recommendations and use them in your natural language applications. This integration provides two new components: 1. An `AmazonPersonalize` client, that provides a wrapper around the Amazon Personalize API. 2. An `AmazonPersonalizeChain`, that provides a chain to pull in recommendations using the client, and then generating the response in natural language. We have added this to langchain_experimental since there was feedback from the previous PR about having this support in experimental rather than the core or community extensions. Here is some sample code to explain the usage. 
```python from langchain_experimental.recommenders import AmazonPersonalize from langchain_experimental.recommenders import AmazonPersonalizeChain from langchain.llms.bedrock import Bedrock recommender_arn = "" client = AmazonPersonalize( credentials_profile_name="default", region_name="us-west-2", recommender_arn=recommender_arn ) bedrock_llm = Bedrock( model_id="anthropic.claude-v2", region_name="us-west-2" ) chain = AmazonPersonalizeChain.from_llm( llm=bedrock_llm, client=client ) response = chain({'user_id': '1'}) ``` Reviewer: @3coins --- cookbook/amazon_personalize_how_to.ipynb | 284 ++++++++++++++++++ .../recommenders/__init__.py | 7 + .../recommenders/amazon_personalize.py | 195 ++++++++++++ .../recommenders/amazon_personalize_chain.py | 192 ++++++++++++ 4 files changed, 678 insertions(+) create mode 100644 cookbook/amazon_personalize_how_to.ipynb create mode 100644 libs/experimental/langchain_experimental/recommenders/__init__.py create mode 100644 libs/experimental/langchain_experimental/recommenders/amazon_personalize.py create mode 100644 libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py diff --git a/cookbook/amazon_personalize_how_to.ipynb b/cookbook/amazon_personalize_how_to.ipynb new file mode 100644 index 0000000000000..7555e39d89494 --- /dev/null +++ b/cookbook/amazon_personalize_how_to.ipynb @@ -0,0 +1,284 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Amazon Personalize\n", + "\n", + "[Amazon Personalize](https://docs.aws.amazon.com/personalize/latest/dg/what-is-personalize.html) is a fully managed machine learning service that uses your data to generate item recommendations for your users. It can also generate user segments based on the users' affinity for certain items or item metadata.\n", + "\n", + "This notebook goes through how to use Amazon Personalize Chain. You need an Amazon Personalize campaign_arn or a recommender_arn before you get started with the below notebook.\n", + "\n", + "Following is a [tutorial](https://github.com/aws-samples/retail-demo-store/blob/master/workshop/1-Personalization/Lab-1-Introduction-and-data-preparation.ipynb) to set up a campaign_arn/recommender_arn on Amazon Personalize. Once the campaign_arn/recommender_arn is set up, you can use it in the LangChain ecosystem. \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Install Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!pip install boto3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. 
Sample Use-cases" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.1 [Use-case-1] Setup Amazon Personalize Client and retrieve recommendations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_experimental.recommenders import AmazonPersonalize\n", + "\n", + "recommender_arn = \"\"\n", + "\n", + "client = AmazonPersonalize(\n", + " credentials_profile_name=\"default\",\n", + " region_name=\"us-west-2\",\n", + " recommender_arn=recommender_arn,\n", + ")\n", + "client.get_recommendations(user_id=\"1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### 2.2 [Use-case-2] Invoke Personalize Chain for summarizing results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "from langchain.llms.bedrock import Bedrock\n", + "from langchain_experimental.recommenders import AmazonPersonalizeChain\n", + "\n", + "bedrock_llm = Bedrock(model_id=\"anthropic.claude-v2\", region_name=\"us-west-2\")\n", + "\n", + "# Create personalize chain\n", + "# Use return_direct=True if you do not want summary\n", + "chain = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=False\n", + ")\n", + "response = chain({\"user_id\": \"1\"})\n", + "print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.3 [Use-Case-3] Invoke Amazon Personalize Chain using your own prompt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.prompts.prompt import PromptTemplate\n", + "\n", + "RANDOM_PROMPT_QUERY = \"\"\"\n", + "You are a skilled publicist. Write a high-converting marketing email advertising several movies available in a video-on-demand streaming platform next week, \n", + " given the movie and user information below. Your email will leverage the power of storytelling and persuasive language. \n", + " The movies to recommend and their information is contained in the tag. \n", + " All movies in the tag must be recommended. Give a summary of the movies and why the human should watch them. \n", + " Put the email between tags.\n", + "\n", + " \n", + " {result} \n", + " \n", + "\n", + " Assistant:\n", + " \"\"\"\n", + "\n", + "RANDOM_PROMPT = PromptTemplate(input_variables=[\"result\"], template=RANDOM_PROMPT_QUERY)\n", + "\n", + "chain = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=False, prompt_template=RANDOM_PROMPT\n", + ")\n", + "chain.run({\"user_id\": \"1\", \"item_id\": \"234\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2.4 [Use-case-4] Invoke Amazon Personalize in a Sequential Chain " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import LLMChain, SequentialChain\n", + "\n", + "RANDOM_PROMPT_QUERY_2 = \"\"\"\n", + "You are a skilled publicist. Write a high-converting marketing email advertising several movies available in a video-on-demand streaming platform next week, \n", + " given the movie and user information below. Your email will leverage the power of storytelling and persuasive language. 
\n", + " You want the email to impress the user, so make it appealing to them.\n", + " The movies to recommend and their information is contained in the tag. \n", + " All movies in the tag must be recommended. Give a summary of the movies and why the human should watch them. \n", + " Put the email between tags.\n", + "\n", + " \n", + " {result}\n", + " \n", + "\n", + " Assistant:\n", + " \"\"\"\n", + "\n", + "RANDOM_PROMPT_2 = PromptTemplate(\n", + " input_variables=[\"result\"], template=RANDOM_PROMPT_QUERY_2\n", + ")\n", + "personalize_chain_instance = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=True\n", + ")\n", + "random_chain_instance = LLMChain(llm=bedrock_llm, prompt=RANDOM_PROMPT_2)\n", + "overall_chain = SequentialChain(\n", + " chains=[personalize_chain_instance, random_chain_instance],\n", + " input_variables=[\"user_id\"],\n", + " verbose=True,\n", + ")\n", + "overall_chain.run({\"user_id\": \"1\", \"item_id\": \"234\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### 2.5 [Use-case-5] Invoke Amazon Personalize and retrieve metadata " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "recommender_arn = \"\"\n", + "metadata_column_names = [\n", + " \"\",\n", + " \"\",\n", + "]\n", + "metadataMap = {\"ITEMS\": metadata_column_names}\n", + "\n", + "client = AmazonPersonalize(\n", + " credentials_profile_name=\"default\",\n", + " region_name=\"us-west-2\",\n", + " recommender_arn=recommender_arn,\n", + ")\n", + "client.get_recommendations(user_id=\"1\", metadataColumns=metadataMap)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### 2.6 [Use-Case 6] Invoke Personalize Chain with returned metadata for summarizing results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "bedrock_llm = Bedrock(model_id=\"anthropic.claude-v2\", region_name=\"us-west-2\")\n", + "\n", + "# Create personalize chain\n", + "# Use return_direct=True if you do not want summary\n", + "chain = AmazonPersonalizeChain.from_llm(\n", + " llm=bedrock_llm, client=client, return_direct=False\n", + ")\n", + "response = chain({\"user_id\": \"1\", \"metadata_columns\": metadataMap})\n", + "print(response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + }, + "vscode": { + "interpreter": { + "hash": "15e58ce194949b77a891bd4339ce3d86a9bd138e905926019517993f97db9e6c" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/libs/experimental/langchain_experimental/recommenders/__init__.py b/libs/experimental/langchain_experimental/recommenders/__init__.py new file mode 100644 index 0000000000000..ec06f5541894d --- /dev/null +++ b/libs/experimental/langchain_experimental/recommenders/__init__.py @@ -0,0 +1,7 @@ +"""Amazon Personalize primitives.""" +from 
langchain_experimental.recommenders.amazon_personalize import AmazonPersonalize +from langchain_experimental.recommenders.amazon_personalize_chain import ( + AmazonPersonalizeChain, +) + +__all__ = ["AmazonPersonalize", "AmazonPersonalizeChain"] diff --git a/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py b/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py new file mode 100644 index 0000000000000..b2300f0a19c3a --- /dev/null +++ b/libs/experimental/langchain_experimental/recommenders/amazon_personalize.py @@ -0,0 +1,195 @@ +from typing import Any, List, Mapping, Optional, Sequence + + +class AmazonPersonalize: + """Amazon Personalize Runtime wrapper for executing real-time operations: + https://docs.aws.amazon.com/personalize/latest/dg/API_Operations_Amazon_Personalize_Runtime.html + + Args: + campaign_arn: str, Optional: The Amazon Resource Name (ARN) of the campaign + to use for getting recommendations. + recommender_arn: str, Optional: The Amazon Resource Name (ARN) of the + recommender to use to get recommendations + client: Optional: boto3 client + credentials_profile_name: str, Optional :AWS profile name + region_name: str, Optional: AWS region, e.g., us-west-2 + + Example: + .. code-block:: python + + personalize_client = AmazonPersonalize ( + campaignArn='' ) + """ + + def __init__( + self, + campaign_arn: Optional[str] = None, + recommender_arn: Optional[str] = None, + client: Optional[Any] = None, + credentials_profile_name: Optional[str] = None, + region_name: Optional[str] = None, + ): + self.campaign_arn = campaign_arn + self.recommender_arn = recommender_arn + + if campaign_arn and recommender_arn: + raise ValueError( + "Cannot initialize AmazonPersonalize with both " + "campaign_arn and recommender_arn." + ) + + if not campaign_arn and not recommender_arn: + raise ValueError( + "Cannot initialize AmazonPersonalize. Provide one of " + "campaign_arn or recommender_arn" + ) + + try: + if client is not None: + self.client = client + else: + import boto3 + import botocore.config + + if credentials_profile_name is not None: + session = boto3.Session(profile_name=credentials_profile_name) + else: + # use default credentials + session = boto3.Session() + + client_params = {} + if region_name: + client_params["region_name"] = region_name + + service = "personalize-runtime" + session_config = botocore.config.Config(user_agent_extra="langchain") + client_params["config"] = session_config + self.client = session.client(service, **client_params) + + except ImportError: + raise ModuleNotFoundError( + "Could not import boto3 python package. " + "Please install it with `pip install boto3`." 
+ ) + + def get_recommendations( + self, + user_id: Optional[str] = None, + item_id: Optional[str] = None, + filter_arn: Optional[str] = None, + filter_values: Optional[Mapping[str, str]] = None, + num_results: Optional[int] = 10, + context: Optional[Mapping[str, str]] = None, + promotions: Optional[Sequence[Mapping[str, Any]]] = None, + metadata_columns: Optional[Mapping[str, Sequence[str]]] = None, + **kwargs: Any, + ) -> Mapping[str, Any]: + """Get recommendations from Amazon Personalize: + https://docs.aws.amazon.com/personalize/latest/dg/API_RS_GetRecommendations.html + + Args: + user_id: str, Optional: The user identifier + for which to retrieve recommendations + item_id: str, Optional: The item identifier + for which to retrieve recommendations + filter_arn: str, Optional: The ARN of the filter + to apply to the returned recommendations + filter_values: Mapping, Optional: The values + to use when filtering recommendations. + num_results: int, Optional: Default=10: The number of results to return + context: Mapping, Optional: The contextual metadata + to use when getting recommendations + promotions: Sequence, Optional: The promotions + to apply to the recommendation request. + metadata_columns: Mapping, Optional: The metadata Columns to be returned + as part of the response. + + Returns: + response: Mapping[str, Any]: Returns an itemList and recommendationId. + + Example: + .. code-block:: python + + personalize_client = AmazonPersonalize(campaignArn='' )\n + response = personalize_client.get_recommendations(user_id="1") + + """ + if not user_id and not item_id: + raise ValueError("One of user_id or item_id is required") + + if filter_arn: + kwargs["filterArn"] = filter_arn + if filter_values: + kwargs["filterValues"] = filter_values + if user_id: + kwargs["userId"] = user_id + if num_results: + kwargs["numResults"] = num_results + if context: + kwargs["context"] = context + if promotions: + kwargs["promotions"] = promotions + if item_id: + kwargs["itemId"] = item_id + if metadata_columns: + kwargs["metadataColumns"] = metadata_columns + if self.campaign_arn: + kwargs["campaignArn"] = self.campaign_arn + if self.recommender_arn: + kwargs["recommenderArn"] = self.recommender_arn + + return self.client.get_recommendations(**kwargs) + + def get_personalized_ranking( + self, + user_id: str, + input_list: List[str], + filter_arn: Optional[str] = None, + filter_values: Optional[Mapping[str, str]] = None, + context: Optional[Mapping[str, str]] = None, + metadata_columns: Optional[Mapping[str, Sequence[str]]] = None, + **kwargs: Any, + ) -> Mapping[str, Any]: + """Re-ranks a list of recommended items for the given user. + https://docs.aws.amazon.com/personalize/latest/dg/API_RS_GetPersonalizedRanking.html + + Args: + user_id: str, Required: The user identifier + for which to retrieve recommendations + input_list: List[str], Required: A list of items (by itemId) to rank + filter_arn: str, Optional: The ARN of the filter to apply + filter_values: Mapping, Optional: The values to use + when filtering recommendations. + context: Mapping, Optional: The contextual metadata + to use when getting recommendations + metadata_columns: Mapping, Optional: The metadata Columns to be returned + as part of the response. + + Returns: + response: Mapping[str, Any]: Returns personalizedRanking + and recommendationId. + + Example: + .. 
code-block:: python + + personalize_client = AmazonPersonalize(campaignArn='' )\n + response = personalize_client.get_personalized_ranking(user_id="1", + input_list=["123", "256"]) + + """ + + if filter_arn: + kwargs["filterArn"] = filter_arn + if filter_values: + kwargs["filterValues"] = filter_values + if user_id: + kwargs["userId"] = user_id + if input_list: + kwargs["inputList"] = input_list + if context: + kwargs["context"] = context + if metadata_columns: + kwargs["metadataColumns"] = metadata_columns + kwargs["campaignArn"] = self.campaign_arn + + return self.client.get_personalized_ranking(**kwargs) diff --git a/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py b/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py new file mode 100644 index 0000000000000..4c187a8006463 --- /dev/null +++ b/libs/experimental/langchain_experimental/recommenders/amazon_personalize_chain.py @@ -0,0 +1,192 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Mapping, Optional, cast + +from langchain.callbacks.manager import ( + CallbackManagerForChainRun, +) +from langchain.chains import LLMChain +from langchain.chains.base import Chain +from langchain.prompts.prompt import PromptTemplate +from langchain.schema.language_model import BaseLanguageModel + +from langchain_experimental.recommenders.amazon_personalize import AmazonPersonalize + +SUMMARIZE_PROMPT_QUERY = """ +Summarize the recommended items for a user from the items list in the tag below. +Draw correlations between the items in the list and provide a summary. + + {result} + +""" + +SUMMARIZE_PROMPT = PromptTemplate( + input_variables=["result"], template=SUMMARIZE_PROMPT_QUERY +) + +INTERMEDIATE_STEPS_KEY = "intermediate_steps" + +# Input Key Names to be used +USER_ID_INPUT_KEY = "user_id" +ITEM_ID_INPUT_KEY = "item_id" +INPUT_LIST_INPUT_KEY = "input_list" +FILTER_ARN_INPUT_KEY = "filter_arn" +FILTER_VALUES_INPUT_KEY = "filter_values" +CONTEXT_INPUT_KEY = "context" +PROMOTIONS_INPUT_KEY = "promotions" +METADATA_COLUMNS_INPUT_KEY = "metadata_columns" +RESULT_OUTPUT_KEY = "result" + + +class AmazonPersonalizeChain(Chain): + """Amazon Personalize Chain for retrieving recommendations + from Amazon Personalize, and summarizing + the recommendations in natural language. + If return_direct=True, it returns the recommendations directly + without summarization. + Can also be used in sequential chains for working with + the output of Amazon Personalize. + + Example: + .. code-block:: python + + chain = AmazonPersonalizeChain.from_llm(llm=agent_llm, client=personalize_lg, + return_direct=True)\n + response = chain.run({'user_id':'1'})\n + response = chain.run({'user_id':'1', 'item_id':'234'}) + """ + + client: AmazonPersonalize + summarization_chain: LLMChain + return_direct: bool = False + return_intermediate_steps: bool = False + is_ranking_recipe: bool = False + + @property + def input_keys(self) -> List[str]: + """This returns an empty list since all input keys are optional + and none is required. + + :meta private: + """ + return [] + + @property + def output_keys(self) -> List[str]: + """Will always return the result key. 
+ + :meta private: + """ + return [RESULT_OUTPUT_KEY] + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + client: AmazonPersonalize, + prompt_template: PromptTemplate = SUMMARIZE_PROMPT, + is_ranking_recipe: bool = False, + **kwargs: Any, + ) -> AmazonPersonalizeChain: + """Initializes the Personalize Chain with the LLM, the Personalize client, + and the prompt template to be used + + Args: + llm: BaseLanguageModel: The LLM to be used in the Chain + client: AmazonPersonalize: The client created to support + invoking AmazonPersonalize + prompt_template: PromptTemplate: The prompt template which can be + invoked with the output from Amazon Personalize + is_ranking_recipe: bool: default: False: specifies + if the trained recipe is USER_PERSONALIZED_RANKING + + Example: + .. code-block:: python + + chain = AmazonPersonalizeChain.from_llm(llm=agent_llm, + client=personalize_lg, return_direct=True)\n + response = chain.run({'user_id':'1'})\n + response = chain.run({'user_id':'1', 'item_id':'234'}) + + RANDOM_PROMPT = PromptTemplate( + input_variables=["result"], + template=" Summarize recommendations in {result}", + ) + chain = AmazonPersonalizeChain.from_llm(llm=agent_llm, + client=personalize_lg, prompt_template=RANDOM_PROMPT)\n + """ + summarization_chain = LLMChain(llm=llm, prompt=prompt_template) + + return cls( + summarization_chain=summarization_chain, + client=client, + is_ranking_recipe=is_ranking_recipe, + **kwargs, + ) + + def _call( + self, + inputs: Mapping[str, Any], + run_manager: Optional[CallbackManagerForChainRun] = None, + ) -> Dict[str, Any]: + """Retrieves recommendations by invoking Amazon Personalize, + and invokes an LLM using the default/overridden + prompt template with the output from Amazon Personalize + + Args: + inputs: Mapping[str, Any]: Provide input identifiers in a map. + For example - {'user_id': '1'} or + {'user_id': '1', 'item_id': '123'}. You can also pass + filter_arn and filter_values as + inputs. 
+ """ + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + callbacks = _run_manager.get_child() + + user_id = inputs.get(USER_ID_INPUT_KEY) + item_id = inputs.get(ITEM_ID_INPUT_KEY) + input_list = inputs.get(INPUT_LIST_INPUT_KEY) + filter_arn = inputs.get(FILTER_ARN_INPUT_KEY) + filter_values = inputs.get(FILTER_VALUES_INPUT_KEY) + promotions = inputs.get(PROMOTIONS_INPUT_KEY) + context = inputs.get(CONTEXT_INPUT_KEY) + metadata_columns = inputs.get(METADATA_COLUMNS_INPUT_KEY) + + intermediate_steps: List = [] + intermediate_steps.append({"Calling Amazon Personalize"}) + + if self.is_ranking_recipe: + response = self.client.get_personalized_ranking( + user_id=str(user_id), + input_list=cast(List[str], input_list), + filter_arn=filter_arn, + filter_values=filter_values, + context=context, + metadata_columns=metadata_columns, + ) + else: + response = self.client.get_recommendations( + user_id=user_id, + item_id=item_id, + filter_arn=filter_arn, + filter_values=filter_values, + context=context, + promotions=promotions, + metadata_columns=metadata_columns, + ) + + _run_manager.on_text("Call to Amazon Personalize complete \n") + + if self.return_direct: + final_result = response + else: + result = self.summarization_chain( + {RESULT_OUTPUT_KEY: response}, callbacks=callbacks + ) + final_result = result[self.summarization_chain.output_key] + + intermediate_steps.append({"context": response}) + chain_result: Dict[str, Any] = {RESULT_OUTPUT_KEY: final_result} + if self.return_intermediate_steps: + chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps + return chain_result + + @property + def _chain_type(self) -> str: + return "amazon_personalize_chain" From 6275d8b1bf88c6b02ec02dad91c3b09461b0ddf3 Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Mon, 19 Feb 2024 19:47:38 +0100 Subject: [PATCH 07/31] docs: Fix AstraDBChatMessageHistory docstrings (#17740) --- .../chat_message_histories/astradb.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/libs/community/langchain_community/chat_message_histories/astradb.py b/libs/community/langchain_community/chat_message_histories/astradb.py index f820480ff26b7..5b118a0ac9f3c 100644 --- a/libs/community/langchain_community/chat_message_histories/astradb.py +++ b/libs/community/langchain_community/chat_message_histories/astradb.py @@ -26,16 +26,16 @@ class AstraDBChatMessageHistory(BaseChatMessageHistory): """Chat message history that stores history in Astra DB. - Args (only keyword-arguments accepted): + Args: session_id: arbitrary key that is used to store the messages of a single chat session. - collection_name (str): name of the Astra DB collection to create/use. - token (Optional[str]): API token for Astra DB usage. - api_endpoint (Optional[str]): full URL to the API endpoint, + collection_name: name of the Astra DB collection to create/use. + token: API token for Astra DB usage. + api_endpoint: full URL to the API endpoint, such as "https://-us-east1.apps.astra.datastax.com". - astra_db_client (Optional[Any]): *alternative to token+api_endpoint*, + astra_db_client: *alternative to token+api_endpoint*, you can pass an already-created 'astrapy.db.AstraDB' instance. - namespace (Optional[str]): namespace (aka keyspace) where the + namespace: namespace (aka keyspace) where the collection is created. Defaults to the database's "default namespace". 
""" @@ -51,7 +51,6 @@ def __init__( setup_mode: SetupMode = SetupMode.SYNC, pre_delete_collection: bool = False, ) -> None: - """Create an Astra DB chat message history.""" self.astra_env = _AstraDBCollectionEnvironment( collection_name=collection_name, token=token, @@ -96,7 +95,6 @@ def messages(self, messages: List[BaseMessage]) -> None: raise NotImplementedError("Use add_messages instead") async def aget_messages(self) -> List[BaseMessage]: - """Retrieve all session messages from DB""" await self.astra_env.aensure_db_setup() docs = self.async_collection.paginated_find( filter={ @@ -117,7 +115,6 @@ async def aget_messages(self) -> List[BaseMessage]: return messages def add_messages(self, messages: Sequence[BaseMessage]) -> None: - """Write a message to the table""" self.astra_env.ensure_db_setup() docs = [ { @@ -130,7 +127,6 @@ def add_messages(self, messages: Sequence[BaseMessage]) -> None: self.collection.chunked_insert_many(docs) async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None: - """Write a message to the table""" await self.astra_env.aensure_db_setup() docs = [ { @@ -143,11 +139,9 @@ async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None: await self.async_collection.chunked_insert_many(docs) def clear(self) -> None: - """Clear session memory from DB""" self.astra_env.ensure_db_setup() self.collection.delete_many(filter={"session_id": self.session_id}) async def aclear(self) -> None: - """Clear session memory from DB""" await self.astra_env.aensure_db_setup() await self.async_collection.delete_many(filter={"session_id": self.session_id}) From 919ebcc5966fd609d555ed26b8488827b3eac189 Mon Sep 17 00:00:00 2001 From: CogniJT <131272471+CogniJT@users.noreply.github.com> Date: Tue, 20 Feb 2024 00:24:13 +0530 Subject: [PATCH 08/31] community[minor]: CogniSwitch Agent Toolkit for LangChain (#17312) **Description**: CogniSwitch focusses on making GenAI usage more reliable. It abstracts out the complexity & decision making required for tuning processing, storage & retrieval. Using simple APIs documents / URLs can be processed into a Knowledge Graph that can then be used to answer questions. **Dependencies**: No dependencies. Just network calls & API key required **Tag maintainer**: @hwchase17 **Twitter handle**: https://github.com/CogniSwitch **Documentation**: Please check `docs/docs/integrations/toolkits/cogniswitch.ipynb` **Tests**: The usual tool & toolkits tests using `test_imports.py` PR has passed linting and testing before this submission. 
--------- Co-authored-by: Saicharan Sridhara <145636106+saiCogniswitch@users.noreply.github.com> --- .../integrations/toolkits/cogniswitch.ipynb | 326 ++++++++++++++ .../agent_toolkits/__init__.py | 2 + .../agent_toolkits/cogniswitch/__init__.py | 1 + .../agent_toolkits/cogniswitch/toolkit.py | 40 ++ .../langchain_community/tools/__init__.py | 38 ++ .../tools/cogniswitch/__init__.py | 1 + .../tools/cogniswitch/tool.py | 399 ++++++++++++++++++ .../unit_tests/agent_toolkits/test_imports.py | 1 + .../tests/unit_tests/tools/test_imports.py | 4 + .../tests/unit_tests/tools/test_public_api.py | 4 + 10 files changed, 816 insertions(+) create mode 100644 docs/docs/integrations/toolkits/cogniswitch.ipynb create mode 100644 libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py create mode 100644 libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py create mode 100644 libs/community/langchain_community/tools/cogniswitch/__init__.py create mode 100644 libs/community/langchain_community/tools/cogniswitch/tool.py diff --git a/docs/docs/integrations/toolkits/cogniswitch.ipynb b/docs/docs/integrations/toolkits/cogniswitch.ipynb new file mode 100644 index 0000000000000..836f425cf6055 --- /dev/null +++ b/docs/docs/integrations/toolkits/cogniswitch.ipynb @@ -0,0 +1,326 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "19062701", + "metadata": {}, + "source": [ + "## Cogniswitch Tools\n", + "\n", + "**Use CogniSwitch to build production-ready applications that can consume, organize and retrieve knowledge flawlessly. Using the framework of your choice, in this case LangChain, CogniSwitch helps alleviate the stress of decision making when it comes to choosing the right storage and retrieval formats. It also eradicates reliability issues and hallucinations in the responses that are generated. Get started by interacting with your knowledge in just two simple steps.**\n", + "\n", + "Visit [https://www.cogniswitch.ai/developer to register](https://www.cogniswitch.ai/developer?utm_source=langchain&utm_medium=langchainbuild&utm_id=dev).\n\n", + "**Registration:** \n\n", + "- Sign up with your email and verify your registration \n\n", + "- You will get a mail with a platform token and an OAuth token for using the services.\n\n\n", + "\n", + "**step 1: Instantiate the toolkit and get the tools:**\n\n", + "- Instantiate the CogniSwitch toolkit with the CogniSwitch token, OpenAI API key and OAuth token, and get the tools. \n", + "\n", + "**step 2: Instantiate the agent with the tools and llm:**\n", + "- Instantiate the agent executor with the list of CogniSwitch tools and the LLM.\n", + "\n", + "**step 3: CogniSwitch Store Tool:** \n", + "\n", + "***CogniSwitch knowledge source file tool***\n", + "- Use the agent to upload a file by giving the file path (formats that are currently supported are .pdf, .docx, .doc, .txt, and .html). \n", + "- The content from the file will be processed by CogniSwitch and stored in your knowledge store. \n", + "\n", + "***CogniSwitch knowledge source url tool***\n", + "- Use the agent to upload a URL. \n", + "- The content from the URL will be processed by CogniSwitch and stored in your knowledge store. \n", + "\n", + "**step 4: CogniSwitch Status Tool:**\n", + "- Use the agent to know the status of the document uploaded with a document name.\n", + "- You can also check the status of document processing in the CogniSwitch console. 
\n", + "\n", + "**step 5: CogniSwitch Answer Tool:**\n", + "- Use the agent to ask your question.\n", + "- You will get the answer from your knowledge as the response. \n" + ] + }, + { + "cell_type": "markdown", + "id": "1435b193", + "metadata": {}, + "source": [ + "### Import necessary libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8d86323b", + "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "import os\n", + "\n", + "from langchain.agents.agent_toolkits import create_conversational_retrieval_agent\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain_community.agent_toolkits import CogniswitchToolkit" + ] + }, + { + "cell_type": "markdown", + "id": "6e6acf0e", + "metadata": {}, + "source": [ + "### Cogniswitch platform token, OAuth token and OpenAI API key" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3d2dfc9f", + "metadata": {}, + "outputs": [], + "source": [ + "cs_token = \"Your CogniSwitch token\"\n", + "OAI_token = \"Your OpenAI API token\"\n", + "oauth_token = \"Your CogniSwitch authentication token\"\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = OAI_token" + ] + }, + { + "cell_type": "markdown", + "id": "320e02fc", + "metadata": {}, + "source": [ + "### Instantiate the cogniswitch toolkit with the credentials" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "89f58167", + "metadata": {}, + "outputs": [], + "source": [ + "cogniswitch_toolkit = CogniswitchToolkit(\n", + " cs_token=cs_token, OAI_token=OAI_token, apiKey=oauth_token\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "16901682", + "metadata": {}, + "source": [ + "### Get the list of cogniswitch tools" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "288d07f6", + "metadata": {}, + "outputs": [], + "source": [ + "tool_lst = cogniswitch_toolkit.get_tools()" + ] + }, + { + "cell_type": "markdown", + "id": "4aae43a3", + "metadata": {}, + "source": [ + "### Instantiate the llm" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4d67e5bb", + "metadata": {}, + "outputs": [], + "source": [ + "llm = ChatOpenAI(\n", + " temperature=0,\n", + " openai_api_key=OAI_token,\n", + " max_tokens=1500,\n", + " model_name=\"gpt-3.5-turbo-0613\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "04179282", + "metadata": {}, + "source": [ + "### Create a agent executor" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2153e758", + "metadata": {}, + "outputs": [], + "source": [ + "agent_executor = create_conversational_retrieval_agent(llm, tool_lst, verbose=False)" + ] + }, + { + "cell_type": "markdown", + "id": "42c9890e", + "metadata": {}, + "source": [ + "### Invoke the agent to upload a URL" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "794b4fba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The URL https://cogniswitch.ai/developer has been uploaded successfully. The status of the document is currently being processed. 
You will receive an email notification once the processing is complete.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"upload this url https://cogniswitch.ai/developer\")\n", + "\n", + "print(response[\"output\"])" + ] + }, + { + "cell_type": "markdown", + "id": "544fe8f9", + "metadata": {}, + "source": [ + "### Invoke the agent to upload a File" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fd0addfc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The file example_file.txt has been uploaded successfully. The status of the document is currently being processed. You will receive an email notification once the processing is complete.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"upload this file example_file.txt\")\n", + "\n", + "print(response[\"output\"])" + ] + }, + { + "cell_type": "markdown", + "id": "02827e1b", + "metadata": {}, + "source": [ + "### Invoke the agent to get the status of a document" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f424e6c5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The status of the document example_file.txt is as follows:\n", + "\n", + "- Created On: 2024-01-22T19:07:42.000+00:00\n", + "- Modified On: 2024-01-22T19:07:42.000+00:00\n", + "- Document Entry ID: 153\n", + "- Status: 0 (Processing)\n", + "- Original File Name: example_file.txt\n", + "- Saved File Name: 1705950460069example_file29393011.txt\n", + "\n", + "The document is currently being processed.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"Tell me the status of this document example_file.txt\")\n", + "\n", + "print(response[\"output\"])" + ] + }, + { + "cell_type": "markdown", + "id": "0ba9aca9", + "metadata": {}, + "source": [ + "### Invoke the agent with query and get the answer" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e73e963f", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CogniSwitch can help develop GenAI applications in several ways:\n", + "\n", + "1. Knowledge Extraction: CogniSwitch can extract knowledge from various sources such as documents, websites, and databases. It can analyze and store data from these sources, making it easier to access and utilize the information for GenAI applications.\n", + "\n", + "2. Natural Language Processing: CogniSwitch has advanced natural language processing capabilities. It can understand and interpret human language, allowing GenAI applications to interact with users in a more conversational and intuitive manner.\n", + "\n", + "3. Sentiment Analysis: CogniSwitch can analyze the sentiment of text data, such as customer reviews or social media posts. This can be useful in developing GenAI applications that can understand and respond to the emotions and opinions of users.\n", + "\n", + "4. Knowledge Base Integration: CogniSwitch can integrate with existing knowledge bases or create new ones. This allows GenAI applications to access a vast amount of information and provide accurate and relevant responses to user queries.\n", + "\n", + "5. Document Analysis: CogniSwitch can analyze documents and extract key information such as entities, relationships, and concepts. 
This can be valuable in developing GenAI applications that can understand and process large amounts of textual data.\n", + "\n", + "Overall, CogniSwitch provides a range of AI-powered capabilities that can enhance the development of GenAI applications by enabling knowledge extraction, natural language processing, sentiment analysis, knowledge base integration, and document analysis.\n" + ] + } + ], + "source": [ + "response = agent_executor.invoke(\"How can cogniswitch help develop GenAI applications?\")\n", + "\n", + "print(response[\"output\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "langchain_repo", + "language": "python", + "name": "langchain_repo" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/community/langchain_community/agent_toolkits/__init__.py b/libs/community/langchain_community/agent_toolkits/__init__.py index 3f6bf3033190d..bbb3820cb3fac 100644 --- a/libs/community/langchain_community/agent_toolkits/__init__.py +++ b/libs/community/langchain_community/agent_toolkits/__init__.py @@ -18,6 +18,7 @@ from langchain_community.agent_toolkits.azure_cognitive_services import ( AzureCognitiveServicesToolkit, ) +from langchain_community.agent_toolkits.cogniswitch.toolkit import CogniswitchToolkit from langchain_community.agent_toolkits.connery import ConneryToolkit from langchain_community.agent_toolkits.file_management.toolkit import ( FileManagementToolkit, @@ -51,6 +52,7 @@ "AINetworkToolkit", "AmadeusToolkit", "AzureCognitiveServicesToolkit", + "CogniswitchToolkit", "ConneryToolkit", "FileManagementToolkit", "GmailToolkit", diff --git a/libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py b/libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py new file mode 100644 index 0000000000000..df1d84976c49a --- /dev/null +++ b/libs/community/langchain_community/agent_toolkits/cogniswitch/__init__.py @@ -0,0 +1 @@ +"""CogniSwitch Toolkit""" diff --git a/libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py b/libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py new file mode 100644 index 0000000000000..36ec5ae0f3609 --- /dev/null +++ b/libs/community/langchain_community/agent_toolkits/cogniswitch/toolkit.py @@ -0,0 +1,40 @@ +from typing import List + +from langchain_community.agent_toolkits.base import BaseToolkit +from langchain_community.tools import BaseTool +from langchain_community.tools.cogniswitch.tool import ( + CogniswitchKnowledgeRequest, + CogniswitchKnowledgeSourceFile, + CogniswitchKnowledgeSourceURL, + CogniswitchKnowledgeStatus, +) + + +class CogniswitchToolkit(BaseToolkit): + """ + Toolkit for CogniSwitch. 
+ + Use the toolkit to get all the tools present in the cogniswitch and + use them to interact with your knowledge + """ + + cs_token: str # cogniswitch token + OAI_token: str # OpenAI API token + apiKey: str # Cogniswitch OAuth token + + def get_tools(self) -> List[BaseTool]: + """Get the tools in the toolkit.""" + return [ + CogniswitchKnowledgeStatus( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + CogniswitchKnowledgeRequest( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + CogniswitchKnowledgeSourceFile( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + CogniswitchKnowledgeSourceURL( + cs_token=self.cs_token, OAI_token=self.OAI_token, apiKey=self.apiKey + ), + ] diff --git a/libs/community/langchain_community/tools/__init__.py b/libs/community/langchain_community/tools/__init__.py index 3456ef10bc1e6..59ad157de5bf8 100644 --- a/libs/community/langchain_community/tools/__init__.py +++ b/libs/community/langchain_community/tools/__init__.py @@ -118,6 +118,32 @@ def _import_brave_search_tool() -> Any: return BraveSearch +def _import_cogniswitch_store_file_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import ( + CogniswitchKnowledgeSourceFile, + ) + + return CogniswitchKnowledgeSourceFile + + +def _import_cogniswitch_store_url_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import CogniswitchKnowledgeSourceURL + + return CogniswitchKnowledgeSourceURL + + +def _import_cogniswitch_answer_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import CogniswitchKnowledgeRequest + + return CogniswitchKnowledgeRequest + + +def _import_cogniswitch_knowledge_status_tool() -> Any: + from langchain_community.tools.cogniswitch.tool import CogniswitchKnowledgeStatus + + return CogniswitchKnowledgeStatus + + def _import_connery_tool() -> Any: from langchain_community.tools.connery import ConneryAction @@ -803,6 +829,14 @@ def __getattr__(name: str) -> Any: return _import_bing_search_tool_BingSearchRun() elif name == "BraveSearch": return _import_brave_search_tool() + elif name == "CogniswitchKnowledgeSourceFile": + return _import_cogniswitch_store_file_tool() + elif name == "CogniswitchKnowledgeSourceURL": + return _import_cogniswitch_store_url_tool() + elif name == "CogniswitchKnowledgeRequest": + return _import_cogniswitch_answer_tool() + elif name == "CogniswitchKnowledgeStatus": + return _import_cogniswitch_knowledge_status_tool() elif name == "ConneryAction": return _import_connery_tool() elif name == "DuckDuckGoSearchResults": @@ -1043,6 +1077,10 @@ def __getattr__(name: str) -> Any: "BingSearchRun", "BraveSearch", "ClickTool", + "CogniswitchKnowledgeSourceFile", + "CogniswitchKnowledgeSourceURL", + "CogniswitchKnowledgeRequest", + "CogniswitchKnowledgeStatus", "ConneryAction", "CopyFileTool", "CurrentWebPageTool", diff --git a/libs/community/langchain_community/tools/cogniswitch/__init__.py b/libs/community/langchain_community/tools/cogniswitch/__init__.py new file mode 100644 index 0000000000000..3a89a8d7d3a9a --- /dev/null +++ b/libs/community/langchain_community/tools/cogniswitch/__init__.py @@ -0,0 +1 @@ +"Cogniswitch Tools" diff --git a/libs/community/langchain_community/tools/cogniswitch/tool.py b/libs/community/langchain_community/tools/cogniswitch/tool.py new file mode 100644 index 0000000000000..e2878e6ed544e --- /dev/null +++ b/libs/community/langchain_community/tools/cogniswitch/tool.py @@ -0,0 +1,399 @@ +from __future__ import annotations + +from typing 
import Any, Dict, Optional + +import requests +from langchain_core.callbacks import CallbackManagerForToolRun +from langchain_core.tools import BaseTool + + +class CogniswitchKnowledgeRequest(BaseTool): + """ + A tool for interacting with the Cogniswitch service to answer questions. + name: str = "cogniswitch_knowledge_request" + description: str = ( + "A wrapper around cogniswitch service to answer the question + from the knowledge base." + "Input should be a search query." + ) + """ + + name: str = "cogniswitch_knowledge_request" + description: str = """A wrapper around cogniswitch service to + answer the question from the knowledge base.""" + cs_token: str + OAI_token: str + apiKey: str + api_url = "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeRequest" + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Use the tool to answer a query. + + Args: + query (str): Natural language query, + that you would like to ask to your knowledge graph. + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. + """ + response = self.answer_cs(self.cs_token, self.OAI_token, query, self.apiKey) + return response + + def answer_cs(self, cs_token: str, OAI_token: str, query: str, apiKey: str) -> dict: + """ + Send a query to the Cogniswitch service and retrieve the response. + + Args: + cs_token (str): Cogniswitch token. + OAI_token (str): OpenAI token. + apiKey (str): OAuth token. + query (str): Query to be answered. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + if not cs_token: + raise ValueError("Missing cs_token") + if not OAI_token: + raise ValueError("Missing OpenAI token") + if not apiKey: + raise ValueError("Missing cogniswitch OAuth token") + if not query: + raise ValueError("Missing input query") + + headers = { + "apiKey": apiKey, + "platformToken": cs_token, + "openAIToken": OAI_token, + } + + data = {"query": query} + response = requests.post(self.api_url, headers=headers, verify=False, data=data) + return response.json() + + +class CogniswitchKnowledgeStatus(BaseTool): + """ + A cogniswitch tool for interacting with the Cogniswitch services to know the + status of the document or url uploaded. + name: str = "cogniswitch_knowledge_status" + description: str = ( + "A wrapper around cogniswitch services to know the status of + the document uploaded from a url or a file. " + "Input should be a file name or the url link" + ) + """ + + name: str = "cogniswitch_knowledge_status" + description: str = """A wrapper around cogniswitch services to know + the status of the document uploaded from a url or a file.""" + cs_token: str + OAI_token: str + apiKey: str + knowledge_status_url = ( + "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeSource/status" + ) + + def _run( + self, + document_name: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Use the tool to know the status of the document uploaded. + + Args: + document_name (str): name of the document or + the url uploaded + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. 
+ """ + response = self.knowledge_status(document_name) + return response + + def knowledge_status(self, document_name: str) -> dict: + """ + Use this function to know the status of the document or the URL uploaded + Args: + document_name (str): The document name or the url that is uploaded. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + + params = {"docName": document_name, "platformToken": self.cs_token} + headers = { + "apiKey": self.apiKey, + "openAIToken": self.OAI_token, + "platformToken": self.cs_token, + } + response = requests.get( + self.knowledge_status_url, + headers=headers, + params=params, + verify=False, + ) + if response.status_code == 200: + source_info = response.json() + source_data = dict(source_info[-1]) + status = source_data.get("status") + if status == 0: + source_data["status"] = "SUCCESS" + elif status == 1: + source_data["status"] = "PROCESSING" + elif status == 2: + source_data["status"] = "UPLOADED" + elif status == 3: + source_data["status"] = "FAILURE" + elif status == 4: + source_data["status"] = "UPLOAD_FAILURE" + elif status == 5: + source_data["status"] = "REJECTED" + + if "filePath" in source_data.keys(): + source_data.pop("filePath") + if "savedFileName" in source_data.keys(): + source_data.pop("savedFileName") + if "integrationConfigId" in source_data.keys(): + source_data.pop("integrationConfigId") + if "metaData" in source_data.keys(): + source_data.pop("metaData") + if "docEntryId" in source_data.keys(): + source_data.pop("docEntryId") + return source_data + else: + # error_message = response.json()["message"] + return { + "message": response.status_code, + } + + +class CogniswitchKnowledgeSourceFile(BaseTool): + """ + A cogniswitch tool for interacting with the Cogniswitch services to store data. + name: str = "cogniswitch_knowledge_source_file" + description: str = ( + "This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input." + ) + """ + + name: str = "cogniswitch_knowledge_source_file" + description: str = """ + This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input. + """ + cs_token: str + OAI_token: str + apiKey: str + knowledgesource_file = ( + "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeSource/file" + ) + + def _run( + self, + file: Optional[str] = None, + document_name: Optional[str] = None, + document_description: Optional[str] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Execute the tool to store the data given from a file. + This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input. + + Args: + file Optional[str]: The file path of your knowledge + document_name Optional[str]: Name of your knowledge document + document_description Optional[str]: Description of your knowledge document + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. 
+ """ + if not file: + return { + "message": "No input provided", + } + else: + response = self.store_data( + file=file, + document_name=document_name, + document_description=document_description, + ) + return response + + def store_data( + self, + file: Optional[str], + document_name: Optional[str], + document_description: Optional[str], + ) -> dict: + """ + Store data using the Cogniswitch service. + This calls the CogniSwitch services to analyze & store data from a file. + If the input looks like a file path, assign that string value to file key. + Assign document name & description only if provided in input. + + Args: + file (Optional[str]): file path of your file. + the current files supported by the files are + .txt, .pdf, .docx, .doc, .html + document_name (Optional[str]): Name of the document you are uploading. + document_description (Optional[str]): Description of the document. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + headers = { + "apiKey": self.apiKey, + "openAIToken": self.OAI_token, + "platformToken": self.cs_token, + } + data: Dict[str, Any] + if not document_name: + document_name = "" + if not document_description: + document_description = "" + + if file is not None: + files = {"file": open(file, "rb")} + + data = { + "documentName": document_name, + "documentDescription": document_description, + } + response = requests.post( + self.knowledgesource_file, + headers=headers, + verify=False, + data=data, + files=files, + ) + if response.status_code == 200: + return response.json() + else: + return {"message": "Bad Request"} + + +class CogniswitchKnowledgeSourceURL(BaseTool): + """ + A cogniswitch tool for interacting with the Cogniswitch services to store data. + name: str = "cogniswitch_knowledge_source_url" + description: str = ( + "This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input" + ) + """ + + name: str = "cogniswitch_knowledge_source_url" + description: str = """ + This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input""" + cs_token: str + OAI_token: str + apiKey: str + knowledgesource_url = ( + "https://api.cogniswitch.ai:8243/cs-api/0.0.1/cs/knowledgeSource/url" + ) + + def _run( + self, + url: Optional[str] = None, + document_name: Optional[str] = None, + document_description: Optional[str] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> Dict[str, Any]: + """ + Execute the tool to store the data given from a url. + This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input. + + Args: + url Optional[str]: The website/url link of your knowledge + document_name Optional[str]: Name of your knowledge document + document_description Optional[str]: Description of your knowledge document + run_manager (Optional[CallbackManagerForChainRun]): + Manager for chain run callbacks. + + Returns: + Dict[str, Any]: Output dictionary containing + the 'response' from the service. 
+ """ + if not url: + return { + "message": "No input provided", + } + response = self.store_data( + url=url, + document_name=document_name, + document_description=document_description, + ) + return response + + def store_data( + self, + url: Optional[str], + document_name: Optional[str], + document_description: Optional[str], + ) -> dict: + """ + Store data using the Cogniswitch service. + This calls the CogniSwitch services to analyze & store data from a url. + the URL is provided in input, assign that value to the url key. + Assign document name & description only if provided in input. + + Args: + url (Optional[str]): URL link. + document_name (Optional[str]): Name of the document you are uploading. + document_description (Optional[str]): Description of the document. + + Returns: + dict: Response JSON from the Cogniswitch service. + """ + headers = { + "apiKey": self.apiKey, + "openAIToken": self.OAI_token, + "platformToken": self.cs_token, + } + data: Dict[str, Any] + if not document_name: + document_name = "" + if not document_description: + document_description = "" + if not url: + return { + "message": "No input provided", + } + else: + data = {"url": url} + response = requests.post( + self.knowledgesource_url, + headers=headers, + verify=False, + data=data, + ) + if response.status_code == 200: + return response.json() + else: + return {"message": "Bad Request"} diff --git a/libs/community/tests/unit_tests/agent_toolkits/test_imports.py b/libs/community/tests/unit_tests/agent_toolkits/test_imports.py index 3a7ca10efdf26..c2dbdd3833399 100644 --- a/libs/community/tests/unit_tests/agent_toolkits/test_imports.py +++ b/libs/community/tests/unit_tests/agent_toolkits/test_imports.py @@ -28,6 +28,7 @@ "create_pbi_chat_agent", "create_spark_sql_agent", "create_sql_agent", + "CogniswitchToolkit", ] diff --git a/libs/community/tests/unit_tests/tools/test_imports.py b/libs/community/tests/unit_tests/tools/test_imports.py index 4bf70aa0842f9..95fd4315575a5 100644 --- a/libs/community/tests/unit_tests/tools/test_imports.py +++ b/libs/community/tests/unit_tests/tools/test_imports.py @@ -24,6 +24,10 @@ "BingSearchRun", "BraveSearch", "ClickTool", + "CogniswitchKnowledgeSourceFile", + "CogniswitchKnowledgeSourceURL", + "CogniswitchKnowledgeRequest", + "CogniswitchKnowledgeStatus", "ConneryAction", "CopyFileTool", "CurrentWebPageTool", diff --git a/libs/community/tests/unit_tests/tools/test_public_api.py b/libs/community/tests/unit_tests/tools/test_public_api.py index 31ea8327022e1..1595dd4710917 100644 --- a/libs/community/tests/unit_tests/tools/test_public_api.py +++ b/libs/community/tests/unit_tests/tools/test_public_api.py @@ -25,6 +25,10 @@ "BingSearchRun", "BraveSearch", "ClickTool", + "CogniswitchKnowledgeSourceFile", + "CogniswitchKnowledgeStatus", + "CogniswitchKnowledgeSourceURL", + "CogniswitchKnowledgeRequest", "ConneryAction", "CopyFileTool", "CurrentWebPageTool", From ea61302f71ee2a05f2e24b43372c34239b340697 Mon Sep 17 00:00:00 2001 From: Karim Lalani Date: Mon, 19 Feb 2024 12:54:52 -0600 Subject: [PATCH 09/31] community[patch]: bug fix - add empty metadata when metadata not provided (#17669) Code fix to include empty medata dictionary to aadd_texts if metadata is not provided. 
--- libs/community/langchain_community/vectorstores/surrealdb.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/community/langchain_community/vectorstores/surrealdb.py b/libs/community/langchain_community/vectorstores/surrealdb.py index ef65c5ec6b002..34f002305e1fd 100644 --- a/libs/community/langchain_community/vectorstores/surrealdb.py +++ b/libs/community/langchain_community/vectorstores/surrealdb.py @@ -116,6 +116,8 @@ async def aadd_texts( data = {"text": text, "embedding": embeddings[idx]} if metadatas is not None and idx < len(metadatas): data["metadata"] = metadatas[idx] # type: ignore[assignment] + else: + data["metadata"] = [] record = await self.sdb.create( self.collection, data, From ad285ca15c9f0bc14a90824717b8e990f19245f4 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:13:33 -0700 Subject: [PATCH 10/31] community[patch]: Release 0.0.21 (#17750) --- libs/community/poetry.lock | 24 ++++++++++++++++++++---- libs/community/pyproject.toml | 4 ++-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/libs/community/poetry.lock b/libs/community/poetry.lock index 1a1dd4ec4cb52..be5c69eebb2ad 100644 --- a/libs/community/poetry.lock +++ b/libs/community/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aenum" @@ -3140,7 +3140,6 @@ files = [ {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"}, {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"}, {file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"}, - {file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"}, @@ -3224,6 +3223,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -3650,7 +3650,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.1.23" +version = "0.1.24" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -5457,6 +5457,8 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = 
"psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -5499,6 +5501,7 @@ files = [ {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, @@ -5507,6 +5510,8 @@ files = [ {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, @@ -6490,6 +6495,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -6497,8 +6503,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -6515,6 +6529,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -6522,6 +6537,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -9146,4 +9162,4 @@ extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "as [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "a012fa21f1c46644a7efbfd0c9174c92fa8b183b7b0fe1d3ae6ed57797de43e9" +content-hash = "5fdd9b2eb766411463fa27e19433daf5d5325f2af01ddd93b6a594e3e02a31de" diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml index 7df0de6149d3b..abf8dc89b6384 100644 --- a/libs/community/pyproject.toml +++ b/libs/community/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-community" -version = "0.0.20" +version = "0.0.21" description = "Community contributed LangChain integrations." authors = [] license = "MIT" @@ -9,7 +9,7 @@ repository = "https://github.com/langchain-ai/langchain" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.21,<0.2" +langchain-core = ">=0.1.24,<0.2" SQLAlchemy = ">=1.4,<3" requests = "^2" PyYAML = ">=5.3" From a9d3c100a20634a257898594db7c1cdfdb60af9e Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:22:31 -0700 Subject: [PATCH 11/31] infra: PR template nits (#17752) --- .github/PULL_REQUEST_TEMPLATE.md | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4d86dac6a59cf..8d776064019a7 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,19 +1,24 @@ Thank you for contributing to LangChain! -Checklist: - -- [ ] PR title: Please title your PR "package: description", where "package" is whichever of langchain, community, core, experimental, etc. is being modified. 
Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. +- [ ] **PR title**: "package: description" + - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" -- [ ] PR message: **Delete this entire template message** and replace it with the following bulleted list + + +- [ ] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! -- [ ] Pass lint and test: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified to check that you're passing lint and testing. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ -- [ ] Add tests and docs: If you're adding a new integration, please include + + +- [ ] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. + +- [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ + Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. From 441448372d5dc649bfd0b8069ea18c18373a0341 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:27:37 -0700 Subject: [PATCH 12/31] langchain[patch]: Release 0.1.8 (#17751) --- libs/langchain/poetry.lock | 35 +++++++++++++++++++++++++++++++---- libs/langchain/pyproject.toml | 4 ++-- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index a8e98f8c65b43..a34dda564d618 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "aiodns" @@ -3049,7 +3049,6 @@ files = [ {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"}, {file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"}, {file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"}, - {file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"}, {file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"}, @@ -3133,6 +3132,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -3474,7 +3474,7 @@ url = "../community" [[package]] name = "langchain-core" -version = "0.1.23" +version = "0.1.24" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -3743,6 +3743,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = 
"MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -5269,6 +5279,8 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -5311,6 +5323,7 @@ files = [ {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, @@ -5319,6 +5332,8 @@ files = [ {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = 
"sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, + {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, @@ -5791,6 +5806,7 @@ files = [ {file = "pymongo-4.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6422b6763b016f2ef2beedded0e546d6aa6ba87910f9244d86e0ac7690f75c96"}, {file = "pymongo-4.5.0-cp312-cp312-win32.whl", hash = "sha256:77cfff95c1fafd09e940b3fdcb7b65f11442662fad611d0e69b4dd5d17a81c60"}, {file = "pymongo-4.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:e57d859b972c75ee44ea2ef4758f12821243e99de814030f69a3decb2aa86807"}, + {file = "pymongo-4.5.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8443f3a8ab2d929efa761c6ebce39a6c1dca1c9ac186ebf11b62c8fe1aef53f4"}, {file = "pymongo-4.5.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2b0176f9233a5927084c79ff80b51bd70bfd57e4f3d564f50f80238e797f0c8a"}, {file = "pymongo-4.5.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:89b3f2da57a27913d15d2a07d58482f33d0a5b28abd20b8e643ab4d625e36257"}, {file = "pymongo-4.5.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:5caee7bd08c3d36ec54617832b44985bd70c4cbd77c5b313de6f7fce0bb34f93"}, @@ -6307,6 +6323,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -6314,8 +6331,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = 
"PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -6332,6 +6357,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -6339,6 +6365,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = 
"sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -9123,4 +9150,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "e3419e64eee15fa3e93c00e2b76d4a29d9b08c4299291ec3fc65802b2aede5c0" +content-hash = "fe87b11389305796e6942af6b3e86632d8ba7fdc75b2ab93d7a9a566e3c71086" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 89387d7391033..2c9ec410757a2 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain" -version = "0.1.7" +version = "0.1.8" description = "Building applications with LLMs through composability" authors = [] license = "MIT" @@ -12,7 +12,7 @@ langchain-server = "langchain.server:main" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.22,<0.2" +langchain-core = ">=0.1.24,<0.2" langchain-community = ">=0.0.20,<0.1" langsmith = "^0.1.0" pydantic = ">=1,<3" From da7bca21785490acc5d8307245e24d574b86da48 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 13:58:32 -0700 Subject: [PATCH 13/31] langchain[patch]: bump community to 0.0.21 (#17754) --- libs/langchain/poetry.lock | 6 +++--- libs/langchain/pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index a34dda564d618..1028d8b813dc4 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -3446,7 +3446,7 @@ files = [ [[package]] name = "langchain-community" -version = "0.0.20" +version = "0.0.21" description = "Community contributed LangChain integrations." 
optional = false python-versions = ">=3.8.1,<4.0" @@ -3456,7 +3456,7 @@ develop = true [package.dependencies] aiohttp = "^3.8.3" dataclasses-json = ">= 0.5.7, < 0.7" -langchain-core = ">=0.1.21,<0.2" +langchain-core = ">=0.1.24,<0.2" langsmith = "^0.1.0" numpy = "^1" PyYAML = ">=5.3" @@ -9150,4 +9150,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "fe87b11389305796e6942af6b3e86632d8ba7fdc75b2ab93d7a9a566e3c71086" +content-hash = "417ecc70e983739852f1556da647cf6c97ae6236b670d37227a19d1be92cb66d" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 2c9ec410757a2..17f7e50f76d55 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -13,7 +13,7 @@ langchain-server = "langchain.server:main" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" langchain-core = ">=0.1.24,<0.2" -langchain-community = ">=0.0.20,<0.1" +langchain-community = ">=0.0.21,<0.1" langsmith = "^0.1.0" pydantic = ">=1,<3" SQLAlchemy = ">=1.4,<3" From 5ed16adbde84f7fdd93284a022a23a72a28956d8 Mon Sep 17 00:00:00 2001 From: Bagatur <22008038+baskaryan@users.noreply.github.com> Date: Mon, 19 Feb 2024 14:12:22 -0700 Subject: [PATCH 14/31] experimental[patch]: Release 0.0.52 (#17763) --- libs/experimental/poetry.lock | 42 ++++++++++++++++++++++++-------- libs/experimental/pyproject.toml | 6 ++--- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/libs/experimental/poetry.lock b/libs/experimental/poetry.lock index f575216ce2b9e..7975be85b940d 100644 --- a/libs/experimental/poetry.lock +++ b/libs/experimental/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -1328,6 +1328,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -1641,7 +1642,7 @@ files = [ [[package]] name = "langchain" -version = "0.1.7" +version = "0.1.8" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -1653,8 +1654,8 @@ aiohttp = "^3.8.3" async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""} dataclasses-json = ">= 0.5.7, < 0.7" jsonpatch = "^1.33" -langchain-community = ">=0.0.20,<0.1" -langchain-core = ">=0.1.22,<0.2" +langchain-community = ">=0.0.21,<0.1" +langchain-core = ">=0.1.24,<0.2" langsmith = "^0.1.0" numpy = "^1" pydantic = ">=1,<3" @@ -1684,7 +1685,7 @@ url = "../langchain" [[package]] name = "langchain-community" -version = "0.0.20" +version = "0.0.21" description = "Community contributed LangChain integrations." 
optional = false python-versions = ">=3.8.1,<4.0" @@ -1694,7 +1695,7 @@ develop = true [package.dependencies] aiohttp = "^3.8.3" dataclasses-json = ">= 0.5.7, < 0.7" -langchain-core = ">=0.1.21,<0.2" +langchain-core = ">=0.1.24,<0.2" langsmith = "^0.1.0" numpy = "^1" PyYAML = ">=5.3" @@ -1712,7 +1713,7 @@ url = "../community" [[package]] name = "langchain-core" -version = "0.1.23" +version = "0.1.24" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -1792,6 +1793,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2343,8 +2354,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3034,6 +3045,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -3041,8 +3053,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3059,6 +3079,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -3066,6 +3087,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -4063,7 +4085,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""} +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} typing-extensions = ">=4.2.0" [package.extras] @@ -5038,4 +5060,4 @@ extended-testing = ["faker", "jinja2", "pandas", "presidio-analyzer", "presidio- [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "0e4b297b0a8c595fbfe1e8a00d5a13057b1bdd4a0ce08d415ca4c4a7712cee88" +content-hash = "577689c4eebd644296ea46af03ec0eead2b2877b739c0989b59dc633f904099f" diff --git a/libs/experimental/pyproject.toml b/libs/experimental/pyproject.toml index be3246167aac8..307dd276f100a 100644 --- a/libs/experimental/pyproject.toml +++ b/libs/experimental/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-experimental" -version = "0.0.51" +version = "0.0.52" description = "Building applications with LLMs through composability" authors = [] license = "MIT" @@ -10,8 +10,8 @@ repository = "https://github.com/langchain-ai/langchain" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = "^0.1.16" -langchain = 
"^0.1.5" +langchain-core = "^0.1.24" +langchain = "^0.1.8" presidio-anonymizer = {version = "^2.2.352", optional = true} presidio-analyzer = {version = "^2.2.352", optional = true} faker = {version = "^19.3.1", optional = true} From 07ee41d28421334101596983baac1eaf8e2a4d8b Mon Sep 17 00:00:00 2001 From: Nuno Campos Date: Mon, 19 Feb 2024 13:26:42 -0800 Subject: [PATCH 15/31] Cache calls to create_model for get_input_schema and get_output_schema (#17755) Thank you for contributing to LangChain! - [ ] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" - [ ] **PR message**: ***Delete this entire checklist*** and replace with - **Description:** a description of the change - **Issue:** the issue # it fixes, if applicable - **Dependencies:** any dependencies required for this change - **Twitter handle:** if your PR gets announced, and you'd like a mention, we'll gladly shout you out! - [ ] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [ ] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17. --- .gitignore | 4 +- libs/core/Makefile | 3 + libs/core/langchain_core/prompts/base.py | 3 +- libs/core/langchain_core/runnables/base.py | 22 +------- libs/core/langchain_core/runnables/history.py | 3 +- .../langchain_core/runnables/passthrough.py | 8 ++- libs/core/langchain_core/runnables/utils.py | 32 +++++++++++ libs/core/poetry.lock | 55 ++++++++++++++++++- libs/core/pyproject.toml | 1 + libs/langchain/langchain/chains/base.py | 9 +-- .../chains/combine_documents/base.py | 3 +- .../chains/combine_documents/map_reduce.py | 3 +- .../chains/combine_documents/map_rerank.py | 3 +- 13 files changed, 112 insertions(+), 37 deletions(-) diff --git a/.gitignore b/.gitignore index 407a65571adcd..db21b911098a8 100644 --- a/.gitignore +++ b/.gitignore @@ -177,4 +177,6 @@ docs/docs/build docs/docs/node_modules docs/docs/yarn.lock _dist -docs/docs/templates \ No newline at end of file +docs/docs/templates + +prof diff --git a/libs/core/Makefile b/libs/core/Makefile index ab8e9cadf03b3..d577f66cc7541 100644 --- a/libs/core/Makefile +++ b/libs/core/Makefile @@ -15,6 +15,9 @@ tests: test_watch: poetry run ptw --snapshot-update --now . 
-- -vv -x tests/unit_tests +test_profile: + poetry run pytest -vv tests/unit_tests/ --profile-svg + check_imports: $(shell find langchain_core -name '*.py') poetry run python ./scripts/check_imports.py $^ diff --git a/libs/core/langchain_core/prompts/base.py b/libs/core/langchain_core/prompts/base.py index 96cfbf63740e1..9f8ae5082f9f7 100644 --- a/libs/core/langchain_core/prompts/base.py +++ b/libs/core/langchain_core/prompts/base.py @@ -25,9 +25,10 @@ PromptValue, StringPromptValue, ) -from langchain_core.pydantic_v1 import BaseModel, Field, create_model, root_validator +from langchain_core.pydantic_v1 import BaseModel, Field, root_validator from langchain_core.runnables import RunnableConfig, RunnableSerializable from langchain_core.runnables.config import ensure_config +from langchain_core.runnables.utils import create_model if TYPE_CHECKING: from langchain_core.documents import Document diff --git a/libs/core/langchain_core/runnables/base.py b/libs/core/langchain_core/runnables/base.py index 4e4a79a50cc4e..8c9f0b8b02378 100644 --- a/libs/core/langchain_core/runnables/base.py +++ b/libs/core/langchain_core/runnables/base.py @@ -38,7 +38,7 @@ from langchain_core._api import beta_decorator from langchain_core.load.dump import dumpd from langchain_core.load.serializable import Serializable -from langchain_core.pydantic_v1 import BaseConfig, BaseModel, Field, create_model +from langchain_core.pydantic_v1 import BaseModel, Field from langchain_core.runnables.config import ( RunnableConfig, acall_func_with_variable_args, @@ -65,6 +65,7 @@ accepts_config, accepts_context, accepts_run_manager, + create_model, gather_with_concurrency, get_function_first_arg_dict_keys, get_function_nonlocals, @@ -95,10 +96,6 @@ Other = TypeVar("Other") -class _SchemaConfig(BaseConfig): - arbitrary_types_allowed = True - - class Runnable(Generic[Input, Output], ABC): """A unit of work that can be invoked, batched, streamed, transformed and composed. 
@@ -301,7 +298,6 @@ def get_input_schema( return create_model( self.get_name("Input"), __root__=(root_type, None), - __config__=_SchemaConfig, ) @property @@ -334,7 +330,6 @@ def get_output_schema( return create_model( self.get_name("Output"), __root__=(root_type, None), - __config__=_SchemaConfig, ) @property @@ -371,7 +366,6 @@ def config_schema( ) for spec in config_specs }, - __config__=_SchemaConfig, ) if config_specs else None @@ -379,7 +373,6 @@ def config_schema( return create_model( # type: ignore[call-overload] self.get_name("Config"), - __config__=_SchemaConfig, **({"configurable": (configurable, None)} if configurable else {}), **{ field_name: (field_type, None) @@ -1691,7 +1684,6 @@ def _seq_input_schema( for k, v in next_input_schema.__fields__.items() if k not in first.mapper.steps }, - __config__=_SchemaConfig, ) elif isinstance(first, RunnablePick): return _seq_input_schema(steps[1:], config) @@ -1724,7 +1716,6 @@ def _seq_output_schema( for k, v in mapper_output_schema.__fields__.items() }, }, - __config__=_SchemaConfig, ) elif isinstance(last, RunnablePick): prev_output_schema = _seq_output_schema(steps[:-1], config) @@ -1738,14 +1729,12 @@ def _seq_output_schema( for k, v in prev_output_schema.__fields__.items() if k in last.keys }, - __config__=_SchemaConfig, ) else: field = prev_output_schema.__fields__[last.keys] return create_model( # type: ignore[call-overload] "RunnableSequenceOutput", __root__=(field.annotation, field.default), - __config__=_SchemaConfig, ) return last.get_output_schema(config) @@ -2598,7 +2587,6 @@ def get_input_schema( for k, v in step.get_input_schema(config).__fields__.items() if k != "__root__" }, - __config__=_SchemaConfig, ) return super().get_input_schema(config) @@ -2610,7 +2598,6 @@ def get_output_schema( return create_model( # type: ignore[call-overload] self.get_name("Output"), **{k: (v.OutputType, None) for k, v in self.steps.items()}, - __config__=_SchemaConfig, ) @property @@ -3250,13 +3237,11 @@ def get_input_schema( return create_model( self.get_name("Input"), **{item[1:-1]: (Any, None) for item in items}, # type: ignore - __config__=_SchemaConfig, ) else: return create_model( self.get_name("Input"), __root__=(List[Any], None), - __config__=_SchemaConfig, ) if self.InputType != Any: @@ -3266,7 +3251,6 @@ def get_input_schema( return create_model( self.get_name("Input"), **{key: (Any, None) for key in dict_keys}, # type: ignore - __config__=_SchemaConfig, ) return super().get_input_schema(config) @@ -3756,7 +3740,6 @@ def get_input_schema( List[self.bound.get_input_schema(config)], # type: ignore None, ), - __config__=_SchemaConfig, ) @property @@ -3773,7 +3756,6 @@ def get_output_schema( List[schema], # type: ignore None, ), - __config__=_SchemaConfig, ) @property diff --git a/libs/core/langchain_core/runnables/history.py b/libs/core/langchain_core/runnables/history.py index f9b68cd68e8b8..5037d2d9f5523 100644 --- a/libs/core/langchain_core/runnables/history.py +++ b/libs/core/langchain_core/runnables/history.py @@ -15,12 +15,13 @@ from langchain_core.chat_history import BaseChatMessageHistory from langchain_core.load.load import load -from langchain_core.pydantic_v1 import BaseModel, create_model +from langchain_core.pydantic_v1 import BaseModel from langchain_core.runnables.base import Runnable, RunnableBindingBase, RunnableLambda from langchain_core.runnables.config import run_in_executor from langchain_core.runnables.passthrough import RunnablePassthrough from langchain_core.runnables.utils import ( ConfigurableFieldSpec, + 
create_model, get_unique_config_specs, ) diff --git a/libs/core/langchain_core/runnables/passthrough.py b/libs/core/langchain_core/runnables/passthrough.py index 3e9277a7a2d88..17ddda9584c7b 100644 --- a/libs/core/langchain_core/runnables/passthrough.py +++ b/libs/core/langchain_core/runnables/passthrough.py @@ -20,7 +20,7 @@ cast, ) -from langchain_core.pydantic_v1 import BaseModel, create_model +from langchain_core.pydantic_v1 import BaseModel from langchain_core.runnables.base import ( Other, Runnable, @@ -36,7 +36,11 @@ patch_config, ) from langchain_core.runnables.graph import Graph -from langchain_core.runnables.utils import AddableDict, ConfigurableFieldSpec +from langchain_core.runnables.utils import ( + AddableDict, + ConfigurableFieldSpec, + create_model, +) from langchain_core.utils.aiter import atee, py_anext from langchain_core.utils.iter import safetee diff --git a/libs/core/langchain_core/runnables/utils.py b/libs/core/langchain_core/runnables/utils.py index 59d2c86203701..8ff332173cc69 100644 --- a/libs/core/langchain_core/runnables/utils.py +++ b/libs/core/langchain_core/runnables/utils.py @@ -5,6 +5,7 @@ import asyncio import inspect import textwrap +from functools import lru_cache from inspect import signature from itertools import groupby from typing import ( @@ -21,10 +22,13 @@ Protocol, Sequence, Set, + Type, TypeVar, Union, ) +from langchain_core.pydantic_v1 import BaseConfig, BaseModel +from langchain_core.pydantic_v1 import create_model as _create_model_base from langchain_core.runnables.schema import StreamEvent Input = TypeVar("Input", contravariant=True) @@ -489,3 +493,31 @@ def include_event(self, event: StreamEvent, root_type: str) -> bool: ) return include + + +class _SchemaConfig(BaseConfig): + arbitrary_types_allowed = True + frozen = True + + +def create_model( + __model_name: str, + **field_definitions: Any, +) -> Type[BaseModel]: + try: + return _create_model_cached(__model_name, **field_definitions) + except TypeError: + # something in field definitions is not hashable + return _create_model_base( + __model_name, __config__=_SchemaConfig, **field_definitions + ) + + +@lru_cache(maxsize=256) +def _create_model_cached( + __model_name: str, + **field_definitions: Any, +) -> Type[BaseModel]: + return _create_model_base( + __model_name, __config__=_SchemaConfig, **field_definitions + ) diff --git a/libs/core/poetry.lock b/libs/core/poetry.lock index ed548c2f62b6a..3078c2ae55e0b 100644 --- a/libs/core/poetry.lock +++ b/libs/core/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -555,6 +555,17 @@ files = [ [package.dependencies] python-dateutil = ">=2.7" +[[package]] +name = "gprof2dot" +version = "2022.7.29" +description = "Generate a dot graph from the output of several profilers." 
+optional = false +python-versions = ">=2.7" +files = [ + {file = "gprof2dot-2022.7.29-py2.py3-none-any.whl", hash = "sha256:f165b3851d3c52ee4915eb1bd6cca571e5759823c2cd0f71a79bda93c2dc85d6"}, + {file = "gprof2dot-2022.7.29.tar.gz", hash = "sha256:45b4d298bd36608fccf9511c3fd88a773f7a1abc04d6cd39445b11ba43133ec5"}, +] + [[package]] name = "grandalf" version = "0.8" @@ -810,6 +821,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -1163,6 +1175,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -1841,6 +1863,25 @@ pytest = ">=5.0" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] +[[package]] +name = "pytest-profiling" +version = "1.7.0" +description = "Profiling plugin for py.test" +optional = false +python-versions = "*" +files = [ + {file = 
"pytest-profiling-1.7.0.tar.gz", hash = "sha256:93938f147662225d2b8bd5af89587b979652426a8a6ffd7e73ec4a23e24b7f29"}, + {file = "pytest_profiling-1.7.0-py2.py3-none-any.whl", hash = "sha256:999cc9ac94f2e528e3f5d43465da277429984a1c237ae9818f8cfd0b06acb019"}, +] + +[package.dependencies] +gprof2dot = "*" +pytest = "*" +six = "*" + +[package.extras] +tests = ["pytest-virtualenv"] + [[package]] name = "pytest-watcher" version = "0.3.4" @@ -1942,6 +1983,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -1949,8 +1991,15 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -1967,6 +2016,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -1974,6 +2024,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -2744,4 +2795,4 @@ extended-testing = ["jinja2"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "8a8ec249b5b3843aaec58c5da0041f0c3846b2671ac64be80b981cef2840dc11" +content-hash = "8fe07123109b62d7210542d8aff20df6df00819e5b0f36bc12f02206c5161c43" diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index b87a95dd7dd64..fba2679af64cd 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -57,6 +57,7 @@ syrupy = "^4.0.2" pytest-watcher = "^0.3.4" pytest-asyncio = "^0.21.1" grandalf = "^0.8" +pytest-profiling = "^1.7.0" [tool.poetry.group.test_integration] diff --git a/libs/langchain/langchain/chains/base.py b/libs/langchain/langchain/chains/base.py index 5c5bd1aadfa0c..2f15d1fcc87a7 100644 --- a/libs/langchain/langchain/chains/base.py +++ b/libs/langchain/langchain/chains/base.py @@ -20,19 +20,14 @@ from langchain_core.load.dump import dumpd from langchain_core.memory import BaseMemory from langchain_core.outputs import RunInfo -from langchain_core.pydantic_v1 import ( - BaseModel, - Field, - create_model, - root_validator, - validator, -) +from langchain_core.pydantic_v1 import BaseModel, Field, root_validator, validator from 
langchain_core.runnables import (
     RunnableConfig,
     RunnableSerializable,
     ensure_config,
     run_in_executor,
 )
+from langchain_core.runnables.utils import create_model
 
 from langchain.schema import RUN_KEY
diff --git a/libs/langchain/langchain/chains/combine_documents/base.py b/libs/langchain/langchain/chains/combine_documents/base.py
index 6fbd22f3f12d4..89ad181dace77 100644
--- a/libs/langchain/langchain/chains/combine_documents/base.py
+++ b/libs/langchain/langchain/chains/combine_documents/base.py
@@ -9,8 +9,9 @@
 )
 from langchain_core.documents import Document
 from langchain_core.prompts import BasePromptTemplate, PromptTemplate
-from langchain_core.pydantic_v1 import BaseModel, Field, create_model
+from langchain_core.pydantic_v1 import BaseModel, Field
 from langchain_core.runnables.config import RunnableConfig
+from langchain_core.runnables.utils import create_model
 
 from langchain.chains.base import Chain
 from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
diff --git a/libs/langchain/langchain/chains/combine_documents/map_reduce.py b/libs/langchain/langchain/chains/combine_documents/map_reduce.py
index 18be6d4cf279b..cdeeda3302a9f 100644
--- a/libs/langchain/langchain/chains/combine_documents/map_reduce.py
+++ b/libs/langchain/langchain/chains/combine_documents/map_reduce.py
@@ -6,8 +6,9 @@
 
 from langchain_core.callbacks import Callbacks
 from langchain_core.documents import Document
-from langchain_core.pydantic_v1 import BaseModel, Extra, create_model, root_validator
+from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
 from langchain_core.runnables.config import RunnableConfig
+from langchain_core.runnables.utils import create_model
 
 from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
 from langchain.chains.combine_documents.reduce import ReduceDocumentsChain
diff --git a/libs/langchain/langchain/chains/combine_documents/map_rerank.py b/libs/langchain/langchain/chains/combine_documents/map_rerank.py
index 0466aac56b941..8650828f9b64f 100644
--- a/libs/langchain/langchain/chains/combine_documents/map_rerank.py
+++ b/libs/langchain/langchain/chains/combine_documents/map_rerank.py
@@ -6,8 +6,9 @@
 
 from langchain_core.callbacks import Callbacks
 from langchain_core.documents import Document
-from langchain_core.pydantic_v1 import BaseModel, Extra, create_model, root_validator
+from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
 from langchain_core.runnables.config import RunnableConfig
+from langchain_core.runnables.utils import create_model
 
 from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
 from langchain.chains.llm import LLMChain

From 865cabff052fe74996bef45faaf00df6f322c215 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev
Date: Mon, 19 Feb 2024 22:03:49 -0500
Subject: [PATCH 16/31] Docs: Add custom chat model documentation (#17595)

This PR adds documentation about how to implement a custom chat model.
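As a quick reference for reviewers, here is a minimal sketch of the pattern the new guide documents (the class name is illustrative; the notebook added below contains the full, annotated version):

```python
from typing import Any, List, Optional

from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage
from langchain_core.outputs import ChatGeneration, ChatResult


class EchoChatModel(BaseChatModel):
    """Toy chat model that echoes a prefix of the last input message."""

    n: int  # number of characters to echo back

    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        # A real implementation would call an API or a local model here.
        text = messages[-1].content[: self.n]
        message = AIMessage(content=text)
        return ChatResult(generations=[ChatGeneration(message=message)])

    @property
    def _llm_type(self) -> str:
        # Uniquely identifies this model type for logging purposes.
        return "echo-chat-model"
```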
---
 .../model_io/chat/custom_chat_model.ipynb | 644 ++++++++++++++++++
 docs/docs/modules/model_io/chat/index.mdx |   9 +-
 .../language_models/chat_models.py        |   2 +-
 3 files changed, 651 insertions(+), 4 deletions(-)
 create mode 100644 docs/docs/modules/model_io/chat/custom_chat_model.ipynb

diff --git a/docs/docs/modules/model_io/chat/custom_chat_model.ipynb b/docs/docs/modules/model_io/chat/custom_chat_model.ipynb
new file mode 100644
index 0000000000000..b91ca4cfd4333
--- /dev/null
+++ b/docs/docs/modules/model_io/chat/custom_chat_model.ipynb
@@ -0,0 +1,644 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "e3da9a3f-f583-4ba6-994e-0e8c1158f5eb",
+   "metadata": {},
+   "source": [
+    "# Custom Chat Model\n",
+    "\n",
+    "In this guide, we'll learn how to create a custom chat model using LangChain abstractions.\n",
+    "\n",
+    "Wrapping your LLM with the standard `ChatModel` interface allows you to use your LLM in existing LangChain programs with minimal code modifications!\n",
+    "\n",
+    "As a bonus, your LLM will automatically become a LangChain `Runnable` and will benefit from some optimizations out of the box (e.g., batch via a threadpool), async support, the `astream_events` API, etc.\n",
+    "\n",
+    "## Inputs and outputs\n",
+    "\n",
+    "First, we need to talk about messages, which are the inputs and outputs of chat models.\n",
+    "\n",
+    "### Messages\n",
+    "\n",
+    "Chat models take messages as inputs and return a message as output.\n",
+    "\n",
+    "LangChain has a few built-in message types:\n",
+    "\n",
+    "- `SystemMessage`: Used for priming AI behavior, usually passed in as the first of a sequence of input messages.\n",
+    "- `HumanMessage`: Represents a message from a person interacting with the chat model.\n",
+    "- `AIMessage`: Represents a message from the chat model. This can be either text or a request to invoke a tool.\n",
+    "- `FunctionMessage` / `ToolMessage`: Message for passing the results of tool invocation back to the model.\n",
+    "\n",
+    "::: {.callout-note}\n",
+    "`ToolMessage` and `FunctionMessage` closely follow OpenAI's `function` and `tool` arguments.\n",
+    "\n",
+    "This is a rapidly developing field and as more models add function calling capabilities, expect that there will be additions to this schema.\n",
+    ":::"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "c5046e6a-8b09-4a99-b6e6-7a605aac5738",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.messages import (\n",
+    "    AIMessage,\n",
+    "    BaseMessage,\n",
+    "    FunctionMessage,\n",
+    "    HumanMessage,\n",
+    "    SystemMessage,\n",
+    "    ToolMessage,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "53033447-8260-4f53-bd6f-b2f744e04e75",
+   "metadata": {},
+   "source": [
+    "### Streaming Variant\n",
+    "\n",
+    "All the chat messages have a streaming variant that contains `Chunk` in the name."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "d4656e9d-bfa1-4703-8f79-762fe6421294",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_core.messages import (\n",
+    "    AIMessageChunk,\n",
+    "    FunctionMessageChunk,\n",
+    "    HumanMessageChunk,\n",
+    "    SystemMessageChunk,\n",
+    "    ToolMessageChunk,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "81ebf3f4-c760-4898-b921-fdb469453d4a",
+   "metadata": {},
+   "source": [
+    "These chunks are used when streaming output from chat models, and they all define an additive property!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "9c15c299-6f8a-49cf-a072-09924fd44396",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AIMessageChunk(content='Hello World!')"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "AIMessageChunk(content=\"Hello\") + AIMessageChunk(content=\" World!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8e952d64-6d38-4a2b-b996-8812c204a12c",
+   "metadata": {},
+   "source": [
+    "## Simple Chat Model\n",
+    "\n",
+    "Inheriting from `SimpleChatModel` is great for prototyping!\n",
+    "\n",
+    "It won't allow you to implement all features that you might want out of a chat model, but it's quick to implement, and if you need more you can transition to `BaseChatModel` shown below.\n",
+    "\n",
+    "Let's implement a chat model that echoes back the last `n` characters of the prompt!\n",
+    "\n",
+    "You need to implement the following:\n",
+    "\n",
+    "* The method `_call` - Use to generate a chat result from a prompt.\n",
+    "\n",
+    "In addition, you have the option to specify the following:\n",
+    "\n",
+    "* The property `_identifying_params` - Represent model parameterization for logging purposes.\n",
+    "\n",
+    "Optional:\n",
+    "\n",
+    "* `_stream` - Use to implement streaming.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bbfebea1",
+   "metadata": {},
+   "source": [
+    "## Base Chat Model\n",
+    "\n",
+    "Let's implement a chat model that echoes back the first `n` characters of the last message in the prompt!\n",
+    "\n",
+    "To do so, we will inherit from `BaseChatModel`, which is a lower-level class, and implement the following methods/properties:\n",
+    "\n",
+    "* `_generate` - Use to generate a chat result from a prompt\n",
+    "* The property `_llm_type` - Used to uniquely identify the type of the model. 
Used for logging.\n", + "\n", + "Optional:\n", + "\n", + "* `_stream` - Use to implement streaming.\n", + "* `_agenerate` - Use to implement a native async method.\n", + "* `_astream` - Use to implement async version of `_stream`.\n", + "* The property `_identifying_params` - Represent model parameterization for logging purposes.\n", + "\n", + "\n", + ":::{.callout-caution}\n", + "\n", + "Currently, to get async streaming to work (via `astream`), you must provide an implementation of `_astream`.\n", + "\n", + "By default if `_astream` is not provided, then async streaming falls back on `_agenerate` which does not support\n", + "token by token streaming.\n", + ":::" + ] + }, + { + "cell_type": "markdown", + "id": "8e7047bd-c235-46f6-85e1-d6d7e0868eb1", + "metadata": {}, + "source": [ + "### Implementation" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "25ba32e5-5a6d-49f4-bb68-911827b84d61", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Any, AsyncIterator, Dict, Iterator, List, Optional\n", + "\n", + "from langchain_core.callbacks import (\n", + " AsyncCallbackManagerForLLMRun,\n", + " CallbackManagerForLLMRun,\n", + ")\n", + "from langchain_core.language_models import BaseChatModel, SimpleChatModel\n", + "from langchain_core.messages import AIMessageChunk, BaseMessage, HumanMessage\n", + "from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult\n", + "from langchain_core.runnables import run_in_executor\n", + "\n", + "\n", + "class CustomChatModelAdvanced(BaseChatModel):\n", + " \"\"\"A custom chat model that echoes the first `n` characters of the input.\n", + "\n", + " When contributing an implementation to LangChain, carefully document\n", + " the model including the initialization parameters, include\n", + " an example of how to initialize the model and include any relevant\n", + " links to the underlying models documentation or API.\n", + "\n", + " Example:\n", + "\n", + " .. code-block:: python\n", + "\n", + " model = CustomChatModel(n=2)\n", + " result = model.invoke([HumanMessage(content=\"hello\")])\n", + " result = model.batch([[HumanMessage(content=\"hello\")],\n", + " [HumanMessage(content=\"world\")]])\n", + " \"\"\"\n", + "\n", + " n: int\n", + " \"\"\"The number of characters from the last message of the prompt to be echoed.\"\"\"\n", + "\n", + " def _generate(\n", + " self,\n", + " messages: List[BaseMessage],\n", + " stop: Optional[List[str]] = None,\n", + " run_manager: Optional[CallbackManagerForLLMRun] = None,\n", + " **kwargs: Any,\n", + " ) -> ChatResult:\n", + " \"\"\"Override the _generate method to implement the chat model logic.\n", + "\n", + " This can be a call to an API, a call to a local model, or any other\n", + " implementation that generates a response to the input prompt.\n", + "\n", + " Args:\n", + " messages: the prompt composed of a list of messages.\n", + " stop: a list of strings on which the model should stop generating.\n", + " If generation stops due to a stop token, the stop token itself\n", + " SHOULD BE INCLUDED as part of the output. 
This is not enforced\n", + " across models right now, but it's a good practice to follow since\n", + " it makes it much easier to parse the output of the model\n", + " downstream and understand why generation stopped.\n", + " run_manager: A run manager with callbacks for the LLM.\n", + " \"\"\"\n", + " last_message = messages[-1]\n", + " tokens = last_message.content[: self.n]\n", + " message = AIMessage(content=tokens)\n", + " generation = ChatGeneration(message=message)\n", + " return ChatResult(generations=[generation])\n", + "\n", + " def _stream(\n", + " self,\n", + " messages: List[BaseMessage],\n", + " stop: Optional[List[str]] = None,\n", + " run_manager: Optional[CallbackManagerForLLMRun] = None,\n", + " **kwargs: Any,\n", + " ) -> Iterator[ChatGenerationChunk]:\n", + " \"\"\"Stream the output of the model.\n", + "\n", + " This method should be implemented if the model can generate output\n", + " in a streaming fashion. If the model does not support streaming,\n", + " do not implement it. In that case streaming requests will be automatically\n", + " handled by the _generate method.\n", + "\n", + " Args:\n", + " messages: the prompt composed of a list of messages.\n", + " stop: a list of strings on which the model should stop generating.\n", + " If generation stops due to a stop token, the stop token itself\n", + " SHOULD BE INCLUDED as part of the output. This is not enforced\n", + " across models right now, but it's a good practice to follow since\n", + " it makes it much easier to parse the output of the model\n", + " downstream and understand why generation stopped.\n", + " run_manager: A run manager with callbacks for the LLM.\n", + " \"\"\"\n", + " last_message = messages[-1]\n", + " tokens = last_message.content[: self.n]\n", + "\n", + " for token in tokens:\n", + " chunk = ChatGenerationChunk(message=AIMessageChunk(content=token))\n", + "\n", + " if run_manager:\n", + " run_manager.on_llm_new_token(token, chunk=chunk)\n", + "\n", + " yield chunk\n", + "\n", + " async def _astream(\n", + " self,\n", + " messages: List[BaseMessage],\n", + " stop: Optional[List[str]] = None,\n", + " run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,\n", + " **kwargs: Any,\n", + " ) -> AsyncIterator[ChatGenerationChunk]:\n", + " \"\"\"An async variant of astream.\n", + "\n", + " If not provided, the default behavior is to delegate to the _generate method.\n", + "\n", + " The implementation below instead will delegate to `_stream` and will\n", + " kick it off in a separate thread.\n", + "\n", + " If you're able to natively support async, then by all means do so!\n", + " \"\"\"\n", + " result = await run_in_executor(\n", + " None,\n", + " self._stream,\n", + " messages,\n", + " stop=stop,\n", + " run_manager=run_manager.get_sync() if run_manager else None,\n", + " **kwargs,\n", + " )\n", + " for chunk in result:\n", + " yield chunk\n", + "\n", + " @property\n", + " def _llm_type(self) -> str:\n", + " \"\"\"Get the type of language model used by this chat model.\"\"\"\n", + " return \"echoing-chat-model-advanced\"\n", + "\n", + " @property\n", + " def _identifying_params(self) -> Dict[str, Any]:\n", + " \"\"\"Return a dictionary of identifying parameters.\"\"\"\n", + " return {\"n\": self.n}" + ] + }, + { + "cell_type": "markdown", + "id": "b3c3d030-8d8b-4891-962d-a2d39b331883", + "metadata": {}, + "source": [ + ":::{.callout-tip}\n", + "The `_astream` implementation uses `run_in_executor` to launch the sync `_stream` in a separate thread.\n", + "\n", + "You can use this trick if you 
want to reuse the `_stream` implementation, but if you're able to implement code\n",
+    "that's natively async that's a better solution since that code will run with less overhead.\n",
+    ":::"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1e9af284-f2d3-44e2-ac6a-09b73d89ada3",
+   "metadata": {},
+   "source": [
+    "### Let's test it 🧪\n",
+    "\n",
+    "The chat model will implement the standard `Runnable` interface of LangChain, which many of the LangChain abstractions support!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "34bf2d48-556a-48be-aee7-496fb02332f3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = CustomChatModelAdvanced(n=3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "27689f30-dcd2-466b-ba9d-f60b7d434110",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AIMessage(content='Meo')"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.invoke(\n",
+    "    [\n",
+    "        HumanMessage(content=\"hello!\"),\n",
+    "        AIMessage(content=\"Hi there human!\"),\n",
+    "        HumanMessage(content=\"Meow!\"),\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "406436df-31bf-466b-9c3d-39db9d6b6407",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AIMessage(content='hel')"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.invoke(\"hello\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "a72ffa46-6004-41ef-bbe4-56fa17a029e2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[AIMessage(content='hel'), AIMessage(content='goo')]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.batch([\"hello\", \"goodbye\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "3633be2c-2ea0-42f9-a72f-3b5240690b55",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "c|a|t|"
+     ]
+    }
+   ],
+   "source": [
+    "for chunk in model.stream(\"cat\"):\n",
+    "    print(chunk.content, end=\"|\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3f8a7c42-aec4-4116-adf3-93133d409827",
+   "metadata": {},
+   "source": [
+    "Please see the implementation of `_astream` in the model! If you do not implement it, then no output will stream!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "b7d73995-eeab-48c6-a7d8-32c98ba29fc2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "c|a|t|"
+     ]
+    }
+   ],
+   "source": [
+    "async for chunk in model.astream(\"cat\"):\n",
+    "    print(chunk.content, end=\"|\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f80dc55b-d159-4527-9191-407a7c6d6042",
+   "metadata": {},
+   "source": [
+    "Let's try to use the astream events API, which will also help double-check that all the callbacks were implemented!"
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "17840eba-8ff4-4e73-8e4f-85f16eb1c9d0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'event': 'on_chat_model_start', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'name': 'CustomChatModelAdvanced', 'tags': [], 'metadata': {}, 'data': {'input': 'cat'}}\n", + "{'event': 'on_chat_model_stream', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'tags': [], 'metadata': {}, 'name': 'CustomChatModelAdvanced', 'data': {'chunk': AIMessageChunk(content='c')}}\n", + "{'event': 'on_chat_model_stream', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'tags': [], 'metadata': {}, 'name': 'CustomChatModelAdvanced', 'data': {'chunk': AIMessageChunk(content='a')}}\n", + "{'event': 'on_chat_model_stream', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'tags': [], 'metadata': {}, 'name': 'CustomChatModelAdvanced', 'data': {'chunk': AIMessageChunk(content='t')}}\n", + "{'event': 'on_chat_model_end', 'name': 'CustomChatModelAdvanced', 'run_id': 'e03c0b21-521f-4cb4-a837-02fed65cf1cf', 'tags': [], 'metadata': {}, 'data': {'output': AIMessageChunk(content='cat')}}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/eugene/src/langchain/libs/core/langchain_core/_api/beta_decorator.py:86: LangChainBetaWarning: This API is in beta and may change in the future.\n", + " warn_beta(\n" + ] + } + ], + "source": [ + "async for event in model.astream_events(\"cat\", version=\"v1\"):\n", + " print(event)" + ] + }, + { + "cell_type": "markdown", + "id": "42f9553f-7d8c-4277-aeb4-d80d77839d90", + "metadata": {}, + "source": [ + "## Identifying Params\n", + "\n", + "LangChain has a callback system which allows implementing loggers to monitor the behavior of LLM applications.\n", + "\n", + "Remember the `_identifying_params` property from earlier? \n", + "\n", + "It's passed to the callback system and is accessible for user specified loggers.\n", + "\n", + "Below we'll implement a handler with just a single `on_chat_model_start` event to see where `_identifying_params` appears." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "cc7e6b5f-711b-48aa-9ebe-92a13e230c37",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "---\n",
+      "On chat model start.\n",
+      "{'invocation_params': {'n': 3, '_type': 'echoing-chat-model-advanced', 'stop': ['woof']}, 'options': {'stop': ['woof']}, 'name': None, 'batch_size': 1}\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "AIMessage(content='meo')"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from typing import Union\n",
+    "from uuid import UUID\n",
+    "\n",
+    "from langchain_core.callbacks import AsyncCallbackHandler\n",
+    "from langchain_core.outputs import (\n",
+    "    ChatGenerationChunk,\n",
+    "    ChatResult,\n",
+    "    GenerationChunk,\n",
+    "    LLMResult,\n",
+    ")\n",
+    "\n",
+    "\n",
+    "class SampleCallbackHandler(AsyncCallbackHandler):\n",
+    "    \"\"\"Async callback handler that handles callbacks from LangChain.\"\"\"\n",
+    "\n",
+    "    async def on_chat_model_start(\n",
+    "        self,\n",
+    "        serialized: Dict[str, Any],\n",
+    "        messages: List[List[BaseMessage]],\n",
+    "        *,\n",
+    "        run_id: UUID,\n",
+    "        parent_run_id: Optional[UUID] = None,\n",
+    "        tags: Optional[List[str]] = None,\n",
+    "        metadata: Optional[Dict[str, Any]] = None,\n",
+    "        **kwargs: Any,\n",
+    "    ) -> Any:\n",
+    "        \"\"\"Run when a chat model starts running.\"\"\"\n",
+    "        print(\"---\")\n",
+    "        print(\"On chat model start.\")\n",
+    "        print(kwargs)\n",
+    "\n",
+    "\n",
+    "model.invoke(\"meow\", stop=[\"woof\"], config={\"callbacks\": [SampleCallbackHandler()]})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "44ee559b-b1da-4851-8c97-420ab394aff9",
+   "metadata": {},
+   "source": [
+    "## Contributing\n",
+    "\n",
+    "We appreciate all chat model integration contributions.\n",
+    "\n",
+    "Here's a checklist to help make sure your contribution gets added to LangChain:\n",
+    "\n",
+    "Documentation:\n",
+    "\n",
+    "* The model contains doc-strings for all initialization arguments, as these will be surfaced in the [APIReference](https://api.python.langchain.com/en/stable/langchain_api_reference.html).\n",
+    "* The class doc-string for the model contains a link to the model API if the model is powered by a service.\n",
+    "\n",
+    "Tests:\n",
+    "\n",
+    "* [ ] Add unit or integration tests to the overridden methods. Verify that `invoke`, `ainvoke`, `batch`, `stream` work if you've overridden the corresponding code.\n",
+    "\n",
+    "Streaming (if you're implementing it):\n",
+    "\n",
+    "* [ ] Provide an async implementation via `_astream`\n",
+    "* [ ] Make sure to invoke the `on_llm_new_token` callback\n",
+    "* [ ] `on_llm_new_token` is invoked BEFORE yielding the chunk\n",
+    "\n",
+    "Stop Token Behavior:\n",
+    "\n",
+    "* [ ] Stop token should be respected\n",
+    "* [ ] Stop token should be INCLUDED as part of the response\n",
+    "\n",
+    "Secret API Keys:\n",
+    "\n",
+    "* [ ] If your model connects to an API it will likely accept API keys as part of its initialization. Use Pydantic's `SecretStr` type for secrets, so they don't get accidentally printed out when folks print the model."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/docs/modules/model_io/chat/index.mdx b/docs/docs/modules/model_io/chat/index.mdx
index 2ebb19c93d671..058192951d8da 100644
--- a/docs/docs/modules/model_io/chat/index.mdx
+++ b/docs/docs/modules/model_io/chat/index.mdx
@@ -4,11 +4,13 @@ sidebar_position: 3
 
 # Chat Models
 
-ChatModels are a core component of LangChain.
-LangChain does not serve its own ChatModels, but rather provides a standard interface for interacting with many different models. To be specific, this interface is one that takes as input a list of messages and returns a message.
+Chat Models are a core component of LangChain.
+A chat model is a language model that uses chat messages as inputs and returns chat messages as outputs (as opposed to using plain text).
 
-There are lots of model providers (OpenAI, Cohere, Hugging Face, etc) - the `ChatModel` class is designed to provide a standard interface for all of them.
+LangChain has integrations with many model providers (OpenAI, Cohere, Hugging Face, etc.) and exposes a standard interface to interact with all of these models.
+
+LangChain allows you to use models in sync, async, batching and streaming modes and provides other features (e.g., caching) and more.
 
 ## [Quick Start](./quick_start)
 
@@ -27,3 +29,4 @@ This includes:
 - [How to use ChatModels that support function calling](./function_calling)
 - [How to stream responses from a ChatModel](./streaming)
 - [How to track token usage in a ChatModel call](./token_usage_tracking)
+- [How to create a custom ChatModel](./custom_chat_model)
diff --git a/libs/core/langchain_core/language_models/chat_models.py b/libs/core/langchain_core/language_models/chat_models.py
index 6279116093e5f..fe622f61b0295 100644
--- a/libs/core/langchain_core/language_models/chat_models.py
+++ b/libs/core/langchain_core/language_models/chat_models.py
@@ -794,7 +794,7 @@ def dict(self, **kwargs: Any) -> Dict:
 
 
 class SimpleChatModel(BaseChatModel):
-    """Simple Chat Model."""
+    """A simplified implementation for a chat model to inherit from."""
 
     def _generate(
         self,

From b13e52b6acb5ecd664541ba21dbfdae74e736e6c Mon Sep 17 00:00:00 2001
From: Christophe Bornet
Date: Tue, 20 Feb 2024 17:39:30 +0100
Subject: [PATCH 17/31] community[patch]: Fix AstraDBCache docstrings (#17802)

---
 libs/community/langchain_community/cache.py | 118 ++++++++------------
 1 file changed, 47 insertions(+), 71 deletions(-)

diff --git a/libs/community/langchain_community/cache.py b/libs/community/langchain_community/cache.py
index 5f36aa5856b44..6b5844e463a22 100644
--- a/libs/community/langchain_community/cache.py
+++ b/libs/community/langchain_community/cache.py
@@ -1366,16 +1366,6 @@ def get_md5(input_string: str) -> str:
 
 
 class AstraDBCache(BaseCache):
-    """
-    Cache that uses Astra DB as a backend.
-
-    It uses a single collection as a kv store
-    The lookup keys, combined in the _id of the documents, are:
-    - prompt, a string
-    - llm_string, a deterministic str representation of the model parameters.
-      (needed to prevent same-prompt-different-model collisions)
-    """
-
     @staticmethod
     def _make_id(prompt: str, llm_string: str) -> str:
         return f"{_hash(prompt)}#{_hash(llm_string)}"
@@ -1393,25 +1383,30 @@ def __init__(
         setup_mode: SetupMode = SetupMode.SYNC,
     ):
         """
-        Create an AstraDB cache using a collection for storage.
-
-        Args (only keyword-arguments accepted):
-            collection_name (str): name of the Astra DB collection to create/use.
-            token (Optional[str]): API token for Astra DB usage.
-            api_endpoint (Optional[str]): full URL to the API endpoint,
-                such as "https://<astra_db_id>-us-east1.apps.astra.datastax.com".
-            astra_db_client (Optional[AstraDB]):
-                *alternative to token+api_endpoint*,
+        Cache that uses Astra DB as a backend.
+
+        It uses a single collection as a kv store
+        The lookup keys, combined in the _id of the documents, are:
+        - prompt, a string
+        - llm_string, a deterministic str representation of the model parameters.
+          (needed to prevent same-prompt-different-model collisions)
+
+        Args:
+            collection_name: name of the Astra DB collection to create/use.
+            token: API token for Astra DB usage.
+            api_endpoint: full URL to the API endpoint,
+                such as `https://<astra_db_id>-us-east1.apps.astra.datastax.com`.
+            astra_db_client: *alternative to token+api_endpoint*,
                 you can pass an already-created 'astrapy.db.AstraDB' instance.
-            async_astra_db_client (Optional[AsyncAstraDB]):
-                *alternative to token+api_endpoint*,
+            async_astra_db_client: *alternative to token+api_endpoint*,
                 you can pass an already-created 'astrapy.db.AsyncAstraDB' instance.
-            namespace (Optional[str]): namespace (aka keyspace) where the
+            namespace: namespace (aka keyspace) where the
                 collection is created. Defaults to the database's "default namespace".
-            pre_delete_collection (bool): whether to delete and re-create the
-                collection. Defaults to False.
-            async_setup (bool): whether to create the collection asynchronously.
-                Enable only if there is a running asyncio event loop. Defaults to False.
+            setup_mode: mode used to create the Astra DB collection (SYNC, ASYNC or
+                OFF).
+            pre_delete_collection: whether to delete the collection
+                before creating it. If False and the collection already exists,
+                the collection will be used as is.
""" self.astra_env = _AstraDBCollectionEnvironment( collection_name=collection_name, @@ -1427,7 +1422,6 @@ def __init__( self.async_collection = self.astra_env.async_collection def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: - """Look up based on prompt and llm_string.""" self.astra_env.ensure_db_setup() doc_id = self._make_id(prompt, llm_string) item = self.collection.find_one( @@ -1441,7 +1435,6 @@ def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: return _loads_generations(item["body_blob"]) if item is not None else None async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: - """Look up based on prompt and llm_string.""" await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) item = ( @@ -1457,7 +1450,6 @@ async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYP return _loads_generations(item["body_blob"]) if item is not None else None def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None: - """Update cache based on prompt and llm_string.""" self.astra_env.ensure_db_setup() doc_id = self._make_id(prompt, llm_string) blob = _dumps_generations(return_val) @@ -1471,7 +1463,6 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N async def aupdate( self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: - """Update cache based on prompt and llm_string.""" await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) blob = _dumps_generations(return_val) @@ -1523,12 +1514,10 @@ async def adelete(self, prompt: str, llm_string: str) -> None: await self.async_collection.delete_one(doc_id) def clear(self, **kwargs: Any) -> None: - """Clear cache. This is for all LLMs at once.""" self.astra_env.ensure_db_setup() self.collection.clear() async def aclear(self, **kwargs: Any) -> None: - """Clear cache. This is for all LLMs at once.""" await self.astra_env.aensure_db_setup() await self.async_collection.clear() @@ -1575,18 +1564,6 @@ def decorating_function(user_function: Callable) -> Callable: class AstraDBSemanticCache(BaseCache): - """ - Cache that uses Astra DB as a vector-store backend for semantic - (i.e. similarity-based) lookup. - - It uses a single (vector) collection and can store - cached values from several LLMs, so the LLM's 'llm_string' is stored - in the document metadata. - - You can choose the preferred similarity (or use the API default) -- - remember the threshold might require metric-dependent tuning. - """ - def __init__( self, *, @@ -1603,33 +1580,38 @@ def __init__( similarity_threshold: float = ASTRA_DB_SEMANTIC_CACHE_DEFAULT_THRESHOLD, ): """ - Initialize the cache with all relevant parameters. - Args: + Cache that uses Astra DB as a vector-store backend for semantic + (i.e. similarity-based) lookup. + + It uses a single (vector) collection and can store + cached values from several LLMs, so the LLM's 'llm_string' is stored + in the document metadata. + + You can choose the preferred similarity (or use the API default). + The default score threshold is tuned to the default metric. + Tune it carefully yourself if switching to another distance metric. - collection_name (str): name of the Astra DB collection to create/use. - token (Optional[str]): API token for Astra DB usage. - api_endpoint (Optional[str]): full URL to the API endpoint, - such as "https://-us-east1.apps.astra.datastax.com". 
-            astra_db_client (Optional[AstraDB]): *alternative to token+api_endpoint*,
+        Args:
+            collection_name: name of the Astra DB collection to create/use.
+            token: API token for Astra DB usage.
+            api_endpoint: full URL to the API endpoint,
+                such as `https://<astra_db_id>-us-east1.apps.astra.datastax.com`.
+            astra_db_client: *alternative to token+api_endpoint*,
                 you can pass an already-created 'astrapy.db.AstraDB' instance.
-            async_astra_db_client (Optional[AsyncAstraDB]):
-                *alternative to token+api_endpoint*,
+            async_astra_db_client: *alternative to token+api_endpoint*,
                 you can pass an already-created 'astrapy.db.AsyncAstraDB' instance.
-            namespace (Optional[str]): namespace (aka keyspace) where the
+            namespace: namespace (aka keyspace) where the
                 collection is created. Defaults to the database's "default namespace".
-            setup_mode (SetupMode): mode used to create the collection in the DB
-                (SYNC, ASYNC or OFF). Defaults to SYNC.
-            pre_delete_collection (bool): whether to delete and re-create the
-                collection. Defaults to False.
-            embedding (Embedding): Embedding provider for semantic
-                encoding and search.
+            setup_mode: mode used to create the Astra DB collection (SYNC, ASYNC or
+                OFF).
+            pre_delete_collection: whether to delete the collection
+                before creating it. If False and the collection already exists,
+                the collection will be used as is.
+            embedding: Embedding provider for semantic encoding and search.
             metric: the function to use for evaluating similarity of text embeddings.
                 Defaults to 'cosine' (alternatives: 'euclidean', 'dot_product')
-            similarity_threshold (float, optional): the minimum similarity
-                for accepting a (semantic-search) match.
-
-            The default score threshold is tuned to the default metric.
-            Tune it carefully yourself if switching to another distance metric.
+            similarity_threshold: the minimum similarity for accepting a
+                (semantic-search) match.
""" self.embedding = embedding self.metric = metric @@ -1685,7 +1667,6 @@ def _make_id(prompt: str, llm_string: str) -> str: return f"{_hash(prompt)}#{_hash(llm_string)}" def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> None: - """Update cache based on prompt and llm_string.""" self.astra_env.ensure_db_setup() doc_id = self._make_id(prompt, llm_string) llm_string_hash = _hash(llm_string) @@ -1704,7 +1685,6 @@ def update(self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE) -> N async def aupdate( self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE ) -> None: - """Update cache based on prompt and llm_string.""" await self.astra_env.aensure_db_setup() doc_id = self._make_id(prompt, llm_string) llm_string_hash = _hash(llm_string) @@ -1721,7 +1701,6 @@ async def aupdate( ) def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: - """Look up based on prompt and llm_string.""" hit_with_id = self.lookup_with_id(prompt, llm_string) if hit_with_id is not None: return hit_with_id[1] @@ -1729,7 +1708,6 @@ def lookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: return None async def alookup(self, prompt: str, llm_string: str) -> Optional[RETURN_VAL_TYPE]: - """Look up based on prompt and llm_string.""" hit_with_id = await self.alookup_with_id(prompt, llm_string) if hit_with_id is not None: return hit_with_id[1] @@ -1835,11 +1813,9 @@ async def adelete_by_document_id(self, document_id: str) -> None: await self.async_collection.delete_one(document_id) def clear(self, **kwargs: Any) -> None: - """Clear the *whole* semantic cache.""" self.astra_env.ensure_db_setup() self.collection.clear() async def aclear(self, **kwargs: Any) -> None: - """Clear the *whole* semantic cache.""" await self.astra_env.aensure_db_setup() await self.async_collection.clear() From 441160d6b3cdf6fc3c7598b16a5c3dc838595e5b Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Tue, 20 Feb 2024 12:28:15 -0500 Subject: [PATCH 18/31] Docs: Update contributing documentation (#17557) This PR adds more details about how to contribute to documentation. --- docs/docs/contributing/documentation.mdx | 130 +++++++++++++++++++++-- 1 file changed, 119 insertions(+), 11 deletions(-) diff --git a/docs/docs/contributing/documentation.mdx b/docs/docs/contributing/documentation.mdx index a7468600b746b..b7bba374d11b2 100644 --- a/docs/docs/contributing/documentation.mdx +++ b/docs/docs/contributing/documentation.mdx @@ -3,24 +3,68 @@ sidebar_position: 3 --- # Contribute Documentation -The docs directory contains Documentation and API Reference. +LangChain documentation consists of two components: -Documentation is built using [Quarto](https://quarto.org) and [Docusaurus 2](https://docusaurus.io/). +1. Main Documentation: Hosted at [python.langchain.com](https://python.langchain.com/), +this comprehensive resource serves as the primary user-facing documentation. +It covers a wide array of topics, including tutorials, use cases, integrations, +and more, offering extensive guidance on building with LangChain. +The content for this documentation lives in the `/docs` directory of the monorepo. +2. In-code Documentation: This is documentation of the codebase itself, which is also +used to generate the externally facing [API Reference](https://api.python.langchain.com/en/latest/langchain_api_reference.html). +The content for the API reference is autogenerated by scanning the docstrings in the codebase. For this reason we ask that +developers document their code well. 
-API Reference are largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code and are hosted by [Read the Docs](https://readthedocs.org/). -For that reason, we ask that you add good documentation to all classes and methods. +The main documentation is built using [Quarto](https://quarto.org) and [Docusaurus 2](https://docusaurus.io/). -Similar to linting, we recognize documentation can be annoying. If you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed. +The `API Reference` is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) +from the code and is hosted by [Read the Docs](https://readthedocs.org/). -## Build Documentation Locally +We appreciate all contributions to the documentation, whether it be fixing a typo, +adding a new tutorial or example and whether it be in the main documentation or the API Reference. + +Similar to linting, we recognize documentation can be annoying. If you do not want +to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed. + +## 📜 Main Documentation + +The content for the main documentation is located in the `/docs` directory of the monorepo. + +The documentation is written using a combination of ipython notebooks (`.ipynb` files) +and markdown (`.mdx` files). The notebooks are converted to markdown +using [Quarto](https://quarto.org) and then built using [Docusaurus 2](https://docusaurus.io/). + +Feel free to make contributions to the main documentation! 🥰 + +After modifying the documentation: + +1. Run the linting and formatting commands (see below) to ensure that the documentation is well-formatted and free of errors. +2. Optionally build the documentation locally to verify that the changes look good. +3. Make a pull request with the changes. +4. You can preview and verify that the changes are what you wanted by clicking the `View deployment` or `Visit Preview` buttons on the pull request `Conversation` page. This will take you to a preview of the documentation changes. + +## ⚒️ Linting and Building Documentation Locally + +After writing up the documentation, you may want to lint and build the documentation +locally to ensure that it looks good and is free of errors. + +If you're unable to build it locally that's okay as well, as you will be able to +see a preview of the documentation on the pull request page. ### Install dependencies -- [Quarto](https://quarto.org) - package that converts Jupyter notebooks (`.ipynb` files) into mdx files for serving in Docusaurus. -- `poetry install --with lint,docs --no-root` from the monorepo root. +- [Quarto](https://quarto.org) - package that converts Jupyter notebooks (`.ipynb` files) into mdx files for serving in Docusaurus. [Download link](https://quarto.org/docs/download/). + +From the **monorepo root**, run the following command to install the dependencies: + +```bash +poetry install --with lint,docs --no-root +```` ### Building +The code that builds the documentation is located in the `/docs` directory of the monorepo. + In the following commands, the prefix `api_` indicates that those are operations for the API Reference. Before building the documentation, it is always a good idea to clean the build directory: @@ -46,7 +90,7 @@ make api_docs_linkcheck ### Linting and Formatting -The docs are linted from the monorepo root. 
To lint the docs, run the following from there: +The Main Documentation is linted from the **monorepo root**. To lint the main documentation, run the following from there: ```bash make lint @@ -56,9 +100,73 @@ If you have formatting-related errors, you can fix them automatically with: ```bash make format -``` +``` + +## ⌨️ In-code Documentation + +The in-code documentation is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code and is hosted by [Read the Docs](https://readthedocs.org/). + +For the API reference to be useful, the codebase must be well-documented. This means that all functions, classes, and methods should have a docstring that explains what they do, what the arguments are, and what the return value is. This is a good practice in general, but it is especially important for LangChain because the API reference is the primary resource for developers to understand how to use the codebase. + +We generally follow the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for docstrings. + +Here is an example of a well-documented function: + +```python + +def my_function(arg1: int, arg2: str) -> float: + """This is a short description of the function. (It should be a single sentence.) + + This is a longer description of the function. It should explain what + the function does, what the arguments are, and what the return value is. + It should wrap at 88 characters. + + Examples: + This is a section for examples of how to use the function. + + .. code-block:: python + + my_function(1, "hello") + + Args: + arg1: This is a description of arg1. We do not need to specify the type since + it is already specified in the function signature. + arg2: This is a description of arg2. + + Returns: + This is a description of the return value. + """ + return 3.14 +``` + +### Linting and Formatting + +The in-code documentation is linted from the directories belonging to the packages +being documented. + +For example, if you're working on the `langchain-community` package, you would change +the working directory to the `langchain-community` directory: + +```bash +cd [root]/libs/langchain-community +``` + +Set up a virtual environment for the package if you haven't done so already. + +Install the dependencies for the package. + +```bash +poetry install --with lint +``` + +Then you can run the following commands to lint and format the in-code documentation: + +```bash +make format +make lint +``` -## Verify Documentation changes +## Verify Documentation Changes After pushing documentation changes to the repository, you can preview and verify that the changes are what you wanted by clicking the `View deployment` or `Visit Preview` buttons on the pull request `Conversation` page. From 92e52e89ca43650b24a4d8c2af4553142ffd85cc Mon Sep 17 00:00:00 2001 From: Virat Singh Date: Tue, 20 Feb 2024 13:15:29 -0500 Subject: [PATCH 19/31] community: Add PolygonTickerNews Tool (#17808) Description: In this PR, I am adding a PolygonTickerNews Tool, which can be used to get the latest news for a given ticker / stock. 
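For reviewers, a brief sketch of the intended usage (mirroring the notebook changes below; "AAPL" is just an example ticker, and the wrapper is assumed to read the Polygon API key from the `POLYGON_API_KEY` environment variable):

```python
from langchain_community.tools.polygon.ticker_news import PolygonTickerNews
from langchain_community.utilities.polygon import PolygonAPIWrapper

# The wrapper picks up the Polygon API key from the environment.
api_wrapper = PolygonAPIWrapper()

# Returns a JSON string with the latest news articles for the ticker.
ticker_news_tool = PolygonTickerNews(api_wrapper=api_wrapper)
news = ticker_news_tool.run("AAPL")
print(news)
```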
Twitter handle: [@virattt](https://twitter.com/virattt) --- docs/docs/integrations/tools/polygon.ipynb | 159 +++++++++++++++--- .../agent_toolkits/polygon/toolkit.py | 7 +- .../langchain_community/tools/__init__.py | 9 + .../tools/polygon/__init__.py | 2 + .../tools/polygon/ticker_news.py | 36 ++++ .../langchain_community/utilities/polygon.py | 21 +++ .../tests/unit_tests/tools/test_imports.py | 1 + .../tests/unit_tests/tools/test_public_api.py | 1 + 8 files changed, 214 insertions(+), 22 deletions(-) create mode 100644 libs/community/langchain_community/tools/polygon/ticker_news.py diff --git a/docs/docs/integrations/tools/polygon.ipynb b/docs/docs/integrations/tools/polygon.ipynb index 62b078d0d8efa..0349ac67f78fb 100644 --- a/docs/docs/integrations/tools/polygon.ipynb +++ b/docs/docs/integrations/tools/polygon.ipynb @@ -7,11 +7,11 @@ "id": "245a954a" }, "source": [ - "# Polygon Stock Market API\n", + "# Polygon Stock Market API Tools\n", "\n", ">[Polygon](https://polygon.io/) The Polygon.io Stocks API provides REST endpoints that let you query the latest market data from all US stock exchanges.\n", "\n", - "Use the ``PolygonAPIWrapper`` to get stock market data like the latest quote for a ticker." + "This notebook uses tools to get stock market data like the latest quote and news for a ticker from Polygon." ] }, { @@ -20,7 +20,8 @@ "id": "34bb5968", "metadata": { "id": "34bb5968", - "is_executing": true + "is_executing": true, + "scrolled": true }, "outputs": [], "source": [ @@ -32,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "ac4910f8", "metadata": { "id": "ac4910f8", @@ -41,43 +42,161 @@ "outputs": [], "source": [ "from langchain_community.tools.polygon.last_quote import PolygonLastQuote\n", + "from langchain_community.tools.polygon.ticker_news import PolygonTickerNews\n", "from langchain_community.utilities.polygon import PolygonAPIWrapper" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, + "id": "8660b910-905b-46f3-9541-920b9fc3d4d6", + "metadata": {}, + "outputs": [], + "source": [ + "api_wrapper = PolygonAPIWrapper()\n", + "ticker = \"AAPL\"" + ] + }, + { + "cell_type": "markdown", + "id": "347f6951-b383-4675-b116-9b7d16c1f505", + "metadata": {}, + "source": [ + "### Get latest quote for ticker" + ] + }, + { + "cell_type": "code", + "execution_count": 55, "id": "84b8f773", "metadata": { "id": "84b8f773" }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tool output: {\"P\": 180.99, \"S\": 5, \"T\": \"AAPL\", \"X\": 11, \"i\": [604], \"p\": 180.98, \"q\": 31662815, \"s\": 2, \"t\": 1708445246516556649, \"x\": 21, \"y\": 1708445246516369924, \"z\": 3}\n" + ] + } + ], "source": [ - "tool = PolygonLastQuote(api_wrapper=PolygonAPIWrapper())" + "# Get the last quote for ticker\n", + "last_quote_tool = PolygonLastQuote(api_wrapper=api_wrapper)\n", + "last_quote = last_quote_tool.run(ticker)\n", + "print(f\"Tool output: {last_quote}\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "068991a6", "metadata": { "id": "068991a6", "outputId": "c5cdc6ec-03cf-4084-cc6f-6ae792d91d39" }, + "outputs": [], + "source": [ + "import json\n", + "\n", + "# Convert the last quote response to JSON\n", + "last_quote = last_quote_tool.run(ticker)\n", + "last_quote_json = json.loads(last_quote)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "174e2556-eb3e-48a4-bde6-9a3309fae9c9", + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "Latest price for AAPL is $180.74\n" + ] + } + ], + "source": [ + "# Print the latest price for ticker\n", + "latest_price = last_quote_json[\"p\"]\n", + "print(f\"Latest price for {ticker} is ${latest_price}\")" + ] + }, + { + "cell_type": "markdown", + "id": "04f1b612-f91f-471c-8264-9cc8c14bdaef", + "metadata": {}, + "source": [ + "### Get latest news for ticker" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "024982db-1402-4bd7-9788-6cb369a9565d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tool output: [{\"id\": \"RckckqTS-K2zI6WRyycBB0HonfGLc2MPTQROWsbpKSA\", \"publisher\": {\"name\": \"The Motley Fool\", \"homepage_url\": \"https://www.fool.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/themotleyfool.svg\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/themotleyfool.ico\"}, \"title\": \"Should You Worry About Apple's Slowdown in China?\", \"author\": \"newsfeedback@fool.com (Adria Cimino)\", \"published_utc\": \"2024-02-20T15:10:00Z\", \"article_url\": \"https://www.fool.com/investing/2024/02/20/should-you-worry-about-apples-slowdown-in-china/\", \"tickers\": [\"AAPL\"], \"image_url\": \"https://g.foolcdn.com/editorial/images/765910/aapl.png\", \"description\": \"A local smartphone giant is gaining market share.\", \"keywords\": [\"investing\"]}, {\"id\": \"5goYCKw3ZsHlJYwN1XX8AJCyybZuvfJeoUz38SzlYBM\", \"publisher\": {\"name\": \"The Motley Fool\", \"homepage_url\": \"https://www.fool.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/themotleyfool.svg\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/themotleyfool.ico\"}, \"title\": \"Vanguard Total Stock Market ETF: Buy, Sell, or Hold?\", \"author\": \"newsfeedback@fool.com (Justin Pope)\", \"published_utc\": \"2024-02-20T12:30:00Z\", \"article_url\": \"https://www.fool.com/investing/2024/02/20/vanguard-total-stock-market-etf-buy-sell-or-hold/\", \"tickers\": [\"VTI\", \"AAPL\", \"MSFT\", \"GOOGL\", \"AMZN\", \"NVDA\", \"META\", \"TSLA\", \"GOOG\"], \"image_url\": \"https://g.foolcdn.com/editorial/images/765208/getty-buy-sell-hold-stocks-decide-ratings-analysts.jpg\", \"description\": \"This is the ultimate tool to diversify your portfolio.\", \"keywords\": [\"investing\"]}, {\"id\": \"F2nshszd6rBX-JJMHtbPuA325ZBizcq7iIJ7PBK_114\", \"publisher\": {\"name\": \"MarketWatch\", \"homepage_url\": \"https://www.marketwatch.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/marketwatch.svg\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/marketwatch.ico\"}, \"title\": \"Wall Street\\u2019s \\u2018mob psychology\\u2019 could fuel a dangerous stock meltup, warns top strategist\", \"author\": \"MarketWatch\", \"published_utc\": \"2024-02-20T12:20:00Z\", \"article_url\": \"https://www.marketwatch.com/story/wall-streets-mob-psychology-could-fuel-a-dangerous-stock-meltup-warns-top-strategist-fc8647c6\", \"tickers\": [\"NVDA\", \"GOOGL\", \"AMZN\", \"AAPL\", \"META\", \"MSFT\", \"NFLX\", \"TSLA\", \"COF\", \"DFS\", \"WMT\", \"VZIO\", \"HD\", \"PANW\", \"CZR\", \"SMCI\", \"OCGN\", \"NIO\", \"MARA\", \"AMD\", \"COIN\"], \"amp_url\": \"https://www.marketwatch.com/amp/story/wall-streets-mob-psychology-could-fuel-a-dangerous-stock-meltup-warns-top-strategist-fc8647c6\", \"image_url\": \"https://images.mktw.net/im-59050122/social\", \"description\": \"Ed Yardeni says giddy Wall Street 
analysts could be the undoing of this stock market.\"}, {\"id\": \"uvUtqCvuVV-wyCECL0DTIbHdqAtEgviJqSrPurvrqEQ\", \"publisher\": {\"name\": \"Zacks Investment Research\", \"homepage_url\": \"https://www.zacks.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/zacks.png\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/zacks.ico\"}, \"title\": \"Should Vanguard Mega Cap ETF (MGC) Be on Your Investing Radar?\", \"author\": \"Zacks Equity Research\", \"published_utc\": \"2024-02-20T11:20:07Z\", \"article_url\": \"https://www.zacks.com/stock/news/2228257/should-vanguard-mega-cap-etf-mgc-be-on-your-investing-radar\", \"tickers\": [\"MGC\", \"AMZN\", \"AAPL\", \"MSFT\", \"SPY\", \"IVV\"], \"amp_url\": \"https://www.zacks.com/amp/stock/news/2228257/should-vanguard-mega-cap-etf-mgc-be-on-your-investing-radar\", \"image_url\": \"https://staticx-tuner.zacks.com/images/default_article_images/default17.jpg\", \"description\": \"Style Box ETF report for MGC\"}, {\"id\": \"RRxHgtvw7_7Ql0QhUphMlzXdUjWhJbpZVjHqR7N5TCg\", \"publisher\": {\"name\": \"Zacks Investment Research\", \"homepage_url\": \"https://www.zacks.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/zacks.png\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/zacks.ico\"}, \"title\": \"Should Vanguard S&P 500 ETF (VOO) Be on Your Investing Radar?\", \"author\": \"Zacks Equity Research\", \"published_utc\": \"2024-02-20T11:20:06Z\", \"article_url\": \"https://www.zacks.com/stock/news/2228264/should-vanguard-sp-500-etf-voo-be-on-your-investing-radar\", \"tickers\": [\"VOO\", \"AMZN\", \"AAPL\", \"MSFT\", \"SPY\", \"IVV\"], \"amp_url\": \"https://www.zacks.com/amp/stock/news/2228264/should-vanguard-sp-500-etf-voo-be-on-your-investing-radar\", \"image_url\": \"https://staticx-tuner.zacks.com/images/default_article_images/default24.jpg\", \"description\": \"Style Box ETF report for VOO\"}, {\"id\": \"QBaNxnCVlXTHlVAChWRdM5RrWCD9f20qp6aUxfdxrEI\", \"publisher\": {\"name\": \"Zacks Investment Research\", \"homepage_url\": \"https://www.zacks.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/zacks.png\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/zacks.ico\"}, \"title\": \"Should Fidelity Nasdaq Composite Index ETF (ONEQ) Be on Your Investing Radar?\", \"author\": \"Zacks Equity Research\", \"published_utc\": \"2024-02-20T11:20:05Z\", \"article_url\": \"https://www.zacks.com/stock/news/2228269/should-fidelity-nasdaq-composite-index-etf-oneq-be-on-your-investing-radar\", \"tickers\": [\"ONEQ\", \"AMZN\", \"AAPL\", \"MSFT\", \"QQQ\", \"VUG\"], \"amp_url\": \"https://www.zacks.com/amp/stock/news/2228269/should-fidelity-nasdaq-composite-index-etf-oneq-be-on-your-investing-radar\", \"image_url\": \"https://staticx-tuner.zacks.com/images/default_article_images/default29.jpg\", \"description\": \"Style Box ETF report for ONEQ\"}, {\"id\": \"aVYhWKzOHm1Y8k8z9ka0xrCCI-tiUWYDdsrPuDFzG_g\", \"publisher\": {\"name\": \"The Motley Fool\", \"homepage_url\": \"https://www.fool.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/themotleyfool.svg\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/themotleyfool.ico\"}, \"title\": \"Apple Is Synonymous With iPhones, But Its Largest Gross Margin Comes From Somewhere Else Entirely\", \"author\": \"newsfeedback@fool.com (Neil Patel)\", \"published_utc\": \"2024-02-20T11:07:00Z\", \"article_url\": 
\"https://www.fool.com/investing/2024/02/20/apple-synonymous-iphones-devices-gross-margin/\", \"tickers\": [\"AAPL\"], \"image_url\": \"https://g.foolcdn.com/editorial/images/765917/aapl_revenue_bar.png\", \"description\": \"Investors need to pay attention to a budding segment.\", \"keywords\": [\"investing\"]}, {\"id\": \"6uW_52HjWOj3sKeCdPrqN4ZHmIbJAZxXV5naPVzjaHw\", \"publisher\": {\"name\": \"The Motley Fool\", \"homepage_url\": \"https://www.fool.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/themotleyfool.svg\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/themotleyfool.ico\"}, \"title\": \"Warren Buffett Has 51% of Berkshire Hathaway's $370 Billion Portfolio Invested in 2 Stocks\", \"author\": \"newsfeedback@fool.com (Trevor Jennewine)\", \"published_utc\": \"2024-02-20T10:45:00Z\", \"article_url\": \"https://www.fool.com/investing/2024/02/20/warren-buffett-51-portfolio-invested-in-2-stocks/\", \"tickers\": [\"AAPL\", \"KO\", \"BRK.A\", \"BRK.B\"], \"image_url\": \"https://g.foolcdn.com/editorial/images/765625/market-3.jpg\", \"description\": \"Warren Buffett's Berkshire Hathaway has more than half of its stock portfolio invested in Apple and Coca-Cola.\", \"keywords\": [\"investing\"]}, {\"id\": \"aE6I7jULkhZEsKWnVsRNUaPYNfsHBC89wARk4mf6iPQ\", \"publisher\": {\"name\": \"Investing.com\", \"homepage_url\": \"https://www.investing.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/investing.png\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/investing.ico\"}, \"title\": \"20-Year High Bullish Sentiment Meets Record Tech Allocation: Recipe for Disaster?\", \"author\": \"Investing.com\", \"published_utc\": \"2024-02-20T10:24:00Z\", \"article_url\": \"https://www.investing.com/analysis/20year-high-bullish-sentiment-meets-record-tech-allocation-recipe-for-disaster-200646188\", \"tickers\": [\"MSFT\", \"AAPL\", \"NVDA\", \"AMZN\", \"GOOGL\", \"META\"], \"amp_url\": \"https://m.investing.com/analysis/20year-high-bullish-sentiment-meets-record-tech-allocation-recipe-for-disaster-200646188?ampMode=1\", \"image_url\": \"https://i-invdn-com.investing.com/redesign/images/seo/investingcom_analysis_og.jpg\"}, {\"id\": \"EJIIij7T9dEbvogmzWS_P_1cLvVskocSSLrM3p2V_qE\", \"publisher\": {\"name\": \"Investing.com\", \"homepage_url\": \"https://www.investing.com/\", \"logo_url\": \"https://s3.polygon.io/public/assets/news/logos/investing.png\", \"favicon_url\": \"https://s3.polygon.io/public/assets/news/favicons/investing.ico\"}, \"title\": \"3 Wide-Moat Stocks Worth Buying in 2024\", \"author\": \"The Tokenist\", \"published_utc\": \"2024-02-20T07:09:00Z\", \"article_url\": \"https://www.investing.com/analysis/3-widemoat-stocks-worth-buying-in-2024-200646181\", \"tickers\": [\"KO\", \"GOOGL\", \"AAPL\", \"META\", \"AMZN\", \"MSFT\", \"NVDA\", \"AX\", \"QTWO\", \"ADBE\", \"CRM\"], \"amp_url\": \"https://m.investing.com/analysis/3-widemoat-stocks-worth-buying-in-2024-200646181?ampMode=1\", \"image_url\": \"https://i-invdn-com.investing.com/redesign/images/seo/investingcom_analysis_og.jpg\"}]\n" + ] + } + ], + "source": [ + "ticker_news_tool = PolygonTickerNews(api_wrapper=api_wrapper)\n", + "ticker_news = ticker_news_tool.run(ticker)\n", + "print(f\"Tool output: {ticker_news}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "dfd26ef6-2d92-483e-9780-484091bd3774", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total news items: 10\n" + ] + 
} + ], + "source": [ + "# Convert the news response to JSON array\n", + "ticker_news_json = json.loads(ticker_news)\n", + "print(f\"Total news items: {len(ticker_news_json)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "dbbb4b43-1096-45f3-8000-45538b3c73ee", + "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "{'results': {'P': 185.86, 'S': 1, 'T': 'AAPL', 'X': 11, 'i': [604], 'p': 185.81, 'q': 106551669, 's': 2, 't': 1705098436014023700, 'x': 12, 'y': 1705098436014009300, 'z': 3}}" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Title: Should You Worry About Apple's Slowdown in China?\n", + "Description: A local smartphone giant is gaining market share.\n", + "Publisher: The Motley Fool\n", + "URL: https://www.fool.com/investing/2024/02/20/should-you-worry-about-apples-slowdown-in-china/\n" + ] } ], "source": [ - "tool.run(\"AAPL\")" + "# Inspect the first news item\n", + "news_item = ticker_news_json[0]\n", + "print(f\"Title: {news_item['title']}\")\n", + "print(f\"Description: {news_item['description']}\")\n", + "print(f\"Publisher: {news_item['publisher']['name']}\")\n", + "print(f\"URL: {news_item['article_url']}\")" ] } ], @@ -86,9 +205,9 @@ "provenance": [] }, "kernelspec": { - "name": "venv", + "display_name": "Python (langchain)", "language": "python", - "display_name": "venv" + "name": "langchain" }, "language_info": { "codemirror_mode": { @@ -100,7 +219,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.9.18" }, "vscode": { "interpreter": { diff --git a/libs/community/langchain_community/agent_toolkits/polygon/toolkit.py b/libs/community/langchain_community/agent_toolkits/polygon/toolkit.py index 748c84c1eef9d..73cf95f684b9e 100644 --- a/libs/community/langchain_community/agent_toolkits/polygon/toolkit.py +++ b/libs/community/langchain_community/agent_toolkits/polygon/toolkit.py @@ -2,7 +2,7 @@ from langchain_community.agent_toolkits.base import BaseToolkit from langchain_community.tools import BaseTool -from langchain_community.tools.polygon import PolygonLastQuote +from langchain_community.tools.polygon import PolygonLastQuote, PolygonTickerNews from langchain_community.utilities.polygon import PolygonAPIWrapper @@ -18,7 +18,10 @@ def from_polygon_api_wrapper( tools = [ PolygonLastQuote( api_wrapper=polygon_api_wrapper, - ) + ), + PolygonTickerNews( + api_wrapper=polygon_api_wrapper, + ), ] return cls(tools=tools) diff --git a/libs/community/langchain_community/tools/__init__.py b/libs/community/langchain_community/tools/__init__.py index 59ad157de5bf8..58af2e95ac4cd 100644 --- a/libs/community/langchain_community/tools/__init__.py +++ b/libs/community/langchain_community/tools/__init__.py @@ -510,6 +510,12 @@ def _import_polygon_tool_PolygonLastQuote() -> Any: return PolygonLastQuote +def _import_polygon_tool_PolygonTickerNews() -> Any: + from langchain_community.tools.polygon.ticker_news import PolygonTickerNews + + return PolygonTickerNews + + def _import_powerbi_tool_InfoPowerBITool() -> Any: from langchain_community.tools.powerbi.tool import InfoPowerBITool @@ -957,6 +963,8 @@ def __getattr__(name: str) -> Any: return _import_plugin() elif name == "PolygonLastQuote": return _import_polygon_tool_PolygonLastQuote() + elif name == "PolygonTickerNews": + return _import_polygon_tool_PolygonTickerNews() elif name == "InfoPowerBITool": return 
_import_powerbi_tool_InfoPowerBITool() elif name == "ListPowerBITool": @@ -1141,6 +1149,7 @@ def __getattr__(name: str) -> Any: "OpenWeatherMapQueryRun", "PubmedQueryRun", "PolygonLastQuote", + "PolygonTickerNews", "RedditSearchRun", "QueryCheckerTool", "QueryPowerBITool", diff --git a/libs/community/langchain_community/tools/polygon/__init__.py b/libs/community/langchain_community/tools/polygon/__init__.py index acc8bc4ac70c7..b61740a8a2bd1 100644 --- a/libs/community/langchain_community/tools/polygon/__init__.py +++ b/libs/community/langchain_community/tools/polygon/__init__.py @@ -1,7 +1,9 @@ """Polygon IO tools.""" from langchain_community.tools.polygon.last_quote import PolygonLastQuote +from langchain_community.tools.polygon.ticker_news import PolygonTickerNews __all__ = [ "PolygonLastQuote", + "PolygonTickerNews", ] diff --git a/libs/community/langchain_community/tools/polygon/ticker_news.py b/libs/community/langchain_community/tools/polygon/ticker_news.py new file mode 100644 index 0000000000000..9d858ebc8f498 --- /dev/null +++ b/libs/community/langchain_community/tools/polygon/ticker_news.py @@ -0,0 +1,36 @@ +from typing import Optional, Type + +from langchain_core.callbacks import CallbackManagerForToolRun +from langchain_core.pydantic_v1 import BaseModel +from langchain_core.tools import BaseTool + +from langchain_community.utilities.polygon import PolygonAPIWrapper + + +class Inputs(BaseModel): + """Inputs for Polygon's Ticker News API""" + + query: str + + +class PolygonTickerNews(BaseTool): + """Tool that gets the latest news for a given ticker from Polygon""" + + mode: str = "get_ticker_news" + name: str = "polygon_ticker_news" + description: str = ( + "A wrapper around Polygon's Ticker News API. " + "This tool is useful for fetching the latest news for a stock. " + "Input should be the ticker that you want to get the latest news for." + ) + args_schema: Type[BaseModel] = Inputs + + api_wrapper: PolygonAPIWrapper + + def _run( + self, + query: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the Polygon API tool.""" + return self.api_wrapper.run(self.mode, ticker=query) diff --git a/libs/community/langchain_community/utilities/polygon.py b/libs/community/langchain_community/utilities/polygon.py index bd7aa9964fe64..9c6c3e75118ca 100644 --- a/libs/community/langchain_community/utilities/polygon.py +++ b/libs/community/langchain_community/utilities/polygon.py @@ -39,8 +39,29 @@ def get_last_quote(self, ticker: str) -> Optional[dict]: return data.get("results", None) + def get_ticker_news(self, ticker: str) -> Optional[dict]: + """ + Get the most recent news articles relating to a stock ticker symbol, + including a summary of the article and a link to the original source. + """ + url = ( + f"{POLYGON_BASE_URL}v2/reference/news?" 
+ f"ticker={ticker}&" + f"apiKey={self.polygon_api_key}" + ) + response = requests.get(url) + data = response.json() + + status = data.get("status", None) + if status != "OK": + raise ValueError(f"API Error: {data}") + + return data.get("results", None) + def run(self, mode: str, ticker: str) -> str: if mode == "get_last_quote": return json.dumps(self.get_last_quote(ticker)) + elif mode == "get_ticker_news": + return json.dumps(self.get_ticker_news(ticker)) else: raise ValueError(f"Invalid mode {mode} for Polygon API.") diff --git a/libs/community/tests/unit_tests/tools/test_imports.py b/libs/community/tests/unit_tests/tools/test_imports.py index 95fd4315575a5..09e73983efc76 100644 --- a/libs/community/tests/unit_tests/tools/test_imports.py +++ b/libs/community/tests/unit_tests/tools/test_imports.py @@ -85,6 +85,7 @@ "OpenWeatherMapQueryRun", "PubmedQueryRun", "PolygonLastQuote", + "PolygonTickerNews", "RedditSearchRun", "QueryCheckerTool", "QueryPowerBITool", diff --git a/libs/community/tests/unit_tests/tools/test_public_api.py b/libs/community/tests/unit_tests/tools/test_public_api.py index 1595dd4710917..2758300011b52 100644 --- a/libs/community/tests/unit_tests/tools/test_public_api.py +++ b/libs/community/tests/unit_tests/tools/test_public_api.py @@ -87,6 +87,7 @@ "OpenWeatherMapQueryRun", "PubmedQueryRun", "PolygonLastQuote", + "PolygonTickerNews", "RedditSearchRun", "QueryCheckerTool", "QueryPowerBITool", From 33555e5cbc7f4d36e9d64edb31de0b3eb40c2f67 Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Tue, 20 Feb 2024 20:21:08 +0100 Subject: [PATCH 20/31] docs: Add typehints in both signature and description of API docs (#17815) This way we can document APIs in methods signature only where they are checked by the typing system and we get them also in the param description without having to duplicate in the docstrings (where they are unchecked). Twitter: @cbornet_ --- docs/api_reference/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api_reference/conf.py b/docs/api_reference/conf.py index e993048fc57f4..36f34a44598dd 100644 --- a/docs/api_reference/conf.py +++ b/docs/api_reference/conf.py @@ -114,8 +114,8 @@ def setup(app): autodoc_member_order = "groupwise" autoclass_content = "both" autodoc_typehints_format = "short" +autodoc_typehints = "both" -# autodoc_typehints = "description" # Add any paths that contain templates here, relative to this directory. 
templates_path = ["templates"]
From 3ba1cb86509db7b13468d40347ef2222b3e9db78 Mon Sep 17 00:00:00 2001
From: Guangdong Liu
Date: Wed, 21 Feb 2024 03:22:27 +0800
Subject: [PATCH 21/31] community[minor]: Add SparkLLM Text Embedding Model and
 SparkLLM introduction (#17573)

---
 docs/docs/integrations/providers/sparkllm.mdx |  11 ++
 .../text_embedding/sparkllm.ipynb             |  90 +++++++++
 .../embeddings/__init__.py                    |   2 +
 .../embeddings/sparkllm.py                    | 184 ++++++++++++++++++
 .../embeddings/test_sparkllm.py               |  35 ++++
 .../unit_tests/embeddings/test_imports.py     |   1 +
 6 files changed, 323 insertions(+)
 create mode 100644 docs/docs/integrations/providers/sparkllm.mdx
 create mode 100644 docs/docs/integrations/text_embedding/sparkllm.ipynb
 create mode 100644 libs/community/langchain_community/embeddings/sparkllm.py
 create mode 100644 libs/community/tests/integration_tests/embeddings/test_sparkllm.py

diff --git a/docs/docs/integrations/providers/sparkllm.mdx b/docs/docs/integrations/providers/sparkllm.mdx
new file mode 100644
index 0000000000000..1c767c7aa1ea5
--- /dev/null
+++ b/docs/docs/integrations/providers/sparkllm.mdx
@@ -0,0 +1,11 @@
+# SparkLLM
+
+>[SparkLLM](https://xinghuo.xfyun.cn/spark) is a large-scale cognitive model independently developed by iFLYTEK.
+It has cross-domain knowledge and language understanding ability by learning a large amount of texts, codes and images.
+It can understand and perform tasks based on natural dialogue.
+
+## SparkLLM Chat Model
+An example is available at [example](/docs/integrations/chat/sparkllm).
+
+## SparkLLM Text Embedding Model
+An example is available at [example](/docs/integrations/text_embedding/sparkllm)
diff --git a/docs/docs/integrations/text_embedding/sparkllm.ipynb b/docs/docs/integrations/text_embedding/sparkllm.ipynb
new file mode 100644
index 0000000000000..595edc95153df
--- /dev/null
+++ b/docs/docs/integrations/text_embedding/sparkllm.ipynb
@@ -0,0 +1,90 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# SparkLLM Text Embeddings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Official Website: https://www.xfyun.cn/doc/spark/Embedding_new_api.html\n",
+    "\n",
+    "An API key is required to use this embedding model. You can get one by registering at https://platform.SparkLLM-ai.com/docs/text-Embedding."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "SparkLLMTextEmbeddings supports a 2K token window and produces vectors with 2560 dimensions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.embeddings import SparkLLMTextEmbeddings\n",
+    "\n",
+    "embeddings = SparkLLMTextEmbeddings(\n",
+    "    spark_app_id=\"sk-*\", spark_api_key=\"\", spark_api_secret=\"\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Alternatively, you can set the API key this way:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"SPARK_APP_ID\"] = \"YOUR_APP_ID\"\n",
+    "os.environ[\"SPARK_API_KEY\"] = \"YOUR_API_KEY\"\n",
+    "os.environ[\"SPARK_API_SECRET\"] = \"YOUR_API_SECRET\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "text_1 = \"iFLYTEK is a well-known intelligent speech and artificial intelligence publicly listed company in the Asia-Pacific Region.
Since its establishment, the company is devoted to cornerstone technological research in speech and languages, natural language understanding, machine learning, machine reasoning, adaptive learning, and has maintained the world-leading position in those domains. The company actively promotes the development of A.I. products and their sector-based applications, with visions of enabling machines to listen and speak, understand and think, creating a better world with artificial intelligence.\"\n",
+    "text_2 = \"iFLYTEK Open Platform was launched in 2010 by iFLYTEK as China’s first Artificial Intelligence open platform for Mobile Internet and intelligent hardware developers.\"\n",
+    "\n",
+    "query_result = embeddings.embed_query(text_2)\n",
+    "query_result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "doc_result = embeddings.embed_documents([text_1, text_2])\n",
+    "doc_result"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/libs/community/langchain_community/embeddings/__init__.py b/libs/community/langchain_community/embeddings/__init__.py
index 4d9565c60f7cb..6f652ccdb34a5 100644
--- a/libs/community/langchain_community/embeddings/__init__.py
+++ b/libs/community/langchain_community/embeddings/__init__.py
@@ -84,6 +84,7 @@
     SentenceTransformerEmbeddings,
 )
 from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
+from langchain_community.embeddings.sparkllm import SparkLLMTextEmbeddings
 from langchain_community.embeddings.tensorflow_hub import TensorflowHubEmbeddings
 from langchain_community.embeddings.vertexai import VertexAIEmbeddings
 from langchain_community.embeddings.volcengine import VolcanoEmbeddings
@@ -152,6 +153,7 @@
     "OCIGenAIEmbeddings",
     "QuantizedBiEncoderEmbeddings",
     "NeMoEmbeddings",
+    "SparkLLMTextEmbeddings",
 ]
diff --git a/libs/community/langchain_community/embeddings/sparkllm.py b/libs/community/langchain_community/embeddings/sparkllm.py
new file mode 100644
index 0000000000000..2d6ea5be5cef7
--- /dev/null
+++ b/libs/community/langchain_community/embeddings/sparkllm.py
@@ -0,0 +1,184 @@
+import base64
+import hashlib
+import hmac
+import json
+import logging
+from datetime import datetime
+from time import mktime
+from typing import Any, Dict, List, Optional
+from urllib.parse import urlencode
+from wsgiref.handlers import format_date_time
+
+import numpy as np
+import requests
+from langchain_core.embeddings import Embeddings
+from langchain_core.pydantic_v1 import BaseModel, SecretStr, root_validator
+from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env
+from numpy import ndarray
+
+# Used for document and knowledge embedding
+EMBEDDING_P_API_URL: str = "https://cn-huabei-1.xf-yun.com/v1/private/sa8a05c27"
+# Used for user questions embedding
+EMBEDDING_Q_API_URL: str = "https://cn-huabei-1.xf-yun.com/v1/private/s50d55a16"
+
+# SparkLLMTextEmbeddings is an embedding model provided by iFLYTEK Co., Ltd. (https://iflytek.com/en/).
+
+# Official Website: https://www.xfyun.cn/doc/spark/Embedding_new_api.html
+# Developers need to create an application in the console first, use the appid, APIKey,
+# and APISecret provided in the application for authentication,
+# and generate an authentication URL for handshake.
+# You can get one by registering at https://console.xfyun.cn/services/bm3.
+# SparkLLMTextEmbeddings supports a 2K token window and produces vectors with
+# 2560 dimensions.
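+#
+# Note on authentication: every request must go to a signed URL. The helper
+# _assemble_ws_auth_url below builds it by computing an HMAC-SHA256 digest
+# (keyed with the API secret) over the host, the RFC 1123 date and the HTTP
+# request line, base64-encoding it, and appending host, date and the resulting
+# authorization value as query parameters.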
+
+logger = logging.getLogger(__name__)
+
+
+class Url:
+    def __init__(self, host: str, path: str, schema: str) -> None:
+        self.host = host
+        self.path = path
+        self.schema = schema
+
+
+class SparkLLMTextEmbeddings(BaseModel, Embeddings):
+    """SparkLLM Text Embedding models."""
+
+    spark_app_id: SecretStr
+    spark_api_key: SecretStr
+    spark_api_secret: SecretStr
+
+    @root_validator(allow_reuse=True)
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the auth credentials exist in the environment."""
+        values["spark_app_id"] = convert_to_secret_str(
+            get_from_dict_or_env(values, "spark_app_id", "SPARK_APP_ID")
+        )
+        values["spark_api_key"] = convert_to_secret_str(
+            get_from_dict_or_env(values, "spark_api_key", "SPARK_API_KEY")
+        )
+        values["spark_api_secret"] = convert_to_secret_str(
+            get_from_dict_or_env(values, "spark_api_secret", "SPARK_API_SECRET")
+        )
+        return values
+
+    def _embed(self, texts: List[str], host: str) -> Optional[List[List[float]]]:
+        url = self._assemble_ws_auth_url(
+            request_url=host,
+            method="POST",
+            api_key=self.spark_api_key.get_secret_value(),
+            api_secret=self.spark_api_secret.get_secret_value(),
+        )
+        content = self._get_body(self.spark_app_id.get_secret_value(), texts)
+        response = requests.post(
+            url, json=content, headers={"content-type": "application/json"}
+        ).text
+        res_arr = self._parser_message(response)
+        if res_arr is not None:
+            return res_arr.tolist()
+        return None
+
+    def embed_documents(self, texts: List[str]) -> Optional[List[List[float]]]:  # type: ignore[override]
+        """Public method to get embeddings for a list of documents.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            A list of embeddings, one for each text, or None if an error occurs.
+        """
+        return self._embed(texts, EMBEDDING_P_API_URL)
+
+    def embed_query(self, text: str) -> Optional[List[float]]:  # type: ignore[override]
+        """Public method to get embedding for a single query text.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text, or None if an error occurs.
+        """
+        result = self._embed([text], EMBEDDING_Q_API_URL)
+        return result[0] if result is not None else None
+
+    @staticmethod
+    def _assemble_ws_auth_url(
+        request_url: str, method: str = "GET", api_key: str = "", api_secret: str = ""
+    ) -> str:
+        u = SparkLLMTextEmbeddings._parse_url(request_url)
+        host = u.host
+        path = u.path
+        now = datetime.now()
+        date = format_date_time(mktime(now.timetuple()))
+        signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1".format(
+            host, date, method, path
+        )
+        signature_sha = hmac.new(
+            api_secret.encode("utf-8"),
+            signature_origin.encode("utf-8"),
+            digestmod=hashlib.sha256,
+        ).digest()
+        signature_sha_str = base64.b64encode(signature_sha).decode(encoding="utf-8")
+        authorization_origin = (
+            'api_key="%s", algorithm="%s", headers="%s", signature="%s"'
+            % (api_key, "hmac-sha256", "host date request-line", signature_sha_str)
+        )
+        authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(
+            encoding="utf-8"
+        )
+        values = {"host": host, "date": date, "authorization": authorization}
+
+        return request_url + "?"
+ urlencode(values)
+
+    @staticmethod
+    def _parse_url(request_url: str) -> Url:
+        stidx = request_url.index("://")
+        host = request_url[stidx + 3 :]
+        schema = request_url[: stidx + 3]
+        edidx = host.index("/")
+        if edidx <= 0:
+            raise AssembleHeaderException("invalid request url: " + request_url)
+        path = host[edidx:]
+        host = host[:edidx]
+        u = Url(host, path, schema)
+        return u
+
+    @staticmethod
+    def _get_body(appid: str, text: List[str]) -> Dict[str, Any]:
+        body = {
+            "header": {"app_id": appid, "uid": "39769795890", "status": 3},
+            "parameter": {"emb": {"feature": {"encoding": "utf8"}}},
+            "payload": {
+                "messages": {
+                    "text": base64.b64encode(json.dumps(text).encode("utf-8")).decode()
+                }
+            },
+        }
+        return body
+
+    @staticmethod
+    def _parser_message(
+        message: str,
+    ) -> Optional[ndarray]:
+        data = json.loads(message)
+        code = data["header"]["code"]
+        if code != 0:
+            logger.warning(f"Request error: {code}, {data}")
+            return None
+        else:
+            text_base = data["payload"]["feature"]["text"]
+            text_data = base64.b64decode(text_base)
+            dt = np.dtype(np.float32)
+            dt = dt.newbyteorder("<")
+            text = np.frombuffer(text_data, dtype=dt)
+            if len(text) > 2560:
+                array = text[:2560]
+            else:
+                array = text
+            return array
+
+
+class AssembleHeaderException(Exception):
+    def __init__(self, msg: str) -> None:
+        self.message = msg
diff --git a/libs/community/tests/integration_tests/embeddings/test_sparkllm.py b/libs/community/tests/integration_tests/embeddings/test_sparkllm.py
new file mode 100644
index 0000000000000..b934e2e0089da
--- /dev/null
+++ b/libs/community/tests/integration_tests/embeddings/test_sparkllm.py
@@ -0,0 +1,35 @@
+"""Test SparkLLM Text Embedding."""
+from langchain_community.embeddings.sparkllm import SparkLLMTextEmbeddings
+
+
+def test_sparkllm_embedding_documents() -> None:
+    """Test SparkLLM Text Embedding for documents."""
+    documents = [
+        "iFLYTEK is a well-known intelligent speech and artificial intelligence "
+        "publicly listed company in the Asia-Pacific Region. Since its establishment,"
+        "the company is devoted to cornerstone technological research "
+        "in speech and languages, natural language understanding, machine learning,"
+        "machine reasoning, adaptive learning, "
+        "and has maintained the world-leading position in those "
+        "domains. The company actively promotes the development of A.I. "
+        "products and their sector-based "
+        "applications, with visions of enabling machines to listen and speak, "
+        "understand and think, "
+        "creating a better world with artificial intelligence."
+    ]
+    embedding = SparkLLMTextEmbeddings()
+    output = embedding.embed_documents(documents)
+    assert len(output) == 1  # type: ignore[arg-type]
+    assert len(output[0]) == 2560  # type: ignore[index]
+
+
+def test_sparkllm_embedding_query() -> None:
+    """Test SparkLLM Text Embedding for query."""
+    document = (
+        "iFLYTEK Open Platform was launched in 2010 by iFLYTEK as China’s "
+        "first Artificial Intelligence open platform for Mobile Internet "
+        "and intelligent hardware developers"
+    )
+    embedding = SparkLLMTextEmbeddings()
+    output = embedding.embed_query(document)
+    assert len(output) == 2560  # type: ignore[arg-type]
diff --git a/libs/community/tests/unit_tests/embeddings/test_imports.py b/libs/community/tests/unit_tests/embeddings/test_imports.py
index c66672c79ee0d..c3bbb90d3ecad 100644
--- a/libs/community/tests/unit_tests/embeddings/test_imports.py
+++ b/libs/community/tests/unit_tests/embeddings/test_imports.py
@@ -60,6 +60,7 @@
     "OCIGenAIEmbeddings",
     "QuantizedBiEncoderEmbeddings",
     "NeMoEmbeddings",
+    "SparkLLMTextEmbeddings",
 ]
From 47b1b7092dac56794c8799d6af149fa00612086e Mon Sep 17 00:00:00 2001
From: Guangdong Liu
Date: Wed, 21 Feb 2024 03:23:47 +0800
Subject: [PATCH 22/31] community[minor]: Add SparkLLM to community (#17702)

---
 docs/docs/integrations/llms/sparkllm.ipynb    | 141 +++++++
 .../langchain_community/llms/__init__.py      |  10 +
 .../langchain_community/llms/sparkllm.py      | 383 ++++++++++++++++++
 .../integration_tests/llms/test_sparkllm.py   |  19 +
 .../tests/unit_tests/llms/test_imports.py     |   1 +
 5 files changed, 554 insertions(+)
 create mode 100644 docs/docs/integrations/llms/sparkllm.ipynb
 create mode 100644 libs/community/langchain_community/llms/sparkllm.py
 create mode 100644 libs/community/tests/integration_tests/llms/test_sparkllm.py

diff --git a/docs/docs/integrations/llms/sparkllm.ipynb b/docs/docs/integrations/llms/sparkllm.ipynb
new file mode 100644
index 0000000000000..f17c33a36d381
--- /dev/null
+++ b/docs/docs/integrations/llms/sparkllm.ipynb
@@ -0,0 +1,141 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# SparkLLM\n",
+    "[SparkLLM](https://xinghuo.xfyun.cn/spark) is a large-scale cognitive model independently developed by iFLYTEK.\n",
+    "It has cross-domain knowledge and language understanding ability by learning a large amount of texts, codes and images.\n",
+    "It can understand and perform tasks based on natural dialogue."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Prerequisite\n",
+    "- Get SparkLLM's app_id, api_key and api_secret from [iFlyTek SparkLLM API Console](https://console.xfyun.cn/services/bm3) (for more info, see [iFlyTek SparkLLM Intro](https://xinghuo.xfyun.cn/sparkapi) ), then set environment variables `IFLYTEK_SPARK_APP_ID`, `IFLYTEK_SPARK_API_KEY` and `IFLYTEK_SPARK_API_SECRET` or pass parameters when creating `SparkLLM`, as in the demo below."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use SparkLLM" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"IFLYTEK_SPARK_APP_ID\"] = \"app_id\"\n", + "os.environ[\"IFLYTEK_SPARK_API_KEY\"] = \"api_key\"\n", + "os.environ[\"IFLYTEK_SPARK_API_SECRET\"] = \"api_secret\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/liugddx/code/langchain/libs/core/langchain_core/_api/deprecation.py:117: LangChainDeprecationWarning: The function `__call__` was deprecated in LangChain 0.1.7 and will be removed in 0.2.0. Use invoke instead.\n", + " warn_deprecated(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My name is iFLYTEK Spark. How can I assist you today?\n" + ] + } + ], + "source": [ + "from langchain_community.llms import SparkLLM\n", + "\n", + "# Load the model\n", + "llm = SparkLLM()\n", + "\n", + "res = llm(\"What's your name?\")\n", + "print(res)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2024-02-18T13:04:29.305856Z", + "start_time": "2024-02-18T13:04:28.085715Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": "LLMResult(generations=[[Generation(text='Hello! How can I assist you today?')]], llm_output=None, run=[RunInfo(run_id=UUID('d8cdcd41-a698-4cbf-a28d-e74f9cd2037b'))])" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res = llm.generate(prompts=[\"hello!\"])\n", + "res" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2024-02-18T13:05:44.640035Z", + "start_time": "2024-02-18T13:05:43.244126Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello! 
How can I assist you today?\n"
+     ]
+    }
+   ],
+   "source": [
+    "for res in llm.stream(\"foo:\"):\n",
+    "    print(res)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/libs/community/langchain_community/llms/__init__.py b/libs/community/langchain_community/llms/__init__.py
index 9adaf0af1be14..42cd3f0c2739b 100644
--- a/libs/community/langchain_community/llms/__init__.py
+++ b/libs/community/langchain_community/llms/__init__.py
@@ -582,6 +582,12 @@ def _import_volcengine_maas() -> Any:
     return VolcEngineMaasLLM
 
 
+def _import_sparkllm() -> Any:
+    from langchain_community.llms.sparkllm import SparkLLM
+
+    return SparkLLM
+
+
 def __getattr__(name: str) -> Any:
     if name == "AI21":
         return _import_ai21()
@@ -769,6 +775,8 @@ def __getattr__(name: str) -> Any:
             k: v() for k, v in get_type_to_cls_dict().items()
         }
         return type_to_cls_dict
+    elif name == "SparkLLM":
+        return _import_sparkllm()
     else:
         raise AttributeError(f"Could not find: {name}")
 
@@ -861,6 +869,7 @@ def __getattr__(name: str) -> Any:
     "YandexGPT",
     "Yuan2",
     "VolcEngineMaasLLM",
+    "SparkLLM",
 ]
 
 
@@ -950,4 +959,5 @@ def get_type_to_cls_dict() -> Dict[str, Callable[[], Type[BaseLLM]]]:
     "yandex_gpt": _import_yandex_gpt,
     "yuan2": _import_yuan2,
     "VolcEngineMaasLLM": _import_volcengine_maas,
+    "SparkLLM": _import_sparkllm,
 }
diff --git a/libs/community/langchain_community/llms/sparkllm.py b/libs/community/langchain_community/llms/sparkllm.py
new file mode 100644
index 0000000000000..0f49a356b9d36
--- /dev/null
+++ b/libs/community/langchain_community/llms/sparkllm.py
@@ -0,0 +1,383 @@
+from __future__ import annotations
+
+import base64
+import hashlib
+import hmac
+import json
+import logging
+import queue
+import threading
+from datetime import datetime
+from queue import Queue
+from time import mktime
+from typing import Any, Dict, Generator, Iterator, List, Optional
+from urllib.parse import urlencode, urlparse, urlunparse
+from wsgiref.handlers import format_date_time
+
+from langchain_core.callbacks import CallbackManagerForLLMRun
+from langchain_core.language_models.llms import LLM
+from langchain_core.outputs import GenerationChunk
+from langchain_core.pydantic_v1 import Field, root_validator
+from langchain_core.utils import get_from_dict_or_env
+
+logger = logging.getLogger(__name__)
+
+
+class SparkLLM(LLM):
+    """Wrapper around iFlyTek's Spark large language model.
+
+    To use, you should pass `app_id`, `api_key`, `api_secret`
+    as a named parameter to the constructor OR set environment
+    variables ``IFLYTEK_SPARK_APP_ID``, ``IFLYTEK_SPARK_API_KEY`` and
+    ``IFLYTEK_SPARK_API_SECRET``
+
+    Example:
+        ..
code-block:: python
+
+            client = SparkLLM(
+                spark_app_id="",
+                spark_api_key="",
+                spark_api_secret=""
+            )
+    """
+
+    client: Any = None  #: :meta private:
+    spark_app_id: Optional[str] = None
+    spark_api_key: Optional[str] = None
+    spark_api_secret: Optional[str] = None
+    spark_api_url: Optional[str] = None
+    spark_llm_domain: Optional[str] = None
+    spark_user_id: str = "lc_user"
+    streaming: bool = False
+    request_timeout: int = 30
+    temperature: float = 0.5
+    top_k: int = 4
+    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        values["spark_app_id"] = get_from_dict_or_env(
+            values,
+            "spark_app_id",
+            "IFLYTEK_SPARK_APP_ID",
+        )
+        values["spark_api_key"] = get_from_dict_or_env(
+            values,
+            "spark_api_key",
+            "IFLYTEK_SPARK_API_KEY",
+        )
+        values["spark_api_secret"] = get_from_dict_or_env(
+            values,
+            "spark_api_secret",
+            "IFLYTEK_SPARK_API_SECRET",
+        )
+        values["spark_api_url"] = get_from_dict_or_env(
+            values,
+            "spark_api_url",
+            "IFLYTEK_SPARK_API_URL",
+            "wss://spark-api.xf-yun.com/v3.1/chat",
+        )
+        values["spark_llm_domain"] = get_from_dict_or_env(
+            values,
+            "spark_llm_domain",
+            "IFLYTEK_SPARK_LLM_DOMAIN",
+            "generalv3",
+        )
+        # put extra params into model_kwargs
+        values["model_kwargs"]["temperature"] = values["temperature"] or cls.temperature
+        values["model_kwargs"]["top_k"] = values["top_k"] or cls.top_k
+
+        values["client"] = _SparkLLMClient(
+            app_id=values["spark_app_id"],
+            api_key=values["spark_api_key"],
+            api_secret=values["spark_api_secret"],
+            api_url=values["spark_api_url"],
+            spark_domain=values["spark_llm_domain"],
+            model_kwargs=values["model_kwargs"],
+        )
+        return values
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "spark-llm-chat"
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling the SparkLLM API."""
+        normal_params = {
+            "spark_llm_domain": self.spark_llm_domain,
+            "stream": self.streaming,
+            "request_timeout": self.request_timeout,
+            "top_k": self.top_k,
+            "temperature": self.temperature,
+        }
+
+        return {**normal_params, **self.model_kwargs}
+
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Call out to the SparkLLM API for each generation with a prompt.
+
+        Args:
+            prompt: The prompt to pass into the model.
+            stop: Optional list of stop words to use when generating.
+
+        Returns:
+            The string generated by the llm.
+
+        Example:
+            ..
code-block:: python
+
+                response = client("Tell me a joke.")
+        """
+        if self.streaming:
+            completion = ""
+            for chunk in self._stream(prompt, stop, run_manager, **kwargs):
+                completion += chunk.text
+            return completion
+        completion = ""
+        self.client.arun(
+            [{"role": "user", "content": prompt}],
+            self.spark_user_id,
+            self.model_kwargs,
+            self.streaming,
+        )
+        for content in self.client.subscribe(timeout=self.request_timeout):
+            if "data" not in content:
+                continue
+            completion = content["data"]["content"]
+
+        return completion
+
+    def _stream(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> Iterator[GenerationChunk]:
+        self.client.run(
+            [{"role": "user", "content": prompt}],
+            self.spark_user_id,
+            self.model_kwargs,
+            self.streaming,
+        )
+        for content in self.client.subscribe(timeout=self.request_timeout):
+            if "data" not in content:
+                continue
+            delta = content["data"]
+            yield GenerationChunk(text=delta["content"])
+            if run_manager:
+                run_manager.on_llm_new_token(delta)
+
+
+class _SparkLLMClient:
+    """
+    Use websocket-client to call the SparkLLM interface provided by Xfyun,
+    iFlyTek's open platform for AI capabilities.
+    """
+
+    def __init__(
+        self,
+        app_id: str,
+        api_key: str,
+        api_secret: str,
+        api_url: Optional[str] = None,
+        spark_domain: Optional[str] = None,
+        model_kwargs: Optional[dict] = None,
+    ):
+        try:
+            import websocket
+
+            self.websocket_client = websocket
+        except ImportError:
+            raise ImportError(
+                "Could not import websocket client python package. "
+                "Please install it with `pip install websocket-client`."
+            )
+
+        self.api_url = (
+            "wss://spark-api.xf-yun.com/v3.1/chat" if not api_url else api_url
+        )
+        self.app_id = app_id
+        self.ws_url = _SparkLLMClient._create_url(
+            self.api_url,
+            api_key,
+            api_secret,
+        )
+        self.model_kwargs = model_kwargs
+        self.spark_domain = spark_domain or "generalv3"
+        self.queue: Queue[Dict] = Queue()
+        self.blocking_message = {"content": "", "role": "assistant"}
+
+    @staticmethod
+    def _create_url(api_url: str, api_key: str, api_secret: str) -> str:
+        """
+        Generate a request url with an api key and an api secret.
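+
+        The url embeds an ``authorization`` query parameter: an HMAC-SHA256
+        signature (keyed with the api secret) over the host, the RFC 1123
+        date and the HTTP request line, base64-encoded together with the
+        api key and the signed-headers list.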
+ """ + # generate timestamp by RFC1123 + date = format_date_time(mktime(datetime.now().timetuple())) + + # urlparse + parsed_url = urlparse(api_url) + host = parsed_url.netloc + path = parsed_url.path + + signature_origin = f"host: {host}\ndate: {date}\nGET {path} HTTP/1.1" + + # encrypt using hmac-sha256 + signature_sha = hmac.new( + api_secret.encode("utf-8"), + signature_origin.encode("utf-8"), + digestmod=hashlib.sha256, + ).digest() + + signature_sha_base64 = base64.b64encode(signature_sha).decode(encoding="utf-8") + + authorization_origin = f'api_key="{api_key}", algorithm="hmac-sha256", \ + headers="host date request-line", signature="{signature_sha_base64}"' + authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode( + encoding="utf-8" + ) + + # generate url + params_dict = {"authorization": authorization, "date": date, "host": host} + encoded_params = urlencode(params_dict) + url = urlunparse( + ( + parsed_url.scheme, + parsed_url.netloc, + parsed_url.path, + parsed_url.params, + encoded_params, + parsed_url.fragment, + ) + ) + return url + + def run( + self, + messages: List[Dict], + user_id: str, + model_kwargs: Optional[dict] = None, + streaming: bool = False, + ) -> None: + self.websocket_client.enableTrace(False) + ws = self.websocket_client.WebSocketApp( + self.ws_url, + on_message=self.on_message, + on_error=self.on_error, + on_close=self.on_close, + on_open=self.on_open, + ) + ws.messages = messages + ws.user_id = user_id + ws.model_kwargs = self.model_kwargs if model_kwargs is None else model_kwargs + ws.streaming = streaming + ws.run_forever() + + def arun( + self, + messages: List[Dict], + user_id: str, + model_kwargs: Optional[dict] = None, + streaming: bool = False, + ) -> threading.Thread: + ws_thread = threading.Thread( + target=self.run, + args=( + messages, + user_id, + model_kwargs, + streaming, + ), + ) + ws_thread.start() + return ws_thread + + def on_error(self, ws: Any, error: Optional[Any]) -> None: + self.queue.put({"error": error}) + ws.close() + + def on_close(self, ws: Any, close_status_code: int, close_reason: str) -> None: + logger.debug( + { + "log": { + "close_status_code": close_status_code, + "close_reason": close_reason, + } + } + ) + self.queue.put({"done": True}) + + def on_open(self, ws: Any) -> None: + self.blocking_message = {"content": "", "role": "assistant"} + data = json.dumps( + self.gen_params( + messages=ws.messages, user_id=ws.user_id, model_kwargs=ws.model_kwargs + ) + ) + ws.send(data) + + def on_message(self, ws: Any, message: str) -> None: + data = json.loads(message) + code = data["header"]["code"] + if code != 0: + self.queue.put( + {"error": f"Code: {code}, Error: {data['header']['message']}"} + ) + ws.close() + else: + choices = data["payload"]["choices"] + status = choices["status"] + content = choices["text"][0]["content"] + if ws.streaming: + self.queue.put({"data": choices["text"][0]}) + else: + self.blocking_message["content"] += content + if status == 2: + if not ws.streaming: + self.queue.put({"data": self.blocking_message}) + usage_data = ( + data.get("payload", {}).get("usage", {}).get("text", {}) + if data + else {} + ) + self.queue.put({"usage": usage_data}) + ws.close() + + def gen_params( + self, messages: list, user_id: str, model_kwargs: Optional[dict] = None + ) -> dict: + data: Dict = { + "header": {"app_id": self.app_id, "uid": user_id}, + "parameter": {"chat": {"domain": self.spark_domain}}, + "payload": {"message": {"text": messages}}, + } + + if model_kwargs: + 
data["parameter"]["chat"].update(model_kwargs) + logger.debug(f"Spark Request Parameters: {data}") + return data + + def subscribe(self, timeout: Optional[int] = 30) -> Generator[Dict, None, None]: + while True: + try: + content = self.queue.get(timeout=timeout) + except queue.Empty as _: + raise TimeoutError( + f"SparkLLMClient wait LLM api response timeout {timeout} seconds" + ) + if "error" in content: + raise ConnectionError(content["error"]) + if "usage" in content: + yield content + continue + if "done" in content: + break + if "data" not in content: + break + yield content diff --git a/libs/community/tests/integration_tests/llms/test_sparkllm.py b/libs/community/tests/integration_tests/llms/test_sparkllm.py new file mode 100644 index 0000000000000..6df9bf7c36fa1 --- /dev/null +++ b/libs/community/tests/integration_tests/llms/test_sparkllm.py @@ -0,0 +1,19 @@ +"""Test SparkLLM.""" +from langchain_core.outputs import LLMResult + +from langchain_community.llms.sparkllm import SparkLLM + + +def test_call() -> None: + """Test valid call to sparkllm.""" + llm = SparkLLM() + output = llm("Say foo:") + assert isinstance(output, str) + + +def test_generate() -> None: + """Test valid call to sparkllm.""" + llm = SparkLLM() + output = llm.generate(["Say foo:"]) + assert isinstance(output, LLMResult) + assert isinstance(output.generations, list) diff --git a/libs/community/tests/unit_tests/llms/test_imports.py b/libs/community/tests/unit_tests/llms/test_imports.py index 6c8c6504e36a2..1f5489b2b3554 100644 --- a/libs/community/tests/unit_tests/llms/test_imports.py +++ b/libs/community/tests/unit_tests/llms/test_imports.py @@ -90,6 +90,7 @@ "Yuan2", "VolcEngineMaasLLM", "WatsonxLLM", + "SparkLLM", ] From 6e854ae371910e2f87f4b739b22c6883a7309e3a Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Tue, 20 Feb 2024 13:33:20 -0800 Subject: [PATCH 23/31] docs: fix api docs search (#17820) --- docs/api_reference/themes/scikit-learn-modern/search.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api_reference/themes/scikit-learn-modern/search.html b/docs/api_reference/themes/scikit-learn-modern/search.html index a1ededafb5240..2c9f40f15a6cc 100644 --- a/docs/api_reference/themes/scikit-learn-modern/search.html +++ b/docs/api_reference/themes/scikit-learn-modern/search.html @@ -5,7 +5,7 @@ - +