Merge branch 'langchain-ai:master' into master
chadj2 authored May 15, 2024
2 parents 9a76986 + f2f970f commit cf2bed9
Showing 11 changed files with 355 additions and 102 deletions.
2 changes: 1 addition & 1 deletion docs/docs/how_to/function_calling.ipynb
@@ -696,7 +696,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.1"
+   "version": "3.9.1"
   }
  },
  "nbformat": 4,
376 changes: 299 additions & 77 deletions docs/docs/how_to/structured_output.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/docs/integrations/chat/openai.ipynb
@@ -147,7 +147,7 @@
 "\n",
 "### ChatOpenAI.bind_tools()\n",
 "\n",
-"With `ChatAnthropic.bind_tools`, we can easily pass in Pydantic classes, dict schemas, LangChain tools, or even functions as tools to the model. Under the hood these are converted to an Anthropic tool schemas, which looks like:\n",
+"With `ChatOpenAI.bind_tools`, we can easily pass in Pydantic classes, dict schemas, LangChain tools, or even functions as tools to the model. Under the hood these are converted to an OpenAI tool schemas, which looks like:\n",
 "```\n",
 "{\n",
 " \"name\": \"...\",\n",
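For reference, a minimal sketch of the corrected API (the model name and tool definition here are illustrative, not taken from this diff):

    from langchain_core.pydantic_v1 import BaseModel, Field
    from langchain_openai import ChatOpenAI

    class GetWeather(BaseModel):
        """Get the current weather in a given location."""
        location: str = Field(..., description="City and state, e.g. San Francisco, CA")

    llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
    # The Pydantic class is converted to an OpenAI tool schema under the hood.
    llm_with_tools = llm.bind_tools([GetWeather])
    ai_msg = llm_with_tools.invoke("What is the weather like in San Francisco?")
    print(ai_msg.tool_calls)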
6 changes: 3 additions & 3 deletions docs/docusaurus.config.js
@@ -124,7 +124,7 @@ const config = {
       /** @type {import('@docusaurus/preset-classic').ThemeConfig} */
       ({
         announcementBar: {
-          content: 'You are viewing the <strong>preview</strong> LangChain v0.2 docs. Note that 0.2 Search features are currently unstable and in progress. View the <a href="/v0.1/docs/get_started/introduction/">stable 0.1 docs here</a>.',
+          content: 'You are viewing the <strong>preview</strong> LangChain v0.2 docs. View the <a href="/v0.1/docs/get_started/introduction/">stable 0.1 docs here</a>.',
           isCloseable: true,
         },
         docs: {
@@ -310,9 +310,9 @@ const config = {
       // this is linked to [email protected] currently
       apiKey: "6c01842d6a88772ed2236b9c85806441",

-      indexName: "python-langchain",
+      indexName: "python-langchain-0.2",

-      contextualSearch: true,
+      contextualSearch: false,
     },
   }),
4 changes: 0 additions & 4 deletions docs/scripts/notebook_convert.py
@@ -84,12 +84,8 @@ def check_conditions(self, cell):
         pattern = re.compile(r"(?s)(?:\s*\Z)|(?:.*#\s*\|\s*output:\s*false.*)")
         rtn = not pattern.match(cell.source)
         if not rtn:
-            print("--remove--")
-            print(cell.source)
             return False
         else:
-            print("--keep--")
-            print(cell.source)
             return True

     def preprocess(self, nb, resources):
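A quick sketch of what that regex filters (my reading of the pattern, not part of the commit): it matches cells that are empty or whitespace-only, or whose source carries a `# | output: false` marker, and `check_conditions` keeps only cells the pattern does not match.

    import re

    pattern = re.compile(r"(?s)(?:\s*\Z)|(?:.*#\s*\|\s*output:\s*false.*)")

    kept = "print('hello')"                      # no marker -> check_conditions keeps it
    removed_empty = "   \n   "                   # whitespace-only -> removed
    removed_marked = "x = 1  # | output: false"  # marker comment -> removed

    for source in (kept, removed_empty, removed_marked):
        print(bool(pattern.match(source)), repr(source))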
4 changes: 3 additions & 1 deletion libs/core/langchain_core/language_models/base.py
@@ -204,7 +204,9 @@ async def agenerate_prompt(
     def with_structured_output(
         self, schema: Union[Dict, Type[BaseModel]], **kwargs: Any
     ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
-        """Implement this if there is a way of steering the model to generate responses that match a given schema."""  # noqa: E501
+        """Not implemented on this class."""
+        # Implement this on child class if there is a way of steering the model to
+        # generate responses that match a given schema.
         raise NotImplementedError()

     @deprecated("0.1.7", alternative="invoke", removal="0.3.0")
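Subclasses that can steer generation (e.g. `ChatOpenAI`) override this method; a minimal usage sketch, with the schema class being illustrative:

    from langchain_core.pydantic_v1 import BaseModel
    from langchain_openai import ChatOpenAI

    class Joke(BaseModel):
        setup: str
        punchline: str

    # Returns a Runnable that coerces model output into the given schema.
    structured_llm = ChatOpenAI(model="gpt-3.5-turbo-0125").with_structured_output(Joke)
    result = structured_llm.invoke("Tell me a joke about cats")  # -> Joke instance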
5 changes: 5 additions & 0 deletions libs/core/pyproject.toml
@@ -80,6 +80,11 @@ select = [
 disallow_untyped_defs = "True"
 exclude = ["notebooks", "examples", "example_data", "langchain_core/pydantic"]

+[[tool.mypy.overrides]]
+# conditional dependencies introduced by langsmith-sdk
+module = ["numpy", "pytest"]
+ignore_missing_imports = true
+
 [tool.coverage.run]
 omit = ["tests/*"]
43 changes: 32 additions & 11 deletions libs/partners/mongodb/langchain_mongodb/vectorstores.py
@@ -16,6 +16,7 @@
 )

 import numpy as np
+from bson import ObjectId, json_util
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
 from langchain_core.runnables.config import run_in_executor
@@ -31,7 +32,7 @@

 logger = logging.getLogger(__name__)

-DEFAULT_INSERT_BATCH_SIZE = 100
+DEFAULT_INSERT_BATCH_SIZE = 100_000


 class MongoDBAtlasVectorSearch(VectorStore):
@@ -150,18 +151,24 @@ def add_texts(
         """
         batch_size = kwargs.get("batch_size", DEFAULT_INSERT_BATCH_SIZE)
         _metadatas: Union[List, Generator] = metadatas or ({} for _ in texts)
-        texts_batch = []
-        metadatas_batch = []
+        texts_batch = texts
+        metadatas_batch = _metadatas
         result_ids = []
-        for i, (text, metadata) in enumerate(zip(texts, _metadatas)):
-            texts_batch.append(text)
-            metadatas_batch.append(metadata)
-            if (i + 1) % batch_size == 0:
-                result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
-                texts_batch = []
-                metadatas_batch = []
+        if batch_size:
+            texts_batch = []
+            metadatas_batch = []
+            size = 0
+            for i, (text, metadata) in enumerate(zip(texts, _metadatas)):
+                size += len(text) + len(metadata)
+                texts_batch.append(text)
+                metadatas_batch.append(metadata)
+                if (i + 1) % batch_size == 0 or size >= 47_000_000:
+                    result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
+                    texts_batch = []
+                    metadatas_batch = []
+                    size = 0
         if texts_batch:
-            result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
+            result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))  # type: ignore
         return result_ids
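The new flush condition caps each batch at roughly 47 MB of accumulated text, presumably to stay under MongoDB's 48 MB message-size limit (my inference; the diff does not say), while `batch_size` remains overridable per call:

    # Hypothetical usage; `vectorstore` is an initialized MongoDBAtlasVectorSearch.
    ids = vectorstore.add_texts(
        texts=["first document", "second document"],
        metadatas=[{"source": "a"}, {"source": "b"}],
        batch_size=500,  # overrides DEFAULT_INSERT_BATCH_SIZE (now 100_000)
    )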

def _insert_texts(self, texts: List[str], metadatas: List[Dict[str, Any]]) -> List:
@@ -210,9 +217,23 @@ def _similarity_search_with_score(
             pipeline.extend(post_filter_pipeline)
         cursor = self._collection.aggregate(pipeline)  # type: ignore[arg-type]
         docs = []
+
+        def _make_serializable(obj: Dict[str, Any]) -> None:
+            for k, v in obj.items():
+                if isinstance(v, dict):
+                    _make_serializable(v)
+                elif isinstance(v, list) and v and isinstance(v[0], ObjectId):
+                    obj[k] = [json_util.default(item) for item in v]
+                elif isinstance(v, ObjectId):
+                    obj[k] = json_util.default(v)
+
         for res in cursor:
             text = res.pop(self._text_key)
             score = res.pop("score")
+            # Make every ObjectId found JSON-Serializable
+            # following format used in bson.json_util.loads
+            # e.g. loads('{"_id": {"$oid": "664..."}}') == {'_id': ObjectId('664..')}  # noqa: E501
+            _make_serializable(res)
             docs.append((Document(page_content=text, metadata=res), score))
         return docs
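What this buys, as a standalone sketch mirroring the unit test below: converted metadata survives a plain `json` round trip, and `bson.json_util` can still recover the original `ObjectId`:

    from json import dumps, loads

    from bson import ObjectId, json_util

    oid = ObjectId()
    metadata = {"_id": json_util.default(oid)}  # -> {"_id": {"$oid": "664..."}}

    assert loads(dumps(metadata)) == metadata              # plain json now round-trips
    assert json_util.loads(dumps(metadata))["_id"] == oid  # ObjectId is recoverable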
4 changes: 2 additions & 2 deletions libs/partners/mongodb/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain-mongodb"
-version = "0.1.3"
+version = "0.1.4"
 description = "An integration package connecting MongoDB and LangChain"
 authors = []
 readme = "README.md"
@@ -28,7 +28,7 @@ pytest-watcher = "^0.3.4"
 pytest-asyncio = "^0.21.1"
 langchain = { path = "../../langchain", develop = true }
 langchain-core = { path = "../../core", develop = true }
-langchain-text-splitters = {path = "../../text-splitters", develop = true}
+langchain-text-splitters = { path = "../../text-splitters", develop = true }

 [tool.poetry.group.codespell]
 optional = true
7 changes: 7 additions & 0 deletions libs/partners/mongodb/tests/unit_tests/test_vectorstores.py
@@ -1,6 +1,8 @@
+from json import dumps, loads
 from typing import Any, Optional

 import pytest
+from bson import ObjectId, json_util
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings
 from pymongo.collection import Collection
@@ -75,6 +77,11 @@ def _validate_search(
         output = vectorstore.similarity_search("", k=1)
         assert output[0].page_content == page_content
         assert output[0].metadata.get("c") == metadata
+        # Validate the ObjectId provided is json serializable
+        assert loads(dumps(output[0].page_content)) == output[0].page_content
+        assert loads(dumps(output[0].metadata)) == output[0].metadata
+        json_metadata = dumps(output[0].metadata)  # normal json.dumps
+        assert isinstance(json_util.loads(json_metadata)["_id"], ObjectId)

     def test_from_documents(
         self, embedding_openai: Embeddings, collection: MockCollection
4 changes: 2 additions & 2 deletions libs/partners/mongodb/tests/utils.py
@@ -1,9 +1,9 @@
 from __future__ import annotations

-import uuid
 from copy import deepcopy
 from typing import Any, Dict, List, Mapping, Optional, cast

+from bson import ObjectId
 from langchain_core.callbacks.manager import (
     AsyncCallbackManagerForLLMRun,
     CallbackManagerForLLMRun,
@@ -162,7 +162,7 @@ def delete_many(self, *args, **kwargs) -> DeleteResult:  # type: ignore

     def insert_many(self, to_insert: List[Any], *args, **kwargs) -> InsertManyResult:  # type: ignore
         mongodb_inserts = [
-            {"_id": str(uuid.uuid4()), "score": 1, **insert} for insert in to_insert
+            {"_id": ObjectId(), "score": 1, **insert} for insert in to_insert
         ]
         self._data.extend(mongodb_inserts)
         return self._insert_result or InsertManyResult(
