🌿 Fern Regeneration -- October 18, 2024 #594

Merged: 6 commits, Oct 18, 2024
3 changes: 2 additions & 1 deletion .fernignore
Original file line number Diff line number Diff line change
@@ -14,4 +14,5 @@ src/cohere/manually_maintained
src/cohere/bedrock_client.py
src/cohere/aws_client.py
src/cohere/sagemaker_client.py
src/cohere/client_v2.py
src/cohere/client_v2.py
mypy.ini
2,264 changes: 1,925 additions & 339 deletions poetry.lock

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cohere"
version = "5.11.0"
version = "5.11.1"
description = ""
readme = "README.md"
authors = []
@@ -32,18 +32,18 @@ Repository = 'https://github.com/cohere-ai/cohere-python'

[tool.poetry.dependencies]
python = "^3.8"
boto3 = "^1.34.0"
boto3 = { version="^1.34.0", optional = true}
fastavro = "^1.9.4"
httpx = ">=0.21.2"
httpx-sse = "0.4.0"
parameterized = "^0.9.0"
pydantic = ">= 1.9.2"
pydantic-core = "^2.18.2"
requests = "^2.0.0"
sagemaker = { version="^2.232.1", optional = true}
tokenizers = ">=0.15,<1"
types-requests = "^2.0.0"
typing_extensions = ">= 4.0.0"
sagemaker = "^2.232.1"

[tool.poetry.dev-dependencies]
mypy = "1.0.1"
@@ -68,3 +68,6 @@ line-length = 120
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.extras]
aws=["sagemaker", "boto3"]
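With `boto3` and `sagemaker` moved behind the new `aws` extra, AWS support now has to be opted into at install time; a sketch of what that looks like for consumers (assuming the package is installed from PyPI under its usual name):

```shell
# Base install no longer pulls in the AWS stack:
pip install cohere

# Opt in to the SageMaker/Bedrock clients (installs sagemaker and boto3):
pip install "cohere[aws]"
```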
63 changes: 37 additions & 26 deletions reference.md
@@ -2319,7 +2319,9 @@ client.check_api_key()
<dl>
<dd>

Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides.
Generates a message from the model in response to a provided conversation. To learn more about the features of the Chat API follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).

Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.
</dd>
</dl>
</dd>
@@ -2396,7 +2398,7 @@ for chunk in response:
<dl>
<dd>

**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/v2/docs/chat-fine-tuning) model.

</dd>
</dl>
@@ -2452,14 +2454,12 @@ When `tools` is passed (without `tool_results`), the `text` content in the respo

**safety_mode:** `typing.Optional[V2ChatStreamRequestSafetyMode]`

Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.
Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.
When `OFF` is specified, the safety instruction will be omitted.

Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.

**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
**Note**: This parameter is only compatible with models [Command R 08-2024](https://docs.cohere.com/v2/docs/command-r#august-2024-release), [Command R+ 08-2024](https://docs.cohere.com/v2/docs/command-r-plus#august-2024-release) and newer.


</dd>
@@ -2468,7 +2468,11 @@
<dl>
<dd>

**max_tokens:** `typing.Optional[int]` — The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
**max_tokens:** `typing.Optional[int]`

The maximum number of tokens the model will generate as part of the response.

**Note**: Setting a low value may result in incomplete generations.


</dd>
@@ -2595,7 +2599,9 @@ Defaults to `0.75`. min value of `0.01`, max value of `0.99`.
<dl>
<dd>

Generates a message from the model in response to a provided conversation. To learn how to use the Chat API with Streaming and RAG follow our Text Generation guides.
Generates a message from the model in response to a provided conversation. To learn more about the features of the Chat API follow our [Text Generation guides](https://docs.cohere.com/v2/docs/chat-api).

Follow the [Migration Guide](https://docs.cohere.com/v2/docs/migrating-v1-to-v2) for instructions on moving from API v1 to API v2.
</dd>
</dl>
</dd>
@@ -2621,6 +2627,7 @@ client.v2.chat(
messages=[
ToolChatMessageV2(
tool_call_id="messages",
content="messages",
)
],
)
@@ -2639,7 +2646,7 @@
<dl>
<dd>

**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/docs/chat-fine-tuning) model.
**model:** `str` — The name of a compatible [Cohere model](https://docs.cohere.com/v2/docs/models) (such as command-r or command-r-plus) or the ID of a [fine-tuned](https://docs.cohere.com/v2/docs/chat-fine-tuning) model.

</dd>
</dl>
@@ -2695,14 +2702,12 @@ When `tools` is passed (without `tool_results`), the `text` content in the respo

**safety_mode:** `typing.Optional[V2ChatRequestSafetyMode]`

Used to select the [safety instruction](/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.
Used to select the [safety instruction](https://docs.cohere.com/v2/docs/safety-modes) inserted into the prompt. Defaults to `CONTEXTUAL`.
When `OFF` is specified, the safety instruction will be omitted.

Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters.

**Note**: This parameter is only compatible with models [Command R 08-2024](/docs/command-r#august-2024-release), [Command R+ 08-2024](/docs/command-r-plus#august-2024-release) and newer.

Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments
**Note**: This parameter is only compatible with models [Command R 08-2024](https://docs.cohere.com/v2/docs/command-r#august-2024-release), [Command R+ 08-2024](https://docs.cohere.com/v2/docs/command-r-plus#august-2024-release) and newer.


</dd>
@@ -2711,7 +2716,11 @@
<dl>
<dd>

**max_tokens:** `typing.Optional[int]` — The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
**max_tokens:** `typing.Optional[int]`

The maximum number of tokens the model will generate as part of the response.

**Note**: Setting a low value may result in incomplete generations.


</dd>
@@ -2865,6 +2874,8 @@ client = Client(
)
client.v2.embed(
model="model",
input_type="search_document",
embedding_types=["float"],
)

```
@@ -2904,43 +2915,43 @@ Available models and corresponding embedding dimensions:
<dl>
<dd>

**texts:** `typing.Optional[typing.Sequence[str]]` — An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality.
**input_type:** `EmbedInputType`

</dd>
</dl>

<dl>
<dd>

**images:** `typing.Optional[typing.Sequence[str]]`
**embedding_types:** `typing.Sequence[EmbeddingType]`

An array of image data URIs for the model to embed. Maximum number of images per call is `1`.
Specifies the types of embeddings you want to get back. Can be one or more of the following types.

The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and has a maximum size of 5MB.
* `"float"`: Use this when you want to get back the default float embeddings. Valid for all models.
* `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models.
* `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models.
* `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models.
* `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models.

</dd>
</dl>

<dl>
<dd>

**input_type:** `typing.Optional[EmbedInputType]`
**texts:** `typing.Optional[typing.Sequence[str]]` — An array of strings for the model to embed. Maximum number of texts per call is `96`. We recommend reducing the length of each text to be under `512` tokens for optimal quality.

</dd>
</dl>

<dl>
<dd>

**embedding_types:** `typing.Optional[typing.Sequence[EmbeddingType]]`
**images:** `typing.Optional[typing.Sequence[str]]`

Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.
An array of image data URIs for the model to embed. Maximum number of images per call is `1`.

* `"float"`: Use this when you want to get back the default float embeddings. Valid for all models.
* `"int8"`: Use this when you want to get back signed int8 embeddings. Valid for only v3 models.
* `"uint8"`: Use this when you want to get back unsigned int8 embeddings. Valid for only v3 models.
* `"binary"`: Use this when you want to get back signed binary embeddings. Valid for only v3 models.
* `"ubinary"`: Use this when you want to get back unsigned binary embeddings. Valid for only v3 models.
The image must be a valid [data URI](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data). The image must be in either `image/jpeg` or `image/png` format and has a maximum size of 5MB.

</dd>
</dl>
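The `int8`/`uint8` options described above return quantized variants of the float embeddings. A rough illustration of what signed-int8 quantization does to a float vector (a simplified sketch, not Cohere's exact scheme):

```python
def quantize_int8(floats, scale=127.0):
    """Map each float dimension into the signed int8 range [-127, 127]."""
    return [max(-127, min(127, round(x * scale))) for x in floats]

# Each dimension shrinks from a float32 to a single signed byte,
# trading a little precision for a 4x smaller payload.
vec = [0.5, -1.0, 0.013]
print(quantize_int8(vec))
```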
13 changes: 10 additions & 3 deletions src/cohere/aws_client.py
@@ -3,10 +3,7 @@
import re
import typing

import boto3 # type: ignore
import httpx
from botocore.auth import SigV4Auth # type: ignore
from botocore.awsrequest import AWSRequest # type: ignore
from httpx import URL, SyncByteStream, ByteStream
from tokenizers import Tokenizer # type: ignore

@@ -17,6 +14,14 @@
from .core import construct_type


try:
import boto3 # type: ignore
from botocore.auth import SigV4Auth # type: ignore
from botocore.awsrequest import AWSRequest # type: ignore
AWS_DEPS_AVAILABLE = True
except ImportError:
AWS_DEPS_AVAILABLE = False

class AwsClient(Client):
def __init__(
self,
@@ -28,6 +33,8 @@ def __init__(
timeout: typing.Optional[float] = None,
service: typing.Union[typing.Literal["bedrock"], typing.Literal["sagemaker"]],
):
if not AWS_DEPS_AVAILABLE:
raise ImportError("AWS dependencies not available. Please install boto3 and botocore.")
Client.__init__(
self,
base_url="https://api.cohere.com", # this url is unused for BedrockClient
3 changes: 0 additions & 3 deletions src/cohere/bedrock_client.py
@@ -1,8 +1,5 @@
import typing

import boto3 # type: ignore
from botocore.auth import SigV4Auth # type: ignore
from botocore.awsrequest import AWSRequest # type: ignore
from tokenizers import Tokenizer # type: ignore

from .aws_client import AwsClient
2 changes: 1 addition & 1 deletion src/cohere/core/client_wrapper.py
@@ -24,7 +24,7 @@ def get_headers(self) -> typing.Dict[str, str]:
headers: typing.Dict[str, str] = {
"X-Fern-Language": "Python",
"X-Fern-SDK-Name": "cohere",
"X-Fern-SDK-Version": "5.11.0",
"X-Fern-SDK-Version": "5.11.1",
}
if self._client_name is not None:
headers["X-Client-Name"] = self._client_name
24 changes: 15 additions & 9 deletions src/cohere/manually_maintained/cohere_aws/client.py
@@ -5,12 +5,6 @@
import time
from typing import Any, Dict, List, Optional, Tuple, Union

import boto3
import sagemaker as sage
from botocore.exceptions import (ClientError, EndpointConnectionError,
ParamValidationError)
from sagemaker.s3 import S3Downloader, S3Uploader, parse_s3_url

from .classification import Classification, Classifications
from .embeddings import Embeddings
from .error import CohereError
@@ -23,7 +17,18 @@
from .mode import Mode
import typing

class Client:
# Try to import sagemaker and related modules
try:
import sagemaker as sage
from sagemaker.s3 import S3Downloader, S3Uploader, parse_s3_url
import boto3
from botocore.exceptions import (
ClientError, EndpointConnectionError, ParamValidationError)
AWS_DEPS_AVAILABLE = True
except ImportError:
AWS_DEPS_AVAILABLE = False

class Client:
def __init__(
self,
aws_region: typing.Optional[str] = None,
@@ -32,8 +37,9 @@ def __init__(
By default we assume region configured in AWS CLI (`aws configure get region`). You can change the region with
`aws configure set region us-west-2` or override it with `region_name` parameter.
"""
self._client = boto3.client("sagemaker-runtime", region_name=aws_region)
self._service_client = boto3.client("sagemaker", region_name=aws_region)
if not AWS_DEPS_AVAILABLE:
raise CohereError("AWS dependencies not available. Please install boto3 and sagemaker.")
self._client = boto3.client("sagemaker-runtime", region_name=aws_region)
self._service_client = boto3.client("sagemaker", region_name=aws_region)
if os.environ.get('AWS_DEFAULT_REGION') is None:
os.environ['AWS_DEFAULT_REGION'] = aws_region
self._sess = sage.Session(sagemaker_client=self._service_client)
8 changes: 6 additions & 2 deletions src/cohere/types/assistant_message.py
@@ -3,10 +3,10 @@
from ..core.unchecked_base_model import UncheckedBaseModel
import typing
from .tool_call_v2 import ToolCallV2
import pydantic
from .assistant_message_content import AssistantMessageContent
from .citation import Citation
from ..core.pydantic_utilities import IS_PYDANTIC_V2
import pydantic


class AssistantMessage(UncheckedBaseModel):
@@ -15,7 +15,11 @@ class AssistantMessage(UncheckedBaseModel):
"""

tool_calls: typing.Optional[typing.List[ToolCallV2]] = None
tool_plan: typing.Optional[str] = None
tool_plan: typing.Optional[str] = pydantic.Field(default=None)
"""
A chain-of-thought style reflection and plan that the model generates when working with Tools.
"""

content: typing.Optional[AssistantMessageContent] = None
citations: typing.Optional[typing.List[Citation]] = None

8 changes: 6 additions & 2 deletions src/cohere/types/assistant_message_response.py
@@ -3,10 +3,10 @@
from ..core.unchecked_base_model import UncheckedBaseModel
import typing
from .tool_call_v2 import ToolCallV2
import pydantic
from .assistant_message_response_content_item import AssistantMessageResponseContentItem
from .citation import Citation
from ..core.pydantic_utilities import IS_PYDANTIC_V2
import pydantic


class AssistantMessageResponse(UncheckedBaseModel):
@@ -16,7 +16,11 @@ class AssistantMessageResponse(UncheckedBaseModel):

role: typing.Literal["assistant"] = "assistant"
tool_calls: typing.Optional[typing.List[ToolCallV2]] = None
tool_plan: typing.Optional[str] = None
tool_plan: typing.Optional[str] = pydantic.Field(default=None)
"""
A chain-of-thought style reflection and plan that the model generates when working with Tools.
"""

content: typing.Optional[typing.List[AssistantMessageResponseContentItem]] = None
citations: typing.Optional[typing.List[Citation]] = None

3 changes: 1 addition & 2 deletions src/cohere/types/chat_finish_reason.py
@@ -3,6 +3,5 @@
import typing

ChatFinishReason = typing.Union[
typing.Literal["complete", "stop_sequence", "max_tokens", "tool_call", "error", "content_blocked", "error_limit"],
typing.Any,
typing.Literal["COMPLETE", "STOP_SEQUENCE", "MAX_TOKENS", "TOOL_CALL", "ERROR"], typing.Any
]
2 changes: 1 addition & 1 deletion src/cohere/types/chat_message_v2.py
@@ -79,7 +79,7 @@ class ToolChatMessageV2(UncheckedBaseModel):

role: typing.Literal["tool"] = "tool"
tool_call_id: str
content: typing.Optional[ToolMessageV2Content] = None
content: ToolMessageV2Content

if IS_PYDANTIC_V2:
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
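The change above makes `content` a required field on `ToolChatMessageV2`, so a tool message can no longer be constructed without a payload. A plain-dataclass sketch of that constraint (hypothetical stand-in, not the real pydantic model):

```python
from dataclasses import dataclass


@dataclass
class ToolMessageSketch:
    tool_call_id: str
    content: str              # required as of this change; no default value
    role: str = "tool"


# Supplying both required fields succeeds:
ok = ToolMessageSketch(tool_call_id="call_0", content="tool output")

# Omitting content now fails at construction time:
try:
    ToolMessageSketch(tool_call_id="call_0")  # type: ignore[call-arg]
    omitted_content_allowed = True
except TypeError:
    omitted_content_allowed = False
```

This is why the updated `client.v2.chat(...)` example earlier in the diff passes `content="messages"` alongside `tool_call_id`.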
3 changes: 2 additions & 1 deletion src/cohere/types/chat_tool_calls_chunk_event.py
@@ -2,13 +2,14 @@

from .chat_stream_event import ChatStreamEvent
from .tool_call_delta import ToolCallDelta
from ..core.pydantic_utilities import IS_PYDANTIC_V2
import typing
from ..core.pydantic_utilities import IS_PYDANTIC_V2
import pydantic


class ChatToolCallsChunkEvent(ChatStreamEvent):
tool_call_delta: ToolCallDelta
text: typing.Optional[str] = None

if IS_PYDANTIC_V2:
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2