diff --git a/cohere-openapi.yaml b/cohere-openapi.yaml index dffe695e..8ac18332 100644 --- a/cohere-openapi.yaml +++ b/cohere-openapi.yaml @@ -5588,7 +5588,7 @@ paths: With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned. - Compatible Deployments: + Compatible Deployments: - AUTO: Cohere Platform Only - AUTO_PRESERVE_ORDER: Azure, AWS Sagemaker/Bedrock, Private Deployments connectors: @@ -5832,6 +5832,8 @@ paths: **Note**: This parameter is only compatible with models [Command R 08-2024](https://docs.cohere.com/docs/command-r#august-2024-release), [Command R+ 08-2024](https://docs.cohere.com/docs/command-r-plus#august-2024-release) and newer. + **Note**: `command-r7b-12-2024` only supports `"CONTEXTUAL"` and `"STRICT"` modes. + Compatible Deployments: Cohere Platform, Azure, AWS Sagemaker/Bedrock, Private Deployments responses: "200": @@ -5986,7 +5988,7 @@ paths: x-fern-availability: beta type: boolean description: | - When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Strict Tools guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools). + When set to `true`, tool calls in the Assistant message will be forced to follow the tool definition strictly. Learn more in the [Structured Outputs (Tools) guide](https://docs.cohere.com/docs/structured-outputs-json#structured-outputs-tools). **Note**: The first few requests with a new set of tools will take longer to process. documents: @@ -6021,6 +6023,8 @@ paths: Safety modes are not yet configurable in combination with `tools`, `tool_results` and `documents` parameters. 
**Note**: This parameter is only compatible with models [Command R 08-2024](https://docs.cohere.com/v2/docs/command-r#august-2024-release), [Command R+ 08-2024](https://docs.cohere.com/v2/docs/command-r-plus#august-2024-release) and newer. + + **Note**: `command-r7b-12-2024` only supports `"CONTEXTUAL"` and `"STRICT"` modes. max_tokens: x-fern-audiences: - public @@ -7349,7 +7353,7 @@ paths: import cohere - co = cohere.Client() + co = cohere.ClientV2() response = co.chat( @@ -15988,9 +15992,7 @@ paths: type: string x-fern-audiences: - public - description: "The identifier of the model to use, one of : - `rerank-english-v3.0`, `rerank-multilingual-v3.0`, - `rerank-english-v2.0`, `rerank-multilingual-v2.0`" + description: The identifier of the model to use, eg `rerank-v3.5`. query: type: string x-fern-audiences: @@ -16085,7 +16087,7 @@ paths: {String: "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages."}, {String: "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district."}, }, - Model: cohere.String("rerank-english-v3.0"), + Model: cohere.String("rerank-v3.5"), }, ) @@ -16118,12 +16120,12 @@ paths: text: 'Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.', }, { - text: 'Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.', + text: 'Capital punishment has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states.', }, ], query: 'What is the capital of the United States?', topN: 3, - model: 'rerank-english-v3.0', + model: 'rerank-v3.5', }); console.log(rerank); @@ -16142,12 +16144,12 @@ paths: "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", + "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", ] response = co.rerank( - model="rerank-english-v3.0", + model="rerank-v3.5", query="What is the capital of the United States?", documents=docs, top_n=3, @@ -16165,26 +16167,21 @@ paths: co = cohere.AsyncClient() - docs = [ - "Carson City is the capital city of the American state of Nevada.", - "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", - "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", - "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states.", - ] - - - async def main(): response = await co.rerank( - model="rerank-english-v2.0", + model="rerank-v3.5", query="What is the capital of the United States?", - documents=docs, + documents=[ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", + "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", + ], top_n=3, ) print(response) - asyncio.run(main()) - sdk: java name: Cohere java SDK @@ -16234,13 +16231,13 @@ paths: + " capital of the United States. It is" + " a federal district."), RerankRequestDocumentsItem.of( - "Capital punishment (the death penalty) has" + "Capital punishment has" + " existed in the United States since" + " beforethe United States was a" + " country. As of 2017, capital" + " punishment is legal in 30 of the 50" + " states."))) - .model("rerank-english-v3.0") + .model("rerank-v3.5") .topN(3) .build()); @@ -16256,14 +16253,14 @@ paths: --header 'content-type: application/json' \ --header "Authorization: bearer $CO_API_KEY" \ --data '{ - "model": "rerank-english-v3.0", + "model": "rerank-v3.5", "query": "What is the capital of the United States?", "top_n": 3, "documents": ["Carson City is the capital city of the American state of Nevada.", "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", "Washington, D.C. 
(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states."] + "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states."] }' request: documents: @@ -16276,12 +16273,12 @@ paths: - text: Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. - - text: Capital punishment (the death penalty) has existed in the United States - since beforethe United States was a country. As of 2017, - capital punishment is legal in 30 of the 50 states. + - text: Capital punishment has existed in the United States since beforethe United + States was a country. As of 2017, capital punishment is legal + in 30 of the 50 states. query: What is the capital of the United States? top_n: 3 - model: rerank-english-v3.0 + model: rerank-v3.5 response: body: id: 8bc745a3-7871-4597-822e-18c95d5df48c @@ -16397,14 +16394,7 @@ paths: type: string x-fern-audiences: - public - description: |- - The identifier of the model to use. - - Supported models: - - `rerank-english-v3.0` - - `rerank-multilingual-v3.0` - - `rerank-english-v2.0` - - `rerank-multilingual-v2.0` + description: The identifier of the model to use, eg `rerank-v3.5`. 
query: type: string x-fern-audiences: @@ -16466,7 +16456,7 @@ paths: ], query: 'What is the capital of the United States?', topN: 3, - model: 'rerank-english-v3.0', + model: 'rerank-v3.5', }); console.log(rerank); @@ -16490,7 +16480,7 @@ paths: response = co.rerank( - model="rerank-english-v3.0", + model="rerank-v3.5", query="What is the capital of the United States?", documents=docs, top_n=3, @@ -16508,22 +16498,18 @@ paths: co = cohere.AsyncClientV2() - docs = [ - "Carson City is the capital city of the American state of Nevada.", - "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", - "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", - "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", - ] - - - async def main(): response = await co.rerank( - model="rerank-english-v2.0", + model="rerank-v3.5", query="What is the capital of the United States?", - documents=docs, - top_n=3, + documents=[ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
It is a federal district.", + "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", + ], + top_n=3 ) print(response) @@ -16552,7 +16538,7 @@ paths: .v2() .rerank( V2RerankRequest.builder() - .model("rerank-english-v3.0") + .model("rerank-v3.5") .query("What is the capital of the United States?") .documents( List.of( @@ -16583,7 +16569,7 @@ paths: --header 'content-type: application/json' \ --header "Authorization: bearer $CO_API_KEY" \ --data '{ - "model": "rerank-english-v3.0", + "model": "rerank-v3.5", "query": "What is the capital of the United States?", "top_n": 3, "documents": ["Carson City is the capital city of the American state of Nevada.", @@ -16603,12 +16589,12 @@ paths: - Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. - - Capital punishment (the death penalty) has existed in the United - States since beforethe United States was a country. As of 2017, - capital punishment is legal in 30 of the 50 states. + - Capital punishment has existed in the United States since + beforethe United States was a country. As of 2017, capital + punishment is legal in 30 of the 50 states. query: What is the capital of the United States? top_n: 3 - model: rerank-english-v3.0 + model: rerank-v3.5 response: body: id: 07734bd2-2473-4f07-94e1-0d9f0e6843cf @@ -22670,6 +22656,8 @@ components: description: | Defaults to `"accurate"`. Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results, `"fast"` results or no results. + + **Note**: `command-r7b-12-2024` only supports `"fast"` and `"off"` modes. Its default is `"fast"`. 
ResponseFormatTypeV2: x-fern-audiences: - public diff --git a/fern/docs.yml b/fern/docs.yml index 06c036b9..7c989930 100644 --- a/fern/docs.yml +++ b/fern/docs.yml @@ -156,7 +156,7 @@ redirects: destination: "/docs/overview-rag-connectors" permanent: true - source: "/docs/reranking" - destination: "/docs/overview" + destination: "/docs/rerank-overview" permanent: true - source: "/reference/rerank-1" destination: "/reference/rerank" diff --git a/fern/pages/changelog/2024-12-02-Rerank-v3.5-is-released.mdx b/fern/pages/changelog/2024-12-02-Rerank-v3.5-is-released.mdx new file mode 100644 index 00000000..f83bb398 --- /dev/null +++ b/fern/pages/changelog/2024-12-02-Rerank-v3.5-is-released.mdx @@ -0,0 +1,37 @@ +--- +title: "Announcing Rerank-v3.5" +slug: "changelog/rerank-v3.5" +createdAt: "Mon Dec 2 2024 00:00:00 (MST)" +hidden: false +description: >- + Release announcement for Rerank 3.5 - our new state-of-the-art model for ranking. +--- + +We're pleased to announce the release of [Rerank 3.5](/docs/rerank-2), our newest and most performant foundational model for ranking. Rerank 3.5 has a context length of 4096, SOTA performance on Multilingual Retrieval tasks and Reasoning Capabilities. In addition, Rerank 3.5 has SOTA performance on BEIR and domains such as Finance, E-commerce, Hospitality, Project Management, and Email/Messaging Retrieval tasks. + +In the rest of these release notes, we’ll provide more details about changes to the API. + +## Technical Details + +### API Changes: + +Along with the model, we are releasing V2 of the Rerank API. It includes the following major changes: +- `model` is now a required parameter +- `max_chunks_per_doc` has been replaced by `max_tokens_per_doc`; `max_tokens_per_doc` will determine the maximum number of tokens a document can have before truncation. The default value for `max_tokens_per_doc` is 4096. 
+- support for passing a list of objects for the `documents` parameter has been removed - if your documents contain structured data, for best performance we recommend formatting them as [YAML strings](/docs/rerank-overview#example-with-structured-data). + +Example request + +```Text cURL +POST https://api.cohere.ai/v2/rerank +{ + "model": "rerank-v3.5", + "query": "What is the capital of the United States?", + "top_n": 3, + "documents": ["Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", + "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", + "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states."] +} +``` \ No newline at end of file diff --git a/fern/pages/changelog/2024-12-13-command-r-7b-is-here.mdx b/fern/pages/changelog/2024-12-13-command-r-7b-is-here.mdx new file mode 100644 index 00000000..5cb1d00b --- /dev/null +++ b/fern/pages/changelog/2024-12-13-command-r-7b-is-here.mdx @@ -0,0 +1,12 @@ +--- +title: "Announcing Command R7B" +slug: "changelog/command-r-7b" +createdAt: "Fri Dec 13 2024 00:00:00 (MST)" +hidden: false +description: >- + Release announcement for Command R7B - our fastest, lightest, and last Command R model. +--- + +We're thrilled to announce the release of Command R7B, the smallest, fastest, and final model in our R family of enterprise-focused [large language models](https://docs.cohere.com/docs/introduction-to-large-language-models) (LLMs). 
With a context window of 128K, Command R7B offers state-of-the-art performance across a variety of real-world tasks, and is designed for use cases in which speed, cost, and compute are important. Specifically, Command R7B is excellent for [retrieval-augmented generation](https://docs.cohere.com/docs/retrieval-augmented-generation-rag), [tool use](https://docs.cohere.com/docs/tool-use), and [agentic applications](https://docs.cohere.com/docs/multi-step-tool-use) where complex reasoning, multiple actions, and information-seeking are important for success. + +Command R7B is available today on the [Cohere Platform](https://docs.cohere.com/docs/the-cohere-platform) and on Hugging Face, or you can access it in the SDK with `command-r7b-12-2024`. For more information, check out our [dedicated blog post](https://cohere.com/blog/command-r7b). \ No newline at end of file diff --git a/fern/pages/cookbooks/rerank-demo.mdx b/fern/pages/cookbooks/rerank-demo.mdx index 35d0d96d..e654dd82 100644 --- a/fern/pages/cookbooks/rerank-demo.mdx +++ b/fern/pages/cookbooks/rerank-demo.mdx @@ -3,7 +3,7 @@ title: Demo of Rerank slug: /page/rerank-demo description: "This page contains a basic tutorial on how Cohere's ReRank models work and how to use them." -image: "../../assets/images/f1cc130-cohere_meta_image.jpg" +image: "../../assets/images/f1cc130-cohere_meta_image.jpg" keywords: "Cohere, ReRank" --- @@ -70,7 +70,7 @@ docs = [ "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.", "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.", "West Virginia is a state in the Appalachian region of the United States. 
Its capital and largest city is Charleston. It is often abbreviated W. Va. or simply WV.", - "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.", + "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.", "North Dakota is a state in the United States. 672,591 people lived in North Dakota in the year 2010. The capital and seat of government is Bismarck.", "Kentucky is a state in the United States. Its capital is Frankfort. It touches the states of Missouri (by the Mississippi River), Illinois, Indiana, Ohio, West Virginia (by the Ohio River), Tennessee and Virginia. There are many rivers in Kentucky", "Micronesia, officially the Federated States of Micronesia, is an island nation in the Pacific Ocean, northeast of Papua New Guinea. The country is a sovereign state in free association with the United States. The capital city of Federated States of Micronesia is Palikir.", @@ -97,7 +97,7 @@ Relevance Score: 1.00 Document Rank: 2, Document Index: 5 -Document: Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment. +Document: Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment. 
Relevance Score: 0.75 @@ -246,7 +246,7 @@ search(query = "What is the capital of the United States?") ```txt title="Output" Input question: What is the capital of the United States? Top-3 lexical search (BM25) hits - 16.264 Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment. + 16.264 Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment. 15.124 In 1783, it was the capital of the United States for a few months. 14.476 New York was the capital of the United States under the Articles of Confederation from 1785 to 1788. When the US Constitution was made, it stayed as the capital from 1789 until 1790. In 1789, the first President of the United States, George Washington, was inaugurated; the first United States Congress and the Supreme Court of the United States each met for the first time, and the United States Bill of Rights was written, all at Federal Hall on Wall Street. By 1790, New York grew bigger than Philadelphia, so it become the biggest city in the United States. By the end of 1790, because of the Residence Act, Philadelphia became the new capital. 
diff --git a/fern/pages/deployment-options/cohere-on-microsoft-azure.mdx b/fern/pages/deployment-options/cohere-on-microsoft-azure.mdx index 7dfa1ad5..c469987a 100644 --- a/fern/pages/deployment-options/cohere-on-microsoft-azure.mdx +++ b/fern/pages/deployment-options/cohere-on-microsoft-azure.mdx @@ -13,7 +13,7 @@ updatedAt: "Wed May 01 2024 16:11:36 GMT+0000 (Coordinated Universal Time)" --- In an effort to make our language-model capabilities more widely available, we've partnered with a few major platforms to create hosted versions of our offerings. -In this article, you learn how to use [Azure AI Studio](https://ai.azure.com/) to deploy both the Cohere Command models and the Cohere Embed models on Microsoft's Azure cloud computing platform. +In this article, you learn how to use [Azure AI Foundry](https://ai.azure.com/) to deploy both the Cohere Command models and the Cohere Embed models on Microsoft's Azure cloud computing platform. You can read more about Azure AI Foundry in its documentation [here](https://learn.microsoft.com/en-us/azure/ai-studio/what-is-ai-studio). The following six models are available through Azure AI Studio with pay-as-you-go, token-based billing: @@ -22,7 +22,7 @@ The following six models are available through Azure AI Studio with pay-as-you-g - Embed v3 - English - Embed v3 - Multilingual - Cohere Rerank V3 (English) -- Cohere Rerank V3 (multilingual) +- Cohere Rerank V3 (Multilingual) ## Prerequisites @@ -140,7 +140,7 @@ except urllib.error.HTTPError as error: print(error.read().decode("utf8", "ignore")) ``` -## ReRank +## Rerank We currently exposes the `v1/rerank` endpoint for inference with both Rerank 3 - English and Rerank 3 - Multilingual. For more information on using the APIs, see the [reference](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/deploy-models-cohere-rerank#rerank-api-reference-for-cohere-rerank-models-deployed-as-a-service) section. 
@@ -199,8 +199,97 @@ response = co.rerank( ) ``` -## A Note on SDKs +## Using the Cohere SDK -You should be aware that it's possible to use the cohere SDK client to consume Azure AI deployments. Here are example notes for [Command](https://github.com/Azure/azureml-examples/blob/main/sdk/python/foundation-models/cohere/cohere-cmdR.ipynb) and [Embed](https://github.com/Azure/azureml-examples/blob/main/sdk/python/foundation-models/cohere/cohere-embed.ipynb). +You can use the Cohere SDK client to consume Cohere models that are deployed via Azure AI Foundry. This means you can leverage the SDK's features such as RAG, tool use, structured outputs, and more. + +The following are a few examples of how to use the SDK for the different models. + +### Setup +```python PYTHON +# pip install cohere + +import cohere + +# For Command models +co_chat = cohere.Client( + api_key="AZURE_INFERENCE_CREDENTIAL", + base_url="AZURE_MODEL_ENDPOINT", # Example - https://Cohere-command-r-plus-08-2024-xyz.eastus.models.ai.azure.com/ +) + +# For Embed models +co_embed = cohere.Client( + api_key="AZURE_INFERENCE_CREDENTIAL", + base_url="AZURE_MODEL_ENDPOINT", # Example - https://cohere-embed-v3-multilingual-xyz.eastus.models.ai.azure.com/ +) + +# For Rerank models +co_rerank = cohere.Client( + api_key="AZURE_INFERENCE_CREDENTIAL", + base_url="AZURE_MODEL_ENDPOINT", # Example - https://cohere-rerank-v3-multilingual-xyz.eastus.models.ai.azure.com/ +) +``` + +### Chat +```python PYTHON +message = "I'm joining a new startup called Co1t today. Could you help me write a short introduction message to my teammates." + +response = co_chat.chat(message=message) + +print(response) +``` +### RAG +```python PYTHON +faqs_short = [ + { + "text": "Reimbursing Travel Expenses: Easily manage your travel expenses by submitting them through our finance tool. Approvals are prompt and straightforward." 
+ }, + { + "text": "Health and Wellness Benefits: We care about your well-being and offer gym memberships, on-site yoga classes, and comprehensive health insurance." + }, +] + +query = "Are there fitness-related perks?" + +response = co_chat.chat(message=query, documents=faqs_short) + +print(response) +``` + +### Embed +```python PYTHON +docs = [ + "Joining Slack Channels: You will receive an invite via email. Be sure to join relevant channels to stay informed and engaged.", + "Finding Coffee Spots: For your caffeine fix, head to the break room's coffee machine or cross the street to the café for artisan coffee.", +] + +doc_emb = co_embed.embed( + input_type="search_document", + texts=docs, +).embeddings +``` + +### Rerank +```python PYTHON +faqs_short = [ + { + "text": "Reimbursing Travel Expenses: Easily manage your travel expenses by submitting them through our finance tool. Approvals are prompt and straightforward." + }, + { + "text": "Working from Abroad: Working remotely from another country is possible. Simply coordinate with your manager and ensure your availability during core hours." + }, + { + "text": "Health and Wellness Benefits: We care about your well-being and offer gym memberships, on-site yoga classes, and comprehensive health insurance." + }, +] + +query = "Are there fitness-related perks?" + +results = co_rerank.rerank( + query=query, documents=faqs_short, top_n=2, model="rerank-english-v3.0" +) +``` + +Here are some other examples for [Command](https://github.com/Azure/azureml-examples/blob/main/sdk/python/foundation-models/cohere/cohere-cmdR.ipynb) and [Embed](https://github.com/Azure/azureml-examples/blob/main/sdk/python/foundation-models/cohere/cohere-embed.ipynb). The important thing to understand is that our new and existing customers can call the models from Azure while still leveraging their integration with the Cohere SDK. 
diff --git a/fern/pages/deployment-options/cohere-works-everywhere.mdx b/fern/pages/deployment-options/cohere-works-everywhere.mdx index 1a96466c..205a794b 100644 --- a/fern/pages/deployment-options/cohere-works-everywhere.mdx +++ b/fern/pages/deployment-options/cohere-works-everywhere.mdx @@ -22,10 +22,10 @@ The table below summarizes the environments in which Cohere models can be deploy | sdk | [Cohere platform](/reference/about) | [Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere.html) | Sagemaker | Azure | OCI | Private Deployment | | ------------------------------------------------------------ | ---------------------------------------------------------- | -------------------------------------------------------------------------------------------- | ------------------------------- | --------------------------- | -------------------------- | ------------------------------ | -| [Typescript](https://github.com/cohere-ai/cohere-typescript) | [✅ docs](#cohere-platform) | [✅ docs](#bedrock) | [✅ docs](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#cohere-platform) | -| [Python](https://github.com/cohere-ai/cohere-python) | [✅ docs](#cohere-platform) | [✅ docs](#bedrock) | [✅ docs](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#cohere-platform) | -| [Go](https://github.com/cohere-ai/cohere-go) | [✅ docs](#cohere-platform) | [🟠 soon](#bedrock) | [🟠 soon](#sagemaker) | [✅ docs](#azure) | [🟠 soon](#) | [✅ docs](#cohere-platform) | -| [Java](https://github.com/cohere-ai/cohere-java) | [✅ docs](#cohere-platform) | [🟠 soon](#bedrock) | [🟠 soon](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#cohere-platform) | +| [Typescript](https://github.com/cohere-ai/cohere-typescript) | [✅ docs](#cohere-platform) | [✅ docs](#bedrock) | [✅ docs](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#private-deployment) | +| [Python](https://github.com/cohere-ai/cohere-python) | [✅ docs](#cohere-platform) | [✅ 
docs](#bedrock) | [✅ docs](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#private-deployment) | +| [Go](https://github.com/cohere-ai/cohere-go) | [✅ docs](#cohere-platform) | [🟠 soon](#bedrock) | [🟠 soon](#sagemaker) | [✅ docs](#azure) | [🟠 soon](#) | [✅ docs](#private-deployment) | +| [Java](https://github.com/cohere-ai/cohere-java) | [✅ docs](#cohere-platform) | [🟠 soon](#bedrock) | [🟠 soon](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#private-deployment) | ## Feature support @@ -161,6 +161,125 @@ public class ChatPost { ``` +#### Private Deployment + + +```typescript TS +const { CohereClient } = require('cohere-ai'); + +const cohere = new CohereClient({ + token: '', + environment: '' +}); + +(async () => { + const response = await cohere.chat({ + chatHistory: [ + { role: 'USER', message: 'Who discovered gravity?' }, + { + role: 'CHATBOT', + message: 'The man who is widely credited with discovering gravity is Sir Isaac Newton', + }, + ], + message: 'What year was he born?', + // perform web search before answering the question. You can also use your own custom connector. + connectors: [{ id: 'web-search' }], + }); + + console.log(response); +})(); +``` +```python PYTHON +import cohere + +co = cohere.Client(api_key="", + base_url="") + +response = co.chat( + chat_history=[ + {"role": "USER", "message": "Who discovered gravity?"}, + { + "role": "CHATBOT", + "message": "The man who is widely credited with discovering gravity is Sir Isaac Newton", + }, + ], + message="What year was he born?", + # perform web search before answering the question. You can also use your own custom connector. 
+ connectors=[{"id": "web-search"}], +) + +print(response) +``` +```go GO +package main + +import ( + "context" + "log" + + cohere "github.com/cohere-ai/cohere-go/v2" + client "github.com/cohere-ai/cohere-go/v2/client" +) + +func main() { + co := client.NewClient( + client.WithBaseURL(""), + ) + + resp, err := co.Chat( + context.TODO(), + &cohere.ChatRequest{ + ChatHistory: []*cohere.ChatMessage{ + { + Role: cohere.ChatMessageRoleUser, + Message: "Who discovered gravity?", + }, + { + Role: cohere.ChatMessageRoleChatbot, + Message: "The man who is widely credited with discovering gravity is Sir Isaac Newton", + }}, + Message: "What year was he born?", + Connectors: []*cohere.ChatConnector{ + {Id: "web-search"}, + }, + }, + ) + + if err != nil { + log.Fatal(err) + } + + log.Printf("%+v", resp) +} +``` +```java JAVA +import com.cohere.api.Cohere; +import com.cohere.api.core.Environment; +import com.cohere.api.requests.ChatRequest; +import com.cohere.api.types.ChatMessage; +import com.cohere.api.types.Message; +import com.cohere.api.types.NonStreamedChatResponse; + +import java.util.List; + + +public class ChatPost { + public static void main(String[] args) { + Cohere cohere = Cohere.builder().environment(Environment.custom("")).token("Your API key").clientName("snippet").build(); + + NonStreamedChatResponse response = cohere.chat( + ChatRequest.builder() + .message("What year was he born?") + .chatHistory( + List.of(Message.user(ChatMessage.builder().message("Who discovered gravity?").build()), + Message.chatbot(ChatMessage.builder().message("The man who is widely credited with discovering gravity is Sir Isaac Newton").build()))).build()); + + System.out.println(response); + } +} +``` + + #### Bedrock diff --git a/fern/pages/fine-tuning/rerank-fine-tuning/rerank-understanding-the-results.mdx b/fern/pages/fine-tuning/rerank-fine-tuning/rerank-understanding-the-results.mdx index a47305d9..8a7930ab 100644 --- 
a/fern/pages/fine-tuning/rerank-fine-tuning/rerank-understanding-the-results.mdx +++ b/fern/pages/fine-tuning/rerank-fine-tuning/rerank-understanding-the-results.mdx @@ -34,7 +34,7 @@ MRR stands for [Mean Reciprocal Rank](https://en.wikipedia.org/wiki/Mean_recipr | QUERY | PASSAGES / DOCUMENTS | FIRST RELEVANT RESPONSE | RANK | RECIPROCAL RANK | | ----------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---- | --------------- | | When was George Washington born? | `{George Washington was born at Popes Creek in Westmoreland County, in the British colony of Virginia.,Washington, D.C., formally the District of Columbia and commonly called Washington or D.C., is the capital city of the United States., George Washington was born in 1732.}` | `George Washington was born in 1732.` | 3 | `1/3` | -| What is the capital of the United States? | `{Capital punishment (the death penalty) has existed in the United States since before the United States was a country. ,Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States.}` | `Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States` | 2 | `1/2` | +| What is the capital of the United States? | `{Capital punishment has existed in the United States since before the United States was a country. ,Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States.}` | `Washington, D.C. 
(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States` | 2 | `1/2` | Given these two samples we could calculate the mean reciprocal rank as `((1/3)+(1/2))/2=5/12`or 0.42. diff --git a/fern/pages/going-to-production/deprecations.mdx b/fern/pages/going-to-production/deprecations.mdx new file mode 100644 index 00000000..c3583486 --- /dev/null +++ b/fern/pages/going-to-production/deprecations.mdx @@ -0,0 +1,44 @@ +--- +title: Deprecations +slug: docs/deprecations +hidden: false +description: >- + Learn about Cohere's deprecation policies and recommended replacements +image: ../../assets/images/4f186df-cohere_docs_preview_image_1200x630_copy.jpg +keywords: 'Cohere API, large language models, generative AI' +createdAt: 'Wed Nov 27 2024 00:00:00 GMT+0000 (Coordinated Universal Time)' +updatedAt: 'Wed Nov 27 2024 00:00:00 GMT+0000 (Coordinated Universal Time)' +--- +Find information around deprecated endpoints and models with their recommended replacements. + +## Overview +As Cohere launches safer and more capable models, we will regularly retire old models. Applications relying on Cohere's models may need occasional updates to keep working. Impacted customers will always be notified via email and in our documentation along with blog posts. +This page lists all API deprecations, along with recommended replacements. + +Cohere uses the following terms to describe the lifecycle of our models: +- **Active:** The model and endpoint are fully supported and recommended for use. +- **Legacy:** The model and endpoints will no longer receive updates and may be deprecated in the future. +- **Deprecated:** The model and endpoints are no longer available to new customers but remain available to existing users until retirement. (An existing user is defined as anyone who has used the model or endpoint within 90 days of the deprecation announcement.) A shutdown date will be assigned at that time. 
+- **Shutdown:** The model and endpoint are no longer available for users. Requests to shutdown models and endpoints will fail. + +## Migrating to replacements +Once a model is deprecated, it is imperative to migrate all usage to a suitable replacement before the shutdown date. Requests to models and endpoints past the shutdown date will fail. +To ensure a smooth transition, we recommend thorough testing of your applications with the new models well before the shutdown date. If your team requires assistance, do not hesitate to reach out to support@cohere.ai. + +## Deprecation History +All deprecations are listed below with the most recent announcements at the top. + +### 2024-12-02: Rerank v2.0 +On December 2nd, 2024, we announced the release of Rerank-v3.5 along with the deprecation of the Rerank-v2.0 model family. +Fine-tuned models created from these base models are not affected by this deprecation. + +| Shutdown Date| Deprecated Model| Deprecated Model Price| Recommended Replacement| +|--------------|-----------------|-----------------------|------------------------| +| 2025-03-31 | `rerank-english-v2.0` | $1.00 / 1K searches | `rerank-v3.5`| +| 2025-03-31 | `rerank-multilingual-v2.0` | $1.00 / 1K searches | `rerank-v3.5`| + +## Best Practices +1. Regularly check our documentation for updates and announcements regarding the status of models. +2. Test applications with newer models well before the shutdown date of your current model. +3. Update any production code to use an active model as soon as possible. +4. Contact support@cohere.ai if you need any assistance with migration or have any questions. 
\ No newline at end of file diff --git a/fern/pages/going-to-production/rate-limits.mdx b/fern/pages/going-to-production/rate-limits.mdx index 052bb678..9aaef7a1 100644 --- a/fern/pages/going-to-production/rate-limits.mdx +++ b/fern/pages/going-to-production/rate-limits.mdx @@ -17,7 +17,7 @@ Cohere offers two kinds of API keys: evaluation keys (free but limited in usage) | ------------------------------------------ | --------------------- | --------------------- | | [Chat](/reference/chat) | 20/min | 500/min | | [Embed](/reference/embed) | 100/min | 2,000/min | -| [Embed (Images)](/reference/embed) | 5/min | 40/min | +| [Embed (Images)](/reference/embed) | 5/min | 400/min | | [Rerank](/reference/rerank) | 10/min | 1,000/min | | [Tokenize](/reference/tokenize) | 100/min | 2,000/min | | [Classify](/reference/classify) | 100/min | 1000/min | diff --git a/fern/pages/integrations/cohere-and-langchain/chat-on-langchain.mdx b/fern/pages/integrations/cohere-and-langchain/chat-on-langchain.mdx index cadd73c1..651717bb 100644 --- a/fern/pages/integrations/cohere-and-langchain/chat-on-langchain.mdx +++ b/fern/pages/integrations/cohere-and-langchain/chat-on-langchain.mdx @@ -283,4 +283,14 @@ llm = ChatCohere(cohere_api_key="COHERE_API_KEY", chain = load_summarize_chain(llm, chain_type="stuff") chain.invoke({"input_documents": docs}) +``` + +### Using LangChain on Private Deployments + +You can use LangChain with privately deployed Cohere models. To use it, specify your model deployment URL in the `base_url` parameter. 
+ +```python PYTHON +llm = ChatCohere(base_url="<YOUR_DEPLOYMENT_URL>", + cohere_api_key="COHERE_API_KEY", + model="MODEL_NAME") ``` \ No newline at end of file diff --git a/fern/pages/integrations/cohere-and-langchain/embed-on-langchain.mdx b/fern/pages/integrations/cohere-and-langchain/embed-on-langchain.mdx index c341107f..47d2e82d 100644 --- a/fern/pages/integrations/cohere-and-langchain/embed-on-langchain.mdx +++ b/fern/pages/integrations/cohere-and-langchain/embed-on-langchain.mdx @@ -111,3 +111,12 @@ embeddings = BedrockEmbeddings( embeddings.embed_query("This is a content of the document") ``` +### Using LangChain on Private Deployments + +You can use LangChain with privately deployed Cohere models. To use it, specify your model deployment URL in the `base_url` parameter. + +```python PYTHON +embeddings = CohereEmbeddings(base_url="<YOUR_DEPLOYMENT_URL>", + cohere_api_key="COHERE_API_KEY", + model="MODEL_NAME") +``` \ No newline at end of file diff --git a/fern/pages/integrations/cohere-and-langchain/rerank-on-langchain.mdx b/fern/pages/integrations/cohere-and-langchain/rerank-on-langchain.mdx index c62a7eb9..f6efd02f 100644 --- a/fern/pages/integrations/cohere-and-langchain/rerank-on-langchain.mdx +++ b/fern/pages/integrations/cohere-and-langchain/rerank-on-langchain.mdx @@ -84,3 +84,13 @@ citations = docs[-1].metadata['citations'] print("Citations:") print(citations) ``` + +### Using LangChain on Private Deployments + +You can use LangChain with privately deployed Cohere models. To use it, specify your model deployment URL in the `base_url` parameter. 
+ +```python PYTHON +llm = CohereRerank(base_url=, + cohere_api_key="COHERE_API_KEY", + model="MODEL_NAME") +``` \ No newline at end of file diff --git a/fern/pages/models/cohere-embed.mdx b/fern/pages/models/cohere-embed.mdx index 9d95cdb2..a37f3cb4 100644 --- a/fern/pages/models/cohere-embed.mdx +++ b/fern/pages/models/cohere-embed.mdx @@ -1,5 +1,5 @@ --- -title: Embed Model +title: Cohere's Embed Models (Details and Application) slug: docs/cohere-embed hidden: false description: >- diff --git a/fern/pages/models/models.mdx b/fern/pages/models/models.mdx index 6407c467..aa53eb7c 100644 --- a/fern/pages/models/models.mdx +++ b/fern/pages/models/models.mdx @@ -1,5 +1,5 @@ --- -title: "Models Overview" +title: An Overview of Cohere's Models slug: "docs/models" hidden: false @@ -39,6 +39,7 @@ Command is Cohere's default generation model that takes a user instruction (or c | Model Name | Description | Modality | Context Length | Maximum Output Tokens | Endpoints | |--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------|----------------|-----------------------|-------------------------------------------------------------------------------------------| +| `command-r7b-12-2024` | `command-r7b-12-2024` is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps. | Text | 128k | 4k | [Chat](/reference/chat)| | `command-r-plus-08-2024` | `command-r-plus-08-2024` is an update of the Command R+ model, delivered in August 2024. 
Find more information [here](https://docs.cohere.com/changelog/command-gets-refreshed) | Text | 128k | 4k | [Chat](/reference/chat) | | `command-r-plus-04-2024` | Command R+ is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It is best suited for complex RAG workflows and multi-step tool use. | Text | 128k | 4k | [Chat](/reference/chat) | | `command-r-plus` | `command-r-plus` is an alias for `command-r-plus-04-2024`, so if you use `command-r-plus` in the API, that's the model you're pointing to. | Text | 128k | 4k | [Chat](/reference/chat) | @@ -59,6 +60,7 @@ In this table, we provide some important context for using Cohere Command models | Model Name | Amazon Bedrock Model ID | Amazon SageMaker | Azure AI Studio Model ID | Oracle OCI Generative AI Service | | :---------------------- | :------------------------------ | :-------------------- | :----------------------- | :------------------------------- | +| `command-r7b-12-2024` | (Coming soon) | (Coming soon) | (Coming soon) | (Coming soon) | | `command-r-plus` | `cohere.command-r-plus-v1:0` | Unique per deployment | Unique per deployment | `cohere.command-r-plus v1.2` | | `command-r` | `cohere.command-r-v1:0` | Unique per deployment | Unique per deployment | `cohere.command-r-16k v1.2` | | `command` | `cohere.command-text-v14` | N/A | N/A | `cohere.command v15.6` | @@ -106,11 +108,10 @@ The Rerank model can improve created models by re-organizing their results based | Model Name | Description | Modalities | Context Length | Endpoints | | -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------- | ---------------|---------------------------- | +| `rerank-v3.5` | A model that allows for re-ranking English Language documents and 
semi-structured data (JSON). This model has a context length of 4096 tokens. | Text | 4k | [Rerank](/reference/rerank) | | `rerank-english-v3.0` | A model that allows for re-ranking English Language documents and semi-structured data (JSON). This model has a context length of 4096 tokens. | Text | 4k | [Rerank](/reference/rerank) | | `rerank-multilingual-v3.0` | A model for documents and semi-structure data (JSON) that are not in English. Supports the same languages as embed-multilingual-v3.0. This model has a context length of 4096 tokens. | Text | 4k | [Rerank](/reference/rerank) | -| | | | | | -| `rerank-english-v2.0` | A model that allows for re-ranking English language documents. | Text | 512 | [Rerank](/reference/rerank) | -| `rerank-multilingual-v2.0` | A model for documents that are not in English. Supports the same languages as `embed-multilingual-v3.0`. | Text | 512 | [Rerank](/reference/rerank) | + ### Using Rerank Models on Different Platforms @@ -118,10 +119,10 @@ In this table, we provide some important context for using Cohere Rerank models | Model Name | Amazon Bedrock Model ID | Amazon SageMaker | Azure AI Studio Model ID | Oracle OCI Generative AI Service | | :------------------------- | :---------------------- | :-------------------- | :----------------------- | :------------------------------- | -| `rerank-english-v3.0` | Not yet available | Unique per deployment | Not yet available | N/A | -| `rerank-multilingual-v3.0` | Not yet available | Unique per deployment | Not yet available | N/A | -| `rerank-english-v2.0` | N/A | N/A | N/A | N/A | -| `rerank-multilingual-v2.0` | N/A | N/A | N/A | N/A | +| `rerank-v3.5` | cohere.rerank-v3-5:0 | Unique per deployment | Not yet available | N/A | +| `rerank-english-v3.0` | N/A | Unique per deployment | Not yet available | N/A | +| `rerank-multilingual-v3.0` | N/A | Unique per deployment | Not yet available | N/A | +
diff --git a/fern/pages/models/rerank-2.mdx b/fern/pages/models/rerank-2.mdx index 069869dc..9ae174a6 100644 --- a/fern/pages/models/rerank-2.mdx +++ b/fern/pages/models/rerank-2.mdx @@ -1,10 +1,10 @@ --- -title: "Rerank Model" +title: Cohere's Rerank Model (Details and Application) slug: "docs/rerank-2" hidden: false -description: "This page describes how Cohere's ReRank models work and how to use them." +description: "This page describes how Cohere's Rerank models work and how to use them." image: "../../assets/images/f1cc130-cohere_meta_image.jpg" keywords: "Cohere, language models, rerank models" @@ -13,12 +13,12 @@ updatedAt: "Mon Apr 08 2024 17:42:11 GMT+0000 (Coordinated Universal Time)" --- Rerank models sort text inputs by semantic relevance to a specified query. They are often used to sort search results returned from an existing search solution. Learn more about using Rerank in the [best practices guide](/docs/reranking-best-practices). -| Latest Model | Description | Modality | Max Tokens | Endpoints | -| -------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ---------|------------|-------------------| -| `rerank-english-v3.0` | A model that allows for re-ranking English Language documents and semi-structured data (JSON). This model has a context length of 4096 tokens. | Text | N/A | [Rerank](/reference/rerank) | -| `rerank-multilingual-v3.0` | A model for documents and semi-structure data (JSON) that are not in English. Supports the same languages as `embed-multilingual-v3.0`. This model has a context length of 4096 tokens.| Text | N/A | [Rerank](/reference/rerank) | -| `rerank-english-v2.0` | A model that allows for re-ranking English language documents. This model has a context length of 512 tokens. 
| Text | N/A | [Rerank](/reference/rerank) | -| `rerank-multilingual-v2.0` | A model for documents that are not in English. Supports the same languages as `embed-multilingual-v3.0`. This model has a context length of 512 tokens. | Text | N/A | [Rerank](/reference/rerank) | +| Latest Model | Description | Modality | Endpoints | +| -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ---------|-----------------------------| +| `rerank-v3.5` | A model for documents and semi-structured data (JSON). State-of-the-art performance in English and non-English languages; supports the same languages as embed-multilingual-v3.0. This model has a context length of 4096 tokens| Text | [Rerank](/reference/rerank) | +| `rerank-english-v3.0` | A model that allows for re-ranking English Language documents and semi-structured data (JSON). This model has a context length of 4096 tokens. | Text | [Rerank](/reference/rerank) | +| `rerank-multilingual-v3.0` | A model for documents and semi-structure data (JSON) that are not in English. Supports the same languages as `embed-multilingual-v3.0`. This model has a context length of 4096 tokens. | Text | [Rerank](/reference/rerank) | + For each document included in a request, Rerank combines the tokens from the query with the tokens from the document and the combined total counts toward the context limit for a single document. If the combined number of tokens from the query and a given document exceeds the model’s context length for a single document, the document will automatically get chunked and processed in multiple inferences. See our [best practice guide](/docs/reranking-best-practices) for more info about formatting documents for the Rerank endpoint. 
diff --git a/fern/pages/models/the-command-family-of-models/command-r-plus.mdx b/fern/pages/models/the-command-family-of-models/command-r-plus.mdx index 6f59167f..575faa44 100644 --- a/fern/pages/models/the-command-family-of-models/command-r-plus.mdx +++ b/fern/pages/models/the-command-family-of-models/command-r-plus.mdx @@ -21,10 +21,20 @@ For information on toxicity, safety, and using this model responsibly check out ### Model Details | Model Name | Description | Modality | Context Length | Maximum Output Tokens | Endpoints | |--------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------|----------------|-----------------------|------------------------| +| `command-r7b-12-2024` | `command-r7b-12-2024` is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps. | Text | 128k | 4k | [Chat](/reference/chat)| | `command-r-plus-08-2024` | `command-r-plus-08-2024` is an update of the Command R+ model, delivered in August 2024. | Text | 128k | 4k | [Chat](/reference/chat)| | `command-r-plus-04-2024` | Command R+ is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It is best suited for complex RAG workflows and multi-step tool use. | Text | 128k | 4k | [Chat](/reference/chat)| | `command-r-plus` | `command-r-plus` is an alias for `command-r-plus-04-2024`, so if you use `command-r-plus` in the API, that's the model you're pointing to. 
| Text | 128k | 4k | [Chat](/reference/chat)| +## Command R7B December 2024 Release +Command R7B is the smallest, fastest, and final model in our R family of enterprise-focused [large language models](https://docs.cohere.com/docs/introduction-to-large-language-models) (LLMs). With a context window of 128K, Command R7B offers state-of-the-art performance across a variety of real-world tasks, and is designed for use cases in which speed, cost, and compute are important. Specifically, Command R7B is excellent for: + +- RAG - [Retrieval Augmented Generation](https://docs.cohere.com/docs/retrieval-augmented-generation-rag) (RAG) refers to the practice of ‘grounding’ model outputs in external data sources, which can increase accuracy. Command R7B is exceptionally good at generating responses in conversational tasks, attending over long inputs, and extracting and manipulating numerical information in financial settings. +- Tool-use - With [tool use](https://docs.cohere.com/docs/tool-use), Command models can be given tools such as search engines, APIs, vector databases, etc., which can expand their baseline functionality. Command R7B excels at tool use, exhibiting particular strength in using tools in real-world, diverse, and dynamic environments. In addition, Command R7B is good at avoiding unnecessarily calling tools, which is an important aspect of tool-use in practical applications. +- Agents - As this is being written, [agents](https://docs.cohere.com/docs/multi-step-tool-use) are among the most exciting frontiers for large language models. Command R7B’s multistep tool use capabilities allow it to power fast and capable REACT agents. When set up as an internet-augmented research agent, for example, Command R7B ably completes tasks that require breaking down complex questions into subgoals, and also performs favorably in domains that utilize complex reasoning and active information seeking. 
+ +Command R7B is available today on the Cohere Platform as well as accessible on HuggingFace, or you can access it in the SDK with `command-r7b-12-2024`. For more information, check out our [dedicated blog post](cohere.com/blog/command-r7b). + ## Command R+ August 2024 Release Cohere's flagship text-generation models, Command R and Command R+, received a substantial update in August 2024. We chose to designate these models with time stamps, so in the API Command R+ 08-2024 is accesible with `command-r-plus-08-2024`. diff --git a/fern/pages/models/the-command-family-of-models/command-r.mdx b/fern/pages/models/the-command-family-of-models/command-r.mdx index 6e35d2ba..434e1015 100644 --- a/fern/pages/models/the-command-family-of-models/command-r.mdx +++ b/fern/pages/models/the-command-family-of-models/command-r.mdx @@ -22,10 +22,20 @@ For information on toxicity, safety, and using this model responsibly check out ### Model Details | Model Name | Description | Modality | Context Length | Maximum Output Tokens | Endpoints| |--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|----------------|-----------------------|----------| +| `command-r7b-12-2024` | `command-r7b-12-2024` is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps. | Text | 128k | 4k | [Chat](/reference/chat) | | `command-r-08-2024` | `command-r-08-2024` is an update of the Command R model, delivered in August 2024. 
| Text | 128k | 4k | [Chat](/reference/chat) | | | `command-r-03-2024` | Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents. | Text | 128k | 4k | [Chat](/reference/chat) | | | `command-r` | `command-r` is an alias for `command-r-03-2024`, so if you use `command-r` in the API, that's the model you're pointing to. | Text | 128k | 4k | [Chat](/reference/chat) | | +## Command R7B December 2024 Release +Command R7B is the smallest, fastest, and final model in our R family of enterprise-focused [large language models](https://docs.cohere.com/docs/introduction-to-large-language-models) (LLMs). With a context window of 128K, Command R7B offers state-of-the-art performance across a variety of real-world tasks, and is designed for use cases in which speed, cost, and compute are important. Specifically, Command R7B is excellent for: + +- RAG - [Retrieval Augmented Generation](https://docs.cohere.com/docs/retrieval-augmented-generation-rag) (RAG) refers to the practice of ‘grounding’ model outputs in external data sources, which can increase accuracy. Command R7B is exceptionally good at generating responses in conversational tasks, attending over long inputs, and extracting and manipulating numerical information in financial settings. +- Tool-use - With [tool use](https://docs.cohere.com/docs/tool-use), Command models can be given tools such as search engines, APIs, vector databases, etc., which can expand their baseline functionality. Command R7B excels at tool use, exhibiting particular strength in using tools in real-world, diverse, and dynamic environments. In addition, Command R7B is good at avoiding unnecessarily calling tools, which is an important aspect of tool-use in practical applications. 
+- Agents - As this is being written, [agents](https://docs.cohere.com/docs/multi-step-tool-use) are among the most exciting frontiers for large language models. Command R7B’s multistep tool use capabilities allow it to power fast and capable REACT agents. When set up as an internet-augmented research agent, for example, Command R7B ably completes tasks that require breaking down complex questions into subgoals, and also performs favorably in domains that utilize complex reasoning and active information seeking. + +Command R7B is available today on the Cohere Platform as well as accessible on HuggingFace, or you can access it in the SDK with `command-r7b-12-2024`. For more information, check out our [dedicated blog post](https://cohere.com/blog/command-r7b). + ## Command R August 2024 Release Cohere's flagship text-generation models, Command R and Command R+, received a substantial update in August 2024. We chose to designate these models with time stamps, so in the API Command R 08-2024 is accesible with `command-r-08-2024`. diff --git a/fern/pages/responsible-use/responsible-use.mdx b/fern/pages/responsible-use/responsible-use.mdx index c18e0c88..0edf882c 100644 --- a/fern/pages/responsible-use/responsible-use.mdx +++ b/fern/pages/responsible-use/responsible-use.mdx @@ -2,10 +2,10 @@ title: "Command R and Command R+ Model Card" slug: "docs/responsible-use" -hidden: false -description: This doc provides guidelines for using Cohere generation models ethically and constructively. +hidden: false +description: This doc provides guidelines for using Cohere generation models ethically and constructively. 
-image: "../../assets/images/5d25315-cohere_docs_preview_image_1200x630_copy.jpg" +image: "../../assets/images/5d25315-cohere_docs_preview_image_1200x630_copy.jpg" keywords: "AI safety, AI risk, responsible AI" createdAt: "Thu Sep 01 2022 19:22:12 GMT+0000 (Coordinated Universal Time)" @@ -15,22 +15,22 @@ This documentation aims to guide developers in using language models constructiv [NOTE: This page was updated on October 31st, 2024.] -## Safety Benchmarks +## Safety Benchmarks -The safety of our Command R and Command R+ models has been evaluated on the BOLD (Biases in Open-ended Language Generation) dataset (Dhamala et al, 2021), which contains nearly 24,000 prompts testing for biases based on profession, gender, race, religion, and political ideology. +The safety of our Command R and Command R+ models has been evaluated on the BOLD (Biases in Open-ended Language Generation) dataset (Dhamala et al, 2021), which contains nearly 24,000 prompts testing for biases based on profession, gender, race, religion, and political ideology. -Overall, both models show a lack of bias, with generations that are very rarely toxic. That said, there remain some differences in bias between the two, as measured by their respective sentiment and regard for "Gender" and "Religion" categories. Command R+, the more powerful model, tends to display slightly less bias than Command R. +Overall, both models show a lack of bias, with generations that are very rarely toxic. That said, there remain some differences in bias between the two, as measured by their respective sentiment and regard for "Gender" and "Religion" categories. Command R+, the more powerful model, tends to display slightly less bias than Command R. -Below, we report differences in privileged vs. minoritised groups for gender, race, and religion. +Below, we report differences in privileged vs. minoritised groups for gender, race, and religion. 
![](../../assets/images/responsible_use_1.png) -## Intended Use Cases +## Intended Use Cases Command R models are trained for sophisticated text generation—which can include natural text, summarization, code, and markdown—as well as to support complex [Retrieval Augmented Generation](https://docs.cohere.com/docs/retrieval-augmented-generation-rag) (RAG) and [tool-use](https://docs.cohere.com/docs/tool-use) tasks. Command R models support 23 languages, including 10 languages that are key to global business (English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Chinese, Arabic). While it has strong performance on these ten languages, the other 13 are lower-resource and less rigorously evaluated. -## Unintended and Prohibited Use Cases +## Unintended and Prohibited Use Cases We do not recommend using the Command R models on their own for decisions that could have a significant impact on individuals, including those related to access to financial services, employment, and housing. Cohere’s [Usage Guidelines](https://cohere.com/responsibility) and customer agreements contain details about prohibited use cases, like social scoring, inciting violence or harm, and misinformation or other political manipulation. @@ -52,15 +52,15 @@ We have put safeguards in place to avoid generating harmful text, and while they Language models capture problematic associations and stereotypes that are prominent on the internet and society at large. They should not be used to make decisions about individuals or the groups they belong to. For example, it can be dangerous to use Generation model outputs in CV ranking systems due to known biases (Nadeem et al., 2020). ## Technical Notes -Now, we'll discuss some details of our underlying models that should be kept in mind. +Now, we'll discuss some details of our underlying models that should be kept in mind. 
-### Language Limitations +### Language Limitations This model is designed to excel at English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Chinese, and Arabic, and to generate in 13 other languages well. It will sometimes respond in other languages, but the generations are unlikely to be reliable. -### Sampling Parameters +### Sampling Parameters A model's generation quality is highly dependent on its sampling parameters. Please consult [the documentation](https://docs.cohere.com/docs/advanced-generation-hyperparameters) for details about each parameter and tune the values used for your application. Parameters may require re-tuning upon a new model release. -### Prompt Engineering +### Prompt Engineering Performance quality on generation tasks may increase when examples are provided as part of the system prompt. See [the documentation](https://docs.cohere.com/docs/crafting-effective-prompts) for examples on how to do this. @@ -73,4 +73,4 @@ The examples in this section are not comprehensive; they are meant to be more mo by members of the public, on social media or any other channel. - **Generation of misinformation and other harmful content:** The generation of news or other articles which manipulate public opinion, or any content which aims to incite hate or mischaracterize a group of people. -- **Human-outside-the-loop:** The generation of text that could be used to make important decisions about people, without a human-in-the-loop. \ No newline at end of file +- **Human-outside-the-loop:** The generation of text that could be used to make important decisions about people, without a human-in-the-loop. 
\ No newline at end of file diff --git a/fern/pages/responsible-use/responsible-use/usage-guidelines.mdx b/fern/pages/responsible-use/responsible-use/usage-guidelines.mdx index 85d96a78..0aef38b0 100644 --- a/fern/pages/responsible-use/responsible-use/usage-guidelines.mdx +++ b/fern/pages/responsible-use/responsible-use/usage-guidelines.mdx @@ -8,7 +8,7 @@ image: "../../../assets/images/da0a0ac-cohere_docs_preview_image_1200x630_copy.j keywords: "Cohere API" createdAt: "Thu Sep 01 2022 19:24:15 GMT+0000 (Coordinated Universal Time)" -updatedAt: "Thu Nov 21 2024 09::48 GMT+0000 (Coordinated Universal Time)" +updatedAt: "Fri Nov 29 2024 09::48 GMT+0000 (Coordinated Universal Time)" --- (This document was updated on 11/21/2024) @@ -43,4 +43,4 @@ You must ensure your Customer Application complies with the Universal Requiremen If your Customer Application is public-facing and interacts with human users (including consumers), like chatbots and interactive AI agents, you must: (1) disclose to the users that they are interacting with an AI system rather than a human; and (2) if the Customer Application interacts with minors, comply with any specific child safety regulations and implement appropriate additional safety controls such as age verification and content moderation. ## Research Exceptions -Cohere encourages responsible security and safety research. Limited exceptions to our Usage Policy are possible for research purposes if specifically authorized by us or permitted in accordance with our Responsible Disclosure Policy applicable to security research. For safety-related research that falls outside the scope of our Responsible Disclosure Policy or to report a model safety issue, please contact safety@cohere.com. +Cohere encourages responsible security and safety research. 
Limited exceptions to our Usage Policy are possible for research purposes if specifically authorized by us or permitted in accordance with our Responsible Disclosure Policy applicable to security research. For safety-related research that falls outside the scope of our [Responsible Disclosure Policy](https://trustcenter.cohere.com/) or to report a model safety issue, please contact safety@cohere.com. diff --git a/fern/pages/text-embeddings/reranking/overview.mdx b/fern/pages/text-embeddings/reranking/overview.mdx index 89523f60..b119737f 100644 --- a/fern/pages/text-embeddings/reranking/overview.mdx +++ b/fern/pages/text-embeddings/reranking/overview.mdx @@ -1,11 +1,11 @@ --- title: "Rerank Overview" -slug: "docs/overview" +slug: "docs/rerank-overview" hidden: false -description: "This page describes how Cohere's ReRank models work." -image: "../../../assets/images/f1cc130-cohere_meta_image.jpg" +description: "This page describes how Cohere's Rerank models work." +image: "../../../assets/images/f1cc130-cohere_meta_image.jpg" keywords: "Cohere, reranking models, large language models" createdAt: "Thu May 23 2024 04:39:27 GMT+0000 (Coordinated Universal Time)" @@ -13,13 +13,13 @@ updatedAt: "Thu May 30 2024 15:15:29 GMT+0000 (Coordinated Universal Time)" --- ## How Rerank Works -The [Rerank API endpoint](/reference/rerank-1), powered by the [Rerank models](/docs/rerank-2), is a simple and very powerful tool for semantic search. Given a `query` and a list of `documents`, Rerank indexes the documents from most to least semantically relevant to the query. +The [Rerank API endpoint](/reference/rerank-1), powered by the [Rerank models](/docs/rerank-2), is a simple and very powerful tool for semantic search. Given a `query` and a list of `documents`, Rerank indexes the documents from most to least semantically relevant to the query. 
## Get Started ### Example with Texts -In the example below, we use the [Rerank API endpoint](/reference/rerank-1) to index the list of `docs` from most to least relevant to the query ` What is the capital of the United States?`. +In the example below, we use the [Rerank API endpoint](/reference/rerank-1) to index the list of `documents` from most to least relevant to the query ` What is the capital of the United States?`. **Request** @@ -35,8 +35,8 @@ docs = [ "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.", "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.", "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.", - "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment."] -results = co.rerank(model="rerank-english-v3.0", query=query, documents=docs, top_n=5, return_documents=True) + "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. 
The federal government (including the United States military) also uses capital punishment."] +results = co.rerank(model="rerank-v3.5", query=query, documents=docs, top_n=5, return_documents=True) ``` **Response** @@ -54,7 +54,7 @@ results = co.rerank(model="rerank-english-v3.0", query=query, documents=docs, to }, { "document": { - "text": "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment." + "text": "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment." }, "index": 4, "relevance_score": 0.7516481 @@ -102,77 +102,108 @@ Alternatively, you can pass in a JSON object and specify the fields you'd like t ```python PYTHON query = "What is the capital of the United States?" docs = [ - {"Title":"Facts about Carson City","Content":"Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274."}, - {"Title":"The Commonwealth of Northern Mariana Islands","Content":"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan."}, - {"Title":"The Capital of United States Virgin Islands","Content":"Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas."}, - {"Title":"Washington D.C.","Content":"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. 
The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America."}, - {"Title":"Capital Punishment in the US","Content":"Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment."}] -results = co.rerank(model="rerank-english-v3.0", query=query, documents=docs, rank_fields=['Title','Content'],top_n=5, return_documents=True) + {"Title": "Facts about Carson City","Content": "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274."}, + {"Title": "The Commonwealth of Northern Mariana Islands","Content": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan."}, + {"Title": "The Capital of United States Virgin Islands","Content": "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas."}, + {"Title": "Washington D.C.","Content":"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America."}, + {"Title": "Capital Punishment in the US","Content": "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. 
The federal government (including the United States military) also uses capital punishment."} + ] +results = co.rerank(model="rerank-v3.5", query=query, documents=docs, rank_fields=['Title','Content'],top_n=5, return_documents=True) ``` -In the `docs` parameter, we are passing in a list of objects which have the key values: `[Title ,Content]`. As part of the Rerank call, we are specifying which keys to rank over, as well as the order in which the key value pairs should be considered. +In the `docs` parameter, we are passing in a list of objects which have the key values: `['Title' ,'Content']`. As part of the Rerank call, we are specifying which keys to rank over, as well as the order in which the key value pairs should be considered. ```python PYTHON { - "id": "75a94aa7-6761-4a64-a2ae-4bc0a62bc601", - "results": [ - { - "document": { - "Content": "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.", - "Title": "Washington D.C." - }, - "index": 3, - "relevance_score": 0.9987405 - }, - { - "document": { - "Content": "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.", - "Title": "Capital Punishment in the US" - }, - "index": 4, - "relevance_score": 0.5011778 - }, - { - "document": { - "Content": "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. 
The city is on the island of Saint Thomas.", - "Title": "The Capital of United States Virgin Islands" - }, - "index": 2, - "relevance_score": 0.10070161 - }, - { - "document": { - "Content": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.", - "Title": "The Commonwealth of Northern Mariana Islands" - }, - "index": 1, - "relevance_score": 0.03197956 - }, - { - "document": { - "Content": "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.", - "Title": "Facts about Carson City" - }, - "index": 0, - "relevance_score": 0.019456575 - } - ], - "meta": { - "api_version": { - "version": "2022-12-06" - }, - "billed_units": { - "search_units": 1 - } - } + id='e8f55f3f-d86e-47d7-9b24-7feb18286505', + results=[ + RerankResponseResultsItem( + document=RerankResponseResultsItemDocument( + text=None, + Content=( + 'Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) ' + 'is the capital of the United States. It is a federal district. The President of the USA and many major ' + 'national government offices are in the territory. This makes it the political center of the United States of America.' + ), + Title='Washington D.C.' + ), + index=3, + relevance_score=0.8914433 + ), + RerankResponseResultsItem( + document=RerankResponseResultsItemDocument( + text=None, + Content=( + 'Charlotte Amalie is the capital and largest city of the United States Virgin Islands. ' + 'It has about 20,000 people. The city is on the island of Saint Thomas.' + ), + Title='The Capital of United States Virgin Islands' + ), + index=2, + relevance_score=0.40344992 + ), + RerankResponseResultsItem( + document=RerankResponseResultsItemDocument( + text=None, + Content=( + 'Carson City is the capital city of the American state of Nevada. 
At the 2010 United States Census, ' + 'Carson City had a population of 55,274.' + ), + Title='Facts about Carson City' + ), + index=0, + relevance_score=0.23343581 + ), + RerankResponseResultsItem( + document=RerankResponseResultsItemDocument( + text=None, + Content=( + 'The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that ' + 'are a political division controlled by the United States. Its capital is Saipan.' + ), + Title='The Commonwealth of Northern Mariana Islands' + ), + index=1, + relevance_score=0.15964958 + ), + RerankResponseResultsItem( + document=RerankResponseResultsItemDocument( + text=None, + Content=( + 'Capital punishment has existed in the United States since before the United States was a country. ' + 'As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) ' + 'also uses capital punishment.' + ), + Title='Capital Punishment in the US' + ), + index=4, + relevance_score=0.10465127 + ), + ], + meta=ApiMeta( + api_version=ApiMetaApiVersion( + version='1', + is_deprecated=None, + is_experimental=None + ), + billed_units=ApiMetaBilledUnits( + images=None, + input_tokens=None, + output_tokens=None, + search_units=1.0, + classifications=None + ), + tokens=None, + warnings=None + ) } ``` ## Multilingual Reranking -Cohere offers a multilingual model, `rerank-multilingual-v3.0`. Please note that performance may vary across languages. The model is trained on the following languages: +Cohere's `rerank-v3.5` and `rerank-multilingual-v3.0` models have been trained for performance across a variety of languages. Please note that performance may vary across languages. 
These models are trained on the following languages: | ISO Code | Language Name | | -------- | -------------- | diff --git a/fern/pages/text-embeddings/reranking/reranking-best-practices.mdx b/fern/pages/text-embeddings/reranking/reranking-best-practices.mdx index 1ba0bb92..ffa40621 100644 --- a/fern/pages/text-embeddings/reranking/reranking-best-practices.mdx +++ b/fern/pages/text-embeddings/reranking/reranking-best-practices.mdx @@ -11,72 +11,55 @@ updatedAt: 'Thu May 30 2024 15:16:00 GMT+0000 (Coordinated Universal Time)' --- ## Optimizing Performance -In the following two tables, you'll find recommendations for getting the best Rerank performance, organized by model family. +In the following table, you'll find recommendations for getting the best Rerank performance. -### Rerank-v3.0 +### Rerank-v3.5 and Rerank-v3.0 | Constraint | Minimum | Maximum | Default Value | | ----------------------------- | ------- | ------------------------------------------------------------------------------------------------------- | :------------ | -| Number of Documents | 1 | 1000 | N/A | +| Number of Documents | 1 | 10,000 | N/A | | Max Number of Chunks | 1 | N/A | 1 | -| Number of Tokens per Document | 1 | N/A (see [below ](/docs/reranking-best-practices#document-chunking)for more info) | N/A | +| Number of Tokens per Document | 1 | N/A (see [below](/docs/reranking-best-practices#document-chunking) for more info) | N/A | | Number of Tokens per Query | 1 | 2048 | N/A | -### Rerank-v2.0 - -| Constraint | Minimum | Maximum | Default Value | -| ----------------------------- | ------- | ------------------------------------------------------------------------------------------------------- | :------------ | -| Number of Documents | 1 | 10,000 | N/A | -| Max Number of Chunks | 1 | N/A | 10 | -| Number of Tokens per Document | 1 | N/A (see [below ](/docs/reranking-best-practices#document-chunking)for more info) | N/A | -| Number of Tokens per Query | 1 | 256 | N/A | - ## Document Chunking 
-For `rerank-v3.0`, the model breaks documents into 4094 token chunks. For example, if your query is 100 tokens and your document is 10,000 tokens, your document will be broken into the following chunks: +For `rerank-v3.5` and `rerank-v3.0`, the model breaks documents into 4093 token chunks. For example, if your query is 100 tokens and your document is 10,000 tokens, your document will be broken into the following chunks: -1. `relevance_score_1 = ` -2. `relevance_score_2 = ` -3. `relevance_score_3 = ` +1. `relevance_score_1 = ` +2. `relevance_score_2 = ` +3. `relevance_score_3 = ` 4. `relevance_score = max(relevance_score_1, relevance_score_2, relevance_score_3)` If you would like more control over how chunking is done, we recommend that you chunk your documents yourself. ## Max Number of Documents -### Rerank-v3.0 Models +When using `rerank-v3.5` and `rerank-v3.0` models, the endpoint will throw an error if the user attempts to pass more than 10,000 documents at a time. The maximum number of documents that can be passed to the endpoint is calculated with the following inequality: `Number of documents * max_chunks_per_doc >10,000`. -When using `rerank-v3.0` models, the endpoint will throw an error if the user tries to pass more than 1000 documents at a time. The maximum number of documents that can be passed to the endpoint is calculated with the following inequality: `Number of documents * max_chunks_per_doc >1000`. - -If `Number of documents * max_chunks_per_doc` exceeds `1000`, the endpoint will return an error. By default, the `max_chunks_per_doc` is set to `1` for `rerank-v3.0` models; given that the model has a context length of 4096, the maximum number of tokens for each call would be 4,096,000. - -### Rerank-v2.0 Models - -When using `rerank-v2.0`, the endpoint will throw an error if the user tries to pass more than 10,000 documents at a time. 
The maximum number of documents that can be passed to the endpoint is calculated with the following inequality: `Number of documents * max_chunks_per_doc >10,000`. - -If `Number of documents * max_chunks_per_doc` exceeds `10,000`, the endpoint will return an error. By default, the `max_chunks_per_doc` is set to `10` for `rerank-v2.0` models; given that the model has a context length of 512, the maximum number of tokens for each call would be 5,120,000. +If `Number of documents * max_chunks_per_doc` exceeds `10,000`, the endpoint will return an error. By default, the `max_chunks_per_doc` is set to `1` for `rerank` models. ## Queries -Our `rerank-v3.0` models are trained with a context length of 4096 tokens. The model takes into account both the input from the query and document. If your query is larger than 2048 tokens, it will be truncated to the first 2048 tokens. For v2.0 models, if your query is larger than 256 tokens, it will be truncated to the first 256 tokens. +Our `rerank-v3.5` and `rerank-v3.0` models are trained with a context length of 4096 tokens. The model takes both the _query_ and the _document_ into account when calculating against this limit, and the query can account for up to half of the full context length. If your query is larger than 2048 tokens, in other words, it will be truncated to the first 2048 tokens (leaving the other 2048 for the document(s)). ## Semi-Structured Data Support -Our `rerank-v3.0` models support semi-structured data reranking through a list of JSON objects. The `rank_fields` parameter will default to a field parameter called `text` unless otherwise specified. If the `rank_fields` parameter is unspecified _and_ none of your JSON objects have a `text` field, the endpoint will return an error. +Our `rerank-v3.5` and `rerank-v3.0` models support semi-structured data reranking through a list of JSON objects. The `rank_fields` parameter will default to a field parameter called `text` unless otherwise specified. 
If the `rank_fields` parameter is unspecified _and_ none of your JSON objects have a `text` field, the endpoint will return an error. ```json JSON [ { - "Title":"How to fix a dishwasher" - "Author":"John Smith" - "Date":"August 1st 2023" + "Title": "How to fix a dishwasher", + "Author": "John Smith", + "Date":"August 1st 2023", "Content": "Fixing a dishwasher depends on the specific problem you're facing. Here are some common issues and their potential solutions:...." }, { - "Title":"How to fix a leaky sink" - "Date":"July 25th 2024" + "Title": "How to fix a leaky sink", + "Date": "July 25th 2024", "Content": "Fixing a leaky sink will depend on the source of the leak. Here are general steps you can take to address common types of sink leaks:....." - },..... + } ] ``` @@ -86,11 +69,11 @@ Looking at the example above, passing in `rank_fields=["Title","Content"]` would The most important output from the [Rerank API endpoint](/reference/rerank-1) is the absolute rank exposed in the response object. The score is query dependent, and could be higher or lower depending on the query and passages sent in. In the example below, what matters is that Ottawa is more relevant than Toronto, but the user should not assume that Ottawa is two times more relevant than Ontario. -```python PYTHON -[ - RerankResult, - RerankResult, - RerankResult +``` +[ + RerankResult, + RerankResult, + RerankResult ] ``` diff --git a/fern/pages/text-generation/prompt-engineering/command-r7b-hf.mdx b/fern/pages/text-generation/prompt-engineering/command-r7b-hf.mdx new file mode 100644 index 00000000..d0a5c701 --- /dev/null +++ b/fern/pages/text-generation/prompt-engineering/command-r7b-hf.mdx @@ -0,0 +1,299 @@ +--- +title: "Using Command R7B on Hugging Face" +slug: "docs/command-r7b-hf" + +hidden: true +description: "This page contains detailed instructions about how to run Command R7B with Huggingface, for RAG, Tool Use and Agents use cases." 
+image: "../../../assets/images/b2b492c-cohere_meta_image.jpg" +keywords: "large language models, generative AI models" + +createdAt: "Fri Dec 13 2024 17:14:34 GMT+0000 (Coordinated Universal Time)" +updatedAt: "Fri Dec 13 2024 19:22:34 GMT+0000 (Coordinated Universal Time)" +--- + +This page contains detailed instructions about +- How to set preambles for Command R7B in Hugging Face +- How to run Command R7B in Hugging Face for Chat, RAG, Tool Use and Agents use cases. + +## Chat Capabilities + +Command R7B can be configured as both a conversational model and an instruct model. +- The conversational mode conditions the model on interactive behaviour, meaning it is expected to reply in a conversational fashion, provides introductory statements and follow-up questions, and uses Markdown as well as LaTeX where appropriate. It is optimized for interactive experiences, such as chatbots, where the model engages in dialogue. +- The instruct mode, in contrast, conditions the model to provide concise yet comprehensive responses, and does not use Markdown / LaTeX by default. It is designed for non-interactive, task-focused use cases like extracting information, summarizing text, translation, and categorization. + +### Conversational Mode +The system preamble for conversational mode is as follows: + +````txt wordWrap +# System Preamble +{Safety Preamble} + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. + +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. 
+- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer. +```` + +Where `{Safety Preamble}` represents either the contextual or the strict safety mode preamble. + +The contextual safety mode preamble is as follows: + +````txt wordWrap +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. 
You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. +```` + +The strict safety mode preamble is as follows: + +````txt wordWrap +You are in strict safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will reject requests to generate content related to violence, hate, misinformation or sex to any amount. You will avoid using profanity. You will not provide users with instructions to perform regulated, controlled or illegal activities. +```` +### Instruct Mode + +The instruct mode preamble is as follows: +````txt wordWrap +# System Preamble +{Safety Preamble} + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. + +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply comprehensively and accurately without including introductory statements and follow-up questions. +- If the input is ambiguous, do your best to answer and do not ask clarifying follow-up questions. +- Do not use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Do not use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. 
+- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please return only the code without any explanation. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer. +```` +Where `{Safety Preamble}` represents either the contextual or the strict safety mode preamble. + + +### Example + +An example of how the model can be called using the conversational mode preamble can be found below. + +````python wordWrap PYTHON +conversational_system_preamble = """# System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. 
+ +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. +- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. +- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.""" + +conversation = [ + {"role": "system", "content": conversational_system_preamble}, + {"role": "user", "content": "Hi what's your favorite color? And what's your information cutoff date? 
And who built you?"} +] +```` + +## Grounded Generation and RAG Capabilities: + +Command R7B has been trained specifically for tasks like summarization and the final step of Retrieval Augmented Generation (RAG). The model takes a conversation as input (with an optional user-supplied system preamble, indicating task, context and desired output style), along with a list of document snippets. This behavior has been trained into the model via a mixture of supervised fine-tuning and preference fine-tuning. + +For these tasks, you can use Command R7B in two ways. + +### Option 1: Grounded Generation +Grounded generation in Command R7B is supported through [chat templates](https://huggingface.co/docs/transformers/main/en/chat_templating#advanced-retrieval-augmented-generation) in Transformers. Simply provide document snippets using the `documents` parameter of Hugging Face’s `apply_chat_template()`. Document snippets should be short chunks, rather than long documents, typically around 100-400 words per chunk, formatted as key-value pairs. The keys should be short descriptive strings, the values can be text or semi-structured. Under the hood, this builds a specific prompt template that the model has been trained on. The code snippet below shows a minimal working example. + + +````python PYTHON +from transformers import AutoTokenizer, AutoModelForCausalLM + +# Load the model and tokenizer +model_id = "CohereForAI/c4ai-command-r7b-12-2024" +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = AutoModelForCausalLM.from_pretrained(model_id) + +# Define conversation input +conversation = [{"role": "user", "content": "What has Man always dreamed of?"}] + +# Define documents for retrieval-based generation +documents = [ + { + "heading": "The Moon: Our Age-Old Foe", + "body": "Man has always dreamed of destroying the moon. In this essay, I shall..." + }, + { + "heading": "Love is all you need", + "body": "Man's dream has always been to find love. This profound lesson..." 
+ }, + { + "heading": "The Sun: Our Age-Old Friend", + "body": "Although often underappreciated, the sun provides several notable benefits..." + } +] + +# Get the Grounded Generation prompt +input_prompt = tokenizer.apply_chat_template( + conversation=conversation, + documents=documents, + tokenize=False, + add_generation_prompt=True, + return_tensors="pt" +) +print("== Grounded Generation prompt:", input_prompt) + +# Tokenize the prompt +input_ids = tokenizer.encode_plus(input_prompt, return_tensors="pt") + +# Generate a response +gen_tokens = model.generate( + input_ids, + max_new_tokens=512, + do_sample=True, + temperature=0.3, + skip_special_tokens=True, +) + +# Decode and print the generated text along with generation prompt +gen_text = tokenizer.decode(gen_tokens[0]) +print(gen_text) + +```` + + + + +````txt wordWrap +<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. + +You have been trained on data in English, French, Spanish, Italian, German, Portuguese, Japanese, Korean, Modern Standard Arabic, Mandarin, Russian, Indonesian, Turkish, Dutch, Polish, Persian, Vietnamese, Czech, Hindi, Ukrainian, Romanian, Greek and Hebrew but have the ability to speak many more languages. + +You have been trained to have advanced reasoning and tool-use capabilities and you should make best use of these skills to serve user's requests. + +## Tool Use +Think about how you can make best use of the provided tools to help with the task and come up with a high level plan that you will execute first. + +0. 
Start by writing <|START_THINKING|> followed by a detailed step by step plan of how you will solve the problem. For each step explain your thinking fully and give details of required tool calls (if needed). Unless specified otherwise, you write your plan in natural language. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when the user request is so straightforward to address that only a trivial plan would be needed. + NOTE: You MUST skip this step when you are directly responding to the user's request without using any tools. + +Then carry out your plan by repeatedly executing the following steps. +1. Action: write <|START_ACTION|> followed by a list of JSON-formatted tool calls, with each one containing "tool_name" and "parameters" fields. + When there are multiple tool calls which are completely independent of each other (i.e. they can be executed in parallel), you should list them out all together in one step. When you finish, close it out with <|END_ACTION|>. +2. Observation: you will then receive results of those tool calls in JSON format in the very next turn, wrapped around by <|START_TOOL_RESULT|> and <|END_TOOL_RESULT|>. Carefully observe those results and think about what to do next. Note that these results will be provided to you in a separate turn. NEVER hallucinate results. + Every tool call produces a list of results (when a tool call produces no result or a single result, it'll still get wrapped inside a list). Each result is clearly linked to its originating tool call via its "tool_call_id". +3. Reflection: start the next turn by writing <|START_THINKING|> followed by what you've figured out so far, any changes you need to make to your plan, and what you will do next. When you finish, close it out with <|END_THINKING|>. + You can optionally choose to skip this step when everything is going according to plan and no special pieces of information or reasoning chains need to be recorded. 
+ NOTE: You MUST skip this step when you are done with tool-use actions and are ready to respond to the user. + +You can repeat the above 3 steps multiple times (could be 0 times too if no suitable tool calls are available or needed), until you decide it's time to finally respond to the user. + +4. Response: then break out of the loop and write <|START_RESPONSE|> followed by a piece of text which serves as a response to the user's last request. Use all previous tool calls and results to help you when formulating your response. When you finish, close it out with <|END_RESPONSE|>. + +## Available Tools +Here is the list of tools that you have available to you. +You can ONLY use the tools listed here. When a tool is not listed below, it is NOT available and you should NEVER attempt to use it. +Each tool is represented as a JSON object with fields like "name", "description", "parameters" (per JSON Schema), and optionally, "responses" (per JSON Schema). + +```json +[ + {"name": "direct-injected-document", "description": "This is a special tool to directly inject user-uploaded documents into the chat as additional context. DO NOT use this tool by yourself!", "parameters": {"type": "object", "properties": {}, "required": []}, "responses": {"200": {"description": "Successfully returned a list of chunked text snippets from the directly uploaded documents.", "content": {"application/json": {"schema": {"type": "array", "items": {"type": "object", "required": ["url", "snippet"], "properties": {"url": {"type": "string", "description": "The url of the uploaded document."}, "snippet": {"type": "string", "description": "The text snippet for the returned document chunk."}}}}}}}}} +] +``` + +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Command. +- You are a large language model built by Cohere. 
+- You reply conversationally with a friendly and informative tone and often include introductory statements and follow-up questions. +- If the input is ambiguous, ask clarifying follow-up questions. +- Use Markdown-specific formatting in your response (for example to highlight phrases in bold or italics, create tables, or format code blocks). +- Use LaTeX to generate mathematical notation for complex equations. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Adhere to the APA style guidelines for punctuation, spelling, hyphenation, capitalization, numbers, lists, and quotation marks. Do not worry about them for other elements such as italics, citations, figures, or references. +- Use gender-neutral pronouns for unspecified persons. +- Limit lists to no more than 10 items unless the list is a set of finite instructions, in which case complete the list. +- Use the third person when asked to write a summary. +- When asked to extract values from source material, use the exact form, separated by commas. +- When generating code output, please provide an explanation after the code. +- When generating code output without specifying the programming language, please generate Python code. 
+- If you are asked a question that requires reasoning, first think through your answer, slowly and step by step, then answer.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>What has Man always dreamed of?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_THINKING|>I will look through the document to address the users needs.<|END_THINKING|><|START_ACTION|>[ + {"tool_call_id": "0", "tool_name": "direct-injected-document", "parameters": {}} +]<|END_ACTION|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><|START_TOOL_RESULT|>[ + { + "tool_call_id": "0", + "results": { + "0": {"body": "Man has always dreamed of destroying the moon. In this essay, I shall...", "heading": "The Moon: Our Age-Old Foe"}, + "1": {"body": "Man's dream has always been to find love. This profound lesson...", "heading": "Love is all you need"}, + "2": {"body": "Although often underappreciated, the sun provides several notable benefits...", "heading": "The Sun: Our Age-Old Friend"} + }, + "is_error": null + } +]<|END_TOOL_RESULT|><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|> +```` + + + +````txt wordWrap +There are two answers to this question. Man has dreamed of destroying the moon and finding love. +```` + + + +### Option 2: Regular Generation + +You may find that simply including relevant documents directly in a user message works just as well, or better than using the documents parameter to render the special grounded generation template. Grounded Generation is generally a strong default, but Regular Generation can offer more control and customization over the prompt, at the cost of some effort to find an optimal prompt. We encourage users to play with both Grounded Generation and Regular Generation, and to evaluate which mode works best for their specific use case. + +## Tool use, Function Calling & Agent capabilities + +Command R7B has been specifically trained with conversational tool use capabilities. 
This allows the model to interact with external tools like APIs, databases, or search engines. These capabilities have been trained into the model via a mixture of supervised fine-tuning and preference fine-tuning, using a specific prompt template. Deviating from this prompt template will likely reduce performance, but we encourage experimentation. + +Instructions on how to leverage these capabilities in Hugging Face are coming soon. \ No newline at end of file diff --git a/fern/pages/text-generation/prompt-engineering/prompt-truncation.mdx b/fern/pages/text-generation/prompt-engineering/prompt-truncation.mdx index 1015d44c..16e4986e 100644 --- a/fern/pages/text-generation/prompt-engineering/prompt-truncation.mdx +++ b/fern/pages/text-generation/prompt-engineering/prompt-truncation.mdx @@ -10,8 +10,8 @@ keywords: "prompt engineering, generative AI prompts" createdAt: "Thu Feb 29 2024 18:14:26 GMT+0000 (Coordinated Universal Time)" updatedAt: "Thu May 23 2024 20:21:50 GMT+0000 (Coordinated Universal Time)" --- -LLMs come with limitations; specifically, they can only handle so much text as input. This means that you will often need to figure out which document sections and chat history elements to keep, and which ones to omit. +LLMs come with limitations; specifically, they can only handle so much text as input. This means that you will often need to figure out which parts of a document or chat history to keep, and which ones to omit. -To make this easier, the Chat API comes with a helpful `prompt_truncation` parameter. When `prompt_truncation` is set to `AUTO`, the API will automatically break up the documents into smaller chunks, rerank the chunks and drop the minimum required number of the least relevant documents in order to stay within the model's context length limit. +To make this easier, the Chat API comes with a helpful `prompt_truncation` parameter.
When `prompt_truncation` is set to `AUTO`, the API will automatically break up the documents into smaller chunks, rerank those chunks according to how relevant they are, and then start dropping the least relevant documents until the text fits within the model's context length limit. **Note:** The last few messages in the chat history will never be truncated or dropped. The RAG API will throw a 400 `Too Many Tokens` error if it can't fit those messages along with a single document under the context limit. diff --git a/fern/pages/tutorials/build-things-with-cohere/rag-with-cohere.mdx b/fern/pages/tutorials/build-things-with-cohere/rag-with-cohere.mdx index a7f0af35..3ea3acd1 100644 --- a/fern/pages/tutorials/build-things-with-cohere/rag-with-cohere.mdx +++ b/fern/pages/tutorials/build-things-with-cohere/rag-with-cohere.mdx @@ -314,7 +314,7 @@ Document: {'text': 'Team-Building Activities: We foster team spirit with monthly Further reading: - [Rerank endpoint API reference](/reference/rerank) -- [Documentation on Rerank](/docs/overview) +- [Documentation on Rerank](/docs/rerank-overview) - [Documentation on Rerank fine-tuning](/docs/rerank-fine-tuning) - [Documentation on Rerank best practices](/docs/reranking-best-practices) diff --git a/fern/pages/tutorials/build-things-with-cohere/reranking-with-cohere.mdx b/fern/pages/tutorials/build-things-with-cohere/reranking-with-cohere.mdx index d3a7f274..f0ad5e22 100644 --- a/fern/pages/tutorials/build-things-with-cohere/reranking-with-cohere.mdx +++ b/fern/pages/tutorials/build-things-with-cohere/reranking-with-cohere.mdx @@ -99,7 +99,7 @@ Document: {'text': 'Performance Reviews Frequency: We conduct informal check-ins Further reading: - [Rerank endpoint API reference](/reference/rerank) -- [Documentation on Rerank](/docs/overview) +- [Documentation on Rerank](/docs/rerank-overview) - [Documentation on Rerank fine-tuning](/docs/rerank-fine-tuning) - [Documentation on Rerank best practices](/docs/reranking-best-practices) - 
[LLM University module on Text Representation](https://cohere.com/llmu#text-representation) diff --git a/fern/pages/v2/deployment-options/cohere-on-aws/amazon-bedrock.mdx b/fern/pages/v2/deployment-options/cohere-on-aws/amazon-bedrock.mdx index 204f688a..d582ba24 100644 --- a/fern/pages/v2/deployment-options/cohere-on-aws/amazon-bedrock.mdx +++ b/fern/pages/v2/deployment-options/cohere-on-aws/amazon-bedrock.mdx @@ -18,10 +18,9 @@ Here, you'll learn how to use Amazon Bedrock to deploy both the Cohere Command a - Command R - Command R+ -- Command Light -- Command - Embed - English - Embed - Multilingual +- Rerank v3.5 ## Prerequisites @@ -62,17 +61,17 @@ model_id = "cohere.embed-english-v3" # or "cohere.embed-multilingual-v3" # Invoke the model and print the response result = co.embed( - model=model_id, - input_type=input_type, - texts=texts, - truncate=truncate) # aws_client.invoke_model(**params) + model=model_id, + input_type=input_type, + texts=texts, + truncate=truncate) # aws_client.invoke_model(**params) print(result) ``` ## Text Generation -You can use this code to invoke either Command R (`cohere.command-r-v1:0`), Command R+ (`cohere.command-r-plus-v1:0`), Command (`cohere.command-text-v14`), or Command light (`cohere.command-light-text-v14`) on Amazon Bedrock: +You can use this code to invoke either Command R (`cohere.command-r-v1:0`) or Command R+ (`cohere.command-r-plus-v1:0`) on Amazon Bedrock: ```python PYTHON import cohere @@ -90,3 +89,35 @@ result = co.chat(message="Write a LinkedIn post about starting a career in tech: print(result) ``` + +## Rerank + +You can use this code to invoke our latest Rerank models on Bedrock: + +```python PYTHON +import cohere + +co = cohere.BedrockClientV2( + aws_region="us-west-2", # pick a region where the model is available + aws_access_key="...", + aws_secret_key="...", + aws_session_token="...", +) + +docs = [ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern
Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", + "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", +] + +response = co.rerank( + model="cohere.rerank-v3-5:0", + query="What is the capital of the United States?", + documents=docs, + top_n=3, +) + +print(response) +``` diff --git a/fern/pages/v2/deployment-options/cohere-on-microsoft-azure.mdx b/fern/pages/v2/deployment-options/cohere-on-microsoft-azure.mdx index 7ae6d561..083552b0 100644 --- a/fern/pages/v2/deployment-options/cohere-on-microsoft-azure.mdx +++ b/fern/pages/v2/deployment-options/cohere-on-microsoft-azure.mdx @@ -202,8 +202,97 @@ response = co.rerank( ) ``` -## A Note on SDKs +## Using the Cohere SDK -You should be aware that it's possible to use the cohere SDK client to consume Azure AI deployments. Here are example notes for [Command](https://github.com/Azure/azureml-examples/blob/main/sdk/python/foundation-models/cohere/cohere-cmdR.ipynb) and [Embed](https://github.com/Azure/azureml-examples/blob/main/sdk/python/foundation-models/cohere/cohere-embed.ipynb). +You can use the Cohere SDK client to consume Cohere models that are deployed via Azure AI Foundry. This means you can leverage the SDK's features such as RAG, tool use, structured outputs, and more. + +The following are a few examples on how to use the SDK for the different models. 
+ +### Setup +```python PYTHON +# pip install cohere + +import cohere + +# For Command models +co_chat = cohere.Client( + api_key="AZURE_INFERENCE_CREDENTIAL", + base_url="AZURE_MODEL_ENDPOINT", # Example - https://Cohere-command-r-plus-08-2024-xyz.eastus.models.ai.azure.com/ +) + +# For Embed models +co_embed = cohere.Client( + api_key="AZURE_INFERENCE_CREDENTIAL", + base_url="AZURE_MODEL_ENDPOINT", # Example - https://cohere-embed-v3-multilingual-xyz.eastus.models.ai.azure.com/ +) + +# For Rerank models +co_rerank = cohere.Client( + api_key="AZURE_INFERENCE_CREDENTIAL", + base_url="AZURE_MODEL_ENDPOINT", # Example - https://cohere-rerank-v3-multilingual-xyz.eastus.models.ai.azure.com/ +) +``` + +### Chat +```python PYTHON +message = "I'm joining a new startup called Co1t today. Could you help me write a short introduction message to my teammates." + +response = co_chat.chat(message=message) + +print(response) +``` +### RAG +```python PYTHON +faqs_short = [ + { + "text": "Reimbursing Travel Expenses: Easily manage your travel expenses by submitting them through our finance tool. Approvals are prompt and straightforward." + }, + { + "text": "Health and Wellness Benefits: We care about your well-being and offer gym memberships, on-site yoga classes, and comprehensive health insurance." + }, +] + +query = "Are there fitness-related perks?" + +response = co_chat.chat(message=query, documents=faqs_short) + +print(response) +``` + +### Embed +```python PYTHON +docs = [ + "Joining Slack Channels: You will receive an invite via email. 
Be sure to join relevant channels to stay informed and engaged.", + "Finding Coffee Spots: For your caffeine fix, head to the break room's coffee machine or cross the street to the café for artisan coffee.", +] + +doc_emb = co_embed.embed( + input_type="search_document", + texts=docs, +).embeddings +``` + +### Rerank +```python PYTHON +faqs_short = [ + { + "text": "Reimbursing Travel Expenses: Easily manage your travel expenses by submitting them through our finance tool. Approvals are prompt and straightforward." + }, + { + "text": "Working from Abroad: Working remotely from another country is possible. Simply coordinate with your manager and ensure your availability during core hours." + }, + { + "text": "Health and Wellness Benefits: We care about your well-being and offer gym memberships, on-site yoga classes, and comprehensive health insurance." + }, +] + +query = "Are there fitness-related perks?" + +results = co_rerank.rerank( + query=query, documents=faqs_short, top_n=2, model="rerank-english-v3.0" +) +``` + +Here are some other examples for [Command](https://github.com/Azure/azureml-examples/blob/main/sdk/python/foundation-models/cohere/cohere-cmdR.ipynb) and [Embed](https://github.com/Azure/azureml-examples/blob/main/sdk/python/foundation-models/cohere/cohere-embed.ipynb). The important thing to understand is that our new and existing customers can call the models from Azure while still leveraging their integration with the Cohere SDK. 
diff --git a/fern/pages/v2/deployment-options/cohere-works-everywhere.mdx b/fern/pages/v2/deployment-options/cohere-works-everywhere.mdx index f4b6f539..a880453b 100644 --- a/fern/pages/v2/deployment-options/cohere-works-everywhere.mdx +++ b/fern/pages/v2/deployment-options/cohere-works-everywhere.mdx @@ -12,10 +12,6 @@ createdAt: "Thu Jun 06 2024 10:53:49 GMT+0000 (Coordinated Universal Time)" updatedAt: "Tue Jun 18 2024 16:38:28 GMT+0000 (Coordinated Universal Time)" --- - -The code examples in this section use the Cohere v1 API. The v2 API is not yet supported for cloud deployments and will be coming soon. - - To maximize convenience in building on and switching between Cohere-supported environments, we have developed SDKs that seamlessly support whichever backend you choose. This allows you to start developing your project with one backend while maintaining the flexibility to switch, should the need arise. Note that the code snippets presented in this document should be more than enough to get you started, but if you end up switching from one environment to another there will be some small changes you need to make to how you import and initialize the SDK. @@ -24,12 +20,16 @@ Note that the code snippets presented in this document should be more than enoug The table below summarizes the environments in which Cohere models can be deployed. You'll notice it contains many links; the links in the "sdk" column take you to Github pages with more information on Cohere's language-specific SDKs, while all the others take you to relevant sections in this document. + +The Cohere v2 API is not yet supported for cloud deployments (Bedrock, SageMaker, Azure, and OCI) and will be coming soon. The code examples shown for these cloud deployments use the v1 API. 
+ + | sdk | [Cohere platform](/reference/about) | [Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere.html) | Sagemaker | Azure | OCI | Private Deployment | | ------------------------------------------------------------ | ---------------------------------------------------------- | -------------------------------------------------------------------------------------------- | ------------------------------- | --------------------------- | -------------------------- | ------------------------------ | -| [Typescript](https://github.com/cohere-ai/cohere-typescript) | [✅ docs](#cohere-platform) | [✅ docs](#bedrock) | [✅ docs](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#cohere-platform) | -| [Python](https://github.com/cohere-ai/cohere-python) | [✅ docs](#cohere-platform) | [✅ docs](#bedrock) | [✅ docs](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#cohere-platform) | -| [Go](https://github.com/cohere-ai/cohere-go) | [✅ docs](#cohere-platform) | [🟠 soon](#bedrock) | [🟠 soon](#sagemaker) | [✅ docs](#azure) | [🟠 soon](#) | [✅ docs](#cohere-platform) | -| [Java](https://github.com/cohere-ai/cohere-java) | [✅ docs](#cohere-platform) | [🟠 soon](#bedrock) | [🟠 soon](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#cohere-platform) | +| [Typescript](https://github.com/cohere-ai/cohere-typescript) | [✅ docs](#cohere-platform) | [✅ docs](#bedrock) | [✅ docs](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#private-deployment) | +| [Python](https://github.com/cohere-ai/cohere-python) | [✅ docs](#cohere-platform) | [✅ docs](#bedrock) | [✅ docs](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#private-deployment) | +| [Go](https://github.com/cohere-ai/cohere-go) | [✅ docs](#cohere-platform) | [🟠 soon](#bedrock) | [🟠 soon](#sagemaker) | [✅ docs](#azure) | [🟠 soon](#) | [✅ docs](#private-deployment) | +| [Java](https://github.com/cohere-ai/cohere-java) | [✅ docs](#cohere-platform) | [🟠 soon](#bedrock) | [🟠 
soon](#sagemaker) | [✅ docs](#azure) | [🟠 soon]() | [✅ docs](#private-deployment) | ## Feature support @@ -165,8 +165,131 @@ public class ChatPost { ```
+#### Private Deployment + + +```typescript TS +const { CohereClient } = require('cohere-ai'); + +const cohere = new CohereClientV2({ + token: '', + base_url='' +}); + +(async () => { + const response = await cohere.chat({ + chatHistory: [ + { role: 'USER', message: 'Who discovered gravity?' }, + { + role: 'CHATBOT', + message: 'The man who is widely credited with discovering gravity is Sir Isaac Newton', + }, + ], + message: 'What year was he born?', + // perform web search before answering the question. You can also use your own custom connector. + connectors: [{ id: 'web-search' }], + }); + + console.log(response); +})(); +``` +```python PYTHON +import cohere + +co = cohere.ClientV2(api_key="", + base_url="") + +response = co.chat( + chat_history=[ + {"role": "USER", "message": "Who discovered gravity?"}, + { + "role": "CHATBOT", + "message": "The man who is widely credited with discovering gravity is Sir Isaac Newton", + }, + ], + message="What year was he born?", + # perform web search before answering the question. You can also use your own custom connector. 
+ connectors=[{"id": "web-search"}], +) + +print(response) +``` +```go GO +package main + +import ( + "context" + "log" + + cohere "github.com/cohere-ai/cohere-go/v2" + client "github.com/cohere-ai/cohere-go/v2/client" +) + +func main() { + co := client.NewClient( + client.WithBaseURL(""), + ) + + resp, err := co.V2.Chat( + context.TODO(), + &cohere.ChatRequest{ + ChatHistory: []*cohere.ChatMessage{ + { + Role: cohere.ChatMessageRoleUser, + Message: "Who discovered gravity?", + }, + { + Role: cohere.ChatMessageRoleChatbot, + Message: "The man who is widely credited with discovering gravity is Sir Isaac Newton", + }}, + Message: "What year was he born?", + Connectors: []*cohere.ChatConnector{ + {Id: "web-search"}, + }, + }, + ) + + if err != nil { + log.Fatal(err) + } + + log.Printf("%+v", resp) +} +``` +```java JAVA +import com.cohere.api.Cohere; +import com.cohere.api.requests.ChatRequest; +import com.cohere.api.types.ChatMessage; +import com.cohere.api.types.Message; +import com.cohere.api.types.NonStreamedChatResponse; + +import java.util.List; + + +public class ChatPost { + public static void main(String[] args) { + Cohere cohere = Cohere.builder().token("Your API key").clientName("snippet").build(); + Cohere cohere = Cohere.builder().environment(Environment.custom("")).clientName("snippet").build(); + + NonStreamedChatResponse response = cohere.v2.chat( + ChatRequest.builder() + .message("What year was he born?") + .chatHistory( + List.of(Message.user(ChatMessage.builder().message("Who discovered gravity?").build()), + Message.chatbot(ChatMessage.builder().message("The man who is widely credited with discovering gravity is Sir Isaac Newton").build()))).build()); + + System.out.println(response); + } +} +``` + + #### Bedrock + +Rerank v3.5 on Bedrock is only supported with Rerank API v2, via `BedrockClientV2()` + + ```typescript TS const { BedrockClient } = require('cohere-ai'); diff --git a/fern/pages/v2/models/the-command-family-of-models/command-r-plus.mdx 
b/fern/pages/v2/models/the-command-family-of-models/command-r-plus.mdx index a216fc17..898ddc7a 100644 --- a/fern/pages/v2/models/the-command-family-of-models/command-r-plus.mdx +++ b/fern/pages/v2/models/the-command-family-of-models/command-r-plus.mdx @@ -21,10 +21,20 @@ For information on toxicity, safety, and using this model responsibly check out ### Model Details | Model Name | Description | Modality | Context Length | Maximum Output Tokens | Endpoints | |--------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------------|----------------|-----------------------|------------------------| +| `command-r7b-12-2024` | `command-r7b-12-2024` is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps. | Text | 128k | 4k | [Chat](/reference/chat)| | `command-r-plus-08-2024` | `command-r-plus-08-2024` is an update of the Command R+ model, delivered in August 2024. | Text | 128k | 4k | [Chat](/reference/chat)| | `command-r-plus-04-2024` | Command R+ is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It is best suited for complex RAG workflows and multi-step tool use. | Text | 128k | 4k | [Chat](/reference/chat)| | `command-r-plus` | `command-r-plus` is an alias for `command-r-plus-04-2024`, so if you use `command-r-plus` in the API, that's the model you're pointing to. 
| Text | 128k | 4k | [Chat](/reference/chat)| +## Command R7B December 2024 Release +Command R7B is the smallest, fastest, and final model in our R family of enterprise-focused [large language models](https://docs.cohere.com/docs/introduction-to-large-language-models) (LLMs). With a context window of 128K, Command R7B offers state-of-the-art performance across a variety of real-world tasks, and is designed for use cases in which speed, cost, and compute are important. Specifically, Command R7B is excellent for: + +- RAG - [Retrieval Augmented Generation](https://docs.cohere.com/docs/retrieval-augmented-generation-rag) (RAG) refers to the practice of ‘grounding’ model outputs in external data sources, which can increase accuracy. Command R7B is exceptionally good at generating responses in conversational tasks, attending over long inputs, and extracting and manipulating numerical information in financial settings. +- Tool-use - With [tool use](https://docs.cohere.com/docs/tool-use), Command models can be given tools such as search engines, APIs, vector databases, etc., which can expand their baseline functionality. Command R7B excels at tool use, exhibiting particular strength in using tools in real-world, diverse, and dynamic environments. In addition, Command R7B is good at avoiding unnecessarily calling tools, which is an important aspect of tool-use in practical applications. +- Agents - As this is being written, [agents](https://docs.cohere.com/docs/multi-step-tool-use) are among the most exciting frontiers for large language models. Command R7B’s multistep tool use capabilities allow it to power fast and capable REACT agents. When set up as an internet-augmented research agent, for example, Command R7B ably completes tasks that require breaking down complex questions into subgoals, and also performs favorably in domains that utilize complex reasoning and active information seeking. 
+ +Command R7B is available today on the Cohere Platform as well as accessible on HuggingFace, or you can access it in the SDK with `command-r7b-12-2024`. For more information, check out our [dedicated blog post](cohere.com/blog/command-r7b). + ## Command R+ August 2024 Release Cohere's flagship text-generation models, Command R and Command R+, received a substantial update in August 2024. We chose to designate these models with time stamps, so in the API Command R+ 08-2024 is accesible with `command-r-plus-08-2024`. diff --git a/fern/pages/v2/models/the-command-family-of-models/command-r.mdx b/fern/pages/v2/models/the-command-family-of-models/command-r.mdx index 6eea77de..c5df5bf9 100644 --- a/fern/pages/v2/models/the-command-family-of-models/command-r.mdx +++ b/fern/pages/v2/models/the-command-family-of-models/command-r.mdx @@ -20,10 +20,20 @@ For information on toxicity, safety, and using this model responsibly check out ### Model Details | Model Name | Description | Modality | Context Length | Maximum Output Tokens | Endpoints| |--------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|----------------|-----------------------|----------| +| `command-r7b-12-2024` | `command-r7b-12-2024` is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning and multiple steps. | Text | 128k | 4k | [Chat](/reference/chat) | | `command-r-08-2024` | `command-r-08-2024` is an update of the Command R model, delivered in August 2024. 
| Text | 128k | 4k | [Chat](/reference/chat) | | | `command-r-03-2024` | Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents. | Text | 128k | 4k | [Chat](/reference/chat) | | | `command-r` | `command-r` is an alias for `command-r-03-2024`, so if you use `command-r` in the API, that's the model you're pointing to. | Text | 128k | 4k | [Chat](/reference/chat) | | +## Command R7B December 2024 Release +Command R7B is the smallest, fastest, and final model in our R family of enterprise-focused [large language models](https://docs.cohere.com/docs/introduction-to-large-language-models) (LLMs). With a context window of 128K, Command R7B offers state-of-the-art performance across a variety of real-world tasks, and is designed for use cases in which speed, cost, and compute are important. Specifically, Command R7B is excellent for: + +- RAG - [Retrieval Augmented Generation](https://docs.cohere.com/docs/retrieval-augmented-generation-rag) (RAG) refers to the practice of ‘grounding’ model outputs in external data sources, which can increase accuracy. Command R7B is exceptionally good at generating responses in conversational tasks, attending over long inputs, and extracting and manipulating numerical information in financial settings. +- Tool-use - With [tool use](https://docs.cohere.com/docs/tool-use), Command models can be given tools such as search engines, APIs, vector databases, etc., which can expand their baseline functionality. Command R7B excels at tool use, exhibiting particular strength in using tools in real-world, diverse, and dynamic environments. In addition, Command R7B is good at avoiding unnecessarily calling tools, which is an important aspect of tool-use in practical applications. 
+- Agents - As this is being written, [agents](https://docs.cohere.com/docs/multi-step-tool-use) are among the most exciting frontiers for large language models. Command R7B’s multistep tool use capabilities allow it to power fast and capable ReAct agents. When set up as an internet-augmented research agent, for example, Command R7B ably completes tasks that require breaking down complex questions into subgoals, and also performs favorably in domains that utilize complex reasoning and active information seeking. + +Command R7B is available today on the Cohere Platform as well as accessible on HuggingFace, or you can access it in the SDK with `command-r7b-12-2024`. For more information, check out our [dedicated blog post](https://cohere.com/blog/command-r7b). + ## Command R August 2024 Release Cohere's flagship text-generation models, Command R and Command R+, received a substantial update in August 2024. We chose to designate these models with time stamps, so in the API Command R 08-2024 is accesible with `command-r-08-2024`. diff --git a/fern/pages/v2/text-embeddings/reranking/overview.mdx b/fern/pages/v2/text-embeddings/reranking/overview.mdx index 1deca035..8ef0e45d 100644 --- a/fern/pages/v2/text-embeddings/reranking/overview.mdx +++ b/fern/pages/v2/text-embeddings/reranking/overview.mdx @@ -1,10 +1,10 @@ --- title: "Rerank Overview" -slug: "v2/docs/overview" +slug: "v2/docs/rerank-overview" hidden: false -description: "This page describes how Cohere's ReRank models work." +description: "This page describes how Cohere's Rerank models work."
image: "../../../../assets/images/f1cc130-cohere_meta_image.jpg" keywords: "Cohere, reranking models, large language models" @@ -19,7 +19,7 @@ The [Rerank API endpoint](/reference/rerank-1), powered by the [Rerank models](/ ### Example with Texts -In the example below, we use the [Rerank API endpoint](/reference/rerank-1) to index the list of `docs` from most to least relevant to the query ` What is the capital of the United States?`. +In the example below, we use the [Rerank API endpoint](/reference/rerank-1) to rank the list of `documents` from most to least relevant to the query `"What is the capital of the United States?"`. **Request** @@ -27,133 +27,133 @@ In this example, the documents being passed in are a list of strings: ```python PYTHON import cohere -co = cohere.ClientV2(api_key="") +co = cohere.ClientV2() query = "What is the capital of the United States?" docs = [ - "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.", - "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.", - "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.", - "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.", - "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.
The federal government (including the United States military) also uses capital punishment."] -results = co.rerank(model="rerank-english-v3.0", query=query, documents=docs, top_n=5, return_documents=True) + "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.", + "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.", + "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment." +] + +results = co.rerank( + model="rerank-v3.5", + query=query, + documents=docs, + top_n=5) ``` **Response** ```jsx { - "id": "97813271-fe74-465d-b9d5-577e77079253", - "results": [ - { - "document": { - "text": "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America." 
- }, - "index": 3, - "relevance_score": 0.9990564 - }, - { - "document": { - "text": "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment." - }, - "index": 4, - "relevance_score": 0.7516481 - }, - { - "document": { - "text": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan." - }, - "index": 1, - "relevance_score": 0.08882029 - }, - { - "document": { - "text": "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274." - }, - "index": 0, - "relevance_score": 0.058238626 - }, - { - "document": { - "text": "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas." - }, - "index": 2, - "relevance_score": 0.019946935 - } - ], - "meta": { - "api_version": { - "version": "2022-12-06" - }, - "billed_units": { - "search_units": 1 - } - } + "id": "97813271-fe74-465d-b9d5-577e77079253", + "results": [ + { + "index": 3, // "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) ..." + "relevance_score": 0.9990564 + }, + { + "index": 4, // "Capital punishment has existed in the United States since before the United States was a country. As of 2017 ..." + "relevance_score": 0.7516481 + }, + { + "index": 1, // "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division ..." + "relevance_score": 0.08882029 + }, + { + "index": 0, // "Carson City is the capital city of the American state of Nevada. 
At the 2010 United States Census, Carson City had a ..." + "relevance_score": 0.058238626 + }, + { + "index": 2, // "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people ..." + "relevance_score": 0.019946935 + } + ], + "meta": { + "api_version": { + "version": "2" + }, + "billed_units": { + "search_units": 1 + } + } } - ``` -### Example with Semi-structured Data: +### Example with Structured Data: -Alternatively, you can pass in a JSON object and specify the fields you'd like to rank over. If you do not pass in any `rank_fields`, it will default to the text key. +If your documents contain structured data, for best performance we recommend formatting them as YAML strings. **Request** ```python PYTHON +import yaml +import cohere + +co = cohere.ClientV2() + query = "What is the capital of the United States?" docs = [ - {"Title":"Facts about Carson City","Content":"Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274."}, - {"Title":"The Commonwealth of Northern Mariana Islands","Content":"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan."}, - {"Title":"The Capital of United States Virgin Islands","Content":"Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas."}, - {"Title":"Washington D.C.","Content":"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory.
This makes it the political center of the United States of America."}, - {"Title":"Capital Punishment in the US","Content":"Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment."}] -results = co.rerank(model="rerank-english-v3.0", query=query, documents=docs, rank_fields=['Title','Content'],top_n=5, return_documents=True) + { + "Title": "Facts about Carson City", + "Content": "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274." + }, + { + "Title": "The Commonwealth of Northern Mariana Islands", + "Content": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan." + }, + { + "Title": "The Capital of United States Virgin Islands", + "Content": "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas." + }, + { + "Title": "Washington D.C.", + "Content":"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America." + }, + { + "Title": "Capital Punishment in the US", + "Content": "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment." 
+ } +] + +yaml_docs = [yaml.dump(doc, sort_keys=False) for doc in docs] +results = co.rerank( + model="rerank-v3.5", + query=query, + documents=yaml_docs, + top_n=5 +) ``` -In the `docs` parameter, we are passing in a list of objects which have the key values: `[Title ,Content]`. As part of the Rerank call, we are specifying which keys to rank over, as well as the order in which the key value pairs should be considered. +In the `documents` parameter, we are passing in a list of YAML strings, representing the structured data. -```python PYTHON +**Response** + +```jsx { "id": "75a94aa7-6761-4a64-a2ae-4bc0a62bc601", "results": [ { - "document": { - "Content": "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.", - "Title": "Washington D.C." - }, "index": 3, "relevance_score": 0.9987405 }, { - "document": { - "Content": "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.", - "Title": "Capital Punishment in the US" - }, "index": 4, "relevance_score": 0.5011778 }, { - "document": { - "Content": "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.", - "Title": "The Capital of United States Virgin Islands" - }, "index": 2, "relevance_score": 0.10070161 }, { - "document": { - "Content": "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States.
Its capital is Saipan.", - "Title": "The Commonwealth of Northern Mariana Islands" - }, "index": 1, "relevance_score": 0.03197956 }, { - "document": { - "Content": "Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.", - "Title": "Facts about Carson City" - }, "index": 0, "relevance_score": 0.019456575 } @@ -172,7 +172,7 @@ In the `docs` parameter, we are passing in a list of objects which have the key ## Multilingual Reranking -Cohere offers a multilingual model, `rerank-multilingual-v3.0`. Please note that performance may vary across languages. The model is trained on the following languages: +Cohere's `rerank-v3.5` and `rerank-multilingual-v3.0` models have been trained for performance across a variety of languages. Please note that performance may vary across languages. These models are trained on the following languages: | ISO Code | Language Name | | -------- | -------------- | diff --git a/fern/pages/v2/text-embeddings/reranking/reranking-best-practices.mdx b/fern/pages/v2/text-embeddings/reranking/reranking-best-practices.mdx new file mode 100644 index 00000000..2cb48e15 --- /dev/null +++ b/fern/pages/v2/text-embeddings/reranking/reranking-best-practices.mdx @@ -0,0 +1,86 @@ +--- +title: Rerank Best Practices +slug: docs/reranking-best-practices +hidden: false +description: >- + Tips for optimal endpoint performance, including constraints on the number of documents, tokens per document, and tokens per query. +image: ../../../../assets/images/b75cfed-cohere_docs_preview_image_1200x630_copy.jpg +keywords: 'rerank, natural language processing' +createdAt: 'Mon Nov 25 2024 16:58:46 GMT+0000 (Coordinated Universal Time)' +updatedAt: 'Mon Nov 25 2024 15:16:00 GMT+0000 (Coordinated Universal Time)' +--- +## Document Chunking + +Under the hood, the Rerank API turns user input into text chunks. Every chunk will include the `query` and a portion of the document text.
Chunk size depends on the model. + +For example, if +- the selected model is `rerank-v3.5`, which has a context length (aka max chunk size) of 4096 tokens +- the query is 100 tokens +- there is one document and it is 10,000 tokens long +- document truncation is disabled by setting the `max_tokens_per_doc` parameter to 10,000 tokens + +Then the document will be broken into the following three chunks: + +``` +relevance_score_1 = score(query[0,99], document[0,3995]) +relevance_score_2 = score(query[0,99], document[3996,7991]) +relevance_score_3 = score(query[0,99], document[7992,9999]) +``` + +And the final relevance score for that document will be computed as the highest score among those chunks: +```python +relevance_score = max( + relevance_score_1, relevance_score_2, relevance_score_3 +) +``` + +If you would like more control over how chunking is done, we recommend that you chunk your documents yourself. + +## Queries + +Our `rerank-v3.5` and `rerank-v3.0` models are trained with a context length of 4096 tokens. The model takes both the _query_ and the _document_ into account when calculating against this limit, and the query can account for up to half of the full context length. In other words, if your query is longer than 2048 tokens, it will be truncated to the first 2048 tokens (leaving the other 2048 for the document(s)). + +## Structured Data Support + +Our Rerank models support reranking structured data formatted as a list of YAML strings. Note that since long document strings get truncated, the order of the keys is especially important. When constructing the YAML string from a dictionary, make sure to maintain the order. In Python that is done by setting `sort_keys=False` when using `yaml.dump`. + +Example: +```python +import yaml + +docs = [ + { + "Title": "How to fix a dishwasher", + "Author": "John Smith", + "Date": "August 1st 2023", + "Content": "Fixing a dishwasher depends on the specific problem you're facing.
Here are some common issues and their potential solutions:....", + }, + { + "Title": "How to fix a leaky sink", + "Date": "July 25th 2024", + "Content": "Fixing a leaky sink will depend on the source of the leak. Here are general steps you can take to address common types of sink leaks:.....", + }, +] + +yaml_docs = [yaml.dump(doc, sort_keys=False) for doc in docs] +``` + +## Interpreting Results + +The most important output from the [Rerank API endpoint](/reference/rerank-1) is the absolute rank exposed in the response object. The score is query dependent, and could be higher or lower depending on the query and passages sent in. In the example below, what matters is that Ottawa is more relevant than Toronto, but the user should not assume that Ottawa is two times more relevant than Ontario. + +``` +[ + RerankResult, + RerankResult, + RerankResult +] +``` + +Relevance scores are normalized to be in the range `[0, 1]`. Scores close to `1` indicate a high relevance to the query, and scores close to `0` indicate low relevance. To find a threshold on the scores to determine whether a document is relevant or not, we recommend going through the following process: + +- Select a set of 30-50 representative queries `Q=[q_0, …, q_n]` from your domain. +- For each query provide a document that is considered borderline relevant to the query for your specific use case, and create a list of (query, document) pairs: `sample_inputs=[(q_0, d_0), …, (q_n, d_n)]`. +- Pass all tuples in `sample_inputs` through the rerank endpoint in a loop, and gather relevance scores `sample_scores=[s_0, ..., s_n]`. + +The average of `sample_scores` can then be used as a reference when deciding a threshold for filtering out irrelevant documents.
diff --git a/fern/pages/v2/text-generation/migrating-v1-to-v2.mdx b/fern/pages/v2/text-generation/migrating-v1-to-v2.mdx index ee1dd6fa..4700ed8d 100644 --- a/fern/pages/v2/text-generation/migrating-v1-to-v2.mdx +++ b/fern/pages/v2/text-generation/migrating-v1-to-v2.mdx @@ -556,7 +556,7 @@ tools_v2 = [ "type": "object", "properties": { "location": { - "type" : "str", + "type" : "string", "description": "the location to get weather, example: San Fransisco, CA" } }, @@ -850,4 +850,4 @@ The following v1 features are not supported in v2: - `connectors` parameter - `prompt_truncation` parameter - Tool use - - `force_single_step` parameter (all tool calls are now multi-step by default) \ No newline at end of file + - `force_single_step` parameter (all tool calls are now multi-step by default) diff --git a/fern/pages/v2/tutorials/build-things-with-cohere/reranking-with-cohere.mdx b/fern/pages/v2/tutorials/build-things-with-cohere/reranking-with-cohere.mdx index b34e22a1..16b0d319 100644 --- a/fern/pages/v2/tutorials/build-things-with-cohere/reranking-with-cohere.mdx +++ b/fern/pages/v2/tutorials/build-things-with-cohere/reranking-with-cohere.mdx @@ -98,7 +98,7 @@ Document: {'text': 'Performance Reviews Frequency: We conduct informal check-ins Further reading: - [Rerank endpoint API reference](https://docs.cohere.com/reference/rerank) -- [Documentation on Rerank](https://docs.cohere.com/docs/overview) +- [Documentation on Rerank](https://docs.cohere.com/docs/rerank-overview) - [Documentation on Rerank fine-tuning](https://docs.cohere.com/docs/rerank-fine-tuning) - [Documentation on Rerank best practices](https://docs.cohere.com/docs/reranking-best-practices) - [LLM University module on Text Representation](https://cohere.com/llmu#text-representation) diff --git a/fern/v1.yml b/fern/v1.yml index c038684f..913a3950 100644 --- a/fern/v1.yml +++ b/fern/v1.yml @@ -41,7 +41,7 @@ navigation: path: pages/get-started/contribute.mdx - section: Models contents: - - page: Models 
Overview + - page: An Overview of Cohere's Models path: pages/models/models.mdx - section: Command contents: @@ -51,9 +51,9 @@ navigation: path: pages/models/the-command-family-of-models/command-r.mdx - page: Command and Command Light path: pages/models/the-command-family-of-models/command-beta.mdx - - page: Embed + - page: Cohere's Embed Models (Details and Application) path: pages/models/cohere-embed.mdx - - page: Rerank + - page: Cohere's Rerank Model (Details and Application) path: pages/models/rerank-2.mdx - page: Aya path: pages/models/aya.mdx @@ -202,6 +202,8 @@ navigation: path: pages/going-to-production/rate-limits.mdx - page: Going Live path: pages/going-to-production/going-live.mdx + - page: Deprecations + path: pages/going-to-production/deprecations.mdx - page: How Does Cohere Pricing Work? path: pages/going-to-production/how-does-cohere-pricing-work.mdx - section: Integrations @@ -429,6 +431,9 @@ navigation: - page: Prompting Command R hidden: true path: pages/text-generation/prompt-engineering/prompting-command-r.mdx + - page: Using Command R7B on Hugging Face + hidden: true + path: pages/text-generation/prompt-engineering/command-r7b-hf.mdx - page: Conclusion hidden: true path: pages/llm-university/intro-text-generation/text-generation-conclusion.mdx diff --git a/fern/v2.yml b/fern/v2.yml index ceb9bea1..9baf8b93 100644 --- a/fern/v2.yml +++ b/fern/v2.yml @@ -41,7 +41,7 @@ navigation: path: pages/get-started/contribute.mdx - section: Models contents: - - page: Models Overview + - page: An Overview of Cohere's Models path: pages/models/models.mdx - section: Command contents: @@ -51,9 +51,9 @@ navigation: path: pages/v2/models/the-command-family-of-models/command-r.mdx - page: Command and Command Light path: pages/v2/models/the-command-family-of-models/command-beta.mdx - - page: Embed + - page: Cohere's Embed Models (Details and Application) path: pages/models/cohere-embed.mdx - - page: Rerank + - page: Cohere's Rerank Model (Details and Application) 
path: pages/models/rerank-2.mdx - page: Aya path: pages/models/aya.mdx @@ -131,7 +131,7 @@ navigation: - page: Rerank Overview path: pages/v2/text-embeddings/reranking/overview.mdx - page: Rerank Best Practices - path: pages/text-embeddings/reranking/reranking-best-practices.mdx + path: pages/v2/text-embeddings/reranking/reranking-best-practices.mdx - page: Text Classification path: pages/v2/text-embeddings/text-classification-with-cohere.mdx - section: Fine-Tuning @@ -183,6 +183,8 @@ navigation: path: pages/going-to-production/rate-limits.mdx - page: Going Live path: pages/going-to-production/going-live.mdx + - page: Deprecations + path: pages/going-to-production/deprecations.mdx - page: How Does Cohere Pricing Work? path: pages/going-to-production/how-does-cohere-pricing-work.mdx - section: Integrations @@ -425,6 +427,9 @@ navigation: - page: Prompting Command R hidden: true path: pages/text-generation/prompt-engineering/prompting-command-r.mdx + - page: Using Command R7B on Hugging Face + hidden: true + path: pages/text-generation/prompt-engineering/command-r7b-hf.mdx - page: Conclusion hidden: true path: pages/llm-university/intro-text-generation/text-generation-conclusion.mdx diff --git a/package.json b/package.json index 540b073e..efb368db 100644 --- a/package.json +++ b/package.json @@ -19,8 +19,8 @@ }, "dependencies": { "cohere-ai": "^7.14.0", - "fern-api": "^0.45.1", + "fern-api": "^0.41.16", "gray-matter": "^4.0.3", "react": "^18.3.1" } -} +} \ No newline at end of file diff --git a/scripts/cookbooks-json/rerank-demo.json b/scripts/cookbooks-json/rerank-demo.json index c0dce880..841b498b 100644 --- a/scripts/cookbooks-json/rerank-demo.json +++ b/scripts/cookbooks-json/rerank-demo.json @@ -13,7 +13,7 @@ }, "title": "Demo of Rerank", "slug": "rerank-demo", - "body": "[block:html]\n{\n \"html\": \"\\n\\n
\\n

Demo of Rerank

\\n
\\n\\n\"\n}\n[/block]\n\nIn the past months, we engineered a novel relevance endpoint that takes a query and a list of documents and predicts the relevance between the query and each document. \n\nIt can be used in a two-stage retrieval setup: First you take the user question, and retrieve the top-100 documents from your collection by either using lexical search or semantic search.\n\nYou then pass the question and these top-100 documents to our relevance-endpoint to get a score for each document. You can then rank these documents based on these scores.\n\nIn our benchmarks across 20 datasets, we **saw significant improvements compared to lexical and semantic search**, especially for use-cases where no training data is available.\n\nWe will demonstrate the rerank endpoint in this notebook.\n\n\n\n\n```python\n!pip install \"cohere<5\"\n```\n\n \u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0mRequirement already satisfied: cohere<5 in /opt/homebrew/lib/python3.9/site-packages (4.45)\n Requirement already satisfied: aiohttp<4.0,>=3.0 in /opt/homebrew/lib/python3.9/site-packages (from cohere<5) (3.8.1)\n Requirement already satisfied: backoff<3.0,>=2.0 in /opt/homebrew/lib/python3.9/site-packages (from cohere<5) (2.2.1)\n Requirement already satisfied: fastavro<2.0,>=1.8 in /opt/homebrew/lib/python3.9/site-packages (from cohere<5) (1.9.3)\n Requirement already satisfied: importlib_metadata<7.0,>=6.0 in /opt/homebrew/lib/python3.9/site-packages (from cohere<5) (6.6.0)\n Requirement already satisfied: requests<3.0.0,>=2.25.0 in /Users/elliottchoi/Library/Python/3.9/lib/python/site-packages (from cohere<5) (2.28.2)\n Requirement already satisfied: urllib3<3,>=1.26 in 
/Users/elliottchoi/Library/Python/3.9/lib/python/site-packages (from cohere<5) (1.26.14)\n Requirement already satisfied: attrs>=17.3.0 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (22.1.0)\n Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (2.0.12)\n Requirement already satisfied: multidict<7.0,>=4.5 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (6.0.2)\n Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (4.0.2)\n Requirement already satisfied: yarl<2.0,>=1.0 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (1.8.1)\n Requirement already satisfied: frozenlist>=1.1.1 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (1.3.1)\n Requirement already satisfied: aiosignal>=1.1.2 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (1.2.0)\n Requirement already satisfied: zipp>=0.5 in /opt/homebrew/lib/python3.9/site-packages (from importlib_metadata<7.0,>=6.0->cohere<5) (3.15.0)\n Requirement already satisfied: idna<4,>=2.5 in /Users/elliottchoi/Library/Python/3.9/lib/python/site-packages (from requests<3.0.0,>=2.25.0->cohere<5) (3.4)\n Requirement already satisfied: certifi>=2017.4.17 in /Users/elliottchoi/Library/Python/3.9/lib/python/site-packages (from requests<3.0.0,>=2.25.0->cohere<5) (2022.12.7)\n \u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. 
If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0m\n\n\n```python\nimport cohere\nimport requests\nimport numpy as np\nfrom time import time\nfrom typing import List\nfrom pprint import pprint\n```\n\n\n```python\nAPI_KEY = \"\"\nco = cohere.Client(API_KEY)\nMODEL_NAME = \"rerank-english-v3.0\" # another option is rerank-multilingual-02\n\nquery = \"What is the capital of the United States?\"\ndocs = [\n \"Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.\",\n \"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.\",\n \"Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.\",\n \"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.\",\n \"West Virginia is a state in the Appalachian region of the United States. Its capital and largest city is Charleston. It is often abbreviated W. Va. or simply WV.\",\n \"Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.\",\n \"North Dakota is a state in the United States. 672,591 people lived in North Dakota in the year 2010. The capital and seat of government is Bismarck.\",\n \"Kentucky is a state in the United States. 
Its capital is Frankfort. It touches the states of Missouri (by the Mississippi River), Illinois, Indiana, Ohio, West Virginia (by the Ohio River), Tennessee and Virginia. There are many rivers in Kentucky\",\n \"Micronesia, officially the Federated States of Micronesia, is an island nation in the Pacific Ocean, northeast of Papua New Guinea. The country is a sovereign state in free association with the United States. The capital city of Federated States of Micronesia is Palikir.\",\n \"Utah is a state in the west United States. The capital and largest city is Salt Lake City. Utah became a state in the U.S. on January 4, 1896.\"]\n```\n\n## Using the Endpoint\nIn the following cell we will call rerank to rank `docs` based on how relevant they are with `query`.\n\n\n\n\n```python\nresults = co.rerank(query=query, model=MODEL_NAME, documents=docs, top_n=3) # Change top_n to change the number of results returned. If top_n is not passed, all results will be returned.\nfor idx, r in enumerate(results):\n print(f\"Document Rank: {idx + 1}, Document Index: {r.index}\")\n print(f\"Document: {r.document['text']}\")\n print(f\"Relevance Score: {r.relevance_score:.2f}\")\n print(\"\\n\")\n```\n\n Document Rank: 1, Document Index: 3\n Document: Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.\n Relevance Score: 1.00\n \n \n Document Rank: 2, Document Index: 5\n Document: Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. 
The federal government (including the United States military) also uses capital punishment.\n Relevance Score: 0.75\n \n \n Document Rank: 3, Document Index: 1\n Document: The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.\n Relevance Score: 0.09\n \n \n\n\n## Search on Wikipedia - End2end demo\nThe following is an example how to use this model end-to-end to search over the Simple English Wikipedia, which consists of about 500k passages. \n\nWe use BM25 lexical search to retrieve the top-100 passages matching the query and then send these 100 passages and the query to our rerank endpoint to get a re-ranked list. We output the top-3 hits according to BM25 lexical search (as used by e.g. Elasticsearch) and the re-ranked list from our endpoint.\n\n\n\n```python\n!pip install -U rank_bm25\n```\n\n \u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0mCollecting rank_bm25\n Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)\n Requirement already satisfied: numpy in /opt/homebrew/lib/python3.9/site-packages (from rank_bm25) (1.23.5)\n Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)\n Installing collected packages: rank_bm25\n \u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0m\u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. 
If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0mSuccessfully installed rank_bm25-0.2.2\n\n\n\n```python\nimport json\nimport gzip\nimport os\nfrom rank_bm25 import BM25Okapi\nfrom sklearn.feature_extraction import _stop_words\nimport string\nfrom tqdm.autonotebook import tqdm\n```\n\n /var/folders/ww/ht8qwj2s7s799qnktblg6qhm0000gp/T/ipykernel_31832/1066443236.py:7: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n from tqdm.autonotebook import tqdm\n\n\n\n```python\n!wget http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz\n```\n\n --2024-04-08 14:28:00-- http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz\n Resolving sbert.net (sbert.net)... 172.64.80.1, 2606:4700:130:436c:6f75:6466:6c61:7265\n Connecting to sbert.net (sbert.net)|172.64.80.1|:80... connected.\n HTTP request sent, awaiting response... 301 Moved Permanently\n Location: https://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz [following]\n --2024-04-08 14:28:01-- https://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz\n Connecting to sbert.net (sbert.net)|172.64.80.1|:443... connected.\n HTTP request sent, awaiting response... 301 Moved Permanently\n Location: https://public.ukp.informatik.tu-darmstadt.de/reimers/sentence-transformers/datasets/simplewiki-2020-11-01.jsonl.gz [following]\n --2024-04-08 14:28:01-- https://public.ukp.informatik.tu-darmstadt.de/reimers/sentence-transformers/datasets/simplewiki-2020-11-01.jsonl.gz\n Resolving public.ukp.informatik.tu-darmstadt.de (public.ukp.informatik.tu-darmstadt.de)... 130.83.167.186\n Connecting to public.ukp.informatik.tu-darmstadt.de (public.ukp.informatik.tu-darmstadt.de)|130.83.167.186|:443... connected.\n HTTP request sent, awaiting response... 
200 OK\n Length: 50223724 (48M) [application/octet-stream]\n Saving to: ‘simplewiki-2020-11-01.jsonl.gz’\n \n simplewiki-2020-11- 100%[===================>] 47.90M 5.78MB/s in 8.9s \n \n 2024-04-08 14:28:11 (5.37 MB/s) - ‘simplewiki-2020-11-01.jsonl.gz’ saved [50223724/50223724]\n \n\n\n\n```python\nwikipedia_filepath = 'simplewiki-2020-11-01.jsonl.gz'\n\npassages = []\nwith gzip.open(wikipedia_filepath, 'rt', encoding='utf8') as fIn:\n for line in fIn:\n data = json.loads(line.strip())\n passages.extend(data['paragraphs'])\n\nprint(\"Passages:\", len(passages))\n```\n\n Passages: 509663\n\n\n\n```python\nprint(passages[0], passages[1])\n```\n\n Ted Cassidy (July 31, 1932 - January 16, 1979) was an American actor. He was best known for his roles as Lurch and Thing on \"The Addams Family\". Aileen Carol Wuornos Pralle (born Aileen Carol Pittman; February 29, 1956 – October 9, 2002) was an American serial killer. She was born in Rochester, Michigan. She confessed to killing six men in Florida and was executed in Florida State Prison by lethal injection for the murders. 
Wuornos said that the men she killed had raped her or tried to rape her while she was working as a prostitute.\n\n\n\n```python\n\ndef bm25_tokenizer(text):\n tokenized_doc = []\n for token in text.lower().split():\n token = token.strip(string.punctuation)\n\n if len(token) > 0 and token not in _stop_words.ENGLISH_STOP_WORDS:\n tokenized_doc.append(token)\n return tokenized_doc\n\n\ntokenized_corpus = []\nfor passage in tqdm(passages):\n tokenized_corpus.append(bm25_tokenizer(passage))\n\nbm25 = BM25Okapi(tokenized_corpus)\n```\n\n 100%|██████████| 509663/509663 [00:09<00:00, 51180.82it/s]\n\n\n\n```python\n\ndef search(query, top_k=3, num_candidates=100):\n print(\"Input question:\", query)\n\n ##### BM25 search (lexical search) #####\n bm25_scores = bm25.get_scores(bm25_tokenizer(query))\n top_n = np.argpartition(bm25_scores, -num_candidates)[-num_candidates:]\n bm25_hits = [{'corpus_id': idx, 'score': bm25_scores[idx]} for idx in top_n]\n bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True)\n \n print(f\"Top-3 lexical search (BM25) hits\")\n for hit in bm25_hits[0:top_k]:\n print(\"\\t{:.3f}\\t{}\".format(hit['score'], passages[hit['corpus_id']].replace(\"\\n\", \" \")))\n\n \n #Add re-ranking\n docs = [passages[hit['corpus_id']] for hit in bm25_hits]\n \n print(f\"\\nTop-3 hits by rank-API ({len(bm25_hits)} BM25 hits re-ranked)\")\n results = co.rerank(query=query, model=MODEL_NAME, documents=docs, top_n=top_k)\n for hit in results:\n print(\"\\t{:.3f}\\t{}\".format(hit.relevance_score, hit.document[\"text\"].replace(\"\\n\", \" \")))\n```\n\n\n```python\nsearch(query = \"What is the capital of the United States?\")\n```\n\n Input question: What is the capital of the United States?\n Top-3 lexical search (BM25) hits\n \t16.264\tCapital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. 
The federal government (including the United States military) also uses capital punishment.\n \t15.124\tIn 1783, it was the capital of the United States for a few months.\n \t14.476\tNew York was the capital of the United States under the Articles of Confederation from 1785 to 1788. When the US Constitution was made, it stayed as the capital from 1789 until 1790. In 1789, the first President of the United States, George Washington, was inaugurated; the first United States Congress and the Supreme Court of the United States each met for the first time, and the United States Bill of Rights was written, all at Federal Hall on Wall Street. By 1790, New York grew bigger than Philadelphia, so it become the biggest city in the United States. By the end of 1790, because of the Residence Act, Philadelphia became the new capital.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.999\tWashington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.\n \t0.994\tNew York was the capital of the United States under the Articles of Confederation from 1785 to 1788. When the US Constitution was made, it stayed as the capital from 1789 until 1790. In 1789, the first President of the United States, George Washington, was inaugurated; the first United States Congress and the Supreme Court of the United States each met for the first time, and the United States Bill of Rights was written, all at Federal Hall on Wall Street. By 1790, New York grew bigger than Philadelphia, so it become the biggest city in the United States. By the end of 1790, because of the Residence Act, Philadelphia became the new capital.\n \t0.993\tAs the national capital of the United States, Washington, D.C. 
has numerous media outlets in various mediums. Some of these media are known throughout the United States, including \"The Washington Post\" and various broadcasting networks headquartered in D.C.\n\n\n\n```python\nsearch(query = \"Number countries Europe\")\n```\n\n Input question: Number countries Europe\n Top-3 lexical search (BM25) hits\n \t16.963\tECoHR' has a number of judges. The number of judges is seven normally but at the case of dealing a great issue, the number will be 21 and the judges are equally from member countries of the Council of Europe. At present, there are forty seven member countries of the Council of Europe. Each country may have one judge in the ECoHR. But, judges work independently for the ECoHR, and not for their country.\n \t14.560\tMost countries in Europe, and a few countries in Asia, have made some or all synthetic cannabinoids illegal.\n \t14.165\tMany of these countries were members of the Western European Union. Many, such as Norway, are also in Northern Europe or in Central Europe or Southern Europe.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.997\tThere are at least 43 countries in Europe (the European identities of 5 transcontinental countries:Cyprus, Georgia, Kazakhstan, Russia and Turkey are disputed). Most of these countries are members of the European Union.\n \t0.987\tWithin these regions, there are up to 48 independent European countries (with the identities of 5 transcontinental countries being disputed). The largest is the Russian Federation, which covers 39% of Europe.\n \t0.981\tEurope, the planet's 6th largest continent, includes 47 countries and assorted dependencies, islands and territories.\n\n\n\n```python\nsearch(query = \"Elon Musk year birth\")\n```\n\n Input question: Elon Musk year birth\n Top-3 lexical search (BM25) hits\n \t22.568\tTesla, Inc. is a company based in Palo Alto, California which makes electric cars. 
It was started in 2003 by Martin Eberhard, Dylan Stott, and Elon Musk (who also co-founded PayPal and SpaceX and is the CEO of SpaceX). Eberhard no longer works there. Today, Elon Musk is the Chief Executive Officer (CEO). It started selling its first car, the Roadster in 2008.\n \t20.492\tElon Musk complained via Twitter about Los Angeles traffic and the same day, December 17, 2016, founded the company. It built a short test tunnel in Los Angeles.\n \t20.448\tAt the end of 2016, Musk founded The Boring Company which focuses on tunnelling and infrastructure. He mentioned Los Angeles traffic as the reason for starting this company. In March 2017 Elon Musk announced he has started another company which aims to merge human brains and computers, it is called Neuralink.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.994\tElon Reeve Musk (born June 28, 1971) is a businessman and philanthropist. He was born in South Africa. He moved to Canada and later became an American citizen. Musk is the current CEO & Chief Product Architect of Tesla Motors, a company that makes electric vehicles. He is also the CEO of Solar City, a company that makes solar panels, and the CEO & CTO of SpaceX, an aerospace company. In August 2020, Bloomberg ranked Musk third among the richest people on the planet with net worth to be $115.4 billion.\n \t0.602\tElon Musk and his brother started Zip2, a software company, in 1995. In 1999 he sold it and became a millionaire. He then started X.com, which merged with the company to make PayPal. X.com was then renamed to PayPal, and he focused on growing that part of the company. He then started SpaceX and became the CEO of Tesla.\n \t0.474\tIn early 2002, Musk was seeking workers for his new space company, soon to be named SpaceX. Musk found a rocket engineer Tom Mueller (later SpaceX's CTO of Propulsion). He agreed to work for Musk. That was how SpaceX was born. The first headquarters of SpaceX was in a warehouse in El Segundo, California. 
The company has grown rapidly since it was founded in 2002, growing from 160 workers in November 2005 to 1,100 in 2010, 3,800 workers and contractors by October 2013, nearly 5,000 by late 2015, and about 6,000 in April 2017.\n\n\n\n```python\nsearch(query = \"Which US president was killed?\")\n```\n\n Input question: Which US president was killed?\n Top-3 lexical search (BM25) hits\n \t11.966\tHe came into office when the previous president, Cyprien Ntaryamira, was killed in a plane crash. It was an assassination in which the Rwandan president Juvénal Habyarimana was also killed. Ntibantunganya left office when he was deposed by Pierre Buyoya in a military coup of 1996.\n \t11.697\tBurr killed Alexander Hamilton in a duel in 1804, when Burr was still Vice President.\n \t11.482\tAfter President James A. Garfield died, vice-president Chester Arthur replaced him. The man who killed him expected the new President to pardon him. This did not happen.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.984\tJames Abram Garfield (November 19, 1831 - September 19, 1881) was the 20th (1881) President of the United States and the 2nd President to be assassinated (killed while in office). President Garfield was in office from March to September of 1881. He was in office for a total of six months and fifteen days. For almost half that time he was bedridden as a result of an attempt to kill him. He was shot on July 2 and finally died in September the same year he got into office.\n \t0.976\tPresident William McKinley was killed by anarchist Leon Czolgosz because Czolgosz believed president McKinley was against good working people, he considered McKinley responsible for falsifying the reasons for the war, and approving and waging an illegal, devastating Philippines war.\n \t0.916\tOn the night that President Abraham Lincoln was killed, someone also tried to kill Seward. For the rest of his life, Seward had scars on his face from the attack. 
Later, the man who attacked him was caught and put to death.\n\n\n\n```python\nsearch(query=\"When is Chinese New Year\")\n```\n\n Input question: When is Chinese New Year\n Top-3 lexical search (BM25) hits\n \t18.606\tToday in China the Gregorian calendar is used for most activities. At the same time, the Chinese calendar is still used for traditional Chinese holidays like Chinese New Year or Lunar New Year.\n \t18.151\tBefore that, the holiday was usually just called the \"NewYear\". Because the traditional Chinese calendar is mostly based on the changes in the moon, the Chinese New Year is also known in English as the \"Lunar New Year\" or \"Chinese Lunar New Year\". This name comes from \"Luna\", an old Latin name for the moon. The Indonesian name for the holiday is Imlek, which comes from the Hokkien word for the old Chinese calendar and is therefore also like saying \"Lunar New Year\".\n \t18.011\tSpring Festival is the Chinese New Year.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.999\tChinese New Year, known in China as the SpringFestival and in Singapore as the LunarNewYear, is a holiday on and around the new moon on the first day of the year in the traditional Chinese calendar. This calendar is based on the changes in the moon and is only sometimes changed to fit the seasons of the year based on how the Earth moves around the sun. Because of this, Chinese New Year is never on January1. It moves around between January21 and February20.\n \t0.997\tChinese New Year always starts on a new moon, when the Moon is between the Earth and Sun and it looks all dark in the night sky. Because new moons happen about every 29.53 days but the year set by Pope GregoryXIII is 365.2425 days long, the Chinese holiday moves to different days each year. 
The Chinese calendar adds a 13th month every so often to keep the seasons in the right place, so the first day of the new year always happens between January21 and February20 on the 2nd or 3rd new moon after the 1st day of winter. The chart on the right gives the day of each Chinese New Year from 1996 to 2031.\n \t0.996\tChinese New Year lasts fifteen days, including one week as a national holiday. It starts with the first day of the Chinese lunar year and ends with the full moon fifteen days later. It is always in the middle of winter, but is called the Spring Festival in Chinese because Chinese seasons are a little different from English ones. On the first day of the Chinese New Year, people call on friends and relatives. Because most people watch the special performances on CCTV all the night on New Year's Eve and don't go to bed until 12:00 AM, they usually get up later in the next day. The fifth day of the Chinese New Year is the day to welcome the god of Wealth (Chinese:财神爷), many people make and eat dumplings (Chinese:饺子. Pinyin: Jaozi). They believe that dumplings can hold the god of Wealth and bring luck. The last day of the Chinese New Year is the Lantern Festival. On this day, the moon becomes the full moon. People go out and watch the lantern festivals everywhere. After that, they eat sweet dumpling (Chinese:汤圆,元宵), a kind of dumpling which is round and looks like the full moon.\n\n\n\n```python\nsearch(query=\"How many people live in Paris\")\n```\n\n Input question: How many people live in Paris\n Top-3 lexical search (BM25) hits\n \t16.277\tLive à Paris (English: \"Live in Paris\") is a live album by Canadian singer Céline Dion.\n \t15.173\tÎle-de-France is a region of France. The capital city is Paris. It is also the capital city of France. In 2013 about 12 million people lived in the region. About 2.1 million people live in the city of Paris.\n \t14.666\tGennevilliers is a town in France near Paris. 
It is in the region Île-de-France and the department of Hauts-de-Seine. About 41,000 people live there.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.999\tParis (nicknamed the \"\"City of light\"\") is the capital city of France, and the largest city in France. The area is , and around 2.15 million people live there. If suburbs are counted, the population of the Paris area rises to 12 million people.\n \t0.987\tÎle-de-France is a region of France. The capital city is Paris. It is also the capital city of France. In 2013 about 12 million people lived in the region. About 2.1 million people live in the city of Paris.\n \t0.602\tEssonne is a department to the south of Paris in the Île-de-France region. Its prefecture is Évry. About 1,172,000 people live there (2006 estimation).\n\n\n\n```python\nsearch(query=\"Who is the director of The Matrix?\")\n```\n\n Input question: Who is the director of The Matrix?\n Top-3 lexical search (BM25) hits\n \t16.253\tAn inverse matrix is a matrix that, when multiplied by another matrix, equals the identity matrix. For example:\n \t16.072\tis an identity matrix. There is exactly one identity matrix for each square dimension set. An identity matrix is special because when multiplying any matrix by the identity matrix, the result is always the original matrix with no change.\n \t15.353\tFirst, the system needs to be turned into an augmented matrix. In an augmented matrix, each linear equation becomes a row. On one side of the augmented matrix, the coefficients of each term in the linear equation become numbers in the matrix. On the other side of the augmented matrix are the constant terms each linear equation is equal to. For this system, the augmented matrix is:\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.995\tThe Matrix is a science fiction action movie that was made in 1999. It was written and directed by the Wachowski Brothers. 
The main actors in the movie are Keanu Reeves, Laurence Fishburne, Carrie-Anne Moss, and Hugo Weaving. \"The Matrix\" was followed by two sequels: \"The Matrix Reloaded\" and \"The Matrix Revolutions\".\n \t0.992\tHelmut Bakaitis (born 26 September 1944) is a German-born Australian director, actor and screenwriter. He is known for his role as The Architect in \"The Matrix\" movie series. Bakaitis was born in Lauban, Lower Silesia, Germany (now Lubań, Poland). Bakaitis started teaching directing at Australian Academy of Dramatic Art (AADA).\n \t0.804\tThe Matrix Revolutions is a 2003 movie that was written and directed by the Wachowski brothers. It is the sequel to \"The Matrix Reloaded\".", + "body": "[block:html]\n{\n \"html\": \"\\n\\n
\\n

Demo of Rerank

\\n
\\n\\n\"\n}\n[/block]\n\nIn the past months, we engineered a novel relevance endpoint that takes a query and a list of documents and predicts the relevance between the query and each document. \n\nIt can be used in a two-stage retrieval setup: First you take the user question, and retrieve the top-100 documents from your collection by either using lexical search or semantic search.\n\nYou then pass the question and these top-100 documents to our relevance-endpoint to get a score for each document. You can then rank these documents based on these scores.\n\nIn our benchmarks across 20 datasets, we **saw significant improvements compared to lexical and semantic search**, especially for use-cases where no training data is available.\n\nWe will demonstrate the rerank endpoint in this notebook.\n\n\n\n\n```python\n!pip install \"cohere<5\"\n```\n\n \u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0mRequirement already satisfied: cohere<5 in /opt/homebrew/lib/python3.9/site-packages (4.45)\n Requirement already satisfied: aiohttp<4.0,>=3.0 in /opt/homebrew/lib/python3.9/site-packages (from cohere<5) (3.8.1)\n Requirement already satisfied: backoff<3.0,>=2.0 in /opt/homebrew/lib/python3.9/site-packages (from cohere<5) (2.2.1)\n Requirement already satisfied: fastavro<2.0,>=1.8 in /opt/homebrew/lib/python3.9/site-packages (from cohere<5) (1.9.3)\n Requirement already satisfied: importlib_metadata<7.0,>=6.0 in /opt/homebrew/lib/python3.9/site-packages (from cohere<5) (6.6.0)\n Requirement already satisfied: requests<3.0.0,>=2.25.0 in /Users/elliottchoi/Library/Python/3.9/lib/python/site-packages (from cohere<5) (2.28.2)\n Requirement already satisfied: urllib3<3,>=1.26 in 
/Users/elliottchoi/Library/Python/3.9/lib/python/site-packages (from cohere<5) (1.26.14)\n Requirement already satisfied: attrs>=17.3.0 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (22.1.0)\n Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (2.0.12)\n Requirement already satisfied: multidict<7.0,>=4.5 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (6.0.2)\n Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (4.0.2)\n Requirement already satisfied: yarl<2.0,>=1.0 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (1.8.1)\n Requirement already satisfied: frozenlist>=1.1.1 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (1.3.1)\n Requirement already satisfied: aiosignal>=1.1.2 in /opt/homebrew/lib/python3.9/site-packages (from aiohttp<4.0,>=3.0->cohere<5) (1.2.0)\n Requirement already satisfied: zipp>=0.5 in /opt/homebrew/lib/python3.9/site-packages (from importlib_metadata<7.0,>=6.0->cohere<5) (3.15.0)\n Requirement already satisfied: idna<4,>=2.5 in /Users/elliottchoi/Library/Python/3.9/lib/python/site-packages (from requests<3.0.0,>=2.25.0->cohere<5) (3.4)\n Requirement already satisfied: certifi>=2017.4.17 in /Users/elliottchoi/Library/Python/3.9/lib/python/site-packages (from requests<3.0.0,>=2.25.0->cohere<5) (2022.12.7)\n \u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. 
If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0m\n\n\n```python\nimport cohere\nimport requests\nimport numpy as np\nfrom time import time\nfrom typing import List\nfrom pprint import pprint\n```\n\n\n```python\nAPI_KEY = \"\"\nco = cohere.Client(API_KEY)\nMODEL_NAME = \"rerank-english-v3.0\" # another option is rerank-multilingual-v3.0\n\nquery = \"What is the capital of the United States?\"\ndocs = [\n \"Carson City is the capital city of the American state of Nevada. At the 2010 United States Census, Carson City had a population of 55,274.\",\n \"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.\",\n \"Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.\",\n \"Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.\",\n \"West Virginia is a state in the Appalachian region of the United States. Its capital and largest city is Charleston. It is often abbreviated W. Va. or simply WV.\",\n \"Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.\",\n \"North Dakota is a state in the United States. 672,591 people lived in North Dakota in the year 2010. The capital and seat of government is Bismarck.\",\n \"Kentucky is a state in the United States. Its capital is Frankfort. 
It touches the states of Missouri (by the Mississippi River), Illinois, Indiana, Ohio, West Virginia (by the Ohio River), Tennessee and Virginia. There are many rivers in Kentucky\",\n \"Micronesia, officially the Federated States of Micronesia, is an island nation in the Pacific Ocean, northeast of Papua New Guinea. The country is a sovereign state in free association with the United States. The capital city of Federated States of Micronesia is Palikir.\",\n \"Utah is a state in the west United States. The capital and largest city is Salt Lake City. Utah became a state in the U.S. on January 4, 1896.\"]\n```\n\n## Using the Endpoint\nIn the following cell we will call rerank to rank `docs` based on how relevant they are to `query`.\n\n\n\n\n```python\nresults = co.rerank(query=query, model=MODEL_NAME, documents=docs, top_n=3) # Change top_n to change the number of results returned. If top_n is not passed, all results will be returned.\nfor idx, r in enumerate(results):\n print(f\"Document Rank: {idx + 1}, Document Index: {r.index}\")\n print(f\"Document: {r.document['text']}\")\n print(f\"Relevance Score: {r.relevance_score:.2f}\")\n print(\"\\n\")\n```\n\n Document Rank: 1, Document Index: 3\n Document: Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.\n Relevance Score: 1.00\n \n \n Document Rank: 2, Document Index: 5\n Document: Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. 
The federal government (including the United States military) also uses capital punishment.\n Relevance Score: 0.75\n \n \n Document Rank: 3, Document Index: 1\n Document: The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.\n Relevance Score: 0.09\n \n \n\n\n## Search on Wikipedia - End2end demo\nThe following is an example of how to use this model end-to-end to search over the Simple English Wikipedia, which consists of about 500k passages. \n\nWe use BM25 lexical search to retrieve the top-100 passages matching the query and then send these 100 passages and the query to our rerank endpoint to get a re-ranked list. We output the top-3 hits according to BM25 lexical search (as used by e.g. Elasticsearch) and the re-ranked list from our endpoint.\n\n\n\n```python\n!pip install -U rank_bm25\n```\n\n \u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0mCollecting rank_bm25\n Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)\n Requirement already satisfied: numpy in /opt/homebrew/lib/python3.9/site-packages (from rank_bm25) (1.23.5)\n Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)\n Installing collected packages: rank_bm25\n \u001b[33m DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0m\u001b[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. 
If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621\u001b[0m\u001b[33m\n \u001b[0mSuccessfully installed rank_bm25-0.2.2\n\n\n\n```python\nimport json\nimport gzip\nimport os\nfrom rank_bm25 import BM25Okapi\nfrom sklearn.feature_extraction import _stop_words\nimport string\nfrom tqdm.autonotebook import tqdm\n```\n\n /var/folders/ww/ht8qwj2s7s799qnktblg6qhm0000gp/T/ipykernel_31832/1066443236.py:7: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n from tqdm.autonotebook import tqdm\n\n\n\n```python\n!wget http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz\n```\n\n --2024-04-08 14:28:00-- http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz\n Resolving sbert.net (sbert.net)... 172.64.80.1, 2606:4700:130:436c:6f75:6466:6c61:7265\n Connecting to sbert.net (sbert.net)|172.64.80.1|:80... connected.\n HTTP request sent, awaiting response... 301 Moved Permanently\n Location: https://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz [following]\n --2024-04-08 14:28:01-- https://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz\n Connecting to sbert.net (sbert.net)|172.64.80.1|:443... connected.\n HTTP request sent, awaiting response... 301 Moved Permanently\n Location: https://public.ukp.informatik.tu-darmstadt.de/reimers/sentence-transformers/datasets/simplewiki-2020-11-01.jsonl.gz [following]\n --2024-04-08 14:28:01-- https://public.ukp.informatik.tu-darmstadt.de/reimers/sentence-transformers/datasets/simplewiki-2020-11-01.jsonl.gz\n Resolving public.ukp.informatik.tu-darmstadt.de (public.ukp.informatik.tu-darmstadt.de)... 130.83.167.186\n Connecting to public.ukp.informatik.tu-darmstadt.de (public.ukp.informatik.tu-darmstadt.de)|130.83.167.186|:443... connected.\n HTTP request sent, awaiting response... 
200 OK\n Length: 50223724 (48M) [application/octet-stream]\n Saving to: ‘simplewiki-2020-11-01.jsonl.gz’\n \n simplewiki-2020-11- 100%[===================>] 47.90M 5.78MB/s in 8.9s \n \n 2024-04-08 14:28:11 (5.37 MB/s) - ‘simplewiki-2020-11-01.jsonl.gz’ saved [50223724/50223724]\n \n\n\n\n```python\nwikipedia_filepath = 'simplewiki-2020-11-01.jsonl.gz'\n\npassages = []\nwith gzip.open(wikipedia_filepath, 'rt', encoding='utf8') as fIn:\n for line in fIn:\n data = json.loads(line.strip())\n passages.extend(data['paragraphs'])\n\nprint(\"Passages:\", len(passages))\n```\n\n Passages: 509663\n\n\n\n```python\nprint(passages[0], passages[1])\n```\n\n Ted Cassidy (July 31, 1932 - January 16, 1979) was an American actor. He was best known for his roles as Lurch and Thing on \"The Addams Family\". Aileen Carol Wuornos Pralle (born Aileen Carol Pittman; February 29, 1956 – October 9, 2002) was an American serial killer. She was born in Rochester, Michigan. She confessed to killing six men in Florida and was executed in Florida State Prison by lethal injection for the murders. 
Wuornos said that the men she killed had raped her or tried to rape her while she was working as a prostitute.\n\n\n\n```python\n\ndef bm25_tokenizer(text):\n tokenized_doc = []\n for token in text.lower().split():\n token = token.strip(string.punctuation)\n\n if len(token) > 0 and token not in _stop_words.ENGLISH_STOP_WORDS:\n tokenized_doc.append(token)\n return tokenized_doc\n\n\ntokenized_corpus = []\nfor passage in tqdm(passages):\n tokenized_corpus.append(bm25_tokenizer(passage))\n\nbm25 = BM25Okapi(tokenized_corpus)\n```\n\n 100%|██████████| 509663/509663 [00:09<00:00, 51180.82it/s]\n\n\n\n```python\n\ndef search(query, top_k=3, num_candidates=100):\n print(\"Input question:\", query)\n\n ##### BM25 search (lexical search) #####\n bm25_scores = bm25.get_scores(bm25_tokenizer(query))\n top_n = np.argpartition(bm25_scores, -num_candidates)[-num_candidates:]\n bm25_hits = [{'corpus_id': idx, 'score': bm25_scores[idx]} for idx in top_n]\n bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True)\n \n print(f\"Top-3 lexical search (BM25) hits\")\n for hit in bm25_hits[0:top_k]:\n print(\"\\t{:.3f}\\t{}\".format(hit['score'], passages[hit['corpus_id']].replace(\"\\n\", \" \")))\n\n \n #Add re-ranking\n docs = [passages[hit['corpus_id']] for hit in bm25_hits]\n \n print(f\"\\nTop-3 hits by rank-API ({len(bm25_hits)} BM25 hits re-ranked)\")\n results = co.rerank(query=query, model=MODEL_NAME, documents=docs, top_n=top_k)\n for hit in results:\n print(\"\\t{:.3f}\\t{}\".format(hit.relevance_score, hit.document[\"text\"].replace(\"\\n\", \" \")))\n```\n\n\n```python\nsearch(query = \"What is the capital of the United States?\")\n```\n\n Input question: What is the capital of the United States?\n Top-3 lexical search (BM25) hits\n \t16.264\tCapital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. 
The federal government (including the United States military) also uses capital punishment.\n \t15.124\tIn 1783, it was the capital of the United States for a few months.\n \t14.476\tNew York was the capital of the United States under the Articles of Confederation from 1785 to 1788. When the US Constitution was made, it stayed as the capital from 1789 until 1790. In 1789, the first President of the United States, George Washington, was inaugurated; the first United States Congress and the Supreme Court of the United States each met for the first time, and the United States Bill of Rights was written, all at Federal Hall on Wall Street. By 1790, New York grew bigger than Philadelphia, so it become the biggest city in the United States. By the end of 1790, because of the Residence Act, Philadelphia became the new capital.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.999\tWashington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America.\n \t0.994\tNew York was the capital of the United States under the Articles of Confederation from 1785 to 1788. When the US Constitution was made, it stayed as the capital from 1789 until 1790. In 1789, the first President of the United States, George Washington, was inaugurated; the first United States Congress and the Supreme Court of the United States each met for the first time, and the United States Bill of Rights was written, all at Federal Hall on Wall Street. By 1790, New York grew bigger than Philadelphia, so it become the biggest city in the United States. By the end of 1790, because of the Residence Act, Philadelphia became the new capital.\n \t0.993\tAs the national capital of the United States, Washington, D.C. 
has numerous media outlets in various mediums. Some of these media are known throughout the United States, including \"The Washington Post\" and various broadcasting networks headquartered in D.C.\n\n\n\n```python\nsearch(query = \"Number countries Europe\")\n```\n\n Input question: Number countries Europe\n Top-3 lexical search (BM25) hits\n \t16.963\tECoHR' has a number of judges. The number of judges is seven normally but at the case of dealing a great issue, the number will be 21 and the judges are equally from member countries of the Council of Europe. At present, there are forty seven member countries of the Council of Europe. Each country may have one judge in the ECoHR. But, judges work independently for the ECoHR, and not for their country.\n \t14.560\tMost countries in Europe, and a few countries in Asia, have made some or all synthetic cannabinoids illegal.\n \t14.165\tMany of these countries were members of the Western European Union. Many, such as Norway, are also in Northern Europe or in Central Europe or Southern Europe.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.997\tThere are at least 43 countries in Europe (the European identities of 5 transcontinental countries:Cyprus, Georgia, Kazakhstan, Russia and Turkey are disputed). Most of these countries are members of the European Union.\n \t0.987\tWithin these regions, there are up to 48 independent European countries (with the identities of 5 transcontinental countries being disputed). The largest is the Russian Federation, which covers 39% of Europe.\n \t0.981\tEurope, the planet's 6th largest continent, includes 47 countries and assorted dependencies, islands and territories.\n\n\n\n```python\nsearch(query = \"Elon Musk year birth\")\n```\n\n Input question: Elon Musk year birth\n Top-3 lexical search (BM25) hits\n \t22.568\tTesla, Inc. is a company based in Palo Alto, California which makes electric cars. 
It was started in 2003 by Martin Eberhard, Dylan Stott, and Elon Musk (who also co-founded PayPal and SpaceX and is the CEO of SpaceX). Eberhard no longer works there. Today, Elon Musk is the Chief Executive Officer (CEO). It started selling its first car, the Roadster in 2008.\n \t20.492\tElon Musk complained via Twitter about Los Angeles traffic and the same day, December 17, 2016, founded the company. It built a short test tunnel in Los Angeles.\n \t20.448\tAt the end of 2016, Musk founded The Boring Company which focuses on tunnelling and infrastructure. He mentioned Los Angeles traffic as the reason for starting this company. In March 2017 Elon Musk announced he has started another company which aims to merge human brains and computers, it is called Neuralink.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.994\tElon Reeve Musk (born June 28, 1971) is a businessman and philanthropist. He was born in South Africa. He moved to Canada and later became an American citizen. Musk is the current CEO & Chief Product Architect of Tesla Motors, a company that makes electric vehicles. He is also the CEO of Solar City, a company that makes solar panels, and the CEO & CTO of SpaceX, an aerospace company. In August 2020, Bloomberg ranked Musk third among the richest people on the planet with net worth to be $115.4 billion.\n \t0.602\tElon Musk and his brother started Zip2, a software company, in 1995. In 1999 he sold it and became a millionaire. He then started X.com, which merged with the company to make PayPal. X.com was then renamed to PayPal, and he focused on growing that part of the company. He then started SpaceX and became the CEO of Tesla.\n \t0.474\tIn early 2002, Musk was seeking workers for his new space company, soon to be named SpaceX. Musk found a rocket engineer Tom Mueller (later SpaceX's CTO of Propulsion). He agreed to work for Musk. That was how SpaceX was born. The first headquarters of SpaceX was in a warehouse in El Segundo, California. 
The company has grown rapidly since it was founded in 2002, growing from 160 workers in November 2005 to 1,100 in 2010, 3,800 workers and contractors by October 2013, nearly 5,000 by late 2015, and about 6,000 in April 2017.\n\n\n\n```python\nsearch(query = \"Which US president was killed?\")\n```\n\n Input question: Which US president was killed?\n Top-3 lexical search (BM25) hits\n \t11.966\tHe came into office when the previous president, Cyprien Ntaryamira, was killed in a plane crash. It was an assassination in which the Rwandan president Juvénal Habyarimana was also killed. Ntibantunganya left office when he was deposed by Pierre Buyoya in a military coup of 1996.\n \t11.697\tBurr killed Alexander Hamilton in a duel in 1804, when Burr was still Vice President.\n \t11.482\tAfter President James A. Garfield died, vice-president Chester Arthur replaced him. The man who killed him expected the new President to pardon him. This did not happen.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.984\tJames Abram Garfield (November 19, 1831 - September 19, 1881) was the 20th (1881) President of the United States and the 2nd President to be assassinated (killed while in office). President Garfield was in office from March to September of 1881. He was in office for a total of six months and fifteen days. For almost half that time he was bedridden as a result of an attempt to kill him. He was shot on July 2 and finally died in September the same year he got into office.\n \t0.976\tPresident William McKinley was killed by anarchist Leon Czolgosz because Czolgosz believed president McKinley was against good working people, he considered McKinley responsible for falsifying the reasons for the war, and approving and waging an illegal, devastating Philippines war.\n \t0.916\tOn the night that President Abraham Lincoln was killed, someone also tried to kill Seward. For the rest of his life, Seward had scars on his face from the attack. 
Later, the man who attacked him was caught and put to death.\n\n\n\n```python\nsearch(query=\"When is Chinese New Year\")\n```\n\n Input question: When is Chinese New Year\n Top-3 lexical search (BM25) hits\n \t18.606\tToday in China the Gregorian calendar is used for most activities. At the same time, the Chinese calendar is still used for traditional Chinese holidays like Chinese New Year or Lunar New Year.\n \t18.151\tBefore that, the holiday was usually just called the \"NewYear\". Because the traditional Chinese calendar is mostly based on the changes in the moon, the Chinese New Year is also known in English as the \"Lunar New Year\" or \"Chinese Lunar New Year\". This name comes from \"Luna\", an old Latin name for the moon. The Indonesian name for the holiday is Imlek, which comes from the Hokkien word for the old Chinese calendar and is therefore also like saying \"Lunar New Year\".\n \t18.011\tSpring Festival is the Chinese New Year.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.999\tChinese New Year, known in China as the SpringFestival and in Singapore as the LunarNewYear, is a holiday on and around the new moon on the first day of the year in the traditional Chinese calendar. This calendar is based on the changes in the moon and is only sometimes changed to fit the seasons of the year based on how the Earth moves around the sun. Because of this, Chinese New Year is never on January1. It moves around between January21 and February20.\n \t0.997\tChinese New Year always starts on a new moon, when the Moon is between the Earth and Sun and it looks all dark in the night sky. Because new moons happen about every 29.53 days but the year set by Pope GregoryXIII is 365.2425 days long, the Chinese holiday moves to different days each year. 
The Chinese calendar adds a 13th month every so often to keep the seasons in the right place, so the first day of the new year always happens between January21 and February20 on the 2nd or 3rd new moon after the 1st day of winter. The chart on the right gives the day of each Chinese New Year from 1996 to 2031.\n \t0.996\tChinese New Year lasts fifteen days, including one week as a national holiday. It starts with the first day of the Chinese lunar year and ends with the full moon fifteen days later. It is always in the middle of winter, but is called the Spring Festival in Chinese because Chinese seasons are a little different from English ones. On the first day of the Chinese New Year, people call on friends and relatives. Because most people watch the special performances on CCTV all the night on New Year's Eve and don't go to bed until 12:00 AM, they usually get up later in the next day. The fifth day of the Chinese New Year is the day to welcome the god of Wealth (Chinese:财神爷), many people make and eat dumplings (Chinese:饺子. Pinyin: Jaozi). They believe that dumplings can hold the god of Wealth and bring luck. The last day of the Chinese New Year is the Lantern Festival. On this day, the moon becomes the full moon. People go out and watch the lantern festivals everywhere. After that, they eat sweet dumpling (Chinese:汤圆,元宵), a kind of dumpling which is round and looks like the full moon.\n\n\n\n```python\nsearch(query=\"How many people live in Paris\")\n```\n\n Input question: How many people live in Paris\n Top-3 lexical search (BM25) hits\n \t16.277\tLive à Paris (English: \"Live in Paris\") is a live album by Canadian singer Céline Dion.\n \t15.173\tÎle-de-France is a region of France. The capital city is Paris. It is also the capital city of France. In 2013 about 12 million people lived in the region. About 2.1 million people live in the city of Paris.\n \t14.666\tGennevilliers is a town in France near Paris. 
It is in the region Île-de-France and the department of Hauts-de-Seine. About 41,000 people live there.\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.999\tParis (nicknamed the \"\"City of light\"\") is the capital city of France, and the largest city in France. The area is , and around 2.15 million people live there. If suburbs are counted, the population of the Paris area rises to 12 million people.\n \t0.987\tÎle-de-France is a region of France. The capital city is Paris. It is also the capital city of France. In 2013 about 12 million people lived in the region. About 2.1 million people live in the city of Paris.\n \t0.602\tEssonne is a department to the south of Paris in the Île-de-France region. Its prefecture is Évry. About 1,172,000 people live there (2006 estimation).\n\n\n\n```python\nsearch(query=\"Who is the director of The Matrix?\")\n```\n\n Input question: Who is the director of The Matrix?\n Top-3 lexical search (BM25) hits\n \t16.253\tAn inverse matrix is a matrix that, when multiplied by another matrix, equals the identity matrix. For example:\n \t16.072\tis an identity matrix. There is exactly one identity matrix for each square dimension set. An identity matrix is special because when multiplying any matrix by the identity matrix, the result is always the original matrix with no change.\n \t15.353\tFirst, the system needs to be turned into an augmented matrix. In an augmented matrix, each linear equation becomes a row. On one side of the augmented matrix, the coefficients of each term in the linear equation become numbers in the matrix. On the other side of the augmented matrix are the constant terms each linear equation is equal to. For this system, the augmented matrix is:\n \n Top-3 hits by rank-API (100 BM25 hits re-ranked)\n \t0.995\tThe Matrix is a science fiction action movie that was made in 1999. It was written and directed by the Wachowski Brothers. 
The main actors in the movie are Keanu Reeves, Laurence Fishburne, Carrie-Anne Moss, and Hugo Weaving. \"The Matrix\" was followed by two sequels: \"The Matrix Reloaded\" and \"The Matrix Revolutions\".\n \t0.992\tHelmut Bakaitis (born 26 September 1944) is a German-born Australian director, actor and screenwriter. He is known for his role as The Architect in \"The Matrix\" movie series. Bakaitis was born in Lauban, Lower Silesia, Germany (now Lubań, Poland). Bakaitis started teaching directing at Australian Academy of Dramatic Art (AADA).\n \t0.804\tThe Matrix Revolutions is a 2003 movie that was written and directed by the Wachowski brothers. It is the sequel to \"The Matrix Reloaded\".", "html": "", "htmlmode": false, "fullscreen": false, diff --git a/scripts/cookbooks-mdx/rerank-demo.mdx b/scripts/cookbooks-mdx/rerank-demo.mdx index ec2864ce..ea908330 100644 --- a/scripts/cookbooks-mdx/rerank-demo.mdx +++ b/scripts/cookbooks-mdx/rerank-demo.mdx @@ -132,7 +132,7 @@ slug: /page/rerank-demo } -In the past months, we engineered a novel relevance endpoint that takes a query and a list of documents and predicts the relevance between the query and each document. +In the past months, we engineered a novel relevance endpoint that takes a query and a list of documents and predicts the relevance between the query and each document. It can be used in a two-stage retrieval setup: First you take the user question, and retrieve the top-100 documents from your collection by either using lexical search or semantic search. @@ -193,7 +193,7 @@ docs = [ "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.", "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. 
This makes it the political center of the United States of America.", "West Virginia is a state in the Appalachian region of the United States. Its capital and largest city is Charleston. It is often abbreviated W. Va. or simply WV.", - "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.", + "Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment.", "North Dakota is a state in the United States. 672,591 people lived in North Dakota in the year 2010. The capital and seat of government is Bismarck.", "Kentucky is a state in the United States. Its capital is Frankfort. It touches the states of Missouri (by the Mississippi River), Illinois, Indiana, Ohio, West Virginia (by the Ohio River), Tennessee and Virginia. There are many rivers in Kentucky", "Micronesia, officially the Federated States of Micronesia, is an island nation in the Pacific Ocean, northeast of Papua New Guinea. The country is a sovereign state in free association with the United States. The capital city of Federated States of Micronesia is Palikir.", @@ -218,22 +218,22 @@ for idx, r in enumerate(results): Document Rank: 1, Document Index: 3 Document: Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America. 
Relevance Score: 1.00 - - + + Document Rank: 2, Document Index: 5 - Document: Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment. + Document: Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment. Relevance Score: 0.75 - - + + Document Rank: 3, Document Index: 1 Document: The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan. Relevance Score: 0.09 - - + + ## Search on Wikipedia - End2end demo -The following is an example how to use this model end-to-end to search over the Simple English Wikipedia, which consists of about 500k passages. +The following is an example how to use this model end-to-end to search over the Simple English Wikipedia, which consists of about 500k passages. We use BM25 lexical search to retrieve the top-100 passages matching the query and then send these 100 passages and the query to our rerank endpoint to get a re-ranked list. We output the top-3 hits according to BM25 lexical search (as used by e.g. Elasticsearch) and the re-ranked list from our endpoint. @@ -289,11 +289,11 @@ from tqdm.autonotebook import tqdm HTTP request sent, awaiting response... 
200 OK Length: 50223724 (48M) [application/octet-stream] Saving to: ‘simplewiki-2020-11-01.jsonl.gz’ - - simplewiki-2020-11- 100%[===================>] 47.90M 5.78MB/s in 8.9s - + + simplewiki-2020-11- 100%[===================>] 47.90M 5.78MB/s in 8.9s + 2024-04-08 14:28:11 (5.37 MB/s) - ‘simplewiki-2020-11-01.jsonl.gz’ saved [50223724/50223724] - + @@ -354,15 +354,15 @@ def search(query, top_k=3, num_candidates=100): top_n = np.argpartition(bm25_scores, -num_candidates)[-num_candidates:] bm25_hits = [{'corpus_id': idx, 'score': bm25_scores[idx]} for idx in top_n] bm25_hits = sorted(bm25_hits, key=lambda x: x['score'], reverse=True) - + print(f"Top-3 lexical search (BM25) hits") for hit in bm25_hits[0:top_k]: print("\t{:.3f}\t{}".format(hit['score'], passages[hit['corpus_id']].replace("\n", " "))) - + #Add re-ranking docs = [passages[hit['corpus_id']] for hit in bm25_hits] - + print(f"\nTop-3 hits by rank-API ({len(bm25_hits)} BM25 hits re-ranked)") results = co.rerank(query=query, model=MODEL_NAME, documents=docs, top_n=top_k) for hit in results: @@ -376,10 +376,10 @@ search(query = "What is the capital of the United States?") Input question: What is the capital of the United States? Top-3 lexical search (BM25) hits - 16.264 Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment. + 16.264 Capital punishment has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. The federal government (including the United States military) also uses capital punishment. 15.124 In 1783, it was the capital of the United States for a few months. 14.476 New York was the capital of the United States under the Articles of Confederation from 1785 to 1788. 
When the US Constitution was made, it stayed as the capital from 1789 until 1790. In 1789, the first President of the United States, George Washington, was inaugurated; the first United States Congress and the Supreme Court of the United States each met for the first time, and the United States Bill of Rights was written, all at Federal Hall on Wall Street. By 1790, New York grew bigger than Philadelphia, so it become the biggest city in the United States. By the end of 1790, because of the Residence Act, Philadelphia became the new capital. - + Top-3 hits by rank-API (100 BM25 hits re-ranked) 0.999 Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. The President of the USA and many major national government offices are in the territory. This makes it the political center of the United States of America. 0.994 New York was the capital of the United States under the Articles of Confederation from 1785 to 1788. When the US Constitution was made, it stayed as the capital from 1789 until 1790. In 1789, the first President of the United States, George Washington, was inaugurated; the first United States Congress and the Supreme Court of the United States each met for the first time, and the United States Bill of Rights was written, all at Federal Hall on Wall Street. By 1790, New York grew bigger than Philadelphia, so it become the biggest city in the United States. By the end of 1790, because of the Residence Act, Philadelphia became the new capital. @@ -396,7 +396,7 @@ search(query = "Number countries Europe") 16.963 ECoHR' has a number of judges. The number of judges is seven normally but at the case of dealing a great issue, the number will be 21 and the judges are equally from member countries of the Council of Europe. At present, there are forty seven member countries of the Council of Europe. Each country may have one judge in the ECoHR. 
But, judges work independently for the ECoHR, and not for their country. 14.560 Most countries in Europe, and a few countries in Asia, have made some or all synthetic cannabinoids illegal. 14.165 Many of these countries were members of the Western European Union. Many, such as Norway, are also in Northern Europe or in Central Europe or Southern Europe. - + Top-3 hits by rank-API (100 BM25 hits re-ranked) 0.997 There are at least 43 countries in Europe (the European identities of 5 transcontinental countries:Cyprus, Georgia, Kazakhstan, Russia and Turkey are disputed). Most of these countries are members of the European Union. 0.987 Within these regions, there are up to 48 independent European countries (with the identities of 5 transcontinental countries being disputed). The largest is the Russian Federation, which covers 39% of Europe. @@ -413,7 +413,7 @@ search(query = "Elon Musk year birth") 22.568 Tesla, Inc. is a company based in Palo Alto, California which makes electric cars. It was started in 2003 by Martin Eberhard, Dylan Stott, and Elon Musk (who also co-founded PayPal and SpaceX and is the CEO of SpaceX). Eberhard no longer works there. Today, Elon Musk is the Chief Executive Officer (CEO). It started selling its first car, the Roadster in 2008. 20.492 Elon Musk complained via Twitter about Los Angeles traffic and the same day, December 17, 2016, founded the company. It built a short test tunnel in Los Angeles. 20.448 At the end of 2016, Musk founded The Boring Company which focuses on tunnelling and infrastructure. He mentioned Los Angeles traffic as the reason for starting this company. In March 2017 Elon Musk announced he has started another company which aims to merge human brains and computers, it is called Neuralink. - + Top-3 hits by rank-API (100 BM25 hits re-ranked) 0.994 Elon Reeve Musk (born June 28, 1971) is a businessman and philanthropist. He was born in South Africa. He moved to Canada and later became an American citizen. 
Musk is the current CEO & Chief Product Architect of Tesla Motors, a company that makes electric vehicles. He is also the CEO of Solar City, a company that makes solar panels, and the CEO & CTO of SpaceX, an aerospace company. In August 2020, Bloomberg ranked Musk third among the richest people on the planet with net worth to be $115.4 billion. 0.602 Elon Musk and his brother started Zip2, a software company, in 1995. In 1999 he sold it and became a millionaire. He then started X.com, which merged with the company to make PayPal. X.com was then renamed to PayPal, and he focused on growing that part of the company. He then started SpaceX and became the CEO of Tesla. @@ -430,7 +430,7 @@ search(query = "Which US president was killed?") 11.966 He came into office when the previous president, Cyprien Ntaryamira, was killed in a plane crash. It was an assassination in which the Rwandan president Juvénal Habyarimana was also killed. Ntibantunganya left office when he was deposed by Pierre Buyoya in a military coup of 1996. 11.697 Burr killed Alexander Hamilton in a duel in 1804, when Burr was still Vice President. 11.482 After President James A. Garfield died, vice-president Chester Arthur replaced him. The man who killed him expected the new President to pardon him. This did not happen. - + Top-3 hits by rank-API (100 BM25 hits re-ranked) 0.984 James Abram Garfield (November 19, 1831 - September 19, 1881) was the 20th (1881) President of the United States and the 2nd President to be assassinated (killed while in office). President Garfield was in office from March to September of 1881. He was in office for a total of six months and fifteen days. For almost half that time he was bedridden as a result of an attempt to kill him. He was shot on July 2 and finally died in September the same year he got into office. 
0.976 President William McKinley was killed by anarchist Leon Czolgosz because Czolgosz believed president McKinley was against good working people, he considered McKinley responsible for falsifying the reasons for the war, and approving and waging an illegal, devastating Philippines war. @@ -447,7 +447,7 @@ search(query="When is Chinese New Year") 18.606 Today in China the Gregorian calendar is used for most activities. At the same time, the Chinese calendar is still used for traditional Chinese holidays like Chinese New Year or Lunar New Year. 18.151 Before that, the holiday was usually just called the "NewYear". Because the traditional Chinese calendar is mostly based on the changes in the moon, the Chinese New Year is also known in English as the "Lunar New Year" or "Chinese Lunar New Year". This name comes from "Luna", an old Latin name for the moon. The Indonesian name for the holiday is Imlek, which comes from the Hokkien word for the old Chinese calendar and is therefore also like saying "Lunar New Year". 18.011 Spring Festival is the Chinese New Year. - + Top-3 hits by rank-API (100 BM25 hits re-ranked) 0.999 Chinese New Year, known in China as the SpringFestival and in Singapore as the LunarNewYear, is a holiday on and around the new moon on the first day of the year in the traditional Chinese calendar. This calendar is based on the changes in the moon and is only sometimes changed to fit the seasons of the year based on how the Earth moves around the sun. Because of this, Chinese New Year is never on January1. It moves around between January21 and February20. 0.997 Chinese New Year always starts on a new moon, when the Moon is between the Earth and Sun and it looks all dark in the night sky. Because new moons happen about every 29.53 days but the year set by Pope GregoryXIII is 365.2425 days long, the Chinese holiday moves to different days each year. 
The Chinese calendar adds a 13th month every so often to keep the seasons in the right place, so the first day of the new year always happens between January21 and February20 on the 2nd or 3rd new moon after the 1st day of winter. The chart on the right gives the day of each Chinese New Year from 1996 to 2031. @@ -464,7 +464,7 @@ search(query="How many people live in Paris") 16.277 Live à Paris (English: "Live in Paris") is a live album by Canadian singer Céline Dion. 15.173 Île-de-France is a region of France. The capital city is Paris. It is also the capital city of France. In 2013 about 12 million people lived in the region. About 2.1 million people live in the city of Paris. 14.666 Gennevilliers is a town in France near Paris. It is in the region Île-de-France and the department of Hauts-de-Seine. About 41,000 people live there. - + Top-3 hits by rank-API (100 BM25 hits re-ranked) 0.999 Paris (nicknamed the ""City of light"") is the capital city of France, and the largest city in France. The area is , and around 2.15 million people live there. If suburbs are counted, the population of the Paris area rises to 12 million people. 0.987 Île-de-France is a region of France. The capital city is Paris. It is also the capital city of France. In 2013 about 12 million people lived in the region. About 2.1 million people live in the city of Paris. @@ -481,7 +481,7 @@ search(query="Who is the director of The Matrix?") 16.253 An inverse matrix is a matrix that, when multiplied by another matrix, equals the identity matrix. For example: 16.072 is an identity matrix. There is exactly one identity matrix for each square dimension set. An identity matrix is special because when multiplying any matrix by the identity matrix, the result is always the original matrix with no change. 15.353 First, the system needs to be turned into an augmented matrix. In an augmented matrix, each linear equation becomes a row. 
On one side of the augmented matrix, the coefficients of each term in the linear equation become numbers in the matrix. On the other side of the augmented matrix are the constant terms each linear equation is equal to. For this system, the augmented matrix is: - + Top-3 hits by rank-API (100 BM25 hits re-ranked) 0.995 The Matrix is a science fiction action movie that was made in 1999. It was written and directed by the Wachowski Brothers. The main actors in the movie are Keanu Reeves, Laurence Fishburne, Carrie-Anne Moss, and Hugo Weaving. "The Matrix" was followed by two sequels: "The Matrix Reloaded" and "The Matrix Revolutions". 0.992 Helmut Bakaitis (born 26 September 1944) is a German-born Australian director, actor and screenwriter. He is known for his role as The Architect in "The Matrix" movie series. Bakaitis was born in Lauban, Lower Silesia, Germany (now Lubań, Poland). Bakaitis started teaching directing at Australian Academy of Dramatic Art (AADA). diff --git a/snippets/curl/rerank-post.sh b/snippets/curl/rerank-post.sh index 11dead44..f2afc9aa 100644 --- a/snippets/curl/rerank-post.sh +++ b/snippets/curl/rerank-post.sh @@ -11,5 +11,5 @@ curl --request POST \ "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states."] + "Capital punishment has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states."] }' \ No newline at end of file diff --git a/snippets/java/app/src/main/java/RerankPost.java b/snippets/java/app/src/main/java/RerankPost.java index 49d74834..c04b6d81 100644 --- a/snippets/java/app/src/main/java/RerankPost.java +++ b/snippets/java/app/src/main/java/RerankPost.java @@ -15,7 +15,7 @@ public static void main(String[] args) { RerankRequestDocumentsItem.of("The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan."), RerankRequestDocumentsItem.of("Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages."), RerankRequestDocumentsItem.of("Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district."), - RerankRequestDocumentsItem.of("Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.") + RerankRequestDocumentsItem.of("Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.") )).model("rerank-english-v3.0").topN(3).build()); System.out.println(response); diff --git a/snippets/node/rerank-post.js b/snippets/node/rerank-post.js index 3dcbe73c..3176f0cc 100644 --- a/snippets/node/rerank-post.js +++ b/snippets/node/rerank-post.js @@ -1,7 +1,7 @@ import { CohereClient } from 'cohere-ai'; const cohere = new CohereClient({ - + }); (async () => { @@ -18,7 +18,7 @@ const cohere = new CohereClient({ text: 'Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
It is a federal district.', }, { - text: 'Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.', + text: 'Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.', }, ], query: 'What is the capital of the United States?', diff --git a/snippets/node/rerank-post.ts b/snippets/node/rerank-post.ts index 497dc675..c4e8e48f 100644 --- a/snippets/node/rerank-post.ts +++ b/snippets/node/rerank-post.ts @@ -18,7 +18,7 @@ const cohere = new CohereClient({ text: 'Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.', }, { - text: 'Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.', + text: 'Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.', }, ], query: 'What is the capital of the United States?', diff --git a/snippets/python-async/rerank-post.py b/snippets/python-async/rerank-post.py index 8dcd9e0b..09c8d479 100644 --- a/snippets/python-async/rerank-post.py +++ b/snippets/python-async/rerank-post.py @@ -8,7 +8,7 @@ "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. 
It is a federal district.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", + "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", ] diff --git a/snippets/python/rerank-post.py b/snippets/python/rerank-post.py index 5150cadb..aa93d115 100644 --- a/snippets/python/rerank-post.py +++ b/snippets/python/rerank-post.py @@ -7,7 +7,7 @@ "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", + "Capital punishment has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states.", ] response = co.rerank( diff --git a/snippets/snippets/curl/rerank-post.sh b/snippets/snippets/curl/rerank-post.sh index 11dead44..71d0d485 100644 --- a/snippets/snippets/curl/rerank-post.sh +++ b/snippets/snippets/curl/rerank-post.sh @@ -4,12 +4,12 @@ curl --request POST \ --header 'content-type: application/json' \ --header "Authorization: bearer $CO_API_KEY" \ --data '{ - "model": "rerank-english-v3.0", + "model": "rerank-v3.5", "query": "What is the capital of the United States?", "top_n": 3, "documents": ["Carson City is the capital city of the American state of Nevada.", "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states."] + "Capital punishment has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states."] }' \ No newline at end of file diff --git a/snippets/snippets/curl/rerank-v2-post.sh b/snippets/snippets/curl/rerank-v2-post.sh index c1810cb1..52b28bcf 100644 --- a/snippets/snippets/curl/rerank-v2-post.sh +++ b/snippets/snippets/curl/rerank-v2-post.sh @@ -4,7 +4,7 @@ curl --request POST \ --header 'content-type: application/json' \ --header "Authorization: bearer $CO_API_KEY" \ --data '{ - "model": "rerank-english-v3.0", + "model": "rerank-v3.5", "query": "What is the capital of the United States?", "top_n": 3, "documents": ["Carson City is the capital city of the American state of Nevada.", diff --git a/snippets/snippets/go/rerank-post/main.go b/snippets/snippets/go/rerank-post/main.go index 78f5e2d8..d8007a61 100644 --- a/snippets/snippets/go/rerank-post/main.go +++ b/snippets/snippets/go/rerank-post/main.go @@ -21,7 +21,7 @@ func main() { {String: "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages."}, {String: "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district."}, }, - Model: cohere.String("rerank-english-v3.0"), + Model: cohere.String("rerank-v3.5"), }, ) diff --git a/snippets/snippets/java/app/src/main/java/RerankPost.java b/snippets/snippets/java/app/src/main/java/RerankPost.java index 11bd102a..d178619b 100644 --- a/snippets/snippets/java/app/src/main/java/RerankPost.java +++ b/snippets/snippets/java/app/src/main/java/RerankPost.java @@ -37,13 +37,13 @@ public static void main(String[] args) { + " capital of the United States. It is" + " a federal district."), RerankRequestDocumentsItem.of( - "Capital punishment (the death penalty) has" + "Capital punishment has" + " existed in the United States since" + " beforethe United States was a" + " country. 
As of 2017, capital" + " punishment is legal in 30 of the 50" + " states."))) - .model("rerank-english-v3.0") + .model("rerank-v3.5") .topN(3) .build()); diff --git a/snippets/snippets/java/app/src/main/java/RerankV2Post.java b/snippets/snippets/java/app/src/main/java/RerankV2Post.java index 8252d106..f47b365c 100644 --- a/snippets/snippets/java/app/src/main/java/RerankV2Post.java +++ b/snippets/snippets/java/app/src/main/java/RerankV2Post.java @@ -13,7 +13,7 @@ public static void main(String[] args) { .v2() .rerank( V2RerankRequest.builder() - .model("rerank-english-v3.0") + .model("rerank-v3.5") .query("What is the capital of the United States?") .documents( List.of( diff --git a/snippets/snippets/node/rerank-post.ts b/snippets/snippets/node/rerank-post.ts index 576990fb..30d2d5e8 100644 --- a/snippets/snippets/node/rerank-post.ts +++ b/snippets/snippets/node/rerank-post.ts @@ -16,12 +16,12 @@ const cohere = new CohereClient({}); text: 'Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.', }, { - text: 'Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.', + text: 'Capital punishment has existed in the United States since beforethe United States was a country.
As of 2017, capital punishment is legal in 30 of the 50 states.', }, ], query: 'What is the capital of the United States?', topN: 3, - model: 'rerank-english-v3.0', + model: 'rerank-v3.5', }); console.log(rerank); diff --git a/snippets/snippets/node/rerank-v2-post.ts b/snippets/snippets/node/rerank-v2-post.ts index 948b9502..5c2bf077 100644 --- a/snippets/snippets/node/rerank-v2-post.ts +++ b/snippets/snippets/node/rerank-v2-post.ts @@ -13,7 +13,7 @@ const cohere = new CohereClient({}); ], query: 'What is the capital of the United States?', topN: 3, - model: 'rerank-english-v3.0', + model: 'rerank-v3.5', }); console.log(rerank); diff --git a/snippets/snippets/python-async/rerank-post.py b/snippets/snippets/python-async/rerank-post.py index 567937fa..dfc04a6d 100644 --- a/snippets/snippets/python-async/rerank-post.py +++ b/snippets/snippets/python-async/rerank-post.py @@ -3,23 +3,19 @@ co = cohere.AsyncClient() -docs = [ - "Carson City is the capital city of the American state of Nevada.", - "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", - "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", - "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states.", -] - - async def main(): response = await co.rerank( - model="rerank-english-v2.0", + model="rerank-v3.5", query="What is the capital of the United States?", - documents=docs, + documents=[ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", + "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", + ], top_n=3, ) print(response) - asyncio.run(main()) diff --git a/snippets/snippets/python-async/rerank-v2-post.py b/snippets/snippets/python-async/rerank-v2-post.py index b01240cf..ea972238 100644 --- a/snippets/snippets/python-async/rerank-v2-post.py +++ b/snippets/snippets/python-async/rerank-v2-post.py @@ -3,21 +3,18 @@ co = cohere.AsyncClientV2() -docs = [ - "Carson City is the capital city of the American state of Nevada.", - "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", - "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", - "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment has existed in the United States since beforethe United States was a country. 
As of 2017, capital punishment is legal in 30 of the 50 states.", -] - - async def main(): response = await co.rerank( - model="rerank-english-v2.0", + model="rerank-v3.5", query="What is the capital of the United States?", - documents=docs, - top_n=3, + documents=[ + "Carson City is the capital city of the American state of Nevada.", + "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", + "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", + "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", + "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", + ], + top_n=3 ) print(response) diff --git a/snippets/snippets/python/chat-v2-post/tools.py b/snippets/snippets/python/chat-v2-post/tools.py index 63a57e05..47c4085c 100644 --- a/snippets/snippets/python/chat-v2-post/tools.py +++ b/snippets/snippets/python/chat-v2-post/tools.py @@ -1,6 +1,6 @@ import cohere -co = cohere.Client() +co = cohere.ClientV2() response = co.chat( model="command-r-plus-08-2024", diff --git a/snippets/snippets/python/rerank-post.py b/snippets/snippets/python/rerank-post.py index b3be77bf..c6ef5c36 100644 --- a/snippets/snippets/python/rerank-post.py +++ b/snippets/snippets/python/rerank-post.py @@ -7,11 +7,11 @@ "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.", "Capitalization or capitalisation in English grammar is the use of a capital letter at the start of a word. English usage varies from capitalization in other languages.", "Washington, D.C. 
(also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district.", - "Capital punishment (the death penalty) has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", + "Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.", ] response = co.rerank( - model="rerank-english-v3.0", + model="rerank-v3.5", query="What is the capital of the United States?", documents=docs, top_n=3, diff --git a/snippets/snippets/python/rerank-v2-post.py b/snippets/snippets/python/rerank-v2-post.py index 4ac20f11..2b615fbb 100644 --- a/snippets/snippets/python/rerank-v2-post.py +++ b/snippets/snippets/python/rerank-v2-post.py @@ -11,7 +11,7 @@ ] response = co.rerank( - model="rerank-english-v3.0", + model="rerank-v3.5", query="What is the capital of the United States?", documents=docs, top_n=3, diff --git a/snippets/snippets/requests/rerank-post.yaml b/snippets/snippets/requests/rerank-post.yaml index 36cbc39c..ca16a86c 100644 --- a/snippets/snippets/requests/rerank-post.yaml +++ b/snippets/snippets/requests/rerank-post.yaml @@ -8,9 +8,9 @@ documents: - text: Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. - - text: Capital punishment (the death penalty) has existed in the United States + - text: Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. query: What is the capital of the United States? 
top_n: 3 -model: rerank-english-v3.0 +model: rerank-v3.5 diff --git a/snippets/snippets/requests/rerank-v2-post.yaml b/snippets/snippets/requests/rerank-v2-post.yaml index e89dc6e1..ffb408b6 100644 --- a/snippets/snippets/requests/rerank-v2-post.yaml +++ b/snippets/snippets/requests/rerank-v2-post.yaml @@ -8,9 +8,9 @@ documents: - Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. - - Capital punishment (the death penalty) has existed in the United States + - Capital punishment has existed in the United States since beforethe United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states. query: What is the capital of the United States? top_n: 3 -model: rerank-english-v3.0 +model: rerank-v3.5