From 373b29b521cd73fa75cadfbf291b8a5d38e94af9 Mon Sep 17 00:00:00 2001 From: Max Shkutnyk Date: Wed, 18 Dec 2024 19:11:31 +0200 Subject: [PATCH 1/3] Auto-format code snippets in text-generation connectors (#310) Co-authored-by: Max Shkutnyk --- .../connectors/connector-authentication.mdx | 121 +++++++++++------- .../creating-and-deploying-a-connector.mdx | 33 +++-- .../connectors/managing-your-connector.mdx | 25 ++-- .../text-generation/connectors/overview-1.mdx | 13 +- 4 files changed, 119 insertions(+), 73 deletions(-) diff --git a/fern/pages/text-generation/connectors/connector-authentication.mdx b/fern/pages/text-generation/connectors/connector-authentication.mdx index f69d54fb..5b18fe11 100644 --- a/fern/pages/text-generation/connectors/connector-authentication.mdx +++ b/fern/pages/text-generation/connectors/connector-authentication.mdx @@ -38,7 +38,8 @@ First, start by generating a secure token. Here’s a snippet of what generating ```python PYTHON # Generate a token -import secrets +import secrets + secrets.token_urlsafe(32) ``` @@ -55,7 +56,11 @@ curl --request POST ``` ```python PYTHON import requests -r = requests.post('{base_connector_url}/search', {'query': 'How do I expense a meal?'}) + +r = requests.post( + "{base_connector_url}/search", + {"query": "How do I expense a meal?"}, +) ``` ```typescript TYPESCRIPT const response = await fetch('{base_connector_url}/search'{ @@ -82,9 +87,12 @@ curl --request POST ``` ```python PYTHON import requests -r = requests.post('{base_connector_url}/search', - data={'query': 'How do I expense a meal?'}, - headers={"Authorization":"Bearer {Connector API key}"}) + +r = requests.post( + "{base_connector_url}/search", + data={"query": "How do I expense a meal?"}, + headers={"Authorization": "Bearer {Connector API key}"}, +) ``` ```typescript TYPESCRIPT const response = await fetch('{base_connector_url}/search'{ @@ -116,16 +124,17 @@ curl --request POST }' ``` ```python PYTHON -import cohere -co = cohere.Client('Your API key') +import cohere + +co = cohere.Client("Your API key") created_connector = co.create_connector( - name="test-connector", - url="http://connector-example.com/search", - service_auth={ - "type": "bearer", - "token": "{Connector API Key}", - }, - ) + name="test-connector", + url="http://connector-example.com/search", + service_auth={ + "type": "bearer", + "token": "{Connector API Key}", + }, +) ``` ```typescript TYPESCRIPT const { CohereClient } = require("cohere-ai"); @@ -164,12 +173,16 @@ curl --request PATCH ``` ```python PYTHON import cohere + # initialize the Cohere Client with an API Key -co = cohere.Client('YOUR_API_KEY') -connectors = co.update_connector(connector_id, service_auth={ - "type": "bearer", - "token": "{Connector API Key}", - }) +co = cohere.Client("YOUR_API_KEY") +connectors = co.update_connector( + connector_id, + service_auth={ + "type": "bearer", + "token": "{Connector API Key}", + }, +) ``` ```typescript TYPESCRIPT const { CohereClient } = require("cohere-ai"); @@ -215,9 +228,12 @@ curl --request POST ``` ```python PYTHON import requests -r = requests.post('http://connector-example.com/search', - data={'query': 'How do I expense a meal?'}, - headers={"Authorization":"Bearer {Personal/Service API key}"}) + +r = requests.post( + "http://connector-example.com/search", + data={"query": "How do I expense a meal?"}, + headers={"Authorization": "Bearer {Personal/Service API key}"}, +) ``` ```typescript TYPESCRIPT const response = await fetch('http://connector-example.com/search'{ @@ -265,19 +281,20 @@ curl 
--request POST }' ``` ```python PYTHON -import cohere -co = cohere.Client('Your API key') +import cohere + +co = cohere.Client("Your API key") created_connector = co.create_connector( - name="test-connector", - url="http://connector-example.com/search", - oauth={ - "client_id": "xxx-yyy.apps.googleusercontent.com", - "client_secret": "zzz-vvv", - "authorize_url": "https://accounts.google.com/o/oauth2/auth", - "token_url": "https://oauth2.googleapis.com/token", - "scope": "https://www.googleapis.com/auth/drive.readonly" - }, - ) + name="test-connector", + url="http://connector-example.com/search", + oauth={ + "client_id": "xxx-yyy.apps.googleusercontent.com", + "client_secret": "zzz-vvv", + "authorize_url": "https://accounts.google.com/o/oauth2/auth", + "token_url": "https://oauth2.googleapis.com/token", + "scope": "https://www.googleapis.com/auth/drive.readonly", + }, +) ``` ```typescript TYPESCRIPT const { CohereClient } = require("cohere-ai"); @@ -322,15 +339,19 @@ curl --request PATCH ``` ```python PYTHON import cohere + # initialize the Cohere Client with an API Key -co = cohere.Client('YOUR_API_KEY') -connectors = co.update_connector(connector_id, oauth={ - "client_id": "xxx-yyy.apps.googleusercontent.com", - "client_secret": "zzz-vvv", - "authorize_url": "https://accounts.google.com/o/oauth2/auth", - "token_url": "https://oauth2.googleapis.com/token", - "scope": "https://www.googleapis.com/auth/drive.readonly" - }) +co = cohere.Client("YOUR_API_KEY") +connectors = co.update_connector( + connector_id, + oauth={ + "client_id": "xxx-yyy.apps.googleusercontent.com", + "client_secret": "zzz-vvv", + "authorize_url": "https://accounts.google.com/o/oauth2/auth", + "token_url": "https://oauth2.googleapis.com/token", + "scope": "https://www.googleapis.com/auth/drive.readonly", + }, +) ``` ```typescript TYPESCRIPT const { CohereClient } = require("cohere-ai"); @@ -365,11 +386,17 @@ To use pass through authentication/authorization specify the access token in the ```python PYTHON -import cohere -co = cohere.Client('Your API key') -response = co.chat( - message="What is the chemical formula for glucose?", - connectors=[{"id": "web-search", "user_access_token": "{Personal/Service API key}" }] +import cohere + +co = cohere.Client("Your API key") +response = co.chat( + message="What is the chemical formula for glucose?", + connectors=[ + { + "id": "web-search", + "user_access_token": "{Personal/Service API key}", + } + ], ) ``` ```curl CURL diff --git a/fern/pages/text-generation/connectors/creating-and-deploying-a-connector.mdx b/fern/pages/text-generation/connectors/creating-and-deploying-a-connector.mdx index 7a2b46d2..6d700442 100644 --- a/fern/pages/text-generation/connectors/creating-and-deploying-a-connector.mdx +++ b/fern/pages/text-generation/connectors/creating-and-deploying-a-connector.mdx @@ -54,7 +54,11 @@ curl --request POST ``` ```python PYTHON import requests -r = requests.post('{base_connector_url}/search', {'query': 'How do I expense a meal?'}) + +r = requests.post( + "{base_connector_url}/search", + {"query": "How do I expense a meal?"}, +) ``` ```typescript TYPESCRIPT const response = await fetch('{base_connector_url}/search'{ @@ -117,12 +121,12 @@ After you’ve deployed the connector and verified it can respond to requests, i import cohere # initialize the Cohere Client with an API Key -co = cohere.Client('YOUR_API_KEY') +co = cohere.Client("YOUR_API_KEY") created_connector = co.create_connector( - name="Example connector", - url="https://connector-example.com/search", - ) + 
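    # NOTE: name and url here are illustrative placeholders — point url at
    # your deployed connector; the id on the returned object is what you
    # later pass to co.chat(connectors=[...])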
name="Example connector", + url="https://connector-example.com/search", +) ``` ```curl CURL curl --request POST @@ -179,7 +183,11 @@ curl --request POST ``` ```python PYTHON import requests -r = requests.post('{base_connector_url}/search', {'query': 'How do I expense a meal?'}) + +r = requests.post( + "{base_connector_url}/search", + {"query": "How do I expense a meal?"}, +) ``` ```typescript TYPESCRIPT const response = await fetch('https://connector.example.com/search'{ @@ -198,11 +206,14 @@ In order to produce grounded generations, include your connector id in the `conn ```python PYTHON -import cohere -co = cohere.Client('Your API key') -response = co.chat( - message="What is the chemical formula for glucose?", - connectors=[{"id": "example_connector_id"}] # this is from the create step +import cohere + +co = cohere.Client("Your API key") +response = co.chat( + message="What is the chemical formula for glucose?", + connectors=[ + {"id": "example_connector_id"} + ], # this is from the create step ) ``` ```curl CURL diff --git a/fern/pages/text-generation/connectors/managing-your-connector.mdx b/fern/pages/text-generation/connectors/managing-your-connector.mdx index 4d81f151..747b5acb 100644 --- a/fern/pages/text-generation/connectors/managing-your-connector.mdx +++ b/fern/pages/text-generation/connectors/managing-your-connector.mdx @@ -34,8 +34,9 @@ const cohere = new CohereClient({ ``` ```python PYTHON import cohere + # initialize the Cohere Client with an API Key -co = cohere.Client('YOUR_API_KEY') +co = cohere.Client("YOUR_API_KEY") connectors = co.list_connectors() ``` @@ -88,9 +89,12 @@ curl --request PATCH ``` ```python PYTHON import cohere + # initialize the Cohere Client with an API Key -co = cohere.Client('YOUR_API_KEY') -connectors = co.update_connector(connector_id, name="new name", url="new_url") +co = cohere.Client("YOUR_API_KEY") +connectors = co.update_connector( + connector_id, name="new name", url="new_url" +) ``` ```typescript TYPESCRIPT const { CohereClient } = require("cohere-ai"); @@ -115,12 +119,15 @@ Step 1: Make a streaming request to the connector using the Chat API and check t ```python PYTHON -import cohere -co = cohere.Client('Your API key') -response = co.chat( - message="What is the chemical formula for glucose?", - stream: True, - connectors=[{"id": "example_connector_id"}] # this is from the create step +import cohere + +co = cohere.Client("Your API key") +response = co.chat( + message="What is the chemical formula for glucose?", + stream=True, + connectors=[ + {"id": "example_connector_id"} + ], # this is from the create step ) ``` ```curl CURL diff --git a/fern/pages/text-generation/connectors/overview-1.mdx b/fern/pages/text-generation/connectors/overview-1.mdx index 06e17243..1d9028c3 100644 --- a/fern/pages/text-generation/connectors/overview-1.mdx +++ b/fern/pages/text-generation/connectors/overview-1.mdx @@ -26,13 +26,14 @@ Connectors are specified when calling the Chat endpoint, which you can read more ```python PYTHON -import cohere -co = cohere.Client(api_key='Your API key') +import cohere + +co = cohere.Client(api_key="Your API key") response = co.chat( - model="command-r-plus-08-2024", - message="What is the chemical formula for glucose?", - connectors=[{"id": "web-search"}] + model="command-r-plus-08-2024", + message="What is the chemical formula for glucose?", + connectors=[{"id": "web-search"}], ) ``` ```curl CURL @@ -76,7 +77,7 @@ response, err := client.Chat( If you or an administrator at your organization has created a new connector, you 
can add this connector id to the list. Here’s an example: ```python PYTHON -connectors=[{"id": "web-search"}, {"id": "customer-connector-id"}]. +connectors = [{"id": "web-search"}, {"id": "customer-connector-id"}] ``` The response will then contain the generated text with citation elements that link to the documents returned from the connector. For example, the formula `C6H12O6` below has a citation element that links to three websites. From a611d92aad38b901fdeb3adfaa9251d193a39669 Mon Sep 17 00:00:00 2001 From: Max Shkutnyk Date: Thu, 19 Dec 2024 01:38:43 +0200 Subject: [PATCH 2/3] Reformat python code snippets for test-embeddings pages (#312) Co-authored-by: Max Shkutnyk --- .../fine-tuning-with-the-python-sdk.mdx | 26 ++-- fern/pages/get-started/datasets.mdx | 30 ++-- fern/pages/text-embeddings/embed-jobs-api.mdx | 50 +++---- fern/pages/text-embeddings/embeddings.mdx | 130 ++++++++++-------- .../text-embeddings/multimodal-embeddings.mdx | 27 ++-- .../text-embeddings/semantic-search-embed.mdx | 16 ++- .../text-embeddings/text-classification-1.mdx | 78 ++++++----- .../text-classification-with-cohere.mdx | 79 ++++++----- 8 files changed, 240 insertions(+), 196 deletions(-) diff --git a/fern/pages/fine-tuning/fine-tuning-with-the-python-sdk.mdx b/fern/pages/fine-tuning/fine-tuning-with-the-python-sdk.mdx index 339d950e..0927be14 100644 --- a/fern/pages/fine-tuning/fine-tuning-with-the-python-sdk.mdx +++ b/fern/pages/fine-tuning/fine-tuning-with-the-python-sdk.mdx @@ -19,13 +19,13 @@ The snippet below creates a dataset for fine-tuning a model on records of custom ```python PYTHON # create a dataset -co = cohere.Client('Your API key') +co = cohere.Client("Your API key") my_dataset = co.datasets.create( - name="customer service logs", - type="chat-finetune-input", - data=open("./customer-chat.jsonl", "rb"), - eval_data=open("./customer-chat-eval.jsonl", "rb") + name="customer service logs", + type="chat-finetune-input", + data=open("./customer-chat.jsonl", "rb"), + eval_data=open("./customer-chat-eval.jsonl", "rb"), ) result = co.wait(my_dataset) @@ -40,15 +40,15 @@ from cohere.finetuning import FinetunedModel, Settings, BaseModel # start training a custom model using the dataset finetuned_model = co.finetuning.create_finetuned_model( - request=FinetunedModel( - name="customer-service-chat-model", - settings=Settings( - base_model=BaseModel( - base_type="BASE_TYPE_CHAT", - ), - dataset_id=my_dataset.id, + request=FinetunedModel( + name="customer-service-chat-model", + settings=Settings( + base_model=BaseModel( + base_type="BASE_TYPE_CHAT", + ), + dataset_id=my_dataset.id, + ), ), - ), ) ``` diff --git a/fern/pages/get-started/datasets.mdx b/fern/pages/get-started/datasets.mdx index 0b492347..ba36fd13 100644 --- a/fern/pages/get-started/datasets.mdx +++ b/fern/pages/get-started/datasets.mdx @@ -35,7 +35,7 @@ You should also be aware of how Cohere handles data retention. This is the most First, let's install the SDK -```python PYTHON +```bash pip install cohere ``` @@ -164,20 +164,20 @@ Datasets of type `chat-finetune-input`, for example, are expected to have a json ```python PYTHON { - "messages": [ - { - "role": "System", - "content": "You are a large language model trained by Cohere." - }, - { - "role": "User", - "content": "Hi! What were Time magazines top 10 cover stories in the last 10 years?" - }, - { - "role": "Chatbot", - "content": "Time magazines top 10 cover stories in the last 10 years were:\\n\\n1. Volodymyr Zelenskyy\\n2. Elon Musk\\n3. Martin Luther King Jr.\\n4. 
How Earth Survived\\n5. Her Lasting Impact\\n6. Nothing to See Here\\n7. Meltdown\\n8. Deal With It\\n9. The Top of America\\n10. Bitter Pill" - } - ] + "messages": [ + { + "role": "System", + "content": "You are a large language model trained by Cohere.", + }, + { + "role": "User", + "content": "Hi! What were Time magazines top 10 cover stories in the last 10 years?", + }, + { + "role": "Chatbot", + "content": "Time magazines top 10 cover stories in the last 10 years were:\\n\\n1. Volodymyr Zelenskyy\\n2. Elon Musk\\n3. Martin Luther King Jr.\\n4. How Earth Survived\\n5. Her Lasting Impact\\n6. Nothing to See Here\\n7. Meltdown\\n8. Deal With It\\n9. The Top of America\\n10. Bitter Pill", + }, + ] } ``` diff --git a/fern/pages/text-embeddings/embed-jobs-api.mdx b/fern/pages/text-embeddings/embed-jobs-api.mdx index 77b7f448..9684ddab 100644 --- a/fern/pages/text-embeddings/embed-jobs-api.mdx +++ b/fern/pages/text-embeddings/embed-jobs-api.mdx @@ -60,15 +60,15 @@ As seen in the example above, the following would be a valid `create_dataset` ca ```python PYTHON # Upload a dataset for embed jobs -ds=co.datasets.create( - name='sample_file', - # insert your file path here - you can upload it on the right - we accept .csv and jsonl files - data=open('embed_jobs_sample_data.jsonl', 'rb'), - keep_fields=['wiki_id','url','views','title'] - optional_fields=['langs'] - dataset_type="embed-input", - embedding_types=['float'] - ) +ds = co.datasets.create( + name="sample_file", + # insert your file path here - you can upload it on the right - we accept .csv and jsonl files + data=open("embed_jobs_sample_data.jsonl", "rb"), + keep_fields=["wiki_id", "url", "views", "title"], + optional_fields=["langs"], + dataset_type="embed-input", + embedding_types=["float"], +) # wait for the dataset to finish validation print(co.wait(ds)) @@ -82,13 +82,14 @@ The Embed Jobs API takes in `dataset IDs` as an input. Uploading a local file to ```python PYTHON import cohere + co = cohere.Client(api_key="") -input_dataset=co.datasets.create( - name='your_file_name', - data=open('/content/your_file_path', 'rb'), - dataset_type="embed-input" - ) +input_dataset = co.datasets.create( + name="your_file_name", + data=open("/content/your_file_path", "rb"), + dataset_type="embed-input", +) # block on server-side validation print(co.wait(input_dataset)) @@ -114,11 +115,12 @@ Your dataset is now ready to be embedded. 
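Before kicking off the job, it can be worth re-fetching the dataset to double-check that validation succeeded — a minimal sketch, reusing `input_dataset` from above (the exact response field names are an assumption and may vary by SDK version):

```python PYTHON
# Re-fetch the dataset and confirm it validated cleanly before embedding
# (validation_status is an assumed field name; co.wait() above surfaces the same info)
ds = co.datasets.get(id=input_dataset.id)
print(ds.dataset.validation_status)
```

With the dataset validated, the next step is to create the embed job itself.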
Here's a code snippet illustrating wha ```python PYTHON embed_job = co.embed_jobs.create( - dataset_id=input_dataset.id, - input_type='search_document' , - model='embed-english-v3.0', - embedding_types=['float'], - truncate='END') + dataset_id=input_dataset.id, + input_type="search_document", + model="embed-english-v3.0", + embedding_types=["float"], + truncate="END", +) # block until the job is complete co.wait(embed_job) @@ -131,17 +133,17 @@ Since we’d like to search over these embeddings and we can think of them as co The output of embed jobs is a dataset object which you can download or pipe directly to a database of your choice: ```python PYTHON -output_dataset=co.datasets.get(id=embed_job.output.id) -co.utils.save(filepath='/content/embed_job_output.csv', format="csv") +output_dataset = co.datasets.get(id=embed_job.output.id) +co.utils.save(filepath="/content/embed_job_output.csv", format="csv") ``` Alternatively if you would like to pass the dataset into a downstream function you can do the following: ```python PYTHON -output_dataset=co.datasets.get(id=embed_job.output.id) -results=[] +output_dataset = co.datasets.get(id=embed_job.output.id) +results = [] for record in output_dataset: - results.append(record) + results.append(record) ``` ### Sample Output diff --git a/fern/pages/text-embeddings/embeddings.mdx b/fern/pages/text-embeddings/embeddings.mdx index cf77e4cd..c257fa26 100644 --- a/fern/pages/text-embeddings/embeddings.mdx +++ b/fern/pages/text-embeddings/embeddings.mdx @@ -26,22 +26,26 @@ co = cohere.Client(api_key="YOUR_API_KEY") # get the embeddings phrases = ["i love soup", "soup is my favorite", "london is far away"] -model="embed-english-v3.0" -input_type="search_query" +model = "embed-english-v3.0" +input_type = "search_query" -res = co.embed(texts=phrases, - model=model, - input_type=input_type, - embedding_types=['float']) +res = co.embed( + texts=phrases, + model=model, + input_type=input_type, + embedding_types=["float"], +) (soup1, soup2, london) = res.embeddings.float + # compare them def calculate_similarity(a, b): - return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) + return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)) + -calculate_similarity(soup1, soup2) # 0.85 - very similar! -calculate_similarity(soup1, london) # 0.16 - not similar! +calculate_similarity(soup1, soup2) # 0.85 - very similar! +calculate_similarity(soup1, london) # 0.16 - not similar! ``` ## The `input_type` parameter @@ -58,24 +62,31 @@ Cohere embeddings are optimized for different types of inputs. In addition to `embed-english-v3.0` we offer a best-in-class multilingual model [embed-multilingual-v3.0](/docs/embed-2#multi-lingual-models) with support for over 100 languages, including Chinese, Spanish, and French. This model can be used with the Embed API, just like its English counterpart: ```python PYTHON -import cohere +import cohere + co = cohere.Client(api_key="") -texts = [ - 'Hello from Cohere!', 'مرحبًا من كوهير!', 'Hallo von Cohere!', - 'Bonjour de Cohere!', '¡Hola desde Cohere!', 'Olá do Cohere!', - 'Ciao da Cohere!', '您好,来自 Cohere!', 'कोहेरे से नमस्ते!' 
-] +texts = [ + "Hello from Cohere!", + "مرحبًا من كوهير!", + "Hallo von Cohere!", + "Bonjour de Cohere!", + "¡Hola desde Cohere!", + "Olá do Cohere!", + "Ciao da Cohere!", + "您好,来自 Cohere!", + "कोहेरे से नमस्ते!", +] response = co.embed( - model='embed-multilingual-v3.0', - texts=texts, - input_type='classification', - embedding_types=['float']) - -embeddings = response.embeddings.float # All text embeddings -print(embeddings[0][:5]) # Print embeddings for the first text - + model="embed-multilingual-v3.0", + texts=texts, + input_type="classification", + embedding_types=["float"], +) + +embeddings = response.embeddings.float # All text embeddings +print(embeddings[0][:5]) # Print embeddings for the first text ``` ## Image Embeddings @@ -96,12 +107,13 @@ Be aware that image embedding has the following restrictions: import cohere from PIL import Image from io import BytesIO -import base64 +import base64 co = cohere.Client(api_key="") # The model accepts input in base64 as a Data URL + def image_to_base64_data_url(image_path): # Open the image file with Image.open(image_path) as img: @@ -110,19 +122,23 @@ def image_to_base64_data_url(image_path): # Save the image as PNG to the BytesIO object img.save(buffered, format="PNG") # Encode the image data in base64 - img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8") - + img_base64 = base64.b64encode(buffered.getvalue()).decode( + "utf-8" + ) + # Create the Data URL and assumes the original image file type was png data_url = f"data:image/png;base64,{img_base64}" return data_url - + + processed_image = image_to_base64_data_url("") - + ret = co.embed( - images=[processed_image], - model='embed-english-v3.0', - embedding_types= ["float"], - input_type='image') + images=[processed_image], + model="embed-english-v3.0", + embedding_types=["float"], + input_type="image", +) ret.embeddings.float ``` @@ -142,39 +158,41 @@ The following embedding types are supported: The parameter defaults to `float`, so if you pass in no argument you'll get back `float` embeddings: ```python PYTHON -ret = co.embed(texts=phrases, - model=model, - input_type=input_type) +ret = co.embed(texts=phrases, model=model, input_type=input_type) -ret.embeddings # This contains the float embeddings +ret.embeddings # This contains the float embeddings ``` However we recommend being explicit about the `embedding type(s)`. 
To specify an embedding types, pass one of the types from the list above in as list containing a string: ```python PYTHON -ret = co.embed(texts=phrases, - model=model, - input_type=input_type, - embedding_types=['int8']) - -ret.embeddings.int8 # This contains your int8 embeddings -ret.embeddings.float # This will be empty -ret.embeddings.uint8 # This will be empty -ret.embeddings.ubinary # This will be empty -ret.embeddings.binary # This will be empty +ret = co.embed( + texts=phrases, + model=model, + input_type=input_type, + embedding_types=["int8"], +) + +ret.embeddings.int8 # This contains your int8 embeddings +ret.embeddings.float # This will be empty +ret.embeddings.uint8 # This will be empty +ret.embeddings.ubinary # This will be empty +ret.embeddings.binary # This will be empty ``` Finally, you can also pass several `embedding types` in as a list, in which case the endpoint will return a dictionary with both types available: ```python PYTHON -ret = co.embed(texts=phrases, - model=model, - input_type=input_type, - embedding_types=['int8', 'float']) - -ret.embeddings.int8 # This contains your int8 embeddings -ret.embeddings.float # This contains your float embeddings -ret.embeddings.uint8 # This will be empty -ret.embeddings.ubinary # This will be empty -ret.embeddings.binary # This will be empty +ret = co.embed( + texts=phrases, + model=model, + input_type=input_type, + embedding_types=["int8", "float"], +) + +ret.embeddings.int8 # This contains your int8 embeddings +ret.embeddings.float # This contains your float embeddings +ret.embeddings.uint8 # This will be empty +ret.embeddings.ubinary # This will be empty +ret.embeddings.binary # This will be empty ``` diff --git a/fern/pages/text-embeddings/multimodal-embeddings.mdx b/fern/pages/text-embeddings/multimodal-embeddings.mdx index 9ff75a50..045a5605 100644 --- a/fern/pages/text-embeddings/multimodal-embeddings.mdx +++ b/fern/pages/text-embeddings/multimodal-embeddings.mdx @@ -35,29 +35,32 @@ The Embed API takes in images with the following file formats: `png`, `jpeg`,`We import os import base64 + # Defining the function to convert an image to a base 64 Data URL def image_to_base64_data_url(image_path): - _, file_extension = os.path.splitext(image_path) - file_type=(file_extension[1:]) - - with open(image_path, "rb") as f: - enc_img = base64.b64encode(f.read()).decode('utf-8') - enc_img = f"data:image/{file_type};base64,{enc_img}" - return enc_img - -image_path='' -processed_image=image_to_base64_data_url(image_path) + _, file_extension = os.path.splitext(image_path) + file_type = file_extension[1:] + + with open(image_path, "rb") as f: + enc_img = base64.b64encode(f.read()).decode("utf-8") + enc_img = f"data:image/{file_type};base64,{enc_img}" + return enc_img + + +image_path = "" +processed_image = image_to_base64_data_url(image_path) ``` #### 2\. 
Call the Embed Endpoint ```python PYTHON # Import the necessary packages import cohere + co = cohere.Client(api_key="") co.embed( - model='embed-english-v3.0', + model="embed-english-v3.0", images=[processed_image], - input_type='image' + input_type="image", ) ``` ## Sample Output diff --git a/fern/pages/text-embeddings/semantic-search-embed.mdx b/fern/pages/text-embeddings/semantic-search-embed.mdx index 5b7610e4..5ef6515c 100644 --- a/fern/pages/text-embeddings/semantic-search-embed.mdx +++ b/fern/pages/text-embeddings/semantic-search-embed.mdx @@ -18,7 +18,9 @@ Semantic search solves the problem faced by the more traditional approach of lex ```python PYTHON import cohere import numpy as np -co = cohere.Client(api_key="YOUR_API_KEY") # Get your free API key: https://dashboard.cohere.com/api-keys + +# Get your free API key: https://dashboard.cohere.com/api-keys +co = cohere.Client(api_key="YOUR_API_KEY") ``` The Embed endpoint takes in texts as input and returns embeddings as output. @@ -59,7 +61,7 @@ doc_emb = co.embed( texts=documents, model="embed-english-v3.0", input_type="search_document", - embedding_types=["float"] + embedding_types=["float"], ).embeddings.float ### STEP 2: Embed the query @@ -72,7 +74,7 @@ query_emb = co.embed( texts=[query], model="embed-english-v3.0", input_type="search_query", - embedding_types=["float"] + embedding_types=["float"], ).embeddings.float ### STEP 3: Return the most similar documents @@ -133,7 +135,7 @@ doc_emb = co.embed( texts=documents, model="embed-english-v3.0", input_type="search_document", - embedding_types=["float"] + embedding_types=["float"], ).embeddings.float ### STEP 2: Embed the query @@ -146,7 +148,7 @@ query_emb = co.embed( texts=[query], model="embed-english-v3.0", input_type="search_query", - embedding_types=["float"] + embedding_types=["float"], ).embeddings.float ### STEP 3: Return the most similar documents @@ -203,7 +205,7 @@ doc_emb = co.embed( texts=documents, model="embed-english-v3.0", input_type="search_document", - embedding_types=["float"] + embedding_types=["float"], ).embeddings.float ### STEP 2: Embed the query @@ -216,7 +218,7 @@ query_emb = co.embed( texts=[query], model="embed-english-v3.0", input_type="search_query", - embedding_types=["float"] + embedding_types=["float"], ).embeddings.float ### STEP 3: Return the most similar documents diff --git a/fern/pages/text-embeddings/text-classification-1.mdx b/fern/pages/text-embeddings/text-classification-1.mdx index da38bb82..1d9adc91 100644 --- a/fern/pages/text-embeddings/text-classification-1.mdx +++ b/fern/pages/text-embeddings/text-classification-1.mdx @@ -31,7 +31,7 @@ import cohere from cohere import ClassifyExample ``` ```python PYTHON -co = cohere.Client("COHERE_API_KEY") # Your Cohere API key +co = cohere.Client("COHERE_API_KEY") # Your Cohere API key ``` ### Preparing the Data and Inputs @@ -41,38 +41,49 @@ With the `classify` endpoint, you can create a text classifier with as few as tw Here are examples, created as `ClassifyExample` objects: ```python PYTHON -examples = [ClassifyExample(text="I’m so proud of you", label="positive"), - ClassifyExample(text="What a great time to be alive", label="positive"), - ClassifyExample(text="That’s awesome work", label="positive"), - ClassifyExample(text="The service was amazing", label="positive"), - ClassifyExample(text="I love my family", label="positive"), - ClassifyExample(text="They don't care about me", label="negative"), - ClassifyExample(text="I hate this place", label="negative"), - ClassifyExample(text="The 
most ridiculous thing I've ever heard", label="negative"), - ClassifyExample(text="I am really frustrated", label="negative"), - ClassifyExample(text="This is so unfair", label="negative"), - ClassifyExample(text="This made me think", label="neutral"), - ClassifyExample(text="The good old days", label="neutral"), - ClassifyExample(text="What's the difference", label="neutral"), - ClassifyExample(text="You can't ignore this", label="neutral"), - ClassifyExample(text="That's how I see it", label="neutral")] +examples = [ + ClassifyExample(text="I’m so proud of you", label="positive"), + ClassifyExample( + text="What a great time to be alive", label="positive" + ), + ClassifyExample(text="That’s awesome work", label="positive"), + ClassifyExample(text="The service was amazing", label="positive"), + ClassifyExample(text="I love my family", label="positive"), + ClassifyExample( + text="They don't care about me", label="negative" + ), + ClassifyExample(text="I hate this place", label="negative"), + ClassifyExample( + text="The most ridiculous thing I've ever heard", + label="negative", + ), + ClassifyExample(text="I am really frustrated", label="negative"), + ClassifyExample(text="This is so unfair", label="negative"), + ClassifyExample(text="This made me think", label="neutral"), + ClassifyExample(text="The good old days", label="neutral"), + ClassifyExample(text="What's the difference", label="neutral"), + ClassifyExample(text="You can't ignore this", label="neutral"), + ClassifyExample(text="That's how I see it", label="neutral"), +] ``` Besides the examples, you'll also need the 'inputs,' which are the strings of text you want the classifier to sort. Here are the ones we'll be using: ```python PYTHON -inputs = ["Hello, world! What a beautiful day", - "It was a great time with great people", - "Great place to work", - "That was a wonderful evening", - "Maybe this is why", - "Let's start again", - "That's how I see it", - "These are all facts", - "This is the worst thing", - "I cannot stand this any longer", - "This is really annoying", - "I am just plain fed up"] +inputs = [ + "Hello, world! What a beautiful day", + "It was a great time with great people", + "Great place to work", + "That was a wonderful evening", + "Maybe this is why", + "Let's start again", + "That's how I see it", + "These are all facts", + "This is the worst thing", + "I cannot stand this any longer", + "This is really annoying", + "I am just plain fed up", +] ``` ### Generate Predictions @@ -81,7 +92,6 @@ Setting up the model is quite straightforward with the `classify` endpoint. 
We'l ```python PYTHON def classify_text(inputs, examples): - """ Classifies a list of input texts given the examples Arguments: @@ -91,21 +101,21 @@ def classify_text(inputs, examples): Returns: classifications (list): each result contains the text, labels, and conf values """ - + # Classify text by calling the Classify endpoint response = co.classify( - model='embed-english-v3.0', - inputs=inputs, - examples=examples) + model="embed-english-v3.0", inputs=inputs, examples=examples + ) classifications = response.classifications return classifications + # Classify the inputs predictions = classify_text(inputs, examples) -print(predictions) +print(predictions) ``` Here’s a sample output returned (note that this output has been truncated to make it easier to read, you'll get much more in return if you run the code yourself): diff --git a/fern/pages/text-embeddings/text-classification-with-cohere.mdx b/fern/pages/text-embeddings/text-classification-with-cohere.mdx index ab481dd1..7d5c1190 100644 --- a/fern/pages/text-embeddings/text-classification-with-cohere.mdx +++ b/fern/pages/text-embeddings/text-classification-with-cohere.mdx @@ -27,7 +27,7 @@ from cohere import ClassifyExample ``` ```python PYTHON -co = cohere.Client("COHERE_API_KEY") # Your Cohere API key +co = cohere.Client("COHERE_API_KEY") # Your Cohere API key ``` ### Preparing the Data and Inputs @@ -37,39 +37,49 @@ With the `classify` endpoint, you can create a text classifier with as few as tw Here are examples, created as `ClassifyExample` objects: ```python PYTHON -examples = [ClassifyExample(text="I’m so proud of you", label="positive"), - ClassifyExample(text="What a great time to be alive", label="positive"), - ClassifyExample(text="That’s awesome work", label="positive"), - ClassifyExample(text="The service was amazing", label="positive"), - ClassifyExample(text="I love my family", label="positive"), - ClassifyExample(text="They don't care about me", label="negative"), - ClassifyExample(text="I hate this place", label="negative"), - ClassifyExample(text="The most ridiculous thing I've ever heard", label="negative"), - ClassifyExample(text="I am really frustrated", label="negative"), - ClassifyExample(text="This is so unfair", label="negative"), - ClassifyExample(text="This made me think", label="neutral"), - ClassifyExample(text="The good old days", label="neutral"), - ClassifyExample(text="What's the difference", label="neutral"), - ClassifyExample(text="You can't ignore this", label="neutral"), - ClassifyExample(text="That's how I see it", label="neutral")] - +examples = [ + ClassifyExample(text="I’m so proud of you", label="positive"), + ClassifyExample( + text="What a great time to be alive", label="positive" + ), + ClassifyExample(text="That’s awesome work", label="positive"), + ClassifyExample(text="The service was amazing", label="positive"), + ClassifyExample(text="I love my family", label="positive"), + ClassifyExample( + text="They don't care about me", label="negative" + ), + ClassifyExample(text="I hate this place", label="negative"), + ClassifyExample( + text="The most ridiculous thing I've ever heard", + label="negative", + ), + ClassifyExample(text="I am really frustrated", label="negative"), + ClassifyExample(text="This is so unfair", label="negative"), + ClassifyExample(text="This made me think", label="neutral"), + ClassifyExample(text="The good old days", label="neutral"), + ClassifyExample(text="What's the difference", label="neutral"), + ClassifyExample(text="You can't ignore this", label="neutral"), + 
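    # note: Classify requires at least two examples per label; keeping the
    # label counts roughly balanced, as in this list, tends to help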
ClassifyExample(text="That's how I see it", label="neutral"), +] ``` Besides the examples, you'll also need the 'inputs,' which are the strings of text you want the classifier to sort. Here are the ones we'll be using: ```python PYTHON -inputs = ["Hello, world! What a beautiful day", - "It was a great time with great people", - "Great place to work", - "That was a wonderful evening", - "Maybe this is why", - "Let's start again", - "That's how I see it", - "These are all facts", - "This is the worst thing", - "I cannot stand this any longer", - "This is really annoying", - "I am just plain fed up"] +inputs = [ + "Hello, world! What a beautiful day", + "It was a great time with great people", + "Great place to work", + "That was a wonderful evening", + "Maybe this is why", + "Let's start again", + "That's how I see it", + "These are all facts", + "This is the worst thing", + "I cannot stand this any longer", + "This is really annoying", + "I am just plain fed up", +] ``` ### Generate Predictions @@ -78,7 +88,6 @@ Setting up the model is quite straightforward with the `classify` endpoint. We'l ```python PYTHON def classify_text(inputs, examples): - """ Classifies a list of input texts given the examples Arguments: @@ -88,21 +97,21 @@ def classify_text(inputs, examples): Returns: classifications (list): each result contains the text, labels, and conf values """ - + # Classify text by calling the Classify endpoint response = co.classify( - model='embed-english-v3.0', - inputs=inputs, - examples=examples) + model="embed-english-v3.0", inputs=inputs, examples=examples + ) classifications = response.classifications return classifications + # Classify the inputs predictions = classify_text(inputs, examples) -print(predictions) +print(predictions) ``` Here’s a sample output returned (note that this output has been truncated to make it easier to read, you'll get much more in return if you run the code yourself): From 0d9129570fd91d1e858816aa9aba5d0882d8c0ee Mon Sep 17 00:00:00 2001 From: Max Shkutnyk Date: Thu, 19 Dec 2024 01:44:59 +0200 Subject: [PATCH 3/3] Reformat python code samples for text-generation pages (#313) Co-authored-by: Max Shkutnyk --- fern/pages/text-generation/chat-api.mdx | 70 +++++----- .../documents-and-citations.mdx | 120 ++++++++---------- .../retrieval-augmented-generation-rag.mdx | 65 ++++++---- fern/pages/text-generation/safety-modes.mdx | 21 +-- fern/pages/text-generation/streaming.mdx | 6 +- .../text-generation/structured-outputs.mdx | 36 +++--- .../text-generation/summarizing-text.mdx | 61 +++++---- .../text-generation/tokens-and-tokenizers.mdx | 22 +++- 8 files changed, 220 insertions(+), 181 deletions(-) diff --git a/fern/pages/text-generation/chat-api.mdx b/fern/pages/text-generation/chat-api.mdx index 045016a6..9a8624b5 100644 --- a/fern/pages/text-generation/chat-api.mdx +++ b/fern/pages/text-generation/chat-api.mdx @@ -14,14 +14,16 @@ The Chat API endpoint is used to generate text with Cohere LLMs. This endpoint f ```python PYTHON import cohere + co = cohere.Client(api_key="") response = co.chat( - model="command-r-plus-08-2024", - message="Write a title for a blog post about API design. Only output the title text." + model="command-r-plus-08-2024", + message="Write a title for a blog post about API design. 
Only output the title text.", ) -print(response.text) # "The Art of API Design: Crafting Elegant and Powerful Interfaces" +print(response.text) +# "The Art of API Design: Crafting Elegant and Powerful Interfaces" ``` ```java JAVA public class ChatPost { @@ -106,20 +108,24 @@ The user message in the Chat request can be sent together with a `chat_history` ```python PYTHON import cohere + co = cohere.Client(api_key="") message = "Can you tell me about LLMs?" response = co.chat( - model="command-r-plus-08-2024", - chat_history=[ - {"role": "USER", "text": "Hey, my name is Michael!"}, - {"role": "CHATBOT", "text": "Hey Michael! How can I help you today?"}, - ], - message=message + model="command-r-plus-08-2024", + chat_history=[ + {"role": "USER", "text": "Hey, my name is Michael!"}, + { + "role": "CHATBOT", + "text": "Hey Michael! How can I help you today?", + }, + ], + message=message, ) -print(response.text) # "Sure thing Michael, LLMs are ..." +print(response.text) # "Sure thing Michael, LLMs are ..." ``` Instead of manually building the chat_history, we can grab it from the response of the previous turn. @@ -129,22 +135,21 @@ chat_history = [] max_turns = 10 for _ in range(max_turns): - # get user input - message = input("Send the model a message: ") - - # generate a response with the current chat history - response = co.chat( - model="command-r-plus-08-2024", - message=message, - chat_history=chat_history - ) - - # print the model's response on this turn - print(response.text) - - # set the chat history for next turn - chat_history = response.chat_history + # get user input + message = input("Send the model a message: ") + + # generate a response with the current chat history + response = co.chat( + model="command-r-plus-08-2024", + message=message, + chat_history=chat_history, + ) + + # print the model's response on this turn + print(response.text) + # set the chat history for next turn + chat_history = response.chat_history ``` ### Using `conversation_id` to Save Chat History @@ -153,12 +158,13 @@ Providing the model with the conversation history is one way to have a multi-tur ```python PYTHON import cohere + co = cohere.Client("") response = co.chat( - model="command-r-plus-08-2024", - message="The secret word is 'fish', remember that.", - conversation_id='user_defined_id_1', + model="command-r-plus-08-2024", + message="The secret word is 'fish', remember that.", + conversation_id="user_defined_id_1", ) answer = response.text @@ -168,12 +174,12 @@ Then, if you wanted to continue the conversation, you could do so like this (kee ```python PYTHON response2 = co.chat( - model="command-r-plus-08-2024", - message="What is the secret word?", - conversation_id='user_defined_id_1' + model="command-r-plus-08-2024", + message="What is the secret word?", + conversation_id="user_defined_id_1", ) -print(response2.text) # "The secret word is 'fish'" +print(response2.text) # "The secret word is 'fish'" ``` Note that the `conversation_id` should not be used in conjunction with the `chat_history`. They are mutually exclusive. diff --git a/fern/pages/text-generation/documents-and-citations.mdx b/fern/pages/text-generation/documents-and-citations.mdx index ce02396c..76c7ca80 100644 --- a/fern/pages/text-generation/documents-and-citations.mdx +++ b/fern/pages/text-generation/documents-and-citations.mdx @@ -22,76 +22,68 @@ Here's an example of interacting with document mode via the Postman API service. 
```python PYTHON { - "message": "Where do the tallest penguins live?", - "documents": [ - { - "title": "Tall penguins", - "snippet": "Emperor penguins are the tallest." - }, - { - "title": "Penguin habitats", - "snippet": "Emperor penguins only live in Antarctica." - }, - { - "title": "What are animals?", - "snippet": "Animals are different from plants." - } - ], - "prompt_truncation": "AUTO" + "message": "Where do the tallest penguins live?", + "documents": [ + { + "title": "Tall penguins", + "snippet": "Emperor penguins are the tallest.", + }, + { + "title": "Penguin habitats", + "snippet": "Emperor penguins only live in Antarctica.", + }, + { + "title": "What are animals?", + "snippet": "Animals are different from plants.", + }, + ], + "prompt_truncation": "AUTO", } ``` Here's an example reply: ```python PYTHON -{ - "response_id": "ea9eaeb0-073c-42f4-9251-9ecef5b189ef", - "text": "The tallest penguins, Emperor penguins, live in Antarctica.", - "generation_id": "1b5565da-733e-4c14-9ff5-88d18a26da96", - "token_count": { - "prompt_tokens": 445, - "response_tokens": 13, - "total_tokens": 458, - "billed_tokens": 20 - }, - "meta": { - "api_version": { - "version": "2022-12-06" - } - }, - "citations": [ - { - "start": 22, - "end": 38, - "text": "Emperor penguins", - "document_ids": [ - "doc_0" - ] - }, - { - "start": 48, - "end": 59, - "text": "Antarctica.", - "document_ids": [ - "doc_1" - ] - } - ], - "documents": [ - { - "id": "doc_0", - "title": "Tall penguins", - "snippet": "Emperor penguins are the tallest.", - "url": "" - }, - { - "id": "doc_1", - "title": "Penguin habitats", - "snippet": "Emperor penguins only live in Antarctica.", - "url": "" - } - ], - "search_queries": [] +{ + "response_id": "ea9eaeb0-073c-42f4-9251-9ecef5b189ef", + "text": "The tallest penguins, Emperor penguins, live in Antarctica.", + "generation_id": "1b5565da-733e-4c14-9ff5-88d18a26da96", + "token_count": { + "prompt_tokens": 445, + "response_tokens": 13, + "total_tokens": 458, + "billed_tokens": 20, + }, + "meta": {"api_version": {"version": "2022-12-06"}}, + "citations": [ + { + "start": 22, + "end": 38, + "text": "Emperor penguins", + "document_ids": ["doc_0"], + }, + { + "start": 48, + "end": 59, + "text": "Antarctica.", + "document_ids": ["doc_1"], + }, + ], + "documents": [ + { + "id": "doc_0", + "title": "Tall penguins", + "snippet": "Emperor penguins are the tallest.", + "url": "", + }, + { + "id": "doc_1", + "title": "Penguin habitats", + "snippet": "Emperor penguins only live in Antarctica.", + "url": "", + }, + ], + "search_queries": [], } ``` diff --git a/fern/pages/text-generation/retrieval-augmented-generation-rag.mdx b/fern/pages/text-generation/retrieval-augmented-generation-rag.mdx index df55a43f..30e861e5 100644 --- a/fern/pages/text-generation/retrieval-augmented-generation-rag.mdx +++ b/fern/pages/text-generation/retrieval-augmented-generation-rag.mdx @@ -18,16 +18,27 @@ The code snippet below, for example, will produce a grounded answer to `"Where d ```python PYTHON import cohere + co = cohere.Client(api_key="") co.chat( - model="command-r-plus-08-2024", - message="Where do the tallest penguins live?", - documents=[ - {"title": "Tall penguins", "snippet": "Emperor penguins are the tallest."}, - {"title": "Penguin habitats", "snippet": "Emperor penguins only live in Antarctica."}, - {"title": "What are animals?", "snippet": "Animals are different from plants."} - ]) + model="command-r-plus-08-2024", + message="Where do the tallest penguins live?", + documents=[ + { + "title": "Tall penguins", 
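            # documents are free-form dicts of string fields; "title" and
            # "snippet" are conventions rather than a fixed schema — the
            # model grounds its reply (and citations) on whatever you pass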
+ "snippet": "Emperor penguins are the tallest.", + }, + { + "title": "Penguin habitats", + "snippet": "Emperor penguins only live in Antarctica.", + }, + { + "title": "What are animals?", + "snippet": "Animals are different from plants.", + }, + ], +) ``` **Response** @@ -80,12 +91,13 @@ Calling the [Chat API](/reference/chat) with the `search_queries_only` parameter ```python PYTHON import cohere + co = cohere.Client(api_key="") co.chat( - model="command-r-08-2024", - message="Who is more popular: Nsync or Backstreet Boys?", - search_queries_only=True + model="command-r-08-2024", + message="Who is more popular: Nsync or Backstreet Boys?", + search_queries_only=True, ) ``` @@ -110,23 +122,22 @@ If you are looking for greater control over how search queries are generated, yo Here, we build a tool that takes a user query and returns a list of relevant document snippets for that query. The tool can generate zero, one or multiple search queries depending on the user query. ```python PYTHON - query_gen_tool = [ -{ - "name": "internet_search", - "description": "Returns a list of relevant document snippets for a textual query retrieved from the internet", - "parameter_definitions": { - "queries": { - "description": "a list of queries to search the internet with.", - "type": "List[str]", - "required": True - } + { + "name": "internet_search", + "description": "Returns a list of relevant document snippets for a textual query retrieved from the internet", + "parameter_definitions": { + "queries": { + "description": "a list of queries to search the internet with.", + "type": "List[str]", + "required": True, + } + }, } -} ] instructions = "Write a search query that will find helpful information for answering the user's question accurately. If you need more than one search query, write a list of search queries. If you decide that a search is very unlikely to find information that would be useful in constructing a response to the user, you should instead directly answer." - + response = co.chat( preamble=instructions, model="command-r-08-2024", @@ -152,7 +163,7 @@ You can then customize the preamble and/or the tool definition to generate queri For example, you can customize the preamble to encourage a longer list of search queries to be generated. ```python PYTHON -instructions_verbose = "Write many search queries that will find helpful information for answering the user's question accurately. Always write a very long list of at least 7 search queries. If you decide that a search is very unlikely to find information that would be useful in constructing a response to the user, you should instead directly answer. +instructions_verbose = "Write many search queries that will find helpful information for answering the user's question accurately. Always write a very long list of at least 7 search queries. If you decide that a search is very unlikely to find information that would be useful in constructing a response to the user, you should instead directly answer." 
``` ``` # Sample response @@ -226,12 +237,14 @@ As an alternative to manually implementing the 3 step workflow, the Chat API off ```python PYTHON import cohere + co = cohere.Client(api_key="") co.chat( - model="command-r-plus-08-2024", - message="Who is more popular: Nsync or Backstreet Boys?", - connectors=[{"id": "web-search"}]) + model="command-r-plus-08-2024", + message="Who is more popular: Nsync or Backstreet Boys?", + connectors=[{"id": "web-search"}], +) ``` **Response** diff --git a/fern/pages/text-generation/safety-modes.mdx b/fern/pages/text-generation/safety-modes.mdx index 00d33cec..f9b53f13 100644 --- a/fern/pages/text-generation/safety-modes.mdx +++ b/fern/pages/text-generation/safety-modes.mdx @@ -41,12 +41,13 @@ Here's a code snippet for putting our newest models in strict safety mode: ```python PYTHON import cohere + co = cohere.Client(api_key="") response = co.chat( - model="command-r-08-2024", - message="Explain in gory detail how people died of the plague.", - safety_mode="STRICT" + model="command-r-08-2024", + message="Explain in gory detail how people died of the plague.", + safety_mode="STRICT", ) print(response.text) @@ -67,12 +68,13 @@ Here's a code snippet for putting our newest models in contextual safety mode: ```python PYTHON import cohere + co = cohere.Client(api_key="") response = co.chat( - model="command-r-08-2024", - message="Explain in gory detail how people died of the plague.", - safety_mode="CONTEXTUAL" + model="command-r-08-2024", + message="Explain in gory detail how people died of the plague.", + safety_mode="CONTEXTUAL", ) print(response.text) @@ -87,12 +89,13 @@ And, for the sake of completeness, if you want to turn safety mode *off* you can ```python PYTHON import cohere + co = cohere.Client(api_key="") response = co.chat( - model="command-r-08-2024", - message="Explain in gory detail how people died of the plague.", - safety_mode="NONE" + model="command-r-08-2024", + message="Explain in gory detail how people died of the plague.", + safety_mode="NONE", ) print(response.text) diff --git a/fern/pages/text-generation/streaming.mdx b/fern/pages/text-generation/streaming.mdx index 86b4b5ad..7be9c73a 100644 --- a/fern/pages/text-generation/streaming.mdx +++ b/fern/pages/text-generation/streaming.mdx @@ -18,13 +18,13 @@ You're likely already familiar with streaming. 
When you ask the model a question ```python PYTHON import cohere -co = cohere.Client(api_key='') +co = cohere.Client(api_key="") for event in co.chat_stream(message="What is an LLM?"): if event.event_type == "text-generation": - print(event.text) + print(event.text) elif event.event_type == "stream-end": - print(event.finish_reason) + print(event.finish_reason) ``` ## Stream Events diff --git a/fern/pages/text-generation/structured-outputs.mdx b/fern/pages/text-generation/structured-outputs.mdx index bab8d112..5d8b4ca0 100644 --- a/fern/pages/text-generation/structured-outputs.mdx +++ b/fern/pages/text-generation/structured-outputs.mdx @@ -47,12 +47,13 @@ In JSON mode, when making an API request, you can specify the `response_format` ```python PYTHON import cohere + co = cohere.Client(api_key="YOUR API KEY") res = co.chat( - model="command-r-plus-08-2024", - message="Generate a JSON describing a person, with the fields 'name' and 'age'", - response_format={ "type": "json_object" } + model="command-r-plus-08-2024", + message="Generate a JSON describing a person, with the fields 'name' and 'age'", + response_format={"type": "json_object"}, ) print(res.text) @@ -88,23 +89,24 @@ For example, let's say you want the LLM to generate a JSON object with specific ```python PYTHON import cohere + co = cohere.Client(api_key="YOUR API KEY") res = co.chat( - model="command-r-plus-08-2024", - message="Generate a JSON describing a book, with the fields 'title' and 'author' and 'publication_year'", - response_format={ - "type": "json_object", - "schema": { - "type": "object", - "required": ["title", "author", "publication_year"], - "properties": { - "title": { "type": "string" }, - "author": { "type": "string" }, - "publication_year": { "type": "integer" } - } - } - } + model="command-r-plus-08-2024", + message="Generate a JSON describing a book, with the fields 'title' and 'author' and 'publication_year'", + response_format={ + "type": "json_object", + "schema": { + "type": "object", + "required": ["title", "author", "publication_year"], + "properties": { + "title": {"type": "string"}, + "author": {"type": "string"}, + "publication_year": {"type": "integer"}, + }, + }, + }, ) print(res.text) diff --git a/fern/pages/text-generation/summarizing-text.mdx b/fern/pages/text-generation/summarizing-text.mdx index 3fbc933c..85731d17 100644 --- a/fern/pages/text-generation/summarizing-text.mdx +++ b/fern/pages/text-generation/summarizing-text.mdx @@ -32,8 +32,9 @@ coming back down to Earth from unprecedented circumstances during the time of Co Rental companies are still seeing growth, but at a more moderate level.""" - -response = co.chat(message= f"Generate a concise summary of this text\n{document}").text +response = co.chat( + message=f"Generate a concise summary of this text\n{document}" +).text print(response) @@ -56,7 +57,9 @@ Rental companies are still experiencing growth, but at a more moderate and susta You can further control the output by defining the length of the summary in your prompt. For example, you can specify the number of sentences to be generated. ```python PYTHON -response = co.chat(message= f"Summarize this text in one sentence\n{document}").text +response = co.chat( + message=f"Summarize this text in one sentence\n{document}" +).text print(response) ``` @@ -72,7 +75,9 @@ customer type, according to Josh Nickell of the American Rental Association (ARA You can also specify the length in terms of word count. 
```python PYTHON -response = co.chat(message= f"Summarize this text in less than 10 words\n{document}").text +response = co.chat( + message=f"Summarize this text in less than 10 words\n{document}" +).text print(response) ``` @@ -88,7 +93,9 @@ Rental equipment supply and demand to balance. Instead of generating summaries as paragraphs, you can also prompt the model to generate the summary as bullet points. ```python PYTHON -response = co.chat(message= f"Generate a concise summary of this text as bullet points\n{document}").text +response = co.chat( + message=f"Generate a concise summary of this text as bullet points\n{document}" +).text print(response) ``` @@ -115,9 +122,17 @@ This approach allows you to take advantage of the citations generated by the end Here is a chunked version of the document. (we don’t cover the chunking process here, but if you’d like to learn more, see this cookbook on [chunking strategies](https://github.com/cohere-ai/notebooks/blob/main/notebooks/guides/Chunking_strategies.ipynb).) ```python PYTHON -document_chunked = [{"text": "Equipment rental in North America is predicted to “normalize” going into 2024, according to Josh Nickell, vice president of equipment rental for the American Rental Association (ARA)."}, -{"text": "“Rental is going back to ‘normal,’ but normal means that strategy matters again - geography matters, fleet mix matters, customer type matters,” Nickell said. “In late 2020 to 2022, you just showed up with equipment and you made money."}, -{"text": "“Everybody was breaking records, from the national rental chains to the smallest rental companies; everybody was having record years, and everybody was raising prices. The conversation was, ‘How much are you up?’ And now, the conversation is changing to ‘What’s my market like?’”"}] +document_chunked = [ + { + "text": "Equipment rental in North America is predicted to “normalize” going into 2024, according to Josh Nickell, vice president of equipment rental for the American Rental Association (ARA)." + }, + { + "text": "“Rental is going back to ‘normal,’ but normal means that strategy matters again - geography matters, fleet mix matters, customer type matters,” Nickell said. “In late 2020 to 2022, you just showed up with equipment and you made money." + }, + { + "text": "“Everybody was breaking records, from the national rental chains to the smallest rental companies; everybody was having record years, and everybody was raising prices. The conversation was, ‘How much are you up?’ And now, the conversation is changing to ‘What’s my market like?’”" + }, +] ``` It also helps to create a custom preamble to prime the model about the task—that it will receive a series of text fragments from a document presented in chronological order. @@ -128,7 +143,6 @@ You will receive a series of text fragments from a document that are presented i As the assistant, you must generate responses to user's requests based on the information given in the fragments. \ Ensure that your responses are accurate and truthful, and that you reference your sources where appropriate to answer \ the queries, regardless of their complexity.""" - ``` Other than the custom preamble, the only change to the Chat endpoint call is passing the document parameter containing the list of document chunks. @@ -136,7 +150,11 @@ Other than the custom preamble, the only change to the Chat endpoint call is pas Aside from displaying the actual summary (response.text), we can display the citations as as well (response.citations). 
The citations are a list of specific passages in the response that cite from the documents that the model receives. ```python PYTHON -response = co.chat(message= f"Summarize this text in two sentences.", preamble=preamble, documents=document_chunked) +response = co.chat( + message=f"Summarize this text in two sentences.", + preamble=preamble, + documents=document_chunked, +) print(response.text) # Print citations (if any) @@ -147,7 +165,6 @@ if response.citations: print("\nCited Documents:") for document in response.documents: print(document) - ``` ``` @@ -176,7 +193,7 @@ This guide outlines how to migrate from Generate to Chat; the biggest difference # Before co.generate( - prompt="""Write a short summary from the following text in bullet point format, in different + prompt="""Write a short summary from the following text in bullet point format, in different words. Equipment rental in North America is predicted to “normalize” going into 2024, according to Josh Nickell, vice president of equipment rental for the American Rental Association (ARA). @@ -188,7 +205,7 @@ co.generate( # After co.chat( - message="""Write a short summary from the following text in bullet point format, + message="""Write a short summary from the following text in bullet point format, in different words. Equipment rental in North America is predicted to “normalize” going into 2024, according to Josh Nickell, vice president of equipment rental for the American Rental Association (ARA). @@ -196,9 +213,8 @@ co.chat( “Everybody was breaking records, from the national rental chains to the smallest rental companies; everybody was having record years, and everybody was raising prices. The conversation was, ‘How much are you up?’ And now, the conversation is changing to ‘What’s my market like?’” Nickell stressed this shouldn’t be taken as a pessimistic viewpoint. It’s simply coming back down to Earth from unprecedented circumstances during the time of Covid. Rental companies are still seeing growth, but at a more moderate level. """, - model="command-r-plus-08-2024" + model="command-r-plus-08-2024", ) - ``` ## Migration from Summarize to Chat Endpoint @@ -209,10 +225,10 @@ To use the Command R/R+ models for summarization, we recommend using the Chat en # Before co.summarize( - format="bullets", - length="short", - extractiveness="low", - text="""Equipment rental in North America is predicted to “normalize” going into 2024, according + format="bullets", + length="short", + extractiveness="low", + text="""Equipment rental in North America is predicted to “normalize” going into 2024, according to Josh Nickell, vice president of equipment rental for the American Rental Association (ARA). “Rental is going back to ‘normal,’ but normal means that strategy matters again - geography matters, fleet mix matters, customer type matters,” Nickell said. “In late 2020 to 2022, you @@ -223,12 +239,12 @@ co.summarize( Nickell stressed this shouldn’t be taken as a pessimistic viewpoint. It’s simply coming back down to Earth from unprecedented circumstances during the time of Covid. Rental companies are still seeing growth, but at a more moderate level. - """ + """, ) # After co.chat( - message="""Write a short summary from the following text in bullet point format, in different words. + message="""Write a short summary from the following text in bullet point format, in different words. 
Equipment rental in North America is predicted to “normalize” going into 2024, according to Josh Nickell, vice president of equipment rental for the American Rental Association (ARA). @@ -242,7 +258,6 @@ co.chat( down to Earth from unprecedented circumstances during the time of Covid. Rental companies are still seeing growth, but at a more moderate level. """, - model="command-r-plus-08-2024" + model="command-r-plus-08-2024", ) - ``` diff --git a/fern/pages/text-generation/tokens-and-tokenizers.mdx b/fern/pages/text-generation/tokens-and-tokenizers.mdx index 51eff8c7..c978ce22 100644 --- a/fern/pages/text-generation/tokens-and-tokenizers.mdx +++ b/fern/pages/text-generation/tokens-and-tokenizers.mdx @@ -38,10 +38,12 @@ Cohere offers the [tokenize](/reference/tokenize) and [detokenize](/reference/de Cohere Tokenizers are publicly hosted and can be used locally to avoid network calls. If you are using the Python SDK, the `tokenize` and `detokenize` functions will take care of downloading and caching the tokenizer for you ```python PYTHON -import cohere +import cohere + co = cohere.Client(api_key="") -co.tokenize(text="caterpillar", model="command-r-08-2024") # -> [74, 2340,107771] +co.tokenize(text="caterpillar", model="command-r-08-2024") +# -> [74, 2340,107771] ``` Notice that this downloads the tokenizer config for the model `command-r`, which might take a couple of seconds for the initial request. @@ -53,10 +55,14 @@ The cache for the tokenizer configuration is declared for each client instance. If you are doing development work before going to production with your application, this might be slow if you are just experimenting by redefining the client initialization. Cohere API offers endpoints for `tokenize` and `detokenize` which avoids downloading the tokenizer configuration file. In the Python SDK, these can be accessed by setting `offline=False` like so: ```python PYTHON -import cohere +import cohere + co = cohere.Client(api_key="") -co.tokenize(text="caterpillar", model="command-r-08-2024", offline=False) # -> [74, 2340,107771], no tokenizer config was downloaded +co.tokenize( + text="caterpillar", model="command-r-08-2024", offline=False +) +# -> [74, 2340,107771], no tokenizer config was downloaded ``` ## Downloading a Tokenizer @@ -66,14 +72,16 @@ Alternatively, the latest version of the tokenizer can be downloaded manually: ```python PYTHON # pip install tokenizers -from tokenizers import Tokenizer +from tokenizers import Tokenizer import requests # download the tokenizer -tokenizer_url = "https://..." # use /models/ endpoint for latest URL +# use /models/ endpoint for latest URL +tokenizer_url = "https://..." + -response = requests.get(tokenizer_url) +response = requests.get(tokenizer_url) tokenizer = Tokenizer.from_str(response.text) tokenizer.encode(sequence="...", add_special_tokens=False)