From 1c0bac22482c5410345f02e52d1fd5c71d7ce169 Mon Sep 17 00:00:00 2001
From: Max Shkutnyk <invader89@gmail.com>
Date: Mon, 25 Nov 2024 19:03:57 +0200
Subject: [PATCH] Format code samples (#257)

* format code samples; remove non-Python notebook entries (`!pip install`, `%%time`) from Python code samples

* auto-format more code snippets

* Fix pip install snippets

* rename check-mdx-frontmatter.js to check-mdx-frontmatter.cjs

---------

Co-authored-by: Max Shkutnyk <max@lightsonsoftware.com>
Co-authored-by: trentfowlercohere <141260477+trentfowlercohere@users.noreply.github.com>
---
 ...ontmatter.js => check-mdx-frontmatter.cjs} |  0
 .github/workflows/check-mdx-frontmatter.yml   |  2 +-
 .../cookbooks/convfinqa-finetuning-wandb.mdx  |  4 +-
 ...deploy-finetuned-model-aws-marketplace.mdx | 11 ++-
 .../pages/cookbooks/finetune-on-sagemaker.mdx |  7 +-
 fern/pages/cookbooks/rag-cohere-mongodb.mdx   | 86 +++++++++----------
 .../retrieval-augmented-generation-rag.mdx    |  6 +-
 .../structured-outputs-json.mdx               |  4 +-
 .../generating-multi-faceted-queries.mdx      | 10 +--
 9 files changed, 65 insertions(+), 65 deletions(-)
 rename .github/scripts/{check-mdx-frontmatter.js => check-mdx-frontmatter.cjs} (100%)

diff --git a/.github/scripts/check-mdx-frontmatter.js b/.github/scripts/check-mdx-frontmatter.cjs
similarity index 100%
rename from .github/scripts/check-mdx-frontmatter.js
rename to .github/scripts/check-mdx-frontmatter.cjs
diff --git a/.github/workflows/check-mdx-frontmatter.yml b/.github/workflows/check-mdx-frontmatter.yml
index dcf134ea..ae23f208 100644
--- a/.github/workflows/check-mdx-frontmatter.yml
+++ b/.github/workflows/check-mdx-frontmatter.yml
@@ -26,4 +26,4 @@ jobs:
         run: pnpm install
 
       - name: Run MDX frontmatter check
-        run: node .github/scripts/check-mdx-frontmatter.js
+        run: node .github/scripts/check-mdx-frontmatter.cjs
diff --git a/fern/pages/cookbooks/convfinqa-finetuning-wandb.mdx b/fern/pages/cookbooks/convfinqa-finetuning-wandb.mdx
index 63943717..d4e29412 100644
--- a/fern/pages/cookbooks/convfinqa-finetuning-wandb.mdx
+++ b/fern/pages/cookbooks/convfinqa-finetuning-wandb.mdx
@@ -55,10 +55,10 @@ from cohere.finetuning import (
 )
 
 # fill in your Cohere API key here
-os.environ['COHERE_API_KEY'] = "<COHERE_API_KEY>" 
+os.environ["COHERE_API_KEY"] = "<COHERE_API_KEY>"
 
 # instantiate the Cohere client
-co = cohere.Client(os.environ['COHERE_API_KEY'])
+co = cohere.Client(os.environ["COHERE_API_KEY"])
 ```
 
 ## Dataset
diff --git a/fern/pages/cookbooks/deploy-finetuned-model-aws-marketplace.mdx b/fern/pages/cookbooks/deploy-finetuned-model-aws-marketplace.mdx
index 3318e9a9..d82b98ac 100644
--- a/fern/pages/cookbooks/deploy-finetuned-model-aws-marketplace.mdx
+++ b/fern/pages/cookbooks/deploy-finetuned-model-aws-marketplace.mdx
@@ -71,8 +71,8 @@ To subscribe to the algorithm:
 Install the Python packages you will use below and import them. For example, you can run the command below to install `cohere` if you haven't done so.
 
 
-```python
-!pip install "cohere>=5.11.0"
+```sh
+pip install "cohere>=5.11.0"
 ```
 
 
@@ -200,9 +200,10 @@ save_hf_model(merged_weights_dir, merged_model)
 
 
 ```python
-%%time
 sess = sage.Session()
-merged_weights = S3Uploader.upload(merged_weights_dir, s3_checkpoint_dir, sagemaker_session=sess)
+merged_weights = S3Uploader.upload(
+    merged_weights_dir, s3_checkpoint_dir, sagemaker_session=sess
+)
 print("merged_weights", merged_weights)
 ```
 
@@ -213,7 +214,6 @@ Create Cohere client and use it to export the merged weights to the TensorRT-LLM
 
 
 ```python
-%%time
 co = cohere.SagemakerClient(aws_region=region)
 co.sagemaker_finetuning.export_finetune(
     arn=arn,
@@ -232,7 +232,6 @@ The Cohere client provides a built-in method to create an endpoint for inference
 
 
 ```python
-%%time
 co.sagemaker_finetuning.create_endpoint(
     arn=arn,
     endpoint_name=endpoint_name,
diff --git a/fern/pages/cookbooks/finetune-on-sagemaker.mdx b/fern/pages/cookbooks/finetune-on-sagemaker.mdx
index 5d11a04f..329cee60 100644
--- a/fern/pages/cookbooks/finetune-on-sagemaker.mdx
+++ b/fern/pages/cookbooks/finetune-on-sagemaker.mdx
@@ -58,10 +58,11 @@ To subscribe to the model algorithm:
 2. On the AWS Marketplace listing, click on the **Continue to Subscribe** button.
 3. On the **Subscribe to this software** page, review and click on **"Accept Offer"** if you and your organization agrees with EULA, pricing, and support terms. On the "Configure and launch" page, make sure ARN displayed in your region match with the ARN in the following cell.
 
+```sh
+pip install "cohere>=5.11.0"
+```
 
 ```python
-!pip install "cohere>=5.11.0"
-
 import cohere
 import boto3
 import sagemaker as sage
@@ -297,7 +298,7 @@ from tqdm import tqdm
 total = 0
 correct = 0
 for line in tqdm(
-    open('./sample_finetune_scienceQA_eval.jsonl').readlines()
+    open("./sample_finetune_scienceQA_eval.jsonl").readlines()
 ):
     total += 1
     question_answer_json = json.loads(line)
diff --git a/fern/pages/cookbooks/rag-cohere-mongodb.mdx b/fern/pages/cookbooks/rag-cohere-mongodb.mdx
index 68e29962..a033294a 100644
--- a/fern/pages/cookbooks/rag-cohere-mongodb.mdx
+++ b/fern/pages/cookbooks/rag-cohere-mongodb.mdx
@@ -52,8 +52,8 @@ Libraries:
 
 
 
-```python
-!pip install --quiet datasets tqdm cohere pymongo
+```sh
+pip install --quiet datasets tqdm cohere pymongo
 ```
 
 
@@ -183,11 +183,11 @@ def combine_attributes(row):
     combined = f"{row['company']} {row['sector']} "
 
     # Add reports information
-    for report in row['reports']:
+    for report in row["reports"]:
         combined += f"{report['year']} {report['title']} {report['author']} {report['content']} "
 
     # Add recent news information
-    for news in row['recent_news']:
+    for news in row["recent_news"]:
         combined += f"{news['headline']} {news['summary']} "
 
     return combined.strip()
@@ -196,7 +196,7 @@ def combine_attributes(row):
 
 ```python
 # Add the new column 'combined_attributes'
-dataset_df['combined_attributes'] = dataset_df.apply(
+dataset_df["combined_attributes"] = dataset_df.apply(
     combine_attributes, axis=1
 )
 ```
@@ -204,7 +204,7 @@ dataset_df['combined_attributes'] = dataset_df.apply(
 
 ```python
 # Display the first few rows of the updated dataframe
-dataset_df[['company', 'ticker', 'combined_attributes']].head()
+dataset_df[["company", "ticker", "combined_attributes"]].head()
 ```
 
 <div>
@@ -270,7 +270,7 @@ def get_embedding(
         texts=[text],
         model=model,
         input_type=input_type,  # Used for embeddings of search queries run against a vector DB to find relevant documents
-        embedding_types=['float'],
+        embedding_types=["float"],
     )
 
     return response.embeddings.float[0]
@@ -279,7 +279,7 @@ def get_embedding(
 # Apply the embedding function with a progress bar
 tqdm.pandas(desc="Generating embeddings")
 dataset_df["embedding"] = dataset_df[
-    'combined_attributes'
+    "combined_attributes"
 ].progress_apply(get_embedding)
 
 print(f"We just computed {len(dataset_df['embedding'])} embeddings.")
@@ -421,8 +421,8 @@ def get_mongo_client(mongo_uri):
     )
 
     # Validate the connection
-    ping_result = client.admin.command('ping')
-    if ping_result.get('ok') == 1.0:
+    ping_result = client.admin.command("ping")
+    if ping_result.get("ok") == 1.0:
         # Connection successful
         print("Connection to MongoDB successful")
         return client
@@ -478,7 +478,7 @@ MongoDB's Document model and its compatibility with Python dictionaries offer se
 ![](../../assets/images/rag-cohere-mongodb-4.png)
 
 ```python
-documents = dataset_df.to_dict('records')
+documents = dataset_df.to_dict("records")
 collection.insert_many(documents)
 
 print("Data ingestion into MongoDB completed")
@@ -592,13 +592,13 @@ def rerank_documents(query: str, documents, top_n: int = 3):
             original_doc = documents[result.index]
             top_documents_after_rerank.append(
                 {
-                    'company': original_doc['company'],
-                    'combined_attributes': original_doc[
-                        'combined_attributes'
+                    "company": original_doc["company"],
+                    "combined_attributes": original_doc[
+                        "combined_attributes"
                     ],
-                    'reports': original_doc['reports'],
-                    'vector_search_score': original_doc['score'],
-                    'relevance_score': result.relevance_score,
+                    "reports": original_doc["reports"],
+                    "vector_search_score": original_doc["score"],
+                    "relevance_score": result.relevance_score,
                 }
             )
 
@@ -724,9 +724,9 @@ pd.DataFrame(reranked_documents).head()
 def format_documents_for_chat(documents):
     return [
         {
-            "company": doc['company'],
+            "company": doc["company"],
             # "reports": doc['reports'],
-            "combined_attributes": doc['combined_attributes'],
+            "combined_attributes": doc["combined_attributes"],
         }
         for doc in documents
     ]
@@ -825,7 +825,7 @@ class CohereChat:
         # Use the connection string from history_params
         self.client = pymongo.MongoClient(
             self.history_params.get(
-                'connection_string', 'mongodb://localhost:27017/'
+                "connection_string", "mongodb://localhost:27017/"
             )
         )
 
@@ -838,34 +838,34 @@ class CohereChat:
         # Use the history_collection from history_params, or default to "chat_history"
         self.history_collection = self.db[
             self.history_params.get(
-                'history_collection', 'chat_history'
+                "history_collection", "chat_history"
             )
         ]
 
         # Use the session_id from history_params, or default to "default_session"
         self.session_id = self.history_params.get(
-            'session_id', 'default_session'
+            "session_id", "default_session"
         )
 
     def add_to_history(self, message: str, prefix: str = ""):
         self.history_collection.insert_one(
             {
-                'session_id': self.session_id,
-                'message': message,
-                'prefix': prefix,
+                "session_id": self.session_id,
+                "message": message,
+                "prefix": prefix,
             }
         )
 
     def get_chat_history(self) -> List[Dict[str, str]]:
         history = self.history_collection.find(
-            {'session_id': self.session_id}
-        ).sort('_id', 1)
+            {"session_id": self.session_id}
+        ).sort("_id", 1)
         return [
             {
                 "role": (
-                    "user" if item['prefix'] == "USER" else "chatbot"
+                    "user" if item["prefix"] == "USER" else "chatbot"
                 ),
-                "message": item['message'],
+                "message": item["message"],
             }
             for item in history
         ]
@@ -875,11 +875,11 @@ class CohereChat:
     ) -> List[Dict]:
         rerank_docs = [
             {
-                'company': doc['company'],
-                'combined_attributes': doc['combined_attributes'],
+                "company": doc["company"],
+                "combined_attributes": doc["combined_attributes"],
             }
             for doc in documents
-            if doc['combined_attributes'].strip()
+            if doc["combined_attributes"].strip()
         ]
 
         if not rerank_docs:
@@ -897,11 +897,11 @@ class CohereChat:
 
             top_documents_after_rerank = [
                 {
-                    'company': rerank_docs[result.index]['company'],
-                    'combined_attributes': rerank_docs[result.index][
-                        'combined_attributes'
+                    "company": rerank_docs[result.index]["company"],
+                    "combined_attributes": rerank_docs[result.index][
+                        "combined_attributes"
                     ],
-                    'relevance_score': result.relevance_score,
+                    "relevance_score": result.relevance_score,
                 }
                 for result in response.results
             ]
@@ -925,8 +925,8 @@ class CohereChat:
     ) -> List[Dict]:
         return [
             {
-                "company": doc['company'],
-                "combined_attributes": doc['combined_attributes'],
+                "company": doc["company"],
+                "combined_attributes": doc["combined_attributes"],
             }
             for doc in documents
         ]
@@ -972,8 +972,8 @@ class CohereChat:
 
     def show_history(self):
         history = self.history_collection.find(
-            {'session_id': self.session_id}
-        ).sort('_id', 1)
+            {"session_id": self.session_id}
+        ).sort("_id", 1)
         for item in history:
             print(f"{item['prefix']}: {item['message']}")
             print("-------------------------")
@@ -988,9 +988,9 @@ chat = CohereChat(
     database=DB_NAME,
     main_collection=COLLECTION_NAME,
     history_params={
-        'connection_string': MONGO_URI,
-        'history_collection': "chat_history",
-        'session_id': 2,
+        "connection_string": MONGO_URI,
+        "history_collection": "chat_history",
+        "session_id": 2,
     },
 )
 
diff --git a/fern/pages/v2/text-generation/retrieval-augmented-generation-rag.mdx b/fern/pages/v2/text-generation/retrieval-augmented-generation-rag.mdx
index f6c64375..9b3c673f 100644
--- a/fern/pages/v2/text-generation/retrieval-augmented-generation-rag.mdx
+++ b/fern/pages/v2/text-generation/retrieval-augmented-generation-rag.mdx
@@ -56,7 +56,7 @@ documents = [
             "title": "What are animals?",
             "snippet": "Animals are different from plants.",
         }
-    }
+    },
 ]
 
 # Add the user message
@@ -66,7 +66,7 @@ messages = [{"role": "user", "content": message}]
 response = co.chat(
     model="command-r-plus-08-2024",
     messages=messages,
-    documents=documents
+    documents=documents,
 )
 
 print(response.message.content[0].text)
@@ -246,7 +246,7 @@ messages = [{"role": "user", "content": message}]
 response = co.chat(
     model="command-r-plus-08-2024",
     messages=messages,
-    documents=documents
+    documents=documents,
 )
 
 print(response.message.content[0].text)
diff --git a/fern/pages/v2/text-generation/structured-outputs-json.mdx b/fern/pages/v2/text-generation/structured-outputs-json.mdx
index ec7bf8f9..5c53cdbb 100644
--- a/fern/pages/v2/text-generation/structured-outputs-json.mdx
+++ b/fern/pages/v2/text-generation/structured-outputs-json.mdx
@@ -35,7 +35,7 @@ res = co.chat(
             "content": "Generate a JSON describing a person, with the fields 'name' and 'age'",
         }
     ],
-    response_format={"type": "json_object"}
+    response_format={"type": "json_object"},
 )
 
 print(res.message.content[0].text)
@@ -86,7 +86,7 @@ res = co.chat(
                 "publication_year": {"type": "integer"},
             },
         },
-    }
+    },
 )
 
 print(res.message.content[0].text)
diff --git a/fern/pages/v2/tutorials/agentic-rag/generating-multi-faceted-queries.mdx b/fern/pages/v2/tutorials/agentic-rag/generating-multi-faceted-queries.mdx
index 05164599..b6da9130 100644
--- a/fern/pages/v2/tutorials/agentic-rag/generating-multi-faceted-queries.mdx
+++ b/fern/pages/v2/tutorials/agentic-rag/generating-multi-faceted-queries.mdx
@@ -94,7 +94,7 @@ search_code_examples_detailed_tool = {
             "properties": {
                 "query": {
                     "type": "string",
-                    "description": "The search query."
+                    "description": "The search query.",
                 },
                 "programming_language": {
                     "type": "string",
@@ -104,11 +104,11 @@ search_code_examples_detailed_tool = {
                     "type": "array",
                     "items": {"type": "string"},
                     "description": "The Cohere endpoints used in the code example or tutorial. Only use this property when asked by the user. Possible enum values: chat, embed, rerank, classify.",
-                }
+                },
             },
-            "required": ["query"]
-        }
-    }
+            "required": ["query"],
+        },
+    },
 }
 ```
 ```python PYTHON