From 1b51827c665ed4ccb9d99ccd2c7cec29f5b0f2fa Mon Sep 17 00:00:00 2001
From: TLP <104315397+TLongP@users.noreply.github.com>
Date: Wed, 28 Aug 2024 22:18:59 +0200
Subject: [PATCH] Fix INIT_YAML embeddings default settings (#1039)

Co-authored-by: Thanh Long Phan <long.phan@dida.do>
Co-authored-by: Alonso Guevara <alonsog@microsoft.com>
---
 .semversioner/next-release/patch-20240827203354884800.json | 4 ++++
 graphrag/index/init_content.py                             | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 .semversioner/next-release/patch-20240827203354884800.json

diff --git a/.semversioner/next-release/patch-20240827203354884800.json b/.semversioner/next-release/patch-20240827203354884800.json
new file mode 100644
index 0000000000..71f905c6e5
--- /dev/null
+++ b/.semversioner/next-release/patch-20240827203354884800.json
@@ -0,0 +1,4 @@
+{
+  "type": "patch",
+  "description": "Fix default settings for embedding"
+}
diff --git a/graphrag/index/init_content.py b/graphrag/index/init_content.py
index 8c1a7ba085..fe8dd7cc40 100644
--- a/graphrag/index/init_content.py
+++ b/graphrag/index/init_content.py
@@ -38,6 +38,8 @@
   ## parallelization: override the global parallelization settings for embeddings
   async_mode: {defs.ASYNC_MODE.value} # or asyncio
   # target: {defs.EMBEDDING_TARGET.value} # or all
+  # batch_size: {defs.EMBEDDING_BATCH_SIZE} # the number of documents to send in a single request
+  # batch_max_tokens: {defs.EMBEDDING_BATCH_MAX_TOKENS} # the maximum number of tokens to send in a single request
   llm:
     api_key: ${{GRAPHRAG_API_KEY}}
     type: {defs.EMBEDDING_TYPE.value} # or azure_openai_embedding
@@ -52,8 +54,6 @@
     # max_retry_wait: {defs.LLM_MAX_RETRY_WAIT}
     # sleep_on_rate_limit_recommendation: true # whether to sleep when azure suggests wait-times
     # concurrent_requests: {defs.LLM_CONCURRENT_REQUESTS} # the number of parallel inflight requests that may be made
-    # batch_size: {defs.EMBEDDING_BATCH_SIZE} # the number of documents to send in a single request
-    # batch_max_tokens: {defs.EMBEDDING_BATCH_MAX_TOKENS} # the maximum number of tokens to send in a single request