Merge pull request #123 from FederatedAI/feature-2.2.0-update_doc

Feature 2.2.0 update doc
FederatedAI · Aug 1, 2024 · ce0e2e9 · ce0e2e9
2 parents 5fad8d8 + 8ac3bbb
commit ce0e2e9
Showing 1 changed file with 5 additions and 5 deletions.
diff --git a/doc/tutorial/fdkt/fdkt.ipynb b/doc/tutorial/fdkt/fdkt.ipynb
@@ -39,7 +39,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "The following code will sample 5000 datalines of 'Health' subdomain, and train data will generated under the folder './balance_processed_data/Health/train.json'"
+    "The following code will sample 5000 data lines from the 'Health' subdomain, and the training data will be generated at './processed_data/Health/train.json'"
    ]
   },
   {
@@ -391,7 +391,7 @@
     "llm_pretrained_path = \"Qwen1.5-7B-Chat\"\n",
     "embedding_model_path = \"all-mpnet-base-v2\"\n",
     "slm_pretrained_path = \"gpt2-xl\"\n",
-    "slm_data_path = \"./process/Health/train.json\"\n",
+    "slm_data_path = \"./processed_data/Health/train.json\"\n",
     "\n",
     "\n",
     "def get_optimizer(model, optimizer=\"adam\", lr=1e-4):\n",
@@ -472,7 +472,7 @@
     "\n",
     "    embedding_lm = SentenceTransformerModel(model_name_or_path=embedding_model_path).load()\n",
     "    training_args = FDKTTrainingArguments(\n",
-    "        sample_num_per_cluster=5,\n",
+    "        sample_num_per_cluster=4,\n",
     "        filter_prompt_max_length=2**14,\n",
     "        filter_generation_config=dict(\n",
     "            max_tokens=4096,\n",
@@ -559,7 +559,7 @@
     "llm_pretrained_path = \"Qwen1.5-7B-Chat\"\n",
     "embedding_model_path = \"all-mpnet-base-v2/\"\n",
     "slm_pretrained_path = \"gpt2-xl\"\n",
-    "slm_data_path = \"./process/Health/train.json\" # should be absolute path\n",
+    "slm_data_path = \"./processed_data/Health/train.json\" # should be absolute path\n",
     "\n",
     "\n",
     "def get_llm_conf():\n",
@@ -581,7 +581,7 @@
     "    )\n",
     "\n",
     "    training_args = FDKTTrainingArguments(\n",
-    "        sample_num_per_cluster=5,\n",
+    "        sample_num_per_cluster=4,\n",
     "        filter_prompt_max_length=2 ** 14,\n",
     "        filter_generation_config=dict(\n",
     "            max_tokens=4096,\n",