Updated prompts, additional template kwargs, config updates.

jondurbin · Jul 24, 2023 · 4552a8e · 4552a8e
1 parent c1fbe19
commit 4552a8e
Show file tree

Hide file tree

Showing 8 changed files with 91 additions and 17 deletions.
diff --git a/airoboros/instructors/prompts/riddle.txt b/airoboros/instructors/prompts/riddle.txt
@@ -23,6 +23,8 @@ Please write 20 more examples of a similar format, with "QUESTION: [puzzle]" and
 
 All output text should be in {language}, but the exact terms "QUESTION" and "ANSWER" are special tokens that must not be translated.
 
+Try not to use common/well-known riddles; the puzzles/riddles should be highly diverse and unique.
+
 The output format should be:
 QUESTION: [first puzzle]
 ANSWER: [first puzzle's answer]

diff --git a/airoboros/instructors/prompts/writing.txt b/airoboros/instructors/prompts/writing.txt
@@ -16,6 +16,8 @@ Here are a few examples:
 
 Make sure to include a wide variety of writing tasks, across a wide range of subjects.
 
+{style_extra}
+
 All output text should be in {language}.
 
 {topic_avoidance}

diff --git a/airoboros/instructors/prompts/writing_response.txt b/airoboros/instructors/prompts/writing_response.txt
@@ -3,7 +3,9 @@ Below is a user's instruction.
 I would like you to follow the following rules:
 - Always pay very close attention to the details in the instruction to make sure the output will be pleasing to the user.
 - When the instruction specifies a specific style (happy, sad, inspirational, etc.), be sure to apply that to your response.
-- If the instruction doesn't specify any particular ending style (happy, inspirational, etc.), and the instruction is to write a poem, story, song, or other creative/non-professional output, you will quietly randomly select from one of the following styles: happy, sad, surprising, open-ended, shocking, tragic.  Remember that not all stories need a resolution where the downtrodden somehow win - lessons can be learned from stories with bad/sad/tragic endings as well, so don't always select a happy ending.  Characters in the story can be friendly, mean, violent, or anything in between - remember, it's just a story, and complex character interactions make it more compelling.  Don't include any moralizing or explanation of what the lesson should be, just tell the story - the user is smart and will understand.
+- If the instruction doesn't specify any particular style (happy, inspirational, etc.), and the instruction is to write a poem, story, song, or other creative/non-professional output, you will quietly randomly select from one of the following styles: happy, sad, surprising, open-ended, shocking, tragic.  Remember that not all stories need a resolution where the downtrodden somehow win - lessons can be learned from stories with bad/sad/tragic endings as well, so don't always select a happy ending.  Characters in the story can be friendly, mean, violent, or anything in between - remember, it's just a story, and complex character interactions make it more compelling.  Don't include any moralizing or explanation of what the lesson should be, just tell the story - the user is smart and will understand.
 - If the instruction is to write an email or letter, do not start the body of the message with "I hope this [..] finds you ...".  Use a variety of introductory sentences without requiring some sort of nicety.
 
+The response should be in {language}.
+
 Instruction: {instruction}
diff --git a/airoboros/instructors/simple_task.py b/airoboros/instructors/simple_task.py
@@ -3,7 +3,7 @@
 import re
 
 
-async def generate(instructor, category):
+async def generate(instructor, category, template_kwargs={}):
     """Generator for simple instruction response tasks (e.g. roleplay, wordgames)."""
     config = instructor.instructors.get(category)
     if not config:
@@ -49,16 +49,14 @@ async def generate(instructor, category):
         instructor.instructor_counts[category] = 0
     language = config.get("language") or instructor.language
     while instructor.instructor_counts[category] < target_count:
+        format_args = {"batch_size": batch_size, "language": language}
+        for key, val in template_kwargs.items():
+            format_args[key] = val(instructor)
+        if "{topic_avoidance}" in template:
+            format_args["topic_avoidance"] = instructor.topic_avoidance
+
         # Get a batch of instructions.
-        prompt = (
-            template.format(batch_size=batch_size, language=language)
-            if "{topic_avoidance}" not in template
-            else template.format(
-                batch_size=batch_size,
-                language=language,
-                topic_avoidance=instructor.topic_avoidance,
-            )
-        )
+        prompt = template.format(**format_args)
         response = await instructor.generate_response(prompt, **api_params)
         if not response:
             continue
@@ -76,7 +74,9 @@ async def generate(instructor, category):
             instructions.append(instruction)
             full_prompt = instruction
             if response_prompt:
-                full_prompt = response_prompt.format(instruction=instruction)
+                full_prompt = response_prompt.format(
+                    language=language, instruction=instruction
+                )
             futures.append(instructor.generate_response(full_prompt, **api_params))
         if not futures:
             continue

diff --git a/airoboros/instructors/writing.py b/airoboros/instructors/writing.py
@@ -1,7 +1,42 @@
+import math
+import random
 from airoboros.instructors.simple_task import generate as generate_simple_task
 
 
+def generate_style_extra(instructor):
+    """Inject a list of style directives."""
+    batch_size = instructor.instructors["writing"].get("batch_size")
+    if batch_size is None:
+        batch_size = instructor.default_batch_size
+    batch_size = int(batch_size)
+    with_styles = math.floor(batch_size / 4)
+    if not with_styles and batch_size > 1:
+        with_styles = 1
+    if with_styles > len(instructor.instructors["writing"]["styles"]):
+        with_styles = instructor.instructors["writing"]["styles"]
+    batch_styles = random.sample(
+        instructor.instructors["writing"]["styles"], with_styles
+    )
+    return "Additional requirements:\n" + "\n".join(
+        [
+            f"- instruction {idx + 1} should specify that the style be {batch_styles[idx]}"
+            for idx in range(with_styles)
+        ]
+    )
+
+
 async def generate(instructor):
     """Generator for writing training data."""
-    async for item in generate_simple_task(instructor, "writing"):
+    conf = instructor.instructors.get("writing", {})
+    if not conf:  # no "writing" section configured (or it is empty): yield nothing
+        return
+    styles = conf.get("styles", [])
+    template_kwargs = {}  # placeholder name -> callable that is passed the instructor and returns the text to substitute
+    if styles:
+        template_kwargs["style_extra"] = generate_style_extra
+    else:
+        template_kwargs["style_extra"] = lambda _: ""  # no styles configured: the placeholder renders as an empty string
+    async for item in generate_simple_task(
+        instructor, "writing", template_kwargs=template_kwargs
+    ):
         yield item
diff --git a/example-config.yaml b/example-config.yaml
@@ -224,6 +224,9 @@ instructors:
   ##################################################################################
   # Guided experiences, e.g. meditation.
   experience:
+    api_params:
+      temperature: 0.9
+      top_p: 0.4
     count: 100
     min_docsearch_score: 0.15
     prompt_path: experience.txt
@@ -239,9 +242,12 @@ instructors:
   ##################################################################################
   # Orca style reasoning/math prompts.
   riddle:
+    api_params:
+      temperature: 0.9
+      top_p: 0.4
+    batch_size: 50
+    min_docsearch_score: 0.01
     count: 100
-    batch_size: 10
-    min_docsearch_score: 0.05
     prompt_path: riddle.txt
 
   ##################################################################################
@@ -255,6 +261,8 @@ instructors:
   ##################################################################################
   # Roleplay.
   roleplay:
+    api_params:
+      temperature: 0.9
     count: 100
     batch_size: 10
     min_docsearch_score: 0.15
@@ -287,13 +295,33 @@ instructors:
   ##################################################################################
   # Writing tasks.
   writing:
+    api_params:
+      temperature: 0.9
+    styles:
+      - happy
+      - sad
+      - tragic
+      - unexpected
+      - inspirational
+      - evil
+      - hilarious
+      - suspenseful
+      - horrific
+      - nostalgic
+      - thought-provoking
+      - enigmatic
+      - fantastical
+      - heartwarming
+      - romantic
     count: 100
-    batch_size: 10
+    batch_size: 12
     min_docsearch_score: 0.35
 
   ##################################################################################
   # Character/scenario card tasks.
   card:
+    api_params:
+      temperature: 0.9
     count: 50
     batch_size: 5
     min_docsearch_score: 0.05
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,5 @@
+[flake8]
+ignore =
+  E501
+  W503
+  W504
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 
 setup(
     name="airoboros",
-    version="2.0.8",
+    version="2.0.9",
     description="Updated and improved implementation of the self-instruct system.",
     long_description=long_description,
     long_description_content_type="text/markdown",