Skip to content

Commit

Permalink
Remove filtering from some instructors, ensure accurate counts, misc fixes.
Browse files (browse the repository at this point in the history)
  • Loading branch information
j-durbin committed Jul 24, 2023
1 parent 4c0d9cf commit c1fbe19
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 31 deletions.
14 changes: 9 additions & 5 deletions airoboros/instructors/coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,11 @@ async def generate(instructor):
if batch_size is None:
batch_size = instructor.default_batch_size
batch_size = int(batch_size)
count = instructor.instructor_counts.get("coding", 0)
if "coding" not in instructor.instructor_counts:
instructor.instructor_counts["coding"] = 0
language_index = 0
language = config.get("language") or instructor.language
while count < target_count:
while instructor.instructor_counts["coding"] < target_count:
# Inject languages to use for this batch.
current_languages = []
for _ in range(batch_size):
Expand Down Expand Up @@ -111,7 +112,11 @@ async def generate(instructor):
instructions.append(
instruction if not plain else instruction + " PLAINFORMAT"
)
futures.append(instructor.generate_response(full_instruction, **api_params))
futures.append(
instructor.generate_response(
full_instruction, filter_response=False, **api_params
)
)
if not futures:
continue
responses = await asyncio.gather(*futures)
Expand All @@ -128,6 +133,5 @@ async def generate(instructor):
"response": response.strip(),
"category": "coding",
}
count += 1
if count >= target_count:
if instructor.instructor_counts["coding"] >= target_count:
break
8 changes: 4 additions & 4 deletions airoboros/instructors/contextual.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,14 @@ async def generate(instructor):
min_score = float(min_score)

# Generate the instruction/response pairs until we reach the target count.
count = instructor.instructor_counts.get("contextual", 0)
if "contextual" not in instructor.instructor_counts:
instructor.instructor_counts["contextual"] = 0
batch_size = config.get("batch_size")
if batch_size is None:
batch_size = instructor.default_batch_size
batch_size = int(batch_size)
futures = []
while count < target_count:
while instructor.instructor_counts["contextual"] < target_count:
prompt = generate_prompt(instructor, config, template, topic_iter)
futures.append(instructor.generate_response(prompt, **api_params))
if len(futures) < batch_size:
Expand Down Expand Up @@ -223,7 +224,6 @@ async def generate(instructor):
"response": response.strip(),
"category": "contextual",
}
count += 1
if count >= target_count:
if instructor.instructor_counts["contextual"] >= target_count:
break
futures = []
11 changes: 7 additions & 4 deletions airoboros/instructors/counterfactual_contextual.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,14 @@ async def generate(instructor):
min_score = float(min_score)

# Generate the instruction/response pairs until we reach the target count.
count = instructor.instructor_counts.get("counterfactual_contextual", 0)
if "counterfactual_contextual" not in instructor.instructor_counts:
instructor.instructor_counts["counterfactual_contextual"] = 0
batch_size = config.get("batch_size")
if batch_size is None:
batch_size = instructor.default_batch_size
batch_size = int(batch_size)
language = config.get("language") or instructor.language
while count < target_count:
while instructor.instructor_counts["counterfactual_contextual"] < target_count:
response = await instructor.generate_response(
template.format(batch_size=batch_size, language=language), **api_params
)
Expand Down Expand Up @@ -164,7 +165,9 @@ async def generate(instructor):
"response": response.strip(),
"category": "counterfactual_contextual",
}
count += 1
if count >= target_count:
if (
instructor.instructor_counts["counterfactual_contextual"]
>= target_count
):
break
futures = []
12 changes: 7 additions & 5 deletions airoboros/instructors/experience.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,19 @@ async def generate(instructor):
min_score = float(min_score)

# Generate the instruction/response pairs until we reach the target count.
count = instructor.instructor_counts.get("experience", 0)
if "experience" not in instructor.instructor_counts:
instructor.instructor_counts["experience"] = 0
language = config.get("language") or instructor.language
batch_size = config.get("batch_size")
if batch_size is None:
batch_size = instructor.default_batch_size
batch_size = int(batch_size)
futures = []
while count < target_count:
while instructor.instructor_counts["experience"] < target_count:
futures.append(
instructor.generate_response(prompt.format(language=language), **api_params)
instructor.generate_response(
prompt.format(language=language), filter_response=False, **api_params
)
)
if len(futures) < batch_size:
continue
Expand Down Expand Up @@ -67,7 +70,6 @@ async def generate(instructor):
"response": response,
"category": "experience",
}
count += 1
if count >= target_count:
if instructor.instructor_counts["experience"] >= target_count:
break
futures = []
8 changes: 4 additions & 4 deletions airoboros/instructors/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,10 @@ async def generate(instructor):
if batch_size is None:
batch_size = instructor.default_batch_size
batch_size = int(batch_size)
count = instructor.instructor_counts.get("general", 0)
if "general" not in instructor.instructor_counts:
instructor.instructor_counts["general"] = 0
language = config.get("language") or instructor.language
while count < target_count:
while instructor.instructor_counts["general"] < target_count:
# Inject the topics to use for this batch.
current_topics = []
for _ in range(batch_size):
Expand Down Expand Up @@ -93,6 +94,5 @@ async def generate(instructor):
"response": response.strip(),
"category": "general",
}
count += 1
if count >= target_count:
if instructor.instructor_counts["general"] >= target_count:
break
8 changes: 4 additions & 4 deletions airoboros/instructors/inline_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ async def generate(
if batch_size is None:
batch_size = instructor.default_batch_size
batch_size = int(batch_size)
count = instructor.instructor_counts.get(category, 0)
if category not in instructor.instructor_counts:
instructor.instructor_counts[category] = 0
language = config.get("language") or instructor.language
while count < target_count:
while instructor.instructor_counts[category] < target_count:
# Get a batch of instructions.
prompt_args = {"language": language}
if "{batch_size}" in template:
Expand All @@ -66,6 +67,5 @@ async def generate(
"response": response.strip(),
"category": category,
}
count += 1
if count >= target_count:
if instructor.instructor_counts[category] >= target_count:
break
8 changes: 4 additions & 4 deletions airoboros/instructors/simple_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,10 @@ async def generate(instructor, category):
if batch_size is None:
batch_size = instructor.default_batch_size
batch_size = int(batch_size)
count = instructor.instructor_counts.get(category, 0)
if category not in instructor.instructor_counts:
instructor.instructor_counts[category] = 0
language = config.get("language") or instructor.language
while count < target_count:
while instructor.instructor_counts[category] < target_count:
# Get a batch of instructions.
prompt = (
template.format(batch_size=batch_size, language=language)
Expand Down Expand Up @@ -89,6 +90,5 @@ async def generate(instructor, category):
"response": response.strip(),
"category": category,
}
count += 1
if count >= target_count:
if instructor.instructor_counts[category] >= target_count:
break
1 change: 1 addition & 0 deletions airoboros/self_instruct.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ def persist(self, item):
self.outfile.flush()
self.docstores[-1].add_texts([item["instruction"]])
self.docstore_size += 1
self.instructor_counts[item["category"]] += 1
if self.docstore_size >= MAX_DOCSTORE_SIZE:
logger.info("Initializing new docstore...")
self.docstores.append(
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name="airoboros",
version="2.0.7",
version="2.0.8",
description="Updated and improved implementation of the self-instruct system.",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit c1fbe19

Please sign in to comment.