Fixes and annotation templates

IlyaGusev · Nov 18, 2024 · 767d267 · 767d267
1 parent 7b26726
commit 767d267
Show file tree

Hide file tree

Showing 10 changed files with 125 additions and 17 deletions.
diff --git a/results/en_annotated.jsonl → results/en_annotations/first.jsonl b/results/en_annotated.jsonl → results/en_annotations/first.jsonl
diff --git a/results/v2/ru/judge_claude_3_5_sonnet_player_saiga_nemo_12b_v3.json b/results/v2/ru/judge_claude_3_5_sonnet_player_saiga_nemo_12b_v3.json
diff --git a/results/v2/ru/judge_gpt_4o_player_saiga_nemo_12b_v3.json b/results/v2/ru/judge_gpt_4o_player_saiga_nemo_12b_v3.json
diff --git a/src/annotations/convert_results.py b/src/annotations/convert_results.py
@@ -1,4 +1,4 @@
-import fire
+import fire  # type: ignore
 import json
 
 mapping = {
@@ -9,7 +9,7 @@
     "Полностью согласен": 5,
 }
 
-def main(input_path, orig_path, output_path):
+def main(input_path: str, orig_path: str, output_path: str) -> None:
     orig_records = dict()
     with open(orig_path) as r:
         for idx, line in enumerate(r):

diff --git a/src/annotations/convert_to_label_studio.py b/src/annotations/convert_to_label_studio.py
@@ -1,23 +1,22 @@
 import json
 import csv
-from typing import Dict, Any
+from typing import Dict, Any, List
 
-import fire
-import markdown
+import fire  # type: ignore
+import markdown  # type: ignore
 
 
 def to_markdown(record: Dict[str, Any]) -> str:
     result = ""
     messages = record["messages"]
     for m in messages:
         content = m["content"]
-        #content = content.replace("*", "**")
         result += "\n**{role}**:\n\n{content}\n\n".format(role=m["role"].capitalize(), content=content)
     return result
 
 
-def markdown_to_html(text):
-    html = markdown.markdown(text)
+def markdown_to_html(text: str) -> str:
+    html: str = markdown.markdown(text)
     user_color = "#6a9fb5"
     assistant_color = "#4f6b12"
     template = "<p{style}><strong>{role}</strong>:</p>\n<p{style}>"
@@ -28,8 +27,9 @@ def markdown_to_html(text):
     print(html)
     return html
 
-def main(input_path: str, output_path: str):
-    new_records = []
+
+def main(input_path: str, output_path: str) -> None:
+    new_records: List[Dict[str, Any]] = []
     with open(input_path) as r:
         for idx, line in enumerate(r):
             record = json.loads(line)
@@ -47,8 +47,8 @@ def main(input_path: str, output_path: str):
         writer = csv.writer(w)
         header = list(new_records[0].keys())
         writer.writerow(header)
-        for r in new_records:
-            row = [r[k] for k in header]
+        for rec in new_records:
+            row = [rec[k] for k in header]
             writer.writerow(row)
 
 

diff --git a/src/annotations/merge_annotations.py b/src/annotations/merge_annotations.py
@@ -1,15 +1,16 @@
-import fire
+import fire  # type: ignore
 import json
+from typing import Dict, List
 from collections import defaultdict
 from statistics import mean
 
 
-def main(files: str, output_path: str):
-    files = files.split(",")
+def main(files: str, output_path: str) -> None:
+    all_files = files.split(",")
 
     records = dict()
-    scores = defaultdict(lambda: defaultdict(list))
-    for f in files:
+    scores: Dict[str, Dict[str, List[float]]] = defaultdict(lambda: defaultdict(list))
+    for f in all_files:
         with open(f) as r:
             for line in r:
                 record = json.loads(line)

diff --git a/templates/annotations/label_studio_instruction_en.jinja b/templates/annotations/label_studio_instruction_en.jinja
@@ -0,0 +1,10 @@
+Please carefully read the character card and the dialogue. Based on the assistant's responses, answer 3 questions about the quality of these responses. The criteria for evaluating the responses are:
+<ul>
+<li>Adherence to the character card: nothing the assistant says should contradict the character card.</li>
+<li>Entertainment value: you should find the assistant's answers interesting to read, and they should not be repeated across different responses within the same dialogue.</li>
+<li>Language fluency: responses should be in fluent English, unless otherwise specified in the character card.</li>
+</ul>
+<h2>Questions and Answers</h2>
+<p><b>Question</b>: What should be done if the assistant responds in Chinese instead of English? <b>Answer</b>: Give the minimum score for the fluency question; for the others, use your discretion.</p>
+<p><b>Question</b>: What should be done if the assistant's responses are repetitive? <b>Answer</b>: Give the minimum score for the entertainment question; for the others, use your discretion.</p>
+<p><b>Question</b>: What should be done if the user's responses are not very appropriate? <b>Answer</b>: Nothing, your task is to evaluate only the assistant's responses.</p>
diff --git a/templates/annotations/label_studio_instruction_ru.jinja b/templates/annotations/label_studio_instruction_ru.jinja
@@ -0,0 +1,13 @@
+Внимательно прочитайте карточку персонажа и диалог. На основе реплик ассистента ответьте на 3 вопроса о качестве этих реплик. Критерии, по которым нужно оценить ответы:
+<ul>
+<li>Соответствие карточке персонажа: всё, что говорит ассистент, не должно противоречить карточке.</li>
+<li>Развлекательность: вам должно быть интересно читать ответы ассистента, они не должны повторяться между разными репликами в рамках одного диалога.</li>
+<li>Язык: ответы должны быть на хорошем русском языке, если иного не указано в карточке персонажа.
+</li></ul>
+
+<h2>Вопросы и ответы</h2>
+<p><b>Вопрос</b>: Что делать, если ассистент отвечает на английском вместо русского? <b>Ответ</b>: В вопросе про язык нужно поставить минимальный балл, в остальных — на ваше усмотрение.</p>
+
+<p><b>Вопрос</b>: Что делать, если реплики ассистента повторяются? <b>Ответ</b>: В вопросе про развлекательность нужно поставить минимальный балл, в остальных — на ваше усмотрение.</p>
+
+<p><b>Вопрос</b>: Что делать, если реплики пользователя не очень корректны? <b>Ответ</b>: Ничего, ваша задача — оценка только ответов ассистента.</p>
diff --git a/templates/annotations/label_studio_ui_en.html b/templates/annotations/label_studio_ui_en.html
@@ -0,0 +1,39 @@
+<View>
+  <Text name="char_name" value="Character: $char_name" />
+  <Collapse>
+    <Panel value="Character card">
+      <View><Text name="char_info" value="$char_info" /></View>
+  </Panel>
+  </Collapse>
+  <HyperText name="text" value="$html" />
+  <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="The bot's answers are perfectly aligned with an assigned character"/>
+    <Choices name="in_character" toName="text" choice="single" showInLine="true">
+      <Choice value="Strongly disagree"/>
+      <Choice value="Disagree"/>
+      <Choice value="Neutral"/>
+      <Choice value="Agree"/>
+      <Choice value="Strongly agree"/>
+    </Choices>
+  </View>
+  <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="The bot's responses are extremely engaging and entertaining"/>
+    <Choices name="entertaining" toName="text" choice="single" showInLine="true">
+      <Choice value="Strongly disagree"/>
+      <Choice value="Disagree"/>
+      <Choice value="Neutral"/>
+      <Choice value="Agree"/>
+      <Choice value="Strongly agree"/>
+    </Choices>
+  </View>
+    <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="The bot's language use is of the highest quality, without any mistakes"/>
+    <Choices name="fluency" toName="text" choice="single" showInLine="true">
+      <Choice value="Strongly disagree"/>
+      <Choice value="Disagree"/>
+      <Choice value="Neutral"/>
+      <Choice value="Agree"/>
+      <Choice value="Strongly agree"/>
+    </Choices>
+  </View>
+</View>
diff --git a/templates/annotations/label_studio_ui_ru.html b/templates/annotations/label_studio_ui_ru.html
@@ -0,0 +1,39 @@
+<View>
+  <Text name="char_name" value="Персонаж: $char_name" />
+  <Collapse>
+    <Panel value="Карточка персонажа">
+    	<View><Text name="char_info" value="$char_info" /></View>
+  	</Panel>
+  </Collapse>
+  <HyperText name="text" value="$html" />
+  <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="Ответы ассистента идеально соответствуют карточке персонажа."/>
+    <Choices name="in_character" toName="text" choice="single" showInLine="true">
+      <Choice value="Полностью не согласен"/>
+      <Choice value="Не согласен"/>
+      <Choice value="Не знаю"/>
+      <Choice value="Согласен"/>
+      <Choice value="Полностью согласен"/>
+    </Choices>
+  </View>
+  <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="Ответы ассистента чрезвычайно интересны и увлекательны."/>
+    <Choices name="entertaining" toName="text" choice="single" showInLine="true">
+      <Choice value="Полностью не согласен"/>
+      <Choice value="Не согласен"/>
+      <Choice value="Не знаю"/>
+      <Choice value="Согласен"/>
+      <Choice value="Полностью согласен"/>
+    </Choices>
+  </View>
+    <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="Русский язык ассистента идеален, нет ошибок, нет внезапных переходов на английский."/>
+    <Choices name="fluency" toName="text" choice="single" showInLine="true">
+      <Choice value="Полностью не согласен"/>
+      <Choice value="Не согласен"/>
+      <Choice value="Не знаю"/>
+      <Choice value="Согласен"/>
+      <Choice value="Полностью согласен"/>
+    </Choices>
+  </View>
+</View>