From 767d267659fd2490be1c4e5f79f3792fd73db935 Mon Sep 17 00:00:00 2001
From: Ilya Gusev <phoenixilya@gmail.com>
Date: Mon, 18 Nov 2024 16:58:21 +0000
Subject: [PATCH] Fixes and annotation templates

---
 .../first.jsonl}                              |  0
 ...e_3_5_sonnet_player_saiga_nemo_12b_v3.json |  3 ++
 ...judge_gpt_4o_player_saiga_nemo_12b_v3.json |  3 ++
 src/annotations/convert_results.py            |  4 +-
 src/annotations/convert_to_label_studio.py    | 20 +++++-----
 src/annotations/merge_annotations.py          | 11 +++---
 .../label_studio_instruction_en.jinja         | 10 +++++
 .../label_studio_instruction_ru.jinja         | 13 +++++++
 templates/annotations/label_studio_ui_en.html | 39 +++++++++++++++++++
 templates/annotations/label_studio_ui_ru.html | 39 +++++++++++++++++++
 10 files changed, 125 insertions(+), 17 deletions(-)
 rename results/{en_annotated.jsonl => en_annotations/first.jsonl} (100%)
 create mode 100644 results/v2/ru/judge_claude_3_5_sonnet_player_saiga_nemo_12b_v3.json
 create mode 100644 results/v2/ru/judge_gpt_4o_player_saiga_nemo_12b_v3.json
 create mode 100644 templates/annotations/label_studio_instruction_en.jinja
 create mode 100644 templates/annotations/label_studio_instruction_ru.jinja
 create mode 100644 templates/annotations/label_studio_ui_en.html
 create mode 100644 templates/annotations/label_studio_ui_ru.html
diff --git a/results/en_annotated.jsonl b/results/en_annotations/first.jsonl
similarity index 100%
rename from results/en_annotated.jsonl
rename to results/en_annotations/first.jsonl
diff --git a/results/v2/ru/judge_claude_3_5_sonnet_player_saiga_nemo_12b_v3.json b/results/v2/ru/judge_claude_3_5_sonnet_player_saiga_nemo_12b_v3.json
new file mode 100644
index 0000000..b9ac2f2
--- /dev/null
+++ b/results/v2/ru/judge_claude_3_5_sonnet_player_saiga_nemo_12b_v3.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:226d9673a9d9241ec97e7453c4dd690b6b826ace8f91c6796cdb3f254e2aa86b
+size 741640
diff --git a/results/v2/ru/judge_gpt_4o_player_saiga_nemo_12b_v3.json b/results/v2/ru/judge_gpt_4o_player_saiga_nemo_12b_v3.json
new file mode 100644
index 0000000..400a0de
--- /dev/null
+++ b/results/v2/ru/judge_gpt_4o_player_saiga_nemo_12b_v3.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbad6d495c51467ef86d9276f587c4e72755ac51a04e5a9e0baeac1329a69d7c
+size 740978
diff --git a/src/annotations/convert_results.py b/src/annotations/convert_results.py
index 53227be..5d9c699 100644
--- a/src/annotations/convert_results.py
+++ b/src/annotations/convert_results.py
@@ -1,4 +1,4 @@
-import fire
+import fire  # type: ignore
 import json
 
 mapping = {
@@ -9,7 +9,7 @@
     "Полностью согласен": 5,
 }
 
-def main(input_path, orig_path, output_path):
+def main(input_path: str, orig_path: str, output_path: str) -> None:
     orig_records = dict()
     with open(orig_path) as r:
         for idx, line in enumerate(r):
diff --git a/src/annotations/convert_to_label_studio.py b/src/annotations/convert_to_label_studio.py
index d1ea7b2..829ed6c 100644
--- a/src/annotations/convert_to_label_studio.py
+++ b/src/annotations/convert_to_label_studio.py
@@ -1,9 +1,9 @@
 import json
 import csv
-from typing import Dict, Any
+from typing import Dict, Any, List
 
-import fire
-import markdown
+import fire  # type: ignore
+import markdown  # type: ignore
 
 
 def to_markdown(record: Dict[str, Any]) -> str:
@@ -11,13 +11,12 @@ def to_markdown(record: Dict[str, Any]) -> str:
     messages = record["messages"]
     for m in messages:
         content = m["content"]
-        #content = content.replace("*", "**")
         result += "\n**{role}**:\n\n{content}\n\n".format(role=m["role"].capitalize(), content=content)
     return result
 
 
-def markdown_to_html(text):
-    html = markdown.markdown(text)
+def markdown_to_html(text: str) -> str:
+    html: str = markdown.markdown(text)
     user_color = "#6a9fb5"
     assistant_color = "#4f6b12"
     template = "<p{style}><strong>{role}</strong>:</p>\n<p{style}>"
@@ -28,8 +27,9 @@ def markdown_to_html(text):
     print(html)
     return html
 
-def main(input_path: str, output_path: str):
-    new_records = []
+
+def main(input_path: str, output_path: str) -> None:
+    new_records: List[Dict[str, Any]] = []
     with open(input_path) as r:
         for idx, line in enumerate(r):
             record = json.loads(line)
@@ -47,8 +47,8 @@ def main(input_path: str, output_path: str):
         writer = csv.writer(w)
         header = list(new_records[0].keys())
         writer.writerow(header)
-        for r in new_records:
-            row = [r[k] for k in header]
+        for rec in new_records:
+            row = [rec[k] for k in header]
             writer.writerow(row)
 
 
diff --git a/src/annotations/merge_annotations.py b/src/annotations/merge_annotations.py
index 2510955..a722a64 100644
--- a/src/annotations/merge_annotations.py
+++ b/src/annotations/merge_annotations.py
@@ -1,15 +1,16 @@
-import fire
+import fire  # type: ignore
 import json
+from typing import Dict, List
 from collections import defaultdict
 from statistics import mean
 
 
-def main(files: str, output_path: str):
-    files = files.split(",")
+def main(files: str, output_path: str) -> None:
+    all_files = files.split(",")
 
     records = dict()
-    scores = defaultdict(lambda: defaultdict(list))
-    for f in files:
+    scores: Dict[str, Dict[str, List[float]]] = defaultdict(lambda: defaultdict(list))
+    for f in all_files:
         with open(f) as r:
             for line in r:
                 record = json.loads(line)
diff --git a/templates/annotations/label_studio_instruction_en.jinja b/templates/annotations/label_studio_instruction_en.jinja
new file mode 100644
index 0000000..04cad0f
--- /dev/null
+++ b/templates/annotations/label_studio_instruction_en.jinja
@@ -0,0 +1,10 @@
+Please carefully read the character card and the dialogue. Based on the assistant's responses, answer 3 questions about the quality of these responses. The criteria for evaluating the answers are:
+<ul>
+<li>Adherence to the character card: nothing the assistant says should contradict the character card.</li>
+<li>Entertainment value: the assistant's answers should be interesting to read and should not repeat each other within the same dialogue.</li>
+<li>Language fluency: responses should be in fluent English, unless otherwise specified in the character card.</li>
+</ul>
+<h2>Questions and Answers</h2>
+<p><b>Question</b>: What should be done if the assistant responds in Chinese instead of English? <b>Answer</b>: Give the minimum score for the fluency question; score the other questions at your discretion.</p>
+<p><b>Question</b>: What should be done if the assistant's responses are repetitive? <b>Answer</b>: Give the minimum score for the entertainment question; score the other questions at your discretion.</p>
+<p><b>Question</b>: What should be done if the user's responses are not very appropriate? <b>Answer</b>: Nothing, your task is to evaluate only the assistant's responses.</p>
diff --git a/templates/annotations/label_studio_instruction_ru.jinja b/templates/annotations/label_studio_instruction_ru.jinja
new file mode 100644
index 0000000..b6c7bad
--- /dev/null
+++ b/templates/annotations/label_studio_instruction_ru.jinja
@@ -0,0 +1,13 @@
+Внимательно прочитайте карточку персонажа и диалог. На основе реплик ассистента ответьте на 3 вопроса о качестве этих реплик. Критерии, по которым нужно оценить ответы:
+<ul>
+<li>Соответствие карточке персонажа: всё, что говорит ассистент, не должно противоречить карточке.</li>
+<li>Развлекательность: вам должно быть интересно читать ответы ассистента, они не должны повторяться между разными репликами в рамках одного диалога.</li>
+<li>Язык: ответы должны быть на хорошем русском языке, если иного не указано в карточке персонажа.
+</li></ul>
+
+<h2>Вопросы и ответы</h2>
+<p><b>Вопрос</b>: Что делать, если ассистент отвечает на английском вместо русского? <b>Ответ</b>: В вопросе про язык нужно поставить минимальный балл, в остальных — на ваше усмотрение.</p>
+
+<p><b>Вопрос</b>: Что делать, если реплики ассистента повторяются? <b>Ответ</b>: В вопросе про развлекательность нужно поставить минимальный балл, в остальных — на ваше усмотрение.</p>
+
+<p><b>Вопрос</b>: Что делать, если реплики пользователя не очень корректны? <b>Ответ</b>: Ничего, ваша задача — оценка только ответов ассистента.</p>
diff --git a/templates/annotations/label_studio_ui_en.html b/templates/annotations/label_studio_ui_en.html
new file mode 100644
index 0000000..2a2a6f5
--- /dev/null
+++ b/templates/annotations/label_studio_ui_en.html
@@ -0,0 +1,39 @@
+<View>
+  <Text name="char_name" value="Character: $char_name" />
+  <Collapse>
+    <Panel value="Character card">
+      <View><Text name="char_info" value="$char_info" /></View>
+  </Panel>
+  </Collapse>
+  <HyperText name="text" value="$html" />
+  <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="The bot's answers are perfectly aligned with an assigned character"/>
+    <Choices name="in_character" toName="text" choice="single" showInLine="true">
+      <Choice value="Strongly disagree"/>
+      <Choice value="Disagree"/>
+      <Choice value="Neutral"/>
+      <Choice value="Agree"/>
+      <Choice value="Strongly agree"/>
+    </Choices>
+  </View>
+  <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="The bot's responses are extremely engaging and entertaining"/>
+    <Choices name="entertaining" toName="text" choice="single" showInLine="true">
+      <Choice value="Strongly disagree"/>
+      <Choice value="Disagree"/>
+      <Choice value="Neutral"/>
+      <Choice value="Agree"/>
+      <Choice value="Strongly agree"/>
+    </Choices>
+  </View>
+    <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="The bot's language use is of the highest quality, without any mistakes"/>
+    <Choices name="fluency" toName="text" choice="single" showInLine="true">
+      <Choice value="Strongly disagree"/>
+      <Choice value="Disagree"/>
+      <Choice value="Neutral"/>
+      <Choice value="Agree"/>
+      <Choice value="Strongly agree"/>
+    </Choices>
+  </View>
+</View>
diff --git a/templates/annotations/label_studio_ui_ru.html b/templates/annotations/label_studio_ui_ru.html
new file mode 100644
index 0000000..da0e66b
--- /dev/null
+++ b/templates/annotations/label_studio_ui_ru.html
@@ -0,0 +1,39 @@
+<View>
+  <Text name="char_name" value="Персонаж: $char_name" />
+  <Collapse>
+    <Panel value="Карточка персонажа">
+    	<View><Text name="char_info" value="$char_info" /></View>
+  	</Panel>
+  </Collapse>
+  <HyperText name="text" value="$html" />
+  <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="Ответы ассистента идеально соответствуют карточке персонажа."/>
+    <Choices name="in_character" toName="text" choice="single" showInLine="true">
+      <Choice value="Полностью не согласен"/>
+      <Choice value="Не согласен"/>
+      <Choice value="Не знаю"/>
+      <Choice value="Согласен"/>
+      <Choice value="Полностью согласен"/>
+    </Choices>
+  </View>
+  <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="Ответы ассистента чрезвычайно интересны и увлекательны."/>
+    <Choices name="entertaining" toName="text" choice="single" showInLine="true">
+      <Choice value="Полностью не согласен"/>
+      <Choice value="Не согласен"/>
+      <Choice value="Не знаю"/>
+      <Choice value="Согласен"/>
+      <Choice value="Полностью согласен"/>
+    </Choices>
+  </View>
+    <View style="box-shadow: 2px 2px 5px #999;                padding: 20px; margin-top: 2em;                border-radius: 5px;">
+    <Header value="Русский язык ассистента идеален, нет ошибок, нет внезапных переходов на английский."/>
+    <Choices name="fluency" toName="text" choice="single" showInLine="true">
+      <Choice value="Полностью не согласен"/>
+      <Choice value="Не согласен"/>
+      <Choice value="Не знаю"/>
+      <Choice value="Согласен"/>
+      <Choice value="Полностью согласен"/>
+    </Choices>
+  </View>
+</View>