Skip to content

Commit

Permalink
update summary (#27)
Browse files Browse the repository at this point in the history
  • Loading branch information
sanderland authored Nov 28, 2024
1 parent b905470 commit a8207e3
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 48 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Code for the paper "Fishing for Magikarp"

This repository contains the code and extended results for the paper [Fishing for Magikarp: Automatically Detecting Under-trained Tokens in Large Language Models](https://arxiv.org/abs/2405.05417)
This repository contains the code and extended results for the paper Fishing for Magikarp: Automatically Detecting Under-trained Tokens in Large Language Models.

The paper is available on [arXiv](https://arxiv.org/abs/2405.05417) and [ACL Anthology](https://aclanthology.org/2024.emnlp-main.649/).

## Exploring Results

Expand Down
2 changes: 1 addition & 1 deletion generate_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def group_key(x):
)
f.write(f"Processed {len(model_infos_all)} models, {len(model_infos)} succeeded")
if failed:
f.write(f"{len(failed)} failed: {failed}\n")
f.write(f", {len(failed)} failed: {failed}\n")

print(tabulate.tabulate([format_info(i, target="latex") for i in model_infos], headers="keys", tablefmt="github"))

Expand Down
2 changes: 1 addition & 1 deletion magikarp/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def plot_xylabel(s):


def hardcoded_indicator_ix(model_id): # yes this is bad
indicator_ix = 1 if model_id in ["allenai/OLMo-7B-hf"] else 0
indicator_ix = 1 if any(s in model_id for s in ["allenai/OLMo-7B-hf", "allenai/OLMo-2"]) else 0
return indicator_ix


Expand Down
40 changes: 20 additions & 20 deletions results/reports/upstage_SOLAR_10_7B_v1_0.md

Large diffs are not rendered by default.

36 changes: 18 additions & 18 deletions results/reports_mini/upstage_SOLAR_10_7B_v1_0.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,31 +32,31 @@

| token_id | token | indicator | max_prob | in_other_tokens |
|------------|--------------------|-------------|------------------------------------------------------------------|-----------------------------------------------------------------------------|
| 20418 | ````` ▁/**\r ````` | 0.00373846 | <span style='border: 1px solid rgb(169, 68, 66);'>3.8e-08</span> | |
| 26636 | ````` });\r ````` | 0.00470908 | <span style='border: 1px solid rgb(169, 68, 66);'>4.9e-10</span> | |
| 26407 | ````` };\r ````` | 0.0050243 | <span style='border: 1px solid rgb(169, 68, 66);'>7.7e-09</span> | |
| 26392 | ````` ▁});\r ````` | 0.00529803 | <span style='border: 1px solid rgb(169, 68, 66);'>9.2e-11</span> | |
| 20418 | ````` ▁/**\r ````` | 0.00373846 | <span style='border: 1px solid rgb(169, 68, 66);'>4.3e-07</span> | |
| 26636 | ````` });\r ````` | 0.00470908 | <span style='border: 1px solid rgb(169, 68, 66);'>3.8e-11</span> | |
| 26407 | ````` };\r ````` | 0.0050243 | <span style='border: 1px solid rgb(169, 68, 66);'>2.3e-07</span> | |
| 26392 | ````` ▁});\r ````` | 0.00529803 | <span style='border: 1px solid rgb(169, 68, 66);'>5.7e-11</span> | |
| 26083 | ````` ▁//\r ````` | 0.00591912 | <span style='border: 1px solid rgb(169, 68, 66);'>3.6e-11</span> | |
| 18759 | ````` ';\r ````` | 0.00594713 | <span style='border: 1px solid rgb(169, 68, 66);'>3.6e-11</span> | |
| 9823 | ````` */\r ````` | 0.0071945 | <span style='border: 1px solid rgb(169, 68, 66);'>6.2e-11</span> | |
| 7608 | ````` ▁*/\r ````` | 0.00811153 | <span style='border: 1px solid rgb(169, 68, 66);'>3e-09</span> | |
| 9823 | ````` */\r ````` | 0.0071945 | <span style='border: 1px solid rgb(169, 68, 66);'>9.3e-09</span> | |
| 7608 | ````` ▁*/\r ````` | 0.00811153 | <span style='border: 1px solid rgb(169, 68, 66);'>2.7e-08</span> | |
| 28171 | ````` ]);\r ````` | 0.00861516 | <span style='border: 1px solid rgb(169, 68, 66);'>8.1e-11</span> | |
| 23139 | ````` ▁};\r ````` | 0.00877842 | <span style='border: 1px solid rgb(169, 68, 66);'>2.1e-09</span> | |
| 15056 | ````` ());\r ````` | 0.00900373 | <span style='border: 1px solid rgb(169, 68, 66);'>1.7e-09</span> | |
| 17695 | ````` },\r ````` | 0.00900494 | <span style='border: 1px solid rgb(169, 68, 66);'>2e-08</span> | <span style='border: 1px solid rgb(169, 68, 66);'>````` ▁},\r `````</span> |
| 12193 | ````` ▁);\r ````` | 0.00917036 | <span style='border: 1px solid rgb(169, 68, 66);'>1.2e-09</span> | |
| 23139 | ````` ▁};\r ````` | 0.00877842 | <span style='border: 1px solid rgb(169, 68, 66);'>2e-08</span> | |
| 15056 | ````` ());\r ````` | 0.00900373 | <span style='border: 1px solid rgb(169, 68, 66);'>8.1e-09</span> | |
| 17695 | ````` },\r ````` | 0.00900494 | <span style='border: 1px solid rgb(169, 68, 66);'>4.4e-08</span> | <span style='border: 1px solid rgb(169, 68, 66);'>````` ▁},\r `````</span> |
| 12193 | ````` ▁);\r ````` | 0.00917036 | <span style='border: 1px solid rgb(169, 68, 66);'>1.1e-08</span> | |
| 14756 | ````` /**\r ````` | 0.00998607 | <span style='border: 1px solid rgb(169, 68, 66);'>7.6e-10</span> | <span style='border: 1px solid rgb(169, 68, 66);'>````` ▁/**\r `````</span> |
| 16943 | ````` ');\r ````` | 0.0105091 | <span style='border: 1px solid rgb(169, 68, 66);'>1.8e-10</span> | |
| 20692 | ````` ▁},\r ````` | 0.0106782 | <span style='border: 1px solid rgb(169, 68, 66);'>1.2e-10</span> | |
| 10278 | ````` ',\r ````` | 0.0120642 | <span style='border: 1px solid rgb(169, 68, 66);'>1.4e-09</span> | |
| 14420 | ````` ];\r ````` | 0.0151245 | <span style='border: 1px solid rgb(169, 68, 66);'>1.2e-08</span> | |
| 18055 | ````` ){\r ````` | 0.01543 | <span style='border: 1px solid rgb(169, 68, 66);'>7.9e-10</span> | |
| 31738 | ````` \uefc0 ````` | 0.0162754 | <span style='border: 1px solid rgb(169, 68, 66);'>1.8e-10</span> | |
| 16943 | ````` ');\r ````` | 0.0105091 | <span style='border: 1px solid rgb(169, 68, 66);'>4.3e-11</span> | |
| 20692 | ````` ▁},\r ````` | 0.0106782 | <span style='border: 1px solid rgb(169, 68, 66);'>1.1e-10</span> | |
| 10278 | ````` ',\r ````` | 0.0120642 | <span style='border: 1px solid rgb(169, 68, 66);'>8.6e-09</span> | |
| 14420 | ````` ];\r ````` | 0.0151245 | <span style='border: 1px solid rgb(169, 68, 66);'>5.1e-08</span> | |
| 18055 | ````` ){\r ````` | 0.01543 | <span style='border: 1px solid rgb(169, 68, 66);'>7.1e-09</span> | |
| 31738 | ````` \uefc0 ````` | 0.0162754 | <span style='border: 1px solid rgb(169, 68, 66);'>1.5e-10</span> | |
<details><summary>21 additional entries below threshold</summary>

| token_id | token | indicator | max_prob | in_other_tokens |
|------------|---------------------|-------------|------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 14980 | ````` ">\r ````` | 0.0168083 | <span style='border: 1px solid rgb(169, 68, 66);'>9e-10</span> | |
| 14980 | ````` ">\r ````` | 0.0168083 | <span style='border: 1px solid rgb(169, 68, 66);'>1.5e-08</span> | |
| 30929 | `````````` | 0.0180165 | <span style='border: 1px solid rgb(169, 68, 66);'>1.7e-09</span> | |
| 22186 | ````` ')\r ````` | 0.0262392 | <span style='border: 1px solid rgb(169, 68, 66);'>3.5e-07</span> | |
| 10939 | ````` ",\r ````` | 0.0269747 | <span style='border: 1px solid rgb(169, 68, 66);'>1.3e-07</span> | |
Expand All @@ -69,7 +69,7 @@
| 17334 | ````` (\r ````` | 0.0436971 | <span style='border: 1px solid rgb(169, 68, 66);'>4.4e-08</span> | |
| 16949 | ````` ")\r ````` | 0.0487248 | <span style='border: 1px solid rgb(169, 68, 66);'>1.5e-06</span> | |
| 6913 | ````` ");\r ````` | 0.0615329 | <span style='border: 1px solid rgb(169, 68, 66);'>5.3e-10</span> | |
| 4441 | ````` {\r ````` | 0.0629616 | <span style='border: 1px solid rgb(169, 68, 66);'>1.6e-09</span> | <span style='border: 1px solid rgb(169, 68, 66);'>````` ){\r `````</span> |
| 4441 | ````` {\r ````` | 0.0629616 | <span style='border: 1px solid rgb(169, 68, 66);'>2.5e-09</span> | <span style='border: 1px solid rgb(169, 68, 66);'>````` ){\r `````</span> |
| 27732 | ````` '\r ````` | 0.0639409 | <span style='border: 1px solid rgb(169, 68, 66);'>2.4e-06</span> | |
| 14668 | ````` ))\r ````` | 0.0670425 | <span style='border: 1px solid rgb(169, 68, 66);'>1.9e-08</span> | |
| 3426 | ````` ▁}\r ````` | 0.0743393 | <span style='border: 1px solid rgb(169, 68, 66);'>1.4e-08</span> | |
Expand Down
Loading

0 comments on commit a8207e3

Please sign in to comment.