diff --git a/README.md b/README.md index 031888d..25ea55d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # Code for the paper "Fishing for Magikarp" -This repository contains the code and extended results for the paper [Fishing for Magikarp: Automatically Detecting Under-trained Tokens in Large Language Models](https://arxiv.org/abs/2405.05417) +This repository contains the code and extended results for the paper Fishing for Magikarp: Automatically Detecting Under-trained Tokens in Large Language Models. + +The paper is available on [arXiv](https://arxiv.org/abs/2405.05417) and [ACL Anthology](https://aclanthology.org/2024.emnlp-main.649/). ## Exploring Results diff --git a/generate_summary.py b/generate_summary.py index e6065ee..146fdfb 100644 --- a/generate_summary.py +++ b/generate_summary.py @@ -97,7 +97,7 @@ def group_key(x): ) f.write(f"Processed {len(model_infos_all)} models, {len(model_infos)} succeeded") if failed: - f.write(f"{len(failed)} failed: {failed}\n") + f.write(f", {len(failed)} failed: {failed}\n") print(tabulate.tabulate([format_info(i, target="latex") for i in model_infos], headers="keys", tablefmt="github")) diff --git a/magikarp/report.py b/magikarp/report.py index b67bbd4..7722107 100644 --- a/magikarp/report.py +++ b/magikarp/report.py @@ -28,7 +28,7 @@ def plot_xylabel(s): def hardcoded_indicator_ix(model_id): # yes this is bad - indicator_ix = 1 if model_id in ["allenai/OLMo-7B-hf"] else 0 + indicator_ix = 1 if any(s in model_id for s in ["allenai/OLMo-7B-hf", "allenai/OLMo-2"]) else 0 return indicator_ix diff --git a/results/reports/upstage_SOLAR_10_7B_v1_0.md b/results/reports/upstage_SOLAR_10_7B_v1_0.md index 61e0602..0978680 100644 --- a/results/reports/upstage_SOLAR_10_7B_v1_0.md +++ b/results/reports/upstage_SOLAR_10_7B_v1_0.md @@ -32,31 +32,31 @@ | token_id | token | indicator | max_prob | in_other_tokens | 
|------------|--------------------|-------------|------------------------------------------------------------------|-----------------------------------------------------------------------------| -| 20418 | ````` ▁/**\r ````` | 0.00373846 | 3.8e-08 | | -| 26636 | ````` });\r ````` | 0.00470908 | 4.9e-10 | | -| 26407 | ````` };\r ````` | 0.0050243 | 7.7e-09 | | -| 26392 | ````` ▁});\r ````` | 0.00529803 | 9.2e-11 | | +| 20418 | ````` ▁/**\r ````` | 0.00373846 | 4.3e-07 | | +| 26636 | ````` });\r ````` | 0.00470908 | 3.8e-11 | | +| 26407 | ````` };\r ````` | 0.0050243 | 2.3e-07 | | +| 26392 | ````` ▁});\r ````` | 0.00529803 | 5.7e-11 | | | 26083 | ````` ▁//\r ````` | 0.00591912 | 3.6e-11 | | | 18759 | ````` ';\r ````` | 0.00594713 | 3.6e-11 | | -| 9823 | ````` */\r ````` | 0.0071945 | 6.2e-11 | | -| 7608 | ````` ▁*/\r ````` | 0.00811153 | 3e-09 | | +| 9823 | ````` */\r ````` | 0.0071945 | 9.3e-09 | | +| 7608 | ````` ▁*/\r ````` | 0.00811153 | 2.7e-08 | | | 28171 | ````` ]);\r ````` | 0.00861516 | 8.1e-11 | | -| 23139 | ````` ▁};\r ````` | 0.00877842 | 2.1e-09 | | -| 15056 | ````` ());\r ````` | 0.00900373 | 1.7e-09 | | -| 17695 | ````` },\r ````` | 0.00900494 | 2e-08 | ````` ▁},\r ````` | -| 12193 | ````` ▁);\r ````` | 0.00917036 | 1.2e-09 | | +| 23139 | ````` ▁};\r ````` | 0.00877842 | 2e-08 | | +| 15056 | ````` ());\r ````` | 0.00900373 | 8.1e-09 | | +| 17695 | ````` },\r ````` | 0.00900494 | 4.4e-08 | ````` ▁},\r ````` | +| 12193 | ````` ▁);\r ````` | 0.00917036 | 1.1e-08 | | | 14756 | ````` /**\r ````` | 0.00998607 | 7.6e-10 | ````` ▁/**\r ````` | -| 16943 | ````` ');\r ````` | 0.0105091 | 1.8e-10 | | -| 20692 | ````` ▁},\r ````` | 0.0106782 | 1.2e-10 | | -| 10278 | ````` ',\r ````` | 0.0120642 | 1.4e-09 | | -| 14420 | ````` ];\r ````` | 0.0151245 | 1.2e-08 | | -| 18055 | ````` ){\r ````` | 0.01543 | 7.9e-10 | | -| 31738 | ````` \uefc0 ````` | 0.0162754 | 1.8e-10 | | +| 16943 | ````` ');\r ````` | 0.0105091 | 4.3e-11 | | +| 20692 | ````` ▁},\r ````` | 0.0106782 | 
1.1e-10 | | +| 10278 | ````` ',\r ````` | 0.0120642 | 8.6e-09 | | +| 14420 | ````` ];\r ````` | 0.0151245 | 5.1e-08 | | +| 18055 | ````` ){\r ````` | 0.01543 | 7.1e-09 | | +| 31738 | ````` \uefc0 ````` | 0.0162754 | 1.5e-10 | |
21 additional entries below threshold | token_id | token | indicator | max_prob | in_other_tokens | |------------|---------------------|-------------|------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 14980 | ````` ">\r ````` | 0.0168083 | 9e-10 | | +| 14980 | ````` ">\r ````` | 0.0168083 | 1.5e-08 | | | 30929 | ````` ᥀ ````` | 0.0180165 | 1.7e-09 | | | 22186 | ````` ')\r ````` | 0.0262392 | 3.5e-07 | | | 10939 | ````` ",\r ````` | 0.0269747 | 1.3e-07 | | @@ -69,7 +69,7 @@ | 17334 | ````` (\r ````` | 0.0436971 | 4.4e-08 | | | 16949 | ````` ")\r ````` | 0.0487248 | 1.5e-06 | | | 6913 | ````` ");\r ````` | 0.0615329 | 5.3e-10 | | -| 4441 | ````` {\r ````` | 0.0629616 | 1.6e-09 | ````` ){\r ````` | +| 4441 | ````` {\r ````` | 0.0629616 | 2.5e-09 | ````` ){\r ````` | | 27732 | ````` '\r ````` | 0.0639409 | 2.4e-06 | | | 14668 | ````` ))\r ````` | 0.0670425 | 1.9e-08 | | | 3426 | ````` ▁}\r ````` | 0.0743393 | 1.4e-08 | | @@ -106,7 +106,7 @@ | 29934 | ````` ⣿ ````` | 0.140086 | 1 | | | 288 | ````` ing ````` | 0.140543 | 1 | ````` ring `````, ````` ings `````, ````` tring `````, ````` ning `````, ````` ating `````, ... | | 30897 | ````` ⠄ ````` | 0.141828 | 0.99 | | -| 31733 | ````` ⵙ ````` | 0.14194 | 0.78 | | +| 31733 | ````` ⵙ ````` | 0.14194 | 0.81 | | | 297 | ````` ▁in ````` | 0.142941 | 1 | ````` ▁int `````, ````` ▁into `````, ````` ▁inter `````, ````` ▁inst `````, ````` ▁incl `````, ... 
| | 17779 | ````` ▁gepublice ````` | 0.143946 | 1.1e-06 | ````` ▁gepubliceerd ````` | | 25833 | ````` >?[< ````` | 0.144418 | 0.076 | | @@ -251,7 +251,7 @@ | 362 | ````` th ````` | 0.174964 | 1 | ````` ▁that `````, ````` ith `````, ````` ▁with `````, ````` ▁this `````, ````` ath `````, ... | | 1725 | ````` ▁That ````` | 0.174981 | 1 | | | 473 | ````` ine ````` | 0.175017 | 1 | ````` line `````, ````` ines `````, ````` ined `````, ````` ▁line `````, ````` iness `````, ... | -| 3174 | ````` )\r ````` | 0.175053 | 0.73 | ````` ()\r `````, ````` ▁)\r `````, ````` ))\r `````, ````` ")\r `````, ````` ')\r ````` | +| 3174 | ````` )\r ````` | 0.175053 | 0.74 | ````` ()\r `````, ````` ▁)\r `````, ````` ))\r `````, ````` ")\r `````, ````` ')\r ````` | | 816 | ````` ▁We ````` | 0.175055 | 1 | ````` ▁West `````, ````` ▁Well `````, ````` ▁Web `````, ````` ▁Wed `````, ````` ▁Western `````, ... | | 375 | ````` ab ````` | 0.175089 | 1 | ````` able `````, ````` ▁ab `````, ````` ▁about `````, ````` abel `````, ````` label `````, ... 
| | 30832 | ````` 🟢 ````` | 0.175101 | 1 | | diff --git a/results/reports_mini/upstage_SOLAR_10_7B_v1_0.md b/results/reports_mini/upstage_SOLAR_10_7B_v1_0.md index 6d27923..f250d20 100644 --- a/results/reports_mini/upstage_SOLAR_10_7B_v1_0.md +++ b/results/reports_mini/upstage_SOLAR_10_7B_v1_0.md @@ -32,31 +32,31 @@ | token_id | token | indicator | max_prob | in_other_tokens | |------------|--------------------|-------------|------------------------------------------------------------------|-----------------------------------------------------------------------------| -| 20418 | ````` ▁/**\r ````` | 0.00373846 | 3.8e-08 | | -| 26636 | ````` });\r ````` | 0.00470908 | 4.9e-10 | | -| 26407 | ````` };\r ````` | 0.0050243 | 7.7e-09 | | -| 26392 | ````` ▁});\r ````` | 0.00529803 | 9.2e-11 | | +| 20418 | ````` ▁/**\r ````` | 0.00373846 | 4.3e-07 | | +| 26636 | ````` });\r ````` | 0.00470908 | 3.8e-11 | | +| 26407 | ````` };\r ````` | 0.0050243 | 2.3e-07 | | +| 26392 | ````` ▁});\r ````` | 0.00529803 | 5.7e-11 | | | 26083 | ````` ▁//\r ````` | 0.00591912 | 3.6e-11 | | | 18759 | ````` ';\r ````` | 0.00594713 | 3.6e-11 | | -| 9823 | ````` */\r ````` | 0.0071945 | 6.2e-11 | | -| 7608 | ````` ▁*/\r ````` | 0.00811153 | 3e-09 | | +| 9823 | ````` */\r ````` | 0.0071945 | 9.3e-09 | | +| 7608 | ````` ▁*/\r ````` | 0.00811153 | 2.7e-08 | | | 28171 | ````` ]);\r ````` | 0.00861516 | 8.1e-11 | | -| 23139 | ````` ▁};\r ````` | 0.00877842 | 2.1e-09 | | -| 15056 | ````` ());\r ````` | 0.00900373 | 1.7e-09 | | -| 17695 | ````` },\r ````` | 0.00900494 | 2e-08 | ````` ▁},\r ````` | -| 12193 | ````` ▁);\r ````` | 0.00917036 | 1.2e-09 | | +| 23139 | ````` ▁};\r ````` | 0.00877842 | 2e-08 | | +| 15056 | ````` ());\r ````` | 0.00900373 | 8.1e-09 | | +| 17695 | ````` },\r ````` | 0.00900494 | 4.4e-08 | ````` ▁},\r ````` | +| 12193 | ````` ▁);\r ````` | 0.00917036 | 1.1e-08 | | | 14756 | ````` /**\r ````` | 0.00998607 | 7.6e-10 | ````` ▁/**\r ````` | -| 16943 | ````` ');\r ````` | 0.0105091 | 
1.8e-10 | | -| 20692 | ````` ▁},\r ````` | 0.0106782 | 1.2e-10 | | -| 10278 | ````` ',\r ````` | 0.0120642 | 1.4e-09 | | -| 14420 | ````` ];\r ````` | 0.0151245 | 1.2e-08 | | -| 18055 | ````` ){\r ````` | 0.01543 | 7.9e-10 | | -| 31738 | ````` \uefc0 ````` | 0.0162754 | 1.8e-10 | | +| 16943 | ````` ');\r ````` | 0.0105091 | 4.3e-11 | | +| 20692 | ````` ▁},\r ````` | 0.0106782 | 1.1e-10 | | +| 10278 | ````` ',\r ````` | 0.0120642 | 8.6e-09 | | +| 14420 | ````` ];\r ````` | 0.0151245 | 5.1e-08 | | +| 18055 | ````` ){\r ````` | 0.01543 | 7.1e-09 | | +| 31738 | ````` \uefc0 ````` | 0.0162754 | 1.5e-10 | |
21 additional entries below threshold | token_id | token | indicator | max_prob | in_other_tokens | |------------|---------------------|-------------|------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 14980 | ````` ">\r ````` | 0.0168083 | 9e-10 | | +| 14980 | ````` ">\r ````` | 0.0168083 | 1.5e-08 | | | 30929 | ````` ᥀ ````` | 0.0180165 | 1.7e-09 | | | 22186 | ````` ')\r ````` | 0.0262392 | 3.5e-07 | | | 10939 | ````` ",\r ````` | 0.0269747 | 1.3e-07 | | @@ -69,7 +69,7 @@ | 17334 | ````` (\r ````` | 0.0436971 | 4.4e-08 | | | 16949 | ````` ")\r ````` | 0.0487248 | 1.5e-06 | | | 6913 | ````` ");\r ````` | 0.0615329 | 5.3e-10 | | -| 4441 | ````` {\r ````` | 0.0629616 | 1.6e-09 | ````` ){\r ````` | +| 4441 | ````` {\r ````` | 0.0629616 | 2.5e-09 | ````` ){\r ````` | | 27732 | ````` '\r ````` | 0.0639409 | 2.4e-06 | | | 14668 | ````` ))\r ````` | 0.0670425 | 1.9e-08 | | | 3426 | ````` ▁}\r ````` | 0.0743393 | 1.4e-08 | | diff --git a/results/summary.md b/results/summary.md index 7d37fb7..b39bcd7 100644 --- a/results/summary.md +++ b/results/summary.md @@ -12,6 +12,7 @@ | meta-llama/Llama-2-7b-hf | [mini](reports_mini/meta_llama_Llama_2_7b_hf.md) [full](reports/meta_llama_Llama_2_7b_hf.md) | 32000 × 4096 | BPE, Byte Fallback | 32000 | 351 | 0 | 0 | 3 | 20/639 | 19/551 | springframework | ````` ▁Mediabestanden `````, ````` ▁Portály `````, ````` oreferrer `````, ````` ederbörd `````, ````` ▁Расподела `````, ````` nederbörd `````, ````` ▁regnig `````, ````` ▁demsel `````, ````` tatywna `````, ````` љашње ````` | | microsoft/Phi-3-mini-128k-instruct | 
[mini](reports_mini/microsoft_Phi_3_mini_128k_instruct.md) [full](reports/microsoft_Phi_3_mini_128k_instruct.md) | 32064 × 3072 | BPE, Byte Fallback | 32011 | 351 | 0 | 0 | 14 | 169/636 | 169/635 | springframework | ````` ▁Mediabestanden `````, ````` ▁autorytatywna `````, ````` ▁Хронологија `````, ````` ▁Portály `````, ````` Архівовано `````, ````` Webachiv `````, ````` ▁Спољашње `````, ````` ▁archiválva `````, ````` хівовано `````, ````` ▁regnigaste ````` | | microsoft/Phi-3.5-mini-instruct | [mini](reports_mini/microsoft_Phi_3_5_mini_instruct.md) [full](reports/microsoft_Phi_3_5_mini_instruct.md) | 32064 × 3072 | BPE, Byte Fallback | 32011 | 351 | 0 | 0 | 14 | 88/636 | 87/633 | springframework | ````` ▁Хронологија `````, ````` Архівовано `````, ````` ▁Спољашње `````, ````` ▁Mediabestanden `````, ````` хівовано `````, ````` ▁autorytatywna `````, ````` Webachiv `````, ````` ▁надморској `````, ````` ▁regnigaste `````, ````` ▁årsnederbörd ````` | +| upstage/solar-pro-preview-instruct | [mini](reports_mini/upstage_solar_pro_preview_instruct.md) [full](reports/upstage_solar_pro_preview_instruct.md) | 32128 × 5120 | BPE, Byte Fallback | 32128 | 351 | 0 | 0 | 131 | 138/639 | 127/605 | springframework | ````` ▁regnigaste `````, ````` ▁Mediabestanden `````, ````` ▁Хронологија `````, ````` ▁årsnederbörd `````, ````` ▁Portály `````, ````` Архівовано `````, ````` ▁Спољашње `````, ````` ▁надморској `````, ````` ▁autorytatywna `````, ````` ▁савезној ````` | | HuggingFaceH4/zephyr-7b-beta | [mini](reports_mini/HuggingFaceH4_zephyr_7b_beta.md) [full](reports/HuggingFaceH4_zephyr_7b_beta.md) | 32000 × 4096 | BPE, Byte Fallback | 32000 | 380 | 0 | 0 | 3 | 76/637 | 70/529 | includegraphics | ````` \uefc0 `````, ````` ▁/**\r `````, ````` });\r `````, ````` };\r `````, ````` ▁});\r `````, ````` ';\r `````, ````` ▁//\r `````, ````` */\r `````, ````` >?[< `````, ````` ▁*/\r ````` | | Nexusflow/Starling-LM-7B-beta | [mini](reports_mini/Nexusflow_Starling_LM_7B_beta.md) 
[full](reports/Nexusflow_Starling_LM_7B_beta.md) | 32002 × 4096 | BPE, Byte Fallback | 32002 | 380 | 0 | 0 | 5 | 48/636 | 42/530 | includegraphics | ````` \uefc0 `````, ````` ▁/**\r `````, ````` });\r `````, ````` ▁});\r `````, ````` ▁//\r `````, ````` };\r `````, ````` ';\r `````, ````` */\r `````, ````` >?[< `````, ````` ▁*/\r ````` | | Rakuten/RakutenAI-7B | [mini](reports_mini/Rakuten_RakutenAI_7B.md) [full](reports/Rakuten_RakutenAI_7B.md) | 48000 × 4096 | BPE, Byte Fallback | 48000 | 380 | 0 | 0 | 3 | 66/957 | 59/950 | includegraphics | ````` \uefc0 `````, ````` ▁/**\r `````, ````` });\r `````, ````` ▁//\r `````, ````` };\r `````, ````` ▁});\r `````, ````` >?[< `````, ````` */\r `````, ````` ]);\r `````, ````` ▁};\r ````` | @@ -19,12 +20,12 @@ | mistralai/Mistral-7B-Instruct-v0.2 | [mini](reports_mini/mistralai_Mistral_7B_Instruct_v0_2.md) [full](reports/mistralai_Mistral_7B_Instruct_v0_2.md) | 32000 × 4096 | BPE, Byte Fallback | 32000 | 380 | 0 | 0 | 3 | 35/637 | 28/529 | includegraphics | ````` \uefc0 `````, ````` ▁/**\r `````, ````` ▁//\r `````, ````` >?[< `````, ````` */\r `````, ````` \x85 `````, ````` });\r `````, ````` };\r `````, ````` ';\r `````, ````` /**\r ````` | | mistralai/Mistral-7B-v0.1 | [mini](reports_mini/mistralai_Mistral_7B_v0_1.md) [full](reports/mistralai_Mistral_7B_v0_1.md) | 32000 × 4096 | BPE, Byte Fallback | 32000 | 380 | 0 | 0 | 3 | 49/637 | 43/529 | includegraphics | ````` \uefc0 `````, ````` ▁/**\r `````, ````` });\r `````, ````` };\r `````, ````` ▁});\r `````, ````` ▁//\r `````, ````` ';\r `````, ````` */\r `````, ````` >?[< `````, ````` ▁*/\r ````` | | mistralai/Mixtral-8x7B-v0.1 | [mini](reports_mini/mistralai_Mixtral_8x7B_v0_1.md) [full](reports/mistralai_Mixtral_8x7B_v0_1.md) | 32000 × 4096 | BPE, Byte Fallback | 32000 | 380 | 0 | 0 | 3 | 44/637 | 42/542 | includegraphics | ````` \uefc0 `````, ````` ▁/**\r `````, ````` ▁//\r `````, ````` });\r `````, ````` ▁});\r `````, ````` */\r `````, ````` };\r `````, ````` ]);\r `````, 
````` ▁};\r `````, ````` ▁*/\r ````` | +| upstage/SOLAR-10.7B-v1.0 | [mini](reports_mini/upstage_SOLAR_10_7B_v1_0.md) [full](reports/upstage_SOLAR_10_7B_v1_0.md) | 32000 × 4096 | BPE, Byte Fallback | 32000 | 380 | 0 | 0 | 3 | 58/638 | 51/523 | includegraphics | ````` ▁/**\r `````, ````` });\r `````, ````` };\r `````, ````` ▁});\r `````, ````` ▁//\r `````, ````` ';\r `````, ````` */\r `````, ````` ▁*/\r `````, ````` ]);\r `````, ````` ▁};\r ````` | | mistralai/Codestral-22B-v0.1 | [mini](reports_mini/mistralai_Codestral_22B_v0_1.md) [full](reports/mistralai_Codestral_22B_v0_1.md) | 32768 × 6144 | BPE, Byte Fallback | 32768 | 380 | 0 | 0 | 771 | 54/637 | 48/602 | includegraphics | ````` ▁/**\r `````, ````` });\r `````, ````` ▁//\r `````, ````` ';\r `````, ````` ▁},\r `````, ````` ]);\r `````, ````` ▁);\r `````, ````` ());\r `````, ````` ▁*/\r `````, ````` ▁};\r ````` | | mistralai/Mistral-7B-v0.3 | [mini](reports_mini/mistralai_Mistral_7B_v0_3.md) [full](reports/mistralai_Mistral_7B_v0_3.md) | 32768 × 4096 | BPE, Byte Fallback | 32768 | 380 | 0 | 0 | 771 | 53/637 | 47/529 | includegraphics | ````` \uefc0 `````, ````` ▁/**\r `````, ````` });\r `````, ````` };\r `````, ````` ▁});\r `````, ````` ▁//\r `````, ````` ';\r `````, ````` */\r `````, ````` >?[< `````, ````` ▁*/\r ````` | | utter-project/EuroLLM-1.7B | [mini](reports_mini/utter_project_EuroLLM_1_7B.md) [full](reports/utter_project_EuroLLM_1_7B.md) | 128000 × 2048 | BPE, Byte Fallback | 128000 | 354 | 1 | 0 | 261 | 357/2559 | 101/2260 | includegraphics | ````` funsio `````, ````` urasGeneral `````, ````` ▁momč `````, ````` asaíne `````, ````` jetain `````, ````` ▁compéten `````, ````` ▁meais `````, ````` ħtie `````, ````` ▁voormal `````, ````` iċli ````` | | m-a-p/neo_7b | [mini](reports_mini/m_a_p_neo_7b.md) [full](reports/m_a_p_neo_7b.md) | 64256 × 3072 | BPE, Byte Input | 64005 | 351 | 0 | 0 | 8 | 403/1276 | 403/1276 | 即表明您已经阅读并接受上述条款 | ````` ▁只看该作者大 `````, ````` 写一个指定评分 `````, ````` ▁先判断以下内容的 `````, ````` 
nhelpviewer `````, ````` :①②\ `````, ````` 如所属地区或 `````, ````` 身份在社区发言 `````, ````` 用上半句或者 `````, ````` ▁请给出下面的 `````, ````` ▁这条电影评论 ````` | | deepseek-ai/deepseek-coder-33b-base | [mini](reports_mini/deepseek_ai_deepseek_coder_33b_base.md) [full](reports/deepseek_ai_deepseek_coder_33b_base.md) | 32256 × 7168 | BPE, Byte Input | 32022 | 256 | 53 | 327 | 9 | 111/631 | 108/628 | ArgumentException | ````` \xa0anys `````, ````` \xa0milions `````, ````` \xa0persones `````, ````` ▁desocupats `````, ````` Polítics `````, ````` automòbils `````, ````` ▁capbaix `````, ````` ▁unipersonals `````, ````` Родени `````, ````` ▁херцо ````` | -| upstage/solar-pro-preview-instruct | [mini](reports_mini/upstage_solar_pro_preview_instruct.md) [full](reports/upstage_solar_pro_preview_instruct.md) | 32128 × 5120 | BPE, Byte Fallback | 32128 | 351 | 0 | 0 | 131 | 138/639 | 127/605 | PropertyChanged | ````` ▁regnigaste `````, ````` ▁Mediabestanden `````, ````` ▁Хронологија `````, ````` ▁årsnederbörd `````, ````` ▁Portály `````, ````` Архівовано `````, ````` ▁Спољашње `````, ````` ▁надморској `````, ````` ▁autorytatywna `````, ````` ▁савезној ````` | | bigcode/starcoder2-15b | [mini](reports_mini/bigcode_starcoder2_15b.md) [full](reports/bigcode_starcoder2_15b.md) | 49152 × 6144 | BPE, Byte Input | 49152 | 242 | 0 | 676 | 38 | 128/968 | 128/966 | InvalidProtocolBufferException | ````` Integervelvel `````, ````` Loremipumdolorsitametconsecteturadipiscingelit `````, ````` lcsStatusWlan `````, ````` ucMZQg `````, ````` hqSLBjKPZFq `````, ````` fWILIM `````, ````` tableOBJECT `````, ````` vjHPp `````, ````` fWILIMmJNUZLIEMNV `````, ````` GQGantt ````` | | ibm-granite/granite-3.0-8b-base | [mini](reports_mini/ibm_granite_granite_3_0_8b_base.md) [full](reports/ibm_granite_granite_3_0_8b_base.md) | 49152 × 4096, tied | BPE, Byte Input | 49152 | 242 | 0 | 675 | 19 | 209/979 | 198/967 | InvalidProtocolBufferException | ````` DetalleNotaCreditoPuntoVenta `````, ````` strmojo `````, ````` 
ExperimentResultSet `````, ````` DocSyntaxParser `````, ````` ParamCaption `````, ````` DetalleOrdenCompra `````, ````` MODRST `````, ````` esatterwhite `````, ````` DetalleFacturaProveedor `````, ````` StatusPointer ````` | | 01-ai/Yi-1.5-9B | [mini](reports_mini/01_ai_Yi_1_5_9B.md) [full](reports/01_ai_Yi_1_5_9B.md) | 64000 × 4096 | BPE, Byte Fallback | 64000 | 354 | 4 | 0 | 238 | 63/1274 | 61/1261 | Acknowledgements | ````` \\+::\\+ `````, ````` \\+\\_\\+\\+ `````, ````` \\+\\_\\+ `````, ````` mabaochang `````, ````` \\+:: `````, ````` nzoem `````, ````` \\+\\_\\ `````, ````` Разпространение `````, ````` mcited `````, ````` ":"\ufeff ````` | @@ -55,10 +56,13 @@ | Qwen/Qwen2.5-32B-Instruct | [mini](reports_mini/Qwen_Qwen2_5_32B_Instruct.md) [full](reports/Qwen_Qwen2_5_32B_Instruct.md) | 152064 × 5120 | BPE, Byte Input | 151665 | 256 | 1906 | 1320 | 25 | 1256/2979 | 1238/2958 | abcdefghijklmnopqrstuvwxyz | ````` ▁ForCanBeConverted `````, ````` ▁ForCanBeConvertedToF `````, ````` PostalCodesNL `````, ````` $PostalCodesNL `````, ````` (stypy `````, ````` ▁zwłaszc `````, ````` \tTokenNameIdentifier `````, ````` ▁thuisontvangst `````, ````` טלוו `````, ````` useRalative ````` | | Qwen/Qwen2.5-7B | [mini](reports_mini/Qwen_Qwen2_5_7B.md) [full](reports/Qwen_Qwen2_5_7B.md) | 152064 × 3584 | BPE, Byte Input | 151665 | 256 | 1906 | 1320 | 25 | 1332/2979 | 1311/2958 | abcdefghijklmnopqrstuvwxyz | ````` \tTokenNameIdentifier `````, ````` ▁ForCanBeConverted `````, ````` ▁ForCanBeConvertedToF `````, ````` PostalCodesNL `````, ````` $PostalCodesNL `````, ````` ▁zwłaszc `````, ````` (stypy `````, ````` טלוו `````, ````` ▁thuisontvangst `````, ````` useRalative ````` | | Qwen/Qwen2.5-7B-Instruct | [mini](reports_mini/Qwen_Qwen2_5_7B_Instruct.md) [full](reports/Qwen_Qwen2_5_7B_Instruct.md) | 152064 × 3584 | BPE, Byte Input | 151665 | 256 | 1906 | 1320 | 25 | 1859/2979 | 1842/2958 | abcdefghijklmnopqrstuvwxyz | ````` \tTokenNameIdentifier `````, ````` ▁ForCanBeConverted `````, 
````` ▁ForCanBeConvertedToF `````, ````` PostalCodesNL `````, ````` $PostalCodesNL `````, ````` ▁zwłaszc `````, ````` (stypy `````, ````` טלוו `````, ````` ▁thuisontvangst `````, ````` useRalative ````` | -| meta-llama/Meta-Llama-3.1-70B | [mini](reports_mini/meta_llama_Meta_Llama_3_1_70B.md) [full](reports/meta_llama_Meta_Llama_3_1_70B.md) | 128256 × 8192 | BPE, Byte Input | 128256 | 256 | 0 | 1224 | 256 | 458/2539 | 204/2202 | abcdefghijklmnopqrstuvwxyz | ````` $PostalCodesNL `````, ````` ▁ForCanBeConvertedToF `````, ````` илася `````, ````` useRalative `````, ````` ávající `````, ````` илакти `````, ````` атися `````, ````` ыџN `````, ````` ЎыџN `````, ````` İTESİ ````` | +| allenai/Llama-3.1-Tulu-3-8B | [mini](reports_mini/allenai_Llama_3_1_Tulu_3_8B.md) [full](reports/allenai_Llama_3_1_Tulu_3_8B.md) | 128264 × 4096 | BPE, Byte Input | 128257 | 256 | 0 | 1224 | 257 | 559/2540 | 305/2225 | abcdefghijklmnopqrstuvwxyz | ````` $PostalCodesNL `````, ````` итися `````, ````` ıldığında `````, ````` аракт `````, ````` ЎыџN `````, ````` \tTokenNameIdentifier `````, ````` атися `````, ````` ávající `````, ````` useRalative `````, ````` ЎыџNЎыџN ````` | +| allenai/OLMo-2-1124-7B | [mini](reports_mini/allenai_OLMo_2_1124_7B.md) [full](reports/allenai_OLMo_2_1124_7B.md) | 100352 × 4096[*] | BPE, Byte Input | 100278 | 256 | 0 | 645 | 19 | 197/1992 | 179/1973 | abcdefghijklmnopqrstuvwxyz | ````` useRalative `````, ````` useRal `````, ````` webElementProperties `````, ````` \|\|\|EMAIL_ADDRESS\|\|\| `````, ````` $PostalCodesNL `````, ````` ▁ForCanBeConvertedToF `````, ````` \tRTHOOK `````, ````` webElementX `````, ````` \tRTCK `````, ````` \tRTLU ````` | +| meta-llama/Meta-Llama-3-70B | [mini](reports_mini/meta_llama_Meta_Llama_3_70B.md) [full](reports/meta_llama_Meta_Llama_3_70B.md) | 128256 × 8192 | BPE, Byte Input | 128256 | 256 | 0 | 1224 | 256 | 462/2539 | 208/2204 | abcdefghijklmnopqrstuvwxyz | ````` $PostalCodesNL `````, ````` ▁ForCanBeConvertedToF `````, ````` илася 
`````, ````` useRalative `````, ````` ávající `````, ````` илакти `````, ````` ыџN `````, ````` атися `````, ````` ЎыџN `````, ````` İTESİ ````` | | meta-llama/Meta-Llama-3.1-8B | [mini](reports_mini/meta_llama_Meta_Llama_3_1_8B.md) [full](reports/meta_llama_Meta_Llama_3_1_8B.md) | 128256 × 4096 | BPE, Byte Input | 128256 | 256 | 0 | 1224 | 256 | 534/2540 | 280/2225 | abcdefghijklmnopqrstuvwxyz | ````` $PostalCodesNL `````, ````` итися `````, ````` ıldığında `````, ````` аракт `````, ````` ЎыџN `````, ````` ▁ForCanBeConverted `````, ````` \tTokenNameIdentifier `````, ````` атися `````, ````` ;\r\r\r\n `````, ````` ávající ````` | -| meta-llama/Meta-Llama-3-70B | [mini](reports_mini/meta_llama_Meta_Llama_3_70B.md) [full](reports/meta_llama_Meta_Llama_3_70B.md) | 128256 × 8192 | BPE, Byte Input | 128256 | 256 | 0 | 1224 | 256 | 462/2539 | 208/2204 | ABCDEFGHIJKLMNOPQRSTUVWXYZ | ````` $PostalCodesNL `````, ````` ▁ForCanBeConvertedToF `````, ````` илася `````, ````` useRalative `````, ````` ávající `````, ````` илакти `````, ````` ыџN `````, ````` атися `````, ````` ЎыџN `````, ````` İTESİ ````` | +| allenai/OLMo-2-1124-13B | [mini](reports_mini/allenai_OLMo_2_1124_13B.md) [full](reports/allenai_OLMo_2_1124_13B.md) | 100352 × 5120[*] | BPE, Byte Input | 100278 | 256 | 0 | 645 | 19 | 192/1992 | 174/1974 | ABCDEFGHIJKLMNOPQRSTUVWXYZ | ````` \tRTHOOK `````, ````` richTextPanel `````, ````` \|\|\|PHONE_NUMBER\|\|\| `````, ````` ▁ForCanBeConverted `````, ````` ▁ForCanBeConvertedToF `````, ````` SpecWarn `````, ````` webElementXpaths `````, ````` adaptiveStyles `````, ````` useRalativeImagePath `````, ````` useRal ````` | | meta-llama/Meta-Llama-3-8B | [mini](reports_mini/meta_llama_Meta_Llama_3_8B.md) [full](reports/meta_llama_Meta_Llama_3_8B.md) | 128256 × 4096 | BPE, Byte Input | 128256 | 256 | 0 | 1224 | 256 | 556/2540 | 302/2203 | ABCDEFGHIJKLMNOPQRSTUVWXYZ | ````` $PostalCodesNL `````, ````` итися `````, ````` \tTokenNameIdentifier `````, ````` ▁ForCanBeConverted 
`````, ````` ıldığında `````, ````` аракт `````, ````` ;\r\r\r\n `````, ````` атися `````, ````` еристи `````, ````` ávající ````` | +| meta-llama/Meta-Llama-3.1-70B | [mini](reports_mini/meta_llama_Meta_Llama_3_1_70B.md) [full](reports/meta_llama_Meta_Llama_3_1_70B.md) | 128256 × 8192 | BPE, Byte Input | 128256 | 256 | 0 | 1224 | 256 | 458/2539 | 204/2202 | ABCDEFGHIJKLMNOPQRSTUVWXYZ | ````` $PostalCodesNL `````, ````` ▁ForCanBeConvertedToF `````, ````` илася `````, ````` useRalative `````, ````` ávající `````, ````` илакти `````, ````` атися `````, ````` ыџN `````, ````` ЎыџN `````, ````` İTESİ ````` | | stabilityai/stablelm-2-12b | [mini](reports_mini/stabilityai_stablelm_2_12b.md) [full](reports/stabilityai_stablelm_2_12b.md) | 100352 × 5120 | BPE, Byte Input | 100289 | 256 | 1102 | 645 | 33 | 138/1997 | 109/1966 | ABCDEFGHIJKLMNOPQRSTUVWXYZ | ````` (stypy `````, ````` PostalCodesNL `````, ````` ▁ForCanBeConverted `````, ````` useRalative `````, ````` \tTokenNameIdentifier `````, ````` ▁ForCanBeConvertedToF `````, ````` $PostalCodesNL `````, ````` elementGuidId `````, ````` webElementXpaths `````, ````` webElementProperties ````` | | chuxin-llm/Chuxin-1.6B-Base | [mini](reports_mini/chuxin_llm_Chuxin_1_6B_Base.md) [full](reports/chuxin_llm_Chuxin_1_6B_Base.md) | 102400 × 2048 | BPE, Byte Input | 100015 | 256 | 32 | 438 | 2 | 887/1990 | 886/1983 | IllegalArgumentException | ````` 日内与新浪看点 `````, ````` 不代表新浪看点 `````, ````` 基督教基督教基督教 `````, ````` controlcap `````, ````` orangehilldev `````, ````` кедония `````, ````` посолство `````, ````` lemanya `````, ````` ▁EDIPU `````, ````` RecordedVote ````` | | deepseek-ai/DeepSeek-V2-Lite | [mini](reports_mini/deepseek_ai_DeepSeek_V2_Lite.md) [full](reports/deepseek_ai_DeepSeek_V2_Lite.md) | 102400 × 2048 | BPE, Byte Input | 100002 | 243 | 12 | 438 | 2 | 202/1989 | 202/1989 | IllegalArgumentException | ````` ="../../../../..">< `````, ````` IconSuccessEncoded `````, ````` IconErrorEncoded `````, ````` orangehilldev `````, 
````` ExternalTaskPojo `````, ````` typeNameLink `````, ````` navBarCell `````, ````` textquoted `````, ````` 日内与新浪看点 `````, ````` Supamiu ````` | @@ -66,10 +70,10 @@ | deepseek-ai/deepseek-math-7b-base | [mini](reports_mini/deepseek_ai_deepseek_math_7b_base.md) [full](reports/deepseek_ai_deepseek_math_7b_base.md) | 102400 × 4096 | BPE, Byte Input | 100002 | 243 | 12 | 438 | 2 | 202/1989 | 202/1989 | IllegalArgumentException | ````` IconSuccessEncoded `````, ````` 日内与新浪看点 `````, ````` IconErrorEncoded `````, ````` orangehilldev `````, ````` 不代表新浪看点 `````, ````` ="../../../../..">< `````, ````` ▁EDIPU `````, ````` lemanya `````, ````` odeciclismo `````, ````` кедония ````` | | LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct | [mini](reports_mini/LGAI_EXAONE_EXAONE_3_0_7_8B_Instruct.md) [full](reports/LGAI_EXAONE_EXAONE_3_0_7_8B_Instruct.md) | 102400 × 4096 | BPE, Byte Input | 102400 | 256 | 2 | 1222 | 314 | 763/2022 | 462/1648 | MethodAccessorImpl | ````` ▁**]{}, `````, ````` ▁\|>#!/ `````, ````` ▁FBQyx `````, ````` ▁\x95\x98 `````, ````` ▁*]{}, `````, ````` ▁',['../ `````, ````` ▁\x9d\x80 `````, ````` ▁*]{}. 
`````, ````` ▁$]{}]{} `````, ````` ▁*]{} ````` | | mistralai/Mistral-Nemo-Base-2407 | [mini](reports_mini/mistralai_Mistral_Nemo_Base_2407.md) [full](reports/mistralai_Mistral_Nemo_Base_2407.md) | 131072 × 5120 | BPE, Byte Input | 131072 | 256 | 0 | 1307 | 1000 | 1279/2595 | 277/1585 | Vriendschappelijk | ````` ోగ్యాస్ `````, ````` 页面存档 `````, ````` ురుగున `````, ````` ▁పారబో `````, ````` రుగుదొడ `````, ````` ▁erresident `````, ````` మురుగున `````, ````` \xa0μg `````, ````` ▁ట్రాక్టర్ల `````, ````` abezian ````` | -| CohereForAI/aya-23-35B | [mini](reports_mini/CohereForAI_aya_23_35B.md) [full](reports/CohereForAI_aya_23_35B.md) | 256000 × 8192, tied | BPE, Byte Input | 255029 | 256 | 1403 | 2956 | 37 | 1693/5012 | 1650/4955 | InvalidProtocolBufferException | ````` tochassubtree `````, ````` ▁ARStdSong `````, ````` ▁林肯近地小行星研究小 `````, ````` 目前尚未由人工引 `````, ````` ageryears `````, ````` AddLanguageSpecificText `````, ````` tocguid `````, ````` ▁hbBiddersParams `````, ````` recDocCases `````, ````` \U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f\U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f ````` | -| CohereForAI/c4ai-command-r-plus | [mini](reports_mini/CohereForAI_c4ai_command_r_plus.md) [full](reports/CohereForAI_c4ai_command_r_plus.md) | 256000 × 12288, tied | BPE, Byte Input | 255029 | 256 | 1403 | 2956 | 37 | 75/5012 | 47/4962 | InvalidProtocolBufferException | ````` tocguid `````, ````` 目前尚未由人工引 `````, ````` ▁ARStdSong `````, ````` ▁hbBiddersParams `````, ````` recDocCases `````, ````` ▁林肯近地小行星研究小 `````, ````` AddLanguageSpecificText `````, ````` \U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f\U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f `````, ````` tocectory `````, ````` 和人口皆未知 ````` | -| CohereForAI/c4ai-command-r-v01 | [mini](reports_mini/CohereForAI_c4ai_command_r_v01.md) [full](reports/CohereForAI_c4ai_command_r_v01.md) | 256000 × 8192, tied | BPE, Byte Input | 255029 | 256 | 1403 | 2956 | 37 | 
306/5012 | 278/4963 | InvalidProtocolBufferException | ````` AddLanguageSpecificText `````, ````` ▁ARStdSong `````, ````` ▁林肯近地小行星研究小 `````, ````` tochassubtree `````, ````` ageryears `````, ````` ▁hbBiddersParams `````, ````` tocguid `````, ````` 目前尚未由人工引 `````, ````` recDocCases `````, ````` \U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f\U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f ````` | +| CohereForAI/aya-23-35B | [mini](reports_mini/CohereForAI_aya_23_35B.md) [full](reports/CohereForAI_aya_23_35B.md) | 256000 × 8192, tied | BPE, Byte Input | 255029 | 256 | 1403 | 2956 | 37 | 1693/5012 | 1650/4955 | InvalidProtocolBufferException | ````` ▁林肯近地小行星研究小 `````, ````` tochassubtree `````, ````` 目前尚未由人工引 `````, ````` ▁ARStdSong `````, ````` tocguid `````, ````` AddLanguageSpecificText `````, ````` ageryears `````, ````` recDocCases `````, ````` ▁hbBiddersParams `````, ````` \U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f\U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f ````` | +| CohereForAI/c4ai-command-r-plus | [mini](reports_mini/CohereForAI_c4ai_command_r_plus.md) [full](reports/CohereForAI_c4ai_command_r_plus.md) | 256000 × 12288, tied | BPE, Byte Input | 255029 | 256 | 1403 | 2956 | 37 | 75/5012 | 47/4962 | InvalidProtocolBufferException | ````` tocguid `````, ````` 目前尚未由人工引 `````, ````` ▁hbBiddersParams `````, ````` ▁ARStdSong `````, ````` recDocCases `````, ````` ▁林肯近地小行星研究小 `````, ````` AddLanguageSpecificText `````, ````` \U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f\U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f `````, ````` tocectory `````, ````` 和人口皆未知 ````` | +| CohereForAI/c4ai-command-r-v01 | [mini](reports_mini/CohereForAI_c4ai_command_r_v01.md) [full](reports/CohereForAI_c4ai_command_r_v01.md) | 256000 × 8192, tied | BPE, Byte Input | 255029 | 256 | 1403 | 2956 | 37 | 306/5012 | 278/4963 | InvalidProtocolBufferException | ````` AddLanguageSpecificText `````, ````` 
▁ARStdSong `````, ````` ageryears `````, ````` ▁林肯近地小行星研究小 `````, ````` tocguid `````, ````` tochassubtree `````, ````` ▁hbBiddersParams `````, ````` recDocCases `````, ````` 目前尚未由人工引 `````, ````` \U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f\U000e0067\U000e0062\U000e0065\U000e006e\U000e0067\U000e007f ````` | | Fugaku-LLM/Fugaku-LLM-13B | [mini](reports_mini/Fugaku_LLM_Fugaku_LLM_13B.md) [full](reports/Fugaku_LLM_Fugaku_LLM_13B.md) | 49152 × 5184, tied | Unigram, Byte Fallback | 48586 | 353 | 85 | 0 | 8 | 682/972 | 675/965 | FIAsyncOperation | ````` ▁kGTLR `````, ````` RESETVALUE `````, ````` \ue964 `````, ````` YYCURSOR `````, ````` ▁GBProperty `````, ````` CyFunction `````, ````` VNVPROC `````, ````` DIPSETTING `````, ````` acadoWorkspace `````, ````` ewGetProcAddress ````` | | facebook/xglm-7.5B | [mini](reports_mini/facebook_xglm_7_5B.md) [full](reports/facebook_xglm_7_5B.md) | 256008 × 4096, tied | Unigram | 256008 | 94 | 20 | 0 | 11 | 12/5129 | 11/5120 | 习近平新时代中国特色社会主义思想 | ````` වැසි `````, ````` ▁ukupnog `````, ````` ᓯᒪᔪ `````, ````` ▁ਪ੍ਰਕਾਸ਼ `````, ````` ▁podmienok `````, ````` ▁sėkmingai `````, ````` рацыі `````, ````` ▁යාපාරය `````, ````` ೋರ್ಟ್ `````, ````` න්ද් ````` | -Processed 67 models, 66 succeeded1 failed: ['upstage/SOLAR-10.7B-v1.0'] +Processed 70 models, 70 succeeded \ No newline at end of file diff --git a/results/verifications/upstage_SOLAR_10_7B_v1_0.jsonl.gz b/results/verifications/upstage_SOLAR_10_7B_v1_0.jsonl.gz index b0366a8..17510e0 100644 Binary files a/results/verifications/upstage_SOLAR_10_7B_v1_0.jsonl.gz and b/results/verifications/upstage_SOLAR_10_7B_v1_0.jsonl.gz differ diff --git a/results/verifications/upstage_solar_pro_preview_instruct.jsonl.gz b/results/verifications/upstage_solar_pro_preview_instruct.jsonl.gz index 093f521..9d51d65 100644 Binary files a/results/verifications/upstage_solar_pro_preview_instruct.jsonl.gz and b/results/verifications/upstage_solar_pro_preview_instruct.jsonl.gz differ diff --git 
a/results/verifications_scatterplot/upstage_SOLAR_10_7B_v1_0.png b/results/verifications_scatterplot/upstage_SOLAR_10_7B_v1_0.png index 60be412..de4ff80 100644 Binary files a/results/verifications_scatterplot/upstage_SOLAR_10_7B_v1_0.png and b/results/verifications_scatterplot/upstage_SOLAR_10_7B_v1_0.png differ