Task Name | Train | Val | Test | Val/Test Docs | Metrics |
---|---|---|---|---|---|
anagrams1 | ✓ | 10000 | acc | ||
anagrams2 | ✓ | 10000 | acc | ||
anli_r1 | ✓ | ✓ | ✓ | 1000 | acc |
anli_r2 | ✓ | ✓ | ✓ | 1000 | acc |
anli_r3 | ✓ | ✓ | ✓ | 1200 | acc |
arc_challenge | ✓ | ✓ | ✓ | 1172 | acc, acc_norm |
arc_easy | ✓ | ✓ | ✓ | 2376 | acc, acc_norm |
arithmetic_1dc | ✓ | 2000 | acc | ||
arithmetic_2da | ✓ | 2000 | acc | ||
arithmetic_2dm | ✓ | 2000 | acc | ||
arithmetic_2ds | ✓ | 2000 | acc | ||
arithmetic_3da | ✓ | 2000 | acc | ||
arithmetic_3ds | ✓ | 2000 | acc | ||
arithmetic_4da | ✓ | 2000 | acc | ||
arithmetic_4ds | ✓ | 2000 | acc | ||
arithmetic_5da | ✓ | 2000 | acc | ||
arithmetic_5ds | ✓ | 2000 | acc | ||
bigbench_causal_judgement | ✓ | 190 | multiple_choice_grade, exact_str_match | ||
bigbench_date_understanding | ✓ | 369 | multiple_choice_grade, exact_str_match | ||
bigbench_disambiguation_qa | ✓ | 258 | multiple_choice_grade, exact_str_match | ||
bigbench_dyck_languages | ✓ | 1000 | multiple_choice_grade, exact_str_match | ||
bigbench_formal_fallacies_syllogisms_negation | ✓ | 14200 | multiple_choice_grade, exact_str_match | ||
bigbench_geometric_shapes | ✓ | 359 | multiple_choice_grade, exact_str_match | ||
bigbench_hyperbaton | ✓ | 50000 | multiple_choice_grade, exact_str_match | ||
bigbench_logical_deduction_five_objects | ✓ | 500 | multiple_choice_grade, exact_str_match | ||
bigbench_logical_deduction_seven_objects | ✓ | 700 | multiple_choice_grade, exact_str_match | ||
bigbench_logical_deduction_three_objects | ✓ | 300 | multiple_choice_grade, exact_str_match | ||
bigbench_movie_recommendation | ✓ | 500 | multiple_choice_grade, exact_str_match | ||
bigbench_navigate | ✓ | 1000 | multiple_choice_grade, exact_str_match | ||
bigbench_reasoning_about_colored_objects | ✓ | 2000 | multiple_choice_grade, exact_str_match | ||
bigbench_ruin_names | ✓ | 448 | multiple_choice_grade, exact_str_match | ||
bigbench_salient_translation_error_detection | ✓ | 998 | multiple_choice_grade, exact_str_match | ||
bigbench_snarks | ✓ | 181 | multiple_choice_grade, exact_str_match | ||
bigbench_sports_understanding | ✓ | 986 | multiple_choice_grade, exact_str_match | ||
bigbench_temporal_sequences | ✓ | 1000 | multiple_choice_grade, exact_str_match | ||
bigbench_tracking_shuffled_objects_five_objects | ✓ | 1250 | multiple_choice_grade, exact_str_match | ||
bigbench_tracking_shuffled_objects_seven_objects | ✓ | 1750 | multiple_choice_grade, exact_str_match | ||
bigbench_tracking_shuffled_objects_three_objects | ✓ | 300 | multiple_choice_grade, exact_str_match | ||
blimp_adjunct_island | ✓ | 1000 | acc | ||
blimp_anaphor_gender_agreement | ✓ | 1000 | acc | ||
blimp_anaphor_number_agreement | ✓ | 1000 | acc | ||
blimp_animate_subject_passive | ✓ | 1000 | acc | ||
blimp_animate_subject_trans | ✓ | 1000 | acc | ||
blimp_causative | ✓ | 1000 | acc | ||
blimp_complex_NP_island | ✓ | 1000 | acc | ||
blimp_coordinate_structure_constraint_complex_left_branch | ✓ | 1000 | acc | ||
blimp_coordinate_structure_constraint_object_extraction | ✓ | 1000 | acc | ||
blimp_determiner_noun_agreement_1 | ✓ | 1000 | acc | ||
blimp_determiner_noun_agreement_2 | ✓ | 1000 | acc | ||
blimp_determiner_noun_agreement_irregular_1 | ✓ | 1000 | acc | ||
blimp_determiner_noun_agreement_irregular_2 | ✓ | 1000 | acc | ||
blimp_determiner_noun_agreement_with_adj_2 | ✓ | 1000 | acc | ||
blimp_determiner_noun_agreement_with_adj_irregular_1 | ✓ | 1000 | acc | ||
blimp_determiner_noun_agreement_with_adj_irregular_2 | ✓ | 1000 | acc | ||
blimp_determiner_noun_agreement_with_adjective_1 | ✓ | 1000 | acc | ||
blimp_distractor_agreement_relational_noun | ✓ | 1000 | acc | ||
blimp_distractor_agreement_relative_clause | ✓ | 1000 | acc | ||
blimp_drop_argument | ✓ | 1000 | acc | ||
blimp_ellipsis_n_bar_1 | ✓ | 1000 | acc | ||
blimp_ellipsis_n_bar_2 | ✓ | 1000 | acc | ||
blimp_existential_there_object_raising | ✓ | 1000 | acc | ||
blimp_existential_there_quantifiers_1 | ✓ | 1000 | acc | ||
blimp_existential_there_quantifiers_2 | ✓ | 1000 | acc | ||
blimp_existential_there_subject_raising | ✓ | 1000 | acc | ||
blimp_expletive_it_object_raising | ✓ | 1000 | acc | ||
blimp_inchoative | ✓ | 1000 | acc | ||
blimp_intransitive | ✓ | 1000 | acc | ||
blimp_irregular_past_participle_adjectives | ✓ | 1000 | acc | ||
blimp_irregular_past_participle_verbs | ✓ | 1000 | acc | ||
blimp_irregular_plural_subject_verb_agreement_1 | ✓ | 1000 | acc | ||
blimp_irregular_plural_subject_verb_agreement_2 | ✓ | 1000 | acc | ||
blimp_left_branch_island_echo_question | ✓ | 1000 | acc | ||
blimp_left_branch_island_simple_question | ✓ | 1000 | acc | ||
blimp_matrix_question_npi_licensor_present | ✓ | 1000 | acc | ||
blimp_npi_present_1 | ✓ | 1000 | acc | ||
blimp_npi_present_2 | ✓ | 1000 | acc | ||
blimp_only_npi_licensor_present | ✓ | 1000 | acc | ||
blimp_only_npi_scope | ✓ | 1000 | acc | ||
blimp_passive_1 | ✓ | 1000 | acc | ||
blimp_passive_2 | ✓ | 1000 | acc | ||
blimp_principle_A_c_command | ✓ | 1000 | acc | ||
blimp_principle_A_case_1 | ✓ | 1000 | acc | ||
blimp_principle_A_case_2 | ✓ | 1000 | acc | ||
blimp_principle_A_domain_1 | ✓ | 1000 | acc | ||
blimp_principle_A_domain_2 | ✓ | 1000 | acc | ||
blimp_principle_A_domain_3 | ✓ | 1000 | acc | ||
blimp_principle_A_reconstruction | ✓ | 1000 | acc | ||
blimp_regular_plural_subject_verb_agreement_1 | ✓ | 1000 | acc | ||
blimp_regular_plural_subject_verb_agreement_2 | ✓ | 1000 | acc | ||
blimp_sentential_negation_npi_licensor_present | ✓ | 1000 | acc | ||
blimp_sentential_negation_npi_scope | ✓ | 1000 | acc | ||
blimp_sentential_subject_island | ✓ | 1000 | acc | ||
blimp_superlative_quantifiers_1 | ✓ | 1000 | acc | ||
blimp_superlative_quantifiers_2 | ✓ | 1000 | acc | ||
blimp_tough_vs_raising_1 | ✓ | 1000 | acc | ||
blimp_tough_vs_raising_2 | ✓ | 1000 | acc | ||
blimp_transitive | ✓ | 1000 | acc | ||
blimp_wh_island | ✓ | 1000 | acc | ||
blimp_wh_questions_object_gap | ✓ | 1000 | acc | ||
blimp_wh_questions_subject_gap | ✓ | 1000 | acc | ||
blimp_wh_questions_subject_gap_long_distance | ✓ | 1000 | acc | ||
blimp_wh_vs_that_no_gap | ✓ | 1000 | acc | ||
blimp_wh_vs_that_no_gap_long_distance | ✓ | 1000 | acc | ||
blimp_wh_vs_that_with_gap | ✓ | 1000 | acc | ||
blimp_wh_vs_that_with_gap_long_distance | ✓ | 1000 | acc | ||
boolq | ✓ | ✓ | 3270 | acc | |
cb | ✓ | ✓ | 56 | acc, f1 | |
cola | ✓ | ✓ | 1043 | mcc | |
copa | ✓ | ✓ | 100 | acc | |
coqa | ✓ | ✓ | 500 | f1, em | |
crows_pairs_english | ✓ | 1677 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_age | ✓ | 91 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_autre | ✓ | 11 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_disability | ✓ | 65 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_gender | ✓ | 320 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_nationality | ✓ | 216 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_physical_appearance | ✓ | 72 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_race_color | ✓ | 508 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_religion | ✓ | 111 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_sexual_orientation | ✓ | 93 | likelihood_difference, pct_stereotype | ||
crows_pairs_english_socioeconomic | ✓ | 190 | likelihood_difference, pct_stereotype | ||
crows_pairs_french | ✓ | 1677 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_age | ✓ | 90 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_autre | ✓ | 13 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_disability | ✓ | 66 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_gender | ✓ | 321 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_nationality | ✓ | 253 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_physical_appearance | ✓ | 72 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_race_color | ✓ | 460 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_religion | ✓ | 115 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_sexual_orientation | ✓ | 91 | likelihood_difference, pct_stereotype | ||
crows_pairs_french_socioeconomic | ✓ | 196 | likelihood_difference, pct_stereotype | ||
cycle_letters | ✓ | 10000 | acc | ||
drop | ✓ | ✓ | 9536 | em, f1 | |
ethics_cm | ✓ | ✓ | 3885 | acc | |
ethics_deontology | ✓ | ✓ | 3596 | acc, em | |
ethics_justice | ✓ | ✓ | 2704 | acc, em | |
ethics_utilitarianism | ✓ | ✓ | 4808 | acc | |
ethics_utilitarianism_original | ✓ | 4808 | acc | ||
ethics_virtue | ✓ | ✓ | 4975 | acc, em | |
gsm8k | ✓ | ✓ | 1319 | acc | |
headqa | ✓ | ✓ | ✓ | 2742 | acc, acc_norm |
headqa_en | ✓ | ✓ | ✓ | 2742 | acc, acc_norm |
headqa_es | ✓ | ✓ | ✓ | 2742 | acc, acc_norm |
hellaswag | ✓ | ✓ | 10042 | acc, acc_norm | |
hendrycksTest-abstract_algebra | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-anatomy | ✓ | ✓ | 135 | acc, acc_norm | |
hendrycksTest-astronomy | ✓ | ✓ | 152 | acc, acc_norm | |
hendrycksTest-business_ethics | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-clinical_knowledge | ✓ | ✓ | 265 | acc, acc_norm | |
hendrycksTest-college_biology | ✓ | ✓ | 144 | acc, acc_norm | |
hendrycksTest-college_chemistry | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-college_computer_science | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-college_mathematics | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-college_medicine | ✓ | ✓ | 173 | acc, acc_norm | |
hendrycksTest-college_physics | ✓ | ✓ | 102 | acc, acc_norm | |
hendrycksTest-computer_security | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-conceptual_physics | ✓ | ✓ | 235 | acc, acc_norm | |
hendrycksTest-econometrics | ✓ | ✓ | 114 | acc, acc_norm | |
hendrycksTest-electrical_engineering | ✓ | ✓ | 145 | acc, acc_norm | |
hendrycksTest-elementary_mathematics | ✓ | ✓ | 378 | acc, acc_norm | |
hendrycksTest-formal_logic | ✓ | ✓ | 126 | acc, acc_norm | |
hendrycksTest-global_facts | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-high_school_biology | ✓ | ✓ | 310 | acc, acc_norm | |
hendrycksTest-high_school_chemistry | ✓ | ✓ | 203 | acc, acc_norm | |
hendrycksTest-high_school_computer_science | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-high_school_european_history | ✓ | ✓ | 165 | acc, acc_norm | |
hendrycksTest-high_school_geography | ✓ | ✓ | 198 | acc, acc_norm | |
hendrycksTest-high_school_government_and_politics | ✓ | ✓ | 193 | acc, acc_norm | |
hendrycksTest-high_school_macroeconomics | ✓ | ✓ | 390 | acc, acc_norm | |
hendrycksTest-high_school_mathematics | ✓ | ✓ | 270 | acc, acc_norm | |
hendrycksTest-high_school_microeconomics | ✓ | ✓ | 238 | acc, acc_norm | |
hendrycksTest-high_school_physics | ✓ | ✓ | 151 | acc, acc_norm | |
hendrycksTest-high_school_psychology | ✓ | ✓ | 545 | acc, acc_norm | |
hendrycksTest-high_school_statistics | ✓ | ✓ | 216 | acc, acc_norm | |
hendrycksTest-high_school_us_history | ✓ | ✓ | 204 | acc, acc_norm | |
hendrycksTest-high_school_world_history | ✓ | ✓ | 237 | acc, acc_norm | |
hendrycksTest-human_aging | ✓ | ✓ | 223 | acc, acc_norm | |
hendrycksTest-human_sexuality | ✓ | ✓ | 131 | acc, acc_norm | |
hendrycksTest-international_law | ✓ | ✓ | 121 | acc, acc_norm | |
hendrycksTest-jurisprudence | ✓ | ✓ | 108 | acc, acc_norm | |
hendrycksTest-logical_fallacies | ✓ | ✓ | 163 | acc, acc_norm | |
hendrycksTest-machine_learning | ✓ | ✓ | 112 | acc, acc_norm | |
hendrycksTest-management | ✓ | ✓ | 103 | acc, acc_norm | |
hendrycksTest-marketing | ✓ | ✓ | 234 | acc, acc_norm | |
hendrycksTest-medical_genetics | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-miscellaneous | ✓ | ✓ | 783 | acc, acc_norm | |
hendrycksTest-moral_disputes | ✓ | ✓ | 346 | acc, acc_norm | |
hendrycksTest-moral_scenarios | ✓ | ✓ | 895 | acc, acc_norm | |
hendrycksTest-nutrition | ✓ | ✓ | 306 | acc, acc_norm | |
hendrycksTest-philosophy | ✓ | ✓ | 311 | acc, acc_norm | |
hendrycksTest-prehistory | ✓ | ✓ | 324 | acc, acc_norm | |
hendrycksTest-professional_accounting | ✓ | ✓ | 282 | acc, acc_norm | |
hendrycksTest-professional_law | ✓ | ✓ | 1534 | acc, acc_norm | |
hendrycksTest-professional_medicine | ✓ | ✓ | 272 | acc, acc_norm | |
hendrycksTest-professional_psychology | ✓ | ✓ | 612 | acc, acc_norm | |
hendrycksTest-public_relations | ✓ | ✓ | 110 | acc, acc_norm | |
hendrycksTest-security_studies | ✓ | ✓ | 245 | acc, acc_norm | |
hendrycksTest-sociology | ✓ | ✓ | 201 | acc, acc_norm | |
hendrycksTest-us_foreign_policy | ✓ | ✓ | 100 | acc, acc_norm | |
hendrycksTest-virology | ✓ | ✓ | 166 | acc, acc_norm | |
hendrycksTest-world_religions | ✓ | ✓ | 171 | acc, acc_norm | |
iwslt17-ar-en | ✓ | 1460 | bleu, chrf, ter | ||
iwslt17-en-ar | ✓ | 1460 | bleu, chrf, ter | ||
lambada_openai | ✓ | 5153 | ppl, acc | ||
lambada_openai_cloze | ✓ | 5153 | ppl, acc | ||
lambada_openai_mt_de | ✓ | 5153 | ppl, acc | ||
lambada_openai_mt_en | ✓ | 5153 | ppl, acc | ||
lambada_openai_mt_es | ✓ | 5153 | ppl, acc | ||
lambada_openai_mt_fr | ✓ | 5153 | ppl, acc | ||
lambada_openai_mt_it | ✓ | 5153 | ppl, acc | ||
lambada_standard | ✓ | ✓ | 5153 | ppl, acc | |
lambada_standard_cloze | ✓ | ✓ | 5153 | ppl, acc | |
logiqa | ✓ | ✓ | ✓ | 651 | acc, acc_norm |
math_algebra | ✓ | ✓ | 1187 | acc | |
math_asdiv | ✓ | 2305 | acc | ||
math_counting_and_prob | ✓ | ✓ | 474 | acc | |
math_geometry | ✓ | ✓ | 479 | acc | |
math_intermediate_algebra | ✓ | ✓ | 903 | acc | |
math_num_theory | ✓ | ✓ | 540 | acc | |
math_prealgebra | ✓ | ✓ | 871 | acc | |
math_precalc | ✓ | ✓ | 546 | acc | |
mathqa | ✓ | ✓ | ✓ | 2985 | acc, acc_norm |
mc_taco | ✓ | ✓ | 9442 | f1, em | |
mgsm_bn | ✓ | ✓ | 250 | acc | |
mgsm_de | ✓ | ✓ | 250 | acc | |
mgsm_en | ✓ | ✓ | 250 | acc | |
mgsm_es | ✓ | ✓ | 250 | acc | |
mgsm_fr | ✓ | ✓ | 250 | acc | |
mgsm_ja | ✓ | ✓ | 250 | acc | |
mgsm_ru | ✓ | ✓ | 250 | acc | |
mgsm_sw | ✓ | ✓ | 250 | acc | |
mgsm_te | ✓ | ✓ | 250 | acc | |
mgsm_th | ✓ | ✓ | 250 | acc | |
mgsm_zh | ✓ | ✓ | 250 | acc | |
mnli | ✓ | ✓ | 9815 | acc | |
mnli_mismatched | ✓ | ✓ | 9832 | acc | |
mrpc | ✓ | ✓ | 408 | acc, f1 | |
multirc | ✓ | ✓ | 4848 | acc | |
mutual | ✓ | ✓ | 886 | r@1, r@2, mrr | |
mutual_plus | ✓ | ✓ | 886 | r@1, r@2, mrr | |
openbookqa | ✓ | ✓ | ✓ | 500 | acc, acc_norm |
pawsx_de | ✓ | ✓ | ✓ | 2000 | acc |
pawsx_en | ✓ | ✓ | ✓ | 2000 | acc |
pawsx_es | ✓ | ✓ | ✓ | 2000 | acc |
pawsx_fr | ✓ | ✓ | ✓ | 2000 | acc |
pawsx_ja | ✓ | ✓ | ✓ | 2000 | acc |
pawsx_ko | ✓ | ✓ | ✓ | 2000 | acc |
pawsx_zh | ✓ | ✓ | ✓ | 2000 | acc |
pile_arxiv | ✓ | ✓ | 2407 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_bookcorpus2 | ✓ | ✓ | 28 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_books3 | ✓ | ✓ | 269 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_dm-mathematics | ✓ | ✓ | 1922 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_enron | ✓ | ✓ | 1010 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_europarl | ✓ | ✓ | 157 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_freelaw | ✓ | ✓ | 5101 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_github | ✓ | ✓ | 18195 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_gutenberg | ✓ | ✓ | 80 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_hackernews | ✓ | ✓ | 1632 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_nih-exporter | ✓ | ✓ | 1884 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_opensubtitles | ✓ | ✓ | 642 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_openwebtext2 | ✓ | ✓ | 32925 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_philpapers | ✓ | ✓ | 68 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_pile-cc | ✓ | ✓ | 52790 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_pubmed-abstracts | ✓ | ✓ | 29895 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_pubmed-central | ✓ | ✓ | 5911 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_stackexchange | ✓ | ✓ | 30378 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_ubuntu-irc | ✓ | ✓ | 22 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_uspto | ✓ | ✓ | 11415 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_wikipedia | ✓ | ✓ | 17511 | word_perplexity, byte_perplexity, bits_per_byte | |
pile_youtubesubtitles | ✓ | ✓ | 342 | word_perplexity, byte_perplexity, bits_per_byte | |
piqa | ✓ | ✓ | 1838 | acc, acc_norm | |
prost | ✓ | 18736 | acc, acc_norm | ||
pubmedqa | ✓ | 1000 | acc | ||
qa4mre_2011 | ✓ | 120 | acc, acc_norm | ||
qa4mre_2012 | ✓ | 160 | acc, acc_norm | ||
qa4mre_2013 | ✓ | 284 | acc, acc_norm | ||
qasper | ✓ | ✓ | 1764 | f1_yesno, f1_abstractive | |
qnli | ✓ | ✓ | 5463 | acc | |
qqp | ✓ | ✓ | 40430 | acc, f1 | |
race | ✓ | ✓ | ✓ | 1045 | acc |
random_insertion | ✓ | 10000 | acc | ||
record | ✓ | ✓ | 10000 | f1, em | |
reversed_words | ✓ | 10000 | acc | ||
rte | ✓ | ✓ | 277 | acc | |
sciq | ✓ | ✓ | ✓ | 1000 | acc, acc_norm |
squad2 | ✓ | ✓ | 11873 | exact, f1, HasAns_exact, HasAns_f1, NoAns_exact, NoAns_f1, best_exact, best_f1 | |
sst | ✓ | ✓ | 872 | acc | |
swag | ✓ | ✓ | 20006 | acc, acc_norm | |
toxigen | ✓ | ✓ | 940 | acc, acc_norm | |
triviaqa | ✓ | ✓ | 11313 | acc | |
truthfulqa_gen | ✓ | 817 | bleurt_max, bleurt_acc, bleurt_diff, bleu_max, bleu_acc, bleu_diff, rouge1_max, rouge1_acc, rouge1_diff, rouge2_max, rouge2_acc, rouge2_diff, rougeL_max, rougeL_acc, rougeL_diff | ||
truthfulqa_mc | ✓ | 817 | mc1, mc2 | ||
webqs | ✓ | ✓ | 2032 | acc | |
wic | ✓ | ✓ | 638 | acc | |
wikitext | ✓ | ✓ | ✓ | 62 | word_perplexity, byte_perplexity, bits_per_byte |
winogrande | ✓ | ✓ | 1267 | acc | |
wmt14-en-fr | ✓ | 3003 | bleu, chrf, ter | ||
wmt14-fr-en | ✓ | 3003 | bleu, chrf, ter | ||
wmt16-de-en | ✓ | 2999 | bleu, chrf, ter | ||
wmt16-en-de | ✓ | 2999 | bleu, chrf, ter | ||
wmt16-en-ro | ✓ | 1999 | bleu, chrf, ter | ||
wmt16-ro-en | ✓ | 1999 | bleu, chrf, ter | ||
wmt20-cs-en | ✓ | 664 | bleu, chrf, ter | ||
wmt20-de-en | ✓ | 785 | bleu, chrf, ter | ||
wmt20-de-fr | ✓ | 1619 | bleu, chrf, ter | ||
wmt20-en-cs | ✓ | 1418 | bleu, chrf, ter | ||
wmt20-en-de | ✓ | 1418 | bleu, chrf, ter | ||
wmt20-en-iu | ✓ | 2971 | bleu, chrf, ter | ||
wmt20-en-ja | ✓ | 1000 | bleu, chrf, ter | ||
wmt20-en-km | ✓ | 2320 | bleu, chrf, ter | ||
wmt20-en-pl | ✓ | 1000 | bleu, chrf, ter | ||
wmt20-en-ps | ✓ | 2719 | bleu, chrf, ter | ||
wmt20-en-ru | ✓ | 2002 | bleu, chrf, ter | ||
wmt20-en-ta | ✓ | 1000 | bleu, chrf, ter | ||
wmt20-en-zh | ✓ | 1418 | bleu, chrf, ter | ||
wmt20-fr-de | ✓ | 1619 | bleu, chrf, ter | ||
wmt20-iu-en | ✓ | 2971 | bleu, chrf, ter | ||
wmt20-ja-en | ✓ | 993 | bleu, chrf, ter | ||
wmt20-km-en | ✓ | 2320 | bleu, chrf, ter | ||
wmt20-pl-en | ✓ | 1001 | bleu, chrf, ter | ||
wmt20-ps-en | ✓ | 2719 | bleu, chrf, ter | ||
wmt20-ru-en | ✓ | 991 | bleu, chrf, ter | ||
wmt20-ta-en | ✓ | 997 | bleu, chrf, ter | ||
wmt20-zh-en | ✓ | 2000 | bleu, chrf, ter | ||
wnli | ✓ | ✓ | 71 | acc | |
wsc | ✓ | ✓ | 104 | acc | |
wsc273 | ✓ | 273 | acc | ||
xcopa_et | ✓ | ✓ | 500 | acc | |
xcopa_ht | ✓ | ✓ | 500 | acc | |
xcopa_id | ✓ | ✓ | 500 | acc | |
xcopa_it | ✓ | ✓ | 500 | acc | |
xcopa_qu | ✓ | ✓ | 500 | acc | |
xcopa_sw | ✓ | ✓ | 500 | acc | |
xcopa_ta | ✓ | ✓ | 500 | acc | |
xcopa_th | ✓ | ✓ | 500 | acc | |
xcopa_tr | ✓ | ✓ | 500 | acc | |
xcopa_vi | ✓ | ✓ | 500 | acc | |
xcopa_zh | ✓ | ✓ | 500 | acc | |
xnli_ar | ✓ | ✓ | ✓ | 5010 | acc |
xnli_bg | ✓ | ✓ | ✓ | 5010 | acc |
xnli_de | ✓ | ✓ | ✓ | 5010 | acc |
xnli_el | ✓ | ✓ | ✓ | 5010 | acc |
xnli_en | ✓ | ✓ | ✓ | 5010 | acc |
xnli_es | ✓ | ✓ | ✓ | 5010 | acc |
xnli_fr | ✓ | ✓ | ✓ | 5010 | acc |
xnli_hi | ✓ | ✓ | ✓ | 5010 | acc |
xnli_ru | ✓ | ✓ | ✓ | 5010 | acc |
xnli_sw | ✓ | ✓ | ✓ | 5010 | acc |
xnli_th | ✓ | ✓ | ✓ | 5010 | acc |
xnli_tr | ✓ | ✓ | ✓ | 5010 | acc |
xnli_ur | ✓ | ✓ | ✓ | 5010 | acc |
xnli_vi | ✓ | ✓ | ✓ | 5010 | acc |
xnli_zh | ✓ | ✓ | ✓ | 5010 | acc |
xstory_cloze_ar | ✓ | ✓ | 1511 | acc | |
xstory_cloze_en | ✓ | ✓ | 1511 | acc | |
xstory_cloze_es | ✓ | ✓ | 1511 | acc | |
xstory_cloze_eu | ✓ | ✓ | 1511 | acc | |
xstory_cloze_hi | ✓ | ✓ | 1511 | acc | |
xstory_cloze_id | ✓ | ✓ | 1511 | acc | |
xstory_cloze_my | ✓ | ✓ | 1511 | acc | |
xstory_cloze_ru | ✓ | ✓ | 1511 | acc | |
xstory_cloze_sw | ✓ | ✓ | 1511 | acc | |
xstory_cloze_te | ✓ | ✓ | 1511 | acc | |
xstory_cloze_zh | ✓ | ✓ | 1511 | acc | |
xwinograd_en | ✓ | 2325 | acc | ||
xwinograd_fr | ✓ | 83 | acc | ||
xwinograd_jp | ✓ | 959 | acc | ||
xwinograd_pt | ✓ | 263 | acc | ||
xwinograd_ru | ✓ | 315 | acc | ||
xwinograd_zh | ✓ | 504 | acc |

This repository has been archived by the owner on Nov 26, 2024. It is now read-only.