forked from EleutherAI/lm-evaluation-harness
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25 from oskarvanderwal/master
Adding CrowsPairs task for English and French
- Loading branch information
Showing
2 changed files
with
70 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
""" | ||
French CrowS-Pairs: Extending a challenge dataset for measuring social bias in masked language models to a language other than English | ||
https://hal.inria.fr/hal-03629677/file/ACLFinal.pdf | ||
Measuring social biases in masked language models in English and French. | ||
https://gitlab.inria.fr/french-crows-pairs/acl-2022-paper-data-and-code/-/tree/main | ||
""" | ||
from lm_eval.base import PromptSourceTask | ||
|
||
|
||
# BibTeX entry for the French CrowS-Pairs paper.
# A raw string is required here: the entry contains LaTeX accent commands
# (\'e, \c{c}, \"e) that a normal string literal would treat as Python
# escape sequences, silently dropping the backslashes from the author names.
_CITATION = r"""@inproceedings{neveol2022french,
title={French CrowS-Pairs: Extending a challenge dataset for measuring social bias in masked language models to a language other than English},
author={N{\'e}v{\'e}ol, Aur{\'e}lie and Dupont, Yoann and Bezan{\c{c}}on, Julien and Fort, Kar{\"e}n},
booktitle={ACL 2022-60th Annual Meeting of the Association for Computational Linguistics},
year={2022}
}
"""
|
||
|
||
class CrowsPairsEnglish(PromptSourceTask):
    """English configuration of the multilingual CrowS-Pairs task.

    Only a test split is exposed: the training and validation predicates
    return ``False`` and their accessors return ``None``.
    """

    # NOTE(review): presumably the task version consumed by the harness -- confirm.
    VERSION = 0
    # NOTE(review): looks like a dataset hub identifier plus configuration
    # name used by the base class to populate ``self.dataset`` -- confirm.
    DATASET_PATH = "oskarvanderwal/crows_pairs_multilingual"
    DATASET_NAME = "english"

    def has_training_docs(self):
        """Return ``False``; this task reports no training documents."""
        return False

    def has_validation_docs(self):
        """Return ``False``; this task reports no validation documents."""
        return False

    def has_test_docs(self):
        """Return ``True``; this task reports test documents."""
        return True

    def training_docs(self):
        """Return ``None``; there is no training split to serve."""
        return None

    def validation_docs(self):
        """Return ``None``; there is no validation split to serve."""
        return None

    def test_docs(self):
        """Return ``self.dataset["test"]``, or ``None`` when no test docs are reported."""
        if not self.has_test_docs():
            return None
        return self.dataset["test"]
|
||
class CrowsPairsFrench(PromptSourceTask):
    """French configuration of the multilingual CrowS-Pairs task.

    Only a test split is exposed: the training and validation predicates
    return ``False`` and their accessors return ``None``.
    """

    # NOTE(review): presumably the task version consumed by the harness -- confirm.
    VERSION = 0
    # NOTE(review): looks like a dataset hub identifier plus configuration
    # name used by the base class to populate ``self.dataset`` -- confirm.
    DATASET_PATH = "oskarvanderwal/crows_pairs_multilingual"
    DATASET_NAME = "french"

    def has_training_docs(self):
        """Return ``False``; this task reports no training documents."""
        return False

    def has_validation_docs(self):
        """Return ``False``; this task reports no validation documents."""
        return False

    def has_test_docs(self):
        """Return ``True``; this task reports test documents."""
        return True

    def training_docs(self):
        """Return ``None``; there is no training split to serve."""
        return None

    def validation_docs(self):
        """Return ``None``; there is no validation split to serve."""
        return None

    def test_docs(self):
        """Return ``self.dataset["test"]``, or ``None`` when no test docs are reported."""
        if not self.has_test_docs():
            return None
        return self.dataset["test"]