Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relates #31 | Add SEACrowd Speech Classification #55

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions seacrowd/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
ssp_features,
speech_text_features,
speech2speech_features,
speech_features,
speech_multi_features,
image_text_features,
)

Expand Down Expand Up @@ -78,6 +80,11 @@ class Tasks(Enum):
# SpeechText
SPEECH_RECOGNITION = "ASR"
SPEECH_TO_TEXT_TRANSLATION = "STTT"

SPEECH_LANGUAGE_IDENTIFICATION = "SPEECH_LID"
SPEECH_EMOTION_RECOGNITION = "SER"
SPEECH_EMOTION_RECOGNITION_MULTILABEL = "SER_MULTI"

TEXT_TO_SPEECH = "TTS"

# SpeechSpeech
Expand Down Expand Up @@ -205,6 +212,9 @@ class Licenses(Enum):
Tasks.SPEECH_TO_TEXT_TRANSLATION: "SPTEXT",
Tasks.TEXT_TO_SPEECH: "SPTEXT",
Tasks.SPEECH_TO_SPEECH_TRANSLATION: "S2S",
Tasks.SPEECH_LANGUAGE_IDENTIFICATION: "SP_CLS",
Tasks.SPEECH_EMOTION_RECOGNITION: "SP_CLS",
Tasks.SPEECH_EMOTION_RECOGNITION_MULTILABEL: "SP_CLS_MULTI",
Tasks.IMAGE_CAPTIONING: "IMTEXT",
Tasks.STYLIZED_IMAGE_CAPTIONING: "IMTEXT",
Tasks.VISUALLY_GROUNDED_REASONING: "IMTEXT",
Expand Down Expand Up @@ -234,6 +244,8 @@ class Licenses(Enum):
"SSP": ssp_features,
"SPTEXT": speech_text_features,
"S2S": speech2speech_features,
"SP_CLS": speech_features(),
"SP_CLS_MULTI": speech_multi_features(),
"IMTEXT": image_text_features(),
}

Expand Down
4 changes: 3 additions & 1 deletion seacrowd/utils/schemas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from .self_supervised_pretraining import features as ssp_features
from .speech_text import features as speech_text_features
from .speech_to_speech import features as speech2speech_features
from .speech_classification import features as speech_features
from .speech_multilabel import features as speech_multi_features
from .image_text import features as image_text_features

__all__ = ["kb_features", "qa_features", "text2text_features", "text_features", "text_multi_features", "pairs_features", "pairs_multi_features", "pairs_features_score", "seq_label_features", "ssp_features", "speech_text_features", "speech2speech_features", "image_text_features"]
__all__ = ["kb_features", "qa_features", "text2text_features", "text_features", "text_multi_features", "pairs_features", "pairs_multi_features", "pairs_features_score", "seq_label_features", "ssp_features", "speech_text_features", "speech2speech_features", "speech_features", "speech_multi_features", "image_text_features"]
19 changes: 19 additions & 0 deletions seacrowd/utils/schemas/speech_classification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Speech Classification Schema for Single Label (be it Binary or Multiclass)
"""
import datasets

def features(label_names=None):
    """Build the single-label speech classification schema.

    Args:
        label_names: Class names for the ``labels`` column. When omitted
            (``None``), a fresh ``["Yes", "No"]`` list is used.

    Returns:
        A ``datasets.Features`` with the columns ``id``, ``path``, ``audio``
        (declared with a 16 kHz sampling rate), ``speaker_id``, ``labels``
        (one ``ClassLabel`` per example) and a ``metadata`` dict holding
        ``speaker_age`` and ``speaker_gender``.
    """
    if label_names is None:
        # Create the default per call. A list literal in the signature is a
        # single object shared by every schema built with the defaults, so a
        # mutation through one schema would leak into all the others.
        label_names = ["Yes", "No"]

    return datasets.Features(
        {
            "id": datasets.Value("string"),
            "path": datasets.Value("string"),
            "audio": datasets.Audio(sampling_rate=16_000),
            "speaker_id": datasets.Value("string"),
            "labels": datasets.ClassLabel(names=label_names),
            "metadata": {
                "speaker_age": datasets.Value("int64"),
                "speaker_gender": datasets.Value("string"),
            },
        }
    )
19 changes: 19 additions & 0 deletions seacrowd/utils/schemas/speech_multilabel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Speech Classification Schema for Multilabel
"""
import datasets

def features(label_names=None):
    """Build the multilabel speech classification schema.

    Args:
        label_names: Class names each entry of the ``labels`` column may
            take. When omitted (``None``), a fresh ``["Yes", "No"]`` list is
            used.

    Returns:
        A ``datasets.Features`` with the columns ``id``, ``path``, ``audio``
        (declared with a 16 kHz sampling rate), ``speaker_id``, ``labels``
        (a ``Sequence`` of ``ClassLabel``, so an example can carry several
        labels) and a ``metadata`` dict holding ``speaker_age`` and
        ``speaker_gender``.
    """
    if label_names is None:
        # Create the default per call. A list literal in the signature is a
        # single object shared by every schema built with the defaults, so a
        # mutation through one schema would leak into all the others.
        label_names = ["Yes", "No"]

    return datasets.Features(
        {
            "id": datasets.Value("string"),
            "path": datasets.Value("string"),
            "audio": datasets.Audio(sampling_rate=16_000),
            "speaker_id": datasets.Value("string"),
            "labels": datasets.Sequence(datasets.ClassLabel(names=label_names)),
            "metadata": {
                "speaker_age": datasets.Value("int64"),
                "speaker_gender": datasets.Value("string"),
            },
        }
    )