Skip to content

Commit

Permalink
Merge pull request #484 from emdupre/enh/revai-fr
Browse files Browse the repository at this point in the history
[enh] support non-English transcript with RevAI
  • Loading branch information
adelavega authored Oct 28, 2022
2 parents 8c41e74 + b9e8ca4 commit c249daf
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ env.list
docs/_build/
docs/generated/
.pytest_cache/
.vscode/
2 changes: 1 addition & 1 deletion optional-dependencies.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ tensorflow>=2.4.0
torch
transformers
xlrd
rev_ai
rev_ai >= 2.11.0
tensorflow-hub
tensorflow_text
12 changes: 9 additions & 3 deletions pliers/converters/api/revai.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,16 @@ class RevAISpeechAPIConverter(APITransformer, AudioToTextConverter):
to finish. Defaults to 1000 seconds.
request_rate (int): Number of seconds to wait between polling the
API for completion.
language (str): Code of the language spoken in the provided audio
file. Defaults to "en".
Must be a language supported by RevAI; for the full list,
see their docs: https://docs.rev.ai/api/asynchronous/reference/#operation/SubmitTranscriptionJob!ct=application/json&path=language&t=request #:PEP8 -E501
'''

_env_keys = ('REVAI_ACCESS_TOKEN',)
_log_attributes = ('access_token', 'timeout', 'request_rate')
VERSION = '1.0'

def __init__(self, access_token=None, timeout=1000, request_rate=5):
def __init__(self, access_token=None, timeout=1000, request_rate=5, language="en"):
verify_dependencies(['rev_ai_client'])
if access_token is None:
try:
Expand All @@ -45,6 +48,7 @@ def __init__(self, access_token=None, timeout=1000, request_rate=5):
self.timeout = timeout
self.request_rate = request_rate
self.client = rev_ai_client.RevAiAPIClient(access_token)
self.language = language
super().__init__()

@property
Expand All @@ -71,10 +75,12 @@ def _convert(self, audio):
logging.warning(msg)

if audio.url:
job = self.client.submit_job_url(audio.url)
job = self.client.submit_job_url(
audio.url, language=self.language)
else:
with audio.get_filename() as filename:
job = self.client.submit_job_local_file(filename)
job = self.client.submit_job_local_file(
filename, language=self.language)

operation_start = time.time()
response = self.client.get_job_details(job.id)
Expand Down
4 changes: 4 additions & 0 deletions pliers/tests/converters/api/test_revai_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,7 @@ def test_googleAPI_converter():

conv = RevAISpeechAPIConverter(access_token='badtoken')
assert not conv.validate_keys()

conv = RevAISpeechAPIConverter(language='ex') # pass unsupported lang
with pytest.raises(Exception):
conv.transform(stim)

0 comments on commit c249daf

Please sign in to comment.