-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
229 lines (200 loc) · 8.15 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# Author: George Papadopoulos
# E-mail: [email protected]
# import library
import speech_recognition as sr
import os
import subprocess
import wave
import contextlib
from pydub import AudioSegment
import shutil
# Check if the audio is .wav file type
def audio_file_is_wav(audio_file_path):
    """Report whether a path names a WAV file, judging by its extension only.

    The comparison is case-insensitive, so ``clip.WAV`` is accepted as well
    as ``clip.wav``. The file contents are never inspected.

    Args:
        audio_file_path: Path or file name of the audio file, as a string.

    Returns:
        True if the path ends with a ``.wav`` extension, False otherwise.
    """
    # Lower-casing first keeps upper- or mixed-case extensions from being
    # treated as "not a WAV file".
    return audio_file_path.lower().endswith('.wav')
# Create temp folder in current working directory
def create_tmp_folder():
    """Create the ``temp`` working folder inside the current working directory.

    Returns:
        True if the folder was created, False if ``os.mkdir`` raised
        ``OSError`` (for example because the folder already exists).
    """
    # os.path.join picks the platform's separator. A hard-coded backslash
    # only produces a "temp" folder on Windows; elsewhere it becomes part of
    # the directory name and no longer matches the './temp' path used by the
    # folder checks in this file.
    try:
        os.mkdir(os.path.join(os.getcwd(), 'temp'))
    except OSError:
        # Folder could not be created
        return False
    # Folder created
    return True
# Create Temp subfolder
def create_tmp_sbfolder():
    """Create the ``temp/splittedAudio`` folder used for split audio chunks.

    The parent ``temp`` folder is created as well when it does not exist yet.

    Returns:
        True if the subfolder was created, False if it could not be created
        (including the case where it already exists).
    """
    chunk_folder = os.path.join(os.getcwd(), 'temp', 'splittedAudio')
    try:
        # makedirs creates the missing parent in the same call and raises
        # OSError when the leaf folder is already there, so an existing
        # subfolder is reported as a failure.
        os.makedirs(chunk_folder)
    except OSError:
        # Folder could not be created
        return False
    # Folder created
    return True
# Temp folder checker (checking if folder exists)
def check_temp_folder():
    """Tell whether a ``temp`` directory exists in the current working directory.

    Returns:
        True when ``./temp`` is an existing directory, False otherwise.
    """
    return os.path.isdir('./temp')
# Deleting the Temp folder
def delete_temp_folder():
    """Remove the ``./temp`` directory together with everything inside it.

    Returns:
        True when the directory tree was removed, False when
        ``shutil.rmtree`` raised ``OSError``.
    """
    try:
        shutil.rmtree('./temp')
        # Reaching this line means the removal went through
        return True
    except OSError:
        # The folder could not be deleted
        return False
# Convert any audio file to .wav file type
def convert_audio_to_wav(audio_file_path):
    """Convert an audio file with ffmpeg, writing the result into ``temp``.

    The output lands in the ``temp`` folder of the current working directory
    and is named after the input's base name with ``mp3`` replaced by ``wav``.
    The ``temp`` folder is expected to exist already.

    Args:
        audio_file_path: Path of the audio file to convert.

    Returns:
        The exit status reported by the ``ffmpeg`` process.
    """
    # NOTE(review): replace() swaps every 'mp3' occurrence in the file name,
    # not only the extension, and leaves other extensions untouched. It is
    # kept as-is because the caller rebuilds the output name the same way.
    wav_name = os.path.basename(audio_file_path).replace('mp3', 'wav')
    # os.path.join keeps the output inside the same ./temp folder on every
    # platform instead of relying on Windows-only backslashes.
    output_path = os.path.join(os.getcwd(), 'temp', wav_name)
    return subprocess.call(['ffmpeg', '-i', audio_file_path, output_path])
# Speech Recognition - Returns string with the contents
def speech_recognition(audio_file_path, language):
    """Transcribe an audio file with Google speech recognition.

    Args:
        audio_file_path: Path of the audio file to transcribe.
        language: Language tag handed to the recognizer (e.g. ``'el-GR'``).

    Returns:
        The recognized text, or an empty string when recognition fails.
    """
    # Initialize recognizer class (for recognizing the speech)
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file_path) as source:
        # record() reads the whole file; listen() is phrase-oriented and can
        # stop at the first pause, which would drop the rest of the recording.
        audio_data = recognizer.record(source)
    try:
        # Using Google speech recognition
        return recognizer.recognize_google(audio_data, language=language)
    except Exception:
        # Best effort: unintelligible audio or an unreachable API yields an
        # empty transcript. Catching Exception rather than using a bare
        # except lets KeyboardInterrupt and SystemExit propagate.
        return ''
# Get .wav file duration - Returns duration in seconds
def get_wav_duration(audio_file_path):
    """Compute the playing time of a WAV file.

    Args:
        audio_file_path: Path of the WAV file to inspect.

    Returns:
        Duration in seconds as a float (frame count divided by frame rate).
    """
    wav_reader = wave.open(audio_file_path, 'r')
    # closing() guarantees the reader is closed once the values are read
    with contextlib.closing(wav_reader):
        frame_count = wav_reader.getnframes()
        frame_rate = wav_reader.getframerate()
        return frame_count / float(frame_rate)
# Break audio file into separate files of at most 60 seconds each - Returns the number of files created
def break_wav_file(audio_file_path, chunk_length_ms=60000):
    """Split a WAV file into consecutive chunks stored under ``temp``.

    Chunks are written to ``./temp/splittedAudio/splittedWav<N>.wav`` with
    ``N`` counting up from 0. Every chunk is ``chunk_length_ms`` long except
    the last one, which holds whatever audio remains.

    Args:
        audio_file_path: Path of the WAV file to split.
        chunk_length_ms: Maximum chunk length in milliseconds (default 60000,
            i.e. 60 seconds). Must be a positive integer.

    Returns:
        The number of chunk files written, or 0 when the chunk subfolder
        could not be created.
    """
    # The chunk subfolder must be freshly created before anything is exported
    if not create_tmp_sbfolder():
        print('Could not create Temp subfolder')
        return 0

    # Load the audio once; slicing below works on the in-memory segment
    audio = AudioSegment.from_wav(audio_file_path)
    # len() of an AudioSegment is its duration in milliseconds, so the split
    # points use the exact length instead of a duration rounded to seconds.
    total_ms = len(audio)

    cnt = 0
    # max(..., 1) keeps a single (empty) chunk for zero-length audio.
    # Consecutive start offsets leave no gap between neighbouring chunks.
    for start_ms in range(0, max(total_ms, 1), chunk_length_ms):
        chunk = audio[start_ms:start_ms + chunk_length_ms]
        chunk.export('./temp/splittedAudio/splittedWav' + str(cnt) + '.wav',
                     format="wav")
        cnt += 1
    return cnt
# Flatten a list of lists into a single list
def convert_to_one_list(text_list):
    """Flatten one level of nesting.

    Args:
        text_list: An iterable whose elements are themselves iterable.

    Returns:
        A new list holding the items of every element, in their original
        order. A string element contributes its individual characters.
    """
    return [entry for group in text_list for entry in group]
# Speech to text MAIN program
def _transcribe_wav_file(wav_path, language):
    """Transcribe one WAV file, splitting it into chunks first when it is long.

    Returns a list with one entry per chunk that produced text; each entry is
    that chunk's transcript split on single spaces.
    """
    # Check if audio duration is above 2 minutes
    if round(get_wav_duration(wav_path)) > 121:
        # Break the file into smaller files; a missing count means no chunks
        chunk_count = break_wav_file(wav_path) or 0
        chunk_paths = ['./temp/splittedAudio/splittedWav' + str(index) + '.wav'
                       for index in range(chunk_count)]
    else:
        # Short enough to be recognized in one go
        chunk_paths = [wav_path]

    word_lists = []
    # Every chunk is visited, including the last one
    for chunk_path in chunk_paths:
        text = speech_recognition(chunk_path, language)
        # An empty string means nothing was recognized for this chunk
        if text:
            word_lists.append(text.split(' '))
    return word_lists


def speech_to_text_main(audio_file_src, language):
    """Transcribe an audio file and print the transcript.

    A file that is not a WAV is first converted into the ``temp`` folder.
    Audio longer than about two minutes is split into chunks which are
    recognized one by one. The ``temp`` folder is removed before returning,
    whether or not recognition succeeded, so a failed run cannot leave stale
    files behind for the next one.

    Args:
        audio_file_src: Path of the audio file to transcribe.
        language: Language tag handed to the recognizer (e.g. ``'el-GR'``).

    Returns:
        The transcript as a list of word lists, one per recognized chunk.
        The list is empty when nothing could be recognized.
    """
    # Setting the export list
    transcript_contents = []
    try:
        if audio_file_is_wav(audio_file_src):
            # File is already .wav
            transcript_contents = _transcribe_wav_file(audio_file_src, language)
        elif create_tmp_folder():
            # File is NOT .wav: convert it into the temp folder first
            convert_audio_to_wav(audio_file_src)
            # Same output name the converter derives from the source file
            converted_path = os.path.join(
                os.getcwd(), 'temp',
                os.path.basename(audio_file_src).replace('mp3', 'wav'))
            transcript_contents = _transcribe_wav_file(converted_path, language)
        else:
            # Folder could not be created, so nothing was converted
            print('Temp folder could not create')
    finally:
        # Always remove the working folder, also after a failure
        if check_temp_folder():
            delete_temp_folder()

    # Check if transcript is NOT empty
    if transcript_contents:
        # Transcript is READY and available in transcript_contents variable (list)
        # Put your logic below that point
        print(transcript_contents)
        print(convert_to_one_list(transcript_contents))
        print(type(transcript_contents))
    else:
        # Transcript is empty
        print('Could not recognise speech. File is too large.')
    return transcript_contents
# Run the transcription for the sample file, using Greek (el-GR) as the recognition language
speech_to_text_main(audio_file_src='audio_file.mp3', language='el-GR')