# Segment_audio.py
import json
import os
import xml.dom.minidom
import numpy as np
from pydub import AudioSegment
# Load the run configuration; every input and output location is read from it.
with open('config.json') as config_file:
    data = json.load(config_file)

# filenames collects the path of every exported chunk; vocal_sound_ids
# collects the nite:id of every <vocalsound> element that has been read.
filenames = []
vocal_sound_ids = []

# Corpus locations, all taken from the "directories" section of the config.
_directories = data['Data']['directories']
audio_folder = _directories['Audio']['path']
meeting_file = _directories['Meetings']['path']
segments_folder = _directories['Segments']['path']
words_folder = _directories['Words']['path']

# Root folder under which the audio chunks and filepaths.txt are written.
output_folder = data['OutputFolder']['path']
# set audio type string for finding suitable audio file
def get_audio_type(audio_type_string):
    """Map the configured audio type to the spelling used in audio file names.

    The lookup is case-insensitive. The returned string is inserted verbatim
    into the WAV file name (``<observation>.<type>-<channel>.wav``), so its
    capitalisation has to match the files on disk.

    Args:
        audio_type_string: Audio type from the configuration, e.g. ``"lapel"``
            or ``"HEADSET"``.

    Returns:
        ``"Lapel"`` or ``"Headset"``; any unrecognised value falls back to
        ``"Lapel"``.
    """
    switcher = {
        "LAPEL": "Lapel",
        "HEADSET": "Headset",
    }
    # The fallback uses the same capitalisation as the mapped values; a
    # lowercase fallback would never match a file on a case-sensitive
    # filesystem.
    return switcher.get(audio_type_string.upper(), "Lapel")
# Resolve the audio file naming variant requested in the configuration.
_audio_settings = data['Data']['directories']['Audio']
audio_type = get_audio_type(_audio_settings['type'])

# Parse the meetings description and collect its <meeting> elements.
meeting_xml = xml.dom.minidom.parse(meeting_file)
meetings = meeting_xml.getElementsByTagName("meeting")
# identify backchannels from words.xml
def generate_vocal_sound_list(words_file):
    """Record the ``nite:id`` of every ``vocalsound`` element in *words_file*.

    The ids are added, in document order, to the module-level
    ``vocal_sound_ids`` list. Nothing is returned.

    Args:
        words_file: Parsed words document (an ``xml.dom.minidom`` document).
    """
    vocal_sound_ids.extend(
        element.getAttribute("nite:id")
        for element in words_file.getElementsByTagName("vocalsound")
    )
# create audio chunks
# One-slot cache for the most recently decoded recording. Consecutive chunks
# of one speaker come from the same WAV file, so it is decoded once instead
# of once per chunk.
_audio_cache = {"path": None, "audio": None}


def _load_audio(path):
    """Return the decoded WAV at *path*, decoding only when *path* changes."""
    if _audio_cache["path"] != path:
        # Store the audio first so a failed decode leaves the cache untouched.
        _audio_cache["audio"] = AudioSegment.from_wav(path)
        _audio_cache["path"] = path
    return _audio_cache["audio"]


def generate_audio(channel, start_time, end_time, audio_index):
    """Export one slice of a speaker's recording as its own WAV file.

    The source recording is
    ``<audio_files_per_meeting>/<observation>.<audio_type>-<channel>.wav`` and
    the slice is written to
    ``<output_folder>/<observation>/<observation>-<channel>/<observation>-<channel>-<audio_index>.wav``.
    The written path is appended to the module-level ``filenames`` list.
    If the source recording does not exist, nothing is written.

    Args:
        channel: Channel identifier used in the source and output file names.
        start_time: Slice start in seconds (anything ``float()`` accepts).
        end_time: Slice end in seconds (anything ``float()`` accepts).
        audio_index: Number appended to the output file name.

    Raises:
        ValueError: If ``start_time`` or ``end_time`` is not a valid number.
    """
    # pydub slices by milliseconds.
    start_ms = float(start_time) * 1000
    end_ms = float(end_time) * 1000

    audio_dir = audio_files_per_meeting + "/" + observation + "." + audio_type + "-" + channel + ".wav"
    if not os.path.exists(audio_dir):
        return
    audio = _load_audio(audio_dir)

    chunk_name = observation + "-" + channel
    vocal_directory = output_folder + "/" + observation + "/" + chunk_name
    # Creates the meeting folder and the per-channel folder in one call, and
    # also works when the output root itself does not exist yet.
    os.makedirs(vocal_directory, exist_ok=True)

    filename = vocal_directory + "/" + chunk_name + "-" + str(audio_index) + ".wav"
    # The path is used literally: it contains no format placeholders, and
    # running it through str.format would break on a path containing braces.
    audio[start_ms:end_ms].export(filename, format="wav")
    filenames.append(filename)
# check if a chunk is a backchannel
def check_vocal_sound(word_id_list):
    """Tell whether a segment starts or ends on a recorded vocal sound.

    Args:
        word_id_list: The segment's ``href`` split on ``'id('``. Element 0 is
            the part before the first id, element 1 the start id (with a
            trailing ``')..'`` or ``')'``), and the optional element 2 the end
            id (with a trailing ``')'``).

    Returns:
        ``True`` if the start id, or the end id when exactly three parts are
        present, is in the module-level ``vocal_sound_ids``; ``False``
        otherwise, including when the href contains no id at all.
    """
    # An href without any "id(" part has no start id to inspect.
    if len(word_id_list) < 2:
        return False

    start_id = word_id_list[1].replace('..', '').replace(')', '')
    if start_id in vocal_sound_ids:
        return True

    if len(word_id_list) == 3:
        end_id = word_id_list[2].replace(')', '')
        return end_id in vocal_sound_ids

    # Always return a bool; never fall off the end and return None.
    return False
# create audio chunks for each meeting and each speaker
for meeting in meetings:
    # Vocal sound ids are collected per meeting.
    vocal_sound_ids = []
    observation = meeting.getAttribute("observation")
    audio_files_per_meeting = audio_folder + "/" + observation + "/audio"

    for speaker in meeting.getElementsByTagName("speaker"):
        nxt_agent = speaker.getAttribute("nxt_agent")
        channel = speaker.getAttribute("channel")

        words_path = words_folder + "/" + observation + "." + nxt_agent + ".words.xml"
        segments_path = segments_folder + "/" + observation + "." + nxt_agent + ".segments.xml"
        # Speakers without both annotation files are skipped.
        if not (os.path.exists(words_path) and os.path.exists(segments_path)):
            continue

        words_file = xml.dom.minidom.parse(words_path)
        segments_file = xml.dom.minidom.parse(segments_path)
        generate_vocal_sound_list(words_file)

        # Index the <w> elements by id once per speaker so each segment needs
        # two dictionary lookups instead of a scan over every word. The first
        # element wins if an id is repeated.
        words_by_id = {}
        for word in words_file.getElementsByTagName("w"):
            words_by_id.setdefault(word.getAttribute("nite:id"), word)

        audio_index = 0
        for segment in segments_file.getElementsByTagName("nite:child"):
            word_ids = segment.getAttribute("href").split('id(')
            # Only hrefs of the form "...id(start)..id(end)" are exported.
            # Checking the length first also keeps malformed hrefs away from
            # check_vocal_sound.
            if len(word_ids) != 3 or check_vocal_sound(word_ids) is True:
                continue

            start = word_ids[1].replace('..', '').replace(')', '')
            end = word_ids[2].replace(')', '')
            # The index advances for every candidate segment, exported or
            # not, so chunk numbering stays the same as before.
            audio_index += 1

            start_word = words_by_id.get(start)
            end_word = words_by_id.get(end)
            # A boundary id that is not a <w> element has no usable timing.
            # Skip the segment rather than reuse the previous segment's times.
            if start_word is None or end_word is None:
                continue
            start_time = start_word.getAttribute("starttime")
            end_time = end_word.getAttribute("endtime")
            # getAttribute returns "" when the attribute is absent.
            if not start_time or not end_time:
                continue

            generate_audio(channel, start_time, end_time, audio_index)

# save the generated file paths to filepaths.txt
np.savetxt(os.path.join(output_folder, 'filepaths.txt'), filenames, fmt='%s')