-
Notifications
You must be signed in to change notification settings - Fork 0
/
util.py
62 lines (48 loc) · 2.06 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from typing import Iterator, TextIO
def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = '.') -> str:
    """Render a non-negative duration in seconds as ``[HH:]MM:SS<marker>mmm``.

    The value is rounded to the nearest millisecond before it is split into
    fields. The hours field is emitted only when it is non-zero or when
    ``always_include_hours`` is true. ``decimal_marker`` is placed between the
    whole seconds and the three-digit millisecond field.

    A negative ``seconds`` trips the assertion below.
    """
    assert seconds >= 0, "non-negative timestamp expected"

    # Work in integer milliseconds so that splitting into fields is exact.
    total_ms = round(seconds * 1000.0)
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1_000)

    if always_include_hours or hours > 0:
        hours_prefix = f"{hours:02d}:"
    else:
        hours_prefix = ""
    return f"{hours_prefix}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{millis:03d}"
def write_vtt(transcript: Iterator[dict], file: TextIO):
    """Write the segments of ``transcript`` to ``file`` as WebVTT cues.

    Each segment is read through its ``'start'``, ``'end'`` and ``'text'``
    keys. The start and end values are rendered with ``format_timestamp``;
    the text is stripped of surrounding whitespace. Output is flushed after
    every cue.
    """
    # print() appends its own newline, leaving a blank line after the header.
    print("WEBVTT\n", file=file)

    for segment in transcript:
        cue_start = format_timestamp(segment['start'])
        cue_end = format_timestamp(segment['end'])
        # Presumably "-->" is rewritten because it is the cue timing
        # separator and would be misread inside cue text.
        cue_text = segment['text'].strip().replace('-->', '->')
        cue = f"{cue_start} --> {cue_end}\n{cue_text}\n"
        print(cue, file=file, flush=True)
def bucket_cdn_url(bucket_name: str, region: str = 'ams3') -> str:
    """Return the CDN base URL for a bucket on digitaloceanspaces.com.

    The result has the form
    ``https://<bucket_name>.<region>.cdn.digitaloceanspaces.com`` with no
    trailing slash. ``region`` defaults to ``'ams3'``.
    """
    cdn_host = "{}.{}.cdn.digitaloceanspaces.com".format(bucket_name, region)
    return "https://" + cdn_host
def coalesce_short_transcript_segments(segments: list, minimum_transcript_len: int = 100) -> list:
    """Combine runs of short transcript segments into longer ones.

    Some transcript segments extracted by openai/whisper are really short,
    sometimes just one word. Segments are walked in order; while the segment
    being accumulated has fewer than ``minimum_transcript_len`` characters of
    text, the next segment is merged into it with ``_merge_segments``.

    Args:
        segments: Segment dicts, in order. Each is read through its
            ``"text"`` key here, and through ``"start"``/``"end"`` when merged.
        minimum_transcript_len: Text length (in characters) a segment must
            reach before it is emitted. Defaults to 100, about one sentence.

    Returns:
        A new list of segments. A segment that was already long enough is the
        same object that was passed in; the input list is not modified. The
        final segment may still be shorter than the minimum, because nothing
        follows it to merge with.
    """
    long_enough_segments = []
    previous = None
    for current in segments:
        if previous is None:
            # First segment: nothing accumulated yet.
            previous = current
        elif len(previous["text"]) < minimum_transcript_len:
            # Still too short: absorb the current segment and keep going.
            previous = _merge_segments(left=previous, right=current)
        else:
            long_enough_segments.append(previous)
            previous = current

    # Test against None, not truthiness: "no input at all" is the only case
    # in which there is nothing left to emit.
    if previous is not None:
        long_enough_segments.append(previous)
    return long_enough_segments
def _merge_segments(left, right):
    """Return a new segment spanning ``left`` followed by ``right``.

    The texts are joined with a single space; the result starts where
    ``left`` starts and ends where ``right`` ends. Only the ``"text"``,
    ``"start"`` and ``"end"`` keys are carried over, and neither input is
    modified.
    """
    combined_text = left["text"] + " " + right["text"]
    span_start = left["start"]
    span_end = right["end"]
    return dict(text=combined_text, start=span_start, end=span_end)