From 6576aa634ca17227bd7db40a0a2b915de527742d Mon Sep 17 00:00:00 2001 From: darodi <4682830+darodi@users.noreply.github.com> Date: Tue, 25 Jan 2022 00:35:47 +0100 Subject: [PATCH 1/3] manage color styles during conversion to srt --- .gitignore | 1 + tests/subtitles/styles2.srt | 23 +++++++++++++++ tests/subtitles/styles2.vtt | 58 +++++++++++++++++++++++++++++++++++++ tests/test_srt.py | 15 ++++++++++ webvtt/structures.py | 38 ++++++++++++++++++++++++ webvtt/webvtt.py | 2 +- webvtt/writers.py | 10 +++++-- 7 files changed, 144 insertions(+), 3 deletions(-) create mode 100644 tests/subtitles/styles2.srt create mode 100644 tests/subtitles/styles2.vtt diff --git a/.gitignore b/.gitignore index 1d42b5b..a05c58b 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ dist/ # PyCharm .idea +venv # Documentation docs/_build/ \ No newline at end of file diff --git a/tests/subtitles/styles2.srt b/tests/subtitles/styles2.srt new file mode 100644 index 0000000..b32f11d --- /dev/null +++ b/tests/subtitles/styles2.srt @@ -0,0 +1,23 @@ +1 +00:00:07,120 --> 00:00:09,480 +Musique douce + +2 +00:00:09,720 --> 00:00:29,520 +--- + +3 +00:00:32,439 --> 00:00:35,320 +Musique douce + +4 +00:00:35,560 --> 00:02:25,240 +--- + +5 +00:02:25,480 --> 00:02:27,440 +-Stéphane ? Où on se gare ? + +6 +00:02:27,680 --> 00:02:29,280 +-Euh, là-bas, au chêne. \ No newline at end of file diff --git a/tests/subtitles/styles2.vtt b/tests/subtitles/styles2.vtt new file mode 100644 index 0000000..27252b7 --- /dev/null +++ b/tests/subtitles/styles2.vtt @@ -0,0 +1,58 @@ +WEBVTT + +STYLE +::cue { + font-family: Verdana, Arial, Tiresias; + line-height: 125%; +} +::cue(.white) { + color: #ffffff; +} +::cue(.lime) { + color: #00ff00; +} +::cue(.cyan) { + color: #00ffff; +} +::cue(.red) { + color: #ff0000; +} +::cue(.yellow) { + color: #ffff00; +} +::cue(.magenta) { + color: #ff00ff; +} +::cue(.blue) { + color: #0000ff; +} +::cue(.black) { + color: #000000; +} +::cue(.bg_black) { + background: rgba(0, 0, 0, 0.76); +} + +sub0 +00:00:07.120 --> 00:00:09.480 line:-1 +Musique douce + +sub1 +00:00:09.720 --> 00:00:29.520 align:left line:-1 +--- + +sub2 +00:00:32.439 --> 00:00:35.320 line:-1 +Musique douce + +sub3 +00:00:35.560 --> 00:02:25.240 align:left line:-1 +--- + +sub4 +00:02:25.480 --> 00:02:27.440 line:-1 +-Stéphane ? Où on se gare ? + +sub5 +00:02:27.680 --> 00:02:29.280 align:left line:-1 +-Euh, là-bas, au chêne. \ No newline at end of file diff --git a/tests/test_srt.py b/tests/test_srt.py index eed186d..34381b1 100644 --- a/tests/test_srt.py +++ b/tests/test_srt.py @@ -33,3 +33,18 @@ def test_convert_from_srt_to_vtt_and_back_gives_same_file(self): converted = f.read() self.assertEqual(original.strip(), converted.strip()) + + def test_convert_to_srt_with_styles(self): + copy(self._get_file('styles2.vtt'), OUTPUT_DIR) + copy(self._get_file('styles2.srt'), OUTPUT_DIR) + + vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'styles2.vtt')) + vtt.save_as_srt(os.path.join(OUTPUT_DIR, 'styles2_converted.srt')) + + with open(os.path.join(OUTPUT_DIR, 'styles2.srt'), 'r', encoding='utf-8') as f: + original = f.read() + + with open(os.path.join(OUTPUT_DIR, 'styles2_converted.srt'), 'r', encoding='utf-8') as f: + converted = f.read() + + self.assertEqual(original.strip(), converted.strip()) diff --git a/webvtt/structures.py b/webvtt/structures.py index 84f376d..d80e4bc 100644 --- a/webvtt/structures.py +++ b/webvtt/structures.py @@ -2,6 +2,8 @@ from .errors import MalformedCaptionError +COLOURS_PATTERN = re.compile(r'::cue\(\.([^)]+)\)\s*{.*?color:(.*?);.*?}') + TIMESTAMP_PATTERN = re.compile('(\d+)?:?(\d{2}):(\d{2})[.,](\d{3})') __all__ = ['Caption'] @@ -43,6 +45,33 @@ def __str__(self): def add_line(self, line): self.lines.append(line) + @staticmethod + def replace_color(x, tag, v): + return ("" if tag == "c" else ("<" + tag + ">")) \ + + "" \ + + x.group(1) \ + + "" \ + + ("" if tag == "c" else ("")) + + def _replace_colors(self, raw_text, colours, tag): + result = raw_text + for k, v in colours.items(): + regex_string = "<" + tag + "(?:\\..*?)?\\." + str(k) + "(?:\\..*?)?>(.*?)" + if re.search(regex_string, result) is not None: + result = re.sub(regex_string, lambda x: self.replace_color(x, tag, v), result) + return result + + def to_srt_formatted(self, colours): + caption_text = self.raw_text + no_tag_found = True + for tag in ['c', 'i', 'b', 'u']: + if re.search("<" + tag + "\\..*?>.*?", caption_text) is not None: + caption_text = self._replace_colors(caption_text, colours, tag) + no_tag_found = False + if no_tag_found: + caption_text = self.text + return caption_text + def _to_seconds(self, hours, minutes, seconds, milliseconds): return hours * 3600 + minutes * 60 + seconds + milliseconds / 1000 @@ -133,3 +162,12 @@ def text(self, value): if type(value) != str: raise TypeError('The text value must be a string.') self.lines = value.split('\n') + + @property + def colours(self): + """Returns the colours as a dict""" + colours_found = COLOURS_PATTERN.findall(self.text) + colours_classes = list(map(lambda x: x[0], colours_found)) + colours_values = list(map(lambda x: x[1].replace(" ", ""), colours_found)) + colours = dict(zip(colours_classes, colours_values)) + return colours diff --git a/webvtt/webvtt.py b/webvtt/webvtt.py index adec7c9..d0e798f 100644 --- a/webvtt/webvtt.py +++ b/webvtt/webvtt.py @@ -107,7 +107,7 @@ def write(self, f, format='vtt'): if format == 'vtt': WebVTTWriter().write(self._captions, f) elif format == 'srt': - SRTWriter().write(self._captions, f) + SRTWriter().write(self._captions, self._styles, f) # elif output_format == OutputFormat.SBV: # SBVWriter().write(self._captions, f) diff --git a/webvtt/writers.py b/webvtt/writers.py index 5ec551b..2d7a2f0 100644 --- a/webvtt/writers.py +++ b/webvtt/writers.py @@ -20,12 +20,18 @@ def webvtt_content(self, captions): class SRTWriter(object): - def write(self, captions, f): + def write(self, captions, styles, f): + colours = dict() + if styles is not None: + for style in styles: + colours.update(style.colours) + for line_number, caption in enumerate(captions, start=1): f.write('{}\n'.format(line_number)) f.write('{} --> {}\n'.format(self._to_srt_timestamp(caption.start_in_seconds), self._to_srt_timestamp(caption.end_in_seconds))) - f.writelines(['{}\n'.format(l) for l in caption.lines]) + f.write('{}\n'.format(caption.to_srt_formatted(colours))) + # f.writelines(['{}\n'.format(l) for l in caption.lines]) f.write('\n') def _to_srt_timestamp(self, total_seconds): From 26177d5d15930154b89e4abc34880a64a2ddb542 Mon Sep 17 00:00:00 2001 From: darodi <4682830+darodi@users.noreply.github.com> Date: Tue, 25 Jan 2022 22:50:38 +0100 Subject: [PATCH 2/3] manage color styles during conversion to srt - unescape html --- tests/subtitles/styles2.srt | 2 +- tests/subtitles/styles2.vtt | 2 +- webvtt/structures.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/subtitles/styles2.srt b/tests/subtitles/styles2.srt index b32f11d..0a41da5 100644 --- a/tests/subtitles/styles2.srt +++ b/tests/subtitles/styles2.srt @@ -8,7 +8,7 @@ 3 00:00:32,439 --> 00:00:35,320 -Musique douce +Some italic and normal coloured text. By the way, 2 < 3 ! 4 00:00:35,560 --> 00:02:25,240 diff --git a/tests/subtitles/styles2.vtt b/tests/subtitles/styles2.vtt index 27252b7..acb2c27 100644 --- a/tests/subtitles/styles2.vtt +++ b/tests/subtitles/styles2.vtt @@ -43,7 +43,7 @@ sub1 sub2 00:00:32.439 --> 00:00:35.320 line:-1 -Musique douce +Some italic and normal coloured text. By the way, 2 < 3 ! sub3 00:00:35.560 --> 00:02:25.240 align:left line:-1 diff --git a/webvtt/structures.py b/webvtt/structures.py index d80e4bc..6673d09 100644 --- a/webvtt/structures.py +++ b/webvtt/structures.py @@ -1,3 +1,4 @@ +import html import re from .errors import MalformedCaptionError @@ -49,7 +50,7 @@ def add_line(self, line): def replace_color(x, tag, v): return ("" if tag == "c" else ("<" + tag + ">")) \ + "" \ - + x.group(1) \ + + html.unescape(x.group(1)) \ + "" \ + ("" if tag == "c" else ("")) From fadc9b5da152a185227191d6b2129bff900a152e Mon Sep 17 00:00:00 2001 From: darodi <4682830+darodi@users.noreply.github.com> Date: Sun, 30 Jan 2022 03:05:56 +0100 Subject: [PATCH 3/3] manage color styles during conversion to srt - better test --- tests/subtitles/styles2.srt | 2 +- tests/subtitles/styles2.vtt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/subtitles/styles2.srt b/tests/subtitles/styles2.srt index 0a41da5..8e60436 100644 --- a/tests/subtitles/styles2.srt +++ b/tests/subtitles/styles2.srt @@ -8,7 +8,7 @@ 3 00:00:32,439 --> 00:00:35,320 -Some italic and normal coloured text. By the way, 2 < 3 ! +Some italic and normal coloured text. By the way, 2 < 3 ! 4 00:00:35,560 --> 00:02:25,240 diff --git a/tests/subtitles/styles2.vtt b/tests/subtitles/styles2.vtt index acb2c27..4a07608 100644 --- a/tests/subtitles/styles2.vtt +++ b/tests/subtitles/styles2.vtt @@ -43,7 +43,7 @@ sub1 sub2 00:00:32.439 --> 00:00:35.320 line:-1 -Some italic and normal coloured text. By the way, 2 < 3 ! +Some italic and normal coloured text. By the way, 2 < 3 ! sub3 00:00:35.560 --> 00:02:25.240 align:left line:-1