From 6576aa634ca17227bd7db40a0a2b915de527742d Mon Sep 17 00:00:00 2001
From: darodi <4682830+darodi@users.noreply.github.com>
Date: Tue, 25 Jan 2022 00:35:47 +0100
Subject: [PATCH 1/3] manage color styles during conversion to srt
---
.gitignore | 1 +
tests/subtitles/styles2.srt | 23 +++++++++++++++
tests/subtitles/styles2.vtt | 58 +++++++++++++++++++++++++++++++++++++
tests/test_srt.py | 15 ++++++++++
webvtt/structures.py | 38 ++++++++++++++++++++++++
webvtt/webvtt.py | 2 +-
webvtt/writers.py | 10 +++++--
7 files changed, 144 insertions(+), 3 deletions(-)
create mode 100644 tests/subtitles/styles2.srt
create mode 100644 tests/subtitles/styles2.vtt
diff --git a/.gitignore b/.gitignore
index 1d42b5b..a05c58b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ dist/
# PyCharm
.idea
+venv
# Documentation
docs/_build/
\ No newline at end of file
diff --git a/tests/subtitles/styles2.srt b/tests/subtitles/styles2.srt
new file mode 100644
index 0000000..b32f11d
--- /dev/null
+++ b/tests/subtitles/styles2.srt
@@ -0,0 +1,23 @@
+1
+00:00:07,120 --> 00:00:09,480
+Musique douce
+
+2
+00:00:09,720 --> 00:00:29,520
+---
+
+3
+00:00:32,439 --> 00:00:35,320
+Musique douce
+
+4
+00:00:35,560 --> 00:02:25,240
+---
+
+5
+00:02:25,480 --> 00:02:27,440
+-Stéphane ? Où on se gare ?
+
+6
+00:02:27,680 --> 00:02:29,280
+-Euh, là-bas, au chêne.
\ No newline at end of file
diff --git a/tests/subtitles/styles2.vtt b/tests/subtitles/styles2.vtt
new file mode 100644
index 0000000..27252b7
--- /dev/null
+++ b/tests/subtitles/styles2.vtt
@@ -0,0 +1,58 @@
+WEBVTT
+
+STYLE
+::cue {
+ font-family: Verdana, Arial, Tiresias;
+ line-height: 125%;
+}
+::cue(.white) {
+ color: #ffffff;
+}
+::cue(.lime) {
+ color: #00ff00;
+}
+::cue(.cyan) {
+ color: #00ffff;
+}
+::cue(.red) {
+ color: #ff0000;
+}
+::cue(.yellow) {
+ color: #ffff00;
+}
+::cue(.magenta) {
+ color: #ff00ff;
+}
+::cue(.blue) {
+ color: #0000ff;
+}
+::cue(.black) {
+ color: #000000;
+}
+::cue(.bg_black) {
+ background: rgba(0, 0, 0, 0.76);
+}
+
+sub0
+00:00:07.120 --> 00:00:09.480 line:-1
+Musique douce
+
+sub1
+00:00:09.720 --> 00:00:29.520 align:left line:-1
+---
+
+sub2
+00:00:32.439 --> 00:00:35.320 line:-1
+Musique douce
+
+sub3
+00:00:35.560 --> 00:02:25.240 align:left line:-1
+---
+
+sub4
+00:02:25.480 --> 00:02:27.440 line:-1
+-Stéphane ? Où on se gare ?
+
+sub5
+00:02:27.680 --> 00:02:29.280 align:left line:-1
+-Euh, là-bas, au chêne.
\ No newline at end of file
diff --git a/tests/test_srt.py b/tests/test_srt.py
index eed186d..34381b1 100644
--- a/tests/test_srt.py
+++ b/tests/test_srt.py
@@ -33,3 +33,18 @@ def test_convert_from_srt_to_vtt_and_back_gives_same_file(self):
converted = f.read()
self.assertEqual(original.strip(), converted.strip())
+
+    def test_convert_to_srt_with_styles(self):
+        """Converting styles2.vtt to SRT must match the styles2.srt fixture."""
+        for fixture in ('styles2.vtt', 'styles2.srt'):
+            copy(self._get_file(fixture), OUTPUT_DIR)
+        source_path = os.path.join(OUTPUT_DIR, 'styles2.vtt')
+        expected_path = os.path.join(OUTPUT_DIR, 'styles2.srt')
+        converted_path = os.path.join(OUTPUT_DIR, 'styles2_converted.srt')
+
+        webvtt.read(source_path).save_as_srt(converted_path)
+        with open(expected_path, 'r', encoding='utf-8') as f:
+            expected = f.read()
+        with open(converted_path, 'r', encoding='utf-8') as f:
+            converted = f.read()
+        self.assertEqual(expected.strip(), converted.strip())
diff --git a/webvtt/structures.py b/webvtt/structures.py
index 84f376d..d80e4bc 100644
--- a/webvtt/structures.py
+++ b/webvtt/structures.py
@@ -2,6 +2,8 @@
from .errors import MalformedCaptionError
+COLOURS_PATTERN = re.compile(r'::cue\(\.([^)]+)\)\s*{.*?color:(.*?);.*?}')
+
TIMESTAMP_PATTERN = re.compile('(\d+)?:?(\d{2}):(\d{2})[.,](\d{3})')
__all__ = ['Caption']
@@ -43,6 +45,33 @@ def __str__(self):
def add_line(self, line):
self.lines.append(line)
+    @staticmethod
+    def replace_color(x, tag, v):
+        return ("" if tag == "c" else ("<" + tag + ">")) \
+               + "<font color=\"" + v + "\">" \
+               + x.group(1) \
+               + "</font>" \
+               + ("" if tag == "c" else ("</" + tag + ">"))
+
+    def _replace_colors(self, raw_text, colours, tag):
+        """Rewrite every ``<tag.class>text</tag>`` span whose class is a key of *colours* via replace_color."""
+        result = raw_text
+        for k, v in colours.items():
+            regex_string = "<" + tag + "(?:\\..*?)?\\." + re.escape(str(k)) + "(?:\\..*?)?>(.*?)</" + tag + ">"
+            result = re.sub(regex_string, lambda x: self.replace_color(x, tag, v), result)
+        return result
+
+    def to_srt_formatted(self, colours):
+        """Return raw_text with classed c/i/b/u spans rewritten by _replace_colors, else self.text."""
+        caption_text = self.raw_text
+        tag_found = False
+        for tag in ('c', 'i', 'b', 'u'):
+            if re.search("<" + tag + "\\..*?>.*?</" + tag + ">", caption_text) is not None:
+                caption_text = self._replace_colors(caption_text, colours, tag)
+                tag_found = True
+        # No classed tag anywhere in the cue: fall back to the ``text`` property.
+        return caption_text if tag_found else self.text
+
def _to_seconds(self, hours, minutes, seconds, milliseconds):
return hours * 3600 + minutes * 60 + seconds + milliseconds / 1000
@@ -133,3 +162,12 @@ def text(self, value):
if type(value) != str:
raise TypeError('The text value must be a string.')
self.lines = value.split('\n')
+
+    @property
+    def colours(self):
+        """Return a dict mapping each ::cue(.class) name to its colour value.
+
+        Spaces are removed from each value; a repeated class keeps its last value.
+        """
+        found = COLOURS_PATTERN.findall(self.text)
+        return {cls_name: value.replace(" ", "") for cls_name, value in found}
diff --git a/webvtt/webvtt.py b/webvtt/webvtt.py
index adec7c9..d0e798f 100644
--- a/webvtt/webvtt.py
+++ b/webvtt/webvtt.py
@@ -107,7 +107,7 @@ def write(self, f, format='vtt'):
if format == 'vtt':
WebVTTWriter().write(self._captions, f)
elif format == 'srt':
- SRTWriter().write(self._captions, f)
+ SRTWriter().write(self._captions, self._styles, f)
# elif output_format == OutputFormat.SBV:
# SBVWriter().write(self._captions, f)
diff --git a/webvtt/writers.py b/webvtt/writers.py
index 5ec551b..2d7a2f0 100644
--- a/webvtt/writers.py
+++ b/webvtt/writers.py
@@ -20,12 +20,18 @@ def webvtt_content(self, captions):
class SRTWriter(object):
- def write(self, captions, f):
+ def write(self, captions, styles, f):
+ colours = dict()
+ if styles is not None:
+ for style in styles:
+ colours.update(style.colours)
+
for line_number, caption in enumerate(captions, start=1):
f.write('{}\n'.format(line_number))
f.write('{} --> {}\n'.format(self._to_srt_timestamp(caption.start_in_seconds),
self._to_srt_timestamp(caption.end_in_seconds)))
- f.writelines(['{}\n'.format(l) for l in caption.lines])
+ f.write('{}\n'.format(caption.to_srt_formatted(colours)))
+ # f.writelines(['{}\n'.format(l) for l in caption.lines])
f.write('\n')
def _to_srt_timestamp(self, total_seconds):
From 26177d5d15930154b89e4abc34880a64a2ddb542 Mon Sep 17 00:00:00 2001
From: darodi <4682830+darodi@users.noreply.github.com>
Date: Tue, 25 Jan 2022 22:50:38 +0100
Subject: [PATCH 2/3] manage color styles during conversion to srt - unescape
html
---
tests/subtitles/styles2.srt | 2 +-
tests/subtitles/styles2.vtt | 2 +-
webvtt/structures.py | 3 ++-
3 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/tests/subtitles/styles2.srt b/tests/subtitles/styles2.srt
index b32f11d..0a41da5 100644
--- a/tests/subtitles/styles2.srt
+++ b/tests/subtitles/styles2.srt
@@ -8,7 +8,7 @@
3
00:00:32,439 --> 00:00:35,320
-Musique douce
+Some italic and normal coloured text. By the way, 2 < 3 !
4
00:00:35,560 --> 00:02:25,240
diff --git a/tests/subtitles/styles2.vtt b/tests/subtitles/styles2.vtt
index 27252b7..acb2c27 100644
--- a/tests/subtitles/styles2.vtt
+++ b/tests/subtitles/styles2.vtt
@@ -43,7 +43,7 @@ sub1
sub2
00:00:32.439 --> 00:00:35.320 line:-1
-Musique douce
+Some italic and normal coloured text. By the way, 2 < 3 !
sub3
00:00:35.560 --> 00:02:25.240 align:left line:-1
diff --git a/webvtt/structures.py b/webvtt/structures.py
index d80e4bc..6673d09 100644
--- a/webvtt/structures.py
+++ b/webvtt/structures.py
@@ -1,3 +1,4 @@
+import html
import re
from .errors import MalformedCaptionError
@@ -49,7 +50,7 @@ def add_line(self, line):
     def replace_color(x, tag, v):
         return ("" if tag == "c" else ("<" + tag + ">")) \
                + "<font color=\"" + v + "\">" \
-               + x.group(1) \
+               + html.unescape(x.group(1)) \
                + "</font>" \
                + ("" if tag == "c" else ("</" + tag + ">"))
From fadc9b5da152a185227191d6b2129bff900a152e Mon Sep 17 00:00:00 2001
From: darodi <4682830+darodi@users.noreply.github.com>
Date: Sun, 30 Jan 2022 03:05:56 +0100
Subject: [PATCH 3/3] manage color styles during conversion to srt - better
test
---
tests/subtitles/styles2.srt | 2 +-
tests/subtitles/styles2.vtt | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/subtitles/styles2.srt b/tests/subtitles/styles2.srt
index 0a41da5..8e60436 100644
--- a/tests/subtitles/styles2.srt
+++ b/tests/subtitles/styles2.srt
@@ -8,7 +8,7 @@
3
00:00:32,439 --> 00:00:35,320
-Some italic and normal coloured text. By the way, 2 < 3 !
+Some italic and normal coloured text. By the way, 2 < 3 !
4
00:00:35,560 --> 00:02:25,240
diff --git a/tests/subtitles/styles2.vtt b/tests/subtitles/styles2.vtt
index acb2c27..4a07608 100644
--- a/tests/subtitles/styles2.vtt
+++ b/tests/subtitles/styles2.vtt
@@ -43,7 +43,7 @@ sub1
sub2
00:00:32.439 --> 00:00:35.320 line:-1
-Some italic and normal coloured text. By the way, 2 < 3 !
+Some italic and normal coloured text. By the way, 2 < 3 !
sub3
00:00:35.560 --> 00:02:25.240 align:left line:-1