glut23 · darodi · Jan 24, 2022 · Jan 25, 2022 · Jan 30, 2022
diff --git a/.gitignore b/.gitignore
@@ -13,6 +13,7 @@ dist/
 
 # PyCharm
 .idea
+venv
 
 # Documentation
 docs/_build/
diff --git a/tests/subtitles/styles2.srt b/tests/subtitles/styles2.srt
@@ -0,0 +1,23 @@
+1
+00:00:07,120 --> 00:00:09,480
+<font color="#ff00ff">Musique douce</font>
+
+2
+00:00:09,720 --> 00:00:29,520
+<font color="#ff00ff">---</font>
+
+3
+00:00:32,439 --> 00:00:35,320
+<font color="#ff00ff"><i><font color="#0000ff">Some italic</font></i> and normal coloured text. By the way, 2 < 3 !</font>
+
+4
+00:00:35,560 --> 00:02:25,240
+<font color="#ff00ff">---</font>
+
+5
+00:02:25,480 --> 00:02:27,440
+<font color="#ffffff">-Stéphane ? Où on se gare ?</font>
+
+6
+00:02:27,680 --> 00:02:29,280
+<font color="#ffffff">-Euh, là-bas, au chêne.</font>
diff --git a/tests/subtitles/styles2.vtt b/tests/subtitles/styles2.vtt
@@ -0,0 +1,58 @@
+WEBVTT
+
+STYLE
+::cue {
+    font-family: Verdana, Arial, Tiresias;
+    line-height: 125%;
+}
+::cue(.white) {
+    color: #ffffff;
+}
+::cue(.lime) {
+    color: #00ff00;
+}
+::cue(.cyan) {
+    color: #00ffff;
+}
+::cue(.red) {
+    color: #ff0000;
+}
+::cue(.yellow) {
+    color: #ffff00;
+}
+::cue(.magenta) {
+    color: #ff00ff;
+}
+::cue(.blue) {
+    color: #0000ff;
+}
+::cue(.black) {
+    color: #000000;
+}
+::cue(.bg_black) {
+    background: rgba(0, 0, 0, 0.76);
+}
+
+sub0
+00:00:07.120 --> 00:00:09.480 line:-1
+<c.magenta.bg_black>Musique douce</c>
+
+sub1
+00:00:09.720 --> 00:00:29.520 align:left line:-1
+<c.magenta.bg_black>---</c>
+
+sub2
+00:00:32.439 --> 00:00:35.320 line:-1
+<c.magenta.bg_black><i.blue>Some italic</i> and normal coloured text. By the way, 2 &lt; 3 !</c>
+
+sub3
+00:00:35.560 --> 00:02:25.240 align:left line:-1
+<c.magenta.bg_black>---</c>
+
+sub4
+00:02:25.480 --> 00:02:27.440 line:-1
+<c.white.bg_black>-Stéphane ? Où on se gare ?</c>
+
+sub5
+00:02:27.680 --> 00:02:29.280 align:left line:-1
+<c.white.bg_black>-Euh, là-bas, au chêne.</c>
diff --git a/tests/test_srt.py b/tests/test_srt.py
@@ -33,3 +33,18 @@ def test_convert_from_srt_to_vtt_and_back_gives_same_file(self):
             converted = f.read()
 
         self.assertEqual(original.strip(), converted.strip())
+
+    def test_convert_to_srt_with_styles(self):
+        """A styled VTT saved as SRT must equal the styles2.srt fixture (ignoring outer whitespace)."""
+        for fixture in ('styles2.vtt', 'styles2.srt'):
+            copy(self._get_file(fixture), OUTPUT_DIR)
+        expected_path = os.path.join(OUTPUT_DIR, 'styles2.srt')
+        converted_path = os.path.join(OUTPUT_DIR, 'styles2_converted.srt')
+
+        webvtt.read(os.path.join(OUTPUT_DIR, 'styles2.vtt')).save_as_srt(converted_path)
+
+        with open(expected_path, 'r', encoding='utf-8') as f:
+            original = f.read()
+        with open(converted_path, 'r', encoding='utf-8') as f:
+            converted = f.read()
+        self.assertEqual(original.strip(), converted.strip())
diff --git a/webvtt/structures.py b/webvtt/structures.py
@@ -1,7 +1,10 @@
+import html
 import re
 
 from .errors import MalformedCaptionError
 
+COLOURS_PATTERN = re.compile(r'::cue\(\.([^)]+)\)\s*{[^}]*?(?<![\w-])color:([^;}]*)')  # stays inside one {...} rule; ignores background-color
+
 TIMESTAMP_PATTERN = re.compile('(\d+)?:?(\d{2}):(\d{2})[.,](\d{3})')
 
 __all__ = ['Caption']
@@ -43,6 +46,33 @@ def __str__(self):
     def add_line(self, line):
         self.lines.append(line)
 
+    @staticmethod
+    def replace_color(x, tag, v):
+        """Return group 1 of match x, HTML-unescaped, inside an SRT <font> tag of colour v."""
+        coloured = "<font color=\"" + v + "\">" + html.unescape(x.group(1)) + "</font>"
+        if tag == "c":
+            return coloured  # a "c" wrapper is dropped; any other tag is re-emitted around it
+        return "<" + tag + ">" + coloured + "</" + tag + ">"
+
+    def _replace_colors(self, raw_text, colours, tag):
+        """Rewrite each <tag.class> span of raw_text whose class is a key of colours as a <font> tag."""
+        for k, v in colours.items():
+            # [^.>] confines the class list to one tag, so a match cannot start in an earlier tag; DOTALL lets a span wrap lines.
+            regex = "<" + tag + r"(?:\.[^.>]+)*\." + re.escape(str(k)) + r"(?:\.[^.>]+)*>(.*?)</" + tag + ">"
+            raw_text = re.sub(regex, lambda x: self.replace_color(x, tag, v), raw_text, flags=re.DOTALL)
+        return raw_text
+
+    def to_srt_formatted(self, colours):
+        """Return the caption text for SRT output, passing class-styled c/i/b/u spans through colours."""
+        formatted = self.raw_text
+        styled = False
+        for tag in ('c', 'i', 'b', 'u'):
+            if re.search("<" + tag + "\\..*?>.*?</" + tag + ">", formatted):
+                formatted = self._replace_colors(formatted, colours, tag)
+                styled = True
+        # Without any class-styled span, fall back to self.text instead of the raw cue text.
+        return formatted if styled else self.text
+
     def _to_seconds(self, hours, minutes, seconds, milliseconds):
         return hours * 3600 + minutes * 60 + seconds + milliseconds / 1000
 
@@ -133,3 +163,12 @@ def text(self, value):
         if type(value) != str:
             raise TypeError('The text value must be a string.')
         self.lines = value.split('\n')
+
+    @property
+    def colours(self):
+        """Map each ``::cue(.name)`` class matched by COLOURS_PATTERN to its colour value.
+
+        Spaces are removed from the value; a class matched more than once keeps its last value.
+        """
+        matches = COLOURS_PATTERN.findall(self.text)
+        return {name: value.replace(" ", "") for name, value in matches}
diff --git a/webvtt/webvtt.py b/webvtt/webvtt.py
@@ -107,7 +107,7 @@ def write(self, f, format='vtt'):
         if format == 'vtt':
             WebVTTWriter().write(self._captions, f)
         elif format == 'srt':
-            SRTWriter().write(self._captions, f)
+            SRTWriter().write(self._captions, self._styles, f)
 #        elif output_format == OutputFormat.SBV:
 #            SBVWriter().write(self._captions, f)
 

diff --git a/webvtt/writers.py b/webvtt/writers.py
@@ -20,12 +20,18 @@ def webvtt_content(self, captions):
 
 class SRTWriter(object):
 
-    def write(self, captions, f):
+    def write(self, captions, styles, f=None):
+        """Write captions to f as SRT; also accepts the former two-argument form write(captions, f)."""
+        if f is None:  # legacy call: the second positional argument is the file and there are no styles
+            styles, f = None, styles
+        colours = {name: value for style in styles or () for name, value in style.colours.items()}
+
         for line_number, caption in enumerate(captions, start=1):
             f.write('{}\n'.format(line_number))
             f.write('{} --> {}\n'.format(self._to_srt_timestamp(caption.start_in_seconds),
                                          self._to_srt_timestamp(caption.end_in_seconds)))
-            f.writelines(['{}\n'.format(l) for l in caption.lines])
+            # Class-based cue colours are converted to <font> tags by the caption itself.
+            f.write('{}\n'.format(caption.to_srt_formatted(colours)))
             f.write('\n')
 
     def _to_srt_timestamp(self, total_seconds):