From befa19e388410c6262e8cf0bd63edad9d17d6ec9 Mon Sep 17 00:00:00 2001
From: Pierre-Anthony Lemieux
Date: Mon, 21 Dec 2020 19:29:17 -0800
Subject: [PATCH] Fix FontFamily model and parsing/serialization (#167, #168)

---
Reviewer notes (this section is ignored by `git am`):
- FontFamily.validate now really checks every member; the previous
  `all(lambda ...)` form was always true because lambda objects are truthy.
- serialize_font_family escapes backslashes as well as double quotes so that
  quoted families round-trip through parse_font_families.
- The font-family regex patterns use the named groups "single_quote",
  "double_quote" and "no_quote"; parse_font_families relies on m.lastgroup.
- Indentation (2 spaces) and blank context lines were reconstructed from the
  hunk line counts; verify against the target tree before applying.

 .../python/ttconv/imsc/style_properties.py    | 15 ++++---
 src/main/python/ttconv/imsc/utils.py          | 42 ++++++++++++++++---
 src/main/python/ttconv/style_properties.py    | 17 ++++++--
 .../python/test_imsc_font_families_parser.py  | 30 ++++++++++---
 4 files changed, 84 insertions(+), 20 deletions(-)

diff --git a/src/main/python/ttconv/imsc/style_properties.py b/src/main/python/ttconv/imsc/style_properties.py
index d934e7ce..365b9c0e 100644
--- a/src/main/python/ttconv/imsc/style_properties.py
+++ b/src/main/python/ttconv/imsc/style_properties.py
@@ -261,14 +261,19 @@ class FontFamily(StyleProperty):
 
   @classmethod
   def extract(cls, context: StyleParsingContext, xml_attrib: str):
-    return tuple(map(
-      lambda f: "monospaceSerif" if f == "default" else f,
-      utils.parse_font_families(xml_attrib)
-    ))
+    return tuple(
+      map(
+        lambda f: styles.GenericFontFamilyType.monospaceSerif if f is styles.GenericFontFamilyType.default else f,
+        utils.parse_font_families(xml_attrib)
+      )
+    )
 
   @classmethod
   def from_model(cls, xml_element, model_value):
-    xml_element.set(f"{{{cls.ns}}}{cls.local_name}", model_value[0])
+    xml_element.set(
+      f"{{{cls.ns}}}{cls.local_name}",
+      utils.serialize_font_family(model_value)
+    )
 
 
 class FontSize(StyleProperty):
diff --git a/src/main/python/ttconv/imsc/utils.py b/src/main/python/ttconv/imsc/utils.py
index 6b8c9dc9..0fe77875 100644
--- a/src/main/python/ttconv/imsc/utils.py
+++ b/src/main/python/ttconv/imsc/utils.py
@@ -37,9 +37,6 @@
 
 _LENGTH_RE = re.compile(r"^((?:\+|\-)?\d*(?:\.\d+)?)(px|em|c|%|rh|rw)$")
 
-_FAMILIES_SEPARATOR = re.compile(r"(?<=[^\\]),")
-_FAMILIES_ESCAPED_CHAR = re.compile(r"\\(.)")
-
 _CLOCK_TIME_FRACTION_RE = re.compile(r"^(\d{2,}):(\d\d):(\d\d(?:\.\d+)?)$")
 _CLOCK_TIME_FRAMES_RE = re.compile(r"^(\d{2,}):(\d\d):(\d\d):(\d{2,})$")
 _OFFSET_FRAME_RE = re.compile(r"^(\d+(?:\.\d+)?)f")
@@ -114,22 +111,55 @@ def parse_length(attr_value: str) -> typing.Tuple[float, str]:
 
   raise ValueError("Bad length syntax")
 
+_FAMILIES_ESCAPED_CHAR = re.compile(r"\\(.)")
+_SINGLE_QUOTE_PATTERN = "(?:'(?P<single_quote>(.+?)(?<!\\\\))')"
+_DOUBLE_QUOTE_PATTERN = "(?:\"(?P<double_quote>(.+?)(?<!\\\\))\")"
+_NO_QUOTE_PATTERN = "(?P<no_quote>(?:\\\\.|[^'\", ])(?:\\\\.|[^'\",])+)"
+
+_FONT_FAMILY_PATTERN = re.compile(
+  "|".join(
+    (
+      _SINGLE_QUOTE_PATTERN,
+      _DOUBLE_QUOTE_PATTERN,
+      _NO_QUOTE_PATTERN
+    )
+  )
+)
+
 def parse_font_families(attr_value: str) -> typing.List[str]:
   '''Parses th TTML \\ value in `attr_value` into a list of font families'''
 
   rslt = []
 
-  for family in map(str.strip, _FAMILIES_SEPARATOR.split(attr_value)):
+  for m in _FONT_FAMILY_PATTERN.finditer(attr_value):
 
-    unquoted_family = family[1:-1] if family[0] == "'" or family[0] == '"' else family
+    is_quoted = m.lastgroup in ("single_quote", "double_quote")
 
-    rslt.append(_FAMILIES_ESCAPED_CHAR.sub(r"\1", unquoted_family))
+    escaped_family = _FAMILIES_ESCAPED_CHAR.sub(r"\1", m.group(m.lastgroup))
+
+    if not is_quoted and escaped_family in styles.GenericFontFamilyType.__members__:
+      rslt.append(styles.GenericFontFamilyType(escaped_family))
+    else:
+      rslt.append(escaped_family)
 
+
   if len(rslt) == 0:
     raise ValueError("Bad syntax")
 
   return rslt
 
+def serialize_font_family(font_family: typing.Tuple[typing.Union[str, styles.GenericFontFamilyType], ...]):
+  '''Serialize model FontFamily to tts:fontFamily
+  '''
+
+  def _serialize_one_family(family):
+    if isinstance(family, styles.GenericFontFamilyType):
+      return family.value
+
+    return '"' + family.replace("\\", "\\\\").replace('"', r'\"') + '"'
+
+  return ", ".join(map(_serialize_one_family, font_family))
+
 def parse_time_expression(tick_rate: typing.Optional[int], frame_rate: typing.Optional[Fraction], time_expr: str) -> Fraction:
   '''Parse a TTML time expression in a fractional number in seconds
 
diff --git a/src/main/python/ttconv/style_properties.py b/src/main/python/ttconv/style_properties.py
index 08b10a03..2caa5e8d 100644
--- a/src/main/python/ttconv/style_properties.py
+++ b/src/main/python/ttconv/style_properties.py
@@ -135,6 +135,17 @@ class ExtentType:
   height: LengthType = LengthType()
   width: LengthType = LengthType()
 
+class GenericFontFamilyType(Enum):
+  '''Generic (unquoted) font family names accepted by tts:fontFamily
+  '''
+  default = "default"
+  monospace = "monospace"
+  sansSerif = "sansSerif"
+  serif = "serif"
+  monospaceSansSerif = "monospaceSansSerif"
+  monospaceSerif = "monospaceSerif"
+  proportionalSansSerif = "proportionalSansSerif"
+  proportionalSerif = "proportionalSerif"
 
 class FontStyleType(Enum):
   '''tts:fontStyle value
@@ -507,11 +518,11 @@ class FontFamily(StyleProperty):
 
   @staticmethod
   def make_initial_value():
-    return ("default",)
+    return (GenericFontFamilyType.default,)
 
   @staticmethod
-  def validate(value: typing.List[str]):
-    return isinstance(value, tuple) and all(lambda i: isinstance(i, str) for i in value)
+  def validate(value: typing.Tuple[typing.Union[str, GenericFontFamilyType], ...]):
+    return isinstance(value, tuple) and all(isinstance(i, (str, GenericFontFamilyType)) for i in value)
 
 
 class FontSize(StyleProperty):
diff --git a/src/test/python/test_imsc_font_families_parser.py b/src/test/python/test_imsc_font_families_parser.py
index 67c1984c..2c7735c3 100644
--- a/src/test/python/test_imsc_font_families_parser.py
+++ b/src/test/python/test_imsc_font_families_parser.py
@@ -28,23 +28,41 @@
 # pylint: disable=R0201,C0115,C0116
 
 import unittest
-from ttconv.imsc.utils import parse_font_families
+from ttconv.imsc.utils import parse_font_families, serialize_font_family
+import ttconv.style_properties as styles
 
 class IMSCReaderTest(unittest.TestCase):
 
-  tests = [
-    ["default", ["default"]],
+  _parse_tests = [
+    ["default", [styles.GenericFontFamilyType.default]],
+    ["'default'", ["default"]],
     ["foo, 'bar good'", ["foo", "bar good"]],
     ['foo, "bar good"', ["foo", "bar good"]],
     [r'foo, "bar \good"', ["foo", "bar good"]],
-    [r'foo, "bar \,good"', ["foo", "bar ,good"]]
+    [r'foo, "bar \,good"', ["foo", "bar ,good"]],
+    [r'foo, "bar,good"', ["foo", "bar,good"]]
   ]
 
-  def test_font_families(self):
-    for test in self.tests:
+  def test_parse_font_families(self):
+    for test in self._parse_tests:
       with self.subTest(test[0]):
         c = parse_font_families(test[0])
         self.assertEqual(c, test[1])
+
+  _serialize_tests = [
+    [(styles.GenericFontFamilyType.default,), "default"],
+    [("default",), '"default"'],
+    [(styles.GenericFontFamilyType.proportionalSansSerif, "bar good"), 'proportionalSansSerif, "bar good"'],
+    [("foo", "bar, good"), r'"foo", "bar, good"'],
+    [("bar\"good",), r'"bar\"good"']
+  ]
+
+  def test_serialize_font_families(self):
+    for test in self._serialize_tests:
+      with self.subTest(test[0]):
+        c = serialize_font_family(test[0])
+        self.assertEqual(c, test[1])
+
 
 if __name__ == '__main__':
   unittest.main()