From a7d6ee40b23b0e7814b9a6b5d2cd3fae9be64488 Mon Sep 17 00:00:00 2001 From: Tobias Deiminger Date: Sun, 10 Nov 2024 13:14:45 +0100 Subject: [PATCH] Mark diff colors safe and escape raw diff input For HTML escaping of the diff view we have to consider two things. 1. Diff input comes from two git checkouts of the project at specific revisions. The revisions sdocs are considered untrusted user input, could contain special characters and must be escaped. 2. After analyzing with difflib we add a bit HTML to colorize the output. This specific HTML fragments are trusted and safe. Relates to #1920. --- .../diff_screen_results_view_object.py | 25 ++++++++------- .../view_objects/diff_screen_view_object.py | 18 ++++++----- strictdoc/git/change.py | 32 ++++++++++--------- strictdoc/git/project_diff_analyzer.py | 28 ++++++++-------- strictdoc/helpers/diff.py | 12 ++++--- .../diff/120__escaping__basic/lhs/input.sdoc | 22 +++++++++++++ .../diff/120__escaping__basic/rhs/input.sdoc | 22 +++++++++++++ .../diff/120__escaping__basic/strictdoc.toml | 6 ++++ .../diff/120__escaping__basic/test.itest | 29 +++++++++++++++++ 9 files changed, 141 insertions(+), 53 deletions(-) create mode 100644 tests/integration/features/diff/120__escaping__basic/lhs/input.sdoc create mode 100644 tests/integration/features/diff/120__escaping__basic/rhs/input.sdoc create mode 100644 tests/integration/features/diff/120__escaping__basic/strictdoc.toml create mode 100644 tests/integration/features/diff/120__escaping__basic/test.itest diff --git a/strictdoc/export/html/generators/view_objects/diff_screen_results_view_object.py b/strictdoc/export/html/generators/view_objects/diff_screen_results_view_object.py index 626bbff45..65eb65276 100644 --- a/strictdoc/export/html/generators/view_objects/diff_screen_results_view_object.py +++ b/strictdoc/export/html/generators/view_objects/diff_screen_results_view_object.py @@ -3,6 +3,8 @@ from datetime import datetime from typing import Optional +from markupsafe import Markup + from strictdoc import __version__ from strictdoc.core.project_config import ProjectConfig from strictdoc.export.html.html_templates import JinjaEnvironment @@ -58,31 +60,30 @@ def __init__( self.strictdoc_version = __version__ self.error_message: Optional[str] = None - def render_screen(self, jinja_environment: JinjaEnvironment): - template = jinja_environment.environment.overlay( - autoescape=False - ).get_template("screens/git/index.jinja") - return template.render(view_object=self) + def render_screen(self, jinja_environment: JinjaEnvironment) -> Markup: + return jinja_environment.render_template_as_markup( + "screens/git/index.jinja", view_object=self + ) - def render_url(self, url: str): - return self.link_renderer.render_url(url) + def render_url(self, url: str) -> Markup: + return Markup(self.link_renderer.render_url(url)) - def render_node_link(self, incoming_link, document, document_type): + def render_node_link(self, incoming_link, document, document_type) -> str: return self.link_renderer.render_node_link( incoming_link, document, document_type ) - def render_static_url(self, url: str): - return self.link_renderer.render_static_url(url) + def render_static_url(self, url: str) -> Markup: + return Markup(self.link_renderer.render_static_url(url)) def render_static_url_with_prefix(self, url: str) -> str: return self.link_renderer.render_static_url_with_prefix(url) - def render_local_anchor(self, node): + def render_local_anchor(self, node) -> str: return self.link_renderer.render_local_anchor(node) def is_empty_tree(self) -> bool: return self.document_tree_iterator.is_empty_tree() - def date_today(self): + def date_today(self) -> str: return datetime.today().strftime("%Y-%m-%d") diff --git a/strictdoc/export/html/generators/view_objects/diff_screen_view_object.py b/strictdoc/export/html/generators/view_objects/diff_screen_view_object.py index 003139136..ed223d2b7 100644 --- a/strictdoc/export/html/generators/view_objects/diff_screen_view_object.py +++ b/strictdoc/export/html/generators/view_objects/diff_screen_view_object.py @@ -2,6 +2,8 @@ from dataclasses import dataclass from datetime import datetime +from markupsafe import Markup + from strictdoc import __version__ from strictdoc.core.project_config import ProjectConfig from strictdoc.export.html.html_templates import JinjaEnvironment @@ -39,27 +41,27 @@ def __init__( self.is_running_on_server: bool = project_config.is_running_on_server self.strictdoc_version = __version__ - def render_screen(self, jinja_environment: JinjaEnvironment): + def render_screen(self, jinja_environment: JinjaEnvironment) -> Markup: return jinja_environment.render_template_as_markup( "screens/git/index.jinja", view_object=self ) - def render_url(self, url: str): - return self.link_renderer.render_url(url) + def render_url(self, url: str) -> Markup: + return Markup(self.link_renderer.render_url(url)) - def render_node_link(self, incoming_link, document, document_type): + def render_node_link(self, incoming_link, document, document_type) -> str: return self.link_renderer.render_node_link( incoming_link, document, document_type ) - def render_static_url(self, url: str): - return self.link_renderer.render_static_url(url) + def render_static_url(self, url: str) -> Markup: + return Markup(self.link_renderer.render_static_url(url)) def render_static_url_with_prefix(self, url: str) -> str: return self.link_renderer.render_static_url_with_prefix(url) - def render_local_anchor(self, node): + def render_local_anchor(self, node) -> str: return self.link_renderer.render_local_anchor(node) - def date_today(self): + def date_today(self) -> str: return datetime.today().strftime("%Y-%m-%d") diff --git a/strictdoc/git/change.py b/strictdoc/git/change.py index e8c4ad6ee..e2e8aa626 100644 --- a/strictdoc/git/change.py +++ b/strictdoc/git/change.py @@ -1,6 +1,8 @@ from enum import Enum from typing import Dict, List, Optional, Union +from markupsafe import Markup + from strictdoc.backend.sdoc.models.document import SDocDocument from strictdoc.backend.sdoc.models.node import ( SDocNode, @@ -36,8 +38,8 @@ def __init__( rhs_document: Optional[SDocDocument], uid_modified: bool, title_modified: bool, - lhs_colored_title_diff: Optional[str], - rhs_colored_title_diff: Optional[str], + lhs_colored_title_diff: Optional[Markup], + rhs_colored_title_diff: Optional[Markup], ): assert lhs_document is not None or rhs_document is not None if matched_uid is not None: @@ -45,15 +47,15 @@ def __init__( self.matched_uid: Optional[str] = matched_uid self.uid_modified: bool = uid_modified self.title_modified: bool = title_modified - self.lhs_colored_title_diff: Optional[str] = lhs_colored_title_diff - self.rhs_colored_title_diff: Optional[str] = rhs_colored_title_diff + self.lhs_colored_title_diff: Optional[Markup] = lhs_colored_title_diff + self.rhs_colored_title_diff: Optional[Markup] = rhs_colored_title_diff self.lhs_document: Optional[SDocDocument] = lhs_document self.rhs_document: Optional[SDocDocument] = rhs_document self.change_type: ChangeType = ChangeType.DOCUMENT_MODIFIED - def get_colored_title_diff(self, side: str) -> Optional[str]: + def get_colored_title_diff(self, side: str) -> Optional[Markup]: assert self.title_modified if side == "left": return self.lhs_colored_title_diff @@ -74,8 +76,8 @@ def __init__( rhs_section: Optional[SDocSection], uid_modified: bool, title_modified: bool, - lhs_colored_title_diff: Optional[str], - rhs_colored_title_diff: Optional[str], + lhs_colored_title_diff: Optional[Markup], + rhs_colored_title_diff: Optional[Markup], ): assert lhs_section is not None or rhs_section is not None if matched_uid is not None: @@ -85,8 +87,8 @@ def __init__( self.section_token: Optional[str] = section_token self.uid_modified: bool = uid_modified self.title_modified: bool = title_modified - self.lhs_colored_title_diff: Optional[str] = lhs_colored_title_diff - self.rhs_colored_title_diff: Optional[str] = rhs_colored_title_diff + self.lhs_colored_title_diff: Optional[Markup] = lhs_colored_title_diff + self.rhs_colored_title_diff: Optional[Markup] = rhs_colored_title_diff self.lhs_section: Optional[SDocSection] = lhs_section self.rhs_section: Optional[SDocSection] = rhs_section @@ -108,7 +110,7 @@ def __init__( def is_paired_change(self) -> bool: return self.lhs_section is not None and self.rhs_section is not None - def get_colored_title_diff(self, side: str) -> Optional[str]: + def get_colored_title_diff(self, side: str) -> Optional[Markup]: assert self.title_modified if side == "left": return self.lhs_colored_title_diff @@ -125,8 +127,8 @@ def __init__( field_name: str, lhs_field: Optional[SDocNodeField], rhs_field: Optional[SDocNodeField], - left_diff: Optional[str], - right_diff: Optional[str], + left_diff: Optional[Markup], + right_diff: Optional[Markup], ): assert isinstance(field_name, str) and len(field_name) > 0 assert lhs_field is not None or rhs_field is not None @@ -137,10 +139,10 @@ def __init__( self.field_name: str = field_name self.lhs_field: Optional[SDocNodeField] = lhs_field self.rhs_field: Optional[SDocNodeField] = rhs_field - self.left_diff: Optional[str] = left_diff - self.right_diff: Optional[str] = right_diff + self.left_diff: Optional[Markup] = left_diff + self.right_diff: Optional[Markup] = right_diff - def get_colored_free_text_diff(self, side: str) -> Optional[str]: + def get_colored_free_text_diff(self, side: str) -> Optional[Markup]: if side == "left": return self.left_diff if side == "right": diff --git a/strictdoc/git/project_diff_analyzer.py b/strictdoc/git/project_diff_analyzer.py index 342cc228c..8cb1685e2 100644 --- a/strictdoc/git/project_diff_analyzer.py +++ b/strictdoc/git/project_diff_analyzer.py @@ -4,6 +4,8 @@ from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Set, Tuple, Union +from markupsafe import Markup + from strictdoc.backend.sdoc.models.document import SDocDocument from strictdoc.backend.sdoc.models.node import ( SDocNode, @@ -24,7 +26,7 @@ SectionChange, ) from strictdoc.helpers.cast import assert_cast, assert_optional_cast -from strictdoc.helpers.diff import get_colored_diff_string, similar +from strictdoc.helpers.diff import get_colored_html_diff_string, similar from strictdoc.helpers.mid import MID @@ -347,8 +349,8 @@ def _iterate_one_index( uid_modified: bool = False title_modified: bool = False - lhs_colored_title_diff: Optional[str] = None - rhs_colored_title_diff: Optional[str] = None + lhs_colored_title_diff: Optional[Markup] = None + rhs_colored_title_diff: Optional[Markup] = None # If there is another section and the UIDs are not the # same, consider the UID modified. @@ -366,12 +368,12 @@ def _iterate_one_index( if other_document_or_none is not None: if document.title != other_document_or_none.title: title_modified = True - lhs_colored_title_diff = get_colored_diff_string( + lhs_colored_title_diff = get_colored_html_diff_string( document.title, other_document_or_none.title, "left", ) - rhs_colored_title_diff = get_colored_diff_string( + rhs_colored_title_diff = get_colored_html_diff_string( document.title, other_document_or_none.title, "right", @@ -461,8 +463,8 @@ def _iterate_one_index( uid_modified: bool = False title_modified: bool = False - lhs_colored_title_diff: Optional[str] = None - rhs_colored_title_diff: Optional[str] = None + lhs_colored_title_diff: Optional[Markup] = None + rhs_colored_title_diff: Optional[Markup] = None # If there is another section and the UIDs are not the # same, consider the UID modified. @@ -481,14 +483,14 @@ def _iterate_one_index( if node.title != other_section_or_none.title: title_modified = True lhs_colored_title_diff = ( - get_colored_diff_string( + get_colored_html_diff_string( node.title, other_section_or_none.title, "left", ) ) rhs_colored_title_diff = ( - get_colored_diff_string( + get_colored_html_diff_string( node.title, other_section_or_none.title, "right", @@ -769,10 +771,10 @@ def create_field_change( other_requirement_field_value = ( other_requirement_field.get_text_value() ) - left_diff = get_colored_diff_string( + left_diff = get_colored_html_diff_string( requirement_field_value, other_requirement_field_value, "left" ) - right_diff = get_colored_diff_string( + right_diff = get_colored_html_diff_string( requirement_field_value, other_requirement_field_value, "right" ) @@ -857,12 +859,12 @@ def create_comment_field_changes( comment_other_value = changed_other_field_.get_text_value() assert comment_other_value is not None - left_diff = get_colored_diff_string( + left_diff = get_colored_html_diff_string( comment_value, comment_other_value, "left", ) - right_diff = get_colored_diff_string( + right_diff = get_colored_html_diff_string( comment_value, comment_other_value, "right", diff --git a/strictdoc/helpers/diff.py b/strictdoc/helpers/diff.py index f0b0822f8..f66bde583 100644 --- a/strictdoc/helpers/diff.py +++ b/strictdoc/helpers/diff.py @@ -2,17 +2,19 @@ import difflib from difflib import SequenceMatcher +from markupsafe import Markup, escape + def similar(a, b): return SequenceMatcher(None, a, b).ratio() -red = lambda text: f'{text}' -green = lambda text: f'{text}' -white = lambda text: f"{text}" +red = lambda text: f'{escape(text)}' +green = lambda text: f'{escape(text)}' +white = lambda text: f"{escape(text)}" -def get_colored_diff_string(old: str, new: str, flag: str): +def get_colored_html_diff_string(old: str, new: str, flag: str) -> Markup: assert old is not None assert new is not None assert flag in ("left", "right") @@ -33,4 +35,4 @@ def get_colored_diff_string(old: str, new: str, flag: str): result += red(old[code[1] : code[2]]) else: result += green(new[code[3] : code[4]]) - return result + return Markup(result) diff --git a/tests/integration/features/diff/120__escaping__basic/lhs/input.sdoc b/tests/integration/features/diff/120__escaping__basic/lhs/input.sdoc new file mode 100644 index 000000000..93b84b393 --- /dev/null +++ b/tests/integration/features/diff/120__escaping__basic/lhs/input.sdoc @@ -0,0 +1,22 @@ +[DOCUMENT] +TITLE: Doc Title with special characters <> + +[SECTION] +TITLE: To be removed section with special characters <> + +[REQUIREMENT] +TITLE: To be removed title with special characters <> +STATEMENT: To be removed statement with special characters <> + +[/SECTION] + +[SECTION] +UID: SECT-1 +TITLE: To be changed section with special characters <> + +[REQUIREMENT] +UID: REQ-1 +TITLE: To be changed title with special characters <> +STATEMENT: To be changed statement with special characters <> + +[/SECTION] diff --git a/tests/integration/features/diff/120__escaping__basic/rhs/input.sdoc b/tests/integration/features/diff/120__escaping__basic/rhs/input.sdoc new file mode 100644 index 000000000..1fd4cd3e1 --- /dev/null +++ b/tests/integration/features/diff/120__escaping__basic/rhs/input.sdoc @@ -0,0 +1,22 @@ +[DOCUMENT] +TITLE: Doc Title with more special characters <>&"' + +[SECTION] +TITLE: Added section with more special characters <>&"' + +[REQUIREMENT] +TITLE: Added title with more special characters <>&"' +STATEMENT: Added statement with more special characters <>&"' + +[/SECTION] + +[SECTION] +UID: SECT-1 +TITLE: Changed section with more special characters <>&"' + +[REQUIREMENT] +UID: REQ-1 +TITLE: Changed title with more special characters <>&"' +STATEMENT: Changed statement with more special characters <>&"' + +[/SECTION] diff --git a/tests/integration/features/diff/120__escaping__basic/strictdoc.toml b/tests/integration/features/diff/120__escaping__basic/strictdoc.toml new file mode 100644 index 000000000..8da279a5a --- /dev/null +++ b/tests/integration/features/diff/120__escaping__basic/strictdoc.toml @@ -0,0 +1,6 @@ +[project] +title = "Test Project" + +features = [ + "DIFF", +] diff --git a/tests/integration/features/diff/120__escaping__basic/test.itest b/tests/integration/features/diff/120__escaping__basic/test.itest new file mode 100644 index 000000000..415c4608e --- /dev/null +++ b/tests/integration/features/diff/120__escaping__basic/test.itest @@ -0,0 +1,29 @@ +RUN: %strictdoc diff %S/lhs %S/rhs --output-dir Output + +RUN: cat %S/Output/diff.html | filecheck %s --dump-input=fail --check-prefix=CHECK-DIFF + +CHECK-DIFF:Doc Title with special characters <> +CHECK-DIFF:Doc Title with special characters <> +CHECK-DIFF:To be removed section with special characters <> +CHECK-DIFF:
To be removed section with special characters <>
+CHECK-DIFF:To be removed title with special characters <> +CHECK-DIFF:To be removed title with special characters <> +CHECK-DIFF:To be removed statement with special characters <> +CHECK-DIFF:To be changed section with special characters <> +CHECK-DIFF:
To be changed section with special characters <>
+CHECK-DIFF:To be changed title with special characters <> +CHECK-DIFF:To be changed title with special characters <> +CHECK-DIFF:To be changed statement with special characters <> + +CHECK-DIFF:Doc Title with more special characters <>&"' +CHECK-DIFF:Doc Title with more special characters <>&"' +CHECK-DIFF:Added section with more special characters <>&"' +CHECK-DIFF:
Added section with more special characters <>&"'
+CHECK-DIFF:Added title with more special characters <>&"' +CHECK-DIFF:Added title with more special characters <>&"' +CHECK-DIFF:Added statement with more special characters <>&"' +CHECK-DIFF:Changed section with more special characters <>&"' +CHECK-DIFF:
Changed section with more special characters <>&"' +CHECK-DIFF:Changed title with more special characters <>&"' +CHECK-DIFF:Changed title with more special characters <>&"' +CHECK-DIFF:Changed statement with more special characters <>&"'