Skip to content

Commit

Permalink
Mark diff colors safe and escape raw diff input
Browse files Browse the repository at this point in the history
For HTML escaping of the diff view we have to consider two things.

1. Diff input comes from two git checkouts of the project at specific
   revisions. The revisions' sdocs are considered untrusted user input,
   could contain special characters, and must be escaped.
2. After analyzing with difflib we add a bit of HTML to colorize the
   output. These specific HTML fragments are trusted and safe.

Relates to #1920.
  • Loading branch information
haxtibal committed Nov 10, 2024
1 parent 9531d7e commit a7d6ee4
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from datetime import datetime
from typing import Optional

from markupsafe import Markup

from strictdoc import __version__
from strictdoc.core.project_config import ProjectConfig
from strictdoc.export.html.html_templates import JinjaEnvironment
Expand Down Expand Up @@ -58,31 +60,30 @@ def __init__(
self.strictdoc_version = __version__
self.error_message: Optional[str] = None

def render_screen(self, jinja_environment: JinjaEnvironment):
template = jinja_environment.environment.overlay(
autoescape=False
).get_template("screens/git/index.jinja")
return template.render(view_object=self)
def render_screen(self, jinja_environment: JinjaEnvironment) -> Markup:
return jinja_environment.render_template_as_markup(
"screens/git/index.jinja", view_object=self
)

def render_url(self, url: str):
return self.link_renderer.render_url(url)
def render_url(self, url: str) -> Markup:
return Markup(self.link_renderer.render_url(url))

def render_node_link(self, incoming_link, document, document_type):
def render_node_link(self, incoming_link, document, document_type) -> str:
return self.link_renderer.render_node_link(
incoming_link, document, document_type
)

def render_static_url(self, url: str):
return self.link_renderer.render_static_url(url)
def render_static_url(self, url: str) -> Markup:
return Markup(self.link_renderer.render_static_url(url))

def render_static_url_with_prefix(self, url: str) -> str:
return self.link_renderer.render_static_url_with_prefix(url)

def render_local_anchor(self, node):
def render_local_anchor(self, node) -> str:
return self.link_renderer.render_local_anchor(node)

def is_empty_tree(self) -> bool:
return self.document_tree_iterator.is_empty_tree()

def date_today(self):
def date_today(self) -> str:
return datetime.today().strftime("%Y-%m-%d")
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from dataclasses import dataclass
from datetime import datetime

from markupsafe import Markup

from strictdoc import __version__
from strictdoc.core.project_config import ProjectConfig
from strictdoc.export.html.html_templates import JinjaEnvironment
Expand Down Expand Up @@ -39,27 +41,27 @@ def __init__(
self.is_running_on_server: bool = project_config.is_running_on_server
self.strictdoc_version = __version__

def render_screen(self, jinja_environment: JinjaEnvironment):
def render_screen(self, jinja_environment: JinjaEnvironment) -> Markup:
return jinja_environment.render_template_as_markup(
"screens/git/index.jinja", view_object=self
)

def render_url(self, url: str):
return self.link_renderer.render_url(url)
def render_url(self, url: str) -> Markup:
return Markup(self.link_renderer.render_url(url))

def render_node_link(self, incoming_link, document, document_type):
def render_node_link(self, incoming_link, document, document_type) -> str:
return self.link_renderer.render_node_link(
incoming_link, document, document_type
)

def render_static_url(self, url: str):
return self.link_renderer.render_static_url(url)
def render_static_url(self, url: str) -> Markup:
return Markup(self.link_renderer.render_static_url(url))

def render_static_url_with_prefix(self, url: str) -> str:
return self.link_renderer.render_static_url_with_prefix(url)

def render_local_anchor(self, node):
def render_local_anchor(self, node) -> str:
return self.link_renderer.render_local_anchor(node)

def date_today(self):
def date_today(self) -> str:
return datetime.today().strftime("%Y-%m-%d")
32 changes: 17 additions & 15 deletions strictdoc/git/change.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from enum import Enum
from typing import Dict, List, Optional, Union

from markupsafe import Markup

from strictdoc.backend.sdoc.models.document import SDocDocument
from strictdoc.backend.sdoc.models.node import (
SDocNode,
Expand Down Expand Up @@ -36,24 +38,24 @@ def __init__(
rhs_document: Optional[SDocDocument],
uid_modified: bool,
title_modified: bool,
lhs_colored_title_diff: Optional[str],
rhs_colored_title_diff: Optional[str],
lhs_colored_title_diff: Optional[Markup],
rhs_colored_title_diff: Optional[Markup],
):
assert lhs_document is not None or rhs_document is not None
if matched_uid is not None:
assert len(matched_uid) > 0
self.matched_uid: Optional[str] = matched_uid
self.uid_modified: bool = uid_modified
self.title_modified: bool = title_modified
self.lhs_colored_title_diff: Optional[str] = lhs_colored_title_diff
self.rhs_colored_title_diff: Optional[str] = rhs_colored_title_diff
self.lhs_colored_title_diff: Optional[Markup] = lhs_colored_title_diff
self.rhs_colored_title_diff: Optional[Markup] = rhs_colored_title_diff

self.lhs_document: Optional[SDocDocument] = lhs_document
self.rhs_document: Optional[SDocDocument] = rhs_document

self.change_type: ChangeType = ChangeType.DOCUMENT_MODIFIED

def get_colored_title_diff(self, side: str) -> Optional[str]:
def get_colored_title_diff(self, side: str) -> Optional[Markup]:
assert self.title_modified
if side == "left":
return self.lhs_colored_title_diff
Expand All @@ -74,8 +76,8 @@ def __init__(
rhs_section: Optional[SDocSection],
uid_modified: bool,
title_modified: bool,
lhs_colored_title_diff: Optional[str],
rhs_colored_title_diff: Optional[str],
lhs_colored_title_diff: Optional[Markup],
rhs_colored_title_diff: Optional[Markup],
):
assert lhs_section is not None or rhs_section is not None
if matched_uid is not None:
Expand All @@ -85,8 +87,8 @@ def __init__(
self.section_token: Optional[str] = section_token
self.uid_modified: bool = uid_modified
self.title_modified: bool = title_modified
self.lhs_colored_title_diff: Optional[str] = lhs_colored_title_diff
self.rhs_colored_title_diff: Optional[str] = rhs_colored_title_diff
self.lhs_colored_title_diff: Optional[Markup] = lhs_colored_title_diff
self.rhs_colored_title_diff: Optional[Markup] = rhs_colored_title_diff

self.lhs_section: Optional[SDocSection] = lhs_section
self.rhs_section: Optional[SDocSection] = rhs_section
Expand All @@ -108,7 +110,7 @@ def __init__(
def is_paired_change(self) -> bool:
return self.lhs_section is not None and self.rhs_section is not None

def get_colored_title_diff(self, side: str) -> Optional[str]:
def get_colored_title_diff(self, side: str) -> Optional[Markup]:
assert self.title_modified
if side == "left":
return self.lhs_colored_title_diff
Expand All @@ -125,8 +127,8 @@ def __init__(
field_name: str,
lhs_field: Optional[SDocNodeField],
rhs_field: Optional[SDocNodeField],
left_diff: Optional[str],
right_diff: Optional[str],
left_diff: Optional[Markup],
right_diff: Optional[Markup],
):
assert isinstance(field_name, str) and len(field_name) > 0
assert lhs_field is not None or rhs_field is not None
Expand All @@ -137,10 +139,10 @@ def __init__(
self.field_name: str = field_name
self.lhs_field: Optional[SDocNodeField] = lhs_field
self.rhs_field: Optional[SDocNodeField] = rhs_field
self.left_diff: Optional[str] = left_diff
self.right_diff: Optional[str] = right_diff
self.left_diff: Optional[Markup] = left_diff
self.right_diff: Optional[Markup] = right_diff

def get_colored_free_text_diff(self, side: str) -> Optional[str]:
def get_colored_free_text_diff(self, side: str) -> Optional[Markup]:
if side == "left":
return self.left_diff
if side == "right":
Expand Down
28 changes: 15 additions & 13 deletions strictdoc/git/project_diff_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Set, Tuple, Union

from markupsafe import Markup

from strictdoc.backend.sdoc.models.document import SDocDocument
from strictdoc.backend.sdoc.models.node import (
SDocNode,
Expand All @@ -24,7 +26,7 @@
SectionChange,
)
from strictdoc.helpers.cast import assert_cast, assert_optional_cast
from strictdoc.helpers.diff import get_colored_diff_string, similar
from strictdoc.helpers.diff import get_colored_html_diff_string, similar
from strictdoc.helpers.mid import MID


Expand Down Expand Up @@ -347,8 +349,8 @@ def _iterate_one_index(

uid_modified: bool = False
title_modified: bool = False
lhs_colored_title_diff: Optional[str] = None
rhs_colored_title_diff: Optional[str] = None
lhs_colored_title_diff: Optional[Markup] = None
rhs_colored_title_diff: Optional[Markup] = None

# If there is another section and the UIDs are not the
# same, consider the UID modified.
Expand All @@ -366,12 +368,12 @@ def _iterate_one_index(
if other_document_or_none is not None:
if document.title != other_document_or_none.title:
title_modified = True
lhs_colored_title_diff = get_colored_diff_string(
lhs_colored_title_diff = get_colored_html_diff_string(
document.title,
other_document_or_none.title,
"left",
)
rhs_colored_title_diff = get_colored_diff_string(
rhs_colored_title_diff = get_colored_html_diff_string(
document.title,
other_document_or_none.title,
"right",
Expand Down Expand Up @@ -461,8 +463,8 @@ def _iterate_one_index(

uid_modified: bool = False
title_modified: bool = False
lhs_colored_title_diff: Optional[str] = None
rhs_colored_title_diff: Optional[str] = None
lhs_colored_title_diff: Optional[Markup] = None
rhs_colored_title_diff: Optional[Markup] = None

# If there is another section and the UIDs are not the
# same, consider the UID modified.
Expand All @@ -481,14 +483,14 @@ def _iterate_one_index(
if node.title != other_section_or_none.title:
title_modified = True
lhs_colored_title_diff = (
get_colored_diff_string(
get_colored_html_diff_string(
node.title,
other_section_or_none.title,
"left",
)
)
rhs_colored_title_diff = (
get_colored_diff_string(
get_colored_html_diff_string(
node.title,
other_section_or_none.title,
"right",
Expand Down Expand Up @@ -769,10 +771,10 @@ def create_field_change(
other_requirement_field_value = (
other_requirement_field.get_text_value()
)
left_diff = get_colored_diff_string(
left_diff = get_colored_html_diff_string(
requirement_field_value, other_requirement_field_value, "left"
)
right_diff = get_colored_diff_string(
right_diff = get_colored_html_diff_string(
requirement_field_value, other_requirement_field_value, "right"
)

Expand Down Expand Up @@ -857,12 +859,12 @@ def create_comment_field_changes(
comment_other_value = changed_other_field_.get_text_value()
assert comment_other_value is not None

left_diff = get_colored_diff_string(
left_diff = get_colored_html_diff_string(
comment_value,
comment_other_value,
"left",
)
right_diff = get_colored_diff_string(
right_diff = get_colored_html_diff_string(
comment_value,
comment_other_value,
"right",
Expand Down
12 changes: 7 additions & 5 deletions strictdoc/helpers/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@
import difflib
from difflib import SequenceMatcher

from markupsafe import Markup, escape


# Similarity score of a and b: the ratio() reported by difflib's
# SequenceMatcher, constructed with None as its first (junk-filter) argument.
def similar(a, b):
return SequenceMatcher(None, a, b).ratio()


red = lambda text: f'<span class="lambda_red">{text}</span>'
green = lambda text: f'<span class="lambda_green">{text}</span>'
white = lambda text: f"<span>{text}</span>"
red = lambda text: f'<span class="lambda_red">{escape(text)}</span>'
green = lambda text: f'<span class="lambda_green">{escape(text)}</span>'
white = lambda text: f"<span>{escape(text)}</span>"


def get_colored_diff_string(old: str, new: str, flag: str):
def get_colored_html_diff_string(old: str, new: str, flag: str) -> Markup:
assert old is not None
assert new is not None
assert flag in ("left", "right")
Expand All @@ -33,4 +35,4 @@ def get_colored_diff_string(old: str, new: str, flag: str):
result += red(old[code[1] : code[2]])
else:
result += green(new[code[3] : code[4]])
return result
return Markup(result)
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[DOCUMENT]
TITLE: Doc Title with special characters <>

[SECTION]
TITLE: To be removed section with special characters <>

[REQUIREMENT]
TITLE: To be removed title with special characters <>
STATEMENT: To be removed statement with special characters <>

[/SECTION]

[SECTION]
UID: SECT-1
TITLE: To be changed section with special characters <>

[REQUIREMENT]
UID: REQ-1
TITLE: To be changed title with special characters <>
STATEMENT: To be changed statement with special characters <>

[/SECTION]
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[DOCUMENT]
TITLE: Doc Title with more special characters <>&"'

[SECTION]
TITLE: Added section with more special characters <>&"'

[REQUIREMENT]
TITLE: Added title with more special characters <>&"'
STATEMENT: Added statement with more special characters <>&"'

[/SECTION]

[SECTION]
UID: SECT-1
TITLE: Changed section with more special characters <>&"'

[REQUIREMENT]
UID: REQ-1
TITLE: Changed title with more special characters <>&"'
STATEMENT: Changed statement with more special characters <>&"'

[/SECTION]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[project]
title = "Test Project"

features = [
"DIFF",
]
Loading

0 comments on commit a7d6ee4

Please sign in to comment.