From abd54cadcdd5e038b7b814855f13f56872a04fe1 Mon Sep 17 00:00:00 2001 From: Xiang Gao Date: Wed, 18 Jan 2017 15:46:26 -0500 Subject: [PATCH] Use html.escape to escape <, > and & Add tests for escape --- nbconvert/filters/markdown_mistune.py | 14 ++++++------- nbconvert/filters/tests/test_markdown.py | 25 ++++++++++++++++++++---- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py index 2e346a48d..463583bf1 100644 --- a/nbconvert/filters/markdown_mistune.py +++ b/nbconvert/filters/markdown_mistune.py @@ -8,6 +8,7 @@ from __future__ import print_function import re +import html import mistune @@ -104,20 +105,19 @@ def header(self, text, level, raw=None): html = super(IPythonRenderer, self).header(text, level, raw=raw) return add_anchor(html) - def escape_lt(self,text): - return text.replace('<','&lt;') + def escape_html(self,text): + return html.escape(text,quote=False) - # Pass math through unaltered - mathjax does the rendering in the browser def block_math(self, text): - return '$$%s$$' % self.escape_lt(text) + return '$$%s$$' % self.escape_html(text) def latex_environment(self, name, text): - name = self.escape_lt(name) - text = self.escape_lt(text) + name = self.escape_html(name) + text = self.escape_html(text) return r'\begin{%s}%s\end{%s}' % (name, text, name) def inline_math(self, text): - return '$%s$' % self.escape_lt(text) + return '$%s$' % self.escape_html(text) def markdown2html_mistune(source): """Convert a markdown string to HTML using mistune""" diff --git a/nbconvert/filters/tests/test_markdown.py b/nbconvert/filters/tests/test_markdown.py index bce62b967..3e5d08d55 100644 --- a/nbconvert/filters/tests/test_markdown.py +++ b/nbconvert/filters/tests/test_markdown.py @@ -5,6 +5,7 @@ # Distributed under the terms of the Modified BSD License. 
import re +import html from copy import copy from functools import partial @@ -118,8 +119,8 @@ def test_markdown2html_heading_anchors(self): ]: self._try_markdown(markdown2html, md, tokens) - def test_markdown2html_math(self): - # Mathematical expressions should be passed through unaltered + def test_markdown2html_math_noescape(self): + # Mathematical expressions not containing <, >, & should be passed through unaltered cases = [("\\begin{equation*}\n" "\\left( \\sum_{k=1}^n a_k b_k \\right)^2 \\leq \\left( \\sum_{k=1}^n a_k^2 \\right) \\left( \\sum_{k=1}^n b_k^2 \\right)\n" "\\end{equation*}"), @@ -131,7 +132,23 @@ def test_markdown2html_math(self): ] for case in cases: self.assertIn(case, markdown2html(case)) - + + def test_markdown2html_math_escape(self): + # all the "<", ">", "&" must be escaped correctly + cases = [ "$aa;a-b<0$", + "$$"] + for case in cases: + result = markdown2html(case) + math = re.search("\$.*\$",result).group(0) + # the resulting math part can not contain "<", ">" or + # "&" not followed by "lt;", "gt;", or "amp;". + self.assertNotIn("<", math) + self.assertNotIn(">", math) + self.assertNotRegex(math,"&(?![gt;|lt;|amp;])") + # the result should be able to be unescaped correctly + self.assertEquals(case,html.unescape(math)) + def test_markdown2html_math_mixed(self): """ensure markdown between inline and inline-block math""" case = """The entries of $C$ are given by the exact formula: @@ -171,7 +188,7 @@ def test_markdown2html_math_paragraph(self): ] for case in cases: - self.assertIn(case, markdown2html(case)) + self.assertIn(case, html.unescape(markdown2html(case))) @dec.onlyif_cmds_exist('pandoc') def test_markdown2rst(self):