-
Notifications
You must be signed in to change notification settings - Fork 11
/
Latex.py
105 lines (75 loc) · 3.1 KB
/
Latex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import binascii
import hashlib
import re
import subprocess
from typing import AnyStr, List, Tuple

from Cryptodome.Hash import SHA1
from pdf2image import convert_from_bytes

from Formatters import stripHTML
# Matches the three supported LaTeX markers. The alternatives are mutually
# exclusive, so exactly one capture group is set on any given match:
#   group 1 -> [latex]...[/latex]
#   group 2 -> [$]...[/$]
#   group 3 -> [$$]...[/$$]
# Inline flags: x (verbose), s (dot also matches newlines), i (ignore case).
LATEX = re.compile(r"(?xsi)\[latex\](.+?)\[/latex\]|\[\$\](.+?)\[/\$\]|\[\$\$\](.+?)\[/\$\$\]")
# Matches the HTML tags <br>, <br/> and <div>, ignoring case.
LATEX_NEWLINES = re.compile(r"(?xi)<br(/)?>|<div>")
class ExtractedLatex(object):
    """A LaTeX snippet paired with the image file name it is rendered to."""

    def __init__(self, fname: str, latex: str):
        # File name of the rendered image for this snippet.
        self.fname = fname
        # LaTeX source text of the snippet.
        self.latex = latex

    def __repr__(self):
        # Multi-line, brace-delimited dump of both fields.
        head = "ExtractedLatex {\n\tfname:" + self.fname
        tail = ", \n\tlatex:" + self.latex
        return head + tail + "\n}"

    def __str__(self):
        # str() and repr() intentionally produce the same text.
        return ExtractedLatex.__repr__(self)
def _string_checksum(string: AnyStr) -> bytes:
h = SHA1.new()
h.update(string.encode())
return h.hexdigest()[:20].encode()
def contains_latex(text: AnyStr) -> bool:
    """Return True if *text* contains a LaTeX marker pair anywhere.

    Recognised markers are those matched by the module-level ``LATEX``
    pattern: ``[latex]...[/latex]``, ``[$]...[/$]`` and ``[$$]...[/$$]``.
    """
    # search() scans the whole string; match() would only succeed when the
    # markup sits at position 0 and miss e.g. "a[latex]x[/latex]".
    return LATEX.search(text) is not None
def fname_for_latex(latex: str, is_svg: bool) -> str:
    """Build the image file name for a LaTeX snippet.

    Args:
        latex: LaTeX source whose checksum identifies the image.
        is_svg: When true the name ends in ``.svg``, otherwise ``.png``.

    Returns:
        A name of the form ``latex-<hex>.<ext>``.
    """
    # _string_checksum already yields ASCII hex bytes; hexlify hex-encodes
    # those bytes once more, doubling the length of the name component.
    digest_hex = binascii.hexlify(_string_checksum(latex)).decode()
    suffix = "svg" if is_svg else "png"
    return f"latex-{digest_hex}.{suffix}"
def image_link_for_fname(fname: str) -> str:
    """Return the ``<img class=latex>`` HTML tag that points at *fname*.

    The file name is inserted verbatim; no HTML escaping is applied.
    """
    return f'<img class=latex src="{fname}">'
def strip_html_for_latex(html: str) -> str:
    """Turn an HTML fragment into plain text suitable for LaTeX.

    Tags matched by ``LATEX_NEWLINES`` are replaced by a newline first, then
    the result is passed through ``stripHTML`` (imported from ``Formatters``;
    presumably it removes the remaining markup -- confirm against that module).

    Args:
        html: HTML fragment taken from between LaTeX markers.

    Returns:
        The processed text. If ``stripHTML`` returns ``None`` the
        newline-substituted text is returned instead.
    """
    with_newlines = LATEX_NEWLINES.sub("\n", html)
    stripped = stripHTML(with_newlines)
    # Fall back to the intermediate text when the helper yields nothing.
    return with_newlines if stripped is None else stripped
def export_latex(latex_src: ExtractedLatex, latexPre: str, latexPost: str) -> None:
    """Render one extracted LaTeX snippet to a PNG image using pdflatex.

    Steps: write ``<stem>.tex`` wrapping the snippet in a ``standalone``
    document, run ``pdflatex`` on it, read the resulting ``<stem>.pdf`` and
    save its first page as ``<stem>.png``. ``<stem>`` is the part of
    ``latex_src.fname`` before its first dot. All files are addressed
    relative to the current working directory.

    Args:
        latex_src: Snippet to render; ``fname`` supplies the file stem and
            ``latex`` the document body.
        latexPre: Accepted for interface compatibility. NOTE(review): it is
            currently unused -- the preamble is hard-coded in the template.
        latexPost: Accepted for interface compatibility. NOTE(review):
            currently unused, same as ``latexPre``.

    Raises:
        OSError: If the ``.tex`` file cannot be written or the ``.pdf``
            produced by pdflatex cannot be opened.
    """
    stem = latex_src.fname.split(".")[0]
    tex_path = stem + ".tex"
    template = r'''\documentclass[preview]{{standalone}}\begin{{document}}{}\end{{document}}'''

    # Newlines in the snippet become explicit LaTeX line breaks (\\).
    body = str(latex_src.latex.replace("\n", " \\\\ "))
    with open(tex_path, 'wb') as tex_file:
        tex_file.write(template.format(body).encode('UTF-8'))

    # Argument list instead of a shell string: the file name is never parsed
    # by a shell. The exit status is ignored, as before.
    subprocess.call(['pdflatex', tex_path])

    # Read the PDF inside a context manager so the handle is always closed.
    with open(stem + ".pdf", 'rb') as pdf_file:
        pages = convert_from_bytes(pdf_file.read())

    # NOTE(review): the output is always written as .png, even when
    # latex_src.fname ends in .svg.
    pages[0].save(stem + ".png")
def extract_latex(text: str, svg: bool) -> Tuple[str, List[ExtractedLatex]]:
    """Replace LaTeX markup in *text* with image tags and collect the snippets.

    Each region matched by ``LATEX`` is converted to LaTeX source, cleaned
    with ``strip_html_for_latex``, and substituted by the ``<img>`` tag from
    ``image_link_for_fname``.

    Args:
        text: Text that may contain ``[latex]``, ``[$]`` or ``[$$]`` regions.
        svg: Passed to ``fname_for_latex`` to choose the ``.svg`` or ``.png``
            file extension.

    Returns:
        A ``(new_text, extracted)`` tuple: the text with every region
        replaced by its image tag, and one ``ExtractedLatex`` per region in
        order of appearance (repeated snippets are not de-duplicated).
    """
    extracted = []

    def replace(match: re.Match) -> str:
        # Exactly one of the three groups is set, depending on the marker.
        plain, inline, display = match.groups()
        if plain is not None:
            latex = plain
        elif inline is not None:
            latex = "${}$".format(inline)
        else:
            latex = r"\begin{{displaymath}}{}\end{{displaymath}}".format(display)

        latex_text = strip_html_for_latex(latex)
        fname = fname_for_latex(latex_text, svg)
        extracted.append(ExtractedLatex(fname, latex=latex_text))
        return image_link_for_fname(fname)

    return LATEX.sub(replace, text), extracted
if __name__ == '__main__':
    # Manual smoke test: render the first snippet of each sample to an image.
    latexPre = (
        "\\documentclass[12pt]{article}\n"
        "\\special{papersize=3in,5in}\n"
        "\\usepackage{amssymb,amsmath}\n"
        "\\pagestyle{empty}\n"
        "\\setlength{\\parindent}{0in}\n"
        "\\begin{document}\n"
    )
    latexPost = "\\end{document}"

    # (input text, svg flag) pairs, one per supported marker style.
    samples = (
        ("a[latex]one<br>and<div>two[/latex]b", False),
        ("[$]<b>hello</b> world[/$]", True),
        ("[$$]math & stuff[/$$]", False),
    )
    for sample_text, as_svg in samples:
        snippets = extract_latex(sample_text, as_svg)[1]
        export_latex(snippets[0], latexPre, latexPost)