forked from phoible/phoible.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convertMdToHTML.py
123 lines (111 loc) · 4.41 KB
/
convertMdToHTML.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
from os import path
import re
import subprocess

import bs4
# Pandoc location handed to knitRmdToHTML.R as its second command-line
# argument. Set the RSTUDIO_PANDOC environment variable to override it without
# touching this file; otherwise edit the default below.
RSTUDIO_PANDOC = (
    os.environ.get('RSTUDIO_PANDOC')
    or '/Applications/RStudio.app/Contents/MacOS/pandoc'
)
# How BeautifulSoup serializes a literal ">" that occurs in paragraph text.
_ESCAPED_GT = '&gt;'
# Literal text that precedes a URL in the rendered references
# (presumably an unexpanded "\url" macro -- confirm against the Rmd source).
_REFERENCE_URL_MARKER = 'Online: urlhttp'
_SPACES_BEFORE_COMMA = re.compile(r' +,')
# The period is escaped on purpose: an unescaped "." matches any character and
# would swallow the first letter of the following word.
_SPACES_BEFORE_PERIOD = re.compile(r' +\.')


def _append_markup(tag, markup):
    """Parse *markup* as an HTML fragment and append its nodes to *tag*."""
    fragment = bs4.BeautifulSoup(markup, 'html.parser')
    # Copy the child list first: appending a node moves it out of `fragment`.
    for node in list(fragment.contents):
        tag.append(node)


def _style_tables(soup):
    """Give every table the DataTables/Bootstrap attributes used by the site."""
    for table in soup.find_all('table'):
        table['cellpadding'] = '0'
        table['cellspacing'] = '0'
        table['border'] = '0'
        table['class'] = 'table table-bordered order-column compact stripe dataTable no-footer table-nonfluid'
        table['role'] = 'grid'
    for th in soup.find_all('th'):
        th['role'] = 'row'


def _fix_blockquotes(soup):
    """Wrap the text after a leftover ">" quote marker in a <blockquote>."""
    for p in soup.find_all('p'):
        # Look for the escaped form in the paragraph's inner HTML. Testing for
        # a bare ">" in str(p) is always true because the tags contain it.
        before, marker, quoted = p.decode_contents().partition(_ESCAPED_GT)
        if not marker:
            continue
        p.clear()
        _append_markup(p, before)
        blockquote = soup.new_tag('blockquote')
        _append_markup(blockquote, quoted)
        p.append(blockquote)


def _trim_emphasis(soup):
    """Strip leading/trailing whitespace inside every <em> element."""
    for em in soup.find_all('em'):
        inner = em.decode_contents().strip()
        em.clear()
        _append_markup(em, inner)


def _tidy_references(soup):
    """Clean up the paragraphs of the ``div.references`` block, if present."""
    references_div = soup.find('div', {'class': 'references'})
    if references_div is None:
        return
    for ref in references_div.find_all('p'):
        original = ref.get_text()
        text = _SPACES_BEFORE_COMMA.sub(',', original)
        text = _SPACES_BEFORE_PERIOD.sub('.', text)

        head, marker, tail = text.partition(_REFERENCE_URL_MARKER)
        if marker:
            # Build the link last so that no later text rewrite flattens it.
            url = 'http' + tail
            ref.string = head
            link = soup.new_tag('a')
            link.string = url
            link['href'] = url
            ref.append(link)
            continue

        if text.endswith(':'):
            text = text[:-1] + '.'
        # Assigning .string replaces all children, so only do it when the text
        # really changed; untouched references keep their inline markup.
        if text != original:
            ref.string = text


def _demote_headings(soup):
    """Shift headings h1-h5 one level down (h1 -> h2, ..., h5 -> h6)."""
    # Walk from h5 down to h1 so a freshly renamed heading is not bumped again.
    for level in range(5, 0, -1):
        for tag in soup.find_all(f'h{level}'):
            tag.name = f'h{level + 1}'


def fix_FAQ(file_path, output_path):
    """Post-process the knitted FAQ HTML and write the embeddable fragment.

    Reads the HTML document at ``file_path``, restyles its tables, converts
    leftover ">" quote markers into ``<blockquote>`` elements, trims
    whitespace inside ``<em>``, tidies the references block and shifts all
    headings one level down. The ``div`` with class
    ``container-fluid main-container`` is then written to ``output_path``,
    followed by a newline and the contents of ``scripts.js`` (looked up in the
    current working directory).

    Raises:
        ValueError: if the document has no main container ``div``; nothing is
            written in that case instead of a file containing ``None``.
    """
    # The documents contain non-ASCII characters; do not rely on the locale.
    with open(file_path, encoding='utf-8') as f:
        soup = bs4.BeautifulSoup(f.read(), 'html.parser')

    _style_tables(soup)
    _fix_blockquotes(soup)
    _trim_emphasis(soup)
    _tidy_references(soup)
    _demote_headings(soup)

    content = soup.find('div', {'class': 'container-fluid main-container'})
    if content is None:
        raise ValueError(
            f"{file_path}: no <div class='container-fluid main-container'> found"
        )

    # Read the scripts before opening the output so that a missing scripts.js
    # does not leave a truncated output file behind.
    with open('scripts.js', encoding='utf-8') as scripts_file:
        scripts = scripts_file.read()

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(str(content))
        file.write('\n')
        file.write(scripts)
def fix_conventions(file_path, output_path):
    """Restyle the rendered conventions page and write it to ``output_path``.

    Reads the HTML document at ``file_path``, applies the site's table
    attributes, tags the rows of every ``<tbody>`` alternately as ``odd`` /
    ``even`` (the first row is ``odd``), shifts headings h1-h5 one level down
    and writes the whole document back out.

    ``file_path`` and ``output_path`` may be the same file: the input is read
    completely and closed before the output is opened.
    """
    # The documents contain non-ASCII characters; do not rely on the locale.
    with open(file_path, encoding='utf-8') as f:
        soup = bs4.BeautifulSoup(f.read(), 'html.parser')

    # beautify tables
    for table in soup.find_all('table'):
        table['cellpadding'] = '0'
        table['cellspacing'] = '0'
        table['border'] = '0'
        table['class'] = 'table table-bordered order-column compact stripe dataTable no-footer table-nonfluid'
        table['role'] = 'grid'
    for th in soup.find_all('th'):
        th['role'] = 'row'

    # zebra-stripe the body rows; numbering restarts for every <tbody>
    for tbody in soup.find_all('tbody'):
        for position, tr in enumerate(tbody.find_all('tr'), start=1):
            tr['class'] = 'even' if position % 2 == 0 else 'odd'

    # fix titles size: walk from h5 down to h1 so that a freshly renamed
    # heading is not bumped a second time
    for level in range(5, 0, -1):
        for tag in soup.find_all(f'h{level}'):
            tag.name = f'h{level + 1}'

    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(str(soup))
def main():
    """Render the FAQ and conventions pages and post-process the HTML.

    Runs ``knitRmdToHTML.R`` on ``_faq.Rmd`` and ``rst2html5`` on
    ``conventions.rst``, then fixes up the generated HTML files. All paths are
    relative to the current working directory.

    Raises:
        subprocess.CalledProcessError: if either external tool exits with a
            non-zero status, so a failed render is not post-processed.
        FileNotFoundError: if ``Rscript`` or ``rst2html5`` is not on PATH.
    """
    print('Start kniting Rmd to HTML...')
    print('File: _faq.Rmd')
    # An argument list (no shell) keeps a pandoc path with spaces in one piece.
    subprocess.run(
        ['Rscript', '--vanilla', 'knitRmdToHTML.R', '_faq.Rmd', RSTUDIO_PANDOC],
        check=True,
    )
    fix_FAQ('_faq.html', 'faq_with_indexes.html')
    print('File: conventions.rst')
    subprocess.run(
        ['rst2html5', 'conventions.rst', 'conventions.html'],
        check=True,
    )
    fix_conventions('conventions.html', 'conventions.html')
    # Reset the colour at the end so the terminal does not stay blue.
    print('Converted! Output files: \033[94m faq_with_indexes.html conventions.html\033[0m')
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()