-
Notifications
You must be signed in to change notification settings - Fork 1
/
GenerateWP.py
62 lines (37 loc) · 1.57 KB
/
GenerateWP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import re
import os
import argparse
def main():
    """Split an HTML file into a right-rail link list and a web content file.

    Reads the HTML file named on the command line and writes two files whose
    names are the input path with its extension replaced:

    * ``<name>_rr.html`` -- one ``<a href="#id">title</a><br/>`` link per
      chapter heading, written back to back with no separating newline.
    * ``<name>_web.html`` -- a copy of the input in which each chapter heading
      line is replaced by an anchor ``<span>`` carrying the heading's id,
      followed by an ``<h1>`` without the id.

    A chapter heading is a line that *starts* with ``<h1 id="...">`` and
    contains a closing ``</h1>``; anything after the ``</h1>`` on that line
    (including the line break) is dropped from the web content file.
    """
    # create an argument parser to take commandline arguments (e.g. target filename)
    arg_parser = argparse.ArgumentParser(description="html file to have chapter headings extracted")
    arg_parser.add_argument("filename")
    args = arg_parser.parse_args()

    # Group 1 is the heading id, group 2 the heading text. The id is limited to
    # non-quote characters so that markup inside the heading text (for example
    # a nested <a href="...">) cannot be swallowed into the id.
    chapter_heading = re.compile(r'<h1 id="([^"]*)">(.*)</h1>')

    # Strip the file extension (whatever its length) to build the output names.
    base_name = os.path.splitext(args.filename)[0]
    right_rail_name = base_name + "_rr.html"
    web_content_name = base_name + "_web.html"

    # Plain "r" is used because the old "rU" flag is rejected by current
    # Python; text mode already translates all newline styles to "\n".
    # The with-block guarantees all three files are closed, even on error.
    with open(args.filename, "r") as source, \
            open(right_rail_name, "w") as right_rail, \
            open(web_content_name, "w") as web_content:
        for line in source:
            heading = chapter_heading.match(line)
            if heading is None:
                # Not a chapter break: copy the line through unchanged.
                web_content.write(line)
                continue

            anchor_id, title = heading.groups()
            # At a chapter break, write the link to the right rail file...
            right_rail.write('<a href="#' + anchor_id + '">' + title + '</a><br/>')
            # ...and in the web content file replace the header with a special
            # anchor tag plus a header that no longer carries the id.
            web_content.write('<span id="' + anchor_id + '" class="anchor"></span><br/>')
            web_content.write('<h1>' + title + '</h1><br/>')
# Run the command-line tool only when this file is executed as a script, so
# that importing the module does not parse sys.argv or touch any files.
if __name__ == "__main__":
    main()