make_html.py
#!/usr/bin/env python3
#-*- coding:utf-8 -*-
############################
# Usage:
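#   ./make_html.py [version]
#     version defaults to "8.1-systemd"; the concatenated book HTML is written
#     to stdout, e.g. ./make_html.py 8.1-systemd > lfs-book.html (the output
#     file name here is only illustrative). Expects the rendered book pages
#     under view/<version>/.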
# File Name: make_html.py
# Author: annhe
# Mail: i@annhe.net
# Created Time: 2017-10-25 17:14:18
############################
from bs4 import BeautifulSoup
import re
import sys
import os


def _init(version):
    """Parse the book's index.html and return the soup plus its top-level TOC entries."""
    path = "view/" + version + "/index.html"
    with open(path, "rb") as html_doc:
        soup = BeautifulSoup(html_doc, "html.parser")
    toc = soup.find('div', {'class': 'toc'})
    parts = toc.find_all('li', class_=re.compile('part|index|preface'))
    return soup, parts


def get_part_title(part):
    """Return the heading text of a part/index/preface entry in the TOC."""
    try:
        title = part.find('h3').string
    except AttributeError:
        title = part.find('h4').string
    if not title:
        # The <h3> heading may wrap its text in an <a> element.
        title = part.find('h3').find('a').string
    return title.strip()


def get_innerlink_map(version):
    """Map every chapter/section HTML file to the anchor id of its main heading.

    Returns two dicts keyed by the two relative-path forms used in the book's
    links: one prefixed with '../', one relative to the version root.
    """
    inner_link_map_long = {}
    inner_link_map_short = {}
    rootdir = 'view/' + version
    title_class = re.compile('sect1|part|chapter|title|appendix|index|preface')
    for root, dirs, files in os.walk(rootdir):
        long_root = root.replace(rootdir, '..')
        short_root = root.replace(rootdir + '/', '').replace(rootdir, '')
        for name in files:
            if not name.endswith(".html"):
                continue
            k = os.path.join(root, name)
            kl = os.path.join(long_root, name)
            ks = os.path.join(short_root, name)
            try:
                with open(k, "rb") as f:
                    sp = BeautifulSoup(f, 'html.parser')
                sect_title_id = '#' + sp.find('h1', class_=title_class).find('a')['id']
                inner_link_map_long[kl] = sect_title_id
                inner_link_map_short[ks] = sect_title_id
            except (OSError, AttributeError, TypeError, KeyError):
                # Pages without a matching <h1><a id=...> heading are skipped.
                pass
    return inner_link_map_long, inner_link_map_short


def copyrightPage(version):
    """Extract the book's legal notice and return it as an HTML fragment."""
    with open('view/' + version + '/legalnotice.html', "rb") as f:
        sp = BeautifulSoup(f, 'html.parser')
    html = sp.find('div', class_='legalnotice').decode_contents(formatter="html")
    return "<h1>Legal Notice</h1>" + html


def genHtml(version):
    """Concatenate every part, chapter and section of the book into one HTML string."""
    path = 'view/' + version + '/'
    soup, parts = _init(version)
    html = ""
    inner_link_map_long, inner_link_map_short = get_innerlink_map(version)
    for part in parts:
        part_title = get_part_title(part)
        html += '<h1>' + part_title + '</h1>'
        chapters = part.find_all('li', class_="chapter")
        if not chapters:
            chapters = part.find_all('li', class_="sect1")
        if not chapters:
            # Parts without chapter/sect1 children (preface, index) act as their own chapter.
            chapters = [part]
        for chapter in chapters:
            try:
                chapter_title = chapter.find('h4').string.strip()
            except AttributeError:
                chapter_title = ""
            if chapter_title:
                html += '<h2>' + chapter_title + '</h2>'
            # Every link in the chapter's TOC entry points at one section page.
            for sect in chapter.find_all('a'):
                fullpath = path + sect['href']
                with open(fullpath, "rb") as f:
                    sp = BeautifulSoup(f, "html.parser")
                content = sp.find('div', class_=re.compile('sect1|appendix|index|wrap')).decode_contents(formatter="html")
                # Demote the page's h1 headings to h3 so they nest under the part/chapter headings.
                html += content.replace('h1', 'h3')
    # The pages reference images and each other with relative paths; rewrite image
    # paths and turn inter-page links into in-document anchors.
    ret = html.replace('../images/', 'images/')
    for k, v in inner_link_map_long.items():
        ret = ret.replace(k, v)
    for k, v in inner_link_map_short.items():
        ret = ret.replace(k, v)
    # Collapse fragments that became "#page-id#anchor" into plain "#anchor".
    ret = re.sub('href="#(.*?)#(.*?)"', 'href="#\\2"', ret)
    return ret


if __name__ == '__main__':
    if len(sys.argv) < 2:
        version = "8.1-systemd"
    else:
        version = sys.argv[1]
    legalnotice = copyrightPage(version)
    html = genHtml(version)
    print(legalnotice + html)