]>
git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/doc/htmlsplit.py
3 # This script takes the single-page HTML output from pandoc - tutorial.html -
4 # and splits it into many pages in split/: one page index.html for the table
5 # of contents, and an additional page for each chapter. We make sure that
6 # links from the TOC to each chapter, and also links across chapters,
7 # continue to work correctly, and also add links from each chapter back to
8 # the TOC, as well as to the next and previous chapters.
11 # Copyright (C) 2018 ScyllaDB.
13 # This file is open source software, licensed to you under the terms
14 # of the Apache License, Version 2.0 (the "License"). See the NOTICE file
15 # distributed with this work for additional information regarding copyright
16 # ownership. You may not use this file except in compliance with the License.
18 # You may obtain a copy of the License at
20 # http://www.apache.org/licenses/LICENSE-2.0
22 # Unless required by applicable law or agreed to in writing,
23 # software distributed under the License is distributed on an
24 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
25 # KIND, either express or implied. See the License for the
26 # specific language governing permissions and limitations
29 from xml
.etree
import ElementTree
34 # chapter number to chapter title
36 # section id => chapter number
40 def add_elem_to_body(tree
, e
):
41 body
= next(tree
.iterfind('./body'))
45 def add_nav_to_body(tree
, chap_num
):
46 body
= next(tree
.iterfind('./body'))
48 nav
= ElementTree
.SubElement(body
, 'div')
49 e
= ElementTree
.SubElement(nav
, 'a',
51 e
.text
= 'Back to table of contents'
53 prev_index
= chap_num
- 1
54 if prev_index
in titles
:
55 e
.tail
+= " Previous: "
56 prev_title
= titles
[prev_index
]
57 e
= ElementTree
.SubElement(nav
, 'a',
58 href
=f
'{prev_index}.html')
59 e
.text
= f
'{prev_index} {prev_title}'
61 next_index
= chap_num
+ 1
62 if next_index
in titles
:
64 next_title
= titles
[next_index
]
65 e
= ElementTree
.SubElement(nav
, 'a',
66 href
=f
'{next_index}.html')
67 e
.text
= f
'{next_index} {next_title}'
72 for chap
in toc
.iterfind('./ul/li'):
73 chap_href_elem
= next(chap
.iterfind('./a[@href]'))
74 chap_num_elem
= next(chap_href_elem
.iterfind(
75 './span[@class="toc-section-number"]'))
76 # For chapters, remember the mapping from number to name in the
77 # map "titles", so we can use them later in links to next and
79 chap_num
= int(chap_num_elem
.text
)
80 titles
[chap_num
] = chap_num_elem
.tail
.strip()
82 # For all sections, remember the mapping from name-with-dashes
83 # to the chapter number they are in in "sections". We need this
84 # to support links to other sections.
85 href
= chap_href_elem
.get('href')
86 sections
[href
] = chap_num
87 for section
in chap
.iterfind('.//ul/li/a[@href]'):
88 href
= section
.get('href')
89 # replace the link to '#section' with number N.M to chapterN#section
90 if href
.startswith('#'):
91 sections
[href
] = chap_num
95 for link
in e
.findall('.//a[@href]'):
96 href
= link
.get('href')
97 if href
.startswith('#') and href
in sections
:
98 # In a chapter we can have a link to a different subsection, which
99 # looks like <a href="#some-title">Some title</A>. We need to
100 # replace this to refer to the right file after the split.
101 chap_num
= sections
[href
]
102 link
.set('href', f
'{chap_num}.html{href}')
def remove_ns_prefix(tree):
    """Strip the XHTML namespace prefix from every tag under *tree*.

    ElementTree spells a namespaced tag as ``{namespace-uri}local-name``.
    This rewrites each such tag in place to just ``local-name`` so that
    un-namespaced paths such as ``./body`` match.

    *tree* may be anything exposing ``iter()`` that yields elements (both
    ``ElementTree`` and ``Element`` do). Returns ``None``; the elements
    are modified in place.
    """
    prefix = '{http://www.w3.org/1999/xhtml}'
    for e in tree.iter():
        tag = e.tag
        # Comment and processing-instruction nodes carry a callable in
        # .tag rather than a string; they have no namespace to strip and
        # would make .startswith() raise AttributeError.
        if isinstance(tag, str) and tag.startswith(prefix):
            e.tag = tag[len(prefix):]
def get_chap_num(element):
    """Return the chapter number of a chapter heading element as an int.

    The number is looked up in two places, in this order:

    1. the ``data-number`` attribute of *element*;
    2. the text of a direct child
       ``<span class="header-section-number">``.

    Raises ``AssertionError`` with the message "section number not found"
    when neither source yields a non-empty value.
    """
    # Read from the parameter itself rather than from a same-named
    # variable in an outer scope, so the result depends only on the
    # argument passed in.
    data_num = element.get('data-number')
    if data_num:
        return int(data_num)
    data_num = element.findtext('./span[@class="header-section-number"]')
    if data_num:
        return int(data_num)
    # Raised explicitly (same exception type and message as an assert)
    # so the check is not stripped when Python runs with -O.
    raise AssertionError("section number not found")
122 parser
= argparse
.ArgumentParser()
123 parser
.add_argument('--input')
124 parser
.add_argument('--output-dir')
125 args
= parser
.parse_args()
127 tree
= ElementTree
.parse(args
.input)
128 for e
in tree
.iter():
130 template
= copy
.deepcopy(tree
.getroot())
131 template_body
= next(template
.iterfind('./body'))
132 template_body
.clear()
134 # iterate through the children elements in body
135 # body element is composed of
139 # h1 marks the beginning of a chapter
143 for e
in next(tree
.iterfind('./body')):
144 if e
.tag
== 'header':
145 template_body
.append(e
)
146 elif e
.get('id') == 'TOC':
149 toc_tree
= ElementTree
.ElementTree(copy
.deepcopy(template
))
150 add_elem_to_body(toc_tree
, e
)
151 toc_tree
.write(os
.path
.join(args
.output_dir
, 'index.html'),
157 add_nav_to_body(chap_tree
, chap_num
)
158 chap_tree
.write(os
.path
.join(args
.output_dir
, f
'{chap_num}.html'),
160 chap_num
= get_chap_num(e
)
161 chap_tree
= ElementTree
.ElementTree(copy
.deepcopy(template
))
162 add_nav_to_body(chap_tree
, chap_num
)
163 add_elem_to_body(chap_tree
, e
)
165 assert chap_tree
is not None
167 add_elem_to_body(chap_tree
, e
)
169 add_nav_to_body(chap_tree
, chap_num
)
170 chap_tree
.write(os
.path
.join(args
.output_dir
, f
'{chap_num}.html'),