]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/doc/htmlsplit.py
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / seastar / doc / htmlsplit.py
CommitLineData
11fdf7f2
TL
1#!/usr/bin/env python3
2
# This script takes the single-page HTML output from pandoc - tutorial.html -
# and splits it into many pages in split/: one page index.html for the table
# of contents, and an additional page for each chapter. We make sure that
# links from the TOC to each chapter, and also links across chapters,
# continue to work correctly, and also add links from each chapter back to
# the TOC, as well as to the next and previous chapters.
9
10
11# Copyright (C) 2018 ScyllaDB.
12#
13# This file is open source software, licensed to you under the terms
14# of the Apache License, Version 2.0 (the "License"). See the NOTICE file
15# distributed with this work for additional information regarding copyright
16# ownership. You may not use this file except in compliance with the License.
17#
18# You may obtain a copy of the License at
19#
20# http://www.apache.org/licenses/LICENSE-2.0
21#
22# Unless required by applicable law or agreed to in writing,
23# software distributed under the License is distributed on an
24# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
25# KIND, either express or implied. See the License for the
26# specific language governing permissions and limitations
27# under the License.
28
f67539c2
TL
29from xml.etree import ElementTree
30import argparse
31import copy
32import os
33
34# chapter number to chapter title
11fdf7f2 35titles = {}
f67539c2 36# section id => chapter number
11fdf7f2 37sections = {}
f67539c2
TL
38
39
def add_elem_to_body(tree, e):
    """Append element *e* as the last child of the first <body> of *tree*.

    Raises StopIteration when *tree* has no <body> directly under its root.
    """
    bodies = tree.iterfind('./body')
    next(bodies).append(e)
43
44
def add_nav_to_body(tree, chap_num):
    """Append a navigation <div> to the <body> of *tree*.

    The div always starts with a link back to index.html. Links to the
    previous and the next chapter follow, each only if that chapter number
    is a key of the module-level ``titles`` map.
    """
    body = next(tree.iterfind('./body'))
    nav = ElementTree.SubElement(body, 'div')

    last_link = ElementTree.SubElement(nav, 'a', href='index.html')
    last_link.text = 'Back to table of contents'
    last_link.tail = '.'

    neighbours = (
        (" Previous: ", chap_num - 1),
        (" Next: ", chap_num + 1),
    )
    for label, index in neighbours:
        if index not in titles:
            continue
        # The label is plain text following the preceding link, so it is
        # appended to that link's tail rather than put in its own element.
        last_link.tail += label
        last_link = ElementTree.SubElement(nav, 'a', href=f'{index}.html')
        last_link.text = f'{index} {titles[index]}'
        last_link.tail = '.'
69
70
def handle_toc(toc):
    """Record chapter titles and section locations found in the TOC element.

    For every chapter entry (``./ul/li``) this fills two module-level maps:
    ``titles`` (chapter number -> chapter title) and ``sections``
    (href -> number of the chapter containing it).
    """
    for chapter_item in toc.iterfind('./ul/li'):
        chapter_link = next(chapter_item.iterfind('./a[@href]'))
        number_span = next(chapter_link.iterfind(
            './span[@class="toc-section-number"]'))

        # The chapter number is the span's text; the title is the text that
        # follows the span. Both are needed later for the links to the
        # previous and next chapter.
        number = int(number_span.text)
        titles[number] = number_span.tail.strip()

        # The chapter's own href is recorded unconditionally; nested
        # section links are recorded only when they are '#fragment' links.
        # This is what lets cross-section links be redirected to the right
        # per-chapter file.
        sections[chapter_link.get('href')] = number
        nested_targets = (
            anchor.get('href')
            for anchor in chapter_item.iterfind('.//ul/li/a[@href]'))
        sections.update(
            (target, number)
            for target in nested_targets
            if target.startswith('#'))
92
93
def fix_links(e):
    """Redirect in-page '#fragment' links below *e* to per-chapter files.

    A link such as <a href="#some-title"> whose href is a key of the
    module-level ``sections`` map is rewritten to "N.html#some-title",
    where N is the chapter number stored for it. All other links are left
    untouched.
    """
    for anchor in e.findall('.//a[@href]'):
        target = anchor.get('href')
        if not target.startswith('#'):
            continue
        if target not in sections:
            continue
        anchor.set('href', f'{sections[target]}.html{target}')
103
104
def remove_ns_prefix(tree):
    """Strip the XHTML namespace from every tag reachable via tree.iter().

    ElementTree spells namespaced tags as '{namespace}name'; tags in the
    XHTML namespace are rewritten in place to plain 'name'. Tags in any
    other namespace, or in none, are left as they are.
    """
    xhtml_ns = '{http://www.w3.org/1999/xhtml}'
    cut = len(xhtml_ns)
    for node in tree.iter():
        name = node.tag
        if name[:cut] == xhtml_ns:
            node.tag = name[cut:]
110
111
def get_chap_num(element):
    """Return the chapter number carried by a chapter heading element.

    The number is taken from the element's ``data-number`` attribute when
    that is present and non-empty, otherwise from the text of a direct
    child <span class="header-section-number">.

    Raises AssertionError if neither source yields a number, and ValueError
    if the value found is not an integer.
    """
    # Read from the argument itself, not from whatever global happens to be
    # bound at the call site.
    data_num = element.get('data-number')
    if data_num:
        return int(data_num)
    data_num = element.findtext('./span[@class="header-section-number"]')
    if data_num:
        return int(data_num)
    # Raised explicitly (rather than via an assert statement) so the failure
    # is still reported when Python runs with -O.
    raise AssertionError("section number not found")
120
121
def _write_chapter(chap_tree, chap_num, output_dir):
    """Append the closing navigation bar to a finished chapter and write it
    to <output_dir>/<chap_num>.html."""
    add_nav_to_body(chap_tree, chap_num)
    chap_tree.write(os.path.join(output_dir, f'{chap_num}.html'),
                    method='html')


parser = argparse.ArgumentParser()
# Both options are mandatory: without them the script would only fail later
# with an obscure error from the parser or from os.path.join().
parser.add_argument('--input', required=True)
parser.add_argument('--output-dir', required=True)
args = parser.parse_args()

tree = ElementTree.parse(args.input)
# remove_ns_prefix() walks every element below its argument, so a single
# call on the whole tree is enough.
remove_ns_prefix(tree)
# The template is the full document with an emptied <body>; every output
# page starts as a deep copy of it.
template = copy.deepcopy(tree.getroot())
template_body = next(template.iterfind('./body'))
template_body.clear()

# iterate through the children elements in body
# body element is composed of
# - header
# - toc
# - h1,h2,p,...
# h1 marks the beginning of a chapter

chap_num = 0
chap_tree = None
for e in next(tree.iterfind('./body')):
    if e.tag == 'header':
        # The header is shared by all pages, so it goes into the template.
        template_body.append(e)
    elif e.get('id') == 'TOC':
        handle_toc(e)
        fix_links(e)
        toc_tree = ElementTree.ElementTree(copy.deepcopy(template))
        add_elem_to_body(toc_tree, e)
        toc_tree.write(os.path.join(args.output_dir, 'index.html'),
                       method='html')
    elif e.tag == 'h1':
        # The TOC must have been processed before the first chapter,
        # otherwise navigation and cross links cannot be generated.
        assert titles
        assert sections
        # A new <h1> closes the chapter in progress, if there is one.
        # Testing chap_tree rather than chap_num also handles a chapter
        # whose number is 0.
        if chap_tree is not None:
            _write_chapter(chap_tree, chap_num, args.output_dir)
        chap_num = get_chap_num(e)
        chap_tree = ElementTree.ElementTree(copy.deepcopy(template))
        # Navigation bar at the top of the chapter; a second one is added
        # at the bottom when the chapter is written out.
        add_nav_to_body(chap_tree, chap_num)
        add_elem_to_body(chap_tree, e)
    else:
        assert chap_tree is not None
        fix_links(e)
        add_elem_to_body(chap_tree, e)

# Write out the last chapter, which no following <h1> has closed.
if chap_tree is None:
    raise SystemExit(f'{args.input}: no <h1> chapter heading found')
_write_chapter(chap_tree, chap_num, args.output_dir)