ceph/src/seastar/doc/htmlsplit.py

   1 #!/usr/bin/env python3
   2
   3 # This script takes the single-page HTML output from pandoc - tutorial.html -
   4 # and splits it into many pages in split/: one page index.html for the table
   5 # of contents, and an additional page for each chapter. We make sure that
   6 # links from the TOC to each chapter, and also links across chapters,
# continue to work correctly, and also add links from each chapter back to
   8 # the TOC, as well as to the next and previous chapters.
   9
  10
  11 # Copyright (C) 2018 ScyllaDB.
  12 #
  13 # This file is open source software, licensed to you under the terms
  14 # of the Apache License, Version 2.0 (the "License").  See the NOTICE file
  15 # distributed with this work for additional information regarding copyright
  16 # ownership.  You may not use this file except in compliance with the License.
  17 #
  18 # You may obtain a copy of the License at
  19 #
  20 #   http://www.apache.org/licenses/LICENSE-2.0
  21 #
  22 # Unless required by applicable law or agreed to in writing,
  23 # software distributed under the License is distributed on an
  24 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  25 # KIND, either express or implied.  See the License for the
  26 # specific language governing permissions and limitations
  27 # under the License.
  28
import os
import re
import sys
  30 titles = {}
  31 sections = {}
  32 def links(out, chapter):
  33     if chapter == 0:
  34         return
  35     out.write('<A HREF="index.html">Back to table of contents</A>. ')
  36     try:
  37         out.write('Previous: <A HREF="' + str(chapter-1) +'.html">' + str(chapter-1) + '. ' + titles[chapter-1] + '</A>. ')
  38     except:
  39         pass
  40     try:
  41         out.write('Next: <A HREF="' + str(chapter+1) +'.html">' + str(chapter+1) + '. ' + titles[chapter+1] + '</A>. ')
  42     except:
  43         pass
  44 def flush(chapter, header, chunk):
  45     fn = 'index.html' if chapter == 0 else str(chapter) + '.html'
  46     with open('split/' + fn, 'w') as out:
  47         out.write(header)
  48         links(out, chapter)
  49         out.write(chunk)
  50         links(out, chapter)
  51         out.write('</body></html>')
  52 with open("tutorial.html") as f:
  53     chunk = ""
  54     # Chapter currently being read. Set to 0 while reading the TOC, or
  55     # numbers > 0 while reading a chapter
  56     chapter = None
  57     for line in f:
  58         if line == '<div id="TOC">\n' or line =='<nav id="TOC">\n':
  59             header = chunk
  60             chapter = 0
  61             chunk = ""
  62         elif line.startswith('<h1 id="'):
  63             flush(chapter, header, chunk)
  64             chunk = ""
  65             chapter += 1
  66         elif chapter == 0 and line.startswith('<li><a href="#'):
  67             # For all sections, remember the mapping from name-with-dashes
  68             # to the chapter number they are in in "sections". We need this
  69             # to support links to other sections.
  70             match = re.search('href="#([^"]*)".*>([0-9]+)[.<]', line)
  71             if match:
  72                 sections[match.group(1)] = match.group(2)
  73             # replace the link to '#section' with number N.M to chapterN#section
  74             match = re.match('^(.*href=")(#.*>)([0-9]+)([.<].*)$', line)
  75             line = match.group(1) + match.group(3) + '.html' + match.group(2) + match.group(3) + match.group(4) + '\n'
  76             # For chapters, remember the mapping from number to name in the
  77             # map "titles", so we can use them later in links to next and
  78             # previous chapter
  79             match = re.search('>([0-9]+)</span> (.*)</a>', line)
  80             if match:
  81                 titles[int(match.group(1))] = match.group(2)
  82         elif chapter != 0:
  83             # In a chapter we can have a link to a different subsection, which
  84             # looks like <a href="#some-title">Some title</A>. We need to
  85             # replace this to refer to the right file after the split.
  86             line = re.sub('<a href="#([^"]*)">([^<]*)</a>', lambda m: '<a href="' + sections[m.group(1)] + '.html#' + m.group(1) + '">' + m.group(2) + '</a>', line)
  87         chunk += line
  88     flush(chapter, header, chunk)