+++ /dev/null
-""" TeXcheck.py -- rough syntax checking on Python style LaTeX documents.\r
-\r
- Written by Raymond D. Hettinger <python at rcn.com>\r
- Copyright (c) 2003 Python Software Foundation. All rights reserved.\r
-\r
-Designed to catch common markup errors including:\r
-* Unbalanced or mismatched parentheses, brackets, and braces.\r
-* Unbalanced or mismatched \\begin and \\end blocks.\r
-* Misspelled or invalid LaTeX commands.\r
-* Use of forward slashes instead of backslashes for commands.\r
-* Table line size mismatches.\r
-\r
-Sample command line usage:\r
- python texcheck.py -k chapterheading -m lib/librandomtex *.tex\r
-\r
-Options:\r
- -m Munge parentheses and brackets so mixed pairs match. Without it, [0,n) is reported as a mismatch.\r
- -k keyword: Keyword is a valid LaTeX command. Do not include the backslash.\r
- -d: Delimiter check only (useful for non-LaTeX files).\r
- -h: Help\r
- -s lineno: Start at lineno (useful for skipping complex sections).\r
- -v: Verbose. Trace the matching of \\begin and \\end blocks.\r
-"""\r
-\r
-import re\r
-import sys\r
-import getopt\r
-from itertools import izip, count, islice\r
-import glob\r
-\r
# Whitespace-separated list of the LaTeX commands (with their leading
# backslash) that the checker accepts as valid.  The string is split on
# whitespace and loaded into a set by checkit(), so layout is free-form and
# each command only needs to appear once.
cmdstr = r"""
    \section \module \declaremodule \modulesynopsis \moduleauthor
    \sectionauthor \versionadded \code \class \method \begin
    \optional \var \ref \end \subsection \lineiii \hline \label
    \indexii \textrm \ldots \keyword \stindex \index \item \note
    \withsubitem \ttindex \footnote \citetitle \samp \opindex
    \noindent \exception \strong \dfn \ctype \obindex \character
    \indexiii \function \bifuncindex \refmodule \refbimodindex
    \subsubsection \nodename \member \chapter \emph \ASCII \UNIX
    \regexp \program \production \token \productioncont \term
    \grammartoken \lineii \seemodule \file \EOF \documentclass
    \usepackage \title \input \maketitle \ifhtml \fi \url \Cpp
    \tableofcontents \kbd \programopt \envvar \refstmodindex
    \cfunction \constant \NULL \moreargs \cfuncline \cdata
    \textasciicircum \n \ABC \setindexsubitem \versionchanged
    \deprecated \seetext \newcommand \POSIX \pep \warning \rfc
    \verbatiminput \methodline \textgreater \seetitle \lineiv
    \funclineni \ulink \manpage \funcline \dataline \unspecified
    \textbackslash \mimetype \mailheader \seepep \textunderscore
    \longprogramopt \infinity \plusminus \shortversion \version
    \refmodindex \seerfc \makeindex \makemodindex \renewcommand
    \indexname \appendix \protect \indexiv \mbox \textasciitilde
    \platform \seeurl \leftmargin \labelwidth \localmoduletable
    \LaTeX \copyright \memberline \backslash \pi \centerline
    \caption \vspace \textwidth \menuselection \textless
    \makevar \csimplemacro \bfcode \sub \release
    \email \kwindex \refexmodindex \filenq \e
    \exindex \linev \newsgroup \verbatim \setshortversion
    \author \authoraddress \paragraph \subparagraph \cmemberline
    \textbar \C \seelink
"""
-\r
def matchclose(c_lineno, c_symbol, openers, pairmap):
    """Verify that a closing delimiter matches the most recent opener.

    c_lineno -- line number on which the closing delimiter was found.
    c_symbol -- the closing delimiter: a punctuation character or the
                name taken from an \\end{name} block.
    openers  -- stack (list) of (lineno, symbol) tuples for delimiters
                that are still open.  The top entry is always popped.
    pairmap  -- maps a closing punctuation character to a string holding
                every opener it may close.  A closer missing from the map
                must be closed by an identical symbol.

    Problems are reported by printing to standard output; the function
    always returns None.
    """
    try:
        o_lineno, o_symbol = openers.pop()
    except IndexError:
        # Nothing is open, so this closer has no partner at all.
        print("\nDelimiter mismatch. On line %d, encountered closing '%s' without corresponding open" % (c_lineno, c_symbol))
        return
    # Closers that are not in pairmap only match an opener of the same name.
    acceptable = pairmap.get(c_symbol, [c_symbol])
    if o_symbol in acceptable:
        return
    print("\nOpener '%s' on line %d was not closed before encountering '%s' on line %d" % (o_symbol, o_lineno, c_symbol, c_lineno))
-\r
def checkit(source, opts, morecmds=()):
    """Check the LaTeX formatting in a sequence of lines.

    Opts is a mapping of options to option values if any:
        -m          munge parenthesis and brackets
        -d          delimiters only checking
        -v          verbose trace of delimiter matching
        -s lineno:  linenumber to start scan (default is 1).

    Morecmds is a sequence of LaTeX commands (without backslashes) that
    are to be considered valid in the scan.

    Every finding is reported by printing to standard output.  The
    function always returns 0.
    """

    texcmd = re.compile(r'\\[A-Za-z]+')
    falsetexcmd = re.compile(r'\/([A-Za-z]+)')  # Mismarked with forward slash

    validcmds = set(cmdstr.split())
    validcmds.update('\\' + cmd for cmd in morecmds)

    if '-m' in opts:
        pairmap = {']': '[(', ')': '(['}    # Munged openers
    else:
        pairmap = {']': '[', ')': '('}      # Normal opener for a given closer
    openpunct = set('([')                   # Set of valid openers

    delimiters = re.compile(r'\\(begin|end){([_a-zA-Z]+)}|([()\[\]])')
    braces = re.compile(r'({)|(})')
    # The character class must be A-Za-z; "A-z" would also accept the
    # punctuation that sits between 'Z' and 'a' in ASCII.
    doubledwords = re.compile(r'(\b[A-Za-z]+\b) \b\1\b')
    spacingmarkup = re.compile(r'\\(ABC|ASCII|C|Cpp|EOF|infinity|NULL|plusminus|POSIX|UNIX)\s')

    openers = []                            # Stack of pending open delimiters
    bracestack = []                         # Stack of pending open braces

    tablestart = re.compile(r'\\begin{(?:long)?table([iv]+)}')
    tableline = re.compile(r'\\line([iv]+){')
    tableend = re.compile(r'\\end{(?:long)?table([iv]+)}')
    tablelevel = ''
    tablestartline = 0

    verbose = '-v' in opts
    delimiters_only = '-d' in opts
    startline = int(opts.get('-s', '1'))
    lineno = 0                              # Stays 0 when nothing is scanned

    # Skip the first startline-1 lines but keep reporting real line numbers.
    for lineno, line in enumerate(islice(source, startline - 1, None), startline):
        line = line.rstrip()

        # Check balancing of open/close parenthesis, brackets, and begin/end blocks
        for begend, name, punct in delimiters.findall(line):
            if verbose:
                # No newline here: the resulting stack is appended to the
                # same trace line once the delimiter has been processed.
                sys.stdout.write('%s | %s %s %s ' % (lineno, begend, name, punct))
            if begend == 'begin' and not delimiters_only:
                openers.append((lineno, name))
            elif punct in openpunct:
                openers.append((lineno, punct))
            elif begend == 'end' and not delimiters_only:
                matchclose(lineno, name, openers, pairmap)
            elif punct in pairmap:
                matchclose(lineno, punct, openers, pairmap)
            if verbose:
                print(' -->  %s' % (openers,))

        # Balance opening and closing braces
        for opening, closing in braces.findall(line):
            if opening == '{':
                bracestack.append(lineno)
            if closing == '}':
                try:
                    bracestack.pop()
                except IndexError:
                    print(r'Warning, unmatched } on line %s.' % (lineno,))

        # Optionally, skip LaTeX specific checks
        if delimiters_only:
            continue

        # Warn whenever forward slashes encountered with a LaTeX command.
        # Lines mentioning urls or /rfc822 are skipped as false positives.
        if not ('822' in line or '.html' in line):
            for cmd in falsetexcmd.findall(line):
                if '\\' + cmd in validcmds:
                    print('Warning, forward slash used on line %d with cmd: /%s' % (lineno, cmd))

        # Check for markup requiring {} for correct spacing
        for cmd in spacingmarkup.findall(line):
            print(r'Warning, \%s should be written as \%s{} on line %d' % (cmd, cmd, lineno))

        # Validate commands.  A \newcommand registers the name found in
        # the first {...} group that follows it on the same line.
        nc = line.find(r'\newcommand')
        if nc != -1:
            start = line.find('{', nc)
            end = line.find('}', start)
            # Only register a name when both braces were actually found;
            # otherwise the slice would pick up unrelated text.
            if start != -1 and end != -1:
                validcmds.add(line[start + 1:end])
        for cmd in texcmd.findall(line):
            if cmd not in validcmds:
                # cmd already carries its leading backslash.
                print('Warning, unknown tex cmd on line %d: %s' % (lineno, cmd))

        # Check table levels (make sure lineii only inside tableii)
        m = tablestart.search(line)
        if m:
            tablelevel = m.group(1)
            tablestartline = lineno
        m = tableline.search(line)
        if m and m.group(1) != tablelevel:
            print(r'Warning, \line%s on line %d does not match \table%s on line %d' % (m.group(1), lineno, tablelevel, tablestartline))
        if tableend.search(line):
            tablelevel = ''

        # Style guide warnings
        if 'e.g.' in line or 'i.e.' in line:
            print(r'Style warning, avoid use of i.e or e.g. on line %d' % (lineno,))

        for dw in doubledwords.findall(line):
            print(r'Doubled word warning. "%s" on line %d' % (dw, lineno))

    lastline = lineno
    # Anything still on the stacks was opened but never closed.
    for open_lineno, symbol in openers:
        print("Unmatched open delimiter '%s' on line %d" % (symbol, open_lineno))
    for brace_lineno in bracestack:
        print("Unmatched { on line %d" % (brace_lineno,))
    print('Done checking %d lines.' % (lastline,))
    return 0
-\r
def main(args=None):
    """Command line entry point: check every file named in args.

    args -- list of command line arguments (options followed by file
            names or glob patterns).  Defaults to sys.argv[1:].

    Returns an exit status:
        0  help was shown, or all files were checked
        1  no file was specified, or no file matched the given patterns
        2  a file could not be opened (checking stops at that file)

    getopt.GetoptError from an unknown option is not caught here.
    """
    if args is None:
        args = sys.argv[1:]
    optitems, arglist = getopt.getopt(args, "k:mdhs:v")
    opts = dict(optitems)
    if '-h' in opts or not args:
        print(__doc__)
        return 0

    if len(arglist) < 1:
        print('Please specify a file to be checked')
        return 1

    # Expand wildcards into a fresh list; rewriting arglist while iterating
    # over it would skip the entry that follows a pattern with no matches.
    filenames = []
    for filespec in arglist:
        if '*' in filespec or '?' in filespec:
            filenames.extend(glob.glob(filespec))
        else:
            filenames.append(filespec)
    if not filenames:
        print('No files matched the given file specifications')
        return 1

    # -k may be given several times, so collect from the raw option list
    # rather than from the opts dict (which keeps only the last value).
    morecmds = [v for k, v in optitems if k == '-k']
    err = []

    for filename in filenames:
        print('=' * 30)
        print("Checking %s" % (filename,))
        try:
            f = open(filename)
        except IOError:
            print('Cannot open file %s.' % (filename,))
            return 2

        try:
            err.append(checkit(f, opts, morecmds))
        finally:
            f.close()

    return max(err)


if __name__ == '__main__':
    sys.exit(main())