+++ /dev/null
-"""Text wrapping and filling.\r
-"""\r
-\r
-# Copyright (C) 1999-2001 Gregory P. Ward.\r
-# Copyright (C) 2002, 2003 Python Software Foundation.\r
-# Written by Greg Ward <gward@python.net>\r
-\r
-__revision__ = "$Id$"\r
-\r
-import string, re\r
-\r
-try:\r
- _unicode = unicode\r
-except NameError:\r
- # If Python is built without Unicode support, the unicode type\r
- # will not exist. Fake one.\r
- class _unicode(object):\r
- pass\r
-\r
-# Do the right thing with boolean values for all known Python versions\r
-# (so this module can be copied to projects that don't depend on Python\r
-# 2.3, e.g. Optik and Docutils) by uncommenting the block of code below.\r
-#try:\r
-# True, False\r
-#except NameError:\r
-# (True, False) = (1, 0)\r
-\r
-__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent']\r
-\r
-# Hardcode the recognized whitespace characters to the US-ASCII\r
-# whitespace characters. The main reason for doing this is that in\r
-# ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales\r
-# that character winds up in string.whitespace. Respecting\r
-# string.whitespace in those cases would 1) make textwrap treat 0xa0 the\r
-# same as any other whitespace char, which is clearly wrong (it's a\r
-# *non-breaking* space), 2) possibly cause problems with Unicode,\r
-# since 0xa0 is not in range(128).\r
-_whitespace = '\t\n\x0b\x0c\r '\r
-\r
-class TextWrapper:\r
- """\r
- Object for wrapping/filling text. The public interface consists of\r
- the wrap() and fill() methods; the other methods are just there for\r
- subclasses to override in order to tweak the default behaviour.\r
- If you want to completely replace the main wrapping algorithm,\r
- you'll probably have to override _wrap_chunks().\r
-\r
- Several instance attributes control various aspects of wrapping:\r
- width (default: 70)\r
- the maximum width of wrapped lines (unless break_long_words\r
- is false)\r
- initial_indent (default: "")\r
- string that will be prepended to the first line of wrapped\r
- output. Counts towards the line's width.\r
- subsequent_indent (default: "")\r
- string that will be prepended to all lines save the first\r
- of wrapped output; also counts towards each line's width.\r
- expand_tabs (default: true)\r
- Expand tabs in input text to spaces before further processing.\r
- Each tab will become 1 .. 8 spaces, depending on its position in\r
- its line. If false, each tab is treated as a single character.\r
- replace_whitespace (default: true)\r
- Replace all whitespace characters in the input text by spaces\r
- after tab expansion. Note that if expand_tabs is false and\r
- replace_whitespace is true, every tab will be converted to a\r
- single space!\r
- fix_sentence_endings (default: false)\r
- Ensure that sentence-ending punctuation is always followed\r
- by two spaces. Off by default because the algorithm is\r
- (unavoidably) imperfect.\r
- break_long_words (default: true)\r
- Break words longer than 'width'. If false, those words will not\r
- be broken, and some lines might be longer than 'width'.\r
- break_on_hyphens (default: true)\r
- Allow breaking hyphenated words. If true, wrapping will occur\r
- preferably on whitespaces and right after hyphens part of\r
- compound words.\r
- drop_whitespace (default: true)\r
- Drop leading and trailing whitespace from lines.\r
- """\r
-\r
- whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))\r
-\r
- unicode_whitespace_trans = {}\r
- uspace = ord(u' ')\r
- for x in map(ord, _whitespace):\r
- unicode_whitespace_trans[x] = uspace\r
-\r
- # This funky little regex is just the trick for splitting\r
- # text up into word-wrappable chunks. E.g.\r
- # "Hello there -- you goof-ball, use the -b option!"\r
- # splits into\r
- # Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!\r
- # (after stripping out empty strings).\r
- wordsep_re = re.compile(\r
- r'(\s+|' # any whitespace\r
- r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|' # hyphenated words\r
- r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))') # em-dash\r
-\r
- # This less funky little regex just split on recognized spaces. E.g.\r
- # "Hello there -- you goof-ball, use the -b option!"\r
- # splits into\r
- # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/\r
- wordsep_simple_re = re.compile(r'(\s+)')\r
-\r
- # XXX this is not locale- or charset-aware -- string.lowercase\r
- # is US-ASCII only (and therefore English-only)\r
- sentence_end_re = re.compile(r'[%s]' # lowercase letter\r
- r'[\.\!\?]' # sentence-ending punct.\r
- r'[\"\']?' # optional end-of-quote\r
- r'\Z' # end of chunk\r
- % string.lowercase)\r
-\r
-\r
- def __init__(self,\r
- width=70,\r
- initial_indent="",\r
- subsequent_indent="",\r
- expand_tabs=True,\r
- replace_whitespace=True,\r
- fix_sentence_endings=False,\r
- break_long_words=True,\r
- drop_whitespace=True,\r
- break_on_hyphens=True):\r
- self.width = width\r
- self.initial_indent = initial_indent\r
- self.subsequent_indent = subsequent_indent\r
- self.expand_tabs = expand_tabs\r
- self.replace_whitespace = replace_whitespace\r
- self.fix_sentence_endings = fix_sentence_endings\r
- self.break_long_words = break_long_words\r
- self.drop_whitespace = drop_whitespace\r
- self.break_on_hyphens = break_on_hyphens\r
-\r
- # recompile the regexes for Unicode mode -- done in this clumsy way for\r
- # backwards compatibility because it's rather common to monkey-patch\r
- # the TextWrapper class' wordsep_re attribute.\r
- self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)\r
- self.wordsep_simple_re_uni = re.compile(\r
- self.wordsep_simple_re.pattern, re.U)\r
-\r
-\r
- # -- Private methods -----------------------------------------------\r
- # (possibly useful for subclasses to override)\r
-\r
- def _munge_whitespace(self, text):\r
- """_munge_whitespace(text : string) -> string\r
-\r
- Munge whitespace in text: expand tabs and convert all other\r
- whitespace characters to spaces. Eg. " foo\\tbar\\n\\nbaz"\r
- becomes " foo bar baz".\r
- """\r
- if self.expand_tabs:\r
- text = text.expandtabs()\r
- if self.replace_whitespace:\r
- if isinstance(text, str):\r
- text = text.translate(self.whitespace_trans)\r
- elif isinstance(text, _unicode):\r
- text = text.translate(self.unicode_whitespace_trans)\r
- return text\r
-\r
-\r
- def _split(self, text):\r
- """_split(text : string) -> [string]\r
-\r
- Split the text to wrap into indivisible chunks. Chunks are\r
- not quite the same as words; see _wrap_chunks() for full\r
- details. As an example, the text\r
- Look, goof-ball -- use the -b option!\r
- breaks into the following chunks:\r
- 'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',\r
- 'use', ' ', 'the', ' ', '-b', ' ', 'option!'\r
- if break_on_hyphens is True, or in:\r
- 'Look,', ' ', 'goof-ball', ' ', '--', ' ',\r
- 'use', ' ', 'the', ' ', '-b', ' ', option!'\r
- otherwise.\r
- """\r
- if isinstance(text, _unicode):\r
- if self.break_on_hyphens:\r
- pat = self.wordsep_re_uni\r
- else:\r
- pat = self.wordsep_simple_re_uni\r
- else:\r
- if self.break_on_hyphens:\r
- pat = self.wordsep_re\r
- else:\r
- pat = self.wordsep_simple_re\r
- chunks = pat.split(text)\r
- chunks = filter(None, chunks) # remove empty chunks\r
- return chunks\r
-\r
- def _fix_sentence_endings(self, chunks):\r
- """_fix_sentence_endings(chunks : [string])\r
-\r
- Correct for sentence endings buried in 'chunks'. Eg. when the\r
- original text contains "... foo.\\nBar ...", munge_whitespace()\r
- and split() will convert that to [..., "foo.", " ", "Bar", ...]\r
- which has one too few spaces; this method simply changes the one\r
- space to two.\r
- """\r
- i = 0\r
- patsearch = self.sentence_end_re.search\r
- while i < len(chunks)-1:\r
- if chunks[i+1] == " " and patsearch(chunks[i]):\r
- chunks[i+1] = " "\r
- i += 2\r
- else:\r
- i += 1\r
-\r
- def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):\r
- """_handle_long_word(chunks : [string],\r
- cur_line : [string],\r
- cur_len : int, width : int)\r
-\r
- Handle a chunk of text (most likely a word, not whitespace) that\r
- is too long to fit in any line.\r
- """\r
- # Figure out when indent is larger than the specified width, and make\r
- # sure at least one character is stripped off on every pass\r
- if width < 1:\r
- space_left = 1\r
- else:\r
- space_left = width - cur_len\r
-\r
- # If we're allowed to break long words, then do so: put as much\r
- # of the next chunk onto the current line as will fit.\r
- if self.break_long_words:\r
- cur_line.append(reversed_chunks[-1][:space_left])\r
- reversed_chunks[-1] = reversed_chunks[-1][space_left:]\r
-\r
- # Otherwise, we have to preserve the long word intact. Only add\r
- # it to the current line if there's nothing already there --\r
- # that minimizes how much we violate the width constraint.\r
- elif not cur_line:\r
- cur_line.append(reversed_chunks.pop())\r
-\r
- # If we're not allowed to break long words, and there's already\r
- # text on the current line, do nothing. Next time through the\r
- # main loop of _wrap_chunks(), we'll wind up here again, but\r
- # cur_len will be zero, so the next line will be entirely\r
- # devoted to the long word that we can't handle right now.\r
-\r
- def _wrap_chunks(self, chunks):\r
- """_wrap_chunks(chunks : [string]) -> [string]\r
-\r
- Wrap a sequence of text chunks and return a list of lines of\r
- length 'self.width' or less. (If 'break_long_words' is false,\r
- some lines may be longer than this.) Chunks correspond roughly\r
- to words and the whitespace between them: each chunk is\r
- indivisible (modulo 'break_long_words'), but a line break can\r
- come between any two chunks. Chunks should not have internal\r
- whitespace; ie. a chunk is either all whitespace or a "word".\r
- Whitespace chunks will be removed from the beginning and end of\r
- lines, but apart from that whitespace is preserved.\r
- """\r
- lines = []\r
- if self.width <= 0:\r
- raise ValueError("invalid width %r (must be > 0)" % self.width)\r
-\r
- # Arrange in reverse order so items can be efficiently popped\r
- # from a stack of chucks.\r
- chunks.reverse()\r
-\r
- while chunks:\r
-\r
- # Start the list of chunks that will make up the current line.\r
- # cur_len is just the length of all the chunks in cur_line.\r
- cur_line = []\r
- cur_len = 0\r
-\r
- # Figure out which static string will prefix this line.\r
- if lines:\r
- indent = self.subsequent_indent\r
- else:\r
- indent = self.initial_indent\r
-\r
- # Maximum width for this line.\r
- width = self.width - len(indent)\r
-\r
- # First chunk on line is whitespace -- drop it, unless this\r
- # is the very beginning of the text (ie. no lines started yet).\r
- if self.drop_whitespace and chunks[-1].strip() == '' and lines:\r
- del chunks[-1]\r
-\r
- while chunks:\r
- l = len(chunks[-1])\r
-\r
- # Can at least squeeze this chunk onto the current line.\r
- if cur_len + l <= width:\r
- cur_line.append(chunks.pop())\r
- cur_len += l\r
-\r
- # Nope, this line is full.\r
- else:\r
- break\r
-\r
- # The current line is full, and the next chunk is too big to\r
- # fit on *any* line (not just this one).\r
- if chunks and len(chunks[-1]) > width:\r
- self._handle_long_word(chunks, cur_line, cur_len, width)\r
-\r
- # If the last chunk on this line is all whitespace, drop it.\r
- if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':\r
- del cur_line[-1]\r
-\r
- # Convert current line back to a string and store it in list\r
- # of all lines (return value).\r
- if cur_line:\r
- lines.append(indent + ''.join(cur_line))\r
-\r
- return lines\r
-\r
-\r
- # -- Public interface ----------------------------------------------\r
-\r
- def wrap(self, text):\r
- """wrap(text : string) -> [string]\r
-\r
- Reformat the single paragraph in 'text' so it fits in lines of\r
- no more than 'self.width' columns, and return a list of wrapped\r
- lines. Tabs in 'text' are expanded with string.expandtabs(),\r
- and all other whitespace characters (including newline) are\r
- converted to space.\r
- """\r
- text = self._munge_whitespace(text)\r
- chunks = self._split(text)\r
- if self.fix_sentence_endings:\r
- self._fix_sentence_endings(chunks)\r
- return self._wrap_chunks(chunks)\r
-\r
- def fill(self, text):\r
- """fill(text : string) -> string\r
-\r
- Reformat the single paragraph in 'text' to fit in lines of no\r
- more than 'self.width' columns, and return a new string\r
- containing the entire wrapped paragraph.\r
- """\r
- return "\n".join(self.wrap(text))\r
-\r
-\r
-# -- Convenience interface ---------------------------------------------\r
-\r
-def wrap(text, width=70, **kwargs):\r
- """Wrap a single paragraph of text, returning a list of wrapped lines.\r
-\r
- Reformat the single paragraph in 'text' so it fits in lines of no\r
- more than 'width' columns, and return a list of wrapped lines. By\r
- default, tabs in 'text' are expanded with string.expandtabs(), and\r
- all other whitespace characters (including newline) are converted to\r
- space. See TextWrapper class for available keyword args to customize\r
- wrapping behaviour.\r
- """\r
- w = TextWrapper(width=width, **kwargs)\r
- return w.wrap(text)\r
-\r
-def fill(text, width=70, **kwargs):\r
- """Fill a single paragraph of text, returning a new string.\r
-\r
- Reformat the single paragraph in 'text' to fit in lines of no more\r
- than 'width' columns, and return a new string containing the entire\r
- wrapped paragraph. As with wrap(), tabs are expanded and other\r
- whitespace characters converted to space. See TextWrapper class for\r
- available keyword args to customize wrapping behaviour.\r
- """\r
- w = TextWrapper(width=width, **kwargs)\r
- return w.fill(text)\r
-\r
-\r
-# -- Loosely related functionality -------------------------------------\r
-\r
-_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)\r
-_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)\r
-\r
-def dedent(text):\r
- """Remove any common leading whitespace from every line in `text`.\r
-\r
- This can be used to make triple-quoted strings line up with the left\r
- edge of the display, while still presenting them in the source code\r
- in indented form.\r
-\r
- Note that tabs and spaces are both treated as whitespace, but they\r
- are not equal: the lines " hello" and "\\thello" are\r
- considered to have no common leading whitespace. (This behaviour is\r
- new in Python 2.5; older versions of this module incorrectly\r
- expanded tabs before searching for common leading whitespace.)\r
- """\r
- # Look for the longest leading string of spaces and tabs common to\r
- # all lines.\r
- margin = None\r
- text = _whitespace_only_re.sub('', text)\r
- indents = _leading_whitespace_re.findall(text)\r
- for indent in indents:\r
- if margin is None:\r
- margin = indent\r
-\r
- # Current line more deeply indented than previous winner:\r
- # no change (previous winner is still on top).\r
- elif indent.startswith(margin):\r
- pass\r
-\r
- # Current line consistent with and no deeper than previous winner:\r
- # it's the new winner.\r
- elif margin.startswith(indent):\r
- margin = indent\r
-\r
- # Current line and previous winner have no common whitespace:\r
- # there is no margin.\r
- else:\r
- margin = ""\r
- break\r
-\r
- # sanity check (testing/debugging only)\r
- if 0 and margin:\r
- for line in text.split("\n"):\r
- assert not line or line.startswith(margin), \\r
- "line = %r, margin = %r" % (line, margin)\r
-\r
- if margin:\r
- text = re.sub(r'(?m)^' + margin, '', text)\r
- return text\r
-\r
-if __name__ == "__main__":\r
- #print dedent("\tfoo\n\tbar")\r
- #print dedent(" \thello there\n \t how are you?")\r
- print dedent("Hello there.\n This is indented.")\r