]> git.proxmox.com Git - mirror_edk2.git/blob - AppPkg/Applications/Python/Python-2.7.10/Lib/textwrap.py
EmbeddedPkg: Extend NvVarStoreFormattedLib LIBRARY_CLASS
[mirror_edk2.git] / AppPkg / Applications / Python / Python-2.7.10 / Lib / textwrap.py
1 """Text wrapping and filling.
2 """
3
4 # Copyright (C) 1999-2001 Gregory P. Ward.
5 # Copyright (C) 2002, 2003 Python Software Foundation.
6 # Written by Greg Ward <gward@python.net>
7
8 __revision__ = "$Id$"
9
10 import string, re
11
try:
    unicode
except NameError:
    # This interpreter was built without Unicode support, so the
    # 'unicode' type is missing.  Provide a stand-in class so that
    # isinstance(obj, _unicode) checks are valid (and always false).
    class _unicode(object):
        pass
else:
    # Normal case: alias the real built-in type.
    _unicode = unicode
19
20 # Do the right thing with boolean values for all known Python versions
21 # (so this module can be copied to projects that don't depend on Python
22 # 2.3, e.g. Optik and Docutils) by uncommenting the block of code below.
23 #try:
24 # True, False
25 #except NameError:
26 # (True, False) = (1, 0)
27
# Names exported by "from textwrap import *".
__all__ = [
    'TextWrapper',
    'wrap',
    'fill',
    'dedent',
]
29
# The set of whitespace characters is deliberately fixed to the US-ASCII
# ones instead of being taken from string.whitespace.  Under some locales
# (ISO-8859-1 for instance) string.whitespace also contains 0xa0, the
# *non-breaking* space.  Honouring it would be wrong on two counts:
#   1) textwrap would break lines at a character whose whole purpose is
#      to forbid a break there, and
#   2) 0xa0 lies outside range(128), which may cause trouble when the
#      text being wrapped is Unicode.
_whitespace = '\t\n\x0b\x0c\r '
39
class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 1 .. 8 spaces, depending on its position in
        its line.  If false, each tab is treated as a single character.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespaces and right after hyphens part of
        compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
    """

    # Translation table for byte strings: every recognized whitespace
    # character maps to a plain space.
    whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))

    # Same mapping for unicode strings, keyed by code point.  Built with
    # an explicit loop because names bound in a class body are not
    # visible from inside a generator expression or comprehension.
    unicode_whitespace_trans = {}
    uspace = ord(u' ')
    for x in map(ord, _whitespace):
        unicode_whitespace_trans[x] = uspace

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    wordsep_re = re.compile(
        r'(\s+|'                                  # any whitespace
        r'[^\s\w]*\w+[^0-9\W]-(?=\w+[^0-9\W])|'   # hyphenated words
        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash

    # This less funky little regex just splits on recognized spaces.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!
    wordsep_simple_re = re.compile(r'(\s+)')

    # XXX this is not locale- or charset-aware -- string.lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[%s]'              # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z'                # end of chunk
                                 % string.lowercase)

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True):
        """Store the wrapping options; see the class docstring for the
        meaning of each one.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens

        # recompile the regexes for Unicode mode -- done in this clumsy way for
        # backwards compatibility because it's rather common to monkey-patch
        # the TextWrapper class' wordsep_re attribute.
        self.wordsep_re_uni = re.compile(self.wordsep_re.pattern, re.U)
        self.wordsep_simple_re_uni = re.compile(
            self.wordsep_simple_re.pattern, re.U)

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs()
        if self.replace_whitespace:
            # str.translate() and unicode.translate() take different
            # kinds of table, hence the two branches.  Any other type
            # is returned without whitespace replacement.
            if isinstance(text, str):
                text = text.translate(self.whitespace_trans)
            elif isinstance(text, _unicode):
                text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        if isinstance(text, _unicode):
            if self.break_on_hyphens:
                pat = self.wordsep_re_uni
            else:
                pat = self.wordsep_simple_re_uni
        else:
            if self.break_on_hyphens:
                pat = self.wordsep_re
            else:
                pat = self.wordsep_simple_re
        # Drop the empty strings re.split() produces between adjacent
        # separators.  A real list is required: _wrap_chunks() reverses
        # and pops it in place.
        return [chunk for chunk in pat.split(text) if chunk]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place; nothing is returned.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks)-1:
            if chunks[i+1] == " " and patsearch(chunks[i]):
                chunks[i+1] = "  "      # two spaces after a sentence end
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  Both 'reversed_chunks' and
        'cur_line' are modified in place.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(reversed_chunks[-1][:space_left])
            reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        'chunks' is consumed (reversed and emptied) in the process.
        Raises ValueError if 'self.width' is not positive.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                del cur_line[-1]

            # Convert current line back to a string and store it in list
            # of all lines (return value).
            if cur_line:
                lines.append(indent + ''.join(cur_line))

        return lines

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        text = self._munge_whitespace(text)
        chunks = self._split(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))
339
340
341 # -- Convenience interface ---------------------------------------------
342
def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Shortcut for building a one-off TextWrapper and calling its wrap()
    method.  The paragraph in 'text' is reformatted so that every line
    is at most 'width' columns wide, and the resulting lines are
    returned as a list.  By default tabs are expanded with
    string.expandtabs() and every other whitespace character (newline
    included) becomes a space.  Any extra keyword arguments are handed
    to the TextWrapper constructor; see that class for what is
    available.
    """
    wrapper = TextWrapper(width=width, **kwargs)
    wrapped_lines = wrapper.wrap(text)
    return wrapped_lines
355
def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Shortcut for building a one-off TextWrapper and calling its fill()
    method.  The paragraph in 'text' is reformatted so that every line
    is at most 'width' columns wide, and the whole wrapped paragraph is
    returned as one string.  As with wrap(), tabs are expanded and other
    whitespace characters are converted to spaces.  Any extra keyword
    arguments are handed to the TextWrapper constructor; see that class
    for what is available.
    """
    wrapper = TextWrapper(width=width, **kwargs)
    paragraph = wrapper.fill(text)
    return paragraph
367
368
369 # -- Loosely related functionality -------------------------------------
370
# Matches a line that consists of nothing but spaces and tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the run of leading spaces/tabs of every line that contains at
# least one non-whitespace character.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.  (This behaviour is
    new in Python 2.5; older versions of this module incorrectly
    expanded tabs before searching for common leading whitespace.)

    Lines made up solely of spaces and tabs are normalized to empty
    lines and do not take part in the computation.  When the indents of
    two lines merely share a prefix (neither one is a prefix of the
    other), that shared prefix is still removed.

    Returns the dedented text.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Neither indent is a prefix of the other: shrink the margin to
        # the longest prefix the two have in common.  A mismatch is
        # guaranteed to occur before either string is exhausted.
        else:
            for i, (x, y) in enumerate(zip(margin, indent)):
                if x != y:
                    margin = margin[:i]
                    break
            # An empty margin can never grow again, so stop looking.
            if not margin:
                break

    if margin:
        # margin holds only spaces and tabs, so it is safe to embed in
        # the pattern without escaping.
        text = re.sub(r'(?m)^' + margin, '', text)
    return text
421
if __name__ == "__main__":
    # Tiny manual demo of dedent(); alternative inputs are kept below.
    #print dedent("\tfoo\n\tbar")
    #print dedent(" \thello there\n \t how are you?")
    sample = "Hello there.\n This is indented."
    print(dedent(sample))