]>
Commit | Line | Data |
---|---|---|
4710c53d | 1 | # module 'string' -- A collection of string operations\r |
2 | \r | |
3 | # Warning: most of the code you see here isn't normally used nowadays. With\r | |
4 | # Python 1.6, many of these functions are implemented as methods on the\r | |
5 | # standard string object. They used to be implemented by a built-in module\r | |
6 | # called strop, but strop is now obsolete itself.\r | |
7 | \r | |
8 | """Common string manipulations.\r | |
9 | \r | |
10 | Public module variables:\r | |
11 | \r | |
12 | whitespace -- a string containing all characters considered whitespace\r | |
13 | lowercase -- a string containing all characters considered lowercase letters\r | |
14 | uppercase -- a string containing all characters considered uppercase letters\r | |
15 | letters -- a string containing all characters considered letters\r | |
16 | digits -- a string containing all characters considered decimal digits\r | |
17 | hexdigits -- a string containing all characters considered hexadecimal digits\r | |
18 | octdigits -- a string containing all characters considered octal digits\r | |
19 | \r | |
20 | """\r | |
21 | from warnings import warnpy3k\r | |
22 | warnpy3k("the stringold module has been removed in Python 3.0", stacklevel=2)\r | |
23 | del warnpy3k\r | |
24 | \r | |
25 | # Some strings for ctype-style character classification\r | |
# Character classes, each spelled out as a plain string of its members.
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
octdigits = '01234567'
# The decimal digits are the octal digits followed by 8 and 9.
digits = octdigits + '89'
hexdigits = digits + 'abcdefABCDEF'
33 | \r | |
34 | # Case conversion helpers\r | |
35 | _idmap = ''\r | |
36 | for i in range(256): _idmap = _idmap + chr(i)\r | |
37 | del i\r | |
38 | \r | |
39 | # Backward compatible names for exceptions\r | |
# Legacy exception names: every one of them is just another name for
# ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
44 | \r | |
45 | # convert UPPER CASE letters to lower case\r | |
def lower(s):
    """lower(s) -> string

    Return a lowercased copy of the string s (delegates to the
    lower() method of s).

    """
    lowered = s.lower()
    return lowered
53 | \r | |
54 | # Convert lower case letters to UPPER CASE\r | |
def upper(s):
    """upper(s) -> string

    Return an uppercased copy of the string s (delegates to the
    upper() method of s).

    """
    uppered = s.upper()
    return uppered
62 | \r | |
63 | # Swap lower case letters and UPPER CASE\r | |
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s in which every uppercase
    character is lowercased and every lowercase character is
    uppercased (delegates to the swapcase() method of s).

    """
    swapped = s.swapcase()
    return swapped
72 | \r | |
73 | # Strip leading and trailing tabs and spaces\r | |
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Return a copy of the string s with leading and trailing
    whitespace removed.  If chars is given and not None, remove
    the characters in chars instead of whitespace.

    """
    # chars is handed straight to the string method; None selects the
    # method's own default (whitespace), so one-argument calls behave
    # exactly as before.
    return s.strip(chars)
82 | \r | |
83 | # Strip leading tabs and spaces\r | |
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Return a copy of the string s with leading whitespace removed.
    If chars is given and not None, remove the characters in chars
    instead of whitespace.

    """
    # chars is handed straight to the string method; None selects the
    # method's own default (whitespace), so one-argument calls behave
    # exactly as before.
    return s.lstrip(chars)
91 | \r | |
92 | # Strip trailing tabs and spaces\r | |
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Return a copy of the string s with trailing whitespace
    removed.  If chars is given and not None, remove the
    characters in chars instead of whitespace.

    """
    # chars is handed straight to the string method; None selects the
    # method's own default (whitespace), so one-argument calls behave
    # exactly as before.
    return s.rstrip(chars)
101 | \r | |
102 | \r | |
103 | # Split a string into a list of space/tab-separated words\r | |
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If maxsplit is nonzero, at most maxsplit
    splits are done (so the result has at most maxsplit+1
    elements).  If maxsplit is zero, there is no limit.  If sep is
    not specified or is None, any whitespace string is a separator.
    Maxsplit defaults to 0.

    (split and splitfields are synonymous)

    """
    # The split() method of strings treats 0 as "perform no splits at all",
    # whereas this function documents 0 as "no limit".  Translate it to -1,
    # which is the method's spelling of "no limit".
    if not maxsplit:
        maxsplit = -1
    return s.split(sep, maxsplit)
splitfields = split
117 | \r | |
118 | # Join fields with optional separator\r | |
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Concatenate the words in list into one string, placing sep
    between each adjacent pair.  When sep is omitted a single
    space is used.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
131 | \r | |
132 | # for a little bit of speed\r | |
133 | _apply = apply\r | |
134 | \r | |
135 | # Find substring, raise exception if not found\r | |
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    """
    # Forward the optional arguments with call syntax instead of the
    # deprecated apply() builtin.
    return s.index(*args)
143 | \r | |
144 | # Find last substring, raise exception if not found\r | |
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    """
    # Forward the optional arguments with call syntax instead of the
    # deprecated apply() builtin.
    return s.rindex(*args)
152 | \r | |
153 | # Count non-overlapping occurrences of substring\r | |
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    """
    # Forward the optional arguments with call syntax instead of the
    # deprecated apply() builtin.
    return s.count(*args)
163 | \r | |
164 | # Find substring, return -1 if not found\r | |
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Forward the optional arguments with call syntax instead of the
    # deprecated apply() builtin.
    return s.find(*args)
176 | \r | |
177 | # Find last substring, return -1 if not found\r | |
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Forward the optional arguments with call syntax instead of the
    # deprecated apply() builtin.
    return s.rfind(*args)
189 | \r | |
190 | # for a bit of speed\r | |
191 | _float = float\r | |
192 | _int = int\r | |
193 | _long = long\r | |
194 | _StringType = type('')\r | |
195 | \r | |
196 | # Convert string to float\r | |
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.

    Raises TypeError if s is not a string; whatever float() raises
    for a malformed string propagates to the caller.

    """
    # isinstance() rather than an exact type comparison, so that
    # subclasses of the string type are accepted as strings too.
    if not isinstance(s, _StringType):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return _float(s)
208 | \r | |
209 | # Convert string to integer\r | |
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    Raises TypeError if no argument is given or if s is not a
    string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # isinstance() rather than an exact type comparison, so that
    # subclasses of the string type are accepted as strings too.
    if not isinstance(s, _StringType):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # A TypeError caused by passing too many arguments is deliberately
    # left to int() itself: the message differs but the error type is
    # the same.
    return _int(*args)
234 | \r | |
235 | \r | |
236 | # Convert string to long integer\r | |
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    Raises TypeError if no argument is given or if s is not a
    string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # isinstance() rather than an exact type comparison, so that
    # subclasses of the string type are accepted as strings too.
    if not isinstance(s, _StringType):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # A TypeError caused by passing too many arguments is deliberately
    # left to long() itself: the message differs but the error type is
    # the same.
    return _long(*args)
262 | \r | |
263 | \r | |
264 | # Left-justify a string\r | |
def ljust(s, width):
    """ljust(s, width) -> string

    Return s padded on the right with spaces so that it occupies
    a field of the given width.  A string that is already at least
    that wide is returned as it is; nothing is ever cut off.

    """
    padding = width - len(s)
    if padding > 0:
        return s + ' ' * padding
    return s
276 | \r | |
277 | # Right-justify a string\r | |
def rjust(s, width):
    """rjust(s, width) -> string

    Return s padded on the left with spaces so that it occupies
    a field of the given width.  A string that is already at least
    that wide is returned as it is; nothing is ever cut off.

    """
    padding = width - len(s)
    if padding > 0:
        return ' ' * padding + s
    return s
289 | \r | |
290 | # Center a string\r | |
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    n = width - len(s)
    if n <= 0: return s
    # Floor division keeps the pad count an integer even where "/" means
    # true division.
    half = n // 2
    if n%2 and width%2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half+1
    return ' '*half + s + ' '*(n-half)
306 | \r | |
307 | # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'\r | |
308 | # Decadent feature: the argument may be a string or a number\r | |
309 | # (Use of this is deprecated; it should be a string as with ljust c.s.)\r | |
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.  A
    leading '-' or '+' sign is kept in front of the inserted zeros.

    If x is not a string, its repr() is padded instead (deprecated
    usage).

    """
    if isinstance(x, str): s = x
    else: s = repr(x)
    n = len(s)
    if n >= width: return s
    sign = ''
    # Slice instead of indexing: s may be empty, in which case s[0] would
    # raise IndexError while s[:1] is simply ''.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0'*(width-n) + s
325 | \r | |
326 | # Expand tabs in a string.\r | |
327 | # Doesn't take non-printing chars into account, but does understand \n.\r | |
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).  The column is reset to
    zero after every newline.  If tabsize is zero or negative, tab
    characters are simply removed.

    """
    pieces = []
    column = 0  # characters emitted since the last newline
    for c in s:
        if c == '\t':
            if tabsize > 0:
                # Pad up to the next multiple of tabsize.
                c = ' ' * (tabsize - column % tabsize)
            else:
                # Avoids a modulo by zero; negative sizes already produced
                # an empty replacement.
                c = ''
            column = column + len(c)
        elif c == '\n':
            column = 0
        else:
            column = column + 1
        pieces.append(c)
    # Single join instead of growing the result one character at a time.
    return ''.join(pieces)
345 | \r | |
346 | # Character translation through look-up table.\r | |
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of the string s from which every character that
    occurs in the optional argument deletechars has been dropped,
    with the characters that remain mapped through the translation
    table, which must be a string of length 256.

    """
    translated = s.translate(table, deletions)
    return translated
357 | \r | |
358 | # Capitalize a string, e.g. "aBc dEf" -> "Abc def".\r | |
def capitalize(s):
    """capitalize(s) -> string

    Return a capitalized copy of the string s, as produced by the
    capitalize() method of s.

    """
    capitalized = s.capitalize()
    return capitalized
367 | \r | |
368 | # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".\r | |
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with its split(sep) method, capitalize each
    word, and glue the words back together with sep.  When sep is
    omitted (or otherwise false) the words are glued with a single
    space, so leading and trailing whitespace disappears and every
    run of whitespace collapses to one space.

    """
    glue = sep or ' '
    words = [word.capitalize() for word in s.split(sep)]
    return glue.join(words)
379 | \r | |
380 | # Construct a translation string\r | |
# Lazily built list form of the 256-character identity mapping; it is
# created on the first maketrans() call and copied for each new table.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length.

    Each character frm[i] is mapped to to[i]; every other character
    maps to itself.  Raises ValueError if the lengths differ.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        _idmapL = [chr(code) for code in range(256)]
    table = _idmapL[:]
    # Walk both strings in lockstep; a later duplicate in fromstr overrides
    # an earlier one, as with the index-based assignment it replaces.
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)
400 | \r | |
401 | # Substring replacement (global)\r | |
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new.  If the optional argument maxsplit is
    given and nonzero, only the first maxsplit occurrences are
    replaced.

    """
    # The replace() method of strings treats a count of 0 as "replace
    # nothing", whereas this function documents the default as "replace
    # all".  Translate 0 to -1, the method's spelling of "no limit".
    if not maxsplit:
        maxsplit = -1
    return s.replace(old, new, maxsplit)
411 | \r | |
412 | \r | |
413 | # XXX: transitional\r | |
414 | #\r | |
415 | # If string objects do not have methods, then we need to use the old string.py\r | |
416 | # library, which uses strop for many more things than just the few outlined\r | |
417 | # below.\r | |
418 | try:\r | |
419 | ''.upper\r | |
420 | except AttributeError:\r | |
421 | from stringold import *\r | |
422 | \r | |
423 | # Try importing optional built-in module "strop" -- if it exists,\r | |
424 | # it redefines some string operations that are 100-1000 times faster.\r | |
425 | # It also defines values for whitespace, lowercase and uppercase\r | |
426 | # that match <ctype.h>'s definitions.\r | |
427 | \r | |
428 | try:\r | |
429 | from strop import maketrans, lowercase, uppercase, whitespace\r | |
430 | letters = lowercase + uppercase\r | |
431 | except ImportError:\r | |
432 | pass # Use the original versions\r |