]>
Commit | Line | Data |
---|---|---|
3257aa99 DM |
1 | """Helper class to quickly write a loop over all standard input files.\r |
2 | \r | |
3 | Typical use is:\r | |
4 | \r | |
5 | import fileinput\r | |
6 | for line in fileinput.input():\r | |
7 | process(line)\r | |
8 | \r | |
9 | This iterates over the lines of all files listed in sys.argv[1:],\r | |
10 | defaulting to sys.stdin if the list is empty. If a filename is '-' it\r | |
11 | is also replaced by sys.stdin. To specify an alternative list of\r | |
12 | filenames, pass it as the argument to input(). A single file name is\r | |
13 | also allowed.\r | |
14 | \r | |
15 | Functions filename(), lineno() return the filename and cumulative line\r | |
16 | number of the line that has just been read; filelineno() returns its\r | |
17 | line number in the current file; isfirstline() returns true iff the\r | |
18 | line just read is the first line of its file; isstdin() returns true\r | |
19 | iff the line was read from sys.stdin. Function nextfile() closes the\r | |
20 | current file so that the next iteration will read the first line from\r | |
21 | the next file (if any); lines not read from the file will not count\r | |
22 | towards the cumulative line count; the filename is not changed until\r | |
23 | after the first line of the next file has been read. Function close()\r | |
24 | closes the sequence.\r | |
25 | \r | |
26 | Before any lines have been read, filename() returns None and both line\r | |
27 | numbers are zero; nextfile() has no effect. After all lines have been\r | |
28 | read, filename() and the line number functions return the values\r | |
29 | pertaining to the last line read; nextfile() has no effect.\r | |
30 | \r | |
31 | All files are opened in text mode by default; you can override this by\r |
32 | setting the mode parameter to input() or FileInput.__init__().\r | |
33 | If an I/O error occurs during opening or reading a file, the IOError\r | |
34 | exception is raised.\r | |
35 | \r | |
36 | If sys.stdin is used more than once, the second and further use will\r | |
37 | return no lines, except perhaps for interactive use, or if it has been\r | |
38 | explicitly reset (e.g. using sys.stdin.seek(0)).\r | |
39 | \r | |
40 | Empty files are opened and immediately closed; the only time their\r | |
41 | presence in the list of filenames is noticeable at all is when the\r | |
42 | last file opened is empty.\r | |
43 | \r | |
44 | It is possible that the last line of a file doesn't end in a newline\r | |
45 | character; otherwise lines are returned including the trailing\r | |
46 | newline.\r | |
47 | \r | |
48 | Class FileInput is the implementation; its methods filename(),\r | |
49 | lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()\r |
50 | correspond to the functions in the module. In addition it has a\r | |
51 | readline() method which returns the next input line, and a\r | |
52 | __getitem__() method which implements the sequence behavior. The\r | |
53 | sequence must be accessed in strictly sequential order; sequence\r | |
54 | access and readline() cannot be mixed.\r | |
55 | \r | |
56 | Optional in-place filtering: if the keyword argument inplace=1 is\r | |
57 | passed to input() or to the FileInput constructor, the file is moved\r | |
58 | to a backup file and standard output is directed to the input file.\r | |
59 | This makes it possible to write a filter that rewrites its input file\r | |
60 | in place. If the keyword argument backup=".<some extension>" is also\r | |
61 | given, it specifies the extension for the backup file, and the backup\r | |
62 | file remains around; by default, the extension is ".bak" and it is\r | |
63 | deleted when the output file is closed. In-place filtering is\r | |
64 | disabled when standard input is read. XXX The current implementation\r | |
65 | does not work for MS-DOS 8+3 filesystems.\r | |
66 | \r | |
67 | Performance: this module is unfortunately one of the slower ways of\r | |
68 | processing large numbers of input lines. Nevertheless, a significant\r | |
69 | speed-up has been obtained by using readlines(bufsize) instead of\r | |
70 | readline(). A new keyword argument, bufsize=N, is present on the\r | |
71 | input() function and the FileInput() class to override the default\r | |
72 | buffer size.\r | |
73 | \r | |
74 | XXX Possible additions:\r | |
75 | \r | |
76 | - optional getopt argument processing\r | |
77 | - isatty()\r | |
78 | - read(), read(size), even readlines()\r | |
79 | \r | |
80 | """\r | |
81 | \r | |
82 | import sys, os\r | |
83 | \r | |
# Names exported by "from fileinput import *".  fileno() and the two open
# hooks are public (non-underscore) functions of this module, so they are
# exported together with the other helpers.
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput",
           "hook_compressed", "hook_encoded"]

# FileInput instance created by the most recent input() call; None when no
# input() has been started or after close().
_state = None

# Size hint passed to readlines() when the caller gives bufsize=0.
DEFAULT_BUFSIZE = 8*1024
90 | \r | |
def input(files=None, inplace=0, backup="", bufsize=0,
          mode="r", openhook=None):
    """Return an instance of the FileInput class, which can be iterated.

    The parameters are passed to the constructor of the FileInput class.
    The returned instance, in addition to being an iterator,
    keeps global state for the functions of this module.

    Raises RuntimeError if the instance created by a previous input()
    call still has a file open.
    """
    global _state
    if _state and _state._file:
        # Call form of raise: same exception and message, and valid syntax
        # on both Python 2 and Python 3.
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    return _state
104 | \r | |
def close():
    """Close the sequence.

    The module-level FileInput instance is forgotten first and then, if
    there was one, its close() method is called.
    """
    global _state
    active, _state = _state, None
    if active:
        active.close()
112 | \r | |
def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError when there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()
126 | \r | |
def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError when there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()
135 | \r | |
def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError when there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()
145 | \r | |
def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError when there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()
155 | \r | |
def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.

    Raises RuntimeError when there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.fileno()
164 | \r | |
def isfirstline():
    """
    Returns true if the line just read is the first line of its file,
    otherwise returns false.

    Raises RuntimeError when there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()
173 | \r | |
def isstdin():
    """
    Returns true if the last line was read from sys.stdin,
    otherwise returns false.

    Raises RuntimeError when there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()
182 | \r | |
class FileInput:
    """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior.  The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Set up the list of files to read; nothing is opened yet.

        files    -- a single file name, an iterable of names, or None to
                    use sys.argv[1:]; an empty list means ('-',), and '-'
                    is read from sys.stdin.
        inplace  -- if true, each file is renamed to a backup and
                    sys.stdout is redirected to a new file of that name.
        backup   -- suffix for the backup file; when empty the suffix is
                    os.extsep + "bak" and the backup is removed afterwards.
        bufsize  -- size hint for readlines(); 0 selects DEFAULT_BUFSIZE.
        mode     -- one of 'r', 'rU', 'U' or 'rb'.
        openhook -- callable(filename, mode) returning an open file;
                    cannot be combined with inplace.

        Raises ValueError for an unsupported mode, for inplace combined
        with openhook, or for a non-callable openhook.
        """
        if isinstance(files, basestring):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        self._mode = mode
        if inplace and openhook:
            raise ValueError("FileInput cannot use an opening hook in inplace mode")
        elif openhook and not hasattr(openhook, '__call__'):
            raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        try:
            self.nextfile()
        finally:
            self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve the line straight from the current buffer.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __getitem__(self, i):
        """Sequence access; i must equal the number of lines read so far.

        Raises RuntimeError for any other index and IndexError once the
        input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file so the next read starts the next file.

        Restores sys.stdout if it was redirected, closes the in-place
        output file and the input file (sys.stdin itself is never closed),
        and removes the backup file unless a backup suffix was given.
        """
        # Attributes are reset to None (the same "nothing here" value that
        # __init__ uses) before the resource is released, so a failure
        # while closing does not leave a stale reference behind.
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            current = self._file
            self._file = None
            try:
                if current and not self._isstdin:
                    current.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

        self._isstdin = False
        self._buffer = []
        self._bufindex = 0

    def _open_next(self):
        """Open the first remaining file name and make it the current file.

        Must only be called when self._files is not empty.  May raise
        IOError / OSError from renaming or opening the file.
        """
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._backupfilename = (
                self._filename + (self._backup or os.extsep+"bak"))
            try:
                os.unlink(self._backupfilename)
            except os.error:
                pass
            # The next few lines may raise IOError
            os.rename(self._filename, self._backupfilename)
            self._file = open(self._backupfilename, self._mode)
            try:
                perm = os.fstat(self._file.fileno()).st_mode
            except OSError:
                self._output = open(self._filename, "w")
            else:
                # Recreate the file with the permission bits of the
                # original that was just renamed.
                fd = os.open(self._filename,
                             os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                             perm)
                self._output = os.fdopen(fd, "w")
                try:
                    if hasattr(os, 'chmod'):
                        os.chmod(self._filename, perm)
                except OSError:
                    pass
            self._savestdout = sys.stdout
            sys.stdout = self._output
        else:
            # This may raise IOError
            if self._openhook:
                self._file = self._openhook(self._filename, self._mode)
            else:
                self._file = open(self._filename, self._mode)

    def readline(self):
        """Return the next input line, or "" when all files are exhausted.

        Implemented as a loop rather than by recursion, so that an
        arbitrarily long run of empty files cannot exceed the interpreter's
        recursion limit.
        """
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted; go round again for the next.
                self.nextfile()

    def filename(self):
        """Return the name of the file of the last line read, or None."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def fileno(self):
        """Return the descriptor of the current file, or -1 if none."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if the last line read was the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
379 | \r | |
380 | \r | |
def hook_compressed(filename, mode):
    """Open hook that picks the opener from the file name's extension.

    '.gz' names are opened with gzip.open, '.bz2' names with bz2.BZ2File,
    and every other name with the builtin open().  The matching
    compression module is imported only when it is needed.
    """
    suffix = os.path.splitext(filename)[1]
    if suffix == '.gz':
        import gzip
        opener = gzip.open
    elif suffix == '.bz2':
        import bz2
        opener = bz2.BZ2File
    else:
        opener = open
    return opener(filename, mode)
391 | \r | |
392 | \r | |
def hook_encoded(encoding):
    """Return an open hook that opens each file with the given encoding.

    The returned callable takes (filename, mode), removes the 'U' and 'b'
    flags from mode (falling back to 'r' if nothing is left) and opens
    the file through io.open() with newline=''.
    """
    import io

    def openhook(filename, mode):
        text_mode = mode.replace('U', '').replace('b', '')
        if not text_mode:
            text_mode = 'r'
        return io.open(filename, text_mode, encoding=encoding, newline='')

    return openhook
399 | \r | |
400 | \r | |
def _test():
    """Command-line self test.

    Options: -i turns on in-place mode, -b SUFFIX sets the backup suffix.
    Every line of the remaining arguments is printed prefixed with its
    cumulative line number, file name and per-file line number; the first
    line of each file is marked with '*'.  A final summary line is printed
    after the last input line.
    """
    import getopt
    inplace = 0
    backup = 0
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = 1
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        # A single parenthesised argument prints the same text whether
        # print is a statement (Python 2) or a function.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   "*" if isfirstline() else "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

if __name__ == '__main__':
    _test()