+++ /dev/null
-"""text_file\r
-\r
-provides the TextFile class, which gives an interface to text files\r
-that (optionally) takes care of stripping comments, ignoring blank\r
-lines, and joining lines with backslashes."""\r
-\r
-__revision__ = "$Id$"\r
-\r
-import sys\r
-\r
-\r
-class TextFile:\r
-\r
- """Provides a file-like object that takes care of all the things you\r
- commonly want to do when processing a text file that has some\r
- line-by-line syntax: strip comments (as long as "#" is your\r
- comment character), skip blank lines, join adjacent lines by\r
- escaping the newline (ie. backslash at end of line), strip\r
- leading and/or trailing whitespace. All of these are optional\r
- and independently controllable.\r
-\r
- Provides a 'warn()' method so you can generate warning messages that\r
- report physical line number, even if the logical line in question\r
- spans multiple physical lines. Also provides 'unreadline()' for\r
- implementing line-at-a-time lookahead.\r
-\r
- Constructor is called as:\r
-\r
- TextFile (filename=None, file=None, **options)\r
-\r
- It bombs (RuntimeError) if both 'filename' and 'file' are None;\r
- 'filename' should be a string, and 'file' a file object (or\r
- something that provides 'readline()' and 'close()' methods). It is\r
- recommended that you supply at least 'filename', so that TextFile\r
- can include it in warning messages. If 'file' is not supplied,\r
- TextFile creates its own using the 'open()' builtin.\r
-\r
- The options are all boolean, and affect the value returned by\r
- 'readline()':\r
- strip_comments [default: true]\r
- strip from "#" to end-of-line, as well as any whitespace\r
- leading up to the "#" -- unless it is escaped by a backslash\r
- lstrip_ws [default: false]\r
- strip leading whitespace from each line before returning it\r
- rstrip_ws [default: true]\r
- strip trailing whitespace (including line terminator!) from\r
- each line before returning it\r
- skip_blanks [default: true}\r
- skip lines that are empty *after* stripping comments and\r
- whitespace. (If both lstrip_ws and rstrip_ws are false,\r
- then some lines may consist of solely whitespace: these will\r
- *not* be skipped, even if 'skip_blanks' is true.)\r
- join_lines [default: false]\r
- if a backslash is the last non-newline character on a line\r
- after stripping comments and whitespace, join the following line\r
- to it to form one "logical line"; if N consecutive lines end\r
- with a backslash, then N+1 physical lines will be joined to\r
- form one logical line.\r
- collapse_join [default: false]\r
- strip leading whitespace from lines that are joined to their\r
- predecessor; only matters if (join_lines and not lstrip_ws)\r
-\r
- Note that since 'rstrip_ws' can strip the trailing newline, the\r
- semantics of 'readline()' must differ from those of the builtin file\r
- object's 'readline()' method! In particular, 'readline()' returns\r
- None for end-of-file: an empty string might just be a blank line (or\r
- an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is\r
- not."""\r
-\r
- default_options = { 'strip_comments': 1,\r
- 'skip_blanks': 1,\r
- 'lstrip_ws': 0,\r
- 'rstrip_ws': 1,\r
- 'join_lines': 0,\r
- 'collapse_join': 0,\r
- }\r
-\r
- def __init__ (self, filename=None, file=None, **options):\r
- """Construct a new TextFile object. At least one of 'filename'\r
- (a string) and 'file' (a file-like object) must be supplied.\r
- They keyword argument options are described above and affect\r
- the values returned by 'readline()'."""\r
-\r
- if filename is None and file is None:\r
- raise RuntimeError, \\r
- "you must supply either or both of 'filename' and 'file'"\r
-\r
- # set values for all options -- either from client option hash\r
- # or fallback to default_options\r
- for opt in self.default_options.keys():\r
- if opt in options:\r
- setattr (self, opt, options[opt])\r
-\r
- else:\r
- setattr (self, opt, self.default_options[opt])\r
-\r
- # sanity check client option hash\r
- for opt in options.keys():\r
- if opt not in self.default_options:\r
- raise KeyError, "invalid TextFile option '%s'" % opt\r
-\r
- if file is None:\r
- self.open (filename)\r
- else:\r
- self.filename = filename\r
- self.file = file\r
- self.current_line = 0 # assuming that file is at BOF!\r
-\r
- # 'linebuf' is a stack of lines that will be emptied before we\r
- # actually read from the file; it's only populated by an\r
- # 'unreadline()' operation\r
- self.linebuf = []\r
-\r
-\r
- def open (self, filename):\r
- """Open a new file named 'filename'. This overrides both the\r
- 'filename' and 'file' arguments to the constructor."""\r
-\r
- self.filename = filename\r
- self.file = open (self.filename, 'r')\r
- self.current_line = 0\r
-\r
-\r
- def close (self):\r
- """Close the current file and forget everything we know about it\r
- (filename, current line number)."""\r
-\r
- self.file.close ()\r
- self.file = None\r
- self.filename = None\r
- self.current_line = None\r
-\r
-\r
- def gen_error (self, msg, line=None):\r
- outmsg = []\r
- if line is None:\r
- line = self.current_line\r
- outmsg.append(self.filename + ", ")\r
- if isinstance(line, (list, tuple)):\r
- outmsg.append("lines %d-%d: " % tuple (line))\r
- else:\r
- outmsg.append("line %d: " % line)\r
- outmsg.append(str(msg))\r
- return ''.join(outmsg)\r
-\r
-\r
- def error (self, msg, line=None):\r
- raise ValueError, "error: " + self.gen_error(msg, line)\r
-\r
- def warn (self, msg, line=None):\r
- """Print (to stderr) a warning message tied to the current logical\r
- line in the current file. If the current logical line in the\r
- file spans multiple physical lines, the warning refers to the\r
- whole range, eg. "lines 3-5". If 'line' supplied, it overrides\r
- the current line number; it may be a list or tuple to indicate a\r
- range of physical lines, or an integer for a single physical\r
- line."""\r
- sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")\r
-\r
-\r
- def readline (self):\r
- """Read and return a single logical line from the current file (or\r
- from an internal buffer if lines have previously been "unread"\r
- with 'unreadline()'). If the 'join_lines' option is true, this\r
- may involve reading multiple physical lines concatenated into a\r
- single string. Updates the current line number, so calling\r
- 'warn()' after 'readline()' emits a warning about the physical\r
- line(s) just read. Returns None on end-of-file, since the empty\r
- string can occur if 'rstrip_ws' is true but 'strip_blanks' is\r
- not."""\r
-\r
- # If any "unread" lines waiting in 'linebuf', return the top\r
- # one. (We don't actually buffer read-ahead data -- lines only\r
- # get put in 'linebuf' if the client explicitly does an\r
- # 'unreadline()'.\r
- if self.linebuf:\r
- line = self.linebuf[-1]\r
- del self.linebuf[-1]\r
- return line\r
-\r
- buildup_line = ''\r
-\r
- while 1:\r
- # read the line, make it None if EOF\r
- line = self.file.readline()\r
- if line == '': line = None\r
-\r
- if self.strip_comments and line:\r
-\r
- # Look for the first "#" in the line. If none, never\r
- # mind. If we find one and it's the first character, or\r
- # is not preceded by "\", then it starts a comment --\r
- # strip the comment, strip whitespace before it, and\r
- # carry on. Otherwise, it's just an escaped "#", so\r
- # unescape it (and any other escaped "#"'s that might be\r
- # lurking in there) and otherwise leave the line alone.\r
-\r
- pos = line.find("#")\r
- if pos == -1: # no "#" -- no comments\r
- pass\r
-\r
- # It's definitely a comment -- either "#" is the first\r
- # character, or it's elsewhere and unescaped.\r
- elif pos == 0 or line[pos-1] != "\\":\r
- # Have to preserve the trailing newline, because it's\r
- # the job of a later step (rstrip_ws) to remove it --\r
- # and if rstrip_ws is false, we'd better preserve it!\r
- # (NB. this means that if the final line is all comment\r
- # and has no trailing newline, we will think that it's\r
- # EOF; I think that's OK.)\r
- eol = (line[-1] == '\n') and '\n' or ''\r
- line = line[0:pos] + eol\r
-\r
- # If all that's left is whitespace, then skip line\r
- # *now*, before we try to join it to 'buildup_line' --\r
- # that way constructs like\r
- # hello \\\r
- # # comment that should be ignored\r
- # there\r
- # result in "hello there".\r
- if line.strip() == "":\r
- continue\r
-\r
- else: # it's an escaped "#"\r
- line = line.replace("\\#", "#")\r
-\r
-\r
- # did previous line end with a backslash? then accumulate\r
- if self.join_lines and buildup_line:\r
- # oops: end of file\r
- if line is None:\r
- self.warn ("continuation line immediately precedes "\r
- "end-of-file")\r
- return buildup_line\r
-\r
- if self.collapse_join:\r
- line = line.lstrip()\r
- line = buildup_line + line\r
-\r
- # careful: pay attention to line number when incrementing it\r
- if isinstance(self.current_line, list):\r
- self.current_line[1] = self.current_line[1] + 1\r
- else:\r
- self.current_line = [self.current_line,\r
- self.current_line+1]\r
- # just an ordinary line, read it as usual\r
- else:\r
- if line is None: # eof\r
- return None\r
-\r
- # still have to be careful about incrementing the line number!\r
- if isinstance(self.current_line, list):\r
- self.current_line = self.current_line[1] + 1\r
- else:\r
- self.current_line = self.current_line + 1\r
-\r
-\r
- # strip whitespace however the client wants (leading and\r
- # trailing, or one or the other, or neither)\r
- if self.lstrip_ws and self.rstrip_ws:\r
- line = line.strip()\r
- elif self.lstrip_ws:\r
- line = line.lstrip()\r
- elif self.rstrip_ws:\r
- line = line.rstrip()\r
-\r
- # blank line (whether we rstrip'ed or not)? skip to next line\r
- # if appropriate\r
- if (line == '' or line == '\n') and self.skip_blanks:\r
- continue\r
-\r
- if self.join_lines:\r
- if line[-1] == '\\':\r
- buildup_line = line[:-1]\r
- continue\r
-\r
- if line[-2:] == '\\\n':\r
- buildup_line = line[0:-2] + '\n'\r
- continue\r
-\r
- # well, I guess there's some actual content there: return it\r
- return line\r
-\r
- # readline ()\r
-\r
-\r
- def readlines (self):\r
- """Read and return the list of all logical lines remaining in the\r
- current file."""\r
-\r
- lines = []\r
- while 1:\r
- line = self.readline()\r
- if line is None:\r
- return lines\r
- lines.append (line)\r
-\r
-\r
- def unreadline (self, line):\r
- """Push 'line' (a string) onto an internal buffer that will be\r
- checked by future 'readline()' calls. Handy for implementing\r
- a parser with line-at-a-time lookahead."""\r
-\r
- self.linebuf.append (line)\r