+++ /dev/null
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.\r
-# Licensed to PSF under a Contributor Agreement.\r
-\r
-"""This module defines the data structures used to represent a grammar.\r
-\r
-These are a bit arcane because they are derived from the data\r
-structures used by Python's 'pgen' parser generator.\r
-\r
-There's also a table here mapping operators to their names in the\r
-token module; the Python tokenize module reports all operators as the\r
-fallback token code OP, but the parser needs the actual token code.\r
-\r
-"""\r
-\r
-# Python imports\r
-import pickle\r
-\r
-# Local imports\r
-from . import token, tokenize\r
-\r
-\r
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    symbol2label  -- a dict, empty until a subclass or load() fills
                     it.  NOTE(review): presumably maps symbol names
                     to arc labels -- confirm against the subclasses.

    """

    def __init__(self):
        """Create an empty grammar; subclasses or load() fill the tables."""
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is pre-populated with the "EMPTY" entry.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        The instance ``__dict__`` is pickled to *filename* with pickle
        protocol 2.  The file is closed even if pickling fails.
        """
        with open(filename, "wb") as f:
            pickle.dump(self.__dict__, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file.

        The unpickled dict is merged into the instance ``__dict__``,
        overwriting any attribute of the same name.  The file is closed
        even if unpickling fails.
        """
        # SECURITY: unpickling can execute arbitrary code.  Only load
        # files this code wrote via dump(); never an untrusted path.
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def copy(self):
        """Return a shallow copy of the grammar.

        A new instance of the same class is created with no arguments.
        Each table (dict or list) is copied one level deep, so the new
        grammar has its own containers but shares the objects stored
        inside them (for example the individual DFAs) with the original.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging.

        Prints symbol2number, number2symbol, states, dfas, labels and
        start; keywords, tokens and symbol2label are not included.
        """
        from pprint import pprint
        # Single-argument print(...) calls produce the same output under
        # both the Python 2 print statement and the Python 3 function.
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start %s" % (self.start,))
-\r
-\r
# Operator text -> token number.  The table below pairs each operator
# with the name of its constant in the token module; tokenize itself
# does not provide this mapping.

opmap_raw = """
( LPAR
) RPAR
[ LSQB
] RSQB
: COLON
, COMMA
; SEMI
+ PLUS
- MINUS
* STAR
/ SLASH
| VBAR
& AMPER
< LESS
> GREATER
= EQUAL
. DOT
% PERCENT
` BACKQUOTE
{ LBRACE
} RBRACE
@ AT
== EQEQUAL
!= NOTEQUAL
<> NOTEQUAL
<= LESSEQUAL
>= GREATEREQUAL
~ TILDE
^ CIRCUMFLEX
<< LEFTSHIFT
>> RIGHTSHIFT
** DOUBLESTAR
+= PLUSEQUAL
-= MINEQUAL
*= STAREQUAL
/= SLASHEQUAL
%= PERCENTEQUAL
&= AMPEREQUAL
|= VBAREQUAL
^= CIRCUMFLEXEQUAL
<<= LEFTSHIFTEQUAL
>>= RIGHTSHIFTEQUAL
**= DOUBLESTAREQUAL
// DOUBLESLASH
//= DOUBLESLASHEQUAL
-> RARROW
"""

opmap = {}
# Empty lines (such as the one right after the opening quotes) are
# dropped; every remaining line must be exactly "<operator> <NAME>".
for line in filter(None, opmap_raw.splitlines()):
    op, name = line.split()
    # Resolve the constant name to its numeric value in the token module.
    opmap[op] = getattr(token, name)