+++ /dev/null
-\r
-/* Parser implementation */\r
-\r
-/* For a description, see the comments at end of this file */\r
-\r
-/* XXX To do: error recovery */\r
-\r
-#include "Python.h"\r
-#include "pgenheaders.h"\r
-#include "token.h"\r
-#include "grammar.h"\r
-#include "node.h"\r
-#include "parser.h"\r
-#include "errcode.h"\r
-\r
-\r
-#ifdef Py_DEBUG\r
-extern int Py_DebugFlag;\r
-#define D(x) if (!Py_DebugFlag); else x\r
-#else\r
-#define D(x)\r
-#endif\r
-\r
-\r
-/* STACK DATA TYPE */\r
-\r
-static void s_reset(stack *);\r
-\r
-static void\r
-s_reset(stack *s)\r
-{\r
- s->s_top = &s->s_base[MAXSTACK];\r
-}\r
-\r
-#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])\r
-\r
-static int\r
-s_push(register stack *s, dfa *d, node *parent)\r
-{\r
- register stackentry *top;\r
- if (s->s_top == s->s_base) {\r
- fprintf(stderr, "s_push: parser stack overflow\n");\r
- return E_NOMEM;\r
- }\r
- top = --s->s_top;\r
- top->s_dfa = d;\r
- top->s_parent = parent;\r
- top->s_state = 0;\r
- return 0;\r
-}\r
-\r
-#ifdef Py_DEBUG\r
-\r
-static void\r
-s_pop(register stack *s)\r
-{\r
- if (s_empty(s))\r
- Py_FatalError("s_pop: parser stack underflow -- FATAL");\r
- s->s_top++;\r
-}\r
-\r
-#else /* !Py_DEBUG */\r
-\r
-#define s_pop(s) (s)->s_top++\r
-\r
-#endif\r
-\r
-\r
-/* PARSER CREATION */\r
-\r
-parser_state *\r
-PyParser_New(grammar *g, int start)\r
-{\r
- parser_state *ps;\r
-\r
- if (!g->g_accel)\r
- PyGrammar_AddAccelerators(g);\r
- ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));\r
- if (ps == NULL)\r
- return NULL;\r
- ps->p_grammar = g;\r
-#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD\r
- ps->p_flags = 0;\r
-#endif\r
- ps->p_tree = PyNode_New(start);\r
- if (ps->p_tree == NULL) {\r
- PyMem_FREE(ps);\r
- return NULL;\r
- }\r
- s_reset(&ps->p_stack);\r
- (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);\r
- return ps;\r
-}\r
-\r
-void\r
-PyParser_Delete(parser_state *ps)\r
-{\r
- /* NB If you want to save the parse tree,\r
- you must set p_tree to NULL before calling delparser! */\r
- PyNode_Free(ps->p_tree);\r
- PyMem_FREE(ps);\r
-}\r
-\r
-\r
-/* PARSER STACK OPERATIONS */\r
-\r
-static int\r
-shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset)\r
-{\r
- int err;\r
- assert(!s_empty(s));\r
- err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);\r
- if (err)\r
- return err;\r
- s->s_top->s_state = newstate;\r
- return 0;\r
-}\r
-\r
-static int\r
-push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)\r
-{\r
- int err;\r
- register node *n;\r
- n = s->s_top->s_parent;\r
- assert(!s_empty(s));\r
- err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);\r
- if (err)\r
- return err;\r
- s->s_top->s_state = newstate;\r
- return s_push(s, d, CHILD(n, NCH(n)-1));\r
-}\r
-\r
-\r
-/* PARSER PROPER */\r
-\r
-static int\r
-classify(parser_state *ps, int type, char *str)\r
-{\r
- grammar *g = ps->p_grammar;\r
- register int n = g->g_ll.ll_nlabels;\r
-\r
- if (type == NAME) {\r
- register char *s = str;\r
- register label *l = g->g_ll.ll_label;\r
- register int i;\r
- for (i = n; i > 0; i--, l++) {\r
- if (l->lb_type != NAME || l->lb_str == NULL ||\r
- l->lb_str[0] != s[0] ||\r
- strcmp(l->lb_str, s) != 0)\r
- continue;\r
-#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD\r
- if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&\r
- s[0] == 'p' && strcmp(s, "print") == 0) {\r
- break; /* no longer a keyword */\r
- }\r
-#endif\r
- D(printf("It's a keyword\n"));\r
- return n - i;\r
- }\r
- }\r
-\r
- {\r
- register label *l = g->g_ll.ll_label;\r
- register int i;\r
- for (i = n; i > 0; i--, l++) {\r
- if (l->lb_type == type && l->lb_str == NULL) {\r
- D(printf("It's a token we know\n"));\r
- return n - i;\r
- }\r
- }\r
- }\r
-\r
- D(printf("Illegal token\n"));\r
- return -1;\r
-}\r
-\r
-#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD\r
-static void\r
-future_hack(parser_state *ps)\r
-{\r
- node *n = ps->p_stack.s_top->s_parent;\r
- node *ch, *cch;\r
- int i;\r
-\r
- /* from __future__ import ..., must have at least 4 children */\r
- n = CHILD(n, 0);\r
- if (NCH(n) < 4)\r
- return;\r
- ch = CHILD(n, 0);\r
- if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)\r
- return;\r
- ch = CHILD(n, 1);\r
- if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&\r
- strcmp(STR(CHILD(ch, 0)), "__future__") != 0)\r
- return;\r
- ch = CHILD(n, 3);\r
- /* ch can be a star, a parenthesis or import_as_names */\r
- if (TYPE(ch) == STAR)\r
- return;\r
- if (TYPE(ch) == LPAR)\r
- ch = CHILD(n, 4);\r
-\r
- for (i = 0; i < NCH(ch); i += 2) {\r
- cch = CHILD(ch, i);\r
- if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {\r
- char *str_ch = STR(CHILD(cch, 0));\r
- if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {\r
- ps->p_flags |= CO_FUTURE_WITH_STATEMENT;\r
- } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {\r
- ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;\r
- } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {\r
- ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;\r
- }\r
- }\r
- }\r
-}\r
-#endif /* future keyword */\r
-\r
-int\r
-PyParser_AddToken(register parser_state *ps, register int type, char *str,\r
- int lineno, int col_offset, int *expected_ret)\r
-{\r
- register int ilabel;\r
- int err;\r
-\r
- D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));\r
-\r
- /* Find out which label this token is */\r
- ilabel = classify(ps, type, str);\r
- if (ilabel < 0)\r
- return E_SYNTAX;\r
-\r
- /* Loop until the token is shifted or an error occurred */\r
- for (;;) {\r
- /* Fetch the current dfa and state */\r
- register dfa *d = ps->p_stack.s_top->s_dfa;\r
- register state *s = &d->d_state[ps->p_stack.s_top->s_state];\r
-\r
- D(printf(" DFA '%s', state %d:",\r
- d->d_name, ps->p_stack.s_top->s_state));\r
-\r
- /* Check accelerator */\r
- if (s->s_lower <= ilabel && ilabel < s->s_upper) {\r
- register int x = s->s_accel[ilabel - s->s_lower];\r
- if (x != -1) {\r
- if (x & (1<<7)) {\r
- /* Push non-terminal */\r
- int nt = (x >> 8) + NT_OFFSET;\r
- int arrow = x & ((1<<7)-1);\r
- dfa *d1 = PyGrammar_FindDFA(\r
- ps->p_grammar, nt);\r
- if ((err = push(&ps->p_stack, nt, d1,\r
- arrow, lineno, col_offset)) > 0) {\r
- D(printf(" MemError: push\n"));\r
- return err;\r
- }\r
- D(printf(" Push ...\n"));\r
- continue;\r
- }\r
-\r
- /* Shift the token */\r
- if ((err = shift(&ps->p_stack, type, str,\r
- x, lineno, col_offset)) > 0) {\r
- D(printf(" MemError: shift.\n"));\r
- return err;\r
- }\r
- D(printf(" Shift.\n"));\r
- /* Pop while we are in an accept-only state */\r
- while (s = &d->d_state\r
- [ps->p_stack.s_top->s_state],\r
- s->s_accept && s->s_narcs == 1) {\r
- D(printf(" DFA '%s', state %d: "\r
- "Direct pop.\n",\r
- d->d_name,\r
- ps->p_stack.s_top->s_state));\r
-#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD\r
- if (d->d_name[0] == 'i' &&\r
- strcmp(d->d_name,\r
- "import_stmt") == 0)\r
- future_hack(ps);\r
-#endif\r
- s_pop(&ps->p_stack);\r
- if (s_empty(&ps->p_stack)) {\r
- D(printf(" ACCEPT.\n"));\r
- return E_DONE;\r
- }\r
- d = ps->p_stack.s_top->s_dfa;\r
- }\r
- return E_OK;\r
- }\r
- }\r
-\r
- if (s->s_accept) {\r
-#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD\r
- if (d->d_name[0] == 'i' &&\r
- strcmp(d->d_name, "import_stmt") == 0)\r
- future_hack(ps);\r
-#endif\r
- /* Pop this dfa and try again */\r
- s_pop(&ps->p_stack);\r
- D(printf(" Pop ...\n"));\r
- if (s_empty(&ps->p_stack)) {\r
- D(printf(" Error: bottom of stack.\n"));\r
- return E_SYNTAX;\r
- }\r
- continue;\r
- }\r
-\r
- /* Stuck, report syntax error */\r
- D(printf(" Error.\n"));\r
- if (expected_ret) {\r
- if (s->s_lower == s->s_upper - 1) {\r
- /* Only one possible expected token */\r
- *expected_ret = ps->p_grammar->\r
- g_ll.ll_label[s->s_lower].lb_type;\r
- }\r
- else\r
- *expected_ret = -1;\r
- }\r
- return E_SYNTAX;\r
- }\r
-}\r
-\r
-\r
-#ifdef Py_DEBUG\r
-\r
-/* DEBUG OUTPUT */\r
-\r
-void\r
-dumptree(grammar *g, node *n)\r
-{\r
- int i;\r
-\r
- if (n == NULL)\r
- printf("NIL");\r
- else {\r
- label l;\r
- l.lb_type = TYPE(n);\r
- l.lb_str = STR(n);\r
- printf("%s", PyGrammar_LabelRepr(&l));\r
- if (ISNONTERMINAL(TYPE(n))) {\r
- printf("(");\r
- for (i = 0; i < NCH(n); i++) {\r
- if (i > 0)\r
- printf(",");\r
- dumptree(g, CHILD(n, i));\r
- }\r
- printf(")");\r
- }\r
- }\r
-}\r
-\r
-void\r
-showtree(grammar *g, node *n)\r
-{\r
- int i;\r
-\r
- if (n == NULL)\r
- return;\r
- if (ISNONTERMINAL(TYPE(n))) {\r
- for (i = 0; i < NCH(n); i++)\r
- showtree(g, CHILD(n, i));\r
- }\r
- else if (ISTERMINAL(TYPE(n))) {\r
- printf("%s", _PyParser_TokenNames[TYPE(n)]);\r
- if (TYPE(n) == NUMBER || TYPE(n) == NAME)\r
- printf("(%s)", STR(n));\r
- printf(" ");\r
- }\r
- else\r
- printf("? ");\r
-}\r
-\r
-void\r
-printtree(parser_state *ps)\r
-{\r
- if (Py_DebugFlag) {\r
- printf("Parse tree:\n");\r
- dumptree(ps->p_grammar, ps->p_tree);\r
- printf("\n");\r
- printf("Tokens:\n");\r
- showtree(ps->p_grammar, ps->p_tree);\r
- printf("\n");\r
- }\r
- printf("Listing:\n");\r
- PyNode_ListTree(ps->p_tree);\r
- printf("\n");\r
-}\r
-\r
-#endif /* Py_DEBUG */\r
-\r
-/*\r
-\r
-Description\r
------------\r
-\r
-The parser's interface is different than usual: the function addtoken()\r
-must be called for each token in the input. This makes it possible to\r
-turn it into an incremental parsing system later. The parsing system\r
-constructs a parse tree as it goes.\r
-\r
-A parsing rule is represented as a Deterministic Finite-state Automaton\r
-(DFA). A node in a DFA represents a state of the parser; an arc represents\r
-a transition. Transitions are either labeled with terminal symbols or\r
-with non-terminals. When the parser decides to follow an arc labeled\r
-with a non-terminal, it is invoked recursively with the DFA representing\r
-the parsing rule for that as its initial state; when that DFA accepts,\r
-the parser that invoked it continues. The parse tree constructed by the\r
-recursively called parser is inserted as a child in the current parse tree.\r
-\r
-The DFA's can be constructed automatically from a more conventional\r
-language description. An extended LL(1) grammar (ELL(1)) is suitable.\r
-Certain restrictions make the parser's life easier: rules that can produce\r
-the empty string should be outlawed (there are other ways to put loops\r
-or optional parts in the language). To avoid the need to construct\r
-FIRST sets, we can require that all but the last alternative of a rule\r
-(really: arc going out of a DFA's state) must begin with a terminal\r
-symbol.\r
-\r
-As an example, consider this grammar:\r
-\r
-expr: term (OP term)*\r
-term: CONSTANT | '(' expr ')'\r
-\r
-The DFA corresponding to the rule for expr is:\r
-\r
-------->.---term-->.------->\r
- ^ |\r
- | |\r
- \----OP----/\r
-\r
-The parse tree generated for the input a+b is:\r
-\r
-(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))\r
-\r
-*/\r