src/libsyntax/parse/mod.rs

   1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! The main parser interface
  12
  13
  14 use ast::node_id;
  15 use ast;
  16 use codemap::{span, CodeMap, FileMap, FileSubstr};
  17 use codemap;
  18 use diagnostic::{span_handler, mk_span_handler, mk_handler, Emitter};
  19 use parse::attr::parser_attr;
  20 use parse::lexer::reader;
  21 use parse::parser::Parser;
  22
  23 use std::io;
  24 use std::path::Path;
  25
  26 pub mod lexer;
  27 pub mod parser;
  28 pub mod token;
  29 pub mod comments;
  30 pub mod attr;
  31
  32
  33 /// Common routines shared by parser mods
  34 pub mod common;
  35
  36 /// Routines the parser uses to classify AST nodes
  37 pub mod classify;
  38
  39 /// Reporting obsolete syntax
  40 pub mod obsolete;
  41
  42 // info about a parsing session.
  43 pub struct ParseSess {
  44     cm: @codemap::CodeMap, // better be the same as the one in the reader!
  45     next_id: node_id,
  46     span_diagnostic: @span_handler, // better be the same as the one in the reader!
  47 }
  48
  49 pub fn new_parse_sess(demitter: Option<Emitter>) -> @mut ParseSess {
  50     let cm = @CodeMap::new();
  51     @mut ParseSess {
  52         cm: cm,
  53         next_id: 1,
  54         span_diagnostic: mk_span_handler(mk_handler(demitter), cm),
  55     }
  56 }
  57
  58 pub fn new_parse_sess_special_handler(sh: @span_handler,
  59                                       cm: @codemap::CodeMap)
  60                                    -> @mut ParseSess {
  61     @mut ParseSess {
  62         cm: cm,
  63         next_id: 1,
  64         span_diagnostic: sh,
  65     }
  66 }
  67
  68 // a bunch of utility functions of the form parse_<thing>_from_<source>
  69 // where <thing> includes crate, expr, item, stmt, tts, and one that
  70 // uses a HOF to parse anything, and <source> includes file and
  71 // source_str.
  72
  73 pub fn parse_crate_from_file(
  74     input: &Path,
  75     cfg: ast::crate_cfg,
  76     sess: @mut ParseSess
  77 ) -> @ast::crate {
  78     new_parser_from_file(sess, /*bad*/ copy cfg, input).parse_crate_mod()
  79     // why is there no p.abort_if_errors here?
  80 }
  81
  82 pub fn parse_crate_from_source_str(
  83     name: @str,
  84     source: @str,
  85     cfg: ast::crate_cfg,
  86     sess: @mut ParseSess
  87 ) -> @ast::crate {
  88     let p = new_parser_from_source_str(
  89         sess,
  90         /*bad*/ copy cfg,
  91         name,
  92         source
  93     );
  94     maybe_aborted(p.parse_crate_mod(),p)
  95 }
  96
  97 pub fn parse_expr_from_source_str(
  98     name: @str,
  99     source: @str,
 100     cfg: ast::crate_cfg,
 101     sess: @mut ParseSess
 102 ) -> @ast::expr {
 103     let p = new_parser_from_source_str(
 104         sess,
 105         cfg,
 106         name,
 107         source
 108     );
 109     maybe_aborted(p.parse_expr(), p)
 110 }
 111
 112 pub fn parse_item_from_source_str(
 113     name: @str,
 114     source: @str,
 115     cfg: ast::crate_cfg,
 116     attrs: ~[ast::attribute],
 117     sess: @mut ParseSess
 118 ) -> Option<@ast::item> {
 119     let p = new_parser_from_source_str(
 120         sess,
 121         cfg,
 122         name,
 123         source
 124     );
 125     maybe_aborted(p.parse_item(attrs),p)
 126 }
 127
 128 pub fn parse_meta_from_source_str(
 129     name: @str,
 130     source: @str,
 131     cfg: ast::crate_cfg,
 132     sess: @mut ParseSess
 133 ) -> @ast::meta_item {
 134     let p = new_parser_from_source_str(
 135         sess,
 136         cfg,
 137         name,
 138         source
 139     );
 140     maybe_aborted(p.parse_meta_item(),p)
 141 }
 142
 143 pub fn parse_stmt_from_source_str(
 144     name: @str,
 145     source: @str,
 146     cfg: ast::crate_cfg,
 147     attrs: ~[ast::attribute],
 148     sess: @mut ParseSess
 149 ) -> @ast::stmt {
 150     let p = new_parser_from_source_str(
 151         sess,
 152         cfg,
 153         name,
 154         source
 155     );
 156     maybe_aborted(p.parse_stmt(attrs),p)
 157 }
 158
 159 pub fn parse_tts_from_source_str(
 160     name: @str,
 161     source: @str,
 162     cfg: ast::crate_cfg,
 163     sess: @mut ParseSess
 164 ) -> ~[ast::token_tree] {
 165     let p = new_parser_from_source_str(
 166         sess,
 167         cfg,
 168         name,
 169         source
 170     );
 171     *p.quote_depth += 1u;
 172     // right now this is re-creating the token trees from ... token trees.
 173     maybe_aborted(p.parse_all_token_trees(),p)
 174 }
 175
 176 // given a function and parsing information (source str,
 177 // filename, crate cfg, and sess), create a parser,
 178 // apply the function, and check that the parser
 179 // consumed all of the input before returning the function's
 180 // result.
 181 pub fn parse_from_source_str<T>(
 182     f: &fn(&Parser) -> T,
 183     name: @str, ss: codemap::FileSubstr,
 184     source: @str,
 185     cfg: ast::crate_cfg,
 186     sess: @mut ParseSess
 187 ) -> T {
 188     let p = new_parser_from_source_substr(
 189         sess,
 190         cfg,
 191         name,
 192         ss,
 193         source
 194     );
 195     let r = f(&p);
 196     if !p.reader.is_eof() {
 197         p.reader.fatal(~"expected end-of-string");
 198     }
 199     maybe_aborted(r,p)
 200 }
 201
 202 // return the next unused node id.
 203 pub fn next_node_id(sess: @mut ParseSess) -> node_id {
 204     let rv = sess.next_id;
 205     sess.next_id += 1;
 206     // ID 0 is reserved for the crate and doesn't actually exist in the AST
 207     assert!(rv != 0);
 208     return rv;
 209 }
 210
 211 // Create a new parser from a source string
 212 pub fn new_parser_from_source_str(sess: @mut ParseSess,
 213                                   cfg: ast::crate_cfg,
 214                                   name: @str,
 215                                   source: @str)
 216                                -> Parser {
 217     filemap_to_parser(sess,string_to_filemap(sess,source,name),cfg)
 218 }
 219
 220 // Create a new parser from a source string where the origin
 221 // is specified as a substring of another file.
 222 pub fn new_parser_from_source_substr(sess: @mut ParseSess,
 223                                   cfg: ast::crate_cfg,
 224                                   name: @str,
 225                                   ss: codemap::FileSubstr,
 226                                   source: @str)
 227                                -> Parser {
 228     filemap_to_parser(sess,substring_to_filemap(sess,source,name,ss),cfg)
 229 }
 230
 231 /// Create a new parser, handling errors as appropriate
 232 /// if the file doesn't exist
 233 pub fn new_parser_from_file(
 234     sess: @mut ParseSess,
 235     cfg: ast::crate_cfg,
 236     path: &Path
 237 ) -> Parser {
 238     filemap_to_parser(sess,file_to_filemap(sess,path,None),cfg)
 239 }
 240
 241 /// Given a session, a crate config, a path, and a span, add
 242 /// the file at the given path to the codemap, and return a parser.
 243 /// On an error, use the given span as the source of the problem.
 244 pub fn new_sub_parser_from_file(
 245     sess: @mut ParseSess,
 246     cfg: ast::crate_cfg,
 247     path: &Path,
 248     sp: span
 249 ) -> Parser {
 250     filemap_to_parser(sess,file_to_filemap(sess,path,Some(sp)),cfg)
 251 }
 252
 253 /// Given a filemap and config, return a parser
 254 pub fn filemap_to_parser(sess: @mut ParseSess,
 255                          filemap: @FileMap,
 256                          cfg: ast::crate_cfg) -> Parser {
 257     tts_to_parser(sess,filemap_to_tts(sess,filemap),cfg)
 258 }
 259
 260 // must preserve old name for now, because quote! from the *existing*
 261 // compiler expands into it
 262 pub fn new_parser_from_tts(sess: @mut ParseSess,
 263                      cfg: ast::crate_cfg,
 264                      tts: ~[ast::token_tree]) -> Parser {
 265     tts_to_parser(sess,tts,cfg)
 266 }
 267
 268
 269 // base abstractions
 270
 271 /// Given a session and a path and an optional span (for error reporting),
 272 /// add the path to the session's codemap and return the new filemap.
 273 pub fn file_to_filemap(sess: @mut ParseSess, path: &Path, spanopt: Option<span>)
 274     -> @FileMap {
 275     match io::read_whole_file_str(path) {
 276         Ok(src) => string_to_filemap(sess, src.to_managed(), path.to_str().to_managed()),
 277         Err(e) => {
 278             match spanopt {
 279                 Some(span) => sess.span_diagnostic.span_fatal(span, e),
 280                 None => sess.span_diagnostic.handler().fatal(e)
 281             }
 282         }
 283     }
 284 }
 285
 286 // given a session and a string, add the string to
 287 // the session's codemap and return the new filemap
 288 pub fn string_to_filemap(sess: @mut ParseSess, source: @str, path: @str)
 289     -> @FileMap {
 290     sess.cm.new_filemap(path, source)
 291 }
 292
 293 // given a session and a string and a path and a FileSubStr, add
 294 // the string to the CodeMap and return the new FileMap
 295 pub fn substring_to_filemap(sess: @mut ParseSess, source: @str, path: @str,
 296                            filesubstr: FileSubstr) -> @FileMap {
 297     sess.cm.new_filemap_w_substr(path,filesubstr,source)
 298 }
 299
 300 // given a filemap, produce a sequence of token-trees
 301 pub fn filemap_to_tts(sess: @mut ParseSess, filemap: @FileMap)
 302     -> ~[ast::token_tree] {
 303     // it appears to me that the cfg doesn't matter here... indeed,
 304     // parsing tt's probably shouldn't require a parser at all.
 305     let cfg = ~[];
 306     let srdr = lexer::new_string_reader(copy sess.span_diagnostic, filemap);
 307     let p1 = Parser(sess, cfg, srdr as @reader);
 308     p1.parse_all_token_trees()
 309 }
 310
 311 // given tts and cfg, produce a parser
 312 pub fn tts_to_parser(sess: @mut ParseSess,
 313                      tts: ~[ast::token_tree],
 314                      cfg: ast::crate_cfg) -> Parser {
 315     let trdr = lexer::new_tt_reader(
 316         copy sess.span_diagnostic,
 317         None,
 318         tts
 319     );
 320     Parser(sess, cfg, trdr as @reader)
 321 }
 322
 323 // abort if necessary
 324 pub fn maybe_aborted<T>(result : T, p: Parser) -> T {
 325     p.abort_if_errors();
 326     result
 327 }
 328
 329
 330
 331 #[cfg(test)]
 332 mod test {
 333     use super::*;
 334     use extra::serialize::Encodable;
 335     use extra;
 336     use std::io;
 337     use codemap::{span, BytePos, spanned};
 338     use opt_vec;
 339     use ast;
 340     use abi;
 341     use parse::parser::Parser;
 342     use parse::token::{str_to_ident};
 343     use util::parser_testing::{string_to_tts_and_sess, string_to_parser};
 344     use util::parser_testing::{string_to_expr, string_to_item};
 345     use util::parser_testing::{string_to_stmt, strs_to_idents};
 346
 347     // map a string to tts, return the tt without its parsesess
 348     fn string_to_tts_only(source_str : @str) -> ~[ast::token_tree] {
 349         let (tts,_ps) = string_to_tts_and_sess(source_str);
 350         tts
 351     }
 352
 353
 354     #[cfg(test)] fn to_json_str<E : Encodable<extra::json::Encoder>>(val: @E) -> ~str {
 355         do io::with_str_writer |writer| {
 356             let mut encoder = extra::json::Encoder(writer);
 357             val.encode(&mut encoder);
 358         }
 359     }
 360
 361     // produce a codemap::span
 362     fn sp (a: uint, b: uint) -> span {
 363         span{lo:BytePos(a),hi:BytePos(b),expn_info:None}
 364     }
 365
 366     #[test] fn path_exprs_1 () {
 367         assert_eq!(string_to_expr(@"a"),
 368                    @ast::expr{id:1,
 369                               node:ast::expr_path(@ast::Path {span:sp(0,1),
 370                                                               global:false,
 371                                                               idents:~[str_to_ident("a")],
 372                                                               rp:None,
 373                                                               types:~[]}),
 374                               span:sp(0,1)})
 375     }
 376
 377     #[test] fn path_exprs_2 () {
 378         assert_eq!(string_to_expr(@"::a::b"),
 379                    @ast::expr{id:1,
 380                                node:ast::expr_path(
 381                                    @ast::Path {span:sp(0,6),
 382                                                global:true,
 383                                                idents:strs_to_idents(~["a","b"]),
 384                                                rp:None,
 385                                                types:~[]}),
 386                               span:sp(0,6)})
 387     }
 388
 389     // FIXME (#6416): For some reason, this fails and causes a test failure, even though it's
 390     // marked as `#[should_fail]`.
 391     /*#[should_fail]
 392     #[test] fn bad_path_expr_1() {
 393         string_to_expr(@"::abc::def::return");
 394     }*/
 395
 396     #[test] fn string_to_tts_1 () {
 397         let (tts,_ps) = string_to_tts_and_sess(@"fn a (b : int) { b; }");
 398         assert_eq!(to_json_str(@tts),
 399                    ~"[\
 400                 [\"tt_tok\",null,[\"IDENT\",\"fn\",false]],\
 401                 [\"tt_tok\",null,[\"IDENT\",\"a\",false]],\
 402                 [\
 403                     \"tt_delim\",\
 404                     [\
 405                         [\"tt_tok\",null,\"LPAREN\"],\
 406                         [\"tt_tok\",null,[\"IDENT\",\"b\",false]],\
 407                         [\"tt_tok\",null,\"COLON\"],\
 408                         [\"tt_tok\",null,[\"IDENT\",\"int\",false]],\
 409                         [\"tt_tok\",null,\"RPAREN\"]\
 410                     ]\
 411                 ],\
 412                 [\
 413                     \"tt_delim\",\
 414                     [\
 415                         [\"tt_tok\",null,\"LBRACE\"],\
 416                         [\"tt_tok\",null,[\"IDENT\",\"b\",false]],\
 417                         [\"tt_tok\",null,\"SEMI\"],\
 418                         [\"tt_tok\",null,\"RBRACE\"]\
 419                     ]\
 420                 ]\
 421             ]"
 422                   );
 423     }
 424
 425     #[test] fn ret_expr() {
 426         assert_eq!(string_to_expr(@"return d"),
 427                    @ast::expr{id:2,
 428                               node:ast::expr_ret(
 429                                   Some(@ast::expr{id:1,
 430                                                   node:ast::expr_path(
 431                                                       @ast::Path{span:sp(7,8),
 432                                                                  global:false,
 433                                                                  idents:~[str_to_ident("d")],
 434                                                                  rp:None,
 435                                                                  types:~[]
 436                                                                 }),
 437                                                   span:sp(7,8)})),
 438                               span:sp(0,8)})
 439     }
 440
 441     #[test] fn parse_stmt_1 () {
 442         assert_eq!(string_to_stmt(@"b;"),
 443                    @spanned{
 444                        node: ast::stmt_expr(@ast::expr{
 445                            id: 1,
 446                            node: ast::expr_path(
 447                                @ast::Path{
 448                                    span:sp(0,1),
 449                                    global:false,
 450                                    idents:~[str_to_ident("b")],
 451                                    rp:None,
 452                                    types: ~[]}),
 453                            span: sp(0,1)},
 454                                             2), // fixme
 455                        span: sp(0,1)})
 456
 457     }
 458
 459     fn parser_done(p: Parser){
 460         assert_eq!(copy *p.token,token::EOF);
 461     }
 462
 463     #[test] fn parse_ident_pat () {
 464         let parser = string_to_parser(@"b");
 465         assert_eq!(parser.parse_pat(),
 466                    @ast::pat{id:1, // fixme
 467                              node: ast::pat_ident(ast::bind_infer,
 468                                                   @ast::Path{
 469                                                       span:sp(0,1),
 470                                                       global:false,
 471                                                       idents:~[str_to_ident("b")],
 472                                                       rp: None,
 473                                                       types: ~[]},
 474                                                   None // no idea
 475                                                  ),
 476                              span: sp(0,1)});
 477         parser_done(parser);
 478     }
 479
 480     #[test] fn parse_arg () {
 481         let parser = string_to_parser(@"b : int");
 482         assert_eq!(parser.parse_arg_general(true),
 483                    ast::arg{
 484                        is_mutbl: false,
 485                        ty: @ast::Ty{id:3, // fixme
 486                                     node: ast::ty_path(@ast::Path{
 487                                         span:sp(4,4), // this is bizarre...
 488                                         // check this in the original parser?
 489                                         global:false,
 490                                         idents:~[str_to_ident("int")],
 491                                         rp: None,
 492                                         types: ~[]},
 493                                                        @None, 2),
 494                                     span:sp(4,7)},
 495                        pat: @ast::pat{id:1,
 496                                       node: ast::pat_ident(ast::bind_infer,
 497                                                            @ast::Path{
 498                                                                span:sp(0,1),
 499                                                                global:false,
 500                                                                idents:~[str_to_ident("b")],
 501                                                                rp: None,
 502                                                                types: ~[]},
 503                                                            None // no idea
 504                                                           ),
 505                                       span: sp(0,1)},
 506                        id: 4 // fixme
 507                    })
 508     }
 509
 510     // check the contents of the tt manually:
 511     #[test] fn parse_fundecl () {
 512         // this test depends on the intern order of "fn" and "int", and on the
 513         // assignment order of the node_ids.
 514         assert_eq!(string_to_item(@"fn a (b : int) { b; }"),
 515                   Some(
 516                       @ast::item{ident:str_to_ident("a"),
 517                             attrs:~[],
 518                             id: 9, // fixme
 519                             node: ast::item_fn(ast::fn_decl{
 520                                 inputs: ~[ast::arg{
 521                                     is_mutbl: false,
 522                                     ty: @ast::Ty{id:3, // fixme
 523                                                 node: ast::ty_path(@ast::Path{
 524                                         span:sp(10,13),
 525                                         global:false,
 526                                         idents:~[str_to_ident("int")],
 527                                         rp: None,
 528                                         types: ~[]},
 529                                                        @None, 2),
 530                                                 span:sp(10,13)},
 531                                     pat: @ast::pat{id:1, // fixme
 532                                                    node: ast::pat_ident(
 533                                                        ast::bind_infer,
 534                                                        @ast::Path{
 535                                                            span:sp(6,7),
 536                                                            global:false,
 537                                                            idents:~[str_to_ident("b")],
 538                                                            rp: None,
 539                                                            types: ~[]},
 540                                                        None // no idea
 541                                                    ),
 542                                                   span: sp(6,7)},
 543                                     id: 4 // fixme
 544                                 }],
 545                                 output: @ast::Ty{id:5, // fixme
 546                                                  node: ast::ty_nil,
 547                                                  span:sp(15,15)}, // not sure
 548                                 cf: ast::return_val
 549                             },
 550                                     ast::impure_fn,
 551                                     abi::AbiSet::Rust(),
 552                                     ast::Generics{ // no idea on either of these:
 553                                         lifetimes: opt_vec::Empty,
 554                                         ty_params: opt_vec::Empty,
 555                                     },
 556                                     spanned{
 557                                         span: sp(15,21),
 558                                         node: ast::blk_{
 559                                             view_items: ~[],
 560                                             stmts: ~[@spanned{
 561                                                 node: ast::stmt_semi(@ast::expr{
 562                                                     id: 6,
 563                                                     node: ast::expr_path(
 564                                                         @ast::Path{
 565                                                             span:sp(17,18),
 566                                                             global:false,
 567                                                             idents:~[str_to_ident("b")],
 568                                                             rp:None,
 569                                                             types: ~[]}),
 570                                                     span: sp(17,18)},
 571                                                                      7), // fixme
 572                                                 span: sp(17,18)}],
 573                                             expr: None,
 574                                             id: 8, // fixme
 575                                             rules: ast::default_blk // no idea
 576                                         }}),
 577                             vis: ast::inherited,
 578                             span: sp(0,21)}));
 579     }
 580
 581
 582     #[test] fn parse_exprs () {
 583         // just make sure that they parse....
 584         string_to_expr(@"3 + 4");
 585         string_to_expr(@"a::z.froob(b,@(987+3))");
 586     }
 587
 588     #[test] fn attrs_fix_bug () {
 589         string_to_item(@"pub fn mk_file_writer(path: &Path, flags: &[FileFlag])
 590                    -> Result<@Writer, ~str> {
 591     #[cfg(windows)]
 592     fn wb() -> c_int {
 593       (O_WRONLY | libc::consts::os::extra::O_BINARY) as c_int
 594     }
 595
 596     #[cfg(unix)]
 597     fn wb() -> c_int { O_WRONLY as c_int }
 598
 599     let mut fflags: c_int = wb();
 600 }");
 601     }
 602
 603 }