[rustc.git] / src / libsyntax / util / parser_testing.rs

// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use ast;
use parse::{ParseSess,filemap_to_tts};
use parse::new_parser_from_source_str;
use parse::parser::Parser;
use parse::token;
use ptr::P;
use str::char_at;

/// Turns `source_str` into a vector of token trees.
///
/// A fresh `ParseSess` is created for the call and the source text is
/// registered in its codemap under the made-up filename `"bogofile"`.
pub fn string_to_tts(source_str: String) -> Vec<ast::TokenTree> {
    let sess = ParseSess::new();
    let filemap = sess.codemap().new_filemap("bogofile".to_string(), source_str);
    filemap_to_tts(&sess, filemap)
}

/// Builds a `Parser` over `source_str` inside the caller-supplied session `ps`.
///
/// The parser is created through `new_parser_from_source_str` with an empty
/// configuration vector and the made-up filename `"bogofile"`.
pub fn string_to_parser<'a>(ps: &'a ParseSess, source_str: String) -> Parser<'a> {
    let cfg = Vec::new();
    let name = "bogofile".to_string();
    new_parser_from_source_str(ps, cfg, name, source_str)
}

/// Runs `f` against a parser freshly created for the source text `s`.
///
/// After `f` returns, `abort_if_errors` is called on the parser, and only
/// then is the value produced by `f` handed back to the caller.
fn with_error_checking_parse<T, F>(s: String, f: F) -> T where
    F: FnOnce(&mut Parser) -> T,
{
    let sess = ParseSess::new();
    let mut parser = string_to_parser(&sess, s);
    let result = f(&mut parser);
    // Check for errors only after the callback has had its turn.
    parser.abort_if_errors();
    result
}

/// Parses `source_str` as a crate and returns the resulting `ast::Crate`.
///
/// The result of `parse_crate_mod` is passed through the `panictry!` macro,
/// and the whole parse runs under `with_error_checking_parse`.
pub fn string_to_crate(source_str: String) -> ast::Crate {
    with_error_checking_parse(source_str, |parser| {
        let krate = panictry!(parser.parse_crate_mod());
        krate
    })
}

/// Parses `source_str` as an expression, running under
/// `with_error_checking_parse`.
pub fn string_to_expr(source_str: String) -> P<ast::Expr> {
    with_error_checking_parse(source_str, |parser| parser.parse_expr())
}

/// Parses `source_str` as an item, running under `with_error_checking_parse`.
///
/// Returns whatever `Parser::parse_item` yields, which is an `Option`.
pub fn string_to_item(source_str: String) -> Option<P<ast::Item>> {
    with_error_checking_parse(source_str, |parser| parser.parse_item())
}

/// Parses `source_str` as a statement, running under
/// `with_error_checking_parse`.
///
/// # Panics
///
/// Panics (via `unwrap`) if `Parser::parse_stmt` yields no statement.
pub fn string_to_stmt(source_str: String) -> P<ast::Stmt> {
    with_error_checking_parse(source_str, |parser| {
        let stmt = parser.parse_stmt();
        stmt.unwrap()
    })
}

/// Parses `source_str` as a pattern.
///
/// Historical note: this uses the "irrefutable" flavour of pattern parsing,
/// which does not (currently) make any difference to the parser.
///
/// Unlike the other `string_to_*` helpers, this one does not go through
/// `with_error_checking_parse`.
pub fn string_to_pat(source_str: String) -> P<ast::Pat> {
    // The session and the parser are deliberately bound to named locals
    // instead of being chained into one expression: doing so works around
    // dropck-injected region-inference issues; see #25212, #22323, #22321.
    let session = ParseSess::new();
    let mut pat_parser = string_to_parser(&session, source_str);
    let pat = pat_parser.parse_pat();
    pat
}

/// Converts each string in `ids` into an `ast::Ident` via
/// `token::str_to_ident`, preserving the input order.
pub fn strs_to_idents(ids: Vec<&str> ) -> Vec<ast::Ident> {
    let mut idents = Vec::with_capacity(ids.len());
    for name in ids.iter() {
        idents.push(token::str_to_ident(*name));
    }
    idents
}

/// Does the given string `a` match the pattern `b`, modulo whitespace?
///
/// Whitespace in `a` (the given string) may be deleted or replaced with other
/// whitespace in order to match the pattern:
///
/// * a run of whitespace in `a` matches a run of whitespace of any length
///   in `b`;
/// * a run of whitespace in `a` that has no counterpart in `b` is skipped;
/// * whitespace that appears in `b` only is *not* skipped, so the match fails.
///
/// Only the characters recognised by `is_whitespace` (`' '`, `'\t'`, `'\r'`
/// and `'\n'`) are treated as whitespace. All other characters, including
/// multi-byte Unicode ones, are compared code point by code point; no
/// normalization (e.g. NFKC) is performed.
pub fn matches_codepattern(a : &str, b : &str) -> bool {
    // Byte offsets into `a` and `b`. Both always sit on a char boundary:
    // they advance either past ASCII whitespace or by a whole char's
    // encoded length.
    let mut idx_a = 0;
    let mut idx_b = 0;
    loop {
        let next_a = a[idx_a..].chars().next();
        let next_b = b[idx_b..].chars().next();
        match (next_a, next_b) {
            // Both exhausted at the same time: full match.
            (None, None) => return true,
            // Pattern still has content that the given string lacks.
            (None, Some(_)) => return false,
            // Pattern exhausted: fine only if the rest of `a` is all whitespace.
            (Some(ca), None) => {
                return is_whitespace(ca)
                    && scan_for_non_ws_or_end(a, idx_a) == a.len();
            }
            (Some(ca), Some(cb)) => {
                if is_whitespace(ca) {
                    // Whitespace in the given string is always skippable...
                    idx_a = scan_for_non_ws_or_end(a, idx_a);
                    // ...and swallows a matching run in the pattern, if any.
                    if is_whitespace(cb) {
                        idx_b = scan_for_non_ws_or_end(b, idx_b);
                    }
                } else if ca == cb {
                    // *Don't* silently eat whitespace in the pattern only:
                    // that case falls through to the mismatch below.
                    //
                    // Step over the whole code point, not a single byte, so
                    // the next lookup never lands inside a multi-byte char.
                    idx_a += ca.len_utf8();
                    idx_b += cb.len_utf8();
                } else {
                    return false;
                }
            }
        }
    }
}

/// Starting at byte offset `idx` of `a`, skips forward over whitespace (as
/// defined by `is_whitespace`) and returns the first offset `>= idx` that
/// either holds a non-whitespace char or lies at/after the end of the string.
fn scan_for_non_ws_or_end(a : &str, idx: usize) -> usize {
    let end = a.len();
    let mut pos = idx;
    loop {
        // Stop at the end of the string or at the first non-whitespace char.
        if pos >= end || !is_whitespace(char_at(a, pos)) {
            return pos;
        }
        pos += 1;
    }
}

/// Returns `true` when `c` is a space, tab, carriage return or newline.
///
/// (This definition was copied from the lexer.)
pub fn is_whitespace(c: char) -> bool {
    match c {
        ' ' | '\t' | '\r' | '\n' => true,
        _ => false,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises `matches_codepattern`: equality modulo whitespace, where
    /// only whitespace in the first argument may be dropped or reshaped.
    #[test]
    fn eqmodws() {
        // Empty and single-character inputs.
        assert!(matches_codepattern("", ""));
        assert!(!matches_codepattern("", "a"));
        assert!(!matches_codepattern("a", ""));
        assert!(matches_codepattern("a", "a"));

        // A whitespace run in the given string matches any run in the pattern.
        assert!(matches_codepattern("a b", "a   \n\t\r  b"));
        // Trailing whitespace is tolerated in the given string only.
        assert!(matches_codepattern("a b ", "a   \n\t\r  b"));
        assert!(!matches_codepattern("a b", "a   \n\t\r  b "));

        // Extra whitespace in the given string may shrink or vanish, but
        // whitespace present only in the pattern is never ignored.
        assert!(matches_codepattern("a   b", "a b"));
        assert!(!matches_codepattern("ab", "a b"));
        assert!(matches_codepattern("a   b", "ab"));
    }
}
Commit	Line	Data
970d7e83 LB	1	// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
	2	// file at the top-level directory of this distribution and at
	3	// http://rust-lang.org/COPYRIGHT.
	4	//
	5	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	6	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	7	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	8	// option. This file may not be copied, modified, or distributed
	9	// except according to those terms.
	10
	11	use ast;
62682a34	12	use parse::{ParseSess,filemap_to_tts};
c34b1796	13	use parse::new_parser_from_source_str;
970d7e83 LB	14	use parse::parser::Parser;
970d7e83 LB	15	use parse::token;
1a4d82fc	16	use ptr::P;
d9579d0f	17	use str::char_at;
970d7e83	18
1a4d82fc JJ	19	/// Map a string to tts, using a made-up filename:
1a4d82fc JJ	20	pub fn string_to_tts(source_str: String) -> Vec<ast::TokenTree> {
62682a34 SL	21	let ps = ParseSess::new();
62682a34 SL	22	filemap_to_tts(&ps, ps.codemap().new_filemap("bogofile".to_string(), source_str))
970d7e83 LB	23	}
970d7e83 LB	24
1a4d82fc JJ	25	/// Map string to parser (via tts)
	26	pub fn string_to_parser<'a>(ps: &'a ParseSess, source_str: String) -> Parser<'a> {
	27	new_parser_from_source_str(ps,
	28	Vec::new(),
	29	"bogofile".to_string(),
	30	source_str)
970d7e83 LB	31	}
970d7e83 LB	32
1a4d82fc JJ	33	fn with_error_checking_parse<T, F>(s: String, f: F) -> T where
	34	F: FnOnce(&mut Parser) -> T,
	35	{
62682a34	36	let ps = ParseSess::new();
1a4d82fc JJ	37	let mut p = string_to_parser(&ps, s);
	38	let x = f(&mut p);
	39	p.abort_if_errors();
	40	x
970d7e83 LB	41	}
970d7e83 LB	42
1a4d82fc JJ	43	/// Parse a string, return a crate.
	44	pub fn string_to_crate (source_str : String) -> ast::Crate {
	45	with_error_checking_parse(source_str, \|p\| {
9346a6ac	46	panictry!(p.parse_crate_mod())
1a4d82fc	47	})
970d7e83 LB	48	}
970d7e83 LB	49
1a4d82fc JJ	50	/// Parse a string, return an expr
	51	pub fn string_to_expr (source_str : String) -> P<ast::Expr> {
	52	with_error_checking_parse(source_str, \|p\| {
	53	p.parse_expr()
	54	})
970d7e83 LB	55	}
970d7e83 LB	56
1a4d82fc JJ	57	/// Parse a string, return an item
	58	pub fn string_to_item (source_str : String) -> Option<P<ast::Item>> {
	59	with_error_checking_parse(source_str, \|p\| {
c34b1796	60	p.parse_item()
1a4d82fc	61	})
970d7e83 LB	62	}
970d7e83 LB	63
1a4d82fc JJ	64	/// Parse a string, return a stmt
	65	pub fn string_to_stmt(source_str : String) -> P<ast::Stmt> {
	66	with_error_checking_parse(source_str, \|p\| {
c34b1796	67	p.parse_stmt().unwrap()
1a4d82fc	68	})
970d7e83 LB	69	}
970d7e83 LB	70
1a4d82fc JJ	71	/// Parse a string, return a pat. Uses "irrefutable"... which doesn't
	72	/// (currently) affect parsing.
	73	pub fn string_to_pat(source_str: String) -> P<ast::Pat> {
bd371182 AL	74	// Binding `sess` and `parser` works around dropck-injected
bd371182 AL	75	// region-inference issues; see #25212, #22323, #22321.
62682a34	76	let sess = ParseSess::new();
bd371182 AL	77	let mut parser = string_to_parser(&sess, source_str);
bd371182 AL	78	parser.parse_pat()
970d7e83 LB	79	}
970d7e83 LB	80
1a4d82fc JJ	81	/// Convert a vector of strings to a vector of ast::Ident's
	82	pub fn strs_to_idents(ids: Vec<&str> ) -> Vec<ast::Ident> {
	83	ids.iter().map(\|u\| token::str_to_ident(*u)).collect()
970d7e83 LB	84	}
970d7e83 LB	85
1a4d82fc JJ	86	/// Does the given string match the pattern? whitespace in the first string
	87	/// may be deleted or replaced with other whitespace to match the pattern.
	88	/// this function is Unicode-ignorant; fortunately, the careful design of
	89	/// UTF-8 mitigates this ignorance. In particular, this function only collapses
	90	/// sequences of \n, \r, ' ', and \t, but it should otherwise tolerate Unicode
	91	/// chars. Unsurprisingly, it doesn't do NKF-normalization(?).
970d7e83 LB	92	pub fn matches_codepattern(a : &str, b : &str) -> bool {
	93	let mut idx_a = 0;
	94	let mut idx_b = 0;
	95	loop {
1a4d82fc	96	if idx_a == a.len() && idx_b == b.len() {
970d7e83 LB	97	return true;
970d7e83 LB	98	}
1a4d82fc JJ	99	else if idx_a == a.len() {return false;}
1a4d82fc JJ	100	else if idx_b == b.len() {
970d7e83	101	// maybe the stuff left in a is all ws?
d9579d0f	102	if is_whitespace(char_at(a, idx_a)) {
1a4d82fc	103	return scan_for_non_ws_or_end(a,idx_a) == a.len();
970d7e83 LB	104	} else {
	105	return false;
	106	}
	107	}
	108	// ws in both given and pattern:
d9579d0f AL	109	else if is_whitespace(char_at(a, idx_a))
d9579d0f AL	110	&& is_whitespace(char_at(b, idx_b)) {
970d7e83 LB	111	idx_a = scan_for_non_ws_or_end(a,idx_a);
	112	idx_b = scan_for_non_ws_or_end(b,idx_b);
	113	}
	114	// ws in given only:
d9579d0f	115	else if is_whitespace(char_at(a, idx_a)) {
970d7e83 LB	116	idx_a = scan_for_non_ws_or_end(a,idx_a);
	117	}
	118	// don't silently eat ws in expected only.
d9579d0f	119	else if char_at(a, idx_a) == char_at(b, idx_b) {
970d7e83 LB	120	idx_a += 1;
	121	idx_b += 1;
	122	}
	123	else {
	124	return false;
	125	}
	126	}
	127	}
	128
85aaf69f	129	/// Given a string and an index, return the first usize >= idx
1a4d82fc JJ	130	/// that is a non-ws-char or is outside of the legal range of
1a4d82fc JJ	131	/// the string.
85aaf69f	132	fn scan_for_non_ws_or_end(a : &str, idx: usize) -> usize {
970d7e83 LB	133	let mut i = idx;
970d7e83 LB	134	let len = a.len();
d9579d0f	135	while (i < len) && (is_whitespace(char_at(a, i))) {
970d7e83 LB	136	i += 1;
	137	}
	138	i
	139	}
	140
1a4d82fc	141	/// Copied from lexer.
970d7e83 LB	142	pub fn is_whitespace(c: char) -> bool {
	143	return c == ' ' \|\| c == '\t' \|\| c == '\r' \|\| c == '\n';
	144	}
	145
	146	#[cfg(test)]
d9579d0f	147	mod tests {
970d7e83 LB	148	use super::*;
	149
	150	#[test] fn eqmodws() {
	151	assert_eq!(matches_codepattern("",""),true);
	152	assert_eq!(matches_codepattern("","a"),false);
	153	assert_eq!(matches_codepattern("a",""),false);
	154	assert_eq!(matches_codepattern("a","a"),true);
	155	assert_eq!(matches_codepattern("a b","a \n\t\r b"),true);
	156	assert_eq!(matches_codepattern("a b ","a \n\t\r b"),true);
	157	assert_eq!(matches_codepattern("a b","a \n\t\r b "),false);
	158	assert_eq!(matches_codepattern("a b","a b"),true);
	159	assert_eq!(matches_codepattern("ab","a b"),false);
	160	assert_eq!(matches_codepattern("a b","ab"),true);
	161	}
	162	}