[rustc.git] / src / libsyntax / util / parser_testing.rs

// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use ast;
use parse::{ParseSess,PResult,filemap_to_tts};
use parse::{lexer, new_parser_from_source_str};
use parse::parser::Parser;
use parse::token;
use ptr::P;
use std::iter::Peekable;

/// Turns `source_str` into token trees.
///
/// The text is registered in a fresh `ParseSess` under the placeholder
/// file name "bogofile" and the resulting file map is handed to
/// `filemap_to_tts`.
pub fn string_to_tts(source_str: String) -> Vec<ast::TokenTree> {
    let sess = ParseSess::new();
    let filemap = sess.codemap().new_filemap("bogofile".to_string(), source_str);
    filemap_to_tts(&sess, filemap)
}

/// Builds a `Parser` over `source_str` inside the session `ps`.
///
/// The source is given the placeholder file name "bogofile" and an empty
/// vector is passed as the second argument of `new_parser_from_source_str`.
pub fn string_to_parser<'a>(ps: &'a ParseSess, source_str: String) -> Parser<'a> {
    let empty = Vec::new();
    let file_name = "bogofile".to_string();
    new_parser_from_source_str(ps, empty, file_name, source_str)
}

/// Runs the parsing callback `f` on a parser built over `s` in session `ps`.
///
/// The callback's `PResult` is unwrapped with `panictry!`; afterwards
/// `abort_if_errors` is called on the parser, and only then is the parsed
/// value handed back to the caller.
fn with_error_checking_parse<'a, T, F>(s: String, ps: &'a ParseSess, f: F) -> T where
    F: FnOnce(&mut Parser<'a>) -> PResult<'a, T>,
{
    let mut parser = string_to_parser(ps, s);
    let parsed = panictry!(f(&mut parser));
    // Checked after the callback has run, before the value is returned.
    parser.abort_if_errors();
    parsed
}

/// Parses `source_str` with `Parser::parse_crate_mod` in a fresh `ParseSess`
/// and returns the resulting crate. Errors are handled by
/// `with_error_checking_parse`.
pub fn string_to_crate (source_str : String) -> ast::Crate {
    let sess = ParseSess::new();
    with_error_checking_parse(source_str, &sess, |parser| parser.parse_crate_mod())
}

/// Parses `source_str` with `Parser::parse_expr` in a fresh `ParseSess`
/// and returns the resulting expression. Errors are handled by
/// `with_error_checking_parse`.
pub fn string_to_expr (source_str : String) -> P<ast::Expr> {
    let sess = ParseSess::new();
    with_error_checking_parse(source_str, &sess, |parser| parser.parse_expr())
}

/// Parses `source_str` with `Parser::parse_item` in a fresh `ParseSess`.
///
/// Returns whatever `parse_item` produced (an optional item). Errors are
/// handled by `with_error_checking_parse`.
pub fn string_to_item (source_str : String) -> Option<P<ast::Item>> {
    let sess = ParseSess::new();
    with_error_checking_parse(source_str, &sess, |parser| parser.parse_item())
}

/// Parses `source_str` with `Parser::parse_stmt` in a fresh `ParseSess`.
///
/// Returns whatever `parse_stmt` produced (an optional statement). Errors
/// are handled by `with_error_checking_parse`.
pub fn string_to_stmt(source_str : String) -> Option<ast::Stmt> {
    let sess = ParseSess::new();
    with_error_checking_parse(source_str, &sess, |parser| parser.parse_stmt())
}

/// Parses `source_str` with `Parser::parse_pat` in a fresh `ParseSess`
/// and returns the resulting pattern. Errors are handled by
/// `with_error_checking_parse`.
pub fn string_to_pat(source_str: String) -> P<ast::Pat> {
    let sess = ParseSess::new();
    with_error_checking_parse(source_str, &sess, |parser| parser.parse_pat())
}

/// Maps every string in `ids` through `token::str_to_ident`, keeping the
/// input order, and returns the identifiers as a new vector.
pub fn strs_to_idents(ids: Vec<&str> ) -> Vec<ast::Ident> {
    let mut idents = Vec::with_capacity(ids.len());
    for name in &ids {
        idents.push(token::str_to_ident(*name));
    }
    idents
}

/// Checks whether `a` matches the pattern `b`, treating pattern whitespace
/// in `a` leniently.
///
/// A run of pattern whitespace in `a` may stand in for a run of pattern
/// whitespace in `b`, or may have no counterpart in `b` at all (including
/// at the very start or end of `a`). Whitespace in `b`, on the other hand,
/// must be met by whitespace in `a`, and every other character has to be
/// equal in both strings.
///
/// The comparison walks both strings `char` by `char`; no Unicode
/// normalization is applied.
pub fn matches_codepattern(a : &str, b : &str) -> bool {
    let mut text = a.chars().peekable();
    let mut pattern = b.chars().peekable();

    loop {
        // Copy the peeked chars out so neither iterator stays borrowed while
        // the arms below advance them.
        let upcoming = (text.peek().cloned(), pattern.peek().cloned());

        match upcoming {
            // Both exhausted together: everything lined up.
            (None, None) => return true,
            // The pattern still has input that `a` cannot supply.
            (None, Some(_)) => return false,
            // The pattern is done; whatever remains of `a` must be whitespace only.
            (Some(_), None) => return text.all(is_pattern_whitespace),
            (Some(ca), Some(cb)) => {
                if is_pattern_whitespace(ca) {
                    // Whitespace in `a` is always skippable...
                    scan_for_non_ws_or_end(&mut text);
                    // ...and it also absorbs a whitespace run in the pattern.
                    if is_pattern_whitespace(cb) {
                        scan_for_non_ws_or_end(&mut pattern);
                    }
                } else if ca == cb {
                    text.next();
                    pattern.next();
                } else {
                    return false;
                }
            }
        }
    }
}

/// Consumes items from `iter` for as long as `lexer::is_pattern_whitespace`
/// accepts the peeked value, leaving the first rejected item unconsumed.
///
/// NOTE(review): stopping at end of input relies on
/// `lexer::is_pattern_whitespace(None)` returning `false` -- presumably it
/// does; confirm against the lexer.
fn scan_for_non_ws_or_end<I: Iterator<Item= char>>(iter: &mut Peekable<I>) {
    loop {
        let upcoming = iter.peek().cloned();
        if !lexer::is_pattern_whitespace(upcoming) {
            return;
        }
        iter.next();
    }
}

/// Reports whether `c` counts as pattern whitespace, as decided by
/// `lexer::is_pattern_whitespace` (which takes an `Option<char>`).
pub fn is_pattern_whitespace(c: char) -> bool {
    let candidate = Some(c);
    lexer::is_pattern_whitespace(candidate)
}

#[cfg(test)]
mod tests {
    use super::*;

    // Plain ASCII inputs: whitespace in the first argument may be dropped or
    // swapped for other whitespace, but never the other way round.
    #[test]
    fn eqmodws() {
        assert!(matches_codepattern("",""));
        assert!(!matches_codepattern("","a"));
        assert!(!matches_codepattern("a",""));
        assert!(matches_codepattern("a","a"));
        assert!(matches_codepattern("a b","a   \n\t\r  b"));
        assert!(matches_codepattern("a b ","a   \n\t\r  b"));
        assert!(!matches_codepattern("a b","a   \n\t\r  b "));
        assert!(matches_codepattern("a   b","a b"));
        assert!(!matches_codepattern("ab","a b"));
        assert!(matches_codepattern("a   b","ab"));
        assert!(matches_codepattern(" a   b","ab"));
    }

    // Characters beyond space/tab/newline that are still pattern whitespace.
    #[test]
    fn pattern_whitespace() {
        assert!(!matches_codepattern("","\x0C"));
        assert!(matches_codepattern("a b ","a   \u{0085}\n\t\r  b"));
        assert!(!matches_codepattern("a b","a   \u{0085}\n\t\r  b "));
    }

    // Characters with the 'White_Space' property but not 'Pattern_White_Space'
    // must be compared like any ordinary character.
    #[test]
    fn non_pattern_whitespace() {
        assert!(!matches_codepattern("a b","a\u{2002}b"));
        assert!(!matches_codepattern("a   b","a\u{2002}b"));
        assert!(!matches_codepattern("\u{205F}a   b","ab"));
        assert!(!matches_codepattern("a  \u{3000}b","ab"));
    }
}
Commit	Line	Data
970d7e83 LB	1	// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
	2	// file at the top-level directory of this distribution and at
	3	// http://rust-lang.org/COPYRIGHT.
	4	//
	5	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	6	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	7	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	8	// option. This file may not be copied, modified, or distributed
	9	// except according to those terms.
	10
	11	use ast;
92a42be0	12	use parse::{ParseSess,PResult,filemap_to_tts};
54a0048b	13	use parse::{lexer, new_parser_from_source_str};
970d7e83 LB	14	use parse::parser::Parser;
970d7e83 LB	15	use parse::token;
1a4d82fc	16	use ptr::P;
54a0048b	17	use std::iter::Peekable;
970d7e83	18
1a4d82fc JJ	19	/// Map a string to tts, using a made-up filename:
1a4d82fc JJ	20	pub fn string_to_tts(source_str: String) -> Vec<ast::TokenTree> {
62682a34 SL	21	let ps = ParseSess::new();
62682a34 SL	22	filemap_to_tts(&ps, ps.codemap().new_filemap("bogofile".to_string(), source_str))
970d7e83 LB	23	}
970d7e83 LB	24
1a4d82fc JJ	25	/// Map string to parser (via tts)
	26	pub fn string_to_parser<'a>(ps: &'a ParseSess, source_str: String) -> Parser<'a> {
	27	new_parser_from_source_str(ps,
	28	Vec::new(),
	29	"bogofile".to_string(),
	30	source_str)
970d7e83 LB	31	}
970d7e83 LB	32
9cc50fc6 SL	33	fn with_error_checking_parse<'a, T, F>(s: String, ps: &'a ParseSess, f: F) -> T where
9cc50fc6 SL	34	F: FnOnce(&mut Parser<'a>) -> PResult<'a, T>,
1a4d82fc	35	{
1a4d82fc	36	let mut p = string_to_parser(&ps, s);
92a42be0	37	let x = panictry!(f(&mut p));
1a4d82fc JJ	38	p.abort_if_errors();
1a4d82fc JJ	39	x
970d7e83 LB	40	}
970d7e83 LB	41
1a4d82fc JJ	42	/// Parse a string, return a crate.
1a4d82fc JJ	43	pub fn string_to_crate (source_str : String) -> ast::Crate {
9cc50fc6 SL	44	let ps = ParseSess::new();
9cc50fc6 SL	45	with_error_checking_parse(source_str, &ps, \|p\| {
92a42be0	46	p.parse_crate_mod()
1a4d82fc	47	})
970d7e83 LB	48	}
970d7e83 LB	49
1a4d82fc JJ	50	/// Parse a string, return an expr
1a4d82fc JJ	51	pub fn string_to_expr (source_str : String) -> P<ast::Expr> {
9cc50fc6 SL	52	let ps = ParseSess::new();
9cc50fc6 SL	53	with_error_checking_parse(source_str, &ps, \|p\| {
1a4d82fc JJ	54	p.parse_expr()
1a4d82fc JJ	55	})
970d7e83 LB	56	}
970d7e83 LB	57
1a4d82fc JJ	58	/// Parse a string, return an item
1a4d82fc JJ	59	pub fn string_to_item (source_str : String) -> Option<P<ast::Item>> {
9cc50fc6 SL	60	let ps = ParseSess::new();
9cc50fc6 SL	61	with_error_checking_parse(source_str, &ps, \|p\| {
c34b1796	62	p.parse_item()
1a4d82fc	63	})
970d7e83 LB	64	}
970d7e83 LB	65
1a4d82fc	66	/// Parse a string, return a stmt
7453a54e	67	pub fn string_to_stmt(source_str : String) -> Option<ast::Stmt> {
9cc50fc6 SL	68	let ps = ParseSess::new();
9cc50fc6 SL	69	with_error_checking_parse(source_str, &ps, \|p\| {
92a42be0	70	p.parse_stmt()
1a4d82fc	71	})
970d7e83 LB	72	}
970d7e83 LB	73
1a4d82fc JJ	74	/// Parse a string, return a pat. Uses "irrefutable"... which doesn't
	75	/// (currently) affect parsing.
	76	pub fn string_to_pat(source_str: String) -> P<ast::Pat> {
9cc50fc6 SL	77	let ps = ParseSess::new();
9cc50fc6 SL	78	with_error_checking_parse(source_str, &ps, \|p\| {
92a42be0 SL	79	p.parse_pat()
92a42be0 SL	80	})
970d7e83 LB	81	}
970d7e83 LB	82
1a4d82fc JJ	83	/// Convert a vector of strings to a vector of ast::Ident's
	84	pub fn strs_to_idents(ids: Vec<&str> ) -> Vec<ast::Ident> {
	85	ids.iter().map(\|u\| token::str_to_ident(*u)).collect()
970d7e83 LB	86	}
970d7e83 LB	87
1a4d82fc JJ	88	/// Does the given string match the pattern? whitespace in the first string
1a4d82fc JJ	89	/// may be deleted or replaced with other whitespace to match the pattern.
54a0048b SL	90	/// This function is relatively Unicode-ignorant; fortunately, the careful design
54a0048b SL	91	/// of UTF-8 mitigates this ignorance. It doesn't do NKF-normalization(?).
970d7e83	92	pub fn matches_codepattern(a : &str, b : &str) -> bool {
54a0048b SL	93	let mut a_iter = a.chars().peekable();
	94	let mut b_iter = b.chars().peekable();
	95
970d7e83	96	loop {
54a0048b SL	97	let (a, b) = match (a_iter.peek(), b_iter.peek()) {
	98	(None, None) => return true,
	99	(None, _) => return false,
	100	(Some(&a), None) => {
	101	if is_pattern_whitespace(a) {
	102	break // trailing whitespace check is out of loop for borrowck
	103	} else {
	104	return false
	105	}
970d7e83	106	}
54a0048b SL	107	(Some(&a), Some(&b)) => (a, b)
	108	};
	109
	110	if is_pattern_whitespace(a) && is_pattern_whitespace(b) {
	111	// skip whitespace for a and b
	112	scan_for_non_ws_or_end(&mut a_iter);
	113	scan_for_non_ws_or_end(&mut b_iter);
	114	} else if is_pattern_whitespace(a) {
	115	// skip whitespace for a
	116	scan_for_non_ws_or_end(&mut a_iter);
	117	} else if a == b {
	118	a_iter.next();
	119	b_iter.next();
	120	} else {
	121	return false
970d7e83 LB	122	}
970d7e83 LB	123	}
54a0048b SL	124
	125	// check if a has only trailing whitespace
	126	a_iter.all(is_pattern_whitespace)
970d7e83 LB	127	}
970d7e83 LB	128
54a0048b SL	129	/// Advances the given peekable `Iterator` until it reaches a non-whitespace character
	130	fn scan_for_non_ws_or_end<I: Iterator<Item= char>>(iter: &mut Peekable<I>) {
	131	while lexer::is_pattern_whitespace(iter.peek().cloned()) {
	132	iter.next();
970d7e83	133	}
970d7e83 LB	134	}
970d7e83 LB	135
54a0048b SL	136	pub fn is_pattern_whitespace(c: char) -> bool {
54a0048b SL	137	lexer::is_pattern_whitespace(Some(c))
970d7e83 LB	138	}
	139
	140	#[cfg(test)]
d9579d0f	141	mod tests {
970d7e83 LB	142	use super::*;
970d7e83 LB	143
54a0048b SL	144	#[test]
54a0048b SL	145	fn eqmodws() {
970d7e83 LB	146	assert_eq!(matches_codepattern("",""),true);
	147	assert_eq!(matches_codepattern("","a"),false);
	148	assert_eq!(matches_codepattern("a",""),false);
	149	assert_eq!(matches_codepattern("a","a"),true);
	150	assert_eq!(matches_codepattern("a b","a \n\t\r b"),true);
	151	assert_eq!(matches_codepattern("a b ","a \n\t\r b"),true);
	152	assert_eq!(matches_codepattern("a b","a \n\t\r b "),false);
	153	assert_eq!(matches_codepattern("a b","a b"),true);
	154	assert_eq!(matches_codepattern("ab","a b"),false);
	155	assert_eq!(matches_codepattern("a b","ab"),true);
54a0048b SL	156	assert_eq!(matches_codepattern(" a b","ab"),true);
	157	}
	158
	159	#[test]
	160	fn pattern_whitespace() {
	161	assert_eq!(matches_codepattern("","\x0C"), false);
	162	assert_eq!(matches_codepattern("a b ","a \u{0085}\n\t\r b"),true);
	163	assert_eq!(matches_codepattern("a b","a \u{0085}\n\t\r b "),false);
	164	}
	165
	166	#[test]
	167	fn non_pattern_whitespace() {
	168	// These have the property 'White_Space' but not 'Pattern_White_Space'
	169	assert_eq!(matches_codepattern("a b","a\u{2002}b"), false);
	170	assert_eq!(matches_codepattern("a b","a\u{2002}b"), false);
	171	assert_eq!(matches_codepattern("\u{205F}a b","ab"), false);
	172	assert_eq!(matches_codepattern("a \u{3000}b","ab"), false);
970d7e83 LB	173	}
970d7e83 LB	174	}