]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /////////////////////////////////////////////////////////////////////////////// |
2 | /// \file regex_primitives.hpp | |
3 | /// Contains the syntax elements for writing static regular expressions. | |
4 | // | |
5 | // Copyright 2008 Eric Niebler. Distributed under the Boost | |
6 | // Software License, Version 1.0. (See accompanying file | |
7 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
8 | ||
9 | #ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005 | |
10 | #define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005 | |
11 | ||
12 | #include <vector> | |
13 | #include <climits> | |
14 | #include <boost/config.hpp> | |
15 | #include <boost/assert.hpp> | |
16 | #include <boost/mpl/if.hpp> | |
17 | #include <boost/mpl/and.hpp> | |
18 | #include <boost/mpl/assert.hpp> | |
19 | #include <boost/detail/workaround.hpp> | |
20 | #include <boost/preprocessor/cat.hpp> | |
21 | #include <boost/xpressive/detail/detail_fwd.hpp> | |
22 | #include <boost/xpressive/detail/core/matchers.hpp> | |
23 | #include <boost/xpressive/detail/core/regex_domain.hpp> | |
24 | #include <boost/xpressive/detail/utility/ignore_unused.hpp> | |
25 | ||
26 | // Doxygen can't handle proto :-( | |
27 | #ifndef BOOST_XPRESSIVE_DOXYGEN_INVOKED | |
28 | # include <boost/proto/core.hpp> | |
29 | # include <boost/proto/transform/arg.hpp> | |
30 | # include <boost/proto/transform/when.hpp> | |
31 | # include <boost/xpressive/detail/core/icase.hpp> | |
32 | # include <boost/xpressive/detail/static/compile.hpp> | |
33 | # include <boost/xpressive/detail/static/modifier.hpp> | |
34 | #endif | |
35 | ||
36 | namespace boost { namespace xpressive { namespace detail | |
37 | { | |
38 | ||
39 | typedef assert_word_placeholder<word_boundary<mpl::true_> > assert_word_boundary; | |
40 | typedef assert_word_placeholder<word_begin> assert_word_begin; | |
41 | typedef assert_word_placeholder<word_end> assert_word_end; | |
42 | ||
43 | // workaround msvc-7.1 bug with function pointer types | |
44 | // within function types: | |
45 | #if BOOST_WORKAROUND(BOOST_MSVC, == 1310) | |
46 | #define mark_number(x) proto::call<mark_number(x)> | |
47 | #define minus_one() proto::make<minus_one()> | |
48 | #endif | |
49 | ||
50 | struct push_back : proto::callable | |
51 | { | |
52 | typedef int result_type; | |
53 | ||
54 | template<typename Subs> | |
55 | int operator ()(Subs &subs, int i) const | |
56 | { | |
57 | subs.push_back(i); | |
58 | return i; | |
59 | } | |
60 | }; | |
61 | ||
62 | struct mark_number : proto::callable | |
63 | { | |
64 | typedef int result_type; | |
65 | ||
66 | template<typename Expr> | |
67 | int operator ()(Expr const &expr) const | |
68 | { | |
69 | return expr.mark_number_; | |
70 | } | |
71 | }; | |
72 | ||
73 | typedef mpl::int_<-1> minus_one; | |
74 | ||
75 | // s1 or -s1 | |
76 | struct SubMatch | |
77 | : proto::or_< | |
78 | proto::when<basic_mark_tag, push_back(proto::_data, mark_number(proto::_value)) > | |
79 | , proto::when<proto::negate<basic_mark_tag>, push_back(proto::_data, minus_one()) > | |
80 | > | |
81 | {}; | |
82 | ||
83 | struct SubMatchList | |
84 | : proto::or_<SubMatch, proto::comma<SubMatchList, SubMatch> > | |
85 | {}; | |
86 | ||
87 | template<typename Subs> | |
88 | typename enable_if< | |
89 | mpl::and_<proto::is_expr<Subs>, proto::matches<Subs, SubMatchList> > | |
90 | , std::vector<int> | |
91 | >::type | |
92 | to_vector(Subs const &subs) | |
93 | { | |
94 | std::vector<int> subs_; | |
95 | SubMatchList()(subs, 0, subs_); | |
96 | return subs_; | |
97 | } | |
98 | ||
99 | #if BOOST_WORKAROUND(BOOST_MSVC, == 1310) | |
100 | #undef mark_number | |
101 | #undef minus_one | |
102 | #endif | |
103 | ||
104 | // replace "Expr" with "keep(*State) >> Expr" | |
105 | struct skip_primitives : proto::transform<skip_primitives> | |
106 | { | |
107 | template<typename Expr, typename State, typename Data> | |
108 | struct impl : proto::transform_impl<Expr, State, Data> | |
109 | { | |
110 | typedef | |
111 | typename proto::shift_right< | |
112 | typename proto::unary_expr< | |
113 | keeper_tag | |
114 | , typename proto::dereference<State>::type | |
115 | >::type | |
116 | , Expr | |
117 | >::type | |
118 | result_type; | |
119 | ||
120 | result_type operator ()( | |
121 | typename impl::expr_param expr | |
122 | , typename impl::state_param state | |
123 | , typename impl::data_param | |
124 | ) const | |
125 | { | |
126 | result_type that = {{{state}}, expr}; | |
127 | return that; | |
128 | } | |
129 | }; | |
130 | }; | |
131 | ||
132 | struct Primitives | |
133 | : proto::or_< | |
134 | proto::terminal<proto::_> | |
135 | , proto::comma<proto::_, proto::_> | |
136 | , proto::subscript<proto::terminal<set_initializer>, proto::_> | |
137 | , proto::assign<proto::terminal<set_initializer>, proto::_> | |
138 | , proto::assign<proto::terminal<attribute_placeholder<proto::_> >, proto::_> | |
139 | , proto::complement<Primitives> | |
140 | > | |
141 | {}; | |
142 | ||
143 | struct SkipGrammar | |
144 | : proto::or_< | |
145 | proto::when<Primitives, skip_primitives> | |
146 | , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar> // don't "skip" mark tags | |
147 | , proto::subscript<SkipGrammar, proto::_> // don't put skips in actions | |
148 | , proto::binary_expr<modifier_tag, proto::_, SkipGrammar> // don't skip modifiers | |
149 | , proto::unary_expr<lookbehind_tag, proto::_> // don't skip lookbehinds | |
150 | , proto::nary_expr<proto::_, proto::vararg<SkipGrammar> > // everything else is fair game! | |
151 | > | |
152 | {}; | |
153 | ||
154 | template<typename Skip> | |
155 | struct skip_directive | |
156 | { | |
157 | typedef typename proto::result_of::as_expr<Skip>::type skip_type; | |
158 | ||
159 | skip_directive(Skip const &skip) | |
160 | : skip_(proto::as_expr(skip)) | |
161 | {} | |
162 | ||
163 | template<typename Sig> | |
164 | struct result {}; | |
165 | ||
166 | template<typename This, typename Expr> | |
167 | struct result<This(Expr)> | |
168 | { | |
169 | typedef | |
170 | SkipGrammar::impl< | |
171 | typename proto::result_of::as_expr<Expr>::type | |
172 | , skip_type const & | |
173 | , mpl::void_ & | |
174 | > | |
175 | skip_transform; | |
176 | ||
177 | typedef | |
178 | typename proto::shift_right< | |
179 | typename skip_transform::result_type | |
180 | , typename proto::dereference<skip_type>::type | |
181 | >::type | |
182 | type; | |
183 | }; | |
184 | ||
185 | template<typename Expr> | |
186 | typename result<skip_directive(Expr)>::type | |
187 | operator ()(Expr const &expr) const | |
188 | { | |
189 | mpl::void_ ignore; | |
190 | typedef result<skip_directive(Expr)> result_fun; | |
191 | typename result_fun::type that = { | |
192 | typename result_fun::skip_transform()(proto::as_expr(expr), this->skip_, ignore) | |
193 | , {skip_} | |
194 | }; | |
195 | return that; | |
196 | } | |
197 | ||
198 | private: | |
199 | skip_type skip_; | |
200 | }; | |
201 | ||
202 | /* | |
203 | /////////////////////////////////////////////////////////////////////////////// | |
204 | /// INTERNAL ONLY | |
205 | // BOOST_XPRESSIVE_GLOBAL | |
206 | // for defining globals that neither violate the One Definition Rule nor | |
207 | // lead to undefined behavior due to global object initialization order. | |
208 | //#define BOOST_XPRESSIVE_GLOBAL(type, name, init) \ | |
209 | // namespace detail \ | |
210 | // { \ | |
211 | // template<int Dummy> \ | |
212 | // struct BOOST_PP_CAT(global_pod_, name) \ | |
213 | // { \ | |
214 | // static type const value; \ | |
215 | // private: \ | |
216 | // union type_must_be_pod \ | |
217 | // { \ | |
218 | // type t; \ | |
219 | // char ch; \ | |
220 | // } u; \ | |
221 | // }; \ | |
222 | // template<int Dummy> \ | |
223 | // type const BOOST_PP_CAT(global_pod_, name)<Dummy>::value = init; \ | |
224 | // } \ | |
225 | // type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value | |
226 | */ | |
227 | ||
228 | ||
229 | } // namespace detail | |
230 | ||
231 | /// INTERNAL ONLY (for backwards compatibility) | |
232 | unsigned int const repeat_max = UINT_MAX-1; | |
233 | ||
234 | /////////////////////////////////////////////////////////////////////////////// | |
235 | /// \brief For infinite repetition of a sub-expression. | |
236 | /// | |
237 | /// Magic value used with the repeat\<\>() function template | |
238 | /// to specify an unbounded repeat. Use as: repeat<17, inf>('a'). | |
239 | /// The equivalent in perl is /a{17,}/. | |
240 | unsigned int const inf = UINT_MAX-1; | |
241 | ||
242 | /// INTERNAL ONLY (for backwards compatibility) | |
243 | proto::terminal<detail::epsilon_matcher>::type const epsilon = {{}}; | |
244 | ||
245 | /////////////////////////////////////////////////////////////////////////////// | |
246 | /// \brief Successfully matches nothing. | |
247 | /// | |
248 | /// Successfully matches a zero-width sequence. nil always succeeds and | |
249 | /// never consumes any characters. | |
250 | proto::terminal<detail::epsilon_matcher>::type const nil = {{}}; | |
251 | ||
252 | /////////////////////////////////////////////////////////////////////////////// | |
253 | /// \brief Matches an alpha-numeric character. | |
254 | /// | |
255 | /// The regex traits are used to determine which characters are alpha-numeric. | |
256 | /// To match any character that is not alpha-numeric, use ~alnum. | |
257 | /// | |
258 | /// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent | |
259 | /// to /[[:^alnum:]]/ in perl. | |
260 | proto::terminal<detail::posix_charset_placeholder>::type const alnum = {{"alnum", false}}; | |
261 | ||
262 | /////////////////////////////////////////////////////////////////////////////// | |
263 | /// \brief Matches an alphabetic character. | |
264 | /// | |
265 | /// The regex traits are used to determine which characters are alphabetic. | |
266 | /// To match any character that is not alphabetic, use ~alpha. | |
267 | /// | |
268 | /// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent | |
269 | /// to /[[:^alpha:]]/ in perl. | |
270 | proto::terminal<detail::posix_charset_placeholder>::type const alpha = {{"alpha", false}}; | |
271 | ||
272 | /////////////////////////////////////////////////////////////////////////////// | |
273 | /// \brief Matches a blank (horizontal white-space) character. | |
274 | /// | |
275 | /// The regex traits are used to determine which characters are blank characters. | |
276 | /// To match any character that is not blank, use ~blank. | |
277 | /// | |
278 | /// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent | |
279 | /// to /[[:^blank:]]/ in perl. | |
280 | proto::terminal<detail::posix_charset_placeholder>::type const blank = {{"blank", false}}; | |
281 | ||
282 | /////////////////////////////////////////////////////////////////////////////// | |
283 | /// \brief Matches a control character. | |
284 | /// | |
285 | /// The regex traits are used to determine which characters are control characters. | |
286 | /// To match any character that is not a control character, use ~cntrl. | |
287 | /// | |
288 | /// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent | |
289 | /// to /[[:^cntrl:]]/ in perl. | |
290 | proto::terminal<detail::posix_charset_placeholder>::type const cntrl = {{"cntrl", false}}; | |
291 | ||
292 | /////////////////////////////////////////////////////////////////////////////// | |
293 | /// \brief Matches a digit character. | |
294 | /// | |
295 | /// The regex traits are used to determine which characters are digits. | |
296 | /// To match any character that is not a digit, use ~digit. | |
297 | /// | |
298 | /// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent | |
299 | /// to /[[:^digit:]]/ in perl. | |
300 | proto::terminal<detail::posix_charset_placeholder>::type const digit = {{"digit", false}}; | |
301 | ||
302 | /////////////////////////////////////////////////////////////////////////////// | |
303 | /// \brief Matches a graph character. | |
304 | /// | |
305 | /// The regex traits are used to determine which characters are graphable. | |
306 | /// To match any character that is not graphable, use ~graph. | |
307 | /// | |
308 | /// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent | |
309 | /// to /[[:^graph:]]/ in perl. | |
310 | proto::terminal<detail::posix_charset_placeholder>::type const graph = {{"graph", false}}; | |
311 | ||
312 | /////////////////////////////////////////////////////////////////////////////// | |
313 | /// \brief Matches a lower-case character. | |
314 | /// | |
315 | /// The regex traits are used to determine which characters are lower-case. | |
316 | /// To match any character that is not a lower-case character, use ~lower. | |
317 | /// | |
318 | /// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent | |
319 | /// to /[[:^lower:]]/ in perl. | |
320 | proto::terminal<detail::posix_charset_placeholder>::type const lower = {{"lower", false}}; | |
321 | ||
322 | /////////////////////////////////////////////////////////////////////////////// | |
323 | /// \brief Matches a printable character. | |
324 | /// | |
325 | /// The regex traits are used to determine which characters are printable. | |
326 | /// To match any character that is not printable, use ~print. | |
327 | /// | |
328 | /// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent | |
329 | /// to /[[:^print:]]/ in perl. | |
330 | proto::terminal<detail::posix_charset_placeholder>::type const print = {{"print", false}}; | |
331 | ||
332 | /////////////////////////////////////////////////////////////////////////////// | |
333 | /// \brief Matches a punctuation character. | |
334 | /// | |
335 | /// The regex traits are used to determine which characters are punctuation. | |
336 | /// To match any character that is not punctuation, use ~punct. | |
337 | /// | |
338 | /// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent | |
339 | /// to /[[:^punct:]]/ in perl. | |
340 | proto::terminal<detail::posix_charset_placeholder>::type const punct = {{"punct", false}}; | |
341 | ||
342 | /////////////////////////////////////////////////////////////////////////////// | |
343 | /// \brief Matches a space character. | |
344 | /// | |
345 | /// The regex traits are used to determine which characters are space characters. | |
346 | /// To match any character that is not white-space, use ~space. | |
347 | /// | |
348 | /// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent | |
349 | /// to /[[:^space:]]/ in perl. | |
350 | proto::terminal<detail::posix_charset_placeholder>::type const space = {{"space", false}}; | |
351 | ||
352 | /////////////////////////////////////////////////////////////////////////////// | |
353 | /// \brief Matches an upper-case character. | |
354 | /// | |
355 | /// The regex traits are used to determine which characters are upper-case. | |
356 | /// To match any character that is not upper-case, use ~upper. | |
357 | /// | |
358 | /// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent | |
359 | /// to /[[:^upper:]]/ in perl. | |
360 | proto::terminal<detail::posix_charset_placeholder>::type const upper = {{"upper", false}}; | |
361 | ||
362 | /////////////////////////////////////////////////////////////////////////////// | |
363 | /// \brief Matches a hexadecimal digit character. | |
364 | /// | |
365 | /// The regex traits are used to determine which characters are hex digits. | |
366 | /// To match any character that is not a hex digit, use ~xdigit. | |
367 | /// | |
368 | /// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent | |
369 | /// to /[[:^xdigit:]]/ in perl. | |
370 | proto::terminal<detail::posix_charset_placeholder>::type const xdigit = {{"xdigit", false}}; | |
371 | ||
372 | /////////////////////////////////////////////////////////////////////////////// | |
373 | /// \brief Beginning of sequence assertion. | |
374 | /// | |
375 | /// For the character sequence [begin, end), 'bos' matches the | |
376 | /// zero-width sub-sequence [begin, begin). | |
377 | proto::terminal<detail::assert_bos_matcher>::type const bos = {{}}; | |
378 | ||
379 | /////////////////////////////////////////////////////////////////////////////// | |
380 | /// \brief End of sequence assertion. | |
381 | /// | |
382 | /// For the character sequence [begin, end), | |
383 | /// 'eos' matches the zero-width sub-sequence [end, end). | |
384 | /// | |
385 | /// \attention Unlike the perl end of sequence assertion \$, 'eos' will | |
386 | /// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To | |
387 | /// get that behavior, use (!_n >> eos). | |
388 | proto::terminal<detail::assert_eos_matcher>::type const eos = {{}}; | |
389 | ||
390 | /////////////////////////////////////////////////////////////////////////////// | |
391 | /// \brief Beginning of line assertion. | |
392 | /// | |
393 | /// 'bol' matches the zero-width sub-sequence | |
394 | /// immediately following a logical newline sequence. The regex traits | |
395 | /// are used to determine what constitutes a logical newline sequence. | |
396 | proto::terminal<detail::assert_bol_placeholder>::type const bol = {{}}; | |
397 | ||
398 | /////////////////////////////////////////////////////////////////////////////// | |
399 | /// \brief End of line assertion. | |
400 | /// | |
401 | /// 'eol' matches the zero-width sub-sequence | |
402 | /// immediately preceding a logical newline sequence. The regex traits | |
403 | /// are used to determine what constitutes a logical newline sequence. | |
404 | proto::terminal<detail::assert_eol_placeholder>::type const eol = {{}}; | |
405 | ||
406 | /////////////////////////////////////////////////////////////////////////////// | |
407 | /// \brief Beginning of word assertion. | |
408 | /// | |
409 | /// 'bow' matches the zero-width sub-sequence | |
410 | /// immediately following a non-word character and preceding a word character. | |
411 | /// The regex traits are used to determine what constitutes a word character. | |
412 | proto::terminal<detail::assert_word_begin>::type const bow = {{}}; | |
413 | ||
414 | /////////////////////////////////////////////////////////////////////////////// | |
415 | /// \brief End of word assertion. | |
416 | /// | |
417 | /// 'eow' matches the zero-width sub-sequence | |
418 | /// immediately following a word character and preceding a non-word character. | |
419 | /// The regex traits are used to determine what constitutes a word character. | |
420 | proto::terminal<detail::assert_word_end>::type const eow = {{}}; | |
421 | ||
422 | /////////////////////////////////////////////////////////////////////////////// | |
423 | /// \brief Word boundary assertion. | |
424 | /// | |
425 | /// '_b' matches the zero-width sub-sequence at the beginning or the end of a word. | |
426 | /// It is equivalent to (bow | eow). The regex traits are used to determine what | |
427 | /// constitutes a word character. To match a non-word boundary, use ~_b. | |
428 | /// | |
429 | /// \attention _b is like \\b in perl. ~_b is like \\B in perl. | |
430 | proto::terminal<detail::assert_word_boundary>::type const _b = {{}}; | |
431 | ||
432 | /////////////////////////////////////////////////////////////////////////////// | |
433 | /// \brief Matches a word character. | |
434 | /// | |
435 | /// '_w' matches a single word character. The regex traits are used to determine which | |
436 | /// characters are word characters. Use ~_w to match a character that is not a word | |
437 | /// character. | |
438 | /// | |
439 | /// \attention _w is like \\w in perl. ~_w is like \\W in perl. | |
440 | proto::terminal<detail::posix_charset_placeholder>::type const _w = {{"w", false}}; | |
441 | ||
442 | /////////////////////////////////////////////////////////////////////////////// | |
443 | /// \brief Matches a digit character. | |
444 | /// | |
445 | /// '_d' matches a single digit character. The regex traits are used to determine which | |
446 | /// characters are digits. Use ~_d to match a character that is not a digit | |
447 | /// character. | |
448 | /// | |
449 | /// \attention _d is like \\d in perl. ~_d is like \\D in perl. | |
450 | proto::terminal<detail::posix_charset_placeholder>::type const _d = {{"d", false}}; | |
451 | ||
452 | /////////////////////////////////////////////////////////////////////////////// | |
453 | /// \brief Matches a space character. | |
454 | /// | |
455 | /// '_s' matches a single space character. The regex traits are used to determine which | |
456 | /// characters are space characters. Use ~_s to match a character that is not a space | |
457 | /// character. | |
458 | /// | |
459 | /// \attention _s is like \\s in perl. ~_s is like \\S in perl. | |
460 | proto::terminal<detail::posix_charset_placeholder>::type const _s = {{"s", false}}; | |
461 | ||
462 | /////////////////////////////////////////////////////////////////////////////// | |
463 | /// \brief Matches a literal newline character, '\\n'. | |
464 | /// | |
465 | /// '_n' matches a single newline character, '\\n'. Use ~_n to match a character | |
466 | /// that is not a newline. | |
467 | /// | |
468 | /// \attention ~_n is like '.' in perl without the /s modifier. | |
469 | proto::terminal<char>::type const _n = {'\n'}; | |
470 | ||
471 | /////////////////////////////////////////////////////////////////////////////// | |
472 | /// \brief Matches a logical newline sequence. | |
473 | /// | |
474 | /// '_ln' matches a logical newline sequence. This can be any character in the | |
475 | /// line separator class, as determined by the regex traits, or the '\\r\\n' sequence. | |
476 | /// For the purpose of back-tracking, '\\r\\n' is treated as a unit. | |
477 | /// To match any one character that is not a logical newline, use ~_ln. | |
478 | detail::logical_newline_xpression const _ln = {{}}; | |
479 | ||
480 | /////////////////////////////////////////////////////////////////////////////// | |
481 | /// \brief Matches any one character. | |
482 | /// | |
483 | /// Match any character, similar to '.' in perl syntax with the /s modifier. | |
484 | /// '_' matches any one character, including the newline. | |
485 | /// | |
486 | /// \attention To match any character except the newline, use ~_n | |
487 | proto::terminal<detail::any_matcher>::type const _ = {{}}; | |
488 | ||
489 | /////////////////////////////////////////////////////////////////////////////// | |
490 | /// \brief Reference to the current regex object | |
491 | /// | |
492 | /// Useful when constructing recursive regular expression objects. The 'self' | |
493 | /// identifier is a short-hand for the current regex object. For instance, | |
494 | /// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that | |
495 | /// matches balanced parens such as "((()))". | |
496 | proto::terminal<detail::self_placeholder>::type const self = {{}}; | |
497 | ||
498 | /////////////////////////////////////////////////////////////////////////////// | |
499 | /// \brief Used to create character sets. | |
500 | /// | |
501 | /// There are two ways to create character sets with the 'set' identifier. The | |
502 | /// easiest is to create a comma-separated list of the characters in the set, | |
503 | /// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other | |
504 | /// way is to define the set as an argument to the set subscript operator. | |
505 | /// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b', | |
506 | /// 'c' or a digit character. | |
507 | /// | |
508 | /// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c') | |
509 | /// will match any character that is not an 'a', 'b', or 'c'. | |
510 | /// | |
511 | /// Sets can be composed of other, possibly complemented, sets. For instance, | |
512 | /// set[ ~digit | ~(set= 'a','b','c') ]. | |
513 | detail::set_initializer_type const set = {{}}; | |
514 | ||
515 | /////////////////////////////////////////////////////////////////////////////// | |
516 | /// \brief Sub-match placeholder type, used to create named captures in | |
517 | /// static regexes. | |
518 | /// | |
519 | /// \c mark_tag is the type of the global sub-match placeholders \c s0, \c s1, etc. You | |
520 | /// can use the \c mark_tag type to create your own sub-match placeholders with | |
521 | /// more meaningful names. This is roughly equivalent to the "named capture" | |
522 | /// feature of dynamic regular expressions. | |
523 | /// | |
524 | /// To create a named sub-match placeholder, initialize it with a unique integer. | |
525 | /// The integer must only be unique within the regex in which the placeholder | |
526 | /// is used. Then you can use it within static regexes to create sub-matches | |
527 | /// by assigning a sub-expression to it, or to refer back to already created | |
528 | /// sub-matches. | |
529 | /// | |
530 | /// \code | |
531 | /// mark_tag number(1); // "number" is now equivalent to "s1" | |
532 | /// // Match a number, followed by a space and the same number again | |
533 | /// sregex rx = (number = +_d) >> ' ' >> number; | |
534 | /// \endcode | |
535 | /// | |
536 | /// After a successful \c regex_match() or \c regex_search(), the sub-match placeholder | |
537 | /// can be used to index into the <tt>match_results\<\></tt> object to retrieve the | |
538 | /// corresponding sub-match. | |
539 | struct mark_tag | |
540 | : proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> | |
541 | { | |
542 | private: | |
543 | typedef proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> base_type; | |
544 | ||
545 | static detail::basic_mark_tag make_tag(int mark_nbr) | |
546 | { | |
547 | detail::basic_mark_tag mark = {{mark_nbr}}; | |
548 | return mark; | |
549 | } | |
550 | ||
551 | public: | |
552 | /// \brief Initialize a mark_tag placeholder | |
553 | /// \param mark_nbr An integer that uniquely identifies this \c mark_tag | |
554 | /// within the static regexes in which this \c mark_tag will be used. | |
555 | /// \pre <tt>mark_nbr \> 0</tt> | |
556 | mark_tag(int mark_nbr) | |
557 | : base_type(mark_tag::make_tag(mark_nbr)) | |
558 | { | |
559 | // Marks numbers must be integers greater than 0. | |
560 | BOOST_ASSERT(mark_nbr > 0); | |
561 | } | |
562 | ||
563 | /// INTERNAL ONLY | |
564 | operator detail::basic_mark_tag const &() const | |
565 | { | |
566 | return this->proto_base(); | |
567 | } | |
568 | ||
569 | BOOST_PROTO_EXTENDS_USING_ASSIGN_NON_DEPENDENT(mark_tag) | |
570 | }; | |
571 | ||
572 | // This macro is used when declaring mark_tags that are global because | |
573 | // it guarantees that they are statically initialized. That avoids | |
574 | // order-of-initialization bugs. In user code, the simpler: mark_tag number(1); | |
575 | // would be preferable. | |
576 | /// INTERNAL ONLY | |
577 | #define BOOST_XPRESSIVE_GLOBAL_MARK_TAG(NAME, VALUE) \ | |
578 | boost::xpressive::mark_tag::proto_base_expr const NAME = {{VALUE}} \ | |
579 | /**/ | |
580 | ||
581 | /////////////////////////////////////////////////////////////////////////////// | |
582 | /// \brief Sub-match placeholder, like $& in Perl | |
583 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s0, 0); | |
584 | ||
585 | /////////////////////////////////////////////////////////////////////////////// | |
586 | /// \brief Sub-match placeholder, like $1 in perl. | |
587 | /// | |
588 | /// To create a sub-match, assign a sub-expression to the sub-match placeholder. | |
589 | /// For instance, (s1= _) will match any one character and remember which | |
590 | /// character was matched in the 1st sub-match. Later in the pattern, you can | |
591 | /// refer back to the sub-match. For instance, (s1= _) >> s1 will match any | |
592 | /// character, and then match the same character again. | |
593 | /// | |
594 | /// After a successful regex_match() or regex_search(), the sub-match placeholders | |
595 | /// can be used to index into the match_results\<\> object to retrieve the Nth | |
596 | /// sub-match. | |
597 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s1, 1); | |
598 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s2, 2); | |
599 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s3, 3); | |
600 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s4, 4); | |
601 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s5, 5); | |
602 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s6, 6); | |
603 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s7, 7); | |
604 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s8, 8); | |
605 | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s9, 9); | |
606 | ||
607 | // NOTE: For the purpose of xpressive's documentation, make icase() look like an | |
608 | // ordinary function. In reality, it is a function object defined in detail/icase.hpp | |
609 | // so that it can serve double-duty as regex_constants::icase, the syntax_option_type. | |
610 | #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED | |
611 | /////////////////////////////////////////////////////////////////////////////// | |
612 | /// \brief Makes a sub-expression case-insensitive. | |
613 | /// | |
614 | /// Use icase() to make a sub-expression case-insensitive. For instance, | |
615 | /// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by | |
616 | /// "bar" irrespective of case. | |
617 | template<typename Expr> detail::unspecified icase(Expr const &expr) { return 0; } | |
618 | #endif | |
619 | ||
620 | /////////////////////////////////////////////////////////////////////////////// | |
621 | /// \brief Makes a literal into a regular expression. | |
622 | /// | |
623 | /// Use as_xpr() to turn a literal into a regular expression. For instance, | |
624 | /// "foo" >> "bar" will not compile because both operands to the right-shift | |
625 | /// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar" | |
626 | /// instead. | |
627 | /// | |
628 | /// You can use as_xpr() with character literals in addition to string literals. | |
629 | /// For instance, as_xpr('a') will match an 'a'. You can also complement a | |
630 | /// character literal, as with ~as_xpr('a'). This will match any one character | |
631 | /// that is not an 'a'. | |
632 | #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED | |
633 | template<typename Literal> detail::unspecified as_xpr(Literal const &literal) { return 0; } | |
634 | #else | |
635 | proto::functional::as_expr<> const as_xpr = {}; | |
636 | #endif | |
637 | ||
638 | /////////////////////////////////////////////////////////////////////////////// | |
639 | /// \brief Embed a regex object by reference. | |
640 | /// | |
641 | /// \param rex The basic_regex object to embed by reference. | |
642 | template<typename BidiIter> | |
643 | inline typename proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type const | |
644 | by_ref(basic_regex<BidiIter> const &rex) | |
645 | { | |
646 | reference_wrapper<basic_regex<BidiIter> const> ref(rex); | |
647 | return proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type::make(ref); | |
648 | } | |
649 | ||
650 | /////////////////////////////////////////////////////////////////////////////// | |
651 | /// \brief Match a range of characters. | |
652 | /// | |
653 | /// Match any character in the range [ch_min, ch_max]. | |
654 | /// | |
655 | /// \param ch_min The lower end of the range to match. | |
656 | /// \param ch_max The upper end of the range to match. | |
657 | template<typename Char> | |
658 | inline typename proto::terminal<detail::range_placeholder<Char> >::type const | |
659 | range(Char ch_min, Char ch_max) | |
660 | { | |
661 | detail::range_placeholder<Char> that = {ch_min, ch_max, false}; | |
662 | return proto::terminal<detail::range_placeholder<Char> >::type::make(that); | |
663 | } | |
664 | ||
665 | /////////////////////////////////////////////////////////////////////////////// | |
666 | /// \brief Make a sub-expression optional. Equivalent to !as_xpr(expr). | |
667 | /// | |
668 | /// \param expr The sub-expression to make optional. | |
669 | template<typename Expr> | |
670 | typename proto::result_of::make_expr< | |
671 | proto::tag::logical_not | |
672 | , proto::default_domain | |
673 | , Expr const & | |
674 | >::type const | |
675 | optional(Expr const &expr) | |
676 | { | |
677 | return proto::make_expr< | |
678 | proto::tag::logical_not | |
679 | , proto::default_domain | |
680 | >(boost::ref(expr)); | |
681 | } | |
682 | ||
683 | /////////////////////////////////////////////////////////////////////////////// | |
684 | /// \brief Repeat a sub-expression multiple times. | |
685 | /// | |
686 | /// There are two forms of the repeat\<\>() function template. To match a | |
687 | /// sub-expression N times, use repeat\<N\>(expr). To match a sub-expression | |
688 | /// from M to N times, use repeat\<M,N\>(expr). | |
689 | /// | |
690 | /// The repeat\<\>() function creates a greedy quantifier. To make the quantifier | |
691 | /// non-greedy, apply the unary minus operator, as in -repeat\<M,N\>(expr). | |
692 | /// | |
693 | /// \param expr The sub-expression to repeat. | |
694 | template<unsigned int Min, unsigned int Max, typename Expr> | |
695 | typename proto::result_of::make_expr< | |
696 | detail::generic_quant_tag<Min, Max> | |
697 | , proto::default_domain | |
698 | , Expr const & | |
699 | >::type const | |
700 | repeat(Expr const &expr) | |
701 | { | |
702 | return proto::make_expr< | |
703 | detail::generic_quant_tag<Min, Max> | |
704 | , proto::default_domain | |
705 | >(boost::ref(expr)); | |
706 | } | |
707 | ||
708 | /// \overload | |
709 | /// | |
710 | template<unsigned int Count, typename Expr2> | |
711 | typename proto::result_of::make_expr< | |
712 | detail::generic_quant_tag<Count, Count> | |
713 | , proto::default_domain | |
714 | , Expr2 const & | |
715 | >::type const | |
716 | repeat(Expr2 const &expr2) | |
717 | { | |
718 | return proto::make_expr< | |
719 | detail::generic_quant_tag<Count, Count> | |
720 | , proto::default_domain | |
721 | >(boost::ref(expr2)); | |
722 | } | |
723 | ||
724 | /////////////////////////////////////////////////////////////////////////////// | |
725 | /// \brief Create an independent sub-expression. | |
726 | /// | |
727 | /// Turn off back-tracking for a sub-expression. Any branches or repeats within | |
728 | /// the sub-expression will match only one way, and no other alternatives are | |
729 | /// tried. | |
730 | /// | |
731 | /// \attention keep(expr) is equivalent to the perl (?>...) extension. | |
732 | /// | |
733 | /// \param expr The sub-expression to modify. | |
734 | template<typename Expr> | |
735 | typename proto::result_of::make_expr< | |
736 | detail::keeper_tag | |
737 | , proto::default_domain | |
738 | , Expr const & | |
739 | >::type const | |
740 | keep(Expr const &expr) | |
741 | { | |
742 | return proto::make_expr< | |
743 | detail::keeper_tag | |
744 | , proto::default_domain | |
745 | >(boost::ref(expr)); | |
746 | } | |
747 | ||
748 | /////////////////////////////////////////////////////////////////////////////// | |
749 | /// \brief Look-ahead assertion. | |
750 | /// | |
751 | /// before(expr) succeeds if the expr sub-expression would match at the current | |
752 | /// position in the sequence, but expr is not included in the match. For instance, | |
753 | /// before("foo") succeeds if we are before a "foo". Look-ahead assertions can be | |
754 | /// negated with the bit-compliment operator. | |
755 | /// | |
756 | /// \attention before(expr) is equivalent to the perl (?=...) extension. | |
757 | /// ~before(expr) is a negative look-ahead assertion, equivalent to the | |
758 | /// perl (?!...) extension. | |
759 | /// | |
760 | /// \param expr The sub-expression to put in the look-ahead assertion. | |
761 | template<typename Expr> | |
762 | typename proto::result_of::make_expr< | |
763 | detail::lookahead_tag | |
764 | , proto::default_domain | |
765 | , Expr const & | |
766 | >::type const | |
767 | before(Expr const &expr) | |
768 | { | |
769 | return proto::make_expr< | |
770 | detail::lookahead_tag | |
771 | , proto::default_domain | |
772 | >(boost::ref(expr)); | |
773 | } | |
774 | ||
775 | /////////////////////////////////////////////////////////////////////////////// | |
776 | /// \brief Look-behind assertion. | |
777 | /// | |
778 | /// after(expr) succeeds if the expr sub-expression would match at the current | |
779 | /// position minus N in the sequence, where N is the width of expr. expr is not included in | |
780 | /// the match. For instance, after("foo") succeeds if we are after a "foo". Look-behind | |
781 | /// assertions can be negated with the bit-complement operator. | |
782 | /// | |
783 | /// \attention after(expr) is equivalent to the perl (?<=...) extension. | |
784 | /// ~after(expr) is a negative look-behind assertion, equivalent to the | |
785 | /// perl (?<!...) extension. | |
786 | /// | |
787 | /// \param expr The sub-expression to put in the look-ahead assertion. | |
788 | /// | |
789 | /// \pre expr cannot match a variable number of characters. | |
790 | template<typename Expr> | |
791 | typename proto::result_of::make_expr< | |
792 | detail::lookbehind_tag | |
793 | , proto::default_domain | |
794 | , Expr const & | |
795 | >::type const | |
796 | after(Expr const &expr) | |
797 | { | |
798 | return proto::make_expr< | |
799 | detail::lookbehind_tag | |
800 | , proto::default_domain | |
801 | >(boost::ref(expr)); | |
802 | } | |
803 | ||
804 | /////////////////////////////////////////////////////////////////////////////// | |
805 | /// \brief Specify a regex traits or a std::locale. | |
806 | /// | |
807 | /// imbue() instructs the regex engine to use the specified traits or locale | |
808 | /// when matching the regex. The entire expression must use the same traits/locale. | |
809 | /// For instance, the following specifies a locale for use with a regex: | |
810 | /// std::locale loc; | |
811 | /// sregex rx = imbue(loc)(+digit); | |
812 | /// | |
813 | /// \param loc The std::locale or regex traits object. | |
814 | template<typename Locale> | |
815 | inline detail::modifier_op<detail::locale_modifier<Locale> > const | |
816 | imbue(Locale const &loc) | |
817 | { | |
818 | detail::modifier_op<detail::locale_modifier<Locale> > mod = | |
819 | { | |
820 | detail::locale_modifier<Locale>(loc) | |
821 | , regex_constants::ECMAScript | |
822 | }; | |
823 | return mod; | |
824 | } | |
825 | ||
// Namespace-scope proto terminals a1 .. a9. Each one wraps a
// detail::attribute_placeholder tagged with mpl::int_<N>, where N matches the
// digit in the object's name, and is brace-initialised to an empty aggregate.
// NOTE(review): presumably these are the placeholders used to refer to
// attribute slots 1-9 in static regexes -- confirm against the
// attribute_placeholder documentation.
proto::terminal<detail::attribute_placeholder<mpl::int_<1> > >::type const a1 = {{}};
proto::terminal<detail::attribute_placeholder<mpl::int_<2> > >::type const a2 = {{}};
proto::terminal<detail::attribute_placeholder<mpl::int_<3> > >::type const a3 = {{}};
proto::terminal<detail::attribute_placeholder<mpl::int_<4> > >::type const a4 = {{}};
proto::terminal<detail::attribute_placeholder<mpl::int_<5> > >::type const a5 = {{}};
proto::terminal<detail::attribute_placeholder<mpl::int_<6> > >::type const a6 = {{}};
proto::terminal<detail::attribute_placeholder<mpl::int_<7> > >::type const a7 = {{}};
proto::terminal<detail::attribute_placeholder<mpl::int_<8> > >::type const a8 = {{}};
proto::terminal<detail::attribute_placeholder<mpl::int_<9> > >::type const a9 = {{}};
835 | ||
836 | /////////////////////////////////////////////////////////////////////////////// | |
837 | /// \brief Specify which characters to skip when matching a regex. | |
838 | /// | |
839 | /// <tt>skip()</tt> instructs the regex engine to skip certain characters when matching | |
840 | /// a regex. It is most useful for writing regexes that ignore whitespace. | |
841 | /// For instance, the following specifies a regex that skips whitespace and | |
842 | /// punctuation: | |
843 | /// | |
844 | /// \code | |
845 | /// // A sentence is one or more words separated by whitespace | |
846 | /// // and punctuation. | |
847 | /// sregex word = +alpha; | |
848 | /// sregex sentence = skip(set[_s | punct])( +word ); | |
849 | /// \endcode | |
850 | /// | |
851 | /// The way it works in the above example is to insert | |
852 | /// <tt>keep(*set[_s | punct])</tt> before each primitive within the regex. | |
853 | /// A "primitive" includes terminals like strings, character sets and nested | |
854 | /// regexes. A final <tt>*set[_s | punct]</tt> is added to the end of the | |
855 | /// regex. The regex <tt>sentence</tt> specified above is equivalent to | |
856 | /// the following: | |
857 | /// | |
858 | /// \code | |
859 | /// sregex sentence = +( keep(*set[_s | punct]) >> word ) | |
860 | /// >> *set[_s | punct]; | |
861 | /// \endcode | |
862 | /// | |
863 | /// \attention Skipping does not affect how nested regexes are handled because | |
864 | /// they are treated atomically. String literals are also treated | |
865 | /// atomically; that is, no skipping is done within a string literal. So | |
866 | /// <tt>skip(_s)("this that")</tt> is not the same as | |
867 | /// <tt>skip(_s)("this" >> as_xpr("that"))</tt>. The first will only match | |
868 | /// when there is only one space between "this" and "that". The second will | |
869 | /// skip any and all whitespace between "this" and "that". | |
870 | /// | |
871 | /// \param skip A regex that specifies which characters to skip. | |
872 | template<typename Skip> | |
873 | detail::skip_directive<Skip> skip(Skip const &skip) | |
874 | { | |
875 | return detail::skip_directive<Skip>(skip); | |
876 | } | |
877 | ||
namespace detail
{
    // Passes each of the namespace-scope primitive objects named below to
    // detail::ignore_unused, one call per object, and does nothing else.
    // NOTE(review): presumably this exists only to suppress "unused variable"
    // warnings for translation units that include this header without using
    // every primitive -- confirm against detail/utility/ignore_unused.hpp.
    // When a new primitive object is added to this header it most likely
    // needs a matching line here -- TODO confirm.
    inline void ignore_unused_regex_primitives()
    {
        detail::ignore_unused(repeat_max);
        detail::ignore_unused(inf);
        detail::ignore_unused(epsilon);
        detail::ignore_unused(nil);
        detail::ignore_unused(alnum);
        detail::ignore_unused(bos);
        detail::ignore_unused(eos);
        detail::ignore_unused(bol);
        detail::ignore_unused(eol);
        detail::ignore_unused(bow);
        detail::ignore_unused(eow);
        detail::ignore_unused(_b);
        detail::ignore_unused(_w);
        detail::ignore_unused(_d);
        detail::ignore_unused(_s);
        detail::ignore_unused(_n);
        detail::ignore_unused(_ln);
        detail::ignore_unused(_);
        detail::ignore_unused(self);
        detail::ignore_unused(set);
        // Sub-match placeholders s0 .. s9.
        detail::ignore_unused(s0);
        detail::ignore_unused(s1);
        detail::ignore_unused(s2);
        detail::ignore_unused(s3);
        detail::ignore_unused(s4);
        detail::ignore_unused(s5);
        detail::ignore_unused(s6);
        detail::ignore_unused(s7);
        detail::ignore_unused(s8);
        detail::ignore_unused(s9);
        // Attribute placeholders a1 .. a9.
        detail::ignore_unused(a1);
        detail::ignore_unused(a2);
        detail::ignore_unused(a3);
        detail::ignore_unused(a4);
        detail::ignore_unused(a5);
        detail::ignore_unused(a6);
        detail::ignore_unused(a7);
        detail::ignore_unused(a8);
        detail::ignore_unused(a9);
        detail::ignore_unused(as_xpr);
    }
}
924 | ||
925 | }} // namespace boost::xpressive | |
926 | ||
927 | #endif |