]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | [/ |
2 | Copyright 2006-2007 John Maddock. | |
3 | Distributed under the Boost Software License, Version 1.0. | |
4 | (See accompanying file LICENSE_1_0.txt or copy at | |
5 | http://www.boost.org/LICENSE_1_0.txt). | |
6 | ] | |
7 | ||
8 | ||
9 | [section:syntax_option_type syntax_option_type] | |
10 | ||
11 | [section:syntax_option_type_synopsis syntax_option_type Synopsis] | |
12 | ||
13 | Type [syntax_option_type] is an implementation specific bitmask type | |
14 | that controls how a regular expression string is to be interpreted. | |
15 | For convenience, note that all the constants listed here are also | |
16 | duplicated within the scope of class template [basic_regex]. | |
17 | ||
18 | namespace std{ namespace regex_constants{ | |
19 | ||
20 | typedef implementation-specific-bitmask-type syntax_option_type; | |
21 | ||
22 | // these flags are standardized: | |
23 | static const syntax_option_type normal; | |
24 | static const syntax_option_type ECMAScript = normal; | |
25 | static const syntax_option_type JavaScript = normal; | |
26 | static const syntax_option_type JScript = normal; | |
27 | static const syntax_option_type perl = normal; | |
28 | static const syntax_option_type basic; | |
29 | static const syntax_option_type sed = basic; | |
30 | static const syntax_option_type extended; | |
31 | static const syntax_option_type awk; | |
32 | static const syntax_option_type grep; | |
33 | static const syntax_option_type egrep; | |
34 | static const syntax_option_type icase; | |
35 | static const syntax_option_type nosubs; | |
36 | static const syntax_option_type optimize; | |
37 | static const syntax_option_type collate; | |
38 | ||
39 | // | |
40 | // The remaining options are specific to Boost.Regex: | |
41 | // | |
42 | ||
43 | // Options common to both Perl and POSIX regular expressions: | |
44 | static const syntax_option_type newline_alt; | |
45 | static const syntax_option_type no_except; | |
46 | static const syntax_option_type save_subexpression_location; | |
47 | ||
48 | // Perl specific options: | |
49 | static const syntax_option_type no_mod_m; | |
50 | static const syntax_option_type no_mod_s; | |
51 | static const syntax_option_type mod_s; | |
52 | static const syntax_option_type mod_x; | |
53 | static const syntax_option_type no_empty_expressions; | |
54 | ||
55 | // POSIX extended specific options: | |
56 | static const syntax_option_type no_escape_in_lists; | |
57 | static const syntax_option_type no_bk_refs; | |
58 | ||
59 | // POSIX basic specific options: | |
60 | static const syntax_option_type no_escape_in_lists; | |
61 | static const syntax_option_type no_char_classes; | |
62 | static const syntax_option_type no_intervals; | |
63 | static const syntax_option_type bk_plus_qm; | |
64 | static const syntax_option_type bk_vbar; | |
65 | ||
66 | } // namespace regex_constants | |
67 | } // namespace std | |
68 | ||
69 | [endsect] | |
70 | ||
71 | [section:syntax_option_type_overview Overview of syntax_option_type] | |
72 | ||
73 | The type [syntax_option_type] is an implementation specific bitmask type | |
74 | (see C++ standard 17.3.2.1.2). Setting its elements has the effects listed | |
75 | in the tables below. A valid value of type [syntax_option_type] will always | |
76 | have exactly one of the elements `normal`, `basic`, `extended`, | |
77 | `awk`, `grep`, `egrep`, `sed`, `literal` or `perl` set. | |
78 | ||
79 | Note that for convenience all the constants listed here are duplicated within | |
80 | the scope of class template [basic_regex], so you can use any of: | |
81 | ||
82 | boost::regex_constants::constant_name | |
83 | ||
84 | or | |
85 | ||
86 | boost::regex::constant_name | |
87 | ||
88 | or | |
89 | ||
90 | boost::wregex::constant_name | |
91 | ||
92 | in an interchangeable manner. | |
93 | ||
94 | [endsect] | |
95 | ||
96 | [section:syntax_option_type_perl Options for Perl Regular Expressions] | |
97 | ||
98 | One of the following must always be set for perl regular expressions: | |
99 | ||
100 | [table | |
101 | [[Element][Standardized][Effect when set]] | |
102 | [[ECMAScript][Yes][Specifies that the grammar recognized by the regular | |
103 | expression engine uses its normal semantics: that is the same as | |
104 | that given in the ECMA-262, ECMAScript Language Specification, | |
105 | Chapter 15 part 10, RegExp (Regular Expression) Objects (FWD.1). | |
106 | ||
107 | This is functionally identical to the | |
108 | [link boost_regex.syntax.perl_syntax Perl regular expression syntax]. | |
109 | ||
110 | Boost.Regex also recognizes all of the perl-compatible `(?...)` | |
111 | extensions in this mode.]] | |
112 | [[perl][No][As above.]] | |
113 | [[normal][No][As above.]] | |
114 | [[JavaScript][No][As above.]] | |
115 | [[JScript][No][As above.]] | |
116 | ] | |
117 | ||
118 | The following options may also be set when using perl-style regular expressions: | |
119 | ||
120 | [table | |
121 | [[Element][Standardized][Effect when set]] | |
122 | [[icase][Yes][Specifies that matching of regular expressions against a | |
123 | character container sequence shall be performed without regard to case.]] | |
124 | [[nosubs][Yes][Specifies that when a regular expression is matched against | |
125 | a character container sequence, then no sub-expression matches are | |
126 | to be stored in the supplied [match_results] structure.]] | |
127 | [[optimize][Yes][Specifies that the regular expression engine should pay | |
128 | more attention to the speed with which regular expressions are matched, | |
129 | and less to the speed with which regular expression objects are | |
130 | constructed. Otherwise it has no detectable effect on the program output. | |
131 | This currently has no effect for Boost.Regex.]] | |
132 | [[collate][Yes][Specifies that character ranges of the form `[a-b]` should be | |
133 | locale sensitive.]] | |
134 | [[newline_alt][No][Specifies that the \\n character has the same effect as | |
135 | the alternation operator |. Allows newline separated lists to be | |
136 | used as a list of alternatives.]] | |
137 | [[no_except][No][Prevents [basic_regex] from throwing an exception when an | |
138 | invalid expression is encountered.]] | |
139 | [[no_mod_m][No][Normally Boost.Regex behaves as if the Perl m-modifier is on: | |
140 | so the assertions ^ and $ match after and before embedded | |
141 | newlines respectively; setting this flag is equivalent to prefixing | |
142 | the expression with (?-m).]] | |
143 | [[no_mod_s][No][Normally whether Boost.Regex will match "." against a | |
144 | newline character is determined by the match flag `match_not_dot_newline`. | |
145 | Specifying this flag is equivalent to prefixing the expression with `(?-s)` | |
146 | and therefore causes "." not to match a newline character regardless of | |
147 | whether `match_not_dot_newline` is set in the match flags.]] | |
148 | [[mod_s][No][Normally whether Boost.Regex will match "." against a newline | |
149 | character is determined by the match flag `match_not_dot_newline`. | |
150 | Specifying this flag is equivalent to prefixing the expression with `(?s)` | |
151 | and therefore causes "." to match a newline character regardless of | |
152 | whether `match_not_dot_newline` is set in the match flags.]] | |
153 | [[mod_x][No][Turns on the perl x-modifier: causes unescaped whitespace | |
154 | in the expression to be ignored.]] | |
155 | [[no_empty_expressions][No][When set then empty expressions/alternatives are prohibited.]] | |
156 | [[save_subexpression_location][No][When set then the locations of individual | |
157 | sub-expressions within the ['original regular expression string] can be accessed | |
158 | via the [link boost_regex.basic_regex.subexpression `subexpression()`] member function of `basic_regex`.]] | |
159 | ] | |
160 | ||
161 | [endsect] | |
162 | ||
163 | [section:syntax_option_type_extended Options for POSIX Extended Regular Expressions] | |
164 | ||
165 | Exactly one of the following must always be set for | |
166 | [link boost_regex.syntax.basic_extended POSIX extended | |
167 | regular expressions]: | |
168 | ||
169 | [table | |
170 | [[Element][Standardized][Effect when set]] | |
171 | [[extended][Yes][Specifies that the grammar recognized by the regular | |
172 | expression engine is the same as that used by POSIX extended regular | |
173 | expressions in IEEE Std 1003.1-2001, Portable Operating System Interface | |
174 | (POSIX), Base Definitions and Headers, Section 9, Regular Expressions (FWD.1). | |
175 | ||
176 | Refer to the [link boost_regex.syntax.basic_extended POSIX extended | |
177 | regular expression guide] for more information. | |
178 | ||
179 | In addition some perl-style escape sequences are supported | |
180 | (The POSIX standard specifies that only "special" characters may be | |
181 | escaped, all other escape sequences result in undefined behavior).]] | |
182 | [[egrep][Yes][Specifies that the grammar recognized by the regular expression | |
183 | engine is the same as that used by POSIX utility grep when given the | |
184 | -E option in IEEE Std 1003.1-2001, Portable Operating System | |
185 | Interface (POSIX), Shells and Utilities, Section 4, Utilities, grep (FWD.1). | |
186 | ||
187 | That is to say, the same as [link boost_regex.syntax.basic_extended | |
188 | POSIX extended syntax], but with the newline character acting as an | |
189 | alternation character in addition to "|".]] | |
190 | [[awk][Yes][Specifies that the grammar recognized by the regular | |
191 | expression engine is the same as that used by POSIX utility awk | |
192 | in IEEE Std 1003.1-2001, Portable Operating System Interface (POSIX), | |
193 | Shells and Utilities, Section 4, awk (FWD.1). | |
194 | ||
195 | That is to say: the same as [link boost_regex.syntax.basic_extended | |
196 | POSIX extended syntax], but with escape sequences in character | |
197 | classes permitted. | |
198 | ||
199 | In addition some perl-style escape sequences are supported (actually | |
200 | the awk syntax only requires \\a \\b \\t \\v \\f \\n and \\r to be | |
201 | recognised, all other Perl-style escape sequences invoke undefined | |
202 | behavior according to the POSIX standard, but are in fact | |
203 | recognised by Boost.Regex).]] | |
204 | ] | |
205 | ||
206 | The following options may also be set when using POSIX extended regular expressions: | |
207 | ||
208 | [table | |
209 | [[Element][Standardized][Effect when set]] | |
210 | [[icase][Yes][Specifies that matching of regular expressions against a | |
211 | character container sequence shall be performed without regard to case.]] | |
212 | [[nosubs][Yes][Specifies that when a regular expression is matched against a | |
213 | character container sequence, then no sub-expression matches are | |
214 | to be stored in the supplied [match_results] structure.]] | |
215 | [[optimize][Yes][Specifies that the regular expression engine should pay | |
216 | more attention to the speed with which regular expressions are matched, | |
217 | and less to the speed with which regular expression objects are | |
218 | constructed. Otherwise it has no detectable effect on the program output. | |
219 | This currently has no effect for Boost.Regex.]] | |
220 | [[collate][Yes][Specifies that character ranges of the form `[a-b]` should be | |
221 | locale sensitive. This bit is on by default for POSIX-Extended | |
222 | regular expressions, but can be unset to force ranges to be compared | |
223 | by code point only.]] | |
224 | [[newline_alt][No][Specifies that the \\n character has the same effect as | |
225 | the alternation operator |. Allows newline separated lists to be used | |
226 | as a list of alternatives.]] | |
227 | [[no_escape_in_lists][No][When set this makes the escape character ordinary | |
228 | inside lists, so that `[\b]` would match either '\\' or 'b'. This bit | |
229 | is on by default for POSIX-Extended regular expressions, but can be | |
230 | unset to force escapes to be recognised inside lists.]] | |
231 | [[no_bk_refs][No][When set then backreferences are disabled. This bit is on | |
232 | by default for POSIX-Extended regular expressions, but can be unset | |
233 | to turn support for backreferences on. | |
234 | [[no_except][No][Prevents [basic_regex] from throwing an exception when | |
235 | an invalid expression is encountered.]] | |
236 | [[save_subexpression_location][No][When set then the locations of individual | |
237 | sub-expressions within the ['original regular expression string] can be accessed | |
238 | via the [link boost_regex.basic_regex.subexpression `subexpression()`] member function of `basic_regex`.]] | |
239 | ] | |
240 | ||
241 | [endsect] | |
242 | [section:syntax_option_type_basic Options for POSIX Basic Regular Expressions] | |
243 | ||
244 | Exactly one of the following must always be set for POSIX basic regular expressions: | |
245 | ||
246 | [table | |
247 | [[Element][Standardized][Effect when set]] | |
248 | [[basic][Yes][Specifies that the grammar recognized by the regular expression | |
249 | engine is the same as that used by | |
250 | [link boost_regex.syntax.basic_syntax POSIX basic regular expressions] in IEEE Std 1003.1-2001, Portable | |
251 | Operating System Interface (POSIX), Base Definitions and Headers, | |
252 | Section 9, Regular Expressions (FWD.1).]] | |
253 | [[sed][No][As above.]] | |
254 | [[grep][Yes][Specifies that the grammar recognized by the regular | |
255 | expression engine is the same as that used by | |
256 | POSIX utility `grep` in IEEE Std 1003.1-2001, Portable Operating | |
257 | System Interface (POSIX), Shells and Utilities, Section 4, | |
258 | Utilities, grep (FWD.1). | |
259 | ||
260 | That is to say, the same as [link boost_regex.syntax.basic_syntax | |
261 | POSIX basic syntax], but with the newline character acting as an | |
262 | alternation character; the expression is treated as a newline | |
263 | separated list of alternatives.]] | |
264 | [[emacs][No][Specifies that the grammar recognised is the superset of the | |
265 | [link boost_regex.syntax.basic_syntax POSIX-Basic syntax] used by | |
266 | the emacs program.]] | |
267 | ] | |
268 | ||
269 | The following options may also be set when using POSIX basic regular expressions: | |
270 | ||
271 | [table | |
272 | [[Element][Standardized][Effect when set]] | |
273 | [[icase][Yes][Specifies that matching of regular expressions against a | |
274 | character container sequence shall be performed without regard to case.]] | |
275 | [[nosubs][Yes][Specifies that when a regular expression is matched against | |
276 | a character container sequence, then no sub-expression matches are | |
277 | to be stored in the supplied [match_results] structure.]] | |
278 | [[optimize][Yes][Specifies that the regular expression engine should pay | |
279 | more attention to the speed with which regular expressions are | |
280 | matched, and less to the speed with which regular expression objects | |
281 | are constructed. Otherwise it has no detectable effect on the program output. | |
282 | This currently has no effect for Boost.Regex.]] | |
283 | [[collate][Yes][Specifies that character ranges of the form `[a-b]` should | |
284 | be locale sensitive. This bit is on by default for | |
285 | [link boost_regex.syntax.basic_syntax POSIX-Basic regular expressions], | |
286 | but can be unset to force ranges to be compared by code point only.]] | |
287 | [[newline_alt][No][Specifies that the \\n character has the same effect as the | |
288 | alternation operator |. Allows newline separated lists to be used | |
289 | as a list of alternatives. This bit is already set if you use the | |
290 | `grep` option.]] | |
291 | [[no_char_classes][No][When set then character classes such as `[[:alnum:]]` | |
292 | are not allowed.]] | |
293 | [[no_escape_in_lists][No][When set this makes the escape character ordinary | |
294 | inside lists, so that `[\b]` would match either '\\' or 'b'. This bit | |
295 | is on by default for [link boost_regex.syntax.basic_syntax POSIX-basic | |
296 | regular expressions], but can be unset to force escapes to be recognised | |
297 | inside lists.]] | |
298 | [[no_intervals][No][When set then bounded repeats such as a{2,3} are not permitted.]] | |
299 | [[bk_plus_qm][No][When set then `\?` acts as a zero-or-one repeat operator, | |
300 | and `\+` acts as a one-or-more repeat operator.]] | |
301 | [[bk_vbar][No][When set then `\|` acts as the alternation operator.]] | |
302 | [[no_except][No][Prevents [basic_regex] from throwing an exception when an | |
303 | invalid expression is encountered.]] | |
304 | [[save_subexpression_location][No][When set then the locations of individual | |
305 | sub-expressions within the ['original regular expression string] can be accessed | |
306 | via the [link boost_regex.basic_regex.subexpression `subexpression()`] member function of `basic_regex`.]] | |
307 | ] | |
308 | ||
309 | [endsect] | |
310 | ||
311 | [section:syntax_option_type_literal Options for Literal Strings] | |
312 | ||
313 | The following must always be set to interpret the expression as a string literal: | |
314 | ||
315 | [table | |
316 | [[Element][Standardized][Effect when set]] | |
317 | [[literal][Yes][Treat the string as a literal (no special characters).]] | |
318 | ] | |
319 | ||
320 | The following options may also be combined with the literal flag: | |
321 | ||
322 | [table | |
323 | [[Element][Standardized][Effect when set]] | |
324 | [[icase][Yes][Specifies that matching of regular expressions against a | |
325 | character container sequence shall be performed without regard to case.]] | |
326 | [[optimize][Yes][Specifies that the regular expression engine should pay | |
327 | more attention to the speed with which regular expressions are matched, | |
328 | and less to the speed with which regular expression objects are constructed. | |
329 | Otherwise it has no detectable effect on the program output. This | |
330 | currently has no effect for Boost.Regex.]] | |
331 | ] | |
332 | ||
333 | [endsect] | |
334 | ||
335 | [endsect] | |
336 |