]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | [/ |
2 | Copyright 2006-2007 John Maddock. | |
3 | Distributed under the Boost Software License, Version 1.0. | |
4 | (See accompanying file LICENSE_1_0.txt or copy at | |
5 | http://www.boost.org/LICENSE_1_0.txt). | |
6 | ] | |
7 | ||
8 | ||
9 | [section:regex_token_iterator regex_token_iterator] | |
10 | ||
11 | The template class [regex_token_iterator] is an iterator adapter; that is to | |
12 | say it represents a new view of an existing iterator sequence, | |
13 | by enumerating all the occurrences of a regular expression within that | |
14 | sequence, and presenting one or more character sequences for each match found. | |
15 | Each position enumerated by the iterator is a [sub_match] object that represents | |
16 | what matched a particular sub-expression within the regular expression. | |
17 | When class [regex_token_iterator] is used to enumerate a single sub-expression | |
18 | with index -1, then the iterator performs field splitting: that is | |
19 | to say it enumerates one character sequence for each section of the character | |
20 | container sequence that does not match the regular expression specified. | |
21 | ||
22 | template <class BidirectionalIterator, | |
23 | class charT = iterator_traits<BidirectionalIterator>::value_type, | |
24 | class traits = regex_traits<charT> > | |
25 | class regex_token_iterator | |
26 | { | |
27 | public: | |
28 | typedef basic_regex<charT, traits> regex_type; | |
29 | typedef sub_match<BidirectionalIterator> value_type; | |
30 | typedef typename iterator_traits<BidirectionalIterator>::difference_type difference_type; | |
31 | typedef const value_type* pointer; | |
32 | typedef const value_type& reference; | |
33 | typedef std::forward_iterator_tag iterator_category; | |
34 | ||
35 | ``[link boost_regex.regex_token_iterator.construct1 regex_token_iterator]``(); | |
36 | ``[link boost_regex.regex_token_iterator.construct2 regex_token_iterator]``(BidirectionalIterator a, | |
37 | BidirectionalIterator b, | |
38 | const regex_type& re, | |
39 | int submatch = 0, | |
40 | match_flag_type m = match_default); | |
41 | ``[link boost_regex.regex_token_iterator.construct3 regex_token_iterator]``(BidirectionalIterator a, | |
42 | BidirectionalIterator b, | |
43 | const regex_type& re, | |
44 | const std::vector<int>& submatches, | |
45 | match_flag_type m = match_default); | |
46 | template <std::size_t N> | |
47 | ``[link boost_regex.regex_token_iterator.construct4 regex_token_iterator]``(BidirectionalIterator a, | |
48 | BidirectionalIterator b, | |
49 | const regex_type& re, | |
50 | const int (&submatches)[N], | |
51 | match_flag_type m = match_default); | |
52 | ``[link boost_regex.regex_token_iterator.construct5 regex_token_iterator]``(const regex_token_iterator&); | |
53 | regex_token_iterator& ``[link boost_regex.regex_token_iterator.assign operator=]``(const regex_token_iterator&); | |
54 | bool ``[link boost_regex.regex_token_iterator.op_eq operator==]``(const regex_token_iterator&)const; | |
55 | bool ``[link boost_regex.regex_token_iterator.op_ne operator!=]``(const regex_token_iterator&)const; | |
56 | const value_type& ``[link boost_regex.regex_token_iterator.op_deref operator*]``()const; | |
57 | const value_type* ``[link boost_regex.regex_token_iterator.op_arrow operator->]``()const; | |
58 | regex_token_iterator& ``[link boost_regex.regex_token_iterator.op_inc1 operator++]``(); | |
59 | regex_token_iterator ``[link boost_regex.regex_token_iterator.op_inc2 operator++]``(int); | |
60 | }; | |
61 | ||
62 | typedef regex_token_iterator<const char*> cregex_token_iterator; | |
63 | typedef regex_token_iterator<std::string::const_iterator> sregex_token_iterator; | |
64 | #ifndef BOOST_NO_WREGEX | |
65 | typedef regex_token_iterator<const wchar_t*> wcregex_token_iterator; | |
66 | typedef regex_token_iterator<std::wstring::const_iterator> wsregex_token_iterator; | |
67 | #endif | |
68 | ||
69 | template <class charT, class traits> | |
70 | regex_token_iterator<const charT*, charT, traits> | |
71 | ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``( | |
72 | const charT* p, | |
73 | const basic_regex<charT, traits>& e, | |
74 | int submatch = 0, | |
75 | regex_constants::match_flag_type m = regex_constants::match_default); | |
76 | ||
77 | template <class charT, class traits, class ST, class SA> | |
78 | regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> | |
79 | ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``( | |
80 | const std::basic_string<charT, ST, SA>& p, | |
81 | const basic_regex<charT, traits>& e, | |
82 | int submatch = 0, | |
83 | regex_constants::match_flag_type m = regex_constants::match_default); | |
84 | ||
85 | template <class charT, class traits, std::size_t N> | |
86 | regex_token_iterator<const charT*, charT, traits> | |
87 | ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``( | |
88 | const charT* p, | |
89 | const basic_regex<charT, traits>& e, | |
90 | const int (&submatch)[N], | |
91 | regex_constants::match_flag_type m = regex_constants::match_default); | |
92 | ||
93 | template <class charT, class traits, class ST, class SA, std::size_t N> | |
94 | regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> | |
95 | ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``( | |
96 | const std::basic_string<charT, ST, SA>& p, | |
97 | const basic_regex<charT, traits>& e, | |
98 | const int (&submatch)[N], | |
99 | regex_constants::match_flag_type m = regex_constants::match_default); | |
100 | ||
101 | template <class charT, class traits> | |
102 | regex_token_iterator<const charT*, charT, traits> | |
103 | ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``( | |
104 | const charT* p, | |
105 | const basic_regex<charT, traits>& e, | |
106 | const std::vector<int>& submatch, | |
107 | regex_constants::match_flag_type m = regex_constants::match_default); | |
108 | ||
109 | template <class charT, class traits, class ST, class SA> | |
110 | regex_token_iterator< | |
111 | typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> | |
112 | ``[link boost_regex.regex_token_iterator.make make_regex_token_iterator]``( | |
113 | const std::basic_string<charT, ST, SA>& p, | |
114 | const basic_regex<charT, traits>& e, | |
115 | const std::vector<int>& submatch, | |
116 | regex_constants::match_flag_type m = regex_constants::match_default); | |
117 | ||
118 | [h4 Description] | |
119 | ||
120 | [#boost_regex.regex_token_iterator.construct1] | |
121 | ||
122 | regex_token_iterator(); | |
123 | ||
124 | [*Effects]: constructs an end of sequence iterator. | |
125 | ||
126 | [#boost_regex.regex_token_iterator.construct2] | |
127 | ||
128 | regex_token_iterator(BidirectionalIterator a, | |
129 | BidirectionalIterator b, | |
130 | const regex_type& re, | |
131 | int submatch = 0, | |
132 | match_flag_type m = match_default); | |
133 | ||
134 | [*Preconditions]: `!re.empty()`. Object /re/ shall exist for the lifetime of | |
135 | the iterator constructed from it. | |
136 | ||
137 | [*Effects]: constructs a [regex_token_iterator] that will enumerate one string for | |
138 | each regular expression match of the expression /re/ found within the sequence \[a,b), | |
139 | using match flags /m/ (see [match_flag_type]). The string enumerated is the sub-expression /submatch/ | |
140 | for each match found; if /submatch/ is -1, then enumerates all the text | |
141 | sequences that did not match the expression /re/ (that is to say, it performs field | |
142 | splitting). | |
143 | ||
144 | [*Throws]: `std::runtime_error` if the complexity of matching the expression against | |
145 | an N character string begins to exceed O(N[super 2]), or if the program runs | |
146 | out of stack space while matching the expression (if Boost.Regex is configured | |
147 | in recursive mode), or if the matcher exhausts its permitted memory | |
148 | allocation (if Boost.Regex is configured in non-recursive mode). | |
149 | ||
150 | [#boost_regex.regex_token_iterator.construct3] | |
151 | ||
152 | regex_token_iterator(BidirectionalIterator a, | |
153 | BidirectionalIterator b, | |
154 | const regex_type& re, | |
155 | const std::vector<int>& submatches, | |
156 | match_flag_type m = match_default); | |
157 | ||
158 | [*Preconditions]: `submatches.size() && !re.empty()`. Object /re/ shall | |
159 | exist for the lifetime of the iterator constructed from it. | |
160 | ||
161 | [*Effects]: constructs a [regex_token_iterator] that will enumerate | |
162 | `submatches.size()` strings for each regular expression match of | |
163 | the expression /re/ found within the sequence \[a,b), using match flags /m/ | |
164 | (see [match_flag_type]). For each match found one string will be enumerated | |
165 | for each sub-expression index contained within the /submatches/ vector; if | |
166 | `submatches[0]` is -1, then the first string enumerated for each match will be | |
167 | all of the text from end of the last match to the start of the current match, | |
168 | in addition there will be one extra string enumerated when no more matches can | |
169 | be found: from the end of the last match found, to the end of the underlying sequence. | |
170 | ||
171 | [*Throws]: `std::runtime_error` if the complexity of matching the expression | |
172 | against an N character string begins to exceed O(N[super 2]), or if the | |
173 | program runs out of stack space while matching the expression (if Boost.Regex is | |
174 | configured in recursive mode), or if the matcher exhausts its permitted memory | |
175 | allocation (if Boost.Regex is configured in non-recursive mode). | |
176 | ||
177 | [#boost_regex.regex_token_iterator.construct4] | |
178 | ||
179 | template <std::size_t R> | |
180 | regex_token_iterator(BidirectionalIterator a, | |
181 | BidirectionalIterator b, | |
182 | const regex_type& re, | |
183 | const int (&submatches)[R], | |
184 | match_flag_type m = match_default); | |
185 | ||
186 | [*Preconditions]: `!re.empty()`. Object /re/ shall exist for the lifetime of the iterator constructed from it. | |
187 | ||
188 | [*Effects]: constructs a [regex_token_iterator] that will enumerate /R/ strings | |
189 | for each regular expression match of the expression /re/ found within the sequence | |
190 | \[a,b), using match flags /m/ (see [match_flag_type]). For each match found one | |
191 | string will be enumerated for each sub-expression index contained within the | |
192 | /submatches/ array; if `submatches[0]` is -1, then the first string enumerated for | |
193 | each match will be all of the text from end of the last match to the start | |
194 | of the current match, in addition there will be one extra string enumerated when | |
195 | no more matches can be found: from the end of the last match found, to | |
196 | the end of the underlying sequence. | |
197 | ||
198 | [*Throws]: `std::runtime_error` if the complexity of matching the expression | |
199 | against an N character string begins to exceed O(N[super 2]), or if the | |
200 | program runs out of stack space while matching the expression (if Boost.Regex | |
201 | is configured in recursive mode), or if the matcher exhausts its | |
202 | permitted memory allocation (if Boost.Regex is configured in non-recursive mode). | |
203 | ||
204 | [#boost_regex.regex_token_iterator.construct5] | |
205 | ||
206 | regex_token_iterator(const regex_token_iterator& that); | |
207 | ||
208 | [*Effects]: constructs a copy of `that`. | |
209 | ||
210 | [*Postconditions]: `*this == that`. | |
211 | ||
212 | [#boost_regex.regex_token_iterator.assign] | |
213 | ||
214 | regex_token_iterator& operator=(const regex_token_iterator& that); | |
215 | ||
216 | [*Effects]: sets `*this` to be equal to `that`. | |
217 | ||
218 | [*Postconditions]: `*this == that`. | |
219 | ||
220 | [#boost_regex.regex_token_iterator.op_eq] | |
221 | ||
222 | bool operator==(const regex_token_iterator& that)const; | |
223 | ||
224 | [*Effects]: returns true if `*this` is the same position as `that`. | |
225 | ||
226 | [#boost_regex.regex_token_iterator.op_ne] | |
227 | ||
228 | bool operator!=(const regex_token_iterator& that)const; | |
229 | ||
230 | [*Effects]: returns `!(*this == that)`. | |
231 | ||
232 | [#boost_regex.regex_token_iterator.op_deref] | |
233 | ||
234 | const value_type& operator*()const; | |
235 | ||
236 | [*Effects]: returns the current character sequence being enumerated. | |
237 | ||
238 | [#boost_regex.regex_token_iterator.op_arrow] | |
239 | ||
240 | const value_type* operator->()const; | |
241 | ||
242 | [*Effects]: returns `&(**this)`. | |
243 | ||
244 | [#boost_regex.regex_token_iterator.op_inc1] | |
245 | ||
246 | regex_token_iterator& operator++(); | |
247 | ||
248 | [*Effects]: Moves on to the next character sequence to be enumerated. | |
249 | ||
250 | [*Throws]: `std::runtime_error` if the complexity of matching the expression | |
251 | against an N character string begins to exceed O(N[super 2]), or if the program | |
252 | runs out of stack space while matching the expression (if Boost.Regex is | |
253 | configured in recursive mode), or if the matcher exhausts its permitted | |
254 | memory allocation (if Boost.Regex is configured in non-recursive mode). | |
255 | ||
256 | [*Returns]: `*this`. | |
257 | ||
258 | [#boost_regex.regex_token_iterator.op_inc2] | |
259 | ||
260 | regex_token_iterator operator++(int); | |
261 | ||
262 | [*Effects]: constructs a copy result of `*this`, then calls `++(*this)`. | |
263 | ||
264 | [*Returns]: result. | |
265 | ||
266 | [#boost_regex.regex_token_iterator.make] | |
267 | ||
268 | template <class charT, class traits> | |
269 | regex_token_iterator<const charT*, charT, traits> | |
270 | make_regex_token_iterator( | |
271 | const charT* p, | |
272 | const basic_regex<charT, traits>& e, | |
273 | int submatch = 0, | |
274 | regex_constants::match_flag_type m = regex_constants::match_default); | |
275 | ||
276 | template <class charT, class traits, class ST, class SA> | |
277 | regex_token_iterator<typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> | |
278 | make_regex_token_iterator( | |
279 | const std::basic_string<charT, ST, SA>& p, | |
280 | const basic_regex<charT, traits>& e, | |
281 | int submatch = 0, | |
282 | regex_constants::match_flag_type m = regex_constants::match_default); | |
283 | ||
284 | template <class charT, class traits, std::size_t N> | |
285 | regex_token_iterator<const charT*, charT, traits> | |
286 | make_regex_token_iterator( | |
287 | const charT* p, | |
288 | const basic_regex<charT, traits>& e, | |
289 | const int (&submatch)[N], | |
290 | regex_constants::match_flag_type m = regex_constants::match_default); | |
291 | ||
292 | template <class charT, class traits, class ST, class SA, std::size_t N> | |
293 | regex_token_iterator< | |
294 | typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> | |
295 | make_regex_token_iterator( | |
296 | const std::basic_string<charT, ST, SA>& p, | |
297 | const basic_regex<charT, traits>& e, | |
298 | const int (&submatch)[N], | |
299 | regex_constants::match_flag_type m = regex_constants::match_default); | |
300 | ||
301 | template <class charT, class traits> | |
302 | regex_token_iterator<const charT*, charT, traits> | |
303 | make_regex_token_iterator( | |
304 | const charT* p, | |
305 | const basic_regex<charT, traits>& e, | |
306 | const std::vector<int>& submatch, | |
307 | regex_constants::match_flag_type m = regex_constants::match_default); | |
308 | ||
309 | template <class charT, class traits, class ST, class SA> | |
310 | regex_token_iterator< | |
311 | typename std::basic_string<charT, ST, SA>::const_iterator, charT, traits> | |
312 | make_regex_token_iterator( | |
313 | const std::basic_string<charT, ST, SA>& p, | |
314 | const basic_regex<charT, traits>& e, | |
315 | const std::vector<int>& submatch, | |
316 | regex_constants::match_flag_type m = regex_constants::match_default); | |
317 | ||
318 | [*Effects]: returns a [regex_token_iterator] that enumerates one [sub_match] | |
319 | for each value in /submatch/ for each occurrence of regular expression /e/ | |
320 | in string /p/, matched using [match_flag_type] /m/. | |
321 | ||
322 | [h4 Examples] | |
323 | ||
324 | The following example takes a string and splits it into a series of tokens: | |
325 | ||
326 | #include <iostream> | |
327 | #include <boost/regex.hpp> | |
328 | ||
329 | using namespace std; | |
330 | ||
331 | int main(int argc, char**) | |
332 | { | |
333 | string s; | |
334 | do{ | |
335 | if(argc == 1) | |
336 | { | |
337 | cout << "Enter text to split (or \"quit\" to exit): "; | |
338 | getline(cin, s); | |
339 | if(s == "quit") break; | |
340 | } | |
341 | else | |
342 | s = "This is a string of tokens"; | |
343 | ||
344 | boost::regex re("\\s+"); | |
345 | boost::sregex_token_iterator i(s.begin(), s.end(), re, -1); | |
346 | boost::sregex_token_iterator j; | |
347 | ||
348 | unsigned count = 0; | |
349 | while(i != j) | |
350 | { | |
351 | cout << *i++ << endl; | |
352 | count++; | |
353 | } | |
354 | cout << "There were " << count << " tokens found." << endl; | |
355 | ||
356 | }while(argc == 1); | |
357 | return 0; | |
358 | } | |
359 | ||
360 | ||
361 | The following example takes an HTML file and outputs a list of all the linked files: | |
362 | ||
363 | #include <fstream> | |
364 | #include <iostream> | |
365 | #include <iterator> | |
366 | #include <boost/regex.hpp> | |
367 | ||
368 | boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"", | |
369 | boost::regex::normal | boost::regbase::icase); | |
370 | ||
371 | void load_file(std::string& s, std::istream& is) | |
372 | { | |
373 | s.erase(); | |
374 | // | |
375 | // attempt to grow string buffer to match file size, | |
376 | // this doesn't always work... | |
377 | s.reserve(is.rdbuf()->in_avail()); | |
378 | char c; | |
379 | while(is.get(c)) | |
380 | { | |
381 | // use logarithmic growth strategy, in case | |
382 | // in_avail (above) returned zero: | |
383 | if(s.capacity() == s.size()) | |
384 | s.reserve(s.capacity() * 3); | |
385 | s.append(1, c); | |
386 | } | |
387 | } | |
388 | ||
389 | int main(int argc, char** argv) | |
390 | { | |
391 | std::string s; | |
392 | int i; | |
393 | for(i = 1; i < argc; ++i) | |
394 | { | |
395 | std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; | |
396 | s.erase(); | |
397 | std::ifstream is(argv[i]); | |
398 | load_file(s, is); | |
399 | boost::sregex_token_iterator i(s.begin(), s.end(), e, 1); | |
400 | boost::sregex_token_iterator j; | |
401 | while(i != j) | |
402 | { | |
403 | std::cout << *i++ << std::endl; | |
404 | } | |
405 | } | |
406 | // | |
407 | // alternative method: | |
408 | // test the array-literal constructor, and split out the whole | |
409 | // match as well as $1.... | |
410 | // | |
411 | for(i = 1; i < argc; ++i) | |
412 | { | |
413 | std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; | |
414 | s.erase(); | |
415 | std::ifstream is(argv[i]); | |
416 | load_file(s, is); | |
417 | const int subs[] = {1, 0,}; | |
418 | boost::sregex_token_iterator i(s.begin(), s.end(), e, subs); | |
419 | boost::sregex_token_iterator j; | |
420 | while(i != j) | |
421 | { | |
422 | std::cout << *i++ << std::endl; | |
423 | } | |
424 | } | |
425 | ||
426 | return 0; | |
427 | } | |
428 | ||
429 | ||
430 | [endsect] | |
431 |