]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | [/ |
2 | Copyright 2006-2007 John Maddock. | |
3 | Distributed under the Boost Software License, Version 1.0. | |
4 | (See accompanying file LICENSE_1_0.txt or copy at | |
5 | http://www.boost.org/LICENSE_1_0.txt). | |
6 | ] | |
7 | ||
8 | ||
9 | [section:regex_split regex_split (deprecated)] | |
10 | ||
11 | The algorithm [regex_split] has been deprecated in favor of the iterator | |
12 | [regex_token_iterator] which has a more flexible and powerful interface, | |
13 | as well as following the more usual standard library "pull" rather than | |
14 | "push" semantics. | |
15 | ||
16 | Code which uses [regex_split] will continue to compile; the following | |
17 | documentation is taken from a previous Boost.Regex version: | |
18 | ||
19 | #include <boost/regex.hpp> | |
20 | ||
21 | Algorithm [regex_split] performs a similar operation to the Perl split operation, | |
22 | and comes in three overloaded forms: | |
23 | ||
24 | template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2> | |
25 | std::size_t regex_split(OutputIterator out, | |
26 | std::basic_string<charT, Traits1, Alloc1>& s, | |
27 | const basic_regex<charT, Traits2>& e, | |
28 | boost::match_flag_type flags, | |
29 | std::size_t max_split); | |
30 | ||
31 | template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2> | |
32 | std::size_t regex_split(OutputIterator out, | |
33 | std::basic_string<charT, Traits1, Alloc1>& s, | |
34 | const basic_regex<charT, Traits2>& e, | |
35 | boost::match_flag_type flags = match_default); | |
36 | ||
37 | template <class OutputIterator, class charT, class Traits1, class Alloc1> | |
38 | std::size_t regex_split(OutputIterator out, | |
39 | std::basic_string<charT, Traits1, Alloc1>& s); | |
40 | ||
41 | [*Effects]: Each version of the algorithm takes an output-iterator for | |
42 | output, and a string for input. If the expression contains no marked | |
43 | sub-expressions, then the algorithm writes one string onto the output-iterator | |
44 | for each section of input that does not match the expression. | |
45 | If the expression does contain marked sub-expressions, then each | |
46 | time a match is found, one string for each marked sub-expression will be | |
47 | written to the output-iterator. No more than /max_split/ strings will be written | |
48 | to the output-iterator. Before returning, all the input processed will be | |
49 | deleted from the string /s/ (if /max_split/ is not reached then all of /s/ | |
50 | will be deleted). Returns the number of strings written to the output-iterator. | |
51 | If the parameter /max_split/ is not specified then it defaults to `UINT_MAX`. | |
52 | If no expression is specified, then it defaults to "\\s+", and splitting occurs | |
53 | on whitespace. | |
54 | ||
55 | [*Throws]: `std::runtime_error` if the complexity of matching the expression | |
56 | against an N character string begins to exceed O(N[super 2]), or if the | |
57 | program runs out of stack space while matching the expression (if Boost.Regex is | |
58 | configured in recursive mode), or if the matcher exhausts its permitted | |
59 | memory allocation (if Boost.Regex is configured in non-recursive mode). | |
60 | ||
61 | [*Example]: the following function will split the input string into a | |
62 | series of tokens, and remove each token from the string /s/: | |
63 | ||
64 | unsigned tokenise(std::list<std::string>& l, std::string& s) | |
65 | { | |
66 | return boost::regex_split(std::back_inserter(l), s); | |
67 | } | |
68 | ||
69 | [*Example]: the following short program will extract all of the URLs | |
70 | from an HTML file, and print them out to cout: | |
71 | ||
72 | #include <list> | |
73 | #include <fstream> | |
74 | #include <iostream> | |
75 | #include <boost/regex.hpp> | |
76 | ||
77 | boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"", | |
78 | boost::regbase::normal | boost::regbase::icase); | |
79 | ||
80 | void load_file(std::string& s, std::istream& is) | |
81 | { | |
82 | s.erase(); | |
83 | // | |
84 | // attempt to grow string buffer to match file size, | |
85 | // this doesn't always work... | |
86 | s.reserve(is.rdbuf()->in_avail()); | |
87 | char c; | |
88 | while(is.get(c)) | |
89 | { | |
90 | // use logarithmic growth strategy, in case | |
91 | // in_avail (above) returned zero: | |
92 | if(s.capacity() == s.size()) | |
93 | s.reserve(s.capacity() * 3); | |
94 | s.append(1, c); | |
95 | } | |
96 | } | |
97 | ||
98 | ||
99 | int main(int argc, char** argv) | |
100 | { | |
101 | std::string s; | |
102 | std::list<std::string> l; | |
103 | ||
104 | for(int i = 1; i < argc; ++i) | |
105 | { | |
106 | std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; | |
107 | s.erase(); | |
108 | std::ifstream is(argv[i]); | |
109 | load_file(s, is); | |
110 | boost::regex_split(std::back_inserter(l), s, e); | |
111 | while(l.size()) | |
112 | { | |
113 | s = *(l.begin()); | |
114 | l.pop_front(); | |
115 | std::cout << s << std::endl; | |
116 | } | |
117 | } | |
118 | return 0; | |
119 | } | |
120 | ||
121 | [endsect] | |
122 |