]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /* |
2 | * | |
3 | * Copyright (c) 1998-2002 | |
4 | * John Maddock | |
5 | * | |
6 | * Use, modification and distribution are subject to the | |
7 | * Boost Software License, Version 1.0. (See accompanying file | |
8 | * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
9 | * | |
10 | */ | |
11 | ||
12 | /* | |
13 | * LOCATION: see http://www.boost.org for most recent version. | |
14 | * FILE regex_split_example_2.cpp | |
15 | * VERSION see <boost/version.hpp> | |
16 | * DESCRIPTION: regex_split example: spit out linked URL's. | |
17 | */ | |
18 | ||
19 | ||
b32b8144 | 20 | #include <boost/regex.hpp> |
7c673cae FG |
21 | #include <list> |
22 | #include <fstream> | |
23 | #include <iostream> | |
24 | #include <iterator> | |
7c673cae FG |
25 | |
26 | boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"", | |
27 | boost::regex::normal | boost::regbase::icase); | |
28 | ||
//
// load_file:
// Replaces the contents of s with every character that can be read
// from is, up to end-of-stream or the first read failure.
//
// s  - receives the data; any previous contents are discarded.
// is - stream to read from; if its badbit is already set, s is left
//      empty and nothing is read.
//
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   if(is.bad()) return;
   //
   // attempt to grow string buffer to match file size,
   // this doesn't always work: in_avail() is only a hint and may
   // return 0 or -1.  A negative value must never reach reserve(),
   // the cast would turn it into an enormous request and
   // reserve() would throw std::length_error.
   //
   const std::streamsize avail = is.rdbuf()->in_avail();
   if(avail > 0)
      s.reserve(static_cast<std::string::size_type>(avail));
   char c;
   while(is.get(c))
   {
      // use logarithmic growth strategy, in case
      // in_avail (above) gave no useful size:
      const std::string::size_type cap = s.capacity();
      if(cap == s.size())
         s.reserve(cap ? cap * 3 : 16);  // tripling zero would not grow
      s.append(1, c);
   }
}
47 | ||
48 | int main(int argc, char** argv) | |
49 | { | |
50 | std::string s; | |
51 | std::list<std::string> l; | |
52 | int i; | |
53 | for(i = 1; i < argc; ++i) | |
54 | { | |
55 | std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; | |
56 | s.erase(); | |
57 | std::ifstream is(argv[i]); | |
58 | load_file(s, is); | |
59 | is.close(); | |
60 | boost::regex_split(std::back_inserter(l), s, e); | |
61 | while(l.size()) | |
62 | { | |
63 | s = *(l.begin()); | |
64 | l.pop_front(); | |
65 | std::cout << s << std::endl; | |
66 | } | |
67 | } | |
68 | // | |
69 | // alternative method: | |
70 | // split one match at a time and output direct to | |
71 | // cout via ostream_iterator<std::string>.... | |
72 | // | |
73 | for(i = 1; i < argc; ++i) | |
74 | { | |
75 | std::cout << "Findings URL's in " << argv[i] << ":" << std::endl; | |
76 | s.erase(); | |
77 | std::ifstream is(argv[i]); | |
78 | load_file(s, is); | |
79 | is.close(); | |
80 | while(boost::regex_split(std::ostream_iterator<std::string>(std::cout), s, e, boost::match_default, 1)) std::cout << std::endl; | |
81 | } | |
82 | ||
83 | return 0; | |
84 | } | |
85 | ||
86 |