]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Boost.Bimap |
2 | // | |
3 | // Copyright (c) 2006-2007 Matias Capeletto | |
4 | // | |
5 | // Distributed under the Boost Software License, Version 1.0. | |
6 | // (See accompanying file LICENSE_1_0.txt or copy at | |
7 | // http://www.boost.org/LICENSE_1_0.txt) | |
8 | ||
9 | ||
10 | /***************************************************************************** | |
11 | Boost.MultiIndex | |
12 | *****************************************************************************/ | |
13 | ||
14 | #include <boost/config.hpp> | |
15 | ||
16 | //[ code_mi_to_b_path_mi_hashed_indices | |
17 | ||
18 | #include <iostream> | |
19 | #include <iomanip> | |
20 | ||
21 | #include <boost/tokenizer.hpp> | |
22 | ||
23 | #include <boost/multi_index_container.hpp> | |
24 | #include <boost/multi_index/key_extractors.hpp> | |
25 | #include <boost/multi_index/ordered_index.hpp> | |
26 | #include <boost/multi_index/hashed_index.hpp> | |
27 | #include <boost/lambda/lambda.hpp> | |
28 | ||
29 | using namespace boost::multi_index; | |
30 | namespace bl = boost::lambda; | |
31 | ||
32 | // word_counter keeps the occurrences of words inserted. A hashed | |
33 | // index allows for fast checking of preexisting entries. | |
34 | ||
// One record of the word counter: a word together with the number of
// times it has been counted so far.
struct word_counter_entry
{
    std::string  word;         // the word itself
    unsigned int occurrences;  // how many times the word has been counted

    // Creates a record for word_ with a count of zero.
    //
    // The argument is taken by const reference so that only one copy of
    // the string is made (into the member). The constructor is left
    // non-explicit on purpose: main() passes std::string tokens straight
    // to the container's insert() and depends on the implicit conversion
    // from std::string to word_counter_entry.
    word_counter_entry( const std::string& word_ )
        : word(word_), occurrences(0) {}
};
42 | ||
43 | typedef multi_index_container | |
44 | < | |
45 | word_counter_entry, | |
46 | indexed_by | |
47 | < | |
48 | ordered_non_unique | |
49 | < | |
50 | BOOST_MULTI_INDEX_MEMBER( | |
51 | word_counter_entry,unsigned int,occurrences), | |
52 | std::greater<unsigned int> | |
53 | >, | |
54 | hashed_unique | |
55 | < | |
56 | BOOST_MULTI_INDEX_MEMBER(word_counter_entry,std::string,word) | |
57 | > | |
58 | > | |
59 | ||
60 | > word_counter; | |
61 | ||
62 | typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer; | |
63 | ||
64 | int main() | |
65 | { | |
66 | std::string text= | |
67 | "En un lugar de la Mancha, de cuyo nombre no quiero acordarme... " | |
68 | "...snip..." | |
69 | "...no se salga un punto de la verdad."; | |
70 | ||
71 | // feed the text into the container | |
72 | ||
73 | word_counter wc; | |
74 | text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-")); | |
75 | unsigned int total_occurrences = 0; | |
76 | ||
77 | for( text_tokenizer::iterator it = tok.begin(), it_end = tok.end(); | |
78 | it != it_end ; ++it ) | |
79 | { | |
80 | ++total_occurrences; | |
81 | word_counter::iterator wit = wc.insert(*it).first; | |
82 | wc.modify_key( wit, ++ bl::_1 ); | |
83 | } | |
84 | ||
85 | // list words by frequency of appearance | |
86 | ||
87 | std::cout << std::fixed << std::setprecision(2); | |
88 | ||
89 | for( word_counter::iterator wit = wc.begin(), wit_end=wc.end(); | |
90 | wit != wit_end; ++wit ) | |
91 | { | |
92 | std::cout << std::setw(11) << wit->word << ": " | |
93 | << std::setw(5) | |
94 | << 100.0 * wit->occurrences / total_occurrences << "%" | |
95 | << std::endl; | |
96 | } | |
97 | ||
98 | return 0; | |
99 | } | |
100 | //] |