[ceph.git] / ceph / src / boost / libs / bimap / example / mi_to_b_path / mi_hashed_indices.cpp

// Boost.Bimap
//
// Copyright (c) 2006-2007 Matias Capeletto
//
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)


/*****************************************************************************
Boost.MultiIndex
*****************************************************************************/

#include <boost/config.hpp>

//[ code_mi_to_b_path_mi_hashed_indices

#include <iostream>
#include <iomanip>

#include <boost/tokenizer.hpp>

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/key_extractors.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/lambda/lambda.hpp>

using namespace boost::multi_index;
namespace bl = boost::lambda;

// word_counter keeps the occurrences of words inserted. A hashed
// index allows for fast checking of preexisting entries.

// One record of the word counter: a word together with the number of
// times it has been counted so far.
struct word_counter_entry
{
    std::string  word;
    unsigned int occurrences;

    // Creates the entry for word_ with a count of zero.
    //
    // The constructor is deliberately left non-explicit: main() inserts
    // plain std::string tokens into the container and relies on the
    // implicit conversion to word_counter_entry.
    //
    // word_ is taken by const reference so that the only string copy made
    // is the one that initializes the member.
    word_counter_entry( const std::string& word_ )
        : word(word_), occurrences(0) {}
};

// Container of word_counter_entry records with two indices:
//   index 0 - ordered, non-unique, keyed on occurrences and compared with
//             std::greater, so larger counts sort first;
//   index 1 - hashed, unique, keyed on word.
typedef multi_index_container<
    word_counter_entry,
    indexed_by<
        ordered_non_unique<
            member<word_counter_entry,unsigned int,
                   &word_counter_entry::occurrences>,
            std::greater<unsigned int>
        >,
        hashed_unique<
            member<word_counter_entry,std::string,&word_counter_entry::word>
        >
    >
> word_counter;

// Tokenizer type used by main() to split the text into words; the
// separator characters are supplied where the tokenizer is constructed.
typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer;

// Counts how often each word occurs in a sample text and prints every
// word followed by its share of the total word count as a percentage.
// Always returns 0.
int main()
{
    std::string text=
        "En un lugar de la Mancha, de cuyo nombre no quiero acordarme... "
        "...snip..."
        "...no se salga un punto de la verdad.";

    // feed the text into the container

    word_counter   wc;
    text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-"));
    unsigned int   total_occurrences = 0;

    for( text_tokenizer::iterator it = tok.begin(), it_end = tok.end();
         it != it_end ; ++it )
    {
        ++total_occurrences;

        // The token is implicitly converted to a word_counter_entry with a
        // zero count. When the word is already present the unique hashed
        // index refuses the insertion and .first designates the entry that
        // is already stored, so wit refers to this word's entry either way.
        word_counter::iterator wit = wc.insert(*it).first;

        // occurrences is the key of the ordered index, so it is changed
        // through modify_key rather than written to directly.
        wc.modify_key( wit, ++ bl::_1 );
    }

    // list words by frequency of appearance

    std::cout << std::fixed << std::setprecision(2);

    // wc.begin()/wc.end() traverse the first index, i.e. the one ordered
    // on occurrences with std::greater.
    for( word_counter::iterator wit = wc.begin(), wit_end=wc.end();
         wit != wit_end; ++wit )
    {
        // '\n' instead of std::endl: there is no need to flush after every
        // line, the stream is flushed when the program exits.
        std::cout << std::setw(11) << wit->word << ": "
                  << std::setw(5)
                  << 100.0 * wit->occurrences / total_occurrences << "%"
                  << '\n';
    }

    return 0;
}
//]
Commit	Line	Data
7c673cae FG	1	// Boost.Bimap
	2	//
	3	// Copyright (c) 2006-2007 Matias Capeletto
	4	//
	5	// Distributed under the Boost Software License, Version 1.0.
	6	// (See accompanying file LICENSE_1_0.txt or copy at
	7	// http://www.boost.org/LICENSE_1_0.txt)
	8
	9
	10	/*****************************************************************************
	11	Boost.MultiIndex
	12	*****************************************************************************/
	13
	14	#include <boost/config.hpp>
	15
	16	//[ code_mi_to_b_path_mi_hashed_indices
	17
	18	#include <iostream>
	19	#include <iomanip>
	20
	21	#include <boost/tokenizer.hpp>
	22
	23	#include <boost/multi_index_container.hpp>
	24	#include <boost/multi_index/key_extractors.hpp>
	25	#include <boost/multi_index/ordered_index.hpp>
	26	#include <boost/multi_index/hashed_index.hpp>
	27	#include <boost/lambda/lambda.hpp>
	28
	29	using namespace boost::multi_index;
	30	namespace bl = boost::lambda;
	31
	32	// word_counter keeps the ocurrences of words inserted. A hashed
	33	// index allows for fast checking of preexisting entries.
	34
	35	struct word_counter_entry
	36	{
	37	std::string word;
	38	unsigned int occurrences;
	39
	40	word_counter_entry( std::string word_ ) : word(word_), occurrences(0) {}
	41	};
	42
	43	typedef multi_index_container
	44	<
	45	word_counter_entry,
	46	indexed_by
	47	<
	48	ordered_non_unique
	49	<
	50	BOOST_MULTI_INDEX_MEMBER(
	51	word_counter_entry,unsigned int,occurrences),
	52	std::greater<unsigned int>
	53	>,
	54	hashed_unique
	55	<
	56	BOOST_MULTI_INDEX_MEMBER(word_counter_entry,std::string,word)
	57	>
	58	>
	59
	60	> word_counter;
	61
	62	typedef boost::tokenizer<boost::char_separator<char> > text_tokenizer;
	63
	64	int main()
65	{
66	std::string text=
67	"En un lugar de la Mancha, de cuyo nombre no quiero acordarme... "
68	"...snip..."
69	"...no se salga un punto de la verdad.";
70
71	// feed the text into the container
72
73	word_counter wc;
74	text_tokenizer tok(text,boost::char_separator<char>(" \t\n.,;:!?'\"-"));
75	unsigned int total_occurrences = 0;
76
77	for( text_tokenizer::iterator it = tok.begin(), it_end = tok.end();
78	it != it_end ; ++it )
79	{
80	++total_occurrences;
81	word_counter::iterator wit = wc.insert(*it).first;
82	wc.modify_key( wit, ++ bl::_1 );
83	}
84
85	// list words by frequency of appearance
86
87	std::cout << std::fixed << std::setprecision(2);
88
89	for( word_counter::iterator wit = wc.begin(), wit_end=wc.end();
90	wit != wit_end; ++wit )
91	{
92	std::cout << std::setw(11) << wit->word << ": "
93	<< std::setw(5)
94	<< 100.0 * wit->occurrences / total_occurrences << "%"
95	<< std::endl;
96	}
97
98	return 0;
99	}
100	//]