]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | /////////////////////////////////////////////////////////////////////////////// | |
3 | // density.hpp | |
4 | // | |
5 | // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost | |
6 | // Software License, Version 1.0. (See accompanying file | |
7 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
8 | ||
9 | #ifndef BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006 | |
10 | #define BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006 | |
11 | ||
12 | #include <vector> | |
13 | #include <limits> | |
14 | #include <functional> | |
15 | #include <boost/range.hpp> | |
16 | #include <boost/parameter/keyword.hpp> | |
17 | #include <boost/mpl/placeholders.hpp> | |
18 | #include <boost/accumulators/accumulators_fwd.hpp> | |
19 | #include <boost/accumulators/framework/accumulator_base.hpp> | |
20 | #include <boost/accumulators/framework/extractor.hpp> | |
21 | #include <boost/accumulators/numeric/functional.hpp> | |
22 | #include <boost/accumulators/framework/parameters/sample.hpp> | |
23 | #include <boost/accumulators/framework/depends_on.hpp> | |
24 | #include <boost/accumulators/statistics_fwd.hpp> | |
25 | #include <boost/accumulators/statistics/count.hpp> | |
26 | #include <boost/accumulators/statistics/max.hpp> | |
27 | #include <boost/accumulators/statistics/min.hpp> | |
28 | ||
29 | namespace boost { namespace accumulators | |
30 | { | |
31 | ||
32 | /////////////////////////////////////////////////////////////////////////////// | |
33 | // cache_size and num_bins named parameters | |
34 | // | |
35 | BOOST_PARAMETER_NESTED_KEYWORD(tag, density_cache_size, cache_size) | |
36 | BOOST_PARAMETER_NESTED_KEYWORD(tag, density_num_bins, num_bins) | |
37 | ||
38 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(density_cache_size) | |
39 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(density_num_bins) | |
40 | ||
41 | namespace impl | |
42 | { | |
43 | /////////////////////////////////////////////////////////////////////////////// | |
44 | // density_impl | |
45 | // density histogram | |
46 | /** | |
47 | @brief Histogram density estimator | |
48 | ||
49 | The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins | |
50 | are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the | |
51 | maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally, | |
52 | an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined, | |
53 | the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is | |
54 | return, where each pair contains the position of the bin (lower bound) and the samples count (normalized with the | |
55 | total number of samples). | |
56 | ||
57 | @param density_cache_size Number of first samples used to determine min and max. | |
58 | @param density_num_bins Number of bins (two additional bins collect under- and overflow samples). | |
59 | */ | |
60 | template<typename Sample> | |
61 | struct density_impl | |
62 | : accumulator_base | |
63 | { | |
64 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; | |
65 | typedef std::vector<std::pair<float_type, float_type> > histogram_type; | |
66 | typedef std::vector<float_type> array_type; | |
67 | // for boost::result_of | |
68 | typedef iterator_range<typename histogram_type::iterator> result_type; | |
69 | ||
70 | template<typename Args> | |
71 | density_impl(Args const &args) | |
72 | : cache_size(args[density_cache_size]) | |
73 | , cache(cache_size) | |
74 | , num_bins(args[density_num_bins]) | |
75 | , samples_in_bin(num_bins + 2, 0.) | |
76 | , bin_positions(num_bins + 2) | |
77 | , histogram( | |
78 | num_bins + 2 | |
79 | , std::make_pair( | |
80 | numeric::fdiv(args[sample | Sample()],(std::size_t)1) | |
81 | , numeric::fdiv(args[sample | Sample()],(std::size_t)1) | |
82 | ) | |
83 | ) | |
84 | , is_dirty(true) | |
85 | { | |
86 | } | |
87 | ||
88 | template<typename Args> | |
89 | void operator ()(Args const &args) | |
90 | { | |
91 | this->is_dirty = true; | |
92 | ||
93 | std::size_t cnt = count(args); | |
94 | ||
95 | // Fill up cache with cache_size first samples | |
96 | if (cnt <= this->cache_size) | |
97 | { | |
98 | this->cache[cnt - 1] = args[sample]; | |
99 | } | |
100 | ||
101 | // Once cache_size samples have been accumulated, create num_bins bins of same size between | |
102 | // the minimum and maximum of the cached samples as well as under and overflow bins. | |
103 | // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin). | |
104 | if (cnt == this->cache_size) | |
105 | { | |
106 | float_type minimum = numeric::fdiv((min)(args), (std::size_t)1); | |
107 | float_type maximum = numeric::fdiv((max)(args), (std::size_t)1); | |
108 | float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins ); | |
109 | ||
110 | // determine bin positions (their lower bounds) | |
111 | for (std::size_t i = 0; i < this->num_bins + 2; ++i) | |
112 | { | |
113 | this->bin_positions[i] = minimum + (i - 1.) * bin_size; | |
114 | } | |
115 | ||
116 | for (typename array_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter) | |
117 | { | |
118 | if (*iter < this->bin_positions[1]) | |
119 | { | |
120 | ++(this->samples_in_bin[0]); | |
121 | } | |
122 | else if (*iter >= this->bin_positions[this->num_bins + 1]) | |
123 | { | |
124 | ++(this->samples_in_bin[this->num_bins + 1]); | |
125 | } | |
126 | else | |
127 | { | |
128 | typename array_type::iterator it = std::upper_bound( | |
129 | this->bin_positions.begin() | |
130 | , this->bin_positions.end() | |
131 | , *iter | |
132 | ); | |
133 | ||
134 | std::size_t d = std::distance(this->bin_positions.begin(), it); | |
135 | ++(this->samples_in_bin[d - 1]); | |
136 | } | |
137 | } | |
138 | } | |
139 | // Add each subsequent sample to the correct bin | |
140 | else if (cnt > this->cache_size) | |
141 | { | |
142 | if (args[sample] < this->bin_positions[1]) | |
143 | { | |
144 | ++(this->samples_in_bin[0]); | |
145 | } | |
146 | else if (args[sample] >= this->bin_positions[this->num_bins + 1]) | |
147 | { | |
148 | ++(this->samples_in_bin[this->num_bins + 1]); | |
149 | } | |
150 | else | |
151 | { | |
152 | typename array_type::iterator it = std::upper_bound( | |
153 | this->bin_positions.begin() | |
154 | , this->bin_positions.end() | |
155 | , args[sample] | |
156 | ); | |
157 | ||
158 | std::size_t d = std::distance(this->bin_positions.begin(), it); | |
159 | ++(this->samples_in_bin[d - 1]); | |
160 | } | |
161 | } | |
162 | } | |
163 | ||
164 | /** | |
165 | @pre The number of samples must meet or exceed the cache size | |
166 | */ | |
167 | template<typename Args> | |
168 | result_type result(Args const &args) const | |
169 | { | |
170 | if (this->is_dirty) | |
171 | { | |
172 | this->is_dirty = false; | |
173 | ||
174 | // creates a vector of std::pair where each pair i holds | |
175 | // the values bin_positions[i] (x-axis of histogram) and | |
176 | // samples_in_bin[i] / cnt (y-axis of histogram). | |
177 | ||
178 | for (std::size_t i = 0; i < this->num_bins + 2; ++i) | |
179 | { | |
180 | this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], count(args))); | |
181 | } | |
182 | } | |
183 | // returns a range of pairs | |
184 | return make_iterator_range(this->histogram); | |
185 | } | |
186 | ||
187 | private: | |
188 | std::size_t cache_size; // number of cached samples | |
189 | array_type cache; // cache to store the first cache_size samples | |
190 | std::size_t num_bins; // number of bins | |
191 | array_type samples_in_bin; // number of samples in each bin | |
192 | array_type bin_positions; // lower bounds of bins | |
193 | mutable histogram_type histogram; // histogram | |
194 | mutable bool is_dirty; | |
195 | }; | |
196 | ||
197 | } // namespace impl | |
198 | ||
199 | /////////////////////////////////////////////////////////////////////////////// | |
200 | // tag::density | |
201 | // | |
202 | namespace tag | |
203 | { | |
204 | struct density | |
205 | : depends_on<count, min, max> | |
206 | , density_cache_size | |
207 | , density_num_bins | |
208 | { | |
209 | /// INTERNAL ONLY | |
210 | /// | |
211 | typedef accumulators::impl::density_impl<mpl::_1> impl; | |
212 | ||
213 | #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED | |
214 | /// tag::density::cache_size named parameter | |
215 | /// tag::density::num_bins named parameter | |
216 | static boost::parameter::keyword<density_cache_size> const cache_size; | |
217 | static boost::parameter::keyword<density_num_bins> const num_bins; | |
218 | #endif | |
219 | }; | |
220 | } | |
221 | ||
222 | /////////////////////////////////////////////////////////////////////////////// | |
223 | // extract::density | |
224 | // | |
225 | namespace extract | |
226 | { | |
227 | extractor<tag::density> const density = {}; | |
228 | ||
229 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(density) | |
230 | } | |
231 | ||
232 | using extract::density; | |
233 | ||
234 | // So that density can be automatically substituted | |
235 | // with weighted_density when the weight parameter is non-void. | |
236 | template<> | |
237 | struct as_weighted_feature<tag::density> | |
238 | { | |
239 | typedef tag::weighted_density type; | |
240 | }; | |
241 | ||
242 | template<> | |
243 | struct feature_of<tag::weighted_density> | |
244 | : feature_of<tag::density> | |
245 | { | |
246 | }; | |
247 | ||
248 | }} // namespace boost::accumulators | |
249 | ||
250 | #endif |