]> git.proxmox.com Git - ceph.git/blame - ceph/src/boost/boost/math/statistics/univariate_statistics.hpp
import quincy beta 17.1.0
[ceph.git] / ceph / src / boost / boost / math / statistics / univariate_statistics.hpp
CommitLineData
92f5a8d4
TL
1// (C) Copyright Nick Thompson 2018.
2// Use, modification and distribution are subject to the
3// Boost Software License, Version 1.0. (See accompanying file
4// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5
6#ifndef BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP
7#define BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP
8
9#include <algorithm>
10#include <iterator>
f67539c2
TL
11#include <tuple>
12#include <cmath>
20effc67 13#include <vector>
92f5a8d4
TL
14#include <boost/assert.hpp>
15
16namespace boost::math::statistics {
17
18template<class ForwardIterator>
19auto mean(ForwardIterator first, ForwardIterator last)
20{
21 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
22 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
23 if constexpr (std::is_integral<Real>::value)
24 {
25 double mu = 0;
26 double i = 1;
27 for(auto it = first; it != last; ++it) {
28 mu = mu + (*it - mu)/i;
29 i += 1;
30 }
31 return mu;
32 }
33 else if constexpr (std::is_same_v<typename std::iterator_traits<ForwardIterator>::iterator_category, std::random_access_iterator_tag>)
34 {
35 size_t elements = std::distance(first, last);
36 Real mu0 = 0;
37 Real mu1 = 0;
38 Real mu2 = 0;
39 Real mu3 = 0;
40 Real i = 1;
41 auto end = last - (elements % 4);
42 for(auto it = first; it != end; it += 4) {
43 Real inv = Real(1)/i;
44 Real tmp0 = (*it - mu0);
45 Real tmp1 = (*(it+1) - mu1);
46 Real tmp2 = (*(it+2) - mu2);
47 Real tmp3 = (*(it+3) - mu3);
48 // please generate a vectorized fma here
49 mu0 += tmp0*inv;
50 mu1 += tmp1*inv;
51 mu2 += tmp2*inv;
52 mu3 += tmp3*inv;
53 i += 1;
54 }
55 Real num1 = Real(elements - (elements %4))/Real(4);
56 Real num2 = num1 + Real(elements % 4);
57
58 for (auto it = end; it != last; ++it)
59 {
60 mu3 += (*it-mu3)/i;
61 i += 1;
62 }
63
64 return (num1*(mu0+mu1+mu2) + num2*mu3)/Real(elements);
65 }
66 else
67 {
68 auto it = first;
69 Real mu = *it;
70 Real i = 2;
71 while(++it != last)
72 {
73 mu += (*it - mu)/i;
74 i += 1;
75 }
76 return mu;
77 }
78}
79
80template<class Container>
81inline auto mean(Container const & v)
82{
83 return mean(v.cbegin(), v.cend());
84}
85
86template<class ForwardIterator>
87auto variance(ForwardIterator first, ForwardIterator last)
88{
89 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
90 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
91 // Higham, Accuracy and Stability, equation 1.6a and 1.6b:
92 if constexpr (std::is_integral<Real>::value)
93 {
94 double M = *first;
95 double Q = 0;
96 double k = 2;
97 for (auto it = std::next(first); it != last; ++it)
98 {
99 double tmp = *it - M;
100 Q = Q + ((k-1)*tmp*tmp)/k;
101 M = M + tmp/k;
102 k += 1;
103 }
104 return Q/(k-1);
105 }
106 else
107 {
108 Real M = *first;
109 Real Q = 0;
110 Real k = 2;
111 for (auto it = std::next(first); it != last; ++it)
112 {
113 Real tmp = (*it - M)/k;
114 Q += k*(k-1)*tmp*tmp;
115 M += tmp;
116 k += 1;
117 }
118 return Q/(k-1);
119 }
120}
121
122template<class Container>
123inline auto variance(Container const & v)
124{
125 return variance(v.cbegin(), v.cend());
126}
127
128template<class ForwardIterator>
129auto sample_variance(ForwardIterator first, ForwardIterator last)
130{
131 size_t n = std::distance(first, last);
132 BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance.");
133 return n*variance(first, last)/(n-1);
134}
135
136template<class Container>
137inline auto sample_variance(Container const & v)
138{
139 return sample_variance(v.cbegin(), v.cend());
140}
141
142template<class ForwardIterator>
143auto mean_and_sample_variance(ForwardIterator first, ForwardIterator last)
144{
145 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
146 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
147 // Higham, Accuracy and Stability, equation 1.6a and 1.6b:
148 if constexpr (std::is_integral<Real>::value)
149 {
150 double M = *first;
151 double Q = 0;
152 double k = 2;
153 for (auto it = std::next(first); it != last; ++it)
154 {
155 double tmp = *it - M;
156 Q = Q + ((k-1)*tmp*tmp)/k;
157 M = M + tmp/k;
158 k += 1;
159 }
160 return std::pair<double, double>{M, Q/(k-2)};
161 }
162 else
163 {
164 Real M = *first;
165 Real Q = 0;
166 Real k = 2;
167 for (auto it = std::next(first); it != last; ++it)
168 {
169 Real tmp = (*it - M)/k;
170 Q += k*(k-1)*tmp*tmp;
171 M += tmp;
172 k += 1;
173 }
174 return std::pair<Real, Real>{M, Q/(k-2)};
175 }
176}
177
178template<class Container>
179auto mean_and_sample_variance(Container const & v)
180{
181 return mean_and_sample_variance(v.begin(), v.end());
182}
183
184// Follows equation 1.5 of:
185// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
186template<class ForwardIterator>
187auto skewness(ForwardIterator first, ForwardIterator last)
188{
189 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
f67539c2 190 using std::sqrt;
92f5a8d4
TL
191 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness.");
192 if constexpr (std::is_integral<Real>::value)
193 {
194 double M1 = *first;
195 double M2 = 0;
196 double M3 = 0;
197 double n = 2;
198 for (auto it = std::next(first); it != last; ++it)
199 {
200 double delta21 = *it - M1;
201 double tmp = delta21/n;
202 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
203 M2 = M2 + tmp*(n-1)*delta21;
204 M1 = M1 + tmp;
205 n += 1;
206 }
207
208 double var = M2/(n-1);
209 if (var == 0)
210 {
211 // The limit is technically undefined, but the interpretation here is clear:
212 // A constant dataset has no skewness.
213 return double(0);
214 }
215 double skew = M3/(M2*sqrt(var));
216 return skew;
217 }
218 else
219 {
220 Real M1 = *first;
221 Real M2 = 0;
222 Real M3 = 0;
223 Real n = 2;
224 for (auto it = std::next(first); it != last; ++it)
225 {
226 Real delta21 = *it - M1;
227 Real tmp = delta21/n;
228 M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
229 M2 += tmp*(n-1)*delta21;
230 M1 += tmp;
231 n += 1;
232 }
233
234 Real var = M2/(n-1);
235 if (var == 0)
236 {
237 // The limit is technically undefined, but the interpretation here is clear:
238 // A constant dataset has no skewness.
239 return Real(0);
240 }
241 Real skew = M3/(M2*sqrt(var));
242 return skew;
243 }
244}
245
246template<class Container>
247inline auto skewness(Container const & v)
248{
249 return skewness(v.cbegin(), v.cend());
250}
251
252// Follows equation 1.5/1.6 of:
253// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
254template<class ForwardIterator>
255auto first_four_moments(ForwardIterator first, ForwardIterator last)
256{
257 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
258 BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments.");
259 if constexpr (std::is_integral<Real>::value)
260 {
261 double M1 = *first;
262 double M2 = 0;
263 double M3 = 0;
264 double M4 = 0;
265 double n = 2;
266 for (auto it = std::next(first); it != last; ++it)
267 {
268 double delta21 = *it - M1;
269 double tmp = delta21/n;
270 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
271 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
272 M2 = M2 + tmp*(n-1)*delta21;
273 M1 = M1 + tmp;
274 n += 1;
275 }
276
277 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
278 }
279 else
280 {
281 Real M1 = *first;
282 Real M2 = 0;
283 Real M3 = 0;
284 Real M4 = 0;
285 Real n = 2;
286 for (auto it = std::next(first); it != last; ++it)
287 {
288 Real delta21 = *it - M1;
289 Real tmp = delta21/n;
290 M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
291 M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
292 M2 = M2 + tmp*(n-1)*delta21;
293 M1 = M1 + tmp;
294 n += 1;
295 }
296
297 return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
298 }
299}
300
301template<class Container>
302inline auto first_four_moments(Container const & v)
303{
304 return first_four_moments(v.cbegin(), v.cend());
305}
306
307
308// Follows equation 1.6 of:
309// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
310template<class ForwardIterator>
311auto kurtosis(ForwardIterator first, ForwardIterator last)
312{
313 auto [M1, M2, M3, M4] = first_four_moments(first, last);
314 if (M2 == 0)
315 {
316 return M2;
317 }
318 return M4/(M2*M2);
319}
320
321template<class Container>
322inline auto kurtosis(Container const & v)
323{
324 return kurtosis(v.cbegin(), v.cend());
325}
326
327template<class ForwardIterator>
328auto excess_kurtosis(ForwardIterator first, ForwardIterator last)
329{
330 return kurtosis(first, last) - 3;
331}
332
333template<class Container>
334inline auto excess_kurtosis(Container const & v)
335{
336 return excess_kurtosis(v.cbegin(), v.cend());
337}
338
339
340template<class RandomAccessIterator>
341auto median(RandomAccessIterator first, RandomAccessIterator last)
342{
343 size_t num_elems = std::distance(first, last);
344 BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
345 if (num_elems & 1)
346 {
347 auto middle = first + (num_elems - 1)/2;
348 std::nth_element(first, middle, last);
349 return *middle;
350 }
351 else
352 {
353 auto middle = first + num_elems/2 - 1;
354 std::nth_element(first, middle, last);
355 std::nth_element(middle, middle+1, last);
356 return (*middle + *(middle+1))/2;
357 }
358}
359
360
361template<class RandomAccessContainer>
362inline auto median(RandomAccessContainer & v)
363{
364 return median(v.begin(), v.end());
365}
366
367template<class RandomAccessIterator>
368auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
369{
370 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
371 BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
372
373 std::sort(first, last);
374 if constexpr (std::is_integral<Real>::value)
375 {
376 double i = 1;
377 double num = 0;
378 double denom = 0;
379 for (auto it = first; it != last; ++it)
380 {
381 num += *it*i;
382 denom += *it;
383 ++i;
384 }
385
386 // If the l1 norm is zero, all elements are zero, so every element is the same.
387 if (denom == 0)
388 {
389 return double(0);
390 }
391
392 return ((2*num)/denom - i)/(i-1);
393 }
394 else
395 {
396 Real i = 1;
397 Real num = 0;
398 Real denom = 0;
399 for (auto it = first; it != last; ++it)
400 {
401 num += *it*i;
402 denom += *it;
403 ++i;
404 }
405
406 // If the l1 norm is zero, all elements are zero, so every element is the same.
407 if (denom == 0)
408 {
409 return Real(0);
410 }
411
412 return ((2*num)/denom - i)/(i-1);
413 }
414}
415
416template<class RandomAccessContainer>
417inline auto gini_coefficient(RandomAccessContainer & v)
418{
419 return gini_coefficient(v.begin(), v.end());
420}
421
422template<class RandomAccessIterator>
423inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
424{
425 size_t n = std::distance(first, last);
426 return n*gini_coefficient(first, last)/(n-1);
427}
428
429template<class RandomAccessContainer>
430inline auto sample_gini_coefficient(RandomAccessContainer & v)
431{
432 return sample_gini_coefficient(v.begin(), v.end());
433}
434
435template<class RandomAccessIterator>
436auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN())
437{
438 using std::abs;
439 using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
440 using std::isnan;
441 if (isnan(center))
442 {
443 center = boost::math::statistics::median(first, last);
444 }
445 size_t num_elems = std::distance(first, last);
446 BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined.");
447 auto comparator = [&center](Real a, Real b) { return abs(a-center) < abs(b-center);};
448 if (num_elems & 1)
449 {
450 auto middle = first + (num_elems - 1)/2;
451 std::nth_element(first, middle, last, comparator);
452 return abs(*middle);
453 }
454 else
455 {
456 auto middle = first + num_elems/2 - 1;
457 std::nth_element(first, middle, last, comparator);
458 std::nth_element(middle, middle+1, last, comparator);
459 return (abs(*middle) + abs(*(middle+1)))/abs(static_cast<Real>(2));
460 }
461}
462
463template<class RandomAccessContainer>
464inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN())
465{
466 return median_absolute_deviation(v.begin(), v.end(), center);
467}
468
f67539c2
TL
469template<class ForwardIterator>
470auto interquartile_range(ForwardIterator first, ForwardIterator last)
471{
472 using Real = typename std::iterator_traits<ForwardIterator>::value_type;
473 static_assert(!std::is_integral<Real>::value, "Integer values have not yet been implemented.");
474 auto m = std::distance(first,last);
475 BOOST_ASSERT_MSG(m >= 3, "At least 3 samples are required to compute the interquartile range.");
476 auto k = m/4;
477 auto j = m - (4*k);
478 // m = 4k+j.
479 // If j = 0 or j = 1, then there are an even number of samples below the median, and an even number above the median.
480 // Then we must average adjacent elements to get the quartiles.
481 // If j = 2 or j = 3, there are an odd number of samples above and below the median, these elements may be directly extracted to get the quartiles.
482
483 if (j==2 || j==3)
484 {
485 auto q1 = first + k;
486 auto q3 = first + 3*k + j - 1;
487 std::nth_element(first, q1, last);
488 Real Q1 = *q1;
489 std::nth_element(q1, q3, last);
490 Real Q3 = *q3;
491 return Q3 - Q1;
492 } else {
493 // j == 0 or j==1:
494 auto q1 = first + k - 1;
495 auto q3 = first + 3*k - 1 + j;
496 std::nth_element(first, q1, last);
497 Real a = *q1;
498 std::nth_element(q1, q1 + 1, last);
499 Real b = *(q1 + 1);
500 Real Q1 = (a+b)/2;
501 std::nth_element(q1, q3, last);
502 a = *q3;
503 std::nth_element(q3, q3 + 1, last);
504 b = *(q3 + 1);
505 Real Q3 = (a+b)/2;
506 return Q3 - Q1;
507 }
508}
509
510template<class RandomAccessContainer>
511inline auto interquartile_range(RandomAccessContainer & v)
512{
513 return interquartile_range(v.begin(), v.end());
514}
515
20effc67
TL
516template<class ForwardIterator, class OutputIterator>
517auto sorted_mode(ForwardIterator first, ForwardIterator last, OutputIterator output) -> decltype(output)
518{
519 using Z = typename std::iterator_traits<ForwardIterator>::value_type;
520 static_assert(std::is_integral<Z>::value, "Floating point values have not yet been implemented.");
521 using Size = typename std::iterator_traits<ForwardIterator>::difference_type;
522
523 std::vector<Z> modes {};
524 modes.reserve(16);
525 Size max_counter {0};
526
527 while(first != last)
528 {
529 Size current_count {0};
530 auto end_it {first};
531 while(end_it != last && *end_it == *first)
532 {
533 ++current_count;
534 ++end_it;
535 }
536
537 if(current_count > max_counter)
538 {
539 modes.resize(1);
540 modes[0] = *first;
541 max_counter = current_count;
542 }
543
544 else if(current_count == max_counter)
545 {
546 modes.emplace_back(*first);
547 }
548
549 first = end_it;
550 }
551
552 return std::move(modes.begin(), modes.end(), output);
553}
554
555template<class Container, class OutputIterator>
556inline auto sorted_mode(Container & v, OutputIterator output) -> decltype(output)
557{
558 return sorted_mode(v.begin(), v.end(), output);
559}
560
561template<class RandomAccessIterator, class OutputIterator>
562auto mode(RandomAccessIterator first, RandomAccessIterator last, OutputIterator output) -> decltype(output)
563{
564 std::sort(first, last);
565 return sorted_mode(first, last, output);
566}
567
568template<class RandomAccessContainer, class OutputIterator>
569inline auto mode(RandomAccessContainer & v, OutputIterator output) -> decltype(output)
570{
571 return mode(v.begin(), v.end(), output);
572}
f67539c2 573
92f5a8d4
TL
574}
575#endif