]>
Commit | Line | Data |
---|---|---|
92f5a8d4 TL |
1 | // (C) Copyright Nick Thompson 2018. |
2 | // Use, modification and distribution are subject to the | |
3 | // Boost Software License, Version 1.0. (See accompanying file | |
4 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) | |
5 | ||
6 | #ifndef BOOST_MATH_STATISTICS_BIVARIATE_STATISTICS_HPP | |
7 | #define BOOST_MATH_STATISTICS_BIVARIATE_STATISTICS_HPP | |
8 | ||
9 | #include <iterator> | |
10 | #include <tuple> | |
11 | #include <boost/assert.hpp> | |
12 | ||
13 | ||
14 | namespace boost{ namespace math{ namespace statistics { | |
15 | ||
16 | template<class Container> | |
17 | auto means_and_covariance(Container const & u, Container const & v) | |
18 | { | |
19 | using Real = typename Container::value_type; | |
20 | using std::size; | |
21 | BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); | |
22 | BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample."); | |
23 | ||
24 | // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al. | |
25 | Real cov = 0; | |
26 | Real mu_u = u[0]; | |
27 | Real mu_v = v[0]; | |
28 | ||
29 | for(size_t i = 1; i < size(u); ++i) | |
30 | { | |
31 | Real u_tmp = (u[i] - mu_u)/(i+1); | |
32 | Real v_tmp = v[i] - mu_v; | |
33 | cov += i*u_tmp*v_tmp; | |
34 | mu_u = mu_u + u_tmp; | |
35 | mu_v = mu_v + v_tmp/(i+1); | |
36 | } | |
37 | ||
38 | return std::make_tuple(mu_u, mu_v, cov/size(u)); | |
39 | } | |
40 | ||
41 | template<class Container> | |
42 | auto covariance(Container const & u, Container const & v) | |
43 | { | |
44 | auto [mu_u, mu_v, cov] = boost::math::statistics::means_and_covariance(u, v); | |
45 | return cov; | |
46 | } | |
47 | ||
48 | template<class Container> | |
49 | auto correlation_coefficient(Container const & u, Container const & v) | |
50 | { | |
51 | using Real = typename Container::value_type; | |
52 | using std::size; | |
53 | BOOST_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); | |
54 | BOOST_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples."); | |
55 | ||
56 | Real cov = 0; | |
57 | Real mu_u = u[0]; | |
58 | Real mu_v = v[0]; | |
59 | Real Qu = 0; | |
60 | Real Qv = 0; | |
61 | ||
62 | for(size_t i = 1; i < size(u); ++i) | |
63 | { | |
64 | Real u_tmp = u[i] - mu_u; | |
65 | Real v_tmp = v[i] - mu_v; | |
66 | Qu = Qu + (i*u_tmp*u_tmp)/(i+1); | |
67 | Qv = Qv + (i*v_tmp*v_tmp)/(i+1); | |
68 | cov += i*u_tmp*v_tmp/(i+1); | |
69 | mu_u = mu_u + u_tmp/(i+1); | |
70 | mu_v = mu_v + v_tmp/(i+1); | |
71 | } | |
72 | ||
73 | // If both datasets are constant, then they are perfectly correlated. | |
74 | if (Qu == 0 && Qv == 0) | |
75 | { | |
76 | return Real(1); | |
77 | } | |
78 | // If one dataset is constant and the other isn't, then they have no correlation: | |
79 | if (Qu == 0 || Qv == 0) | |
80 | { | |
81 | return Real(0); | |
82 | } | |
83 | ||
84 | // Make sure rho in [-1, 1], even in the presence of numerical noise. | |
85 | Real rho = cov/sqrt(Qu*Qv); | |
86 | if (rho > 1) { | |
87 | rho = 1; | |
88 | } | |
89 | if (rho < -1) { | |
90 | rho = -1; | |
91 | } | |
92 | return rho; | |
93 | } | |
94 | ||
95 | }}} | |
96 | #endif |