ceph/src/boost/boost/math/statistics/signal_statistics.hpp

   1 //  (C) Copyright Nick Thompson 2018.
   2 //  Use, modification and distribution are subject to the
   3 //  Boost Software License, Version 1.0. (See accompanying file
   4 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   5
   6 #ifndef BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP
   7 #define BOOST_MATH_TOOLS_SIGNAL_STATISTICS_HPP
   8
   9 #include <algorithm>
  10 #include <iterator>
  11 #include <boost/assert.hpp>
  12 #include <boost/math/tools/complex.hpp>
  13 #include <boost/math/tools/roots.hpp>
  14 #include <boost/math/statistics/univariate_statistics.hpp>
  15
  16
  17 namespace boost::math::statistics {
  18
  19 template<class ForwardIterator>
  20 auto absolute_gini_coefficient(ForwardIterator first, ForwardIterator last)
  21 {
  22     using std::abs;
  23     using RealOrComplex = typename std::iterator_traits<ForwardIterator>::value_type;
  24     BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
  25
  26     std::sort(first, last,  [](RealOrComplex a, RealOrComplex b) { return abs(b) > abs(a); });
  27
  28
  29     decltype(abs(*first)) i = 1;
  30     decltype(abs(*first)) num = 0;
  31     decltype(abs(*first)) denom = 0;
  32     for (auto it = first; it != last; ++it)
  33     {
  34         decltype(abs(*first)) tmp = abs(*it);
  35         num += tmp*i;
  36         denom += tmp;
  37         ++i;
  38     }
  39
  40     // If the l1 norm is zero, all elements are zero, so every element is the same.
  41     if (denom == 0)
  42     {
  43         decltype(abs(*first)) zero = 0;
  44         return zero;
  45     }
  46     return ((2*num)/denom - i)/(i-1);
  47 }
  48
  49 template<class RandomAccessContainer>
  50 inline auto absolute_gini_coefficient(RandomAccessContainer & v)
  51 {
  52     return boost::math::statistics::absolute_gini_coefficient(v.begin(), v.end());
  53 }
  54
  55 template<class ForwardIterator>
  56 auto sample_absolute_gini_coefficient(ForwardIterator first, ForwardIterator last)
  57 {
  58     size_t n = std::distance(first, last);
  59     return n*boost::math::statistics::absolute_gini_coefficient(first, last)/(n-1);
  60 }
  61
  62 template<class RandomAccessContainer>
  63 inline auto sample_absolute_gini_coefficient(RandomAccessContainer & v)
  64 {
  65     return boost::math::statistics::sample_absolute_gini_coefficient(v.begin(), v.end());
  66 }
  67
  68
  69 // The Hoyer sparsity measure is defined in:
  70 // https://arxiv.org/pdf/0811.4706.pdf
  71 template<class ForwardIterator>
  72 auto hoyer_sparsity(const ForwardIterator first, const ForwardIterator last)
  73 {
  74     using T = typename std::iterator_traits<ForwardIterator>::value_type;
  75     using std::abs;
  76     using std::sqrt;
  77     BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Hoyer sparsity requires at least two samples.");
  78
  79     if constexpr (std::is_unsigned<T>::value)
  80     {
  81         T l1 = 0;
  82         T l2 = 0;
  83         size_t n = 0;
  84         for (auto it = first; it != last; ++it)
  85         {
  86             l1 += *it;
  87             l2 += (*it)*(*it);
  88             n += 1;
  89         }
  90
  91         double rootn = sqrt(n);
  92         return (rootn - l1/sqrt(l2) )/ (rootn - 1);
  93     }
  94     else {
  95         decltype(abs(*first)) l1 = 0;
  96         decltype(abs(*first)) l2 = 0;
  97         // We wouldn't need to count the elements if it was a random access iterator,
  98         // but our only constraint is that it's a forward iterator.
  99         size_t n = 0;
 100         for (auto it = first; it != last; ++it)
 101         {
 102             decltype(abs(*first)) tmp = abs(*it);
 103             l1 += tmp;
 104             l2 += tmp*tmp;
 105             n += 1;
 106         }
 107         if constexpr (std::is_integral<T>::value)
 108         {
 109             double rootn = sqrt(n);
 110             return (rootn - l1/sqrt(l2) )/ (rootn - 1);
 111         }
 112         else
 113         {
 114             decltype(abs(*first)) rootn = sqrt(static_cast<decltype(abs(*first))>(n));
 115             return (rootn - l1/sqrt(l2) )/ (rootn - 1);
 116         }
 117     }
 118 }
 119
 120 template<class Container>
 121 inline auto hoyer_sparsity(Container const & v)
 122 {
 123     return boost::math::statistics::hoyer_sparsity(v.cbegin(), v.cend());
 124 }
 125
 126
 127 template<class Container>
 128 auto oracle_snr(Container const & signal, Container const & noisy_signal)
 129 {
 130     using Real = typename Container::value_type;
 131     BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(),
 132                      "Signal and noisy_signal must be have the same number of elements.");
 133     if constexpr (std::is_integral<Real>::value)
 134     {
 135         double numerator = 0;
 136         double denominator = 0;
 137         for (size_t i = 0; i < signal.size(); ++i)
 138         {
 139             numerator += signal[i]*signal[i];
 140             denominator += (noisy_signal[i] - signal[i])*(noisy_signal[i] - signal[i]);
 141         }
 142         if (numerator == 0 && denominator == 0)
 143         {
 144             return std::numeric_limits<double>::quiet_NaN();
 145         }
 146         if (denominator == 0)
 147         {
 148             return std::numeric_limits<double>::infinity();
 149         }
 150         return numerator/denominator;
 151     }
 152     else if constexpr (boost::math::tools::is_complex_type<Real>::value)
 153
 154     {
 155         using std::norm;
 156         typename Real::value_type numerator = 0;
 157         typename Real::value_type denominator = 0;
 158         for (size_t i = 0; i < signal.size(); ++i)
 159         {
 160             numerator += norm(signal[i]);
 161             denominator += norm(noisy_signal[i] - signal[i]);
 162         }
 163         if (numerator == 0 && denominator == 0)
 164         {
 165             return std::numeric_limits<typename Real::value_type>::quiet_NaN();
 166         }
 167         if (denominator == 0)
 168         {
 169             return std::numeric_limits<typename Real::value_type>::infinity();
 170         }
 171
 172         return numerator/denominator;
 173     }
 174     else
 175     {
 176         Real numerator = 0;
 177         Real denominator = 0;
 178         for (size_t i = 0; i < signal.size(); ++i)
 179         {
 180             numerator += signal[i]*signal[i];
 181             denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]);
 182         }
 183         if (numerator == 0 && denominator == 0)
 184         {
 185             return std::numeric_limits<Real>::quiet_NaN();
 186         }
 187         if (denominator == 0)
 188         {
 189             return std::numeric_limits<Real>::infinity();
 190         }
 191
 192         return numerator/denominator;
 193     }
 194 }
 195
 196 template<class Container>
 197 auto mean_invariant_oracle_snr(Container const & signal, Container const & noisy_signal)
 198 {
 199     using Real = typename Container::value_type;
 200     BOOST_ASSERT_MSG(signal.size() == noisy_signal.size(), "Signal and noisy signal must be have the same number of elements.");
 201
 202     Real mu = boost::math::statistics::mean(signal);
 203     Real numerator = 0;
 204     Real denominator = 0;
 205     for (size_t i = 0; i < signal.size(); ++i)
 206     {
 207         Real tmp = signal[i] - mu;
 208         numerator += tmp*tmp;
 209         denominator += (signal[i] - noisy_signal[i])*(signal[i] - noisy_signal[i]);
 210     }
 211     if (numerator == 0 && denominator == 0)
 212     {
 213         return std::numeric_limits<Real>::quiet_NaN();
 214     }
 215     if (denominator == 0)
 216     {
 217         return std::numeric_limits<Real>::infinity();
 218     }
 219
 220     return numerator/denominator;
 221
 222 }
 223
 224 template<class Container>
 225 auto mean_invariant_oracle_snr_db(Container const & signal, Container const & noisy_signal)
 226 {
 227     using std::log10;
 228     return 10*log10(boost::math::statistics::mean_invariant_oracle_snr(signal, noisy_signal));
 229 }
 230
 231
 232 // Follows the definition of SNR given in Mallat, A Wavelet Tour of Signal Processing, equation 11.16.
 233 template<class Container>
 234 auto oracle_snr_db(Container const & signal, Container const & noisy_signal)
 235 {
 236     using std::log10;
 237     return 10*log10(boost::math::statistics::oracle_snr(signal, noisy_signal));
 238 }
 239
 240 // A good reference on the M2M4 estimator:
 241 // D. R. Pauluzzi and N. C. Beaulieu, "A comparison of SNR estimation techniques for the AWGN channel," IEEE Trans. Communications, Vol. 48, No. 10, pp. 1681-1691, 2000.
 242 // A nice python implementation:
 243 // https://github.com/gnuradio/gnuradio/blob/master/gr-digital/examples/snr_estimators.py
 244 template<class ForwardIterator>
 245 auto m2m4_snr_estimator(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3)
 246 {
 247     BOOST_ASSERT_MSG(estimated_signal_kurtosis > 0, "The estimated signal kurtosis must be positive");
 248     BOOST_ASSERT_MSG(estimated_noise_kurtosis > 0, "The estimated noise kurtosis must be positive.");
 249     using Real = typename std::iterator_traits<ForwardIterator>::value_type;
 250     using std::sqrt;
 251     if constexpr (std::is_floating_point<Real>::value || std::numeric_limits<Real>::max_exponent)
 252     {
 253         // If we first eliminate N, we obtain the quadratic equation:
 254         // (ka+kw-6)S^2 + 2M2(3-kw)S + kw*M2^2 - M4 = 0 =: a*S^2 + bs*N + cs = 0
 255         // If we first eliminate S, we obtain the quadratic equation:
 256         // (ka+kw-6)N^2 + 2M2(3-ka)N + ka*M2^2 - M4 = 0 =: a*N^2 + bn*N + cn = 0
 257         // I believe these equations are totally independent quadratics;
 258         // if one has a complex solution it is not necessarily the case that the other must also.
 259         // However, I can't prove that, so there is a chance that this does unnecessary work.
 260         // Future improvements: There are algorithms which can solve quadratics much more effectively than the naive implementation found here.
 261         // See: https://stackoverflow.com/questions/48979861/numerically-stable-method-for-solving-quadratic-equations/50065711#50065711
 262         auto [M1, M2, M3, M4] = boost::math::statistics::first_four_moments(first, last);
 263         if (M4 == 0)
 264         {
 265             // The signal is constant. There is no noise:
 266             return std::numeric_limits<Real>::infinity();
 267         }
 268         // Change to notation in Pauluzzi, equation 41:
 269         auto kw = estimated_noise_kurtosis;
 270         auto ka = estimated_signal_kurtosis;
 271         // A common case, since it's the default:
 272         Real a = (ka+kw-6);
 273         Real bs = 2*M2*(3-kw);
 274         Real cs = kw*M2*M2 - M4;
 275         Real bn = 2*M2*(3-ka);
 276         Real cn = ka*M2*M2 - M4;
 277         auto [S0, S1] = boost::math::tools::quadratic_roots(a, bs, cs);
 278         if (S1 > 0)
 279         {
 280             auto N = M2 - S1;
 281             if (N > 0)
 282             {
 283                 return S1/N;
 284             }
 285             if (S0 > 0)
 286             {
 287                 N = M2 - S0;
 288                 if (N > 0)
 289                 {
 290                     return S0/N;
 291                 }
 292             }
 293         }
 294         auto [N0, N1] = boost::math::tools::quadratic_roots(a, bn, cn);
 295         if (N1 > 0)
 296         {
 297             auto S = M2 - N1;
 298             if (S > 0)
 299             {
 300                 return S/N1;
 301             }
 302             if (N0 > 0)
 303             {
 304                 S = M2 - N0;
 305                 if (S > 0)
 306                 {
 307                     return S/N0;
 308                 }
 309             }
 310         }
 311         // This happens distressingly often. It's a limitation of the method.
 312         return std::numeric_limits<Real>::quiet_NaN();
 313     }
 314     else
 315     {
 316         BOOST_ASSERT_MSG(false, "The M2M4 estimator has not been implemented for this type.");
 317         return std::numeric_limits<Real>::quiet_NaN();
 318     }
 319 }
 320
 321 template<class Container>
 322 inline auto m2m4_snr_estimator(Container const & noisy_signal,  typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3)
 323 {
 324     return m2m4_snr_estimator(noisy_signal.cbegin(), noisy_signal.cend(), estimated_signal_kurtosis, estimated_noise_kurtosis);
 325 }
 326
 327 template<class ForwardIterator>
 328 inline auto m2m4_snr_estimator_db(ForwardIterator first, ForwardIterator last, decltype(*first) estimated_signal_kurtosis=1, decltype(*first) estimated_noise_kurtosis=3)
 329 {
 330     using std::log10;
 331     return 10*log10(m2m4_snr_estimator(first, last, estimated_signal_kurtosis, estimated_noise_kurtosis));
 332 }
 333
 334
 335 template<class Container>
 336 inline auto m2m4_snr_estimator_db(Container const & noisy_signal,  typename Container::value_type estimated_signal_kurtosis=1, typename Container::value_type estimated_noise_kurtosis=3)
 337 {
 338     using std::log10;
 339     return 10*log10(m2m4_snr_estimator(noisy_signal, estimated_signal_kurtosis, estimated_noise_kurtosis));
 340 }
 341
 342 }
 343 #endif