[ceph.git] / ceph / src / boost / boost / math / statistics / univariate_statistics.hpp

//  (C) Copyright Nick Thompson 2018.
//  Use, modification and distribution are subject to the
//  Boost Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#ifndef BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP
#define BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP

#include <algorithm>
#include <iterator>
#include <tuple>
#include <cmath>
#include <vector>
#include <boost/assert.hpp>

namespace boost::math::statistics {

template<class ForwardIterator>
auto mean(ForwardIterator first, ForwardIterator last)
{
    using Real = typename std::iterator_traits<ForwardIterator>::value_type;
    BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
    if constexpr (std::is_integral<Real>::value)
    {
        double mu = 0;
        double i = 1;
        for(auto it = first; it != last; ++it) {
            mu = mu + (*it - mu)/i;
            i += 1;
        }
        return mu;
    }
    else if constexpr (std::is_same_v<typename std::iterator_traits<ForwardIterator>::iterator_category, std::random_access_iterator_tag>)
    {
        size_t elements = std::distance(first, last);
        Real mu0 = 0;
        Real mu1 = 0;
        Real mu2 = 0;
        Real mu3 = 0;
        Real i = 1;
        auto end = last - (elements % 4);
        for(auto it = first; it != end;  it += 4) {
            Real inv = Real(1)/i;
            Real tmp0 = (*it - mu0);
            Real tmp1 = (*(it+1) - mu1);
            Real tmp2 = (*(it+2) - mu2);
            Real tmp3 = (*(it+3) - mu3);
            // please generate a vectorized fma here
            mu0 += tmp0*inv;
            mu1 += tmp1*inv;
            mu2 += tmp2*inv;
            mu3 += tmp3*inv;
            i += 1;
        }
        Real num1 = Real(elements  - (elements %4))/Real(4);
        Real num2 = num1 + Real(elements % 4);

        for (auto it = end; it != last; ++it)
        {
            mu3 += (*it-mu3)/i;
            i += 1;
        }

        return (num1*(mu0+mu1+mu2) + num2*mu3)/Real(elements);
    }
    else
    {
        auto it = first;
        Real mu = *it;
        Real i = 2;
        while(++it != last)
        {
            mu += (*it - mu)/i;
            i += 1;
        }
        return mu;
    }
}

template<class Container>
inline auto mean(Container const & v)
{
    return mean(v.cbegin(), v.cend());
}

template<class ForwardIterator>
auto variance(ForwardIterator first, ForwardIterator last)
{
    using Real = typename std::iterator_traits<ForwardIterator>::value_type;
    BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
    // Higham, Accuracy and Stability, equation 1.6a and 1.6b:
    if constexpr (std::is_integral<Real>::value)
    {
        double M = *first;
        double Q = 0;
        double k = 2;
        for (auto it = std::next(first); it != last; ++it)
        {
            double tmp = *it - M;
            Q = Q + ((k-1)*tmp*tmp)/k;
            M = M + tmp/k;
            k += 1;
        }
        return Q/(k-1);
    }
    else
    {
        Real M = *first;
        Real Q = 0;
        Real k = 2;
        for (auto it = std::next(first); it != last; ++it)
        {
            Real tmp = (*it - M)/k;
            Q += k*(k-1)*tmp*tmp;
            M += tmp;
            k += 1;
        }
        return Q/(k-1);
    }
}

template<class Container>
inline auto variance(Container const & v)
{
    return variance(v.cbegin(), v.cend());
}

template<class ForwardIterator>
auto sample_variance(ForwardIterator first, ForwardIterator last)
{
    size_t n = std::distance(first, last);
    BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance.");
    return n*variance(first, last)/(n-1);
}

template<class Container>
inline auto sample_variance(Container const & v)
{
    return sample_variance(v.cbegin(), v.cend());
}

template<class ForwardIterator>
auto mean_and_sample_variance(ForwardIterator first, ForwardIterator last)
{
    using Real = typename std::iterator_traits<ForwardIterator>::value_type;
    BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
    // Higham, Accuracy and Stability, equation 1.6a and 1.6b:
    if constexpr (std::is_integral<Real>::value)
    {
        double M = *first;
        double Q = 0;
        double k = 2;
        for (auto it = std::next(first); it != last; ++it)
        {
            double tmp = *it - M;
            Q = Q + ((k-1)*tmp*tmp)/k;
            M = M + tmp/k;
            k += 1;
        }
        return std::pair<double, double>{M, Q/(k-2)};
    }
    else
    {
        Real M = *first;
        Real Q = 0;
        Real k = 2;
        for (auto it = std::next(first); it != last; ++it)
        {
            Real tmp = (*it - M)/k;
            Q += k*(k-1)*tmp*tmp;
            M += tmp;
            k += 1;
        }
        return std::pair<Real, Real>{M, Q/(k-2)};
    }
}

template<class Container>
auto mean_and_sample_variance(Container const & v)
{
    return mean_and_sample_variance(v.begin(), v.end());
}

// Follows equation 1.5 of:
// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
template<class ForwardIterator>
auto skewness(ForwardIterator first, ForwardIterator last)
{
    using Real = typename std::iterator_traits<ForwardIterator>::value_type;
    using std::sqrt;
    BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness.");
    if constexpr (std::is_integral<Real>::value)
    {
        double M1 = *first;
        double M2 = 0;
        double M3 = 0;
        double n = 2;
        for (auto it = std::next(first); it != last; ++it)
        {
            double delta21 = *it - M1;
            double tmp = delta21/n;
            M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
            M2 = M2 + tmp*(n-1)*delta21;
            M1 = M1 + tmp;
            n += 1;
        }

        double var = M2/(n-1);
        if (var == 0)
        {
            // The limit is technically undefined, but the interpretation here is clear:
            // A constant dataset has no skewness.
            return double(0);
        }
        double skew = M3/(M2*sqrt(var));
        return skew;
    }
    else
    {
        Real M1 = *first;
        Real M2 = 0;
        Real M3 = 0;
        Real n = 2;
        for (auto it = std::next(first); it != last; ++it)
        {
            Real delta21 = *it - M1;
            Real tmp = delta21/n;
            M3 += tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
            M2 += tmp*(n-1)*delta21;
            M1 += tmp;
            n += 1;
        }

        Real var = M2/(n-1);
        if (var == 0)
        {
            // The limit is technically undefined, but the interpretation here is clear:
            // A constant dataset has no skewness.
            return Real(0);
        }
        Real skew = M3/(M2*sqrt(var));
        return skew;
    }
}

template<class Container>
inline auto skewness(Container const & v)
{
    return skewness(v.cbegin(), v.cend());
}

// Follows equation 1.5/1.6 of:
// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
template<class ForwardIterator>
auto first_four_moments(ForwardIterator first, ForwardIterator last)
{
    using Real = typename std::iterator_traits<ForwardIterator>::value_type;
    BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments.");
    if constexpr (std::is_integral<Real>::value)
    {
        double M1 = *first;
        double M2 = 0;
        double M3 = 0;
        double M4 = 0;
        double n = 2;
        for (auto it = std::next(first); it != last; ++it)
        {
            double delta21 = *it - M1;
            double tmp = delta21/n;
            M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
            M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
            M2 = M2 + tmp*(n-1)*delta21;
            M1 = M1 + tmp;
            n += 1;
        }

        return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
    }
    else
    {
        Real M1 = *first;
        Real M2 = 0;
        Real M3 = 0;
        Real M4 = 0;
        Real n = 2;
        for (auto it = std::next(first); it != last; ++it)
        {
            Real delta21 = *it - M1;
            Real tmp = delta21/n;
            M4 = M4 + tmp*(tmp*tmp*delta21*((n-1)*(n*n-3*n+3)) + 6*tmp*M2 - 4*M3);
            M3 = M3 + tmp*((n-1)*(n-2)*delta21*tmp - 3*M2);
            M2 = M2 + tmp*(n-1)*delta21;
            M1 = M1 + tmp;
            n += 1;
        }

        return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
    }
}

template<class Container>
inline auto first_four_moments(Container const & v)
{
    return first_four_moments(v.cbegin(), v.cend());
}


// Follows equation 1.6 of:
// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
template<class ForwardIterator>
auto kurtosis(ForwardIterator first, ForwardIterator last)
{
    auto [M1, M2, M3, M4] = first_four_moments(first, last);
    if (M2 == 0)
    {
        return M2;
    }
    return M4/(M2*M2);
}

template<class Container>
inline auto kurtosis(Container const & v)
{
    return kurtosis(v.cbegin(), v.cend());
}

template<class ForwardIterator>
auto excess_kurtosis(ForwardIterator first, ForwardIterator last)
{
    return kurtosis(first, last) - 3;
}

template<class Container>
inline auto excess_kurtosis(Container const & v)
{
    return excess_kurtosis(v.cbegin(), v.cend());
}


template<class RandomAccessIterator>
auto median(RandomAccessIterator first, RandomAccessIterator last)
{
    size_t num_elems = std::distance(first, last);
    BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
    if (num_elems & 1)
    {
        auto middle = first + (num_elems - 1)/2;
        std::nth_element(first, middle, last);
        return *middle;
    }
    else
    {
        auto middle = first + num_elems/2 - 1;
        std::nth_element(first, middle, last);
        std::nth_element(middle, middle+1, last);
        return (*middle + *(middle+1))/2;
    }
}


template<class RandomAccessContainer>
inline auto median(RandomAccessContainer & v)
{
    return median(v.begin(), v.end());
}

template<class RandomAccessIterator>
auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
{
    using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
    BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");

    std::sort(first, last);
    if constexpr (std::is_integral<Real>::value)
    {
        double i = 1;
        double num = 0;
        double denom = 0;
        for (auto it = first; it != last; ++it)
        {
            num += *it*i;
            denom += *it;
            ++i;
        }

        // If the l1 norm is zero, all elements are zero, so every element is the same.
        if (denom == 0)
        {
            return double(0);
        }

        return ((2*num)/denom - i)/(i-1);
    }
    else
    {
        Real i = 1;
        Real num = 0;
        Real denom = 0;
        for (auto it = first; it != last; ++it)
        {
            num += *it*i;
            denom += *it;
            ++i;
        }

        // If the l1 norm is zero, all elements are zero, so every element is the same.
        if (denom == 0)
        {
            return Real(0);
        }

        return ((2*num)/denom - i)/(i-1);
    }
}

template<class RandomAccessContainer>
inline auto gini_coefficient(RandomAccessContainer & v)
{
    return gini_coefficient(v.begin(), v.end());
}

template<class RandomAccessIterator>
inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
{
    size_t n = std::distance(first, last);
    return n*gini_coefficient(first, last)/(n-1);
}

template<class RandomAccessContainer>
inline auto sample_gini_coefficient(RandomAccessContainer & v)
{
    return sample_gini_coefficient(v.begin(), v.end());
}

template<class RandomAccessIterator>
auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN())
{
    using std::abs;
    using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
    using std::isnan;
    if (isnan(center))
    {
        center = boost::math::statistics::median(first, last);
    }
    size_t num_elems = std::distance(first, last);
    BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined.");
    auto comparator = [&center](Real a, Real b) { return abs(a-center) < abs(b-center);};
    if (num_elems & 1)
    {
        auto middle = first + (num_elems - 1)/2;
        std::nth_element(first, middle, last, comparator);
        return abs(*middle);
    }
    else
    {
        auto middle = first + num_elems/2 - 1;
        std::nth_element(first, middle, last, comparator);
        std::nth_element(middle, middle+1, last, comparator);
        return (abs(*middle) + abs(*(middle+1)))/abs(static_cast<Real>(2));
    }
}

template<class RandomAccessContainer>
inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN())
{
    return median_absolute_deviation(v.begin(), v.end(), center);
}

template<class ForwardIterator>
auto interquartile_range(ForwardIterator first, ForwardIterator last)
{
    using Real = typename std::iterator_traits<ForwardIterator>::value_type;
    static_assert(!std::is_integral<Real>::value, "Integer values have not yet been implemented.");
    auto m = std::distance(first,last);
    BOOST_ASSERT_MSG(m >= 3, "At least 3 samples are required to compute the interquartile range.");
    auto k = m/4;
    auto j = m - (4*k);
    // m = 4k+j.
    // If j = 0 or j = 1, then there are an even number of samples below the median, and an even number above the median.
    //    Then we must average adjacent elements to get the quartiles.
    // If j = 2 or j = 3, there are an odd number of samples above and below the median, these elements may be directly extracted to get the quartiles.

    if (j==2 || j==3)
    {
        auto q1 = first + k;
        auto q3 = first + 3*k + j - 1;
        std::nth_element(first, q1, last);
        Real Q1 = *q1;
        std::nth_element(q1, q3, last);
        Real Q3 = *q3;
        return Q3 - Q1;
    } else {
        // j == 0 or j==1:
        auto q1 = first + k - 1;
        auto q3 = first + 3*k - 1 + j;
        std::nth_element(first, q1, last);
        Real a = *q1;
        std::nth_element(q1, q1 + 1, last);
        Real b = *(q1 + 1);
        Real Q1 = (a+b)/2;
        std::nth_element(q1, q3, last);
        a = *q3;
        std::nth_element(q3, q3 + 1, last);
        b = *(q3 + 1);
        Real Q3 = (a+b)/2;
        return Q3 - Q1;
    }
}

template<class RandomAccessContainer>
inline auto interquartile_range(RandomAccessContainer & v)
{
    return interquartile_range(v.begin(), v.end());
}

template<class ForwardIterator, class OutputIterator>
auto sorted_mode(ForwardIterator first, ForwardIterator last, OutputIterator output) -> decltype(output)
{
    using Z = typename std::iterator_traits<ForwardIterator>::value_type;
    static_assert(std::is_integral<Z>::value, "Floating point values have not yet been implemented.");
    using Size = typename std::iterator_traits<ForwardIterator>::difference_type;

    std::vector<Z> modes {};
    modes.reserve(16);
    Size max_counter {0};

    while(first != last)
    {
        Size current_count {0};
        auto end_it {first};
        while(end_it != last && *end_it == *first)
        {
            ++current_count;
            ++end_it;
        }

        if(current_count > max_counter)
        {
            modes.resize(1);
            modes[0] = *first;
            max_counter = current_count;
        }

        else if(current_count == max_counter)
        {
            modes.emplace_back(*first);
        }

        first = end_it;
    }

    return std::move(modes.begin(), modes.end(), output);
}

template<class Container, class OutputIterator>
inline auto sorted_mode(Container & v, OutputIterator output) -> decltype(output)
{
    return sorted_mode(v.begin(), v.end(), output);
}

template<class RandomAccessIterator, class OutputIterator>
auto mode(RandomAccessIterator first, RandomAccessIterator last, OutputIterator output) -> decltype(output)
{
    std::sort(first, last);
    return sorted_mode(first, last, output);
}

template<class RandomAccessContainer, class OutputIterator>
inline auto mode(RandomAccessContainer & v, OutputIterator output) -> decltype(output)
{
    return mode(v.begin(), v.end(), output);
}

}
#endif
Commit	Line	Data
92f5a8d4 TL	1	// (C) Copyright Nick Thompson 2018.
	2	// Use, modification and distribution are subject to the
	3	// Boost Software License, Version 1.0. (See accompanying file
	4	// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	5
	6	#ifndef BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP
	7	#define BOOST_MATH_STATISTICS_UNIVARIATE_STATISTICS_HPP
	8
	9	#include <algorithm>
	10	#include <iterator>
f67539c2 TL	11	#include <tuple>
f67539c2 TL	12	#include <cmath>
20effc67	13	#include <vector>
92f5a8d4 TL	14	#include <boost/assert.hpp>
	15
	16	namespace boost::math::statistics {
	17
	18	template<class ForwardIterator>
	19	auto mean(ForwardIterator first, ForwardIterator last)
	20	{
	21	using Real = typename std::iterator_traits<ForwardIterator>::value_type;
	22	BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the mean.");
	23	if constexpr (std::is_integral<Real>::value)
	24	{
	25	double mu = 0;
	26	double i = 1;
	27	for(auto it = first; it != last; ++it) {
	28	mu = mu + (*it - mu)/i;
	29	i += 1;
	30	}
	31	return mu;
	32	}
	33	else if constexpr (std::is_same_v<typename std::iterator_traits<ForwardIterator>::iterator_category, std::random_access_iterator_tag>)
	34	{
	35	size_t elements = std::distance(first, last);
	36	Real mu0 = 0;
	37	Real mu1 = 0;
	38	Real mu2 = 0;
	39	Real mu3 = 0;
	40	Real i = 1;
	41	auto end = last - (elements % 4);
	42	for(auto it = first; it != end; it += 4) {
	43	Real inv = Real(1)/i;
	44	Real tmp0 = (*it - mu0);
	45	Real tmp1 = (*(it+1) - mu1);
	46	Real tmp2 = (*(it+2) - mu2);
	47	Real tmp3 = (*(it+3) - mu3);
	48	// please generate a vectorized fma here
	49	mu0 += tmp0*inv;
	50	mu1 += tmp1*inv;
	51	mu2 += tmp2*inv;
	52	mu3 += tmp3*inv;
	53	i += 1;
	54	}
	55	Real num1 = Real(elements - (elements %4))/Real(4);
	56	Real num2 = num1 + Real(elements % 4);
	57
	58	for (auto it = end; it != last; ++it)
	59	{
	60	mu3 += (*it-mu3)/i;
	61	i += 1;
	62	}
	63
	64	return (num1(mu0+mu1+mu2) + num2mu3)/Real(elements);
	65	}
	66	else
	67	{
	68	auto it = first;
	69	Real mu = *it;
	70	Real i = 2;
	71	while(++it != last)
	72	{
	73	mu += (*it - mu)/i;
	74	i += 1;
	75	}
	76	return mu;
	77	}
78	}
79
80	template<class Container>
81	inline auto mean(Container const & v)
82	{
83	return mean(v.cbegin(), v.cend());
84	}
85
86	template<class ForwardIterator>
87	auto variance(ForwardIterator first, ForwardIterator last)
88	{
89	using Real = typename std::iterator_traits<ForwardIterator>::value_type;
90	BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
91	// Higham, Accuracy and Stability, equation 1.6a and 1.6b:
92	if constexpr (std::is_integral<Real>::value)
93	{
94	double M = *first;
95	double Q = 0;
96	double k = 2;
97	for (auto it = std::next(first); it != last; ++it)
98	{
99	double tmp = *it - M;
100	Q = Q + ((k-1)tmptmp)/k;
101	M = M + tmp/k;
102	k += 1;
103	}
104	return Q/(k-1);
105	}
106	else
107	{
108	Real M = *first;
109	Real Q = 0;
110	Real k = 2;
111	for (auto it = std::next(first); it != last; ++it)
112	{
113	Real tmp = (*it - M)/k;
114	Q += k(k-1)tmp*tmp;
115	M += tmp;
116	k += 1;
117	}
118	return Q/(k-1);
119	}
120	}
121
122	template<class Container>
123	inline auto variance(Container const & v)
124	{
125	return variance(v.cbegin(), v.cend());
126	}
127
128	template<class ForwardIterator>
129	auto sample_variance(ForwardIterator first, ForwardIterator last)
130	{
131	size_t n = std::distance(first, last);
132	BOOST_ASSERT_MSG(n > 1, "At least two samples are required to compute the sample variance.");
133	return n*variance(first, last)/(n-1);
134	}
135
136	template<class Container>
137	inline auto sample_variance(Container const & v)
138	{
139	return sample_variance(v.cbegin(), v.cend());
140	}
141
142	template<class ForwardIterator>
143	auto mean_and_sample_variance(ForwardIterator first, ForwardIterator last)
144	{
145	using Real = typename std::iterator_traits<ForwardIterator>::value_type;
146	BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute mean and variance.");
147	// Higham, Accuracy and Stability, equation 1.6a and 1.6b:
148	if constexpr (std::is_integral<Real>::value)
149	{
150	double M = *first;
151	double Q = 0;
152	double k = 2;
153	for (auto it = std::next(first); it != last; ++it)
154	{
155	double tmp = *it - M;
156	Q = Q + ((k-1)tmptmp)/k;
157	M = M + tmp/k;
158	k += 1;
159	}
160	return std::pair<double, double>{M, Q/(k-2)};
161	}
162	else
163	{
164	Real M = *first;
165	Real Q = 0;
166	Real k = 2;
167	for (auto it = std::next(first); it != last; ++it)
168	{
169	Real tmp = (*it - M)/k;
170	Q += k(k-1)tmp*tmp;
171	M += tmp;
172	k += 1;
173	}
174	return std::pair<Real, Real>{M, Q/(k-2)};
175	}
176	}
177
178	template<class Container>
179	auto mean_and_sample_variance(Container const & v)
180	{
181	return mean_and_sample_variance(v.begin(), v.end());
182	}
183
184	// Follows equation 1.5 of:
185	// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
186	template<class ForwardIterator>
187	auto skewness(ForwardIterator first, ForwardIterator last)
188	{
189	using Real = typename std::iterator_traits<ForwardIterator>::value_type;
f67539c2	190	using std::sqrt;
92f5a8d4 TL	191	BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute skewness.");
	192	if constexpr (std::is_integral<Real>::value)
	193	{
	194	double M1 = *first;
	195	double M2 = 0;
	196	double M3 = 0;
	197	double n = 2;
	198	for (auto it = std::next(first); it != last; ++it)
	199	{
	200	double delta21 = *it - M1;
	201	double tmp = delta21/n;
	202	M3 = M3 + tmp((n-1)(n-2)delta21tmp - 3*M2);
	203	M2 = M2 + tmp(n-1)delta21;
	204	M1 = M1 + tmp;
	205	n += 1;
	206	}
	207
	208	double var = M2/(n-1);
	209	if (var == 0)
	210	{
	211	// The limit is technically undefined, but the interpretation here is clear:
	212	// A constant dataset has no skewness.
	213	return double(0);
	214	}
	215	double skew = M3/(M2*sqrt(var));
	216	return skew;
	217	}
	218	else
	219	{
	220	Real M1 = *first;
	221	Real M2 = 0;
	222	Real M3 = 0;
	223	Real n = 2;
	224	for (auto it = std::next(first); it != last; ++it)
	225	{
	226	Real delta21 = *it - M1;
	227	Real tmp = delta21/n;
	228	M3 += tmp((n-1)(n-2)delta21tmp - 3*M2);
	229	M2 += tmp(n-1)delta21;
	230	M1 += tmp;
	231	n += 1;
	232	}
	233
	234	Real var = M2/(n-1);
	235	if (var == 0)
	236	{
	237	// The limit is technically undefined, but the interpretation here is clear:
	238	// A constant dataset has no skewness.
	239	return Real(0);
	240	}
	241	Real skew = M3/(M2*sqrt(var));
	242	return skew;
	243	}
	244	}
	245
	246	template<class Container>
	247	inline auto skewness(Container const & v)
	248	{
	249	return skewness(v.cbegin(), v.cend());
	250	}
	251
	252	// Follows equation 1.5/1.6 of:
	253	// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
	254	template<class ForwardIterator>
255	auto first_four_moments(ForwardIterator first, ForwardIterator last)
256	{
257	using Real = typename std::iterator_traits<ForwardIterator>::value_type;
258	BOOST_ASSERT_MSG(first != last, "At least one sample is required to compute the first four moments.");
259	if constexpr (std::is_integral<Real>::value)
260	{
261	double M1 = *first;
262	double M2 = 0;
263	double M3 = 0;
264	double M4 = 0;
265	double n = 2;
266	for (auto it = std::next(first); it != last; ++it)
267	{
268	double delta21 = *it - M1;
269	double tmp = delta21/n;
270	M4 = M4 + tmp(tmptmpdelta21((n-1)(nn-3n+3)) + 6tmpM2 - 4M3);
271	M3 = M3 + tmp((n-1)(n-2)delta21tmp - 3*M2);
272	M2 = M2 + tmp(n-1)delta21;
273	M1 = M1 + tmp;
274	n += 1;
275	}
276
277	return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
278	}
279	else
280	{
281	Real M1 = *first;
282	Real M2 = 0;
283	Real M3 = 0;
284	Real M4 = 0;
285	Real n = 2;
286	for (auto it = std::next(first); it != last; ++it)
287	{
288	Real delta21 = *it - M1;
289	Real tmp = delta21/n;
290	M4 = M4 + tmp(tmptmpdelta21((n-1)(nn-3n+3)) + 6tmpM2 - 4M3);
291	M3 = M3 + tmp((n-1)(n-2)delta21tmp - 3*M2);
292	M2 = M2 + tmp(n-1)delta21;
293	M1 = M1 + tmp;
294	n += 1;
295	}
296
297	return std::make_tuple(M1, M2/(n-1), M3/(n-1), M4/(n-1));
298	}
299	}
300
301	template<class Container>
302	inline auto first_four_moments(Container const & v)
303	{
304	return first_four_moments(v.cbegin(), v.cend());
305	}
306
307
308	// Follows equation 1.6 of:
309	// https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
310	template<class ForwardIterator>
311	auto kurtosis(ForwardIterator first, ForwardIterator last)
312	{
313	auto [M1, M2, M3, M4] = first_four_moments(first, last);
314	if (M2 == 0)
315	{
316	return M2;
317	}
318	return M4/(M2*M2);
319	}
320
321	template<class Container>
322	inline auto kurtosis(Container const & v)
323	{
324	return kurtosis(v.cbegin(), v.cend());
325	}
326
327	template<class ForwardIterator>
328	auto excess_kurtosis(ForwardIterator first, ForwardIterator last)
329	{
330	return kurtosis(first, last) - 3;
331	}
332
333	template<class Container>
334	inline auto excess_kurtosis(Container const & v)
335	{
336	return excess_kurtosis(v.cbegin(), v.cend());
337	}
338
339
340	template<class RandomAccessIterator>
341	auto median(RandomAccessIterator first, RandomAccessIterator last)
342	{
343	size_t num_elems = std::distance(first, last);
344	BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero length vector is undefined.");
345	if (num_elems & 1)
346	{
347	auto middle = first + (num_elems - 1)/2;
348	std::nth_element(first, middle, last);
349	return *middle;
350	}
351	else
352	{
353	auto middle = first + num_elems/2 - 1;
354	std::nth_element(first, middle, last);
355	std::nth_element(middle, middle+1, last);
356	return (middle + (middle+1))/2;
357	}
358	}
359
360
361	template<class RandomAccessContainer>
362	inline auto median(RandomAccessContainer & v)
363	{
364	return median(v.begin(), v.end());
365	}
366
367	template<class RandomAccessIterator>
368	auto gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
369	{
370	using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
371	BOOST_ASSERT_MSG(first != last && std::next(first) != last, "Computation of the Gini coefficient requires at least two samples.");
372
373	std::sort(first, last);
374	if constexpr (std::is_integral<Real>::value)
375	{
376	double i = 1;
377	double num = 0;
378	double denom = 0;
379	for (auto it = first; it != last; ++it)
380	{
381	num += iti;
382	denom += *it;
383	++i;
384	}
385
386	// If the l1 norm is zero, all elements are zero, so every element is the same.
387	if (denom == 0)
388	{
389	return double(0);
390	}
391
392	return ((2*num)/denom - i)/(i-1);
393	}
394	else
395	{
396	Real i = 1;
397	Real num = 0;
398	Real denom = 0;
399	for (auto it = first; it != last; ++it)
400	{
401	num += iti;
402	denom += *it;
403	++i;
404	}
405
406	// If the l1 norm is zero, all elements are zero, so every element is the same.
407	if (denom == 0)
408	{
409	return Real(0);
410	}
411
412	return ((2*num)/denom - i)/(i-1);
413	}
414	}
415
416	template<class RandomAccessContainer>
417	inline auto gini_coefficient(RandomAccessContainer & v)
418	{
419	return gini_coefficient(v.begin(), v.end());
420	}
421
422	template<class RandomAccessIterator>
423	inline auto sample_gini_coefficient(RandomAccessIterator first, RandomAccessIterator last)
424	{
425	size_t n = std::distance(first, last);
426	return n*gini_coefficient(first, last)/(n-1);
427	}
428
429	template<class RandomAccessContainer>
430	inline auto sample_gini_coefficient(RandomAccessContainer & v)
431	{
432	return sample_gini_coefficient(v.begin(), v.end());
433	}
434
435	template<class RandomAccessIterator>
436	auto median_absolute_deviation(RandomAccessIterator first, RandomAccessIterator last, typename std::iterator_traits<RandomAccessIterator>::value_type center=std::numeric_limits<typename std::iterator_traits<RandomAccessIterator>::value_type>::quiet_NaN())
437	{
438	using std::abs;
439	using Real = typename std::iterator_traits<RandomAccessIterator>::value_type;
440	using std::isnan;
441	if (isnan(center))
442	{
443	center = boost::math::statistics::median(first, last);
444	}
445	size_t num_elems = std::distance(first, last);
446	BOOST_ASSERT_MSG(num_elems > 0, "The median of a zero-length vector is undefined.");
447	auto comparator = [&center](Real a, Real b) { return abs(a-center) < abs(b-center);};
448	if (num_elems & 1)
449	{
450	auto middle = first + (num_elems - 1)/2;
451	std::nth_element(first, middle, last, comparator);
452	return abs(*middle);
453	}
454	else
455	{
456	auto middle = first + num_elems/2 - 1;
457	std::nth_element(first, middle, last, comparator);
458	std::nth_element(middle, middle+1, last, comparator);
459	return (abs(middle) + abs((middle+1)))/abs(static_cast<Real>(2));
460	}
461	}
462
463	template<class RandomAccessContainer>
464	inline auto median_absolute_deviation(RandomAccessContainer & v, typename RandomAccessContainer::value_type center=std::numeric_limits<typename RandomAccessContainer::value_type>::quiet_NaN())
465	{
466	return median_absolute_deviation(v.begin(), v.end(), center);
467	}
468
f67539c2 TL	469	template<class ForwardIterator>
	470	auto interquartile_range(ForwardIterator first, ForwardIterator last)
	471	{
	472	using Real = typename std::iterator_traits<ForwardIterator>::value_type;
	473	static_assert(!std::is_integral<Real>::value, "Integer values have not yet been implemented.");
	474	auto m = std::distance(first,last);
	475	BOOST_ASSERT_MSG(m >= 3, "At least 3 samples are required to compute the interquartile range.");
	476	auto k = m/4;
	477	auto j = m - (4*k);
	478	// m = 4k+j.
	479	// If j = 0 or j = 1, then there are an even number of samples below the median, and an even number above the median.
	480	// Then we must average adjacent elements to get the quartiles.
	481	// If j = 2 or j = 3, there are an odd number of samples above and below the median, these elements may be directly extracted to get the quartiles.
	482
	483	if (j==2 \|\| j==3)
	484	{
	485	auto q1 = first + k;
	486	auto q3 = first + 3*k + j - 1;
	487	std::nth_element(first, q1, last);
	488	Real Q1 = *q1;
	489	std::nth_element(q1, q3, last);
	490	Real Q3 = *q3;
	491	return Q3 - Q1;
	492	} else {
	493	// j == 0 or j==1:
	494	auto q1 = first + k - 1;
	495	auto q3 = first + 3*k - 1 + j;
	496	std::nth_element(first, q1, last);
	497	Real a = *q1;
	498	std::nth_element(q1, q1 + 1, last);
	499	Real b = *(q1 + 1);
	500	Real Q1 = (a+b)/2;
	501	std::nth_element(q1, q3, last);
	502	a = *q3;
	503	std::nth_element(q3, q3 + 1, last);
	504	b = *(q3 + 1);
	505	Real Q3 = (a+b)/2;
	506	return Q3 - Q1;
	507	}
	508	}
	509
	510	template<class RandomAccessContainer>
	511	inline auto interquartile_range(RandomAccessContainer & v)
	512	{
	513	return interquartile_range(v.begin(), v.end());
	514	}
	515
20effc67 TL	516	template<class ForwardIterator, class OutputIterator>
	517	auto sorted_mode(ForwardIterator first, ForwardIterator last, OutputIterator output) -> decltype(output)
	518	{
	519	using Z = typename std::iterator_traits<ForwardIterator>::value_type;
	520	static_assert(std::is_integral<Z>::value, "Floating point values have not yet been implemented.");
	521	using Size = typename std::iterator_traits<ForwardIterator>::difference_type;
	522
	523	std::vector<Z> modes {};
	524	modes.reserve(16);
	525	Size max_counter {0};
	526
	527	while(first != last)
	528	{
	529	Size current_count {0};
	530	auto end_it {first};
	531	while(end_it != last && end_it == first)
	532	{
	533	++current_count;
	534	++end_it;
	535	}
	536
	537	if(current_count > max_counter)
	538	{
	539	modes.resize(1);
	540	modes[0] = *first;
	541	max_counter = current_count;
	542	}
	543
	544	else if(current_count == max_counter)
	545	{
	546	modes.emplace_back(*first);
	547	}
	548
	549	first = end_it;
	550	}
	551
	552	return std::move(modes.begin(), modes.end(), output);
	553	}
	554
	555	template<class Container, class OutputIterator>
	556	inline auto sorted_mode(Container & v, OutputIterator output) -> decltype(output)
	557	{
	558	return sorted_mode(v.begin(), v.end(), output);
	559	}
	560
	561	template<class RandomAccessIterator, class OutputIterator>
	562	auto mode(RandomAccessIterator first, RandomAccessIterator last, OutputIterator output) -> decltype(output)
	563	{
	564	std::sort(first, last);
	565	return sorted_mode(first, last, output);
	566	}
	567
	568	template<class RandomAccessContainer, class OutputIterator>
	569	inline auto mode(RandomAccessContainer & v, OutputIterator output) -> decltype(output)
	570	{
	571	return mode(v.begin(), v.end(), output);
	572	}
f67539c2	573
92f5a8d4 TL	574	}
92f5a8d4 TL	575	#endif