[ceph.git] / ceph / src / boost / libs / accumulators / include / boost / accumulators / statistics / peaks_over_threshold.hpp

///////////////////////////////////////////////////////////////////////////////
// peaks_over_threshold.hpp
//
//  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
//  Software License, Version 1.0. (See accompanying file
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
#define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006

#include <vector>
#include <limits>
#include <numeric>
#include <functional>
#include <boost/config/no_tr1/cmath.hpp> // pow
#include <sstream> // stringstream
#include <stdexcept> // runtime_error
#include <boost/throw_exception.hpp>
#include <boost/range.hpp>
#include <boost/mpl/if.hpp>
#include <boost/mpl/int.hpp>
#include <boost/mpl/placeholders.hpp>
#include <boost/parameter/keyword.hpp>
#include <boost/tuple/tuple.hpp>
#include <boost/accumulators/accumulators_fwd.hpp>
#include <boost/accumulators/framework/accumulator_base.hpp>
#include <boost/accumulators/framework/extractor.hpp>
#include <boost/accumulators/numeric/functional.hpp>
#include <boost/accumulators/framework/parameters/sample.hpp>
#include <boost/accumulators/framework/depends_on.hpp>
#include <boost/accumulators/statistics_fwd.hpp>
#include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
#include <boost/accumulators/statistics/count.hpp>
#include <boost/accumulators/statistics/tail.hpp>

#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable: 4127) // conditional expression is constant
#endif

namespace boost { namespace accumulators
{

///////////////////////////////////////////////////////////////////////////////
// threshold_probability and threshold named parameters
//
// Named parameter for the explicit threshold value u. It is read in this file
// as args[pot_threshold_value] by impl::peaks_over_threshold_impl's constructor,
// and tag::pot_threshold_value is a base class of tag::peaks_over_threshold.
// NOTE(review): the third macro argument (threshold_value) is presumably the
// nested alias name created by Boost.Parameter -- confirm against its docs.
BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value)
// Named parameter for the threshold probability. It is read in this file as
// args[pot_threshold_probability] by impl::peaks_over_threshold_prob_impl's
// constructor, and tag::pot_threshold_probability is a base class of
// tag::peaks_over_threshold_prob.
BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability)

// NOTE(review): BOOST_ACCUMULATORS_IGNORE_GLOBAL is defined elsewhere; it
// presumably silences "unused global" warnings for the two keyword objects
// declared above -- confirm against accumulators_fwd.hpp.
BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value)
BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability)

namespace impl
{
    ///////////////////////////////////////////////////////////////////////////////
    // peaks_over_threshold_impl
    //  works with an explicit threshold value and does not depend on order statistics
    /**
        @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation

        According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of
        the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$
        may be approximated by a generalized Pareto distribution
        \f[
            G_{\xi,\beta}(x) =
            \left\{
            \begin{array}{ll}
                1-\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi} & \textrm{if }\xi\neq0\\
                1-\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0,
            \end{array}
            \right.
        \f]
        with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf.
        Hosking and Wallis (1987),
        \f[
            \begin{array}{lll}
            \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\
            \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right],
            \end{array}
        \f]
        \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over
        the threshold \f$u\f$. Equivalently, the distribution function
        \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by
        \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$
        can be written as
        \f[
            F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u)
        \f]
        and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function
        \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by
        \f[
            \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u).
        \f]
        It can be shown that \f$\widehat{F}(x)\f$ is a generalized
        Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$
        and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$,
        one obtains an estimator for the \f$\alpha\f$-quantile,
        \f[
            \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right],
        \f]
        and similarly an estimator for the (coherent) tail mean,
        \f[
            \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi},
        \f]
        cf. McNeil and Frey (2000).

        Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the
        \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define
        the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are
        computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the
        correct result.

        For further details, see

        J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution,
        Technometrics, Volume 29, 1987, p. 339-349

        A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series:
        an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300

        @param quantile_probability
        @param pot_threshold_value
    */
    template<typename Sample, typename LeftRight>
    struct peaks_over_threshold_impl
      : accumulator_base
    {
        typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
        // for boost::result_of
        typedef boost::tuple<float_type, float_type, float_type> result_type;
        // for left tail fitting, mirror the extreme values
        typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;

        // Reads the (mirrored) threshold from args[pot_threshold_value] and seeds
        // both running sums with the value obtained from args[sample | Sample()].
        template<typename Args>
        peaks_over_threshold_impl(Args const &args)
          : Nu_(0)
          , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
          , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
          , threshold_(sign::value * args[pot_threshold_value])
          , fit_parameters_(boost::make_tuple(0., 0., 0.))
          , is_dirty_(true)
        {
        }

        // Accumulates one sample. Only samples whose mirrored value exceeds the
        // mirrored threshold contribute to the sums; the cached fit is always
        // invalidated because the total sample count changes either way.
        template<typename Args>
        void operator ()(Args const &args)
        {
            this->is_dirty_ = true;

            if (sign::value * args[sample] > this->threshold_)
            {
                // the sums are kept in the original (unmirrored) orientation;
                // the sign is applied once when the mean is formed in result()
                this->mu_ += args[sample];
                this->sigma2_ += args[sample] * args[sample];
                ++this->Nu_;
            }
        }

        // Returns the tuple (u_bar, beta_bar, xi_hat) of the fitted generalized
        // Pareto distribution, recomputing it only if samples were added since
        // the last call.
        //
        // The mean and variance are formed in local variables so that the
        // running sums mu_ / sigma2_ stay intact. Overwriting the sums with the
        // mean/variance would corrupt every fit computed after more samples are
        // accumulated. If no sample exceeded the threshold (Nu_ == 0) the
        // divisions below are by zero, exactly as before, but the accumulator
        // state is no longer damaged by it.
        template<typename Args>
        result_type result(Args const &args) const
        {
            if (this->is_dirty_)
            {
                this->is_dirty_ = false;

                std::size_t cnt = count(args);

                // empirical mean (mirrored for the left tail) and variance of
                // the samples beyond the threshold
                float_type mean = sign::value * numeric::fdiv(this->mu_, this->Nu_);
                float_type variance = numeric::fdiv(this->sigma2_, this->Nu_);
                variance -= mean * mean;

                // F_n(u): fraction of all samples not beyond the threshold
                float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt);

                // method-of-moments estimators, followed by the rescaling to
                // (u_bar, beta_bar) described in the class documentation
                float_type tmp = numeric::fdiv(( mean - this->threshold_ )*( mean - this->threshold_ ), variance);
                float_type xi_hat = 0.5 * ( 1. - tmp );
                float_type beta_hat = 0.5 * ( mean - this->threshold_ ) * ( 1. + tmp );
                float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
                float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
                this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
            }

            return this->fit_parameters_;
        }

    private:
        std::size_t Nu_;                     // number of samples beyond the threshold
        float_type mu_;                      // running sum of the samples beyond the threshold
        float_type sigma2_;                  // running sum of squares of the samples beyond the threshold
        float_type threshold_;               // threshold, mirrored for left tail fitting
        mutable result_type fit_parameters_; // boost::tuple that caches the fit parameters
        mutable bool is_dirty_;              // true if fit_parameters_ must be recomputed
    };

    ///////////////////////////////////////////////////////////////////////////////
    // peaks_over_threshold_prob_impl
    //  determines threshold from a given threshold probability using order statistics
    /**
        @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation

        @sa peaks_over_threshold_impl

        @param quantile_probability
        @param pot_threshold_probability
    */
    template<typename Sample, typename LeftRight>
    struct peaks_over_threshold_prob_impl
      : accumulator_base
    {
        typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
        // for boost::result_of
        typedef boost::tuple<float_type, float_type, float_type> result_type;
        // for left tail fitting, mirror the extreme values
        typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;

        // Reads the threshold probability from args[pot_threshold_probability]
        // and stores the zero-like seed values used to start the sums in result().
        template<typename Args>
        peaks_over_threshold_prob_impl(Args const &args)
          : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
          , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
          , threshold_probability_(args[pot_threshold_probability])
          , fit_parameters_(boost::make_tuple(0., 0., 0.))
          , is_dirty_(true)
        {
        }

        // The samples themselves are read from tail(args) in result(); here
        // only the cached fit is invalidated.
        void operator ()(dont_care)
        {
            this->is_dirty_ = true;
        }

        // Returns the tuple (u_bar, beta_bar, xi_hat) of the fitted generalized
        // Pareto distribution, recomputing it only if samples were added since
        // the last call.
        //
        // Everything is computed from locals so that repeated recomputations do
        // not feed on the results of earlier ones: the sums restart from the
        // seed values each time and the stored threshold probability is never
        // modified. If the index n derived from the threshold probability is 0
        // or not smaller than the size of tail(args), the result is a tuple of
        // quiet NaNs (which is cached), or, if float_type has no quiet NaN, a
        // std::runtime_error is thrown via boost::throw_exception (the cache
        // then stays dirty so that a later call reports the error again).
        template<typename Args>
        result_type result(Args const &args) const
        {
            if (this->is_dirty_)
            {
                std::size_t cnt = count(args);

                // the n'th cached sample provides an approximate threshold value u
                std::size_t n = static_cast<std::size_t>(
                    std::ceil(
                        cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ )
                    )
                );

                // n must lie in [1, size): n == 0 would read the element before
                // the beginning of the tail cache
                if ( n == 0 || n >= static_cast<std::size_t>(tail(args).size()))
                {
                    if (std::numeric_limits<float_type>::has_quiet_NaN)
                    {
                        this->fit_parameters_ = boost::make_tuple(
                            std::numeric_limits<float_type>::quiet_NaN()
                          , std::numeric_limits<float_type>::quiet_NaN()
                          , std::numeric_limits<float_type>::quiet_NaN()
                        );
                    }
                    else
                    {
                        std::ostringstream msg;
                        msg << "index n = " << n << " is not in valid range [1, " << tail(args).size() << ")";
                        boost::throw_exception(std::runtime_error(msg.str()));
                    }
                }
                else
                {
                    float_type u = *(tail(args).begin() + n - 1) * sign::value;

                    // compute mean and variance of samples above/under threshold value u,
                    // starting from the seed values on every recomputation
                    float_type mean = this->mu_;
                    float_type variance = this->sigma2_;
                    for (std::size_t i = 0; i < n; ++i)
                    {
                        mean += *(tail(args).begin() + i);
                        variance += *(tail(args).begin() + i) * (*(tail(args).begin() + i));
                    }

                    mean = sign::value * numeric::fdiv(mean, n);
                    variance = numeric::fdiv(variance, n);
                    variance -= mean * mean;

                    // for the mirrored left tail the threshold probability is
                    // mirrored as well; kept local so the member is unchanged
                    float_type threshold_probability = this->threshold_probability_;
                    if (is_same<LeftRight, left>::value)
                        threshold_probability = 1. - this->threshold_probability_;

                    float_type tmp = numeric::fdiv(( mean - u )*( mean - u ), variance);
                    float_type xi_hat = 0.5 * ( 1. - tmp );
                    float_type beta_hat = 0.5 * ( mean - u ) * ( 1. + tmp );
                    float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
                    float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
                    this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
                }

                // only reached when fit_parameters_ holds the up-to-date result
                this->is_dirty_ = false;
            }

            return this->fit_parameters_;
        }

    private:
        float_type mu_;                             // seed (zero) value for the sum of tail samples
        float_type sigma2_;                         // seed (zero) value for the sum of squared tail samples
        float_type threshold_probability_;          // threshold probability as supplied by the user
        mutable result_type fit_parameters_;        // boost::tuple that caches the fit parameters
        mutable bool is_dirty_;                     // true if fit_parameters_ must be recomputed
    };

} // namespace impl

///////////////////////////////////////////////////////////////////////////////
// tag::peaks_over_threshold
//
namespace tag
{
    // Feature tag for the fit with an explicit threshold value. It depends on
    // the count feature and derives from pot_threshold_value, the keyword that
    // impl::peaks_over_threshold_impl reads in its constructor.
    template<typename LeftRight>
    struct peaks_over_threshold
      : depends_on<count>
      , pot_threshold_value
    {
        /// INTERNAL ONLY
        ///
        typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl;
    };

    // Feature tag for the fit with a threshold probability. Besides count it
    // depends on tail<LeftRight>, whose cached samples are read by
    // impl::peaks_over_threshold_prob_impl::result(), and it derives from the
    // pot_threshold_probability keyword.
    template<typename LeftRight>
    struct peaks_over_threshold_prob
      : depends_on<count, tail<LeftRight> >
      , pot_threshold_probability
    {
        /// INTERNAL ONLY
        ///
        typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl;
    };

    // Tag without dependencies and without an impl typedef. In this file it is
    // the tag used by extract::peaks_over_threshold and the base of the
    // feature_of specializations for both concrete tags above.
    struct abstract_peaks_over_threshold
      : depends_on<>
    {
    };
}

///////////////////////////////////////////////////////////////////////////////
// extract::peaks_over_threshold
//
namespace extract
{
    // Extractor object keyed on the abstract tag, to which both concrete
    // peaks_over_threshold tags are mapped by the feature_of specializations
    // in this file.
    extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {};

    // NOTE(review): presumably silences "unused global" warnings for the
    // extractor object above -- confirm against the macro definition.
    BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold)
}

using extract::peaks_over_threshold;

// peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight>
// The function-type spelling with the with_threshold_value variant selects the
// implementation that takes an explicit threshold value.
// NOTE(review): with_threshold_value is declared outside this file.
template<typename LeftRight>
struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)>
{
    typedef tag::peaks_over_threshold<LeftRight> type;
};

// peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight>
// The function-type spelling with the with_threshold_probability variant
// selects the implementation that derives the threshold from the tail cache.
// NOTE(review): with_threshold_probability is declared outside this file.
template<typename LeftRight>
struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)>
{
    typedef tag::peaks_over_threshold_prob<LeftRight> type;
};

// Maps the threshold-value tag to the abstract feature, i.e. the tag that
// extract::peaks_over_threshold is keyed on.
template<typename LeftRight>
struct feature_of<tag::peaks_over_threshold<LeftRight> >
  : feature_of<tag::abstract_peaks_over_threshold>
{
};

// Maps the threshold-probability tag to the abstract feature, i.e. the tag
// that extract::peaks_over_threshold is keyed on.
template<typename LeftRight>
struct feature_of<tag::peaks_over_threshold_prob<LeftRight> >
  : feature_of<tag::abstract_peaks_over_threshold>
{
};

// So that peaks_over_threshold can be automatically substituted
// with weighted_peaks_over_threshold when the weight parameter is non-void.
// NOTE(review): tag::weighted_peaks_over_threshold is not defined in this
// file; it is presumably forward-declared in statistics_fwd.hpp -- confirm.
template<typename LeftRight>
struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> >
{
    typedef tag::weighted_peaks_over_threshold<LeftRight> type;
};

// The weighted variant maps to the same feature as the unweighted
// threshold-value tag.
template<typename LeftRight>
struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> >
  : feature_of<tag::peaks_over_threshold<LeftRight> >
{};

// So that peaks_over_threshold_prob can be automatically substituted
// with weighted_peaks_over_threshold_prob when the weight parameter is non-void.
// NOTE(review): tag::weighted_peaks_over_threshold_prob is not defined in this
// file; it is presumably forward-declared in statistics_fwd.hpp -- confirm.
template<typename LeftRight>
struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> >
{
    typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
};

// The weighted variant maps to the same feature as the unweighted
// threshold-probability tag.
template<typename LeftRight>
struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> >
  : feature_of<tag::peaks_over_threshold_prob<LeftRight> >
{};

}} // namespace boost::accumulators

#ifdef _MSC_VER
# pragma warning(pop)
#endif

#endif
Commit	Line	Data
7c673cae FG	1	///////////////////////////////////////////////////////////////////////////////
	2	// peaks_over_threshold.hpp
	3	//
	4	// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
	5	// Software License, Version 1.0. (See accompanying file
	6	// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
	7
	8	#ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
	9	#define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
	10
	11	#include <vector>
	12	#include <limits>
	13	#include <numeric>
	14	#include <functional>
	15	#include <boost/config/no_tr1/cmath.hpp> // pow
	16	#include <sstream> // stringstream
	17	#include <stdexcept> // runtime_error
	18	#include <boost/throw_exception.hpp>
	19	#include <boost/range.hpp>
	20	#include <boost/mpl/if.hpp>
	21	#include <boost/mpl/int.hpp>
	22	#include <boost/mpl/placeholders.hpp>
	23	#include <boost/parameter/keyword.hpp>
	24	#include <boost/tuple/tuple.hpp>
	25	#include <boost/accumulators/accumulators_fwd.hpp>
	26	#include <boost/accumulators/framework/accumulator_base.hpp>
	27	#include <boost/accumulators/framework/extractor.hpp>
	28	#include <boost/accumulators/numeric/functional.hpp>
	29	#include <boost/accumulators/framework/parameters/sample.hpp>
	30	#include <boost/accumulators/framework/depends_on.hpp>
	31	#include <boost/accumulators/statistics_fwd.hpp>
	32	#include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
	33	#include <boost/accumulators/statistics/count.hpp>
	34	#include <boost/accumulators/statistics/tail.hpp>
	35
	36	#ifdef _MSC_VER
	37	# pragma warning(push)
	38	# pragma warning(disable: 4127) // conditional expression is constant
	39	#endif
	40
	41	namespace boost { namespace accumulators
	42	{
	43
	44	///////////////////////////////////////////////////////////////////////////////
	45	// threshold_probability and threshold named parameters
	46	//
	47	BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value)
	48	BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability)
	49
	50	BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value)
	51	BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability)
	52
	53	namespace impl
	54	{
	55	///////////////////////////////////////////////////////////////////////////////
	56	// peaks_over_threshold_impl
	57	// works with an explicit threshold value and does not depend on order statistics
	58	/**
	59	@brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
	60
	61	According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of
	62	the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$
	63	may be approximated by a generalized Pareto distribution
	64	\f[
65	G_{\xi,\beta}(x) =
66	\left\{
67	\begin{array}{ll}
68	\beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\
69	\beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0,
70	\end{array}
71	\right.
72	\f]
73	with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf.
74	Hosking and Wallis (1987),
75	\f[
76	\begin{array}{lll}
77	\hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\
78	\hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right],
79	\end{array}
80	\f]
81	\f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over
82	the threshold \f$u\f$. Equivalently, the distribution function
83	\f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by
84	\f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$
85	can be written as
86	\f[
87	F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u)
88	\f]
89	and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function
90	\f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by
91	\f[
92	\widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u).
93	\f]
94	It can be shown that \f$\widehat{F}(x)\f$ is a generalized
95	Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$
96	and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$,
97	one obtains an estimator for the \f$\alpha\f$-quantile,
98	\f[
99	\hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right],
100	\f]
101	and similarly an estimator for the (coherent) tail mean,
102	\f[
103	\widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi},
104	\f]
105	cf. McNeil and Frey (2000).
106
107	Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the
108	\f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define
109	the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are
110	computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the
111	correct result.
112
113	For further details, see
114
115	J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution,
116	Technometrics, Volume 29, 1987, p. 339-349
117
118	A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series:
119	an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300
120
121	@param quantile_probability
122	@param pot_threshold_value
123	*/
124	template<typename Sample, typename LeftRight>
125	struct peaks_over_threshold_impl
126	: accumulator_base
127	{
128	typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
129	// for boost::result_of
130	typedef boost::tuple<float_type, float_type, float_type> result_type;
131	// for left tail fitting, mirror the extreme values
132	typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
133
134	template<typename Args>
135	peaks_over_threshold_impl(Args const &args)
136	: Nu_(0)
	137	, mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
	138	, sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
139	, threshold_(sign::value * args[pot_threshold_value])
140	, fit_parameters_(boost::make_tuple(0., 0., 0.))
141	, is_dirty_(true)
142	{
143	}
144
145	template<typename Args>
146	void operator ()(Args const &args)
147	{
148	this->is_dirty_ = true;
149
150	if (sign::value * args[sample] > this->threshold_)
151	{
152	this->mu_ += args[sample];
153	this->sigma2_ += args[sample] * args[sample];
154	++this->Nu_;
155	}
156	}
157
158	template<typename Args>
159	result_type result(Args const &args) const
160	{
161	if (this->is_dirty_)
162	{
163	this->is_dirty_ = false;
164
165	std::size_t cnt = count(args);
166
167	this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_);
168	this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_);
169	this->sigma2_ -= this->mu_ * this->mu_;
170
171	float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt);
172
173	float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_);
174	float_type xi_hat = 0.5 * ( 1. - tmp );
175	float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp );
176	float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
177	float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
178	this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
179	}
180
181	return this->fit_parameters_;
182	}
183
184	private:
185	std::size_t Nu_; // number of samples larger than threshold
186	mutable float_type mu_; // mean of Nu_ largest samples
187	mutable float_type sigma2_; // variance of Nu_ largest samples
188	float_type threshold_;
189	mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
190	mutable bool is_dirty_;
191	};
192
193	///////////////////////////////////////////////////////////////////////////////
194	// peaks_over_threshold_prob_impl
195	// determines threshold from a given threshold probability using order statistics
196	/**
197	@brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
198
199	@sa peaks_over_threshold_impl
200
201	@param quantile_probability
202	@param pot_threshold_probability
203	*/
204	template<typename Sample, typename LeftRight>
205	struct peaks_over_threshold_prob_impl
206	: accumulator_base
207	{
208	typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
209	// for boost::result_of
210	typedef boost::tuple<float_type, float_type, float_type> result_type;
211	// for left tail fitting, mirror the extreme values
212	typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
213
214	template<typename Args>
215	peaks_over_threshold_prob_impl(Args const &args)
	216	: mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
	217	, sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
218	, threshold_probability_(args[pot_threshold_probability])
219	, fit_parameters_(boost::make_tuple(0., 0., 0.))
220	, is_dirty_(true)
221	{
222	}
223
224	void operator ()(dont_care)
225	{
226	this->is_dirty_ = true;
227	}
228
229	template<typename Args>
230	result_type result(Args const &args) const
231	{
232	if (this->is_dirty_)
233	{
234	this->is_dirty_ = false;
235
236	std::size_t cnt = count(args);
237
238	// the n'th cached sample provides an approximate threshold value u
239	std::size_t n = static_cast<std::size_t>(
240	std::ceil(
241	cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ )
242	)
243	);
244
245	// If n is in a valid range, return result, otherwise return NaN or throw exception
246	if ( n >= static_cast<std::size_t>(tail(args).size()))
247	{
248	if (std::numeric_limits<float_type>::has_quiet_NaN)
249	{
250	return boost::make_tuple(
251	std::numeric_limits<float_type>::quiet_NaN()
252	, std::numeric_limits<float_type>::quiet_NaN()
253	, std::numeric_limits<float_type>::quiet_NaN()
254	);
255	}
256	else
257	{
258	std::ostringstream msg;
259	msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")";
260	boost::throw_exception(std::runtime_error(msg.str()));
261	return boost::make_tuple(Sample(0), Sample(0), Sample(0));
262	}
263	}
264	else
265	{
	266	float_type u = *(tail(args).begin() + n - 1) * sign::value;
267
268	// compute mean and variance of samples above/under threshold value u
269	for (std::size_t i = 0; i < n; ++i)
270	{
271	mu_ += *(tail(args).begin() + i);
	272	sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i));
273	}
274
275	this->mu_ = sign::value * numeric::fdiv(this->mu_, n);
276	this->sigma2_ = numeric::fdiv(this->sigma2_, n);
277	this->sigma2_ -= this->mu_ * this->mu_;
278
279	if (is_same<LeftRight, left>::value)
280	this->threshold_probability_ = 1. - this->threshold_probability_;
281
282	float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_);
283	float_type xi_hat = 0.5 * ( 1. - tmp );
284	float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp );
285	float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat);
286	float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat;
287	this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
288	}
289	}
290
291	return this->fit_parameters_;
292	}
293
294	private:
295	mutable float_type mu_; // mean of samples above threshold u
296	mutable float_type sigma2_; // variance of samples above threshold u
297	mutable float_type threshold_probability_;
298	mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
299	mutable bool is_dirty_;
300	};
301
302	} // namespace impl
303
304	///////////////////////////////////////////////////////////////////////////////
305	// tag::peaks_over_threshold
306	//
307	namespace tag
308	{
309	template<typename LeftRight>
310	struct peaks_over_threshold
311	: depends_on<count>
312	, pot_threshold_value
313	{
314	/// INTERNAL ONLY
315	///
316	typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl;
317	};
318
319	template<typename LeftRight>
320	struct peaks_over_threshold_prob
321	: depends_on<count, tail<LeftRight> >
322	, pot_threshold_probability
323	{
324	/// INTERNAL ONLY
325	///
326	typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl;
327	};
328
329	struct abstract_peaks_over_threshold
330	: depends_on<>
331	{
332	};
333	}
334
335	///////////////////////////////////////////////////////////////////////////////
336	// extract::peaks_over_threshold
337	//
338	namespace extract
339	{
340	extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {};
341
342	BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold)
343	}
344
345	using extract::peaks_over_threshold;
346
347	// peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight>
348	template<typename LeftRight>
349	struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)>
350	{
351	typedef tag::peaks_over_threshold<LeftRight> type;
352	};
353
354	// peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight>
355	template<typename LeftRight>
356	struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)>
357	{
358	typedef tag::peaks_over_threshold_prob<LeftRight> type;
359	};
360
361	template<typename LeftRight>
362	struct feature_of<tag::peaks_over_threshold<LeftRight> >
363	: feature_of<tag::abstract_peaks_over_threshold>
364	{
365	};
366
367	template<typename LeftRight>
368	struct feature_of<tag::peaks_over_threshold_prob<LeftRight> >
369	: feature_of<tag::abstract_peaks_over_threshold>
370	{
371	};
372
373	// So that peaks_over_threshold can be automatically substituted
374	// with weighted_peaks_over_threshold when the weight parameter is non-void.
375	template<typename LeftRight>
376	struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> >
377	{
378	typedef tag::weighted_peaks_over_threshold<LeftRight> type;
379	};
380
381	template<typename LeftRight>
382	struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> >
383	: feature_of<tag::peaks_over_threshold<LeftRight> >
384	{};
385
386	// So that peaks_over_threshold_prob can be automatically substituted
387	// with weighted_peaks_over_threshold_prob when the weight parameter is non-void.
388	template<typename LeftRight>
389	struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> >
390	{
391	typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
392	};
393
394	template<typename LeftRight>
395	struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> >
396	: feature_of<tag::peaks_over_threshold_prob<LeftRight> >
397	{};
398
399	}} // namespace boost::accumulators
400
401	#ifdef _MSC_VER
402	# pragma warning(pop)
403	#endif
404
405	#endif