ceph/src/boost/boost/accumulators/statistics/peaks_over_threshold.hpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // peaks_over_threshold.hpp
   3 //
   4 //  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
   5 //  Software License, Version 1.0. (See accompanying file
   6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   7
   8 #ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
   9 #define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
  10
  11 #include <vector>
  12 #include <limits>
  13 #include <numeric>
  14 #include <functional>
  15 #include <boost/config/no_tr1/cmath.hpp> // pow
  16 #include <sstream> // stringstream
  17 #include <stdexcept> // runtime_error
  18 #include <boost/throw_exception.hpp>
  19 #include <boost/range.hpp>
  20 #include <boost/mpl/if.hpp>
  21 #include <boost/mpl/int.hpp>
  22 #include <boost/mpl/placeholders.hpp>
  23 #include <boost/parameter/keyword.hpp>
  24 #include <boost/tuple/tuple.hpp>
  25 #include <boost/accumulators/accumulators_fwd.hpp>
  26 #include <boost/accumulators/framework/accumulator_base.hpp>
  27 #include <boost/accumulators/framework/extractor.hpp>
  28 #include <boost/accumulators/numeric/functional.hpp>
  29 #include <boost/accumulators/framework/parameters/sample.hpp>
  30 #include <boost/accumulators/framework/depends_on.hpp>
  31 #include <boost/accumulators/statistics_fwd.hpp>
  32 #include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
  33 #include <boost/accumulators/statistics/count.hpp>
  34 #include <boost/accumulators/statistics/tail.hpp>
  35
  36 #ifdef _MSC_VER
  37 # pragma warning(push)
  38 # pragma warning(disable: 4127) // conditional expression is constant
  39 #endif
  40
  41 namespace boost { namespace accumulators
  42 {
  43
  44 ///////////////////////////////////////////////////////////////////////////////
  45 // threshold_probability and threshold named parameters
  46 //
  47 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value)
  48 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability)
  49
  50 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value)
  51 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability)
  52
  53 namespace impl
  54 {
  55     ///////////////////////////////////////////////////////////////////////////////
  56     // peaks_over_threshold_impl
  57     //  works with an explicit threshold value and does not depend on order statistics
  58     /**
  59         @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
  60
  61         According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of
  62         the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$
  63         may be approximated by a generalized Pareto distribution
  64         \f[
  65             G_{\xi,\beta}(x) =
  66             \left\{
  67             \begin{array}{ll}
  68                 \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\
  69                 \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0,
  70             \end{array}
  71             \right.
  72         \f]
  73         with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf.
  74         Hosking and Wallis (1987),
  75         \f[
  76             \begin{array}{lll}
  77             \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\
  78             \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right],
  79             \end{array}
  80         \f]
  81         \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over
  82         the threshold \f$u\f$. Equivalently, the distribution function
  83         \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by
  84         \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$
  85         can be written as
  86         \f[
  87             F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u)
  88         \f]
  89         and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function
  90         \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by
  91         \f[
  92             \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u).
  93         \f]
  94         It can be shown that \f$\widehat{F}(x)\f$ is a generalized
  95         Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$
  96         and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$,
  97         one obtains an estimator for the \f$\alpha\f$-quantile,
  98         \f[
  99             \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right],
 100         \f]
 101         and similarly an estimator for the (coherent) tail mean,
 102         \f[
 103             \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi},
 104         \f]
 105         cf. McNeil and Frey (2000).
 106
 107         Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the
 108         \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define
 109         the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are
 110         computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the
 111         correct result.
 112
 113         For further details, see
 114
 115         J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution,
 116         Technometrics, Volume 29, 1987, p. 339-349
 117
 118         A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series:
 119         an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300
 120
 121         @param quantile_probability
 122         @param pot_threshold_value
 123     */
 124     template<typename Sample, typename LeftRight>
 125     struct peaks_over_threshold_impl
 126       : accumulator_base
 127     {
 128         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
 129         // for boost::result_of
 130         typedef boost::tuple<float_type, float_type, float_type> result_type;
 131         // for left tail fitting, mirror the extreme values
 132         typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
 133
 134         template<typename Args>
 135         peaks_over_threshold_impl(Args const &args)
 136           : Nu_(0)
 137           , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
 138           , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
 139           , threshold_(sign::value * args[pot_threshold_value])
 140           , fit_parameters_(boost::make_tuple(0., 0., 0.))
 141           , is_dirty_(true)
 142         {
 143         }
 144
 145         template<typename Args>
 146         void operator ()(Args const &args)
 147         {
 148             this->is_dirty_ = true;
 149
 150             if (sign::value * args[sample] > this->threshold_)
 151             {
 152                 this->mu_ += args[sample];
 153                 this->sigma2_ += args[sample] * args[sample];
 154                 ++this->Nu_;
 155             }
 156         }
 157
 158         template<typename Args>
 159         result_type result(Args const &args) const
 160         {
 161             if (this->is_dirty_)
 162             {
 163                 this->is_dirty_ = false;
 164
 165                 std::size_t cnt = count(args);
 166
 167                 this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_);
 168                 this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_);
 169                 this->sigma2_ -= this->mu_ * this->mu_;
 170
 171                 float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt);
 172
 173                 float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_);
 174                 float_type xi_hat = 0.5 * ( 1. - tmp );
 175                 float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp );
 176                 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
 177                 float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
 178                 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
 179             }
 180
 181             return this->fit_parameters_;
 182         }
 183
 184     private:
 185         std::size_t Nu_;                     // number of samples larger than threshold
 186         mutable float_type mu_;              // mean of Nu_ largest samples
 187         mutable float_type sigma2_;          // variance of Nu_ largest samples
 188         float_type threshold_;
 189         mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
 190         mutable bool is_dirty_;
 191     };
 192
 193     ///////////////////////////////////////////////////////////////////////////////
 194     // peaks_over_threshold_prob_impl
 195     //  determines threshold from a given threshold probability using order statistics
 196     /**
 197         @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
 198
 199         @sa peaks_over_threshold_impl
 200
 201         @param quantile_probability
 202         @param pot_threshold_probability
 203     */
 204     template<typename Sample, typename LeftRight>
 205     struct peaks_over_threshold_prob_impl
 206       : accumulator_base
 207     {
 208         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
 209         // for boost::result_of
 210         typedef boost::tuple<float_type, float_type, float_type> result_type;
 211         // for left tail fitting, mirror the extreme values
 212         typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
 213
 214         template<typename Args>
 215         peaks_over_threshold_prob_impl(Args const &args)
 216           : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
 217           , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
 218           , threshold_probability_(args[pot_threshold_probability])
 219           , fit_parameters_(boost::make_tuple(0., 0., 0.))
 220           , is_dirty_(true)
 221         {
 222         }
 223
 224         void operator ()(dont_care)
 225         {
 226             this->is_dirty_ = true;
 227         }
 228
 229         template<typename Args>
 230         result_type result(Args const &args) const
 231         {
 232             if (this->is_dirty_)
 233             {
 234                 this->is_dirty_ = false;
 235
 236                 std::size_t cnt = count(args);
 237
 238                 // the n'th cached sample provides an approximate threshold value u
 239                 std::size_t n = static_cast<std::size_t>(
 240                     std::ceil(
 241                         cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ )
 242                     )
 243                 );
 244
 245                 // If n is in a valid range, return result, otherwise return NaN or throw exception
 246                 if ( n >= static_cast<std::size_t>(tail(args).size()))
 247                 {
 248                     if (std::numeric_limits<float_type>::has_quiet_NaN)
 249                     {
 250                         return boost::make_tuple(
 251                             std::numeric_limits<float_type>::quiet_NaN()
 252                           , std::numeric_limits<float_type>::quiet_NaN()
 253                           , std::numeric_limits<float_type>::quiet_NaN()
 254                         );
 255                     }
 256                     else
 257                     {
 258                         std::ostringstream msg;
 259                         msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")";
 260                         boost::throw_exception(std::runtime_error(msg.str()));
 261                         return boost::make_tuple(Sample(0), Sample(0), Sample(0));
 262                     }
 263                 }
 264                 else
 265                 {
 266                     float_type u = *(tail(args).begin() + n - 1) * sign::value;
 267
 268                     // compute mean and variance of samples above/under threshold value u
 269                     for (std::size_t i = 0; i < n; ++i)
 270                     {
 271                         mu_ += *(tail(args).begin() + i);
 272                         sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i));
 273                     }
 274
 275                     this->mu_ = sign::value * numeric::fdiv(this->mu_, n);
 276                     this->sigma2_ = numeric::fdiv(this->sigma2_, n);
 277                     this->sigma2_ -= this->mu_ * this->mu_;
 278
 279                     if (is_same<LeftRight, left>::value)
 280                         this->threshold_probability_ = 1. - this->threshold_probability_;
 281
 282                     float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_);
 283                     float_type xi_hat = 0.5 * ( 1. - tmp );
 284                     float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp );
 285                     float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat);
 286                     float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat;
 287                     this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
 288                 }
 289             }
 290
 291             return this->fit_parameters_;
 292         }
 293
 294     private:
 295         mutable float_type mu_;                     // mean of samples above threshold u
 296         mutable float_type sigma2_;                 // variance of samples above threshold u
 297         mutable float_type threshold_probability_;
 298         mutable result_type fit_parameters_;        // boost::tuple that stores fit parameters
 299         mutable bool is_dirty_;
 300     };
 301
 302 } // namespace impl
 303
 304 ///////////////////////////////////////////////////////////////////////////////
 305 // tag::peaks_over_threshold
 306 //
 307 namespace tag
 308 {
 309     template<typename LeftRight>
 310     struct peaks_over_threshold
 311       : depends_on<count>
 312       , pot_threshold_value
 313     {
 314         /// INTERNAL ONLY
 315         ///
 316         typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl;
 317     };
 318
 319     template<typename LeftRight>
 320     struct peaks_over_threshold_prob
 321       : depends_on<count, tail<LeftRight> >
 322       , pot_threshold_probability
 323     {
 324         /// INTERNAL ONLY
 325         ///
 326         typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl;
 327     };
 328
 329     struct abstract_peaks_over_threshold
 330       : depends_on<>
 331     {
 332     };
 333 }
 334
 335 ///////////////////////////////////////////////////////////////////////////////
 336 // extract::peaks_over_threshold
 337 //
 338 namespace extract
 339 {
 340     extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {};
 341
 342     BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold)
 343 }
 344
 345 using extract::peaks_over_threshold;
 346
 347 // peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight>
 348 template<typename LeftRight>
 349 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)>
 350 {
 351     typedef tag::peaks_over_threshold<LeftRight> type;
 352 };
 353
 354 // peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight>
 355 template<typename LeftRight>
 356 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)>
 357 {
 358     typedef tag::peaks_over_threshold_prob<LeftRight> type;
 359 };
 360
 361 template<typename LeftRight>
 362 struct feature_of<tag::peaks_over_threshold<LeftRight> >
 363   : feature_of<tag::abstract_peaks_over_threshold>
 364 {
 365 };
 366
 367 template<typename LeftRight>
 368 struct feature_of<tag::peaks_over_threshold_prob<LeftRight> >
 369   : feature_of<tag::abstract_peaks_over_threshold>
 370 {
 371 };
 372
 373 // So that peaks_over_threshold can be automatically substituted
 374 // with weighted_peaks_over_threshold when the weight parameter is non-void.
 375 template<typename LeftRight>
 376 struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> >
 377 {
 378     typedef tag::weighted_peaks_over_threshold<LeftRight> type;
 379 };
 380
 381 template<typename LeftRight>
 382 struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> >
 383   : feature_of<tag::peaks_over_threshold<LeftRight> >
 384 {};
 385
 386 // So that peaks_over_threshold_prob can be automatically substituted
 387 // with weighted_peaks_over_threshold_prob when the weight parameter is non-void.
 388 template<typename LeftRight>
 389 struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> >
 390 {
 391     typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
 392 };
 393
 394 template<typename LeftRight>
 395 struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> >
 396   : feature_of<tag::peaks_over_threshold_prob<LeftRight> >
 397 {};
 398
 399 }} // namespace boost::accumulators
 400
 401 #ifdef _MSC_VER
 402 # pragma warning(pop)
 403 #endif
 404
 405 #endif