ceph/src/boost/boost/accumulators/statistics/peaks_over_threshold.hpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // peaks_over_threshold.hpp
   3 //
   4 //  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
   5 //  Software License, Version 1.0. (See accompanying file
   6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   7
   8 #ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
   9 #define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
  10
  11 #include <vector>
  12 #include <limits>
  13 #include <numeric>
  14 #include <functional>
  15 #include <boost/config/no_tr1/cmath.hpp> // pow
  16 #include <sstream> // stringstream
  17 #include <stdexcept> // runtime_error
  18 #include <boost/throw_exception.hpp>
  19 #include <boost/range.hpp>
  20 #include <boost/mpl/if.hpp>
  21 #include <boost/mpl/int.hpp>
  22 #include <boost/mpl/placeholders.hpp>
  23 #include <boost/parameter/keyword.hpp>
  24 #include <boost/tuple/tuple.hpp>
  25 #include <boost/accumulators/accumulators_fwd.hpp>
  26 #include <boost/accumulators/framework/accumulator_base.hpp>
  27 #include <boost/accumulators/framework/extractor.hpp>
  28 #include <boost/accumulators/numeric/functional.hpp>
  29 #include <boost/accumulators/framework/parameters/sample.hpp>
  30 #include <boost/accumulators/framework/depends_on.hpp>
  31 #include <boost/accumulators/statistics_fwd.hpp>
  32 #include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
  33 #include <boost/accumulators/statistics/count.hpp>
  34 #include <boost/accumulators/statistics/tail.hpp>
  35
  36 #ifdef _MSC_VER
  37 # pragma warning(push)
  38 # pragma warning(disable: 4127) // conditional expression is constant
  39 #endif
  40
  41 namespace boost { namespace accumulators
  42 {
  43
  44 ///////////////////////////////////////////////////////////////////////////////
  45 // threshold_probability and threshold named parameters
  46 //
  47 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value)
  48 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability)
  49
  50 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value)
  51 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability)
  52
  53 namespace impl
  54 {
  55     ///////////////////////////////////////////////////////////////////////////////
  56     // peaks_over_threshold_impl
  57     //  works with an explicit threshold value and does not depend on order statistics
  58     /**
  59         @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
  60
  61         According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of
  62         the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$
  63         may be approximated by a generalized Pareto distribution
  64         \f[
  65             G_{\xi,\beta}(x) =
  66             \left\{
  67             \begin{array}{ll}
  68                 \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\
  69                 \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0,
  70             \end{array}
  71             \right.
  72         \f]
  73         with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf.
  74         Hosking and Wallis (1987),
  75         \f[
  76             \begin{array}{lll}
  77             \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\
  78             \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right],
  79             \end{array}
  80         \f]
  81         \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over
  82         the threshold \f$u\f$. Equivalently, the distribution function
  83         \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by
  84         \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$
  85         can be written as
  86         \f[
  87             F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u)
  88         \f]
  89         and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function
  90         \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by
  91         \f[
  92             \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u).
  93         \f]
  94         It can be shown that \f$\widehat{F}(x)\f$ is a generalized
  95         Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$
  96         and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$,
  97         one obtains an estimator for the \f$\alpha\f$-quantile,
  98         \f[
  99             \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right],
 100         \f]
 101         and similarly an estimator for the (coherent) tail mean,
 102         \f[
 103             \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi},
 104         \f]
 105         cf. McNeil and Frey (2000).
 106
 107         Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the
 108         \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define
 109         the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are
 110         computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the
 111         correct result.
 112
 113         For further details, see
 114
 115         J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution,
 116         Technometrics, Volume 29, 1987, p. 339-349
 117
 118         A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series:
 119         an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300
 120
 121         @param quantile_probability
 122         @param pot_threshold_value
 123     */
 124     template<typename Sample, typename LeftRight>
 125     struct peaks_over_threshold_impl
 126       : accumulator_base
 127     {
 128         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
 129         // for boost::result_of
 130         typedef boost::tuple<float_type, float_type, float_type> result_type;
 131         // for left tail fitting, mirror the extreme values
 132         typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
 133
 134         template<typename Args>
 135         peaks_over_threshold_impl(Args const &args)
 136           : Nu_(0)
 137           , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
 138           , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
 139           , threshold_(sign::value * args[pot_threshold_value])
 140           , fit_parameters_(boost::make_tuple(0., 0., 0.))
 141           , is_dirty_(true)
 142         {
 143         }
 144
 145         template<typename Args>
 146         void operator ()(Args const &args)
 147         {
 148             this->is_dirty_ = true;
 149
 150             if (sign::value * args[sample] > this->threshold_)
 151             {
 152                 this->mu_ += args[sample];
 153                 this->sigma2_ += args[sample] * args[sample];
 154                 ++this->Nu_;
 155             }
 156         }
 157
 158         template<typename Args>
 159         result_type result(Args const &args) const
 160         {
 161             if (this->is_dirty_)
 162             {
 163                 this->is_dirty_ = false;
 164
 165                 std::size_t cnt = count(args);
 166
 167                 this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_);
 168                 this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_);
 169                 this->sigma2_ -= this->mu_ * this->mu_;
 170
 171                 float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt);
 172
 173                 float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_);
 174                 float_type xi_hat = 0.5 * ( 1. - tmp );
 175                 float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp );
 176                 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
 177                 float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
 178                 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
 179             }
 180
 181             return this->fit_parameters_;
 182         }
 183
 184         // make this accumulator serializeable
 185         // TODO: do we need to split to load/save and verify that threshold did not change?
 186         template<class Archive>
 187         void serialize(Archive & ar, const unsigned int file_version)
 188         {
 189             ar & Nu_;
 190             ar & mu_;
 191             ar & sigma2_;
 192             ar & threshold_;
 193             ar & get<0>(fit_parameters_);
 194             ar & get<1>(fit_parameters_);
 195             ar & get<2>(fit_parameters_);
 196             ar & is_dirty_;
 197         }
 198
 199     private:
 200         std::size_t Nu_;                     // number of samples larger than threshold
 201         mutable float_type mu_;              // mean of Nu_ largest samples
 202         mutable float_type sigma2_;          // variance of Nu_ largest samples
 203         float_type threshold_;
 204         mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
 205         mutable bool is_dirty_;
 206     };
 207
 208     ///////////////////////////////////////////////////////////////////////////////
 209     // peaks_over_threshold_prob_impl
 210     //  determines threshold from a given threshold probability using order statistics
 211     /**
 212         @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
 213
 214         @sa peaks_over_threshold_impl
 215
 216         @param quantile_probability
 217         @param pot_threshold_probability
 218     */
 219     template<typename Sample, typename LeftRight>
 220     struct peaks_over_threshold_prob_impl
 221       : accumulator_base
 222     {
 223         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
 224         // for boost::result_of
 225         typedef boost::tuple<float_type, float_type, float_type> result_type;
 226         // for left tail fitting, mirror the extreme values
 227         typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
 228
 229         template<typename Args>
 230         peaks_over_threshold_prob_impl(Args const &args)
 231           : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
 232           , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
 233           , threshold_probability_(args[pot_threshold_probability])
 234           , fit_parameters_(boost::make_tuple(0., 0., 0.))
 235           , is_dirty_(true)
 236         {
 237         }
 238
 239         void operator ()(dont_care)
 240         {
 241             this->is_dirty_ = true;
 242         }
 243
 244         template<typename Args>
 245         result_type result(Args const &args) const
 246         {
 247             if (this->is_dirty_)
 248             {
 249                 this->is_dirty_ = false;
 250
 251                 std::size_t cnt = count(args);
 252
 253                 // the n'th cached sample provides an approximate threshold value u
 254                 std::size_t n = static_cast<std::size_t>(
 255                     std::ceil(
 256                         cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ )
 257                     )
 258                 );
 259
 260                 // If n is in a valid range, return result, otherwise return NaN or throw exception
 261                 if ( n >= static_cast<std::size_t>(tail(args).size()))
 262                 {
 263                     if (std::numeric_limits<float_type>::has_quiet_NaN)
 264                     {
 265                         return boost::make_tuple(
 266                             std::numeric_limits<float_type>::quiet_NaN()
 267                           , std::numeric_limits<float_type>::quiet_NaN()
 268                           , std::numeric_limits<float_type>::quiet_NaN()
 269                         );
 270                     }
 271                     else
 272                     {
 273                         std::ostringstream msg;
 274                         msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")";
 275                         boost::throw_exception(std::runtime_error(msg.str()));
 276                         return boost::make_tuple(Sample(0), Sample(0), Sample(0));
 277                     }
 278                 }
 279                 else
 280                 {
 281                     float_type u = *(tail(args).begin() + n - 1) * sign::value;
 282
 283                     // compute mean and variance of samples above/under threshold value u
 284                     for (std::size_t i = 0; i < n; ++i)
 285                     {
 286                         mu_ += *(tail(args).begin() + i);
 287                         sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i));
 288                     }
 289
 290                     this->mu_ = sign::value * numeric::fdiv(this->mu_, n);
 291                     this->sigma2_ = numeric::fdiv(this->sigma2_, n);
 292                     this->sigma2_ -= this->mu_ * this->mu_;
 293
 294                     if (is_same<LeftRight, left>::value)
 295                         this->threshold_probability_ = 1. - this->threshold_probability_;
 296
 297                     float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_);
 298                     float_type xi_hat = 0.5 * ( 1. - tmp );
 299                     float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp );
 300                     float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat);
 301                     float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat;
 302                     this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
 303                 }
 304             }
 305
 306             return this->fit_parameters_;
 307         }
 308
 309         // make this accumulator serializeable
 310         // TODO: do we need to split to load/save and verify that threshold did not change?
 311         template<class Archive>
 312         void serialize(Archive & ar, const unsigned int file_version)
 313         {
 314             ar & mu_;
 315             ar & sigma2_;
 316             ar & threshold_probability_;
 317             ar & get<0>(fit_parameters_);
 318             ar & get<1>(fit_parameters_);
 319             ar & get<2>(fit_parameters_);
 320             ar & is_dirty_;
 321         }
 322
 323     private:
 324         mutable float_type mu_;                     // mean of samples above threshold u
 325         mutable float_type sigma2_;                 // variance of samples above threshold u
 326         mutable float_type threshold_probability_;
 327         mutable result_type fit_parameters_;        // boost::tuple that stores fit parameters
 328         mutable bool is_dirty_;
 329     };
 330
 331 } // namespace impl
 332
 333 ///////////////////////////////////////////////////////////////////////////////
 334 // tag::peaks_over_threshold
 335 //
 336 namespace tag
 337 {
 338     template<typename LeftRight>
 339     struct peaks_over_threshold
 340       : depends_on<count>
 341       , pot_threshold_value
 342     {
 343         /// INTERNAL ONLY
 344         ///
 345         typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl;
 346     };
 347
 348     template<typename LeftRight>
 349     struct peaks_over_threshold_prob
 350       : depends_on<count, tail<LeftRight> >
 351       , pot_threshold_probability
 352     {
 353         /// INTERNAL ONLY
 354         ///
 355         typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl;
 356     };
 357
 358     struct abstract_peaks_over_threshold
 359       : depends_on<>
 360     {
 361     };
 362 }
 363
 364 ///////////////////////////////////////////////////////////////////////////////
 365 // extract::peaks_over_threshold
 366 //
 367 namespace extract
 368 {
 369     extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {};
 370
 371     BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold)
 372 }
 373
 374 using extract::peaks_over_threshold;
 375
 376 // peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight>
 377 template<typename LeftRight>
 378 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)>
 379 {
 380     typedef tag::peaks_over_threshold<LeftRight> type;
 381 };
 382
 383 // peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight>
 384 template<typename LeftRight>
 385 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)>
 386 {
 387     typedef tag::peaks_over_threshold_prob<LeftRight> type;
 388 };
 389
 390 template<typename LeftRight>
 391 struct feature_of<tag::peaks_over_threshold<LeftRight> >
 392   : feature_of<tag::abstract_peaks_over_threshold>
 393 {
 394 };
 395
 396 template<typename LeftRight>
 397 struct feature_of<tag::peaks_over_threshold_prob<LeftRight> >
 398   : feature_of<tag::abstract_peaks_over_threshold>
 399 {
 400 };
 401
 402 // So that peaks_over_threshold can be automatically substituted
 403 // with weighted_peaks_over_threshold when the weight parameter is non-void.
 404 template<typename LeftRight>
 405 struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> >
 406 {
 407     typedef tag::weighted_peaks_over_threshold<LeftRight> type;
 408 };
 409
 410 template<typename LeftRight>
 411 struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> >
 412   : feature_of<tag::peaks_over_threshold<LeftRight> >
 413 {};
 414
 415 // So that peaks_over_threshold_prob can be automatically substituted
 416 // with weighted_peaks_over_threshold_prob when the weight parameter is non-void.
 417 template<typename LeftRight>
 418 struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> >
 419 {
 420     typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
 421 };
 422
 423 template<typename LeftRight>
 424 struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> >
 425   : feature_of<tag::peaks_over_threshold_prob<LeftRight> >
 426 {};
 427
 428 }} // namespace boost::accumulators
 429
 430 #ifdef _MSC_VER
 431 # pragma warning(pop)
 432 #endif
 433
 434 #endif