1 ///////////////////////////////////////////////////////////////////////////////
2 // peaks_over_threshold.hpp
4 // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
5 // Software License, Version 1.0. (See accompanying file
6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
9 #define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
15 #include <boost/config/no_tr1/cmath.hpp> // pow
16 #include <sstream> // stringstream
17 #include <stdexcept> // runtime_error
18 #include <boost/throw_exception.hpp>
19 #include <boost/range.hpp>
20 #include <boost/mpl/if.hpp>
21 #include <boost/mpl/int.hpp>
22 #include <boost/mpl/placeholders.hpp>
23 #include <boost/parameter/keyword.hpp>
24 #include <boost/tuple/tuple.hpp>
25 #include <boost/accumulators/accumulators_fwd.hpp>
26 #include <boost/accumulators/framework/accumulator_base.hpp>
27 #include <boost/accumulators/framework/extractor.hpp>
28 #include <boost/accumulators/numeric/functional.hpp>
29 #include <boost/accumulators/framework/parameters/sample.hpp>
30 #include <boost/accumulators/framework/depends_on.hpp>
31 #include <boost/accumulators/statistics_fwd.hpp>
32 #include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
33 #include <boost/accumulators/statistics/count.hpp>
34 #include <boost/accumulators/statistics/tail.hpp>
37 # pragma warning(push)
38 # pragma warning(disable: 4127) // conditional expression is constant
41 namespace boost { namespace accumulators
44 ///////////////////////////////////////////////////////////////////////////////
45 // threshold_probability and threshold named parameters
47 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value)
48 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability)
50 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value)
51 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability)
55 ///////////////////////////////////////////////////////////////////////////////
56 // peaks_over_threshold_impl
57 // works with an explicit threshold value and does not depend on order statistics
59 @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
61 According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of
62 the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$
63 may be approximated by a generalized Pareto distribution
68 \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\
69 \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0,
73 with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf.
74 Hosking and Wallis (1987),
77 \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\
78 \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right],
81 \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over
82 the threshold \f$u\f$. Equivalently, the distribution function
83 \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by
84 \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$
87 F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u)
89 and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function
90 \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by
92 \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u).
94 It can be shown that \f$\widehat{F}(x)\f$ is a generalized
95 Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$
96 and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$,
97 one obtains an estimator for the \f$\alpha\f$-quantile,
99 \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right],
101 and similarly an estimator for the (coherent) tail mean,
103 \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi},
105 cf. McNeil and Frey (2000).
107 Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the
108 \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define
109 the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are
110 computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the
113 For further details, see
115 J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution,
116 Technometrics, Volume 29, 1987, p. 339-349
118 A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series:
119 an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300
121 @param quantile_probability
122 @param pot_threshold_value
124 template<typename Sample, typename LeftRight>
125 struct peaks_over_threshold_impl
128 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
129 // for boost::result_of
130 typedef boost::tuple<float_type, float_type, float_type> result_type;
131 // for left tail fitting, mirror the extreme values
132 typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
134 template<typename Args>
135 peaks_over_threshold_impl(Args const &args)
137 , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
138 , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
139 , threshold_(sign::value * args[pot_threshold_value])
140 , fit_parameters_(boost::make_tuple(0., 0., 0.))
145 template<typename Args>
146 void operator ()(Args const &args)
148 this->is_dirty_ = true;
150 if (sign::value * args[sample] > this->threshold_)
152 this->mu_ += args[sample];
153 this->sigma2_ += args[sample] * args[sample];
158 template<typename Args>
159 result_type result(Args const &args) const
163 this->is_dirty_ = false;
165 std::size_t cnt = count(args);
167 this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_);
168 this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_);
169 this->sigma2_ -= this->mu_ * this->mu_;
171 float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt);
173 float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_);
174 float_type xi_hat = 0.5 * ( 1. - tmp );
175 float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp );
176 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
177 float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
178 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
181 return this->fit_parameters_;
184 // make this accumulator serializeable
185 // TODO: do we need to split to load/save and verify that threshold did not change?
186 template<class Archive>
187 void serialize(Archive & ar, const unsigned int file_version)
193 ar & get<0>(fit_parameters_);
194 ar & get<1>(fit_parameters_);
195 ar & get<2>(fit_parameters_);
200 std::size_t Nu_; // number of samples larger than threshold
201 mutable float_type mu_; // mean of Nu_ largest samples
202 mutable float_type sigma2_; // variance of Nu_ largest samples
203 float_type threshold_;
204 mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
205 mutable bool is_dirty_;
208 ///////////////////////////////////////////////////////////////////////////////
209 // peaks_over_threshold_prob_impl
210 // determines threshold from a given threshold probability using order statistics
212 @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
214 @sa peaks_over_threshold_impl
216 @param quantile_probability
217 @param pot_threshold_probability
219 template<typename Sample, typename LeftRight>
220 struct peaks_over_threshold_prob_impl
223 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
224 // for boost::result_of
225 typedef boost::tuple<float_type, float_type, float_type> result_type;
226 // for left tail fitting, mirror the extreme values
227 typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign;
229 template<typename Args>
230 peaks_over_threshold_prob_impl(Args const &args)
231 : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
232 , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
233 , threshold_probability_(args[pot_threshold_probability])
234 , fit_parameters_(boost::make_tuple(0., 0., 0.))
239 void operator ()(dont_care)
241 this->is_dirty_ = true;
244 template<typename Args>
245 result_type result(Args const &args) const
249 this->is_dirty_ = false;
251 std::size_t cnt = count(args);
253 // the n'th cached sample provides an approximate threshold value u
254 std::size_t n = static_cast<std::size_t>(
256 cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ )
260 // If n is in a valid range, return result, otherwise return NaN or throw exception
261 if ( n >= static_cast<std::size_t>(tail(args).size()))
263 if (std::numeric_limits<float_type>::has_quiet_NaN)
265 return boost::make_tuple(
266 std::numeric_limits<float_type>::quiet_NaN()
267 , std::numeric_limits<float_type>::quiet_NaN()
268 , std::numeric_limits<float_type>::quiet_NaN()
273 std::ostringstream msg;
274 msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")";
275 boost::throw_exception(std::runtime_error(msg.str()));
276 return boost::make_tuple(Sample(0), Sample(0), Sample(0));
281 float_type u = *(tail(args).begin() + n - 1) * sign::value;
283 // compute mean and variance of samples above/under threshold value u
284 for (std::size_t i = 0; i < n; ++i)
286 mu_ += *(tail(args).begin() + i);
287 sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i));
290 this->mu_ = sign::value * numeric::fdiv(this->mu_, n);
291 this->sigma2_ = numeric::fdiv(this->sigma2_, n);
292 this->sigma2_ -= this->mu_ * this->mu_;
294 if (is_same<LeftRight, left>::value)
295 this->threshold_probability_ = 1. - this->threshold_probability_;
297 float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_);
298 float_type xi_hat = 0.5 * ( 1. - tmp );
299 float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp );
300 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat);
301 float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat;
302 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
306 return this->fit_parameters_;
309 // make this accumulator serializeable
310 // TODO: do we need to split to load/save and verify that threshold did not change?
311 template<class Archive>
312 void serialize(Archive & ar, const unsigned int file_version)
316 ar & threshold_probability_;
317 ar & get<0>(fit_parameters_);
318 ar & get<1>(fit_parameters_);
319 ar & get<2>(fit_parameters_);
324 mutable float_type mu_; // mean of samples above threshold u
325 mutable float_type sigma2_; // variance of samples above threshold u
326 mutable float_type threshold_probability_;
327 mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
328 mutable bool is_dirty_;
333 ///////////////////////////////////////////////////////////////////////////////
334 // tag::peaks_over_threshold
338 template<typename LeftRight>
339 struct peaks_over_threshold
341 , pot_threshold_value
345 typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl;
348 template<typename LeftRight>
349 struct peaks_over_threshold_prob
350 : depends_on<count, tail<LeftRight> >
351 , pot_threshold_probability
355 typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl;
358 struct abstract_peaks_over_threshold
364 ///////////////////////////////////////////////////////////////////////////////
365 // extract::peaks_over_threshold
369 extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {};
371 BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold)
374 using extract::peaks_over_threshold;
376 // peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight>
377 template<typename LeftRight>
378 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)>
380 typedef tag::peaks_over_threshold<LeftRight> type;
383 // peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight>
384 template<typename LeftRight>
385 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)>
387 typedef tag::peaks_over_threshold_prob<LeftRight> type;
390 template<typename LeftRight>
391 struct feature_of<tag::peaks_over_threshold<LeftRight> >
392 : feature_of<tag::abstract_peaks_over_threshold>
396 template<typename LeftRight>
397 struct feature_of<tag::peaks_over_threshold_prob<LeftRight> >
398 : feature_of<tag::abstract_peaks_over_threshold>
402 // So that peaks_over_threshold can be automatically substituted
403 // with weighted_peaks_over_threshold when the weight parameter is non-void.
404 template<typename LeftRight>
405 struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> >
407 typedef tag::weighted_peaks_over_threshold<LeftRight> type;
410 template<typename LeftRight>
411 struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> >
412 : feature_of<tag::peaks_over_threshold<LeftRight> >
415 // So that peaks_over_threshold_prob can be automatically substituted
416 // with weighted_peaks_over_threshold_prob when the weight parameter is non-void.
417 template<typename LeftRight>
418 struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> >
420 typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
423 template<typename LeftRight>
424 struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> >
425 : feature_of<tag::peaks_over_threshold_prob<LeftRight> >
428 }} // namespace boost::accumulators
431 # pragma warning(pop)