]> git.proxmox.com Git - ceph.git/blob - ceph/src/boost/libs/math/example/normal_misc_examples.cpp
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / boost / libs / math / example / normal_misc_examples.cpp
1 // normal_misc_examples.cpp
2
3 // Copyright Paul A. Bristow 2007, 2010.
4
5 // Use, modification and distribution are subject to the
6 // Boost Software License, Version 1.0.
7 // (See accompanying file LICENSE_1_0.txt
8 // or copy at http://www.boost.org/LICENSE_1_0.txt)
9
10 // Example of using normal distribution.
11
12 // Note that this file contains Quickbook mark-up as well as code
13 // and comments, don't change any of the special comment mark-ups!
14
15 //[normal_basic1
16 /*`
17 First we need some includes to access the normal distribution
18 (and some std output of course).
19 */
20
21 #include <boost/math/distributions/normal.hpp> // for normal_distribution
22 using boost::math::normal; // typedef provides default type is double.
23
24 #include <iostream>
25 using std::cout; using std::endl; using std::left; using std::showpoint; using std::noshowpoint;
26 #include <iomanip>
27 using std::setw; using std::setprecision;
28 #include <limits>
29 using std::numeric_limits;
30
31 int main()
32 {
33 cout << "Example: Normal distribution, Miscellaneous Applications.";
34
35 try
36 {
37 { // Traditional tables and values.
38 /*`Let's start by printing some traditional tables.
39 */
40 double step = 1.; // in z
41 double range = 4; // min and max z = -range to +range.
42 int precision = 17; // traditional tables are only computed to much lower precision.
43 // but std::numeric_limits<double>::max_digits10; on new Standard Libraries gives
44 // 17, the maximum number of digits that can possibly be significant.
45 // std::numeric_limits<double>::digits10; == 15 is number of guaranteed digits,
46 // the other two digits being 'noisy'.
47
48 // Construct a standard normal distribution s
49 normal s; // (default mean = zero, and standard deviation = unity)
50 cout << "Standard normal distribution, mean = "<< s.mean()
51 << ", standard deviation = " << s.standard_deviation() << endl;
52
53 /*` First the probability distribution function (pdf).
54 */
55 cout << "Probability distribution function values" << endl;
56 cout << " z " " pdf " << endl;
57 cout.precision(5);
58 for (double z = -range; z < range + step; z += step)
59 {
60 cout << left << setprecision(3) << setw(6) << z << " "
61 << setprecision(precision) << setw(12) << pdf(s, z) << endl;
62 }
63 cout.precision(6); // default
64 /*`And the area under the normal curve from -[infin] up to z,
65 the cumulative distribution function (cdf).
66 */
67 // For a standard normal distribution
68 cout << "Standard normal mean = "<< s.mean()
69 << ", standard deviation = " << s.standard_deviation() << endl;
70 cout << "Integral (area under the curve) from - infinity up to z " << endl;
71 cout << " z " " cdf " << endl;
72 for (double z = -range; z < range + step; z += step)
73 {
74 cout << left << setprecision(3) << setw(6) << z << " "
75 << setprecision(precision) << setw(12) << cdf(s, z) << endl;
76 }
77 cout.precision(6); // default
78
79 /*`And all this you can do with a nanoscopic amount of work compared to
80 the team of *human computers* toiling with Milton Abramovitz and Irene Stegen
81 at the US National Bureau of Standards (now [@http://www.nist.gov NIST]).
82 Starting in 1938, their "Handbook of Mathematical Functions with Formulas, Graphs and Mathematical Tables",
83 was eventually published in 1964, and has been reprinted numerous times since.
84 (A major replacement is planned at [@http://dlmf.nist.gov Digital Library of Mathematical Functions]).
85
86 Pretty-printing a traditional 2-dimensional table is left as an exercise for the student,
87 but why bother now that the Math Toolkit lets you write
88 */
89 double z = 2.;
90 cout << "Area for z = " << z << " is " << cdf(s, z) << endl; // to get the area for z.
91 /*`
92 Correspondingly, we can obtain the traditional 'critical' values for significance levels.
93 For the 95% confidence level, the significance level usually called alpha,
94 is 0.05 = 1 - 0.95 (for a one-sided test), so we can write
95 */
96 cout << "95% of area has a z below " << quantile(s, 0.95) << endl;
97 // 95% of area has a z below 1.64485
98 /*`and a two-sided test (a comparison between two levels, rather than a one-sided test)
99
100 */
101 cout << "95% of area has a z between " << quantile(s, 0.975)
102 << " and " << -quantile(s, 0.975) << endl;
103 // 95% of area has a z between 1.95996 and -1.95996
104 /*`
105
106 First, define a table of significance levels: these are the probabilities
107 that the true occurrence frequency lies outside the calculated interval.
108
109 It is convenient to have an alpha level for the probability that z lies outside just one standard deviation.
110 This will not be some nice neat number like 0.05, but we can easily calculate it,
111 */
112 double alpha1 = cdf(s, -1) * 2; // 0.3173105078629142
113 cout << setprecision(17) << "Significance level for z == 1 is " << alpha1 << endl;
114 /*`
115 and place in our array of favorite alpha values.
116 */
117 double alpha[] = {0.3173105078629142, // z for 1 standard deviation.
118 0.20, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
119 /*`
120
121 Confidence value as % is (1 - alpha) * 100 (so alpha 0.05 == 95% confidence)
122 that the true occurrence frequency lies *inside* the calculated interval.
123
124 */
125 cout << "level of significance (alpha)" << setprecision(4) << endl;
126 cout << "2-sided 1 -sided z(alpha) " << endl;
127 for (int i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
128 {
129 cout << setw(15) << alpha[i] << setw(15) << alpha[i] /2 << setw(10) << quantile(complement(s, alpha[i]/2)) << endl;
130 // Use quantile(complement(s, alpha[i]/2)) to avoid potential loss of accuracy from quantile(s, 1 - alpha[i]/2)
131 }
132 cout << endl;
133
134 /*`Notice the distinction between one-sided (also called one-tailed)
135 where we are using a > *or* < test (and not both)
136 and considering the area of the tail (integral) from z up to +[infin],
137 and a two-sided test where we are using two > *and* < tests, and thus considering two tails,
138 from -[infin] up to z low and z high up to +[infin].
139
140 So the 2-sided values alpha[i] are calculated using alpha[i]/2.
141
142 If we consider a simple example of alpha = 0.05, then for a two-sided test,
143 the lower tail area from -[infin] up to -1.96 is 0.025 (alpha/2)
144 and the upper tail area from +z up to +1.96 is also 0.025 (alpha/2),
145 and the area between -1.96 up to 12.96 is alpha = 0.95.
146 and the sum of the two tails is 0.025 + 0.025 = 0.05,
147
148 */
149 //] [/[normal_basic1]
150
151 //[normal_basic2
152
153 /*`Armed with the cumulative distribution function, we can easily calculate the
154 easy to remember proportion of values that lie within 1, 2 and 3 standard deviations from the mean.
155
156 */
157 cout.precision(3);
158 cout << showpoint << "cdf(s, s.standard_deviation()) = "
159 << cdf(s, s.standard_deviation()) << endl; // from -infinity to 1 sd
160 cout << "cdf(complement(s, s.standard_deviation())) = "
161 << cdf(complement(s, s.standard_deviation())) << endl;
162 cout << "Fraction 1 standard deviation within either side of mean is "
163 << 1 - cdf(complement(s, s.standard_deviation())) * 2 << endl;
164 cout << "Fraction 2 standard deviations within either side of mean is "
165 << 1 - cdf(complement(s, 2 * s.standard_deviation())) * 2 << endl;
166 cout << "Fraction 3 standard deviations within either side of mean is "
167 << 1 - cdf(complement(s, 3 * s.standard_deviation())) * 2 << endl;
168
169 /*`
170 To a useful precision, the 1, 2 & 3 percentages are 68, 95 and 99.7,
171 and these are worth memorising as useful 'rules of thumb', as, for example, in
172 [@http://en.wikipedia.org/wiki/Standard_deviation standard deviation]:
173
174 [pre
175 Fraction 1 standard deviation within either side of mean is 0.683
176 Fraction 2 standard deviations within either side of mean is 0.954
177 Fraction 3 standard deviations within either side of mean is 0.997
178 ]
179
180 We could of course get some really accurate values for these
181 [@http://en.wikipedia.org/wiki/Confidence_interval confidence intervals]
182 by using cout.precision(15);
183
184 [pre
185 Fraction 1 standard deviation within either side of mean is 0.682689492137086
186 Fraction 2 standard deviations within either side of mean is 0.954499736103642
187 Fraction 3 standard deviations within either side of mean is 0.997300203936740
188 ]
189
190 But before you get too excited about this impressive precision,
191 don't forget that the *confidence intervals of the standard deviation* are surprisingly wide,
192 especially if you have estimated the standard deviation from only a few measurements.
193 */
194 //] [/[normal_basic2]
195
196
197 //[normal_bulbs_example1
198 /*`
199 Examples from K. Krishnamoorthy, Handbook of Statistical Distributions with Applications,
200 ISBN 1 58488 635 8, page 125... implemented using the Math Toolkit library.
201
202 A few very simple examples are shown here:
203 */
204 // K. Krishnamoorthy, Handbook of Statistical Distributions with Applications,
205 // ISBN 1 58488 635 8, page 125, example 10.3.5
206 /*`Mean lifespan of 100 W bulbs is 1100 h with standard deviation of 100 h.
207 Assuming, perhaps with little evidence and much faith, that the distribution is normal,
208 we construct a normal distribution called /bulbs/ with these values:
209 */
210 double mean_life = 1100.;
211 double life_standard_deviation = 100.;
212 normal bulbs(mean_life, life_standard_deviation);
213 double expected_life = 1000.;
214
215 /*`The we can use the Cumulative distribution function to predict fractions
216 (or percentages, if * 100) that will last various lifetimes.
217 */
218 cout << "Fraction of bulbs that will last at best (<=) " // P(X <= 1000)
219 << expected_life << " is "<< cdf(bulbs, expected_life) << endl;
220 cout << "Fraction of bulbs that will last at least (>) " // P(X > 1000)
221 << expected_life << " is "<< cdf(complement(bulbs, expected_life)) << endl;
222 double min_life = 900;
223 double max_life = 1200;
224 cout << "Fraction of bulbs that will last between "
225 << min_life << " and " << max_life << " is "
226 << cdf(bulbs, max_life) // P(X <= 1200)
227 - cdf(bulbs, min_life) << endl; // P(X <= 900)
228 /*`
229 [note Real-life failures are often very ab-normal,
230 with a significant number that 'dead-on-arrival' or suffer failure very early in their life:
231 the lifetime of the survivors of 'early mortality' may be well described by the normal distribution.]
232 */
233 //] [/normal_bulbs_example1 Quickbook end]
234 }
235 {
236 // K. Krishnamoorthy, Handbook of Statistical Distributions with Applications,
237 // ISBN 1 58488 635 8, page 125, Example 10.3.6
238
239 //[normal_bulbs_example3
240 /*`Weekly demand for 5 lb sacks of onions at a store is normally distributed with mean 140 sacks and standard deviation 10.
241 */
242 double mean = 140.; // sacks per week.
243 double standard_deviation = 10;
244 normal sacks(mean, standard_deviation);
245
246 double stock = 160.; // per week.
247 cout << "Percentage of weeks overstocked "
248 << cdf(sacks, stock) * 100. << endl; // P(X <=160)
249 // Percentage of weeks overstocked 97.7
250
251 /*`So there will be lots of mouldy onions!
252 So we should be able to say what stock level will meet demand 95% of the weeks.
253 */
254 double stock_95 = quantile(sacks, 0.95);
255 cout << "Store should stock " << int(stock_95) << " sacks to meet 95% of demands." << endl;
256 /*`And it is easy to estimate how to meet 80% of demand, and waste even less.
257 */
258 double stock_80 = quantile(sacks, 0.80);
259 cout << "Store should stock " << int(stock_80) << " sacks to meet 8 out of 10 demands." << endl;
260 //] [/normal_bulbs_example3 Quickbook end]
261 }
262 { // K. Krishnamoorthy, Handbook of Statistical Distributions with Applications,
263 // ISBN 1 58488 635 8, page 125, Example 10.3.7
264
265 //[normal_bulbs_example4
266
267 /*`A machine is set to pack 3 kg of ground beef per pack.
268 Over a long period of time it is found that the average packed was 3 kg
269 with a standard deviation of 0.1 kg.
270 Assuming the packing is normally distributed,
271 we can find the fraction (or %) of packages that weigh more than 3.1 kg.
272 */
273
274 double mean = 3.; // kg
275 double standard_deviation = 0.1; // kg
276 normal packs(mean, standard_deviation);
277
278 double max_weight = 3.1; // kg
279 cout << "Percentage of packs > " << max_weight << " is "
280 << cdf(complement(packs, max_weight)) << endl; // P(X > 3.1)
281
282 double under_weight = 2.9;
283 cout <<"fraction of packs <= " << under_weight << " with a mean of " << mean
284 << " is " << cdf(complement(packs, under_weight)) << endl;
285 // fraction of packs <= 2.9 with a mean of 3 is 0.841345
286 // This is 0.84 - more than the target 0.95
287 // Want 95% to be over this weight, so what should we set the mean weight to be?
288 // KK StatCalc says:
289 double over_mean = 3.0664;
290 normal xpacks(over_mean, standard_deviation);
291 cout << "fraction of packs >= " << under_weight
292 << " with a mean of " << xpacks.mean()
293 << " is " << cdf(complement(xpacks, under_weight)) << endl;
294 // fraction of packs >= 2.9 with a mean of 3.06449 is 0.950005
295 double under_fraction = 0.05; // so 95% are above the minimum weight mean - sd = 2.9
296 double low_limit = standard_deviation;
297 double offset = mean - low_limit - quantile(packs, under_fraction);
298 double nominal_mean = mean + offset;
299
300 normal nominal_packs(nominal_mean, standard_deviation);
301 cout << "Setting the packer to " << nominal_mean << " will mean that "
302 << "fraction of packs >= " << under_weight
303 << " is " << cdf(complement(nominal_packs, under_weight)) << endl;
304
305 /*`
306 Setting the packer to 3.06449 will mean that fraction of packs >= 2.9 is 0.95.
307
308 Setting the packer to 3.13263 will mean that fraction of packs >= 2.9 is 0.99,
309 but will more than double the mean loss from 0.0644 to 0.133.
310
311 Alternatively, we could invest in a better (more precise) packer with a lower standard deviation.
312
313 To estimate how much better (how much smaller standard deviation) it would have to be,
314 we need to get the 5% quantile to be located at the under_weight limit, 2.9
315 */
316 double p = 0.05; // wanted p th quantile.
317 cout << "Quantile of " << p << " = " << quantile(packs, p)
318 << ", mean = " << packs.mean() << ", sd = " << packs.standard_deviation() << endl; //
319 /*`
320 Quantile of 0.05 = 2.83551, mean = 3, sd = 0.1
321
322 With the current packer (mean = 3, sd = 0.1), the 5% quantile is at 2.8551 kg,
323 a little below our target of 2.9 kg.
324 So we know that the standard deviation is going to have to be smaller.
325
326 Let's start by guessing that it (now 0.1) needs to be halved, to a standard deviation of 0.05
327 */
328 normal pack05(mean, 0.05);
329 cout << "Quantile of " << p << " = " << quantile(pack05, p)
330 << ", mean = " << pack05.mean() << ", sd = " << pack05.standard_deviation() << endl;
331
332 cout <<"Fraction of packs >= " << under_weight << " with a mean of " << mean
333 << " and standard deviation of " << pack05.standard_deviation()
334 << " is " << cdf(complement(pack05, under_weight)) << endl;
335 //
336 /*`
337 Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.05 is 0.9772
338
339 So 0.05 was quite a good guess, but we are a little over the 2.9 target,
340 so the standard deviation could be a tiny bit more. So we could do some
341 more guessing to get closer, say by increasing to 0.06
342 */
343
344 normal pack06(mean, 0.06);
345 cout << "Quantile of " << p << " = " << quantile(pack06, p)
346 << ", mean = " << pack06.mean() << ", sd = " << pack06.standard_deviation() << endl;
347
348 cout <<"Fraction of packs >= " << under_weight << " with a mean of " << mean
349 << " and standard deviation of " << pack06.standard_deviation()
350 << " is " << cdf(complement(pack06, under_weight)) << endl;
351 /*`
352 Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.06 is 0.9522
353
354 Now we are getting really close, but to do the job properly,
355 we could use root finding method, for example the tools provided, and used elsewhere,
356 in the Math Toolkit, see __root_finding_without_derivatives.
357
358 But in this normal distribution case, we could be even smarter and make a direct calculation.
359 */
360
361 normal s; // For standard normal distribution,
362 double sd = 0.1;
363 double x = 2.9; // Our required limit.
364 // then probability p = N((x - mean) / sd)
365 // So if we want to find the standard deviation that would be required to meet this limit,
366 // so that the p th quantile is located at x,
367 // in this case the 0.95 (95%) quantile at 2.9 kg pack weight, when the mean is 3 kg.
368
369 double prob = pdf(s, (x - mean) / sd);
370 double qp = quantile(s, 0.95);
371 cout << "prob = " << prob << ", quantile(p) " << qp << endl; // p = 0.241971, quantile(p) 1.64485
372 // Rearranging, we can directly calculate the required standard deviation:
373 double sd95 = std::abs((x - mean)) / qp;
374
375 cout << "If we want the "<< p << " th quantile to be located at "
376 << x << ", would need a standard deviation of " << sd95 << endl;
377
378 normal pack95(mean, sd95); // Distribution of the 'ideal better' packer.
379 cout <<"Fraction of packs >= " << under_weight << " with a mean of " << mean
380 << " and standard deviation of " << pack95.standard_deviation()
381 << " is " << cdf(complement(pack95, under_weight)) << endl;
382
383 // Fraction of packs >= 2.9 with a mean of 3 and standard deviation of 0.0608 is 0.95
384
385 /*`Notice that these two deceptively simple questions
386 (do we over-fill or measure better) are actually very common.
387 The weight of beef might be replaced by a measurement of more or less anything.
388 But the calculations rely on the accuracy of the standard deviation - something
389 that is almost always less good than we might wish,
390 especially if based on a few measurements.
391 */
392
393 //] [/normal_bulbs_example4 Quickbook end]
394 }
395
396 { // K. Krishnamoorthy, Handbook of Statistical Distributions with Applications,
397 // ISBN 1 58488 635 8, page 125, example 10.3.8
398 //[normal_bulbs_example5
399 /*`A bolt is usable if between 3.9 and 4.1 long.
400 From a large batch of bolts, a sample of 50 show a
401 mean length of 3.95 with standard deviation 0.1.
402 Assuming a normal distribution, what proportion is usable?
403 The true sample mean is unknown,
404 but we can use the sample mean and standard deviation to find approximate solutions.
405 */
406
407 normal bolts(3.95, 0.1);
408 double top = 4.1;
409 double bottom = 3.9;
410
411 cout << "Fraction long enough [ P(X <= " << top << ") ] is " << cdf(bolts, top) << endl;
412 cout << "Fraction too short [ P(X <= " << bottom << ") ] is " << cdf(bolts, bottom) << endl;
413 cout << "Fraction OK -between " << bottom << " and " << top
414 << "[ P(X <= " << top << ") - P(X<= " << bottom << " ) ] is "
415 << cdf(bolts, top) - cdf(bolts, bottom) << endl;
416
417 cout << "Fraction too long [ P(X > " << top << ") ] is "
418 << cdf(complement(bolts, top)) << endl;
419
420 cout << "95% of bolts are shorter than " << quantile(bolts, 0.95) << endl;
421
422 //] [/normal_bulbs_example5 Quickbook end]
423 }
424 }
425 catch(const std::exception& e)
426 { // Always useful to include try & catch blocks because default policies
427 // are to throw exceptions on arguments that cause errors like underflow, overflow.
428 // Lacking try & catch blocks, the program will abort without a message below,
429 // which may give some helpful clues as to the cause of the exception.
430 std::cout <<
431 "\n""Message from thrown exception was:\n " << e.what() << std::endl;
432 }
433 return 0;
434 } // int main()
435
436
437 /*
438
439 Output is:
440
441 Autorun "i:\boost-06-05-03-1300\libs\math\test\Math_test\debug\normal_misc_examples.exe"
442 Example: Normal distribution, Miscellaneous Applications.Standard normal distribution, mean = 0, standard deviation = 1
443 Probability distribution function values
444 z pdf
445 -4 0.00013383022576488537
446 -3 0.0044318484119380075
447 -2 0.053990966513188063
448 -1 0.24197072451914337
449 0 0.3989422804014327
450 1 0.24197072451914337
451 2 0.053990966513188063
452 3 0.0044318484119380075
453 4 0.00013383022576488537
454 Standard normal mean = 0, standard deviation = 1
455 Integral (area under the curve) from - infinity up to z
456 z cdf
457 -4 3.1671241833119979e-005
458 -3 0.0013498980316300959
459 -2 0.022750131948179219
460 -1 0.1586552539314571
461 0 0.5
462 1 0.84134474606854293
463 2 0.97724986805182079
464 3 0.9986501019683699
465 4 0.99996832875816688
466 Area for z = 2 is 0.97725
467 95% of area has a z below 1.64485
468 95% of area has a z between 1.95996 and -1.95996
469 Significance level for z == 1 is 0.3173105078629142
470 level of significance (alpha)
471 2-sided 1 -sided z(alpha)
472 0.3173 0.1587 1
473 0.2 0.1 1.282
474 0.1 0.05 1.645
475 0.05 0.025 1.96
476 0.01 0.005 2.576
477 0.001 0.0005 3.291
478 0.0001 5e-005 3.891
479 1e-005 5e-006 4.417
480 cdf(s, s.standard_deviation()) = 0.841
481 cdf(complement(s, s.standard_deviation())) = 0.159
482 Fraction 1 standard deviation within either side of mean is 0.683
483 Fraction 2 standard deviations within either side of mean is 0.954
484 Fraction 3 standard deviations within either side of mean is 0.997
485 Fraction of bulbs that will last at best (<=) 1.00e+003 is 0.159
486 Fraction of bulbs that will last at least (>) 1.00e+003 is 0.841
487 Fraction of bulbs that will last between 900. and 1.20e+003 is 0.819
488 Percentage of weeks overstocked 97.7
489 Store should stock 156 sacks to meet 95% of demands.
490 Store should stock 148 sacks to meet 8 out of 10 demands.
491 Percentage of packs > 3.10 is 0.159
492 fraction of packs <= 2.90 with a mean of 3.00 is 0.841
493 fraction of packs >= 2.90 with a mean of 3.07 is 0.952
494 Setting the packer to 3.06 will mean that fraction of packs >= 2.90 is 0.950
495 Quantile of 0.0500 = 2.84, mean = 3.00, sd = 0.100
496 Quantile of 0.0500 = 2.92, mean = 3.00, sd = 0.0500
497 Fraction of packs >= 2.90 with a mean of 3.00 and standard deviation of 0.0500 is 0.977
498 Quantile of 0.0500 = 2.90, mean = 3.00, sd = 0.0600
499 Fraction of packs >= 2.90 with a mean of 3.00 and standard deviation of 0.0600 is 0.952
500 prob = 0.242, quantile(p) 1.64
501 If we want the 0.0500 th quantile to be located at 2.90, would need a standard deviation of 0.0608
502 Fraction of packs >= 2.90 with a mean of 3.00 and standard deviation of 0.0608 is 0.950
503 Fraction long enough [ P(X <= 4.10) ] is 0.933
504 Fraction too short [ P(X <= 3.90) ] is 0.309
505 Fraction OK -between 3.90 and 4.10[ P(X <= 4.10) - P(X<= 3.90 ) ] is 0.625
506 Fraction too long [ P(X > 4.10) ] is 0.0668
507 95% of bolts are shorter than 4.11
508
509 */