]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright John Maddock 2006 |
2 | // Copyright Paul A. Bristow 2007, 2010 | |
3 | ||
4 | // Use, modification and distribution are subject to the | |
5 | // Boost Software License, Version 1.0. | |
6 | // (See accompanying file LICENSE_1_0.txt | |
7 | // or copy at http://www.boost.org/LICENSE_1_0.txt) | |
8 | ||
9 | #ifdef _MSC_VER | |
10 | # pragma warning(disable: 4512) // assignment operator could not be generated. | |
11 | # pragma warning(disable: 4510) // default constructor could not be generated. | |
12 | # pragma warning(disable: 4610) // can never be instantiated - user defined constructor required. | |
13 | #endif | |
14 | ||
15 | #include <boost/math/distributions/students_t.hpp> | |
16 | ||
17 | // avoid "using namespace std;" and "using namespace boost::math;" | |
18 | // to avoid potential ambiguity with names in std random. | |
19 | #include <iostream> | |
20 | using std::cout; using std::endl; | |
21 | using std::left; using std::fixed; using std::right; using std::scientific; | |
22 | #include <iomanip> | |
23 | using std::setw; | |
24 | using std::setprecision; | |
25 | ||
26 | void confidence_limits_on_mean(double Sm, double Sd, unsigned Sn) | |
27 | { | |
28 | // | |
29 | // Sm = Sample Mean. | |
30 | // Sd = Sample Standard Deviation. | |
31 | // Sn = Sample Size. | |
32 | // | |
33 | // Calculate confidence intervals for the mean. | |
34 | // For example if we set the confidence limit to | |
35 | // 0.95, we know that if we repeat the sampling | |
36 | // 100 times, then we expect that the true mean | |
37 | // will be between out limits on 95 occations. | |
38 | // Note: this is not the same as saying a 95% | |
39 | // confidence interval means that there is a 95% | |
40 | // probability that the interval contains the true mean. | |
41 | // The interval computed from a given sample either | |
42 | // contains the true mean or it does not. | |
43 | // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm | |
44 | ||
45 | using boost::math::students_t; | |
46 | ||
47 | // Print out general info: | |
48 | cout << | |
49 | "__________________________________\n" | |
50 | "2-Sided Confidence Limits For Mean\n" | |
51 | "__________________________________\n\n"; | |
52 | cout << setprecision(7); | |
53 | cout << setw(40) << left << "Number of Observations" << "= " << Sn << "\n"; | |
54 | cout << setw(40) << left << "Mean" << "= " << Sm << "\n"; | |
55 | cout << setw(40) << left << "Standard Deviation" << "= " << Sd << "\n"; | |
56 | // | |
57 | // Define a table of significance/risk levels: | |
58 | // | |
59 | double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 }; | |
60 | // | |
61 | // Start by declaring the distribution we'll need: | |
62 | // | |
63 | students_t dist(Sn - 1); | |
64 | // | |
65 | // Print table header: | |
66 | // | |
67 | cout << "\n\n" | |
68 | "_______________________________________________________________\n" | |
69 | "Confidence T Interval Lower Upper\n" | |
70 | " Value (%) Value Width Limit Limit\n" | |
71 | "_______________________________________________________________\n"; | |
72 | // | |
73 | // Now print out the data for the table rows. | |
74 | // | |
75 | for(unsigned i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i) | |
76 | { | |
77 | // Confidence value: | |
78 | cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]); | |
79 | // calculate T: | |
80 | double T = quantile(complement(dist, alpha[i] / 2)); | |
81 | // Print T: | |
82 | cout << fixed << setprecision(3) << setw(10) << right << T; | |
83 | // Calculate width of interval (one sided): | |
84 | double w = T * Sd / sqrt(double(Sn)); | |
85 | // Print width: | |
86 | if(w < 0.01) | |
87 | cout << scientific << setprecision(3) << setw(17) << right << w; | |
88 | else | |
89 | cout << fixed << setprecision(3) << setw(17) << right << w; | |
90 | // Print Limits: | |
91 | cout << fixed << setprecision(5) << setw(15) << right << Sm - w; | |
92 | cout << fixed << setprecision(5) << setw(15) << right << Sm + w << endl; | |
93 | } | |
94 | cout << endl; | |
95 | } // void confidence_limits_on_mean | |
96 | ||
97 | void single_sample_t_test(double M, double Sm, double Sd, unsigned Sn, double alpha) | |
98 | { | |
99 | // | |
100 | // M = true mean. | |
101 | // Sm = Sample Mean. | |
102 | // Sd = Sample Standard Deviation. | |
103 | // Sn = Sample Size. | |
104 | // alpha = Significance Level. | |
105 | // | |
106 | // A Students t test applied to a single set of data. | |
107 | // We are testing the null hypothesis that the true | |
108 | // mean of the sample is M, and that any variation is down | |
109 | // to chance. We can also test the alternative hypothesis | |
110 | // that any difference is not down to chance. | |
111 | // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm | |
112 | ||
113 | using boost::math::students_t; | |
114 | ||
115 | // Print header: | |
116 | cout << | |
117 | "__________________________________\n" | |
118 | "Student t test for a single sample\n" | |
119 | "__________________________________\n\n"; | |
120 | cout << setprecision(5); | |
121 | cout << setw(55) << left << "Number of Observations" << "= " << Sn << "\n"; | |
122 | cout << setw(55) << left << "Sample Mean" << "= " << Sm << "\n"; | |
123 | cout << setw(55) << left << "Sample Standard Deviation" << "= " << Sd << "\n"; | |
124 | cout << setw(55) << left << "Expected True Mean" << "= " << M << "\n\n"; | |
125 | // | |
126 | // Now we can calculate and output some stats: | |
127 | // | |
128 | // Difference in means: | |
129 | double diff = Sm - M; | |
130 | cout << setw(55) << left << "Sample Mean - Expected Test Mean" << "= " << diff << "\n"; | |
131 | // Degrees of freedom: | |
132 | unsigned v = Sn - 1; | |
133 | cout << setw(55) << left << "Degrees of Freedom" << "= " << v << "\n"; | |
134 | // t-statistic: | |
135 | double t_stat = diff * sqrt(double(Sn)) / Sd; | |
136 | cout << setw(55) << left << "T Statistic" << "= " << t_stat << "\n"; | |
137 | // | |
138 | // Finally define our distribution, and get the probability: | |
139 | // | |
140 | students_t dist(v); | |
141 | double q = cdf(complement(dist, fabs(t_stat))); | |
142 | cout << setw(55) << left << "Probability that difference is due to chance" << "= " | |
143 | << setprecision(3) << scientific << 2 * q << "\n\n"; | |
144 | // | |
145 | // Finally print out results of alternative hypothesis: | |
146 | // | |
147 | cout << setw(55) << left << | |
148 | "Results for Alternative Hypothesis and alpha" << "= " | |
149 | << setprecision(4) << fixed << alpha << "\n\n"; | |
150 | cout << "Alternative Hypothesis Conclusion\n"; | |
151 | cout << "Mean != " << setprecision(3) << fixed << M << " "; | |
152 | if(q < alpha / 2) | |
153 | cout << "NOT REJECTED\n"; | |
154 | else | |
155 | cout << "REJECTED\n"; | |
156 | cout << "Mean < " << setprecision(3) << fixed << M << " "; | |
157 | if(cdf(complement(dist, t_stat)) > alpha) | |
158 | cout << "NOT REJECTED\n"; | |
159 | else | |
160 | cout << "REJECTED\n"; | |
161 | cout << "Mean > " << setprecision(3) << fixed << M << " "; | |
162 | if(cdf(dist, t_stat) > alpha) | |
163 | cout << "NOT REJECTED\n"; | |
164 | else | |
165 | cout << "REJECTED\n"; | |
166 | cout << endl << endl; | |
167 | } // void single_sample_t_test( | |
168 | ||
169 | void single_sample_find_df(double M, double Sm, double Sd) | |
170 | { | |
171 | // | |
172 | // M = true mean. | |
173 | // Sm = Sample Mean. | |
174 | // Sd = Sample Standard Deviation. | |
175 | // | |
176 | ||
177 | using boost::math::students_t; | |
178 | ||
179 | // Print out general info: | |
180 | cout << | |
181 | "_____________________________________________________________\n" | |
182 | "Estimated sample sizes required for various confidence levels\n" | |
183 | "_____________________________________________________________\n\n"; | |
184 | cout << setprecision(5); | |
185 | cout << setw(40) << left << "True Mean" << "= " << M << "\n"; | |
186 | cout << setw(40) << left << "Sample Mean" << "= " << Sm << "\n"; | |
187 | cout << setw(40) << left << "Sample Standard Deviation" << "= " << Sd << "\n"; | |
188 | // | |
189 | // Define a table of significance intervals: | |
190 | // | |
191 | double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 }; | |
192 | // | |
193 | // Print table header: | |
194 | // | |
195 | cout << "\n\n" | |
196 | "_______________________________________________________________\n" | |
197 | "Confidence Estimated Estimated\n" | |
198 | " Value (%) Sample Size Sample Size\n" | |
199 | " (one sided test) (two sided test)\n" | |
200 | "_______________________________________________________________\n"; | |
201 | // | |
202 | // Now print out the data for the table rows. | |
203 | // | |
204 | for(unsigned i = 1; i < sizeof(alpha)/sizeof(alpha[0]); ++i) | |
205 | { | |
206 | // Confidence value: | |
207 | cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]); | |
208 | // calculate df for single sided test: | |
209 | double df = students_t::find_degrees_of_freedom( | |
210 | fabs(M - Sm), alpha[i], alpha[i], Sd); | |
211 | // convert to sample size, always one more than the degrees of freedom: | |
212 | double size = ceil(df) + 1; | |
213 | // Print size: | |
214 | cout << fixed << setprecision(0) << setw(16) << right << size; | |
215 | // calculate df for two sided test: | |
216 | df = students_t::find_degrees_of_freedom( | |
217 | fabs(M - Sm), alpha[i]/2, alpha[i], Sd); | |
218 | // convert to sample size: | |
219 | size = ceil(df) + 1; | |
220 | // Print size: | |
221 | cout << fixed << setprecision(0) << setw(16) << right << size << endl; | |
222 | } | |
223 | cout << endl; | |
224 | } // void single_sample_find_df | |
225 | ||
226 | int main() | |
227 | { | |
228 | // | |
229 | // Run tests for Heat Flow Meter data | |
230 | // see http://www.itl.nist.gov/div898/handbook/eda/section4/eda428.htm | |
231 | // The data was collected while calibrating a heat flow meter | |
232 | // against a known value. | |
233 | // | |
234 | confidence_limits_on_mean(9.261460, 0.2278881e-01, 195); | |
235 | single_sample_t_test(5, 9.261460, 0.2278881e-01, 195, 0.05); | |
236 | single_sample_find_df(5, 9.261460, 0.2278881e-01); | |
237 | ||
238 | // | |
239 | // Data for this example from: | |
240 | // P.K.Hou, O. W. Lau & M.C. Wong, Analyst (1983) vol. 108, p 64. | |
241 | // from Statistics for Analytical Chemistry, 3rd ed. (1994), pp 54-55 | |
242 | // J. C. Miller and J. N. Miller, Ellis Horwood ISBN 0 13 0309907 | |
243 | // | |
244 | // Determination of mercury by cold-vapour atomic absorption, | |
245 | // the following values were obtained fusing a trusted | |
246 | // Standard Reference Material containing 38.9% mercury, | |
247 | // which we assume is correct or 'true'. | |
248 | // | |
249 | confidence_limits_on_mean(37.8, 0.964365, 3); | |
250 | // 95% test: | |
251 | single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.05); | |
252 | // 90% test: | |
253 | single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.1); | |
254 | // parameter estimate: | |
255 | single_sample_find_df(38.9, 37.8, 0.964365); | |
256 | ||
257 | return 0; | |
258 | } // int main() | |
259 | ||
260 | /* | |
261 | ||
262 | Output: | |
263 | ||
264 | ------ Rebuild All started: Project: students_t_single_sample, Configuration: Release Win32 ------ | |
265 | students_t_single_sample.cpp | |
266 | Generating code | |
267 | Finished generating code | |
268 | students_t_single_sample.vcxproj -> J:\Cpp\MathToolkit\test\Math_test\Release\students_t_single_sample.exe | |
269 | __________________________________ | |
270 | 2-Sided Confidence Limits For Mean | |
271 | __________________________________ | |
272 | ||
273 | Number of Observations = 195 | |
274 | Mean = 9.26146 | |
275 | Standard Deviation = 0.02278881 | |
276 | ||
277 | ||
278 | _______________________________________________________________ | |
279 | Confidence T Interval Lower Upper | |
280 | Value (%) Value Width Limit Limit | |
281 | _______________________________________________________________ | |
282 | 50.000 0.676 1.103e-003 9.26036 9.26256 | |
283 | 75.000 1.154 1.883e-003 9.25958 9.26334 | |
284 | 90.000 1.653 2.697e-003 9.25876 9.26416 | |
285 | 95.000 1.972 3.219e-003 9.25824 9.26468 | |
286 | 99.000 2.601 4.245e-003 9.25721 9.26571 | |
287 | 99.900 3.341 5.453e-003 9.25601 9.26691 | |
288 | 99.990 3.973 6.484e-003 9.25498 9.26794 | |
289 | 99.999 4.537 7.404e-003 9.25406 9.26886 | |
290 | ||
291 | __________________________________ | |
292 | Student t test for a single sample | |
293 | __________________________________ | |
294 | ||
295 | Number of Observations = 195 | |
296 | Sample Mean = 9.26146 | |
297 | Sample Standard Deviation = 0.02279 | |
298 | Expected True Mean = 5.00000 | |
299 | ||
300 | Sample Mean - Expected Test Mean = 4.26146 | |
301 | Degrees of Freedom = 194 | |
302 | T Statistic = 2611.28380 | |
303 | Probability that difference is due to chance = 0.000e+000 | |
304 | ||
305 | Results for Alternative Hypothesis and alpha = 0.0500 | |
306 | ||
307 | Alternative Hypothesis Conclusion | |
308 | Mean != 5.000 NOT REJECTED | |
309 | Mean < 5.000 REJECTED | |
310 | Mean > 5.000 NOT REJECTED | |
311 | ||
312 | ||
313 | _____________________________________________________________ | |
314 | Estimated sample sizes required for various confidence levels | |
315 | _____________________________________________________________ | |
316 | ||
317 | True Mean = 5.00000 | |
318 | Sample Mean = 9.26146 | |
319 | Sample Standard Deviation = 0.02279 | |
320 | ||
321 | ||
322 | _______________________________________________________________ | |
323 | Confidence Estimated Estimated | |
324 | Value (%) Sample Size Sample Size | |
325 | (one sided test) (two sided test) | |
326 | _______________________________________________________________ | |
327 | 75.000 2 2 | |
328 | 90.000 2 2 | |
329 | 95.000 2 2 | |
330 | 99.000 2 2 | |
331 | 99.900 3 3 | |
332 | 99.990 3 3 | |
333 | 99.999 4 4 | |
334 | ||
335 | __________________________________ | |
336 | 2-Sided Confidence Limits For Mean | |
337 | __________________________________ | |
338 | ||
339 | Number of Observations = 3 | |
340 | Mean = 37.8000000 | |
341 | Standard Deviation = 0.9643650 | |
342 | ||
343 | ||
344 | _______________________________________________________________ | |
345 | Confidence T Interval Lower Upper | |
346 | Value (%) Value Width Limit Limit | |
347 | _______________________________________________________________ | |
348 | 50.000 0.816 0.455 37.34539 38.25461 | |
349 | 75.000 1.604 0.893 36.90717 38.69283 | |
350 | 90.000 2.920 1.626 36.17422 39.42578 | |
351 | 95.000 4.303 2.396 35.40438 40.19562 | |
352 | 99.000 9.925 5.526 32.27408 43.32592 | |
353 | 99.900 31.599 17.594 20.20639 55.39361 | |
354 | 99.990 99.992 55.673 -17.87346 93.47346 | |
355 | 99.999 316.225 176.067 -138.26683 213.86683 | |
356 | ||
357 | __________________________________ | |
358 | Student t test for a single sample | |
359 | __________________________________ | |
360 | ||
361 | Number of Observations = 3 | |
362 | Sample Mean = 37.80000 | |
363 | Sample Standard Deviation = 0.96437 | |
364 | Expected True Mean = 38.90000 | |
365 | ||
366 | Sample Mean - Expected Test Mean = -1.10000 | |
367 | Degrees of Freedom = 2 | |
368 | T Statistic = -1.97566 | |
369 | Probability that difference is due to chance = 1.869e-001 | |
370 | ||
371 | Results for Alternative Hypothesis and alpha = 0.0500 | |
372 | ||
373 | Alternative Hypothesis Conclusion | |
374 | Mean != 38.900 REJECTED | |
375 | Mean < 38.900 NOT REJECTED | |
376 | Mean > 38.900 NOT REJECTED | |
377 | ||
378 | ||
379 | __________________________________ | |
380 | Student t test for a single sample | |
381 | __________________________________ | |
382 | ||
383 | Number of Observations = 3 | |
384 | Sample Mean = 37.80000 | |
385 | Sample Standard Deviation = 0.96437 | |
386 | Expected True Mean = 38.90000 | |
387 | ||
388 | Sample Mean - Expected Test Mean = -1.10000 | |
389 | Degrees of Freedom = 2 | |
390 | T Statistic = -1.97566 | |
391 | Probability that difference is due to chance = 1.869e-001 | |
392 | ||
393 | Results for Alternative Hypothesis and alpha = 0.1000 | |
394 | ||
395 | Alternative Hypothesis Conclusion | |
396 | Mean != 38.900 REJECTED | |
397 | Mean < 38.900 NOT REJECTED | |
398 | Mean > 38.900 REJECTED | |
399 | ||
400 | ||
401 | _____________________________________________________________ | |
402 | Estimated sample sizes required for various confidence levels | |
403 | _____________________________________________________________ | |
404 | ||
405 | True Mean = 38.90000 | |
406 | Sample Mean = 37.80000 | |
407 | Sample Standard Deviation = 0.96437 | |
408 | ||
409 | ||
410 | _______________________________________________________________ | |
411 | Confidence Estimated Estimated | |
412 | Value (%) Sample Size Sample Size | |
413 | (one sided test) (two sided test) | |
414 | _______________________________________________________________ | |
415 | 75.000 3 4 | |
416 | 90.000 7 9 | |
417 | 95.000 11 13 | |
418 | 99.000 20 22 | |
419 | 99.900 35 37 | |
420 | 99.990 50 53 | |
421 | 99.999 66 68 | |
422 | ||
423 | */ | |
424 |