]>
Commit | Line | Data |
---|---|---|
b3f23c90 SH |
1 | /* |
2 | * Experimental data distribution table generator | |
ebfd0f31 | 3 | * Taken from the uncopyrighted NISTnet code (public domain). |
b3f23c90 | 4 | * |
ebfd0f31 | 5 | * Read in a series of "random" data values, either |
b3f23c90 SH |
6 | * experimentally or generated from some probability distribution. |
7 | * From this, create the inverse distribution table used to approximate | |
8 | * the distribution. | |
9 | */ | |
10 | #include <stdio.h> | |
11 | #include <stdlib.h> | |
12 | #include <math.h> | |
13 | #include <malloc.h> | |
14 | #include <string.h> | |
15 | #include <sys/types.h> | |
16 | #include <sys/stat.h> | |
17 | ||
18 | ||
19 | double * | |
20 | readdoubles(FILE *fp, int *number) | |
21 | { | |
22 | struct stat info; | |
23 | double *x; | |
24 | int limit; | |
25 | int n=0, i; | |
26 | ||
d304b05c PS |
27 | if (!fstat(fileno(fp), &info) && |
28 | info.st_size > 0) { | |
b3f23c90 SH |
29 | limit = 2*info.st_size/sizeof(double); /* @@ approximate */ |
30 | } else { | |
31 | limit = 10000; | |
32 | } | |
33 | ||
34 | x = calloc(limit, sizeof(double)); | |
35 | if (!x) { | |
36 | perror("double alloc"); | |
37 | exit(3); | |
38 | } | |
39 | ||
40 | for (i=0; i<limit; ++i){ | |
92963d13 PS |
41 | if (fscanf(fp, "%lf", &x[i]) != 1 || |
42 | feof(fp)) | |
b3f23c90 SH |
43 | break; |
44 | ++n; | |
45 | } | |
46 | *number = n; | |
47 | return x; | |
48 | } | |
49 | ||
50 | void | |
51 | arraystats(double *x, int limit, double *mu, double *sigma, double *rho) | |
52 | { | |
53 | int n=0, i; | |
54 | double sumsquare=0.0, sum=0.0, top=0.0; | |
55 | double sigma2=0.0; | |
56 | ||
57 | for (i=0; i<limit; ++i){ | |
58 | sumsquare += x[i]*x[i]; | |
59 | sum += x[i]; | |
60 | ++n; | |
61 | } | |
62 | *mu = sum/(double)n; | |
63 | *sigma = sqrt((sumsquare - (double)n*(*mu)*(*mu))/(double)(n-1)); | |
64 | ||
65 | for (i=1; i < n; ++i){ | |
66 | top += ((double)x[i]- *mu)*((double)x[i-1]- *mu); | |
67 | sigma2 += ((double)x[i-1] - *mu)*((double)x[i-1] - *mu); | |
68 | ||
69 | } | |
70 | *rho = top/sigma2; | |
71 | } | |
72 | ||
73 | /* Create a (normalized) distribution table from a set of observed | |
74 | * values. The table is fixed to run from (as it happens) -4 to +4, | |
75 | * with granularity .00002. | |
76 | */ | |
77 | ||
78 | #define TABLESIZE 16384/4 | |
79 | #define TABLEFACTOR 8192 | |
80 | #ifndef MINSHORT | |
81 | #define MINSHORT -32768 | |
82 | #define MAXSHORT 32767 | |
83 | #endif | |
84 | ||
85 | /* Since entries in the inverse are scaled by TABLEFACTOR, and can't be bigger | |
86 | * than MAXSHORT, we don't bother looking at a larger domain than this: | |
87 | */ | |
88 | #define DISTTABLEDOMAIN ((MAXSHORT/TABLEFACTOR)+1) | |
89 | #define DISTTABLEGRANULARITY 50000 | |
90 | #define DISTTABLESIZE (DISTTABLEDOMAIN*DISTTABLEGRANULARITY*2) | |
91 | ||
92 | static int * | |
93 | makedist(double *x, int limit, double mu, double sigma) | |
94 | { | |
95 | int *table; | |
96 | int i, index, first=DISTTABLESIZE, last=0; | |
97 | double input; | |
98 | ||
99 | table = calloc(DISTTABLESIZE, sizeof(int)); | |
100 | if (!table) { | |
101 | perror("table alloc"); | |
102 | exit(3); | |
103 | } | |
104 | ||
105 | for (i=0; i < limit; ++i) { | |
106 | /* Normalize value */ | |
107 | input = (x[i]-mu)/sigma; | |
108 | ||
109 | index = (int)rint((input+DISTTABLEDOMAIN)*DISTTABLEGRANULARITY); | |
110 | if (index < 0) index = 0; | |
111 | if (index >= DISTTABLESIZE) index = DISTTABLESIZE-1; | |
112 | ++table[index]; | |
113 | if (index > last) | |
114 | last = index +1; | |
115 | if (index < first) | |
116 | first = index; | |
117 | } | |
118 | return table; | |
119 | } | |
120 | ||
121 | /* replace an array by its cumulative distribution */ | |
122 | static void | |
123 | cumulativedist(int *table, int limit, int *total) | |
124 | { | |
125 | int accum=0; | |
126 | ||
127 | while (--limit >= 0) { | |
128 | accum += *table; | |
129 | *table++ = accum; | |
130 | } | |
131 | *total = accum; | |
132 | } | |
133 | ||
134 | static short * | |
135 | inverttable(int *table, int inversesize, int tablesize, int cumulative) | |
136 | { | |
137 | int i, inverseindex, inversevalue; | |
138 | short *inverse; | |
139 | double findex, fvalue; | |
140 | ||
141 | inverse = (short *)malloc(inversesize*sizeof(short)); | |
142 | for (i=0; i < inversesize; ++i) { | |
143 | inverse[i] = MINSHORT; | |
144 | } | |
145 | for (i=0; i < tablesize; ++i) { | |
146 | findex = ((double)i/(double)DISTTABLEGRANULARITY) - DISTTABLEDOMAIN; | |
147 | fvalue = (double)table[i]/(double)cumulative; | |
148 | inverseindex = (int)rint(fvalue*inversesize); | |
149 | inversevalue = (int)rint(findex*TABLEFACTOR); | |
150 | if (inversevalue <= MINSHORT) inversevalue = MINSHORT+1; | |
151 | if (inversevalue > MAXSHORT) inversevalue = MAXSHORT; | |
2d3af167 SH |
152 | if (inverseindex >= inversesize) inverseindex = inversesize- 1; |
153 | ||
b3f23c90 SH |
154 | inverse[inverseindex] = inversevalue; |
155 | } | |
156 | return inverse; | |
157 | ||
158 | } | |
159 | ||
160 | /* Run simple linear interpolation over the table to fill in missing entries */ | |
161 | static void | |
162 | interpolatetable(short *table, int limit) | |
163 | { | |
164 | int i, j, last, lasti = -1; | |
165 | ||
166 | last = MINSHORT; | |
167 | for (i=0; i < limit; ++i) { | |
168 | if (table[i] == MINSHORT) { | |
169 | for (j=i; j < limit; ++j) | |
170 | if (table[j] != MINSHORT) | |
171 | break; | |
172 | if (j < limit) { | |
173 | table[i] = last + (i-lasti)*(table[j]-last)/(j-lasti); | |
174 | } else { | |
175 | table[i] = last + (i-lasti)*(MAXSHORT-last)/(limit-lasti); | |
176 | } | |
177 | } else { | |
178 | last = table[i]; | |
179 | lasti = i; | |
180 | } | |
181 | } | |
182 | } | |
183 | ||
184 | static void | |
185 | printtable(const short *table, int limit) | |
186 | { | |
187 | int i; | |
188 | ||
189 | printf("# This is the distribution table for the experimental distribution.\n"); | |
190 | ||
191 | for (i=0 ; i < limit; ++i) { | |
192 | printf("%d%c", table[i], | |
193 | (i % 8) == 7 ? '\n' : ' '); | |
194 | } | |
195 | } | |
196 | ||
81c61790 | 197 | int |
b3f23c90 SH |
198 | main(int argc, char **argv) |
199 | { | |
200 | FILE *fp; | |
201 | double *x; | |
202 | double mu, sigma, rho; | |
203 | int limit; | |
204 | int *table; | |
205 | short *inverse; | |
206 | int total; | |
207 | ||
208 | if (argc > 1) { | |
209 | if (!(fp = fopen(argv[1], "r"))) { | |
210 | perror(argv[1]); | |
211 | exit(1); | |
212 | } | |
213 | } else { | |
214 | fp = stdin; | |
e9e9365b | 215 | } |
b3f23c90 SH |
216 | x = readdoubles(fp, &limit); |
217 | if (limit <= 0) { | |
218 | fprintf(stderr, "Nothing much read!\n"); | |
219 | exit(2); | |
220 | } | |
221 | arraystats(x, limit, &mu, &sigma, &rho); | |
222 | #ifdef DEBUG | |
223 | fprintf(stderr, "%d values, mu %10.4f, sigma %10.4f, rho %10.4f\n", | |
224 | limit, mu, sigma, rho); | |
225 | #endif | |
e9e9365b | 226 | |
b3f23c90 SH |
227 | table = makedist(x, limit, mu, sigma); |
228 | free((void *) x); | |
229 | cumulativedist(table, DISTTABLESIZE, &total); | |
230 | inverse = inverttable(table, TABLESIZE, DISTTABLESIZE, total); | |
231 | interpolatetable(inverse, TABLESIZE); | |
232 | printtable(inverse, TABLESIZE); | |
233 | return 0; | |
234 | } |