]>
Commit | Line | Data |
---|---|---|
ba9703b0 | 1 | #!/usr/bin/env python3 |
e9174d1e SL |
2 | |
3 | """ | |
4 | Testing dec2flt | |
5 | =============== | |
6 | These are *really* extensive tests. Expect them to run for hours. Due to the | |
7 | nature of the problem (the input is a string of arbitrary length), exhaustive | |
8 | testing is not really possible. Instead, there are exhaustive tests for some | |
9 | classes of inputs for which that is feasible and a bunch of deterministic and | |
10 | random non-exhaustive tests for covering everything else. | |
11 | ||
12 | The actual tests (generating decimal strings and feeding them to dec2flt) is | |
13 | performed by a set of stand-along rust programs. This script compiles, runs, | |
7453a54e SL |
14 | and supervises them. The programs report the strings they generate and the |
15 | floating point numbers they converted those strings to, and this script | |
16 | checks that the results are correct. | |
e9174d1e SL |
17 | |
18 | You can run specific tests rather than all of them by giving their names | |
19 | (without .rs extension) as command line parameters. | |
20 | ||
21 | Verification | |
22 | ------------ | |
23 | The tricky part is not generating those inputs but verifying the outputs. | |
24 | Comparing with the result of Python's float() does not cut it because | |
25 | (and this is apparently undocumented) although Python includes a version of | |
26 | Martin Gay's code including the decimal-to-float part, it doesn't actually use | |
27 | it for float() (only for round()) instead relying on the system scanf() which | |
28 | is not necessarily completely accurate. | |
29 | ||
30 | Instead, we take the input and compute the true value with bignum arithmetic | |
31 | (as a fraction, using the ``fractions`` module). | |
32 | ||
33 | Given an input string and the corresponding float computed via Rust, simply | |
2c00a5a8 | 34 | decode the float into f * 2^k (for integers f, k) and the ULP. |
e9174d1e SL |
35 | We can now easily compute the error and check if it is within 0.5 ULP as it |
36 | should be. Zero and infinites are handled similarly: | |
37 | ||
38 | - If the approximation is 0.0, the exact value should be *less or equal* | |
39 | half the smallest denormal float: the smallest denormal floating point | |
40 | number has an odd mantissa (00...001) and thus half of that is rounded | |
41 | to 00...00, i.e., zero. | |
42 | - If the approximation is Inf, the exact value should be *greater or equal* | |
43 | to the largest finite float + 0.5 ULP: the largest finite float has an odd | |
44 | mantissa (11...11), so that plus half an ULP is rounded up to the nearest | |
45 | even number, which overflows. | |
46 | ||
47 | Implementation details | |
48 | ---------------------- | |
49 | This directory contains a set of single-file Rust programs that perform | |
50 | tests with a particular class of inputs. Each is compiled and run without | |
51 | parameters, outputs (f64, f32, decimal) pairs to verify externally, and | |
52 | in any case either exits gracefully or with a panic. | |
53 | ||
54 | If a test binary writes *anything at all* to stderr or exits with an | |
55 | exit code that's not 0, the test fails. | |
56 | The output on stdout is treated as (f64, f32, decimal) record, encoded thusly: | |
57 | ||
7453a54e SL |
58 | - First, the bits of the f64 encoded as an ASCII hex string. |
59 | - Second, the bits of the f32 encoded as an ASCII hex string. | |
60 | - Then the corresponding string input, in ASCII | |
e9174d1e SL |
61 | - The record is terminated with a newline. |
62 | ||
63 | Incomplete records are an error. Not-a-Number bit patterns are invalid too. | |
64 | ||
b039eaaf | 65 | The tests run serially but the validation for a single test is parallelized |
e9174d1e SL |
66 | with ``multiprocessing``. Each test is launched as a subprocess. |
67 | One thread supervises it: Accepts and enqueues records to validate, observe | |
68 | stderr, and waits for the process to exit. A set of worker processes perform | |
69 | the validation work for the outputs enqueued there. Another thread listens | |
70 | for progress updates from the workers. | |
71 | ||
72 | Known issues | |
73 | ------------ | |
74 | Some errors (e.g., NaN outputs) aren't handled very gracefully. | |
75 | Also, if there is an exception or the process is interrupted (at least on | |
76 | Windows) the worker processes are leaked and stick around forever. | |
77 | They're only a few megabytes each, but still, this script should not be run | |
78 | if you aren't prepared to manually kill a lot of orphaned processes. | |
79 | """ | |
80 | from __future__ import print_function | |
81 | import sys | |
82 | import os.path | |
83 | import time | |
84 | import struct | |
85 | from fractions import Fraction | |
86 | from collections import namedtuple | |
87 | from subprocess import Popen, check_call, PIPE | |
88 | from glob import glob | |
89 | import multiprocessing | |
e9174d1e SL |
90 | import threading |
91 | import ctypes | |
92 | import binascii | |
93 | ||
abe05a73 XL |
94 | try: # Python 3 |
95 | import queue as Queue | |
96 | except ImportError: # Python 2 | |
97 | import Queue | |
98 | ||
e9174d1e SL |
99 | NUM_WORKERS = 2 |
100 | UPDATE_EVERY_N = 50000 | |
101 | INF = namedtuple('INF', '')() | |
102 | NEG_INF = namedtuple('NEG_INF', '')() | |
103 | ZERO = namedtuple('ZERO', '')() | |
104 | MAILBOX = None # The queue for reporting errors to the main process. | |
105 | STDOUT_LOCK = threading.Lock() | |
106 | test_name = None | |
107 | child_processes = [] | |
108 | exit_status = 0 | |
109 | ||
110 | def msg(*args): | |
111 | with STDOUT_LOCK: | |
112 | print("[" + test_name + "]", *args) | |
113 | sys.stdout.flush() | |
114 | ||
115 | ||
116 | def write_errors(): | |
117 | global exit_status | |
118 | f = open("errors.txt", 'w') | |
119 | have_seen_error = False | |
120 | while True: | |
121 | args = MAILBOX.get() | |
122 | if args is None: | |
123 | f.close() | |
124 | break | |
125 | print(*args, file=f) | |
126 | f.flush() | |
127 | if not have_seen_error: | |
128 | have_seen_error = True | |
129 | msg("Something is broken:", *args) | |
130 | msg("Future errors logged to errors.txt") | |
131 | exit_status = 101 | |
132 | ||
133 | ||
134 | def rustc(test): | |
135 | rs = test + '.rs' | |
136 | exe = test + '.exe' # hopefully this makes it work on *nix | |
137 | print("compiling", test) | |
138 | sys.stdout.flush() | |
139 | check_call(['rustc', rs, '-o', exe]) | |
140 | ||
141 | ||
142 | def run(test): | |
143 | global test_name | |
144 | test_name = test | |
145 | ||
146 | t0 = time.clock() | |
147 | msg("setting up supervisor") | |
148 | exe = test + '.exe' | |
149 | proc = Popen(exe, bufsize=1<<20 , stdin=PIPE, stdout=PIPE, stderr=PIPE) | |
150 | done = multiprocessing.Value(ctypes.c_bool) | |
151 | queue = multiprocessing.Queue(maxsize=5)#(maxsize=1024) | |
152 | workers = [] | |
153 | for n in range(NUM_WORKERS): | |
154 | worker = multiprocessing.Process(name='Worker-' + str(n + 1), | |
155 | target=init_worker, | |
156 | args=[test, MAILBOX, queue, done]) | |
157 | workers.append(worker) | |
158 | child_processes.append(worker) | |
159 | for worker in workers: | |
160 | worker.start() | |
161 | msg("running test") | |
162 | interact(proc, queue) | |
163 | with done.get_lock(): | |
164 | done.value = True | |
165 | for worker in workers: | |
166 | worker.join() | |
167 | msg("python is done") | |
168 | assert queue.empty(), "did not validate everything" | |
169 | dt = time.clock() - t0 | |
170 | msg("took", round(dt, 3), "seconds") | |
171 | ||
172 | ||
173 | def interact(proc, queue): | |
e9174d1e SL |
174 | n = 0 |
175 | while proc.poll() is None: | |
176 | line = proc.stdout.readline() | |
177 | if not line: | |
178 | continue | |
179 | assert line.endswith('\n'), "incomplete line: " + repr(line) | |
180 | queue.put(line) | |
e9174d1e SL |
181 | n += 1 |
182 | if n % UPDATE_EVERY_N == 0: | |
183 | msg("got", str(n // 1000) + "k", "records") | |
184 | msg("rust is done. exit code:", proc.returncode) | |
185 | rest, stderr = proc.communicate() | |
186 | if stderr: | |
187 | msg("rust stderr output:", stderr) | |
188 | for line in rest.split('\n'): | |
189 | if not line: | |
190 | continue | |
191 | queue.put(line) | |
192 | ||
193 | ||
194 | def main(): | |
195 | global MAILBOX | |
3dfed10e XL |
196 | all_tests = [os.path.splitext(f)[0] for f in glob('*.rs') if not f.startswith('_')] |
197 | args = sys.argv[1:] | |
198 | if args: | |
199 | tests = [test for test in all_tests if test in args] | |
200 | else | |
201 | tests = all_tests | |
e9174d1e SL |
202 | if not tests: |
203 | print("Error: No tests to run") | |
204 | sys.exit(1) | |
205 | # Compile first for quicker feedback | |
206 | for test in tests: | |
207 | rustc(test) | |
208 | # Set up mailbox once for all tests | |
209 | MAILBOX = multiprocessing.Queue() | |
210 | mailman = threading.Thread(target=write_errors) | |
211 | mailman.daemon = True | |
212 | mailman.start() | |
213 | for test in tests: | |
e9174d1e SL |
214 | run(test) |
215 | MAILBOX.put(None) | |
216 | mailman.join() | |
217 | ||
218 | ||
219 | # ---- Worker thread code ---- | |
220 | ||
221 | ||
222 | POW2 = { e: Fraction(2) ** e for e in range(-1100, 1100) } | |
223 | HALF_ULP = { e: (Fraction(2) ** e)/2 for e in range(-1100, 1100) } | |
224 | DONE_FLAG = None | |
225 | ||
226 | ||
227 | def send_error_to_supervisor(*args): | |
228 | MAILBOX.put(args) | |
229 | ||
230 | ||
231 | def init_worker(test, mailbox, queue, done): | |
232 | global test_name, MAILBOX, DONE_FLAG | |
233 | test_name = test | |
234 | MAILBOX = mailbox | |
235 | DONE_FLAG = done | |
236 | do_work(queue) | |
237 | ||
238 | ||
239 | def is_done(): | |
240 | with DONE_FLAG.get_lock(): | |
241 | return DONE_FLAG.value | |
242 | ||
243 | ||
244 | def do_work(queue): | |
245 | while True: | |
246 | try: | |
247 | line = queue.get(timeout=0.01) | |
248 | except Queue.Empty: | |
249 | if queue.empty() and is_done(): | |
250 | return | |
251 | else: | |
252 | continue | |
253 | bin64, bin32, text = line.rstrip().split() | |
254 | validate(bin64, bin32, text) | |
255 | ||
256 | ||
257 | def decode_binary64(x): | |
258 | """ | |
259 | Turn a IEEE 754 binary64 into (mantissa, exponent), except 0.0 and | |
260 | infinity (positive and negative), which return ZERO, INF, and NEG_INF | |
261 | respectively. | |
262 | """ | |
263 | x = binascii.unhexlify(x) | |
264 | assert len(x) == 8, repr(x) | |
265 | [bits] = struct.unpack(b'>Q', x) | |
266 | if bits == 0: | |
267 | return ZERO | |
268 | exponent = (bits >> 52) & 0x7FF | |
269 | negative = bits >> 63 | |
270 | low_bits = bits & 0xFFFFFFFFFFFFF | |
271 | if exponent == 0: | |
272 | mantissa = low_bits | |
273 | exponent += 1 | |
274 | if mantissa == 0: | |
275 | return ZERO | |
276 | elif exponent == 0x7FF: | |
277 | assert low_bits == 0, "NaN" | |
278 | if negative: | |
279 | return NEG_INF | |
280 | else: | |
281 | return INF | |
282 | else: | |
283 | mantissa = low_bits | (1 << 52) | |
284 | exponent -= 1023 + 52 | |
285 | if negative: | |
286 | mantissa = -mantissa | |
287 | return (mantissa, exponent) | |
288 | ||
289 | ||
290 | def decode_binary32(x): | |
291 | """ | |
292 | Turn a IEEE 754 binary32 into (mantissa, exponent), except 0.0 and | |
293 | infinity (positive and negative), which return ZERO, INF, and NEG_INF | |
294 | respectively. | |
295 | """ | |
296 | x = binascii.unhexlify(x) | |
297 | assert len(x) == 4, repr(x) | |
298 | [bits] = struct.unpack(b'>I', x) | |
299 | if bits == 0: | |
300 | return ZERO | |
301 | exponent = (bits >> 23) & 0xFF | |
302 | negative = bits >> 31 | |
303 | low_bits = bits & 0x7FFFFF | |
304 | if exponent == 0: | |
305 | mantissa = low_bits | |
306 | exponent += 1 | |
307 | if mantissa == 0: | |
308 | return ZERO | |
309 | elif exponent == 0xFF: | |
310 | if negative: | |
311 | return NEG_INF | |
312 | else: | |
313 | return INF | |
314 | else: | |
315 | mantissa = low_bits | (1 << 23) | |
316 | exponent -= 127 + 23 | |
317 | if negative: | |
318 | mantissa = -mantissa | |
319 | return (mantissa, exponent) | |
320 | ||
321 | ||
322 | MIN_SUBNORMAL_DOUBLE = Fraction(2) ** -1074 | |
323 | MIN_SUBNORMAL_SINGLE = Fraction(2) ** -149 # XXX unsure | |
324 | MAX_DOUBLE = (2 - Fraction(2) ** -52) * (2 ** 1023) | |
325 | MAX_SINGLE = (2 - Fraction(2) ** -23) * (2 ** 127) | |
326 | MAX_ULP_DOUBLE = 1023 - 52 | |
327 | MAX_ULP_SINGLE = 127 - 23 | |
328 | DOUBLE_ZERO_CUTOFF = MIN_SUBNORMAL_DOUBLE / 2 | |
329 | DOUBLE_INF_CUTOFF = MAX_DOUBLE + 2 ** (MAX_ULP_DOUBLE - 1) | |
330 | SINGLE_ZERO_CUTOFF = MIN_SUBNORMAL_SINGLE / 2 | |
331 | SINGLE_INF_CUTOFF = MAX_SINGLE + 2 ** (MAX_ULP_SINGLE - 1) | |
332 | ||
333 | def validate(bin64, bin32, text): | |
334 | double = decode_binary64(bin64) | |
335 | single = decode_binary32(bin32) | |
336 | real = Fraction(text) | |
337 | ||
338 | if double is ZERO: | |
339 | if real > DOUBLE_ZERO_CUTOFF: | |
340 | record_special_error(text, "f64 zero") | |
341 | elif double is INF: | |
342 | if real < DOUBLE_INF_CUTOFF: | |
343 | record_special_error(text, "f64 inf") | |
344 | elif double is NEG_INF: | |
345 | if -real < DOUBLE_INF_CUTOFF: | |
346 | record_special_error(text, "f64 -inf") | |
347 | elif len(double) == 2: | |
348 | sig, k = double | |
349 | validate_normal(text, real, sig, k, "f64") | |
350 | else: | |
351 | assert 0, "didn't handle binary64" | |
352 | if single is ZERO: | |
353 | if real > SINGLE_ZERO_CUTOFF: | |
354 | record_special_error(text, "f32 zero") | |
355 | elif single is INF: | |
356 | if real < SINGLE_INF_CUTOFF: | |
357 | record_special_error(text, "f32 inf") | |
358 | elif single is NEG_INF: | |
359 | if -real < SINGLE_INF_CUTOFF: | |
360 | record_special_error(text, "f32 -inf") | |
361 | elif len(single) == 2: | |
362 | sig, k = single | |
363 | validate_normal(text, real, sig, k, "f32") | |
364 | else: | |
365 | assert 0, "didn't handle binary32" | |
366 | ||
367 | def record_special_error(text, descr): | |
368 | send_error_to_supervisor(text.strip(), "wrongly rounded to", descr) | |
369 | ||
370 | ||
371 | def validate_normal(text, real, sig, k, kind): | |
372 | approx = sig * POW2[k] | |
373 | error = abs(approx - real) | |
374 | if error > HALF_ULP[k]: | |
375 | record_normal_error(text, error, k, kind) | |
376 | ||
377 | ||
378 | def record_normal_error(text, error, k, kind): | |
379 | one_ulp = HALF_ULP[k + 1] | |
380 | assert one_ulp == 2 * HALF_ULP[k] | |
381 | relative_error = error / one_ulp | |
382 | text = text.strip() | |
383 | try: | |
384 | err_repr = float(relative_error) | |
385 | except ValueError: | |
386 | err_repr = str(err_repr).replace('/', ' / ') | |
387 | send_error_to_supervisor(err_repr, "ULP error on", text, "(" + kind + ")") | |
388 | ||
389 | ||
390 | if __name__ == '__main__': | |
391 | main() |