]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | #!/usr/bin/env python |
2 | ||
3 | # ################################################################ | |
f67539c2 | 4 | # Copyright (c) 2016-2020, Facebook, Inc. |
11fdf7f2 TL |
5 | # All rights reserved. |
6 | # | |
7 | # This source code is licensed under both the BSD-style license (found in the | |
8 | # LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
9 | # in the COPYING file in the root directory of this source tree). | |
f67539c2 | 10 | # You may select, at your option, one of the above-listed licenses. |
11fdf7f2 TL |
11 | # ########################################################################## |
12 | ||
13 | import argparse | |
14 | import contextlib | |
15 | import os | |
16 | import re | |
9f95a23c | 17 | import shlex |
11fdf7f2 TL |
18 | import shutil |
19 | import subprocess | |
20 | import sys | |
21 | import tempfile | |
22 | ||
23 | ||
def abs_join(a, *p):
    """Join ``a`` with the components in ``p`` and return an absolute path."""
    return os.path.abspath(os.path.join(a, *p))
26 | ||
27 | ||
f67539c2 TL |
class InputType(object):
    """Kinds of input a fuzz target consumes."""
    # Uncompressed sample data.
    RAW_DATA = 1
    # Compressed sample data.
    COMPRESSED_DATA = 2
    # Trained dictionary files.
    DICTIONARY_DATA = 3
32 | ||
33 | ||
class FrameType(object):
    """Framing of the data a fuzz target works on."""
    # Regular zstd frames (the default for a target).
    ZSTD = 1
    # Block-level data; seed generation passes --gen-blocks for these.
    BLOCK = 2
37 | ||
38 | ||
class TargetInfo(object):
    """Describes the input a fuzz target expects.

    Attributes:
        input_type: one of the InputType constants.
        frame_type: one of the FrameType constants (defaults to ZSTD).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.input_type = input_type
        self.frame_type = frame_type
43 | ||
44 | ||
11fdf7f2 TL |
# Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Maps every fuzz target name to the kind of input it consumes.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA, FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
# 'all' is accepted on the command line as shorthand for every target.
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to.
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.environ.get('DECODECORPUS',
                              abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
88 | ||
89 | ||
def create(r):
    """Ensure directory ``r`` exists and return its absolute path.

    Missing intermediate directories are created as well.
    """
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        os.makedirs(d)
    return d
95 | ||
96 | ||
def check(r):
    """Return the absolute path of ``r`` if it is a directory, else None."""
    d = os.path.abspath(r)
    if not os.path.isdir(d):
        return None
    return d
102 | ||
103 | ||
@contextlib.contextmanager
def tmpdir():
    """Context manager yielding a fresh temporary directory path.

    The directory and its contents are removed on exit, whether or not the
    body raised; removal errors are ignored.
    """
    dirpath = tempfile.mkdtemp()
    try:
        yield dirpath
    finally:
        shutil.rmtree(dirpath, ignore_errors=True)
111 | ||
112 | ||
def parse_targets(in_targets):
    """Validate and de-duplicate a list of target names.

    Empty entries are skipped and 'all' expands to every entry of TARGETS.
    The result is sorted so that callers iterate in a deterministic order.

    Raises:
        RuntimeError: if a name is neither 'all' nor a known target.
    """
    targets = set()
    for target in in_targets:
        if not target:
            continue
        if target == 'all':
            targets.update(TARGETS)
        elif target in TARGETS:
            targets.add(target)
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
    return sorted(targets)
125 | ||
126 | ||
def targets_parser(args, description):
    """Parse a command line that consists of zero or more TARGET names.

    ``args[0]`` is popped and used as the program name. Unrecognized
    arguments are kept in ``args.extra`` and ``args.TARGET`` is replaced by
    the validated target list from parse_targets().
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    args.TARGET = parse_targets(args.TARGET)

    return args
140 | ||
141 | ||
def parse_env_flags(args, flags):
    """
    Look for flags set by environment variables.

    Scans ``flags`` for -fsanitize=... and -fno-sanitize=... options and
    updates ``args.asan``, ``args.msan`` and ``args.ubsan`` accordingly; a
    sanitizer not mentioned in ``flags`` keeps its current value.
    ``args.sanitize`` is set to whether any of the three is enabled.

    Raises:
        RuntimeError: if a sanitizer is both enabled and disabled in flags.
    """
    # Sanitizer names may contain hyphens (e.g. signed-integer-overflow), so
    # the character class must include '-' or the list is cut short there.
    san_flags = ','.join(
        re.findall(r'-fsanitize=((?:[a-z-]+,?)+)', flags))
    nosan_flags = ','.join(
        re.findall(r'-fno-sanitize=((?:[a-z-]+,?)+)', flags))

    def set_sanitizer(sanitizer, default, san, nosan):
        if sanitizer in san and sanitizer in nosan:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if sanitizer in san:
            return True
        if sanitizer in nosan:
            return False
        return default

    # Drop empty names produced by splitting '' or a trailing comma.
    san = set(name for name in san_flags.split(',') if name)
    nosan = set(name for name in nosan_flags.split(',') if name)

    args.asan = set_sanitizer('address', args.asan, san, nosan)
    args.msan = set_sanitizer('memory', args.msan, san, nosan)
    args.ubsan = set_sanitizer('undefined', args.ubsan, san, nosan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
169 | ||
170 | ||
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs ``cc --version`` and ``cxx --version``. Returns a tuple
    ``(compiler, version)`` where compiler is 'clang', 'gcc' or None and
    version is a ``(major, minor, patch)`` tuple of ints, or None when the
    compiler was not recognized.

    Raises:
        RuntimeError: if the compiler is recognized but no X.Y.Z version
            number can be found in its --version output.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    if b'clang' in cc_version_bytes:
        assert b'clang' in cxx_version_bytes
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes:
        assert b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes
        compiler = 'gcc'
    if compiler is not None:
        # The '+' must sit inside each group: '([0-9])+' would capture only
        # the last digit of a multi-digit component (10.0.1 -> 0.0.1).
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'Unable to determine the version of {}'.format(cc))
        version = tuple(int(version_match.group(i)) for i in range(1, 4))
    return compiler, version
191 | ||
192 | ||
def overflow_ubsan_flags(cc, cxx):
    """Return the flags that disable UBSAN's overflow check for ``cc``.

    gcc gets -fno-sanitize=signed-integer-overflow, clang 5.0.0 and newer
    gets -fno-sanitize=pointer-overflow, anything else gets no extra flags.
    """
    compiler, version = compiler_version(cc, cxx)
    if compiler == 'gcc':
        return ['-fno-sanitize=signed-integer-overflow']
    if compiler == 'clang' and version >= (5, 0, 0):
        return ['-fno-sanitize=pointer-overflow']
    return []
200 | ||
201 | ||
def build_parser(args):
    """Parse and validate the command line of the ``build`` command.

    ``args[0]`` is popped and used as the program name. Most options default
    to the module-level values read from the environment. After parsing,
    sanitizer settings found in the compiler/linker flag strings are folded
    in via parse_env_flags().

    Raises:
        RuntimeError: if the selected sanitizer options are inconsistent.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and -fsanitize=fuzzer cannot be combined.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers may also be requested directly through the flag strings.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
378 | ||
379 | ||
def build(args):
    """Implement the ``build`` command: ``make clean`` then ``make`` targets.

    Translates the parsed options into CC/CXX/CPPFLAGS/CFLAGS/CXXFLAGS/
    LDFLAGS assignments passed to make, printing each before running.
    Returns 1 if the arguments cannot be parsed, 0 after a successful
    build; a failing make raises subprocess.CalledProcessError.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1
    # The compilation flags we are setting
    targets = args.TARGET
    cc = args.cc
    cxx = args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags to be added to both cflags and cxxflags
    common_flags = []

    cppflags += [
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ]

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        common_flags += [
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp'
        ]
    if args.fuzzer:
        common_flags += ['-fsanitize=fuzzer']
        # -fsanitize=fuzzer supplies the engine, so no library is linked.
        args.lib_fuzzing_engine = ''

    mflags += ['LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine)]

    if args.sanitize_recover:
        recover_flags = ['-fsanitize-recover=all']
    else:
        recover_flags = ['-fno-sanitize-recover=all']
    if args.sanitize:
        common_flags += recover_flags

    if args.msan:
        msan_flags = ['-fsanitize=memory']
        if args.msan_track_origins:
            msan_flags += ['-fsanitize-memory-track-origins']
        common_flags += msan_flags
        # Append extra MSAN flags (it might require special setup).
        # Split them like the other flag strings so that an empty value
        # contributes nothing instead of an empty list element.
        cppflags += shlex.split(args.msan_extra_cppflags)
        cflags += shlex.split(args.msan_extra_cflags)
        cxxflags += shlex.split(args.msan_extra_cxxflags)
        ldflags += shlex.split(args.msan_extra_ldflags)

    if args.asan:
        common_flags += ['-fsanitize=address']

    if args.ubsan:
        ubsan_flags = ['-fsanitize=undefined']
        if not args.ubsan_pointer_overflow:
            ubsan_flags += overflow_ubsan_flags(cc, cxx)
        common_flags += ubsan_flags

    if args.stateful_fuzzing:
        cppflags += ['-DSTATEFUL_FUZZING']

    if args.fuzzing_mode:
        cppflags += ['-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION']

    # The regression driver library has to be built before the targets.
    if args.lib_fuzzing_engine == 'libregression.a':
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags += common_flags
    cxxflags += common_flags

    # Prepare the flags for Make
    cc_str = "CC={}".format(cc)
    cxx_str = "CXX={}".format(cxx)
    cppflags_str = "CPPFLAGS={}".format(' '.join(cppflags))
    cflags_str = "CFLAGS={}".format(' '.join(cflags))
    cxxflags_str = "CXXFLAGS={}".format(' '.join(cxxflags))
    ldflags_str = "LDFLAGS={}".format(' '.join(ldflags))

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    print(cc_str)
    print(cxx_str)
    print(cppflags_str)
    print(cflags_str)
    print(cxxflags_str)
    print(ldflags_str)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = [
        'make',
        cc_str,
        cxx_str,
        cppflags_str,
        cflags_str,
        cxxflags_str,
        ldflags_str,
    ] + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
489 | ||
490 | ||
def libfuzzer_parser(args):
    """Parse the command line of the ``libfuzzer`` command.

    ``args[0]`` is popped and used as the program name. Unrecognized
    arguments are kept in ``args.extra``.

    Raises:
        RuntimeError: if TARGET is not a known fuzz target.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been built with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--artifact',
        type=str,
        help='Override the default artifact dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-crash')))
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    return args
529 | ||
530 | ||
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """Run the fuzz binary ``target`` from FUZZ_DIR on its corpus directories.

    Directories left as None default to CORPORA_DIR/<target>,
    CORPORA_DIR/<target>-crash and CORPORA_DIR/<target>-seed. The corpora and
    artifact directories are created if missing; the seed directory is only
    passed along when it already exists. ``extra_args`` are appended to the
    command line. A non-zero exit raises subprocess.CalledProcessError.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    target = abs_join(FUZZ_DIR, target)

    corpora = [create(corpora)]
    artifact = create(artifact)
    seed = check(seed)

    # The artifact directory is also passed as an input directory.
    corpora += [artifact]
    if seed is not None:
        corpora += [seed]

    cmd = [target, '-artifact_prefix={}/'.format(artifact)]
    cmd += corpora + extra_args
    print(' '.join(cmd))
    subprocess.check_call(cmd)
555 | ||
556 | ||
def libfuzzer_cmd(args):
    """Implement the ``libfuzzer`` command.

    Returns 1 if the arguments cannot be parsed, otherwise runs the fuzzer
    and returns 0.
    """
    try:
        args = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(args.TARGET, args.corpora, args.artifact, args.seed, args.extra)
    return 0
565 | ||
566 | ||
def afl_parser(args):
    """Parse the command line of the ``afl`` command.

    ``args[0]`` is popped and used as the program name. Unrecognized
    arguments are kept in ``args.extra``. Missing --corpora / --output values
    are filled in with the per-target defaults under CORPORA_DIR.

    Raises:
        RuntimeError: if TARGET is not a known fuzz target.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--corpora',
        type=str,
        help='Override the default corpora dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET')))
    parser.add_argument(
        '--output',
        type=str,
        help='Override the default AFL output dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-afl')))
    parser.add_argument(
        '--afl-fuzz',
        type=str,
        default=AFL_FUZZ,
        help='AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ))
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.corpora:
        args.corpora = abs_join(CORPORA_DIR, args.TARGET)
    if not args.output:
        args.output = abs_join(CORPORA_DIR, '{}-afl'.format(args.TARGET))

    return args
608 | ||
609 | ||
def afl(args):
    """Implement the ``afl`` command: run afl-fuzz on a target.

    Returns 1 if the arguments cannot be parsed, otherwise 0. The exit
    status of afl-fuzz itself is not checked.
    """
    try:
        args = afl_parser(args)
    except Exception as e:
        print(e)
        return 1
    target = abs_join(FUZZ_DIR, args.TARGET)

    corpora = create(args.corpora)
    output = create(args.output)

    cmd = [args.afl_fuzz, '-i', corpora, '-o', output] + args.extra
    cmd += [target, '@@']
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
626 | ||
627 | ||
def regression(args):
    """Implement the ``regression`` command.

    Runs each selected target binary from FUZZ_DIR on its corpus directory
    (created if missing). Returns 1 if the arguments cannot be parsed, 0
    when every target succeeds; a failing target raises
    subprocess.CalledProcessError.
    """
    try:
        description = """
        Runs one or more regression tests.
        The fuzzer should have been built with
        LIB_FUZZING_ENGINE='libregression.a'.
        Takes input from CORPORA.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1
    for target in args.TARGET:
        corpora = create(abs_join(CORPORA_DIR, target))
        target = abs_join(FUZZ_DIR, target)
        cmd = [target, corpora]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
647 | ||
648 | ||
def gen_parser(args):
    """Parse the command line of the ``gen`` command.

    ``args[0]`` is popped and used as the program name. Unrecognized
    arguments are kept in ``args.extra``. A missing --seed is filled in with
    CORPORA_DIR/<TARGET>-seed.

    Raises:
        RuntimeError: if TARGET is not a known fuzz target or the
            decodecorpus binary does not exist.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--number',
        '-n',
        type=int,
        default=100,
        help='Number of samples to generate')
    parser.add_argument(
        '--max-size-log',
        type=int,
        default=18,
        help='Maximum sample size to generate')
    parser.add_argument(
        '--seed',
        type=str,
        help='Override the default seed dir (default: {})'.format(
            abs_join(CORPORA_DIR, 'TARGET-seed')))
    parser.add_argument(
        '--decodecorpus',
        type=str,
        default=DECODECORPUS,
        help="decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    parser.add_argument(
        '--zstd',
        type=str,
        default=ZSTD,
        help="zstd binary (default: $ZSTD='{}')".format(ZSTD))
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=4,
        help="FUZZ_RNG_SEED_SIZE used to generate the samples (must match)"
    )
    parser.add_argument(
        'TARGET',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS)))
    args, extra = parser.parse_known_args(args)
    args.extra = extra

    if args.TARGET and args.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(args.TARGET))

    if not args.seed:
        args.seed = abs_join(CORPORA_DIR, '{}-seed'.format(args.TARGET))

    if not os.path.isfile(args.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(args.decodecorpus, abs_join(FUZZ_DIR, '..')))

    return args
711 | ||
712 | ||
def gen(args):
    """Implement the ``gen`` command: generate a seed corpus for a target.

    Runs decodecorpus into temporary directories, picks the decompressed or
    compressed output (or trains dictionaries from the decompressed output
    with the zstd binary) depending on the target's input type, and copies
    the chosen files into the seed directory.

    Returns 1 if the arguments cannot be parsed, 0 on success; a failing
    subprocess raises subprocess.CalledProcessError.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets generate at least 1000 samples to train from.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            # Train one dictionary per power-of-two size in the range.
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir, '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the samples into the seed directory unchanged.
        # NOTE(review): the command description mentions prepending RNG seed
        # bytes, but nothing is prepended here and --fuzz-rng-seed-size is
        # unused -- confirm this is intended.
        for name in os.listdir(samples):
            shutil.copyfile(abs_join(samples, name), abs_join(seed, name))
    return 0
782 | ||
783 | ||
def minimize(args):
    """Implement the ``minimize`` command.

    For each target, runs libfuzzer with -merge=1 to merge the corpus into
    CORPORA_DIR/<target>_seed_corpus, then copies every crash file that is
    not already present into that directory.

    Returns 1 if the arguments cannot be parsed, otherwise 0.
    """
    try:
        description = """
        Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
        TARGET_seed_corpus. All extra args are passed to libfuzzer.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus = abs_join(CORPORA_DIR, target)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        extra_args = [corpus, "-merge=1"] + args.extra
        libfuzzer(target, corpora=seed_corpus, extra_args=extra_args)
        seeds = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crashes = abs_join(CORPORA_DIR, '{}-crash'.format(target))
        for crash in os.listdir(crashes):
            if crash not in seeds:
                shutil.copy(abs_join(crashes, crash), seed_corpus)
                seeds.add(crash)
    return 0
808 | ||
809 | ||
def zip_cmd(args):
    """Implement the ``zip`` command.

    For each target, zips the contents of CORPORA_DIR/<target>_seed_corpus
    into <target>_seed_corpus.zip next to it using the ``zip`` program.

    Returns 1 if the arguments cannot be parsed, otherwise 0; a failing zip
    raises subprocess.CalledProcessError.
    """
    try:
        description = """
        Zips up the seed corpus.
        """
        args = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for target in args.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(target))
        zip_file = "{}.zip".format(seed_corpus)
        # Run from inside the corpus directory so "." names its contents.
        cmd = ["zip", "-r", "-q", "-j", "-9", zip_file, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
    return 0
11fdf7f2 TL |
827 | |
828 | ||
def list_cmd(args):
    """Implement the ``list`` command: print one target name per line.

    ``args`` is accepted for symmetry with the other commands and is unused.
    Returns 0.
    """
    print("\n".join(TARGETS))
    return 0
831 | ||
832 | ||
def short_help(args):
    """Print the one-line usage summary; ``args[0]`` is the program name."""
    name = args[0]
    print("Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(name))
836 | ||
837 | ||
def help(args):
    """Print the usage summary followed by the list of options and commands."""
    short_help(args)
    print("\tfuzzing helpers (select a command and pass -h for help)\n")
    print("Options:")
    print("\t-h, --help\tPrint this message")
    print("")
    print("Commands:")
    print("\tbuild\t\tBuild a fuzzer")
    print("\tlibfuzzer\tRun a libFuzzer fuzzer")
    print("\tafl\t\tRun an AFL fuzzer")
    print("\tregression\tRun a regression test")
    print("\tgen\t\tGenerate a seed corpus for a fuzzer")
    print("\tminimize\tMinimize the test corpora")
    print("\tzip\t\tZip the minimized corpora up")
    print("\tlist\t\tList the available targets")
853 | ||
854 | ||
def main():
    """Dispatch to the sub-command named by the first command-line argument.

    Returns the exit status: 1 when no command, a help flag, or an unknown
    command is given, otherwise whatever the command handler returns.
    """
    # Work on a copy so the handlers' pop() calls do not alter sys.argv.
    args = list(sys.argv)
    if len(args) < 2:
        help(args)
        return 1
    if args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1
    command = args.pop(1)
    # The handlers use args[0] as the argparse program name.
    args[0] = "{} {}".format(args[0], command)
    commands = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = commands.get(command)
    if handler is not None:
        return handler(args)
    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
884 | ||
885 | ||
# Script entry point: exit with the status returned by main().
if __name__ == "__main__":
    sys.exit(main())