]> git.proxmox.com Git - ceph.git/blame - ceph/src/zstd/tests/fuzz/fuzz.py
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / zstd / tests / fuzz / fuzz.py
CommitLineData
11fdf7f2
TL
1#!/usr/bin/env python
2
3# ################################################################
f67539c2 4# Copyright (c) 2016-2020, Facebook, Inc.
11fdf7f2
TL
5# All rights reserved.
6#
7# This source code is licensed under both the BSD-style license (found in the
8# LICENSE file in the root directory of this source tree) and the GPLv2 (found
9# in the COPYING file in the root directory of this source tree).
f67539c2 10# You may select, at your option, one of the above-listed licenses.
11fdf7f2
TL
11# ##########################################################################
12
13import argparse
14import contextlib
15import os
16import re
9f95a23c 17import shlex
11fdf7f2
TL
18import shutil
19import subprocess
20import sys
21import tempfile
22
23
def abs_join(a, *p):
    """Join ``a`` with the components in ``p`` and return an absolute path."""
    joined = os.path.join(a, *p)
    return os.path.abspath(joined)
26
27
f67539c2
TL
class InputType(object):
    """Kinds of input data a fuzz target can consume."""
    # Uncompressed bytes, compressed zstd data, and dictionary files.
    RAW_DATA, COMPRESSED_DATA, DICTIONARY_DATA = 1, 2, 3
32
33
class FrameType(object):
    """Framing of the data a fuzz target works on."""
    # Regular zstd frames versus bare blocks.
    ZSTD, BLOCK = 1, 2
37
38
class TargetInfo(object):
    """Describes the input a fuzz target expects.

    Attributes are an ``InputType`` value (``input_type``) and a
    ``FrameType`` value (``frame_type``, ``FrameType.ZSTD`` by default).
    """

    def __init__(self, input_type, frame_type=FrameType.ZSTD):
        self.frame_type = frame_type
        self.input_type = input_type
43
44
11fdf7f2
TL
# Constants
FUZZ_DIR = os.path.abspath(os.path.dirname(__file__))
CORPORA_DIR = abs_join(FUZZ_DIR, 'corpora')
# Every known fuzz target, mapped to the kind of input it consumes.
TARGET_INFO = {
    'simple_round_trip': TargetInfo(InputType.RAW_DATA),
    'stream_round_trip': TargetInfo(InputType.RAW_DATA),
    'block_round_trip': TargetInfo(InputType.RAW_DATA, FrameType.BLOCK),
    'simple_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'stream_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'block_decompress': TargetInfo(InputType.COMPRESSED_DATA,
                                   FrameType.BLOCK),
    'dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_decompress': TargetInfo(InputType.COMPRESSED_DATA),
    'zstd_frame_info': TargetInfo(InputType.COMPRESSED_DATA),
    'simple_compress': TargetInfo(InputType.RAW_DATA),
    'dictionary_loader': TargetInfo(InputType.DICTIONARY_DATA),
    'raw_dictionary_round_trip': TargetInfo(InputType.RAW_DATA),
    'dictionary_stream_round_trip': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO)
# 'all' is accepted on the command line as shorthand for every target.
ALL_TARGETS = TARGETS + ['all']
FUZZ_RNG_SEED_SIZE = 4

# Standard environment variables
CC = os.environ.get('CC', 'cc')
CXX = os.environ.get('CXX', 'c++')
CPPFLAGS = os.environ.get('CPPFLAGS', '')
CFLAGS = os.environ.get('CFLAGS', '-O3')
# CXXFLAGS falls back to whatever CFLAGS resolved to.
CXXFLAGS = os.environ.get('CXXFLAGS', CFLAGS)
LDFLAGS = os.environ.get('LDFLAGS', '')
MFLAGS = os.environ.get('MFLAGS', '-j')

# Fuzzing environment variables
LIB_FUZZING_ENGINE = os.environ.get('LIB_FUZZING_ENGINE', 'libregression.a')
AFL_FUZZ = os.environ.get('AFL_FUZZ', 'afl-fuzz')
DECODECORPUS = os.environ.get(
    'DECODECORPUS', abs_join(FUZZ_DIR, '..', 'decodecorpus'))
ZSTD = os.environ.get('ZSTD', abs_join(FUZZ_DIR, '..', '..', 'zstd'))

# Sanitizer environment variables
MSAN_EXTRA_CPPFLAGS = os.environ.get('MSAN_EXTRA_CPPFLAGS', '')
MSAN_EXTRA_CFLAGS = os.environ.get('MSAN_EXTRA_CFLAGS', '')
MSAN_EXTRA_CXXFLAGS = os.environ.get('MSAN_EXTRA_CXXFLAGS', '')
MSAN_EXTRA_LDFLAGS = os.environ.get('MSAN_EXTRA_LDFLAGS', '')
88
89
def create(r):
    """Make sure directory ``r`` exists and return its absolute path.

    Missing parent directories are created as well.
    """
    path = os.path.abspath(r)
    if os.path.isdir(path):
        return path
    os.makedirs(path)
    return path
95
96
def check(r):
    """Return the absolute path of ``r`` if it is a directory, else None."""
    path = os.path.abspath(r)
    return path if os.path.isdir(path) else None
102
103
@contextlib.contextmanager
def tmpdir():
    """Context manager yielding a fresh temporary directory.

    The directory and its contents are removed on exit; removal errors are
    ignored.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch, ignore_errors=True)
111
112
def parse_targets(in_targets):
    """Validate and de-duplicate a list of target names.

    Empty entries are skipped and ``'all'`` expands to every entry of
    ``TARGETS``. The result keeps first-seen order, so the order in which
    targets are later built or run is the same on every invocation
    (iterating a set of strings is not stable across interpreter runs).

    Raises:
        RuntimeError: if a name is neither ``'all'`` nor a known target.
    """
    selected = []
    seen = set()

    def add(name):
        if name not in seen:
            seen.add(name)
            selected.append(name)

    for target in in_targets:
        if not target:
            continue
        if target == 'all':
            for name in TARGETS:
                add(name)
        elif target in TARGETS:
            add(target)
        else:
            raise RuntimeError('{} is not a valid target'.format(target))
    return selected
125
126
def targets_parser(args, description):
    """Parse a command line consisting of zero or more TARGET names.

    ``args[0]`` is consumed as the program name. Unrecognized arguments are
    stored on the result as ``extra``; ``TARGET`` is replaced by the
    validated list returned by ``parse_targets``.
    """
    prog = args.pop(0)
    parser = argparse.ArgumentParser(prog=prog, description=description)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(
        ', '.join(ALL_TARGETS))
    parser.add_argument('TARGET', nargs='*', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown
    parsed.TARGET = parse_targets(parsed.TARGET)
    return parsed
140
141
def parse_env_flags(args, flags):
    """
    Look for sanitizer flags in the compiler/linker flag string.

    ``-fsanitize=...`` turns the matching ``args.asan`` / ``args.msan`` /
    ``args.ubsan`` on, ``-fno-sanitize=...`` turns it off, and otherwise the
    current value is kept. ``args.sanitize`` is set to whether any of the
    three ends up enabled. Raises RuntimeError when a sanitizer is both
    enabled and disabled.
    """
    enabled = set(','.join(
        re.findall('-fsanitize=((?:[a-z]+,?)+)', flags)).split(','))
    disabled = set(','.join(
        re.findall('-fno-sanitize=((?:[a-z]+,?)+)', flags)).split(','))

    def resolve(sanitizer, default):
        requested = sanitizer in enabled
        suppressed = sanitizer in disabled
        if requested and suppressed:
            raise RuntimeError('-fno-sanitize={s} and -fsanitize={s} passed'.
                               format(s=sanitizer))
        if requested:
            return True
        if suppressed:
            return False
        return default

    # Resolved one at a time, so earlier attributes are already updated if a
    # later sanitizer raises.
    args.asan = resolve('address', args.asan)
    args.msan = resolve('memory', args.msan)
    args.ubsan = resolve('undefined', args.ubsan)

    args.sanitize = args.asan or args.msan or args.ubsan

    return args
169
170
def compiler_version(cc, cxx):
    """
    Determines the compiler and version.
    Only works for clang and gcc.

    Runs ``cc --version`` and ``cxx --version`` and returns
    ``(compiler, version)``: ``compiler`` is ``'clang'``, ``'gcc'`` or None
    when neither name appears in the output, and ``version`` is a
    ``(major, minor, patch)`` tuple of ints taken from the cc output (None
    when the compiler is unknown).

    Raises:
        RuntimeError: if a known compiler prints no X.Y.Z version.
        subprocess.CalledProcessError: if a ``--version`` call fails.
    """
    cc_version_bytes = subprocess.check_output([cc, "--version"])
    cxx_version_bytes = subprocess.check_output([cxx, "--version"])
    compiler = None
    version = None
    if b'clang' in cc_version_bytes:
        assert(b'clang' in cxx_version_bytes)
        compiler = 'clang'
    elif b'gcc' in cc_version_bytes:
        assert(b'gcc' in cxx_version_bytes or b'g++' in cxx_version_bytes)
        compiler = 'gcc'
    if compiler is not None:
        # The quantifier must sit inside each group: '([0-9])+' would keep
        # only the last digit of a component, turning 10.2.1 into (0, 2, 1).
        version_regex = br'([0-9]+)\.([0-9]+)\.([0-9]+)'
        version_match = re.search(version_regex, cc_version_bytes)
        if version_match is None:
            raise RuntimeError(
                'could not find a version number in the output of '
                '{} --version'.format(cc))
        version = tuple(int(part) for part in version_match.groups())
    return compiler, version
191
192
def overflow_ubsan_flags(cc, cxx):
    """Return the UBSAN overflow check to disable for the given compilers.

    gcc gets ``-fno-sanitize=signed-integer-overflow``, clang 5.0.0 or newer
    gets ``-fno-sanitize=pointer-overflow``, anything else gets no flags.
    """
    compiler, version = compiler_version(cc, cxx)
    flags = []
    if compiler == 'gcc':
        flags.append('-fno-sanitize=signed-integer-overflow')
    elif compiler == 'clang' and version >= (5, 0, 0):
        flags.append('-fno-sanitize=pointer-overflow')
    return flags
200
201
def build_parser(args):
    """Parse and validate the command line of the ``build`` command.

    ``args[0]`` is consumed as the program name. Most options default to the
    module-level values read from the environment. After parsing, sanitizer
    settings found in the compiler/linker flags are merged in through
    ``parse_env_flags``.

    Raises:
        RuntimeError: on an inconsistent combination of sanitizer options.
    """
    description = """
    Cleans the repository and builds a fuzz target (or all).
    Many flags default to environment variables (default says $X='y').
    Options that aren't enabling features default to the correct values for
    zstd.
    Enable sanitizers with --enable-*san.
    For regression testing just build.
    For libFuzzer set LIB_FUZZING_ENGINE and pass --enable-coverage.
    For AFL set CC and CXX to AFL's compilers and set
    LIB_FUZZING_ENGINE='libregression.a'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)
    parser.add_argument(
        '--lib-fuzzing-engine',
        dest='lib_fuzzing_engine',
        type=str,
        default=LIB_FUZZING_ENGINE,
        help=('The fuzzing engine to use e.g. /path/to/libFuzzer.a '
              "(default: $LIB_FUZZING_ENGINE='{}')".format(LIB_FUZZING_ENGINE)))

    # Coverage instrumentation and the clang fuzzer cannot be combined.
    fuzz_group = parser.add_mutually_exclusive_group()
    fuzz_group.add_argument(
        '--enable-coverage',
        dest='coverage',
        action='store_true',
        help='Enable coverage instrumentation (-fsanitize-coverage)')
    fuzz_group.add_argument(
        '--enable-fuzzer',
        dest='fuzzer',
        action='store_true',
        help=('Enable clang fuzzer (-fsanitize=fuzzer). When enabled '
              'LIB_FUZZING_ENGINE is ignored')
    )

    parser.add_argument(
        '--enable-asan', dest='asan', action='store_true', help='Enable ASAN')
    parser.add_argument(
        '--enable-ubsan',
        dest='ubsan',
        action='store_true',
        help='Enable UBSAN')
    parser.add_argument(
        '--enable-ubsan-pointer-overflow',
        dest='ubsan_pointer_overflow',
        action='store_true',
        help='Enable UBSAN pointer overflow check (known failure)')
    parser.add_argument(
        '--enable-msan', dest='msan', action='store_true', help='Enable MSAN')
    parser.add_argument(
        '--enable-msan-track-origins', dest='msan_track_origins',
        action='store_true', help='Enable MSAN origin tracking')
    parser.add_argument(
        '--msan-extra-cppflags',
        dest='msan_extra_cppflags',
        type=str,
        default=MSAN_EXTRA_CPPFLAGS,
        help="Extra CPPFLAGS for MSAN (default: $MSAN_EXTRA_CPPFLAGS='{}')".
        format(MSAN_EXTRA_CPPFLAGS))
    parser.add_argument(
        '--msan-extra-cflags',
        dest='msan_extra_cflags',
        type=str,
        default=MSAN_EXTRA_CFLAGS,
        help="Extra CFLAGS for MSAN (default: $MSAN_EXTRA_CFLAGS='{}')".format(
            MSAN_EXTRA_CFLAGS))
    parser.add_argument(
        '--msan-extra-cxxflags',
        dest='msan_extra_cxxflags',
        type=str,
        default=MSAN_EXTRA_CXXFLAGS,
        help="Extra CXXFLAGS for MSAN (default: $MSAN_EXTRA_CXXFLAGS='{}')".
        format(MSAN_EXTRA_CXXFLAGS))
    parser.add_argument(
        '--msan-extra-ldflags',
        dest='msan_extra_ldflags',
        type=str,
        default=MSAN_EXTRA_LDFLAGS,
        help="Extra LDFLAGS for MSAN (default: $MSAN_EXTRA_LDFLAGS='{}')".
        format(MSAN_EXTRA_LDFLAGS))
    parser.add_argument(
        '--enable-sanitize-recover',
        dest='sanitize_recover',
        action='store_true',
        help='Non-fatal sanitizer errors where possible')
    parser.add_argument(
        '--debug',
        dest='debug',
        type=int,
        default=1,
        help='Set DEBUGLEVEL (default: 1)')
    parser.add_argument(
        '--force-memory-access',
        dest='memory_access',
        type=int,
        default=0,
        help='Set MEM_FORCE_MEMORY_ACCESS (default: 0)')
    parser.add_argument(
        '--fuzz-rng-seed-size',
        dest='fuzz_rng_seed_size',
        type=int,
        default=FUZZ_RNG_SEED_SIZE,
        help='Set FUZZ_RNG_SEED_SIZE (default: {})'.format(
            FUZZ_RNG_SEED_SIZE))
    parser.add_argument(
        '--disable-fuzzing-mode',
        dest='fuzzing_mode',
        action='store_false',
        help='Do not define FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')
    parser.add_argument(
        '--enable-stateful-fuzzing',
        dest='stateful_fuzzing',
        action='store_true',
        help='Reuse contexts between runs (makes reproduction impossible)')
    parser.add_argument(
        '--cc',
        dest='cc',
        type=str,
        default=CC,
        help="CC (default: $CC='{}')".format(CC))
    parser.add_argument(
        '--cxx',
        dest='cxx',
        type=str,
        default=CXX,
        help="CXX (default: $CXX='{}')".format(CXX))
    parser.add_argument(
        '--cppflags',
        dest='cppflags',
        type=str,
        default=CPPFLAGS,
        help="CPPFLAGS (default: $CPPFLAGS='{}')".format(CPPFLAGS))
    parser.add_argument(
        '--cflags',
        dest='cflags',
        type=str,
        default=CFLAGS,
        help="CFLAGS (default: $CFLAGS='{}')".format(CFLAGS))
    parser.add_argument(
        '--cxxflags',
        dest='cxxflags',
        type=str,
        default=CXXFLAGS,
        help="CXXFLAGS (default: $CXXFLAGS='{}')".format(CXXFLAGS))
    parser.add_argument(
        '--ldflags',
        dest='ldflags',
        type=str,
        default=LDFLAGS,
        help="LDFLAGS (default: $LDFLAGS='{}')".format(LDFLAGS))
    parser.add_argument(
        '--mflags',
        dest='mflags',
        type=str,
        default=MFLAGS,
        help="Extra Make flags (default: $MFLAGS='{}')".format(MFLAGS))
    parser.add_argument(
        'TARGET',
        nargs='*',
        type=str,
        help='Fuzz target(s) to build {{{}}}'.format(', '.join(ALL_TARGETS))
    )
    args = parser.parse_args(args)
    # Sanitizers requested through -fsanitize=/-fno-sanitize= in any of the
    # flag strings take part in the checks below.
    args = parse_env_flags(args, ' '.join(
        [args.cppflags, args.cflags, args.cxxflags, args.ldflags]))

    # Check option sanity
    if args.msan and (args.asan or args.ubsan):
        raise RuntimeError('MSAN may not be used with any other sanitizers')
    if args.msan_track_origins and not args.msan:
        raise RuntimeError('--enable-msan-track-origins requires MSAN')
    if args.ubsan_pointer_overflow and not args.ubsan:
        raise RuntimeError('--enable-ubsan-pointer-overflow requires UBSAN')
    if args.sanitize_recover and not args.sanitize:
        raise RuntimeError('--enable-sanitize-recover but no sanitizers used')

    return args
378
379
def build(args):
    """Run ``make clean`` and then build the requested fuzz targets.

    Returns 1 (after printing the error) when the command line is rejected
    and 0 after a successful build. A failing ``make`` call raises
    ``subprocess.CalledProcessError``.
    """
    try:
        args = build_parser(args)
    except Exception as e:
        print(e)
        return 1

    # The compilation flags we are setting
    targets = args.TARGET
    cc, cxx = args.cc, args.cxx
    cppflags = shlex.split(args.cppflags)
    cflags = shlex.split(args.cflags)
    ldflags = shlex.split(args.ldflags)
    cxxflags = shlex.split(args.cxxflags)
    mflags = shlex.split(args.mflags)
    # Flags that end up in both CFLAGS and CXXFLAGS
    shared_flags = []

    cppflags.extend([
        '-DDEBUGLEVEL={}'.format(args.debug),
        '-DMEM_FORCE_MEMORY_ACCESS={}'.format(args.memory_access),
        '-DFUZZ_RNG_SEED_SIZE={}'.format(args.fuzz_rng_seed_size),
    ])

    # Set flags for options
    assert not (args.fuzzer and args.coverage)
    if args.coverage:
        shared_flags.append(
            '-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp')
    if args.fuzzer:
        shared_flags.append('-fsanitize=fuzzer')
        # The clang fuzzer brings its own engine.
        args.lib_fuzzing_engine = ''

    mflags.append('LIB_FUZZING_ENGINE={}'.format(args.lib_fuzzing_engine))

    if args.sanitize:
        if args.sanitize_recover:
            shared_flags.append('-fsanitize-recover=all')
        else:
            shared_flags.append('-fno-sanitize-recover=all')

    if args.msan:
        shared_flags.append('-fsanitize=memory')
        if args.msan_track_origins:
            shared_flags.append('-fsanitize-memory-track-origins')
        # Append extra MSAN flags (it might require special setup)
        cppflags.append(args.msan_extra_cppflags)
        cflags.append(args.msan_extra_cflags)
        cxxflags.append(args.msan_extra_cxxflags)
        ldflags.append(args.msan_extra_ldflags)

    if args.asan:
        shared_flags.append('-fsanitize=address')

    if args.ubsan:
        shared_flags.append('-fsanitize=undefined')
        if not args.ubsan_pointer_overflow:
            shared_flags.extend(overflow_ubsan_flags(cc, cxx))

    if args.stateful_fuzzing:
        cppflags.append('-DSTATEFUL_FUZZING')

    if args.fuzzing_mode:
        cppflags.append('-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION')

    if args.lib_fuzzing_engine == 'libregression.a':
        # The regression driver is built before the targets that link it.
        targets = ['libregression.a'] + targets

    # Append the common flags
    cflags.extend(shared_flags)
    cxxflags.extend(shared_flags)

    # Prepare the flags for Make
    make_vars = [
        "CC={}".format(cc),
        "CXX={}".format(cxx),
        "CPPFLAGS={}".format(' '.join(cppflags)),
        "CFLAGS={}".format(' '.join(cflags)),
        "CXXFLAGS={}".format(' '.join(cxxflags)),
        "LDFLAGS={}".format(' '.join(ldflags)),
    ]

    # Print the flags
    print('MFLAGS={}'.format(' '.join(mflags)))
    for assignment in make_vars:
        print(assignment)

    # Clean and build
    clean_cmd = ['make', 'clean'] + mflags
    print(' '.join(clean_cmd))
    subprocess.check_call(clean_cmd)
    build_cmd = ['make'] + make_vars + mflags + targets
    print(' '.join(build_cmd))
    subprocess.check_call(build_cmd)
    return 0
489
490
def libfuzzer_parser(args):
    """Parse the command line of the ``libfuzzer`` command.

    ``args[0]`` is consumed as the program name. Unrecognized arguments are
    kept on the result as ``extra``. Raises RuntimeError for an unknown
    TARGET.
    """
    description = """
    Runs a libfuzzer binary.
    Passes all extra arguments to libfuzzer.
    The fuzzer should have been build with LIB_FUZZING_ENGINE pointing to
    libFuzzer.a.
    Generates output in the CORPORA directory, puts crashes in the ARTIFACT
    directory, and takes extra input from the SEED directory.
    To merge AFL's output pass the SEED as AFL's output directory and pass
    '-merge=1'.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    artifact_help = 'Override the default artifact dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-crash'))
    seed_help = 'Override the default seed dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-seed'))
    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))

    parser.add_argument('--corpora', type=str, help=corpora_help)
    parser.add_argument('--artifact', type=str, help=artifact_help)
    parser.add_argument('--seed', type=str, help=seed_help)
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    return parsed
529
530
def libfuzzer(target, corpora=None, artifact=None, seed=None, extra_args=None):
    """Run the libFuzzer binary for ``target``.

    Directories left as None default to ``CORPORA_DIR/<target>``,
    ``<target>-crash`` and ``<target>-seed``. The corpora and artifact
    directories are created when missing; the seed directory is passed to
    the fuzzer only if it already exists. Raises
    ``subprocess.CalledProcessError`` when the fuzzer exits non-zero.
    """
    if corpora is None:
        corpora = abs_join(CORPORA_DIR, target)
    if artifact is None:
        artifact = abs_join(CORPORA_DIR, '{}-crash'.format(target))
    if seed is None:
        seed = abs_join(CORPORA_DIR, '{}-seed'.format(target))
    if extra_args is None:
        extra_args = []

    binary = abs_join(FUZZ_DIR, target)

    corpus_dir = create(corpora)
    artifact_dir = create(artifact)
    seed_dir = check(seed)

    # Directories given to the fuzzer: corpus first, then crashes, then seeds.
    inputs = [corpus_dir, artifact_dir]
    if seed_dir is not None:
        inputs.append(seed_dir)

    cmd = [binary, '-artifact_prefix={}/'.format(artifact_dir)]
    cmd.extend(inputs)
    cmd.extend(extra_args)
    print(' '.join(cmd))
    subprocess.check_call(cmd)
555
556
def libfuzzer_cmd(args):
    """Entry point of the ``libfuzzer`` command.

    Returns 1 (after printing the error) when the arguments are rejected,
    otherwise runs the fuzzer and returns 0.
    """
    try:
        parsed = libfuzzer_parser(args)
    except Exception as e:
        print(e)
        return 1
    libfuzzer(
        parsed.TARGET,
        parsed.corpora,
        parsed.artifact,
        parsed.seed,
        parsed.extra,
    )
    return 0
565
566
def afl_parser(args):
    """Parse the command line of the ``afl`` command.

    ``args[0]`` is consumed as the program name. Unrecognized arguments are
    kept as ``extra``. ``corpora`` and ``output`` fall back to directories
    under ``CORPORA_DIR`` named after the target. Raises RuntimeError for an
    unknown TARGET.
    """
    description = """
    Runs an afl-fuzz job.
    Passes all extra arguments to afl-fuzz.
    The fuzzer should have been built with CC/CXX set to the AFL compilers,
    and with LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA and writes output to OUTPUT.
    Uses AFL_FUZZ as the binary (set from flag or environment variable).
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)

    corpora_help = 'Override the default corpora dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET'))
    output_help = 'Override the default AFL output dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-afl'))
    afl_help = 'AFL_FUZZ (default: $AFL_FUZZ={})'.format(AFL_FUZZ)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))

    parser.add_argument('--corpora', type=str, help=corpora_help)
    parser.add_argument('--output', type=str, help=output_help)
    parser.add_argument('--afl-fuzz', type=str, default=AFL_FUZZ,
                        help=afl_help)
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    if not parsed.corpora:
        parsed.corpora = abs_join(CORPORA_DIR, parsed.TARGET)
    if not parsed.output:
        parsed.output = abs_join(CORPORA_DIR, '{}-afl'.format(parsed.TARGET))

    return parsed
608
609
def afl(args):
    """Entry point of the ``afl`` command.

    Returns 1 (after printing the error) when the arguments are rejected.
    Otherwise creates the corpora and output directories, runs afl-fuzz on
    the target and returns 0 whatever afl-fuzz's exit status is.
    """
    try:
        parsed = afl_parser(args)
    except Exception as e:
        print(e)
        return 1

    binary = abs_join(FUZZ_DIR, parsed.TARGET)
    in_dir = create(parsed.corpora)
    out_dir = create(parsed.output)

    # '@@' is passed to afl-fuzz as the target's only argument.
    cmd = [parsed.afl_fuzz, '-i', in_dir, '-o', out_dir]
    cmd.extend(parsed.extra)
    cmd.extend([binary, '@@'])
    print(' '.join(cmd))
    subprocess.call(cmd)
    return 0
626
627
def regression(args):
    """Entry point of the ``regression`` command.

    Runs each selected target binary with its corpus directory (created if
    missing) as the only argument. Returns 1 when the arguments are
    rejected, 0 otherwise; a failing target raises
    ``subprocess.CalledProcessError``.
    """
    description = """
    Runs one or more regression tests.
    The fuzzer should have been built with with
    LIB_FUZZING_ENGINE='libregression.a'.
    Takes input from CORPORA.
    """
    try:
        parsed = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in parsed.TARGET:
        corpus_dir = create(abs_join(CORPORA_DIR, name))
        cmd = [abs_join(FUZZ_DIR, name), corpus_dir]
        print(' '.join(cmd))
        subprocess.check_call(cmd)
    return 0
647
648
def gen_parser(args):
    """Parse the command line of the ``gen`` command.

    ``args[0]`` is consumed as the program name. Unrecognized arguments are
    kept as ``extra``. ``seed`` falls back to ``CORPORA_DIR/<TARGET>-seed``.
    Raises RuntimeError for an unknown TARGET or when the decodecorpus
    binary is not a file.
    """
    description = """
    Generate a seed corpus appropriate for TARGET with data generated with
    decodecorpus.
    The fuzz inputs are prepended with a seed before the zstd data, so the
    output of decodecorpus shouldn't be used directly.
    Generates NUMBER samples prepended with FUZZ_RNG_SEED_SIZE random bytes and
    puts the output in SEED.
    DECODECORPUS is the decodecorpus binary, and must already be built.
    """
    parser = argparse.ArgumentParser(prog=args.pop(0), description=description)

    seed_help = 'Override the default seed dir (default: {})'.format(
        abs_join(CORPORA_DIR, 'TARGET-seed'))
    decodecorpus_help = (
        "decodecorpus binary (default: $DECODECORPUS='{}')".format(
            DECODECORPUS))
    zstd_help = "zstd binary (default: $ZSTD='{}')".format(ZSTD)
    target_help = 'Fuzz target(s) to build {{{}}}'.format(', '.join(TARGETS))

    parser.add_argument('--number', '-n', type=int, default=100,
                        help='Number of samples to generate')
    parser.add_argument('--max-size-log', type=int, default=18,
                        help='Maximum sample size to generate')
    parser.add_argument('--seed', type=str, help=seed_help)
    parser.add_argument('--decodecorpus', type=str, default=DECODECORPUS,
                        help=decodecorpus_help)
    parser.add_argument('--zstd', type=str, default=ZSTD, help=zstd_help)
    parser.add_argument(
        '--fuzz-rng-seed-size',
        type=int,
        default=4,
        help="FUZZ_RNG_SEED_SIZE used for generate the samples (must match)")
    parser.add_argument('TARGET', type=str, help=target_help)

    parsed, unknown = parser.parse_known_args(args)
    parsed.extra = unknown

    if parsed.TARGET and parsed.TARGET not in TARGETS:
        raise RuntimeError('{} is not a valid target'.format(parsed.TARGET))

    if not parsed.seed:
        parsed.seed = abs_join(CORPORA_DIR, '{}-seed'.format(parsed.TARGET))

    if not os.path.isfile(parsed.decodecorpus):
        raise RuntimeError("{} is not a file run 'make -C {} decodecorpus'".
                           format(parsed.decodecorpus,
                                  abs_join(FUZZ_DIR, '..')))

    return parsed
711
712
def gen(args):
    """Entry point of the ``gen`` command: generate a seed corpus.

    Runs decodecorpus into temporary directories, picks the output matching
    the target's input type (raw, compressed, or dictionaries trained from
    the raw samples with ``zstd --train``) and copies those files into the
    seed directory. The files are copied verbatim; no RNG seed bytes are
    written by this function.

    Returns 1 (after printing the error) when the arguments are rejected and
    0 on success. A failing decodecorpus or zstd call raises
    ``subprocess.CalledProcessError``.
    """
    try:
        args = gen_parser(args)
    except Exception as e:
        print(e)
        return 1

    seed = create(args.seed)
    with tmpdir() as compressed, tmpdir() as decompressed, \
            tmpdir() as dict_dir:
        info = TARGET_INFO[args.TARGET]

        # Dictionary targets always generate at least 1000 samples for
        # training.
        if info.input_type == InputType.DICTIONARY_DATA:
            number = max(args.number, 1000)
        else:
            number = args.number
        cmd = [
            args.decodecorpus,
            '-n{}'.format(number),
            '-p{}/'.format(compressed),
            '-o{}'.format(decompressed),
        ]

        if info.frame_type == FrameType.BLOCK:
            cmd += [
                '--gen-blocks',
                # Block size log is capped at 17 for block targets.
                '--max-block-size-log={}'.format(min(args.max_size_log, 17))
            ]
        else:
            cmd += ['--max-content-size-log={}'.format(args.max_size_log)]

        print(' '.join(cmd))
        subprocess.check_call(cmd)

        if info.input_type == InputType.RAW_DATA:
            print('using decompressed data in {}'.format(decompressed))
            samples = decompressed
        elif info.input_type == InputType.COMPRESSED_DATA:
            print('using compressed data in {}'.format(compressed))
            samples = compressed
        else:
            assert info.input_type == InputType.DICTIONARY_DATA
            print('making dictionary data from {}'.format(decompressed))
            samples = dict_dir
            # Train one dictionary per power-of-two size, starting at
            # 2**9 bytes and stopping below 2**max_size_log.
            min_dict_size_log = 9
            max_dict_size_log = max(min_dict_size_log + 1, args.max_size_log)
            for dict_size_log in range(min_dict_size_log, max_dict_size_log):
                dict_size = 1 << dict_size_log
                cmd = [
                    args.zstd,
                    '--train',
                    '-r', decompressed,
                    '--maxdict={}'.format(dict_size),
                    '-o', abs_join(dict_dir,
                                   '{}.zstd-dict'.format(dict_size))
                ]
                print(' '.join(cmd))
                subprocess.check_call(cmd)

        # Copy the generated samples into the seed directory
        for name in os.listdir(samples):
            samplename = abs_join(samples, name)
            outname = abs_join(seed, name)
            with open(samplename, 'rb') as sample:
                with open(outname, 'wb') as out:
                    shutil.copyfileobj(sample, out, 131072)
    return 0
782
783
def minimize(args):
    """Entry point of the ``minimize`` command.

    For each selected target, runs libFuzzer with ``-merge=1`` to merge the
    corpus into ``<target>_seed_corpus``, then copies every crash file not
    already present there. Returns 1 when the arguments are rejected.
    """
    description = """
    Runs a libfuzzer fuzzer with -merge=1 to build a minimal corpus in
    TARGET_seed_corpus. All extra args are passed to libfuzzer.
    """
    try:
        parsed = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in parsed.TARGET:
        # Merge the corpus + anything else into the seed_corpus
        corpus_dir = abs_join(CORPORA_DIR, name)
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(name))
        merge_args = [corpus_dir, "-merge=1"] + parsed.extra
        libfuzzer(name, corpora=seed_corpus, extra_args=merge_args)
        present = set(os.listdir(seed_corpus))
        # Copy all crashes directly into the seed_corpus if not already present
        crash_dir = abs_join(CORPORA_DIR, '{}-crash'.format(name))
        for crash in os.listdir(crash_dir):
            if crash in present:
                continue
            shutil.copy(abs_join(crash_dir, crash), seed_corpus)
            present.add(crash)
808
809
def zip_cmd(args):
    """Entry point of the ``zip`` command.

    Runs ``zip`` inside each selected target's ``<target>_seed_corpus``
    directory, writing ``<target>_seed_corpus.zip`` next to it. Returns 1
    when the arguments are rejected; a failing ``zip`` raises
    ``subprocess.CalledProcessError``.
    """
    description = """
    Zips up the seed corpus.
    """
    try:
        parsed = targets_parser(args, description)
    except Exception as e:
        print(e)
        return 1

    for name in parsed.TARGET:
        # Zip the seed_corpus
        seed_corpus = abs_join(CORPORA_DIR, "{}_seed_corpus".format(name))
        archive = "{}.zip".format(seed_corpus)
        cmd = ["zip", "-r", "-q", "-j", "-9", archive, "."]
        print(' '.join(cmd))
        subprocess.check_call(cmd, cwd=seed_corpus)
11fdf7f2
TL
827
828
def list_cmd(args):
    """Entry point of the ``list`` command: print one target name per line.

    ``args`` is accepted for uniformity with the other commands and unused.
    """
    listing = "\n".join(TARGETS)
    print(listing)
831
832
def short_help(args):
    """Print the one-line usage summary, using ``args[0]`` as program name."""
    usage = "Usage: {} [OPTIONS] COMMAND [ARGS]...\n".format(args[0])
    print(usage)
836
837
def help(args):
    """Print the full help: usage line, options and available commands."""
    short_help(args)
    body = (
        "\tfuzzing helpers (select a command and pass -h for help)\n",
        "Options:",
        "\t-h, --help\tPrint this message",
        "",
        "Commands:",
        "\tbuild\t\tBuild a fuzzer",
        "\tlibfuzzer\tRun a libFuzzer fuzzer",
        "\tafl\t\tRun an AFL fuzzer",
        "\tregression\tRun a regression test",
        "\tgen\t\tGenerate a seed corpus for a fuzzer",
        "\tminimize\tMinimize the test corpora",
        "\tzip\t\tZip the minimized corpora up",
        "\tlist\t\tList the available targets",
    )
    for line in body:
        print(line)
853
854
def main():
    """Dispatch ``sys.argv`` to the selected sub-command.

    Returns the sub-command's result, or 1 when help is shown or the command
    is unknown. The command word is removed from ``sys.argv`` and folded
    into ``argv[0]`` so each sub-parser reports "<script> <command>" as its
    program name.
    """
    args = sys.argv
    if len(args) < 2 or args[1] in ('-h', '--help', '-H'):
        help(args)
        return 1

    command = args.pop(1)
    args[0] = "{} {}".format(args[0], command)

    handlers = {
        "build": build,
        "libfuzzer": libfuzzer_cmd,
        "regression": regression,
        "afl": afl,
        "gen": gen,
        "minimize": minimize,
        "zip": zip_cmd,
        "list": list_cmd,
    }
    handler = handlers.get(command)
    if handler is not None:
        return handler(args)

    short_help(args)
    print("Error: No such command {} (pass -h for help)".format(command))
    return 1
884
885
# Script entry point: the value returned by main() is the exit status.
if __name__ == "__main__":
    status = main()
    sys.exit(status)