1 //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 //===----------------------------------------------------------------------===//
12 #ifndef LLVM_FUZZER_CORPUS
13 #define LLVM_FUZZER_CORPUS
15 #include "FuzzerDefs.h"
17 #include "FuzzerRandom.h"
18 #include "FuzzerSHA1.h"
19 #include "FuzzerTracePC.h"
22 #include <unordered_set>
27 Unit U
; // The actual input data.
28 uint8_t Sha1
[kSHA1NumBytes
]; // Checksum.
29 // Number of features that this input has and no smaller input has.
30 size_t NumFeatures
= 0;
31 size_t Tmp
= 0; // Used by ValidateFeatureSet.
33 size_t NumExecutedMutations
= 0;
34 size_t NumSuccessfullMutations
= 0;
35 bool MayDeleteFile
= false;
40 static const size_t kFeatureSetSize
= 1 << 16;
41 InputCorpus(const std::string
&OutputCorpus
) : OutputCorpus(OutputCorpus
) {
42 memset(InputSizesPerFeature
, 0, sizeof(InputSizesPerFeature
));
43 memset(SmallestElementPerFeature
, 0, sizeof(SmallestElementPerFeature
));
46 for (auto II
: Inputs
)
49 size_t size() const { return Inputs
.size(); }
50 size_t SizeInBytes() const {
52 for (auto II
: Inputs
)
56 size_t NumActiveUnits() const {
58 for (auto II
: Inputs
)
59 Res
+= !II
->U
.empty();
62 bool empty() const { return Inputs
.empty(); }
63 const Unit
&operator[] (size_t Idx
) const { return Inputs
[Idx
]->U
; }
64 void AddToCorpus(const Unit
&U
, size_t NumFeatures
, bool MayDeleteFile
= false) {
66 uint8_t Hash
[kSHA1NumBytes
];
68 Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs
.size(), NumFeatures
);
69 ComputeSHA1(U
.data(), U
.size(), Hash
);
70 Hashes
.insert(Sha1ToString(Hash
));
71 Inputs
.push_back(new InputInfo());
72 InputInfo
&II
= *Inputs
.back();
74 II
.NumFeatures
= NumFeatures
;
75 II
.MayDeleteFile
= MayDeleteFile
;
76 memcpy(II
.Sha1
, Hash
, kSHA1NumBytes
);
77 UpdateCorpusDistribution();
81 bool HasUnit(const Unit
&U
) { return Hashes
.count(Hash(U
)); }
82 bool HasUnit(const std::string
&H
) { return Hashes
.count(H
); }
83 InputInfo
&ChooseUnitToMutate(Random
&Rand
) {
84 InputInfo
&II
= *Inputs
[ChooseUnitIdxToMutate(Rand
)];
85 assert(!II
.U
.empty());
89 // Returns an index of random unit from the corpus to mutate.
90 // Hypothesis: units added to the corpus last are more likely to be
91 // interesting. This function gives more weight to the more recent units.
92 size_t ChooseUnitIdxToMutate(Random
&Rand
) {
93 size_t Idx
= static_cast<size_t>(CorpusDistribution(Rand
.Get_mt19937()));
94 assert(Idx
< Inputs
.size());
99 for (size_t i
= 0; i
< Inputs
.size(); i
++) {
100 const auto &II
= *Inputs
[i
];
101 Printf(" [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i
,
102 Sha1ToString(II
.Sha1
).c_str(), II
.U
.size(),
103 II
.NumExecutedMutations
, II
.NumSuccessfullMutations
);
107 void PrintFeatureSet() {
108 for (size_t i
= 0; i
< kFeatureSetSize
; i
++) {
109 if(size_t Sz
= GetFeature(i
))
110 Printf("[%zd: id %zd sz%zd] ", i
, SmallestElementPerFeature
[i
], Sz
);
113 for (size_t i
= 0; i
< Inputs
.size(); i
++)
114 if (size_t N
= Inputs
[i
]->NumFeatures
)
115 Printf(" %zd=>%zd ", i
, N
);
119 void DeleteInput(size_t Idx
) {
120 InputInfo
&II
= *Inputs
[Idx
];
121 if (!OutputCorpus
.empty() && II
.MayDeleteFile
)
122 RemoveFile(DirPlusFile(OutputCorpus
, Sha1ToString(II
.Sha1
)));
125 Printf("EVICTED %zd\n", Idx
);
128 bool AddFeature(size_t Idx
, uint32_t NewSize
, bool Shrink
) {
130 Idx
= Idx
% kFeatureSetSize
;
131 uint32_t OldSize
= GetFeature(Idx
);
132 if (OldSize
== 0 || (Shrink
&& OldSize
> NewSize
)) {
134 size_t OldIdx
= SmallestElementPerFeature
[Idx
];
135 InputInfo
&II
= *Inputs
[OldIdx
];
136 assert(II
.NumFeatures
> 0);
138 if (II
.NumFeatures
== 0)
142 Printf("ADD FEATURE %zd sz %d\n", Idx
, NewSize
);
143 SmallestElementPerFeature
[Idx
] = Inputs
.size();
144 InputSizesPerFeature
[Idx
] = NewSize
;
145 CountingFeatures
= true;
151 size_t NumFeatures() const {
153 for (size_t i
= 0; i
< kFeatureSetSize
; i
++)
154 Res
+= GetFeature(i
) != 0;
158 void ResetFeatureSet() {
159 assert(Inputs
.empty());
160 memset(InputSizesPerFeature
, 0, sizeof(InputSizesPerFeature
));
161 memset(SmallestElementPerFeature
, 0, sizeof(SmallestElementPerFeature
));
166 static const bool FeatureDebug
= false;
168 size_t GetFeature(size_t Idx
) const { return InputSizesPerFeature
[Idx
]; }
170 void ValidateFeatureSet() {
171 if (!CountingFeatures
) return;
174 for (size_t Idx
= 0; Idx
< kFeatureSetSize
; Idx
++)
176 Inputs
[SmallestElementPerFeature
[Idx
]]->Tmp
++;
177 for (auto II
: Inputs
) {
178 if (II
->Tmp
!= II
->NumFeatures
)
179 Printf("ZZZ %zd %zd\n", II
->Tmp
, II
->NumFeatures
);
180 assert(II
->Tmp
== II
->NumFeatures
);
185 // Updates the probability distribution for the units in the corpus.
186 // Must be called whenever the corpus or unit weights are changed.
187 void UpdateCorpusDistribution() {
188 size_t N
= Inputs
.size();
189 Intervals
.resize(N
+ 1);
191 std::iota(Intervals
.begin(), Intervals
.end(), 0);
192 if (CountingFeatures
)
193 for (size_t i
= 0; i
< N
; i
++)
194 Weights
[i
] = Inputs
[i
]->NumFeatures
* (i
+ 1);
196 std::iota(Weights
.begin(), Weights
.end(), 1);
197 CorpusDistribution
= std::piecewise_constant_distribution
<double>(
198 Intervals
.begin(), Intervals
.end(), Weights
.begin());
200 std::piecewise_constant_distribution
<double> CorpusDistribution
;
202 std::vector
<double> Intervals
;
203 std::vector
<double> Weights
;
205 std::unordered_set
<std::string
> Hashes
;
206 std::vector
<InputInfo
*> Inputs
;
208 bool CountingFeatures
= false;
209 uint32_t InputSizesPerFeature
[kFeatureSetSize
];
210 uint32_t SmallestElementPerFeature
[kFeatureSetSize
];
212 std::string OutputCorpus
;
215 } // namespace fuzzer
217 #endif // LLVM_FUZZER_CORPUS