2 * Copyright 2017 WebAssembly Community Group participants
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 // Tries to reduce the input wasm into the smallest possible wasm
19 // that still generates the same result on a given command. This is
20 // useful to reduce bug testcases, for example, if a file crashes
21 // the optimizer, you can reduce it to find the smallest file that
22 // also crashes it (which generally will show the same bug, in a
23 // much more debuggable manner).
31 #include "support/command-line.h"
32 #include "support/file.h"
34 #include "wasm-builder.h"
35 #include "ir/literal-utils.h"
36 #include "wasm-validator.h"
40 // a timeout on every execution of the command
43 struct ProgramResult
{
48 ProgramResult(std::string command
) {
49 getFromExecution(command
);
52 // runs the command and notes the output
53 // TODO: also stderr, not just stdout?
54 void getFromExecution(std::string command
) {
55 // do this using just core stdio.h and stdlib.h, for portability
56 // sadly this requires two invokes
57 code
= system(("timeout " + std::to_string(timeout
) + "s " + command
+ " > /dev/null 2> /dev/null").c_str());
58 const int MAX_BUFFER
= 1024;
59 char buffer
[MAX_BUFFER
];
60 FILE *stream
= popen(("timeout " + std::to_string(timeout
) + "s " + command
+ " 2> /dev/null").c_str(), "r");
61 while (fgets(buffer
, MAX_BUFFER
, stream
) != NULL
) {
62 output
.append(buffer
);
67 bool operator==(ProgramResult
& other
) {
68 return code
== other
.code
&& output
== other
.output
;
70 bool operator!=(ProgramResult
& other
) {
71 return !(*this == other
);
78 void dump(std::ostream
& o
) {
79 o
<< "[ProgramResult] code: " << code
<< " stdout: \n" << output
<< "[/ProgramResult]\n";
85 inline std::ostream
& operator<<(std::ostream
& o
, ProgramResult
& result
) {
92 ProgramResult expected
;
94 struct Reducer
: public WalkerPass
<PostWalker
<Reducer
, UnifiedExpressionVisitor
<Reducer
>>> {
95 std::string command
, test
, working
;
96 bool verbose
, debugInfo
;
98 // test is the file we write to that the command will operate on
99 // working is the current temporary state, the reduction so far
100 Reducer(std::string command
, std::string test
, std::string working
, bool verbose
, bool debugInfo
) : command(command
), test(test
), working(working
), verbose(verbose
), debugInfo(debugInfo
) {}
102 // runs passes in order to reduce, until we can't reduce any more
103 // the criterion here is wasm binary size
104 void reduceUsingPasses() {
105 // run optimization passes until we can't shrink it any more
106 std::vector
<std::string
> passes
= {
112 "--coalesce-locals --vacuum",
114 "--duplicate-function-elimination",
116 "--inlining-optimizing",
117 "--optimize-level=3 --inlining-optimizing",
118 "--local-cse --vacuum",
120 "--remove-unused-names --merge-blocks --vacuum",
121 "--optimize-instructions",
125 "--remove-unused-names --remove-unused-brs",
126 "--remove-unused-module-elements",
127 "--reorder-functions",
129 "--simplify-locals --vacuum",
132 auto oldSize
= file_size(working
);
135 //std::cerr << "| starting passes loop iteration\n";
137 // try both combining with a generic shrink (so minor pass overhead is compensated for), and without
138 for (auto shrinking
: { false, true }) {
139 for (auto pass
: passes
) {
140 std::string currCommand
= "bin/wasm-opt ";
141 if (shrinking
) currCommand
+= " --dce --vacuum ";
142 currCommand
+= working
+ " -o " + test
+ " " + pass
;
143 if (debugInfo
) currCommand
+= " -g ";
144 if (verbose
) std::cerr
<< "| trying pass command: " << currCommand
<< "\n";
145 if (!ProgramResult(currCommand
).failed()) {
146 auto newSize
= file_size(test
);
147 if (newSize
< oldSize
) {
148 // the pass didn't fail, and the size looks smaller, so promising
149 // see if it is still has the property we are preserving
150 if (ProgramResult(command
) == expected
) {
151 std::cerr
<< "| command \"" << currCommand
<< "\" succeeded, reduced size to " << newSize
<< ", and preserved the property\n";
152 copy_file(test
, working
);
161 if (verbose
) std::cerr
<< "| done with passes for now\n";
164 // does one pass of slow and destructive reduction. returns whether it
166 // the criterion here is a logical change in the program. this may actually
167 // increase wasm size in some cases, but it should allow more reduction later.
168 // @param factor how much to ignore. starting with a high factor skips through
169 // most of the file, which is often faster than going one by one
171 size_t reduceDestructively(int factor_
) {
175 reader
.read(working
, wasm
);
178 builder
= make_unique
<Builder
>(wasm
);
180 // before we do any changes, it should be valid to write out the module:
181 // size should be as expected, and output should be as expected
183 if (!writeAndTestReduction()) {
184 std::cerr
<< "\n|! WARNING: writing before destructive reduction fails, very unlikely reduction can work\n\n";
191 // destructive reduction state
194 Expression
* beforeReduction
;
195 std::unique_ptr
<Builder
> builder
;
199 // write the module and see if the command still fails on it as expected
200 bool writeAndTestReduction() {
201 ProgramResult result
;
202 return writeAndTestReduction(result
);
205 bool writeAndTestReduction(ProgramResult
& out
) {
206 // write the module out
208 writer
.setBinary(true);
209 writer
.setDebugInfo(debugInfo
);
210 writer
.write(*getModule(), test
);
211 // note that it is ok for the destructively-reduced module to be bigger
212 // than the previous - each destructive reduction removes logical code,
213 // and so is strictly better, even if the wasm binary format happens to
214 // encode things slightly less efficiently.
216 out
.getFromExecution(command
);
217 return out
== expected
;
220 bool shouldTryToReduce(size_t bonus
= 1) {
221 static size_t counter
= 0;
223 return (counter
% factor
) <= bonus
;
226 // tests a reduction on the current traversal node, and undos if it failed
227 bool tryToReplaceCurrent(Expression
* with
) {
228 auto* curr
= getCurrent();
229 //std::cerr << "try " << curr << " => " << with << '\n';
230 if (curr
->type
!= with
->type
) return false;
231 if (!shouldTryToReduce()) return false;
232 replaceCurrent(with
);
233 if (!writeAndTestReduction()) {
234 replaceCurrent(curr
);
237 std::cerr
<< "| tryToReplaceCurrent succeeded (in " << getLocation() << ")\n";
242 void noteReduction() {
244 copy_file(test
, working
);
247 // tests a reduction on an arbitrary child
248 bool tryToReplaceChild(Expression
*& child
, Expression
* with
) {
249 if (child
->type
!= with
->type
) return false;
250 if (!shouldTryToReduce()) return false;
251 auto* before
= child
;
253 if (!writeAndTestReduction()) {
257 std::cerr
<< "| tryToReplaceChild succeeded (in " << getLocation() << ")\n";
258 //std::cerr << "| " << before << " => " << with << '\n';
263 std::string
getLocation() {
264 if (getFunction()) return getFunction()->name
.str
;
265 return "(non-function context)";
268 // visitors. in each we try to remove code in a destructive and nontrivial way.
269 // "nontrivial" means something that optimization passes can't achieve, since we
270 // don't need to duplicate work that they do
272 void visitExpression(Expression
* curr
) {
273 if (curr
->type
== none
) {
274 if (tryToReduceCurrentToNone()) return;
275 } else if (isConcreteWasmType(curr
->type
)) {
276 if (tryToReduceCurrentToConst()) return;
278 assert(curr
->type
== unreachable
);
279 if (tryToReduceCurrentToUnreachable()) return;
281 // specific reductions
282 if (auto* iff
= curr
->dynCast
<If
>()) {
283 if (iff
->type
== none
) {
284 // perhaps we need just the condition?
285 if (tryToReplaceCurrent(builder
->makeDrop(iff
->condition
))) {
289 handleCondition(iff
->condition
);
290 } else if (auto* br
= curr
->dynCast
<Break
>()) {
291 handleCondition(br
->condition
);
292 } else if (auto* select
= curr
->dynCast
<Select
>()) {
293 handleCondition(select
->condition
);
294 } else if (auto* sw
= curr
->dynCast
<Switch
>()) {
295 handleCondition(sw
->condition
);
296 } else if (auto* set
= curr
->dynCast
<SetLocal
>()) {
298 // maybe we don't need the set
299 tryToReplaceCurrent(set
->value
);
301 } else if (auto* unary
= curr
->dynCast
<Unary
>()) {
302 // maybe we can pass through
303 tryToReplaceCurrent(unary
->value
);
304 } else if (auto* binary
= curr
->dynCast
<Binary
>()) {
305 // maybe we can pass through
306 if (!tryToReplaceCurrent(binary
->left
)) {
307 tryToReplaceCurrent(binary
->right
);
309 } else if (auto* call
= curr
->dynCast
<Call
>()) {
311 } else if (auto* call
= curr
->dynCast
<CallImport
>()) {
313 } else if (auto* call
= curr
->dynCast
<CallIndirect
>()) {
314 if (tryToReplaceCurrent(call
->target
)) return;
319 void visitFunction(Function
* curr
) {
320 // extra chance to work on the function toplevel element, as if it can
321 // be reduced it's great
322 visitExpression(curr
->body
);
326 int percentage
= (100 * funcsSeen
) / getModule()->functions
.size();
327 if (std::abs(percentage
- last
) >= 5) {
328 std::cerr
<< "| " << percentage
<< "% of funcs complete\n";
333 // TODO: bisection on segment shrinking?
335 void visitTable(Table
* curr
) {
336 std::cerr
<< "| try to simplify table\n";
338 for (auto& segment
: curr
->segments
) {
339 for (auto item
: segment
.data
) {
343 if (!first
.isNull()) break;
345 visitSegmented(curr
, first
, 100);
348 void visitMemory(Memory
* curr
) {
349 std::cerr
<< "| try to simplify memory\n";
350 visitSegmented(curr
, 0, 2);
353 template<typename T
, typename U
>
354 void visitSegmented(T
* curr
, U zero
, size_t bonus
) {
355 // try to reduce to first function
356 // shrink segment elements
357 for (auto& segment
: curr
->segments
) {
358 auto& data
= segment
.data
;
359 size_t skip
= 1; // when we succeed, try to shrink by more and more, similar to bisection
360 for (size_t i
= 0; i
< data
.size() && !data
.empty(); i
++) {
361 if (!shouldTryToReduce(bonus
)) continue;
363 for (size_t j
= 0; j
< skip
; j
++) {
364 if (!data
.empty()) data
.pop_back();
366 if (writeAndTestReduction()) {
367 std::cerr
<< "| shrank segment (skip: " << skip
<< ")\n";
369 skip
= std::min(size_t(factor
), 2 * skip
);
376 // the "opposite" of shrinking: copy a 'zero' element
377 for (auto& segment
: curr
->segments
) {
378 if (segment
.data
.empty()) continue;
379 for (auto& item
: segment
.data
) {
380 if (!shouldTryToReduce(bonus
)) continue;
381 if (item
== zero
) continue;
384 if (writeAndTestReduction()) {
385 std::cerr
<< "| zeroed segment\n";
394 void visitModule(Module
* curr
) {
395 // try to remove exports
396 std::cerr
<< "| try to remove exports\n";
397 std::vector
<Export
> exports
;
398 for (auto& exp
: curr
->exports
) {
399 if (!shouldTryToReduce(10000)) continue;
400 exports
.push_back(*exp
);
402 for (auto& exp
: exports
) {
403 curr
->removeExport(exp
.name
);
404 if (!writeAndTestReduction()) {
405 curr
->addExport(new Export(exp
));
407 std::cerr
<< "| removed export " << exp
.name
<< '\n';
411 // try to remove functions
412 std::cerr
<< "| try to remove functions\n";
413 std::vector
<Function
> functions
;
414 for (auto& func
: curr
->functions
) {
415 if (!shouldTryToReduce(10000)) continue;
416 functions
.push_back(*func
);
418 for (auto& func
: functions
) {
419 curr
->removeFunction(func
.name
);
420 if (WasmValidator().validate(*curr
, Feature::MVP
, WasmValidator::Globally
| WasmValidator::Quiet
) &&
421 writeAndTestReduction()) {
422 std::cerr
<< "| removed function " << func
.name
<< '\n';
425 curr
->addFunction(new Function(func
));
432 // try to replace condition with always true and always false
433 void handleCondition(Expression
*& condition
) {
434 if (!condition
) return;
435 if (condition
->is
<Const
>()) return;
436 auto* c
= builder
->makeConst(Literal(int32_t(0)));
437 if (!tryToReplaceChild(condition
, c
)) {
438 c
->value
= Literal(int32_t(1));
439 tryToReplaceChild(condition
, c
);
444 void handleCall(T
* call
) {
445 for (auto* op
: call
->operands
) {
446 if (tryToReplaceCurrent(op
)) return;
450 bool tryToReduceCurrentToNone() {
451 auto* curr
= getCurrent();
452 if (curr
->is
<Nop
>()) return false;
453 // try to replace with a trivial value
455 if (tryToReplaceCurrent(&nop
)) {
456 replaceCurrent(builder
->makeNop());
462 // try to replace a concrete value with a trivial constant
463 bool tryToReduceCurrentToConst() {
464 auto* curr
= getCurrent();
465 if (curr
->is
<Const
>()) return false;
466 // try to replace with a trivial value
467 Const
* c
= builder
->makeConst(Literal(int32_t(0)));
468 if (tryToReplaceCurrent(c
)) return true;
469 c
->value
= LiteralUtils::makeLiteralFromInt32(1, curr
->type
);
470 c
->type
= curr
->type
;
471 return tryToReplaceCurrent(c
);
474 bool tryToReduceCurrentToUnreachable() {
475 auto* curr
= getCurrent();
476 if (curr
->is
<Unreachable
>()) return false;
477 // try to replace with a trivial value
479 if (tryToReplaceCurrent(&un
)) {
480 replaceCurrent(builder
->makeUnreachable());
483 // maybe a return? TODO
492 int main(int argc
, const char* argv
[]) {
493 std::string input
, test
, working
, command
;
494 bool verbose
= false,
497 Options
options("wasm-reduce", "Reduce a wasm file to a smaller one that has the same behavior on a given command");
499 .add("--command", "-cmd", "The command to run on the test, that we want to reduce while keeping the command's output identical. "
500 "We look at the command's return code and stdout here (TODO: stderr), "
501 "and we reduce while keeping those unchanged.",
502 Options::Arguments::One
,
503 [&](Options
* o
, const std::string
& argument
) {
506 .add("--test", "-t", "Test file (this will be written to to test, the given command should read it when we call it)",
507 Options::Arguments::One
,
508 [&](Options
* o
, const std::string
& argument
) {
511 .add("--working", "-w", "Working file (this will contain the current good state while doing temporary computations, "
512 "and will contain the final best result at the end)",
513 Options::Arguments::One
,
514 [&](Options
* o
, const std::string
& argument
) {
517 .add("--verbose", "-v", "Verbose output mode",
518 Options::Arguments::Zero
,
519 [&](Options
* o
, const std::string
& argument
) {
522 .add("--debugInfo", "-g", "Keep debug info in binaries",
523 Options::Arguments::Zero
,
524 [&](Options
* o
, const std::string
& argument
) {
527 .add("--force", "-f", "Force the reduction attempt, ignoring problems that imply it is unlikely to succeed",
528 Options::Arguments::Zero
,
529 [&](Options
* o
, const std::string
& argument
) {
532 .add("--timeout", "-to", "A timeout to apply to each execution of the command, in seconds (default: 2)",
533 Options::Arguments::One
,
534 [&](Options
* o
, const std::string
& argument
) {
535 timeout
= atoi(argument
.c_str());
536 std::cout
<< "|applying timeout: " << timeout
<< "\n";
538 .add_positional("INFILE", Options::Arguments::One
,
539 [&](Options
* o
, const std::string
& argument
) {
542 options
.parse(argc
, argv
);
544 if (test
.size() == 0) Fatal() << "test file not provided\n";
545 if (working
.size() == 0) Fatal() << "working file not provided\n";
547 std::cerr
<< "|input: " << input
<< '\n';
548 std::cerr
<< "|test: " << test
<< '\n';
549 std::cerr
<< "|working: " << working
<< '\n';
551 // get the expected output
552 copy_file(input
, test
);
553 expected
.getFromExecution(command
);
555 std::cerr
<< "|expected result:\n" << expected
<< '\n';
557 auto stopIfNotForced
= [&](std::string message
, ProgramResult
& result
) {
558 std::cerr
<< "|! " << message
<< '\n' << result
<< '\n';
560 Fatal() << "|! stopping, as it is very unlikely reduction can succeed (use -f to ignore this check)";
564 std::cerr
<< "|checking that command has different behavior on invalid binary\n";
567 std::ofstream
dst(test
, std::ios::binary
);
568 dst
<< "waka waka\n";
570 ProgramResult
result(command
);
571 if (result
== expected
) {
572 stopIfNotForced("running command on an invalid module should give different results", result
);
576 std::cerr
<< "|checking that command has expected behavior on canonicalized (read-written) binary\n";
579 ProgramResult
readWrite(std::string("bin/wasm-opt ") + input
+ " -o " + test
);
580 if (readWrite
.failed()) {
581 stopIfNotForced("failed to read and write the binary", readWrite
);
583 ProgramResult
result(command
);
584 if (result
!= expected
) {
585 stopIfNotForced("running command on the canonicalized module should give the same results", result
);
590 copy_file(input
, working
);
591 std::cerr
<< "|input size: " << file_size(working
) << "\n";
593 std::cerr
<< "|starting reduction!\n";
596 size_t lastDestructiveReductions
= 0;
597 size_t lastPostPassesSize
= 0;
599 bool stopping
= false;
602 Reducer
reducer(command
, test
, working
, verbose
, debugInfo
);
604 // run binaryen optimization passes to reduce. passes are fast to run
605 // and can often reduce large amounts of code efficiently, as opposed
606 // to detructive reduction (i.e., that doesn't preserve correctness as
607 // passes do) since destrucive must operate one change at a time
608 std::cerr
<< "| reduce using passes...\n";
609 auto oldSize
= file_size(working
);
610 reducer
.reduceUsingPasses();
611 auto newSize
= file_size(working
);
612 auto passProgress
= oldSize
- newSize
;
613 std::cerr
<< "| after pass reduction: " << newSize
<< "\n";
615 // always stop after a pass reduction attempt, for final cleanup
618 // check if the full cycle (destructive/passes) has helped or not
619 if (lastPostPassesSize
&& newSize
>= lastPostPassesSize
) {
620 std::cerr
<< "| progress has stopped, skipping to the end\n";
622 // this is after doing work with factor 1, so after the remaining work, stop
625 // just try to remove all we can and finish up
629 lastPostPassesSize
= newSize
;
631 // if destructive reductions lead to useful proportionate pass reductions, keep
632 // going at the same factor, as pass reductions are far faster
633 std::cerr
<< "| pass progress: " << passProgress
<< ", last destructive: " << lastDestructiveReductions
<< '\n';
634 if (passProgress
>= 4 * lastDestructiveReductions
) {
636 std::cerr
<< "| progress is good, do not quickly decrease factor\n";
639 factor
= (factor
/ 3) + 1;
641 factor
= (factor
+ 1) / 2; // stable on 1
645 // no point in a factor lorger than the size
646 assert(newSize
> 4); // wasm modules are >4 bytes anyhow
647 factor
= std::min(factor
, int(newSize
) / 4);
649 // try to reduce destructively. if a high factor fails to find anything,
650 // quickly try a lower one (no point in doing passes until we reduce
651 // destructively at least a little)
653 std::cerr
<< "| reduce destructively... (factor: " << factor
<< ")\n";
654 lastDestructiveReductions
= reducer
.reduceDestructively(factor
);
655 if (lastDestructiveReductions
> 0) break;
656 // we failed to reduce destructively
661 factor
= std::max(1, factor
/ 4); // quickly now, try to find *something* we can reduce
664 std::cerr
<< "| destructive reduction led to size: " << file_size(working
) << '\n';
666 std::cerr
<< "|finished, final size: " << file_size(working
) << "\n";
667 copy_file(working
, test
); // just to avoid confusion