]> git.proxmox.com Git - rustc.git/blob - src/binaryen/src/tools/wasm-reduce.cpp
New upstream version 1.23.0+dfsg1
[rustc.git] / src / binaryen / src / tools / wasm-reduce.cpp
1 /*
2 * Copyright 2017 WebAssembly Community Group participants
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 //
18 // Tries to reduce the input wasm into the smallest possible wasm
19 // that still generates the same result on a given command. This is
20 // useful to reduce bug testcases, for example, if a file crashes
21 // the optimizer, you can reduce it to find the smallest file that
22 // also crashes it (which generally will show the same bug, in a
23 // much more debuggable manner).
24 //
25
26 #include <memory>
27 #include <cstdio>
28 #include <cstdlib>
29
30 #include "pass.h"
31 #include "support/command-line.h"
32 #include "support/file.h"
33 #include "wasm-io.h"
34 #include "wasm-builder.h"
35 #include "ir/literal-utils.h"
36 #include "wasm-validator.h"
37
38 using namespace wasm;
39
40 // a timeout on every execution of the command
41 size_t timeout = 2;
42
43 struct ProgramResult {
44 int code;
45 std::string output;
46
47 ProgramResult() {}
48 ProgramResult(std::string command) {
49 getFromExecution(command);
50 }
51
52 // runs the command and notes the output
53 // TODO: also stderr, not just stdout?
54 void getFromExecution(std::string command) {
55 // do this using just core stdio.h and stdlib.h, for portability
56 // sadly this requires two invokes
57 code = system(("timeout " + std::to_string(timeout) + "s " + command + " > /dev/null 2> /dev/null").c_str());
58 const int MAX_BUFFER = 1024;
59 char buffer[MAX_BUFFER];
60 FILE *stream = popen(("timeout " + std::to_string(timeout) + "s " + command + " 2> /dev/null").c_str(), "r");
61 while (fgets(buffer, MAX_BUFFER, stream) != NULL) {
62 output.append(buffer);
63 }
64 pclose(stream);
65 }
66
67 bool operator==(ProgramResult& other) {
68 return code == other.code && output == other.output;
69 }
70 bool operator!=(ProgramResult& other) {
71 return !(*this == other);
72 }
73
74 bool failed() {
75 return code != 0;
76 }
77
78 void dump(std::ostream& o) {
79 o << "[ProgramResult] code: " << code << " stdout: \n" << output << "[/ProgramResult]\n";
80 }
81 };
82
83 namespace std {
84
85 inline std::ostream& operator<<(std::ostream& o, ProgramResult& result) {
86 result.dump(o);
87 return o;
88 }
89
90 }
91
92 ProgramResult expected;
93
94 struct Reducer : public WalkerPass<PostWalker<Reducer, UnifiedExpressionVisitor<Reducer>>> {
95 std::string command, test, working;
96 bool verbose, debugInfo;
97
98 // test is the file we write to that the command will operate on
99 // working is the current temporary state, the reduction so far
100 Reducer(std::string command, std::string test, std::string working, bool verbose, bool debugInfo) : command(command), test(test), working(working), verbose(verbose), debugInfo(debugInfo) {}
101
102 // runs passes in order to reduce, until we can't reduce any more
103 // the criterion here is wasm binary size
104 void reduceUsingPasses() {
105 // run optimization passes until we can't shrink it any more
106 std::vector<std::string> passes = {
107 "-Oz",
108 "-Os",
109 "-O1",
110 "-O2",
111 "-O3",
112 "--coalesce-locals --vacuum",
113 "--dce",
114 "--duplicate-function-elimination",
115 "--inlining",
116 "--inlining-optimizing",
117 "--optimize-level=3 --inlining-optimizing",
118 "--local-cse --vacuum",
119 "--memory-packing",
120 "--remove-unused-names --merge-blocks --vacuum",
121 "--optimize-instructions",
122 "--precompute",
123 "--remove-imports",
124 "--remove-memory",
125 "--remove-unused-names --remove-unused-brs",
126 "--remove-unused-module-elements",
127 "--reorder-functions",
128 "--reorder-locals",
129 "--simplify-locals --vacuum",
130 "--vacuum"
131 };
132 auto oldSize = file_size(working);
133 bool more = true;
134 while (more) {
135 //std::cerr << "| starting passes loop iteration\n";
136 more = false;
137 // try both combining with a generic shrink (so minor pass overhead is compensated for), and without
138 for (auto shrinking : { false, true }) {
139 for (auto pass : passes) {
140 std::string currCommand = "bin/wasm-opt ";
141 if (shrinking) currCommand += " --dce --vacuum ";
142 currCommand += working + " -o " + test + " " + pass;
143 if (debugInfo) currCommand += " -g ";
144 if (verbose) std::cerr << "| trying pass command: " << currCommand << "\n";
145 if (!ProgramResult(currCommand).failed()) {
146 auto newSize = file_size(test);
147 if (newSize < oldSize) {
148 // the pass didn't fail, and the size looks smaller, so promising
149 // see if it is still has the property we are preserving
150 if (ProgramResult(command) == expected) {
151 std::cerr << "| command \"" << currCommand << "\" succeeded, reduced size to " << newSize << ", and preserved the property\n";
152 copy_file(test, working);
153 more = true;
154 oldSize = newSize;
155 }
156 }
157 }
158 }
159 }
160 }
161 if (verbose) std::cerr << "| done with passes for now\n";
162 }
163
164 // does one pass of slow and destructive reduction. returns whether it
165 // succeeded or not
166 // the criterion here is a logical change in the program. this may actually
167 // increase wasm size in some cases, but it should allow more reduction later.
168 // @param factor how much to ignore. starting with a high factor skips through
169 // most of the file, which is often faster than going one by one
170 // from the start
171 size_t reduceDestructively(int factor_) {
172 factor = factor_;
173 Module wasm;
174 ModuleReader reader;
175 reader.read(working, wasm);
176 // prepare
177 reduced = 0;
178 builder = make_unique<Builder>(wasm);
179 funcsSeen = 0;
180 // before we do any changes, it should be valid to write out the module:
181 // size should be as expected, and output should be as expected
182 setModule(&wasm);
183 if (!writeAndTestReduction()) {
184 std::cerr << "\n|! WARNING: writing before destructive reduction fails, very unlikely reduction can work\n\n";
185 }
186 // destroy!
187 walkModule(&wasm);
188 return reduced;
189 }
190
191 // destructive reduction state
192
193 size_t reduced;
194 Expression* beforeReduction;
195 std::unique_ptr<Builder> builder;
196 Index funcsSeen;
197 int factor;
198
199 // write the module and see if the command still fails on it as expected
200 bool writeAndTestReduction() {
201 ProgramResult result;
202 return writeAndTestReduction(result);
203 }
204
205 bool writeAndTestReduction(ProgramResult& out) {
206 // write the module out
207 ModuleWriter writer;
208 writer.setBinary(true);
209 writer.setDebugInfo(debugInfo);
210 writer.write(*getModule(), test);
211 // note that it is ok for the destructively-reduced module to be bigger
212 // than the previous - each destructive reduction removes logical code,
213 // and so is strictly better, even if the wasm binary format happens to
214 // encode things slightly less efficiently.
215 // test it
216 out.getFromExecution(command);
217 return out == expected;
218 }
219
220 bool shouldTryToReduce(size_t bonus = 1) {
221 static size_t counter = 0;
222 counter += bonus;
223 return (counter % factor) <= bonus;
224 }
225
226 // tests a reduction on the current traversal node, and undos if it failed
227 bool tryToReplaceCurrent(Expression* with) {
228 auto* curr = getCurrent();
229 //std::cerr << "try " << curr << " => " << with << '\n';
230 if (curr->type != with->type) return false;
231 if (!shouldTryToReduce()) return false;
232 replaceCurrent(with);
233 if (!writeAndTestReduction()) {
234 replaceCurrent(curr);
235 return false;
236 }
237 std::cerr << "| tryToReplaceCurrent succeeded (in " << getLocation() << ")\n";
238 noteReduction();
239 return true;
240 }
241
242 void noteReduction() {
243 reduced++;
244 copy_file(test, working);
245 }
246
247 // tests a reduction on an arbitrary child
248 bool tryToReplaceChild(Expression*& child, Expression* with) {
249 if (child->type != with->type) return false;
250 if (!shouldTryToReduce()) return false;
251 auto* before = child;
252 child = with;
253 if (!writeAndTestReduction()) {
254 child = before;
255 return false;
256 }
257 std::cerr << "| tryToReplaceChild succeeded (in " << getLocation() << ")\n";
258 //std::cerr << "| " << before << " => " << with << '\n';
259 noteReduction();
260 return true;
261 }
262
263 std::string getLocation() {
264 if (getFunction()) return getFunction()->name.str;
265 return "(non-function context)";
266 }
267
268 // visitors. in each we try to remove code in a destructive and nontrivial way.
269 // "nontrivial" means something that optimization passes can't achieve, since we
270 // don't need to duplicate work that they do
271
272 void visitExpression(Expression* curr) {
273 if (curr->type == none) {
274 if (tryToReduceCurrentToNone()) return;
275 } else if (isConcreteWasmType(curr->type)) {
276 if (tryToReduceCurrentToConst()) return;
277 } else {
278 assert(curr->type == unreachable);
279 if (tryToReduceCurrentToUnreachable()) return;
280 }
281 // specific reductions
282 if (auto* iff = curr->dynCast<If>()) {
283 if (iff->type == none) {
284 // perhaps we need just the condition?
285 if (tryToReplaceCurrent(builder->makeDrop(iff->condition))) {
286 return;
287 }
288 }
289 handleCondition(iff->condition);
290 } else if (auto* br = curr->dynCast<Break>()) {
291 handleCondition(br->condition);
292 } else if (auto* select = curr->dynCast<Select>()) {
293 handleCondition(select->condition);
294 } else if (auto* sw = curr->dynCast<Switch>()) {
295 handleCondition(sw->condition);
296 } else if (auto* set = curr->dynCast<SetLocal>()) {
297 if (set->isTee()) {
298 // maybe we don't need the set
299 tryToReplaceCurrent(set->value);
300 }
301 } else if (auto* unary = curr->dynCast<Unary>()) {
302 // maybe we can pass through
303 tryToReplaceCurrent(unary->value);
304 } else if (auto* binary = curr->dynCast<Binary>()) {
305 // maybe we can pass through
306 if (!tryToReplaceCurrent(binary->left)) {
307 tryToReplaceCurrent(binary->right);
308 }
309 } else if (auto* call = curr->dynCast<Call>()) {
310 handleCall(call);
311 } else if (auto* call = curr->dynCast<CallImport>()) {
312 handleCall(call);
313 } else if (auto* call = curr->dynCast<CallIndirect>()) {
314 if (tryToReplaceCurrent(call->target)) return;
315 handleCall(call);
316 }
317 }
318
319 void visitFunction(Function* curr) {
320 // extra chance to work on the function toplevel element, as if it can
321 // be reduced it's great
322 visitExpression(curr->body);
323 // finish function
324 funcsSeen++;
325 static int last = 0;
326 int percentage = (100 * funcsSeen) / getModule()->functions.size();
327 if (std::abs(percentage - last) >= 5) {
328 std::cerr << "| " << percentage << "% of funcs complete\n";
329 last = percentage;
330 }
331 }
332
333 // TODO: bisection on segment shrinking?
334
335 void visitTable(Table* curr) {
336 std::cerr << "| try to simplify table\n";
337 Name first;
338 for (auto& segment : curr->segments) {
339 for (auto item : segment.data) {
340 first = item;
341 break;
342 }
343 if (!first.isNull()) break;
344 }
345 visitSegmented(curr, first, 100);
346 }
347
348 void visitMemory(Memory* curr) {
349 std::cerr << "| try to simplify memory\n";
350 visitSegmented(curr, 0, 2);
351 }
352
353 template<typename T, typename U>
354 void visitSegmented(T* curr, U zero, size_t bonus) {
355 // try to reduce to first function
356 // shrink segment elements
357 for (auto& segment : curr->segments) {
358 auto& data = segment.data;
359 size_t skip = 1; // when we succeed, try to shrink by more and more, similar to bisection
360 for (size_t i = 0; i < data.size() && !data.empty(); i++) {
361 if (!shouldTryToReduce(bonus)) continue;
362 auto save = data;
363 for (size_t j = 0; j < skip; j++) {
364 if (!data.empty()) data.pop_back();
365 }
366 if (writeAndTestReduction()) {
367 std::cerr << "| shrank segment (skip: " << skip << ")\n";
368 noteReduction();
369 skip = std::min(size_t(factor), 2 * skip);
370 } else {
371 data = save;
372 break;
373 }
374 }
375 }
376 // the "opposite" of shrinking: copy a 'zero' element
377 for (auto& segment : curr->segments) {
378 if (segment.data.empty()) continue;
379 for (auto& item : segment.data) {
380 if (!shouldTryToReduce(bonus)) continue;
381 if (item == zero) continue;
382 auto save = item;
383 item = zero;
384 if (writeAndTestReduction()) {
385 std::cerr << "| zeroed segment\n";
386 noteReduction();
387 } else {
388 item = save;
389 }
390 }
391 }
392 }
393
394 void visitModule(Module* curr) {
395 // try to remove exports
396 std::cerr << "| try to remove exports\n";
397 std::vector<Export> exports;
398 for (auto& exp : curr->exports) {
399 if (!shouldTryToReduce(10000)) continue;
400 exports.push_back(*exp);
401 }
402 for (auto& exp : exports) {
403 curr->removeExport(exp.name);
404 if (!writeAndTestReduction()) {
405 curr->addExport(new Export(exp));
406 } else {
407 std::cerr << "| removed export " << exp.name << '\n';
408 noteReduction();
409 }
410 }
411 // try to remove functions
412 std::cerr << "| try to remove functions\n";
413 std::vector<Function> functions;
414 for (auto& func : curr->functions) {
415 if (!shouldTryToReduce(10000)) continue;
416 functions.push_back(*func);
417 }
418 for (auto& func : functions) {
419 curr->removeFunction(func.name);
420 if (WasmValidator().validate(*curr, Feature::MVP, WasmValidator::Globally | WasmValidator::Quiet) &&
421 writeAndTestReduction()) {
422 std::cerr << "| removed function " << func.name << '\n';
423 noteReduction();
424 } else {
425 curr->addFunction(new Function(func));
426 }
427 }
428 }
429
430 // helpers
431
432 // try to replace condition with always true and always false
433 void handleCondition(Expression*& condition) {
434 if (!condition) return;
435 if (condition->is<Const>()) return;
436 auto* c = builder->makeConst(Literal(int32_t(0)));
437 if (!tryToReplaceChild(condition, c)) {
438 c->value = Literal(int32_t(1));
439 tryToReplaceChild(condition, c);
440 }
441 }
442
443 template<typename T>
444 void handleCall(T* call) {
445 for (auto* op : call->operands) {
446 if (tryToReplaceCurrent(op)) return;
447 }
448 }
449
450 bool tryToReduceCurrentToNone() {
451 auto* curr = getCurrent();
452 if (curr->is<Nop>()) return false;
453 // try to replace with a trivial value
454 Nop nop;
455 if (tryToReplaceCurrent(&nop)) {
456 replaceCurrent(builder->makeNop());
457 return true;
458 }
459 return false;
460 }
461
462 // try to replace a concrete value with a trivial constant
463 bool tryToReduceCurrentToConst() {
464 auto* curr = getCurrent();
465 if (curr->is<Const>()) return false;
466 // try to replace with a trivial value
467 Const* c = builder->makeConst(Literal(int32_t(0)));
468 if (tryToReplaceCurrent(c)) return true;
469 c->value = LiteralUtils::makeLiteralFromInt32(1, curr->type);
470 c->type = curr->type;
471 return tryToReplaceCurrent(c);
472 }
473
474 bool tryToReduceCurrentToUnreachable() {
475 auto* curr = getCurrent();
476 if (curr->is<Unreachable>()) return false;
477 // try to replace with a trivial value
478 Unreachable un;
479 if (tryToReplaceCurrent(&un)) {
480 replaceCurrent(builder->makeUnreachable());
481 return true;
482 }
483 // maybe a return? TODO
484 return false;
485 }
486 };
487
488 //
489 // main
490 //
491
492 int main(int argc, const char* argv[]) {
493 std::string input, test, working, command;
494 bool verbose = false,
495 debugInfo = false,
496 force = false;
497 Options options("wasm-reduce", "Reduce a wasm file to a smaller one that has the same behavior on a given command");
498 options
499 .add("--command", "-cmd", "The command to run on the test, that we want to reduce while keeping the command's output identical. "
500 "We look at the command's return code and stdout here (TODO: stderr), "
501 "and we reduce while keeping those unchanged.",
502 Options::Arguments::One,
503 [&](Options* o, const std::string& argument) {
504 command = argument;
505 })
506 .add("--test", "-t", "Test file (this will be written to to test, the given command should read it when we call it)",
507 Options::Arguments::One,
508 [&](Options* o, const std::string& argument) {
509 test = argument;
510 })
511 .add("--working", "-w", "Working file (this will contain the current good state while doing temporary computations, "
512 "and will contain the final best result at the end)",
513 Options::Arguments::One,
514 [&](Options* o, const std::string& argument) {
515 working = argument;
516 })
517 .add("--verbose", "-v", "Verbose output mode",
518 Options::Arguments::Zero,
519 [&](Options* o, const std::string& argument) {
520 verbose = true;
521 })
522 .add("--debugInfo", "-g", "Keep debug info in binaries",
523 Options::Arguments::Zero,
524 [&](Options* o, const std::string& argument) {
525 debugInfo = true;
526 })
527 .add("--force", "-f", "Force the reduction attempt, ignoring problems that imply it is unlikely to succeed",
528 Options::Arguments::Zero,
529 [&](Options* o, const std::string& argument) {
530 force = true;
531 })
532 .add("--timeout", "-to", "A timeout to apply to each execution of the command, in seconds (default: 2)",
533 Options::Arguments::One,
534 [&](Options* o, const std::string& argument) {
535 timeout = atoi(argument.c_str());
536 std::cout << "|applying timeout: " << timeout << "\n";
537 })
538 .add_positional("INFILE", Options::Arguments::One,
539 [&](Options* o, const std::string& argument) {
540 input = argument;
541 });
542 options.parse(argc, argv);
543
544 if (test.size() == 0) Fatal() << "test file not provided\n";
545 if (working.size() == 0) Fatal() << "working file not provided\n";
546
547 std::cerr << "|input: " << input << '\n';
548 std::cerr << "|test: " << test << '\n';
549 std::cerr << "|working: " << working << '\n';
550
551 // get the expected output
552 copy_file(input, test);
553 expected.getFromExecution(command);
554
555 std::cerr << "|expected result:\n" << expected << '\n';
556
557 auto stopIfNotForced = [&](std::string message, ProgramResult& result) {
558 std::cerr << "|! " << message << '\n' << result << '\n';
559 if (!force) {
560 Fatal() << "|! stopping, as it is very unlikely reduction can succeed (use -f to ignore this check)";
561 }
562 };
563
564 std::cerr << "|checking that command has different behavior on invalid binary\n";
565 {
566 {
567 std::ofstream dst(test, std::ios::binary);
568 dst << "waka waka\n";
569 }
570 ProgramResult result(command);
571 if (result == expected) {
572 stopIfNotForced("running command on an invalid module should give different results", result);
573 }
574 }
575
576 std::cerr << "|checking that command has expected behavior on canonicalized (read-written) binary\n";
577 {
578 // read and write it
579 ProgramResult readWrite(std::string("bin/wasm-opt ") + input + " -o " + test);
580 if (readWrite.failed()) {
581 stopIfNotForced("failed to read and write the binary", readWrite);
582 } else {
583 ProgramResult result(command);
584 if (result != expected) {
585 stopIfNotForced("running command on the canonicalized module should give the same results", result);
586 }
587 }
588 }
589
590 copy_file(input, working);
591 std::cerr << "|input size: " << file_size(working) << "\n";
592
593 std::cerr << "|starting reduction!\n";
594
595 int factor = 4096;
596 size_t lastDestructiveReductions = 0;
597 size_t lastPostPassesSize = 0;
598
599 bool stopping = false;
600
601 while (1) {
602 Reducer reducer(command, test, working, verbose, debugInfo);
603
604 // run binaryen optimization passes to reduce. passes are fast to run
605 // and can often reduce large amounts of code efficiently, as opposed
606 // to detructive reduction (i.e., that doesn't preserve correctness as
607 // passes do) since destrucive must operate one change at a time
608 std::cerr << "| reduce using passes...\n";
609 auto oldSize = file_size(working);
610 reducer.reduceUsingPasses();
611 auto newSize = file_size(working);
612 auto passProgress = oldSize - newSize;
613 std::cerr << "| after pass reduction: " << newSize << "\n";
614
615 // always stop after a pass reduction attempt, for final cleanup
616 if (stopping) break;
617
618 // check if the full cycle (destructive/passes) has helped or not
619 if (lastPostPassesSize && newSize >= lastPostPassesSize) {
620 std::cerr << "| progress has stopped, skipping to the end\n";
621 if (factor == 1) {
622 // this is after doing work with factor 1, so after the remaining work, stop
623 stopping = true;
624 } else {
625 // just try to remove all we can and finish up
626 factor = 1;
627 }
628 }
629 lastPostPassesSize = newSize;
630
631 // if destructive reductions lead to useful proportionate pass reductions, keep
632 // going at the same factor, as pass reductions are far faster
633 std::cerr << "| pass progress: " << passProgress << ", last destructive: " << lastDestructiveReductions << '\n';
634 if (passProgress >= 4 * lastDestructiveReductions) {
635 // don't change
636 std::cerr << "| progress is good, do not quickly decrease factor\n";
637 } else {
638 if (factor > 10) {
639 factor = (factor / 3) + 1;
640 } else {
641 factor = (factor + 1) / 2; // stable on 1
642 }
643 }
644
645 // no point in a factor lorger than the size
646 assert(newSize > 4); // wasm modules are >4 bytes anyhow
647 factor = std::min(factor, int(newSize) / 4);
648
649 // try to reduce destructively. if a high factor fails to find anything,
650 // quickly try a lower one (no point in doing passes until we reduce
651 // destructively at least a little)
652 while (1) {
653 std::cerr << "| reduce destructively... (factor: " << factor << ")\n";
654 lastDestructiveReductions = reducer.reduceDestructively(factor);
655 if (lastDestructiveReductions > 0) break;
656 // we failed to reduce destructively
657 if (factor == 1) {
658 stopping = true;
659 break;
660 }
661 factor = std::max(1, factor / 4); // quickly now, try to find *something* we can reduce
662 }
663
664 std::cerr << "| destructive reduction led to size: " << file_size(working) << '\n';
665 }
666 std::cerr << "|finished, final size: " << file_size(working) << "\n";
667 copy_file(working, test); // just to avoid confusion
668 }
669