src/binaryen/src/passes/Inlining.cpp

   1 /*
   2  * Copyright 2016 WebAssembly Community Group participants
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 //
  18 // Inlining.
  19 //
  20 // By default, this does a conservative inlining of all functions that have
  21 // exactly one use, and are fairly small. That should not increase code
  22 // size, and may have speed benefits.
  23 //
  24 // When opt level is 3+ (-O3 or above), we more aggressively inline
  25 // even functions with more than one use, that seem to be "lightweight"
  26 // (no loops or calls etc.), so inlining them may get rid of call overhead
  27 // that would be noticeable otherwise
  28 //
  29
  30 #include <atomic>
  31
  32 #include <wasm.h>
  33 #include <pass.h>
  34 #include <wasm-builder.h>
  35 #include <ir/utils.h>
  36 #include <ir/literal-utils.h>
  37 #include <parsing.h>
  38
  39 namespace wasm {
  40
  41 // A limit on how big a function to inline when being careful about size
  42 static const int CAREFUL_SIZE_LIMIT = 15;
  43
  44 // A limit on how big a function to inline when being more flexible. In
  45 // particular it's nice that with this limit we can inline the clamp
  46 // functions (i32s-div, f64-to-int, etc.), that can affect perf.
  47 static const int FLEXIBLE_SIZE_LIMIT = 20;
  48
  49 // Useful into on a function, helping us decide if we can inline it
  50 struct FunctionInfo {
  51   std::atomic<Index> calls;
  52   Index size;
  53   bool lightweight = true;
  54   bool usedGlobally = false; // in a table or export
  55
  56   bool worthInlining(PassOptions& options, bool allowMultipleInliningsPerFunction) {
  57     // if it's big, it's just not worth doing (TODO: investigate more)
  58     if (size > FLEXIBLE_SIZE_LIMIT) return false;
  59     // if it has one use, then inlining it would likely reduce code size
  60     // since we are just moving code around, + optimizing, so worth it
  61     // if small enough that we are pretty sure its ok
  62     if (calls == 1 && !usedGlobally && size <= CAREFUL_SIZE_LIMIT) return true;
  63     if (!allowMultipleInliningsPerFunction) return false;
  64     // more than one use, so we can't eliminate it after inlining,
  65     // so only worth it if we really care about speed and don't care
  66     // about size, and if it's lightweight so a good candidate for
  67     // speeding us up
  68     return options.optimizeLevel >= 3 && options.shrinkLevel == 0 && lightweight;
  69   }
  70 };
  71
  72 typedef std::unordered_map<Name, FunctionInfo> NameInfoMap;
  73
  74 struct FunctionInfoScanner : public WalkerPass<PostWalker<FunctionInfoScanner>> {
  75   bool isFunctionParallel() override { return true; }
  76
  77   FunctionInfoScanner(NameInfoMap* infos) : infos(infos) {}
  78
  79   FunctionInfoScanner* create() override {
  80     return new FunctionInfoScanner(infos);
  81   }
  82
  83   void visitLoop(Loop* curr) {
  84     // having a loop is not lightweight
  85     (*infos)[getFunction()->name].lightweight = false;
  86   }
  87
  88   void visitCall(Call* curr) {
  89     assert(infos->count(curr->target) > 0); // can't add a new element in parallel
  90     (*infos)[curr->target].calls++;
  91     // having a call is not lightweight
  92     (*infos)[getFunction()->name].lightweight = false;
  93   }
  94
  95   void visitFunction(Function* curr) {
  96     (*infos)[curr->name].size = Measurer::measure(curr->body);
  97   }
  98
  99 private:
 100   NameInfoMap* infos;
 101 };
 102
 103 struct InliningAction {
 104   Expression** callSite;
 105   Function* contents;
 106
 107   InliningAction(Expression** callSite, Function* contents) : callSite(callSite), contents(contents) {}
 108 };
 109
 110 struct InliningState {
 111   std::unordered_set<Name> worthInlining;
 112   std::unordered_map<Name, std::vector<InliningAction>> actionsForFunction; // function name => actions that can be performed in it
 113 };
 114
 115 struct Planner : public WalkerPass<PostWalker<Planner>> {
 116   bool isFunctionParallel() override { return true; }
 117
 118   Planner(InliningState* state) : state(state) {}
 119
 120   Planner* create() override {
 121     return new Planner(state);
 122   }
 123
 124   void visitCall(Call* curr) {
 125     // plan to inline if we know this is valid to inline, and if the call is
 126     // actually performed - if it is dead code, it's pointless to inline
 127     if (state->worthInlining.count(curr->target) &&
 128         curr->type != unreachable) {
 129       // nest the call in a block. that way the location of the pointer to the call will not
 130       // change even if we inline multiple times into the same function, otherwise
 131       // call1(call2()) might be a problem
 132       auto* block = Builder(*getModule()).makeBlock(curr);
 133       replaceCurrent(block);
 134       assert(state->actionsForFunction.count(getFunction()->name) > 0); // can't add a new element in parallel
 135       state->actionsForFunction[getFunction()->name].emplace_back(&block->list[0], getModule()->getFunction(curr->target));
 136     }
 137   }
 138
 139   void doWalkFunction(Function* func) {
 140     // we shouldn't inline into us if we are to be inlined
 141     // ourselves - that has the risk of cycles
 142     if (state->worthInlining.count(func->name) == 0) {
 143       walk(func->body);
 144     }
 145   }
 146
 147 private:
 148   InliningState* state;
 149 };
 150
 151 // Core inlining logic. Modifies the outside function (adding locals as
 152 // needed), and returns the inlined code.
 153 static Expression* doInlining(Module* module, Function* into, InliningAction& action) {
 154   Function* from = action.contents;
 155   auto* call = (*action.callSite)->cast<Call>();
 156   Builder builder(*module);
 157   auto* block = Builder(*module).makeBlock();
 158   block->name = Name(std::string("__inlined_func$") + from->name.str);
 159   *action.callSite = block;
 160   // set up a locals mapping
 161   struct Updater : public PostWalker<Updater> {
 162     std::map<Index, Index> localMapping;
 163     Name returnName;
 164     Builder* builder;
 165
 166     void visitReturn(Return* curr) {
 167       replaceCurrent(builder->makeBreak(returnName, curr->value));
 168     }
 169     void visitGetLocal(GetLocal* curr) {
 170       curr->index = localMapping[curr->index];
 171     }
 172     void visitSetLocal(SetLocal* curr) {
 173       curr->index = localMapping[curr->index];
 174     }
 175   } updater;
 176   updater.returnName = block->name;
 177   updater.builder = &builder;
 178   for (Index i = 0; i < from->getNumLocals(); i++) {
 179     updater.localMapping[i] = builder.addVar(into, from->getLocalType(i));
 180   }
 181   // assign the operands into the params
 182   for (Index i = 0; i < from->params.size(); i++) {
 183     block->list.push_back(builder.makeSetLocal(updater.localMapping[i], call->operands[i]));
 184   }
 185   // zero out the vars (as we may be in a loop, and may depend on their zero-init value
 186   for (Index i = 0; i < from->vars.size(); i++) {
 187     block->list.push_back(builder.makeSetLocal(updater.localMapping[from->getVarIndexBase() + i], LiteralUtils::makeZero(from->vars[i], *module)));
 188   }
 189   // generate and update the inlined contents
 190   auto* contents = ExpressionManipulator::copy(from->body, *module);
 191   updater.walk(contents);
 192   block->list.push_back(contents);
 193   block->type = call->type;
 194   // if the function returned a value, we just set the block containing the
 195   // inlined code to have that type. or, if the function was void and
 196   // contained void, that is fine too. a bad case is a void function in which
 197   // we have unreachable code, so we would be replacing a void call with an
 198   // unreachable; we need to handle
 199   if (contents->type == unreachable && block->type == none) {
 200     // make the block reachable by adding a break to it
 201     block->list.push_back(builder.makeBreak(block->name));
 202   }
 203   return block;
 204 }
 205
 206 struct Inlining : public Pass {
 207   // whether to optimize where we inline
 208   bool optimize = false;
 209
 210   NameInfoMap infos;
 211
 212   bool firstIteration;
 213
 214   void run(PassRunner* runner, Module* module) override {
 215     // keep going while we inline, to handle nesting. TODO: optimize
 216     firstIteration = true;
 217     while (1) {
 218       calculateInfos(module);
 219       if (!iteration(runner, module)) {
 220         return;
 221       }
 222       firstIteration = false;
 223     }
 224   }
 225
 226   void calculateInfos(Module* module) {
 227     infos.clear();
 228     // fill in info, as we operate on it in parallel (each function to its own entry)
 229     for (auto& func : module->functions) {
 230       infos[func->name];
 231     }
 232     PassRunner runner(module);
 233     runner.setIsNested(true);
 234     runner.add<FunctionInfoScanner>(&infos);
 235     runner.run();
 236     // fill in global uses
 237     // anything exported or used in a table should not be inlined
 238     for (auto& ex : module->exports) {
 239       if (ex->kind == ExternalKind::Function) {
 240         infos[ex->value].usedGlobally = true;
 241       }
 242     }
 243     for (auto& segment : module->table.segments) {
 244       for (auto name : segment.data) {
 245         if (module->getFunctionOrNull(name)) {
 246           infos[name].usedGlobally = true;
 247         }
 248       }
 249     }
 250   }
 251
 252   bool iteration(PassRunner* runner, Module* module) {
 253     // decide which to inline
 254     InliningState state;
 255     for (auto& func : module->functions) {
 256       // on the first iteration, allow multiple inlinings per function
 257       if (infos[func->name].worthInlining(runner->options, firstIteration /* allowMultipleInliningsPerFunction */)) {
 258         state.worthInlining.insert(func->name);
 259       }
 260     }
 261     if (state.worthInlining.size() == 0) return false;
 262     // fill in actionsForFunction, as we operate on it in parallel (each function to its own entry)
 263     for (auto& func : module->functions) {
 264       state.actionsForFunction[func->name];
 265     }
 266     // find and plan inlinings
 267     {
 268       PassRunner runner(module);
 269       runner.setIsNested(true);
 270       runner.add<Planner>(&state);
 271       runner.run();
 272     }
 273     // perform inlinings TODO: parallelize
 274     std::unordered_map<Name, Index> inlinedUses; // how many uses we inlined
 275     std::unordered_set<Function*> inlinedInto; // which functions were inlined into
 276     for (auto& func : module->functions) {
 277       for (auto& action : state.actionsForFunction[func->name]) {
 278         Name inlinedName = action.contents->name;
 279         doInlining(module, func.get(), action);
 280         inlinedUses[inlinedName]++;
 281         inlinedInto.insert(func.get());
 282         assert(inlinedUses[inlinedName] <= infos[inlinedName].calls);
 283       }
 284     }
 285     // anything we inlined into may now have non-unique label names, fix it up
 286     for (auto func : inlinedInto) {
 287       wasm::UniqueNameMapper::uniquify(func->body);
 288     }
 289     if (optimize && inlinedInto.size() > 0) {
 290       doOptimize(inlinedInto, module, runner);
 291     }
 292     // remove functions that we no longer need after inlining
 293     auto& funcs = module->functions;
 294     funcs.erase(std::remove_if(funcs.begin(), funcs.end(), [&](const std::unique_ptr<Function>& curr) {
 295       auto name = curr->name;
 296       auto& info = infos[name];
 297       return inlinedUses.count(name) && inlinedUses[name] == info.calls && !info.usedGlobally;
 298     }), funcs.end());
 299     // return whether we did any work
 300     return inlinedUses.size() > 0;
 301   }
 302
 303   // Run useful optimizations after inlining, things like removing
 304   // unnecessary new blocks, sharing variables, etc.
 305   void doOptimize(std::unordered_set<Function*>& funcs, Module* module, PassRunner* parentRunner) {
 306     // save the full list of functions on the side
 307     std::vector<std::unique_ptr<Function>> all;
 308     all.swap(module->functions);
 309     module->updateMaps();
 310     for (auto& func : funcs) {
 311       module->addFunction(func);
 312     }
 313     PassRunner runner(module, parentRunner->options);
 314     runner.setIsNested(true);
 315     runner.setValidateGlobally(false); // not a full valid module
 316     runner.add("precompute-propagate");
 317     runner.add("remove-unused-brs");
 318     runner.add("remove-unused-names");
 319     runner.add("coalesce-locals");
 320     runner.add("simplify-locals");
 321     runner.add("vacuum");
 322     runner.add("reorder-locals");
 323     runner.add("remove-unused-brs");
 324     runner.add("merge-blocks");
 325     runner.run();
 326     // restore all the funcs
 327     for (auto& func : module->functions) {
 328       func.release();
 329     }
 330     all.swap(module->functions);
 331     module->updateMaps();
 332   }
 333 };
 334
 335 Pass *createInliningPass() {
 336   return new Inlining();
 337 }
 338
 339 Pass *createInliningOptimizingPass() {
 340   auto* ret = new Inlining();
 341   ret->optimize = true;
 342   return ret;
 343 }
 344
 345 } // namespace wasm
 346