/*
 * Copyright 2016 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//
// Inlining.
//
// By default, this does a conservative inlining of all functions that have
// exactly one use and are fairly small. That should not increase code
// size, and may have speed benefits.
//
// When the opt level is 3+ (-O3 or above), we inline more aggressively,
// even functions with more than one use that seem to be "lightweight"
// (no loops or calls etc.), so inlining them may get rid of call overhead
// that would otherwise be noticeable.
//
#include <atomic>

#include <wasm.h>
#include <pass.h>
#include <wasm-builder.h>
#include <ir/utils.h>
#include <ir/literal-utils.h>
#include <parsing.h>

namespace wasm {

// A limit on how big a function to inline when being careful about size
static const int CAREFUL_SIZE_LIMIT = 15;

// A limit on how big a function to inline when being more flexible. In
// particular it's nice that with this limit we can inline the clamp
// functions (i32s-div, f64-to-int, etc.), which can affect perf.
static const int FLEXIBLE_SIZE_LIMIT = 20;

// Useful info on a function, helping us decide if we can inline it
struct FunctionInfo {
  std::atomic<Index> calls;
  Index size;
  bool lightweight = true;
  bool usedGlobally = false; // in a table or export

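  // Decides whether inlining is worthwhile under the limits above. As a
  // worked example (illustrative numbers): a function of size 12 with a
  // single call site and no global uses is inlined at any opt level, while
  // a size-18 function with several call sites is inlined only at -O3 with
  // shrinkLevel 0, only if it is lightweight (no loops or calls), and only
  // when multiple inlinings per function are allowed.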
  bool worthInlining(PassOptions& options, bool allowMultipleInliningsPerFunction) {
    // if it's big, it's just not worth doing (TODO: investigate more)
    if (size > FLEXIBLE_SIZE_LIMIT) return false;
    // if it has one use, then inlining it would likely reduce code size,
    // since we are just moving code around while also optimizing, so it is
    // worth it if the function is small enough that we are pretty sure it's ok
    if (calls == 1 && !usedGlobally && size <= CAREFUL_SIZE_LIMIT) return true;
    if (!allowMultipleInliningsPerFunction) return false;
    // more than one use, so we can't eliminate it after inlining,
    // so it is only worth it if we really care about speed and don't care
    // about size, and if it's lightweight, making it a good candidate for
    // speeding us up
    return options.optimizeLevel >= 3 && options.shrinkLevel == 0 && lightweight;
  }
};

typedef std::unordered_map<Name, FunctionInfo> NameInfoMap;

struct FunctionInfoScanner : public WalkerPass<PostWalker<FunctionInfoScanner>> {
  bool isFunctionParallel() override { return true; }

  FunctionInfoScanner(NameInfoMap* infos) : infos(infos) {}

  FunctionInfoScanner* create() override {
    return new FunctionInfoScanner(infos);
  }

  void visitLoop(Loop* curr) {
    // having a loop is not lightweight
    (*infos)[getFunction()->name].lightweight = false;
  }

  void visitCall(Call* curr) {
    assert(infos->count(curr->target) > 0); // can't add a new element in parallel
    (*infos)[curr->target].calls++;
    // having a call is not lightweight
    (*infos)[getFunction()->name].lightweight = false;
  }

  void visitFunction(Function* curr) {
    (*infos)[curr->name].size = Measurer::measure(curr->body);
  }

private:
  NameInfoMap* infos;
};

struct InliningAction {
  Expression** callSite;
  Function* contents;

  InliningAction(Expression** callSite, Function* contents) : callSite(callSite), contents(contents) {}
};

struct InliningState {
  std::unordered_set<Name> worthInlining;
  std::unordered_map<Name, std::vector<InliningAction>> actionsForFunction; // function name => actions that can be performed in it
};

struct Planner : public WalkerPass<PostWalker<Planner>> {
  bool isFunctionParallel() override { return true; }

  Planner(InliningState* state) : state(state) {}

  Planner* create() override {
    return new Planner(state);
  }

  void visitCall(Call* curr) {
    // plan to inline if we know this is valid to inline, and if the call is
    // actually performed - if it is dead code, it's pointless to inline
    if (state->worthInlining.count(curr->target) &&
        curr->type != unreachable) {
      // nest the call in a block. that way the location of the pointer to
      // the call will not change even if we inline multiple times into the
      // same function; otherwise nested calls like call1(call2()) might be
      // a problem, as inlining one could move the other
      auto* block = Builder(*getModule()).makeBlock(curr);
      replaceCurrent(block);
      assert(state->actionsForFunction.count(getFunction()->name) > 0); // can't add a new element in parallel
      state->actionsForFunction[getFunction()->name].emplace_back(&block->list[0], getModule()->getFunction(curr->target));
    }
  }

  void doWalkFunction(Function* func) {
    // we shouldn't inline into a function that is itself to be inlined -
    // that has the risk of cycles
    if (state->worthInlining.count(func->name) == 0) {
      walk(func->body);
    }
  }

private:
  InliningState* state;
};

// Core inlining logic. Modifies the outside function (adding locals as
// needed), and returns the inlined code.
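// Roughly, a call to a function with params and vars becomes a block of
// this shape (a sketch; the local names here are illustrative):
//
//   (block $__inlined_func$name          ;; returns become breaks to here
//     (set_local $fresh_param (operand)) ;; one per call operand
//     (set_local $fresh_var (zero))      ;; one per var, re-zeroed since
//                                        ;; we may be inside a loop
//     ..the copied body, with locals remapped..
//   )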
static Expression* doInlining(Module* module, Function* into, InliningAction& action) {
  Function* from = action.contents;
  auto* call = (*action.callSite)->cast<Call>();
  Builder builder(*module);
  auto* block = builder.makeBlock();
  block->name = Name(std::string("__inlined_func$") + from->name.str);
  *action.callSite = block;
  // set up an updater that remaps the inlinee's locals to fresh locals in
  // the target function, and that turns returns into breaks out of the
  // inlined block
  struct Updater : public PostWalker<Updater> {
    std::map<Index, Index> localMapping;
    Name returnName;
    Builder* builder;

    void visitReturn(Return* curr) {
      replaceCurrent(builder->makeBreak(returnName, curr->value));
    }
    void visitGetLocal(GetLocal* curr) {
      curr->index = localMapping[curr->index];
    }
    void visitSetLocal(SetLocal* curr) {
      curr->index = localMapping[curr->index];
    }
  } updater;
  updater.returnName = block->name;
  updater.builder = &builder;
  for (Index i = 0; i < from->getNumLocals(); i++) {
    updater.localMapping[i] = builder.addVar(into, from->getLocalType(i));
  }
  // assign the operands into the params
  for (Index i = 0; i < from->params.size(); i++) {
    block->list.push_back(builder.makeSetLocal(updater.localMapping[i], call->operands[i]));
  }
  // zero out the vars (as we may be in a loop, and may depend on their zero-init value)
  for (Index i = 0; i < from->vars.size(); i++) {
    block->list.push_back(builder.makeSetLocal(updater.localMapping[from->getVarIndexBase() + i], LiteralUtils::makeZero(from->vars[i], *module)));
  }
  // generate and update the inlined contents
  auto* contents = ExpressionManipulator::copy(from->body, *module);
  updater.walk(contents);
  block->list.push_back(contents);
  block->type = call->type;
  // if the function returned a value, we just set the block containing the
  // inlined code to have that type. or, if the function was void and
  // contained void, that is fine too. a bad case is a void function in which
  // we have unreachable code, so we would be replacing a void call with an
  // unreachable; we need to handle that case
  if (contents->type == unreachable && block->type == none) {
    // make the block reachable by adding a break to it
    block->list.push_back(builder.makeBreak(block->name));
  }
  return block;
}

struct Inlining : public Pass {
  // whether to optimize where we inline
  bool optimize = false;

  NameInfoMap infos;

  bool firstIteration;

  void run(PassRunner* runner, Module* module) override {
    // keep going while we inline, to handle nesting. TODO: optimize
    firstIteration = true;
    while (1) {
      calculateInfos(module);
      if (!iteration(runner, module)) {
        return;
      }
      firstIteration = false;
    }
  }

  void calculateInfos(Module* module) {
    infos.clear();
    // fill in info, as we operate on it in parallel (each function to its own entry)
    for (auto& func : module->functions) {
      infos[func->name];
    }
    PassRunner runner(module);
    runner.setIsNested(true);
    runner.add<FunctionInfoScanner>(&infos);
    runner.run();
    // fill in global uses
    // anything exported or used in a table should not be inlined
    for (auto& ex : module->exports) {
      if (ex->kind == ExternalKind::Function) {
        infos[ex->value].usedGlobally = true;
      }
    }
    for (auto& segment : module->table.segments) {
      for (auto name : segment.data) {
        if (module->getFunctionOrNull(name)) {
          infos[name].usedGlobally = true;
        }
      }
    }
  }

  bool iteration(PassRunner* runner, Module* module) {
    // decide which to inline
    InliningState state;
    for (auto& func : module->functions) {
      // on the first iteration, allow multiple inlinings per function
      if (infos[func->name].worthInlining(runner->options, firstIteration /* allowMultipleInliningsPerFunction */)) {
        state.worthInlining.insert(func->name);
      }
    }
    if (state.worthInlining.size() == 0) return false;
    // fill in actionsForFunction, as we operate on it in parallel (each function to its own entry)
    for (auto& func : module->functions) {
      state.actionsForFunction[func->name];
    }
    // find and plan inlinings
    {
      PassRunner runner(module);
      runner.setIsNested(true);
      runner.add<Planner>(&state);
      runner.run();
    }
    // perform inlinings TODO: parallelize
    std::unordered_map<Name, Index> inlinedUses; // how many uses we inlined
    std::unordered_set<Function*> inlinedInto; // which functions were inlined into
    for (auto& func : module->functions) {
      for (auto& action : state.actionsForFunction[func->name]) {
        Name inlinedName = action.contents->name;
        doInlining(module, func.get(), action);
        inlinedUses[inlinedName]++;
        inlinedInto.insert(func.get());
        assert(inlinedUses[inlinedName] <= infos[inlinedName].calls);
      }
    }
    // anything we inlined into may now have non-unique label names, fix it up
    for (auto func : inlinedInto) {
      wasm::UniqueNameMapper::uniquify(func->body);
    }
    if (optimize && inlinedInto.size() > 0) {
      doOptimize(inlinedInto, module, runner);
    }
    // remove functions that we no longer need after inlining
    auto& funcs = module->functions;
    funcs.erase(std::remove_if(funcs.begin(), funcs.end(), [&](const std::unique_ptr<Function>& curr) {
      auto name = curr->name;
      auto& info = infos[name];
      return inlinedUses.count(name) && inlinedUses[name] == info.calls && !info.usedGlobally;
    }), funcs.end());
    // return whether we did any work
    return inlinedUses.size() > 0;
  }

  // Run useful optimizations after inlining, things like removing
  // unnecessary new blocks, sharing variables, etc.
  void doOptimize(std::unordered_set<Function*>& funcs, Module* module, PassRunner* parentRunner) {
    // save the full list of functions on the side
    std::vector<std::unique_ptr<Function>> all;
    all.swap(module->functions);
    module->updateMaps();
    for (auto& func : funcs) {
      module->addFunction(func);
    }
    PassRunner runner(module, parentRunner->options);
    runner.setIsNested(true);
    runner.setValidateGlobally(false); // not a fully valid module
    runner.add("precompute-propagate");
    runner.add("remove-unused-brs");
    runner.add("remove-unused-names");
    runner.add("coalesce-locals");
    runner.add("simplify-locals");
    runner.add("vacuum");
    runner.add("reorder-locals");
    runner.add("remove-unused-brs");
    runner.add("merge-blocks");
    runner.run();
    // restore the original list of functions. release the module's
    // temporary ownership of the inlined-into functions first, as `all`
    // still owns them; otherwise they would be destroyed twice
    for (auto& func : module->functions) {
      func.release();
    }
    all.swap(module->functions);
    module->updateMaps();
  }
};

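// Factory functions, used by the pass registry. As an illustrative usage
// sketch (assuming the pass names "inlining" and "inlining-optimizing" are
// the ones registered for these factories elsewhere in Binaryen):
//
//   PassRunner runner(&wasm); // wasm is a Module
//   runner.add("inlining");   // or "inlining-optimizing"
//   runner.run();
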
Pass *createInliningPass() {
  return new Inlining();
}

Pass *createInliningOptimizingPass() {
  auto* ret = new Inlining();
  ret->optimize = true;
  return ret;
}

} // namespace wasm