2 * Copyright 2016 WebAssembly Community Group participants
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
20 // By default, this does a conservative inlining of all functions that have
21 // exactly one use, and are fairly small. That should not increase code
22 // size, and may have speed benefits.
// When the opt level is 3+ (-O3 or above), we more aggressively inline
// even functions with more than one use that seem to be "lightweight"
// (no loops or calls, etc.), so inlining them may get rid of call overhead
// that would be noticeable otherwise.
34 #include <wasm-builder.h>
36 #include <ir/literal-utils.h>
// A limit on how big a function to inline when being careful about size,
// i.e., in the conservative single-use inlining mode.
static const int CAREFUL_SIZE_LIMIT = 15;

// A limit on how big a function to inline when being more flexible. In
// particular it's nice that with this limit we can inline the clamp
// functions (i32s-div, f64-to-int, etc.), that can affect perf.
static const int FLEXIBLE_SIZE_LIMIT = 20;
// Useful info on a function, helping us decide if we can inline it.
// (The enclosing struct declaration and a `size` member - set below via
// Measurer::measure - are not visible in this chunk.)
// How many call sites target this function; atomic because the scan that
// fills it in runs function-parallel.
std::atomic<Index> calls;
// Set to false when the body is seen to contain a loop or a call; only
// lightweight functions are inlined aggressively at -O3.
bool lightweight = true;
bool usedGlobally = false; // in a table or export

// Decides whether inlining this function is worthwhile, given the pass
// options and whether we may inline it into more than one call site.
bool worthInlining(PassOptions& options, bool allowMultipleInliningsPerFunction) {
  // if it's big, it's just not worth doing (TODO: investigate more)
  if (size > FLEXIBLE_SIZE_LIMIT) return false;
  // if it has one use, then inlining it would likely reduce code size
  // since we are just moving code around, + optimizing, so worth it
  // if small enough that we are pretty sure it's ok
  if (calls == 1 && !usedGlobally && size <= CAREFUL_SIZE_LIMIT) return true;
  if (!allowMultipleInliningsPerFunction) return false;
  // more than one use, so we can't eliminate it after inlining,
  // so only worth it if we really care about speed and don't care
  // about size, and if it's lightweight so a good candidate for
  // speeding us up.
  return options.optimizeLevel >= 3 && options.shrinkLevel == 0 && lightweight;
// Maps each function name to the info we gathered about it.
typedef std::unordered_map<Name, FunctionInfo> NameInfoMap;
// A function-parallel scanner that fills in the FunctionInfo for each
// function: call counts, whether it is lightweight, and its body size.
struct FunctionInfoScanner : public WalkerPass<PostWalker<FunctionInfoScanner>> {
  bool isFunctionParallel() override { return true; }

  FunctionInfoScanner(NameInfoMap* infos) : infos(infos) {}

  // Each parallel worker gets its own scanner instance; they share the
  // infos map, which is safe because every function already has an entry
  // (see the assert in visitCall).
  FunctionInfoScanner* create() override {
    return new FunctionInfoScanner(infos);

  void visitLoop(Loop* curr) {
    // having a loop is not lightweight
    (*infos)[getFunction()->name].lightweight = false;

  void visitCall(Call* curr) {
    assert(infos->count(curr->target) > 0); // can't add a new element in parallel
    // count this use of the call target
    (*infos)[curr->target].calls++;
    // having a call is not lightweight
    (*infos)[getFunction()->name].lightweight = false;

  void visitFunction(Function* curr) {
    // record the measured size of the entire body
    (*infos)[curr->name].size = Measurer::measure(curr->body);
// A concrete inlining opportunity: where the call site is, and which
// function's body to inline there.
struct InliningAction {
  // Pointer to the slot holding the call expression, so we can replace the
  // call in place even after other inlinings happen in the same function.
  Expression** callSite;

  // (NOTE: the `Function* contents;` member declaration is not visible in
  // this chunk; the constructor below initializes it.)
  InliningAction(Expression** callSite, Function* contents) : callSite(callSite), contents(contents) {}
// State shared between the planning pass and the inlining driver.
struct InliningState {
  // Functions we decided are worth inlining.
  std::unordered_set<Name> worthInlining;
  std::unordered_map<Name, std::vector<InliningAction>> actionsForFunction; // function name => actions that can be performed in it
// A function-parallel pass that finds call sites worth inlining and
// records them as InliningActions in the shared state.
struct Planner : public WalkerPass<PostWalker<Planner>> {
  bool isFunctionParallel() override { return true; }

  Planner(InliningState* state) : state(state) {}

  Planner* create() override {
    return new Planner(state);

  void visitCall(Call* curr) {
    // plan to inline if we know this is valid to inline, and if the call is
    // actually performed - if it is dead code, it's pointless to inline
    if (state->worthInlining.count(curr->target) &&
        curr->type != unreachable) {
      // nest the call in a block. that way the location of the pointer to the call will not
      // change even if we inline multiple times into the same function, otherwise
      // call1(call2()) might be a problem
      auto* block = Builder(*getModule()).makeBlock(curr);
      replaceCurrent(block);
      assert(state->actionsForFunction.count(getFunction()->name) > 0); // can't add a new element in parallel
      state->actionsForFunction[getFunction()->name].emplace_back(&block->list[0], getModule()->getFunction(curr->target));

  void doWalkFunction(Function* func) {
    // we shouldn't inline into us if we are to be inlined
    // ourselves - that has the risk of cycles
    if (state->worthInlining.count(func->name) == 0) {

  InliningState* state;
// Core inlining logic. Modifies the outside function (adding locals as
// needed), and returns the inlined code. The call site is replaced by a
// named block holding the parameter assignments, var zeroing, and a copy
// of the inlined body.
static Expression* doInlining(Module* module, Function* into, InliningAction& action) {
  Function* from = action.contents;
  auto* call = (*action.callSite)->cast<Call>();
  Builder builder(*module);
  // NOTE(review): a second, temporary Builder is constructed here even
  // though `builder` above could be reused.
  auto* block = Builder(*module).makeBlock();
  // name the block, so that returns in the inlined body can break to it
  block->name = Name(std::string("__inlined_func$") + from->name.str);
  *action.callSite = block;
  // set up a locals mapping
  struct Updater : public PostWalker<Updater> {
    // old local index in `from` => freshly-added local index in `into`
    std::map<Index, Index> localMapping;
    // a return in the inlined body becomes a break out of the wrapper block
    void visitReturn(Return* curr) {
      replaceCurrent(builder->makeBreak(returnName, curr->value));
    // remap all local accesses to the new locals
    void visitGetLocal(GetLocal* curr) {
      curr->index = localMapping[curr->index];
    void visitSetLocal(SetLocal* curr) {
      curr->index = localMapping[curr->index];
  updater.returnName = block->name;
  updater.builder = &builder;
  // add a new local in `into` for every local (params + vars) of `from`
  for (Index i = 0; i < from->getNumLocals(); i++) {
    updater.localMapping[i] = builder.addVar(into, from->getLocalType(i));
  // assign the operands into the params
  for (Index i = 0; i < from->params.size(); i++) {
    block->list.push_back(builder.makeSetLocal(updater.localMapping[i], call->operands[i]));
  // zero out the vars (as we may be in a loop, and may depend on their zero-init value)
  for (Index i = 0; i < from->vars.size(); i++) {
    block->list.push_back(builder.makeSetLocal(updater.localMapping[from->getVarIndexBase() + i], LiteralUtils::makeZero(from->vars[i], *module)));
  // generate and update the inlined contents
  auto* contents = ExpressionManipulator::copy(from->body, *module);
  updater.walk(contents);
  block->list.push_back(contents);
  block->type = call->type;
  // if the function returned a value, we just set the block containing the
  // inlined code to have that type. or, if the function was void and
  // contained void, that is fine too. a bad case is a void function in which
  // we have unreachable code, so we would be replacing a void call with an
  // unreachable; we need to handle that case.
  if (contents->type == unreachable && block->type == none) {
    // make the block reachable by adding a break to it
    block->list.push_back(builder.makeBreak(block->name));
// The inlining pass driver: repeatedly recomputes function info and runs
// inlining iterations until an iteration does no work.
struct Inlining : public Pass {
  // whether to optimize where we inline
  bool optimize = false;

  void run(PassRunner* runner, Module* module) override {
    // keep going while we inline, to handle nesting. TODO: optimize
    firstIteration = true;
    calculateInfos(module);
    if (!iteration(runner, module)) {
    // after the first round, be stricter: only one inlining per function
    firstIteration = false;
// (Re)computes the per-function info map for the whole module.
void calculateInfos(Module* module) {
  // fill in info, as we operate on it in parallel (each function to its own entry)
  for (auto& func : module->functions) {
  // scan all function bodies in parallel to gather the info
  PassRunner runner(module);
  runner.setIsNested(true);
  runner.add<FunctionInfoScanner>(&infos);
  // fill in global uses
  // anything exported or used in a table should not be inlined
  for (auto& ex : module->exports) {
    if (ex->kind == ExternalKind::Function) {
      infos[ex->value].usedGlobally = true;
  for (auto& segment : module->table.segments) {
    for (auto name : segment.data) {
      if (module->getFunctionOrNull(name)) {
        infos[name].usedGlobally = true;
// Performs one full round of inlining over the module. Returns whether any
// work was done (so the caller knows whether to run another round).
bool iteration(PassRunner* runner, Module* module) {
  // decide which to inline
  for (auto& func : module->functions) {
    // on the first iteration, allow multiple inlinings per function
    if (infos[func->name].worthInlining(runner->options, firstIteration /* allowMultipleInliningsPerFunction */)) {
      state.worthInlining.insert(func->name);
  // nothing is worth inlining => no more work to do
  if (state.worthInlining.size() == 0) return false;
  // fill in actionsForFunction, as we operate on it in parallel (each function to its own entry)
  for (auto& func : module->functions) {
    state.actionsForFunction[func->name];
  // find and plan inlinings
  // NOTE(review): this nested PassRunner shadows the `runner` parameter;
  // presumably it lives in its own brace scope (not visible in this chunk),
  // since the doOptimize call below expects the PassRunner* parameter.
  PassRunner runner(module);
  runner.setIsNested(true);
  runner.add<Planner>(&state);
  // perform inlinings TODO: parallelize
  std::unordered_map<Name, Index> inlinedUses; // how many uses we inlined
  std::unordered_set<Function*> inlinedInto; // which functions were inlined into
  for (auto& func : module->functions) {
    for (auto& action : state.actionsForFunction[func->name]) {
      Name inlinedName = action.contents->name;
      doInlining(module, func.get(), action);
      inlinedUses[inlinedName]++;
      inlinedInto.insert(func.get());
      assert(inlinedUses[inlinedName] <= infos[inlinedName].calls);
  // anything we inlined into may now have non-unique label names, fix it up
  for (auto func : inlinedInto) {
    wasm::UniqueNameMapper::uniquify(func->body);
  if (optimize && inlinedInto.size() > 0) {
    doOptimize(inlinedInto, module, runner);
  // remove functions that we no longer need after inlining: every one of
  // their uses was inlined, and nothing global (export/table) uses them
  auto& funcs = module->functions;
  funcs.erase(std::remove_if(funcs.begin(), funcs.end(), [&](const std::unique_ptr<Function>& curr) {
    auto name = curr->name;
    auto& info = infos[name];
    return inlinedUses.count(name) && inlinedUses[name] == info.calls && !info.usedGlobally;
  // return whether we did any work
  return inlinedUses.size() > 0;
// Run useful optimizations after inlining, things like removing
// unnecessary new blocks, sharing variables, etc. Only the functions we
// inlined into are optimized; the rest are swapped out of the module
// temporarily so the nested runner does not touch them.
void doOptimize(std::unordered_set<Function*>& funcs, Module* module, PassRunner* parentRunner) {
  // save the full list of functions on the side
  std::vector<std::unique_ptr<Function>> all;
  all.swap(module->functions);
  module->updateMaps();
  // add back just the functions we inlined into
  for (auto& func : funcs) {
    module->addFunction(func);
  // a nested runner with the parent's options; global validation is off
  // since the module is temporarily missing most of its functions
  PassRunner runner(module, parentRunner->options);
  runner.setIsNested(true);
  runner.setValidateGlobally(false); // not a full valid module
  runner.add("precompute-propagate");
  runner.add("remove-unused-brs");
  runner.add("remove-unused-names");
  runner.add("coalesce-locals");
  runner.add("simplify-locals");
  runner.add("vacuum");
  runner.add("reorder-locals");
  runner.add("remove-unused-brs");
  runner.add("merge-blocks");
  // restore all the funcs
  for (auto& func : module->functions) {
  all.swap(module->functions);
  module->updateMaps();
// Creates the plain inlining pass (no optimization after inlining).
Pass *createInliningPass() {
  return new Inlining();
339 Pass
*createInliningOptimizingPass() {
340 auto* ret
= new Inlining();
341 ret
->optimize
= true;