]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // This file defines the interfaces that X86 uses to lower LLVM code into a | |
11 | // selection DAG. | |
12 | // | |
13 | //===----------------------------------------------------------------------===// | |
14 | ||
1a4d82fc JJ |
15 | #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H |
16 | #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H | |
223e47cc | 17 | |
970d7e83 | 18 | #include "llvm/CodeGen/CallingConvLower.h" |
223e47cc | 19 | #include "llvm/CodeGen/SelectionDAG.h" |
970d7e83 LB |
20 | #include "llvm/Target/TargetLowering.h" |
21 | #include "llvm/Target/TargetOptions.h" | |
223e47cc LB |
22 | |
23 | namespace llvm { | |
1a4d82fc JJ |
24 | class X86Subtarget; |
25 | class X86TargetMachine; | |
26 | ||
namespace X86ISD {
  // X86 Specific DAG Nodes
  enum NodeType {
    // Start the numbering where the builtin ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// BSF - Bit scan forward.
    /// BSR - Bit scan reverse.
    BSF,
    BSR,

    /// SHLD, SHRD - Double shift instructions. These correspond to
    /// X86::SHLDxx and X86::SHRDxx instructions.
    SHLD,
    SHRD,

    /// FAND - Bitwise logical AND of floating point values. This corresponds
    /// to X86::ANDPS or X86::ANDPD.
    FAND,

    /// FOR - Bitwise logical OR of floating point values. This corresponds
    /// to X86::ORPS or X86::ORPD.
    FOR,

    /// FXOR - Bitwise logical XOR of floating point values. This corresponds
    /// to X86::XORPS or X86::XORPD.
    FXOR,

    /// FANDN - Bitwise logical ANDNOT of floating point values. This
    /// corresponds to X86::ANDNPS or X86::ANDNPD.
    FANDN,

    /// FSRL - Bitwise logical right shift of floating point values. This
    /// corresponds to X86::PSRLDQ.
    FSRL,

    /// CALL - These operations represent an abstract X86 call
    /// instruction, which includes a bunch of information. In particular the
    /// operands of these node are:
    ///
    ///     #0 - The incoming token chain
    ///     #1 - The callee
    ///     #2 - The number of arg bytes the caller pushes on the stack.
    ///     #3 - The number of arg bytes the callee pops off the stack.
    ///     #4 - The value to pass in AL/AX/EAX (optional)
    ///     #5 - The value to pass in DL/DX/EDX (optional)
    ///
    /// The result values of these nodes are:
    ///
    ///     #0 - The outgoing token chain
    ///     #1 - The first register result value (optional)
    ///     #2 - The second register result value (optional)
    ///
    CALL,

    /// RDTSC_DAG - This operation implements the lowering for
    /// readcyclecounter
    RDTSC_DAG,

    /// X86 Read Time-Stamp Counter and Processor ID.
    RDTSCP_DAG,

    /// X86 Read Performance Monitoring Counters.
    RDPMC_DAG,

    /// X86 compare and logical compare instructions.
    CMP, COMI, UCOMI,

    /// X86 bit-test instructions.
    BT,

    /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
    /// operand, usually produced by a CMP instruction.
    SETCC,

    /// X86 Select
    SELECT,

    // Same as SETCC except it's materialized with a sbb and the value is all
    // one's or all zero's.
    SETCC_CARRY,  // R = carry_bit ? ~0 : 0

    /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
    /// Operands are two FP values to compare; result is a mask of
    /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
    FSETCC,

    /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
    /// result in an integer GPR.  Needs masking for scalar result.
    FGETSIGNx86,

    /// X86 conditional moves. Operand 0 and operand 1 are the two values
    /// to select from. Operand 2 is the condition code, and operand 3 is the
    /// flag operand produced by a CMP or TEST instruction. It also writes a
    /// flag result.
    CMOV,

    /// X86 conditional branches. Operand 0 is the chain operand, operand 1
    /// is the block to branch if condition is true, operand 2 is the
    /// condition code, and operand 3 is the flag operand produced by a CMP
    /// or TEST instruction.
    BRCOND,

    /// Return with a flag operand. Operand 0 is the chain operand, operand
    /// 1 is the number of bytes of stack to pop.
    RET_FLAG,

    /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
    REP_STOS,

    /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
    REP_MOVS,

    /// GlobalBaseReg - On Darwin, this node represents the result of the popl
    /// at function entry, used for PIC code.
    GlobalBaseReg,

    /// Wrapper - A wrapper node for TargetConstantPool,
    /// TargetExternalSymbol, and TargetGlobalAddress.
    Wrapper,

    /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
    /// relative displacements.
    WrapperRIP,

    /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
    /// to an MMX vector.  If you think this is too close to the previous
    /// mnemonic, so do I; blame Intel.
    MOVDQ2Q,

    /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
    /// vector to a GPR.
    MMX_MOVD2W,

    /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRB.
    PEXTRB,

    /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRW.
    PEXTRW,

    /// INSERTPS - Insert any element of a 4 x float vector into any element
    /// of a destination 4 x float vector.
    INSERTPS,

    /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRB.
    PINSRB,

    /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRW.
    PINSRW, MMX_PINSRW,

    /// PSHUFB - Shuffle 16 8-bit values within a vector.
    PSHUFB,

    /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
    ANDNP,

    /// PSIGN - Copy integer sign.
    PSIGN,

    /// BLENDI - Blend where the selector is an immediate.
    BLENDI,

    /// SHRUNKBLEND - Blend where the condition has been shrunk.
    /// This is used to emphasize that the condition mask is
    /// no more valid for generic VSELECT optimizations.
    SHRUNKBLEND,

    /// ADDSUB - Combined add and sub on an FP vector.
    ADDSUB,

    // SUBUS - Integer sub with unsigned saturation.
    SUBUS,

    /// HADD - Integer horizontal add.
    HADD,

    /// HSUB - Integer horizontal sub.
    HSUB,

    /// FHADD - Floating point horizontal add.
    FHADD,

    /// FHSUB - Floating point horizontal sub.
    FHSUB,

    /// UMAX, UMIN - Unsigned integer max and min.
    UMAX, UMIN,

    /// SMAX, SMIN - Signed integer max and min.
    SMAX, SMIN,

    /// FMAX, FMIN - Floating point max and min.
    ///
    FMAX, FMIN,

    /// FMAXC, FMINC - Commutative FMIN and FMAX.
    FMAXC, FMINC,

    /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
    /// approximation.  Note that these typically require refinement
    /// in order to obtain suitable precision.
    FRSQRT, FRCP,

    // TLSADDR - Thread Local Storage.
    TLSADDR,

    // TLSBASEADDR - Thread Local Storage. A call to get the start address
    // of the TLS block for the current module.
    TLSBASEADDR,

    // TLSCALL - Thread Local Storage.  When calling to an OS provided
    // thunk at the address from an earlier relocation.
    TLSCALL,

    // EH_RETURN - Exception Handling helpers.
    EH_RETURN,

    // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    /// TC_RETURN - Tail call return. See X86TargetLowering::LowerCall for
    /// the list of operands.
    TC_RETURN,

    // VZEXT_MOVL - Vector move to low scalar and zero higher vector elements.
    VZEXT_MOVL,

    // VZEXT - Vector integer zero-extend.
    VZEXT,

    // VSEXT - Vector integer signed-extend.
    VSEXT,

    // VTRUNC - Vector integer truncate.
    VTRUNC,

    // VTRUNCM - Vector integer truncate with mask.
    VTRUNCM,

    // VFPEXT - Vector FP extend.
    VFPEXT,

    // VFPROUND - Vector FP round.
    VFPROUND,

    // VSHLDQ, VSRLDQ - 128-bit vector logical left / right shift
    VSHLDQ, VSRLDQ,

    // VSHL, VSRL, VSRA - Vector shift elements
    VSHL, VSRL, VSRA,

    // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
    VSHLI, VSRLI, VSRAI,

    // CMPP - Vector packed double/float comparison.
    CMPP,

    // PCMP* - Vector integer comparisons.
    PCMPEQ, PCMPGT,
    // PCMP*M - Vector integer comparisons, the result is in a mask vector.
    PCMPEQM, PCMPGTM,

    /// CMPM, CMPMU - Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    CMPM,
    CMPMU,

    // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
    ADD, SUB, ADC, SBB, SMUL,
    INC, DEC, OR, XOR, AND,

    BEXTR,  // BEXTR - Bit field extract

    UMUL, // LOW, HI, FLAGS = umul LHS, RHS

    // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS
    SMUL8, UMUL8,

    // 8-bit divrem that zero-extend the high result (AH).
    UDIVREM8_ZEXT_HREG,
    SDIVREM8_SEXT_HREG,

    // MUL_IMM - X86 specific multiply by immediate.
    MUL_IMM,

    // PTEST - Vector bitwise comparisons.
    PTEST,

    // TESTP - Vector packed fp sign bitwise comparisons.
    TESTP,

    // TESTM, TESTNM - Vector "test" in AVX-512, the result is in a mask vector.
    TESTM,
    TESTNM,

    // OR/AND test for masks
    KORTEST,

    // Several flavors of instructions with vector shuffle behaviors.
    PACKSS,
    PACKUS,
    // Intra-lane alignr
    PALIGNR,
    // AVX512 inter-lane alignr
    VALIGN,
    PSHUFD,
    PSHUFHW,
    PSHUFLW,
    SHUFP,
    MOVDDUP,
    MOVSHDUP,
    MOVSLDUP,
    MOVLHPS,
    MOVLHPD,
    MOVHLPS,
    MOVLPS,
    MOVLPD,
    MOVSD,
    MOVSS,
    UNPCKL,
    UNPCKH,
    VPERMILPV,
    VPERMILPI,
    VPERMV,
    VPERMV3,
    VPERMIV3,
    VPERMI,
    VPERM2X128,
    VBROADCAST,
    // masked broadcast
    VBROADCASTM,
    // Insert/Extract vector element
    VINSERT,
    VEXTRACT,

    // Vector multiply packed unsigned doubleword integers
    PMULUDQ,
    // Vector multiply packed signed doubleword integers
    PMULDQ,

    // FMA nodes
    FMADD,
    FNMADD,
    FMSUB,
    FNMSUB,
    FMADDSUB,
    FMSUBADD,

    // Compress and expand
    COMPRESS,
    EXPAND,

    // Save xmm argument registers to the stack, according to %al. An operator
    // is needed so that this can be expanded with control flow.
    VASTART_SAVE_XMM_REGS,

    // Windows's _chkstk call to do stack probing.
    WIN_ALLOCA,

    // For allocating variable amounts of stack space when using
    // segmented stacks. Check if the current stacklet has enough space, and
    // falls back to heap allocation if not.
    SEG_ALLOCA,

    // Windows's _ftol2 runtime routine to do fptoui.
    WIN_FTOL,

    // Memory barrier
    MEMBARRIER,
    MFENCE,
    SFENCE,
    LFENCE,

    // Store FP status word into i16 register.
    FNSTSW16r,

    // Store contents of %ah into %eflags.
    SAHF,

    // Get a random integer and indicate whether it is valid in CF.
    RDRAND,

    // Get a NIST SP800-90B & C compliant random integer and
    // indicate whether it is valid in CF.
    RDSEED,

    PCMPISTRI,
    PCMPESTRI,

    // Test if in transactional execution.
    XTEST,

    // ERI instructions
    RSQRT28, RCP28, EXP2,

    // Compare and swap.  Everything from here on is numbered at or above
    // FIRST_TARGET_MEMORY_OPCODE and is therefore treated as a target
    // memory operation.
    LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
    LCMPXCHG8_DAG,
    LCMPXCHG16_DAG,

    // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,

    // Store FP control word into i16 memory.
    FNSTCW16m,

    /// This instruction implements FP_TO_SINT with the
    /// integer destination in memory and a FP reg source.  This corresponds
    /// to the X86::FIST*m instructions and the rounding mode change stuff. It
    /// has two inputs (token chain and address) and two outputs (int value
    /// and token chain).
    FP_TO_INT16_IN_MEM,
    FP_TO_INT32_IN_MEM,
    FP_TO_INT64_IN_MEM,

    /// This instruction implements SINT_TO_FP with the
    /// integer source in memory and FP reg result.  This corresponds to the
    /// X86::FILD*m instructions. It has three inputs (token chain, address,
    /// and source type) and two outputs (FP value and token chain). FILD_FLAG
    /// also produces a flag).
    FILD,
    FILD_FLAG,

    /// This instruction implements an extending load to FP stack slots.
    /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
    /// operand, ptr to load from, and a ValueType node indicating the type
    /// to load to.
    FLD,

    /// This instruction implements a truncating store to FP stack
    /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
    /// chain operand, value to store, address, and a ValueType to store it
    /// as.
    FST,

    /// This instruction grabs the address of the next argument
    /// from a va_list. (reads and modifies the va_list in memory)
    VAARG_64

    // WARNING: Do not add anything in the end unless you want the node to
    // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
    // thought as target memory ops!
  };
}
478 | ||
/// Define some predicates that are used for node matching.
namespace X86 {
  /// Return true if the specified
  /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
  /// suitable for input to VEXTRACTF128, VEXTRACTI128 instructions.
  bool isVEXTRACT128Index(SDNode *N);

  /// Return true if the specified
  /// INSERT_SUBVECTOR operand specifies a subvector insert that is
  /// suitable for input to VINSERTF128, VINSERTI128 instructions.
  bool isVINSERT128Index(SDNode *N);

  /// Return true if the specified
  /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
  /// suitable for input to VEXTRACTF64X4, VEXTRACTI64X4 instructions.
  bool isVEXTRACT256Index(SDNode *N);

  /// Return true if the specified
  /// INSERT_SUBVECTOR operand specifies a subvector insert that is
  /// suitable for input to VINSERTF64X4, VINSERTI64X4 instructions.
  bool isVINSERT256Index(SDNode *N);

  /// Return the appropriate
  /// immediate to extract the specified EXTRACT_SUBVECTOR index
  /// with VEXTRACTF128, VEXTRACTI128 instructions.
  unsigned getExtractVEXTRACT128Immediate(SDNode *N);

  /// Return the appropriate
  /// immediate to insert at the specified INSERT_SUBVECTOR index
  /// with VINSERTF128, VINSERTI128 instructions.
  unsigned getInsertVINSERT128Immediate(SDNode *N);

  /// Return the appropriate
  /// immediate to extract the specified EXTRACT_SUBVECTOR index
  /// with VEXTRACTF64X4, VEXTRACTI64x4 instructions.
  unsigned getExtractVEXTRACT256Immediate(SDNode *N);

  /// Return the appropriate
  /// immediate to insert at the specified INSERT_SUBVECTOR index
  /// with VINSERTF64x4, VINSERTI64x4 instructions.
  unsigned getInsertVINSERT256Immediate(SDNode *N);

  /// Returns true if Elt is a constant zero or floating point constant +0.0.
  bool isZeroNode(SDValue Elt);

  /// Returns true if the given offset can be
  /// fit into the displacement field of the instruction.
  bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                    bool hasSymbolicDisplacement = true);


  /// Determines whether the callee is required to pop its
  /// own arguments. Callee pop is necessary to support tail calls.
  bool isCalleePop(CallingConv::ID CallingConv,
                   bool is64Bit, bool IsVarArg, bool TailCallOpt);

  /// AVX512 static rounding constants.  These need to match the values in
  /// avx512fintrin.h.
  enum STATIC_ROUNDING {
    TO_NEAREST_INT = 0,
    TO_NEG_INF = 1,
    TO_POS_INF = 2,
    TO_ZERO = 3,
    CUR_DIRECTION = 4
  };
}
545 | ||
546 | //===--------------------------------------------------------------------===// | |
85aaf69f | 547 | // X86 Implementation of the TargetLowering interface |
1a4d82fc | 548 | class X86TargetLowering final : public TargetLowering { |
223e47cc | 549 | public: |
85aaf69f | 550 | explicit X86TargetLowering(const X86TargetMachine &TM); |
223e47cc | 551 | |
1a4d82fc | 552 | unsigned getJumpTableEncoding() const override; |
223e47cc | 553 | |
1a4d82fc | 554 | MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; } |
223e47cc | 555 | |
1a4d82fc | 556 | const MCExpr * |
223e47cc LB |
557 | LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
558 | const MachineBasicBlock *MBB, unsigned uid, | |
1a4d82fc | 559 | MCContext &Ctx) const override; |
223e47cc | 560 | |
85aaf69f | 561 | /// Returns relocation base for the given PIC jumptable. |
1a4d82fc JJ |
562 | SDValue getPICJumpTableRelocBase(SDValue Table, |
563 | SelectionDAG &DAG) const override; | |
564 | const MCExpr * | |
223e47cc | 565 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
1a4d82fc | 566 | unsigned JTI, MCContext &Ctx) const override; |
223e47cc | 567 | |
85aaf69f | 568 | /// Return the desired alignment for ByVal aggregate |
223e47cc LB |
569 | /// function arguments in the caller parameter area. For X86, aggregates |
570 | /// that contains are placed at 16-byte boundaries while the rest are at | |
571 | /// 4-byte boundaries. | |
1a4d82fc | 572 | unsigned getByValTypeAlignment(Type *Ty) const override; |
223e47cc | 573 | |
85aaf69f | 574 | /// Returns the target specific optimal type for load |
223e47cc LB |
575 | /// and store operations as a result of memset, memcpy, and memmove |
576 | /// lowering. If DstAlign is zero that means it's safe to destination | |
577 | /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it | |
578 | /// means there isn't a need to check it against alignment requirement, | |
970d7e83 LB |
579 | /// probably because the source does not need to be loaded. If 'IsMemset' is |
580 | /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that | |
581 | /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy | |
582 | /// source is constant so it does not need to be loaded. | |
223e47cc LB |
583 | /// It returns EVT::Other if the type should be determined using generic |
584 | /// target-independent logic. | |
1a4d82fc JJ |
585 | EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, |
586 | bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, | |
587 | MachineFunction &MF) const override; | |
223e47cc | 588 | |
85aaf69f | 589 | /// Returns true if it's safe to use load / store of the |
970d7e83 LB |
590 | /// specified type to expand memcpy / memset inline. This is mostly true |
591 | /// for all types except for some special cases. For example, on X86 | |
592 | /// targets without SSE2 f64 load / store are done with fldl / fstpl which | |
593 | /// also does type conversion. Note the specified type doesn't have to be | |
594 | /// legal as the hook is used before type legalization. | |
1a4d82fc | 595 | bool isSafeMemOpType(MVT VT) const override; |
970d7e83 | 596 | |
85aaf69f | 597 | /// Returns true if the target allows |
970d7e83 LB |
598 | /// unaligned memory accesses. of the specified type. Returns whether it |
599 | /// is "fast" by reference in the second argument. | |
1a4d82fc JJ |
600 | bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, |
601 | bool *Fast) const override; | |
223e47cc | 602 | |
85aaf69f | 603 | /// Provide custom lowering hooks for some operations. |
223e47cc | 604 | /// |
1a4d82fc | 605 | SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; |
223e47cc | 606 | |
85aaf69f | 607 | /// Replace the results of node with an illegal result |
223e47cc LB |
608 | /// type with new values built out of custom code. |
609 | /// | |
1a4d82fc JJ |
610 | void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, |
611 | SelectionDAG &DAG) const override; | |
223e47cc LB |
612 | |
613 | ||
1a4d82fc | 614 | SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; |
223e47cc | 615 | |
85aaf69f | 616 | /// Return true if the target has native support for |
223e47cc LB |
617 | /// the specified value type and it is 'desirable' to use the type for the |
618 | /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 | |
619 | /// instruction encodings are longer and some i16 instructions are slow. | |
1a4d82fc | 620 | bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override; |
223e47cc | 621 | |
85aaf69f | 622 | /// Return true if the target has native support for the |
223e47cc LB |
623 | /// specified value type and it is 'desirable' to use the type. e.g. On x86 |
624 | /// i16 is legal, but undesirable since i16 instruction encodings are longer | |
625 | /// and some i16 instructions are slow. | |
1a4d82fc | 626 | bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override; |
223e47cc | 627 | |
1a4d82fc | 628 | MachineBasicBlock * |
223e47cc | 629 | EmitInstrWithCustomInserter(MachineInstr *MI, |
1a4d82fc | 630 | MachineBasicBlock *MBB) const override; |
223e47cc LB |
631 | |
632 | ||
85aaf69f | 633 | /// This method returns the name of a target specific DAG node. |
1a4d82fc | 634 | const char *getTargetNodeName(unsigned Opcode) const override; |
223e47cc | 635 | |
85aaf69f SL |
636 | bool isCheapToSpeculateCttz() const override; |
637 | ||
638 | bool isCheapToSpeculateCtlz() const override; | |
639 | ||
640 | /// Return the value type to use for ISD::SETCC. | |
1a4d82fc | 641 | EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; |
223e47cc | 642 | |
85aaf69f SL |
643 | /// Determine which of the bits specified in Mask are known to be either |
644 | /// zero or one and return them in the KnownZero/KnownOne bitsets. | |
1a4d82fc JJ |
645 | void computeKnownBitsForTargetNode(const SDValue Op, |
646 | APInt &KnownZero, | |
647 | APInt &KnownOne, | |
648 | const SelectionDAG &DAG, | |
649 | unsigned Depth = 0) const override; | |
223e47cc | 650 | |
85aaf69f | 651 | /// Determine the number of bits in the operation that are sign bits. |
1a4d82fc JJ |
652 | unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
653 | const SelectionDAG &DAG, | |
654 | unsigned Depth) const override; | |
223e47cc | 655 | |
1a4d82fc JJ |
656 | bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA, |
657 | int64_t &Offset) const override; | |
223e47cc LB |
658 | |
659 | SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; | |
660 | ||
1a4d82fc | 661 | bool ExpandInlineAsm(CallInst *CI) const override; |
223e47cc | 662 | |
1a4d82fc JJ |
663 | ConstraintType |
664 | getConstraintType(const std::string &Constraint) const override; | |
223e47cc LB |
665 | |
666 | /// Examine constraint string and operand type and determine a weight value. | |
667 | /// The operand object must already have been set up with the operand type. | |
1a4d82fc JJ |
668 | ConstraintWeight |
669 | getSingleConstraintMatchWeight(AsmOperandInfo &info, | |
670 | const char *constraint) const override; | |
223e47cc | 671 | |
1a4d82fc | 672 | const char *LowerXConstraint(EVT ConstraintVT) const override; |
223e47cc | 673 | |
85aaf69f SL |
674 | /// Lower the specified operand into the Ops vector. If it is invalid, don't |
675 | /// add anything to Ops. If hasMemory is true it means one of the asm | |
676 | /// constraint of the inline asm instruction being processed is 'm'. | |
1a4d82fc JJ |
677 | void LowerAsmOperandForConstraint(SDValue Op, |
678 | std::string &Constraint, | |
679 | std::vector<SDValue> &Ops, | |
680 | SelectionDAG &DAG) const override; | |
223e47cc | 681 | |
85aaf69f | 682 | /// Given a physical register constraint |
223e47cc LB |
683 | /// (e.g. {edx}), return the register number and the register class for the |
684 | /// register. This should only be used for C_Register constraints. On | |
685 | /// error, this returns a register number of 0. | |
686 | std::pair<unsigned, const TargetRegisterClass*> | |
687 | getRegForInlineAsmConstraint(const std::string &Constraint, | |
1a4d82fc | 688 | MVT VT) const override; |
223e47cc | 689 | |
85aaf69f | 690 | /// Return true if the addressing mode represented |
223e47cc | 691 | /// by AM is legal for this target, for a load/store of the specified type. |
1a4d82fc | 692 | bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; |
223e47cc | 693 | |
85aaf69f | 694 | /// Return true if the specified immediate is legal |
223e47cc LB |
695 | /// icmp immediate, that is the target has icmp instructions which can |
696 | /// compare a register against the immediate without having to materialize | |
697 | /// the immediate into a register. | |
1a4d82fc | 698 | bool isLegalICmpImmediate(int64_t Imm) const override; |
223e47cc | 699 | |
85aaf69f | 700 | /// Return true if the specified immediate is legal |
223e47cc LB |
701 | /// add immediate, that is the target has add instructions which can |
702 | /// add a register and the immediate without having to materialize | |
703 | /// the immediate into a register. | |
1a4d82fc JJ |
704 | bool isLegalAddImmediate(int64_t Imm) const override; |
705 | ||
706 | /// \brief Return the cost of the scaling factor used in the addressing | |
707 | /// mode represented by AM for this target, for a load/store | |
708 | /// of the specified type. | |
709 | /// If the AM is supported, the return value must be >= 0. | |
710 | /// If the AM is not supported, it returns a negative value. | |
711 | int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; | |
712 | ||
713 | bool isVectorShiftByScalarCheap(Type *Ty) const override; | |
223e47cc | 714 | |
85aaf69f | 715 | /// Return true if it's free to truncate a value of |
223e47cc LB |
716 | /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in |
717 | /// register EAX to i16 by referencing its sub-register AX. | |
1a4d82fc JJ |
718 | bool isTruncateFree(Type *Ty1, Type *Ty2) const override; |
719 | bool isTruncateFree(EVT VT1, EVT VT2) const override; | |
720 | ||
721 | bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; | |
223e47cc | 722 | |
85aaf69f | 723 | /// Return true if any actual instruction that defines a |
223e47cc LB |
724 | /// value of type Ty1 implicit zero-extends the value to Ty2 in the result |
725 | /// register. This does not necessarily include registers defined in | |
726 | /// unknown ways, such as incoming arguments, or copies from unknown | |
727 | /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this | |
728 | /// does not necessarily apply to truncate instructions. e.g. on x86-64, | |
729 | /// all instructions that define 32-bit values implicit zero-extend the | |
730 | /// result out to 64 bits. | |
1a4d82fc JJ |
731 | bool isZExtFree(Type *Ty1, Type *Ty2) const override; |
732 | bool isZExtFree(EVT VT1, EVT VT2) const override; | |
733 | bool isZExtFree(SDValue Val, EVT VT2) const override; | |
223e47cc | 734 | |
85aaf69f SL |
735 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
736 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this | |
737 | /// method returns true, otherwise fmuladd is expanded to fmul + fadd. | |
1a4d82fc | 738 | bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; |
223e47cc | 739 | |
85aaf69f | 740 | /// Return true if it's profitable to narrow |
223e47cc LB |
741 | /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow |
742 | /// from i32 to i8 but not from i32 to i16. | |
1a4d82fc | 743 | bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; |
223e47cc | 744 | |
85aaf69f | 745 | /// Returns true if the target can instruction select the |
223e47cc LB |
746 | /// specified FP immediate natively. If false, the legalizer will |
747 | /// materialize the FP immediate as a load from a constant pool. | |
1a4d82fc | 748 | bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; |
223e47cc | 749 | |
85aaf69f SL |
750 | /// Targets can use this to indicate that they only support *some* |
751 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a | |
752 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to | |
753 | /// be legal. | |
1a4d82fc JJ |
754 | bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, |
755 | EVT VT) const override; | |
223e47cc | 756 | |
85aaf69f SL |
757 | /// Similar to isShuffleMaskLegal. This is used by Targets can use this to |
758 | /// indicate if there is a suitable VECTOR_SHUFFLE that can be used to | |
759 | /// replace a VAND with a constant pool entry. | |
1a4d82fc JJ |
760 | bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, |
761 | EVT VT) const override; | |
223e47cc | 762 | |
85aaf69f | 763 | /// If true, then instruction selection should |
223e47cc LB |
764 | /// seek to shrink the FP constant of the specified type to a smaller type |
765 | /// in order to save space and / or reduce runtime. | |
1a4d82fc | 766 | bool ShouldShrinkFPConstant(EVT VT) const override { |
223e47cc LB |
767 | // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more |
768 | // expensive than a straight movsd. On the other hand, it's important to | |
769 | // shrink long double fp constant since fldt is very slow. | |
770 | return !X86ScalarSSEf64 || VT == MVT::f80; | |
771 | } | |
772 | ||
85aaf69f SL |
773 | /// Return true if we believe it is correct and profitable to reduce the |
774 | /// load node to a smaller type. | |
775 | bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, | |
776 | EVT NewVT) const override; | |
777 | ||
223e47cc LB |
778 | const X86Subtarget* getSubtarget() const { |
779 | return Subtarget; | |
780 | } | |
781 | ||
85aaf69f SL |
782 | /// Return true if the specified scalar FP type is computed in an SSE |
783 | /// register, not on the X87 floating point stack. | |
223e47cc LB |
784 | bool isScalarFPTypeInSSEReg(EVT VT) const { |
785 | return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 | |
786 | (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 | |
787 | } | |
788 | ||
85aaf69f | 789 | /// Return true if the target uses the MSVC _ftol2 routine for fptoui. |
1a4d82fc | 790 | bool isTargetFTOL() const; |
223e47cc | 791 | |
85aaf69f SL |
792 | /// Return true if the MSVC _ftol2 routine should be used for fptoui to the |
793 | /// given type. | |
223e47cc LB |
794 | bool isIntegerTypeFTOL(EVT VT) const { |
795 | return isTargetFTOL() && VT == MVT::i64; | |
796 | } | |
797 | ||
1a4d82fc JJ |
798 | /// \brief Returns true if it is beneficial to convert a load of a constant |
799 | /// to just the constant itself. | |
800 | bool shouldConvertConstantLoadToIntImm(const APInt &Imm, | |
801 | Type *Ty) const override; | |
802 | ||
85aaf69f SL |
803 | /// Return true if EXTRACT_SUBVECTOR is cheap for this result type |
804 | /// with this index. | |
805 | bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override; | |
806 | ||
1a4d82fc JJ |
807 | /// Intel processors have a unified instruction and data cache |
808 | const char * getClearCacheBuiltinName() const override { | |
809 | return nullptr; // nothing to do, move along. | |
810 | } | |
811 | ||
812 | unsigned getRegisterByName(const char* RegName, EVT VT) const override; | |
813 | ||
85aaf69f | 814 | /// This method returns a target specific FastISel object, |
223e47cc | 815 | /// or null if the target does not support "fast" ISel. |
1a4d82fc JJ |
816 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, |
817 | const TargetLibraryInfo *libInfo) const override; | |
223e47cc | 818 | |
85aaf69f SL |
819 | /// Return true if the target stores stack protector cookies at a fixed |
820 | /// offset in some non-standard address space, and populates the address | |
821 | /// space and offset as appropriate. | |
1a4d82fc JJ |
822 | bool getStackCookieLocation(unsigned &AddressSpace, |
823 | unsigned &Offset) const override; | |
223e47cc LB |
824 | |
825 | SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, | |
826 | SelectionDAG &DAG) const; | |
827 | ||
1a4d82fc JJ |
828 | bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; |
829 | ||
830 | /// \brief Reset the operation actions based on target options. | |
831 | void resetOperationActions() override; | |
832 | ||
833 | bool useLoadStackGuardNode() const override; | |
834 | /// \brief Customize the preferred legalization strategy for certain types. | |
835 | LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; | |
836 | ||
223e47cc LB |
837 | protected: |
838 | std::pair<const TargetRegisterClass*, uint8_t> | |
1a4d82fc | 839 | findRepresentativeClass(MVT VT) const override; |
223e47cc LB |
840 | |
841 | private: | |
85aaf69f | 842 | /// Keep a pointer to the X86Subtarget around so that we can |
223e47cc LB |
843 | /// make the right decision when generating code for different targets. |
844 | const X86Subtarget *Subtarget; | |
970d7e83 | 845 | const DataLayout *TD; |
223e47cc | 846 | |
1a4d82fc JJ |
847 | /// Used to store the TargetOptions so that we don't waste time resetting |
848 | /// the operation actions unless we have to. | |
849 | TargetOptions TO; | |
850 | ||
85aaf69f | 851 | /// Select between SSE or x87 floating point ops. |
223e47cc LB |
852 | /// When SSE is available, use it for f32 operations. |
853 | /// When SSE2 is available, use it for f64 operations. | |
854 | bool X86ScalarSSEf32; | |
855 | bool X86ScalarSSEf64; | |
856 | ||
85aaf69f | 857 | /// A list of legal FP immediates. |
223e47cc LB |
858 | std::vector<APFloat> LegalFPImmediates; |
859 | ||
85aaf69f | 860 | /// Indicate that this x86 target can instruction |
223e47cc LB |
861 | /// select the specified FP immediate natively. |
862 | void addLegalFPImmediate(const APFloat& Imm) { | |
863 | LegalFPImmediates.push_back(Imm); | |
864 | } | |
865 | ||
866 | SDValue LowerCallResult(SDValue Chain, SDValue InFlag, | |
867 | CallingConv::ID CallConv, bool isVarArg, | |
868 | const SmallVectorImpl<ISD::InputArg> &Ins, | |
1a4d82fc | 869 | SDLoc dl, SelectionDAG &DAG, |
223e47cc LB |
870 | SmallVectorImpl<SDValue> &InVals) const; |
871 | SDValue LowerMemArgument(SDValue Chain, | |
872 | CallingConv::ID CallConv, | |
873 | const SmallVectorImpl<ISD::InputArg> &ArgInfo, | |
1a4d82fc | 874 | SDLoc dl, SelectionDAG &DAG, |
223e47cc LB |
875 | const CCValAssign &VA, MachineFrameInfo *MFI, |
876 | unsigned i) const; | |
877 | SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, | |
1a4d82fc | 878 | SDLoc dl, SelectionDAG &DAG, |
223e47cc LB |
879 | const CCValAssign &VA, |
880 | ISD::ArgFlagsTy Flags) const; | |
881 | ||
882 | // Call lowering helpers. | |
883 | ||
85aaf69f SL |
884 | /// Check whether the call is eligible for tail call optimization. Targets |
885 | /// that want to do tail call optimization should implement this function. | |
223e47cc LB |
886 | bool IsEligibleForTailCallOptimization(SDValue Callee, |
887 | CallingConv::ID CalleeCC, | |
888 | bool isVarArg, | |
889 | bool isCalleeStructRet, | |
890 | bool isCallerStructRet, | |
891 | Type *RetTy, | |
892 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |
893 | const SmallVectorImpl<SDValue> &OutVals, | |
894 | const SmallVectorImpl<ISD::InputArg> &Ins, | |
895 | SelectionDAG& DAG) const; | |
896 | bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; | |
897 | SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, | |
898 | SDValue Chain, bool IsTailCall, bool Is64Bit, | |
1a4d82fc | 899 | int FPDiff, SDLoc dl) const; |
223e47cc LB |
900 | |
901 | unsigned GetAlignedArgumentStackSize(unsigned StackSize, | |
902 | SelectionDAG &DAG) const; | |
903 | ||
904 | std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, | |
905 | bool isSigned, | |
906 | bool isReplace) const; | |
907 | ||
223e47cc | 908 | SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; |
1a4d82fc | 909 | SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; |
223e47cc | 910 | SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; |
1a4d82fc | 911 | SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; |
223e47cc | 912 | SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
1a4d82fc JJ |
913 | SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const; |
914 | SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const; | |
915 | ||
223e47cc | 916 | SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; |
223e47cc LB |
917 | SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; |
918 | SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; | |
1a4d82fc | 919 | SDValue LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, |
223e47cc LB |
920 | int64_t Offset, SelectionDAG &DAG) const; |
921 | SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; | |
922 | SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; | |
923 | SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; | |
223e47cc LB |
924 | SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; |
925 | SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; | |
926 | SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; | |
927 | SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; | |
970d7e83 LB |
928 | SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const; |
929 | SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; | |
223e47cc LB |
930 | SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; |
931 | SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; | |
223e47cc | 932 | SDValue LowerToBT(SDValue And, ISD::CondCode CC, |
1a4d82fc | 933 | SDLoc dl, SelectionDAG &DAG) const; |
223e47cc | 934 | SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; |
223e47cc LB |
935 | SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; |
936 | SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; | |
937 | SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; | |
938 | SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; | |
939 | SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; | |
940 | SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; | |
941 | SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; | |
942 | SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; | |
943 | SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; | |
944 | SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; | |
945 | SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; | |
970d7e83 LB |
946 | SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; |
947 | SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; | |
223e47cc LB |
948 | SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; |
949 | SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; | |
223e47cc | 950 | SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; |
1a4d82fc | 951 | SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; |
223e47cc | 952 | |
1a4d82fc | 953 | SDValue |
223e47cc LB |
954 | LowerFormalArguments(SDValue Chain, |
955 | CallingConv::ID CallConv, bool isVarArg, | |
956 | const SmallVectorImpl<ISD::InputArg> &Ins, | |
1a4d82fc JJ |
957 | SDLoc dl, SelectionDAG &DAG, |
958 | SmallVectorImpl<SDValue> &InVals) const override; | |
959 | SDValue LowerCall(CallLoweringInfo &CLI, | |
960 | SmallVectorImpl<SDValue> &InVals) const override; | |
223e47cc | 961 | |
1a4d82fc JJ |
962 | SDValue LowerReturn(SDValue Chain, |
963 | CallingConv::ID CallConv, bool isVarArg, | |
964 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |
965 | const SmallVectorImpl<SDValue> &OutVals, | |
966 | SDLoc dl, SelectionDAG &DAG) const override; | |
223e47cc | 967 | |
1a4d82fc | 968 | bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; |
223e47cc | 969 | |
1a4d82fc | 970 | bool mayBeEmittedAsTailCall(CallInst *CI) const override; |
223e47cc | 971 | |
1a4d82fc JJ |
972 | EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, |
973 | ISD::NodeType ExtendKind) const override; | |
223e47cc | 974 | |
1a4d82fc JJ |
975 | bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, |
976 | bool isVarArg, | |
977 | const SmallVectorImpl<ISD::OutputArg> &Outs, | |
978 | LLVMContext &Context) const override; | |
979 | ||
980 | const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; | |
981 | ||
982 | bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override; | |
983 | bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; | |
984 | bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; | |
985 | ||
986 | LoadInst * | |
987 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; | |
988 | ||
989 | bool needsCmpXchgNb(const Type *MemType) const; | |
223e47cc | 990 | |
223e47cc LB |
991 | /// Utility function to emit atomic-load-arith operations (and, or, xor, |
992 | /// nand, max, min, umax, umin). It takes the corresponding instruction to | |
993 | /// expand, the associated machine basic block, and the associated X86 | |
994 | /// opcodes for reg/reg. | |
995 | MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI, | |
996 | MachineBasicBlock *MBB) const; | |
997 | ||
998 | /// Utility function to emit atomic-load-arith operations (and, or, xor, | |
999 | /// nand, add, sub, swap) for 64-bit operands on 32-bit target. | |
1000 | MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI, | |
1001 | MachineBasicBlock *MBB) const; | |
1002 | ||
1003 | // Utility function to emit the low-level va_arg code for X86-64. | |
1004 | MachineBasicBlock *EmitVAARG64WithCustomInserter( | |
1005 | MachineInstr *MI, | |
1006 | MachineBasicBlock *MBB) const; | |
1007 | ||
1008 | /// Utility function to emit the xmm reg save portion of va_start. | |
1009 | MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter( | |
1010 | MachineInstr *BInstr, | |
1011 | MachineBasicBlock *BB) const; | |
1012 | ||
1013 | MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, | |
1014 | MachineBasicBlock *BB) const; | |
1015 | ||
1016 | MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI, | |
1017 | MachineBasicBlock *BB) const; | |
1018 | ||
1019 | MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, | |
1a4d82fc | 1020 | MachineBasicBlock *BB) const; |
223e47cc LB |
1021 | |
1022 | MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, | |
1023 | MachineBasicBlock *BB) const; | |
1024 | ||
1025 | MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI, | |
1026 | MachineBasicBlock *BB) const; | |
1027 | ||
970d7e83 LB |
1028 | MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, |
1029 | MachineBasicBlock *MBB) const; | |
1030 | ||
1031 | MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, | |
1032 | MachineBasicBlock *MBB) const; | |
1033 | ||
1a4d82fc JJ |
1034 | MachineBasicBlock *emitFMA3Instr(MachineInstr *MI, |
1035 | MachineBasicBlock *MBB) const; | |
1036 | ||
223e47cc LB |
1037 | /// Emit nodes that will be selected as "test Op0,Op0", or something |
1038 | /// equivalent, for use with the given x86 condition code. | |
1a4d82fc JJ |
1039 | SDValue EmitTest(SDValue Op0, unsigned X86CC, SDLoc dl, |
1040 | SelectionDAG &DAG) const; | |
223e47cc LB |
1041 | |
1042 | /// Emit nodes that will be selected as "cmp Op0,Op1", or something | |
1043 | /// equivalent, for use with the given x86 condition code. | |
1a4d82fc | 1044 | SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl, |
223e47cc LB |
1045 | SelectionDAG &DAG) const; |
1046 | ||
1047 | /// Convert a comparison if required by the subtarget. | |
1048 | SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const; | |
85aaf69f SL |
1049 | |
1050 | /// Use rsqrt* to speed up sqrt calculations. | |
1051 | SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, | |
1052 | unsigned &RefinementSteps, | |
1053 | bool &UseOneConstNR) const override; | |
1054 | ||
1055 | /// Use rcp* to speed up fdiv calculations. | |
1056 | SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, | |
1057 | unsigned &RefinementSteps) const override; | |
223e47cc LB |
1058 | }; |
1059 | ||
1060 | namespace X86 { | |
1061 | FastISel *createFastISel(FunctionLoweringInfo &funcInfo, | |
970d7e83 | 1062 | const TargetLibraryInfo *libInfo); |
223e47cc LB |
1063 | } |
1064 | } | |
1065 | ||
1066 | #endif // X86ISELLOWERING_H |