]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===---------------------------------------------------------------------===// |
2 | // Random ideas for the X86 backend: MMX-specific stuff. | |
3 | //===---------------------------------------------------------------------===// | |
4 | ||
5 | //===---------------------------------------------------------------------===// | |
6 | ||
7 | This: | |
8 | ||
9 | #include <mmintrin.h> | |
10 | ||
11 | __v2si qux(int A) { | |
12 | return (__v2si){ 0, A }; | |
13 | } | |
14 | ||
15 | is compiled into: | |
16 | ||
17 | _qux: | |
18 | subl $28, %esp | |
19 | movl 32(%esp), %eax | |
20 | movd %eax, %mm0 | |
21 | movq %mm0, (%esp) | |
22 | movl (%esp), %eax | |
23 | movl %eax, 20(%esp) | |
24 | movq %mm0, 8(%esp) | |
25 | movl 12(%esp), %eax | |
26 | movl %eax, 16(%esp) | |
27 | movq 16(%esp), %mm0 | |
28 | addl $28, %esp | |
29 | ret | |
30 | ||
31 | Yuck! | |
32 | ||
33 | GCC gives us: | |
34 | ||
35 | _qux: | |
36 | subl $12, %esp | |
37 | movl 16(%esp), %eax | |
38 | movl 20(%esp), %edx | |
39 | movl $0, (%eax) | |
40 | movl %edx, 4(%eax) | |
41 | addl $12, %esp | |
42 | ret $4 | |
43 | ||
44 | //===---------------------------------------------------------------------===// | |
45 | ||
46 | We generate crappy code for this: | |
47 | ||
48 | __m64 t() { | |
49 | return _mm_cvtsi32_si64(1); | |
50 | } | |
51 | ||
52 | _t: | |
53 | subl $12, %esp | |
54 | movl $1, %eax | |
55 | movd %eax, %mm0 | |
56 | movq %mm0, (%esp) | |
57 | movl (%esp), %eax | |
58 | movl 4(%esp), %edx | |
59 | addl $12, %esp | |
60 | ret | |
61 | ||
62 | The extra stack traffic is covered in the previous entry. The other reason | |
63 | is that we are not smart about materializing constants in MMX registers. With -m64, we generate: | |
64 | ||
65 | movl $1, %eax | |
66 | movd %eax, %mm0 | |
67 | movd %mm0, %rax | |
68 | ret | |
69 | ||
70 | We should be using a constant-pool load instead: | |
71 | movq LC0(%rip), %rax |