]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | .text |
2 | .intel_syntax noprefix | |
3 | .file "_lib/int64.c" | |
4 | .globl sum_int64_sse4 | |
5 | .p2align 4, 0x90 | |
6 | .type sum_int64_sse4,@function | |
7 | sum_int64_sse4: # @sum_int64_sse4 -- adds up rsi 64-bit elements read from [rdi] (wrapping on overflow) and stores the total to [rdx] | |
8 | # BB#0: frame setup; a zero element count (rsi == 0) jumps to .LBB0_1, which stores 0 | |
9 | push rbp | |
10 | mov rbp, rsp | |
11 | and rsp, -8 | |
12 | test rsi, rsi | |
13 | je .LBB0_1 | |
14 | # BB#2: counts of 1..3 skip the SIMD code and use only the scalar loop (entered through .LBB0_3 with r9 = 0, rax = 0) | |
15 | cmp rsi, 3 | |
16 | jbe .LBB0_3 | |
17 | # BB#6: r9 = count with its low two bits cleared = number of elements the SIMD loops will cover; r9 == 0 falls back to the scalar path | |
18 | mov r9, rsi | |
19 | and r9, -4 | |
20 | je .LBB0_3 | |
21 | # BB#7: r8 = r9 - 4; rax = ((r8 >> 2) + 1) & 3 = number of 4-element groups modulo 4 (only the low two bits survive, so the 32-bit shift is enough); zero means no peel passes (.LBB0_8) | |
22 | lea r8, [r9 - 4] | |
23 | mov eax, r8d | |
24 | shr eax, 2 | |
25 | inc eax | |
26 | and rax, 3 | |
27 | je .LBB0_8 | |
28 | # BB#9: rax = -(peel passes) so the loop can count up to zero; accumulators xmm0/xmm1 and element index rcx start at zero | |
29 | neg rax | |
30 | pxor xmm0, xmm0 | |
31 | xor ecx, ecx | |
32 | pxor xmm1, xmm1 | |
33 | .p2align 4, 0x90 | |
34 | .LBB0_10: # =>This Inner Loop Header: Depth=1 -- peel loop: each pass adds 4 elements (two unaligned 16-byte loads) into xmm0/xmm1 and advances rcx by 4 | |
35 | movdqu xmm2, xmmword ptr [rdi + 8*rcx] | |
36 | movdqu xmm3, xmmword ptr [rdi + 8*rcx + 16] | |
37 | paddq xmm0, xmm2 | |
38 | paddq xmm1, xmm3 | |
39 | add rcx, 4 | |
40 | inc rax | |
41 | jne .LBB0_10 | |
42 | jmp .LBB0_11 | |
43 | .LBB0_3: | |
44 | xor r9d, r9d | |
45 | xor eax, eax | |
46 | .LBB0_4: | |
47 | lea rcx, [rdi + 8*r9] | |
48 | sub rsi, r9 | |
49 | .p2align 4, 0x90 | |
50 | .LBB0_5: # =>This Inner Loop Header: Depth=1 -- scalar loop: rcx (set at .LBB0_4 to the address of element r9) walks one element per pass, rsi (count minus r9) counts down to zero, rax accumulates on top of its incoming value | |
51 | add rax, qword ptr [rcx] | |
52 | add rcx, 8 | |
53 | dec rsi | |
54 | jne .LBB0_5 | |
55 | jmp .LBB0_15 | |
56 | .LBB0_1: | |
57 | xor eax, eax | |
58 | .LBB0_15: | |
59 | mov qword ptr [rdx], rax | |
60 | mov rsp, rbp | |
61 | pop rbp | |
62 | ret | |
63 | .LBB0_8: | |
64 | xor ecx, ecx | |
65 | pxor xmm0, xmm0 | |
66 | pxor xmm1, xmm1 | |
67 | .LBB0_11: | |
68 | cmp r8, 12 | |
69 | jb .LBB0_14 | |
70 | # BB#12: reached only when r8 >= 12; rax = r9 - rcx = elements still to add in the unrolled loop; rcx becomes a byte pointer biased by +112 so the loop's loads use displacements -112..0 | |
71 | mov rax, r9 | |
72 | sub rax, rcx | |
73 | lea rcx, [rdi + 8*rcx + 112] | |
74 | .p2align 4, 0x90 | |
75 | .LBB0_13: # =>This Inner Loop Header: Depth=1 -- unrolled loop: eight unaligned 16-byte loads (16 elements) per pass folded into xmm0/xmm1; rcx advances 128 bytes and rax drops by 16 until it reaches zero | |
76 | movdqu xmm2, xmmword ptr [rcx - 112] | |
77 | movdqu xmm3, xmmword ptr [rcx - 96] | |
78 | movdqu xmm4, xmmword ptr [rcx - 80] | |
79 | movdqu xmm5, xmmword ptr [rcx - 64] | |
80 | paddq xmm2, xmm0 | |
81 | paddq xmm3, xmm1 | |
82 | movdqu xmm6, xmmword ptr [rcx - 48] | |
83 | movdqu xmm7, xmmword ptr [rcx - 32] | |
84 | paddq xmm6, xmm4 | |
85 | paddq xmm6, xmm2 | |
86 | paddq xmm7, xmm5 | |
87 | paddq xmm7, xmm3 | |
88 | movdqu xmm0, xmmword ptr [rcx - 16] | |
89 | movdqu xmm1, xmmword ptr [rcx] | |
90 | paddq xmm0, xmm6 | |
91 | paddq xmm1, xmm7 | |
92 | sub rcx, -128 | |
93 | add rax, -16 | |
94 | jne .LBB0_13 | |
95 | .LBB0_14: | |
96 | paddq xmm0, xmm1 | |
97 | pshufd xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1]: the two 64-bit halves swapped, so the next paddq leaves the combined SIMD total in the low lane; movq copies it to rax, and the scalar loop at .LBB0_4 finishes any elements past r9 | |
98 | paddq xmm1, xmm0 | |
99 | movq rax, xmm1 | |
100 | cmp r9, rsi | |
101 | jne .LBB0_4 | |
102 | jmp .LBB0_15 | |
103 | .Lfunc_end0: | |
104 | .size sum_int64_sse4, .Lfunc_end0-sum_int64_sse4 | |
105 | ||
106 | ||
107 | .ident "Apple LLVM version 9.0.0 (clang-900.0.39.2)" | |
108 | .section ".note.GNU-stack","",@progbits |