git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/go/arrow/math/_lib/float64_avx2.s
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / arrow / math / _lib / float64_avx2.s
# Assembler preamble: everything below is emitted into the text (code)
# section and is written in Intel operand order (destination first) with
# unprefixed register names.
1 .text
2 .intel_syntax noprefix
# Records the name of the C source file this listing was generated from.
3 .file "_lib/float64.c"
# ---------------------------------------------------------------------
# sum_float64_avx2
#
# Adds up rsi consecutive 8-byte doubles starting at address rdi and
# stores the scalar total to [rdx]. When rsi == 0 the stored value is 0.0.
#
# NOTE(review): the register usage matches the System V AMD64 argument
# order (rdi, rsi, rdx), presumably (double *buf, size_t len, double *res);
# the C prototype lives in _lib/float64.c and is not visible here - confirm.
#
# Register roles:
#   rdi        base address of the input (never modified)
#   rsi        element count; reduced to the leftover count before the
#              scalar loop
#   rdx        address the result is written to
#   r9         rsi rounded down to a multiple of 32, i.e. the number of
#              elements consumed by the vector loops (0 on the scalar-only
#              path)
#   r8         r9 - 32
#   rcx        element index in .LBB0_9, then a biased byte pointer in
#              .LBB0_12
#   rax        loop counter or scalar read pointer, depending on the section
#   ymm0-ymm7  eight independent accumulators holding four doubles each
#
# Overwrites rax, rcx, r8, r9, rsi, ymm0-ymm7 and the flags; rbp and rsp
# are saved and restored. No stack slots are read or written.
#
# The summation order differs from a plain left-to-right scalar sum: the
# vector part keeps 32 partial sums that are only combined at .LBB0_13.
# ---------------------------------------------------------------------
4 .globl sum_float64_avx2
# Align the entry point to 16 bytes, padding with 0x90 (nop) bytes.
5 .p2align 4, 0x90
6 .type sum_float64_avx2,@function
7 sum_float64_avx2: # @sum_float64_avx2
8 # BB#0:
# Frame setup. rsp is rounded down to a multiple of 8 and later restored
# from rbp.
9 push rbp
10 mov rbp, rsp
11 and rsp, -8
# xmm0 = 0.0: this is the result for an empty input and the initial value
# of the scalar-only path.
12 vxorpd xmm0, xmm0, xmm0
# Empty input: go straight to the store.
13 test rsi, rsi
14 je .LBB0_14
15 # BB#1:
# Fewer than 32 elements (unsigned compare): scalar loop only.
16 cmp rsi, 31
17 jbe .LBB0_2
18 # BB#5:
# r9 = rsi & -32 = number of elements covered by whole 32-element blocks.
# The zero check cannot fire here because rsi >= 32 at this point.
19 mov r9, rsi
20 and r9, -32
21 je .LBB0_2
22 # BB#6:
# rax = ((r8 >> 5) + 1) & 7, which equals (number of 32-element blocks) mod 8.
# Only bits 5..7 of r8 influence the result, so using the low 32 bits of r8
# loses nothing.
23 lea r8, [r9 - 32]
24 mov eax, r8d
25 shr eax, 5
26 inc eax
27 and rax, 7
# Block count is a multiple of 8: no residual blocks, skip .LBB0_9.
28 je .LBB0_7
29 # BB#8:
# Negate the residual block count so the loop can count up to zero, and
# clear the element index and all eight accumulators.
30 neg rax
31 vxorpd ymm0, ymm0, ymm0
32 xor ecx, ecx
33 vxorpd ymm1, ymm1, ymm1
34 vxorpd ymm2, ymm2, ymm2
35 vxorpd ymm3, ymm3, ymm3
36 vxorpd ymm4, ymm4, ymm4
37 vxorpd ymm5, ymm5, ymm5
38 vxorpd ymm6, ymm6, ymm6
39 vxorpd ymm7, ymm7, ymm7
40 .p2align 4, 0x90
# Residual-block loop: runs (block count mod 8) times. Each pass adds one
# 32-element block (256 bytes) into the accumulators, the j-th 32-byte chunk
# going to ymm<j>, and advances rcx by 32 elements.
41 .LBB0_9: # =>This Inner Loop Header: Depth=1
42 vaddpd ymm0, ymm0, ymmword ptr [rdi + 8*rcx]
43 vaddpd ymm1, ymm1, ymmword ptr [rdi + 8*rcx + 32]
44 vaddpd ymm2, ymm2, ymmword ptr [rdi + 8*rcx + 64]
45 vaddpd ymm3, ymm3, ymmword ptr [rdi + 8*rcx + 96]
46 vaddpd ymm4, ymm4, ymmword ptr [rdi + 8*rcx + 128]
47 vaddpd ymm5, ymm5, ymmword ptr [rdi + 8*rcx + 160]
48 vaddpd ymm6, ymm6, ymmword ptr [rdi + 8*rcx + 192]
49 vaddpd ymm7, ymm7, ymmword ptr [rdi + 8*rcx + 224]
50 add rcx, 32
51 inc rax
52 jne .LBB0_9
53 jmp .LBB0_10
# Scalar-only entry: nothing was consumed by the vector loops, so r9 = 0.
54 .LBB0_2:
55 xor r9d, r9d
# Scalar loop setup: rax = address of the first element not yet summed,
# rsi = number of elements left. That count is non-zero on every path that
# reaches this label (rsi != 0 with r9 == 0, or r9 != rsi).
56 .LBB0_3:
57 lea rax, [rdi + 8*r9]
58 sub rsi, r9
59 .p2align 4, 0x90
# Scalar loop: adds one double per pass into the low element of xmm0.
60 .LBB0_4: # =>This Inner Loop Header: Depth=1
61 vaddsd xmm0, xmm0, qword ptr [rax]
62 add rax, 8
63 dec rsi
64 jne .LBB0_4
# Common exit: write the low double of xmm0 to [rdx], undo the frame, and
# clear the upper halves of the ymm registers before returning.
65 .LBB0_14:
66 vmovsd qword ptr [rdx], xmm0
67 mov rsp, rbp
68 pop rbp
69 vzeroupper
70 ret
# No residual blocks: start the unrolled loop from element 0 with cleared
# accumulators.
71 .LBB0_7:
72 xor ecx, ecx
73 vxorpd ymm0, ymm0, ymm0
74 vxorpd ymm1, ymm1, ymm1
75 vxorpd ymm2, ymm2, ymm2
76 vxorpd ymm3, ymm3, ymm3
77 vxorpd ymm4, ymm4, ymm4
78 vxorpd ymm5, ymm5, ymm5
79 vxorpd ymm6, ymm6, ymm6
80 vxorpd ymm7, ymm7, ymm7
# r8 < 224 means r9 < 256, i.e. fewer than eight blocks in total; they have
# all been handled by .LBB0_9 already, so go to the reduction.
81 .LBB0_10:
82 cmp r8, 224
83 jb .LBB0_13
84 # BB#11:
# rax = r9 - rcx = elements left for the unrolled loop (a non-zero multiple
# of 256 here). rcx becomes a byte pointer biased by +1792 so that the 64
# loads below use displacements from -1792 to +224.
85 mov rax, r9
86 sub rax, rcx
87 lea rcx, [rdi + 8*rcx + 1792]
88 .p2align 4, 0x90
# Unrolled loop: each pass sums 256 doubles (2048 bytes) as eight 256-byte
# groups. Within every group the j-th 32-byte chunk is added to ymm<j>; only
# the instruction order alternates between ascending and descending.
89 .LBB0_12: # =>This Inner Loop Header: Depth=1
90 vaddpd ymm7, ymm7, ymmword ptr [rcx - 1568]
91 vaddpd ymm6, ymm6, ymmword ptr [rcx - 1600]
92 vaddpd ymm5, ymm5, ymmword ptr [rcx - 1632]
93 vaddpd ymm4, ymm4, ymmword ptr [rcx - 1664]
94 vaddpd ymm3, ymm3, ymmword ptr [rcx - 1696]
95 vaddpd ymm2, ymm2, ymmword ptr [rcx - 1728]
96 vaddpd ymm1, ymm1, ymmword ptr [rcx - 1760]
97 vaddpd ymm0, ymm0, ymmword ptr [rcx - 1792]
98 vaddpd ymm0, ymm0, ymmword ptr [rcx - 1536]
99 vaddpd ymm1, ymm1, ymmword ptr [rcx - 1504]
100 vaddpd ymm2, ymm2, ymmword ptr [rcx - 1472]
101 vaddpd ymm3, ymm3, ymmword ptr [rcx - 1440]
102 vaddpd ymm4, ymm4, ymmword ptr [rcx - 1408]
103 vaddpd ymm5, ymm5, ymmword ptr [rcx - 1376]
104 vaddpd ymm6, ymm6, ymmword ptr [rcx - 1344]
105 vaddpd ymm7, ymm7, ymmword ptr [rcx - 1312]
106 vaddpd ymm7, ymm7, ymmword ptr [rcx - 1056]
107 vaddpd ymm6, ymm6, ymmword ptr [rcx - 1088]
108 vaddpd ymm5, ymm5, ymmword ptr [rcx - 1120]
109 vaddpd ymm4, ymm4, ymmword ptr [rcx - 1152]
110 vaddpd ymm3, ymm3, ymmword ptr [rcx - 1184]
111 vaddpd ymm2, ymm2, ymmword ptr [rcx - 1216]
112 vaddpd ymm1, ymm1, ymmword ptr [rcx - 1248]
113 vaddpd ymm0, ymm0, ymmword ptr [rcx - 1280]
114 vaddpd ymm0, ymm0, ymmword ptr [rcx - 1024]
115 vaddpd ymm1, ymm1, ymmword ptr [rcx - 992]
116 vaddpd ymm2, ymm2, ymmword ptr [rcx - 960]
117 vaddpd ymm3, ymm3, ymmword ptr [rcx - 928]
118 vaddpd ymm4, ymm4, ymmword ptr [rcx - 896]
119 vaddpd ymm5, ymm5, ymmword ptr [rcx - 864]
120 vaddpd ymm6, ymm6, ymmword ptr [rcx - 832]
121 vaddpd ymm7, ymm7, ymmword ptr [rcx - 800]
122 vaddpd ymm7, ymm7, ymmword ptr [rcx - 544]
123 vaddpd ymm6, ymm6, ymmword ptr [rcx - 576]
124 vaddpd ymm5, ymm5, ymmword ptr [rcx - 608]
125 vaddpd ymm4, ymm4, ymmword ptr [rcx - 640]
126 vaddpd ymm3, ymm3, ymmword ptr [rcx - 672]
127 vaddpd ymm2, ymm2, ymmword ptr [rcx - 704]
128 vaddpd ymm1, ymm1, ymmword ptr [rcx - 736]
129 vaddpd ymm0, ymm0, ymmword ptr [rcx - 768]
130 vaddpd ymm0, ymm0, ymmword ptr [rcx - 512]
131 vaddpd ymm1, ymm1, ymmword ptr [rcx - 480]
132 vaddpd ymm2, ymm2, ymmword ptr [rcx - 448]
133 vaddpd ymm3, ymm3, ymmword ptr [rcx - 416]
134 vaddpd ymm4, ymm4, ymmword ptr [rcx - 384]
135 vaddpd ymm5, ymm5, ymmword ptr [rcx - 352]
136 vaddpd ymm6, ymm6, ymmword ptr [rcx - 320]
137 vaddpd ymm7, ymm7, ymmword ptr [rcx - 288]
138 vaddpd ymm7, ymm7, ymmword ptr [rcx - 32]
139 vaddpd ymm6, ymm6, ymmword ptr [rcx - 64]
140 vaddpd ymm5, ymm5, ymmword ptr [rcx - 96]
141 vaddpd ymm4, ymm4, ymmword ptr [rcx - 128]
142 vaddpd ymm3, ymm3, ymmword ptr [rcx - 160]
143 vaddpd ymm2, ymm2, ymmword ptr [rcx - 192]
144 vaddpd ymm1, ymm1, ymmword ptr [rcx - 224]
145 vaddpd ymm0, ymm0, ymmword ptr [rcx - 256]
146 vaddpd ymm0, ymm0, ymmword ptr [rcx]
147 vaddpd ymm1, ymm1, ymmword ptr [rcx + 32]
148 vaddpd ymm2, ymm2, ymmword ptr [rcx + 64]
149 vaddpd ymm3, ymm3, ymmword ptr [rcx + 96]
150 vaddpd ymm4, ymm4, ymmword ptr [rcx + 128]
151 vaddpd ymm5, ymm5, ymmword ptr [rcx + 160]
152 vaddpd ymm6, ymm6, ymmword ptr [rcx + 192]
153 vaddpd ymm7, ymm7, ymmword ptr [rcx + 224]
# Advance the pointer by 2048 bytes and the remaining count by 256 elements;
# the loop ends when the count reaches exactly zero.
154 add rcx, 2048
155 add rax, -256
156 jne .LBB0_12
# Reduction: fold the eight accumulators pairwise into ymm0, add its upper
# 128-bit half onto the lower half, then add the two doubles of the low half
# horizontally so the total sits in the low element of xmm0.
157 .LBB0_13:
158 vaddpd ymm1, ymm1, ymm5
159 vaddpd ymm3, ymm3, ymm7
160 vaddpd ymm0, ymm0, ymm4
161 vaddpd ymm2, ymm2, ymm6
162 vaddpd ymm0, ymm0, ymm2
163 vaddpd ymm1, ymm1, ymm3
164 vaddpd ymm0, ymm0, ymm1
165 vextractf128 xmm1, ymm0, 1
166 vaddpd ymm0, ymm0, ymm1
167 vhaddpd ymm0, ymm0, ymm0
# If the count was not a multiple of 32, finish the last rsi - r9 elements
# in the scalar loop (which continues accumulating into xmm0); otherwise
# store the result.
168 cmp r9, rsi
169 jne .LBB0_3
170 jmp .LBB0_14
# End marker used to compute the symbol size of the function.
171 .Lfunc_end0:
172 .size sum_float64_avx2, .Lfunc_end0-sum_float64_avx2
173
174
# Identification string of the compiler that emitted this listing.
175 .ident "Apple LLVM version 9.0.0 (clang-900.0.39.2)"
# Empty .note.GNU-stack section: by convention this marks the object as not
# requiring an executable stack.
176 .section ".note.GNU-stack","",@progbits