// Extracted from the git.proxmox.com blame view of ceph.git ("import quincy 17.2.0", 1d09f67e):
// ceph / src / arrow / go / arrow / math / float64_avx2_amd64.s
//+build !noasm !appengine

// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT

// _sum_float64_avx2 adds up a contiguous run of float64 values and stores the
// total through a result pointer. Frame is $0-24: no locals, three 8-byte
// arguments, nothing returned on the stack.
//
//   buf+0(FP)  -> DI  address of the first float64
//   len+8(FP)  -> SI  number of float64 elements (treated as unsigned)
//   res+16(FP) -> DX  address that receives the float64 sum
//
// NOTE(review): the matching Go prototype is not part of this file; it is
// presumably declared with three pointer-sized parameters -- confirm there.
//
// The x86 instructions are emitted as raw bytes (LONG/WORD/BYTE/QUAD); the
// trailing comment on each line is the disassembly of those bytes.
//
// Register roles:
//   R9      len rounded down to a multiple of 32 = elements handled by the vector code
//   R8      R9 - 32
//   AX      prologue: negated iteration count; main loop: elements still to add;
//           scalar tail: address of the next element
//   CX      prologue: element index; main loop: byte pointer biased by +1792
//   Y0..Y7  eight independent 4-lane partial sums (32 elements per pass)
//   X0      final scalar accumulator (low lane)
//
// Phases:
//   1. len == 0  -> store 0.0 and return.
//   2. len <= 31 -> scalar loop only.
//   3. otherwise -> a prologue adds (blocks mod 8) 32-element blocks, the main
//      loop adds 8 blocks (256 elements) per iteration, the eight accumulators
//      are reduced to one scalar, and a scalar loop adds the len % 32 leftovers.
//
// Because elements are accumulated in 32 interleaved partial sums, the rounding
// of the result can differ from a strict left-to-right summation.
// The memory operands are VEX-encoded vaddpd/vaddsd loads, which do not require
// buf to be 32-byte aligned.
TEXT ·_sum_float64_avx2(SB), $0-24

	MOVQ buf+0(FP), DI
	MOVQ len+8(FP), SI
	MOVQ res+16(FP), DX

	// X0 = 0.0 so that the len == 0 path stores a zero result.
	LONG $0xc057f9c5 // vxorpd xmm0, xmm0, xmm0
	WORD $0x8548; BYTE $0xf6 // test rsi, rsi
	JE LBB0_14
	// Fewer than 32 elements: not worth a vector pass, go scalar from index 0.
	LONG $0x1ffe8348 // cmp rsi, 31
	JBE LBB0_2
	// R9 = len & -32 (vectorised element count). The JE below cannot be taken
	// here because len > 31 guarantees a non-zero result.
	WORD $0x8949; BYTE $0xf1 // mov r9, rsi
	LONG $0xe0e18349 // and r9, -32
	JE LBB0_2
	// AX = ((R8 >> 5) + 1) & 7 = (number of 32-element blocks) mod 8.
	// Only bits 5..7 of R8 matter, so the 32-bit shift/inc is sufficient.
	LONG $0xe0418d4d // lea r8, [r9 - 32]
	WORD $0x8944; BYTE $0xc0 // mov eax, r8d
	WORD $0xe8c1; BYTE $0x05 // shr eax, 5
	WORD $0xc0ff // inc eax
	LONG $0x07e08348 // and rax, 7
	// Block count already a multiple of 8: skip the prologue.
	JE LBB0_7
	// AX = -(blocks mod 8); it is incremented up to zero by the prologue loop.
	WORD $0xf748; BYTE $0xd8 // neg rax
	// Clear the eight accumulators and the element index.
	LONG $0xc057fdc5 // vxorpd ymm0, ymm0, ymm0
	WORD $0xc931 // xor ecx, ecx
	LONG $0xc957f5c5 // vxorpd ymm1, ymm1, ymm1
	LONG $0xd257edc5 // vxorpd ymm2, ymm2, ymm2
	LONG $0xdb57e5c5 // vxorpd ymm3, ymm3, ymm3
	LONG $0xe457ddc5 // vxorpd ymm4, ymm4, ymm4
	LONG $0xed57d5c5 // vxorpd ymm5, ymm5, ymm5
	LONG $0xf657cdc5 // vxorpd ymm6, ymm6, ymm6
	LONG $0xff57c5c5 // vxorpd ymm7, ymm7, ymm7

// Prologue: one 32-element block (8 x 4 doubles) per iteration, CX counts elements.
LBB0_9:
	LONG $0x0458fdc5; BYTE $0xcf // vaddpd ymm0, ymm0, yword [rdi + 8*rcx]
	LONG $0x4c58f5c5; WORD $0x20cf // vaddpd ymm1, ymm1, yword [rdi + 8*rcx + 32]
	LONG $0x5458edc5; WORD $0x40cf // vaddpd ymm2, ymm2, yword [rdi + 8*rcx + 64]
	LONG $0x5c58e5c5; WORD $0x60cf // vaddpd ymm3, ymm3, yword [rdi + 8*rcx + 96]
	QUAD $0x000080cfa458ddc5; BYTE $0x00 // vaddpd ymm4, ymm4, yword [rdi + 8*rcx + 128]
	QUAD $0x0000a0cfac58d5c5; BYTE $0x00 // vaddpd ymm5, ymm5, yword [rdi + 8*rcx + 160]
	QUAD $0x0000c0cfb458cdc5; BYTE $0x00 // vaddpd ymm6, ymm6, yword [rdi + 8*rcx + 192]
	QUAD $0x0000e0cfbc58c5c5; BYTE $0x00 // vaddpd ymm7, ymm7, yword [rdi + 8*rcx + 224]
	LONG $0x20c18348 // add rcx, 32
	WORD $0xff48; BYTE $0xc0 // inc rax
	JNE LBB0_9
	JMP LBB0_10

// Scalar-only entry: nothing was vectorised, so the tail starts at index 0.
LBB0_2:
	WORD $0x3145; BYTE $0xc9 // xor r9d, r9d

// Scalar tail setup: AX = &buf[R9], SI = len - R9 (always > 0 when reached).
LBB0_3:
	LONG $0xcf048d4a // lea rax, [rdi + 8*r9]
	WORD $0x294c; BYTE $0xce // sub rsi, r9

// Scalar tail loop: add one double per iteration into the low lane of X0.
LBB0_4:
	LONG $0x0058fbc5 // vaddsd xmm0, xmm0, qword [rax]
	LONG $0x08c08348 // add rax, 8
	WORD $0xff48; BYTE $0xce // dec rsi
	JNE LBB0_4

// Common exit: *res = low lane of X0; clear upper YMM state before returning.
LBB0_14:
	LONG $0x0211fbc5 // vmovsd qword [rdx], xmm0
	VZEROUPPER
	RET

// No prologue needed: start the main loop at element 0 with cleared accumulators.
LBB0_7:
	WORD $0xc931 // xor ecx, ecx
	LONG $0xc057fdc5 // vxorpd ymm0, ymm0, ymm0
	LONG $0xc957f5c5 // vxorpd ymm1, ymm1, ymm1
	LONG $0xd257edc5 // vxorpd ymm2, ymm2, ymm2
	LONG $0xdb57e5c5 // vxorpd ymm3, ymm3, ymm3
	LONG $0xe457ddc5 // vxorpd ymm4, ymm4, ymm4
	LONG $0xed57d5c5 // vxorpd ymm5, ymm5, ymm5
	LONG $0xf657cdc5 // vxorpd ymm6, ymm6, ymm6
	LONG $0xff57c5c5 // vxorpd ymm7, ymm7, ymm7

// R8 < 224 means fewer than 8 blocks in total, all already consumed by the
// prologue. Otherwise AX = elements left for the main loop (a multiple of 256)
// and CX becomes a byte pointer biased by +1792 so that the displacements
// -1792..+224 below cover 2048 contiguous bytes.
LBB0_10:
	LONG $0xe0f88149; WORD $0x0000; BYTE $0x00 // cmp r8, 224
	JB LBB0_13
	WORD $0x894c; BYTE $0xc8 // mov rax, r9
	WORD $0x2948; BYTE $0xc8 // sub rax, rcx
	QUAD $0x00000700cf8c8d48 // lea rcx, [rdi + 8*rcx + 1792]

// Main loop: 8 blocks x 8 accumulators x 4 doubles = 256 elements per iteration.
LBB0_12:
	QUAD $0xfffff9e0b958c5c5 // vaddpd ymm7, ymm7, yword [rcx - 1568]
	QUAD $0xfffff9c0b158cdc5 // vaddpd ymm6, ymm6, yword [rcx - 1600]
	QUAD $0xfffff9a0a958d5c5 // vaddpd ymm5, ymm5, yword [rcx - 1632]
	QUAD $0xfffff980a158ddc5 // vaddpd ymm4, ymm4, yword [rcx - 1664]
	QUAD $0xfffff9609958e5c5 // vaddpd ymm3, ymm3, yword [rcx - 1696]
	QUAD $0xfffff9409158edc5 // vaddpd ymm2, ymm2, yword [rcx - 1728]
	QUAD $0xfffff9208958f5c5 // vaddpd ymm1, ymm1, yword [rcx - 1760]
	QUAD $0xfffff9008158fdc5 // vaddpd ymm0, ymm0, yword [rcx - 1792]
	QUAD $0xfffffa008158fdc5 // vaddpd ymm0, ymm0, yword [rcx - 1536]
	QUAD $0xfffffa208958f5c5 // vaddpd ymm1, ymm1, yword [rcx - 1504]
	QUAD $0xfffffa409158edc5 // vaddpd ymm2, ymm2, yword [rcx - 1472]
	QUAD $0xfffffa609958e5c5 // vaddpd ymm3, ymm3, yword [rcx - 1440]
	QUAD $0xfffffa80a158ddc5 // vaddpd ymm4, ymm4, yword [rcx - 1408]
	QUAD $0xfffffaa0a958d5c5 // vaddpd ymm5, ymm5, yword [rcx - 1376]
	QUAD $0xfffffac0b158cdc5 // vaddpd ymm6, ymm6, yword [rcx - 1344]
	QUAD $0xfffffae0b958c5c5 // vaddpd ymm7, ymm7, yword [rcx - 1312]
	QUAD $0xfffffbe0b958c5c5 // vaddpd ymm7, ymm7, yword [rcx - 1056]
	QUAD $0xfffffbc0b158cdc5 // vaddpd ymm6, ymm6, yword [rcx - 1088]
	QUAD $0xfffffba0a958d5c5 // vaddpd ymm5, ymm5, yword [rcx - 1120]
	QUAD $0xfffffb80a158ddc5 // vaddpd ymm4, ymm4, yword [rcx - 1152]
	QUAD $0xfffffb609958e5c5 // vaddpd ymm3, ymm3, yword [rcx - 1184]
	QUAD $0xfffffb409158edc5 // vaddpd ymm2, ymm2, yword [rcx - 1216]
	QUAD $0xfffffb208958f5c5 // vaddpd ymm1, ymm1, yword [rcx - 1248]
	QUAD $0xfffffb008158fdc5 // vaddpd ymm0, ymm0, yword [rcx - 1280]
	QUAD $0xfffffc008158fdc5 // vaddpd ymm0, ymm0, yword [rcx - 1024]
	QUAD $0xfffffc208958f5c5 // vaddpd ymm1, ymm1, yword [rcx - 992]
	QUAD $0xfffffc409158edc5 // vaddpd ymm2, ymm2, yword [rcx - 960]
	QUAD $0xfffffc609958e5c5 // vaddpd ymm3, ymm3, yword [rcx - 928]
	QUAD $0xfffffc80a158ddc5 // vaddpd ymm4, ymm4, yword [rcx - 896]
	QUAD $0xfffffca0a958d5c5 // vaddpd ymm5, ymm5, yword [rcx - 864]
	QUAD $0xfffffcc0b158cdc5 // vaddpd ymm6, ymm6, yword [rcx - 832]
	QUAD $0xfffffce0b958c5c5 // vaddpd ymm7, ymm7, yword [rcx - 800]
	QUAD $0xfffffde0b958c5c5 // vaddpd ymm7, ymm7, yword [rcx - 544]
	QUAD $0xfffffdc0b158cdc5 // vaddpd ymm6, ymm6, yword [rcx - 576]
	QUAD $0xfffffda0a958d5c5 // vaddpd ymm5, ymm5, yword [rcx - 608]
	QUAD $0xfffffd80a158ddc5 // vaddpd ymm4, ymm4, yword [rcx - 640]
	QUAD $0xfffffd609958e5c5 // vaddpd ymm3, ymm3, yword [rcx - 672]
	QUAD $0xfffffd409158edc5 // vaddpd ymm2, ymm2, yword [rcx - 704]
	QUAD $0xfffffd208958f5c5 // vaddpd ymm1, ymm1, yword [rcx - 736]
	QUAD $0xfffffd008158fdc5 // vaddpd ymm0, ymm0, yword [rcx - 768]
	QUAD $0xfffffe008158fdc5 // vaddpd ymm0, ymm0, yword [rcx - 512]
	QUAD $0xfffffe208958f5c5 // vaddpd ymm1, ymm1, yword [rcx - 480]
	QUAD $0xfffffe409158edc5 // vaddpd ymm2, ymm2, yword [rcx - 448]
	QUAD $0xfffffe609958e5c5 // vaddpd ymm3, ymm3, yword [rcx - 416]
	QUAD $0xfffffe80a158ddc5 // vaddpd ymm4, ymm4, yword [rcx - 384]
	QUAD $0xfffffea0a958d5c5 // vaddpd ymm5, ymm5, yword [rcx - 352]
	QUAD $0xfffffec0b158cdc5 // vaddpd ymm6, ymm6, yword [rcx - 320]
	QUAD $0xfffffee0b958c5c5 // vaddpd ymm7, ymm7, yword [rcx - 288]
	LONG $0x7958c5c5; BYTE $0xe0 // vaddpd ymm7, ymm7, yword [rcx - 32]
	LONG $0x7158cdc5; BYTE $0xc0 // vaddpd ymm6, ymm6, yword [rcx - 64]
	LONG $0x6958d5c5; BYTE $0xa0 // vaddpd ymm5, ymm5, yword [rcx - 96]
	LONG $0x6158ddc5; BYTE $0x80 // vaddpd ymm4, ymm4, yword [rcx - 128]
	QUAD $0xffffff609958e5c5 // vaddpd ymm3, ymm3, yword [rcx - 160]
	QUAD $0xffffff409158edc5 // vaddpd ymm2, ymm2, yword [rcx - 192]
	QUAD $0xffffff208958f5c5 // vaddpd ymm1, ymm1, yword [rcx - 224]
	QUAD $0xffffff008158fdc5 // vaddpd ymm0, ymm0, yword [rcx - 256]
	LONG $0x0158fdc5 // vaddpd ymm0, ymm0, yword [rcx]
	LONG $0x4958f5c5; BYTE $0x20 // vaddpd ymm1, ymm1, yword [rcx + 32]
	LONG $0x5158edc5; BYTE $0x40 // vaddpd ymm2, ymm2, yword [rcx + 64]
	LONG $0x5958e5c5; BYTE $0x60 // vaddpd ymm3, ymm3, yword [rcx + 96]
	QUAD $0x00000080a158ddc5 // vaddpd ymm4, ymm4, yword [rcx + 128]
	QUAD $0x000000a0a958d5c5 // vaddpd ymm5, ymm5, yword [rcx + 160]
	QUAD $0x000000c0b158cdc5 // vaddpd ymm6, ymm6, yword [rcx + 192]
	QUAD $0x000000e0b958c5c5 // vaddpd ymm7, ymm7, yword [rcx + 224]
	// Advance 2048 bytes / 256 elements; the flags from the add on AX drive the JNE.
	LONG $0x00c18148; WORD $0x0008; BYTE $0x00 // add rcx, 2048
	LONG $0xff000548; WORD $0xffff // add rax, -256
	JNE LBB0_12

// Reduction: fold Y0..Y7 pairwise into Y0, add its high 128-bit half onto the
// low half, then horizontally add so the low lane of X0 holds the vector total.
// If R9 != len there are leftover elements for the scalar tail.
LBB0_13:
	LONG $0xcd58f5c5 // vaddpd ymm1, ymm1, ymm5
	LONG $0xdf58e5c5 // vaddpd ymm3, ymm3, ymm7
	LONG $0xc458fdc5 // vaddpd ymm0, ymm0, ymm4
	LONG $0xd658edc5 // vaddpd ymm2, ymm2, ymm6
	LONG $0xc258fdc5 // vaddpd ymm0, ymm0, ymm2
	LONG $0xcb58f5c5 // vaddpd ymm1, ymm1, ymm3
	LONG $0xc158fdc5 // vaddpd ymm0, ymm0, ymm1
	LONG $0x197de3c4; WORD $0x01c1 // vextractf128 xmm1, ymm0, 1
	LONG $0xc158fdc5 // vaddpd ymm0, ymm0, ymm1
	LONG $0xc07cfdc5 // vhaddpd ymm0, ymm0, ymm0
	WORD $0x3949; BYTE $0xf1 // cmp r9, rsi
	JNE LBB0_3
	JMP LBB0_14