git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/go/arrow/math/_lib/float64_sse4.s
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / arrow / math / _lib / float64_sse4.s
1 .text
2 .intel_syntax noprefix
3 .file "_lib/float64.c"
# ----------------------------------------------------------------------
# sum_float64_sse4 -- add up an array of 64-bit floats.
#
# Compiler output (see the .ident directive in this file), GAS Intel syntax.
#
# Inputs, as read by the code below:
#   rdi   base address of the doubles (indexed as [rdi + 8*i])
#   rsi   element count (compared as an unsigned value)
#   rdx   address that receives the 8-byte result
# NOTE(review): this matches the System V AMD64 argument order for
# something like (const double *buf, size_t len, double *res); the C
# prototype is not visible here -- confirm against _lib/float64.c.
#
# Result:   the sum is stored to [rdx]; a count of zero stores +0.0.
# Clobbers: rax, rcx, rsi, r8, r9, xmm0-xmm7, flags.
#
# Strategy:
#   - count <= 3: scalar loop only.
#   - otherwise the first (count & -4) elements are summed with packed
#     adds, four doubles per step, into two independent accumulators
#     (xmm0 = A, xmm1 = B).  A short loop peels off (steps mod 4) steps
#     so the main loop can take four steps (16 doubles) per pass.
#   - A and B are folded into one scalar, then any leftover elements
#     are added by the scalar loop.
# Lanes are accumulated separately, so the floating-point addition
# order differs from a plain left-to-right loop over the array.
# ----------------------------------------------------------------------
4 .globl sum_float64_sse4
5 .p2align 4, 0x90                                # align entry to 16 bytes, padding with 0x90
6 .type sum_float64_sse4,@function
7 sum_float64_sse4: # @sum_float64_sse4
8 # BB#0:
9 push rbp
10 mov rbp, rsp
11 and rsp, -8                                    # round rsp down to a multiple of 8; no stack slots are used below
12 xorpd xmm0, xmm0                               # xmm0 = 0.0, the running sum
13 test rsi, rsi
14 je .LBB0_14                                    # count == 0: store 0.0 and return
15 # BB#1:
16 cmp rsi, 3
17 jbe .LBB0_2                                    # count <= 3 (unsigned): scalar loop from index 0
18 # BB#5:
19 mov r9, rsi
20 and r9, -4                                     # r9 = count rounded down to a multiple of 4
21 je .LBB0_2                                     # r9 == 0 -> scalar only (count >= 4 here, so r9 >= 4 and this is never taken)
22 # BB#6:
23 lea r8, [r9 - 4]                               # r8 = r9 - 4
24 mov eax, r8d                                   # low 32 bits suffice: only bits 2-3 of r8 survive the mask below
25 shr eax, 2
26 inc eax                                        # low bits of (r8 / 4) + 1 = r9 / 4 = number of 4-double steps
27 and rax, 3                                     # rax = steps mod 4 = passes for the peel loop
28 je .LBB0_7                                     # nothing to peel
29 # BB#8:
30 neg rax                                        # count upward from -(steps mod 4) to 0
31 xorpd xmm0, xmm0                               # A = {0.0, 0.0}
32 xor ecx, ecx                                   # rcx = element index
33 xorpd xmm1, xmm1                               # B = {0.0, 0.0}
34 .p2align 4, 0x90
35 .LBB0_9: # =>This Inner Loop Header: Depth=1
36 movupd xmm2, xmmword ptr [rdi + 8*rcx]         # elements rcx, rcx+1 (unaligned load)
37 movupd xmm3, xmmword ptr [rdi + 8*rcx + 16]    # elements rcx+2, rcx+3
38 addpd xmm0, xmm2                               # A += first pair
39 addpd xmm1, xmm3                               # B += second pair
40 add rcx, 4
41 inc rax
42 jne .LBB0_9
43 jmp .LBB0_10
44 .LBB0_2:
45 xor r9d, r9d                                   # scalar-only path: start at index 0
46 .LBB0_3:                                       # scalar loop setup; rsi - r9 >= 1 on every path that reaches here
47 lea rax, [rdi + 8*r9]                          # rax = address of element r9
48 sub rsi, r9                                    # rsi = elements still to add
49 .p2align 4, 0x90
50 .LBB0_4: # =>This Inner Loop Header: Depth=1
51 addsd xmm0, qword ptr [rax]                    # low lane of xmm0 += *rax
52 add rax, 8
53 dec rsi
54 jne .LBB0_4
55 .LBB0_14:                                      # common exit
56 movsd qword ptr [rdx], xmm0                    # store the low lane of xmm0 as the result
57 mov rsp, rbp
58 pop rbp
59 ret
60 .LBB0_7:                                       # no peel passes: just zero the index and both accumulators
61 xor ecx, ecx
62 xorpd xmm0, xmm0
63 xorpd xmm1, xmm1
64 .LBB0_10:
65 cmp r8, 12
66 jb .LBB0_13                                    # r8 < 12 means fewer than four steps in total: skip the main loop
67 # BB#11:
68 mov rax, r9
69 sub rax, rcx                                   # rax = elements left for the main loop (a non-zero multiple of 16)
70 lea rcx, [rdi + 8*rcx + 112]                   # rcx = address of element rcx plus 112, so the loads below use -112..0
71 .p2align 4, 0x90
72 .LBB0_12: # =>This Inner Loop Header: Depth=1
73 movupd xmm2, xmmword ptr [rcx - 112]           # v0
74 movupd xmm3, xmmword ptr [rcx - 96]            # v1
75 movupd xmm4, xmmword ptr [rcx - 80]            # v2
76 movupd xmm5, xmmword ptr [rcx - 64]            # v3
77 addpd xmm2, xmm0                               # xmm2 = v0 + A
78 addpd xmm3, xmm1                               # xmm3 = v1 + B
79 movupd xmm6, xmmword ptr [rcx - 48]            # v4
80 movupd xmm7, xmmword ptr [rcx - 32]            # v5
81 addpd xmm6, xmm4                               # xmm6 = v4 + v2
82 addpd xmm6, xmm2                               # xmm6 += v0 + A
83 addpd xmm7, xmm5                               # xmm7 = v5 + v3
84 addpd xmm7, xmm3                               # xmm7 += v1 + B
85 movupd xmm0, xmmword ptr [rcx - 16]            # v6
86 movupd xmm1, xmmword ptr [rcx]                 # v7
87 addpd xmm0, xmm6                               # new A = v6 + ((v4 + v2) + (v0 + old A))
88 addpd xmm1, xmm7                               # new B = v7 + ((v5 + v3) + (v1 + old B))
89 sub rcx, -128                                  # advance the pointer by 128 bytes (16 doubles)
90 add rax, -16                                   # 16 fewer elements to go
91 jne .LBB0_12
92 .LBB0_13:
93 addpd xmm0, xmm1                               # fold B into A
94 haddpd xmm0, xmm0                              # both lanes = A[0] + A[1]
95 cmp r9, rsi
96 jne .LBB0_3                                    # count is not a multiple of 4: add the tail with the scalar loop
97 jmp .LBB0_14
98 .Lfunc_end0:
99 .size sum_float64_sse4, .Lfunc_end0-sum_float64_sse4
100
101
102 .ident "Apple LLVM version 9.0.0 (clang-900.0.39.2)"
103 .section ".note.GNU-stack","",@progbits