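# git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/go/arrow/math/_lib/int64_sse4.s
# Commit message: import quincy 17.2.0
# Path: [ceph.git] / ceph / src / arrow / go / arrow / math / _lib / int64_sse4.s
# Blame columns: Commit | Line | Data
# Commit: 1d09f67e
# TL (blame author column, presumably author initials)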
#-----------------------------------------------------------------------
# sum_int64_sse4 -- add up an array of 64-bit integers with SSE paddq.
#
# Compiler-generated (see .ident at the end) from _lib/int64.c,
# GNU as, Intel syntax without register prefixes.
#
# Register roles as used by the code below:
#   rdi  address of the first 64-bit element
#   rsi  number of elements
#   rdx  address that receives the 64-bit sum
# The C prototype is not visible here; presumably
#   void sum_int64_sse4(const int64_t *buf, size_t len, int64_t *res)
# -- TODO confirm against _lib/int64.c.
#
# Result:   the sum is stored to [rdx]; rax holds the same value on return.
#           All additions are plain 64-bit adds (paddq / add), so the sum
#           wraps modulo 2^64.
# Clobbers: rax, rcx, rsi, r8, r9, xmm0-xmm7, flags.
# Stack:    rbp is saved/restored; no stack slots are used.
#
# Strategy:
#   len == 0   store 0.
#   len <= 3   scalar loop only (.LBB0_5).
#   otherwise  r9 = len rounded down to a multiple of 4 is summed with two
#              xmm accumulators, 4 elements per step.  (steps mod 4) steps
#              are peeled off in .LBB0_10, the rest run unrolled 4x
#              (16 elements per iteration) in .LBB0_13, and the scalar
#              loop finishes the remaining len - r9 elements.
#-----------------------------------------------------------------------
	.text
	.intel_syntax noprefix
	.file	"_lib/int64.c"
	.globl	sum_int64_sse4
	.p2align	4, 0x90
	.type	sum_int64_sse4,@function
sum_int64_sse4:                         # @sum_int64_sse4
# BB#0:
	push	rbp
	mov	rbp, rsp
	and	rsp, -8                 # rsp is restored from rbp at .LBB0_15
	test	rsi, rsi
	je	.LBB0_1                 # len == 0 -> result is 0
# BB#2:
	cmp	rsi, 3
	jbe	.LBB0_3                 # too short for a vector step -> scalar only
# BB#6:
	mov	r9, rsi
	and	r9, -4                  # r9 = elements covered by the vector loops
	je	.LBB0_3
# BB#7:
	lea	r8, [r9 - 4]            # r8 = r9 - 4, reused for the r9 < 16 test below
	mov	eax, r8d                # 32-bit math suffices: only the low 2 bits survive the mask
	shr	eax, 2
	inc	eax                     # eax = number of 4-element steps (low bits)
	and	rax, 3                  # rax = steps mod 4 = iterations to peel
	je	.LBB0_8                 # nothing to peel
# BB#9:
	neg	rax                     # count up from -peel to 0
	pxor	xmm0, xmm0              # accumulator for elements 0,1 of each step
	xor	ecx, ecx                # rcx = element index
	pxor	xmm1, xmm1              # accumulator for elements 2,3 of each step
	.p2align	4, 0x90
.LBB0_10:                               # =>This Inner Loop Header: Depth=1
	# Peel loop: one 4-element step per iteration.
	movdqu	xmm2, xmmword ptr [rdi + 8*rcx]
	movdqu	xmm3, xmmword ptr [rdi + 8*rcx + 16]
	paddq	xmm0, xmm2
	paddq	xmm1, xmm3
	add	rcx, 4
	inc	rax
	jne	.LBB0_10
	jmp	.LBB0_11
.LBB0_3:
	# Scalar-only entry: start at element 0 with a zero sum.
	xor	r9d, r9d
	xor	eax, eax
.LBB0_4:
	# Scalar tail: rax = sum so far, r9 = index of the first unsummed element.
	lea	rcx, [rdi + 8*r9]       # rcx = &buf[r9]
	sub	rsi, r9                 # rsi = elements still to add
	.p2align	4, 0x90
.LBB0_5:                                # =>This Inner Loop Header: Depth=1
	add	rax, qword ptr [rcx]
	add	rcx, 8
	dec	rsi
	jne	.LBB0_5
	jmp	.LBB0_15
.LBB0_1:
	xor	eax, eax                # empty input -> sum is 0
.LBB0_15:
	# Common exit: publish the sum and tear down the frame.
	mov	qword ptr [rdx], rax
	mov	rsp, rbp
	pop	rbp
	ret
.LBB0_8:
	# No peel iterations needed: zero the index and both accumulators.
	xor	ecx, ecx
	pxor	xmm0, xmm0
	pxor	xmm1, xmm1
.LBB0_11:
	cmp	r8, 12
	jb	.LBB0_14                # r9 < 16: the peel loop already covered everything
# BB#12:
	mov	rax, r9
	sub	rax, rcx                # rax = vector elements left (multiple of 16)
	lea	rcx, [rdi + 8*rcx + 112] # bias the pointer so the 8 loads use offsets -112..0
	.p2align	4, 0x90
.LBB0_13:                               # =>This Inner Loop Header: Depth=1
	# Unrolled loop: 8 loads of 16 bytes = 16 elements per iteration.
	# Loads at -112/-80/-48/-16 fold into xmm0, those at -96/-64/-32/0 into xmm1.
	movdqu	xmm2, xmmword ptr [rcx - 112]
	movdqu	xmm3, xmmword ptr [rcx - 96]
	movdqu	xmm4, xmmword ptr [rcx - 80]
	movdqu	xmm5, xmmword ptr [rcx - 64]
	paddq	xmm2, xmm0
	paddq	xmm3, xmm1
	movdqu	xmm6, xmmword ptr [rcx - 48]
	movdqu	xmm7, xmmword ptr [rcx - 32]
	paddq	xmm6, xmm4
	paddq	xmm6, xmm2
	paddq	xmm7, xmm5
	paddq	xmm7, xmm3
	movdqu	xmm0, xmmword ptr [rcx - 16]
	movdqu	xmm1, xmmword ptr [rcx]
	paddq	xmm0, xmm6
	paddq	xmm1, xmm7
	sub	rcx, -128               # advance 128 bytes (16 elements)
	add	rax, -16
	jne	.LBB0_13
.LBB0_14:
	# Horizontal reduction: merge the two accumulators, then the two lanes.
	paddq	xmm0, xmm1
	pshufd	xmm1, xmm0, 78          # xmm1 = xmm0[2,3,0,1]
	paddq	xmm1, xmm0
	movq	rax, xmm1               # rax = sum of the first r9 elements
	cmp	r9, rsi
	jne	.LBB0_4                 # len not a multiple of 4 -> finish in scalar code
	jmp	.LBB0_15
.Lfunc_end0:
	.size	sum_int64_sse4, .Lfunc_end0-sum_int64_sse4


	.ident	"Apple LLVM version 9.0.0 (clang-900.0.39.2)"
	.section	".note.GNU-stack","",@progbits