]>
Commit | Line | Data |
---|---|---|
1d09f67e TL |
1 | //+build !noasm !appengine |
2 | // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT | |
3 | ||
// _sum_uint64_sse4 adds up a buffer of 64-bit integers and stores the total
// through the result pointer. Additions wrap modulo 2^64 (paddq / add).
//
// The symbol name starts with the middle dot (U+00B7), which the Go assembler
// uses to place the symbol in the current package.
//
// Arguments ($0-24: no local frame, 24 bytes of arguments):
//   buf+0(FP)  -> DI (rdi)  address of the first 64-bit element
//   len+8(FP)  -> SI (rsi)  number of 64-bit elements
//   res+16(FP) -> DX (rdx)  address that receives the 64-bit sum
//
// Strategy:
//   len == 0 : store 0.
//   len <= 3 : scalar loop only (LBB0_3 / LBB0_5).
//   len >= 4 : r9 = len & -4 elements are summed with two 128-bit
//              accumulators (xmm0, xmm1): first in 4-element passes
//              (LBB0_10), then in 16-element passes (LBB0_13). The two
//              accumulators are folded into rax (LBB0_14) and the remaining
//              len - r9 elements are added by the scalar loop.
//
// Registers written: rax, rcx, rsi, r8, r9, xmm0-xmm7, flags.
// rdi and rdx are only read.
//
// The WORD/LONG/BYTE values are the raw instruction encodings; the trailing
// comment on each line is the Intel-syntax disassembly of those bytes.
4 | TEXT ·_sum_uint64_sse4(SB), $0-24 | |
5 | ||
6 | MOVQ buf+0(FP), DI | |
7 | MOVQ len+8(FP), SI | |
8 | MOVQ res+16(FP), DX | |
9 | ||
// Dispatch on len: 0 -> store zero, 1..3 -> scalar loop, otherwise vector path.
10 | WORD $0x8548; BYTE $0xf6 // test rsi, rsi | |
11 | JE LBB0_1 | |
12 | LONG $0x03fe8348 // cmp rsi, 3 | |
13 | JBE LBB0_3 | |
// r9 = len rounded down to a multiple of 4: the element count for the vector loops.
14 | WORD $0x8949; BYTE $0xf1 // mov r9, rsi | |
15 | LONG $0xfce18349 // and r9, -4 | |
16 | JE LBB0_3 | |
// r8 = r9 - 4, so (r8 >> 2) + 1 is the number of 4-element groups.
// rax = that group count modulo 4 = passes needed in the 4-element loop.
17 | LONG $0xfc418d4d // lea r8, [r9 - 4] | |
18 | WORD $0x8944; BYTE $0xc0 // mov eax, r8d | |
19 | WORD $0xe8c1; BYTE $0x02 // shr eax, 2 | |
20 | WORD $0xc0ff // inc eax | |
21 | LONG $0x03e08348 // and rax, 3 | |
22 | JE LBB0_8 | |
// Negate the pass count so the loop can count up to zero; rcx is the element
// index and xmm0/xmm1 are the two zeroed accumulators.
23 | WORD $0xf748; BYTE $0xd8 // neg rax | |
24 | LONG $0xc0ef0f66 // pxor xmm0, xmm0 | |
25 | WORD $0xc931 // xor ecx, ecx | |
26 | LONG $0xc9ef0f66 // pxor xmm1, xmm1 | |
27 | ||
// 4-element loop: each pass loads 32 bytes and adds two elements into each accumulator.
28 | LBB0_10: | |
29 | LONG $0x146f0ff3; BYTE $0xcf // movdqu xmm2, oword [rdi + 8*rcx] | |
30 | LONG $0x5c6f0ff3; WORD $0x10cf // movdqu xmm3, oword [rdi + 8*rcx + 16] | |
31 | LONG $0xc2d40f66 // paddq xmm0, xmm2 | |
32 | LONG $0xcbd40f66 // paddq xmm1, xmm3 | |
33 | LONG $0x04c18348 // add rcx, 4 | |
34 | WORD $0xff48; BYTE $0xc0 // inc rax | |
35 | JNE LBB0_10 | |
36 | JMP LBB0_11 | |
37 | ||
// Scalar-only entry (len <= 3): start at element 0 with a zero running sum,
// then fall through into the scalar tail setup.
38 | LBB0_3: | |
39 | WORD $0x3145; BYTE $0xc9 // xor r9d, r9d | |
40 | WORD $0xc031 // xor eax, eax | |
41 | ||
// Scalar tail setup: rcx = address of element r9, rsi = elements still to add.
42 | LBB0_4: | |
43 | LONG $0xcf0c8d4a // lea rcx, [rdi + 8*r9] | |
44 | WORD $0x294c; BYTE $0xce // sub rsi, r9 | |
45 | ||
// Scalar loop: add one element per pass into rax until rsi reaches zero.
46 | LBB0_5: | |
47 | WORD $0x0348; BYTE $0x01 // add rax, qword [rcx] | |
48 | LONG $0x08c18348 // add rcx, 8 | |
49 | WORD $0xff48; BYTE $0xce // dec rsi | |
50 | JNE LBB0_5 | |
51 | JMP LBB0_15 | |
52 | ||
// Empty input: the sum is 0.
53 | LBB0_1: | |
54 | WORD $0xc031 // xor eax, eax | |
55 | ||
// Common exit: store the total held in rax through the res pointer.
56 | LBB0_15: | |
57 | WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax | |
58 | RET | |
59 | ||
// Vector entry when the group count is a multiple of 4: the 4-element loop is
// skipped, so the element index and both accumulators are zeroed here instead.
60 | LBB0_8: | |
61 | WORD $0xc931 // xor ecx, ecx | |
62 | LONG $0xc0ef0f66 // pxor xmm0, xmm0 | |
63 | LONG $0xc9ef0f66 // pxor xmm1, xmm1 | |
64 | ||
// r8 < 12 means r9 < 16: fewer than four groups in total, all of which were
// already consumed by the 4-element loop, so go straight to the reduction.
// Otherwise rax = elements left (r9 - rcx) and rcx becomes a byte pointer
// biased by +112 so the unrolled loop can address its 128 bytes as -112..0.
65 | LBB0_11: | |
66 | LONG $0x0cf88349 // cmp r8, 12 | |
67 | JB LBB0_14 | |
68 | WORD $0x894c; BYTE $0xc8 // mov rax, r9 | |
69 | WORD $0x2948; BYTE $0xc8 // sub rax, rcx | |
70 | LONG $0xcf4c8d48; BYTE $0x70 // lea rcx, [rdi + 8*rcx + 112] | |
71 | ||
// Unrolled loop: 16 elements (eight 16-byte loads) per pass. Loads at even
// 16-byte slots are chained into xmm0, loads at odd slots into xmm1.
72 | LBB0_13: | |
73 | LONG $0x516f0ff3; BYTE $0x90 // movdqu xmm2, oword [rcx - 112] | |
74 | LONG $0x596f0ff3; BYTE $0xa0 // movdqu xmm3, oword [rcx - 96] | |
75 | LONG $0x616f0ff3; BYTE $0xb0 // movdqu xmm4, oword [rcx - 80] | |
76 | LONG $0x696f0ff3; BYTE $0xc0 // movdqu xmm5, oword [rcx - 64] | |
77 | LONG $0xd0d40f66 // paddq xmm2, xmm0 | |
78 | LONG $0xd9d40f66 // paddq xmm3, xmm1 | |
79 | LONG $0x716f0ff3; BYTE $0xd0 // movdqu xmm6, oword [rcx - 48] | |
80 | LONG $0x796f0ff3; BYTE $0xe0 // movdqu xmm7, oword [rcx - 32] | |
81 | LONG $0xf4d40f66 // paddq xmm6, xmm4 | |
82 | LONG $0xf2d40f66 // paddq xmm6, xmm2 | |
83 | LONG $0xfdd40f66 // paddq xmm7, xmm5 | |
84 | LONG $0xfbd40f66 // paddq xmm7, xmm3 | |
85 | LONG $0x416f0ff3; BYTE $0xf0 // movdqu xmm0, oword [rcx - 16] | |
86 | LONG $0x096f0ff3 // movdqu xmm1, oword [rcx] | |
87 | LONG $0xc6d40f66 // paddq xmm0, xmm6 | |
88 | LONG $0xcfd40f66 // paddq xmm1, xmm7 | |
89 | LONG $0x80e98348 // sub rcx, -128 | |
90 | LONG $0xf0c08348 // add rax, -16 | |
91 | JNE LBB0_13 | |
92 | ||
// Reduction: fold xmm1 into xmm0, swap the two 64-bit halves (pshufd 78) and
// add again so the low lane holds the vector total, then move it to rax.
// If r9 != len there are 1..3 elements left for the scalar tail.
93 | LBB0_14: | |
94 | LONG $0xc1d40f66 // paddq xmm0, xmm1 | |
95 | LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78 | |
96 | LONG $0xc8d40f66 // paddq xmm1, xmm0 | |
97 | LONG $0x7e0f4866; BYTE $0xc8 // movq rax, xmm1 | |
98 | WORD $0x3949; BYTE $0xf1 // cmp r9, rsi | |
99 | JNE LBB0_4 | |
100 | JMP LBB0_15 |