;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
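; Overview (inferred from this file): PCLMULQDQ-based helpers used by igzip
; to maintain a 512-bit folded CRC state while copying input, and to reduce
; that state to the final gzip CRC-32 value.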

%include "options.asm"
%include "reg_sizes.asm"

; Functional versions of CRC macros

%include "igzip_buffer_utils_04.asm"

extern fold_4

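; fold_4 is defined elsewhere in igzip; it is expected to hold the 128-bit
; pair of folding constants that COPY_IN_CRC multiplies against (via
; PCLMULQDQ) to carry the four-lane CRC state across each block of input.
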
%define	crc_0		xmm0	; in/out: crc state
%define	crc_1		xmm1	; in/out: crc state
%define	crc_2		xmm2	; in/out: crc state
%define	crc_3		xmm3	; in/out: crc state
%define	crc_fold	xmm4	; in: (loaded from fold_4)
%define	crc_tmp0	xmm5	; tmp
%define	crc_tmp1	xmm6	; tmp
%define	crc_tmp2	xmm7	; tmp
%define	crc_tmp3	xmm8	; tmp
%define	crc_tmp4	xmm9	; tmp
%define	tmp4		rax

; Copy 'size' bytes (rounded up to a multiple of 16) from src to dst
; while folding the copied data into the running CRC state.
; src & dst may be unaligned.
; void copy_in_crc(uint8_t *dst, uint8_t *src, uint32_t size, uint32_t *crc)
; arg 1: rcx: pointer to dst
; arg 2: rdx: pointer to src
; arg 3: r8:  size (in bytes)
; arg 4: r9:  pointer to the 4 x 16-byte CRC folding state
;; %if 0
global copy_in_crc_04
copy_in_crc_04:
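	; On ELF64 targets the System V ABI passes the arguments in
	; rdi/rsi/rdx/rcx; shuffle them into the Windows-convention
	; registers that the body below assumes.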
%ifidn __OUTPUT_FORMAT__, elf64
	mov	r9, rcx
	mov	r8, rdx
	mov	rdx, rsi
	mov	rcx, rdi
%endif

	; Save the xmm registers that must be preserved: xmm6-xmm9 are
	; callee-saved under the Windows x64 ABI. The extra 8 bytes keep
	; rsp 16-byte aligned for the vmovdqa stores.
	sub	rsp, 8 + 4*16
	vmovdqa	[rsp+0*16], xmm6
	vmovdqa	[rsp+1*16], xmm7
	vmovdqa	[rsp+2*16], xmm8
	vmovdqa	[rsp+3*16], xmm9

	vmovdqa	crc_0, [r9 + 0*16]
	vmovdqa	crc_1, [r9 + 1*16]
	vmovdqa	crc_2, [r9 + 2*16]
	vmovdqa	crc_3, [r9 + 3*16]

	vmovdqa	crc_fold, [fold_4 WRT_OPT]
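	; COPY_IN_CRC (from igzip_buffer_utils_04.asm) copies the buffer
	; while folding it into crc_0..crc_3 with the constants in crc_fold.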
	COPY_IN_CRC	rcx, rdx, r8, tmp4, crc_0, crc_1, crc_2, crc_3, \
			crc_fold, \
			crc_tmp0, crc_tmp1, crc_tmp2, crc_tmp3, crc_tmp4

	vmovdqa	[r9 + 0*16], crc_0
	vmovdqa	[r9 + 1*16], crc_1
	vmovdqa	[r9 + 2*16], crc_2
	vmovdqa	[r9 + 3*16], crc_3

	vmovdqa	xmm9, [rsp+3*16]
	vmovdqa	xmm8, [rsp+2*16]
	vmovdqa	xmm7, [rsp+1*16]
	vmovdqa	xmm6, [rsp+0*16]
	add	rsp, 8 + 4*16
	ret

; Reduce the 512-bit folded CRC state to the final 32-bit CRC value
; uint32_t crc_512to32(uint32_t *crc)
; arg 1: rcx: pointer to the 4 x 16-byte CRC folding state
; returns: eax: 32-bit crc
global crc_512to32_04
crc_512to32_04:
%ifidn __OUTPUT_FORMAT__, elf64
	mov	rcx, rdi
%endif
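
	; Reduction pipeline: fold the four 128-bit lanes into one (rk1:rk2),
	; fold that lane from 128 bits down to 64 and then 32 significant
	; bits (rk5:rk6), finish with a Barrett reduction (rk7:rk8), and
	; invert the result as CRC-32 requires.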

	vmovdqa	crc_0, [rcx + 0*16]
	vmovdqa	crc_1, [rcx + 1*16]
	vmovdqa	crc_2, [rcx + 2*16]
	vmovdqa	crc_3, [rcx + 3*16]

	vmovdqa	crc_fold, [rk1 WRT_OPT]	; rk1 in the low qword, rk2 in the high

	; fold the 4 xmm registers to 1 xmm register, one lane at a time:
	; crc_{i+1} ^= clmul(crc_i.hi, rk1) ^ clmul(crc_i.lo, rk2)
	vmovdqa	crc_tmp0, crc_0
	vpclmulqdq	crc_0, crc_fold, 0x1
	vpclmulqdq	crc_tmp0, crc_fold, 0x10
	vpxor	crc_1, crc_tmp0
	vpxor	crc_1, crc_0

	vmovdqa	crc_tmp0, crc_1
	vpclmulqdq	crc_1, crc_fold, 0x1
	vpclmulqdq	crc_tmp0, crc_fold, 0x10
	vpxor	crc_2, crc_tmp0
	vpxor	crc_2, crc_1

	vmovdqa	crc_tmp0, crc_2
	vpclmulqdq	crc_2, crc_fold, 0x1
	vpclmulqdq	crc_tmp0, crc_fold, 0x10
	vpxor	crc_3, crc_tmp0
	vpxor	crc_3, crc_2

	; fold the remaining 128 bits down to 64 and then 32 significant
	; bits, using rk5 and rk6 loaded as a pair into crc_fold
	vmovdqa	crc_fold, [rk5 WRT_OPT]
	vmovdqa	crc_0, crc_3

	vpclmulqdq	crc_3, crc_fold, 0
	vpsrldq	crc_0, 8
	vpxor	crc_3, crc_0

	vmovdqa	crc_0, crc_3
	vpslldq	crc_3, 4
	vpclmulqdq	crc_3, crc_fold, 0x10
	vpxor	crc_3, crc_0
	vpand	crc_3, [mask2 WRT_OPT]

	; Barrett reduction: rk7 and rk8 hold the precomputed quotient and
	; polynomial pair for the reflected gzip CRC-32
	vmovdqa	crc_1, crc_3
	vmovdqa	crc_2, crc_3
	vmovdqa	crc_fold, [rk7 WRT_OPT]

	vpclmulqdq	crc_3, crc_fold, 0
	vpxor	crc_3, crc_2
	vpand	crc_3, [mask WRT_OPT]

	vmovdqa	crc_2, crc_3
	vpclmulqdq	crc_3, crc_fold, 0x10
	vpxor	crc_3, crc_2
	vpxor	crc_3, crc_1

	vpextrd	eax, crc_3, 2	; the result lands in the third dword lane
	not	eax		; final inversion required by CRC-32

	ret
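
; Typical usage from C (a sketch; the prototypes and the 4 x 16-byte state
; layout are inferred from this file rather than a published header):
;
;     uint32_t crc[16];                        /* 512-bit folding state  */
;     /* initialize the state as igzip does, then: */
;     copy_in_crc_04(dst, src, size, crc);     /* copy and fold          */
;     uint32_t value = crc_512to32_04(crc);    /* final 32-bit gzip CRC  */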
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

section .data

align 16

rk1:	dq 0x00000000ccaa009e	; lane-fold constants (loaded as the rk1:rk2 pair)
rk2:	dq 0x00000001751997d0
rk5:	dq 0x00000000ccaa009e	; 128 -> 64 bit fold constant
rk6:	dq 0x0000000163cd6124	; 64 -> 32 bit fold constant
rk7:	dq 0x00000001f7011640	; Barrett reduction quotient
rk8:	dq 0x00000001db710640	; Barrett reduction polynomial

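; Each 16-byte load above picks up two adjacent qwords, so [rk1] yields rk1
; in the low qword and rk2 in the high qword; likewise [rk5] pairs rk5 with
; rk6, and [rk7] pairs rk7 with rk8.
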
mask:	dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000	; keep the low 64 bits
mask2:	dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF	; clear the low 32 bits