]> git.proxmox.com Git - ceph.git/blob - ceph/src/isa-l/igzip/crc_utils_04.asm
8cb8c3bc00e418d1714229c820e17a9f40294cae
[ceph.git] / ceph / src / isa-l / igzip / crc_utils_04.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
%include "options.asm"
%include "reg_sizes.asm"

; Functional versions of CRC macros

%include "igzip_buffer_utils_04.asm"

; 4 x 128-bit CLMUL folding constants used while streaming data through
; COPY_IN_CRC; defined in another translation unit.
extern fold_4

; Register map shared by both routines in this file.  The CRC state is
; kept folded as 4 x 128 bits across xmm0-xmm3.
%define crc_0	xmm0 ; in/out: crc state
%define crc_1	xmm1 ; in/out: crc state
%define crc_2	xmm2 ; in/out: crc state
%define crc_3	xmm3 ; in/out: crc state
%define crc_fold xmm4 ; in: (loaded from fold_4)
%define crc_tmp0 xmm5 ; tmp
%define crc_tmp1 xmm6 ; tmp
%define crc_tmp2 xmm7 ; tmp
%define crc_tmp3 xmm8 ; tmp
%define crc_tmp4 xmm9 ; tmp
%define tmp4	rax  ; scratch GPR handed to COPY_IN_CRC
50
; copy x bytes (rounded up to 16 bytes) from src to dst with crc
; src & dst are unaligned
; void copy_in_crc(uint8_t *dst, uint8_t *src, uint32_t size, uint32_t *crc)
;
; NOTE(review): the body is written for the Windows x64 argument registers;
; on elf64 the prologue below remaps the System V argument registers first.
; arg 1: rcx: pointer to dst
; arg 2: rdx: pointer to src
; arg 3: r8: size (in bytes)
; arg 4: r9: pointer to CRC (4 x 16-byte folded state; vmovdqa below
;            implies it must be 16-byte aligned)
;; %if 0
global copy_in_crc_04
copy_in_crc_04:
%ifidn __OUTPUT_FORMAT__, elf64
	; Remap System V args (rdi, rsi, rdx, rcx) onto the Win64 registers
	; (rcx, rdx, r8, r9) that the rest of the routine expects.
	mov	r9, rcx			; arg4: crc pointer
	mov	r8, rdx			; arg3: size
	mov	rdx, rsi		; arg2: src
	mov	rcx, rdi		; arg1: dst
%endif

	; Save xmm6-xmm9 (crc_tmp1..crc_tmp4): these are callee-saved under
	; the Win64 ABI.  Redundant on elf64, where all xmm regs are
	; volatile, but harmless.  The extra 8 bytes restore 16-byte stack
	; alignment (rsp % 16 == 8 on entry) so the aligned vmovdqa stores
	; below are legal.
	sub	rsp, 8 + 4*16
	vmovdqa	[rsp+0*16], xmm6
	vmovdqa	[rsp+1*16], xmm7
	vmovdqa	[rsp+2*16], xmm8
	vmovdqa	[rsp+3*16], xmm9

	; Load the 512-bit folded CRC state (4 x 128 bits) from *crc.
	vmovdqa	crc_0, [r9 + 0*16]
	vmovdqa	crc_1, [r9 + 1*16]
	vmovdqa	crc_2, [r9 + 2*16]
	vmovdqa	crc_3, [r9 + 3*16]

	; Fetch the streaming fold constants, then copy src -> dst while
	; folding the copied data into the CRC state (macro provided by
	; igzip_buffer_utils_04.asm; tmp4/crc_tmp* are its scratch regs).
	vmovdqa	crc_fold, [fold_4 WRT_OPT]
	COPY_IN_CRC	rcx, rdx, r8, tmp4, crc_0, crc_1, crc_2, crc_3, \
			crc_fold, \
			crc_tmp0, crc_tmp1, crc_tmp2, crc_tmp3, crc_tmp4

	; Write the updated CRC state back to *crc.
	vmovdqa	[r9 + 0*16], crc_0
	vmovdqa	[r9 + 1*16], crc_1
	vmovdqa	[r9 + 2*16], crc_2
	vmovdqa	[r9 + 3*16], crc_3

	; Restore callee-saved xmm registers and release the stack frame.
	vmovdqa	xmm9, [rsp+3*16]
	vmovdqa	xmm8, [rsp+2*16]
	vmovdqa	xmm7, [rsp+1*16]
	vmovdqa	xmm6, [rsp+0*16]
	add	rsp, 8 + 4*16
	ret
96
; Convert 512-bit CRC data to real 32-bit value
; uint32_t crc_512to32(uint32_t *crc)
; arg 1: rcx: pointer to CRC (4 x 16-byte folded state; vmovdqa below
;             implies 16-byte alignment)
; returns: eax: 32 bit crc
;
; Standard PCLMULQDQ reduction sequence: fold the four 128-bit lanes down
; to one (rk1/rk2), fold 128 -> 64 bits (rk5/rk6), Barrett-reduce to
; 32 bits (rk7/rk8), then complement the result.
; Clobbers xmm0-xmm5 only (all volatile under both Win64 and SysV).
global crc_512to32_04
crc_512to32_04:
%ifidn __OUTPUT_FORMAT__, elf64
	mov	rcx, rdi		; remap SysV arg1 onto the Win64 register used below
%endif

	; Load the 4 x 128-bit folded state.
	vmovdqa	crc_0, [rcx + 0*16]
	vmovdqa	crc_1, [rcx + 1*16]
	vmovdqa	crc_2, [rcx + 2*16]
	vmovdqa	crc_3, [rcx + 3*16]

	vmovdqa	crc_fold, [rk1 WRT_OPT]	; loads the rk1 (low) / rk2 (high) qword pair

	; fold the 4 xmm registers to 1 xmm register with different constants:
	; each step carry-less-multiplies both halves of lane N by rk1/rk2
	; and XORs the two products into lane N+1.
	vmovdqa	crc_tmp0, crc_0
	vpclmulqdq	crc_0, crc_fold, 0x1	; high(crc_0) * low(fold)
	vpclmulqdq	crc_tmp0, crc_fold, 0x10 ; low(crc_0) * high(fold)
	vpxor	crc_1, crc_tmp0
	vpxor	crc_1, crc_0			; crc_1 absorbs crc_0

	vmovdqa	crc_tmp0, crc_1
	vpclmulqdq	crc_1, crc_fold, 0x1
	vpclmulqdq	crc_tmp0, crc_fold, 0x10
	vpxor	crc_2, crc_tmp0
	vpxor	crc_2, crc_1			; crc_2 absorbs crc_1

	vmovdqa	crc_tmp0, crc_2
	vpclmulqdq	crc_2, crc_fold, 0x1
	vpclmulqdq	crc_tmp0, crc_fold, 0x10
	vpxor	crc_3, crc_tmp0
	vpxor	crc_3, crc_2			; crc_3 now holds the single 128-bit residue

	; Fold 128 bits down to 64 using the rk5/rk6 pair.
	vmovdqa	crc_fold, [rk5 WRT_OPT]
	vmovdqa	crc_0, crc_3

	vpclmulqdq	crc_3, crc_fold, 0	; low(crc_3) * rk5

	vpsrldq	crc_0, 8			; bring high 64 bits down

	vpxor	crc_3, crc_0

	vmovdqa	crc_0, crc_3

	; Fold the remaining upper 32 bits using rk6 (high qword of the pair).
	vpslldq	crc_3, 4

	vpclmulqdq	crc_3, crc_fold, 0x10

	vpxor	crc_3, crc_0

	vpand	crc_3, [mask2 WRT_OPT]		; drop the already-reduced low 32 bits

	; Barrett reduction of the 64-bit residue to 32 bits using rk7/rk8.
	vmovdqa	crc_1, crc_3			; keep residue for the final XOR

	vmovdqa	crc_2, crc_3

	vmovdqa	crc_fold, [rk7 WRT_OPT]		; loads the rk7 (low) / rk8 (high) pair

	vpclmulqdq	crc_3, crc_fold, 0	; multiply by quotient constant rk7
	vpxor	crc_3, crc_2

	vpand	crc_3, [mask WRT_OPT]		; keep low 64 bits of the quotient estimate

	vmovdqa	crc_2, crc_3

	vpclmulqdq	crc_3, crc_fold, 0x10	; multiply back by the polynomial (rk8)

	vpxor	crc_3, crc_2

	vpxor	crc_3, crc_1			; remainder lands in dword 2 of crc_3

	vpextrd	eax, crc_3, 2			; extract the 32-bit remainder

	not	eax				; final CRC32 post-inversion

	ret
179 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
180
181
; NOTE(review): these are read-only constants; .rodata would be the more
; conventional section, but .data matches this file's existing layout.
section .data

align 16

; CLMUL constant pairs for the reduction in crc_512to32_04.  Each vmovdqa
; in the code loads two adjacent qwords as one xmm register (low qword
; first), so rk1/rk2, rk5/rk6 and rk7/rk8 are consumed as pairs.
rk1: dq 0x00000000ccaa009e		; 512 -> 128 fold, used on the high half (imm 0x1)
rk2: dq 0x00000001751997d0		; 512 -> 128 fold, used on the low half (imm 0x10)
rk5: dq 0x00000000ccaa009e		; 128 -> 64 fold constant (same value as rk1 here)
rk6: dq 0x0000000163cd6124		; upper-32-bit fold constant (high qword of rk5 pair)
rk7: dq 0x00000001f7011640		; Barrett reduction: quotient constant
rk8: dq 0x00000001db710640		; Barrett reduction: polynomial constant

mask:	dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000	; keep low 64 bits of an xmm
mask2:	dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF	; clear low 32 bits of an xmm