]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
5 | ; modification, are permitted provided that the following conditions | |
6 | ; are met: | |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
30 | %include "options.asm" | |
31 | %include "reg_sizes.asm" | |
32 | ||
33 | ; Functional versions of CRC macros | |
34 | ||
35 | %include "igzip_buffer_utils_04.asm" | |
36 | ||
37 | extern fold_4 | |
38 | ||
39 | %define crc_0 xmm0 ; in/out: crc state, bytes 0*16..0*16+15 of the CRC buffer | |
40 | %define crc_1 xmm1 ; in/out: crc state, bytes 1*16..1*16+15 of the CRC buffer | |
41 | %define crc_2 xmm2 ; in/out: crc state, bytes 2*16..2*16+15 of the CRC buffer | |
42 | %define crc_3 xmm3 ; in/out: crc state, bytes 3*16..3*16+15 of the CRC buffer | |
43 | %define crc_fold xmm4 ; in: constants (loaded from fold_4 in copy_in_crc_04; from rk1, rk5, rk7 in crc_512to32_04) | |
44 | %define crc_tmp0 xmm5 ; tmp (the only temporary used by crc_512to32_04) | |
45 | %define crc_tmp1 xmm6 ; tmp (xmm6 is saved/restored by copy_in_crc_04) | |
46 | %define crc_tmp2 xmm7 ; tmp (xmm7 is saved/restored by copy_in_crc_04) | |
47 | %define crc_tmp3 xmm8 ; tmp (xmm8 is saved/restored by copy_in_crc_04) | |
48 | %define crc_tmp4 xmm9 ; tmp (xmm9 is saved/restored by copy_in_crc_04) | |
49 | %define tmp4 rax ; general-purpose scratch register handed to COPY_IN_CRC | |
50 | ||
51 | ; copy x bytes (rounded up to 16 bytes) from src to dst with crc (the work itself is done by the COPY_IN_CRC macro) | |
52 | ; src & dst are unaligned; crc is read and written with vmovdqa, so its 4*16 bytes must be 16-byte aligned | |
53 | ; void copy_in_crc_04(uint8_t *dst, uint8_t *src, uint32_t size, uint32_t *crc) | |
54 | ; arg 1: rcx: pointer to dst (arrives in rdi when assembled as elf64) | |
55 | ; arg 2: rdx: pointer to src (arrives in rsi when assembled as elf64) | |
56 | ; arg 3: r8: size (in bytes) (arrives in rdx when assembled as elf64); NOTE(review): the full 64-bit r8 is handed to COPY_IN_CRC - confirm bits 63:32 are zero or ignored | |
57 | ; arg 4: r9: pointer to CRC (arrives in rcx when assembled as elf64) | |
58 | ;; %if 0 | |
59 | global copy_in_crc_04 | |
60 | copy_in_crc_04: | |
61 | %ifidn __OUTPUT_FORMAT__, elf64 | |
62 | mov r9, rcx ; arg 4 (crc): rcx -> r9, done first because rcx is overwritten below | |
63 | mov r8, rdx ; arg 3 (size): rdx -> r8, done before rdx is overwritten below | |
64 | mov rdx, rsi ; arg 2 (src): rsi -> rdx | |
65 | mov rcx, rdi ; arg 1 (dst): rdi -> rcx | |
66 | %endif | |
67 | ||
68 | ; Save xmm6-xmm9 (callee-saved in the Microsoft x64 ABI); they back crc_tmp1..crc_tmp4. Done for every output format. | |
69 | sub rsp, 8 + 4*16 ; four 16-byte save slots; the extra 8 turns the entry value (rsp = 8 mod 16) into a 16-byte aligned rsp for vmovdqa | |
70 | vmovdqa [rsp+0*16], xmm6 | |
71 | vmovdqa [rsp+1*16], xmm7 | |
72 | vmovdqa [rsp+2*16], xmm8 | |
73 | vmovdqa [rsp+3*16], xmm9 | |
74 | ||
75 | vmovdqa crc_0, [r9 + 0*16] ; load the 64-byte CRC state into crc_0..crc_3 | |
76 | vmovdqa crc_1, [r9 + 1*16] | |
77 | vmovdqa crc_2, [r9 + 2*16] | |
78 | vmovdqa crc_3, [r9 + 3*16] | |
79 | ||
80 | vmovdqa crc_fold, [fold_4 WRT_OPT] ; 16 bytes of constants from the external symbol fold_4 | |
81 | COPY_IN_CRC rcx, rdx, r8, tmp4, crc_0, crc_1, crc_2, crc_3, \ | |
82 | crc_fold, \ | |
83 | crc_tmp0, crc_tmp1, crc_tmp2, crc_tmp3, crc_tmp4 | |
84 | ||
85 | vmovdqa [r9 + 0*16], crc_0 ; write the updated CRC state back; r9 is assumed untouched by COPY_IN_CRC (it is not passed to it) | |
86 | vmovdqa [r9 + 1*16], crc_1 | |
87 | vmovdqa [r9 + 2*16], crc_2 | |
88 | vmovdqa [r9 + 3*16], crc_3 | |
89 | ||
90 | vmovdqa xmm9, [rsp+3*16] ; restore the saved xmm6-xmm9 | |
91 | vmovdqa xmm8, [rsp+2*16] | |
92 | vmovdqa xmm7, [rsp+1*16] | |
93 | vmovdqa xmm6, [rsp+0*16] | |
94 | add rsp, 8 + 4*16 ; release the save area (must match the sub above) | |
95 | ret | |
96 | ||
97 | ; Convert 512-bit CRC data (four 16-byte lanes) to real 32-bit value; the buffer is only read, never written | |
98 | ; uint32_t crc_512to32_04(uint32_t *crc) | |
99 | ; arg 1: rcx: pointer to CRC (arrives in rdi when assembled as elf64); read with vmovdqa, so it must be 16-byte aligned | |
100 | ; returns: eax: 32 bit crc; uses only xmm0-xmm5, so nothing is saved on the stack | |
101 | global crc_512to32_04 | |
102 | crc_512to32_04: | |
103 | %ifidn __OUTPUT_FORMAT__, elf64 | |
104 | mov rcx, rdi ; arg 1 (crc): rdi -> rcx | |
105 | %endif | |
106 | ||
107 | vmovdqa crc_0, [rcx + 0*16] ; load the 64-byte CRC state into crc_0..crc_3 | |
108 | vmovdqa crc_1, [rcx + 1*16] | |
109 | vmovdqa crc_2, [rcx + 2*16] | |
110 | vmovdqa crc_3, [rcx + 3*16] | |
111 | ||
112 | vmovdqa crc_fold, [rk1 WRT_OPT] ;k1 in the low qword, rk2 in the high qword | |
113 | ||
114 | ; fold the 4 xmm registers to 1 xmm register with different constants: crc_0 -> crc_1 -> crc_2 -> crc_3 | |
115 | vmovdqa crc_tmp0, crc_0 ; keep a copy, both halves of crc_0 are multiplied | |
116 | vpclmulqdq crc_0, crc_fold, 0x1 ; crc_0 = crc_0[127:64] clmul rk1 | |
117 | vpclmulqdq crc_tmp0, crc_fold, 0x10 ; crc_tmp0 = old crc_0[63:0] clmul rk2 | |
118 | vpxor crc_1, crc_tmp0 | |
119 | vpxor crc_1, crc_0 ; crc_1 ^= both products | |
120 | ||
121 | vmovdqa crc_tmp0, crc_1 ; same step, folding crc_1 into crc_2 | |
122 | vpclmulqdq crc_1, crc_fold, 0x1 | |
123 | vpclmulqdq crc_tmp0, crc_fold, 0x10 | |
124 | vpxor crc_2, crc_tmp0 | |
125 | vpxor crc_2, crc_1 | |
126 | ||
127 | vmovdqa crc_tmp0, crc_2 ; same step, folding crc_2 into crc_3 | |
128 | vpclmulqdq crc_2, crc_fold, 0x1 | |
129 | vpclmulqdq crc_tmp0, crc_fold, 0x10 | |
130 | vpxor crc_3, crc_tmp0 | |
131 | vpxor crc_3, crc_2 ; crc_3 now carries all four lanes | |
132 | ||
133 | ||
134 | vmovdqa crc_fold, [rk5 WRT_OPT] ; rk5 in the low qword, rk6 in the high qword | |
135 | vmovdqa crc_0, crc_3 ; crc_0 is free again and serves as a temporary from here on | |
136 | ||
137 | vpclmulqdq crc_3, crc_fold, 0 ; crc_3 = crc_3[63:0] clmul rk5 | |
138 | ||
139 | vpsrldq crc_0, 8 ; move the old high qword down to bits 63:0 | |
140 | ||
141 | vpxor crc_3, crc_0 ; product ^ old high qword | |
142 | ||
143 | vmovdqa crc_0, crc_3 ; keep a copy for the xor after the next multiply | |
144 | ||
145 | ||
146 | vpslldq crc_3, 4 ; byte shift left by 4 (32 bits) | |
147 | ||
148 | vpclmulqdq crc_3, crc_fold, 0x10 ; crc_3 = crc_3[63:0] clmul rk6 | |
149 | ||
150 | ||
151 | vpxor crc_3, crc_0 | |
152 | ||
153 | vpand crc_3, [mask2 WRT_OPT] ; clear bits 31:0, keep bits 127:32 | |
154 | ||
155 | vmovdqa crc_1, crc_3 ; crc_1 holds this value until the final xor | |
156 | ||
157 | vmovdqa crc_2, crc_3 ; second copy, consumed by the xor right after the rk7 multiply | |
158 | ||
159 | vmovdqa crc_fold, [rk7 WRT_OPT] ; rk7 in the low qword, rk8 in the high qword | |
160 | ||
161 | vpclmulqdq crc_3, crc_fold, 0 ; crc_3 = crc_3[63:0] clmul rk7 | |
162 | vpxor crc_3, crc_2 | |
163 | ||
164 | vpand crc_3, [mask WRT_OPT] ; keep bits 63:0, clear bits 127:64 | |
165 | ||
166 | vmovdqa crc_2, crc_3 | |
167 | ||
168 | vpclmulqdq crc_3, crc_fold, 0x10 ; crc_3 = crc_3[63:0] clmul rk8 | |
169 | ||
170 | vpxor crc_3, crc_2 | |
171 | ||
172 | vpxor crc_3, crc_1 ; fold in the copy taken before the rk7/rk8 multiplies | |
173 | ||
174 | vpextrd eax, crc_3, 2 ; the result is dword 2 (bits 95:64) of crc_3 | |
175 | ||
176 | not eax ; return the bitwise complement | |
177 | ||
178 | ret | |
179 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
180 | ||
181 | ||
182 | section .data ; constants below are only read by the code visible in this file | |
183 | ||
184 | align 16 ; every vmovdqa load below starts on a 16-byte boundary (rk1, rk5, rk7) | |
185 | ||
186 | rk1: dq 0x00000000ccaa009e ; rk1:rk2 are loaded as one 16-byte pair for the lane-to-lane folds in crc_512to32_04 | |
187 | rk2: dq 0x00000001751997d0 | |
188 | rk5: dq 0x00000000ccaa009e ; rk5:rk6 are loaded as one pair once all lanes are folded into crc_3 | |
189 | rk6: dq 0x0000000163cd6124 | |
190 | rk7: dq 0x00000001f7011640 ; rk7:rk8 are loaded as one pair for the last two multiplies before the result is extracted | |
191 | rk8: dq 0x00000001db710640 | |
192 | ||
193 | mask: dq 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 ; keeps bits 63:0, clears bits 127:64 | |
194 | mask2: dq 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF ; clears bits 31:0, keeps bits 127:32 |