]>
Commit | Line | Data |
---|---|---|
600a2334 | 1 | /* |
2 | * Intel SHA Extensions optimized implementation of a SHA-256 update function | |
3 | * | |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
9 | * Copyright(c) 2015 Intel Corporation. | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of version 2 of the GNU General Public License as | |
13 | * published by the Free Software Foundation. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, but | |
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * General Public License for more details. | |
19 | * | |
20 | * Contact Information: | |
21 | * Sean Gulley <sean.m.gulley@intel.com> | |
22 | * Tim Chen <tim.c.chen@linux.intel.com> | |
23 | * | |
24 | * BSD LICENSE | |
25 | * | |
26 | * Copyright(c) 2015 Intel Corporation. | |
27 | * | |
28 | * Redistribution and use in source and binary forms, with or without | |
29 | * modification, are permitted provided that the following conditions | |
30 | * are met: | |
31 | * | |
32 | * * Redistributions of source code must retain the above copyright | |
33 | * notice, this list of conditions and the following disclaimer. | |
34 | * * Redistributions in binary form must reproduce the above copyright | |
35 | * notice, this list of conditions and the following disclaimer in | |
36 | * the documentation and/or other materials provided with the | |
37 | * distribution. | |
38 | * * Neither the name of Intel Corporation nor the names of its | |
39 | * contributors may be used to endorse or promote products derived | |
40 | * from this software without specific prior written permission. | |
41 | * | |
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
53 | * | |
54 | */ | |
55 | ||
56 | #include <linux/linkage.h> | |
57 | ||
58 | #define DIGEST_PTR %rdi /* 1st arg: hash state, loaded on entry and stored back on exit */ | |
59 | #define DATA_PTR %rsi /* 2nd arg: input cursor, advanced by 64 bytes per block */ | |
60 | #define NUM_BLKS %rdx /* 3rd arg: block count, converted in place to the end-of-data pointer */ | |
61 | ||
62 | #define SHA256CONSTANTS %rax /* base address of the K256 round-constant table */ | |
63 | ||
64 | #define MSG %xmm0 /* message words + round constants; %xmm0 is the implicit input of sha256rnds2 */ | |
65 | #define STATE0 %xmm1 /* working state, ABEF half once reordered */ | |
66 | #define STATE1 %xmm2 /* working state, CDGH half once reordered */ | |
67 | #define MSGTMP0 %xmm3 /* MSGTMP0-MSGTMP3: rolling window of four-word message schedule groups */ | |
68 | #define MSGTMP1 %xmm4 | |
69 | #define MSGTMP2 %xmm5 | |
70 | #define MSGTMP3 %xmm6 | |
71 | #define MSGTMP4 %xmm7 /* scratch: palignr results and state reordering */ | |
72 | ||
73 | #define SHUF_MASK %xmm8 /* pshufb mask that reverses the bytes of each 32-bit word */ | |
74 | ||
75 | #define ABEF_SAVE %xmm9 /* copy of STATE0 taken at the top of each block */ | |
76 | #define CDGH_SAVE %xmm10 /* copy of STATE1 taken at the top of each block */ | |
77 | ||
78 | /* | |
79 | * Intel SHA Extensions optimized implementation of a SHA-256 update function | |
80 | * | |
81 | * The function takes a pointer to the current hash values, a pointer to the | |
82 | * input data, and a number of 64-byte blocks to process. Once all blocks have | |
83 | * been processed, the digest buffer is updated with the resulting hash value. | |
84 | * The function only processes complete blocks; there is no functionality to | |
85 | * store partial blocks. All message padding and hash value initialization must | |
86 | * be done outside the update function. | |
87 | * | |
88 | * The indented lines in the loop are instructions related to rounds processing. | |
89 | * The non-indented lines are instructions related to the message schedule. | |
90 | * | |
91 | * void sha256_ni_transform(uint32_t *digest, const void *data, | |
92 | * uint32_t numBlocks); | |
93 | * digest: pointer to the eight 32-bit hash state words, updated in place | |
94 | * data: pointer to the input data (numBlocks * 64 bytes) | |
95 | * numBlocks: number of 64-byte blocks to process; 0 leaves the digest untouched | |
96 | */ | |
97 | ||
98 | .text | |
99 | .align 32 | |
100 | ENTRY(sha256_ni_transform) | |
101 | ||
102 | shl $6, NUM_BLKS /* convert to bytes (64 per block); ZF is set when the result is 0. NOTE(review): uses all 64 bits of %rdx - confirm callers pass a 64-bit count */ | |
103 | jz .Ldone_hash /* no data: return without reading or writing the digest */ | |
104 | add DATA_PTR, NUM_BLKS /* pointer to end of data */ | |
105 | ||
106 | /* | |
107 | * load initial hash values | |
108 | * Need to reorder these appropriately | |
109 | * DCBA, HGFE -> ABEF, CDGH | |
110 | */ | |
111 | movdqu 0*16(DIGEST_PTR), STATE0 /* first 16 bytes of the digest (unaligned load) */ | |
112 | movdqu 1*16(DIGEST_PTR), STATE1 /* second 16 bytes of the digest */ | |
113 | ||
114 | pshufd $0xB1, STATE0, STATE0 /* CDAB */ | |
115 | pshufd $0x1B, STATE1, STATE1 /* EFGH */ | |
116 | movdqa STATE0, MSGTMP4 /* keep CDAB: the palignr below overwrites STATE0 */ | |
117 | palignr $8, STATE1, STATE0 /* ABEF */ | |
118 | pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ | |
119 | ||
120 | movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK /* aligned load: the mask must sit on a 16-byte boundary */ | |
121 | lea K256(%rip), SHA256CONSTANTS /* loop-invariant table base */ | |
122 | ||
123 | .Lloop0: /* one iteration per 64-byte block */ | |
124 | /* Save hash values for addition after rounds */ | |
125 | movdqa STATE0, ABEF_SAVE | |
126 | movdqa STATE1, CDGH_SAVE | |
127 | ||
128 | /* Rounds 0-3 */ | |
129 | movdqu 0*16(DATA_PTR), MSG /* bytes 0-15 of the block (unaligned load) */ | |
130 | pshufb SHUF_MASK, MSG /* reverse the bytes of each 32-bit word */ | |
131 | movdqa MSG, MSGTMP0 /* keep W[0..3] for the message schedule */ | |
132 | paddd 0*16(SHA256CONSTANTS), MSG /* MSG = W[0..3] + K[0..3] */ | |
133 | sha256rnds2 STATE0, STATE1 /* two rounds, consuming the low two dwords of MSG */ | |
134 | pshufd $0x0E, MSG, MSG /* bring the upper two dwords of MSG down to the low half */ | |
135 | sha256rnds2 STATE1, STATE0 /* next two rounds */ | |
136 | ||
137 | /* Rounds 4-7 */ | |
138 | movdqu 1*16(DATA_PTR), MSG | |
139 | pshufb SHUF_MASK, MSG | |
140 | movdqa MSG, MSGTMP1 /* keep W[4..7] */ | |
141 | paddd 1*16(SHA256CONSTANTS), MSG | |
142 | sha256rnds2 STATE0, STATE1 | |
143 | pshufd $0x0E, MSG, MSG | |
144 | sha256rnds2 STATE1, STATE0 | |
145 | sha256msg1 MSGTMP1, MSGTMP0 /* first schedule step: MSGTMP0 = W[0..3] + sigma0(W[1..4]) */ | |
146 | ||
147 | /* Rounds 8-11 */ | |
148 | movdqu 2*16(DATA_PTR), MSG | |
149 | pshufb SHUF_MASK, MSG | |
150 | movdqa MSG, MSGTMP2 /* keep W[8..11] */ | |
151 | paddd 2*16(SHA256CONSTANTS), MSG | |
152 | sha256rnds2 STATE0, STATE1 | |
153 | pshufd $0x0E, MSG, MSG | |
154 | sha256rnds2 STATE1, STATE0 | |
155 | sha256msg1 MSGTMP2, MSGTMP1 | |
156 | ||
157 | /* Rounds 12-15 */ | |
158 | movdqu 3*16(DATA_PTR), MSG | |
159 | pshufb SHUF_MASK, MSG | |
160 | movdqa MSG, MSGTMP3 /* keep W[12..15] */ | |
161 | paddd 3*16(SHA256CONSTANTS), MSG | |
162 | sha256rnds2 STATE0, STATE1 | |
163 | movdqa MSGTMP3, MSGTMP4 | |
164 | palignr $4, MSGTMP2, MSGTMP4 /* MSGTMP4 = upper three dwords of MSGTMP2, then low dword of MSGTMP3 (W[9..12]) */ | |
165 | paddd MSGTMP4, MSGTMP0 /* add the W[t-7] terms to the partial schedule sum */ | |
166 | sha256msg2 MSGTMP3, MSGTMP0 /* finish the schedule step: MSGTMP0 = W[16..19] */ | |
167 | pshufd $0x0E, MSG, MSG | |
168 | sha256rnds2 STATE1, STATE0 | |
169 | sha256msg1 MSGTMP3, MSGTMP2 | |
170 | ||
171 | /* Rounds 16-19: from here on MSG is taken from the computed schedule, not from DATA_PTR */ | |
172 | movdqa MSGTMP0, MSG | |
173 | paddd 4*16(SHA256CONSTANTS), MSG | |
174 | sha256rnds2 STATE0, STATE1 | |
175 | movdqa MSGTMP0, MSGTMP4 | |
176 | palignr $4, MSGTMP3, MSGTMP4 | |
177 | paddd MSGTMP4, MSGTMP1 | |
178 | sha256msg2 MSGTMP0, MSGTMP1 | |
179 | pshufd $0x0E, MSG, MSG | |
180 | sha256rnds2 STATE1, STATE0 | |
181 | sha256msg1 MSGTMP0, MSGTMP3 | |
182 | ||
183 | /* Rounds 20-23 */ | |
184 | movdqa MSGTMP1, MSG | |
185 | paddd 5*16(SHA256CONSTANTS), MSG | |
186 | sha256rnds2 STATE0, STATE1 | |
187 | movdqa MSGTMP1, MSGTMP4 | |
188 | palignr $4, MSGTMP0, MSGTMP4 | |
189 | paddd MSGTMP4, MSGTMP2 | |
190 | sha256msg2 MSGTMP1, MSGTMP2 | |
191 | pshufd $0x0E, MSG, MSG | |
192 | sha256rnds2 STATE1, STATE0 | |
193 | sha256msg1 MSGTMP1, MSGTMP0 | |
194 | ||
195 | /* Rounds 24-27 */ | |
196 | movdqa MSGTMP2, MSG | |
197 | paddd 6*16(SHA256CONSTANTS), MSG | |
198 | sha256rnds2 STATE0, STATE1 | |
199 | movdqa MSGTMP2, MSGTMP4 | |
200 | palignr $4, MSGTMP1, MSGTMP4 | |
201 | paddd MSGTMP4, MSGTMP3 | |
202 | sha256msg2 MSGTMP2, MSGTMP3 | |
203 | pshufd $0x0E, MSG, MSG | |
204 | sha256rnds2 STATE1, STATE0 | |
205 | sha256msg1 MSGTMP2, MSGTMP1 | |
206 | ||
207 | /* Rounds 28-31 */ | |
208 | movdqa MSGTMP3, MSG | |
209 | paddd 7*16(SHA256CONSTANTS), MSG | |
210 | sha256rnds2 STATE0, STATE1 | |
211 | movdqa MSGTMP3, MSGTMP4 | |
212 | palignr $4, MSGTMP2, MSGTMP4 | |
213 | paddd MSGTMP4, MSGTMP0 | |
214 | sha256msg2 MSGTMP3, MSGTMP0 | |
215 | pshufd $0x0E, MSG, MSG | |
216 | sha256rnds2 STATE1, STATE0 | |
217 | sha256msg1 MSGTMP3, MSGTMP2 | |
218 | ||
219 | /* Rounds 32-35 */ | |
220 | movdqa MSGTMP0, MSG | |
221 | paddd 8*16(SHA256CONSTANTS), MSG | |
222 | sha256rnds2 STATE0, STATE1 | |
223 | movdqa MSGTMP0, MSGTMP4 | |
224 | palignr $4, MSGTMP3, MSGTMP4 | |
225 | paddd MSGTMP4, MSGTMP1 | |
226 | sha256msg2 MSGTMP0, MSGTMP1 | |
227 | pshufd $0x0E, MSG, MSG | |
228 | sha256rnds2 STATE1, STATE0 | |
229 | sha256msg1 MSGTMP0, MSGTMP3 | |
230 | ||
231 | /* Rounds 36-39 */ | |
232 | movdqa MSGTMP1, MSG | |
233 | paddd 9*16(SHA256CONSTANTS), MSG | |
234 | sha256rnds2 STATE0, STATE1 | |
235 | movdqa MSGTMP1, MSGTMP4 | |
236 | palignr $4, MSGTMP0, MSGTMP4 | |
237 | paddd MSGTMP4, MSGTMP2 | |
238 | sha256msg2 MSGTMP1, MSGTMP2 | |
239 | pshufd $0x0E, MSG, MSG | |
240 | sha256rnds2 STATE1, STATE0 | |
241 | sha256msg1 MSGTMP1, MSGTMP0 | |
242 | ||
243 | /* Rounds 40-43 */ | |
244 | movdqa MSGTMP2, MSG | |
245 | paddd 10*16(SHA256CONSTANTS), MSG | |
246 | sha256rnds2 STATE0, STATE1 | |
247 | movdqa MSGTMP2, MSGTMP4 | |
248 | palignr $4, MSGTMP1, MSGTMP4 | |
249 | paddd MSGTMP4, MSGTMP3 | |
250 | sha256msg2 MSGTMP2, MSGTMP3 | |
251 | pshufd $0x0E, MSG, MSG | |
252 | sha256rnds2 STATE1, STATE0 | |
253 | sha256msg1 MSGTMP2, MSGTMP1 | |
254 | ||
255 | /* Rounds 44-47 */ | |
256 | movdqa MSGTMP3, MSG | |
257 | paddd 11*16(SHA256CONSTANTS), MSG | |
258 | sha256rnds2 STATE0, STATE1 | |
259 | movdqa MSGTMP3, MSGTMP4 | |
260 | palignr $4, MSGTMP2, MSGTMP4 | |
261 | paddd MSGTMP4, MSGTMP0 | |
262 | sha256msg2 MSGTMP3, MSGTMP0 | |
263 | pshufd $0x0E, MSG, MSG | |
264 | sha256rnds2 STATE1, STATE0 | |
265 | sha256msg1 MSGTMP3, MSGTMP2 | |
266 | ||
267 | /* Rounds 48-51 */ | |
268 | movdqa MSGTMP0, MSG | |
269 | paddd 12*16(SHA256CONSTANTS), MSG | |
270 | sha256rnds2 STATE0, STATE1 | |
271 | movdqa MSGTMP0, MSGTMP4 | |
272 | palignr $4, MSGTMP3, MSGTMP4 | |
273 | paddd MSGTMP4, MSGTMP1 | |
274 | sha256msg2 MSGTMP0, MSGTMP1 | |
275 | pshufd $0x0E, MSG, MSG | |
276 | sha256rnds2 STATE1, STATE0 | |
277 | sha256msg1 MSGTMP0, MSGTMP3 | |
278 | ||
279 | /* Rounds 52-55 (no sha256msg1 from this group onwards) */ | |
280 | movdqa MSGTMP1, MSG | |
281 | paddd 13*16(SHA256CONSTANTS), MSG | |
282 | sha256rnds2 STATE0, STATE1 | |
283 | movdqa MSGTMP1, MSGTMP4 | |
284 | palignr $4, MSGTMP0, MSGTMP4 | |
285 | paddd MSGTMP4, MSGTMP2 | |
286 | sha256msg2 MSGTMP1, MSGTMP2 | |
287 | pshufd $0x0E, MSG, MSG | |
288 | sha256rnds2 STATE1, STATE0 | |
289 | ||
290 | /* Rounds 56-59 */ | |
291 | movdqa MSGTMP2, MSG | |
292 | paddd 14*16(SHA256CONSTANTS), MSG | |
293 | sha256rnds2 STATE0, STATE1 | |
294 | movdqa MSGTMP2, MSGTMP4 | |
295 | palignr $4, MSGTMP1, MSGTMP4 | |
296 | paddd MSGTMP4, MSGTMP3 | |
297 | sha256msg2 MSGTMP2, MSGTMP3 /* last schedule step of the block */ | |
298 | pshufd $0x0E, MSG, MSG | |
299 | sha256rnds2 STATE1, STATE0 | |
300 | ||
301 | /* Rounds 60-63 (rounds only, no message schedule work) */ | |
302 | movdqa MSGTMP3, MSG | |
303 | paddd 15*16(SHA256CONSTANTS), MSG | |
304 | sha256rnds2 STATE0, STATE1 | |
305 | pshufd $0x0E, MSG, MSG | |
306 | sha256rnds2 STATE1, STATE0 | |
307 | ||
308 | /* Add current hash values with previously saved */ | |
309 | paddd ABEF_SAVE, STATE0 | |
310 | paddd CDGH_SAVE, STATE1 | |
311 | ||
312 | /* Increment data pointer and loop if more to process */ | |
313 | add $64, DATA_PTR | |
314 | cmp NUM_BLKS, DATA_PTR /* NUM_BLKS holds the end-of-data pointer computed on entry */ | |
315 | jne .Lloop0 | |
316 | ||
317 | /* Write hash values back in the correct order */ | |
318 | pshufd $0x1B, STATE0, STATE0 /* FEBA */ | |
319 | pshufd $0xB1, STATE1, STATE1 /* DCHG */ | |
320 | movdqa STATE0, MSGTMP4 /* keep FEBA: the pblendw below overwrites STATE0 */ | |
321 | pblendw $0xF0, STATE1, STATE0 /* DCBA */ | |
322 | palignr $8, MSGTMP4, STATE1 /* HGFE */ | |
323 | ||
324 | movdqu STATE0, 0*16(DIGEST_PTR) /* unaligned stores back to the caller's digest */ | |
325 | movdqu STATE1, 1*16(DIGEST_PTR) | |
326 | ||
327 | .Ldone_hash: /* also reached directly when the block count is zero */ | |
328 | ||
329 | ret | |
330 | ENDPROC(sha256_ni_transform) | |
331 | ||
332 | .section .rodata /* both tables are only ever read, so keep them out of writable .data */ | |
333 | .align 64 | |
334 | K256: /* SHA-256 round constants K[0..63], four per 16-byte row, indexed as n*16(SHA256CONSTANTS) */ | |
335 | .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 | |
336 | .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 | |
337 | .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 | |
338 | .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 | |
339 | .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc | |
340 | .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da | |
341 | .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 | |
342 | .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 | |
343 | .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 | |
344 | .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 | |
345 | .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 | |
346 | .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 | |
347 | .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 | |
348 | .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 | |
349 | .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 | |
350 | .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 | |
351 | .align 16 /* the mask is loaded with movdqa; make its 16-byte alignment explicit rather than a side effect of K256's size */ | |
352 | PSHUFFLE_BYTE_FLIP_MASK: /* pshufb control that reverses the bytes of each 32-bit word */ | |
353 | .octa 0x0c0d0e0f08090a0b0405060700010203 |