/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * des3_ede-asm_64.S - x86-64 assembly implementation of 3DES cipher
 *
 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 */

#include <linux/linkage.h>

.file "des3_ede-asm_64.S"
.text
/*
 * S-box lookup tables (defined in .rodata below, starting at .L_s1).
 * Each box holds 64 entries of 8 bytes, so consecutive boxes are
 * 64*8 bytes apart; s2..s8 are expressed as offsets from s1.
 */
#define s1 .L_s1
#define s2 ((s1) + (64*8))
#define s3 ((s2) + (64*8))
#define s4 ((s3) + (64*8))
#define s5 ((s4) + (64*8))
#define s6 ((s5) + (64*8))
#define s7 ((s6) + (64*8))
#define s8 ((s7) + (64*8))

/* register macros */
#define CTX %rdi			/* round-key context pointer (1st SysV arg) */

/* RL0-2: "left" block halves (three blocks for the 3-way path) */
#define RL0 %r8
#define RL1 %r9
#define RL2 %r10

#define RL0d %r8d
#define RL1d %r9d
#define RL2d %r10d

/* RR0-2: "right" block halves */
#define RR0 %r11
#define RR1 %r12
#define RR2 %r13

#define RR0d %r11d
#define RR1d %r12d
#define RR2d %r13d

/* RW0-2: per-block working/key words; byte-addressable regs are required
 * here because round1/round3 read RWxbl/RWxbh sub-registers. */
#define RW0 %rax
#define RW1 %rbx
#define RW2 %rcx

#define RW0d %eax
#define RW1d %ebx
#define RW2d %ecx

#define RW0bl %al
#define RW1bl %bl
#define RW2bl %cl

#define RW0bh %ah
#define RW1bh %bh
#define RW2bh %ch

/* RT0-3: scratch temporaries.  Note RT1 aliases %rsi (the dst argument),
 * which is why the functions below spill dst to the stack first. */
#define RT0 %r15
#define RT1 %rsi
#define RT2 %r14
#define RT3 %rdx

#define RT0d %r15d
#define RT1d %esi
#define RT2d %r14d
#define RT3d %edx

/***********************************************************************
 * 1-way 3DES
 ***********************************************************************/

/*
 * Swap the bit groups selected by 'mask' between b and (a >> offset):
 *   t = ((a >> offset) ^ b) & mask;  b ^= t;  a ^= t << offset;
 * Clobbers RT0/flags.
 */
#define do_permutation(a, b, offset, mask) \
	movl a, RT0d; \
	shrl $(offset), RT0d; \
	xorl b, RT0d; \
	andl $(mask), RT0d; \
	xorl RT0d, b; \
	shll $(offset), RT0d; \
	xorl RT0d, a;

/*
 * Widen a 32-bit half into the 64-bit form used by the round function:
 * place rotate-right-by-4 of the low word into the high 32 bits, then
 * keep only 6 bits per byte via 'mask' (0x3f3f3f3f3f3f3f3f at call sites).
 * Clobbers RT0/flags.
 */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;

/*
 * Inverse of expand_to_64bits: fold the high 32 bits (rotated left by 4)
 * back into the low 32-bit half with OR.  Clobbers RT0/flags.
 */
#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;

/*
 * DES initial permutation of one block (left/right 32-bit halves),
 * implemented as a sequence of masked bit-group swaps, followed by
 * expansion of both halves to the 64-bit working form.
 * Clobbers RW0, RT0, RT3, flags.
 */
#define initial_permutation(left, right) \
	do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(right##d, left##d, 2, 0x33333333); \
	do_permutation(right##d, left##d, 8, 0x00ff00ff); \
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	movl left##d, RW0d; \
	roll $1, right##d; \
	xorl right##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##d; \
	xorl RW0d, right##d; \
	roll $1, left##d; \
	expand_to_64bits(right, RT3); \
	expand_to_64bits(left, RT3);

/*
 * DES final permutation: exact mirror of initial_permutation — compress
 * both halves back to 32 bits, undo the rotate/xor mixing, then apply
 * the bit-group swaps in reverse order.  Clobbers RW0, RT0, flags.
 */
#define final_permutation(left, right) \
	compress_to_64bits(right); \
	compress_to_64bits(left); \
	movl right##d, RW0d; \
	rorl $1, left##d; \
	xorl left##d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##d; \
	xorl RW0d, left##d; \
	rorl $1, right##d; \
	do_permutation(right##d, left##d, 8, 0x00ff00ff); \
	do_permutation(right##d, left##d, 2, 0x33333333); \
	do_permutation(left##d, right##d, 16, 0x0000ffff); \
	do_permutation(left##d, right##d, 4, 0x0f0f0f0f);

/*
 * One Feistel round for a single block: XOR the round key (already in
 * RW0) with 'from', run the eight 6-bit indices through s1..s8 and XOR
 * the results into 'to'.  S-box lookups are interleaved into two XOR
 * chains (RT0 and 'to') to reduce dependency stalls.
 * 'load_next_key' fetches round key n+1 into RW0 (or dummy2 on the
 * last round).  Note RL1 is borrowed as an extra scratch register here.
 * Clobbers RW0, RT0-RT3, RL1, flags.
 */
#define round1(n, from, to, load_next_key) \
	xorq from, RW0; \
	\
	movzbl RW0bl, RT0d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	shrq $16, RW0; \
	movq s8(, RT0, 8), RT0; \
	xorq s6(, RT1, 8), to; \
	movzbl RW0bl, RL1d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s4(, RT2, 8), RT0; \
	xorq s2(, RT3, 8), to; \
	movzbl RW0bl, RT2d; \
	movzbl RW0bh, RT3d; \
	xorq s7(, RL1, 8), RT0; \
	xorq s5(, RT1, 8), to; \
	xorq s3(, RT2, 8), RT0; \
	load_next_key(n, RW0); \
	xorq RT0, to; \
	xorq s1(, RT3, 8), to; \

/* Fetch round key n+1 (8 bytes per key) from the context into RWx. */
#define load_next_key(n, RWx) \
	movq (((n) + 1) * 8)(CTX), RWx;

/* Two-argument no-op; stands in for load_next_key/do_movq on the last round. */
#define dummy2(a, b) /*_*/

/* Load one 8-byte block from (io) as two big-endian 32-bit halves. */
#define read_block(io, left, right) \
	movl (io), left##d; \
	movl 4(io), right##d; \
	bswapl left##d; \
	bswapl right##d;

/* Store two 32-bit halves to (io) in big-endian byte order. */
#define write_block(io, left, right) \
	bswapl left##d; \
	bswapl right##d; \
	movl left##d, (io); \
	movl right##d, 4(io);

/*
 * void des3_ede_x86_64_crypt_blk(const void *ctx, u8 *dst, const u8 *src)
 * Process one 8-byte block through 3x16 Feistel rounds using the 48
 * sequential round keys at CTX (key ordering selects encrypt/decrypt —
 * presumably set up by the C caller; confirm there).
 * ABI: SysV AMD64.  Saves/restores callee-saved rbx, r12-r15.
 */
SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
	/* input:
	 *	%rdi: round keys, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 */
	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi; /* dst — spilled because RT1 aliases %rsi */

	read_block(%rdx, RL0, RR0);
	initial_permutation(RL0, RR0);

	movq (CTX), RW0;		/* round key 0 */

	/* cipher #1: 16 rounds */
	round1(0, RR0, RL0, load_next_key);
	round1(1, RL0, RR0, load_next_key);
	round1(2, RR0, RL0, load_next_key);
	round1(3, RL0, RR0, load_next_key);
	round1(4, RR0, RL0, load_next_key);
	round1(5, RL0, RR0, load_next_key);
	round1(6, RR0, RL0, load_next_key);
	round1(7, RL0, RR0, load_next_key);
	round1(8, RR0, RL0, load_next_key);
	round1(9, RL0, RR0, load_next_key);
	round1(10, RR0, RL0, load_next_key);
	round1(11, RL0, RR0, load_next_key);
	round1(12, RR0, RL0, load_next_key);
	round1(13, RL0, RR0, load_next_key);
	round1(14, RR0, RL0, load_next_key);
	round1(15, RL0, RR0, load_next_key);

	/* cipher #2: halves enter swapped relative to #1 */
	round1(16+0, RL0, RR0, load_next_key);
	round1(16+1, RR0, RL0, load_next_key);
	round1(16+2, RL0, RR0, load_next_key);
	round1(16+3, RR0, RL0, load_next_key);
	round1(16+4, RL0, RR0, load_next_key);
	round1(16+5, RR0, RL0, load_next_key);
	round1(16+6, RL0, RR0, load_next_key);
	round1(16+7, RR0, RL0, load_next_key);
	round1(16+8, RL0, RR0, load_next_key);
	round1(16+9, RR0, RL0, load_next_key);
	round1(16+10, RL0, RR0, load_next_key);
	round1(16+11, RR0, RL0, load_next_key);
	round1(16+12, RL0, RR0, load_next_key);
	round1(16+13, RR0, RL0, load_next_key);
	round1(16+14, RL0, RR0, load_next_key);
	round1(16+15, RR0, RL0, load_next_key);

	/* cipher #3; last round skips the (nonexistent) key 48 load */
	round1(32+0, RR0, RL0, load_next_key);
	round1(32+1, RL0, RR0, load_next_key);
	round1(32+2, RR0, RL0, load_next_key);
	round1(32+3, RL0, RR0, load_next_key);
	round1(32+4, RR0, RL0, load_next_key);
	round1(32+5, RL0, RR0, load_next_key);
	round1(32+6, RR0, RL0, load_next_key);
	round1(32+7, RL0, RR0, load_next_key);
	round1(32+8, RR0, RL0, load_next_key);
	round1(32+9, RL0, RR0, load_next_key);
	round1(32+10, RR0, RL0, load_next_key);
	round1(32+11, RL0, RR0, load_next_key);
	round1(32+12, RR0, RL0, load_next_key);
	round1(32+13, RL0, RR0, load_next_key);
	round1(32+14, RR0, RL0, load_next_key);
	round1(32+15, RL0, RR0, dummy2);

	final_permutation(RR0, RL0);

	popq %rsi /* dst */
	write_block(%rsi, RR0, RL0);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	/* NOTE(review): newer kernels use the objtool-aware RET macro here —
	 * confirm whether this tree's <linux/linkage.h> provides it. */
	ret;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk)

/***********************************************************************
 * 3-way 3DES
 ***********************************************************************/

/* NOTE(review): these are token-identical redefinitions of the macros in
 * the 1-way section; cpp permits identical redefinition, so this is
 * harmless and is kept as-is. */
#define expand_to_64bits(val, mask) \
	movl val##d, RT0d; \
	rorl $4, RT0d; \
	shlq $32, RT0; \
	orq RT0, val; \
	andq mask, val;

#define compress_to_64bits(val) \
	movq val, RT0; \
	shrq $32, RT0; \
	roll $4, RT0d; \
	orl RT0d, val##d;

/*
 * Initial permutation applied to three independent blocks (suffixes 0-2
 * on 'left'/'right'); same operations as initial_permutation, unrolled
 * per block with per-block scratch RW0/RW1/RW2.
 * Clobbers RW0-RW2, RT0, RT3, flags.
 */
#define initial_permutation3(left, right) \
	do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	\
	do_permutation(right##0d, left##0d, 2, 0x33333333); \
	do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d, 2, 0x33333333); \
	do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d, 2, 0x33333333); \
	do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
	\
	movabs $0x3f3f3f3f3f3f3f3f, RT3; \
	\
	movl left##0d, RW0d; \
	roll $1, right##0d; \
	xorl right##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, left##0d; \
	xorl RW0d, right##0d; \
	roll $1, left##0d; \
	expand_to_64bits(right##0, RT3); \
	expand_to_64bits(left##0, RT3); \
	movl left##1d, RW1d; \
	roll $1, right##1d; \
	xorl right##1d, RW1d; \
	andl $0xaaaaaaaa, RW1d; \
	xorl RW1d, left##1d; \
	xorl RW1d, right##1d; \
	roll $1, left##1d; \
	expand_to_64bits(right##1, RT3); \
	expand_to_64bits(left##1, RT3); \
	movl left##2d, RW2d; \
	roll $1, right##2d; \
	xorl right##2d, RW2d; \
	andl $0xaaaaaaaa, RW2d; \
	xorl RW2d, left##2d; \
	xorl RW2d, right##2d; \
	roll $1, left##2d; \
	expand_to_64bits(right##2, RT3); \
	expand_to_64bits(left##2, RT3);

/*
 * Final permutation for three blocks: mirror of initial_permutation3
 * (compress, undo rotate/xor mixing, reversed bit-group swaps).
 * Clobbers RW0-RW2, RT0, flags.
 */
#define final_permutation3(left, right) \
	compress_to_64bits(right##0); \
	compress_to_64bits(left##0); \
	movl right##0d, RW0d; \
	rorl $1, left##0d; \
	xorl left##0d, RW0d; \
	andl $0xaaaaaaaa, RW0d; \
	xorl RW0d, right##0d; \
	xorl RW0d, left##0d; \
	rorl $1, right##0d; \
	compress_to_64bits(right##1); \
	compress_to_64bits(left##1); \
	movl right##1d, RW1d; \
	rorl $1, left##1d; \
	xorl left##1d, RW1d; \
	andl $0xaaaaaaaa, RW1d; \
	xorl RW1d, right##1d; \
	xorl RW1d, left##1d; \
	rorl $1, right##1d; \
	compress_to_64bits(right##2); \
	compress_to_64bits(left##2); \
	movl right##2d, RW2d; \
	rorl $1, left##2d; \
	xorl left##2d, RW2d; \
	andl $0xaaaaaaaa, RW2d; \
	xorl RW2d, right##2d; \
	xorl RW2d, left##2d; \
	rorl $1, right##2d; \
	\
	do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
	do_permutation(right##0d, left##0d, 2, 0x33333333); \
	do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
	do_permutation(right##1d, left##1d, 2, 0x33333333); \
	do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
	do_permutation(right##2d, left##2d, 2, 0x33333333); \
	\
	do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
	do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
	do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
	do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
	do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
	do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);

/*
 * One Feistel round applied to three blocks.  Block 0 uses RW0 (whose
 * low byte/high byte are directly addressable), blocks 1 and 2 use
 * RW1/RW2.  load_next_key fetches key n+1 into RW0; do_movq (usually
 * __movq) then copies RW0 to RW1/RW2 so all three blocks share the key
 * (dummy2 on the final round).  Clobbers RW0-RW2, RT1, RT3, flags.
 */
#define round3(n, from, to, load_next_key, do_movq) \
	xorq from##0, RW0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s8(, RT3, 8), to##0; \
	xorq s6(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrq $16, RW0; \
	xorq s4(, RT3, 8), to##0; \
	xorq s2(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	shrl $16, RW0d; \
	xorq s7(, RT3, 8), to##0; \
	xorq s5(, RT1, 8), to##0; \
	movzbl RW0bl, RT3d; \
	movzbl RW0bh, RT1d; \
	load_next_key(n, RW0); \
	xorq s3(, RT3, 8), to##0; \
	xorq s1(, RT1, 8), to##0; \
	xorq from##1, RW1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrq $16, RW1; \
	xorq s8(, RT3, 8), to##1; \
	xorq s6(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrq $16, RW1; \
	xorq s4(, RT3, 8), to##1; \
	xorq s2(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	shrl $16, RW1d; \
	xorq s7(, RT3, 8), to##1; \
	xorq s5(, RT1, 8), to##1; \
	movzbl RW1bl, RT3d; \
	movzbl RW1bh, RT1d; \
	do_movq(RW0, RW1); \
	xorq s3(, RT3, 8), to##1; \
	xorq s1(, RT1, 8), to##1; \
	xorq from##2, RW2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrq $16, RW2; \
	xorq s8(, RT3, 8), to##2; \
	xorq s6(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrq $16, RW2; \
	xorq s4(, RT3, 8), to##2; \
	xorq s2(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	shrl $16, RW2d; \
	xorq s7(, RT3, 8), to##2; \
	xorq s5(, RT1, 8), to##2; \
	movzbl RW2bl, RT3d; \
	movzbl RW2bh, RT1d; \
	do_movq(RW0, RW2); \
	xorq s3(, RT3, 8), to##2; \
	xorq s1(, RT1, 8), to##2;

/* Plain register copy; passed as round3's do_movq parameter. */
#define __movq(src, dst) \
	movq src, dst;

/*
 * void des3_ede_x86_64_crypt_blk_3way(const void *ctx, u8 *dst, const u8 *src)
 * Process three 8-byte blocks in parallel through 3x16 Feistel rounds
 * using the 48 sequential round keys at CTX.
 * ABI: SysV AMD64.  Saves/restores callee-saved rbx, r12-r15.
 */
SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
	/* input:
	 *	%rdi: ctx, round keys
	 *	%rsi: dst (3 blocks)
	 *	%rdx: src (3 blocks)
	 */

	pushq %rbx;
	pushq %r12;
	pushq %r13;
	pushq %r14;
	pushq %r15;

	pushq %rsi /* dst — spilled because RT1 aliases %rsi */

	/* load input: three blocks, big-endian halves */
	movl 0 * 4(%rdx), RL0d;
	movl 1 * 4(%rdx), RR0d;
	movl 2 * 4(%rdx), RL1d;
	movl 3 * 4(%rdx), RR1d;
	movl 4 * 4(%rdx), RL2d;
	movl 5 * 4(%rdx), RR2d;

	bswapl RL0d;
	bswapl RR0d;
	bswapl RL1d;
	bswapl RR1d;
	bswapl RL2d;
	bswapl RR2d;

	initial_permutation3(RL, RR);

	/* broadcast round key 0 to all three working registers */
	movq 0(CTX), RW0;
	movq RW0, RW1;
	movq RW0, RW2;

	/* cipher #1: 16 rounds */
	round3(0, RR, RL, load_next_key, __movq);
	round3(1, RL, RR, load_next_key, __movq);
	round3(2, RR, RL, load_next_key, __movq);
	round3(3, RL, RR, load_next_key, __movq);
	round3(4, RR, RL, load_next_key, __movq);
	round3(5, RL, RR, load_next_key, __movq);
	round3(6, RR, RL, load_next_key, __movq);
	round3(7, RL, RR, load_next_key, __movq);
	round3(8, RR, RL, load_next_key, __movq);
	round3(9, RL, RR, load_next_key, __movq);
	round3(10, RR, RL, load_next_key, __movq);
	round3(11, RL, RR, load_next_key, __movq);
	round3(12, RR, RL, load_next_key, __movq);
	round3(13, RL, RR, load_next_key, __movq);
	round3(14, RR, RL, load_next_key, __movq);
	round3(15, RL, RR, load_next_key, __movq);

	/* cipher #2: halves enter swapped relative to #1 */
	round3(16+0, RL, RR, load_next_key, __movq);
	round3(16+1, RR, RL, load_next_key, __movq);
	round3(16+2, RL, RR, load_next_key, __movq);
	round3(16+3, RR, RL, load_next_key, __movq);
	round3(16+4, RL, RR, load_next_key, __movq);
	round3(16+5, RR, RL, load_next_key, __movq);
	round3(16+6, RL, RR, load_next_key, __movq);
	round3(16+7, RR, RL, load_next_key, __movq);
	round3(16+8, RL, RR, load_next_key, __movq);
	round3(16+9, RR, RL, load_next_key, __movq);
	round3(16+10, RL, RR, load_next_key, __movq);
	round3(16+11, RR, RL, load_next_key, __movq);
	round3(16+12, RL, RR, load_next_key, __movq);
	round3(16+13, RR, RL, load_next_key, __movq);
	round3(16+14, RL, RR, load_next_key, __movq);
	round3(16+15, RR, RL, load_next_key, __movq);

	/* cipher #3; last round skips the (nonexistent) key 48 load */
	round3(32+0, RR, RL, load_next_key, __movq);
	round3(32+1, RL, RR, load_next_key, __movq);
	round3(32+2, RR, RL, load_next_key, __movq);
	round3(32+3, RL, RR, load_next_key, __movq);
	round3(32+4, RR, RL, load_next_key, __movq);
	round3(32+5, RL, RR, load_next_key, __movq);
	round3(32+6, RR, RL, load_next_key, __movq);
	round3(32+7, RL, RR, load_next_key, __movq);
	round3(32+8, RR, RL, load_next_key, __movq);
	round3(32+9, RL, RR, load_next_key, __movq);
	round3(32+10, RR, RL, load_next_key, __movq);
	round3(32+11, RL, RR, load_next_key, __movq);
	round3(32+12, RR, RL, load_next_key, __movq);
	round3(32+13, RL, RR, load_next_key, __movq);
	round3(32+14, RR, RL, load_next_key, __movq);
	round3(32+15, RL, RR, dummy2, dummy2);

	final_permutation3(RR, RL);

	bswapl RR0d;
	bswapl RL0d;
	bswapl RR1d;
	bswapl RL1d;
	bswapl RR2d;
	bswapl RL2d;

	popq %rsi /* dst */
	movl RR0d, 0 * 4(%rsi);
	movl RL0d, 1 * 4(%rsi);
	movl RR1d, 2 * 4(%rsi);
	movl RL1d, 3 * 4(%rsi);
	movl RR2d, 4 * 4(%rsi);
	movl RL2d, 5 * 4(%rsi);

	popq %r15;
	popq %r14;
	popq %r13;
	popq %r12;
	popq %rbx;

	/* NOTE(review): newer kernels use the objtool-aware RET macro here —
	 * confirm whether this tree's <linux/linkage.h> provides it. */
	ret;
SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)

/*
 * Pre-rotated DES S-box tables: eight boxes of 64 x 8-byte entries each,
 * indexed by a 6-bit value scaled by 8 (see the s1..s8 offset macros).
 * Read-only data; 16-byte aligned.
 */
.section	.rodata, "a", @progbits
.align 16
.L_s1:
	.quad 0x0010100001010400, 0x0000000000000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0010100001010004, 0x0000100000010404
	.quad 0x0000000000000004, 0x0000100000010000
	.quad 0x0000000000000400, 0x0010100001010400
	.quad 0x0010100001010404, 0x0000000000000400
	.quad 0x0010000001000404, 0x0010100001010004
	.quad 0x0010000001000000, 0x0000000000000004
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000100000010400
	.quad 0x0000100000010400, 0x0010100001010000
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0000100000010004, 0x0010000001000004
	.quad 0x0010000001000004, 0x0000100000010004
	.quad 0x0000000000000000, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010000001000000
	.quad 0x0000100000010000, 0x0010100001010404
	.quad 0x0000000000000004, 0x0010100001010000
	.quad 0x0010100001010400, 0x0010000001000000
	.quad 0x0010000001000000, 0x0000000000000400
	.quad 0x0010100001010004, 0x0000100000010000
	.quad 0x0000100000010400, 0x0010000001000004
	.quad 0x0000000000000400, 0x0000000000000004
	.quad 0x0010000001000404, 0x0000100000010404
	.quad 0x0010100001010404, 0x0000100000010004
	.quad 0x0010100001010000, 0x0010000001000404
	.quad 0x0010000001000004, 0x0000000000000404
	.quad 0x0000100000010404, 0x0010100001010400
	.quad 0x0000000000000404, 0x0010000001000400
	.quad 0x0010000001000400, 0x0000000000000000
	.quad 0x0000100000010004, 0x0000100000010400
	.quad 0x0000000000000000, 0x0010100001010004
.L_s2:
	.quad 0x0801080200100020, 0x0800080000000000
	.quad 0x0000080000000000, 0x0001080200100020
	.quad 0x0001000000100000, 0x0000000200000020
	.quad 0x0801000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0801080200100020
	.quad 0x0801080000100000, 0x0800000000000000
	.quad 0x0800080000000000, 0x0001000000100000
	.quad 0x0000000200000020, 0x0801000200100020
	.quad 0x0001080000100000, 0x0001000200100020
	.quad 0x0800080200000020, 0x0000000000000000
	.quad 0x0800000000000000, 0x0000080000000000
	.quad 0x0001080200100020, 0x0801000000100000
	.quad 0x0001000200100020, 0x0800000200000020
	.quad 0x0000000000000000, 0x0001080000100000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0801000000100000, 0x0000080200000020
	.quad 0x0000000000000000, 0x0001080200100020
	.quad 0x0801000200100020, 0x0001000000100000
	.quad 0x0800080200000020, 0x0801000000100000
	.quad 0x0801080000100000, 0x0000080000000000
	.quad 0x0801000000100000, 0x0800080000000000
	.quad 0x0000000200000020, 0x0801080200100020
	.quad 0x0001080200100020, 0x0000000200000020
	.quad 0x0000080000000000, 0x0800000000000000
	.quad 0x0000080200000020, 0x0801080000100000
	.quad 0x0001000000100000, 0x0800000200000020
	.quad 0x0001000200100020, 0x0800080200000020
	.quad 0x0800000200000020, 0x0001000200100020
	.quad 0x0001080000100000, 0x0000000000000000
	.quad 0x0800080000000000, 0x0000080200000020
	.quad 0x0800000000000000, 0x0801000200100020
	.quad 0x0801080200100020, 0x0001080000100000
.L_s3:
	.quad 0x0000002000000208, 0x0000202008020200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000202000020208, 0x0000002008000200
	.quad 0x0000200000020008, 0x0000000008000008
	.quad 0x0000000008000008, 0x0000200000020000
	.quad 0x0000202008020208, 0x0000200000020008
	.quad 0x0000200008020000, 0x0000002000000208
	.quad 0x0000000008000000, 0x0000000000000008
	.quad 0x0000202008020200, 0x0000002000000200
	.quad 0x0000202000020200, 0x0000200008020000
	.quad 0x0000200008020008, 0x0000202000020208
	.quad 0x0000002008000208, 0x0000202000020200
	.quad 0x0000200000020000, 0x0000002008000208
	.quad 0x0000000000000008, 0x0000202008020208
	.quad 0x0000002000000200, 0x0000000008000000
	.quad 0x0000202008020200, 0x0000000008000000
	.quad 0x0000200000020008, 0x0000002000000208
	.quad 0x0000200000020000, 0x0000202008020200
	.quad 0x0000002008000200, 0x0000000000000000
	.quad 0x0000002000000200, 0x0000200000020008
	.quad 0x0000202008020208, 0x0000002008000200
	.quad 0x0000000008000008, 0x0000002000000200
	.quad 0x0000000000000000, 0x0000200008020008
	.quad 0x0000002008000208, 0x0000200000020000
	.quad 0x0000000008000000, 0x0000202008020208
	.quad 0x0000000000000008, 0x0000202000020208
	.quad 0x0000202000020200, 0x0000000008000008
	.quad 0x0000200008020000, 0x0000002008000208
	.quad 0x0000002000000208, 0x0000200008020000
	.quad 0x0000202000020208, 0x0000000000000008
	.quad 0x0000200008020008, 0x0000202000020200
.L_s4:
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x0000020000002000, 0x0008020800002000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x0008000800000000, 0x1008000000000001
	.quad 0x0008020000002000, 0x1008020800002001
	.quad 0x1000000800000001, 0x0000000000000000
	.quad 0x0000000000000000, 0x0008020000002000
	.quad 0x0000020800002000, 0x0008000800000000
	.quad 0x1008000800000001, 0x1000000000000001
	.quad 0x1008020000002001, 0x1000020800002001
	.quad 0x1000020800002001, 0x0000000800000000
	.quad 0x1008020800002001, 0x1000000800000001
	.quad 0x1000000000000001, 0x0000020000002000
	.quad 0x1008000000000001, 0x1000020000002001
	.quad 0x0008020800002000, 0x1008000800000001
	.quad 0x1000020000002001, 0x0000020800002000
	.quad 0x0008000000000000, 0x1008020000002001
	.quad 0x0000000800000000, 0x0008000000000000
	.quad 0x0000020000002000, 0x0008020800002000
.L_s5:
	.quad 0x0000001000000100, 0x0020001002080100
	.quad 0x0020000002080000, 0x0420001002000100
	.quad 0x0000000000080000, 0x0000001000000100
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0400001000080100, 0x0000000000080000
	.quad 0x0020001002000100, 0x0400001000080100
	.quad 0x0420001002000100, 0x0420000002080000
	.quad 0x0000001000080100, 0x0400000000000000
	.quad 0x0020000002000000, 0x0400000000080000
	.quad 0x0400000000080000, 0x0000000000000000
	.quad 0x0400001000000100, 0x0420001002080100
	.quad 0x0420001002080100, 0x0020001002000100
	.quad 0x0420000002080000, 0x0400001000000100
	.quad 0x0000000000000000, 0x0420000002000000
	.quad 0x0020001002080100, 0x0020000002000000
	.quad 0x0420000002000000, 0x0000001000080100
	.quad 0x0000000000080000, 0x0420001002000100
	.quad 0x0000001000000100, 0x0020000002000000
	.quad 0x0400000000000000, 0x0020000002080000
	.quad 0x0420001002000100, 0x0400001000080100
	.quad 0x0020001002000100, 0x0400000000000000
	.quad 0x0420000002080000, 0x0020001002080100
	.quad 0x0400001000080100, 0x0000001000000100
	.quad 0x0020000002000000, 0x0420000002080000
	.quad 0x0420001002080100, 0x0000001000080100
	.quad 0x0420000002000000, 0x0420001002080100
	.quad 0x0020000002080000, 0x0000000000000000
	.quad 0x0400000000080000, 0x0420000002000000
	.quad 0x0000001000080100, 0x0020001002000100
	.quad 0x0400001000000100, 0x0000000000080000
	.quad 0x0000000000000000, 0x0400000000080000
	.quad 0x0020001002080100, 0x0400001000000100
.L_s6:
	.quad 0x0200000120000010, 0x0204000020000000
	.quad 0x0000040000000000, 0x0204040120000010
	.quad 0x0204000020000000, 0x0000000100000010
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0200040020000000, 0x0004040100000010
	.quad 0x0004000000000000, 0x0200000120000010
	.quad 0x0004000100000010, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0000000000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000040000000000
	.quad 0x0004040000000000, 0x0200040120000010
	.quad 0x0000000100000010, 0x0204000120000010
	.quad 0x0204000120000010, 0x0000000000000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000040100000010, 0x0004040000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0200040020000000, 0x0000000100000010
	.quad 0x0204000120000010, 0x0004040000000000
	.quad 0x0204040120000010, 0x0004000000000000
	.quad 0x0000040100000010, 0x0200000120000010
	.quad 0x0004000000000000, 0x0200040020000000
	.quad 0x0200000020000000, 0x0000040100000010
	.quad 0x0200000120000010, 0x0204040120000010
	.quad 0x0004040000000000, 0x0204000020000000
	.quad 0x0004040100000010, 0x0204040020000000
	.quad 0x0000000000000000, 0x0204000120000010
	.quad 0x0000000100000010, 0x0000040000000000
	.quad 0x0204000020000000, 0x0004040100000010
	.quad 0x0000040000000000, 0x0004000100000010
	.quad 0x0200040120000010, 0x0000000000000000
	.quad 0x0204040020000000, 0x0200000020000000
	.quad 0x0004000100000010, 0x0200040120000010
.L_s7:
	.quad 0x0002000000200000, 0x2002000004200002
	.quad 0x2000000004000802, 0x0000000000000000
	.quad 0x0000000000000800, 0x2000000004000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2002000004200802, 0x0002000000200000
	.quad 0x0000000000000000, 0x2000000004000002
	.quad 0x2000000000000002, 0x0000000004000000
	.quad 0x2002000004200002, 0x2000000000000802
	.quad 0x0000000004000800, 0x2002000000200802
	.quad 0x2002000000200002, 0x0000000004000800
	.quad 0x2000000004000002, 0x0002000004200000
	.quad 0x0002000004200800, 0x2002000000200002
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000000000802, 0x2002000004200802
	.quad 0x0002000000200800, 0x2000000000000002
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0000000004000000, 0x0002000000200800
	.quad 0x0002000000200000, 0x2000000004000802
	.quad 0x2000000004000802, 0x2002000004200002
	.quad 0x2002000004200002, 0x2000000000000002
	.quad 0x2002000000200002, 0x0000000004000000
	.quad 0x0000000004000800, 0x0002000000200000
	.quad 0x0002000004200800, 0x2000000000000802
	.quad 0x2002000000200802, 0x0002000004200800
	.quad 0x2000000000000802, 0x2000000004000002
	.quad 0x2002000004200802, 0x0002000004200000
	.quad 0x0002000000200800, 0x0000000000000000
	.quad 0x2000000000000002, 0x2002000004200802
	.quad 0x0000000000000000, 0x2002000000200802
	.quad 0x0002000004200000, 0x0000000000000800
	.quad 0x2000000004000002, 0x0000000004000800
	.quad 0x0000000000000800, 0x2002000000200002
.L_s8:
	.quad 0x0100010410001000, 0x0000010000001000
	.quad 0x0000000000040000, 0x0100010410041000
	.quad 0x0100000010000000, 0x0100010410001000
	.quad 0x0000000400000000, 0x0100000010000000
	.quad 0x0000000400040000, 0x0100000010040000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0100010010041000, 0x0000010400041000
	.quad 0x0000010000001000, 0x0000000400000000
	.quad 0x0100000010040000, 0x0100000410000000
	.quad 0x0100010010001000, 0x0000010400001000
	.quad 0x0000010000041000, 0x0000000400040000
	.quad 0x0100000410040000, 0x0100010010041000
	.quad 0x0000010400001000, 0x0000000000000000
	.quad 0x0000000000000000, 0x0100000410040000
	.quad 0x0100000410000000, 0x0100010010001000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0000010400041000, 0x0000000000040000
	.quad 0x0100010010041000, 0x0000010000001000
	.quad 0x0000000400000000, 0x0100000410040000
	.quad 0x0000010000001000, 0x0000010400041000
	.quad 0x0100010010001000, 0x0000000400000000
	.quad 0x0100000410000000, 0x0100000010040000
	.quad 0x0100000410040000, 0x0100000010000000
	.quad 0x0000000000040000, 0x0100010410001000
	.quad 0x0000000000000000, 0x0100010410041000
	.quad 0x0000000400040000, 0x0100000410000000
	.quad 0x0100000010040000, 0x0100010010001000
	.quad 0x0100010410001000, 0x0000000000000000
	.quad 0x0100010410041000, 0x0000010000041000
	.quad 0x0000010000041000, 0x0000010400001000
	.quad 0x0000010400001000, 0x0000000400040000
	.quad 0x0100000010000000, 0x0100010010041000