]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - arch/i386/crypto/aes-i586-asm.S
Linux-2.6.12-rc2
[mirror_ubuntu-artful-kernel.git] / arch / i386 / crypto / aes-i586-asm.S
1 // -------------------------------------------------------------------------
2 // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
3 // All rights reserved.
4 //
5 // LICENSE TERMS
6 //
7 // The free distribution and use of this software in both source and binary
8 // form is allowed (with or without changes) provided that:
9 //
10 // 1. distributions of this source code include the above copyright
11 // notice, this list of conditions and the following disclaimer;
12 //
13 // 2. distributions in binary form include the above copyright
14 // notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other associated materials;
16 //
17 // 3. the copyright holder's name is not used to endorse products
18 // built using this software without specific written permission.
19 //
20 //
21 // ALTERNATIVELY, provided that this notice is retained in full, this product
22 // may be distributed under the terms of the GNU General Public License (GPL),
23 // in which case the provisions of the GPL apply INSTEAD OF those given above.
24 //
25 // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
26 // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
27
28 // DISCLAIMER
29 //
30 // This software is provided 'as is' with no explicit or implied warranties
31 // in respect of its properties including, but not limited to, correctness
32 // and fitness for purpose.
33 // -------------------------------------------------------------------------
34 // Issue Date: 29/07/2002
35
36 .file "aes-i586-asm.S"
37 .text
38
39 // aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
40 // aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
41
#define tlen	1024	// byte length of one lookup table (256 32-bit words);
			// each table set consists of four such tables

// Stack offsets of the C arguments relative to %esp.  These values are
// only valid while exactly one register has been pushed since entry;
// add 4 for every additional push.

#define in_blk	8	// address of the input byte array
#define out_blk	12	// address of the output byte array
#define ctx	16	// address of the AES context structure

// Byte offsets of the fields inside the AES context structure.

#define ekey	0	// base of the encryption key schedule
#define nrnd	256	// number of rounds
#define dkey	260	// base of the decryption key schedule
55
// Symbolic names for the working registers of the encrypt and decrypt
// subroutines.  r0..r3 are the four registers for which low/high byte
// sub-register names are defined below; r4 and r5 have none.

#define r0	eax
#define r1	ebx
#define r2	ecx
#define r3	edx
#define r4	esi
#define r5	edi

// Names formed by appending "l" or "h" to a 32-bit register name,
// mapped to the matching low-byte / high-byte sub-register.

#define eaxl	al
#define eaxh	ah
#define ebxl	bl
#define ebxh	bh
#define ecxl	cl
#define ecxh	ch
#define edxl	dl
#define edxh	dh

// l(x) / h(x): low / high byte sub-register of x, where x is r0..r3 or
// the corresponding 32-bit register name.  The extra level of
// indirection lets the preprocessor expand x (e.g. r0 -> eax) before
// the token paste is performed.

#define _l(x)	x##l
#define l(x)	_l(x)

#define _h(x)	x##h
#define h(x)	_h(x)
79
// do_col: fold one 32-bit input column into four output registers.
//
// Each of the four bytes of col, lowest byte first, indexes one of four
// consecutive tables of 256 32-bit words; the word fetched is xored
// into the corresponding output register:
//
//	bits  0-7   -> tab          -> y0
//	bits  8-15  -> tab+tlen     -> y1
//	bits 16-23  -> tab+2*tlen   -> y2
//	bits 24-31  -> tab+3*tlen   -> y3
//
// Parameters:
//	tab		base address of the first of the four tables
//	y0..y3		output registers (updated by xor, not overwritten)
//	col		input column register; destroyed.  Its l()/h() byte
//			halves are used, so it must be one of r0..r3
//	t		scratch register; destroyed

#define do_col(tab, y0,y1,y2,y3, col, t) \
	movzx	%l(col),%t; \
	xor	tab(,%t,4),%y0; \
	movzx	%h(col),%t; \
	shr	$16,%col; \
	xor	tab+tlen(,%t,4),%y1; \
	movzx	%l(col),%t; \
	movzx	%h(col),%col; \
	xor	tab+2*tlen(,%t,4),%y2; \
	xor	tab+3*tlen(,%col,4),%y3;
105
// do_fcol: first column step of a forward round.
//
// Loads the four words of the round key at rk into the output registers
// and xors in the four table lookups selected by the bytes of col:
//
//	y0 = word at  0 rk  ^  tab         [bits  0-7  of col]
//	y1 = word at 12 rk  ^  tab+tlen    [bits  8-15 of col]
//	y2 = word at  8 rk  ^  tab+2*tlen  [bits 16-23 of col]
//	y3 = word at  4 rk  ^  tab+3*tlen  [bits 24-31 of col]
//
// On exit col holds the value y2 had on entry.  The entry values of
// y0, y1 and y3 are not used and are overwritten.  t is scratch.
//
// rk is pasted textually behind a literal displacement, giving operands
// such as "12 +16(%ebp)"; it therefore has to be a memory operand of
// the form disp(%reg) or (%reg), with the assembler evaluating the
// combined displacement.

#define do_fcol(tab, y0,y1,y2,y3, col, t, rk) \
	mov	0 rk,%y0; \
	movzx	%l(col),%t; \
	mov	12 rk,%y1; \
	xor	tab(,%t,4),%y0; \
	mov	4 rk,%y3; \
	movzx	%h(col),%t; \
	shr	$16,%col; \
	xor	tab+tlen(,%t,4),%y1; \
	movzx	%l(col),%t; \
	movzx	%h(col),%col; \
	xor	tab+3*tlen(,%col,4),%y3; \
	mov	%y2,%col; \
	mov	8 rk,%y2; \
	xor	tab+2*tlen(,%t,4),%y2;
124
// do_icol: first column step of an inverse round.
//
// Loads the four words of the round key at rk into the output registers
// and xors in the four table lookups selected by the bytes of col:
//
//	y0 = word at  0 rk  ^  tab         [bits  0-7  of col]
//	y1 = word at  4 rk  ^  tab+tlen    [bits  8-15 of col]
//	y2 = word at  8 rk  ^  tab+2*tlen  [bits 16-23 of col]
//	y3 = word at 12 rk  ^  tab+3*tlen  [bits 24-31 of col]
//
// On exit col holds the value y2 had on entry.  The entry values of
// y0, y1 and y3 are not used and are overwritten.  t is scratch.
//
// rk is pasted textually behind a literal displacement, giving operands
// such as "12 -16(%ebp)"; it therefore has to be a memory operand of
// the form disp(%reg) or (%reg), with the assembler evaluating the
// combined displacement.

#define do_icol(tab, y0,y1,y2,y3, col, t, rk) \
	mov	0 rk,%y0; \
	movzx	%l(col),%t; \
	mov	4 rk,%y1; \
	xor	tab(,%t,4),%y0; \
	mov	12 rk,%y3; \
	movzx	%h(col),%t; \
	shr	$16,%col; \
	xor	tab+tlen(,%t,4),%y1; \
	movzx	%l(col),%t; \
	movzx	%h(col),%col; \
	xor	tab+3*tlen(,%col,4),%y3; \
	mov	%y2,%col; \
	mov	8 rk,%y2; \
	xor	tab+2*tlen(,%t,4),%y2;
143
144
// save(slot, reg): store register reg in the 4-byte stack slot number
// slot, i.e. at 4*slot(%esp).  (The original Gladman code had
// conditional saves to MMX registers; only the stack is used here.)
#define save(slot, reg) \
	mov	%reg,4*slot(%esp)
148
// restore(reg, slot): load register reg from the 4-byte stack slot
// number slot, i.e. from 4*slot(%esp).
#define restore(reg, slot) \
	mov	4*slot(%esp),%reg
151
// Forward (encryption) round macros.  fwd_rnd1 and fwd_rnd2 are used
// alternately: a pair starts with the column values in r0,r1,r4,r5 and
// ends with the new values in the same registers.  Both use the stack
// slots 0(%esp) and 4(%esp) for temporary storage and r3 as scratch.

// fwd_rnd1(rk, tab): one forward round.
//	rk	memory operand addressing the round key for this round
//	tab	base of the set of four lookup tables to use
//	on entry: column values in r0,r1,r4,r5
//	on exit:  column values in r2,r1,r4,r5 (r0 and r3 destroyed)
#define fwd_rnd1(rk, tab) \
	save(0,r1);				/* slot 0 <- r1 */ \
	save(1,r5);				/* slot 1 <- r5 */ \
	do_fcol(tab, r2,r5,r4,r1, r0,r3, rk);	/* input r0; r0 <- old r4 */ \
	do_col (tab, r4,r1,r2,r5, r0,r3);	/* input is old r4 */ \
	restore(r0,0); \
	do_col (tab, r1,r2,r5,r4, r0,r3);	/* input is old r1 */ \
	restore(r0,1); \
	do_col (tab, r5,r4,r1,r2, r0,r3);	/* input is old r5 */
171
// fwd_rnd2(rk, tab): one forward round, the counterpart of fwd_rnd1
// with the roles of r0 and r2 exchanged.
//	rk	memory operand addressing the round key for this round
//	tab	base of the set of four lookup tables to use
//	on entry: column values in r2,r1,r4,r5
//	on exit:  column values in r0,r1,r4,r5 (r2 and r3 destroyed)
// Uses the stack slots 0(%esp) and 4(%esp) for temporary storage.
#define fwd_rnd2(rk, tab) \
	save(0,r1);				/* slot 0 <- r1 */ \
	save(1,r5);				/* slot 1 <- r5 */ \
	do_fcol(tab, r0,r5,r4,r1, r2,r3, rk);	/* input r2; r2 <- old r4 */ \
	do_col (tab, r4,r1,r0,r5, r2,r3);	/* input is old r4 */ \
	restore(r2,0); \
	do_col (tab, r1,r0,r5,r4, r2,r3);	/* input is old r1 */ \
	restore(r2,1); \
	do_col (tab, r5,r4,r1,r0, r2,r3);	/* input is old r5 */
186
// Inverse (decryption) round macros.  inv_rnd1 and inv_rnd2 are used
// alternately: a pair starts with the column values in r0,r1,r4,r5 and
// ends with the new values in the same registers.  Both use the stack
// slots 0(%esp) and 4(%esp) for temporary storage and r3 as scratch.

// inv_rnd1(rk, tab): one inverse round.
//	rk	memory operand addressing the round key for this round
//	tab	base of the set of four lookup tables to use
//	on entry: column values in r0,r1,r4,r5
//	on exit:  column values in r2,r1,r4,r5 (r0 and r3 destroyed)
#define inv_rnd1(rk, tab) \
	save(0,r1);				/* slot 0 <- r1 */ \
	save(1,r5);				/* slot 1 <- r5 */ \
	do_icol(tab, r2,r1,r4,r5, r0,r3, rk);	/* input r0; r0 <- old r4 */ \
	do_col (tab, r4,r5,r2,r1, r0,r3);	/* input is old r4 */ \
	restore(r0,0); \
	do_col (tab, r1,r4,r5,r2, r0,r3);	/* input is old r1 */ \
	restore(r0,1); \
	do_col (tab, r5,r2,r1,r4, r0,r3);	/* input is old r5 */
206
// inv_rnd2(rk, tab): one inverse round, the counterpart of inv_rnd1
// with the roles of r0 and r2 exchanged.
//	rk	memory operand addressing the round key for this round
//	tab	base of the set of four lookup tables to use
//	on entry: column values in r2,r1,r4,r5
//	on exit:  column values in r0,r1,r4,r5 (r2 and r3 destroyed)
// Uses the stack slots 0(%esp) and 4(%esp) for temporary storage.
#define inv_rnd2(rk, tab) \
	save(0,r1);				/* slot 0 <- r1 */ \
	save(1,r5);				/* slot 1 <- r5 */ \
	do_icol(tab, r0,r1,r4,r5, r2,r3, rk);	/* input r2; r2 <- old r4 */ \
	do_col (tab, r4,r5,r0,r1, r2,r3);	/* input is old r4 */ \
	restore(r2,0); \
	do_col (tab, r1,r4,r5,r0, r2,r3);	/* input is old r1 */ \
	restore(r2,1); \
	do_col (tab, r5,r0,r1,r4, r2,r3);	/* input is old r5 */
221
// AES (Rijndael) Encryption Subroutine
//
// aes_rval aes_enc_blk(const unsigned char in_blk[],
//                      unsigned char out_blk[], const aes_ctx cx[1]);
//
// All arguments are taken from the stack.  Reads the 16-byte block at
// in_blk, encrypts it with the key schedule at offset ekey of the
// context using the round count stored at offset nrnd, and writes the
// 16-byte result to out_blk.
//
// Round counts of 10 and 12 are dispatched explicitly; any other value
// takes the 14-round path.
//
// Returns 1 in %eax.  %ebx, %esi, %edi and %ebp are saved and restored;
// %ecx and %edx are destroyed.  Uses 8 bytes of stack for temporaries
// in addition to the four saved registers.

	.global	aes_enc_blk
	.type	aes_enc_blk,@function

	.extern	ft_tab
	.extern	fl_tab

	.align	4

aes_enc_blk:
	push	%ebp
	mov	ctx(%esp),%ebp		// pointer to context

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

	push	%ebx
	mov	in_blk+4(%esp),%r2	// +4: two registers pushed so far
	push	%esi
	mov	nrnd(%ebp),%r3		// number of rounds
	push	%edi
#if ekey != 0
	lea	ekey(%ebp),%ebp		// key pointer
#endif

// input four columns and xor in first round key

	mov	(%r2),%r0
	mov	4(%r2),%r1
	mov	8(%r2),%r4
	mov	12(%r2),%r5
	xor	(%ebp),%r0
	xor	4(%ebp),%r1
	xor	8(%ebp),%r4
	xor	12(%ebp),%r5

	sub	$8,%esp			// space for register saves on stack
	add	$16,%ebp		// increment to next round key
	sub	$10,%r3
	je	4f			// 10 rounds for 128-bit key
	add	$32,%ebp		// two extra round keys come first
	sub	$2,%r3
	je	3f			// 12 rounds for 192-bit key
	add	$32,%ebp		// two more extra round keys

// %ebp is biased by the number of extra rounds, so the round-key
// offsets below address consecutive round keys on every entry path.

2:	fwd_rnd1( -64(%ebp) ,ft_tab)	// 14 rounds for 256-bit key
	fwd_rnd2( -48(%ebp) ,ft_tab)
3:	fwd_rnd1( -32(%ebp) ,ft_tab)	// 12 rounds for 192-bit key
	fwd_rnd2( -16(%ebp) ,ft_tab)
4:	fwd_rnd1(    (%ebp) ,ft_tab)	// 10 rounds for 128-bit key
	fwd_rnd2( +16(%ebp) ,ft_tab)
	fwd_rnd1( +32(%ebp) ,ft_tab)
	fwd_rnd2( +48(%ebp) ,ft_tab)
	fwd_rnd1( +64(%ebp) ,ft_tab)
	fwd_rnd2( +80(%ebp) ,ft_tab)
	fwd_rnd1( +96(%ebp) ,ft_tab)
	fwd_rnd2(+112(%ebp) ,ft_tab)
	fwd_rnd1(+128(%ebp) ,ft_tab)
	fwd_rnd2(+144(%ebp) ,fl_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

	add	$8,%esp			// drop the temporary slots
	mov	out_blk+12(%esp),%ebp	// +12: four registers pushed
	mov	%r5,12(%ebp)
	pop	%edi
	mov	%r4,8(%ebp)
	pop	%esi
	mov	%r1,4(%ebp)
	pop	%ebx
	mov	%r0,(%ebp)
	pop	%ebp
	mov	$1,%eax			// return value
	ret

	.size	aes_enc_blk,.-aes_enc_blk
297
// AES (Rijndael) Decryption Subroutine
//
// aes_rval aes_dec_blk(const unsigned char in_blk[],
//                      unsigned char out_blk[], const aes_ctx cx[1]);
//
// All arguments are taken from the stack.  Reads the 16-byte block at
// in_blk, decrypts it with the key schedule at offset dkey of the
// context using the round count stored at offset nrnd, and writes the
// 16-byte result to out_blk.  The schedule is consumed from its end
// (offset 16*nrnd) towards its start.
//
// Round counts of 10 and 12 are dispatched explicitly; any other value
// takes the 14-round path.
//
// Returns 1 in %eax.  %ebx, %esi, %edi and %ebp are saved and restored;
// %ecx and %edx are destroyed.  Uses 8 bytes of stack for temporaries
// in addition to the four saved registers.

	.global	aes_dec_blk
	.type	aes_dec_blk,@function

	.extern	it_tab
	.extern	il_tab

	.align	4

aes_dec_blk:
	push	%ebp
	mov	ctx(%esp),%ebp		// pointer to context

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

	push	%ebx
	mov	in_blk+4(%esp),%r2	// +4: two registers pushed so far
	push	%esi
	mov	nrnd(%ebp),%r3		// number of rounds
	push	%edi
#if dkey != 0
	lea	dkey(%ebp),%ebp		// key pointer
#endif
	mov	%r3,%r0
	shl	$4,%r0			// 16 bytes per round key
	add	%r0,%ebp		// point at the last round key

// input four columns and xor in first round key

	mov	(%r2),%r0
	mov	4(%r2),%r1
	mov	8(%r2),%r4
	mov	12(%r2),%r5
	xor	(%ebp),%r0
	xor	4(%ebp),%r1
	xor	8(%ebp),%r4
	xor	12(%ebp),%r5

	sub	$8,%esp			// space for register saves on stack
	sub	$16,%ebp		// step back to the next round key
	sub	$10,%r3
	je	4f			// 10 rounds for 128-bit key
	sub	$32,%ebp		// two extra round keys come first
	sub	$2,%r3
	je	3f			// 12 rounds for 192-bit key
	sub	$32,%ebp		// two more extra round keys

// %ebp is biased by the number of extra rounds, so the round-key
// offsets below address consecutive round keys on every entry path.

2:	inv_rnd1( +64(%ebp), it_tab)	// 14 rounds for 256-bit key
	inv_rnd2( +48(%ebp), it_tab)
3:	inv_rnd1( +32(%ebp), it_tab)	// 12 rounds for 192-bit key
	inv_rnd2( +16(%ebp), it_tab)
4:	inv_rnd1(    (%ebp), it_tab)	// 10 rounds for 128-bit key
	inv_rnd2( -16(%ebp), it_tab)
	inv_rnd1( -32(%ebp), it_tab)
	inv_rnd2( -48(%ebp), it_tab)
	inv_rnd1( -64(%ebp), it_tab)
	inv_rnd2( -80(%ebp), it_tab)
	inv_rnd1( -96(%ebp), it_tab)
	inv_rnd2(-112(%ebp), it_tab)
	inv_rnd1(-128(%ebp), it_tab)
	inv_rnd2(-144(%ebp), il_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

	add	$8,%esp			// drop the temporary slots
	mov	out_blk+12(%esp),%ebp	// +12: four registers pushed
	mov	%r5,12(%ebp)
	pop	%edi
	mov	%r4,8(%ebp)
	pop	%esi
	mov	%r1,4(%ebp)
	pop	%ebx
	mov	%r0,(%ebp)
	pop	%ebp
	mov	$1,%eax			// return value
	ret

	.size	aes_dec_blk,.-aes_dec_blk
376