]> git.proxmox.com Git - ceph.git/blob - ceph/src/crypto/isa-l/isa-l_crypto/aes/cbc_common.asm
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / crypto / isa-l / isa-l_crypto / aes / cbc_common.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 ;
31 ; the following defines control the operation of the macros below and
32 ; need to be defined in the including file
33 ; KEY_ROUNDS - number of key rounds needed based on key length: 128bit - 11, 192bit - 13 or 256bit - 15
34 ; EARLY_BLOCKS - number of data block to load before starting computations
35 ; PARALLEL_BLOCKS - number of blocks of data to process in parallel also the number of xmm regs to reserve for data
36 ; IV_CNT - number of xmm regs to use for IV data valid values of 0 or 1
37 ; TMP_CNT - number of tmp xmm register to reserve
38 ; XMM_USAGE - number of xmm registers to use. must be at least the same as PARALLEL_BLOCKS + 2
39 ;
40
41 %include "reg_sizes.asm"
42
43 ;
44 ; the following instructions set specific macros must be defined in the user file
45 ; to make use of the AES macros below
46 ; MOVDQ - move from memory to xmm reg
47 ; PXOR - XOR of two xmm registers pxor
48 ; AES_DEC - AES block decode for early key rounds
49 ; AES_DEC_LAST - AES block decode for last key round
50 ; or
51 ; AES_ENC - AES block encode for early key rounds
52 ; AES_ENC_LAST - AES block encode for last key round
53
; xmm register layout, lowest register index first:
;	data blocks	FIRST_XDATA .. FIRST_XDATA + PARALLEL_BLOCKS - 1
;	IV holder	IV_IDX (IV_CNT registers; IV_CNT defaults to 1 when the
;			including file does not define it)
;	temporaries	TMP .. TMP + TMP_CNT - 1 (TMP_CNT is set to 2 here)
;	cached keys	FIRST_CKEY .. FIRST_CKEY + CKEY_CNT - 1
; PARALLEL_BLOCKS and XMM_USAGE must already be defined by the including file.
%assign FIRST_XDATA	0
%assign IV_IDX		(FIRST_XDATA + PARALLEL_BLOCKS)
%ifndef IV_CNT
%define IV_CNT		(1)
%endif
%assign TMP		(IV_IDX + IV_CNT)
%assign TMP_CNT		2
%assign FIRST_CKEY	(TMP + TMP_CNT)
; Registers left over for round keys once data, IV and temporaries are placed.
; FIRST_XDATA is 0, so FIRST_CKEY equals PARALLEL_BLOCKS + IV_CNT + TMP_CNT.
%assign CKEY_CNT	(XMM_USAGE - FIRST_CKEY)

; Role-named aliases for "xmm<i>"; all four expand identically and exist only
; to show what the register is expected to hold at the point of use.
%define reg(i)		xmm %+ i
%define XDATA(i)	xmm %+ i
%define KEY_REG(i)	xmm %+ i
%define IV_REG(i)	xmm %+ i

; Default general purpose register used as the buffer offset.
%define IDX		rax
73
74
75
76
77 ;
78 ;
79 ; AES CBC DECODE AND ENCODE MACROS
80 ;
81 ;
82
;
; CBC_DECRYPT_BLOCKS
;	Decrypts num_blocks cipher text blocks in parallel and completes the CBC
;	chaining for them.
;
;	The block cipher pass is delegated to AES_PARALLEL_ENC_BLOCKS, driven
;	with the caller supplied decrypt instructions. Afterwards, for every
;	block in order:
;	  - the decrypted block is XORed with the previous cipher text block,
;	    which is held in XDATA(IV_IDX)
;	  - XDATA(IV_IDX) is reloaded with this block's cipher text
;	  - the plain text is written to the output buffer
;	On exit XDATA(IV_IDX) holds the last cipher text block processed, IDX has
;	been advanced and LEN reduced by num_blocks*16 (flags reflect the sub).
;
;	Arguments:
;	   1 TOT_ROUNDS   - number of key rounds
;	   2 num_blocks   - blocks to process in this call
;	   3 EARLY_LOADS  - blocks loaded before the first key round is applied
;	   4 MOVDQ        - xmm load/store instruction or macro
;	   5 PXOR         - xmm XOR instruction or macro
;	   6 AES_DEC      - AES decrypt step for the middle rounds
;	   7 AES_DEC_LAST - AES decrypt step for the final round
;	   8 CACHED_KEYS  - number of round keys already held in xmm registers
;	   9 TMP          - index of the first temporary xmm register
;	  10 TMP_CNT      - number of temporary xmm registers
;	  11 FIRST_CKEY   - index of the first cached-key xmm register
;	  12 KEY_DATA     - base address of the expanded key schedule
;	  13 FIRST_XDATA  - forwarded to AES_PARALLEL_ENC_BLOCKS; the result
;	                    registers are addressed here as XDATA(0..num_blocks-1)
;	  14 IN           - input buffer base
;	  15 OUT          - output buffer base
;	  16 IDX          - register holding the offset into IN and OUT
;	  17 LEN          - register holding the number of bytes left
;
;	Assemble-time symbol 'i' is overwritten.
;
%macro CBC_DECRYPT_BLOCKS 17
%define %%TOT_ROUNDS	%1
%define %%num_blocks	%2
%define %%EARLY_LOADS	%3
%define %%MOVDQ		%4
%define %%PXOR		%5
%define %%AES_DEC	%6
%define %%AES_DEC_LAST	%7
%define %%CACHED_KEYS	%8
%define %%TMP		%9
%define %%TMP_CNT	%10
%define %%FIRST_CKEY	%11
%define %%KEY_DATA	%12
%define %%FIRST_XDATA	%13
%define %%IN		%14
%define %%OUT		%15
%define %%IDX		%16
%define %%LEN		%17

	AES_PARALLEL_ENC_BLOCKS %%TOT_ROUNDS, %%num_blocks, %%EARLY_LOADS, %%MOVDQ, %%PXOR, %%AES_DEC, %%AES_DEC_LAST, %%CACHED_KEYS, %%TMP, %%TMP_CNT, %%FIRST_CKEY, %%KEY_DATA, %%FIRST_XDATA, %%IN, %%OUT, %%IDX

	;
	; Undo the CBC chaining: P(i) = D(C(i)) xor C(i-1)
	;
	%assign i 0
	%rep (%%num_blocks)
		%%PXOR	XDATA(i), XDATA(IV_IDX)			; XOR result with previous block's C
		; Reload the chaining register BEFORE storing the plain text, so the
		; cipher text is still intact when IN and OUT are the same buffer.
		; The offset uses the IDX argument, not the file-level IDX define.
		%%MOVDQ	XDATA(IV_IDX), [%%IN + %%IDX + i*16]	; IV = current block's C
		%%MOVDQ	[%%OUT + %%IDX + i*16], XDATA(i)	; save plain text to out
		%assign i (i+1)
	%endrep

	add	%%IDX, %%num_blocks*16
	sub	%%LEN, %%num_blocks*16
%endmacro
125
126
;
; CBC_ENC_INIT
;	Sets up the first block of a CBC encryption: loads the first input block
;	and the IV into xmm registers and XORs the IV into the block.
;
;	Arguments:
;	  1 P_FIRST - index of the xmm register that receives the first block
;	  2 IV_IDX  - index of the xmm register that receives the IV
;	  3 MOVDQ   - xmm load instruction or macro
;	  4 PXOR    - xmm XOR instruction or macro
;	  5 IV      - address of the IV
;	  6 IN      - input buffer base
;	  7 IDX     - offset into the input buffer
%macro CBC_ENC_INIT 7
%define %%BLK_XMM	%1
%define %%IV_XMM	%2
%define %%LOAD		%3
%define %%XOR		%4
%define %%IV_PTR	%5
%define %%SRC		%6
%define %%OFFSET	%7

	%%LOAD	XDATA(%%BLK_XMM), [%%SRC + %%OFFSET]	; first plain text block
	%%LOAD	reg(%%IV_XMM), [%%IV_PTR]		; initialization vector
	%%XOR	XDATA(%%BLK_XMM), reg(%%IV_XMM)		; block ^= IV
%endmacro
143
;
; CBC_ENC_SUBLOOP
;	Emits BLOCKS unrolled CBC encryption steps. On entry the block in
;	XDATA(START_DATA) must already be XORed with its chaining value (see
;	CBC_ENC_INIT). LEN is the number of bytes remaining and IDX the offset of
;	the current block in IN/OUT.
;
;	Each unrolled step:
;	  - unless LEN == 16, loads the following input block into XDATA(p_next)
;	  - encrypts XDATA(p_first) with AES_ENC_BLOCKS
;	  - unless LEN == 16, XORs the fresh cipher text into XDATA(p_next)
;	  - stores XDATA(p_first) to OUT, adds 16 to IDX and subtracts 16 from LEN
;	  - on every step but the last, leaves the macro when LEN reached zero
;	The data registers rotate upwards from START_DATA; the last step reads
;	ahead into XDATA(START_DATA) so the emitted code can be re-entered. With
;	BLOCKS == 1 the read-ahead block is copied back to XDATA(START_DATA).
;
;	Arguments:
;	   1 TOT_ROUNDS   - number of key rounds
;	   2 BLOCKS       - number of unrolled steps to emit
;	   3 START_DATA   - index of the first data xmm register
;	   4 MOVDQ        - xmm load/store instruction or macro
;	   5 PXOR         - xmm XOR instruction or macro
;	   6 AES_ENC      - AES encrypt step for the middle rounds
;	   7 AES_ENC_LAST - AES encrypt step for the final round
;	   8 TMP          - index of the first temporary xmm register
;	   9 TMP_CNT      - number of temporary xmm registers
;	  10 FIRST_CKEY   - index of the first cached-key xmm register
;	  11 CKEY_CNT     - passed to AES_ENC_BLOCKS as its cached-key count
;	  12 KEYS         - base address of the expanded key schedule
;	  13 CACHED_KEYS  - accepted but not referenced by this macro
;	  14 IN           - input buffer base
;	  15 OUT          - output buffer base
;	  16 IDX          - register holding the offset into IN and OUT
;	  17 LEN          - register holding the number of bytes left
;
;	Assemble-time symbols this_blk, next_blk, p_first, p_next and blk are
;	overwritten, as are the ones AES_ENC_BLOCKS uses.
;
%macro CBC_ENC_SUBLOOP 17
%define %%ROUNDS	%1
%define %%UNROLL	%2
%define %%FIRST_REG	%3
%define %%MOVDQ		%4
%define %%PXOR		%5
%define %%AES_ENC	%6
%define %%AES_ENC_LAST	%7
%define %%TMP		%8
%define %%TMP_CNT	%9
%define %%FIRST_CKEY	%10
%define %%CKEY_CNT	%11
%define %%KEYS		%12
%define %%CACHED_KEYS	%13	; not referenced below
%define %%IN		%14
%define %%OUT		%15
%define %%IDX		%16
%define %%LEN		%17

	%assign this_blk 0			; block offset of the block being encrypted
	%assign next_blk 1			; block offset of the read-ahead block
	%assign p_first %%FIRST_REG
	%assign p_next (p_first+1)
	%assign blk 1				; 1-based number of the step being emitted
	%rep %%UNROLL
		; read ahead the following block unless this is the final 16 bytes
		cmp	%%LEN, 16
		%push cbc_enc_read_ahead
		je	%$no_read_ahead
		%%MOVDQ	XDATA(p_next), [%%IN + %%IDX + next_blk*16]
	%$no_read_ahead:
		%pop

		AES_ENC_BLOCKS %%ROUNDS, p_first, %%TMP, %%TMP_CNT, %%FIRST_CKEY, %%CKEY_CNT, %%KEYS, %%MOVDQ, %%PXOR, %%AES_ENC, %%AES_ENC_LAST

		; chain: next block ^= cipher text just produced (skipped on the final block)
		cmp	%%LEN, 16
		%push cbc_enc_chain
		je	%$no_chain
		%%PXOR	XDATA(p_next), XDATA(p_first)
	%$no_chain:
		%pop

		; write the cipher text and step to the next block;
		; the sub must stay last, the jz below consumes its flags
		%%MOVDQ	[%%OUT + %%IDX + this_blk*16], XDATA(p_first)
		add	%%IDX, 16
		sub	%%LEN, 16

		%if (blk < %%UNROLL)		; no early exit needed after the last step
		jz	%%sub_loop_done		; all data consumed
		%endif

		; rotate the data registers for the next unrolled step
		%assign blk (blk+1)
		%assign p_first (p_next)
		%if (blk == %%UNROLL)		; last step reads ahead into the first register
			%assign p_next (%%FIRST_REG)
		%elif (1 == %%UNROLL)		; single step: move the read-ahead block back
			%%MOVDQ	XDATA(%%FIRST_REG), XDATA(p_next)
		%else
			%assign p_next (p_next+1)
		%endif
	%endrep

%%sub_loop_done:
%endmacro	; CBC_ENC_SUBLOOP
226
227
228 ;
229 ;
230 ; AES BLOCK ENCODE MACROS
231 ;
232 ;
233
;
; FILL_KEY_CACHE
;	Loads the leading round keys of the key schedule into consecutive xmm
;	registers: round r goes to KEY_REG(CKEY_START + r) for every r below
;	both CACHED_KEYS and the file-level KEY_ROUNDS.
;
;	Arguments:
;	  1 CACHED_KEYS - number of round keys to keep in registers
;	  2 CKEY_START  - index of the first cached-key xmm register
;	  3 KEY_DATA    - base address of the expanded key schedule
;	  4 MOVDQ       - xmm load instruction or macro
;
;	Assemble-time symbols 'rnd' and 'c' are overwritten.
%macro FILL_KEY_CACHE 4
%define %%N_CACHED	%1
%define %%FIRST_REG	%2
%define %%SCHEDULE	%3
%define %%LOAD		%4

	%assign rnd 0
	%rep KEY_ROUNDS
		%if (%%N_CACHED > rnd)			; this round's key gets a register
			%assign c (%%FIRST_REG + rnd)
			%%LOAD	KEY_REG(c), [%%SCHEDULE + rnd*16]
		%endif
		%assign rnd (rnd+1)
	%endrep
%endmacro	; FILL_KEY_CACHE
252
;
; SCHEDULE_DATA_LOAD
;	Loads at most one further data block per invocation: block number
;	'blocks_loaded' is read from IN + IDX into XDATA(blocks_loaded) and the
;	counter is incremented. Emits nothing once PARALLEL_DATA blocks are loaded.
;
;	'blocks_loaded' is an in/out assemble-time symbol that the using macro
;	or function must initialize.
;
;	Arguments:
;	  1 PARALLEL_DATA - total number of blocks to load
;	  2 EARLY_LOADS   - accepted but not referenced by this macro
;	  3 MOVDQ         - xmm load instruction or macro
;	  4 IN            - input buffer base
;	  5 IDX           - offset into the input buffer
%macro SCHEDULE_DATA_LOAD 5
%define %%WANTED	%1
%define %%EARLY_LOADS	%2	; not referenced below
%define %%LOAD		%3
%define %%SRC		%4
%define %%OFFSET	%5

	%if (%%WANTED > blocks_loaded)
		%%LOAD	XDATA(blocks_loaded), [%%SRC + %%OFFSET + blocks_loaded*16]
		%assign blocks_loaded (blocks_loaded+1)
	%endif
%endmacro	; SCHEDULE_DATA_LOAD
271
;
; INIT_SELECT_KEY
;	Prepares the temporary key registers for a run of SELECT_KEY calls:
;	points 'current_tmp' at the first temporary register and, when not every
;	round key is cached, preloads the first uncached round key (round number
;	CACHED_KEYS) into it.
;
;	'current_tmp' is an in/out assemble-time symbol shared with SELECT_KEY.
;
;	Arguments:
;	  1 TOT_ROUNDS  - number of key rounds
;	  2 CACHED_KEYS - number of round keys held in xmm registers
;	  3 KEY_DATA    - base address of the expanded key schedule
;	  4 FIRST_TMP   - index of the first temporary xmm register
;	  5 TMP_CNT     - accepted but not referenced by this macro
;	  6 MOVDQ       - xmm load instruction or macro
%macro INIT_SELECT_KEY 6
%define %%ROUNDS	%1
%define %%N_CACHED	%2
%define %%SCHEDULE	%3
%define %%TMP_BASE	%4
%define %%TMP_CNT	%5	; not referenced below
%define %%LOAD		%6

	%assign current_tmp (%%TMP_BASE)
	%if (%%N_CACHED < %%ROUNDS)		; at least one round key is not cached
		%%LOAD	KEY_REG(current_tmp), [%%SCHEDULE + %%N_CACHED*16]
	%endif
%endmacro	; INIT_SELECT_KEY
289
;
; SELECT_KEY
;	Sets the assemble-time symbol 'key' to the index of the xmm register to
;	use as the round key for ROUND, emitting a key load when that is needed.
;
;	  - ROUND < CACHED_KEYS: key = FIRST_KEY + ROUND, nothing is emitted.
;	  - otherwise, TMP_CNT == 1: key = current_tmp, and this round's key is
;	    loaded into that register.
;	  - otherwise: key = current_tmp, which was loaded earlier by
;	    INIT_SELECT_KEY or by the previous call. If another round follows,
;	    current_tmp flips between FIRST_TMP and FIRST_TMP + 1 and the next
;	    round's key is loaded into the new current_tmp.
;
;	'current_tmp' is an in/out assemble-time symbol; 'key' and 'next_round'
;	are overwritten.
;
;	Arguments:
;	  1 ROUND       - round number being processed, counted from 0
;	  2 TOT_ROUNDS  - number of key rounds
;	  3 CACHED_KEYS - number of round keys held in xmm registers
;	  4 FIRST_KEY   - index of the first cached-key xmm register
;	  5 KEY_DATA    - base address of the expanded key schedule
;	  6 FIRST_TMP   - index of the first temporary xmm register
;	  7 TMP_CNT     - number of temporary xmm registers
;	  8 MOVDQ       - xmm load instruction or macro
%macro SELECT_KEY 8
%define %%RND		%1
%define %%ROUNDS	%2
%define %%N_CACHED	%3
%define %%CKEY_BASE	%4
%define %%SCHEDULE	%5
%define %%TMP_BASE	%6
%define %%N_TMP		%7
%define %%LOAD		%8

	%if (%%RND < %%N_CACHED)
		; round key lives in its cache register
		%assign key (%%RND + %%CKEY_BASE)
	%elif (1 == %%N_TMP)
		; single temporary: fetch this round's key right before it is used
		%assign key (current_tmp)
		%%LOAD	KEY_REG(current_tmp), [%%SCHEDULE + %%RND*16]
	%else
		; two temporaries: use the key loaded earlier, fetch the next one into the other
		%assign key (current_tmp)
		%assign next_round (%%RND+1)
		%if (next_round < %%ROUNDS)		; another round follows
			%if (current_tmp == %%TMP_BASE)
				%assign current_tmp (current_tmp + 1)
			%else
				%assign current_tmp (%%TMP_BASE)
			%endif
			%%LOAD	KEY_REG(current_tmp), [%%SCHEDULE + next_round*16]
		%endif
	%endif
%endmacro	; SELECT_KEY
326
327
;
; AES_PARALLEL_ENC_BLOCKS
;	Runs the AES round sequence over PARALLEL_DATA blocks held in xmm
;	registers, interleaving the blocks within each round.
;
;	  - EARLY_LOADS blocks are loaded up front with SCHEDULE_DATA_LOAD.
;	  - Round 0 XORs the round key into each block and, after each block,
;	    schedules the load of one more block until all are loaded.
;	  - Rounds 1 .. KEY_ROUNDS-2 apply AES_DEC, the last round AES_DEC_LAST.
;	The step instructions are supplied by the caller, so the same macro
;	serves encryption and decryption despite the argument names.
;
;	Round 0 touches XDATA(i) before the load scheduled for that iteration,
;	so EARLY_LOADS must be at least 1 whenever PARALLEL_DATA is non-zero.
;	SCHEDULE_DATA_LOAD fills XDATA(0) upwards while the round loop starts at
;	FIRST_XDATA, so the two only line up when FIRST_XDATA is 0.
;
;	Arguments:
;	   1 KEY_ROUNDS    - number of key rounds; drives both the round loop
;	                     and the last-round test
;	   2 PARALLEL_DATA - number of blocks processed together
;	   3 EARLY_LOADS   - blocks loaded before round 0 starts
;	   4 MOVDQ         - xmm load instruction or macro
;	   5 PXOR          - xmm XOR instruction or macro
;	   6 AES_DEC       - AES step for the middle rounds
;	   7 AES_DEC_LAST  - AES step for the final round
;	   8 CACHED_KEYS   - number of round keys held in xmm registers
;	   9 TMP           - index of the first temporary xmm register
;	  10 TMP_CNT       - number of temporary xmm registers
;	  11 FIRST_CKEY    - index of the first cached-key xmm register
;	  12 KEY_DATA      - base address of the expanded key schedule
;	  13 FIRST_XDATA   - index of the first data xmm register
;	  14 IN            - input buffer base
;	  15 OUT           - accepted but not referenced by this macro
;	  16 IDX           - offset into the input buffer
;
;	Assemble-time symbols blocks_loaded, current_tmp, round, key and i are
;	overwritten, as are the ones SELECT_KEY uses.
;
%macro AES_PARALLEL_ENC_BLOCKS 16
%define %%KEY_ROUNDS	%1
%define %%PARALLEL_DATA	%2
%define %%EARLY_LOADS	%3
%define %%MOVDQ		%4
%define %%PXOR		%5
%define %%AES_DEC	%6
%define %%AES_DEC_LAST	%7
%define %%CACHED_KEYS	%8
%define %%TMP		%9
%define %%TMP_CNT	%10
%define %%FIRST_CKEY	%11
%define %%KEY_DATA	%12
%define %%FIRST_XDATA	%13
%define %%IN		%14
%define %%OUT		%15	; not referenced below
%define %%IDX		%16

	%assign blocks_loaded 0

	%rep %%EARLY_LOADS
		SCHEDULE_DATA_LOAD %%PARALLEL_DATA, %%EARLY_LOADS, %%MOVDQ, %%IN, %%IDX	; updates blocks_loaded
	%endrep

	; start from the temporary register the caller passed in, not the
	; file-level TMP, so the macro honours its own argument
	%assign current_tmp (%%TMP)
	INIT_SELECT_KEY %%KEY_ROUNDS, %%CACHED_KEYS, %%KEY_DATA, %%TMP, %%TMP_CNT, %%MOVDQ

	%assign round 0
	%assign key 0
	; loop over the round count argument so it always agrees with the
	; last-round test below
	%rep %%KEY_ROUNDS
		SELECT_KEY round, %%KEY_ROUNDS, %%CACHED_KEYS, %%FIRST_CKEY, %%KEY_DATA, %%TMP, %%TMP_CNT, %%MOVDQ

		%assign i %%FIRST_XDATA
		%rep %%PARALLEL_DATA		; apply this round to every block
			%if (0 == round)
				%%PXOR	XDATA(i), KEY_REG(key)		; first round's step
				SCHEDULE_DATA_LOAD %%PARALLEL_DATA, %%EARLY_LOADS, %%MOVDQ, %%IN, %%IDX

			%elif ( (%%KEY_ROUNDS-1) == round )
				%%AES_DEC_LAST	XDATA(i), KEY_REG(key)	; last round's step

			%else
				%%AES_DEC	XDATA(i), KEY_REG(key)	; middle round's (1..last-1) step

			%endif
			%assign i (i+1)
		%endrep
		%assign round (round+1)
	%endrep
%endmacro	; AES_PARALLEL_ENC_BLOCKS
383
384
385
;
; AES_ENC_BLOCKS
;	Runs the complete AES round sequence over the single block held in
;	XDATA(ENC_BLOCK): round 0 XORs in the round key, the following rounds
;	use AES_ENC and the final round uses AES_ENC_LAST.
;
;	Round keys come from SELECT_KEY, which hands out cached key registers
;	first and then feeds the uncached keys through the temporary registers.
;
;	Arguments:
;	   1 TOT_ROUNDS   - number of key rounds
;	   2 ENC_BLOCK    - index of the xmm register holding the block
;	   3 TMP          - index of the first temporary xmm register
;	   4 TMP_CNT      - number of temporary xmm registers
;	   5 FIRST_CKEY   - index of the first cached-key xmm register
;	   6 CACHED_KEYS  - number of round keys held in xmm registers
;	   7 KEY_DATA     - base address of the expanded key schedule
;	   8 MOVDQ        - xmm load instruction or macro
;	   9 PXOR         - xmm XOR instruction or macro
;	  10 AES_ENC      - AES step for the middle rounds
;	  11 AES_ENC_LAST - AES step for the final round
;
;	Assemble-time symbols current_tmp, round and key are overwritten, as
;	are the ones SELECT_KEY uses.
%macro AES_ENC_BLOCKS 11
%define %%ROUNDS	%1
%define %%BLK		%2
%define %%TMP_BASE	%3
%define %%N_TMP		%4
%define %%CKEY_BASE	%5
%define %%N_CACHED	%6
%define %%SCHEDULE	%7
%define %%LOAD		%8
%define %%XOR		%9
%define %%STEP		%10
%define %%STEP_LAST	%11

	%assign current_tmp (%%TMP_BASE)
	INIT_SELECT_KEY %%ROUNDS, %%N_CACHED, %%SCHEDULE, %%TMP_BASE, %%N_TMP, %%LOAD

	%assign round 0
	%assign key (round + %%CKEY_BASE)
	%rep %%ROUNDS
		; pick the register holding this round's key
		SELECT_KEY round, %%ROUNDS, %%N_CACHED, %%CKEY_BASE, %%SCHEDULE, %%TMP_BASE, %%N_TMP, %%LOAD

		%if (0 == round)
			%%XOR		XDATA(%%BLK), KEY_REG(key)	; round zero: key whitening
		%elif (round < (%%ROUNDS-1))
			%%STEP		XDATA(%%BLK), KEY_REG(key)	; rounds 1..last-1
		%else
			%%STEP_LAST	XDATA(%%BLK), KEY_REG(key)	; final round
		%endif

		%assign round (round+1)
	%endrep
%endmacro	; AES_ENC_BLOCKS
426
427