]> git.proxmox.com Git - ceph.git/blob - ceph/src/crypto/isa-l/isa-l_crypto/aes/cbc_common.asm
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / crypto / isa-l / isa-l_crypto / aes / cbc_common.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 ;
31 ; The following defines control the operation of the macros below and
32 ; need to be defined in the including file:
33 ; KEY_ROUNDS - number of key rounds needed based on key length: 128bit - 11, 192bit - 13 or 256bit - 15
34 ; EARLY_BLOCKS - number of data blocks to load before starting computations
35 ; PARALLEL_BLOCKS - number of blocks of data to process in parallel; also the number of xmm regs to reserve for data
36 ; IV_CNT - number of xmm regs to use for IV data; valid values are 0 or 1 (defaults to 1 when not defined)
37 ; TMP_CNT - number of tmp xmm registers to reserve
38 ; XMM_USAGE - number of xmm registers to use; must be at least PARALLEL_BLOCKS + 2
39 ;
40
41 %include "reg_sizes.asm"
42
43 [bits 64] ; assemble everything below as 64-bit code
44 default rel ; memory operands without an explicit base are RIP-relative
45 section .text
46
47 ;
48 ; The following instruction-set specific macros must be defined in the user file
49 ; to make use of the AES macros below:
50 ; MOVDQ - move between memory and xmm regs
51 ; PXOR - XOR of two xmm registers (pxor)
52 ; AES_DEC - AES block decode for early key rounds
53 ; AES_DEC_LAST - AES block decode for last key round
54 ; or
55 ; AES_ENC - AES block encode for early key rounds
56 ; AES_ENC_LAST - AES block encode for last key round
57
58 ; xmm register layout, by index: data blocks first, then the IV holder, then the temp regs, then the cached round keys
59 ; CKEY_CNT is XMM_USAGE - (PARALLEL_BLOCKS + IV_CNT + TMP_CNT), i.e. the xmm regs left over for caching round keys
60 %assign FIRST_XDATA (0) ; index of the first data block register
61 %assign IV_IDX (FIRST_XDATA + PARALLEL_BLOCKS) ; IV register index, directly after the data block registers
62 %ifndef IV_CNT
63 %define IV_CNT (1) ; default to one IV register when the including file did not define IV_CNT
64 %endif
65 %assign TMP (IV_IDX + IV_CNT) ; index of the first temp register
66 %assign TMP_CNT (2) ; two temp registers; assigned here unconditionally
67 %assign FIRST_CKEY (TMP + TMP_CNT) ; index of the first cached-key register
68 %assign CKEY_CNT (XMM_USAGE - (PARALLEL_BLOCKS + IV_CNT + TMP_CNT)) ; number of cached-key registers
69
70 ; The four aliases below all expand to xmm<i>; the different names only document what the register is expected to hold
71 %define reg(i) xmm %+ i
72 %define XDATA(i) xmm %+ i
73 %define KEY_REG(i) xmm %+ i
74 %define IV_REG(i) xmm %+ i
75
76 %define IDX rax
77
78
79
80
81 ;
82 ;
83 ; AES CBC ENCODE / DECODE MACROS
84 ;
85 ;
86
87 ;
88 ; CBC_DECRYPT_BLOCKS
89 ; Runs AES_PARALLEL_ENC_BLOCKS with the supplied decrypt instructions over %%num_blocks blocks, then finishes the CBC step:
90 ; each result is XORed with XDATA(IV_IDX) and stored at OUT + IDX, and XDATA(IV_IDX) is reloaded from IN after every block,
91 ; so it ends up holding the last block's cipher text as the IV for the next call (IV_IDX and XDATA() are file-level names)
92 ; finally advances IDX and reduces LEN by num_blocks*16
93 ; NOTE(review): a block is stored to OUT before its cipher text is re-read from IN - confirm callers never pass IN == OUT
94 %macro CBC_DECRYPT_BLOCKS 17
95 %define %%TOT_ROUNDS %1 ; number of key rounds
96 %define %%num_blocks %2 ; can be 0..13
97 %define %%EARLY_LOADS %3 ; number of data blocks to load before processing
98 %define %%MOVDQ %4 ; move instruction, used here for both xmm loads and stores
99 %define %%PXOR %5 ; xmm XOR instruction
100 %define %%AES_DEC %6 ; instruction applied in the middle key rounds
101 %define %%AES_DEC_LAST %7 ; instruction applied in the last key round
102 %define %%CACHED_KEYS %8 ; number of key data cached in xmm regs
103 %define %%TMP %9 ; index of the first temp xmm reg
104 %define %%TMP_CNT %10 ; number of temp xmm regs
105 %define %%FIRST_CKEY %11 ; index of the first cached-key xmm reg
106 %define %%KEY_DATA %12 ; base address of the round key data
107 %define %%FIRST_XDATA %13 ; index of the first data xmm reg
108 %define %%IN %14 ; input data
109 %define %%OUT %15 ; output data
110 %define %%IDX %16 ; index into input and output data buffers
111 %define %%LEN %17 ; number of bytes left
112
113 AES_PARALLEL_ENC_BLOCKS %%TOT_ROUNDS, %%num_blocks, %%EARLY_LOADS, %%MOVDQ, %%PXOR, %%AES_DEC, %%AES_DEC_LAST, %%CACHED_KEYS, %%TMP, %%TMP_CNT, %%FIRST_CKEY, %%KEY_DATA, %%FIRST_XDATA, %%IN, %%OUT, %%IDX
114
115 ;
116 ; XOR the result of each block's decrypt with the previous block's cipher text (C)
117 ;
118 %assign i 0
119 %rep (%%num_blocks)
120 %%PXOR XDATA(i), XDATA(IV_IDX) ; XOR result with previous block's C
121 %%MOVDQ [%%OUT + %%IDX + i*16], XDATA(i) ; save plain text to out
122 %%MOVDQ XDATA(IV_IDX), [%%IN + %%IDX + i*16] ; load IV with current block C, indexed by the caller-supplied register like the store above
123 %assign i (i+1)
124 %endrep
125
126 add %%IDX, %%num_blocks*16
127 sub %%LEN, %%num_blocks*16
128 %endmacro
129
130
131 ;
132 ; CBC_ENC_INIT
133 ; loads the first data block and the IV into xmm regs, then XORs the IV into the data block
134 %macro CBC_ENC_INIT 7
135 %define %%data_reg %1 ; index of the xmm reg receiving the first data block
136 %define %%iv_reg %2 ; index of the xmm reg receiving the IV
137 %define %%mov_op %3
138 %define %%xor_op %4
139 %define %%iv_ptr %5 ; address the IV is read from
140 %define %%src %6 ; input data
141 %define %%off %7 ; index into input and output data buffers
142
143 %%mov_op XDATA(%%data_reg), [%%src + %%off]
144 %%mov_op reg(%%iv_reg), [%%iv_ptr]
145 %%xor_op XDATA(%%data_reg), reg(%%iv_reg)
146 %endmacro
147
148 ; CBC_ENC_SUBLOOP
149 ; Emits %%BLOCKS unrolled CBC encrypt steps, one 16-byte block per step.
150 ; assumptions: LEN is the length of data remaining and IDX is the offset into the data buffers;
151 ; XDATA(START_DATA) is expected to already hold the block to encrypt (this macro never loads it itself)
152 ;
153 ; each step:
154 ; unless LEN == 16, load the next block into the next XDATA reg (XDATA(p_next))
155 ; load first uncached key into a temp reg (if any)
156 ; AES block encrypt XDATA(p_first)
157 ; unless LEN == 16, XOR the next block (XDATA(p_next)) with the current cipher text (XDATA(p_first))
158 ; save current (XDATA(p_first)) to OUT
159 ; advance IDX and reduce LEN by 16
160 ; jump to the end of the macro if LEN reached zero (not emitted after the last step)
161 ;
162 %macro CBC_ENC_SUBLOOP 17
163 %define %%TOT_ROUNDS %1 ; number of key rounds
164 %define %%BLOCKS %2 ; can be 1...14
165 %define %%START_DATA %3 ; index of the XDATA reg holding the first block to encrypt
166 %define %%MOVDQ %4
167 %define %%PXOR %5
168 %define %%AES_DEC %6 ; despite the name, forwarded to AES_ENC_BLOCKS as its AES_ENC instruction
169 %define %%AES_DEC_LAST %7 ; despite the name, forwarded to AES_ENC_BLOCKS as its AES_ENC_LAST instruction
170 %define %%TMP %8 ; index of the first temp xmm reg
171 %define %%TMP_CNT %9 ; number of temp xmm regs
172 %define %%FIRST_CKEY %10 ; index of the first cached-key xmm reg
173 %define %%CKEY_CNT %11 ; forwarded to AES_ENC_BLOCKS as its CACHED_KEYS count
174 %define %%KEYS %12 ; base address of the round key data
175 %define %%CACHED_KEYS %13 ; not referenced in this macro
176 %define %%IN %14 ; input data
177 %define %%OUT %15 ; output data
178 %define %%IDX %16 ; index into input and output data buffers
179 %define %%LEN %17 ; number of bytes left
180
181 %assign this_blk 0 ; block offsets stay fixed because IDX itself advances every step
182 %assign next_blk 1
183 %assign p_first %%START_DATA ; XDATA index of the block encrypted in the current step
184 %assign p_next (p_first+1) ; XDATA index the following block is loaded into
185 ; blk counts the unrolled steps, starting at 1
186 %assign blk 1
187 %rep %%BLOCKS
188 ; unless exactly one block (16 bytes) is left, load the next block into XDATA(p_next)
189 cmp %%LEN, 16
190 %push skip_read
191 je %$skip_read_next
192 %%MOVDQ XDATA(p_next), [%%IN + %%IDX + next_blk*16]
193 %$skip_read_next:
194 %pop
195
196 AES_ENC_BLOCKS %%TOT_ROUNDS, p_first, %%TMP, %%TMP_CNT, %%FIRST_CKEY, %%CKEY_CNT, %%KEYS, %%MOVDQ, %%PXOR, %%AES_DEC, %%AES_DEC_LAST
197
198 ; unless exactly one block is left, XOR the next block (XDATA(p_next)) with the current cipher text (XDATA(p_first))
199 cmp %%LEN, 16
200 %push skip_next
201 je %$skip_next_blk_start
202 %%PXOR XDATA(p_next), XDATA(p_first)
203 %$skip_next_blk_start:
204 %pop
205
206 ; save current (XDATA(p_first))
207 %%MOVDQ [%%OUT + %%IDX + this_blk*16], XDATA(p_first)
208 ; advance the buffer index and reduce the remaining length
209 add %%IDX, 16
210 sub %%LEN, 16
211
212 %if (blk < %%BLOCKS) ; only insert jz if NOT last block
213 ; leave the macro when the sub above left LEN at zero
214 jz %%END_CBC_ENC_SUBLOOP
215 %endif ; (blk < %%BLOCKS)
216
217 %assign p_first (p_next)
218 %assign blk (blk+1)
219 %if (blk == %%BLOCKS) ; the last rep loop's read of the next block needs to be into START_DATA
220 %assign p_next (%%START_DATA)
221 %elif (1 == %%BLOCKS) ; single-step case: the next block was loaded into START_DATA+1, move it back to START_DATA
222 %%MOVDQ XDATA(%%START_DATA), XDATA(p_next)
223 %else
224 %assign p_next (p_next+1)
225 %endif
226 %endrep ; %%BLOCKS
227
228 %%END_CBC_ENC_SUBLOOP:
229 %endm ; CBC_ENC_SUBLOOP
230
231
232 ;
233 ;
234 ; AES BLOCK ENCODE MACROS
235 ;
236 ;
237
238 ;
239 ; FILL_KEY_CACHE
240 ; copies the first %1 round keys (never more than KEY_ROUNDS) from memory into consecutive xmm regs starting at index %2
241 %macro FILL_KEY_CACHE 4
242 %define %%n_cached %1 ; how many round keys get a cache register
243 %define %%ckey0 %2 ; index of the first key-cache xmm reg
244 %define %%key_base %3 ; base address of the round key data
245 %define %%mov_op %4
246
247 %assign rnd 0
248 %rep KEY_ROUNDS
249 %if (%%n_cached > rnd) ; this round's key has a cache register
250 %assign c (%%ckey0 + rnd)
251 %%mov_op KEY_REG(c), [%%key_base + rnd*16] ; load sub key into an available register
252 %endif
253 %assign rnd (rnd+1)
254 %endrep
255 %endmacro ; FILL_KEY_CACHE
256
257 ;
258 ; SCHEDULE_DATA_LOAD
259 ; loads at most one more message block into its xmm reg, doing nothing once %1 blocks are loaded
260 ; reads and updates global 'blocks_loaded', the count of data blocks loaded so far
261 ; 'blocks_loaded' is an in/out global and must be declared in the using macro or function
262 %macro SCHEDULE_DATA_LOAD 5
263 %define %%max_blocks %1 ; total number of blocks to load
264 %define %%early_cnt %2 ; accepted but not referenced in this macro
265 %define %%mov_op %3
266 %define %%src %4 ; input data
267 %define %%off %5 ; index into the input data buffer
268
269 %if (%%max_blocks > blocks_loaded)
270 ; fetch the next block that has not been loaded yet
271 %%mov_op XDATA(blocks_loaded), [%%src + %%off + blocks_loaded*16]
272 %assign blocks_loaded (blocks_loaded+1)
273 %endif ; (%%max_blocks > blocks_loaded)
274 %endmacro ; SCHEDULE_DATA_LOAD
275
276 ;
277 ; INIT_SELECT_KEY
278 ; resets 'current_tmp' to the first temp reg and, when not every round key is cached, preloads the first uncached key into it
279 ; 'current_tmp' is an in/out global and must be declared in the using macro or function
280 %macro INIT_SELECT_KEY 6
281 %define %%n_rounds %1 ; number of key rounds
282 %define %%n_cached %2 ; number of round keys cached in xmm regs
283 %define %%key_base %3 ; base address of the round key data
284 %define %%tmp0 %4 ; index of the first temp xmm reg
285 %define %%tmp_cnt %5 ; accepted but not referenced in this macro
286 %define %%mov_op %6
287
288 %assign current_tmp (%%tmp0)
289 %if (%%n_cached < %%n_rounds) ; at least one key is not cached: fetch the first of them
290 %%mov_op KEY_REG(current_tmp), [%%key_base + %%n_cached*16]
291 %endif ; (%%n_cached < %%n_rounds)
292 %endmacro ; INIT_SELECT_KEY
293
294 ;
295 ; SELECT_KEY
296 ; sets global 'key' to the index of the xmm reg holding the key data for %%ROUND, loading uncached key data into a temp reg as needed
297 ; 'current_tmp' is an in/out global and must be declared in the using macro or function
298 %macro SELECT_KEY 8
299 %define %%ROUND %1 ; round whose key is needed
300 %define %%TOT_ROUNDS %2 ; number of key rounds
301 %define %%CACHED_KEYS %3 ; number of round keys cached in xmm regs
302 %define %%FIRST_KEY %4 ; index of the first cached-key xmm reg
303 %define %%KEY_DATA %5 ; base address of the round key data
304 %define %%FIRST_TMP %6 ; index of the first temp xmm reg
305 %define %%TMP_CNT %7 ; number of temp xmm regs
306 %define %%MOVDQ %8
307
308 ; find the key data for this round
309 %if (%%ROUND < %%CACHED_KEYS) ; is it cached
310 %assign key (%%ROUND + %%FIRST_KEY)
311 %else
312 ; Load non-cached key %%ROUND data ping-ponging between temp regs if more than one
313 %assign key (current_tmp) ; use the previous loaded key data
314 %if (1 == %%TMP_CNT)
315 %%MOVDQ KEY_REG(current_tmp), [%%KEY_DATA + %%ROUND*16] ; single temp reg: load this round's key data into it now
316 %else
317 %assign next_round (%%ROUND+1)
318 %if (next_round < %%TOT_ROUNDS) ; if more rounds to be done
319 %if (current_tmp == %%FIRST_TMP) ; calc the next temp reg to use
320 %assign current_tmp (current_tmp + 1)
321 %else
322 %assign current_tmp (%%FIRST_TMP)
323 %endif ; (current_tmp == %%FIRST_TMP)
324 %%MOVDQ KEY_REG(current_tmp), [%%KEY_DATA + next_round*16] ; load the next round's key data into the other temp reg
325
326 %endif ; (next_round < %%TOT_ROUNDS)
327 %endif ; (1 == %%TMP_CNT)
328 %endif ; (%%ROUND < %%CACHED_KEYS)
329 %endmacro ; SELECT_KEY
330
331
332 ;
333 ; AES_PARALLEL_ENC_BLOCKS
334 ; preloads %%EARLY_LOADS data blocks, then applies every key round to all %%PARALLEL_DATA blocks, one round at a time
335 ; round 0 XORs in the key and interleaves the remaining block loads; the final round uses the "last" instruction
336 ; in round 0 block i is XORed before the next load is issued, so EARLY_LOADS must be at least 1 whenever PARALLEL_DATA > 0
337 %macro AES_PARALLEL_ENC_BLOCKS 16
338 %define %%KEY_ROUNDS %1 ; number of key rounds
339 %define %%PARALLEL_DATA %2 ; number of blocks processed together
340 %define %%EARLY_LOADS %3 ; number of blocks loaded before round 0 starts
341 %define %%MOVDQ %4
342 %define %%PXOR %5
343 %define %%AES_DEC %6 ; instruction applied in the middle key rounds
344 %define %%AES_DEC_LAST %7 ; instruction applied in the last key round
345 %define %%CACHED_KEYS %8 ; number of round keys cached in xmm regs
346 %define %%TMP %9 ; index of the first temp xmm reg
347 %define %%TMP_CNT %10 ; number of temp xmm regs
348 %define %%FIRST_CKEY %11 ; index of the first cached-key xmm reg
349 %define %%KEY_DATA %12 ; base address of the round key data
350 %define %%FIRST_XDATA %13 ; index of the first data xmm reg
351 %define %%IN %14 ; input data
352 %define %%OUT %15 ; output data (not referenced in this macro)
353 %define %%IDX %16 ; index into input and output data buffers
354
355 %assign blocks_loaded 0
356
357 %rep %%EARLY_LOADS
358 SCHEDULE_DATA_LOAD %%PARALLEL_DATA, %%EARLY_LOADS, %%MOVDQ, %%IN, %%IDX ; updates blocks_loaded
359 %endrep ; %%EARLY_LOADS
360
361 %assign current_tmp (%%TMP) ; seeded from the macro argument, matching what INIT_SELECT_KEY is given
362 INIT_SELECT_KEY %%KEY_ROUNDS, %%CACHED_KEYS, %%KEY_DATA, %%TMP, %%TMP_CNT, %%MOVDQ
363
364 %assign round 0
365 %assign key 0
366 %rep %%KEY_ROUNDS ; for all key rounds; same count the last-round test below and SELECT_KEY use
367 SELECT_KEY round, %%KEY_ROUNDS, %%CACHED_KEYS, %%FIRST_CKEY, %%KEY_DATA, %%TMP, %%TMP_CNT, %%MOVDQ
368
369 %assign i %%FIRST_XDATA
370 %rep %%PARALLEL_DATA ; for each block do the AES block encode step
371 %if (0 == round)
372 %%PXOR XDATA(i), KEY_REG(key) ; first round's step
373 SCHEDULE_DATA_LOAD %%PARALLEL_DATA, %%EARLY_LOADS, %%MOVDQ, %%IN, %%IDX
374
375 %elif ( (%%KEY_ROUNDS-1) == round )
376 %%AES_DEC_LAST XDATA(i), KEY_REG(key) ; last round's step
377
378 %else
379 %%AES_DEC XDATA(i), KEY_REG(key) ; middle round's (1..last-1) step
380
381 %endif
382 %assign i (i+1)
383 %endrep ;%%PARALLEL_DATA
384 %assign round (round+1)
385 %endrep ;%%KEY_ROUNDS
386 %endmacro ; AES_PARALLEL_ENC_BLOCKS
387
388
389
390 ;
391 ; AES_ENC_BLOCKS
392 ; applies every key round to the single block held in XDATA(%2)
393 ; round 0 XORs in the key, the final round uses the "last" instruction, every round in between the regular one
394 ; INIT_SELECT_KEY preloads the first uncached key (if any) into a temp reg
395 ; SELECT_KEY then names each round's key reg and loads uncached keys into the temp regs
396 %macro AES_ENC_BLOCKS 11
397 %define %%n_rounds %1 ; number of key rounds
398 %define %%data_reg %2 ; index of the xmm reg holding the block
399 %define %%tmp0 %3 ; index of the first temp xmm reg
400 %define %%tmp_cnt %4 ; number of temp xmm regs
401 %define %%ckey0 %5 ; index of the first cached-key xmm reg
402 %define %%n_cached %6 ; number of round keys cached in xmm regs
403 %define %%key_base %7 ; base address of the round key data
404 %define %%mov_op %8
405 %define %%xor_op %9
406 %define %%enc_op %10
407 %define %%enc_last_op %11
408
409 %assign current_tmp (%%tmp0)
410 INIT_SELECT_KEY %%n_rounds, %%n_cached, %%key_base, %%tmp0, %%tmp_cnt, %%mov_op
411
412 %assign round 0
413 %assign key (round + %%ckey0)
414 %rep %%n_rounds ; one pass per key round
415 ; name the xmm reg that holds this round's key
416 SELECT_KEY round, %%n_rounds, %%n_cached, %%ckey0, %%key_base, %%tmp0, %%tmp_cnt, %%mov_op
417
418 ; apply this round to the block
419 %if (round == 0)
420 %%xor_op XDATA(%%data_reg), KEY_REG(key) ; round zero step
421 %elif (round < (%%n_rounds-1))
422 %%enc_op XDATA(%%data_reg), KEY_REG(key) ; rounds 1..last-1 step
423 %else
424 %%enc_last_op XDATA(%%data_reg), KEY_REG(key) ; last round's step
425 %endif ; (round == 0)
426
427 %assign round (round+1)
428 %endrep ; %%n_rounds
429 %endmacro ; AES_ENC_BLOCKS
430
431