]> git.proxmox.com Git - ceph.git/blob - ceph/src/crypto/isa-l/isa-l_crypto/aes/cbc_dec_256_x4_sse.asm
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / crypto / isa-l / isa-l_crypto / aes / cbc_dec_256_x4_sse.asm
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
3 ;
4 ; Redistribution and use in source and binary forms, with or without
5 ; modification, are permitted provided that the following conditions
6 ; are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in
11 ; the documentation and/or other materials provided with the
12 ; distribution.
13 ; * Neither the name of Intel Corporation nor the names of its
14 ; contributors may be used to endorse or promote products derived
15 ; from this software without specific prior written permission.
16 ;
17 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30 ; routine to do AES-256 CBC decrypt on 16n bytes
31 ; XMM registers are clobbered. Saving/restoring must be done at a higher level
32
33 ; void aes_cbc_dec_256_sse(void *in,
34 ; uint8_t *IV,
35 ; uint8_t *keys,
36 ; void *out,
37 ; uint64_t len_bytes);
38 ;
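39 ; arg 1: rcx (win64) / rdi (elf64): pointer to input (cipher text)
40 ; arg 2: rdx (win64) / rsi (elf64): pointer to IV
41 ; arg 3: r8 (win64) / rdx (elf64): pointer to keys
42 ; arg 4: r9 (win64) / rcx (elf64): pointer to output (plain text)
43 ; arg 5: stack (win64) / r8 (elf64): length in bytes (multiple of 16)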
44 ;
45
46 %include "reg_sizes.asm"
47
; Unaligned 128-bit move used for all loads/stores of key and data blocks.
%define MOVDQ		movdqu

; ---------------------------------------------------------------------
; ELF64 output: map the symbolic argument names onto the registers that
; carry the five arguments, and make the prologue/epilogue hooks empty.
; ---------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, elf64
 %define IN		rdi		; arg 1: input (cipher text)
 %define IV		rsi		; arg 2: IV
 %define KEYS		rdx		; arg 3: keys
 %define OUT		rcx		; arg 4: output (plain text)
 %define LEN		r8		; arg 5: length in bytes
 %define func(x)	x:		; function entry is a plain label
 %define FUNC_SAVE			; nothing to save on this ABI
 %define FUNC_RESTORE			; nothing to restore on this ABI
%endif
60
; ---------------------------------------------------------------------
; Win64 output: argument register mapping plus a prologue/epilogue pair
; that preserves xmm6-xmm15 in a stack frame.
; ---------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, win64
 %define IN		rcx		; arg 1: input (cipher text)
 %define IV		rdx		; arg 2: IV
 %define KEYS		r8		; arg 3: keys
 %define OUT		r9		; arg 4: output (plain text)
 %define LEN		r10		; arg 5: loaded from the stack in FUNC_SAVE
 %define PS		8		; size of one stack slot in bytes
 %define stack_size	10*16 + 1*8	; must be an odd multiple of 8
 ; Address of stack slot x as seen after alloc_stack: skips the local
 ; frame (stack_size) and one extra slot (PS), then indexes PS bytes per slot.
 %define arg(x)		[rsp + stack_size + PS + PS*x]

 %define func(x)	proc_frame x

 ; Prologue: allocate the frame, spill xmm6-xmm15 into it (one 16-byte
 ; slot each), then fetch the length from stack slot 4 into LEN.
 %macro FUNC_SAVE 0
	alloc_stack	stack_size
	save_xmm128	xmm6,  0*16
	save_xmm128	xmm7,  1*16
	save_xmm128	xmm8,  2*16
	save_xmm128	xmm9,  3*16
	save_xmm128	xmm10, 4*16
	save_xmm128	xmm11, 5*16
	save_xmm128	xmm12, 6*16
	save_xmm128	xmm13, 7*16
	save_xmm128	xmm14, 8*16
	save_xmm128	xmm15, 9*16
	end_prolog
	mov	LEN, arg(4)
 %endmacro

 ; Epilogue: reload xmm6-xmm15 from the slots written by FUNC_SAVE and
 ; release the frame.  Uses aligned loads, so rsp must be 16-byte
 ; aligned here.  Does not emit the ret itself.
 %macro FUNC_RESTORE 0
	movdqa	xmm6,  [rsp + 0*16]
	movdqa	xmm7,  [rsp + 1*16]
	movdqa	xmm8,  [rsp + 2*16]
	movdqa	xmm9,  [rsp + 3*16]
	movdqa	xmm10, [rsp + 4*16]
	movdqa	xmm11, [rsp + 5*16]
	movdqa	xmm12, [rsp + 6*16]
	movdqa	xmm13, [rsp + 7*16]
	movdqa	xmm14, [rsp + 8*16]
	movdqa	xmm15, [rsp + 9*16]
	add	rsp, stack_size
 %endmacro
%endif
102
; ---------------------------------------------------------------------
; Configuration parameters for AES-CBC.
; These are consumed by the macros in cbc_common.asm (included below);
; NOTE(review): exact meaning of XMM_USAGE / EARLY_BLOCKS / IV_CNT is
; defined there -- confirm against that file before changing them.
; ---------------------------------------------------------------------
%define KEY_ROUNDS	15		; passed as the round count to CBC_DECRYPT_BLOCKS
%define XMM_USAGE	(16)
%define EARLY_BLOCKS	(4)
%define PARALLEL_BLOCKS	(11)		; blocks handled per main_loop iteration
%define IV_CNT		(1)

; Instruction-set specific operation definitions (SSE / AES-NI forms).
%define MOVDQ		movdqu		; unaligned 128-bit load/store
%define PXOR		pxor
%define AES_DEC		aesdec
%define AES_DEC_LAST	aesdeclast
115
116 %include "cbc_common.asm"
117
;-----------------------------------------------------------------------
; void aes_cbc_dec_256_sse(void *in, uint8_t *IV, uint8_t *keys,
;                          void *out, uint64_t len_bytes);
;
; AES-256 CBC decrypt of len_bytes bytes from IN to OUT.
;
; In:    IN, IV, KEYS, OUT, LEN -- mapped to registers per output format
;        by the %define blocks above.  LEN must be a multiple of 16;
;        this routine does not validate that.
; Flow:  full groups of PARALLEL_BLOCKS blocks are handled in main_loop;
;        whatever remains is handled in groups of 4, 2 or 1 blocks.
; Notes: LEN is an unsigned byte count, so every size comparison uses
;        the unsigned condition codes (jae/jb).
;        The jz/jnz directly after CBC_DECRYPT_BLOCKS rely on the flags
;        that macro leaves behind (macro lives in cbc_common.asm --
;        presumably ZF reflects "LEN reached zero"; confirm there).
;-----------------------------------------------------------------------
global aes_cbc_dec_256_sse:function
func(aes_cbc_dec_256_sse)
	FUNC_SAVE

	FILL_KEY_CACHE CKEY_CNT, FIRST_CKEY, KEYS, MOVDQ

	MOVDQ	reg(IV_IDX), [IV]		; Load IV for next round of block decrypt
	mov	IDX, 0
	cmp	LEN, PARALLEL_BLOCKS*16
	jb	partials			; not enough data for a full parallel set

main_loop:
	CBC_DECRYPT_BLOCKS KEY_ROUNDS, PARALLEL_BLOCKS, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN
	cmp	LEN, PARALLEL_BLOCKS*16
	jae	main_loop			; enough blocks to do another full parallel set
	; Fall through: LEN < PARALLEL_BLOCKS*16 here (possibly 0, which the
	; check at the top of 'partials' handles).

partials:	; fewer than 'PARALLEL_BLOCKS' left do in groups of 4, 2 or 1
	test	LEN, LEN
	jz	done
	cmp	LEN, 4*16
	jae	initial_4
	cmp	LEN, 2*16
	jae	initial_2

initial_1:
	CBC_DECRYPT_BLOCKS KEY_ROUNDS, 1, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN
	jmp	done

initial_2:
	CBC_DECRYPT_BLOCKS KEY_ROUNDS, 2, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN
	jz	done				; flags come from the macro above
	jmp	partials

initial_4:
	CBC_DECRYPT_BLOCKS KEY_ROUNDS, 4, EARLY_BLOCKS, MOVDQ, PXOR, AES_DEC, AES_DEC_LAST, CKEY_CNT, TMP, TMP_CNT, FIRST_CKEY, KEYS, FIRST_XDATA, IN, OUT, IDX, LEN
	jnz	partials			; flags come from the macro above
done:
	FUNC_RESTORE
	ret

endproc_frame