]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2 | ; Copyright(c) 2011-2016 Intel Corporation All rights reserved. | |
3 | ; | |
4 | ; Redistribution and use in source and binary forms, with or without | |
1e59de90 | 5 | ; modification, are permitted provided that the following conditions |
7c673cae FG |
6 | ; are met: |
7 | ; * Redistributions of source code must retain the above copyright | |
8 | ; notice, this list of conditions and the following disclaimer. | |
9 | ; * Redistributions in binary form must reproduce the above copyright | |
10 | ; notice, this list of conditions and the following disclaimer in | |
11 | ; the documentation and/or other materials provided with the | |
12 | ; distribution. | |
13 | ; * Neither the name of Intel Corporation nor the names of its | |
14 | ; contributors may be used to endorse or promote products derived | |
15 | ; from this software without specific prior written permission. | |
16 | ; | |
17 | ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
18 | ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
19 | ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
20 | ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
21 | ; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
22 | ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
23 | ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
24 | ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
25 | ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
26 | ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
27 | ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
28 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
29 | ||
1e59de90 TL |
30 | %ifndef GCM_DEFINES_ASM_INCLUDED |
31 | %define GCM_DEFINES_ASM_INCLUDED | |
32 | ||
7c673cae FG |
33 | ; |
34 | ; Authors: | |
35 | ; Erdinc Ozturk | |
36 | ; Vinodh Gopal | |
37 | ; James Guilford | |
38 | ||
39 | ||
40 | ;;;;;; | |
7c673cae FG |
41 | |
42 | section .data | |
43 | ||
44 | align 16 | |
45 | ||
46 | POLY dq 0x0000000000000001, 0xC200000000000000 | |
1e59de90 TL |
47 | |
48 | align 64 | |
7c673cae | 49 | POLY2 dq 0x00000001C2000000, 0xC200000000000000 |
1e59de90 TL |
50 | dq 0x00000001C2000000, 0xC200000000000000 |
51 | dq 0x00000001C2000000, 0xC200000000000000 | |
52 | dq 0x00000001C2000000, 0xC200000000000000 | |
53 | align 16 | |
7c673cae FG |
54 | TWOONE dq 0x0000000000000001, 0x0000000100000000 |
55 | ||
56 | ; The order of these constants must not change. | |
57 | ; More specifically, ALL_F must follow SHIFT_MASK, and ZERO must follow ALL_F. | |
58 | ||
1e59de90 | 59 | align 64 |
7c673cae | 60 | SHUF_MASK dq 0x08090A0B0C0D0E0F, 0x0001020304050607 |
1e59de90 TL |
61 | dq 0x08090A0B0C0D0E0F, 0x0001020304050607 |
62 | dq 0x08090A0B0C0D0E0F, 0x0001020304050607 | |
63 | dq 0x08090A0B0C0D0E0F, 0x0001020304050607 | |
64 | ||
7c673cae FG |
65 | SHIFT_MASK dq 0x0706050403020100, 0x0f0e0d0c0b0a0908 |
66 | ALL_F dq 0xffffffffffffffff, 0xffffffffffffffff | |
67 | ZERO dq 0x0000000000000000, 0x0000000000000000 | |
68 | ONE dq 0x0000000000000001, 0x0000000000000000 | |
1e59de90 | 69 | TWO dq 0x0000000000000002, 0x0000000000000000 |
7c673cae | 70 | ONEf dq 0x0000000000000000, 0x0100000000000000 |
1e59de90 TL |
71 | TWOf dq 0x0000000000000000, 0x0200000000000000 |
72 | ||
73 | align 64 | |
74 | ddq_add_1234: | |
75 | dq 0x0000000000000001, 0x0000000000000000 | |
76 | dq 0x0000000000000002, 0x0000000000000000 | |
77 | dq 0x0000000000000003, 0x0000000000000000 | |
78 | dq 0x0000000000000004, 0x0000000000000000 | |
79 | ||
80 | align 64 | |
81 | ddq_add_5678: | |
82 | dq 0x0000000000000005, 0x0000000000000000 | |
83 | dq 0x0000000000000006, 0x0000000000000000 | |
84 | dq 0x0000000000000007, 0x0000000000000000 | |
85 | dq 0x0000000000000008, 0x0000000000000000 | |
86 | ||
87 | align 64 | |
88 | ddq_add_4444: | |
89 | dq 0x0000000000000004, 0x0000000000000000 | |
90 | dq 0x0000000000000004, 0x0000000000000000 | |
91 | dq 0x0000000000000004, 0x0000000000000000 | |
92 | dq 0x0000000000000004, 0x0000000000000000 | |
93 | ||
94 | align 64 | |
95 | ddq_add_8888: | |
96 | dq 0x0000000000000008, 0x0000000000000000 | |
97 | dq 0x0000000000000008, 0x0000000000000000 | |
98 | dq 0x0000000000000008, 0x0000000000000000 | |
99 | dq 0x0000000000000008, 0x0000000000000000 | |
100 | ||
101 | align 64 | |
102 | ddq_addbe_1234: | |
103 | dq 0x0000000000000000, 0x0100000000000000 | |
104 | dq 0x0000000000000000, 0x0200000000000000 | |
105 | dq 0x0000000000000000, 0x0300000000000000 | |
106 | dq 0x0000000000000000, 0x0400000000000000 | |
107 | ||
108 | align 64 | |
109 | ddq_addbe_5678: | |
110 | dq 0x0000000000000000, 0x0500000000000000 | |
111 | dq 0x0000000000000000, 0x0600000000000000 | |
112 | dq 0x0000000000000000, 0x0700000000000000 | |
113 | dq 0x0000000000000000, 0x0800000000000000 | |
114 | ||
115 | align 64 | |
116 | ddq_addbe_4444: | |
117 | dq 0x0000000000000000, 0x0400000000000000 | |
118 | dq 0x0000000000000000, 0x0400000000000000 | |
119 | dq 0x0000000000000000, 0x0400000000000000 | |
120 | dq 0x0000000000000000, 0x0400000000000000 | |
121 | ||
122 | align 64 | |
123 | ddq_addbe_8888: | |
124 | dq 0x0000000000000000, 0x0800000000000000 | |
125 | dq 0x0000000000000000, 0x0800000000000000 | |
126 | dq 0x0000000000000000, 0x0800000000000000 | |
127 | dq 0x0000000000000000, 0x0800000000000000 | |
128 | ||
129 | align 64 | |
130 | byte_len_to_mask_table: | |
131 | dw 0x0000, 0x0001, 0x0003, 0x0007, | |
132 | dw 0x000f, 0x001f, 0x003f, 0x007f, | |
133 | dw 0x00ff, 0x01ff, 0x03ff, 0x07ff, | |
134 | dw 0x0fff, 0x1fff, 0x3fff, 0x7fff, | |
135 | dw 0xffff | |
136 | ||
137 | align 64 | |
138 | byte64_len_to_mask_table: | |
139 | dq 0x0000000000000000, 0x0000000000000001 | |
140 | dq 0x0000000000000003, 0x0000000000000007 | |
141 | dq 0x000000000000000f, 0x000000000000001f | |
142 | dq 0x000000000000003f, 0x000000000000007f | |
143 | dq 0x00000000000000ff, 0x00000000000001ff | |
144 | dq 0x00000000000003ff, 0x00000000000007ff | |
145 | dq 0x0000000000000fff, 0x0000000000001fff | |
146 | dq 0x0000000000003fff, 0x0000000000007fff | |
147 | dq 0x000000000000ffff, 0x000000000001ffff | |
148 | dq 0x000000000003ffff, 0x000000000007ffff | |
149 | dq 0x00000000000fffff, 0x00000000001fffff | |
150 | dq 0x00000000003fffff, 0x00000000007fffff | |
151 | dq 0x0000000000ffffff, 0x0000000001ffffff | |
152 | dq 0x0000000003ffffff, 0x0000000007ffffff | |
153 | dq 0x000000000fffffff, 0x000000001fffffff | |
154 | dq 0x000000003fffffff, 0x000000007fffffff | |
155 | dq 0x00000000ffffffff, 0x00000001ffffffff | |
156 | dq 0x00000003ffffffff, 0x00000007ffffffff | |
157 | dq 0x0000000fffffffff, 0x0000001fffffffff | |
158 | dq 0x0000003fffffffff, 0x0000007fffffffff | |
159 | dq 0x000000ffffffffff, 0x000001ffffffffff | |
160 | dq 0x000003ffffffffff, 0x000007ffffffffff | |
161 | dq 0x00000fffffffffff, 0x00001fffffffffff | |
162 | dq 0x00003fffffffffff, 0x00007fffffffffff | |
163 | dq 0x0000ffffffffffff, 0x0001ffffffffffff | |
164 | dq 0x0003ffffffffffff, 0x0007ffffffffffff | |
165 | dq 0x000fffffffffffff, 0x001fffffffffffff | |
166 | dq 0x003fffffffffffff, 0x007fffffffffffff | |
167 | dq 0x00ffffffffffffff, 0x01ffffffffffffff | |
168 | dq 0x03ffffffffffffff, 0x07ffffffffffffff | |
169 | dq 0x0fffffffffffffff, 0x1fffffffffffffff | |
170 | dq 0x3fffffffffffffff, 0x7fffffffffffffff | |
171 | dq 0xffffffffffffffff | |
172 | ||
173 | align 64 | |
174 | mask_out_top_block: | |
175 | dq 0xffffffffffffffff, 0xffffffffffffffff | |
176 | dq 0xffffffffffffffff, 0xffffffffffffffff | |
177 | dq 0xffffffffffffffff, 0xffffffffffffffff | |
178 | dq 0x0000000000000000, 0x0000000000000000 | |
7c673cae FG |
179 | |
180 | section .text | |
181 | ||
182 | ||
183 | ;;define the fields of gcm_data struct | |
184 | ;typedef struct gcm_data | |
185 | ;{ | |
186 | ; u8 expanded_keys[16*15]; | |
187 | ; u8 shifted_hkey_1[16]; // store HashKey <<1 mod poly here | |
188 | ; u8 shifted_hkey_2[16]; // store HashKey^2 <<1 mod poly here | |
189 | ; u8 shifted_hkey_3[16]; // store HashKey^3 <<1 mod poly here | |
190 | ; u8 shifted_hkey_4[16]; // store HashKey^4 <<1 mod poly here | |
191 | ; u8 shifted_hkey_5[16]; // store HashKey^5 <<1 mod poly here | |
192 | ; u8 shifted_hkey_6[16]; // store HashKey^6 <<1 mod poly here | |
193 | ; u8 shifted_hkey_7[16]; // store HashKey^7 <<1 mod poly here | |
194 | ; u8 shifted_hkey_8[16]; // store HashKey^8 <<1 mod poly here | |
195 | ; u8 shifted_hkey_1_k[16]; // store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes) | |
196 | ; u8 shifted_hkey_2_k[16]; // store XOR of High 64 bits and Low 64 bits of HashKey^2 <<1 mod poly here (for Karatsuba purposes) | |
197 | ; u8 shifted_hkey_3_k[16]; // store XOR of High 64 bits and Low 64 bits of HashKey^3 <<1 mod poly here (for Karatsuba purposes) | |
198 | ; u8 shifted_hkey_4_k[16]; // store XOR of High 64 bits and Low 64 bits of HashKey^4 <<1 mod poly here (for Karatsuba purposes) | |
199 | ; u8 shifted_hkey_5_k[16]; // store XOR of High 64 bits and Low 64 bits of HashKey^5 <<1 mod poly here (for Karatsuba purposes) | |
200 | ; u8 shifted_hkey_6_k[16]; // store XOR of High 64 bits and Low 64 bits of HashKey^6 <<1 mod poly here (for Karatsuba purposes) | |
201 | ; u8 shifted_hkey_7_k[16]; // store XOR of High 64 bits and Low 64 bits of HashKey^7 <<1 mod poly here (for Karatsuba purposes) | |
202 | ; u8 shifted_hkey_8_k[16]; // store XOR of High 64 bits and Low 64 bits of HashKey^8 <<1 mod poly here (for Karatsuba purposes) | |
203 | ;} gcm_data; | |
204 | ||
1e59de90 | 205 | %ifndef GCM_KEYS_VAES_AVX512_INCLUDED |
7c673cae | 206 | %define HashKey 16*15 ; store HashKey <<1 mod poly here |
1e59de90 | 207 | %define HashKey_1 16*15 ; same value as HashKey (alias): HashKey <<1 mod poly is stored here |
7c673cae FG |
208 | %define HashKey_2 16*16 ; store HashKey^2 <<1 mod poly here |
209 | %define HashKey_3 16*17 ; store HashKey^3 <<1 mod poly here | |
210 | %define HashKey_4 16*18 ; store HashKey^4 <<1 mod poly here | |
211 | %define HashKey_5 16*19 ; store HashKey^5 <<1 mod poly here | |
212 | %define HashKey_6 16*20 ; store HashKey^6 <<1 mod poly here | |
213 | %define HashKey_7 16*21 ; store HashKey^7 <<1 mod poly here | |
214 | %define HashKey_8 16*22 ; store HashKey^8 <<1 mod poly here | |
215 | %define HashKey_k 16*23 ; store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes) | |
216 | %define HashKey_2_k 16*24 ; store XOR of High 64 bits and Low 64 bits of HashKey^2 <<1 mod poly here (for Karatsuba purposes) | |
217 | %define HashKey_3_k 16*25 ; store XOR of High 64 bits and Low 64 bits of HashKey^3 <<1 mod poly here (for Karatsuba purposes) | |
218 | %define HashKey_4_k 16*26 ; store XOR of High 64 bits and Low 64 bits of HashKey^4 <<1 mod poly here (for Karatsuba purposes) | |
219 | %define HashKey_5_k 16*27 ; store XOR of High 64 bits and Low 64 bits of HashKey^5 <<1 mod poly here (for Karatsuba purposes) | |
220 | %define HashKey_6_k 16*28 ; store XOR of High 64 bits and Low 64 bits of HashKey^6 <<1 mod poly here (for Karatsuba purposes) | |
221 | %define HashKey_7_k 16*29 ; store XOR of High 64 bits and Low 64 bits of HashKey^7 <<1 mod poly here (for Karatsuba purposes) | |
222 | %define HashKey_8_k 16*30 ; store XOR of High 64 bits and Low 64 bits of HashKey^8 <<1 mod poly here (for Karatsuba purposes) | |
1e59de90 TL |
223 | %endif |
224 | ||
225 | %define AadHash 16*0 ; running hash of the data that has been input so far | |
226 | %define AadLen 16*1 ; length of the input data that will not be encrypted or decrypted | |
227 | %define InLen (16*1)+8 ; length of the input data that will be encrypted or decrypted (value is AadLen + 8) | |
228 | %define PBlockEncKey 16*2 ; encryption key for the partial block at the end of the previous update | |
229 | %define OrigIV 16*3 ; input IV | |
230 | %define CurCount 16*4 ; current counter for generation of encryption key | |
231 | %define PBlockLen 16*5 ; length of the partial block at the end of the previous update | |
7c673cae FG |
232 | |
233 | %define reg(q) xmm %+ q | |
1e59de90 | 234 | %define arg(x) [r14 + STACK_OFFSET + 8*(x)] ; x-th 8-byte slot above r14 + STACK_OFFSET; (x) is parenthesized so an expression argument such as arg(n+1) is scaled as a whole |
7c673cae FG |
235 | |
236 | ||
237 | ||
238 | ||
239 | %ifnidn __OUTPUT_FORMAT__, elf64 | |
240 | %xdefine arg1 rcx | |
241 | %xdefine arg2 rdx | |
242 | %xdefine arg3 r8 | |
243 | %xdefine arg4 r9 | |
1e59de90 | 244 | %xdefine arg5 rsi ;stack slot is [r14 + STACK_OFFSET + 8*5] - presumably rsi must be pushed (saved) and then loaded from that slot by the code using arg5; TODO confirm against callers |
7c673cae FG |
245 | %xdefine arg6 [r14 + STACK_OFFSET + 8*6] |
246 | %xdefine arg7 [r14 + STACK_OFFSET + 8*7] | |
247 | %xdefine arg8 [r14 + STACK_OFFSET + 8*8] | |
248 | %xdefine arg9 [r14 + STACK_OFFSET + 8*9] | |
1e59de90 | 249 | %xdefine arg10 [r14 + STACK_OFFSET + 8*10] |
7c673cae FG |
250 | |
251 | %else | |
252 | %xdefine arg1 rdi | |
253 | %xdefine arg2 rsi | |
254 | %xdefine arg3 rdx | |
255 | %xdefine arg4 rcx | |
256 | %xdefine arg5 r8 | |
257 | %xdefine arg6 r9 | |
258 | %xdefine arg7 [r14 + STACK_OFFSET + 8*1] | |
259 | %xdefine arg8 [r14 + STACK_OFFSET + 8*2] | |
260 | %xdefine arg9 [r14 + STACK_OFFSET + 8*3] | |
1e59de90 | 261 | %xdefine arg10 [r14 + STACK_OFFSET + 8*4] |
7c673cae FG |
262 | %endif |
263 | ||
264 | %ifdef NT_LDST | |
265 | %define NT_LD | |
266 | %define NT_ST | |
267 | %endif | |
268 | ||
269 | ;;; Loads: use the non-temporal load mnemonics when NT_LD is defined, the unaligned move mnemonics otherwise | |
270 | %ifdef NT_LD | |
1e59de90 TL |
271 | %define XLDR movntdqa |
272 | %define VXLDR vmovntdqa | |
273 | %define VX512LDR vmovntdqa | |
7c673cae | 274 | %else |
1e59de90 TL |
275 | %define XLDR movdqu |
276 | %define VXLDR vmovdqu | |
277 | %define VX512LDR vmovdqu8 | |
7c673cae FG |
278 | %endif |
279 | ||
280 | ;;; Stores: use the non-temporal store mnemonics when NT_ST is defined, the unaligned move mnemonics otherwise | |
281 | %ifdef NT_ST | |
1e59de90 TL |
282 | %define XSTR movntdq |
283 | %define VXSTR vmovntdq | |
284 | %define VX512STR vmovntdq | |
7c673cae | 285 | %else |
1e59de90 TL |
286 | %define XSTR movdqu |
287 | %define VXSTR vmovdqu | |
288 | %define VX512STR vmovdqu8 | |
7c673cae | 289 | %endif |
1e59de90 TL |
290 | |
291 | %endif ; GCM_DEFINES_ASM_INCLUDED |