]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | ;; |
2 | ;; Copyright (c) 2012-2018, Intel Corporation | |
3 | ;; | |
4 | ;; Redistribution and use in source and binary forms, with or without | |
5 | ;; modification, are permitted provided that the following conditions are met: | |
6 | ;; | |
7 | ;; * Redistributions of source code must retain the above copyright notice, | |
8 | ;; this list of conditions and the following disclaimer. | |
9 | ;; * Redistributions in binary form must reproduce the above copyright | |
10 | ;; notice, this list of conditions and the following disclaimer in the | |
11 | ;; documentation and/or other materials provided with the distribution. | |
12 | ;; * Neither the name of Intel Corporation nor the names of its contributors | |
13 | ;; may be used to endorse or promote products derived from this software | |
14 | ;; without specific prior written permission. | |
15 | ;; | |
16 | ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
17 | ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
18 | ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
19 | ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | |
20 | ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
21 | ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
22 | ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
23 | ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
24 | ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
25 | ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
26 | ;; | |
27 | ||
28 | ; | |
29 | ; Authors: | |
30 | ; Erdinc Ozturk | |
31 | ; Vinodh Gopal | |
32 | ; James Guilford | |
33 | ||
34 | section .data | |
9f95a23c | 35 | default rel |
11fdf7f2 TL |
36 | |
;;; POLY   : one 128-bit constant (low qword 0x1, high qword 0xC2000...0).
;;; POLY2  : a different 128-bit constant, repeated four times to fill one
;;;          64-byte, 64-byte-aligned row.
;;; TWOONE : one 128-bit constant (low qword 0x1, high qword 0x1_0000_0000).
;;; NOTE(review): judging by the names and the "mod poly" wording on the
;;; HashKey offsets in this file, POLY/POLY2 look like GHASH reduction
;;; constants -- the users are not visible here, confirm before relying on it.
37 | align 16 | |
9f95a23c | 38 | POLY: dq 0x0000000000000001, 0xC200000000000000 |
11fdf7f2 | 39 | |
9f95a23c TL |
40 | align 64 |
41 | POLY2: | |
42 | dq 0x00000001C2000000, 0xC200000000000000 | |
43 | dq 0x00000001C2000000, 0xC200000000000000 | |
44 | dq 0x00000001C2000000, 0xC200000000000000 | |
45 | dq 0x00000001C2000000, 0xC200000000000000 | |
11fdf7f2 | 46 | |
9f95a23c TL |
47 | align 16 |
48 | TWOONE: dq 0x0000000000000001, 0x0000000100000000 | |
49 | ||
50 | ;;; @note Order of these constants should not change. | |
51 | ;;; More specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F | |
;;; SHUF_MASK : in memory, byte i of every 16-byte lane holds the value 15-i
;;;             (0x0F down to 0x00); the lane is repeated four times (64 bytes).
;;;             Used as a byte-shuffle control this would reverse the byte
;;;             order of each 128-bit lane -- presumably its purpose, confirm
;;;             against the users.
52 | align 64 | |
53 | SHUF_MASK: | |
54 | dq 0x08090A0B0C0D0E0F, 0x0001020304050607 | |
55 | dq 0x08090A0B0C0D0E0F, 0x0001020304050607 | |
56 | dq 0x08090A0B0C0D0E0F, 0x0001020304050607 | |
57 | dq 0x08090A0B0C0D0E0F, 0x0001020304050607 | |
58 | ||
;;; SHIFT_MASK : bytes 0x00..0x0F in ascending order (16 bytes).
;;; ALL_F      : 16 bytes of 0xFF.
;;; ZERO       : 16 bytes of 0x00.
;;; There is no align directive between these three labels and each is exactly
;;; 16 bytes, so they are laid out back to back; do not insert anything
;;; between them (see the @note above).
59 | align 16 | |
60 | SHIFT_MASK: | |
61 | dq 0x0706050403020100, 0x0f0e0d0c0b0a0908 | |
62 | ||
63 | ALL_F: | |
64 | dq 0xffffffffffffffff, 0xffffffffffffffff | |
65 | ||
66 | ZERO: | |
67 | dq 0x0000000000000000, 0x0000000000000000 | |
68 | ||
;;; 128-bit increment constants, each 16-byte aligned.
;;; ONE / TWO   : the value 1 / 2 in the lowest byte (little-endian integer).
;;; ONEf / TWOf : the value 1 / 2 in the highest byte (byte 15) instead.
;;; NOTE(review): the ONEf/TWOf layout looks intended for incrementing a
;;; counter block that is kept byte-reversed -- confirm against the users.
69 | align 16 | |
70 | ONE: | |
71 | dq 0x0000000000000001, 0x0000000000000000 | |
72 | ||
73 | align 16 | |
74 | TWO: | |
75 | dq 0x0000000000000002, 0x0000000000000000 | |
76 | ||
77 | align 16 | |
78 | ONEf: | |
79 | dq 0x0000000000000000, 0x0100000000000000 | |
11fdf7f2 | 80 | |
9f95a23c TL |
81 | align 16 |
82 | TWOf: | |
83 | dq 0x0000000000000000, 0x0200000000000000 | |
84 | ||
;;; 64-byte-aligned tables, each holding four 128-bit lanes.
;;; ddq_add_5678 / ddq_add_1234 : lanes hold 5,6,7,8 / 1,2,3,4 in the low byte.
;;; ddq_add_4444 / ddq_add_8888 : every lane holds 4 / 8 in the low byte.
;;; ddq_addbe_4444 / ddq_addbe_8888 : every lane holds 4 / 8 in byte 15
;;;                                   (the top byte of the high qword).
;;; NOTE(review): these look like per-lane counter increments for four blocks
;;; processed at once, with the "be" variants for byte-reversed counters --
;;; the users are not visible here, confirm.
85 | align 64 | |
86 | ddq_add_5678: | |
87 | dq 0x0000000000000005, 0x0000000000000000 | |
88 | dq 0x0000000000000006, 0x0000000000000000 | |
89 | dq 0x0000000000000007, 0x0000000000000000 | |
90 | dq 0x0000000000000008, 0x0000000000000000 | |
91 | ||
92 | align 64 | |
93 | ddq_add_1234: | |
94 | dq 0x0000000000000001, 0x0000000000000000 | |
95 | dq 0x0000000000000002, 0x0000000000000000 | |
96 | dq 0x0000000000000003, 0x0000000000000000 | |
97 | dq 0x0000000000000004, 0x0000000000000000 | |
98 | ||
99 | align 64 | |
100 | ddq_add_4444: | |
101 | dq 0x0000000000000004, 0x0000000000000000 | |
102 | dq 0x0000000000000004, 0x0000000000000000 | |
103 | dq 0x0000000000000004, 0x0000000000000000 | |
104 | dq 0x0000000000000004, 0x0000000000000000 | |
105 | ||
106 | align 64 | |
107 | ddq_addbe_4444: | |
108 | dq 0x0000000000000000, 0x0400000000000000 | |
109 | dq 0x0000000000000000, 0x0400000000000000 | |
110 | dq 0x0000000000000000, 0x0400000000000000 | |
111 | dq 0x0000000000000000, 0x0400000000000000 | |
112 | ||
113 | align 64 | |
114 | ddq_add_8888: | |
115 | dq 0x0000000000000008, 0x0000000000000000 | |
116 | dq 0x0000000000000008, 0x0000000000000000 | |
117 | dq 0x0000000000000008, 0x0000000000000000 | |
118 | dq 0x0000000000000008, 0x0000000000000000 | |
119 | ||
120 | align 64 | |
121 | ddq_addbe_8888: | |
122 | dq 0x0000000000000000, 0x0800000000000000 | |
123 | dq 0x0000000000000000, 0x0800000000000000 | |
124 | dq 0x0000000000000000, 0x0800000000000000 | |
125 | dq 0x0000000000000000, 0x0800000000000000 | |
126 | ||
;;; index_to_lane4: 64-byte-aligned row of eight qwords; the first four hold
;;; 0, 1, 2, 3 and the remaining four are zero.
;;; NOTE(review): how this row is consumed is not visible in this part of the
;;; file -- confirm before changing its size or contents.
127 | align 64 | |
128 | index_to_lane4: | |
129 | dq 0x0000000000000000, 0x0000000000000001 | |
130 | dq 0x0000000000000002, 0x0000000000000003 | |
131 | dq 0x0000000000000000, 0x0000000000000000 | |
132 | dq 0x0000000000000000, 0x0000000000000000 | |
133 | ||
;;; byte_len_to_mask_table: 17 word entries, 64-byte aligned.
;;; Entry n (n = 0..16) has its low n bits set, i.e. (1 << n) - 1.
;;; NOTE(review): presumably indexed by a byte count (scaled by 2) to obtain
;;; a 16-bit byte mask -- the users are not visible here, confirm.
;;; The dw rows deliberately carry no trailing comma: a trailing comma leaves
;;; an empty final operand, and an extra emitted element would shift every
;;; later entry of the table.
134 | align 64 | |
135 | byte_len_to_mask_table: | |
136 | dw 0x0000, 0x0001, 0x0003, 0x0007 | |
137 | dw 0x000f, 0x001f, 0x003f, 0x007f | |
138 | dw 0x00ff, 0x01ff, 0x03ff, 0x07ff | |
139 | dw 0x0fff, 0x1fff, 0x3fff, 0x7fff | |
140 | dw 0xffff | |
141 | ||
142 | ||
143 | ;;; @note these 2 need to be next to one another | |
144 | ;;; - they are used to map lane index onto corresponding bit mask and | |
145 | ;;; NOT version of the bitmask | |
;;; index_to_lane4_mask[i]     = 1 << i            (i = 0..3)
;;; index_to_lane4_not_mask[i] = ~(1 << i) & 0xF   (i = 0..3)
;;; Each table is four words, so the NOT table starts 8 bytes after the
;;; mask table.
146 | index_to_lane4_mask: | |
147 | dw 0x0001, 0x0002, 0x0004, 0x0008 | |
148 | index_to_lane4_not_mask: | |
149 | dw 0x000e, 0x000d, 0x000b, 0x0007 | |
11fdf7f2 TL |
150 | |
151 | section .text | |
152 | ||
153 | ;;define the fields of gcm_key_data struct | |
154 | ;; struct gcm_key_data { | |
155 | ;; uint8_t expanded_keys[GCM_ENC_KEY_LEN * GCM_KEY_SETS]; | |
9f95a23c | 156 | ;; uint8_t padding[GCM_ENC_KEY_LEN]; |
11fdf7f2 TL |
157 | ;; uint8_t shifted_hkey_1[GCM_ENC_KEY_LEN]; // store HashKey <<1 mod poly here |
158 | ;; uint8_t shifted_hkey_2[GCM_ENC_KEY_LEN]; // store HashKey^2 <<1 mod poly here | |
159 | ;; uint8_t shifted_hkey_3[GCM_ENC_KEY_LEN]; // store HashKey^3 <<1 mod poly here | |
160 | ;; uint8_t shifted_hkey_4[GCM_ENC_KEY_LEN]; // store HashKey^4 <<1 mod poly here | |
161 | ;; uint8_t shifted_hkey_5[GCM_ENC_KEY_LEN]; // store HashKey^5 <<1 mod poly here | |
162 | ;; uint8_t shifted_hkey_6[GCM_ENC_KEY_LEN]; // store HashKey^6 <<1 mod poly here | |
163 | ;; uint8_t shifted_hkey_7[GCM_ENC_KEY_LEN]; // store HashKey^7 <<1 mod poly here | |
164 | ;; uint8_t shifted_hkey_8[GCM_ENC_KEY_LEN]; // store HashKey^8 <<1 mod poly here | |
165 | ;; uint8_t shifted_hkey_1_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes) | |
166 | ;; uint8_t shifted_hkey_2_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^2 <<1 mod poly here (for Karatsuba purposes) | |
167 | ;; uint8_t shifted_hkey_3_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^3 <<1 mod poly here (for Karatsuba purposes) | |
168 | ;; uint8_t shifted_hkey_4_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^4 <<1 mod poly here (for Karatsuba purposes) | |
169 | ;; uint8_t shifted_hkey_5_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^5 <<1 mod poly here (for Karatsuba purposes) | |
170 | ;; uint8_t shifted_hkey_6_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^6 <<1 mod poly here (for Karatsuba purposes) | |
171 | ;; uint8_t shifted_hkey_7_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^7 <<1 mod poly here (for Karatsuba purposes) | |
172 | ;; uint8_t shifted_hkey_8_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^8 <<1 mod poly here (for Karatsuba purposes) | |
173 | ;; } | |
9f95a23c TL |
;;; Byte offsets into gcm_key_data, expressed in 16-byte units.
;;; HashKey aliases HashKey_1 (both 16*23) and HashKey_k aliases HashKey_1_k
;;; (both 16*24).
;;; NOTE(review): the struct comment above lists shifted_hkey_1..8 in
;;; ascending order, but these offsets place HashKey_8 first (16*16) and
;;; HashKey_1 last (16*23), while the *_k offsets do ascend (1 -> 8).
;;; Confirm which order the C structure really uses; the assembly side
;;; follows the offsets below.
174 | %define Padding (16*15) |
175 | %define HashKey_8 (16*16) ; store HashKey^8 <<1 mod poly here | |
176 | %define HashKey_7 (16*17) ; store HashKey^7 <<1 mod poly here | |
177 | %define HashKey_6 (16*18) ; store HashKey^6 <<1 mod poly here | |
11fdf7f2 | 178 | %define HashKey_5 (16*19) ; store HashKey^5 <<1 mod poly here |
9f95a23c TL |
179 | %define HashKey_4 (16*20) ; store HashKey^4 <<1 mod poly here |
180 | %define HashKey_3 (16*21) ; store HashKey^3 <<1 mod poly here | |
181 | %define HashKey_2 (16*22) ; store HashKey^2 <<1 mod poly here | |
182 | %define HashKey_1 (16*23) ; store HashKey <<1 mod poly here | |
183 | %define HashKey (16*23) ; store HashKey <<1 mod poly here | |
184 | %define HashKey_k (16*24) ; store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes) | |
185 | %define HashKey_1_k (16*24) ; store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes) | |
186 | %define HashKey_2_k (16*25) ; store XOR of High 64 bits and Low 64 bits of HashKey^2 <<1 mod poly here (for Karatsuba purposes) | |
187 | %define HashKey_3_k (16*26) ; store XOR of High 64 bits and Low 64 bits of HashKey^3 <<1 mod poly here (for Karatsuba purposes) | |
188 | %define HashKey_4_k (16*27) ; store XOR of High 64 bits and Low 64 bits of HashKey^4 <<1 mod poly here (for Karatsuba purposes) | |
189 | %define HashKey_5_k (16*28) ; store XOR of High 64 bits and Low 64 bits of HashKey^5 <<1 mod poly here (for Karatsuba purposes) | |
190 | %define HashKey_6_k (16*29) ; store XOR of High 64 bits and Low 64 bits of HashKey^6 <<1 mod poly here (for Karatsuba purposes) | |
191 | %define HashKey_7_k (16*30) ; store XOR of High 64 bits and Low 64 bits of HashKey^7 <<1 mod poly here (for Karatsuba purposes) | |
192 | %define HashKey_8_k (16*31) ; store XOR of High 64 bits and Low 64 bits of HashKey^8 <<1 mod poly here (for Karatsuba purposes) | |
11fdf7f2 TL |
193 | |
194 | ;;define the fields of gcm_context_data struct | |
195 | ;; struct gcm_context_data { | |
196 | ;; // init, update and finalize context data | |
197 | ;; uint8_t aad_hash[GCM_BLOCK_LEN]; | |
198 | ;; uint64_t aad_length; | |
199 | ;; uint64_t in_length; | |
200 | ;; uint8_t partial_block_enc_key[GCM_BLOCK_LEN]; | |
201 | ;; uint8_t orig_IV[GCM_BLOCK_LEN]; | |
202 | ;; uint8_t current_counter[GCM_BLOCK_LEN]; | |
203 | ;; uint64_t partial_block_length; | |
204 | ;; }; | |
205 | ||
;;; Byte offsets of the gcm_context_data fields: 0, 16, 24, 32, 48, 64, 80.
;;; AadLen and InLen are the two 8-byte halves of the second 16-byte slot.
;;; NOTE(review): these match the struct comment above only if GCM_BLOCK_LEN
;;; is 16 and the C structure has no padding -- confirm against the C header.
206 | %define AadHash (16*0) ; store current Hash of data which has been input | |
207 | %define AadLen (16*1) ; store length of input data which will not be encrypted or decrypted | |
208 | %define InLen ((16*1)+8); store length of input data which will be encrypted or decrypted | |
209 | %define PBlockEncKey (16*2) ; encryption key for the partial block at the end of the previous update | |
210 | %define OrigIV (16*3) ; input IV | |
211 | %define CurCount (16*4) ; Current counter for generation of encryption key | |
212 | %define PBlockLen (16*5) ; length of partial block at the end of the previous update | |
213 | ||
;;; reg(q): pastes "xmm" and the argument together with the %+ concatenation
;;; operator, so reg(3) expands to xmm3.
214 | %define reg(q) xmm %+ q | |
215 | ||
;;; arg1..arg10: where each function argument lives.
;;;   WIN_ABI defined : arg1-4 in rcx, rdx, r8, r9; arg5-10 read from memory
;;;                     at [r14 + STACK_OFFSET + 8*n].
;;;   otherwise       : arg1-6 in rdi, rsi, rdx, rcx, r8, r9; arg7-10 read
;;;                     from memory at [r14 + STACK_OFFSET + 8*(n-6)].
;;; Neither STACK_OFFSET nor the code that sets up r14 is defined in this
;;; part of the file; both must be provided by the code using these macros.
;;; NOTE(review): the WIN_ABI memory operands carry an explicit "qword" size,
;;; the others do not -- confirm whether that difference is intentional.
216 | %ifdef WIN_ABI | |
217 | %xdefine arg1 rcx | |
218 | %xdefine arg2 rdx | |
219 | %xdefine arg3 r8 | |
220 | %xdefine arg4 r9 | |
221 | %xdefine arg5 qword [r14 + STACK_OFFSET + 8*5] | |
222 | %xdefine arg6 qword [r14 + STACK_OFFSET + 8*6] | |
223 | %xdefine arg7 qword [r14 + STACK_OFFSET + 8*7] | |
224 | %xdefine arg8 qword [r14 + STACK_OFFSET + 8*8] | |
225 | %xdefine arg9 qword [r14 + STACK_OFFSET + 8*9] | |
226 | %xdefine arg10 qword [r14 + STACK_OFFSET + 8*10] | |
227 | %else | |
228 | %xdefine arg1 rdi | |
229 | %xdefine arg2 rsi | |
230 | %xdefine arg3 rdx | |
231 | %xdefine arg4 rcx | |
232 | %xdefine arg5 r8 | |
233 | %xdefine arg6 r9 | |
234 | %xdefine arg7 [r14 + STACK_OFFSET + 8*1] | |
235 | %xdefine arg8 [r14 + STACK_OFFSET + 8*2] | |
236 | %xdefine arg9 [r14 + STACK_OFFSET + 8*3] | |
237 | %xdefine arg10 [r14 + STACK_OFFSET + 8*4] | |
238 | %endif | |
239 | ||
;;; Load/store mnemonic selection.
;;;   NT_LDST defined : turns on both NT_LD and NT_ST.
;;;   NT_LD defined   : XLDR / VXLDR / VX512LDR become movntdqa / vmovntdqa /
;;;                     vmovntdqa; otherwise movdqu / vmovdqu / vmovdqu64.
;;;   NT_ST defined   : XSTR / VXSTR / VX512STR become movntdq / vmovntdq /
;;;                     vmovntdq; otherwise movdqu / vmovdqu / vmovdqu64.
;;; NOTE(review): per the Intel SDM the non-temporal forms require a naturally
;;; aligned memory operand, unlike the movdqu family -- confirm that callers
;;; enabling NT_LD / NT_ST guarantee aligned buffers.
240 | %ifdef NT_LDST | |
241 | %define NT_LD | |
242 | %define NT_ST | |
243 | %endif | |
244 | ||
245 | ;;; Use non-temporal loads | |
246 | %ifdef NT_LD | |
9f95a23c TL |
247 | %define XLDR movntdqa |
248 | %define VXLDR vmovntdqa | |
249 | %define VX512LDR vmovntdqa | |
11fdf7f2 | 250 | %else |
9f95a23c TL |
251 | %define XLDR movdqu |
252 | %define VXLDR vmovdqu | |
253 | %define VX512LDR vmovdqu64 | |
11fdf7f2 TL |
254 | %endif |
255 | ||
256 | ;;; Use non-temporal stores | |
257 | %ifdef NT_ST | |
9f95a23c TL |
258 | %define XSTR movntdq |
259 | %define VXSTR vmovntdq | |
260 | %define VX512STR vmovntdq | |
11fdf7f2 | 261 | %else |
9f95a23c TL |
262 | %define XSTR movdqu |
263 | %define VXSTR vmovdqu | |
264 | %define VX512STR vmovdqu64 | |
11fdf7f2 | 265 | %endif |