2 ;; Copyright (c) 2012-2018, Intel Corporation
4 ;; Redistribution and use in source and binary forms, with or without
5 ;; modification, are permitted provided that the following conditions are met:
7 ;; * Redistributions of source code must retain the above copyright notice,
8 ;; this list of conditions and the following disclaimer.
9 ;; * Redistributions in binary form must reproduce the above copyright
10 ;; notice, this list of conditions and the following disclaimer in the
11 ;; documentation and/or other materials provided with the distribution.
12 ;; * Neither the name of Intel Corporation nor the names of its contributors
13 ;; may be used to endorse or promote products derived from this software
14 ;; without specific prior written permission.
16 ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;; 128-bit constants, each written as a pair of 64-bit (dq) values, low qword
;; first. POLY labels the first row only.
;; NOTE(review): presumably the GHASH reduction-polynomial constants - confirm
;; against the code that loads them.
38 POLY: dq 0x0000000000000001, 0xC200000000000000
;; Four identical 16-byte rows (64 bytes in total). No label for them is
;; visible in this view.
42 dq 0x00000001C2000000, 0xC200000000000000
43 dq 0x00000001C2000000, 0xC200000000000000
44 dq 0x00000001C2000000, 0xC200000000000000
45 dq 0x00000001C2000000, 0xC200000000000000
;; TWOONE: low qword = 1, high qword = 1 << 32.
48 TWOONE: dq 0x0000000000000001, 0x0000000100000000
50 ;;; @note Order of these constants should not change.
51 ;;; More specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F
;; Unlabelled 128-bit constant rows (low qword first). The labels that name
;; these rows are not visible in this view; the comments below describe the
;; values only, and any stated purpose is an assumption to be confirmed.

;; Four identical rows. In memory the bytes read 0x0F, 0x0E, ..., 0x00, i.e.
;; byte indices in descending order.
;; NOTE(review): presumably a byte-reversal shuffle control - confirm.
54 dq 0x08090A0B0C0D0E0F, 0x0001020304050607
55 dq 0x08090A0B0C0D0E0F, 0x0001020304050607
56 dq 0x08090A0B0C0D0E0F, 0x0001020304050607
57 dq 0x08090A0B0C0D0E0F, 0x0001020304050607
;; Three consecutive rows: ascending byte indices 0x00..0x0F, then all ones,
;; then all zeros.
;; NOTE(review): presumably SHIFT_MASK, ALL_F and ZERO, in the order required
;; by the ordering note above - confirm.
61 dq 0x0706050403020100, 0x0f0e0d0c0b0a0908
64 dq 0xffffffffffffffff, 0xffffffffffffffff
67 dq 0x0000000000000000, 0x0000000000000000
;; The values 1 and 2 in the low qword.
71 dq 0x0000000000000001, 0x0000000000000000
75 dq 0x0000000000000002, 0x0000000000000000
;; The values 1 and 2 placed in the most-significant byte of the high qword.
;; NOTE(review): presumably the same increments for a byte-swapped counter
;; layout - confirm.
79 dq 0x0000000000000000, 0x0100000000000000
83 dq 0x0000000000000000, 0x0200000000000000
;; Four rows holding 5, 6, 7, 8 in the low qword.
87 dq 0x0000000000000005, 0x0000000000000000
88 dq 0x0000000000000006, 0x0000000000000000
89 dq 0x0000000000000007, 0x0000000000000000
90 dq 0x0000000000000008, 0x0000000000000000
;; Four rows holding 1, 2, 3, 4 in the low qword.
94 dq 0x0000000000000001, 0x0000000000000000
95 dq 0x0000000000000002, 0x0000000000000000
96 dq 0x0000000000000003, 0x0000000000000000
97 dq 0x0000000000000004, 0x0000000000000000
;; Four identical rows holding 4 in the low qword.
101 dq 0x0000000000000004, 0x0000000000000000
102 dq 0x0000000000000004, 0x0000000000000000
103 dq 0x0000000000000004, 0x0000000000000000
104 dq 0x0000000000000004, 0x0000000000000000
;; Four identical rows holding 4 in the most-significant byte of the high
;; qword.
108 dq 0x0000000000000000, 0x0400000000000000
109 dq 0x0000000000000000, 0x0400000000000000
110 dq 0x0000000000000000, 0x0400000000000000
111 dq 0x0000000000000000, 0x0400000000000000
;; Four identical rows holding 8 in the low qword.
115 dq 0x0000000000000008, 0x0000000000000000
116 dq 0x0000000000000008, 0x0000000000000000
117 dq 0x0000000000000008, 0x0000000000000000
118 dq 0x0000000000000008, 0x0000000000000000
;; Four identical rows holding 8 in the most-significant byte of the high
;; qword.
122 dq 0x0000000000000000, 0x0800000000000000
123 dq 0x0000000000000000, 0x0800000000000000
124 dq 0x0000000000000000, 0x0800000000000000
125 dq 0x0000000000000000, 0x0800000000000000
;; Eight qwords: 0, 1, 2, 3 followed by four zeros (64 bytes in total).
129 dq 0x0000000000000000, 0x0000000000000001
130 dq 0x0000000000000002, 0x0000000000000003
131 dq 0x0000000000000000, 0x0000000000000000
132 dq 0x0000000000000000, 0x0000000000000000
;; Table of 16 word (dw) entries. Entry n, for n = 0..15, has its n
;; least-significant bits set, i.e. (1 << n) - 1. Entries are 2 bytes wide, so
;; entry n sits at byte offset 2*n from the label.
;; NOTE(review): going by the label, presumably maps a byte length to a
;; per-byte mask - confirm against the users of this table.
135 byte_len_to_mask_table:
136 dw 0x0000, 0x0001, 0x0003, 0x0007,
137 dw 0x000f, 0x001f, 0x003f, 0x007f,
138 dw 0x00ff, 0x01ff, 0x03ff, 0x07ff,
139 dw 0x0fff, 0x1fff, 0x3fff, 0x7fff,
143 ;;; @note these 2 need to be next to one another
144 ;;; - they are used to map lane index onto corresponding bit mask and
145 ;;; NOT version of the bitmask
;; First row: entry i (i = 0..3) is the single-bit mask 1 << i. The label for
;; this row is not visible in this view.
147 dw 0x0001, 0x0002, 0x0004, 0x0008
;; Second row: entry i is the 4-bit complement of the entry above,
;; i.e. 0x000f with bit i cleared.
148 index_to_lane4_not_mask:
149 dw 0x000e, 0x000d, 0x000b, 0x0007
153 ;;define the fields of gcm_key_data struct
154 ;; struct gcm_key_data {
155 ;; uint8_t expanded_keys[GCM_ENC_KEY_LEN * GCM_KEY_SETS];
156 ;; uint8_t padding[GCM_ENC_KEY_LEN];
157 ;; uint8_t shifted_hkey_1[GCM_ENC_KEY_LEN]; // store HashKey <<1 mod poly here
158 ;; uint8_t shifted_hkey_2[GCM_ENC_KEY_LEN]; // store HashKey^2 <<1 mod poly here
159 ;; uint8_t shifted_hkey_3[GCM_ENC_KEY_LEN]; // store HashKey^3 <<1 mod poly here
160 ;; uint8_t shifted_hkey_4[GCM_ENC_KEY_LEN]; // store HashKey^4 <<1 mod poly here
161 ;; uint8_t shifted_hkey_5[GCM_ENC_KEY_LEN]; // store HashKey^5 <<1 mod poly here
162 ;; uint8_t shifted_hkey_6[GCM_ENC_KEY_LEN]; // store HashKey^6 <<1 mod poly here
163 ;; uint8_t shifted_hkey_7[GCM_ENC_KEY_LEN]; // store HashKey^7 <<1 mod poly here
164 ;; uint8_t shifted_hkey_8[GCM_ENC_KEY_LEN]; // store HashKey^8 <<1 mod poly here
165 ;; uint8_t shifted_hkey_1_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes)
166 ;; uint8_t shifted_hkey_2_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
167 ;; uint8_t shifted_hkey_3_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
168 ;; uint8_t shifted_hkey_4_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
169 ;; uint8_t shifted_hkey_5_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
170 ;; uint8_t shifted_hkey_6_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
171 ;; uint8_t shifted_hkey_7_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
172 ;; uint8_t shifted_hkey_8_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
;; Byte offsets of the fields that follow the expanded keys; every offset is a
;; multiple of 16.
;;  - Padding is at 16*15.
;;  - HashKey_8 down to HashKey_1 occupy ascending offsets 16*16 .. 16*23,
;;    so the highest power comes first.
;;  - HashKey_1_k up to HashKey_8_k occupy ascending offsets 16*24 .. 16*31,
;;    so the lowest power comes first.
;;  - HashKey is an alias of HashKey_1, and HashKey_k of HashKey_1_k.
;; NOTE(review): the struct comment above lists shifted_hkey_1 immediately
;; after the padding, whereas these offsets place HashKey_8 there. Confirm
;; which ordering matches the C struct definition.
174 %define Padding (16*15)
175 %define HashKey_8 (16*16) ; store HashKey^8 <<1 mod poly here
176 %define HashKey_7 (16*17) ; store HashKey^7 <<1 mod poly here
177 %define HashKey_6 (16*18) ; store HashKey^6 <<1 mod poly here
178 %define HashKey_5 (16*19) ; store HashKey^5 <<1 mod poly here
179 %define HashKey_4 (16*20) ; store HashKey^4 <<1 mod poly here
180 %define HashKey_3 (16*21) ; store HashKey^3 <<1 mod poly here
181 %define HashKey_2 (16*22) ; store HashKey^2 <<1 mod poly here
182 %define HashKey_1 (16*23) ; store HashKey <<1 mod poly here
183 %define HashKey (16*23) ; store HashKey <<1 mod poly here
184 %define HashKey_k (16*24) ; store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes)
185 %define HashKey_1_k (16*24) ; store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes)
186 %define HashKey_2_k (16*25) ; store XOR of High 64 bits and Low 64 bits of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
187 %define HashKey_3_k (16*26) ; store XOR of High 64 bits and Low 64 bits of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
188 %define HashKey_4_k (16*27) ; store XOR of High 64 bits and Low 64 bits of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
189 %define HashKey_5_k (16*28) ; store XOR of High 64 bits and Low 64 bits of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
190 %define HashKey_6_k (16*29) ; store XOR of High 64 bits and Low 64 bits of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
191 %define HashKey_7_k (16*30) ; store XOR of High 64 bits and Low 64 bits of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
192 %define HashKey_8_k (16*31) ; store XOR of High 64 bits and Low 64 bits of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
194 ;;define the fields of gcm_context_data struct
195 ;; struct gcm_context_data {
196 ;; // init, update and finalize context data
197 ;; uint8_t aad_hash[GCM_BLOCK_LEN];
198 ;; uint64_t aad_length;
199 ;; uint64_t in_length;
200 ;; uint8_t partial_block_enc_key[GCM_BLOCK_LEN];
201 ;; uint8_t orig_IV[GCM_BLOCK_LEN];
202 ;; uint8_t current_counter[GCM_BLOCK_LEN];
203 ;; uint64_t partial_block_length;
;; Byte offsets of the gcm_context_data fields, in the same order as the
;; struct comment above. AadLen and InLen share one 16-byte slot (offsets 16
;; and 24), matching the two consecutive uint64_t fields; every other field
;; starts on its own 16-byte boundary.
206 %define AadHash (16*0) ; store current Hash of data which has been input
207 %define AadLen (16*1) ; store length of input data which will not be encrypted or decrypted
208 %define InLen ((16*1)+8); store length of input data which will be encrypted or decrypted
209 %define PBlockEncKey (16*2) ; encryption key for the partial block at the end of the previous update
210 %define OrigIV (16*3) ; input IV
211 %define CurCount (16*4) ; Current counter for generation of encryption key
212 %define PBlockLen (16*5) ; length of partial block at the end of the previous update
;; reg(q): pastes the token "xmm" together with q using the %+ concatenation
;; operator, e.g. reg(3) expands to xmm3. This lets a register be selected by
;; a numeric macro argument.
214 %define reg(q) xmm %+ q
;; argN: memory operands addressed as [r14 + STACK_OFFSET + 8*k].
;; STACK_OFFSET is defined outside this view.
;; NOTE(review): presumably these are stack-passed function arguments, with
;; r14 holding the stack pointer value captured at entry - confirm.
;;
;; First group: arg5..arg10 at slots 5..10, with an explicit qword size.
221 %xdefine arg5 qword [r14 + STACK_OFFSET + 8*5]
222 %xdefine arg6 qword [r14 + STACK_OFFSET + 8*6]
223 %xdefine arg7 qword [r14 + STACK_OFFSET + 8*7]
224 %xdefine arg8 qword [r14 + STACK_OFFSET + 8*8]
225 %xdefine arg9 qword [r14 + STACK_OFFSET + 8*9]
226 %xdefine arg10 qword [r14 + STACK_OFFSET + 8*10]
;; Second group: arg7..arg10 at slots 1..4, with no size specifier.
;; NOTE(review): arg7..arg10 are defined twice in this view. Unless the two
;; groups sit in different branches of a conditional that is not visible here
;; (e.g. one per calling convention), these later definitions override the
;; ones above - confirm.
234 %xdefine arg7 [r14 + STACK_OFFSET + 8*1]
235 %xdefine arg8 [r14 + STACK_OFFSET + 8*2]
236 %xdefine arg9 [r14 + STACK_OFFSET + 8*3]
237 %xdefine arg10 [r14 + STACK_OFFSET + 8*4]
;; Load/store instruction aliases. Each alias is bound first to a non-temporal
;; instruction and then, for most aliases, to a regular unaligned move.
;; NOTE(review): no conditional is visible between the two sets in this view.
;; As written, the later %define of a name replaces the earlier one. Presumably
;; a build-time conditional selects one set - confirm.
245 ;;; Use Non-temporal load/store
247 %define XLDR movntdqa
248 %define VXLDR vmovntdqa
249 %define VX512LDR vmovntdqa
;; Regular unaligned loads. No counterpart for XLDR is visible here.
252 %define VXLDR vmovdqu
253 %define VX512LDR vmovdqu64
256 ;;; Use Non-temporal load/store
259 %define VXSTR vmovntdq
260 %define VX512STR vmovntdq
;; Regular unaligned stores.
263 %define VXSTR vmovdqu
264 %define VX512STR vmovdqu64