]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | ;; |
2 | ;; Copyright (c) 2017-2018, Intel Corporation | |
3 | ;; | |
4 | ;; Redistribution and use in source and binary forms, with or without | |
5 | ;; modification, are permitted provided that the following conditions are met: | |
6 | ;; | |
7 | ;; * Redistributions of source code must retain the above copyright notice, | |
8 | ;; this list of conditions and the following disclaimer. | |
9 | ;; * Redistributions in binary form must reproduce the above copyright | |
10 | ;; notice, this list of conditions and the following disclaimer in the | |
11 | ;; documentation and/or other materials provided with the distribution. | |
12 | ;; * Neither the name of Intel Corporation nor the names of its contributors | |
13 | ;; may be used to endorse or promote products derived from this software | |
14 | ;; without specific prior written permission. | |
15 | ;; | |
16 | ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
17 | ;; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
18 | ;; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
19 | ;; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE | |
20 | ;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
21 | ;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
22 | ;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
23 | ;; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
24 | ;; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
25 | ;; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
26 | ;; | |
27 | ||
28 | ;; In System V AMD64 ABI | |
29 | ;; callee saves: RBX, RBP, R12-R15 | |
30 | ;; Windows x64 ABI | |
31 | ;; callee saves: RBX, RBP, RDI, RSI, RSP, R12-R15 | |
32 | ;; | |
33 | ;; Registers: RAX RBX RCX RDX RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15 | |
34 | ;; ----------------------------------------------------------- | |
35 | ;; Windows clobbers: RAX RCX RDX R8 R9 R10 R11 | |
36 | ;; Windows preserves: RBX RBP RSI RDI R12 R13 R14 R15 | |
37 | ;; ----------------------------------------------------------- | |
38 | ;; Linux clobbers: RAX RCX RDX RSI RDI R8 R9 R10 R11 | |
39 | ;; Linux preserves: RBX RBP R12 R13 R14 R15 | |
40 | ;; ----------------------------------------------------------- | |
41 | ;; Clobbers ZMM0-31 | |
42 | ||
f67539c2 | 43 | %include "include/os.asm" |
11fdf7f2 TL |
44 | %include "job_aes_hmac.asm" |
45 | %include "mb_mgr_datastruct.asm" | |
f67539c2 | 46 | %include "include/reg_sizes.asm" |
11fdf7f2 TL |
47 | |
48 | ;; %define DO_DBGPRINT | |
f67539c2 | 49 | %include "include/dbgprint.asm" |
11fdf7f2 TL |
50 | |
51 | extern sha256_x16_avx512 | |
52 | ||
53 | section .data | |
54 | default rel | |
55 | align 16 | |
56 | byteswap: | |
57 | dq 0x0405060700010203, 0x0c0d0e0f08090a0b | |
58 | ||
59 | align 32 | |
60 | len_masks: | |
61 | dq 0x000000000000FFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 | |
62 | dq 0x00000000FFFF0000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 | |
63 | dq 0x0000FFFF00000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 | |
64 | dq 0xFFFF000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000 | |
65 | dq 0x0000000000000000, 0x000000000000FFFF, 0x0000000000000000, 0x0000000000000000 | |
66 | dq 0x0000000000000000, 0x00000000FFFF0000, 0x0000000000000000, 0x0000000000000000 | |
67 | dq 0x0000000000000000, 0x0000FFFF00000000, 0x0000000000000000, 0x0000000000000000 | |
68 | dq 0x0000000000000000, 0xFFFF000000000000, 0x0000000000000000, 0x0000000000000000 | |
69 | dq 0x0000000000000000, 0x0000000000000000, 0x000000000000FFFF, 0x0000000000000000 | |
70 | dq 0x0000000000000000, 0x0000000000000000, 0x00000000FFFF0000, 0x0000000000000000 | |
71 | dq 0x0000000000000000, 0x0000000000000000, 0x0000FFFF00000000, 0x0000000000000000 | |
72 | dq 0x0000000000000000, 0x0000000000000000, 0xFFFF000000000000, 0x0000000000000000 | |
73 | dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x000000000000FFFF | |
74 | dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x00000000FFFF0000 | |
75 | dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000FFFF00000000 | |
76 | dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFF000000000000 | |
77 | ||
78 | lane_1: dq 1 | |
79 | lane_2: dq 2 | |
80 | lane_3: dq 3 | |
81 | lane_4: dq 4 | |
82 | lane_5: dq 5 | |
83 | lane_6: dq 6 | |
84 | lane_7: dq 7 | |
85 | lane_8: dq 8 | |
86 | lane_9: dq 9 | |
87 | lane_10: dq 10 | |
88 | lane_11: dq 11 | |
89 | lane_12: dq 12 | |
90 | lane_13: dq 13 | |
91 | lane_14: dq 14 | |
92 | lane_15: dq 15 | |
93 | ||
94 | section .text | |
95 | ||
96 | %ifdef LINUX | |
97 | %define arg1 rdi | |
98 | %define arg2 rsi | |
99 | %define arg3 rdx | |
100 | %else | |
101 | %define arg1 rcx | |
102 | %define arg2 rdx | |
103 | %define arg3 rsi | |
104 | %endif | |
105 | ||
106 | %define state arg1 | |
107 | %define job arg2 | |
108 | %define len2 arg2 | |
109 | ||
110 | ||
111 | ; idx needs to be in rbp, r15 (presumably because it must survive the call to sha256_x16_avx512; NOTE(review): the STACK comment below says that routine clobbers r9 to r15 - confirm r15 is a valid choice) | |
112 | %define idx rbp | |
113 | ||
114 | %define unused_lanes r10 | |
115 | %define tmp5 r10 | |
116 | ||
117 | %define lane_data rbx | |
118 | %define tmp2 rbx | |
119 | ||
120 | %define job_rax rax | |
121 | %define tmp1 rax | |
122 | %define size_offset rax | |
123 | %define start_offset rax | |
124 | ||
125 | %define tmp3 arg1 | |
126 | ||
127 | %define extra_blocks arg2 | |
128 | %define p arg2 | |
129 | ||
130 | %define tmp4 arg3 | |
131 | %define tmp r9 | |
132 | ||
133 | %define len_upper r13 | |
134 | %define idx_upper r14 | |
135 | ||
136 | ||
137 | ; we clobber rsi, rbp; called routine also clobbers rax, r9 to r15. STACK holds 8 qword save slots for GPRs plus one slot for the rsp value seen at function entry | |
138 | struc STACK | |
139 | _gpr_save: resq 8 | |
140 | _rsp_save: resq 1 | |
141 | endstruc | |
142 | ||
143 | %define APPEND(a,b) a %+ b | |
144 | ||
145 | ; JOB* flush_job_hmac_sha_224_avx512(MB_MGR_HMAC_SHA_256_OOO *state) | |
146 | ; JOB* flush_job_hmac_sha_256_avx512(MB_MGR_HMAC_SHA_256_OOO *state) | |
147 | ; arg 1 : state. Only one of the two names is assembled (sha_224 when SHA224 is defined, sha_256 otherwise). Returns in rax the completed job, or 0 (NULL) when no lanes are in use | |
148 | align 32 | |
149 | %ifdef SHA224 | |
150 | MKGLOBAL(flush_job_hmac_sha_224_avx512,function,internal) | |
151 | flush_job_hmac_sha_224_avx512: | |
152 | %else | |
153 | MKGLOBAL(flush_job_hmac_sha_256_avx512,function,internal) | |
154 | flush_job_hmac_sha_256_avx512: | |
155 | %endif | |
156 | mov rax, rsp | |
157 | sub rsp, STACK_size | |
158 | and rsp, -32 | |
159 | mov [rsp + _gpr_save + 8*0], rbx | |
160 | mov [rsp + _gpr_save + 8*1], rbp | |
161 | mov [rsp + _gpr_save + 8*2], r12 | |
162 | mov [rsp + _gpr_save + 8*3], r13 | |
163 | mov [rsp + _gpr_save + 8*4], r14 | |
164 | mov [rsp + _gpr_save + 8*5], r15 | |
165 | %ifndef LINUX | |
166 | mov [rsp + _gpr_save + 8*6], rsi | |
167 | mov [rsp + _gpr_save + 8*7], rdi | |
168 | %endif | |
169 | mov [rsp + _rsp_save], rax ; original SP (rax was copied from rsp before the frame was allocated and rsp was aligned down to 32 bytes) | |
170 | ||
171 | ; if no lanes are in use (the num_lanes_inuse counter is 0) there is nothing to flush: return NULL | |
172 | cmp dword [state + _num_lanes_inuse_sha256], 0 | |
173 | jz return_null | |
174 | ||
175 | ; find a lane with a non-null job: idx starts at 0 and is overwritten by every lane 1..15 that holds a job, so the highest occupied lane wins (idx stays 0 if none of lanes 1..15 is occupied) | |
176 | xor idx, idx | |
177 | ||
178 | %assign I 1 | |
179 | %rep 15 | |
180 | cmp qword [state + _ldata_sha256 + (I * _HMAC_SHA1_LANE_DATA_size) + _job_in_lane], 0 | |
181 | cmovne idx, [rel APPEND(lane_,I)] | |
182 | %assign I (I+1) | |
183 | %endrep | |
184 | ||
185 | copy_lane_data: | |
186 | ; copy idx to empty lanes: every lane without a job gets lane idx's data pointer, and its 16-bit length is ORed to 0xFFFF (len_masks entry I) before the minimum search below | |
187 | vmovdqa ymm0, [state + _lens_sha256] | |
188 | mov tmp, [state + _args_data_ptr_sha256 + PTR_SZ*idx] | |
189 | ||
190 | %assign I 0 | |
191 | %rep 16 | |
192 | cmp qword [state + _ldata_sha256 + I * _HMAC_SHA1_LANE_DATA_size + _job_in_lane], 0 | |
193 | jne APPEND(skip_,I) | |
194 | mov [state + _args_data_ptr_sha256 + PTR_SZ*I], tmp | |
195 | vpor ymm0, ymm0, [rel len_masks + 32*I] | |
196 | APPEND(skip_,I): | |
197 | %assign I (I+1) | |
198 | %endrep | |
199 | ||
200 | vmovdqa [state + _lens_sha256 ], ymm0 | |
201 | ||
202 | vphminposuw xmm1, xmm0 | |
203 | vpextrw DWORD(len2), xmm1, 0 ; min value of lanes 0..7 | |
204 | vpextrw DWORD(idx), xmm1, 1 ; min index (0...7) | |
205 | ||
206 | vmovdqa xmm2, [state + _lens_sha256 + 8*2] | |
207 | vphminposuw xmm3, xmm2 | |
208 | vpextrw DWORD(len_upper), xmm3, 0 ; min value of lanes 8..F | |
209 | vpextrw DWORD(idx_upper), xmm3, 1 ; min index within the upper half (0..7, i.e. lanes 8...F once 8 is added below) | |
210 | ||
211 | cmp len2, len_upper | |
212 | jle use_min | |
213 | ||
214 | vmovdqa xmm1, xmm3 | |
215 | mov len2, len_upper | |
216 | mov idx, idx_upper ; idx would be in range 0..7 | |
217 | add idx, 8 ; to reflect that index is in 8..F range | |
218 | ||
219 | use_min: | |
220 | cmp len2, 0 | |
221 | je len_is_0 | |
222 | ||
223 | vpbroadcastw xmm1, xmm1 ; duplicate the minimum length (word 0 of xmm1) across all words so it can be subtracted from every lane | |
224 | vpsubw xmm0, xmm0, xmm1 | |
225 | vmovdqa [state + _lens_sha256], xmm0 | |
226 | vpsubw xmm2, xmm2, xmm1 | |
227 | vmovdqa [state + _lens_sha256 + 8*2], xmm2 | |
228 | ||
229 | ; "state" and "args" are the same address, arg1 | |
230 | ; len is arg2 (len2, the minimum length that was just subtracted from every lane) | |
231 | call sha256_x16_avx512 | |
232 | ; state and idx are intact after the call; both are used again below | |
233 | ||
234 | len_is_0: | |
235 | ; process completed job "idx": if extra blocks are pending, proc_extra_blocks points the lane at them and loops back; else if outer_done is still 0, proc_outer byte-swaps the lane digest into outer_block, reloads the lane digest from the job's auth_key_xor_opad buffer, queues one block and loops back; otherwise the job is finished (end_loop) | |
236 | imul lane_data, idx, _HMAC_SHA1_LANE_DATA_size | |
237 | lea lane_data, [state + _ldata_sha256 + lane_data] | |
238 | mov DWORD(extra_blocks), [lane_data + _extra_blocks] | |
239 | cmp extra_blocks, 0 | |
240 | jne proc_extra_blocks | |
241 | cmp dword [lane_data + _outer_done], 0 | |
242 | jne end_loop | |
243 | ||
244 | proc_outer: | |
245 | mov dword [lane_data + _outer_done], 1 | |
246 | mov DWORD(size_offset), [lane_data + _size_offset] | |
247 | mov qword [lane_data + _extra_block + size_offset], 0 | |
248 | mov word [state + _lens_sha256 + 2*idx], 1 | |
249 | lea tmp, [lane_data + _outer_block] | |
250 | mov [state + _args_data_ptr_sha256 + PTR_SZ*idx], tmp | |
251 | ||
252 | vmovd xmm0, [state + _args_digest_sha256 + 4*idx + 0*SHA256_DIGEST_ROW_SIZE] | |
253 | vpinsrd xmm0, xmm0, [state + _args_digest_sha256 + 4*idx + 1*SHA256_DIGEST_ROW_SIZE], 1 | |
254 | vpinsrd xmm0, xmm0, [state + _args_digest_sha256 + 4*idx + 2*SHA256_DIGEST_ROW_SIZE], 2 | |
255 | vpinsrd xmm0, xmm0, [state + _args_digest_sha256 + 4*idx + 3*SHA256_DIGEST_ROW_SIZE], 3 | |
256 | vpshufb xmm0, xmm0, [rel byteswap] | |
257 | vmovd xmm1, [state + _args_digest_sha256 + 4*idx + 4*SHA256_DIGEST_ROW_SIZE] | |
258 | vpinsrd xmm1, xmm1, [state + _args_digest_sha256 + 4*idx + 5*SHA256_DIGEST_ROW_SIZE], 1 | |
259 | vpinsrd xmm1, xmm1, [state + _args_digest_sha256 + 4*idx + 6*SHA256_DIGEST_ROW_SIZE], 2 | |
260 | %ifndef SHA224 | |
261 | vpinsrd xmm1, xmm1, [state + _args_digest_sha256 + 4*idx + 7*SHA256_DIGEST_ROW_SIZE], 3 | |
262 | %endif | |
263 | vpshufb xmm1, xmm1, [rel byteswap] | |
264 | ||
265 | vmovdqa [lane_data + _outer_block], xmm0 | |
266 | vmovdqa [lane_data + _outer_block + 4*4], xmm1 | |
267 | %ifdef SHA224 | |
268 | mov dword [lane_data + _outer_block + 7*4], 0x80 | |
269 | %endif | |
270 | ||
271 | mov job, [lane_data + _job_in_lane] | |
272 | mov tmp, [job + _auth_key_xor_opad] | |
273 | vmovdqu xmm0, [tmp] | |
274 | vmovdqu xmm1, [tmp + 4*4] | |
275 | vmovd [state + _args_digest_sha256 + 4*idx + 0*SHA256_DIGEST_ROW_SIZE], xmm0 | |
276 | vpextrd [state + _args_digest_sha256 + 4*idx + 1*SHA256_DIGEST_ROW_SIZE], xmm0, 1 | |
277 | vpextrd [state + _args_digest_sha256 + 4*idx + 2*SHA256_DIGEST_ROW_SIZE], xmm0, 2 | |
278 | vpextrd [state + _args_digest_sha256 + 4*idx + 3*SHA256_DIGEST_ROW_SIZE], xmm0, 3 | |
279 | vmovd [state + _args_digest_sha256 + 4*idx + 4*SHA256_DIGEST_ROW_SIZE], xmm1 | |
280 | vpextrd [state + _args_digest_sha256 + 4*idx + 5*SHA256_DIGEST_ROW_SIZE], xmm1, 1 | |
281 | vpextrd [state + _args_digest_sha256 + 4*idx + 6*SHA256_DIGEST_ROW_SIZE], xmm1, 2 | |
282 | vpextrd [state + _args_digest_sha256 + 4*idx + 7*SHA256_DIGEST_ROW_SIZE], xmm1, 3 | |
283 | jmp copy_lane_data | |
284 | ||
285 | align 16 | |
286 | proc_extra_blocks: | |
287 | mov DWORD(start_offset), [lane_data + _start_offset] | |
288 | mov [state + _lens_sha256 + 2*idx], WORD(extra_blocks) | |
289 | lea tmp, [lane_data + _extra_block + start_offset] | |
290 | mov [state + _args_data_ptr_sha256 + PTR_SZ*idx], tmp | |
291 | mov dword [lane_data + _extra_blocks], 0 | |
292 | jmp copy_lane_data | |
293 | ||
294 | return_null: | |
295 | xor job_rax, job_rax | |
296 | jmp return | |
297 | ||
298 | align 16 | |
299 | end_loop: | |
300 | mov job_rax, [lane_data + _job_in_lane] | |
301 | mov qword [lane_data + _job_in_lane], 0 | |
302 | or dword [job_rax + _status], STS_COMPLETED_HMAC | |
303 | mov unused_lanes, [state + _unused_lanes_sha256] | |
304 | shl unused_lanes, 4 | |
305 | or unused_lanes, idx | |
306 | mov [state + _unused_lanes_sha256], unused_lanes | |
307 | ||
308 | sub dword [state + _num_lanes_inuse_sha256], 1 | |
309 | ||
310 | mov p, [job_rax + _auth_tag_output] | |
311 | ||
9f95a23c TL |
312 | %ifdef SHA224 |
313 | cmp qword [job_rax + _auth_tag_output_len_in_bytes], 14 | |
314 | jne copy_full_digest | |
315 | %else | |
316 | cmp qword [job_rax + _auth_tag_output_len_in_bytes], 16 | |
317 | jne copy_full_digest | |
318 | %endif | |
319 | ||
320 | ;; truncated tag (SHA224 14 bytes / SHA256 16 bytes): byte-swap digest words 0..3 of lane idx and store them at p; for SHA224 only the low 16 bits of the fourth swapped word are written | |
11fdf7f2 TL |
321 | mov DWORD(tmp), [state + _args_digest_sha256 + 4*idx + 0*SHA256_DIGEST_ROW_SIZE] |
322 | mov DWORD(tmp2), [state + _args_digest_sha256 + 4*idx + 1*SHA256_DIGEST_ROW_SIZE] | |
323 | mov DWORD(tmp4), [state + _args_digest_sha256 + 4*idx + 2*SHA256_DIGEST_ROW_SIZE] | |
324 | mov DWORD(tmp5), [state + _args_digest_sha256 + 4*idx + 3*SHA256_DIGEST_ROW_SIZE] | |
11fdf7f2 TL |
325 | bswap DWORD(tmp) |
326 | bswap DWORD(tmp2) | |
327 | bswap DWORD(tmp4) | |
328 | bswap DWORD(tmp5) | |
329 | mov [p + 0*4], DWORD(tmp) | |
330 | mov [p + 1*4], DWORD(tmp2) | |
331 | mov [p + 2*4], DWORD(tmp4) | |
332 | %ifdef SHA224 | |
333 | mov [p + 3*4], WORD(tmp5) | |
334 | %else | |
335 | mov [p + 3*4], DWORD(tmp5) | |
336 | %endif | |
f67539c2 | 337 | jmp clear_ret |
9f95a23c TL |
338 | |
339 | copy_full_digest: | |
340 | ;; copy SHA224 28 bytes / SHA256 32 bytes: reached for every requested tag length other than the truncated one (14 for SHA224, 16 for SHA256); all 7 (SHA224) or 8 (SHA256) digest words of lane idx are byte-swapped and stored at p | |
341 | mov DWORD(tmp), [state + _args_digest_sha256 + 4*idx + 0*SHA256_DIGEST_ROW_SIZE] | |
342 | mov DWORD(tmp2), [state + _args_digest_sha256 + 4*idx + 1*SHA256_DIGEST_ROW_SIZE] | |
343 | mov DWORD(tmp4), [state + _args_digest_sha256 + 4*idx + 2*SHA256_DIGEST_ROW_SIZE] | |
344 | mov DWORD(tmp5), [state + _args_digest_sha256 + 4*idx + 3*SHA256_DIGEST_ROW_SIZE] | |
345 | bswap DWORD(tmp) | |
346 | bswap DWORD(tmp2) | |
347 | bswap DWORD(tmp4) | |
348 | bswap DWORD(tmp5) | |
349 | mov [p + 0*4], DWORD(tmp) | |
350 | mov [p + 1*4], DWORD(tmp2) | |
351 | mov [p + 2*4], DWORD(tmp4) | |
352 | mov [p + 3*4], DWORD(tmp5) | |
353 | ||
354 | mov DWORD(tmp), [state + _args_digest_sha256 + 4*idx + 4*SHA256_DIGEST_ROW_SIZE] | |
355 | mov DWORD(tmp2), [state + _args_digest_sha256 + 4*idx + 5*SHA256_DIGEST_ROW_SIZE] | |
356 | mov DWORD(tmp4), [state + _args_digest_sha256 + 4*idx + 6*SHA256_DIGEST_ROW_SIZE] | |
357 | %ifndef SHA224 | |
358 | mov DWORD(tmp5), [state + _args_digest_sha256 + 4*idx + 7*SHA256_DIGEST_ROW_SIZE] | |
359 | %endif | |
360 | bswap DWORD(tmp) | |
361 | bswap DWORD(tmp2) | |
362 | bswap DWORD(tmp4) | |
363 | %ifndef SHA224 | |
364 | bswap DWORD(tmp5) | |
365 | %endif | |
366 | mov [p + 4*4], DWORD(tmp) | |
367 | mov [p + 5*4], DWORD(tmp2) | |
368 | mov [p + 6*4], DWORD(tmp4) | |
369 | %ifndef SHA224 | |
370 | mov [p + 7*4], DWORD(tmp5) | |
371 | %endif | |
11fdf7f2 | 372 | |
f67539c2 TL |
373 | clear_ret: |
374 | ||
375 | %ifdef SAFE_DATA | |
376 | vpxorq zmm0, zmm0 | |
377 | ||
378 | ;; Clear digest (28B/32B), outer_block (28B/32B) and extra_block (64B) | |
379 | ;; of returned job and NULL jobs, i.e. of every lane whose job_in_lane is 0 (the lane of the job being returned was zeroed at end_loop) | |
380 | %assign I 0 | |
381 | %rep 16 | |
382 | cmp qword [state + _ldata_sha256 + (I*_HMAC_SHA1_LANE_DATA_size) + _job_in_lane], 0 | |
383 | jne APPEND(skip_clear_,I) | |
384 | ||
385 | ;; Clear digest (28 bytes for SHA-224, 32 bytes for SHA-256) | |
386 | %assign J 0 | |
387 | %rep 7 | |
388 | mov dword [state + _args_digest_sha256 + SHA256_DIGEST_WORD_SIZE*I + J*SHA256_DIGEST_ROW_SIZE], 0 | |
389 | %assign J (J+1) | |
390 | %endrep | |
391 | %ifndef SHA224 | |
392 | mov dword [state + _args_digest_sha256 + SHA256_DIGEST_WORD_SIZE*I + 7*SHA256_DIGEST_ROW_SIZE], 0 | |
393 | %endif | |
394 | ||
395 | lea lane_data, [state + _ldata_sha256 + (I*_HMAC_SHA1_LANE_DATA_size)] | |
396 | ;; Clear first 64 bytes of extra_block (one unaligned store of the zeroed zmm0) | |
397 | vmovdqu64 [lane_data + _extra_block], zmm0 | |
398 | ||
399 | ;; Clear first 28 bytes (SHA-224: 16 + 8 + 4 byte stores) or 32 bytes (SHA-256: one ymm store) of outer_block | |
400 | %ifdef SHA224 | |
401 | vmovdqa64 [lane_data + _outer_block], xmm0 | |
402 | mov qword [lane_data + _outer_block + 16], 0 | |
403 | mov dword [lane_data + _outer_block + 24], 0 | |
404 | %else | |
405 | vmovdqu64 [lane_data + _outer_block], ymm0 | |
406 | %endif | |
407 | ||
408 | APPEND(skip_clear_,I): | |
409 | %assign I (I+1) | |
410 | %endrep | |
411 | ||
412 | %endif ;; SAFE_DATA | |
413 | ||
11fdf7f2 TL |
414 | return: |
415 | vzeroupper | |
416 | ||
417 | mov rbx, [rsp + _gpr_save + 8*0] | |
418 | mov rbp, [rsp + _gpr_save + 8*1] | |
419 | mov r12, [rsp + _gpr_save + 8*2] | |
420 | mov r13, [rsp + _gpr_save + 8*3] | |
421 | mov r14, [rsp + _gpr_save + 8*4] | |
422 | mov r15, [rsp + _gpr_save + 8*5] | |
423 | %ifndef LINUX | |
424 | mov rsi, [rsp + _gpr_save + 8*6] | |
425 | mov rdi, [rsp + _gpr_save + 8*7] | |
426 | %endif | |
427 | mov rsp, [rsp + _rsp_save] ; original SP: reloads the entry-time rsp saved in the prologue, undoing both the frame allocation and the 32-byte alignment | |
428 | ||
429 | ret | |
430 | ||
431 | %ifdef LINUX | |
432 | section .note.GNU-stack noalloc noexec nowrite progbits | |
433 | %endif |