;------------------------------------------------------------------------------
;
; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Abstract:
;
;   Provide macros for register save/restore using SSE/AVX registers
;
;------------------------------------------------------------------------------

;
; Define SSE and AVX instruction set
;
;
; Define SSE macros using SSE 4.1 instructions
; args 1:XMM, 2:IDX, 3:REG
;
%macro SXMMN 3
    pinsrq  %1, %3, (%2 & 3)
%endmacro

;
; args 1:XMM, 2:REG, 3:IDX
;
%macro LXMMN 3
    pextrq  %2, %1, (%3 & 3)
%endmacro

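;
; Usage sketch (illustrative, not part of the original flow): round-trip a
; GPR through an XMM slot. Register and value choices below are assumptions.
;
%if 0
    mov     rax, 1234h
    SXMMN   xmm5, 0, rax        ; XMM5[63:0]   <- RAX
    SXMMN   xmm5, 1, rbx        ; XMM5[127:64] <- RBX
    LXMMN   xmm5, rcx, 0        ; RCX <- XMM5[63:0] (value saved from RAX)
%endif
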
;
; Define AVX macros using AVX instructions
; Save XMM to YMM
; args 1:YMM, 2:IDX (0 - lower 128 bits, 1 - upper 128 bits), 3:XMM
;
%macro SYMMN 3
    vinsertf128 %1, %1, %3, %2
%endmacro

;
; Restore XMM from YMM
; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128 bits, 1 - upper 128 bits)
;
%macro LYMMN 3
    vextractf128 %2, %1, %3
%endmacro

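;
; Usage sketch (illustrative): park XMM5 in the upper half of a YMM register
; and pull it back. The YMM6/XMM5 pairing is an assumption for demonstration.
;
%if 0
    SYMMN   ymm6, 1, xmm5       ; YMM6[255:128] <- XMM5
    LYMMN   ymm6, xmm5, 1       ; XMM5 <- YMM6[255:128]
%endif
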
;
; Upper half of YMM7 to save RBP and RBX. Upper half of YMM8 to save RSI and RDI.
; Modified: XMM5, YMM6, YMM7 and YMM8
;
%macro SAVE_REGS 0
    SXMMN   xmm5, 0, rbp
    SXMMN   xmm5, 1, rbx
    SYMMN   ymm7, 1, xmm5
    SXMMN   xmm5, 0, rsi
    SXMMN   xmm5, 1, rdi
    SYMMN   ymm8, 1, xmm5
    SAVE_RSP
%endmacro

;
; Upper half of YMM7 to restore RBP and RBX. Upper half of YMM8 to restore RSI and RDI.
; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
;
%macro LOAD_REGS 0
    LYMMN   ymm7, xmm5, 1
    LXMMN   xmm5, rbp, 0
    LXMMN   xmm5, rbx, 1
    LYMMN   ymm8, xmm5, 1
    LXMMN   xmm5, rsi, 0
    LXMMN   xmm5, rdi, 1
    LOAD_RSP
%endmacro
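
;
; Usage sketch (illustrative): bracket a stackless region so the non-volatile
; GPRs survive it. SomeStacklessWork is a hypothetical placeholder label.
;
%if 0
    SAVE_REGS                   ; stash RBP/RBX/RSI/RDI/RSP into YMM6-YMM8
    ; ... code that may clobber those registers, e.g. SomeStacklessWork ...
    LOAD_REGS                   ; restore them, including RSP
%endif
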
;
; Restore RBP from YMM7[128:191]
; Modified: XMM5 and RBP
;
%macro LOAD_RBP 0
    LYMMN   ymm7, xmm5, 1
    movq    rbp, xmm5
%endmacro

;
; Restore RBX from YMM7[192:255]
; Modified: XMM5 and RBX
;
%macro LOAD_RBX 0
    LYMMN   ymm7, xmm5, 1
    LXMMN   xmm5, rbx, 1
%endmacro

;
; Upper half of YMM6 to save/restore Time Stamp, RSP
;
;
; Save Time Stamp to YMM6[192:255]
; arg 1:general purpose register which holds the time stamp
; Modified: XMM5 and YMM6
;
%macro SAVE_TS 1
    LYMMN   ymm6, xmm5, 1
    SXMMN   xmm5, 1, %1
    SYMMN   ymm6, 1, xmm5
%endmacro

;
; Restore Time Stamp from YMM6[192:255]
; arg 1:general purpose register that receives the time stamp
; Modified: XMM5 and %1
;
%macro LOAD_TS 1
    LYMMN   ymm6, xmm5, 1
    LXMMN   xmm5, %1, 1
%endmacro

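;
; Usage sketch (illustrative): capture the TSC and park it in YMM6. Combining
; EDX:EAX after RDTSC is an assumption about how the caller forms the value.
;
%if 0
    rdtsc                       ; TSC -> EDX:EAX
    shl     rdx, 32
    or      rax, rdx            ; full 64-bit time stamp in RAX
    SAVE_TS rax                 ; YMM6[192:255] <- RAX
    ; ...
    LOAD_TS r8                  ; R8 <- saved time stamp
%endif
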
;
; Save RSP to YMM6[128:191]
; Modified: XMM5 and YMM6
;
%macro SAVE_RSP 0
    LYMMN   ymm6, xmm5, 1
    SXMMN   xmm5, 0, rsp
    SYMMN   ymm6, 1, xmm5
%endmacro

;
; Restore RSP from YMM6[128:191]
; Modified: XMM5 and RSP
;
%macro LOAD_RSP 0
    LYMMN   ymm6, xmm5, 1
    movq    rsp, xmm5
%endmacro

;
; Upper half of YMM9 to save/restore UCODE status, BFV address
;
;
; Save uCode status to YMM9[128:191]
; arg 1:general purpose register which holds the uCode status
; Modified: XMM5 and YMM9
;
%macro SAVE_UCODE_STATUS 1
    LYMMN   ymm9, xmm5, 1
    SXMMN   xmm5, 0, %1
    SYMMN   ymm9, 1, xmm5
%endmacro

;
; Restore uCode status from YMM9[128:191]
; arg 1:general purpose register that receives the uCode status
; Modified: XMM5 and %1
;
%macro LOAD_UCODE_STATUS 1
    LYMMN   ymm9, xmm5, 1
    movq    %1, xmm5
%endmacro

;
; Save BFV address to YMM9[192:255]
; arg 1:general purpose register which holds the BFV address
; Modified: XMM5 and YMM9
;
%macro SAVE_BFV 1
    LYMMN   ymm9, xmm5, 1
    SXMMN   xmm5, 1, %1
    SYMMN   ymm9, 1, xmm5
%endmacro

;
; Restore BFV address from YMM9[192:255]
; arg 1:general purpose register that receives the BFV address
; Modified: XMM5 and %1
;
%macro LOAD_BFV 1
    LYMMN   ymm9, xmm5, 1
    LXMMN   xmm5, %1, 1
%endmacro

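;
; Usage sketch (illustrative): RAX holding a microcode-load status and RBP
; holding a BFV pointer are assumptions chosen for demonstration.
;
%if 0
    SAVE_UCODE_STATUS rax       ; YMM9[128:191] <- RAX
    SAVE_BFV          rbp       ; YMM9[192:255] <- RBP
    ; ...
    LOAD_UCODE_STATUS r8        ; R8 <- saved uCode status
    LOAD_BFV          r9        ; R9 <- saved BFV address
%endif
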
;
; Upper half of YMM10 to save/restore RCX
;
;
; Save RCX to YMM10[128:191]
; Modified: XMM5 and YMM10
;
%macro SAVE_RCX 0
    LYMMN   ymm10, xmm5, 1
    SXMMN   xmm5, 0, rcx
    SYMMN   ymm10, 1, xmm5
%endmacro

;
; Restore RCX from YMM10[128:191]
; Modified: XMM5 and RCX
;
%macro LOAD_RCX 0
    LYMMN   ymm10, xmm5, 1
    movq    rcx, xmm5
%endmacro

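;
; Usage sketch (illustrative): preserve RCX across an instruction sequence
; that clobbers ECX; the CPUID example is an assumption for demonstration.
;
%if 0
    SAVE_RCX                    ; YMM10[128:191] <- RCX
    mov     eax, 1
    cpuid                       ; clobbers ECX
    LOAD_RCX                    ; RCX restored from YMM10
%endif
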
;
; YMM7[128:191] for calling stack
; arg 1:Entry
; Modified: RSI, XMM5, YMM7
;
%macro CALL_YMM 1
    mov     rsi, %%ReturnAddress
    LYMMN   ymm7, xmm5, 1
    SXMMN   xmm5, 0, rsi
    SYMMN   ymm7, 1, xmm5
    mov     rsi, %1
    jmp     rsi
%%ReturnAddress:
%endmacro

;
; Restore RIP from YMM7[128:191]
; Modified: RSI, XMM5
;
%macro RET_YMM 0
    LYMMN   ymm7, xmm5, 1
    movq    rsi, xmm5
    jmp     rsi
%endmacro

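;
; Usage sketch (illustrative): a one-deep, stackless call/return pair. The
; MyStacklessRoutine label is a hypothetical placeholder.
;
%if 0
    CALL_YMM MyStacklessRoutine ; return address parked in YMM7[128:191]
    ; execution resumes here once the routine executes RET_YMM
    ; ...

MyStacklessRoutine:             ; placed out of the fall-through path
    ; must not nest another CALL_YMM: YMM7 holds a single return slot
    RET_YMM
%endif
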
%macro ENABLE_SSE 0
    ;
    ; Initialize floating point units
    ;
    jmp     NextAddress
align 4
    ;
    ; Float control word initial value:
    ; all exceptions masked, double-precision, round-to-nearest
    ;
FpuControlWord DW 027Fh
    ;
    ; Multimedia-extensions control word (MXCSR):
    ; all exceptions masked, round-to-nearest
    ;
MmxControlWord DQ 01F80h
SseError:
    ;
    ; Processor has to support SSE
    ;
    jmp     SseError
NextAddress:
    finit
    mov     rax, FpuControlWord
    fldcw   [rax]

    ;
    ; Use the CPUID instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
    ; whether the processor supports the SSE instruction set.
    ;
    mov     r10, rcx            ; preserve RCX, which CPUID clobbers
    mov     rax, 1
    cpuid
    bt      rdx, 25
    jnc     SseError

    ;
    ; SSE 4.1 support (CPUID.01H:ECX.SSE4_1[bit 19] = 1)
    ;
    bt      ecx, 19
    jnc     SseError
    mov     rcx, r10            ; restore RCX

    ;
    ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
    ;
    mov     rax, cr4
    or      rax, 00000600h
    mov     cr4, rax

    ;
    ; The processor supports SSE, so the ldmxcsr instruction can be used
    ;
    mov     rax, MmxControlWord
    ldmxcsr [rax]
%endmacro

%macro ENABLE_AVX 0
    mov     r10, rcx            ; preserve RCX, which CPUID clobbers
    mov     eax, 1
    cpuid
    and     ecx, 10000000h
    cmp     ecx, 10000000h      ; check AVX feature flag (CPUID.01H:ECX[bit 28])
    je      EnableAvx
AvxError:
    ;
    ; Processor has to support AVX
    ;
    jmp     AvxError
EnableAvx:
    ;
    ; Set OSXSAVE bit (bit #18) to enable the xgetbv/xsetbv instructions
    ;
    mov     rax, cr4
    or      rax, 00040000h
    mov     cr4, rax

    mov     rcx, 0              ; XCR0 index 0
    xgetbv                      ; result in EDX:EAX
    or      eax, 00000006h      ; set XCR0 bit #1 and bit #2 to enable SSE state and AVX state
    xsetbv
    mov     rcx, r10            ; restore RCX
%endmacro

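;
; Usage sketch (illustrative): typical early-boot ordering under the
; assumption that no stack exists yet. ENABLE_SSE must run before any of the
; SSE-based macros above, and ENABLE_AVX before any YMM-based macro.
;
%if 0
    ENABLE_SSE                  ; verify/enable SSE, program MXCSR
    ENABLE_AVX                  ; verify/enable AVX, program XCR0
    SAVE_REGS                   ; now YMM6-YMM8 can hold GPR state
%endif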