]> git.proxmox.com Git - mirror_edk2.git/blob - IntelFsp2Pkg/Include/SaveRestoreSseAvxNasm.inc
IntelFsp2Pkg: FSP_TEMP_RAM_INIT call must follow X64 Calling Convention
[mirror_edk2.git] / IntelFsp2Pkg / Include / SaveRestoreSseAvxNasm.inc
1 ;------------------------------------------------------------------------------
2 ;
3 ; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>
4 ; SPDX-License-Identifier: BSD-2-Clause-Patent
5 ;
6 ; Abstract:
7 ;
8 ; Provide macro for register save/restore using SSE registers
9 ;
10 ;------------------------------------------------------------------------------
11
12 ;
13 ; Define SSE and AVX instruction set
14 ;
;
; SXMMN - insert a 64-bit general purpose register into one qword lane of an
;         XMM register, using the SSE 4.1 PINSRQ instruction.
; args 1:XMM (destination), 2:IDX (qword lane, 0 or 1), 3:REG (64-bit source)
;
; An XMM register holds two qword lanes and PINSRQ decodes only bit 0 of its
; immediate, so IDX is reduced to that single bit. The selected lane is the
; same as before for every IDX value.
;
%macro SXMMN 3
            pinsrq  %1, %3, (%2 & 1)
%endmacro
22
;
; LXMMN - extract one qword lane of an XMM register into a 64-bit general
;         purpose register, using the SSE 4.1 PEXTRQ instruction.
; args 1:XMM (source), 2:REG (64-bit destination), 3:IDX (qword lane, 0 or 1)
;
; An XMM register holds two qword lanes and PEXTRQ decodes only bit 0 of its
; immediate, so IDX is reduced to that single bit. The selected lane is the
; same as before for every IDX value.
;
%macro LXMMN 3
            pextrq  %2, %1, (%3 & 1)
%endmacro
29
;
; SYMMN - AVX helper: store an XMM register into one 128-bit half of a YMM
;         register. The YMM register is used as both source and destination,
;         so its other half is carried over unchanged.
; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM
;
%macro SYMMN 3
            vinsertf128 %1, %1, %3, %2
%endmacro
38
;
; LYMMN - AVX helper: copy one 128-bit half of a YMM register into an XMM
;         register.
; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits)
;
%macro LYMMN 3
            vextractf128 %2, %1, %3
%endmacro
46
;
; SAVE_REGS - stash RBP, RBX, RSI, RDI and RSP in the upper halves of YMM
;             registers (no memory or stack is touched).
;   YMM7[128:191] = RBP     YMM7[192:255] = RBX
;   YMM8[128:191] = RSI     YMM8[192:255] = RDI
;   RSP is stored by the SAVE_RSP macro.
; Modified: XMM5, YMM6, YMM7 and YMM8
;
%macro SAVE_REGS 0
            SXMMN   xmm5, 0, rbp            ; xmm5[0:63]   = rbp
            SXMMN   xmm5, 1, rbx            ; xmm5[64:127] = rbx
            SYMMN   ymm7, 1, xmm5           ; ymm7[128:255] = xmm5

            SXMMN   xmm5, 0, rsi            ; xmm5[0:63]   = rsi
            SXMMN   xmm5, 1, rdi            ; xmm5[64:127] = rdi
            SYMMN   ymm8, 1, xmm5           ; ymm8[128:255] = xmm5

            SAVE_RSP
%endmacro
60
;
; LOAD_REGS - reload RBP, RBX, RSI, RDI and RSP from the upper halves of YMM
;             registers.
;   RBP = YMM7[128:191]     RBX = YMM7[192:255]
;   RSI = YMM8[128:191]     RDI = YMM8[192:255]
;   RSP is reloaded by the LOAD_RSP macro.
; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
;
%macro LOAD_REGS 0
            LYMMN   ymm7, xmm5, 1           ; xmm5 = ymm7[128:255]
            LXMMN   xmm5, rbp, 0            ; rbp  = xmm5[0:63]
            LXMMN   xmm5, rbx, 1            ; rbx  = xmm5[64:127]

            LYMMN   ymm8, xmm5, 1           ; xmm5 = ymm8[128:255]
            LXMMN   xmm5, rsi, 0            ; rsi  = xmm5[0:63]
            LXMMN   xmm5, rdi, 1            ; rdi  = xmm5[64:127]

            LOAD_RSP
%endmacro
;
; LOAD_RBP - reload RBP alone from YMM7[128:191]
; Modified: XMM5 and RBP
;
%macro LOAD_RBP 0
            LYMMN   ymm7, xmm5, 1           ; xmm5 = ymm7[128:255]
            movq    rbp, xmm5               ; rbp  = low qword of xmm5
%endmacro
82
;
; LOAD_RBX - reload RBX alone from YMM7[192:255]
; Modified: XMM5 and RBX
;
%macro LOAD_RBX 0
            LYMMN   ymm7, xmm5, 1           ; xmm5 = ymm7[128:255]
            LXMMN   xmm5, rbx, 1            ; rbx  = high qword of xmm5
%endmacro
91
92 ;
93 ; Upper half of YMM6 to save/restore Time Stamp, RSP
94 ;
;
; SAVE_TS - store a time stamp in YMM6[192:255]
; arg 1:general purpose register which holds time stamp
; Modified: XMM5 and YMM6
;
; The upper half of YMM6 is read, patched and written back so that the other
; qword of that half (YMM6[128:191]) keeps its value.
;
%macro SAVE_TS 1
            LYMMN   ymm6, xmm5, 1           ; xmm5 = ymm6[128:255]
            SXMMN   xmm5, 1, %1             ; replace high qword with time stamp
            SYMMN   ymm6, 1, xmm5           ; ymm6[128:255] = xmm5
%endmacro
105
;
; LOAD_TS - fetch the time stamp from YMM6[192:255]
; arg 1:general purpose register that receives the time stamp
; Modified: XMM5 and %1
;
%macro LOAD_TS 1
            LYMMN   ymm6, xmm5, 1           ; xmm5 = ymm6[128:255]
            LXMMN   xmm5, %1, 1             ; %1   = high qword of xmm5
%endmacro
115
;
; SAVE_RSP - store RSP in YMM6[128:191]
; Modified: XMM5 and YMM6
;
; The upper half of YMM6 is read, patched and written back so that the other
; qword of that half (YMM6[192:255]) keeps its value.
;
%macro SAVE_RSP 0
            LYMMN   ymm6, xmm5, 1           ; xmm5 = ymm6[128:255]
            SXMMN   xmm5, 0, rsp            ; replace low qword with rsp
            SYMMN   ymm6, 1, xmm5           ; ymm6[128:255] = xmm5
%endmacro
125
;
; LOAD_RSP - reload RSP from YMM6[128:191]
; Modified: XMM5 and RSP
;
%macro LOAD_RSP 0
            LYMMN   ymm6, xmm5, 1           ; xmm5 = ymm6[128:255]
            movq    rsp, xmm5               ; rsp  = low qword of xmm5
%endmacro
134
135 ;
136 ; Upper half of YMM9 to save/restore UCODE status, BFV address
137 ;
;
; Save uCode status to YMM9[128:191]
; arg 1:general purpose register which holds uCode status
; Modified: XMM5 and YMM9
;
; The value goes into qword lane 0 of the upper half of YMM9, which is
; bits [128:191]. The upper half is read, patched and written back so that
; the other qword of that half (YMM9[192:255]) keeps its value.
;
%macro SAVE_UCODE_STATUS 1
            LYMMN   ymm9, xmm5, 1           ; xmm5 = ymm9[128:255]
            SXMMN   xmm5, 0, %1             ; replace low qword with uCode status
            SYMMN   ymm9, 1, xmm5           ; ymm9[128:255] = xmm5
%endmacro
148
;
; Restore uCode status from YMM9[128:191]
; arg 1:general purpose register that receives the uCode status
; Modified: XMM5 and %1
;
; movq reads the low qword of XMM5, i.e. qword lane 0 of the upper half of
; YMM9, which is bits [128:191].
;
%macro LOAD_UCODE_STATUS 1
            LYMMN   ymm9, xmm5, 1           ; xmm5 = ymm9[128:255]
            movq    %1, xmm5                ; %1   = low qword of xmm5
%endmacro
158
;
; Save BFV address to YMM9[192:255]
; arg 1:general purpose register which holds BFV address
; Modified: XMM5 and YMM9
;
; The value goes into qword lane 1 of the upper half of YMM9, which is
; bits [192:255]. The upper half is read, patched and written back so that
; the other qword of that half (YMM9[128:191]) keeps its value.
;
%macro SAVE_BFV 1
            LYMMN   ymm9, xmm5, 1           ; xmm5 = ymm9[128:255]
            SXMMN   xmm5, 1, %1             ; replace high qword with BFV address
            SYMMN   ymm9, 1, xmm5           ; ymm9[128:255] = xmm5
%endmacro
169
;
; Restore BFV address from YMM9[192:255]
; arg 1:general purpose register that receives the BFV address
; Modified: XMM5 and %1
;
; The value is read from qword lane 1 of the upper half of YMM9, which is
; bits [192:255].
;
%macro LOAD_BFV 1
            LYMMN   ymm9, xmm5, 1           ; xmm5 = ymm9[128:255]
            LXMMN   xmm5, %1, 1             ; %1   = high qword of xmm5
%endmacro
179
180 ;
181 ; Upper half of YMM10 to save/restore RCX
182 ;
;
; SAVE_RCX - store RCX in YMM10[128:191]
; Modified: XMM5 and YMM10
;
; The upper half of YMM10 is read, patched and written back so that the other
; qword of that half (YMM10[192:255]) keeps its value.
;
%macro SAVE_RCX 0
            LYMMN   ymm10, xmm5, 1          ; xmm5 = ymm10[128:255]
            SXMMN   xmm5, 0, rcx            ; replace low qword with rcx
            SYMMN   ymm10, 1, xmm5          ; ymm10[128:255] = xmm5
%endmacro
193
;
; LOAD_RCX - reload RCX from YMM10[128:191]
; Modified: XMM5 and RCX
;
%macro LOAD_RCX 0
            LYMMN   ymm10, xmm5, 1          ; xmm5 = ymm10[128:255]
            movq    rcx, xmm5               ; rcx  = low qword of xmm5
%endmacro
203
;
; CALL_YMM - call without a stack: the return address is recorded in
;            YMM7[128:191] and control is transferred with a jump. RET_YMM
;            jumps back to the recorded address, which is the instruction
;            following this macro.
; arg 1:Entry
; Modified: RSI, XMM5, YMM7
;
; NOTE(review): YMM7[128:191] is the same slot SAVE_REGS uses for RBP, so
;               this macro replaces that saved value - confirm the callers
;               do not rely on both at the same time.
;
%macro CALL_YMM 1
            mov     rsi, %%ReturnAddress
            LYMMN   ymm7, xmm5, 1           ; xmm5 = ymm7[128:255]
            SXMMN   xmm5, 0, rsi            ; replace low qword with return address
            SYMMN   ymm7, 1, xmm5           ; ymm7[128:255] = xmm5
            mov     rsi, %1
            jmp     rsi
%%ReturnAddress:
%endmacro
;
; RET_YMM - return without a stack: jump to the address held in YMM7[128:191]
; Modified: RSI, XMM5
;
%macro RET_YMM 0
            LYMMN   ymm7, xmm5, 1           ; xmm5 = ymm7[128:255]
            movq    rsi, xmm5               ; rsi  = low qword of xmm5
            jmp     rsi
%endmacro
227
;
; ENABLE_SSE - initialize the x87 FPU, verify SSE and SSE 4.1 support, set the
;              CR4 bits that enable SSE, and load MXCSR.
; Execution spins forever at the error label if SSE or SSE 4.1 is missing.
; Modified: RAX, RBX, RDX, R10 and flags
;           (CPUID writes EAX/EBX/ECX/EDX; RCX is preserved through R10)
;
; All labels are macro-local (%%) so the macro can be expanded more than once
; in a translation unit without label redefinition errors.
;
%macro ENABLE_SSE 0
            ;
            ; Initialize floating point units
            ;
            jmp     %%NextAddress           ; skip over the inline constants
align 4
            ;
            ; Float control word initial value:
            ; all exceptions masked, double-precision, round-to-nearest
            ;
%%FpuControlWord:   DW      027Fh
            ;
            ; Multimedia-extensions control word (MXCSR):
            ; all exceptions masked, round-to-nearest
            ; (flush-to-zero, bit 15, is not set by this value)
            ;
%%MmxControlWord:   DQ      01F80h
%%SseError:
            ;
            ; Processor has to support SSE
            ;
            jmp     %%SseError
%%NextAddress:
            finit
            mov     rax, %%FpuControlWord
            fldcw   [rax]

            ;
            ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
            ; whether the processor supports SSE instruction.
            ;
            mov     r10, rcx                ; cpuid overwrites rcx; keep caller's value
            mov     eax, 1                  ; writing eax zero-extends into rax
            cpuid
            bt      edx, 25
            jnc     %%SseError

            ;
            ; SSE 4.1 support (CPUID.01H:ECX bit 19)
            ;
            bt      ecx, 19
            jnc     %%SseError
            mov     rcx, r10

            ;
            ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
            ;
            mov     rax, cr4
            or      rax, 00000600h
            mov     cr4, rax

            ;
            ; The processor should support SSE instruction and we can use
            ; ldmxcsr instruction
            ;
            mov     rax, %%MmxControlWord
            ldmxcsr [rax]                   ; loads the low 32 bits of the constant
%endmacro
285
;
; ENABLE_AVX - verify AVX support, set CR4.OSXSAVE and enable SSE and AVX
;              state in XCR0.
; Execution spins forever at the error label if AVX is missing.
; Modified: RAX, RBX, RDX, R10 and flags
;           (CPUID writes EAX/EBX/ECX/EDX; RCX is preserved through R10)
;
; All labels are macro-local (%%) so the macro can be expanded more than once
; in a translation unit without label redefinition errors.
;
%macro ENABLE_AVX 0
            mov     r10, rcx                ; cpuid/xgetbv use rcx; keep caller's value
            mov     eax, 1
            cpuid
            and     ecx, 10000000h
            cmp     ecx, 10000000h          ; check AVX feature flag
            je      %%EnableAvx
%%AvxError:
            ;
            ; Processor has to support AVX
            ;
            jmp     %%AvxError
%%EnableAvx:
            ;
            ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction
            ;
            mov     rax, cr4
            or      rax, 00040000h
            mov     cr4, rax

            xor     ecx, ecx                ; index 0 (XCR0); zeroes all of rcx
            xgetbv                          ; result in edx:eax
            or      eax, 00000006h          ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state
            xsetbv
            mov     rcx, r10                ; restore caller's rcx
%endmacro
312