]> git.proxmox.com Git - mirror_edk2.git/blob - IntelFsp2Pkg/Include/SaveRestoreSseAvxNasm.inc
IntelFsp2Pkg: FspSecCore support for X64
[mirror_edk2.git] / IntelFsp2Pkg / Include / SaveRestoreSseAvxNasm.inc
1 ;------------------------------------------------------------------------------
2 ;
3 ; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>
4 ; SPDX-License-Identifier: BSD-2-Clause-Patent
5 ;
6 ; Abstract:
7 ;
; Provide macros for register save/restore using SSE (XMM) and AVX (YMM) registers
9 ;
10 ;------------------------------------------------------------------------------
11
12 ;
13 ; Define SSE and AVX instruction set
14 ;
15 ;
16 ; Define SSE macros using SSE 4.1 instructions
17 ; args 1:XMM, 2:IDX, 3:REG
18 ;
%macro SXMMN 3
        ;
        ; Insert a 64-bit general purpose register into one qword lane of an
        ; XMM register; the other lane is left untouched.
        ;   %1 = destination XMM register
        ;   %2 = qword lane index (0 = bits 0..63, 1 = bits 64..127)
        ;   %3 = 64-bit source register
        ; An XMM register holds only two qwords and PINSRQ decodes only bit 0
        ; of its immediate, so the index is masked to a single bit.
        ;
        pinsrq  %1, %3, (%2 & 1)
%endmacro
22
23 ;
24 ; args 1:XMM, 2:REG, 3:IDX
25 ;
%macro LXMMN 3
        ;
        ; Extract one qword lane of an XMM register into a 64-bit general
        ; purpose register.
        ;   %1 = source XMM register
        ;   %2 = 64-bit destination register
        ;   %3 = qword lane index (0 = bits 0..63, 1 = bits 64..127)
        ; An XMM register holds only two qwords and PEXTRQ decodes only bit 0
        ; of its immediate, so the index is masked to a single bit.
        ;
        pextrq  %2, %1, (%3 & 1)
%endmacro
29
30 ;
31 ; Define AVX macros using AVX instructions
32 ; Save XMM to YMM
33 ; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM
34 ;
%macro SYMMN 3
        ;
        ; Copy a whole XMM register into one 128-bit half of a YMM register.
        ;   %1 = YMM register, used as both source and destination so the
        ;        half that is not selected keeps its contents
        ;   %2 = half selector (0 = bits 0..127, 1 = bits 128..255)
        ;   %3 = XMM register providing the 128-bit value
        ;
        vinsertf128     %1, %1, %3, %2
%endmacro
38
39 ;
40 ; Restore XMM from YMM
41 ; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits)
42 ;
%macro LYMMN 3
        ;
        ; Copy one 128-bit half of a YMM register into an XMM register.
        ;   %1 = source YMM register
        ;   %2 = destination XMM register
        ;   %3 = half selector (0 = bits 0..127, 1 = bits 128..255)
        ;
        vextractf128    %2, %1, %3
%endmacro
46
47 ;
48 ; Upper half of YMM7 to save RBP and RBX. Upper half of YMM8 to save RSI and RDI.
49 ; Modified: XMM5, YMM6, YMM7 and YMM8
50 ;
%macro SAVE_REGS 0
        ;
        ; Stash RBP/RBX in the upper half of YMM7 and RSI/RDI in the upper
        ; half of YMM8, then record RSP via SAVE_RSP.
        ; XMM5 is the staging register; both of its lanes are rewritten before
        ; each insert, so the whole upper half of YMM7/YMM8 is replaced.
        ;
        pinsrq          xmm5, rbp, 0            ; lane 0 <- RBP
        pinsrq          xmm5, rbx, 1            ; lane 1 <- RBX
        vinsertf128     ymm7, ymm7, xmm5, 1     ; YMM7[128:255] <- {RBX:RBP}

        pinsrq          xmm5, rsi, 0            ; lane 0 <- RSI
        pinsrq          xmm5, rdi, 1            ; lane 1 <- RDI
        vinsertf128     ymm8, ymm8, xmm5, 1     ; YMM8[128:255] <- {RDI:RSI}

        SAVE_RSP
%endmacro
60
61 ;
62 ; Upper half of YMM7 to restore RBP and RBX. Upper half of YMM8 to restore RSI and RDI.
63 ; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
64 ;
%macro LOAD_REGS 0
        ;
        ; Reload RBP/RBX from the upper half of YMM7 and RSI/RDI from the
        ; upper half of YMM8, then reload RSP via LOAD_RSP.
        ; XMM5 is used as the staging register and is overwritten.
        ;
        vextractf128    xmm5, ymm7, 1           ; xmm5 <- YMM7[128:255]
        pextrq          rbp, xmm5, 0            ; RBP <- lane 0
        pextrq          rbx, xmm5, 1            ; RBX <- lane 1

        vextractf128    xmm5, ymm8, 1           ; xmm5 <- YMM8[128:255]
        pextrq          rsi, xmm5, 0            ; RSI <- lane 0
        pextrq          rdi, xmm5, 1            ; RDI <- lane 1

        LOAD_RSP
%endmacro
74 ;
75 ; Restore RBP from YMM7[128:191]
76 ; Modified: XMM5 and RBP
77 ;
%macro LOAD_RBP 0
        ;
        ; Reload RBP from the low qword of YMM7's upper half (bits 128..191).
        ; XMM5 is overwritten with YMM7[128:255] along the way.
        ;
        vextractf128    xmm5, ymm7, 1           ; xmm5 <- YMM7[128:255]
        movq            rbp, xmm5               ; RBP  <- xmm5[0:63]
%endmacro
82
83 ;
84 ; Restore RBX from YMM7[192:255]
85 ; Modified: XMM5 and RBX
86 ;
%macro LOAD_RBX 0
        ;
        ; Reload RBX from the high qword of YMM7's upper half (bits 192..255).
        ; XMM5 is overwritten with YMM7[128:255] along the way.
        ;
        vextractf128    xmm5, ymm7, 1           ; xmm5 <- YMM7[128:255]
        pextrq          rbx, xmm5, 1            ; RBX  <- xmm5[64:127]
%endmacro
91
92 ;
93 ; Upper half of YMM6 to save/restore Time Stamp, RSP
94 ;
95 ;
96 ; Save Time Stamp to YMM6[192:255]
97 ; arg 1:general purpose register which holds time stamp
98 ; Modified: XMM5 and YMM6
99 ;
%macro SAVE_TS 1
        ;
        ; Store the time stamp held in %1 into YMM6[192:255].
        ; The upper half of YMM6 is read into XMM5, only lane 1 is replaced,
        ; and the result is written back, so YMM6[128:191] is preserved.
        ;
        vextractf128    xmm5, ymm6, 1           ; xmm5 <- YMM6[128:255]
        pinsrq          xmm5, %1, 1             ; lane 1 <- time stamp
        vinsertf128     ymm6, ymm6, xmm5, 1     ; YMM6[128:255] <- xmm5
%endmacro
105
106 ;
107 ; Restore Time Stamp from YMM6[192:255]
108 ; arg 1:general purpose register where to save time stamp
109 ; Modified: XMM5 and %1
110 ;
%macro LOAD_TS 1
        ;
        ; Fetch the time stamp from YMM6[192:255] into %1.
        ; XMM5 is overwritten with YMM6[128:255] along the way.
        ;
        vextractf128    xmm5, ymm6, 1           ; xmm5 <- YMM6[128:255]
        pextrq          %1, xmm5, 1             ; %1   <- xmm5[64:127]
%endmacro
115
116 ;
117 ; Save RSP to YMM6[128:191]
118 ; Modified: XMM5 and YMM6
119 ;
%macro SAVE_RSP 0
        ;
        ; Store RSP into YMM6[128:191].
        ; The upper half of YMM6 is read into XMM5, only lane 0 is replaced,
        ; and the result is written back, so YMM6[192:255] is preserved.
        ;
        vextractf128    xmm5, ymm6, 1           ; xmm5 <- YMM6[128:255]
        pinsrq          xmm5, rsp, 0            ; lane 0 <- RSP
        vinsertf128     ymm6, ymm6, xmm5, 1     ; YMM6[128:255] <- xmm5
%endmacro
125
126 ;
127 ; Restore RSP from YMM6[128:191]
128 ; Modified: XMM5 and RSP
129 ;
%macro LOAD_RSP 0
        ;
        ; Reload RSP from YMM6[128:191].
        ; XMM5 is overwritten with YMM6[128:255] along the way.
        ;
        vextractf128    xmm5, ymm6, 1           ; xmm5 <- YMM6[128:255]
        movq            rsp, xmm5               ; RSP  <- xmm5[0:63]
%endmacro
134
135 ;
136 ; Upper half of YMM9 to save/restore UCODE status, BFV address
137 ;
138 ;
; Save uCode status to YMM9[128:191]
140 ; arg 1:general purpose register which holds uCode status
141 ; Modified: XMM5 and YMM9
142 ;
%macro SAVE_UCODE_STATUS 1
        ;
        ; Store the uCode status held in %1 into lane 0 of YMM9's upper half,
        ; i.e. YMM9[128:191] (the slot LOAD_UCODE_STATUS reads back).
        ; The upper half is read into XMM5, only lane 0 is replaced, and the
        ; result is written back, so YMM9[192:255] is preserved.
        ;
        vextractf128    xmm5, ymm9, 1           ; xmm5 <- YMM9[128:255]
        pinsrq          xmm5, %1, 0             ; lane 0 <- uCode status
        vinsertf128     ymm9, ymm9, xmm5, 1     ; YMM9[128:255] <- xmm5
%endmacro
148
149 ;
; Restore uCode status from YMM9[128:191]
151 ; arg 1:general purpose register where to save uCode status
152 ; Modified: XMM5 and %1
153 ;
%macro LOAD_UCODE_STATUS 1
        ;
        ; Fetch the uCode status from lane 0 of YMM9's upper half, i.e.
        ; YMM9[128:191], into %1.
        ; XMM5 is overwritten with YMM9[128:255] along the way.
        ;
        vextractf128    xmm5, ymm9, 1           ; xmm5 <- YMM9[128:255]
        movq            %1, xmm5                ; %1   <- xmm5[0:63]
%endmacro
158
159 ;
; Save BFV address to YMM9[192:255]
161 ; arg 1:general purpose register which holds BFV address
162 ; Modified: XMM5 and YMM9
163 ;
%macro SAVE_BFV 1
        ;
        ; Store the BFV address held in %1 into lane 1 of YMM9's upper half,
        ; i.e. YMM9[192:255] (the slot LOAD_BFV reads back).
        ; The upper half is read into XMM5, only lane 1 is replaced, and the
        ; result is written back, so YMM9[128:191] is preserved.
        ;
        vextractf128    xmm5, ymm9, 1           ; xmm5 <- YMM9[128:255]
        pinsrq          xmm5, %1, 1             ; lane 1 <- BFV address
        vinsertf128     ymm9, ymm9, xmm5, 1     ; YMM9[128:255] <- xmm5
%endmacro
169
170 ;
; Restore BFV address from YMM9[192:255]
172 ; arg 1:general purpose register where to save BFV address
173 ; Modified: XMM5 and %1
174 ;
%macro LOAD_BFV 1
        ;
        ; Fetch the BFV address from lane 1 of YMM9's upper half, i.e.
        ; YMM9[192:255], into %1.
        ; XMM5 is overwritten with YMM9[128:255] along the way.
        ;
        vextractf128    xmm5, ymm9, 1           ; xmm5 <- YMM9[128:255]
        pextrq          %1, xmm5, 1             ; %1   <- xmm5[64:127]
%endmacro
179
180 ;
; Stack-less call: save the return address in YMM7[128:191], then jump to Entry
182 ; arg 1:Entry
183 ; Modified: RSI, XMM5, YMM7
184 ;
%macro CALL_YMM 1
        ;
        ; Stack-less call: record the address of the instruction following
        ; this macro in YMM7[128:191], then jump to the entry given in %1.
        ; The callee comes back with RET_YMM, which jumps to that address.
        ;   %1 = entry point (anything valid as the source of "mov rsi, ...")
        ; Modified: RSI, XMM5, YMM7
        ;
        ; NOTE: YMM7[128:191] is the same lane SAVE_REGS uses for RBP, so a
        ; value stored there by SAVE_REGS is overwritten by this macro.
        ;
        ; The return address is formed RIP-relative so that it is correct
        ; wherever the code executes, without needing an absolute relocation.
        ;
        lea     rsi, [rel %%ReturnAddress]
        LYMMN   ymm7, xmm5, 1                   ; xmm5 <- YMM7[128:255]
        SXMMN   xmm5, 0, rsi                    ; lane 0 <- return address
        SYMMN   ymm7, 1, xmm5                   ; YMM7[128:255] <- xmm5
        mov     rsi, %1
        jmp     rsi
%%ReturnAddress:
%endmacro
194 ;
195 ; Restore RIP from YMM7[128:191]
196 ; Modified: RSI, XMM5
197 ;
%macro RET_YMM 0
        ;
        ; Stack-less return: fetch the address held in YMM7[128:191] and jump
        ; to it. RSI and XMM5 are overwritten.
        ;
        vextractf128    xmm5, ymm7, 1           ; xmm5 <- YMM7[128:255]
        movq            rsi, xmm5               ; RSI  <- xmm5[0:63]
        jmp             rsi
%endmacro
203
%macro ENABLE_SSE 0
        ;
        ; Initialize the x87 FPU, verify SSE and SSE4.1 support, enable SSE in
        ; CR4 and load MXCSR. Spins forever at SseError if either CPUID
        ; feature bit is missing.
        ; Modified: RAX, RBX, RCX, RDX (CPUID outputs), CR4, x87 state, MXCSR
        ;           and flags.
        ;
        ; The labels below are not macro-local, so this macro can be expanded
        ; only once per module.
        ;
        jmp     NextAddress                     ; skip the inline data
        align   4
        ;
        ; Float control word initial value:
        ; all exceptions masked, double-precision, round-to-nearest
        ;
FpuControlWord  DW      027Fh
        ;
        ; MXCSR initial value:
        ; all exceptions masked, round-to-nearest, flush-to-zero disabled
        ; (01F80h sets only the six exception mask bits, bits 7..12)
        ;
MmxControlWord  DQ      01F80h
SseError:
        ;
        ; Processor has to support SSE
        ;
        jmp     SseError
NextAddress:
        finit
        lea     rax, [rel FpuControlWord]       ; RIP-relative: no absolute address
        fldcw   [rax]

        ;
        ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
        ; whether the processor supports SSE instruction.
        ;
        mov     eax, 1
        cpuid
        bt      edx, 25
        jnc     SseError

        ;
        ; SSE 4.1 support (CPUID.01H:ECX[bit 19]); needed by PINSRQ/PEXTRQ
        ;
        bt      ecx, 19
        jnc     SseError

        ;
        ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
        ;
        mov     rax, cr4
        or      rax, 00000600h
        mov     cr4, rax

        ;
        ; The processor supports SSE, so ldmxcsr can be used now
        ;
        lea     rax, [rel MmxControlWord]       ; RIP-relative: no absolute address
        ldmxcsr [rax]
%endmacro
259
%macro ENABLE_AVX 0
        ;
        ; Verify AVX support, set CR4.OSXSAVE and enable the SSE and AVX state
        ; components in XCR0. Spins forever at AvxError if the CPUID AVX
        ; feature bit is clear.
        ; Modified: RAX, RBX, RCX, RDX (CPUID/XGETBV outputs), CR4, XCR0 and
        ;           flags.
        ; The labels below are not macro-local, so this macro can be expanded
        ; only once per module.
        ;
        mov     eax, 1
        cpuid
        and     ecx, 10000000h                  ; isolate CPUID.01H:ECX.AVX[bit 28]
        jnz     EnableAvx                       ; non-zero result: AVX is present
AvxError:
        ;
        ; Processor has to support AVX
        ;
        jmp     AvxError
EnableAvx:
        ;
        ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction
        ;
        mov     rax, cr4
        or      rax, 00040000h
        mov     cr4, rax

        xor     ecx, ecx                        ; XCR index 0
        xgetbv                                  ; result in edx:eax
        or      eax, 00000006h                  ; XCR0 bit #1 (SSE state) and bit #2 (AVX state)
        xsetbv
%endmacro
284