]>
Commit | Line | Data |
---|---|---|
00aa71ce TK |
1 | ;------------------------------------------------------------------------------\r |
2 | ;\r | |
3 | ; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>\r | |
4 | ; SPDX-License-Identifier: BSD-2-Clause-Patent\r | |
5 | ;\r | |
6 | ; Abstract:\r | |
7 | ;\r | |
;   Provide macros for register save/restore using SSE (XMM) and AVX (YMM) registers
9 | ;\r | |
10 | ;------------------------------------------------------------------------------\r | |
11 | \r | |
12 | ;\r | |
13 | ; Define SSE and AVX instruction set\r | |
14 | ;\r | |
;
; Define SSE macros using SSE 4.1 instructions
;
; SXMMN - insert a 64-bit general purpose register into one qword lane of an
;         XMM register. The other lane of the XMM register is left unchanged.
; args 1:XMM, 2:IDX (0 - lower 64bits, 1 - upper 64bits), 3:REG
;
; PINSRQ only decodes bit 0 of its immediate, so IDX is masked to one bit.
;
%macro SXMMN 3
  pinsrq  %1, %3, (%2 & 1)
%endmacro
22 | \r | |
;
; LXMMN - extract one qword lane of an XMM register into a 64-bit general
;         purpose register. The XMM register is not modified.
; args 1:XMM, 2:REG, 3:IDX (0 - lower 64bits, 1 - upper 64bits)
;
; PEXTRQ only decodes bit 0 of its immediate, so IDX is masked to one bit.
;
%macro LXMMN 3
  pextrq  %2, %1, (%3 & 1)
%endmacro
29 | \r | |
;
; Define AVX macros using AVX instructions
;
; SYMMN - save an XMM register into one 128-bit half of a YMM register.
;         The other half of the YMM register is left unchanged.
; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM
;
%macro SYMMN 3
  vinsertf128 %1, %1, %3, %2          ; %1 is both the source and the destination
%endmacro
38 | \r | |
;
; LYMMN - restore an XMM register from one 128-bit half of a YMM register.
;         The YMM register is not modified.
; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits)
;
%macro LYMMN 3
  vextractf128 %2, %1, %3
%endmacro
46 | \r | |
;
; SAVE_REGS - save RBP, RBX, RSI, RDI and RSP into YMM registers.
;   YMM7[128:191] = RBP, YMM7[192:255] = RBX
;   YMM8[128:191] = RSI, YMM8[192:255] = RDI
;   RSP is stored by SAVE_RSP (upper half of YMM6).
; Modified: XMM5, YMM6, YMM7 and YMM8
;
%macro SAVE_REGS 0
  ; Pack RBP:RBX into XMM5 and park it in the upper half of YMM7
  SXMMN   xmm5, 0, rbp
  SXMMN   xmm5, 1, rbx
  SYMMN   ymm7, 1, xmm5

  ; Pack RSI:RDI into XMM5 and park it in the upper half of YMM8
  SXMMN   xmm5, 0, rsi
  SXMMN   xmm5, 1, rdi
  SYMMN   ymm8, 1, xmm5

  SAVE_RSP
%endmacro
60 | \r | |
;
; LOAD_REGS - restore RBP, RBX, RSI, RDI and RSP from YMM registers.
;   RBP = YMM7[128:191], RBX = YMM7[192:255]
;   RSI = YMM8[128:191], RDI = YMM8[192:255]
;   RSP is reloaded by LOAD_RSP (upper half of YMM6).
; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
;
%macro LOAD_REGS 0
  ; Upper half of YMM7 holds RBP:RBX
  LYMMN   ymm7, xmm5, 1
  LXMMN   xmm5, rbp, 0
  LXMMN   xmm5, rbx, 1

  ; Upper half of YMM8 holds RSI:RDI
  LYMMN   ymm8, xmm5, 1
  LXMMN   xmm5, rsi, 0
  LXMMN   xmm5, rdi, 1

  LOAD_RSP
%endmacro
;
; LOAD_RBP - restore RBP from YMM7[128:191]
; Modified: XMM5 and RBP
;
%macro LOAD_RBP 0
  LYMMN   ymm7, xmm5, 1               ; XMM5 = upper half of YMM7
  movq    rbp, xmm5                   ; RBP  = lower 64 bits of XMM5
%endmacro
82 | \r | |
;
; LOAD_RBX - restore RBX from YMM7[192:255]
; Modified: XMM5 and RBX
;
%macro LOAD_RBX 0
  LYMMN   ymm7, xmm5, 1               ; XMM5 = upper half of YMM7
  LXMMN   xmm5, rbx, 1                ; RBX  = upper 64 bits of XMM5
%endmacro
91 | \r | |
92 | ;\r | |
93 | ; Upper half of YMM6 to save/restore Time Stamp, RSP\r | |
94 | ;\r | |
;
; SAVE_TS - save Time Stamp to YMM6[192:255]
; arg 1:general purpose register which holds time stamp
; Modified: XMM5 and YMM6
;
; The upper half of YMM6 is read, updated and written back so that
; YMM6[128:191] keeps its current value.
;
%macro SAVE_TS 1
  LYMMN   ymm6, xmm5, 1               ; fetch the current upper half of YMM6
  SXMMN   xmm5, 1, %1                 ; replace only the upper 64 bits
  SYMMN   ymm6, 1, xmm5               ; write the pair back
%endmacro
105 | \r | |
;
; LOAD_TS - restore Time Stamp from YMM6[192:255]
; arg 1:general purpose register which receives the time stamp
; Modified: XMM5 and %1
;
%macro LOAD_TS 1
  LYMMN   ymm6, xmm5, 1               ; XMM5 = upper half of YMM6
  LXMMN   xmm5, %1, 1                 ; %1   = upper 64 bits of XMM5
%endmacro
115 | \r | |
;
; SAVE_RSP - save RSP to YMM6[128:191]
; Modified: XMM5 and YMM6
;
; The upper half of YMM6 is read, updated and written back so that
; YMM6[192:255] keeps its current value.
;
%macro SAVE_RSP 0
  LYMMN   ymm6, xmm5, 1               ; fetch the current upper half of YMM6
  SXMMN   xmm5, 0, rsp                ; replace only the lower 64 bits
  SYMMN   ymm6, 1, xmm5               ; write the pair back
%endmacro
125 | \r | |
;
; LOAD_RSP - restore RSP from YMM6[128:191]
; Modified: XMM5 and RSP
;
%macro LOAD_RSP 0
  LYMMN   ymm6, xmm5, 1               ; XMM5 = upper half of YMM6
  movq    rsp, xmm5                   ; RSP  = lower 64 bits of XMM5
%endmacro
134 | \r | |
135 | ;\r | |
136 | ; Upper half of YMM9 to save/restore UCODE status, BFV address\r | |
137 | ;\r | |
;
; SAVE_UCODE_STATUS - save uCode status to YMM9[128:191]
; arg 1:general purpose register which holds uCode status
; Modified: XMM5 and YMM9
;
; The status is written to qword 0 of the upper half of YMM9, the same lane
; LOAD_UCODE_STATUS reads back. The upper half is read, updated and written
; back so that YMM9[192:255] keeps its current value.
;
%macro SAVE_UCODE_STATUS 1
  LYMMN   ymm9, xmm5, 1               ; fetch the current upper half of YMM9
  SXMMN   xmm5, 0, %1                 ; replace only the lower 64 bits
  SYMMN   ymm9, 1, xmm5               ; write the pair back
%endmacro
148 | \r | |
;
; LOAD_UCODE_STATUS - restore uCode status from YMM9[128:191]
; arg 1:general purpose register which receives the uCode status
; Modified: XMM5 and %1
;
%macro LOAD_UCODE_STATUS 1
  LYMMN   ymm9, xmm5, 1               ; XMM5 = upper half of YMM9
  movq    %1, xmm5                    ; %1   = lower 64 bits of XMM5
%endmacro
158 | \r | |
;
; SAVE_BFV - save BFV address to YMM9[192:255]
; arg 1:general purpose register which holds BFV address
; Modified: XMM5 and YMM9
;
; The address is written to qword 1 of the upper half of YMM9, the same lane
; LOAD_BFV reads back. The upper half is read, updated and written back so
; that YMM9[128:191] keeps its current value.
;
%macro SAVE_BFV 1
  LYMMN   ymm9, xmm5, 1               ; fetch the current upper half of YMM9
  SXMMN   xmm5, 1, %1                 ; replace only the upper 64 bits
  SYMMN   ymm9, 1, xmm5               ; write the pair back
%endmacro
169 | \r | |
;
; LOAD_BFV - restore BFV address from YMM9[192:255]
; arg 1:general purpose register which receives the BFV address
; Modified: XMM5 and %1
;
%macro LOAD_BFV 1
  LYMMN   ymm9, xmm5, 1               ; XMM5 = upper half of YMM9
  LXMMN   xmm5, %1, 1                 ; %1   = upper 64 bits of XMM5
%endmacro
179 | \r | |
11d8abcb DC |
180 | ;\r |
181 | ; Upper half of YMM10 to save/restore RCX\r | |
182 | ;\r | |
;
; SAVE_RCX - save RCX to YMM10[128:191]
; Modified: XMM5 and YMM10
;
; The upper half of YMM10 is read, updated and written back so that
; YMM10[192:255] keeps its current value.
;
%macro SAVE_RCX 0
  LYMMN   ymm10, xmm5, 1              ; fetch the current upper half of YMM10
  SXMMN   xmm5, 0, rcx                ; replace only the lower 64 bits
  SYMMN   ymm10, 1, xmm5              ; write the pair back
%endmacro
193 | \r | |
;
; LOAD_RCX - restore RCX from YMM10[128:191]
; Modified: XMM5 and RCX
;
%macro LOAD_RCX 0
  LYMMN   ymm10, xmm5, 1              ; XMM5 = upper half of YMM10
  movq    rcx, xmm5                   ; RCX  = lower 64 bits of XMM5
%endmacro
203 | \r | |
00aa71ce TK |
;
; CALL_YMM - stackless call: store the return address in YMM7[128:191] and
;            jump to the entry. The callee returns with RET_YMM.
; arg 1:Entry
; Modified: RSI, XMM5, YMM7
;
; NOTE: YMM7[128:191] is the same slot SAVE_REGS uses for RBP, so a value
;       stored there by SAVE_REGS is overwritten by this macro.
;
%macro CALL_YMM 1
            ; Record where execution resumes after the callee returns
            mov     rsi, %%ReturnAddress
            LYMMN   ymm7, xmm5, 1             ; keep YMM7[192:255] as is
            SXMMN   xmm5, 0, rsi
            SYMMN   ymm7, 1, xmm5

            ; Transfer control to the entry point
            mov     rsi, %1
            jmp     rsi
%%ReturnAddress:
%endmacro
;
; RET_YMM - stackless return: restore RIP from YMM7[128:191], the slot
;           written by CALL_YMM.
; Modified: RSI, XMM5
;
%macro RET_YMM 0
            LYMMN   ymm7, xmm5, 1             ; XMM5 = upper half of YMM7
            movq    rsi, xmm5                 ; RSI  = saved return address
            jmp     rsi
%endmacro
227 | \r | |
;
; ENABLE_SSE - initialize the x87 FPU, check that the processor supports SSE
;              and SSE 4.1, then enable SSE instructions and load MXCSR.
;
; If CPUID does not report SSE (CPUID.01H:EDX[25]) or SSE 4.1
; (CPUID.01H:ECX[19]) the macro spins forever at its internal error label.
;
; Preserved: RCX (kept in R10 across CPUID)
; Modified:  RAX, RBX, RDX, R10, flags, CR4, x87 FPU state and MXCSR
;
; All labels are macro-local (%%) so the macro can be expanded more than
; once in the same source file.
;
%macro ENABLE_SSE 0
            ;
            ; Initialize floating point units
            ;
            jmp     %%NextAddress
align 4
            ;
            ; Float control word initial value:
            ; all exceptions masked, double-precision, round-to-nearest
            ;
%%FpuControlWord:
            DW      027Fh
            ;
            ; MXCSR initial value:
            ; all exceptions masked, round-to-nearest,
            ; flush-to-zero (bit 15) and denormals-are-zero (bit 6) cleared
            ;
%%MmxControlWord:
            DQ      01F80h
%%SseError:
            ;
            ; Processor has to support SSE
            ;
            jmp     %%SseError
%%NextAddress:
            finit
            mov     rax, %%FpuControlWord
            fldcw   [rax]

            ;
            ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
            ; whether the processor supports SSE instruction.
            ;
            mov     r10, rcx                  ; CPUID overwrites ECX
            mov     eax, 1
            cpuid
            bt      edx, 25
            jnc     %%SseError

            ;
            ; SSE 4.1 support (CPUID.01H:ECX[bit 19] = 1)
            ;
            bt      ecx, 19
            jnc     %%SseError
            mov     rcx, r10

            ;
            ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
            ;
            mov     rax, cr4
            or      rax, 00000600h
            mov     cr4, rax

            ;
            ; The processor should support SSE instruction and we can use
            ; ldmxcsr instruction
            ;
            mov     rax, %%MmxControlWord
            ldmxcsr [rax]
%endmacro
285 | \r | |
;
; ENABLE_AVX - check that the processor supports AVX, then enable the
;              XGETBV/XSETBV instructions and the SSE and AVX state in XCR0.
;
; If CPUID does not report AVX (CPUID.01H:ECX[28]) the macro spins forever
; at its internal error label.
;
; Preserved: RCX (kept in R10 across CPUID/XGETBV/XSETBV)
; Modified:  RAX, RBX, RDX, R10, flags, CR4 and XCR0
;
; All labels are macro-local (%%) so the macro can be expanded more than
; once in the same source file.
;
%macro ENABLE_AVX 0
            mov     r10, rcx                  ; CPUID overwrites ECX
            mov     eax, 1
            cpuid
            test    ecx, 10000000h            ; check AVX feature flag (bit #28)
            jnz     %%EnableAvx
%%AvxError:
            ;
            ; Processor has to support AVX
            ;
            jmp     %%AvxError
%%EnableAvx:
            ;
            ; Set OSXSAVE bit (bit #18) to enable xgetbv/xsetbv instruction
            ;
            mov     rax, cr4
            or      rax, 00040000h
            mov     cr4, rax

            xor     ecx, ecx                  ; index 0 selects XCR0
            xgetbv                            ; result in edx:eax
            or      eax, 00000006h            ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state
            xsetbv
            mov     rcx, r10
%endmacro
312 | \r |