1 ;------------------------------------------------------------------------------
3 ; Copyright (c) 2022, Intel Corporation. All rights reserved.<BR>
4 ; SPDX-License-Identifier: BSD-2-Clause-Patent
8 ; Provide macro for register save/restore using SSE registers
10 ;------------------------------------------------------------------------------
13 ; Define SSE and AVX instruction set
16 ; Define SSE macros using SSE 4.1 instructions
17 ; args 1:XMM, 2:IDX, 3:REG
20 pinsrq %1, %3, (%2 & 3)
24 ; args 1:XMM, 2:REG, 3:IDX
27 pextrq %2, %1, (%3 & 3)
31 ; Define AVX macros using AVX instructions
33 ; args 1:YMM, 2:IDX (0 - lower 128bits, 1 - upper 128bits), 3:XMM
36 vinsertf128 %1, %1, %3, %2
40 ; Restore XMM from YMM
41 ; args 1:YMM, 2:XMM, 3:IDX (0 - lower 128bits, 1 - upper 128bits)
44 vextractf128 %2, %1, %3
48 ; Upper half of YMM7 to save RBP and RBX. Upper half of YMM8 to save RSI and RDI.
49 ; Modified: XMM5, YMM6, YMM7 and YMM8
62 ; Upper half of YMM7 to restore RBP and RBX. Upper half of YMM8 to restore RSI and RDI.
63 ; Modified: XMM5, RBP, RBX, RSI, RDI and RSP
75 ; Restore RBP from YMM7[128:191]
76 ; Modified: XMM5 and RBP
84 ; Restore RBX from YMM7[192:255]
85 ; Modified: XMM5 and RBX
93 ; Upper half of YMM6 to save/restore Time Stamp, RSP
96 ; Save Time Stamp to YMM6[192:255]
97 ; arg 1:general purpose register which holds time stamp
98 ; Modified: XMM5 and YMM6
107 ; Restore Time Stamp from YMM6[192:255]
108 ; arg 1:general purpose register where to save time stamp
109 ; Modified: XMM5 and %1
117 ; Save RSP to YMM6[128:191]
118 ; Modified: XMM5 and YMM6
127 ; Restore RSP from YMM6[128:191]
128 ; Modified: XMM5 and RSP
136 ; Upper half of YMM9 to save/restore UCODE status, BFV address
139 ; Save uCode status to YMM9[192:255]
140 ; arg 1:general purpose register which holds uCode status
141 ; Modified: XMM5 and YMM9
143 %macro SAVE_UCODE_STATUS 1
150 ; Restore uCode status from YMM9[192:255]
151 ; arg 1:general purpose register where to save uCode status
152 ; Modified: XMM5 and %1
154 %macro LOAD_UCODE_STATUS 1
160 ; Save BFV address to YMM9[128:191]
161 ; arg 1:general purpose register which holds BFV address
162 ; Modified: XMM5 and YMM9
171 ; Restore BFV address from YMM9[128:191]
172 ; arg 1:general purpose register where to save BFV address
173 ; Modified: XMM5 and %1
181 ; YMM7[128:191] for calling stack
183 ; Modified: RSI, XMM5, YMM7
186 mov rsi, %%ReturnAddress
195 ; Restore RIP from YMM7[128:191]
196 ; Modified: RSI, XMM5
206 ; Initialize floating point units
211 ; Float control word initial value:
212 ; all exceptions masked, double-precision, round-to-nearest
214 FpuControlWord DW 027Fh
216 ; Multimedia-extensions control word (MXCSR) initial value:
217 ; all exceptions masked, round-to-nearest, flush-to-zero disabled (01F80h leaves FZ, bit 15, clear)
219 MmxControlWord DQ 01F80h
222 ; Processor has to support SSE
227 mov rax, FpuControlWord
231 ; Use the CPUID instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
232 ; whether the processor supports SSE instructions.
246 ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
253 ; The processor is expected to support SSE instructions here, so the
254 ; ldmxcsr instruction can be used
256 mov rax, MmxControlWord
264 cmp ecx, 10000000h ; check AVX feature flag
268 ; Processor has to support AVX
273 ; Set OSXSAVE bit (bit #18) to enable the xgetbv/xsetbv instructions
280 xgetbv ; result in edx:eax
281 or eax, 00000006h ; Set XCR0 bit #1 and bit #2 to enable SSE state and AVX state