]>
Commit | Line | Data |
---|---|---|
1 | ;------------------------------------------------------------------------------\r | |
2 | ;\r | |
3 | ; Copyright (c) 2015 - 2022, Intel Corporation. All rights reserved.<BR>\r | |
4 | ; SPDX-License-Identifier: BSD-2-Clause-Patent\r | |
5 | ;\r | |
6 | ; Abstract:\r | |
7 | ;\r | |
8 | ; Provide macro for register save/restore using SSE registers\r | |
9 | ;\r | |
10 | ;------------------------------------------------------------------------------\r | |
11 | \r | |
12 | ;\r | |
13 | ; Define SSE instruction set\r | |
14 | ;\r | |
15 | %ifdef USE_SSE41_FLAG\r | |
;
; Define SSE macros using SSE 4.1 instructions
;
; SXMMN - store a 32-bit register into one dword slot of an XMM register.
;   %1 : XMM register that receives the value
;   %2 : slot index; only the low two bits are used (slots 0..3)
;   %3 : 32-bit source register (left unchanged)
;
%macro SXMMN 3
        pinsrd  %1, %3, (%2 & 3)        ; slot[%2 mod 4] of %1 = %3
%endmacro
22 | \r | |
;
; LXMMN - load a 32-bit register from one dword slot of an XMM register.
;   %1 : XMM register to read (left unchanged)
;   %2 : 32-bit destination register
;   %3 : slot index; only the low two bits are used (slots 0..3)
;
%macro LXMMN 3
        pextrd  %2, %1, (%3 & 3)        ; %2 = slot[%3 mod 4] of %1
%endmacro
29 | %else\r | |
;
; Define SSE macros using SSE 2 instructions
;
; SXMMN - store a 32-bit register into one dword slot of an XMM register.
;   %1 : XMM register that receives the value
;   %2 : slot index; only the low two bits are used (slots 0..3)
;   %3 : 32-bit source register
;
; The dword is written as two 16-bit halves. The source register is
; rotated by 16 bits in between and rotated back afterwards, so it holds
; its original value on exit, but the rotates modify the flags.
;
%macro SXMMN 3
        pinsrw  %1, %3, (%2 & 3) * 2        ; low word  -> word lane 2*slot
        ror     %3, 16                      ; bring the high word into bits 15:0
        pinsrw  %1, %3, (%2 & 3) * 2 + 1    ; high word -> word lane 2*slot + 1
        rol     %3, 16                      ; undo the rotation of the source
%endmacro
39 | \r | |
;
; LXMMN - load a 32-bit register from one dword slot of an XMM register.
;   %1 : XMM register to read (holds its original contents on exit)
;   %2 : 32-bit destination register
;   %3 : slot index; only the low two bits are used (slots 0..3), the same
;        wrapping that SXMMN and the SSE 4.1 variant of LXMMN apply
;
; 0E4h is the identity PSHUFD selector (3,2,1,0). Shifting the repeated
; pattern 0E4E4E4h right by 2*slot bits gives a selector that rotates the
; wanted slot down to lane 0, where MOVD can read it. The second PSHUFD
; applies the inverse rotation: slots 0 and 2 are their own inverse, odd
; slots need the selector a further 4 bits along.
;
; The index is masked so the shift never exceeds 10 bits and always stays
; inside the 24-bit pattern; an unmasked index >= 9 would produce a bogus
; restore selector and leave %1 corrupted.
;
%macro LXMMN 3
        pshufd  %1, %1, ((0E4E4E4h >> ((%3 & 3) * 2)) & 0FFh)                   ; rotate slot -> lane 0
        movd    %2, %1                                                          ; read lane 0
        pshufd  %1, %1, ((0E4E4E4h >> ((%3 & 3) * 2 + (%3 & 1) * 4)) & 0FFh)    ; rotate back
%endmacro
48 | %endif\r | |
49 | \r | |
50 | ;\r | |
51 | ; XMM7 to save/restore EBP - slot 0, EBX - slot 1, ESI - slot 2, EDI - slot 3\r | |
52 | ;\r | |
;
; SAVE_REGS - stash EBP, EBX, ESI and EDI in XMM7 slots 0..3, then save
; ESP through SAVE_ESP. No memory or stack is used.
;
%macro SAVE_REGS 0
        SXMMN   xmm7, 0, ebp            ; XMM7[0] = EBP
        SXMMN   xmm7, 1, ebx            ; XMM7[1] = EBX
        SXMMN   xmm7, 2, esi            ; XMM7[2] = ESI
        SXMMN   xmm7, 3, edi            ; XMM7[3] = EDI
        SAVE_ESP                        ; ESP goes to its own register slot
%endmacro
60 | \r | |
;
; LOAD_REGS - counterpart of SAVE_REGS: reload EBP, EBX, ESI and EDI from
; XMM7 slots 0..3, then restore ESP through LOAD_ESP.
;
%macro LOAD_REGS 0
        LXMMN   xmm7, ebp, 0            ; EBP = XMM7[0]
        LXMMN   xmm7, ebx, 1            ; EBX = XMM7[1]
        LXMMN   xmm7, esi, 2            ; ESI = XMM7[2]
        LXMMN   xmm7, edi, 3            ; EDI = XMM7[3]
        LOAD_ESP                        ; ESP last, from its own register slot
%endmacro
68 | \r | |
69 | ;\r | |
70 | ; XMM6 to save/restore ESP - slot 0, EAX - slot 1, EDX - slot 2, ECX - slot 3\r | |
71 | ;\r | |
;
; LOAD_ESP - restore ESP from XMM6 slot 0.
; MOVD reads the low dword of the XMM register, which is slot 0.
;
%macro LOAD_ESP 0
        movd    esp, xmm6               ; ESP = XMM6[0]
%endmacro
75 | \r | |
;
; SAVE_ESP - store ESP in XMM6 slot 0.
; When USE_SSE41_FLAG is not defined, SXMMN rotates its source register
; while storing, so ESP is briefly rotated by 16 bits inside this macro.
;
%macro SAVE_ESP 0
        SXMMN   xmm6, 0, esp            ; XMM6[0] = ESP
%endmacro
79 | \r | |
;
; LOAD_EAX - reload EAX from XMM6 slot 1.
;
%macro LOAD_EAX 0
        LXMMN   xmm6, eax, 1            ; EAX = XMM6[1]
%endmacro
83 | \r | |
;
; SAVE_EAX - store EAX in XMM6 slot 1.
;
%macro SAVE_EAX 0
        SXMMN   xmm6, 1, eax            ; XMM6[1] = EAX
%endmacro
87 | \r | |
;
; LOAD_EDX - reload EDX from XMM6 slot 2.
;
%macro LOAD_EDX 0
        LXMMN   xmm6, edx, 2            ; EDX = XMM6[2]
%endmacro
91 | \r | |
;
; SAVE_EDX - store EDX in XMM6 slot 2.
;
%macro SAVE_EDX 0
        SXMMN   xmm6, 2, edx            ; XMM6[2] = EDX
%endmacro
95 | \r | |
;
; LOAD_ECX - reload ECX from XMM6 slot 3.
;
%macro LOAD_ECX 0
        LXMMN   xmm6, ecx, 3            ; ECX = XMM6[3]
%endmacro
99 | \r | |
;
; SAVE_ECX - store ECX in XMM6 slot 3.
;
%macro SAVE_ECX 0
        SXMMN   xmm6, 3, ecx            ; XMM6[3] = ECX
%endmacro
103 | \r | |
;
; XMM5 slot 0 for calling stack
;
; CALL_XMM - stack-less call: record the return address in XMM5 slot 0 and
; jump to the target. The callee comes back with RET_XMM.
;   %1 : Entry - address to transfer control to
;
; ESI is used as scratch and is overwritten.
;
%macro CALL_XMM 1
        mov     esi, %%ReturnAddress    ; address of the instruction after the jump
        SXMMN   xmm5, 0, esi            ; XMM5[0] = return address
        mov     esi, %1                 ; ESI = Entry
        jmp     esi
%%ReturnAddress:
%endmacro
114 | \r | |
;
; RET_XMM - stack-less return: fetch the address held in XMM5 slot 0 and
; jump to it. ESI is used as scratch and is overwritten.
;
%macro RET_XMM 0
        LXMMN   xmm5, esi, 0            ; ESI = XMM5[0]
        jmp     esi
%endmacro
119 | \r | |
120 | ;\r | |
121 | ; XMM5 slot 1 for uCode status\r | |
122 | ;\r | |
;
; LOAD_UCODE_STATUS - reload the uCode status from XMM5 slot 1 into EAX.
;
%macro LOAD_UCODE_STATUS 0
        LXMMN   xmm5, eax, 1            ; EAX = XMM5[1]
%endmacro
126 | \r | |
;
; SAVE_UCODE_STATUS - store the uCode status held in EAX into XMM5 slot 1.
;
%macro SAVE_UCODE_STATUS 0
        SXMMN   xmm5, 1, eax            ; XMM5[1] = EAX
%endmacro
130 | \r | |
;
; ENABLE_SSE - initialise the x87 FPU, verify SSE support and enable SSE.
;
; Steps:
;   1. FINIT, then load the FPU control word 027Fh.
;   2. CPUID leaf 1: require EDX bit 25 (SSE) and, when USE_SSE41_FLAG is
;      defined, ECX bit 19 (SSE 4.1).
;   3. Set CR4 bit 9 (OSFXSR) and bit 10 (OSXMMEXCPT).
;   4. Load MXCSR with 01F80h.
;
; If a required feature is missing, execution spins forever at %%SseError.
; The two control-word constants are embedded in the instruction stream
; and jumped over.
;
; Clobbers: EAX, EBX, ECX, EDX (cpuid) and the flags.
;
; All labels are macro-local (%%), as in CALL_XMM, so the macro can be
; expanded more than once in a source file without duplicate-label errors
; and does not add names to the including file's label namespace.
;
%macro ENABLE_SSE 0
        ;
        ; Initialize floating point units
        ;
        jmp     %%NextAddress
align 4
        ;
        ; Float control word initial value:
        ; all exceptions masked, double-precision, round-to-nearest
        ;
%%FpuControlWord:   DW      027Fh
        ;
        ; MXCSR initial value:
        ; all exceptions masked (bits 7..12), round-to-nearest;
        ; flush-to-zero (bit 15) and denormals-are-zero (bit 6) stay clear
        ;
%%MmxControlWord:   DD      01F80h
%%SseError:
        ;
        ; Processor has to support SSE
        ;
        jmp     %%SseError
%%NextAddress:
        finit
        fldcw   [%%FpuControlWord]

        ;
        ; Use CpuId instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
        ; whether the processor supports SSE instruction.
        ;
        mov     eax, 1
        cpuid
        bt      edx, 25
        jnc     %%SseError

%ifdef USE_SSE41_FLAG
        ;
        ; SSE 4.1 support (CPUID.01H:ECX[bit 19])
        ;
        bt      ecx, 19
        jnc     %%SseError
%endif

        ;
        ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
        ;
        mov     eax, cr4
        or      eax, 00000600h
        mov     cr4, eax

        ;
        ; The processor should support SSE instruction and we can use
        ; ldmxcsr instruction
        ;
        ldmxcsr [%%MmxControlWord]
%endmacro