]>
Commit | Line | Data |
---|---|---|
cf1d4549 JY |
1 | ;------------------------------------------------------------------------------\r |
2 | ;\r | |
3 | ; Copyright (c) 2015, Intel Corporation. All rights reserved.<BR>\r | |
4 | ; This program and the accompanying materials\r | |
5 | ; are licensed and made available under the terms and conditions of the BSD License\r | |
6 | ; which accompanies this distribution. The full text of the license may be found at\r | |
7 | ; http://opensource.org/licenses/bsd-license.php.\r | |
8 | ;\r | |
9 | ; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r | |
10 | ; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r | |
11 | ;\r | |
12 | ; Abstract:\r | |
13 | ;\r | |
14 | ; Provide macro for register save/restore using SSE registers\r | |
15 | ;\r | |
16 | ;------------------------------------------------------------------------------\r | |
17 | \r | |
18 | ;\r | |
19 | ; Define SSE instruction set\r | |
20 | ;\r | |
21 | %ifdef USE_SSE41_FLAG\r | |
;
; SXMMN (SSE 4.1 variant)
; Copy a 32-bit general-purpose register into one dword lane of an XMM
; register using a single PINSRD.
;   arg 1: XMM - destination XMM register
;   arg 2: IDX - dword lane index (only the low two bits are used)
;   arg 3: REG - 32-bit source register
;
%macro SXMMN        3
            pinsrd  %1, %3, ((%2) & 3)
%endmacro
28 | \r | |
;
; LXMMN (SSE 4.1 variant)
; Read one dword lane of an XMM register into a 32-bit general-purpose
; register using a single PEXTRD.  Note the argument order differs from SXMMN.
;   arg 1: XMM - source XMM register
;   arg 2: REG - 32-bit destination register
;   arg 3: IDX - dword lane index (only the low two bits are used)
;
%macro LXMMN        3
            pextrd  %2, %1, ((%3) & 3)
%endmacro
35 | %else\r | |
;
; SXMMN (SSE 2 variant)
; Copy a 32-bit general-purpose register into one dword lane of an XMM
; register.  PINSRW moves only 16 bits, so the dword is written as two word
; lanes: the low half first, then the high half after rotating it into the
; low 16 bits of the source register.
;   arg 1: XMM - destination XMM register
;   arg 2: IDX - dword lane index (only the low two bits are used)
;   arg 3: REG - 32-bit source register
;
; The source register is rotated back before the macro ends, so its value is
; preserved, but it holds a half-swapped value between the ROR and the ROL.
; ROR/ROL modify the flags.
;
%macro SXMMN        3
            pinsrw  %1, %3, ((%2) & 3) * 2          ; low word of the lane
            ror     %3, 16                          ; bring the high half down
            pinsrw  %1, %3, ((%2) & 3) * 2 + 1      ; high word of the lane
            rol     %3, 16                          ; undo the rotation
%endmacro
45 | \r | |
;
; LXMMN (SSE 2 variant)
; Read one dword lane of an XMM register into a 32-bit general-purpose
; register.  Note the argument order differs from SXMMN.
;   arg 1: XMM - source XMM register (holds its original content on exit)
;   arg 2: REG - 32-bit destination register
;   arg 3: IDX - dword lane index (only the low two bits are used, matching
;                the SSE 4.1 variant and SXMMN)
;
; MOVD can only read lane 0, so the register is rotated right by IDX dword
; lanes, lane 0 is read, and the register is then rotated by the remaining
; (4 - IDX) & 3 lanes to restore the original layout.
;
; 0E4h (11 10 01 00b) is the identity PSHUFD selector.  Shifting the repeated
; pattern 0E4E4h right by two bits per lane and keeping the low byte yields
; the selector for a rotation by that many lanes:
;   0 -> 0E4h, 1 -> 039h, 2 -> 04Eh, 3 -> 093h
;
%macro LXMMN        3
            pshufd  %1, %1, ((0E4E4h >> (((%3) & 3) * 2)) & 0FFh)
            movd    %2, %1
            pshufd  %1, %1, ((0E4E4h >> (((4 - (%3)) & 3) * 2)) & 0FFh)
%endmacro
54 | %endif\r | |
55 | \r | |
;
; XMM7 to save/restore EBP, EBX, ESI, EDI
;
; SAVE_REGS
; Store EBP, EBX, ESI and EDI into dword lanes 0..3 of XMM7, then store ESP
; through SAVE_ESP.
;
%macro SAVE_REGS    0
            SXMMN   xmm7, 0, ebp                    ; lane 0 <- EBP
            SXMMN   xmm7, 1, ebx                    ; lane 1 <- EBX
            SXMMN   xmm7, 2, esi                    ; lane 2 <- ESI
            SXMMN   xmm7, 3, edi                    ; lane 3 <- EDI
            SAVE_ESP
%endmacro
66 | \r | |
;
; LOAD_REGS
; Reload EBP, EBX, ESI and EDI from dword lanes 0..3 of XMM7, then reload ESP
; through LOAD_ESP.
;
%macro LOAD_REGS    0
            LXMMN   xmm7, ebp, 0                    ; EBP <- lane 0
            LXMMN   xmm7, ebx, 1                    ; EBX <- lane 1
            LXMMN   xmm7, esi, 2                    ; ESI <- lane 2
            LXMMN   xmm7, edi, 3                    ; EDI <- lane 3
            LOAD_ESP
%endmacro
74 | \r | |
;
; XMM6 to save/restore EAX, EDX, ECX, ESP
; Dword lanes used by the macros in this file:
;   lane 0 = ESP, lane 1 = EAX, lane 2 = EDX, lane 3 = ECX
;
; LOAD_EAX
; Reload EAX from dword lane 1 of XMM6.
;
%macro LOAD_EAX     0
            LXMMN   xmm6, eax, 1
%endmacro
81 | \r | |
;
; SAVE_EAX
; Store EAX into dword lane 1 of XMM6.
;
%macro SAVE_EAX     0
            SXMMN   xmm6, 1, eax
%endmacro
85 | \r | |
;
; LOAD_EDX
; Reload EDX from dword lane 2 of XMM6.
;
%macro LOAD_EDX     0
            LXMMN   xmm6, edx, 2
%endmacro
89 | \r | |
;
; SAVE_EDX
; Store EDX into dword lane 2 of XMM6.
;
%macro SAVE_EDX     0
            SXMMN   xmm6, 2, edx
%endmacro
93 | \r | |
;
; SAVE_ECX
; Store ECX into dword lane 3 of XMM6.
;
%macro SAVE_ECX     0
            SXMMN   xmm6, 3, ecx
%endmacro
97 | \r | |
;
; LOAD_ECX
; Reload ECX from dword lane 3 of XMM6.
;
%macro LOAD_ECX     0
            LXMMN   xmm6, ecx, 3
%endmacro
101 | \r | |
;
; SAVE_ESP
; Store ESP into dword lane 0 of XMM6.
; With the SSE 2 variant of SXMMN, ESP is rotated by 16 bits and rotated back
; inside the macro, so it does not hold a valid stack pointer in between.
;
%macro SAVE_ESP     0
            SXMMN   xmm6, 0, esp
%endmacro
105 | \r | |
;
; LOAD_ESP
; Reload ESP from dword lane 0 of XMM6.  Lane 0 is directly readable with
; MOVD, so no shuffle or extract is needed.
;
%macro LOAD_ESP     0
            movd    esp, xmm6
%endmacro
;
; XMM5 for calling stack
;
; CALL_XMM
; Stack-less "call".  XMM5 is treated as a four-entry stack of 32-bit return
; addresses: the existing entries are shifted up by one dword lane (the entry
; in the top lane is discarded), the address of the instruction following this
; macro is written to lane 0, and control is transferred to Entry through ESI.
;   arg 1: Entry - target to jump to
;
; Clobbers ESI and XMM5; without USE_SSE41_FLAG the ROR also modifies flags.
;
%macro CALL_XMM     1
            pslldq  xmm5, 4                         ; make room in lane 0
            mov     esi, %%ReturnAddress
%ifdef USE_SSE41_FLAG
            pinsrd  xmm5, esi, 0
%else
            pinsrw  xmm5, esi, 0                    ; low word of return address
            ror     esi, 16
            pinsrw  xmm5, esi, 1                    ; high word of return address
%endif
            mov     esi, %1
            jmp     esi
%%ReturnAddress:
%endmacro
126 | \r | |
;
; RET_XMM
; Stack-less "return", the counterpart of CALL_XMM: take the return address
; from dword lane 0 of XMM5, shift the remaining entries down by one lane and
; jump to the address.
;
; Clobbers ESI and XMM5.
;
%macro RET_XMM      0
            movd    esi, xmm5                       ; ESI <- most recent return address
            psrldq  xmm5, 4                         ; pop it from the XMM5 stack
            jmp     esi
%endmacro
132 | \r | |
;
; ENABLE_SSE
; Initialize the x87 FPU, verify that the processor reports the SSE features
; required by the macros in this file, enable SSE in CR4 and load MXCSR.
; If a required feature is missing, execution spins forever at SseError.
;
; Clobbers EAX, EBX, ECX, EDX (CPUID), flags, CR4 and the FPU/MXCSR state.
;
; NOTE: FpuControlWord, MmxControlWord, SseError and NextAddress are ordinary
; (not %%-local) labels, so this macro can be expanded only once per assembly
; unit.
;
%macro ENABLE_SSE   0
            ;
            ; Initialize floating point units
            ; (jump over the constants and the error loop embedded below)
            ;
            jmp     NextAddress
align 4
            ;
            ; Float control word initial value:
            ; all exceptions masked, double-precision, round-to-nearest
            ;
FpuControlWord       DW      027Fh
            ;
            ; MXCSR (SSE control/status register) initial value:
            ; all exceptions masked, round-to-nearest.
            ; Flush-to-zero (bit 15) and denormals-are-zero (bit 6) are clear.
            ;
MmxControlWord       DD      01F80h
SseError:
            ;
            ; Processor has to support the required SSE level
            ;
            jmp     SseError
NextAddress:
            finit
            fldcw   [FpuControlWord]

            ;
            ; Use the CPUID instruction to test whether the processor supports
            ; SSE  (CPUID.01H:EDX.SSE[bit 25] = 1) and
            ; SSE2 (CPUID.01H:EDX.SSE2[bit 26] = 1).
            ; SSE2 is required because the register save/restore macros use
            ; PINSRW/PSHUFD/MOVD/PSLLDQ/PSRLDQ on XMM registers.
            ;
            mov     eax, 1
            cpuid
            bt      edx, 25
            jnc     SseError
            bt      edx, 26
            jnc     SseError

%ifdef USE_SSE41_FLAG
            ;
            ; SSE 4.1 support (CPUID.01H:ECX.SSE4_1[bit 19] = 1)
            ;
            bt      ecx, 19
            jnc     SseError
%endif

            ;
            ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
            ;
            mov     eax, cr4
            or      eax, 00000600h
            mov     cr4, eax

            ;
            ; The processor supports SSE instructions and CR4.OSFXSR is set,
            ; so the ldmxcsr instruction can be used
            ;
            ldmxcsr [MmxControlWord]
%endmacro