]>
Commit | Line | Data |
---|---|---|
c8ec22a2 JY |
1 | ;------------------------------------------------------------------------------\r |
2 | ;\r | |
9da59186 | 3 | ; Copyright (c) 2014 - 2015, Intel Corporation. All rights reserved.<BR>\r |
16a16ea6 | 4 | ; SPDX-License-Identifier: BSD-2-Clause-Patent\r |
c8ec22a2 JY |
5 | ;\r |
6 | ; Abstract:\r | |
7 | ;\r | |
8 | ; Provides macros for saving and restoring registers using SSE (XMM) registers\r | |
9 | ;\r | |
10 | ;------------------------------------------------------------------------------\r | |
11 | \r | |
12 | ;\r | |
13 | ; Define SSE instruction set\r | |
14 | ;\r | |
15 | IFDEF USE_SSE41_FLAG\r | |
16 | ;\r | |
17 | ; Define SSE macros using SSE 4.1 instructions\r | |
18 | ;\r | |
;
; SXMMN (SSE4.1 form) - store a 32-bit register into one dword lane of an
; XMM register.
;   XMM : destination XMM register
;   IDX : lane number; only (IDX AND 3) is used
;   REG : 32-bit source register (only read)
;
SXMMN       MACRO   XMM, IDX, REG
            pinsrd  XMM, REG, (IDX AND 3)
            ENDM
22 | \r | |
;
; LXMMN (SSE4.1 form) - load one dword lane of an XMM register into a
; 32-bit register.
;   XMM : source XMM register (only read)
;   REG : 32-bit destination register
;   IDX : lane number; only (IDX AND 3) is used
;
LXMMN       MACRO   XMM, REG, IDX
            pextrd  REG, XMM, (IDX AND 3)
            ENDM
26 | ELSE\r | |
27 | ;\r | |
28 | ; Define SSE macros using SSE 2 instructions\r | |
29 | ;\r | |
;
; SXMMN (SSE2 form) - store a 32-bit register into dword lane (IDX AND 3)
; of an XMM register, as two 16-bit inserts.
;   XMM : destination XMM register
;   IDX : lane number; only (IDX AND 3) is used
;   REG : 32-bit source register; rotated by 16 and rotated back, so its
;         value is the same on exit (the rotates modify EFLAGS)
;
SXMMN       MACRO   XMM, IDX, REG
            pinsrw  XMM, REG, (IDX AND 3) * 2       ; low word  -> word lane 2n
            ror     REG, 16                         ; expose the high word
            pinsrw  XMM, REG, (IDX AND 3) * 2 + 1   ; high word -> word lane 2n+1
            rol     REG, 16                         ; put REG back as it was
            ENDM
36 | \r | |
;
; LXMMN (SSE2 form) - load dword lane (IDX AND 3) of an XMM register into
; a 32-bit register.
;   XMM : source XMM register (lane order is the same on exit)
;   REG : 32-bit destination register
;   IDX : lane number; only (IDX AND 3) is used, matching SXMMN and the
;         SSE4.1 forms of these macros
;
; SSE2 has no dword extract, so the lanes are rotated until the wanted one
; is in lane 0, copied out with movd, and rotated back.  0E4h (11 10 01 00b)
; is the identity pshufd selector; shifting the repeated pattern 0E4E4E4h
; right by 2*n bits and keeping the low byte yields the selector that
; rotates the lanes by n positions.  The second pshufd rotates by
; (4 - n) AND 3 positions, i.e. the inverse rotation.
;
LXMMN       MACRO   XMM, REG, IDX
            pshufd  XMM, XMM, (0E4E4E4h SHR ((IDX AND 3) * 2)) AND 0FFh
            movd    REG, XMM
            pshufd  XMM, XMM, (0E4E4E4h SHR (((4 - (IDX AND 3)) AND 3) * 2)) AND 0FFh
            ENDM
42 | ENDIF\r | |
43 | \r | |
9da59186 JY |
44 | ;\r |
45 | ; XMM7 to save/restore EBP, EBX, ESI, EDI\r | |
46 | ; \r | |
c8ec22a2 JY |
;
; SAVE_REGS - stash EBP, EBX, ESI and EDI in dword lanes 0..3 of XMM7,
; then stash ESP by expanding SAVE_ESP.
;
SAVE_REGS   MACRO
            SXMMN   xmm7, 0, ebp        ; lane 0 <- EBP
            SXMMN   xmm7, 1, ebx        ; lane 1 <- EBX
            SXMMN   xmm7, 2, esi        ; lane 2 <- ESI
            SXMMN   xmm7, 3, edi        ; lane 3 <- EDI
            SAVE_ESP
            ENDM
54 | \r | |
;
; LOAD_REGS - reload EBP, EBX, ESI and EDI from dword lanes 0..3 of XMM7,
; then reload ESP by expanding LOAD_ESP.
;
LOAD_REGS   MACRO
            LXMMN   xmm7, ebp, 0        ; EBP <- lane 0
            LXMMN   xmm7, ebx, 1        ; EBX <- lane 1
            LXMMN   xmm7, esi, 2        ; ESI <- lane 2
            LXMMN   xmm7, edi, 3        ; EDI <- lane 3
            LOAD_ESP
            ENDM
62 | \r | |
9da59186 JY |
63 | ;\r |
64 | ; XMM6 to save/restore EAX, EDX, ECX, ESP\r | |
65 | ; \r | |
c8ec22a2 JY |
;
; LOAD_EAX - reload EAX from dword lane 1 of XMM6.
;
LOAD_EAX    MACRO
            LXMMN   xmm6, eax, 1
            ENDM
69 | \r | |
;
; SAVE_EAX - stash EAX in dword lane 1 of XMM6.
;
SAVE_EAX    MACRO
            SXMMN   xmm6, 1, eax
            ENDM
73 | \r | |
;
; LOAD_EDX - reload EDX from dword lane 2 of XMM6.
;
LOAD_EDX    MACRO
            LXMMN   xmm6, edx, 2
            ENDM
77 | \r | |
;
; SAVE_EDX - stash EDX in dword lane 2 of XMM6.
;
SAVE_EDX    MACRO
            SXMMN   xmm6, 2, edx
            ENDM
81 | \r | |
;
; SAVE_ECX - stash ECX in dword lane 3 of XMM6.
;
SAVE_ECX    MACRO
            SXMMN   xmm6, 3, ecx
            ENDM
85 | \r | |
;
; LOAD_ECX - reload ECX from dword lane 3 of XMM6.
;
LOAD_ECX    MACRO
            LXMMN   xmm6, ecx, 3
            ENDM
89 | \r | |
;
; SAVE_ESP - stash ESP in dword lane 0 of XMM6.
;
SAVE_ESP    MACRO
            SXMMN   xmm6, 0, esp
            ENDM
93 | \r | |
;
; LOAD_ESP - reload ESP from dword lane 0 of XMM6.
; Lane 0 is the low dword, so a plain movd reads it directly and XMM6 is
; not modified.
;
LOAD_ESP    MACRO
            movd    esp, xmm6
            ENDM
9da59186 JY |
97 | \r |
98 | ;\r | |
99 | ; XMM5 for calling stack\r | |
100 | ;\r | |
;
; CALL_XMM - call without a memory stack: push a return address onto the
; XMM5 "call stack" and jump to Entry.
;   Entry : jump target, loaded with "mov esi, Entry"
;
; XMM5 is shifted left by one dword and the return address is written to
; dword lane 0.  The register holds four dwords, so the address in the top
; lane is shifted out when a fifth one is pushed.
; ESI is overwritten.
;
CALL_XMM    MACRO   Entry
            local   ReturnPoint
            mov     esi, offset ReturnPoint     ; address just past the jmp below
            pslldq  xmm5, 4                     ; make room in lane 0
IFDEF USE_SSE41_FLAG
            pinsrd  xmm5, esi, 0
ELSE
            pinsrw  xmm5, esi, 0                ; low word of the return address
            ror     esi, 16
            pinsrw  xmm5, esi, 1                ; high word of the return address
ENDIF
            mov     esi, Entry
            jmp     esi
ReturnPoint:
            ENDM
9da59186 JY |
116 | \r |
;
; RET_XMM - return from a CALL_XMM call: take the return address from
; dword lane 0 of XMM5, shift XMM5 right by one dword, and jump to that
; address.
; ESI is overwritten.
;
RET_XMM     MACRO
            movd    esi, xmm5                   ; ESI <- most recent return address
            psrldq  xmm5, 4                     ; drop it from the XMM5 stack
            jmp     esi
            ENDM
122 | \r | |
;
; ENABLE_SSE - initialise the x87 FPU, verify the required SSE level and
; enable SSE.
;
; Steps:
;   1. finit, then load the x87 control word.
;   2. CPUID.01H: require SSE (EDX bit 25) and SSE2 (EDX bit 26); when
;      USE_SSE41_FLAG is defined, also require SSE4.1 (ECX bit 19).
;   3. Set CR4.OSFXSR (bit 9) and CR4.OSXMMEXCPT (bit 10).
;   4. Load MXCSR.
;
; If a required feature is missing, execution spins forever at the error
; label inside this macro.
;
; The two control words are emitted inline and jumped over.  Every label
; is LOCAL so the macro can be expanded more than once in a module.
;
; Overwrites EAX, EBX, ECX, EDX (CPUID) and EFLAGS.
;
ENABLE_SSE  MACRO
            local   NextAddress, FpuControlWord, MmxControlWord, SseError
            jmp     NextAddress
ALIGN 4
            ;
            ; x87 control word initial value:
            ; all exceptions masked, double-precision, round-to-nearest
            ;
FpuControlWord      DW      027Fh
            ;
            ; MXCSR initial value:
            ; all exceptions masked, round-to-nearest, flush-to-zero off
            ; (FZ, bit 15, is clear; 01F80h is the MXCSR reset value)
            ;
MmxControlWord      DD      01F80h
SseError:
            ;
            ; Processor lacks a required SSE feature: stay here
            ;
            jmp     SseError
NextAddress:
            ;
            ; Initialize floating point units
            ;
            finit
            fldcw   FpuControlWord

            ;
            ; Use the CPUID instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to
            ; test whether the processor supports SSE instructions.
            ;
            mov     eax, 1
            cpuid
            bt      edx, 25
            jnc     SseError

            ;
            ; SSE2 support (CPUID.01H:EDX.SSE2[bit 26] = 1): the XMM forms of
            ; pinsrw, pshufd, movd, pslldq and psrldq used by the macros in
            ; this file are SSE2 instructions.
            ;
            bt      edx, 26
            jnc     SseError

IFDEF USE_SSE41_FLAG
            ;
            ; SSE 4.1 support (CPUID.01H:ECX.SSE4_1[bit 19] = 1)
            ;
            bt      ecx, 19
            jnc     SseError
ENDIF

            ;
            ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
            ;
            mov     eax, cr4
            or      eax, 00000600h
            mov     cr4, eax

            ;
            ; SSE is now verified and enabled, so ldmxcsr can be used
            ;
            ldmxcsr MmxControlWord
            ENDM
168 | ;\r | |
169 | mov eax, cr4\r | |
170 | or eax, 00000600h\r | |
171 | mov cr4, eax\r | |
172 | \r | |
173 | ;\r | |
174 | ; The processor should support SSE instruction and we can use\r | |
175 | ; ldmxcsr instruction\r | |
176 | ;\r | |
177 | ldmxcsr MmxControlWord\r | |
178 | ENDM\r |