]>
Commit | Line | Data |
---|---|---|
c8ec22a2 JY |
1 | ;------------------------------------------------------------------------------\r |
2 | ;\r | |
9da59186 | 3 | ; Copyright (c) 2014 - 2015, Intel Corporation. All rights reserved.<BR>\r |
c8ec22a2 JY |
4 | ; This program and the accompanying materials\r |
5 | ; are licensed and made available under the terms and conditions of the BSD License\r | |
6 | ; which accompanies this distribution. The full text of the license may be found at\r | |
7 | ; http://opensource.org/licenses/bsd-license.php.\r | |
8 | ;\r | |
9 | ; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r | |
10 | ; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r | |
11 | ;\r | |
12 | ; Abstract:\r | |
13 | ;\r | |
14 | ; Provide macro for register save/restore using SSE registers\r | |
15 | ;\r | |
16 | ;------------------------------------------------------------------------------\r | |
17 | \r | |
18 | ;\r | |
19 | ; Define SSE instruction set\r | |
20 | ;\r | |
21 | IFDEF USE_SSE41_FLAG\r | |
22 | ;\r | |
23 | ; Define SSE macros using SSE 4.1 instructions\r | |
24 | ;\r | |
;
; SXMMN (SSE4.1 variant)
;   Store the 32-bit register REG into dword lane (IDX AND 3) of XMM
;   with a single pinsrd.
;
SXMMN       MACRO   XMM, IDX, REG
            pinsrd  XMM, REG, (IDX AND 3)
            ENDM
28 | \r | |
;
; LXMMN (SSE4.1 variant)
;   Load the 32-bit register REG from dword lane (IDX AND 3) of XMM
;   with a single pextrd. Note the argument order (XMM, REG, IDX)
;   differs from SXMMN (XMM, IDX, REG).
;
LXMMN       MACRO   XMM, REG, IDX
            pextrd  REG, XMM, (IDX AND 3)
            ENDM
32 | ELSE\r | |
33 | ;\r | |
34 | ; Define SSE macros using SSE 2 instructions\r | |
35 | ;\r | |
;
; SXMMN (SSE2 variant)
;   Store the 32-bit register REG into dword lane (IDX AND 3) of XMM.
;   pinsrw only inserts 16 bits, so the value is written as two words:
;   the low half first, then (after rotating REG by 16) the high half.
;   The final rol undoes the ror, so REG holds its original value on exit.
;   The rotates modify the arithmetic flags.
;
SXMMN       MACRO   XMM, IDX, REG
            pinsrw  XMM, REG, (IDX AND 3) * 2       ; low word of REG
            ror     REG, 16                         ; bring high word down
            pinsrw  XMM, REG, (IDX AND 3) * 2 + 1   ; high word of REG
            rol     REG, 16                         ; restore REG
            ENDM
42 | \r | |
;
; LXMMN (SSE2 variant)
;   Load the 32-bit register REG from dword lane IDX (0..3) of XMM.
;   movd can only read lane 0, so:
;     1. pshufd rotates the four dwords so that lane IDX lands in lane 0
;        (0E4h is the identity shuffle; shifting the repeated pattern by
;        IDX * 2 bits yields the rotated selector),
;     2. movd copies lane 0 into REG,
;     3. a second pshufd applies the complementary rotation so XMM ends
;        up with its original lane order.
;
LXMMN       MACRO   XMM, REG, IDX
            pshufd  XMM, XMM, (0E4E4E4h SHR (IDX * 2)) AND 0FFh
            movd    REG, XMM
            pshufd  XMM, XMM, (0E4E4E4h SHR (IDX * 2 + (IDX AND 1) * 4)) AND 0FFh
            ENDM
48 | ENDIF\r | |
49 | \r | |
9da59186 JY |
50 | ;\r |
51 | ; XMM7 to save/restore EBP, EBX, ESI, EDI\r | |
52 | ; \r | |
c8ec22a2 JY |
;
; SAVE_REGS
;   Stash EBP, EBX, ESI and EDI in dword lanes 0..3 of XMM7, then save
;   ESP through SAVE_ESP.
;
SAVE_REGS   MACRO
            SXMMN   xmm7, 0, ebp
            SXMMN   xmm7, 1, ebx
            SXMMN   xmm7, 2, esi
            SXMMN   xmm7, 3, edi
            SAVE_ESP
            ENDM
60 | \r | |
;
; LOAD_REGS
;   Reload EBP, EBX, ESI and EDI from dword lanes 0..3 of XMM7 (the
;   layout written by SAVE_REGS), then reload ESP through LOAD_ESP.
;
LOAD_REGS   MACRO
            LXMMN   xmm7, ebp, 0
            LXMMN   xmm7, ebx, 1
            LXMMN   xmm7, esi, 2
            LXMMN   xmm7, edi, 3
            LOAD_ESP
            ENDM
68 | \r | |
9da59186 JY |
69 | ;\r |
70 | ; XMM6 to save/restore EAX, EDX, ECX, ESP\r | |
71 | ; \r | |
c8ec22a2 JY |
;
; LOAD_EAX - reload EAX from dword lane 1 of XMM6.
;
LOAD_EAX    MACRO
            LXMMN   xmm6, eax, 1
            ENDM
75 | \r | |
;
; SAVE_EAX - stash EAX in dword lane 1 of XMM6.
;
SAVE_EAX    MACRO
            SXMMN   xmm6, 1, eax
            ENDM
79 | \r | |
;
; LOAD_EDX - reload EDX from dword lane 2 of XMM6.
;
LOAD_EDX    MACRO
            LXMMN   xmm6, edx, 2
            ENDM
83 | \r | |
;
; SAVE_EDX - stash EDX in dword lane 2 of XMM6.
;
SAVE_EDX    MACRO
            SXMMN   xmm6, 2, edx
            ENDM
87 | \r | |
;
; SAVE_ECX - stash ECX in dword lane 3 of XMM6.
;
SAVE_ECX    MACRO
            SXMMN   xmm6, 3, ecx
            ENDM
91 | \r | |
;
; LOAD_ECX - reload ECX from dword lane 3 of XMM6.
;
LOAD_ECX    MACRO
            LXMMN   xmm6, ecx, 3
            ENDM
95 | \r | |
;
; SAVE_ESP - stash ESP in dword lane 0 of XMM6.
;
SAVE_ESP    MACRO
            SXMMN   xmm6, 0, esp
            ENDM
99 | \r | |
;
; LOAD_ESP - reload ESP from dword lane 0 of XMM6.
;   Lane 0 is the lane movd reads, so a plain movd is used here instead
;   of LXMMN.
;
LOAD_ESP    MACRO
            movd    esp, xmm6
            ENDM
9da59186 JY |
103 | \r |
104 | ;\r | |
105 | ; XMM5 for calling stack\r | |
106 | ;\r | |
;
; CALL_XMM Entry
;   Stack-less "call": pushes the address of the instruction following
;   the macro onto a small return stack kept in XMM5, then jumps to Entry.
;   The target is expected to come back with RET_XMM.
;
;   Push = shift XMM5 left by one dword (pslldq 4) and insert the return
;   address into dword lane 0. XMM5 holds four dwords, so the value that
;   was in lane 3 is shifted out on every push.
;
;   Clobbers: ESI (holds the return address, then the target), and the
;   flags on the non-SSE4.1 path (ror).
;
CALL_XMM    MACRO   Entry
            local   ReturnAddress
            mov     esi, offset ReturnAddress
            pslldq  xmm5, 4                     ; make room in lane 0
IFDEF USE_SSE41_FLAG
            pinsrd  xmm5, esi, 0
ELSE
            pinsrw  xmm5, esi, 0                ; low word of return address
            ror     esi, 16
            pinsrw  xmm5, esi, 1                ; high word; ESI is reloaded below
ENDIF
            mov     esi, Entry
            jmp     esi
ReturnAddress:
            ENDM
9da59186 JY |
122 | \r |
;
; RET_XMM
;   Stack-less "ret": pops the most recent return address from the XMM5
;   return stack (read dword lane 0, then shift XMM5 right by one dword)
;   and jumps to it.
;
;   Clobbers: ESI.
;
RET_XMM     MACRO
            movd    esi, xmm5                   ; top of the XMM5 return stack
            psrldq  xmm5, 4                     ; pop it
            jmp     esi
            ENDM
128 | \r | |
;
; ENABLE_SSE
;   Initializes the x87 FPU, verifies through CPUID that the processor
;   supports SSE and SSE2 (and SSE4.1 when USE_SSE41_FLAG is defined),
;   sets CR4.OSFXSR and CR4.OSXMMEXCPT, and loads MXCSR.
;
;   If a required feature is missing, execution spins forever at SseError.
;
;   Clobbers: EAX, EBX, ECX, EDX (cpuid), flags, x87 state, CR4, MXCSR.
;
;   NOTE: only NextAddress is declared LOCAL. FpuControlWord,
;   MmxControlWord and SseError are ordinary labels, so this macro can be
;   expanded at most once per module.
;
ENABLE_SSE  MACRO
            ;
            ; Initialize floating point units
            ;
            local   NextAddress
            jmp     NextAddress                 ; skip over the inline data
ALIGN 4
            ;
            ; Float control word initial value:
            ; all exceptions masked, double-precision, round-to-nearest
            ;
FpuControlWord       DW      027Fh
            ;
            ; Multimedia-extensions control word (MXCSR):
            ; all exceptions masked, round-to-nearest, flush-to-zero disabled
            ; (FZ, bit 15, is clear in 01F80h)
            ;
MmxControlWord       DD      01F80h
SseError:
            ;
            ; Processor has to support the required SSE level; hang here otherwise
            ;
            jmp     SseError
NextAddress:
            finit
            fldcw   FpuControlWord

            ;
            ; Use the CPUID instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test
            ; whether the processor supports SSE instructions.
            ;
            mov     eax, 1
            cpuid
            bt      edx, 25
            jnc     SseError

            ;
            ; The register save/restore macros in this file use SSE2
            ; instructions (pshufd, pslldq/psrldq, movd/pinsrw on XMM registers),
            ; so also require CPUID.01H:EDX.SSE2[bit 26] = 1.
            ;
            bt      edx, 26
            jnc     SseError

IFDEF USE_SSE41_FLAG
            ;
            ; SSE 4.1 support (CPUID.01H:ECX.SSE4_1[bit 19] = 1)
            ;
            bt      ecx, 19
            jnc     SseError
ENDIF

            ;
            ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)
            ;
            mov     eax, cr4
            or      eax, 00000600h
            mov     cr4, eax

            ;
            ; The processor supports SSE and CR4.OSFXSR is set, so the
            ; ldmxcsr instruction can be used
            ;
            ldmxcsr MmxControlWord
            ENDM
174 | ;\r | |
175 | mov eax, cr4\r | |
176 | or eax, 00000600h\r | |
177 | mov cr4, eax\r | |
178 | \r | |
179 | ;\r | |
180 | ; The processor should support SSE instruction and we can use\r | |
181 | ; ldmxcsr instruction\r | |
182 | ;\r | |
183 | ldmxcsr MmxControlWord\r | |
184 | ENDM\r |