;------------------------------------------------------------------------------\r
;\r
-; Copyright (c) 2014, Intel Corporation. All rights reserved.<BR>\r
+; Copyright (c) 2014 - 2015, Intel Corporation. All rights reserved.<BR>\r
; This program and the accompanying materials\r
; are licensed and made available under the terms and conditions of the BSD License\r
; which accompanies this distribution. The full text of the license may be found at\r
ENDM\r
ENDIF\r
\r
-\r
+;\r
+; XMM7 is used to save and restore EBP, EBX, ESI and EDI.\r
+; NOTE(review): SXMMN is defined outside this excerpt; its middle operand is presumably the dword slot within XMM7 - confirm.\r
SAVE_REGS MACRO\r
SXMMN xmm7, 0, ebp\r
SXMMN xmm7, 1, ebx\r
LOAD_ESP\r
ENDM\r
\r
+;\r
+; XMM6 is used to save and restore EAX, EDX, ECX and ESP.\r
+; ESP occupies the low dword of XMM6 (LOAD_ESP reads it back with movd); LOAD_EAX fetches EAX through LXMMN with index 1.\r
LOAD_EAX MACRO\r
LXMMN xmm6, eax, 1\r
ENDM\r
LOAD_ESP MACRO\r
movd esp, xmm6\r
ENDM\r
-\r
-ENABLE_SSE MACRO\r
- mov eax, cr4\r
- or eax, 00000600h\r
- mov cr4, eax\r
+ \r
+; XMM5 serves as a register-only call stack of 32-bit return addresses; these macros never touch ESP or memory.\r
+; CALL_XMM shifts XMM5 left by 4 bytes, stores the return address in dword 0 and jumps to Entry; RET_XMM pops dword 0 and jumps to it.\r
+; XMM5 holds at most four return addresses (a fifth CALL_XMM shifts the oldest one out); both macros clobber ESI.\r
+CALL_XMM MACRO Entry\r
+ local ReturnAddress\r
+ mov esi, offset ReturnAddress\r
+ pslldq xmm5, 4\r
+IFDEF USE_SSE41_FLAG\r
+ pinsrd xmm5, esi, 0\r
+ELSE \r
+ pinsrw xmm5, esi, 0\r
+ ror esi, 16\r
+ pinsrw xmm5, esi, 1 \r
+ENDIF \r
+ mov esi, Entry\r
+ jmp esi\r
+ReturnAddress: \r
ENDM\r
+ \r
+RET_XMM MACRO \r
+ movd esi, xmm5\r
+ psrldq xmm5, 4\r
+ jmp esi\r
+ ENDM\r
+ \r
+ENABLE_SSE MACRO\r
+ ; Initialize the x87 FPU, verify SSE support and enable SSE. Clobbers EAX, EBX, ECX, EDX (cpuid), EFLAGS and CR4.\r
+ ; The control-word constants are embedded in the code stream, so execution first jumps over them to NextAddress.\r
+ ; NOTE(review): only NextAddress is LOCAL; FpuControlWord, MmxControlWord and SseError would presumably be redefined if this macro were expanded twice in one module.\r
+ local NextAddress \r
+ jmp NextAddress\r
+ALIGN 4\r
+ ;\r
+ ; x87 FPU control word initial value (027Fh):\r
+ ; all exceptions masked, double-precision (53-bit), round-to-nearest\r
+ ;\r
+FpuControlWord DW 027Fh\r
+ ;\r
+ ; MXCSR (SSE control/status register) initial value (01F80h, the architectural reset default):\r
+ ; all exceptions masked, round-to-nearest, flush-to-zero disabled (FZ, bit 15, is clear)\r
+ ;\r
+MmxControlWord DD 01F80h \r
+SseError: \r
+ ;\r
+ ; The processor has to support SSE; if a required feature is missing, spin here forever\r
+ ;\r
+ jmp SseError \r
+NextAddress: \r
+ finit\r
+ fldcw FpuControlWord\r
+\r
+ ;\r
+ ; Use the CPUID instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test\r
+ ; whether the processor supports SSE instructions.\r
+ ;\r
+ mov eax, 1\r
+ cpuid\r
+ bt edx, 25\r
+ jnc SseError\r
+\r
+IFDEF USE_SSE41_FLAG\r
+ ;\r
+ ; SSE 4.1 support (CPUID.01H:ECX.SSE4_1[bit 19] = 1); ECX still holds the result of the cpuid above\r
+ ;\r
+ bt ecx, 19 \r
+ jnc SseError\r
+ENDIF\r
+\r
+ ;\r
+ ; Set CR4.OSFXSR (bit 9) and CR4.OSXMMEXCPT (bit 10)\r
+ ;\r
+ mov eax, cr4\r
+ or eax, 00000600h\r
+ mov cr4, eax\r
+\r
+ ;\r
+ ; SSE support has been confirmed and enabled in CR4, so the\r
+ ; ldmxcsr instruction can now be used to load the initial MXCSR value\r
+ ;\r
+ ldmxcsr MmxControlWord\r
+ ENDM\r