+ \r
+RET_XMM MACRO \r
+ ;\r
+ ; Jump to the address stored in the low dword of xmm5 and remove that\r
+ ; dword from xmm5.\r
+ ; Modifies: esi, xmm5. Control does not fall through this macro.\r
+ ; NOTE(review): presumably the counterpart of a call-style macro that\r
+ ; pushes a return address into xmm5 - confirm against that macro.\r
+ ;\r
+ ; esi = target address taken from xmm5[31:0]\r
+ movd esi, xmm5\r
+ ; shift the whole register right by 4 bytes so the next dword becomes\r
+ ; the low dword\r
+ psrldq xmm5, 4\r
+ jmp esi\r
+ ENDM\r
+ \r
+ENABLE_SSE MACRO\r
+ ;\r
+ ; Initialize floating point units\r
+ ;\r
+ ; Resets the x87 FPU and loads its control word, checks with CPUID that\r
+ ; SSE (and SSE4.1 when USE_SSE41_FLAG is defined) is available, sets\r
+ ; CR4.OSFXSR and CR4.OSXMMEXCPT, then loads MXCSR.\r
+ ; If a required feature is missing, execution spins forever at SseError.\r
+ ; Modifies: eax, ebx, ecx, edx (written by cpuid), flags, x87 state,\r
+ ; CR4 and MXCSR.\r
+ ;\r
+ ; NOTE(review): only NextAddress is declared local. FpuControlWord,\r
+ ; MmxControlWord and SseError are ordinary labels, so a second expansion\r
+ ; in the same module would presumably redefine them - confirm the macro\r
+ ; is expanded only once.\r
+ ;\r
+ local NextAddress \r
+ ;\r
+ ; Skip over the inline constants and the error loop below\r
+ ;\r
+ jmp NextAddress\r
+ALIGN 4\r
+ ;\r
+ ; Float control word initial value:\r
+ ; all exceptions masked, double-precision, round-to-nearest\r
+ ;\r
+FpuControlWord DW 027Fh\r
+ ;\r
+ ; MXCSR (SSE control/status register) initial value:\r
+ ; all exceptions masked, round-to-nearest. Flush-to-zero (bit 15) and\r
+ ; denormals-are-zero (bit 6) are NOT set by this value.\r
+ ; NOTE(review): this DD follows the 2-byte word above, so it sits 2 bytes\r
+ ; past the ALIGN 4 boundary.\r
+ ;\r
+MmxControlWord DD 01F80h \r
+SseError: \r
+ ;\r
+ ; Processor has to support SSE\r
+ ; (a missing feature ends up here and loops forever)\r
+ ;\r
+ jmp SseError \r
+NextAddress: \r
+ finit\r
+ fldcw FpuControlWord\r
+\r
+ ;\r
+ ; Use the CPUID instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test\r
+ ; whether the processor supports SSE instructions.\r
+ ;\r
+ mov eax, 1\r
+ cpuid\r
+ bt edx, 25\r
+ jnc SseError\r
+\r
+IFDEF USE_SSE41_FLAG\r
+ ;\r
+ ; SSE 4.1 support (CPUID.01H:ECX[bit 19], still valid from the cpuid above)\r
+ ;\r
+ bt ecx, 19 \r
+ jnc SseError\r
+ENDIF\r
+\r
+ ;\r
+ ; Set OSFXSR bit (bit #9) & OSXMMEXCPT bit (bit #10)\r
+ ;\r
+ mov eax, cr4\r
+ or eax, 00000600h\r
+ mov cr4, eax\r
+\r
+ ;\r
+ ; SSE support has been verified and enabled in CR4 above, so the\r
+ ; ldmxcsr instruction can now be used to load MXCSR\r
+ ;\r
+ ldmxcsr MmxControlWord\r
+ ENDM\r