+ jmp NextAddress\r
+.align 4\r
+ #\r
+ # Float control word initial value:\r
+ # all exceptions masked, double-precision, round-to-nearest\r
+ #\r
+ASM_PFX(mFpuControlWord): .word 0x027F\r
+ #\r
+ # Multimedia-extensions control word:\r
+ # all exceptions masked, round-to-nearest; flush-to-zero (FZ, bit 15) is left clear\r
+ #\r
+ASM_PFX(mMmxControlWord): .long 0x01F80\r
+SseError: \r
+ #\r
+ # Processor has to support SSE\r
+ #\r
+ jmp SseError \r
+NextAddress: \r
+ #\r
+ # Initialize floating point units\r
+ #\r
+ finit\r
+ fldcw ASM_PFX(mFpuControlWord)\r
+\r
+ #\r
+ # Use the CPUID instruction (CPUID.01H:EDX.SSE[bit 25] = 1) to test\r
+ # whether the processor supports SSE instructions.\r
+ #\r
+ movl $1, %eax\r
+ cpuid\r
+ btl $25, %edx\r
+ jnc SseError\r
+\r
+ #\r
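+ # Set OSFXSR bit (bit #9) in CR4.\r
+ # NOTE(review): only BIT9 is ORed in below; OSXMMEXCPT (bit #10) is not\r
+ # set here - confirm whether that is intended.\r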
+ #\r
+ movl %cr4, %eax\r
+ orl $BIT9, %eax\r
+ movl %eax, %cr4\r
+\r
+ #\r
+ # The processor should support SSE instruction and we can use\r
+ # ldmxcsr instruction\r
+ #\r
+ ldmxcsr ASM_PFX(mMmxControlWord)\r
+.endm\r
+\r
+#\r
+# SAVE_EAX_MICROCODE_RET_STATUS\r
+#\r
+# Copies the 32-bit value in EAX into the top dword of xmm6 (words 6 and 7,\r
+# the slot the original comment calls "ECX-SLOT 3"), 16 bits at a time.\r
+# The other three dwords of xmm6 are left untouched.\r
+#\r
+#   In:      EAX  = value to stash\r
+#   Out:     xmm6 = words 6..7 replaced with EAX; EAX unchanged\r
+#   Clobber: CF/OF (modified by the two rotates)\r
+#\r
+.macro SAVE_EAX_MICROCODE_RET_STATUS\r
+ #\r
+ # Word 6 <- EAX[15:0]\r
+ #\r
+ pinsrw  $6, %eax, %xmm6\r
+ #\r
+ # Swap the halves of EAX so the upper 16 bits sit in AX, then\r
+ # word 7 <- original EAX[31:16]\r
+ #\r
+ rorl    $16, %eax\r
+ pinsrw  $7, %eax, %xmm6\r
+ #\r
+ # Rotate back so EAX holds its original value again\r
+ #\r
+ roll    $16, %eax\r
+.endm\r
+\r
+#Restore from ECX-SLOT 3 in xmm6.\r
+.macro LOAD_EAX_MICROCODE_RET_STATUS\r
+ pshufd $0x93, %xmm6, %xmm6\r
+ movd %xmm6, %eax\r
+ pshufd $0x39, %xmm6, %xmm6\r