--- /dev/null
+;/*++\r
+;\r
+;Copyright (c) 2006, Intel Corporation \r
+;All rights reserved. This program and the accompanying materials \r
+;are licensed and made available under the terms and conditions of the BSD License \r
+;which accompanies this distribution. The full text of the license may be found at \r
+;http://opensource.org/licenses/bsd-license.php \r
+; \r
+;THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, \r
+;WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. \r
+;\r
+;Module Name:\r
+;\r
+;  EfiZeroMem.asm\r
+;\r
+;Abstract:\r
+;\r
+; This is the code that supports IA32-optimized ZeroMem service\r
+;\r
+;--*/\r
+;---------------------------------------------------------------------------\r
+; Enable the Pentium Pro (P6) instruction set\r
+ .686\r
+; Flat memory model with C naming and calling conventions\r
+ .model flat,C\r
+; Enable MMX instructions (movq, pxor and emms are used in this file)\r
+ .mmx\r
+; Start of the code segment\r
+ .code\r
+\r
+;---------------------------------------------------------------------------\r
+;VOID\r
+;EfiCommonLibZeroMem (\r
+; IN VOID *Buffer,\r
+; IN UINTN Count\r
+; )\r
+;/*++\r
+;\r
+;Input: VOID *Buffer - Pointer to buffer to clear\r
+; UINTN Count - Number of bytes to clear\r
+;\r
+;Output: None.\r
+;\r
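+;Saves:    EBP, EDI, and the contents of MM0\r
+;\r
+;Modifies: EAX, ECX, EDX, EFLAGS; EMMS is executed when at least one\r
+;          64-byte block is cleared\r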
+;\r
+;Description: This function is an optimized zero-memory function.\r
+;\r
+;Notes:  This function clears memory 64 bytes at a time using 8-byte MMX\r
+;        stores. It first clears leading bytes one at a time until the\r
+;        pointer is 4-byte aligned, then runs the main loop that clears\r
+;        64-byte blocks, and finally clears the remaining DWORDs and bytes.\r
+;\r
+;--*/\r
+EfiCommonLibZeroMem PROC\r
+;\r
+; Stack frame (arguments are passed on the stack):\r
+;   [ebp + 8]    Buffer\r
+;   [ebp + 0Ch]  Count\r
+;   [ebp - 8]    UINT64 MmxSave (local, holds the caller's mm0)\r
+;\r
+; Register roles:\r
+;   edi = current write pointer\r
+;   ecx = bytes left, later the 64-byte block counter / rep count\r
+;   edx = bytes left over after the 64-byte blocks (Count mod 64)\r
+;   eax = zero fill value for rep stosd / rep stosb\r
+;\r
+; NOTE(review): rep stosd / rep stosb only move forward when the direction\r
+; flag is clear. This routine never executes cld, so it assumes DF = 0 on\r
+; entry - confirm against the calling convention in use.\r
+;\r
+    push    ebp\r
+    mov     ebp, esp\r
+    sub     esp, 8                      ; reserve space for local variable MmxSave\r
+    push    edi                         ; preserved for the caller, restored at exit\r
+\r
+    mov     ecx, [ebp + 0Ch]            ; ecx = Count\r
+    mov     edi, [ebp + 8]              ; edi = Buffer\r
+\r
+    ; Clear leading bytes one at a time until edi is 4-byte aligned\r
+_AlignStart:\r
+    test    edi, 3                      ; low 2 address bits clear?\r
+    jz      _ZeroBlocks                 ; yes - pointer is aligned\r
+    test    ecx, ecx\r
+    jz      _ZeroMemDone                ; ran out of bytes before aligning\r
+\r
+    mov     BYTE PTR [edi], 0\r
+    inc     edi\r
+    dec     ecx\r
+    jmp     _AlignStart\r
+\r
+_ZeroBlocks:\r
+    ; Split the remaining count into 64-byte blocks and a tail\r
+    mov     edx, ecx\r
+    and     edx, 63                     ; edx = tail bytes (count mod 64)\r
+    shr     ecx, 6                      ; ecx = number of 64-byte blocks\r
+    jz      _ZeroRemaining              ; shr left ZF set: no full block to clear\r
+\r
+    ; mm0 is used as the zero source; keep the caller's value in MmxSave\r
+    movq    [ebp - 8], mm0\r
+    pxor    mm0, mm0                    ; mm0 = 0\r
+\r
+    ; Invariant: ecx > 0 blocks remain, edi points at the next block\r
+_ZeroBlockLoop:\r
+    movq    QWORD PTR ds:[edi], mm0\r
+    movq    QWORD PTR ds:[edi+8], mm0\r
+    movq    QWORD PTR ds:[edi+16], mm0\r
+    movq    QWORD PTR ds:[edi+24], mm0\r
+    movq    QWORD PTR ds:[edi+32], mm0\r
+    movq    QWORD PTR ds:[edi+40], mm0\r
+    movq    QWORD PTR ds:[edi+48], mm0\r
+    movq    QWORD PTR ds:[edi+56], mm0\r
+\r
+    add     edi, 64\r
+    dec     ecx\r
+    jnz     _ZeroBlockLoop\r
+\r
+    movq    mm0, [ebp - 8]              ; restore mm0 from MmxSave\r
+    emms                                ; leave MMX state\r
+\r
+_ZeroRemaining:\r
+    xor     eax, eax                    ; fill value for the string stores\r
+\r
+    ; Clear as many whole DWORDs of the tail as possible\r
+    mov     ecx, edx\r
+    shr     ecx, 2\r
+    rep stosd\r
+\r
+    ; Clear the last 0-3 bytes\r
+    mov     ecx, edx\r
+    and     ecx, 3\r
+    rep stosb\r
+\r
+_ZeroMemDone:\r
+    pop     edi\r
+    leave                               ; discards MmxSave and restores ebp\r
+    ret\r
+EfiCommonLibZeroMem ENDP\r
+ END\r