]> git.proxmox.com Git - mirror_edk2.git/blame - MdePkg/Library/BaseMemoryLibSse2/X64/SetMem.nasm
MdePkg BaseMemoryLibSse2: Convert X64/SetMem.asm to NASM
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibSse2 / X64 / SetMem.nasm
CommitLineData
e8758b6a
JJ
1;------------------------------------------------------------------------------\r
2;\r
3; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>\r
4; This program and the accompanying materials\r
5; are licensed and made available under the terms and conditions of the BSD License\r
6; which accompanies this distribution. The full text of the license may be found at\r
7; http://opensource.org/licenses/bsd-license.php.\r
8;\r
9; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
10; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
11;\r
12; Module Name:\r
13;\r
14; SetMem.nasm\r
15;\r
16; Abstract:\r
17;\r
18; SetMem function\r
19;\r
20; Notes:\r
21;\r
22;------------------------------------------------------------------------------\r
23\r
24 DEFAULT REL\r
25 SECTION .text\r
26\r
;------------------------------------------------------------------------------
;  VOID *
;  InternalMemSetMem (
;    IN VOID   *Buffer,
;    IN UINTN  Count,
;    IN UINT8  Value
;    )
;
;  Fills Count bytes starting at Buffer with Value and returns Buffer.
;
;  In:     rcx = Buffer, rdx = Count, r8b = Value
;  Out:    rax = Buffer
;  Saved:  rdi (push/pop), xmm0 (spilled to [rsp + 0x10] and reloaded)
;  Clobb:  rcx, rdx, r9, flags
;
;  Strategy:
;    1. Byte-store up to 15 leading bytes so that rdi reaches a 16-byte
;       boundary (or Count runs out first).
;    2. Store the remaining full 16-byte blocks with non-temporal movntdq.
;    3. Byte-store the 0..15 trailing bytes.
;
;  NOTE(review): rep stosb is used without an explicit cld, so the routine
;  assumes the direction flag is clear on entry -- confirm against the
;  calling convention in use.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemSetMem)
ASM_PFX(InternalMemSetMem):
    push    rdi
    mov     rdi, rcx                    ; rdi <- Buffer (stos destination)
    mov     al, r8b                     ; al  <- Value
    mov     r9, rdi                     ; r9  <- Buffer, kept for the return value
    xor     ecx, ecx                    ; rcx <- 0 (32-bit write zero-extends)
    sub     rcx, rdi                    ; rcx <- -Buffer
    and     rcx, 15                     ; rcx <- bytes needed to reach 16-byte alignment
    jz      .Aligned                    ; already aligned, no head bytes to write
    cmp     rcx, rdx
    cmova   rcx, rdx                    ; rcx <- min(head bytes, Count), unsigned
    sub     rdx, rcx                    ; rdx <- bytes left after the head
    rep     stosb                       ; write the unaligned head
.Aligned:
    mov     rcx, rdx
    and     rdx, 15                     ; rdx <- tail byte count (0..15)
    shr     rcx, 4                      ; rcx <- number of full 16-byte blocks
    jz      .SetBytes                   ; no full block: only the tail remains
    mov     ah, al                      ; ax <- Value repeated twice
    ;
    ; After "push rdi", [rsp] holds the saved rdi and [rsp + 8] the return
    ; address, so [rsp + 0x10] lies above this function's own stack data.
    ; With rcx/rdx/r8 argument passing this is presumably the caller-provided
    ; home (shadow) area of the Microsoft x64 convention, and it is 16-byte
    ; aligned as movdqa requires provided rsp was 16-byte aligned at the call.
    ;
    movdqa  [rsp + 0x10], xmm0          ; save xmm0
    movd    xmm0, eax                   ; xmm0[0..15]  <- Value repeated twice
    pshuflw xmm0, xmm0, 0               ; xmm0[0..63]  <- Value repeated 8 times
    movlhps xmm0, xmm0                  ; xmm0[0..127] <- Value repeated 16 times
.StoreLoop:
    movntdq [rdi], xmm0                 ; rdi is 16-byte aligned here
    add     rdi, 16
    dec     rcx                         ; dec/jnz instead of the slow legacy "loop"
    jnz     .StoreLoop
    mfence                              ; order the non-temporal stores before returning
    movdqa  xmm0, [rsp + 0x10]          ; restore xmm0
.SetBytes:
    mov     ecx, edx                    ; rcx <- tail count; 32-bit write clears bits 32..63
    rep     stosb                       ; write the tail (no-op when rcx == 0)
    mov     rax, r9                     ; rax <- Buffer (return value)
    pop     rdi
    ret
71\r