]> git.proxmox.com Git - mirror_edk2.git/blame - MdePkg/Library/BaseMemoryLibSse2/X64/CopyMem.nasm
MdeModulePkg: Replace BSD License with BSD+Patent License
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibSse2 / X64 / CopyMem.nasm
CommitLineData
9987eaea
JJ
1;------------------------------------------------------------------------------\r
2;\r
3; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>\r
4; This program and the accompanying materials\r
5; are licensed and made available under the terms and conditions of the BSD License\r
6; which accompanies this distribution. The full text of the license may be found at\r
7; http://opensource.org/licenses/bsd-license.php.\r
8;\r
9; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
10; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
11;\r
12; Module Name:\r
13;\r
14; CopyMem.nasm\r
15;\r
16; Abstract:\r
17;\r
18; CopyMem function\r
19;\r
20; Notes:\r
21;\r
22;------------------------------------------------------------------------------\r
23\r
24 DEFAULT REL\r
25 SECTION .text\r
26\r
;------------------------------------------------------------------------------
;  VOID *
;  EFIAPI
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    );
;
;  Copies Count bytes from Source to Destination and returns Destination.
;  Overlap is handled: if Source < Destination <= last byte of Source the copy
;  runs backward (byte-wise, DF = 1); otherwise it runs forward, using
;  non-temporal 16-byte stores once Destination is 16-byte aligned.
;
;  In:     rcx = Destination, rdx = Source, r8 = Count
;  Out:    rax = Destination
;  Saved:  rsi, rdi (push/pop); xmm0 (spilled to [rsp + 0x18] and restored)
;  Clobb:  rcx, r8, r9, flags (DF is cleared before returning)
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    rsi
    push    rdi
    mov     rsi, rdx                    ; rsi <- Source
    mov     rdi, rcx                    ; rdi <- Destination
    lea     r9, [rsi + r8 - 1]          ; r9  <- last byte of Source
    cmp     rsi, rdi
    mov     rax, rdi                    ; rax <- Destination (return value); mov leaves flags intact
    jae     .forward                    ; Source >= Destination: forward copy is safe
    cmp     r9, rdi                     ; does the end of Source reach Destination?
    jae     .copy_backward              ; yes: overlapped, must copy from the end
.forward:
    mov     rcx, rdi
    neg     rcx                         ; rcx <- -rdi
    and     rcx, 15                     ; rcx <- bytes needed to make rdi 16-byte aligned
    jz      .aligned                    ; Destination already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     ; never copy more than Count
    sub     r8, rcx                     ; r8  <- bytes left after the alignment prefix
    rep     movsb
.aligned:
    mov     rcx, r8
    and     r8, 15                      ; r8  <- tail bytes left after the 16-byte blocks
    shr     rcx, 4                      ; rcx <- # of DQwords to copy (ZF drives the jz below)
    jz      .copy_tail
    ; After the two pushes above, rsp + 0x18 is (entry rsp + 8), i.e. the first
    ; 16 bytes above the return address. This assumes the caller provides that
    ; area as scratch space and kept rsp 16-byte aligned at the call, which is
    ; what makes the aligned movdqa legal here.
    movdqa  [rsp + 0x18], xmm0          ; save xmm0 on stack
.copy_dqwords:
    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 ; rdi is 16-byte aligned here
    add     rsi, 16
    add     rdi, 16
    dec     rcx                         ; dec/jnz instead of the slow legacy `loop`;
    jnz     .copy_dqwords               ; no flags are consumed after this loop
    mfence                              ; order the non-temporal stores before continuing
    movdqa  xmm0, [rsp + 0x18]          ; restore xmm0
    jmp     .copy_tail                  ; copy remaining bytes
.copy_backward:
    mov     rsi, r9                     ; rsi <- last byte of Source
    lea     rdi, [rdi + r8 - 1]         ; rdi <- last byte of Destination
    std                                 ; rep movsb now walks downward
.copy_tail:
    mov     rcx, r8
    rep     movsb
    cld                                 ; always leave DF clear for the caller
    pop     rdi
    pop     rsi
    ret
83\r