]> git.proxmox.com Git - mirror_edk2.git/blob - MdePkg/Library/BaseMemoryLibOptDxe/X64/CopyMem.nasm
MdePkg: Replace BSD License with BSD+Patent License
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / X64 / CopyMem.nasm
1 ;------------------------------------------------------------------------------
2 ;
3 ; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
4 ; SPDX-License-Identifier: BSD-2-Clause-Patent
5 ;
6 ; Module Name:
7 ;
8 ; CopyMem.nasm
9 ;
10 ; Abstract:
11 ;
12 ; CopyMem function
13 ;
14 ; Notes:
15 ;
16 ;------------------------------------------------------------------------------
17
DEFAULT REL                             ; NASM: register-less memory operands are RIP-relative by default
SECTION .text                           ; everything below is assembled into the code section
20
;------------------------------------------------------------------------------
;  VOID *
;  EFIAPI
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    );
;
;  Copies Count bytes from Source to Destination and returns Destination.
;  Overlap is handled: if Source < Destination and the last Source byte lies
;  at or above Destination, the bytes are copied backward (last to first);
;  in every other case they are copied forward.
;
;  Forward copy strategy:
;    1. byte-copy until the destination pointer is 16-byte aligned
;       (never more than Count bytes),
;    2. copy whole 16-byte blocks with an unaligned load and a non-temporal
;       aligned store, followed by a fence,
;    3. byte-copy the remaining (Count mod 16) bytes.
;
;  Register contract (as used by the code below):
;    In:     rcx = Destination, rdx = Source, r8 = Count
;    Out:    rax = Destination
;    Saved:  rsi, rdi (push/pop); xmm0 (spilled and reloaded)
;    Clobb:  rcx, r8, r9, flags; DF is cleared before returning
;
;  Assumptions:
;    - DF is clear on entry (the forward byte copies run before any cld).
;    - [rsp + 0x18] after the two pushes is the 16 bytes directly above the
;      return address. The code treats it as scratch space owned by this
;      function and as 16-byte aligned (movdqa); this matches the caller-
;      provided home space of the Microsoft x64 convention - confirm if the
;      routine is ever built for a different ABI.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    rsi
    push    rdi
    mov     rsi, rdx                    ; rsi <- Source
    mov     rdi, rcx                    ; rdi <- Destination
    lea     r9, [rsi + r8 - 1]          ; r9 <- address of last byte of Source
    cmp     rsi, rdi
    mov     rax, rdi                    ; rax <- Destination as return value (mov keeps flags)
    jae     .CopyForward                ; Source >= Destination: forward copy is safe
    cmp     r9, rdi                     ; does Source reach into Destination?
    jae     @CopyBackward               ; yes, overlapped: copy from the end
.CopyForward:
    xor     ecx, ecx                    ; rcx <- 0 (32-bit write zero-extends)
    sub     rcx, rdi                    ; rcx <- -rdi
    and     rcx, 15                     ; rcx <- bytes needed to make rdi 16-byte aligned
    jz      .DestAligned                ; rdi already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     ; do not copy more than Count bytes
    sub     r8, rcx                     ; r8 <- bytes left after the alignment prefix
    rep     movsb
.DestAligned:
    mov     rcx, r8
    and     r8, 15                      ; r8 <- trailing bytes (remaining mod 16)
    shr     rcx, 4                      ; rcx <- # of DQwords to copy; ZF set if none
    jz      @CopyBytes
    movdqa  [rsp + 0x18], xmm0          ; save xmm0 in the slot above the return address
.CopyBlocks:
    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 ; rdi is 16-byte aligned here; non-temporal store
    add     rsi, 16
    add     rdi, 16
    dec     rcx                         ; dec/jnz instead of the slow legacy LOOP;
    jnz     .CopyBlocks                 ; flags are dead after the loop
    mfence                              ; order the non-temporal stores before continuing
    movdqa  xmm0, [rsp + 0x18]          ; restore xmm0
    jmp     @CopyBytes                  ; copy remaining bytes
@CopyBackward:
    mov     rsi, r9                     ; rsi <- last byte of Source
    lea     rdi, [rdi + r8 - 1]         ; rdi <- last byte of Destination
    std                                 ; make movsb walk downward
@CopyBytes:
    mov     rcx, r8                     ; tail bytes (forward) or whole Count (backward)
    rep     movsb
    cld                                 ; leave DF clear for the caller
    pop     rdi
    pop     rsi
    ret
77