]> git.proxmox.com Git - mirror_edk2.git/blob - MdePkg/Library/BaseMemoryLibSse2/x64/CopyMem.asm
Initial import.
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibSse2 / x64 / CopyMem.asm
1 ;------------------------------------------------------------------------------
2 ;
3 ; Copyright (c) 2006, Intel Corporation
4 ; All rights reserved. This program and the accompanying materials
5 ; are licensed and made available under the terms and conditions of the BSD License
6 ; which accompanies this distribution. The full text of the license may be found at
7 ; http://opensource.org/licenses/bsd-license.php
8 ;
9 ; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
10 ; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
11 ;
12 ; Module Name:
13 ;
14 ; CopyMem.asm
15 ;
16 ; Abstract:
17 ;
18 ; CopyMem function
19 ;
20 ; Notes:
21 ;
22 ;------------------------------------------------------------------------------
23
24 .code
25
;------------------------------------------------------------------------------
;  VOID *
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    )
;
;  Copies Count bytes from Source to Destination and returns Destination.
;  Overlapping buffers are handled: when Source lies below Destination and
;  the tail of Source reaches into Destination, bytes are copied from the
;  end backward; in every other case they are copied forward, using 16-byte
;  non-temporal stores for the bulk of the data.
;
;  In:    rcx = Destination, rdx = Source, r8 = Count
;  Out:   rax = Destination
;  Saved: rsi, rdi (PROC USES); xmm0 (spilled to [rsp + 18h] around the
;         16-byte loop)
;  Clobb: rcx, r8, r9, flags; the direction flag is cleared on return
;
;  NOTE(review): [rsp + 18h] is, after the two USES pushes, 8 bytes above the
;  return address. This assumes the caller provides the Microsoft x64 home
;  (shadow) area and the standard entry stack alignment, so the slot is
;  writable and 16-byte aligned as movdqa requires.
;------------------------------------------------------------------------------
InternalMemCopyMem  PROC    USES    rsi rdi
    mov     rsi, rdx                    ; rsi <- Source
    mov     rdi, rcx                    ; rdi <- Destination
    lea     r9, [rsi + r8 - 1]          ; r9 <- last byte of Source
    cmp     rsi, rdi
    mov     rax, rdi                    ; rax <- Destination as return value
    jae     @F                          ; forward copy is safe if Source >= Destination
    cmp     r9, rdi                     ; does the end of Source reach Destination?
    jae     @CopyBackward               ; yes: overlapped, copy from the end
@@:
    ;
    ; Forward copy. First move the few leading bytes needed to bring rdi up
    ; to a 16-byte boundary (never more than Count bytes).
    ;
    xor     rcx, rcx
    sub     rcx, rdi                    ; rcx <- -rdi
    and     rcx, 15                     ; rcx + rdi is 16-byte aligned
    jz      @F                          ; rdi already aligned
    cmp     rcx, r8
    cmova   rcx, r8                     ; rcx <- min (rcx, Count)
    sub     r8, rcx                     ; r8 <- bytes left after the head
    rep     movsb
@@:
    mov     rcx, r8
    and     r8, 15                      ; r8 <- tail bytes after the 16-byte loop
    shr     rcx, 4                      ; rcx <- # of DQwords to copy
    jz      @CopyBytes                  ; fewer than 16 bytes left
    movdqa  [rsp + 18h], xmm0           ; save xmm0 on stack
@@:
    movdqu  xmm0, [rsi]                 ; rsi may not be 16-byte aligned
    movntdq [rdi], xmm0                 ; rdi is 16-byte aligned here
    add     rsi, 16
    add     rdi, 16
    loop    @B
    mfence                              ; fence after the non-temporal stores
    movdqa  xmm0, [rsp + 18h]           ; restore xmm0
    jmp     @CopyBytes                  ; copy remaining bytes
@CopyBackward:
    mov     rsi, r9                     ; rsi <- last byte of Source
    lea     rdi, [rdi + r8 - 1]         ; rdi <- last byte of Destination
    std                                 ; rep movsb now walks downward
@CopyBytes:
    mov     rcx, r8                     ; rcx <- remaining byte count
    rep     movsb
    cld                                 ; always leave DF clear for the caller
    ret
InternalMemCopyMem  ENDP
77
78 END