]> git.proxmox.com Git - mirror_edk2.git/blob - MdePkg/Library/BaseMemoryLibSse2/Ia32/CopyMem.nasm
MdePkg: Replace BSD License with BSD+Patent License
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibSse2 / Ia32 / CopyMem.nasm
1 ;------------------------------------------------------------------------------
2 ;
3 ; Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>
4 ; SPDX-License-Identifier: BSD-2-Clause-Patent
5 ;
6 ; Module Name:
7 ;
8 ; CopyMem.nasm
9 ;
10 ; Abstract:
11 ;
12 ; CopyMem function
13 ;
14 ; Notes:
15 ;
16 ;------------------------------------------------------------------------------
17
SECTION .text

;------------------------------------------------------------------------------
;  VOID *
;  InternalMemCopyMem (
;    IN VOID   *Destination,
;    IN VOID   *Source,
;    IN UINTN  Count
;    );
;
;  Copies Count bytes from Source to Destination; the buffers may overlap.
;
;  Args:    read from the stack: Destination at [esp + 4], Source at
;           [esp + 8], Count at [esp + 12] on entry. The caller removes them
;           (the function returns with a plain `ret`).
;  Return:  eax = Destination.
;  Saved:   esi, edi (push/pop) and xmm0 (spilled to the stack around the
;           bulk loop).
;  Clobb:   eax, ecx, edx, flags.
;  DF:      the forward path runs `rep movsb` without a preceding `cld`, so
;           it relies on DF = 0 at entry; DF is always cleared on exit.
;
;  Strategy:
;    * Source < Destination <= last byte of Source: copy backward, byte by
;      byte, so not-yet-read source bytes are never overwritten.
;    * Otherwise copy forward: byte-copy until Destination is 16-byte
;      aligned, stream 16-byte blocks with non-temporal stores, then
;      byte-copy the remaining (< 16) bytes.
;------------------------------------------------------------------------------
global ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    push    esi
    push    edi
    mov     esi, [esp + 16]             ; esi <- Source
    mov     edi, [esp + 12]             ; edi <- Destination
    mov     edx, [esp + 20]             ; edx <- Count
    lea     eax, [esi + edx - 1]        ; eax <- address of last Source byte
    cmp     esi, edi
    jae     .AlignDestination           ; Source >= Destination: forward copy is safe
    cmp     eax, edi                    ; does Source run into Destination?
    jae     @CopyBackward               ; yes: copy backward
.AlignDestination:
    xor     ecx, ecx
    sub     ecx, edi
    and     ecx, 15                     ; ecx <- bytes until edi is 16-byte aligned
    jz      .CopyBlocks                 ; already aligned
    cmp     ecx, edx
    cmova   ecx, edx                    ; never copy more than Count bytes
    sub     edx, ecx                    ; edx <- bytes left after the prefix
    rep     movsb                       ; copy the unaligned prefix
.CopyBlocks:
    mov     ecx, edx
    and     edx, 15                     ; edx <- tail bytes (< 16)
    shr     ecx, 4                      ; ecx <- number of 16-byte blocks
    jz      @CopyBytes                  ; no full block: only the tail is left
    add     esp, -16
    movdqu  [esp], xmm0                 ; save xmm0 (esp may be unaligned)
.BlockLoop:
    movdqu  xmm0, [esi]                 ; esi may not be 16-byte aligned
    movntdq [edi], xmm0                 ; edi is 16-byte aligned here
    add     esi, 16
    add     edi, 16
    dec     ecx                         ; ecx >= 1 on entry; dec/jnz replaces the
    jnz     .BlockLoop                  ; slow legacy `loop` instruction
    mfence                              ; fence the non-temporal stores before
                                        ; the tail copy and the return
    movdqu  xmm0, [esp]                 ; restore xmm0
    add     esp, 16                     ; release the save slot
    jmp     @CopyBytes
@CopyBackward:
    mov     esi, eax                    ; esi <- last byte of Source
    lea     edi, [edi + edx - 1]        ; edi <- last byte of Destination
    std                                 ; make rep movsb walk downward
@CopyBytes:
    mov     ecx, edx                    ; forward: tail bytes; backward: Count
    rep     movsb
    cld                                 ; always leave DF cleared
    mov     eax, [esp + 12]             ; eax <- Destination as return value
    pop     edi
    pop     esi
    ret
78