]> git.proxmox.com Git - mirror_edk2.git/blob - EdkCompatibilityPkg/Foundation/Library/EfiCommonLib/Ia32/EfiCopyMemSSE2.c
Sync all bug fixes between EDK1.04 and EDK1.06 into EdkCompatibilityPkg.
[mirror_edk2.git] / EdkCompatibilityPkg / Foundation / Library / EfiCommonLib / Ia32 / EfiCopyMemSSE2.c
1 /*++
2
3 Copyright (c) 2006 - 2010, Intel Corporation. All rights reserved.<BR>
4 This program and the accompanying materials
5 are licensed and made available under the terms and conditions of the BSD License
6 which accompanies this distribution. The full text of the license may be found at
7 http://opensource.org/licenses/bsd-license.php
8
9 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
10 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
11
12 Module Name:
13
14 EfiCopyMemSSE2.c
15
16 Abstract:
17
18 This is the code that supports IA32-optimized CopyMem service
19
20 --*/
21
22 #include "Tiano.h"
23
24 VOID
25 EfiCommonLibCopyMem (
26 IN VOID *Destination,
27 IN VOID *Source,
28 IN UINTN Count
29 )
30 /*++
31
32 Routine Description:
33
34 Copy Length bytes from Source to Destination.
35
36 Arguments:
37
38 Destination - Target of copy
39
40 Source - Place to copy from
41
42 Length - Number of bytes to copy
43
44 Returns:
45
46 None
47
48 --*/
49 {
50 __asm {
51 mov ecx, Count
52 mov esi, Source
53 mov edi, Destination
54
55 ; First off, make sure we have no overlap. That is to say,
56 ; if (Source == Destination) => do nothing
57 ; if (Source + Count <= Destination) => regular copy
58 ; if (Destination + Count <= Source) => regular copy
59 ; otherwise, do a reverse copy
60 mov eax, esi
61 add eax, ecx ; Source + Count
62 cmp eax, edi
63 jle _StartByteCopy
64
65 mov eax, edi
66 add eax, ecx ; Dest + Count
67 cmp eax, esi
68 jle _StartByteCopy
69
70 cmp esi, edi
71 je _CopyMemDone
72 jl _CopyOverlapped ; too bad -- overlaps
73
74 ; Pick up misaligned start bytes to get destination pointer 4-byte aligned
75 _StartByteCopy:
76 cmp ecx, 0
77 je _CopyMemDone ; Count == 0, all done
78 mov edx, edi
79 and dl, 3 ; check lower 2 bits of address
80 test dl, dl
81 je SHORT _CopyBlocks ; already aligned?
82
83 ; Copy a byte
84 mov al, BYTE PTR [esi] ; get byte from Source
85 mov BYTE PTR [edi], al ; write byte to Destination
86 dec ecx
87 inc edi
88 inc esi
89 jmp _StartByteCopy ; back to top of loop
90
91 _CopyBlocks:
92 ; Compute how many 64-byte blocks we can clear
93 mov eax, ecx ; get Count in eax
94 shr eax, 6 ; convert to 64-byte count
95 shl eax, 6 ; convert back to bytes
96 sub ecx, eax ; subtract from the original count
97 shr eax, 6 ; and this is how many 64-byte blocks
98
99 ; If no 64-byte blocks, then skip
100 cmp eax, 0
101 je _CopyRemainingDWords
102
103
104 copyxmm:
105
106 movdqu xmm0, OWORD PTR ds:[esi]
107 movdqu OWORD PTR ds:[edi], xmm0
108 movdqu xmm1, OWORD PTR ds:[esi+16]
109 movdqu OWORD PTR ds:[edi+16], xmm1
110 movdqu xmm2, OWORD PTR ds:[esi+32]
111 movdqu OWORD PTR ds:[edi+32], xmm2
112 movdqu xmm3, OWORD PTR ds:[esi+48]
113 movdqu OWORD PTR ds:[edi+48], xmm3
114
115 add edi, 64
116 add esi, 64
117 dec eax
118 jnz copyxmm
119
120
121 ; Copy as many DWORDS as possible
122 _CopyRemainingDWords:
123 cmp ecx, 4
124 jb _CopyRemainingBytes
125
126 mov eax, DWORD PTR [esi] ; get data from Source
127 mov DWORD PTR [edi], eax ; write byte to Destination
128 sub ecx, 4 ; decrement Count
129 add esi, 4 ; advance Source pointer
130 add edi, 4 ; advance Destination pointer
131 jmp _CopyRemainingDWords ; back to top
132
133 _CopyRemainingBytes:
134 cmp ecx, 0
135 je _CopyMemDone
136 mov al, BYTE PTR [esi] ; get byte from Source
137 mov BYTE PTR [edi], al ; write byte to Destination
138 dec ecx
139 inc esi
140 inc edi ; advance Destination pointer
141 jmp SHORT _CopyRemainingBytes ; back to top of loop
142
143 ;
144 ; We do this block if the source and destination buffers overlap. To
145 ; handle it, copy starting at the end of the source buffer and work
146 ; your way back. Since this is the atypical case, this code has not
147 ; been optimized, and thus simply copies bytes.
148 ;
149 _CopyOverlapped:
150
151 ; Move the source and destination pointers to the end of the range
152 add esi, ecx ; Source + Count
153 dec esi
154 add edi, ecx ; Dest + Count
155 dec edi
156
157 _CopyOverlappedLoop:
158 cmp ecx, 0
159 je _CopyMemDone
160 mov al, BYTE PTR [esi] ; get byte from Source
161 mov BYTE PTR [edi], al ; write byte to Destination
162 dec ecx
163 dec esi
164 dec edi
165 jmp _CopyOverlappedLoop ; back to top of loop
166
167 _CopyMemDone:
168 }
169 }