]>
Commit | Line | Data |
---|---|---|
3eb9473e | 1 | /*++\r |
2 | \r | |
4ea9375a HT |
3 | Copyright (c) 2006, Intel Corporation. All rights reserved.<BR>\r |
4 | This program and the accompanying materials \r | |
3eb9473e | 5 | are licensed and made available under the terms and conditions of the BSD License \r |
6 | which accompanies this distribution. The full text of the license may be found at \r | |
7 | http://opensource.org/licenses/bsd-license.php \r | |
8 | \r | |
9 | THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, \r | |
10 | WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. \r | |
11 | \r | |
12 | Module Name:\r | |
13 | \r | |
14 | EfiCopyMemSSE2.c\r | |
15 | \r | |
16 | Abstract:\r | |
17 | \r | |
18 | This is the code that supports IA32-optimized CopyMem service\r | |
19 | \r | |
20 | --*/\r | |
21 | \r | |
22 | #include "Tiano.h"\r | |
23 | \r | |
24 | VOID\r | |
25 | EfiCommonLibCopyMem (\r | |
26 | IN VOID *Destination,\r | |
27 | IN VOID *Source,\r | |
28 | IN UINTN Count\r | |
29 | )\r | |
30 | /*++\r | |
31 | \r | |
32 | Routine Description:\r | |
33 | \r | |
34 | Copy Length bytes from Source to Destination.\r | |
35 | \r | |
36 | Arguments:\r | |
37 | \r | |
38 | Destination - Target of copy\r | |
39 | \r | |
40 | Source - Place to copy from\r | |
41 | \r | |
42 | Length - Number of bytes to copy\r | |
43 | \r | |
44 | Returns:\r | |
45 | \r | |
46 | None\r | |
47 | \r | |
48 | --*/\r | |
49 | {\r | |
50 | __asm {\r | |
51 | mov ecx, Count\r | |
52 | mov esi, Source\r | |
53 | mov edi, Destination\r | |
54 | \r | |
55 | ; First off, make sure we have no overlap. That is to say,\r | |
56 | ; if (Source == Destination) => do nothing\r | |
57 | ; if (Source + Count <= Destination) => regular copy\r | |
58 | ; if (Destination + Count <= Source) => regular copy\r | |
59 | ; otherwise, do a reverse copy\r | |
60 | mov eax, esi\r | |
61 | add eax, ecx ; Source + Count\r | |
62 | cmp eax, edi\r | |
63 | jle _StartByteCopy\r | |
64 | \r | |
65 | mov eax, edi\r | |
66 | add eax, ecx ; Dest + Count\r | |
67 | cmp eax, esi\r | |
68 | jle _StartByteCopy\r | |
69 | \r | |
70 | cmp esi, edi\r | |
71 | je _CopyMemDone \r | |
72 | jl _CopyOverlapped ; too bad -- overlaps\r | |
73 | \r | |
74 | ; Pick up misaligned start bytes to get destination pointer 4-byte aligned\r | |
75 | _StartByteCopy:\r | |
76 | cmp ecx, 0\r | |
77 | je _CopyMemDone ; Count == 0, all done\r | |
78 | mov edx, edi\r | |
79 | and dl, 3 ; check lower 2 bits of address\r | |
80 | test dl, dl \r | |
81 | je SHORT _CopyBlocks ; already aligned?\r | |
82 | \r | |
83 | ; Copy a byte\r | |
84 | mov al, BYTE PTR [esi] ; get byte from Source\r | |
85 | mov BYTE PTR [edi], al ; write byte to Destination\r | |
86 | dec ecx\r | |
87 | inc edi\r | |
88 | inc esi\r | |
89 | jmp _StartByteCopy ; back to top of loop\r | |
90 | \r | |
91 | _CopyBlocks:\r | |
92 | ; Compute how many 64-byte blocks we can clear \r | |
93 | mov eax, ecx ; get Count in eax\r | |
94 | shr eax, 6 ; convert to 64-byte count\r | |
95 | shl eax, 6 ; convert back to bytes\r | |
96 | sub ecx, eax ; subtract from the original count\r | |
97 | shr eax, 6 ; and this is how many 64-byte blocks\r | |
98 | \r | |
99 | ; If no 64-byte blocks, then skip \r | |
100 | cmp eax, 0\r | |
101 | je _CopyRemainingDWords\r | |
102 | \r | |
103 | \r | |
104 | copyxmm:\r | |
105 | \r | |
106 | movdqu xmm0, OWORD PTR ds:[esi]\r | |
107 | movdqu QWORD PTR ds:[edi], xmm0\r | |
108 | movdqu xmm1, OWORD PTR ds:[esi+16]\r | |
109 | movdqu QWORD PTR ds:[edi+16], xmm1\r | |
110 | movdqu xmm2, OWORD PTR ds:[esi+32]\r | |
111 | movdqu QWORD PTR ds:[edi+32], xmm2\r | |
112 | movdqu xmm3, OWORD PTR ds:[esi+48]\r | |
113 | movdqu QWORD PTR ds:[edi+48], xmm3\r | |
114 | \r | |
115 | add edi, 64\r | |
116 | add esi, 64\r | |
117 | dec eax\r | |
118 | jnz copyxmm\r | |
119 | \r | |
120 | \r | |
121 | ; Copy as many DWORDS as possible\r | |
122 | _CopyRemainingDWords:\r | |
123 | cmp ecx, 4\r | |
124 | jb _CopyRemainingBytes\r | |
125 | \r | |
126 | mov eax, DWORD PTR [esi] ; get data from Source\r | |
127 | mov DWORD PTR [edi], eax ; write byte to Destination\r | |
128 | sub ecx, 4 ; decrement Count\r | |
129 | add esi, 4 ; advance Source pointer\r | |
130 | add edi, 4 ; advance Destination pointer\r | |
131 | jmp _CopyRemainingDWords ; back to top\r | |
132 | \r | |
133 | _CopyRemainingBytes:\r | |
134 | cmp ecx, 0\r | |
135 | je _CopyMemDone\r | |
136 | mov al, BYTE PTR [esi] ; get byte from Source\r | |
137 | mov BYTE PTR [edi], al ; write byte to Destination\r | |
138 | dec ecx\r | |
139 | inc esi\r | |
140 | inc edi ; advance Destination pointer\r | |
141 | jmp SHORT _CopyRemainingBytes ; back to top of loop\r | |
142 | \r | |
143 | ;\r | |
144 | ; We do this block if the source and destination buffers overlap. To\r | |
145 | ; handle it, copy starting at the end of the source buffer and work\r | |
146 | ; your way back. Since this is the atypical case, this code has not\r | |
147 | ; been optimized, and thus simply copies bytes.\r | |
148 | ;\r | |
149 | _CopyOverlapped:\r | |
150 | \r | |
151 | ; Move the source and destination pointers to the end of the range\r | |
152 | add esi, ecx ; Source + Count\r | |
153 | dec esi\r | |
154 | add edi, ecx ; Dest + Count\r | |
155 | dec edi\r | |
156 | \r | |
157 | _CopyOverlappedLoop:\r | |
158 | cmp ecx, 0\r | |
159 | je _CopyMemDone\r | |
160 | mov al, BYTE PTR [esi] ; get byte from Source\r | |
161 | mov BYTE PTR [edi], al ; write byte to Destination\r | |
162 | dec ecx\r | |
163 | dec esi\r | |
164 | dec edi\r | |
165 | jmp _CopyOverlappedLoop ; back to top of loop\r | |
166 | \r | |
167 | _CopyMemDone:\r | |
168 | }\r | |
169 | }\r |