]>
Commit | Line | Data |
---|---|---|
a37f6605 AB |
1 | #------------------------------------------------------------------------------\r |
2 | #\r | |
3 | # CopyMem() worker for ARM\r | |
4 | #\r | |
5 | # This file started out as C code that did 64 bit moves if the buffer was\r | |
6 | # 32-bit aligned, else it does a byte copy. It also does a byte copy for\r | |
7 | # any trailing bytes. It was updated to do 32-byte copies using stm/ldm.\r | |
8 | #\r | |
9 | # Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>\r | |
10 | # Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>\r | |
9344f092 | 11 | # SPDX-License-Identifier: BSD-2-Clause-Patent\r |
a37f6605 AB |
12 | #\r |
13 | #------------------------------------------------------------------------------\r | |
14 | \r | |
15 | .text\r | |
16 | .thumb\r | |
17 | .syntax unified\r | |
18 | \r | |
/**
  Copy Length bytes from Source to Destination. Overlap is OK.

  This implementation copies forward (ascending addresses) when Destination
  is below Source, or when Length is smaller than (Destination - Source).
  Otherwise it copies backward, starting at the end of both buffers, so that
  an overlapping source is always read before it is overwritten.

  In either direction, data is moved in 32-byte ldm/stm bursts while at least
  32 bytes remain, provided both pointers (the end pointers for the backward
  copy) are 16-byte aligned. Everything else is copied one byte at a time.

  A zero Length, or Destination == Source, returns immediately without
  touching memory.

  @param  Destination Target of copy
  @param  Source      Place to copy from
  @param  Length      Number of bytes to copy

  @return Destination


VOID *
EFIAPI
InternalMemCopyMem (
  OUT     VOID                      *DestinationBuffer,
  IN      CONST VOID                *SourceBuffer,
  IN      UINTN                     Length
  )
**/
//
// Register usage inside the function:
//   r0      in: Destination; afterwards 1 = use 32-byte bursts, 0 = byte copy
//   r1      in: Source (never modified)
//   r2      in: Length; afterwards scratch / burst data
//   r2-r9   burst data for ldmia/stmia
//   r3      scratch
//   r10     destination cursor (forward: next byte, backward: one past next)
//   r11     original Destination, restored into r0 on return
//   r12     number of bytes still to copy
//   r14     source cursor (lr is saved on the stack and reused)
//
  .type   ASM_PFX(InternalMemCopyMem), %function
ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
  push    {r4-r11, lr}
  // Save the input parameters in extra registers
  // (r11 = destination, r10 = destination cursor, r14 = source, r12 = length)
  mov     r11, r0
  mov     r10, r0
  mov     r12, r2
  mov     r14, r1

  // An empty copy has nothing to do. Without this check the case
  // (dest == source, length == 0) would fall into the backward byte loop,
  // which decrements the count before testing it and would wrap around.
  cmp     r2, #0
  beq     memcopy_end

  cmp     r11, r1
  // Copying a buffer onto itself is a no-op
  beq     memcopy_end
  // If (dest < source) a forward copy is always safe.
  // (beq above does not alter the flags set by cmp r11, r1)
  bcc     memcopy_check_optim_default

  // If (length < dest - source) the buffers do not overlap: forward copy
  rsb     r3, r1, r11
  cmp     r12, r3
  bcc     memcopy_check_optim_default
  b       memcopy_check_optim_overlap

memcopy_check_optim_default:
  // Check if we can use the optimized path for the forward copy:
  // (length >= 32) && destination 16-byte aligned && source 16-byte aligned.
  // The result is left in r0 (optimized path if r0 == 1).
  tst     r0, #0xF
  it      ne
  movne.n r0, #0
  bne     memcopy_default
  // r3 = 1 if the source is 16-byte aligned, else 0
  tst     r1, #0xF
  it      ne
  movne.n r3, #0
  it      eq
  moveq.n r3, #1
  // Fewer than 32 bytes: byte copy regardless of alignment
  cmp     r2, #31
  it      ls
  movls.n r0, #0
  bls     memcopy_default
  and     r0, r3, #1
  b       memcopy_default

memcopy_check_optim_overlap:
  // r10 = dest_end, r14 = source_end
  add     r10, r11, r12
  add     r14, r12, r1

  // Are we in the optimized case:
  // (length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned
  cmp     r2, #31
  it      ls
  movls.n r0, #0
  it      hi
  movhi.n r0, #1
  tst     r10, #0xF
  it      ne
  movne.n r0, #0
  tst     r14, #0xF
  it      ne
  movne.n r0, #0
  b       memcopy_overlapped

memcopy_overlapped_non_optim:
  // We read 1 byte from the end of the source buffer
  sub     r3, r14, #1
  sub     r12, r12, #1
  ldrb    r3, [r3, #0]
  sub     r2, r10, #1
  // Flags for the loop branch below; sub/strb in between leave them intact
  cmp     r12, #0
  // We write 1 byte at the end of the dest buffer
  sub     r10, r10, #1
  sub     r14, r14, #1
  strb    r3, [r2, #0]
  bne     memcopy_overlapped_non_optim
  b       memcopy_end

// r10 = dest_end, r14 = source_end
memcopy_overlapped:
  // Are we in the optimized case ?
  cmp     r0, #0
  beq     memcopy_overlapped_non_optim

  // Optimized Overlapped - Read 32 bytes
  sub     r14, r14, #32
  sub     r12, r12, #32
  cmp     r12, #31
  ldmia   r14, {r2-r9}

  // If the remaining length is less than 32 then disable optim
  it      ls
  movls.n r0, #0

  cmp     r12, #0

  // Optimized Overlapped - Write 32 bytes
  sub     r10, r10, #32
  stmia   r10, {r2-r9}

  // while (length != 0)
  bne     memcopy_overlapped
  b       memcopy_end

memcopy_default_non_optim:
  // Byte copy, post-incrementing both cursors
  ldrb    r3, [r14], #1
  sub     r12, r12, #1
  strb    r3, [r10], #1

memcopy_default:
  cmp     r12, #0
  beq     memcopy_end

// r10 = dest, r14 = source
memcopy_default_loop:
  cmp     r0, #0
  beq     memcopy_default_non_optim

  // Optimized memcopy - Read 32 Bytes
  sub     r12, r12, #32
  cmp     r12, #31
  ldmia   r14!, {r2-r9}

  // If the remaining length is less than 32 then disable optim
  it      ls
  movls.n r0, #0

  cmp     r12, #0

  // Optimized memcopy - Write 32 Bytes
  stmia   r10!, {r2-r9}

  // while (length != 0)
  bne     memcopy_default_loop

memcopy_end:
  // Return the original destination pointer
  mov     r0, r11
  pop     {r4-r11, pc}