]> git.proxmox.com Git - mirror_edk2.git/blob - MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.S
MdePkg/BaseMemoryLibOptDxe: add accelerated ARM routines
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / CopyMem.S
1 #------------------------------------------------------------------------------
2 #
3 # CopyMem() worker for ARM
4 #
5 # This file started out as C code that did 64 bit moves if the buffer was
6 # 32-bit aligned, else it does a byte copy. It also does a byte copy for
7 # any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
8 #
9 # Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
10 # Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
11 # This program and the accompanying materials
12 # are licensed and made available under the terms and conditions of the BSD License
13 # which accompanies this distribution. The full text of the license may be found at
14 # http://opensource.org/licenses/bsd-license.php
15 #
16 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
17 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
18 #
19 #------------------------------------------------------------------------------
20
21 .text
22 .thumb
23 .syntax unified
24
/**
  Copy Length bytes from Source to Destination. Overlap is OK.

  This implementation picks the copy direction from the relative position of
  the two buffers so that overlapping regions are copied correctly:
    - front to back when Destination is below Source, or when Length is
      smaller than the distance (Destination - Source);
    - back to front otherwise.

  In either direction 32 bytes at a time are moved with ldm/stm while more
  than 31 bytes remain, provided both addresses the loop starts from (the
  buffer starts for a front-to-back copy, the buffer ends for a back-to-front
  copy) have their low four bits clear, i.e. are 16-byte aligned. Everything
  else is copied one byte at a time.

  A Length of zero returns Destination without accessing either buffer.

  Register usage after the prologue:
    r0       flag: 1 = use the 32-byte ldm/stm path, 0 = byte copy
    r1       Source as passed in (not modified)
    r2-r9    data registers for the 32-byte ldm/stm transfers
    r3       scratch / byte being copied
    r10      destination cursor
    r11      original Destination (restored into r0 on return)
    r12      number of bytes left to copy
    r14      source cursor

  @param  Destination Target of copy
  @param  Source      Place to copy from
  @param  Length      Number of bytes to copy

  @return Destination


VOID *
EFIAPI
InternalMemCopyMem (
  OUT     VOID                      *DestinationBuffer,
  IN      CONST VOID                *SourceBuffer,
  IN      UINTN                     Length
  )
**/
ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
  push    {r4-r11, lr}
  // Save the input parameters in extra registers
  // (r11 = destination, r10 = destination cursor, r14 = source, r12 = length)
  mov     r11, r0
  mov     r10, r0
  mov     r12, r2
  mov     r14, r1

  // Nothing to copy. This must be checked up front: the back-to-front byte
  // loop decrements the counter before testing it, so entering it with a
  // length of 0 (reachable when dest == source) would wrap the counter to
  // 0xFFFFFFFF and run through memory far outside the buffers.
  cmp     r2, #0
  beq     memcopy_end

  cmp     r11, r1
  // If (dest < source) a front-to-back copy is safe
  bcc     memcopy_check_optim_default

  // If (length < dest - source) the destination starts beyond the end of the
  // source, so a front-to-back copy is safe as well
  rsb     r3, r1, r11
  cmp     r12, r3
  bcc     memcopy_check_optim_default
  b       memcopy_check_optim_overlap

memcopy_check_optim_default:
  // Check if we can use an optimized path ((length >= 32) && destination
  // 16-byte aligned && source 16-byte aligned) for the memcopy
  // (optimized path if r0 == 1)
  tst     r0, #0xF
  it      ne
  movne   r0, #0
  bne     memcopy_default
  tst     r1, #0xF
  ite     ne
  movne   r3, #0
  moveq   r3, #1
  cmp     r2, #31
  ite     ls
  movls   r0, #0
  andhi   r0, r3, #1
  b       memcopy_default

memcopy_check_optim_overlap:
  // r10 = dest_end, r14 = source_end
  add     r10, r11, r12
  add     r14, r12, r1

  // Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned
  // && source_end 16-byte aligned)
  cmp     r2, #31
  ite     ls
  movls   r0, #0
  movhi   r0, #1
  tst     r10, #0xF
  it      ne
  movne   r0, #0
  tst     r14, #0xF
  it      ne
  movne   r0, #0
  b       memcopy_overlapped

memcopy_overlapped_non_optim:
  // Back-to-front byte copy. Only entered with r12 != 0.
  // We read 1 byte from the end of the source buffer
  sub     r3, r14, #1
  sub     r12, r12, #1
  ldrb    r3, [r3, #0]
  sub     r2, r10, #1
  // Flags for the loop test; none of the instructions below modify them
  cmp     r12, #0
  // We write 1 byte at the end of the dest buffer
  sub     r10, r10, #1
  sub     r14, r14, #1
  strb    r3, [r2, #0]
  bne     memcopy_overlapped_non_optim
  b       memcopy_end

// r10 = dest_end, r14 = source_end
memcopy_overlapped:
  // Are we in the optimized case ?
  cmp     r0, #0
  beq     memcopy_overlapped_non_optim

  // Optimized Overlapped - Read 32 bytes
  sub     r14, r14, #32
  sub     r12, r12, #32
  cmp     r12, #31
  ldmia   r14, {r2-r9}

  // If fewer than 32 bytes remain then disable optim for the next iteration
  it      ls
  movls   r0, #0

  cmp     r12, #0

  // Optimized Overlapped - Write 32 bytes
  sub     r10, r10, #32
  stmia   r10, {r2-r9}

  // while (length != 0)
  bne     memcopy_overlapped
  b       memcopy_end

memcopy_default_non_optim:
  // Front-to-back byte copy; falls through to the remaining-length test
  ldrb    r3, [r14], #1
  sub     r12, r12, #1
  strb    r3, [r10], #1

memcopy_default:
  cmp     r12, #0
  beq     memcopy_end

// r10 = dest, r14 = source
memcopy_default_loop:
  cmp     r0, #0
  beq     memcopy_default_non_optim

  // Optimized memcopy - Read 32 Bytes
  sub     r12, r12, #32
  cmp     r12, #31
  ldmia   r14!, {r2-r9}

  // If fewer than 32 bytes remain then disable optim for the next iteration
  it      ls
  movls   r0, #0

  cmp     r12, #0

  // Optimized memcopy - Write 32 Bytes
  stmia   r10!, {r2-r9}

  // while (length != 0)
  bne     memcopy_default_loop

memcopy_end:
  // Return the original destination pointer
  mov     r0, r11
  pop     {r4-r11, pc}