]> git.proxmox.com Git - mirror_edk2.git/blob - MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm
MdePkg/BaseMemoryLibOptDxe: add accelerated ARM routines
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / CopyMem.asm
1 ;------------------------------------------------------------------------------
2 ;
3 ; CopyMem() worker for ARM
4 ;
5 ; This file started out as C code that did 64 bit moves if the buffer was
6 ; 32-bit aligned, else it does a byte copy. It also does a byte copy for
7 ; any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
8 ;
9 ; Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
10 ; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
11 ; This program and the accompanying materials
12 ; are licensed and made available under the terms and conditions of the BSD License
13 ; which accompanies this distribution. The full text of the license may be found at
14 ; http://opensource.org/licenses/bsd-license.php
15 ;
16 ; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
17 ; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
18 ;
19 ;------------------------------------------------------------------------------
20
//------------------------------------------------------------------------------
// InternalMemCopyMem
//
// Copies r2 bytes from the buffer at r1 to the buffer at r0. The buffers may
// overlap: the copy runs backwards (from the last byte) when the destination
// starts inside or right after the source range, and forwards otherwise.
//
// In:   r0 = destination, r1 = source, r2 = length in bytes
// Out:  r0 = destination (the value passed in)
// Saved on entry and restored on exit: r4-r11, lr
// Overwritten: r2, r3, r12, condition flags
//
// Working registers inside the routine:
//   r11     = original destination, kept for the return value
//   r10     = destination cursor (forward) / end-of-destination cursor (backward)
//   r14     = source cursor (forward) / end-of-source cursor (backward)
//   r12     = bytes remaining
//   r0      = block-copy flag once the alignment checks have run:
//             non-zero -> move 32 bytes per iteration with ldm/stm
//             zero     -> move one byte per iteration
//   r2-r9   = data registers for the 32-byte block moves
//
// NOTE(review): the AREA is named SetMem although this file implements the
// copy routine; the name is left unchanged here - confirm whether intended.
//------------------------------------------------------------------------------

    EXPORT  InternalMemCopyMem
    AREA    SetMem, CODE, READONLY
    THUMB

InternalMemCopyMem
    stmfd   sp!, {r4-r11, lr}
    // Move the arguments out of r0/r2: r0 is reused as the block-copy flag and
    // r2 is overwritten by the ldm/stm block moves.
    // (r11 = destination to return, r10 = destination cursor,
    //  r12 = bytes remaining, r14 = source cursor)
    mov     r11, r0
    mov     r10, r0
    mov     r12, r2
    mov     r14, r1

    // Nothing to copy. Without this check, a zero length combined with
    // destination == source falls through both forward-copy tests below and
    // reaches the backward byte loop, which decrements the counter before
    // testing it; the counter would wrap to 0xFFFFFFFF.
    cmp     r2, #0
    beq     memcopy_end

memcopy_check_overlapped
    cmp     r11, r1
    // If (dest < source) a forward copy never overwrites unread source bytes
    bcc     memcopy_check_optim_default

    // r3 = dest - source. If (length < dest - source) the destination starts
    // past the end of the source, so a forward copy is safe as well.
    rsb     r3, r1, r11
    cmp     r12, r3
    bcc     memcopy_check_optim_default
    // Otherwise copy backwards, starting from the end of both buffers
    b       memcopy_check_optim_overlap

memcopy_check_optim_default
    // Decide whether the forward copy can use the 32-byte block path:
    // (length >= 32) && destination 16-byte aligned && source 16-byte aligned
    // (the low four address bits are tested). Block path is used if r0 == 1.
    tst     r0, #0xF
    movne   r0, #0
    bne     memcopy_default
    tst     r1, #0xF
    movne   r3, #0
    moveq   r3, #1
    cmp     r2, #31
    movls   r0, #0
    andhi   r0, r3, #1
    b       memcopy_default

memcopy_check_optim_overlap
    // r10 = dest_end, r14 = source_end (one past the last byte of each buffer)
    add     r10, r11, r12
    add     r14, r12, r1

    // Decide whether the backward copy can use the 32-byte block path:
    // (length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned
    cmp     r2, #31
    movls   r0, #0
    movhi   r0, #1
    tst     r10, #0xF
    movne   r0, #0
    tst     r14, #0xF
    movne   r0, #0
    b       memcopy_overlapped

memcopy_overlapped_non_optim
    // We read 1 byte from the end of the source buffer
    sub     r3, r14, #1
    sub     r12, r12, #1
    ldrb    r3, [r3, #0]
    sub     r2, r10, #1
    // Flags for the loop branch are set here; the instructions between this
    // compare and the bne below do not update them.
    cmp     r12, #0
    // We write 1 byte at the end of the dest buffer
    sub     r10, r10, #1
    sub     r14, r14, #1
    strb    r3, [r2, #0]
    bne     memcopy_overlapped_non_optim
    b       memcopy_end

// r10 = dest_end, r14 = source_end
memcopy_overlapped
    // Are we in the optimized case ?
    cmp     r0, #0
    beq     memcopy_overlapped_non_optim

    // Optimized Overlapped - Read 32 bytes (all eight words are loaded before
    // any of them is stored)
    sub     r14, r14, #32
    sub     r12, r12, #32
    cmp     r12, #31
    ldmia   r14, {r2-r9}

    // If the remaining length is less than 32 then disable optim, so that the
    // tail is finished by the byte loop
    movls   r0, #0

    cmp     r12, #0

    // Optimized Overlapped - Write 32 bytes
    sub     r10, r10, #32
    stmia   r10, {r2-r9}

    // while (length != 0)
    bne     memcopy_overlapped
    b       memcopy_end

memcopy_default_non_optim
    // Byte copy, post-incrementing both cursors
    ldrb    r3, [r14], #1
    sub     r12, r12, #1
    strb    r3, [r10], #1

memcopy_default
    cmp     r12, #0
    beq     memcopy_end

// r10 = dest, r14 = source
memcopy_default_loop
    cmp     r0, #0
    beq     memcopy_default_non_optim

    // Optimized memcopy - Read 32 Bytes
    sub     r12, r12, #32
    cmp     r12, #31
    ldmia   r14!, {r2-r9}

    // If the remaining length is less than 32 then disable optim, so that the
    // tail is finished by the byte loop
    movls   r0, #0

    cmp     r12, #0

    // Optimized memcopy - Write 32 Bytes
    stmia   r10!, {r2-r9}

    // while (length != 0)
    bne     memcopy_default_loop

memcopy_end
    // Return the destination pointer that was passed in
    mov     r0, r11
    ldmfd   sp!, {r4-r11, pc}

    END
147