]> git.proxmox.com Git - mirror_edk2.git/blame - MdePkg/Library/BaseMemoryLibOptDxe/Arm/CopyMem.asm
MdePkg/BaseMemoryLibOptDxe: add accelerated ARM routines
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / CopyMem.asm
CommitLineData
a37f6605
AB
1;------------------------------------------------------------------------------\r
2;\r
3; CopyMem() worker for ARM\r
4;\r
5; This file started out as C code that did 64 bit moves if the buffer was\r
6; 32-bit aligned, else it does a byte copy. It also does a byte copy for\r
7; any trailing bytes. It was updated to do 32-byte copies using stm/ldm.\r
8;\r
9; Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>\r
10; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>\r
11; This program and the accompanying materials\r
12; are licensed and made available under the terms and conditions of the BSD License\r
13; which accompanies this distribution. The full text of the license may be found at\r
14; http://opensource.org/licenses/bsd-license.php\r
15;\r
16; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
17; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
18;\r
19;------------------------------------------------------------------------------\r
20\r
21 EXPORT InternalMemCopyMem\r
22 AREA SetMem, CODE, READONLY\r
23 THUMB\r
24\r
InternalMemCopyMem
    ;
    ; VOID *InternalMemCopyMem (VOID *Destination, CONST VOID *Source, UINTN Length)
    ;
    ; Copies Length bytes from Source to Destination; the two buffers may
    ; overlap. The copy direction is chosen so that overlapping source bytes
    ; are read before they are overwritten.
    ;
    ; In:    r0 = Destination, r1 = Source, r2 = Length in bytes
    ; Out:   r0 = Destination
    ; Saved: r4-r11 and lr are pushed on entry and restored on exit
    ; Uses:  r1-r3, r12 and the flags as scratch
    ;
    ; Register roles for the whole routine:
    ;   r11 = original Destination, kept for the return value
    ;   r10 = destination cursor (forward copy) or destination end (backward copy)
    ;   r14 = source cursor (forward copy) or source end (backward copy)
    ;   r12 = number of bytes still to copy
    ;   r0  = 1 while 32-byte block transfers are allowed, 0 for byte copies
    ;
    stmfd   sp!, {r4-r11, lr}
    mov     r11, r0
    mov     r10, r0
    mov     r12, r2
    mov     r14, r1

    ; Nothing to copy. This guard is required for correctness: with a zero
    ; Length and Destination equal to Source the backward byte loop would be
    ; entered, and that loop decrements the count before testing it, so the
    ; count would wrap around instead of terminating.
    cmp     r2, #0
    beq     memcopy_end

memcopy_check_overlapped
    cmp     r11, r1
    ; Destination below Source: copy forward
    bcc     memcopy_check_optim_default

    ; Destination at or above Source: copying forward is still safe when
    ; Length < (Destination - Source), i.e. Source + Length < Destination
    rsb     r3, r1, r11
    cmp     r12, r3
    bcc     memcopy_check_optim_default
    b       memcopy_check_optim_overlap

memcopy_check_optim_default
    ; Forward copy. Block transfers are enabled (r0 = 1) only when
    ; Length >= 32 and the low four bits of both Destination and Source are
    ; zero (16-byte alignment); otherwise r0 = 0.
    tst     r0, #0xF
    movne   r0, #0
    bne     memcopy_default
    tst     r1, #0xF
    movne   r3, #0
    moveq   r3, #1
    cmp     r2, #31
    movls   r0, #0
    andhi   r0, r3, #1
    b       memcopy_default

memcopy_check_optim_overlap
    ; Backward copy: r10 = end of destination, r14 = end of source
    add     r10, r11, r12
    add     r14, r12, r1

    ; Block transfers are enabled (r0 = 1) only when Length >= 32 and the low
    ; four bits of both end addresses are zero (16-byte alignment).
    cmp     r2, #31
    movls   r0, #0
    movhi   r0, #1
    tst     r10, #0xF
    movne   r0, #0
    tst     r14, #0xF
    movne   r0, #0
    b       memcopy_overlapped

memcopy_overlapped_non_optim
    ; Backward byte copy. r12 is never zero on entry to this loop, so the
    ; count is tested after the byte has been moved. Both end pointers are
    ; stepped down by one before the access (pre-indexed with writeback).
    ldrb    r3, [r14, #-1]!
    strb    r3, [r10, #-1]!
    subs    r12, r12, #1
    bne     memcopy_overlapped_non_optim
    b       memcopy_end

; r10 = end of destination, r14 = end of source
memcopy_overlapped
    ; Fall back to the byte loop when block transfers are disabled
    cmp     r0, #0
    beq     memcopy_overlapped_non_optim

    ; Read the 32 bytes that sit just below the current source end
    sub     r14, r14, #32
    sub     r12, r12, #32
    ldmia   r14, {r2-r9}

    ; Fewer than 32 bytes left after this block: finish with byte copies
    cmp     r12, #31
    movls   r0, #0

    cmp     r12, #0

    ; Write the 32 bytes just below the current destination end.
    ; Neither sub nor stmia alters the flags set by the compare above.
    sub     r10, r10, #32
    stmia   r10, {r2-r9}

    ; while (remaining != 0)
    bne     memcopy_overlapped
    b       memcopy_end

memcopy_default_non_optim
    ; Forward byte copy of one byte, then fall through to the length test
    ldrb    r3, [r14], #1
    sub     r12, r12, #1
    strb    r3, [r10], #1

memcopy_default
    cmp     r12, #0
    beq     memcopy_end

; r10 = destination cursor, r14 = source cursor
memcopy_default_loop
    ; Fall back to the byte copy when block transfers are disabled
    cmp     r0, #0
    beq     memcopy_default_non_optim

    ; Read 32 bytes and advance the source cursor
    sub     r12, r12, #32
    ldmia   r14!, {r2-r9}

    ; Fewer than 32 bytes left after this block: finish with byte copies
    cmp     r12, #31
    movls   r0, #0

    cmp     r12, #0

    ; Write 32 bytes and advance the destination cursor.
    ; stmia does not alter the flags set by the compare above.
    stmia   r10!, {r2-r9}

    ; while (remaining != 0)
    bne     memcopy_default_loop

memcopy_end
    ; Return the original Destination and restore the saved registers
    mov     r0, r11
    ldmfd   sp!, {r4-r11, pc}
145\r
146 END\r
147\r