]> git.proxmox.com Git - mirror_edk2.git/blob - ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S
ArmPkg/CompilerIntrinsicsLib: switch to ASM_FUNC() asm macro
[mirror_edk2.git] / ArmPkg / Library / CompilerIntrinsicsLib / AArch64 / memcpy.S
1 /*
2 * Copyright (c) 2011 - 2013, ARM Ltd
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the company may not be used to endorse or promote
14 * products derived from this software without specific prior written
15 * permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <AsmMacroIoLibV8.h>
30
31 // Taken from Newlib BSD implementation.
//
// void *memcpy (void *dst, const void *src, size_t nb)
//
// Copies nb bytes from src to dst and returns dst. The buffers must not
// overlap: on the alignment path some destination bytes are written twice.
//
//   In:       x0 = dst, x1 = src, x2 = nb
//   Out:      x0 = dst (never modified)
//   Scratch:  x1-x6, x8-x13 and the condition flags. The stack is not used.
//
ASM_FUNC(memcpy)
  // Work on a copy of dst in x6 so that x0 still holds the return value.
  mov     x6, x0

  // nb is a size_t, so every test on x2 below uses the unsigned
  // conditions: LO (carry clear, the subtraction borrowed) and
  // HS (carry set, no borrow). A count with its top bit set is
  // therefore treated as a very large length, not as a negative one.

  // Require at least 64 bytes to be worth aligning.
  cmp     x2, #64
  b.lo    qwordcopy

  // Compute offset to align destination to 16 bytes.
  neg     x3, x0
  and     x3, x3, #15

  cbz     x3, blockcopy             // offset == 0 is likely

  // We know there is at least 64 bytes to be done, so we
  // do a 16 byte misaligned copy at first and then later do
  // all 16-byte aligned copies. Some bytes will be copied
  // twice, but there's no harm in that since memcpy does not
  // guarantee correctness on overlap.

  sub     x2, x2, x3                // nb -= offset
  ldp     x4, x5, [x1]
  add     x1, x1, x3
  stp     x4, x5, [x6]
  add     x6, x6, x3

  // The destination pointer is now qword (16 byte) aligned.
  // (The src pointer might be.)

blockcopy:
  // Copy 64 bytes at a time. x2 is pre-decremented by 64, so inside
  // the loop "no borrow" means another full 64-byte block remains.
  subs    x2, x2, #64
  b.lo    3f
2:
  subs    x2, x2, #64               // flags are consumed by b.hs below;
  ldp     x4, x5, [x1, #0]          // none of the loads, stores or plain
  ldp     x8, x9, [x1, #16]         // adds in between modify them
  ldp     x10, x11, [x1, #32]
  ldp     x12, x13, [x1, #48]
  add     x1, x1, #64
  stp     x4, x5, [x6, #0]
  stp     x8, x9, [x6, #16]
  stp     x10, x11, [x6, #32]
  stp     x12, x13, [x6, #48]
  add     x6, x6, #64
  b.hs    2b

  // Unwind pre-decrement
3:
  add     x2, x2, #64

qwordcopy:
  // Copy 0-48 bytes, 16 bytes at a time.
  subs    x2, x2, #16
  b.lo    tailcopy
2:
  ldp     x4, x5, [x1], #16
  subs    x2, x2, #16               // stp below leaves the flags intact
  stp     x4, x5, [x6], #16
  b.hs    2b

  // No need to unwind the pre-decrement, it would not change
  // the low 4 bits of the count. But how likely is it for the
  // byte count to be multiple of 16? Is it worth the overhead
  // of testing for x2 == -16?

tailcopy:
  // Copy trailing 0-15 bytes. x2 has wrapped below zero here, but only
  // by a multiple of 16, so bits 3:0 still hold the bytes left to copy.
  tbz     x2, #3, 1f
  ldr     x4, [x1], #8              // copy 8 bytes
  str     x4, [x6], #8
1:
  tbz     x2, #2, 1f
  ldr     w4, [x1], #4              // copy 4 bytes
  str     w4, [x6], #4
1:
  tbz     x2, #1, 1f
  ldrh    w4, [x1], #2              // copy 2 bytes
  strh    w4, [x6], #2
1:
  tbz     x2, #0, return
  ldrb    w4, [x1]                  // copy 1 byte
  strb    w4, [x6]

return:
  // This is the only return point of memcpy.
  ret