]> git.proxmox.com Git - mirror_edk2.git/blame - ArmPkg/Library/CompilerIntrinsicsLib/AArch64/memcpy.S
ARM Packages: use GCC_ASM_EXPORT to export functions
[mirror_edk2.git] / ArmPkg / Library / CompilerIntrinsicsLib / AArch64 / memcpy.S
CommitLineData
25402f5d
HL
1/*\r
2 * Copyright (c) 2011 - 2013, ARM Ltd\r
3 * All rights reserved.\r
4 *\r
5 * Redistribution and use in source and binary forms, with or without\r
6 * modification, are permitted provided that the following conditions\r
7 * are met:\r
8 * 1. Redistributions of source code must retain the above copyright\r
9 * notice, this list of conditions and the following disclaimer.\r
10 * 2. Redistributions in binary form must reproduce the above copyright\r
11 * notice, this list of conditions and the following disclaimer in the\r
12 * documentation and/or other materials provided with the distribution.\r
13 * 3. The name of the company may not be used to endorse or promote\r
14 * products derived from this software without specific prior written\r
15 * permission.\r
16 *\r
17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED\r
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\r
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\r
20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\r
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
27 */\r
28\r
29\r
// Place the routine in the code section, with its entry point aligned
// to a 2^2 = 4 byte boundary.
        .text
        .p2align 2

// Export the memcpy symbol through the project's GCC_ASM_EXPORT macro
// (the macro itself is defined outside this file).
GCC_ASM_EXPORT(memcpy)
25402f5d
HL
35\r
36\r
37// Taken from Newlib BSD implementation.\r
//------------------------------------------------------------------------
// memcpy
//
// In:    x0 = destination pointer
//        x1 = source pointer
//        x2 = number of bytes to copy ("nb")
// Out:   x0 = destination pointer (x0 is only read, never written)
// Uses:  x1-x6 and x8-x13 as scratch, plus the condition flags
//
// Strategy:
//   1. Counts of 64 bytes or more: one unaligned 16-byte copy brings the
//      destination up to a 16-byte boundary, then 64-byte blocks are
//      copied.
//   2. What is left is copied 16 bytes at a time.
//   3. The final 0-15 bytes are copied as 8/4/2/1 byte pieces selected by
//      the low four bits of the count.
//
// NOTE: the count is examined with signed conditions (b.lt / b.ge), so
// this routine relies on nb never having its top bit set.
//------------------------------------------------------------------------
ASM_PFX(memcpy):
        mov     x6, x0                      // x6 = write cursor; x0 is kept for the return value

        // Anything shorter than 64 bytes is not worth aligning.
        cmp     x2, #64
        b.lt    .Lmemcpy_copy16

        // x3 = (-dst) & 15: distance from dst to the next 16-byte boundary.
        neg     x3, x0
        and     x3, x3, #15
        cbz     x3, .Lmemcpy_copy64         // dst already aligned (the likely case)

        // At least 64 bytes remain, so it is safe to copy a full 16 bytes
        // from the unaligned start and then step both pointers forward by
        // only x3 bytes. The bytes between x3 and 16 get written a second
        // time by the aligned copies below; that is harmless because
        // memcpy gives no guarantee for overlapping buffers.
        ldp     x4, x5, [x1]
        stp     x4, x5, [x6]
        add     x1, x1, x3                  // src += offset
        add     x6, x6, x3                  // dst += offset (now 16-byte aligned)
        sub     x2, x2, x3                  // nb  -= offset

        // From here on the destination is 16-byte aligned; the source
        // may or may not be.

.Lmemcpy_copy64:
        // 64-byte block loop. The count is kept biased by -64 while the
        // loop runs so that the sign of x2 says whether another full
        // block is available.
        subs    x2, x2, #64
        b.lt    .Lmemcpy_copy64_done

.Lmemcpy_copy64_loop:
        subs    x2, x2, #64                 // flags are consumed by b.ge below;
                                            // nothing in between alters them
        ldp     x4,  x5,  [x1]
        ldp     x8,  x9,  [x1, #16]
        ldp     x10, x11, [x1, #32]
        ldp     x12, x13, [x1, #48]
        stp     x4,  x5,  [x6]
        stp     x8,  x9,  [x6, #16]
        stp     x10, x11, [x6, #32]
        stp     x12, x13, [x6, #48]
        add     x1, x1, #64
        add     x6, x6, #64
        b.ge    .Lmemcpy_copy64_loop

.Lmemcpy_copy64_done:
        add     x2, x2, #64                 // remove the -64 bias again

.Lmemcpy_copy16:
        // Copy the remaining whole 16-byte units (0-48 bytes).
        subs    x2, x2, #16
        b.lt    .Lmemcpy_tail

.Lmemcpy_copy16_loop:
        ldp     x4, x5, [x1], #16
        stp     x4, x5, [x6], #16
        subs    x2, x2, #16
        b.ge    .Lmemcpy_copy16_loop

        // x2 is left biased by -16 on purpose: subtracting 16 does not
        // disturb bits 0-3, and those are the only bits the tail looks at.

.Lmemcpy_tail:
        // Copy the trailing 0-15 bytes, one power-of-two piece per count bit.
        tbz     x2, #3, .Lmemcpy_tail4
        ldr     x4, [x1], #8                // 8 bytes
        str     x4, [x6], #8

.Lmemcpy_tail4:
        tbz     x2, #2, .Lmemcpy_tail2
        ldr     w4, [x1], #4                // 4 bytes
        str     w4, [x6], #4

.Lmemcpy_tail2:
        tbz     x2, #1, .Lmemcpy_tail1
        ldrh    w4, [x1], #2                // 2 bytes
        strh    w4, [x6], #2

.Lmemcpy_tail1:
        tbz     x2, #0, .Lmemcpy_done
        ldrb    w4, [x1]                    // last single byte
        strb    w4, [x6]

.Lmemcpy_done:
        // Single exit point; x0 still holds the original destination.
        ret