]>
Commit | Line | Data |
---|---|---|
25402f5d HL |
1 | /*\r |
2 | * Copyright (c) 2011 - 2013, ARM Ltd\r | |
3 | * All rights reserved.\r | |
4 | *\r | |
5 | * Redistribution and use in source and binary forms, with or without\r | |
6 | * modification, are permitted provided that the following conditions\r | |
7 | * are met:\r | |
8 | * 1. Redistributions of source code must retain the above copyright\r | |
9 | * notice, this list of conditions and the following disclaimer.\r | |
10 | * 2. Redistributions in binary form must reproduce the above copyright\r | |
11 | * notice, this list of conditions and the following disclaimer in the\r | |
12 | * documentation and/or other materials provided with the distribution.\r | |
13 | * 3. The name of the company may not be used to endorse or promote\r | |
14 | * products derived from this software without specific prior written\r | |
15 | * permission.\r | |
16 | *\r | |
17 | * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED\r | |
18 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\r | |
19 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\r | |
20 | * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r | |
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\r | |
22 | * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r | |
23 | * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r | |
24 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r | |
25 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r | |
26 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r | |
27 | */\r | |
28 | \r | |
903e3124 | 29 | #include <AsmMacroIoLibV8.h>\r |
25402f5d HL |
30 | \r |
31 | // Taken from Newlib BSD implementation.\r | |
// memcpy: copy x2 bytes from the buffer at x1 to the buffer at x0.
//
// In:    x0 = destination pointer
//        x1 = source pointer
//        x2 = byte count (nb), treated as an unsigned 64-bit value
// Out:   x0 = destination pointer, unchanged (x6 is the working copy)
// Uses:  x1-x6 and x8-x13 as scratch, plus the NZCV flags.
//        No stack is used and no other routine is called.
//
// Overlapping source and destination buffers are not supported.
ASM_FUNC(memcpy)
    // Work on a copy of dst in x6 so that x0 still holds the return value.
    mov     x6, x0

    // All length tests below use the unsigned conditions LO/HS, so the
    // full size_t range of x2 is handled; nb is never read as negative.

    // Require at least 64 bytes to be worth aligning.
    cmp     x2, #64
    b.lo    qwordcopy

    // x3 = number of bytes needed to bring dst up to a 16-byte boundary
    // (0..15).
    neg     x3, x0
    and     x3, x3, #15

    cbz     x3, blockcopy           // offset == 0 is likely

    // At least 64 bytes remain, so copy one unaligned 16-byte chunk and
    // then advance both pointers by only x3 bytes. The 16 - x3 bytes that
    // follow are copied a second time by the aligned code below, which is
    // harmless because the buffers are not allowed to overlap.
    sub     x2, x2, x3              // nb -= offset
    ldp     x4, x5, [x1]
    add     x1, x1, x3
    stp     x4, x5, [x6]
    add     x6, x6, x3

    // The destination pointer is now 16-byte aligned.
    // (The source pointer may or may not be.)

blockcopy:
    // Copy 64 bytes at a time. x2 is kept pre-decremented by 64 inside
    // the loop so that a single SUBS both updates the count and yields
    // the loop condition.
    subs    x2, x2, #64
    b.lo    3f                      // fewer than 64 bytes left
2:  subs    x2, x2, #64             // C set => another full block follows
    ldp     x4, x5, [x1, #0]
    ldp     x8, x9, [x1, #16]
    ldp     x10, x11, [x1, #32]
    ldp     x12, x13, [x1, #48]
    add     x1, x1, #64             // plain ADD: flags from SUBS survive
    stp     x4, x5, [x6, #0]
    stp     x8, x9, [x6, #16]
    stp     x10, x11, [x6, #32]
    stp     x12, x13, [x6, #48]
    add     x6, x6, #64
    b.hs    2b

    // Undo the pre-decrement; x2 is the true remaining count (0..63).
3:  add     x2, x2, #64

qwordcopy:
    // Copy the remaining whole 16-byte chunks (0-48 bytes in total).
    subs    x2, x2, #16
    b.lo    tailcopy
2:  ldp     x4, x5, [x1], #16
    subs    x2, x2, #16             // C set => another 16-byte chunk follows
    stp     x4, x5, [x6], #16
    b.hs    2b

    // The pre-decrement is deliberately not undone here: subtracting 16
    // leaves the low 4 bits of the count unchanged, and those are the
    // only bits the tail code examines.

tailcopy:
    // Copy the trailing 0-15 bytes, one power-of-two piece per set bit
    // in the low 4 bits of x2.
    tbz     x2, #3, 1f
    ldr     x4, [x1], #8            // copy 8 bytes
    str     x4, [x6], #8
1:
    tbz     x2, #2, 1f
    ldr     w4, [x1], #4            // copy 4 bytes
    str     w4, [x6], #4
1:
    tbz     x2, #1, 1f
    ldrh    w4, [x1], #2            // copy 2 bytes
    strh    w4, [x6], #2
1:
    tbz     x2, #0, return
    ldrb    w4, [x1]                // copy 1 byte
    strb    w4, [x6]

return:
    // This is the only return point of memcpy.
    ret