]>
Commit | Line | Data |
---|---|---|
9fe3fd03 JN |
1 | /* A memcpy for CRIS. |
2 | Copyright (C) 1994-2005 Axis Communications. | |
3 | All rights reserved. | |
4 | ||
5 | Redistribution and use in source and binary forms, with or without | |
6 | modification, are permitted provided that the following conditions | |
7 | are met: | |
8 | ||
9 | 1. Redistributions of source code must retain the above copyright | |
10 | notice, this list of conditions and the following disclaimer. | |
11 | ||
12 | 2. Neither the name of Axis Communications nor the names of its | |
13 | contributors may be used to endorse or promote products derived | |
14 | from this software without specific prior written permission. | |
15 | ||
16 | THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS | |
17 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS | |
20 | COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, | |
21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
22 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
24 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
25 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING | |
26 | IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
27 | POSSIBILITY OF SUCH DAMAGE. */ | |
28 | ||
29 | /* FIXME: This file should really only be used for reference, as the | |
30 | result is somewhat depending on gcc generating what we expect rather | |
31 | than what we describe. An assembly file should be used instead. */ | |
32 | ||
33 | #include <stddef.h> | |
34 | ||
/* Break even between movem and move16 is really at 38.7 * 2, but
   modulo 44, so up to the next multiple of 44, we use ordinary code.

   memcpy compares its remaining byte count against this value to
   decide whether to take the movem block-copy path.  The movem loop
   there counts the size down in steps of 44 bytes per pass, so the
   threshold corresponds to two such passes.  */
#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)

/* No name ambiguities in this file.
   The inline assembly in memcpy writes register names without a prefix
   (r10, r11, r12, r13, sp); presumably this directive is what makes the
   assembler accept that spelling -- confirm against the CRIS gas
   documentation.  */
__asm__ (".syntax no_register_prefix");
41 | ||
42 | void * | |
43 | memcpy(void *pdst, const void *psrc, size_t pn) | |
51533b61 | 44 | { |
9fe3fd03 | 45 | /* Now we want the parameters put in special registers. |
51533b61 | 46 | Make sure the compiler is able to make something useful of this. |
9fe3fd03 | 47 | As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). |
51533b61 | 48 | |
9fe3fd03 JN |
49 | If gcc was allright, it really would need no temporaries, and no |
50 | stack space to save stuff on. */ | |
51533b61 MS |
51 | |
52 | register void *return_dst __asm__ ("r10") = pdst; | |
9fe3fd03 JN |
53 | register unsigned char *dst __asm__ ("r13") = pdst; |
54 | register unsigned const char *src __asm__ ("r11") = psrc; | |
51533b61 MS |
55 | register int n __asm__ ("r12") = pn; |
56 | ||
51533b61 MS |
57 | /* When src is aligned but not dst, this makes a few extra needless |
58 | cycles. I believe it would take as many to check that the | |
59 | re-alignment was unnecessary. */ | |
60 | if (((unsigned long) dst & 3) != 0 | |
61 | /* Don't align if we wouldn't copy more than a few bytes; so we | |
62 | don't have to check further for overflows. */ | |
63 | && n >= 3) | |
64 | { | |
65 | if ((unsigned long) dst & 1) | |
9fe3fd03 JN |
66 | { |
67 | n--; | |
68 | *dst = *src; | |
69 | src++; | |
70 | dst++; | |
71 | } | |
51533b61 MS |
72 | |
73 | if ((unsigned long) dst & 2) | |
9fe3fd03 JN |
74 | { |
75 | n -= 2; | |
76 | *(short *) dst = *(short *) src; | |
77 | src += 2; | |
78 | dst += 2; | |
79 | } | |
51533b61 MS |
80 | } |
81 | ||
9fe3fd03 JN |
82 | /* Decide which copying method to use. */ |
83 | if (n >= MEMCPY_BY_BLOCK_THRESHOLD) | |
84 | { | |
85 | /* It is not optimal to tell the compiler about clobbering any | |
86 | registers; that will move the saving/restoring of those registers | |
87 | to the function prologue/epilogue, and make non-movem sizes | |
88 | suboptimal. */ | |
89 | __asm__ volatile | |
90 | ("\ | |
91 | ;; GCC does promise correct register allocations, but let's \n\ | |
92 | ;; make sure it keeps its promises. \n\ | |
93 | .ifnc %0-%1-%2,$r13-$r11-$r12 \n\ | |
94 | .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\" \n\ | |
95 | .endif \n\ | |
51533b61 | 96 | \n\ |
9fe3fd03 JN |
97 | ;; Save the registers we'll use in the movem process \n\ |
98 | ;; on the stack. \n\ | |
99 | subq 11*4,sp \n\ | |
100 | movem r10,[sp] \n\ | |
51533b61 | 101 | \n\ |
9fe3fd03 JN |
102 | ;; Now we've got this: \n\ |
103 | ;; r11 - src \n\ | |
104 | ;; r13 - dst \n\ | |
105 | ;; r12 - n \n\ | |
51533b61 | 106 | \n\ |
9fe3fd03 JN |
107 | ;; Update n for the first loop. \n\ |
108 | subq 44,r12 \n\ | |
51533b61 | 109 | 0: \n\ |
9fe3fd03 JN |
110 | " |
111 | #ifdef __arch_common_v10_v32 | |
112 | /* Cater to branch offset difference between v32 and v10. We | |
113 | assume the branch below has an 8-bit offset. */ | |
114 | " setf\n" | |
115 | #endif | |
116 | " movem [r11+],r10 \n\ | |
117 | subq 44,r12 \n\ | |
118 | bge 0b \n\ | |
119 | movem r10,[r13+] \n\ | |
51533b61 | 120 | \n\ |
9fe3fd03 JN |
121 | ;; Compensate for last loop underflowing n. \n\ |
122 | addq 44,r12 \n\ | |
51533b61 | 123 | \n\ |
9fe3fd03 JN |
124 | ;; Restore registers from stack. \n\ |
125 | movem [sp+],r10" | |
51533b61 | 126 | |
9fe3fd03 JN |
127 | /* Outputs. */ |
128 | : "=r" (dst), "=r" (src), "=r" (n) | |
51533b61 | 129 | |
9fe3fd03 JN |
130 | /* Inputs. */ |
131 | : "0" (dst), "1" (src), "2" (n)); | |
132 | } | |
51533b61 | 133 | |
9fe3fd03 JN |
134 | while (n >= 16) |
135 | { | |
136 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
137 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
138 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
139 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
51533b61 | 140 | |
9fe3fd03 JN |
141 | n -= 16; |
142 | } | |
51533b61 | 143 | |
51533b61 | 144 | switch (n) |
9fe3fd03 | 145 | { |
51533b61 MS |
146 | case 0: |
147 | break; | |
9fe3fd03 | 148 | |
51533b61 | 149 | case 1: |
9fe3fd03 | 150 | *dst = *src; |
51533b61 | 151 | break; |
9fe3fd03 | 152 | |
51533b61 | 153 | case 2: |
9fe3fd03 | 154 | *(short *) dst = *(short *) src; |
51533b61 | 155 | break; |
9fe3fd03 | 156 | |
51533b61 | 157 | case 3: |
9fe3fd03 JN |
158 | *(short *) dst = *(short *) src; dst += 2; src += 2; |
159 | *dst = *src; | |
51533b61 | 160 | break; |
9fe3fd03 | 161 | |
51533b61 | 162 | case 4: |
9fe3fd03 | 163 | *(long *) dst = *(long *) src; |
51533b61 | 164 | break; |
9fe3fd03 | 165 | |
51533b61 | 166 | case 5: |
9fe3fd03 JN |
167 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
168 | *dst = *src; | |
51533b61 | 169 | break; |
9fe3fd03 | 170 | |
51533b61 | 171 | case 6: |
9fe3fd03 JN |
172 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
173 | *(short *) dst = *(short *) src; | |
51533b61 | 174 | break; |
9fe3fd03 | 175 | |
51533b61 | 176 | case 7: |
9fe3fd03 JN |
177 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
178 | *(short *) dst = *(short *) src; dst += 2; src += 2; | |
179 | *dst = *src; | |
51533b61 | 180 | break; |
9fe3fd03 | 181 | |
51533b61 | 182 | case 8: |
9fe3fd03 JN |
183 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
184 | *(long *) dst = *(long *) src; | |
51533b61 | 185 | break; |
9fe3fd03 | 186 | |
51533b61 | 187 | case 9: |
9fe3fd03 JN |
188 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
189 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
190 | *dst = *src; | |
51533b61 | 191 | break; |
9fe3fd03 | 192 | |
51533b61 | 193 | case 10: |
9fe3fd03 JN |
194 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
195 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
196 | *(short *) dst = *(short *) src; | |
51533b61 | 197 | break; |
9fe3fd03 | 198 | |
51533b61 | 199 | case 11: |
9fe3fd03 JN |
200 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
201 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
202 | *(short *) dst = *(short *) src; dst += 2; src += 2; | |
203 | *dst = *src; | |
51533b61 | 204 | break; |
9fe3fd03 | 205 | |
51533b61 | 206 | case 12: |
9fe3fd03 JN |
207 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
208 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
209 | *(long *) dst = *(long *) src; | |
51533b61 | 210 | break; |
9fe3fd03 | 211 | |
51533b61 | 212 | case 13: |
9fe3fd03 JN |
213 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
214 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
215 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
216 | *dst = *src; | |
51533b61 | 217 | break; |
9fe3fd03 | 218 | |
51533b61 | 219 | case 14: |
9fe3fd03 JN |
220 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
221 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
222 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
223 | *(short *) dst = *(short *) src; | |
51533b61 | 224 | break; |
9fe3fd03 | 225 | |
51533b61 | 226 | case 15: |
9fe3fd03 JN |
227 | *(long *) dst = *(long *) src; dst += 4; src += 4; |
228 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
229 | *(long *) dst = *(long *) src; dst += 4; src += 4; | |
230 | *(short *) dst = *(short *) src; dst += 2; src += 2; | |
231 | *dst = *src; | |
51533b61 | 232 | break; |
9fe3fd03 | 233 | } |
51533b61 | 234 | |
9fe3fd03 JN |
235 | return return_dst; |
236 | } |