]>
Commit | Line | Data |
---|---|---|
1da177e4 | 1 | #include <linux/string.h> |
e683014c | 2 | #include <linux/export.h> |
1da177e4 LT |
3 | |
4 | #undef memcpy | |
5 | #undef memset | |
6 | ||
a9143296 | 7 | __visible void *memcpy(void *to, const void *from, size_t n) |
1da177e4 | 8 | { |
6974f0c4 | 9 | #if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE) |
1da177e4 LT |
10 | return __memcpy3d(to, from, n); |
11 | #else | |
12 | return __memcpy(to, from, n); | |
13 | #endif | |
14 | } | |
15 | EXPORT_SYMBOL(memcpy); | |
16 | ||
a9143296 | 17 | __visible void *memset(void *s, int c, size_t count) |
1da177e4 LT |
18 | { |
19 | return __memset(s, c, count); | |
20 | } | |
21 | EXPORT_SYMBOL(memset); | |
22 | ||
a9143296 | 23 | __visible void *memmove(void *dest, const void *src, size_t n) |
1da177e4 | 24 | { |
3b4b682b ML |
25 | int d0,d1,d2,d3,d4,d5; |
26 | char *ret = dest; | |
27 | ||
28 | __asm__ __volatile__( | |
d50ba368 | 29 | /* Handle more 16 bytes in loop */ |
3b4b682b ML |
30 | "cmp $0x10, %0\n\t" |
31 | "jb 1f\n\t" | |
32 | ||
33 | /* Decide forward/backward copy mode */ | |
34 | "cmp %2, %1\n\t" | |
35 | "jb 2f\n\t" | |
36 | ||
37 | /* | |
38 | * movs instruction have many startup latency | |
39 | * so we handle small size by general register. | |
40 | */ | |
41 | "cmp $680, %0\n\t" | |
42 | "jb 3f\n\t" | |
43 | /* | |
44 | * movs instruction is only good for aligned case. | |
45 | */ | |
46 | "mov %1, %3\n\t" | |
47 | "xor %2, %3\n\t" | |
48 | "and $0xff, %3\n\t" | |
49 | "jz 4f\n\t" | |
50 | "3:\n\t" | |
51 | "sub $0x10, %0\n\t" | |
52 | ||
53 | /* | |
bb916ff7 | 54 | * We gobble 16 bytes forward in each loop. |
3b4b682b ML |
55 | */ |
56 | "3:\n\t" | |
57 | "sub $0x10, %0\n\t" | |
58 | "mov 0*4(%1), %3\n\t" | |
59 | "mov 1*4(%1), %4\n\t" | |
60 | "mov %3, 0*4(%2)\n\t" | |
61 | "mov %4, 1*4(%2)\n\t" | |
62 | "mov 2*4(%1), %3\n\t" | |
63 | "mov 3*4(%1), %4\n\t" | |
64 | "mov %3, 2*4(%2)\n\t" | |
65 | "mov %4, 3*4(%2)\n\t" | |
66 | "lea 0x10(%1), %1\n\t" | |
67 | "lea 0x10(%2), %2\n\t" | |
68 | "jae 3b\n\t" | |
69 | "add $0x10, %0\n\t" | |
70 | "jmp 1f\n\t" | |
71 | ||
72 | /* | |
73 | * Handle data forward by movs. | |
74 | */ | |
75 | ".p2align 4\n\t" | |
76 | "4:\n\t" | |
77 | "mov -4(%1, %0), %3\n\t" | |
78 | "lea -4(%2, %0), %4\n\t" | |
79 | "shr $2, %0\n\t" | |
80 | "rep movsl\n\t" | |
81 | "mov %3, (%4)\n\t" | |
82 | "jmp 11f\n\t" | |
83 | /* | |
84 | * Handle data backward by movs. | |
85 | */ | |
86 | ".p2align 4\n\t" | |
87 | "6:\n\t" | |
88 | "mov (%1), %3\n\t" | |
89 | "mov %2, %4\n\t" | |
90 | "lea -4(%1, %0), %1\n\t" | |
91 | "lea -4(%2, %0), %2\n\t" | |
92 | "shr $2, %0\n\t" | |
93 | "std\n\t" | |
94 | "rep movsl\n\t" | |
95 | "mov %3,(%4)\n\t" | |
96 | "cld\n\t" | |
97 | "jmp 11f\n\t" | |
98 | ||
99 | /* | |
100 | * Start to prepare for backward copy. | |
101 | */ | |
102 | ".p2align 4\n\t" | |
103 | "2:\n\t" | |
104 | "cmp $680, %0\n\t" | |
105 | "jb 5f\n\t" | |
106 | "mov %1, %3\n\t" | |
107 | "xor %2, %3\n\t" | |
108 | "and $0xff, %3\n\t" | |
109 | "jz 6b\n\t" | |
110 | ||
111 | /* | |
112 | * Calculate copy position to tail. | |
113 | */ | |
114 | "5:\n\t" | |
115 | "add %0, %1\n\t" | |
116 | "add %0, %2\n\t" | |
117 | "sub $0x10, %0\n\t" | |
118 | ||
119 | /* | |
bb916ff7 | 120 | * We gobble 16 bytes backward in each loop. |
3b4b682b ML |
121 | */ |
122 | "7:\n\t" | |
123 | "sub $0x10, %0\n\t" | |
124 | ||
125 | "mov -1*4(%1), %3\n\t" | |
126 | "mov -2*4(%1), %4\n\t" | |
127 | "mov %3, -1*4(%2)\n\t" | |
128 | "mov %4, -2*4(%2)\n\t" | |
129 | "mov -3*4(%1), %3\n\t" | |
130 | "mov -4*4(%1), %4\n\t" | |
131 | "mov %3, -3*4(%2)\n\t" | |
132 | "mov %4, -4*4(%2)\n\t" | |
133 | "lea -0x10(%1), %1\n\t" | |
134 | "lea -0x10(%2), %2\n\t" | |
135 | "jae 7b\n\t" | |
136 | /* | |
137 | * Calculate copy position to head. | |
138 | */ | |
139 | "add $0x10, %0\n\t" | |
140 | "sub %0, %1\n\t" | |
141 | "sub %0, %2\n\t" | |
142 | ||
143 | /* | |
144 | * Move data from 8 bytes to 15 bytes. | |
145 | */ | |
146 | ".p2align 4\n\t" | |
147 | "1:\n\t" | |
148 | "cmp $8, %0\n\t" | |
149 | "jb 8f\n\t" | |
150 | "mov 0*4(%1), %3\n\t" | |
151 | "mov 1*4(%1), %4\n\t" | |
152 | "mov -2*4(%1, %0), %5\n\t" | |
153 | "mov -1*4(%1, %0), %1\n\t" | |
154 | ||
155 | "mov %3, 0*4(%2)\n\t" | |
156 | "mov %4, 1*4(%2)\n\t" | |
157 | "mov %5, -2*4(%2, %0)\n\t" | |
158 | "mov %1, -1*4(%2, %0)\n\t" | |
159 | "jmp 11f\n\t" | |
160 | ||
161 | /* | |
162 | * Move data from 4 bytes to 7 bytes. | |
163 | */ | |
164 | ".p2align 4\n\t" | |
165 | "8:\n\t" | |
166 | "cmp $4, %0\n\t" | |
167 | "jb 9f\n\t" | |
168 | "mov 0*4(%1), %3\n\t" | |
169 | "mov -1*4(%1, %0), %4\n\t" | |
170 | "mov %3, 0*4(%2)\n\t" | |
171 | "mov %4, -1*4(%2, %0)\n\t" | |
172 | "jmp 11f\n\t" | |
173 | ||
174 | /* | |
175 | * Move data from 2 bytes to 3 bytes. | |
176 | */ | |
177 | ".p2align 4\n\t" | |
178 | "9:\n\t" | |
179 | "cmp $2, %0\n\t" | |
180 | "jb 10f\n\t" | |
181 | "movw 0*2(%1), %%dx\n\t" | |
182 | "movw -1*2(%1, %0), %%bx\n\t" | |
183 | "movw %%dx, 0*2(%2)\n\t" | |
184 | "movw %%bx, -1*2(%2, %0)\n\t" | |
185 | "jmp 11f\n\t" | |
186 | ||
187 | /* | |
188 | * Move data for 1 byte. | |
189 | */ | |
190 | ".p2align 4\n\t" | |
191 | "10:\n\t" | |
192 | "cmp $1, %0\n\t" | |
193 | "jb 11f\n\t" | |
194 | "movb (%1), %%cl\n\t" | |
195 | "movb %%cl, (%2)\n\t" | |
196 | ".p2align 4\n\t" | |
197 | "11:" | |
198 | : "=&c" (d0), "=&S" (d1), "=&D" (d2), | |
199 | "=r" (d3),"=r" (d4), "=r"(d5) | |
200 | :"0" (n), | |
201 | "1" (src), | |
202 | "2" (dest) | |
203 | :"memory"); | |
204 | ||
205 | return ret; | |
206 | ||
1da177e4 LT |
207 | } |
208 | EXPORT_SYMBOL(memmove); |