/*
 * arch/xtensa/lib/usercopy.S
 *
 * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 * It needs to remain separate and distinct.  The hal files are part
 * of the Xtensa link-time HAL, and those files may differ per
 * processor configuration.  Patching the kernel for another
 * processor configuration includes replacing the hal files, and we
 * could lose the special functionality for accessing user-space
 * memory during such a patch.  We sacrifice a little code space here
 * in favor of simplifying code maintenance.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */


/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The returned value is the number of bytes not copied, so a return
 * value of zero means success.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If destination is aligned and source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment label).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ original length
 */
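
/*
 * For reference, a minimal sketch of how a C caller might wrap this
 * routine (the wrapper name is illustrative, not a definition of the
 * kernel's uaccess API):
 *
 *	static inline unsigned long
 *	copy_from_user_sketch(void *to, const void __user *from,
 *			      unsigned long n)
 *	{
 *		return __xtensa_copy_user(to, (__force const void *)from, n);
 *	}
 *
 * A nonzero return value is the number of trailing bytes that were not
 * copied because a user-space access faulted.
 */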

#include <variant/core.h>

#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src	R, W0, W1
#define SSA8(R)	ssa8b R
#else
#define ALIGN(R, W0, W1) src	R, W1, W0
#define SSA8(R)	ssa8l R
#endif
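
/*
 * SSA8 sets the shift-amount register (SAR) from the low two bits of the
 * source address: ssa8l/ssa8b give the right shift count for little- and
 * big-endian cores respectively.  ALIGN then uses the SRC funnel-shift
 * instruction to extract one aligned word's worth of data from two
 * consecutive source words.  For example, with a little-endian core and
 * a source offset of 1, SAR is 8 and SRC returns the last three bytes of
 * the first word followed by the first byte of the second.
 */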

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous
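
/*
 * Each use of EX emits an entry into the __ex_table section pairing the
 * address of the load/store (label 9) with the fixup handler, so that a
 * fault on a user-space access resumes at the handler instead of killing
 * the kernel.
 */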


	.text
	.align	4
	.global	__xtensa_copy_user
	.type	__xtensa_copy_user,@function
__xtensa_copy_user:
	entry	sp, 16		# minimal stack frame
				# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	mov	a11, a4		# preserve original len for error case
.Lcommon:
	bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from the unaligned-dst paths once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is also aligned,
	bnone	a3, a8, .Laligned	# then use word copy
	SSA8(	a3)		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	retw

/*
 * Destination is unaligned
 */

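	# For short copies the alignment preamble is not worth it, so lengths
	# below a small threshold fall through to the byte-by-byte loop.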
.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
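	# Cores with the Xtensa loop option use the zero-overhead LOOPNEZ
	# instruction; otherwise an end pointer and a conditional branch
	# provide the same iteration count.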
.Lnextbyte:
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	movi	a2, 0		# return success for len bytes copied
	retw

/*
 * Destination and source are word-aligned.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
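	# Loads run ahead of the stores that consume them so that the
	# load-to-use latency of each l32i is hidden inside the 16B loop body.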
.Loop1:
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a6, a3, 8, fixup)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a7, a3, 12, fixup)
	EX(s32i, a6, a5, 8, fixup)
	addi	a3, a3, 16
	EX(s32i, a7, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 8
	EX(s32i, a6, a5, 0, fixup)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# copy 4 bytes
	EX(l32i, a6, a3, 0, fixup)
	addi	a3, a3, 4
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# copy 2 bytes
	EX(l16ui, a6, a3, 0, fixup)
	addi	a3, a3, 2
	EX(s16i, a6, a5, 0, fixup)
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L5:
	movi	a2, 0		# return success for len bytes copied
	retw

/*
 * Destination is aligned, Source is unaligned
 */

	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	and	a10, a3, a8	# save unalignment offset for below
	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
	EX(l32i, a6, a3, 0, fixup)	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a12, a7, 4
	add	a12, a12, a3	# a12 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
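	# The loop is software pipelined: a6 carries the last source word
	# loaded in the previous iteration, and each ALIGN merges it with the
	# next word before the store, so every source word is loaded once.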
.Loop2:
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a9, a3, 12, fixup)
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a6, a3, 16, fixup)
	ALIGN(	a8, a8, a9)
	EX(s32i, a8, a5, 8, fixup)
	addi	a3, a3, 16
	ALIGN(	a9, a9, a6)
	EX(s32i, a9, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a12, .Loop2
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a3, a3, 8
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 4
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
	mov	a6, a7
.L13:
	add	a3, a3, a10	# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14
	# copy 2 bytes
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
.L14:
	bbci.l	a4, 0, .L15
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L15:
	movi	a2, 0		# return success for len bytes copied
	retw


	.section .fixup, "ax"
	.align	4

/* a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 */

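/* The fault handler looks up the faulting instruction in __ex_table and
 * resumes execution here when one of the EX accesses above faults.
 */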
fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied */
	retw