/*
 * arch/xtensa/lib/usercopy.S
 *
 * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 * It needs to remain separate and distinct. The hal files are part
 * of the Xtensa link-time HAL, and those files may differ per
 * processor configuration. Patching the kernel for another
 * processor configuration includes replacing the hal files, and we
 * could lose the special functionality for accessing user-space
 * memory during such a patch. We sacrifice a little code space here
 * in favor of simpler code maintenance.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License. See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */

/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The returned value is the number of bytes not copied, so zero
 * implies success.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If destination is aligned and source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment label).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ original length
 */
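/*
 * Illustrative only: a minimal C-level sketch of the calling contract
 * described above. The caller shape and the -EFAULT error code are
 * assumptions for illustration, not part of this file:
 *
 *	size_t not_copied = __xtensa_copy_user(dst, src, len);
 *	if (not_copied)
 *		return -EFAULT;	// a trailing portion of the buffer was
 *				// not copied
 */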

#include <variant/core.h>

#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src	R, W0, W1
#define SSA8(R)	ssa8b	R
#else
#define ALIGN(R, W0, W1) src	R, W1, W0
#define SSA8(R)	ssa8l	R
#endif
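/*
 * SSA8 sets the shift-amount register (SAR) from the byte offset of the
 * source address; ALIGN then uses the SRC funnel-shift instruction to
 * extract an aligned word from a pair of adjacent source words. The
 * operand order of SRC differs between big- and little-endian cores.
 */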

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous
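/*
 * For example, EX(l8ui, a6, a3, 0, fixup) expands to roughly:
 *
 *	9:	l8ui	a6, a3, 0
 *		.section __ex_table, "a"
 *		.word	9b, fixup
 *		.previous
 *
 * so that a fault on the user access is redirected to the fixup code at
 * the end of this file.
 */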


	.text
	.align	4
	.global	__xtensa_copy_user
	.type	__xtensa_copy_user,@function
__xtensa_copy_user:
	entry	sp, 16		# minimal stack frame
				# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	mov	a11, a4		# preserve original len for error case
.Lcommon:
	bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is also aligned,
	bnone	a3, a8, .Laligned	# then use word copy
	SSA8(	a3)		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	retw

/*
 * Destination is unaligned
 */

.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	movi	a2, 0		# return success for len bytes copied
	retw

/*
 * Destination and source are word-aligned.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
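	# Note: in the 16-byte loop below, loads and stores are interleaved
	# so that each store uses a word loaded two instructions earlier,
	# which helps cover load latency.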
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a6, a3, 8, fixup)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a7, a3, 12, fixup)
	EX(s32i, a6, a5, 8, fixup)
	addi	a3, a3, 16
	EX(s32i, a7, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 8
	EX(s32i, a6, a5, 0, fixup)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# copy 4 bytes
	EX(l32i, a6, a3, 0, fixup)
	addi	a3, a3, 4
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# copy 2 bytes
	EX(l16ui, a6, a3, 0, fixup)
	addi	a3, a3, 2
	EX(s16i, a6, a5, 0, fixup)
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L5:
	movi	a2, 0		# return success for len bytes copied
	retw

/*
 * Destination is aligned, Source is unaligned
 */

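/*
 * The loop below keeps the most recently loaded source word in a6 and,
 * on each step, merges it with the next word via the ALIGN macro (an SRC
 * funnel shift using the SAR value set by SSA8 above) to produce an
 * aligned word for each store.
 */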
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	and	a10, a3, a8	# save unalignment offset for below
	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
	EX(l32i, a6, a3, 0, fixup)	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a12, a7, 4
	add	a12, a12, a3	# a12 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a9, a3, 12, fixup)
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a6, a3, 16, fixup)
	ALIGN(	a8, a8, a9)
	EX(s32i, a8, a5, 8, fixup)
	addi	a3, a3, 16
	ALIGN(	a9, a9, a6)
	EX(s32i, a9, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a12, .Loop2
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a3, a3, 8
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 4
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
	mov	a6, a7
.L13:
	add	a3, a3, a10	# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14
	# copy 2 bytes
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
.L14:
	bbci.l	a4, 0, .L15
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L15:
	movi	a2, 0		# return success for len bytes copied
	retw


	.section .fixup, "ax"
	.align	4

/* a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 */
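/* For example, if 10 bytes were requested and the fault hit after 3 had
 * been copied, then a5 - a2 == 3 and the routine returns a11 - 3 == 7.
 */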

fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied */
	retw