/*
 * arch/xtensa/lib/usercopy.S
 *
 * Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
 *
 * DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
 * It needs to remain separate and distinct. The hal files are part
 * of the Xtensa link-time HAL, and those files may differ per
 * processor configuration. Patching the kernel for another
 * processor configuration includes replacing the hal files, and we
 * could lose the special functionality for accessing user-space
 * memory during such a patch. We sacrifice a little code space here
 * in favor of simpler code maintenance.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License. See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */

/*
 * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
 *
 * The returned value is the number of bytes not copied, so zero
 * implies success.
 *
 * The general case algorithm is as follows:
 *   If the destination and source are both aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B copies conditional on the length.
 *   If destination is aligned and source unaligned,
 *     do the same, but use SRC to align the source data.
 *   If destination is unaligned, align it by conditionally
 *     copying 1B and 2B and then retest.
 *   This code tries to use fall-through branches for the common
 *     case of aligned destinations (except for the branches to
 *     the alignment label).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ original length
 */
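/*
 * Illustrative only: a minimal C-level sketch of the calling contract
 * described above. The caller shape and the -EFAULT error code are
 * assumptions for illustration, not part of this file:
 *
 *	size_t not_copied = __xtensa_copy_user(dst, src, len);
 *	if (not_copied)
 *		return -EFAULT;	// a trailing portion of the buffer was
 *				// not copied
 */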

#include <variant/core.h>

#ifdef __XTENSA_EB__
#define ALIGN(R, W0, W1) src	R, W0, W1
#define SSA8(R)	ssa8b	R
#else
#define ALIGN(R, W0, W1) src	R, W1, W0
#define SSA8(R)	ssa8l	R
#endif
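/*
 * SSA8 sets the shift-amount register (SAR) from the byte offset of the
 * source address; ALIGN then uses the SRC funnel-shift instruction to
 * extract an aligned word from a pair of adjacent source words. The
 * operand order of SRC differs between big- and little-endian cores.
 */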

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous
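/*
 * For example, EX(l8ui, a6, a3, 0, fixup) expands to roughly:
 *
 *	9:	l8ui	a6, a3, 0
 *		.section __ex_table, "a"
 *		.word	9b, fixup
 *		.previous
 *
 * so that a fault on the user access is redirected to the fixup code at
 * the end of this file.
 */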


	.text
	.align	4
	.global	__xtensa_copy_user
	.type	__xtensa_copy_user,@function
__xtensa_copy_user:
	entry	sp, 16		# minimal stack frame
				# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
	mov	a11, a4		# preserve original len for error case
.Lcommon:
	bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is also aligned,
	bnone	a3, a8, .Laligned	# then use word copy
	SSA8(	a3)		# set shift amount from byte offset
	bnez	a4, .Lsrcunaligned
	movi	a2, 0		# return success for len==0
	retw

/*
 * Destination is unaligned
 */

.Ldst1mod2:	# dst is only byte aligned
	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
	addi	a4, a4, -1
	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
	addi	a4, a4, -2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	EX(l8ui, a6, a3, 0, fixup)
	addi	a3, a3, 1
	EX(s8i, a6, a5, 0, fixup)
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a3, a7, .Lnextbyte
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	movi	a2, 0		# return success for len bytes copied
	retw

/*
 * Destination and source are word-aligned.
 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
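	# Note: in the 16-byte loop below, loads and stores are interleaved
	# so that each store uses a word loaded two instructions earlier,
	# which helps cover load latency.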
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a6, a3, 8, fixup)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a7, a3, 12, fixup)
	EX(s32i, a6, a5, 8, fixup)
	addi	a3, a3, 16
	EX(s32i, a7, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a8, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	EX(l32i, a6, a3, 0, fixup)
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 8
	EX(s32i, a6, a5, 0, fixup)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# copy 4 bytes
	EX(l32i, a6, a3, 0, fixup)
	addi	a3, a3, 4
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# copy 2 bytes
	EX(l16ui, a6, a3, 0, fixup)
	addi	a3, a3, 2
	EX(s16i, a6, a5, 0, fixup)
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L5:
	movi	a2, 0		# return success for len bytes copied
	retw

/*
 * Destination is aligned, Source is unaligned
 */

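/*
 * The loop below keeps the most recently loaded source word in a6 and,
 * on each step, merges it with the next word via the ALIGN macro (an SRC
 * funnel shift using the SAR value set by SSA8 above) to produce an
 * aligned word for each store.
 */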
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lsrcunaligned:
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	and	a10, a3, a8	# save unalignment offset for below
	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
	EX(l32i, a6, a3, 0, fixup)	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a12, a7, 4
	add	a12, a12, a3	# a12 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	EX(l32i, a9, a3, 12, fixup)
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	EX(l32i, a6, a3, 16, fixup)
	ALIGN(	a8, a8, a9)
	EX(s32i, a8, a5, 8, fixup)
	addi	a3, a3, 16
	ALIGN(	a9, a9, a6)
	EX(s32i, a9, a5, 12, fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a3, a12, .Loop2
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	EX(l32i, a7, a3, 4, fixup)
	EX(l32i, a8, a3, 8, fixup)
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a3, a3, 8
	ALIGN(	a7, a7, a8)
	EX(s32i, a7, a5, 4, fixup)
	addi	a5, a5, 8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	EX(l32i, a7, a3, 4, fixup)
	addi	a3, a3, 4
	ALIGN(	a6, a6, a7)
	EX(s32i, a6, a5, 0, fixup)
	addi	a5, a5, 4
	mov	a6, a7
.L13:
	add	a3, a3, a10	# readjust a3 with correct misalignment
	bbci.l	a4, 1, .L14
	# copy 2 bytes
	EX(l8ui, a6, a3, 0, fixup)
	EX(l8ui, a7, a3, 1, fixup)
	addi	a3, a3, 2
	EX(s8i, a6, a5, 0, fixup)
	EX(s8i, a7, a5, 1, fixup)
	addi	a5, a5, 2
.L14:
	bbci.l	a4, 0, .L15
	# copy 1 byte
	EX(l8ui, a6, a3, 0, fixup)
	EX(s8i, a6, a5, 0, fixup)
.L15:
	movi	a2, 0		# return success for len bytes copied
	retw


	.section .fixup, "ax"
	.align	4

/* a2 = original dst; a5 = current dst; a11 = original len
 * bytes_copied = a5 - a2
 * retval = bytes_not_copied = original len - bytes_copied
 * retval = a11 - (a5 - a2)
 */
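/* For example, if 10 bytes were requested and the fault hit after 3 had
 * been copied, then a5 - a2 == 3 and the routine returns a11 - 3 == 7.
 */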

fixup:
	sub	a2, a5, a2	/* a2 <-- bytes copied */
	sub	a2, a11, a2	/* a2 <-- bytes not copied */
	retw