/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Unified implementation of memcpy, memmove and the __copy_user backend.
 *
 * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
 * Copyright (C) 2002 Broadcom, Inc.
 *   memcpy/copy_user author: Mark Vandevoorde
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 *
 * Mnemonic names for arguments to memcpy/__copy_user
 */

/*
 * Hack to resolve longstanding prefetch issue
 *
 * Prefetching may be fatal on some systems if we're prefetching beyond the
 * end of memory.  It's also a seriously bad idea on non-DMA-coherent
 * systems.
 */
#ifdef CONFIG_DMA_NONCOHERENT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
#ifdef CONFIG_MIPS_MALTA
#undef CONFIG_CPU_HAS_PREFETCH
#endif

#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#define dst a0
#define src a1
#define len a2

/*
 * Spec
 *
 * memcpy copies len bytes from src to dst and sets v0 to dst.
 * It assumes that
 *   - src and dst don't overlap
 *   - src is readable
 *   - dst is writable
 * memcpy uses the standard calling convention
 *
 * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
 * the number of uncopied bytes due to an exception caused by a read or write.
 * __copy_user assumes that src and dst don't overlap, and that the call is
 * implementing one of the following:
 *   copy_to_user
 *     - src is readable  (no exceptions when reading src)
 *   copy_from_user
 *     - dst is writable  (no exceptions when writing dst)
 * __copy_user uses a non-standard calling convention; see
 * include/asm-mips/uaccess.h
 *
 * When an exception happens on a load, the handler must
 * ensure that all of the destination buffer is overwritten to prevent
 * leaking information to user mode programs.
 */
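
/*
 * Illustrative sketch only (not assembled): the contract above, written as
 * C pseudocode.  The helper name partial_copy() is hypothetical.
 *
 *	void *memcpy(void *dst, const void *src, size_t len)
 *	{
 *		// copy len bytes; no faults expected
 *		return dst;			// returned in v0
 *	}
 *
 *	size_t __copy_user(void *dst, const void *src, size_t len)
 *	{
 *		size_t copied = partial_copy(dst, src, len); // stops on fault
 *		return len - copied;	// left in a2: 0 on success, else an
 *					// upper bound of the uncopied bytes
 *	}
 */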

/*
 * Implementation
 */

/*
 * The exception handler for loads requires that:
 *  1- AT contain the address of the byte just past the end of the source
 *     of the copy,
 *  2- src_entry <= src < AT, and
 *  3- (dst - src) == (dst_entry - src_entry),
 * The _entry suffix denotes values when __copy_user was called.
 *
 * (1) is set up by uaccess.h and maintained by not writing AT in copy_user
 * (2) is met by incrementing src by the number of bytes copied
 * (3) is met by not doing loads between a pair of increments of dst and src
 *
 * The exception handlers for stores adjust len (if necessary) and return.
 * These handlers do not need to overwrite any data.
 *
 * For __rmemcpy and memmove an exception is always a kernel bug, therefore
 * they're not protected.
 */
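
/*
 * Worked example of the invariants above (the numbers are hypothetical):
 * __copy_user is entered with src_entry = 0x1000 and len = 0x100, so
 * uaccess.h sets AT = 0x1100 (invariant 1).  If a load faults once 0x40
 * bytes have been copied, the faulting address is 0x1040 and the load
 * exception handler computes
 *
 *	len = AT - fault_addr = 0x1100 - 0x1040 = 0xc0 uncopied bytes,
 *
 * then relies on (dst - src) == (dst_entry - src_entry) (invariant 3) to
 * locate the first destination byte that still has to be zeroed.
 */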

/* Instruction type */
#define LD_INSN	1
#define ST_INSN	2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 *   insn    : Load/store instruction
 *   type    : Instruction type
 *   reg     : Register
 *   addr    : Address
 *   handler : Exception handler
 */
#define EXC(insn, type, reg, addr, handler)	\
9:	insn reg, addr;				\
	.section __ex_table, "a";		\
	PTR	9b, handler;			\
	.previous
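
/*
 * For illustration, a use such as LOAD(t0, 0(src), .Ll_exc) on a 32-bit
 * kernel expands (roughly) to:
 *
 *	9:	lw	t0, 0(src)
 *		.section __ex_table, "a"
 *		PTR	9b, .Ll_exc
 *		.previous
 *
 * i.e. the instruction itself plus an exception-table entry mapping its
 * address to the fixup handler.
 */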

/*
 * Only on a 64-bit kernel can we make use of 64-bit registers.
 */
#ifdef CONFIG_64BIT
#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOADK	ld /* No exception */
#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SRA    dsra
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#else

#define LOADK	lw /* No exception */
#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SRA    sra
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#define LOADB(reg, addr, handler)	EXC(lb, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)
#define UNIT(unit)  FIRST(unit)

#define ADDRMASK (NBYTES-1)

	.text
	.set	noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

/*
 * t6 is used as a flag to note inatomic mode.
 */
LEAF(__copy_user_inatomic)
	b	__copy_user_common
	 li	t6, 1
	END(__copy_user_inatomic)

/*
 * A combined memcpy/__copy_user
 * __copy_user sets len to 0 for success; else to an upper bound of
 * the number of uncopied bytes.
 * memcpy sets v0 to dst.
 */
	.align	5
LEAF(memcpy)					/* a0=dst a1=src a2=len */
	move	v0, dst				/* return value */
.L__memcpy:
FEXPORT(__copy_user)
	li	t6, 0	/* not inatomic */
__copy_user_common:
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
#define rem t8

	R10KCBARRIER(0(ra))
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	PREF(	0, 0(src) )
	PREF(	1, 0(dst) )
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	PREF(	0, 1*32(src) )
	PREF(	1, 1*32(dst) )
	bnez	t2, .Lcopy_bytes_checklen
	 and	t0, src, ADDRMASK
	PREF(	0, 2*32(src) )
	PREF(	1, 2*32(dst) )
	bnez	t1, .Ldst_unaligned
	 nop
	bnez	t0, .Lsrc_unaligned_dst_aligned
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned:
	 SRL	t0, len, LOG_NBYTES+3	# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned # len < 8*NBYTES
	 and	rem, len, (8*NBYTES-1)	# rem = len % (8*NBYTES)
	PREF(	0, 3*32(src) )
	PREF(	1, 3*32(dst) )
	.align	4
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, UNIT(0)(src), .Ll_exc)
	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
	SUB	len, len, 8*NBYTES
	LOAD(t4, UNIT(4)(src), .Ll_exc_copy)
	LOAD(t7, UNIT(5)(src), .Ll_exc_copy)
	STORE(t0, UNIT(0)(dst), .Ls_exc_p8u)
	STORE(t1, UNIT(1)(dst), .Ls_exc_p7u)
	LOAD(t0, UNIT(6)(src), .Ll_exc_copy)
	LOAD(t1, UNIT(7)(src), .Ll_exc_copy)
	ADD	src, src, 8*NBYTES
	ADD	dst, dst, 8*NBYTES
	STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u)
	STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u)
	STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u)
	STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u)
	STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u)
	STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u)
	PREF(	0, 8*32(src) )
	PREF(	1, 8*32(dst) )
	bne	len, rem, 1b
	 nop

	/*
	 * len == rem == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned:
	beqz	len, .Ldone
	 sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units
	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD( t0, UNIT(0)(src), .Ll_exc)
	LOAD( t1, UNIT(1)(src), .Ll_exc_copy)
	LOAD( t2, UNIT(2)(src), .Ll_exc_copy)
	LOAD( t3, UNIT(3)(src), .Ll_exc_copy)
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	R10KCBARRIER(0(ra))
	STORE(t0, UNIT(0)(dst), .Ls_exc_p4u)
	STORE(t1, UNIT(1)(dst), .Ls_exc_p3u)
	STORE(t2, UNIT(2)(dst), .Ls_exc_p2u)
	STORE(t3, UNIT(3)(dst), .Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone
	.set	noreorder
.Lless_than_4units:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes
	 nop
1:
	R10KCBARRIER(0(ra))
	LOAD(t0, 0(src), .Ll_exc)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
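	/*
	 * Worked example of the tail copy below (hypothetical values),
	 * assuming NBYTES == 4 and len == 3 bytes remain: rem is reused to
	 * hold len << 3 == 24, the number of bits to keep, so bits becomes
	 * 32 - 24 == 8 bits to discard.  One full-width LOAD fetches the word
	 * containing the tail, SHIFT_DISCARD shifts the unwanted byte out
	 * (direction depends on endianness), and a single STREST ending at
	 * dst + len - 1 writes exactly the remaining 3 bytes -- all without
	 * ever reading from dst.
	 */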
#define bits t2
	beqz	len, .Ldone
	 ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src), .Ll_exc)
	SUB	bits, bits, rem	# bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1), .Ls_exc)
	jr	ra
	 move	len, zero
.Ldst_unaligned:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; t1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
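	/*
	 * Worked example (hypothetical values), assuming NBYTES == 4: if
	 * dst & ADDRMASK == 1 and src & ADDRMASK == 1, then t2 = NBYTES - t1
	 * = 3 bytes are copied with one LDFIRST/LDREST pair and a single
	 * STFIRST.  After the pointer updates dst is aligned, and because
	 * match = t0 ^ t1 == 0 src is now aligned as well, so we branch back
	 * to .Lboth_aligned; a non-zero match falls through to
	 * .Lsrc_unaligned_dst_aligned instead.
	 */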
#define match rem
	LDFIRST(t3, FIRST(0)(src), .Ll_exc)
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src), .Ll_exc_copy)
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	R10KCBARRIER(0(ra))
	STFIRST(t3, FIRST(0)(dst), .Ls_exc)
	beq	len, t2, .Ldone
	 SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned
	 ADD	src, src, t2

.Lsrc_unaligned_dst_aligned:
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	PREF(	0, 3*32(src) )
	beqz	t0, .Lcleanup_src_unaligned
	 and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
	PREF(	1, 3*32(dst) )
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
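/*
 * For illustration: with src misaligned, LDFIRST(t0, FIRST(0)(src)) and
 * LDREST(t0, REST(0)(src)) form an lwl/lwr pair (ldl/ldr with USE_DOUBLE)
 * that together merge one unaligned NBYTES-sized word into t0.  Interleaving
 * LDFIRST(t1, FIRST(1)(src)) between them only touches a different register,
 * so the pairing below stays correct while giving the pipeline independent
 * loads to issue.
 */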
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy)
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src), .Ll_exc_copy)
	LDREST(t1, REST(1)(src), .Ll_exc_copy)
	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy)
	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
	LDREST(t2, REST(2)(src), .Ll_exc_copy)
	LDREST(t3, REST(3)(src), .Ll_exc_copy)
	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst), .Ls_exc_p4u)
	STORE(t1, UNIT(1)(dst), .Ls_exc_p3u)
	STORE(t2, UNIT(2)(dst), .Ls_exc_p2u)
	STORE(t3, UNIT(3)(dst), .Ls_exc_p1u)
	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned:
	beqz	len, .Ldone
	 and	rem, len, NBYTES-1	# rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes
	 nop
1:
	R10KCBARRIER(0(ra))
	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
	LDREST(t0, REST(0)(src), .Ll_exc_copy)
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst), .Ls_exc_p1u)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen:
	beqz	len, .Ldone
	 nop
.Lcopy_bytes:
	/* 0 < len < NBYTES */
	R10KCBARRIER(0(ra))
#define COPY_BYTE(N)			\
	LOADB(t0, N(src), .Ll_exc);	\
	SUB	len, len, 1;		\
	beqz	len, .Ldone;		\
	 STOREB(t0, N(dst), .Ls_exc_p1)

	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADB(t0, NBYTES-2(src), .Ll_exc)
	SUB	len, len, 1
	jr	ra
	 STOREB(t0, NBYTES-2(dst), .Ls_exc_p1)
.Ldone:
	jr	ra
	 nop
	END(memcpy)

.Ll_exc_copy:
	/*
	 * Copy bytes from src until faulting load address (or until a
	 * lb faults)
	 *
	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
	 * may be more than a byte beyond the last address.
	 * Hence, the lb below may get an exception.
	 *
	 * Assumes src < THREAD_BUADDR($28)
	 */
	LOADK	t0, TI_TASK($28)
	 nop
	LOADK	t0, THREAD_BUADDR(t0)
1:
	LOADB(t1, 0(src), .Ll_exc)
	ADD	src, src, 1
	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 1
	bne	src, t0, 1b
	.set	noreorder
.Ll_exc:
	LOADK	t0, TI_TASK($28)
	 nop
	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
	 nop
	SUB	len, AT, t0		# len = number of uncopied bytes
	bnez	t6, .Ldone	/* Skip the zeroing part if inatomic */
	/*
	 * Here's where we rely on src and dst being incremented in tandem,
	 *   See (3) above.
	 * dst += (fault addr - src) to put dst at first byte to clear
	 */
	ADD	dst, t0			# compute start address in a1
	SUB	dst, src
	/*
	 * Clear len bytes starting at dst.  Can't call __bzero because it
	 * might modify len.  An inefficient loop for these rare times...
	 */
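	/*
	 * Equivalent C sketch of the zeroing loop below (illustration only;
	 * 'remaining' is a hypothetical name for the down-counter that src,
	 * which is dead at this point, is reused to hold):
	 *
	 *	size_t remaining = len;	// len itself must stay untouched
	 *	do {
	 *		*dst++ = 0;
	 *	} while (--remaining);
	 */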
	.set	reorder				/* DADDI_WAR */
	SUB	src, len, 1
	beqz	len, .Ldone
	.set	noreorder
1:	sb	zero, 0(dst)
	ADD	dst, dst, 1
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	bnez	src, 1b
	 SUB	src, src, 1
#else
	.set	push
	.set	noat
	li	v1, 1
	bnez	src, 1b
	 SUB	src, src, v1
	.set	pop
#endif
	jr	ra
	 nop


#define SEXC(n)							\
	.set	reorder;			/* DADDI_WAR */	\
.Ls_exc_p ## n ## u:						\
	ADD	len, len, n*NBYTES;				\
	jr	ra;						\
	.set	noreorder

SEXC(8)
SEXC(7)
SEXC(6)
SEXC(5)
SEXC(4)
SEXC(3)
SEXC(2)
SEXC(1)

.Ls_exc_p1:
	.set	reorder				/* DADDI_WAR */
	ADD	len, len, 1
	jr	ra
	.set	noreorder
.Ls_exc:
	jr	ra
	 nop

	.align	5
LEAF(memmove)
	ADD	t0, a0, a2
	ADD	t1, a1, a2
	sltu	t0, a1, t0			# dst + len <= src -> memcpy
	sltu	t1, a0, t1			# dst >= src + len -> memcpy
	and	t0, t1
	beqz	t0, .L__memcpy
	 move	v0, a0				/* return value */
	beqz	a2, .Lr_out
	END(memmove)

	/* fall through to __rmemcpy */
LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
	sltu	t0, a1, a0
	beqz	t0, .Lr_end_bytes_up		# src >= dst
	 nop
	ADD	a0, a2				# dst = dst + len
	ADD	a1, a2				# src = src + len

.Lr_end_bytes:
	R10KCBARRIER(0(ra))
	lb	t0, -1(a1)
	SUB	a2, a2, 0x1
	sb	t0, -1(a0)
	SUB	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	SUB	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes
	.set	noreorder

.Lr_out:
	jr	ra
	 move	a2, zero

.Lr_end_bytes_up:
	R10KCBARRIER(0(ra))
	lb	t0, (a1)
	SUB	a2, a2, 0x1
	sb	t0, (a0)
	ADD	a1, a1, 0x1
	.set	reorder				/* DADDI_WAR */
	ADD	a0, a0, 0x1
	bnez	a2, .Lr_end_bytes_up
	.set	noreorder

	jr	ra
	 move	a2, zero
	END(__rmemcpy)