[mirror_edk2.git] / ArmPkg / Library / CompilerIntrinsicsLib / AArch64 / memcpy.S

/*\r
 * Copyright (c) 2011 - 2013, ARM Ltd\r
 * All rights reserved.\r
 *\r
 * Redistribution and use in source and binary forms, with or without\r
 * modification, are permitted provided that the following conditions\r
 * are met:\r
 * 1. Redistributions of source code must retain the above copyright\r
 *    notice, this list of conditions and the following disclaimer.\r
 * 2. Redistributions in binary form must reproduce the above copyright\r
 *    notice, this list of conditions and the following disclaimer in the\r
 *    documentation and/or other materials provided with the distribution.\r
 * 3. The name of the company may not be used to endorse or promote\r
 *    products derived from this software without specific prior written\r
 *    permission.\r
 *\r
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED\r
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\r
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\r
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\r
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
 */\r
\r
#include <AsmMacroIoLibV8.h>\r
\r
// Taken from Newlib BSD implementation.
//
// void *memcpy (void *dst, const void *src, size_t nb)
//
// In:     x0 = dst, x1 = src, x2 = nb (byte count)
// Out:    x0 = dst (x0 is never written by this routine)
// Uses:   x1-x6, x8-x13 and the condition flags.
//         No stack accesses and no calls are made.
//
// Register roles inside the routine:
//         x1 = running source pointer
//         x2 = bytes still to copy (unsigned)
//         x3 = bytes needed to 16-byte align the destination
//         x6 = running destination pointer
//         x4, x5, x8-x13 = data in flight
//
// The source and destination regions must not overlap; the alignment
// step below deliberately copies some bytes twice.
ASM_FUNC(memcpy)
        // Work on a copy of dst in x6 so that x0 still holds the
        // return value when we reach the final ret.
        mov     x6, x0

        // size_t is unsigned, so every range test on the remaining
        // count in x2 uses the unsigned conditions LO/HS (borrow /
        // no borrow out of the preceding cmp/subs).  A count with
        // bit 63 set is therefore still treated as "large" instead of
        // being mistaken for a negative number.

        // Require at least 64 bytes to be worth aligning.
        cmp     x2, #64
        b.lo    .Lqwordcopy

        // Compute offset to align destination to 16 bytes:
        // x3 = (-dst) & 15, i.e. the distance up to the next
        // 16-byte boundary (0 if dst is already aligned).
        neg     x3, x0
        and     x3, x3, 15

        cbz     x3, .Lblockcopy         // offset == 0 is likely

        // We know there are at least 64 bytes to be done, so we
        // do one 16 byte misaligned copy first and then advance both
        // pointers by only 'offset' bytes.  Everything after this is
        // a 16-byte aligned store.  Up to 15 bytes will be copied
        // twice, but there's no harm in that since memcpy does not
        // guarantee correctness on overlap.

        sub     x2, x2, x3              // nb -= offset
        ldp     x4, x5, [x1]
        add     x1, x1, x3
        stp     x4, x5, [x6]
        add     x6, x6, x3

        // The destination pointer is now qword (16 byte) aligned.
        // (The src pointer might be.)

.Lblockcopy:
        // Copy 64 bytes at a time.  While in this section x2 is kept
        // pre-decremented by 64 so that the borrow out of the subs at
        // the top of the loop tells us whether another full block
        // follows the one being copied.
        subs    x2, x2, #64
        b.lo    .Lblock_done            // fewer than 64 bytes left
.Lblock_loop:
        subs    x2, x2, #64
        ldp     x4, x5, [x1,#0]
        ldp     x8, x9, [x1,#16]
        ldp     x10,x11,[x1,#32]
        ldp     x12,x13,[x1,#48]
        add     x1, x1, #64
        stp     x4, x5, [x6,#0]
        stp     x8, x9, [x6,#16]
        stp     x10,x11,[x6,#32]
        stp     x12,x13,[x6,#48]
        add     x6, x6, #64
        // None of the ldp/stp/add above write the flags, so this
        // still tests the subs at the top of the loop.
        b.hs    .Lblock_loop

        // Unwind pre-decrement
.Lblock_done:
        add     x2, x2, #64

.Lqwordcopy:
        // Copy 0-48 bytes, 16 bytes at a time.
        subs    x2, x2, #16
        b.lo    .Ltailcopy              // fewer than 16 bytes left
.Lqword_loop:
        ldp     x4, x5, [x1],#16
        subs    x2, x2, #16
        stp     x4, x5, [x6],#16
        b.hs    .Lqword_loop

        // No need to unwind the pre-decrement, it would not change
        // the low 4 bits of the count. But how likely is it for the
        // byte count to be multiple of 16? Is it worth the overhead
        // of testing for x2 == -16?

.Ltailcopy:
        // Copy trailing 0-15 bytes.  Only bits 3..0 of x2 are
        // examined; each set bit selects one power-of-two sized copy.
        tbz     x2, #3, 1f
        ldr     x4, [x1],#8             // copy 8 bytes
        str     x4, [x6],#8
1:
        tbz     x2, #2, 1f
        ldr     w4, [x1],#4             // copy 4 bytes
        str     w4, [x6],#4
1:
        tbz     x2, #1, 1f
        ldrh    w4, [x1],#2             // copy 2 bytes
        strh    w4, [x6],#2
1:
        tbz     x2, #0, .Lreturn
        ldrb    w4, [x1]                // copy 1 byte
        strb    w4, [x6]

.Lreturn:
        // This is the only return point of memcpy.
        ret
Commit	Line	Data
25402f5d HL	1	/*\r
	2	* Copyright (c) 2011 - 2013, ARM Ltd\r
	3	* All rights reserved.\r
	4	*\r
	5	* Redistribution and use in source and binary forms, with or without\r
	6	* modification, are permitted provided that the following conditions\r
	7	* are met:\r
	8	* 1. Redistributions of source code must retain the above copyright\r
	9	* notice, this list of conditions and the following disclaimer.\r
	10	* 2. Redistributions in binary form must reproduce the above copyright\r
	11	* notice, this list of conditions and the following disclaimer in the\r
	12	* documentation and/or other materials provided with the distribution.\r
	13	* 3. The name of the company may not be used to endorse or promote\r
	14	* products derived from this software without specific prior written\r
	15	* permission.\r
	16	*\r
	17	* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED\r
	18	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF\r
	19	* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\r
	20	* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
	21	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED\r
	22	* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\r
	23	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\r
	24	* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\r
	25	* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\r
	26	* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
	27	*/\r
	28	\r
903e3124	29	#include <AsmMacroIoLibV8.h>\r
25402f5d HL	30	\r
25402f5d HL	31	// Taken from Newlib BSD implementation.\r
903e3124	32	ASM_FUNC(memcpy)\r
25402f5d HL	33	// Copy dst to x6, so we can preserve return value.\r
	34	mov x6, x0\r
	35	\r
	36	// NOTE: although size_t is unsigned, this code uses signed\r
	37	// comparisons on x2 so relies on nb never having its top bit\r
	38	// set. In practice this is not going to be a real problem.\r
	39	\r
	40	// Require at least 64 bytes to be worth aligning.\r
	41	cmp x2, #64\r
	42	blt qwordcopy\r
	43	\r
	44	// Compute offset to align destination to 16 bytes.\r
	45	neg x3, x0\r
	46	and x3, x3, 15\r
	47	\r
	48	cbz x3, blockcopy // offset == 0 is likely\r
	49	\r
	50	// We know there is at least 64 bytes to be done, so we\r
	51	// do a 16 byte misaligned copy at first and then later do\r
	52	// all 16-byte aligned copies. Some bytes will be copied\r
	53	// twice, but there's no harm in that since memcpy does not\r
	54	// guarantee correctness on overlap.\r
	55	\r
	56	sub x2, x2, x3 // nb -= offset\r
	57	ldp x4, x5, [x1]\r
	58	add x1, x1, x3\r
	59	stp x4, x5, [x6]\r
	60	add x6, x6, x3\r
	61	\r
	62	// The destination pointer is now qword (16 byte) aligned.\r
	63	// (The src pointer might be.)\r
	64	\r
	65	blockcopy:\r
	66	// Copy 64 bytes at a time.\r
	67	subs x2, x2, #64\r
	68	blt 3f\r
	69	2: subs x2, x2, #64\r
	70	ldp x4, x5, [x1,#0]\r
	71	ldp x8, x9, [x1,#16]\r
	72	ldp x10,x11,[x1,#32]\r
	73	ldp x12,x13,[x1,#48]\r
	74	add x1, x1, #64\r
	75	stp x4, x5, [x6,#0]\r
	76	stp x8, x9, [x6,#16]\r
	77	stp x10,x11,[x6,#32]\r
	78	stp x12,x13,[x6,#48]\r
	79	add x6, x6, #64\r
	80	bge 2b\r
	81	\r
	82	// Unwind pre-decrement\r
	83	3: add x2, x2, #64\r
	84	\r
	85	qwordcopy:\r
	86	// Copy 0-48 bytes, 16 bytes at a time.\r
	87	subs x2, x2, #16\r
	88	blt tailcopy\r
	89	2: ldp x4, x5, [x1],#16\r
	90	subs x2, x2, #16\r
	91	stp x4, x5, [x6],#16\r
	92	bge 2b\r
	93	\r
	94	// No need to unwind the pre-decrement, it would not change\r
	95	// the low 4 bits of the count. But how likely is it for the\r
	96	// byte count to be multiple of 16? Is it worth the overhead\r
97	// of testing for x2 == -16?\r
98	\r
99	tailcopy:\r
100	// Copy trailing 0-15 bytes.\r
101	tbz x2, #3, 1f\r
102	ldr x4, [x1],#8 // copy 8 bytes\r
103	str x4, [x6],#8\r
104	1:\r
105	tbz x2, #2, 1f\r
106	ldr w4, [x1],#4 // copy 4 bytes\r
107	str w4, [x6],#4\r
108	1:\r
109	tbz x2, #1, 1f\r
110	ldrh w4, [x1],#2 // copy 2 bytes\r
111	strh w4, [x6],#2\r
112	1:\r
113	tbz x2, #0, return\r
114	ldrb w4, [x1] // copy 1 byte\r
115	strb w4, [x6]\r
116	\r
117	return:\r
118	// This is the only return point of memcpy.\r
119	ret\r