[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / CopyMem.S

#------------------------------------------------------------------------------\r
#\r
# CopyMem() worker for ARM\r
#\r
# This file started out as C code that did 64 bit moves if the buffer was\r
# 32-bit aligned, else it does a byte copy. It also does a byte copy for\r
# any trailing bytes. It was updated to do 32-byte copies using stm/ldm.\r
#\r
# Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>\r
# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>\r
# This program and the accompanying materials\r
# are licensed and made available under the terms and conditions of the BSD License\r
# which accompanies this distribution.  The full text of the license may be found at\r
# http://opensource.org/licenses/bsd-license.php\r
#\r
# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
#\r
#------------------------------------------------------------------------------\r
\r
    .text\r
    .thumb\r
    .syntax unified\r
\r
/**
  Copy Length bytes from Source to Destination. Overlap is OK.

  This implementation copies forwards when Destination is below Source, or when
  Length is smaller than the distance (Destination - Source). In every other
  case it copies backwards, starting from the end of both buffers, so that an
  overlapping source is never overwritten before it has been read.

  In either direction, data is moved 32 bytes at a time with ldm/stm as long as
  at least 32 bytes remain and the two starting addresses (forward copy) or the
  two ending addresses (backward copy) are 16-byte aligned. Everything else,
  including the tail left over by the 32-byte loop, is copied byte by byte.

  A Length of zero returns Destination immediately without touching memory.

  @param  Destination Target of copy
  @param  Source      Place to copy from
  @param  Length      Number of bytes to copy

  @return Destination


VOID *
EFIAPI
InternalMemCopyMem (
  OUT     VOID                      *DestinationBuffer,
  IN      CONST VOID                *SourceBuffer,
  IN      UINTN                     Length
  )
**/
ASM_GLOBAL ASM_PFX(InternalMemCopyMem)
ASM_PFX(InternalMemCopyMem):
    // r4-r11 are used below as data/working registers and lr (r14) is used
    // as the source cursor, so all of them are saved here and restored on exit.
    push    {r4-r11, lr}

    // Register roles for the rest of the routine:
    //   r11 = original Destination (handed back as the return value)
    //   r10 = destination cursor
    //   r14 = source cursor
    //   r12 = number of bytes still to copy
    //   r0  = 1 while the 32-byte block path may be used, 0 for byte copy
    //   r2-r9 = data registers for the 32-byte block transfers
    //   r3  = scratch / data register for the byte transfers
    mov     r11, r0
    mov     r10, r0
    mov     r12, r2
    mov     r14, r1

    // Nothing to copy. Without this check, Length == 0 combined with
    // Destination == Source would be routed to the backward byte loop, which
    // decrements the count before testing it and would therefore wrap around.
    cmp     r12, #0
    beq     memcopy_end

    // If (Destination < Source) a forward copy is always safe
    cmp     r11, r1
    bcc     memcopy_check_optim_default

    // If (Length < Destination - Source) the buffers do not overlap:
    // a forward copy is safe as well
    sub     r3, r11, r1
    cmp     r12, r3
    bcc     memcopy_check_optim_default

    // Otherwise fall through and copy backwards

memcopy_check_optim_overlap:
    // Move both cursors to one-past-the-end: r10 = dest_end, r14 = source_end
    add     r10, r11, r12
    add     r14, r14, r12

    // r0 = ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)
    cmp     r12, #31
    ite     ls
    movls   r0, #0
    movhi   r0, #1
    tst     r10, #0xF
    it      ne
    movne   r0, #0
    tst     r14, #0xF
    it      ne
    movne   r0, #0

// Backward copy. r10 = dest_end, r14 = source_end, r12 != 0 on entry
memcopy_overlapped:
    // Are we (still) in the optimized case ?
    cmp     r0, #0
    beq     memcopy_overlapped_non_optim

    // Step both cursors back by 32 bytes and move one block. All 32 bytes are
    // loaded into r2-r9 before any of them is stored.
    ldmdb   r14!, {r2-r9}
    stmdb   r10!, {r2-r9}
    sub     r12, r12, #32

    // If fewer than 32 bytes remain, finish with the byte loop
    cmp     r12, #31
    it      ls
    movls   r0, #0

    // while (length != 0)
    cmp     r12, #0
    bne     memcopy_overlapped
    b       memcopy_end

memcopy_overlapped_non_optim:
    // Step both cursors back by one and move the byte they now point at
    ldrb    r3, [r14, #-1]!
    strb    r3, [r10, #-1]!

    // while (--length != 0)
    subs    r12, r12, #1
    bne     memcopy_overlapped_non_optim
    b       memcopy_end

memcopy_check_optim_default:
    // Forward copy: r10 = dest, r14 = source (both still at the buffer start)
    // r0 = ((length >= 32) && dest 16-byte aligned && source 16-byte aligned)
    cmp     r12, #31
    ite     ls
    movls   r0, #0
    movhi   r0, #1
    tst     r10, #0xF
    it      ne
    movne   r0, #0
    tst     r14, #0xF
    it      ne
    movne   r0, #0
    b       memcopy_default

memcopy_default_non_optim:
    // Byte copy, advancing both cursors by one
    ldrb    r3, [r14], #1
    strb    r3, [r10], #1
    sub     r12, r12, #1

memcopy_default:
    // while (length != 0)
    cmp     r12, #0
    beq     memcopy_end

// r10 = dest, r14 = source
memcopy_default_loop:
    // Are we (still) in the optimized case ?
    cmp     r0, #0
    beq     memcopy_default_non_optim

    // Move one 32-byte block, advancing both cursors. All 32 bytes are loaded
    // into r2-r9 before any of them is stored.
    ldmia   r14!, {r2-r9}
    stmia   r10!, {r2-r9}
    sub     r12, r12, #32

    // If fewer than 32 bytes remain, finish with the byte loop
    cmp     r12, #31
    it      ls
    movls   r0, #0

    // while (length != 0)
    cmp     r12, #0
    bne     memcopy_default_loop

memcopy_end:
    // Return the original Destination and restore the saved registers
    mov     r0, r11
    pop     {r4-r11, pc}
Commit	Line	Data
a37f6605 AB	1	#------------------------------------------------------------------------------\r
	2	#\r
	3	# CopyMem() worker for ARM\r
	4	#\r
	5	# This file started out as C code that did 64 bit moves if the buffer was\r
	6	# 32-bit aligned, else it does a byte copy. It also does a byte copy for\r
	7	# any trailing bytes. It was updated to do 32-byte copies using stm/ldm.\r
	8	#\r
	9	# Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>\r
	10	# Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>\r
	11	# This program and the accompanying materials\r
	12	# are licensed and made available under the terms and conditions of the BSD License\r
	13	# which accompanies this distribution. The full text of the license may be found at\r
	14	# http://opensource.org/licenses/bsd-license.php\r
	15	#\r
	16	# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
	17	# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
	18	#\r
	19	#------------------------------------------------------------------------------\r
	20	\r
	21	.text\r
	22	.thumb\r
	23	.syntax unified\r
	24	\r
	25	/**\r
	26	Copy Length bytes from Source to Destination. Overlap is OK.\r
	27	\r
	28	This implementation\r
	29	\r
	30	@param Destination Target of copy\r
	31	@param Source Place to copy from\r
	32	@param Length Number of bytes to copy\r
	33	\r
	34	@return Destination\r
	35	\r
	36	\r
	37	VOID *\r
	38	EFIAPI\r
	39	InternalMemCopyMem (\r
	40	OUT VOID *DestinationBuffer,\r
	41	IN CONST VOID *SourceBuffer,\r
	42	IN UINTN Length\r
	43	)\r
	44	**/\r
	45	ASM_GLOBAL ASM_PFX(InternalMemCopyMem)\r
	46	ASM_PFX(InternalMemCopyMem):\r
	47	push {r4-r11, lr}\r
	48	// Save the input parameters in extra registers (r11 = destination, r14 = source, r12 = length)\r
	49	mov r11, r0\r
	50	mov r10, r0\r
	51	mov r12, r2\r
	52	mov r14, r1\r
	53	\r
	54	cmp r11, r1\r
	55	// If (dest < source)\r
	56	bcc memcopy_check_optim_default\r
	57	\r
	58	// If (source + length < dest)\r
	59	rsb r3, r1, r11\r
	60	cmp r12, r3\r
	61	bcc memcopy_check_optim_default\r
	62	b memcopy_check_optim_overlap\r
	63	\r
	64	memcopy_check_optim_default:\r
65	// Check if we can use an optimized path ((length >= 32) && destination 16-byte aligned && source 16-byte aligned) for the memcopy (optimized path if r0 == 1)\r
66	tst r0, #0xF\r
67	it ne\r
68	movne r0, #0\r
69	bne memcopy_default\r
70	tst r1, #0xF\r
71	ite ne\r
72	movne r3, #0\r
73	moveq r3, #1\r
74	cmp r2, #31\r
75	ite ls\r
76	movls r0, #0\r
77	andhi r0, r3, #1\r
78	b memcopy_default\r
79	\r
80	memcopy_check_optim_overlap:\r
81	// r10 = dest_end, r14 = source_end\r
82	add r10, r11, r12\r
83	add r14, r12, r1\r
84	\r
	85	// Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)\r
86	cmp r2, #31\r
87	ite ls\r
88	movls r0, #0\r
89	movhi r0, #1\r
90	tst r10, #0xF\r
91	it ne\r
92	movne r0, #0\r
93	tst r14, #0xF\r
94	it ne\r
95	movne r0, #0\r
96	b memcopy_overlapped\r
97	\r
98	memcopy_overlapped_non_optim:\r
99	// We read 1 byte from the end of the source buffer\r
100	sub r3, r14, #1\r
101	sub r12, r12, #1\r
102	ldrb r3, [r3, #0]\r
103	sub r2, r10, #1\r
104	cmp r12, #0\r
105	// We write 1 byte at the end of the dest buffer\r
106	sub r10, r10, #1\r
107	sub r14, r14, #1\r
108	strb r3, [r2, #0]\r
109	bne memcopy_overlapped_non_optim\r
110	b memcopy_end\r
111	\r
112	// r10 = dest_end, r14 = source_end\r
113	memcopy_overlapped:\r
114	// Are we in the optimized case ?\r
115	cmp r0, #0\r
116	beq memcopy_overlapped_non_optim\r
117	\r
118	// Optimized Overlapped - Read 32 bytes\r
119	sub r14, r14, #32\r
120	sub r12, r12, #32\r
121	cmp r12, #31\r
122	ldmia r14, {r2-r9}\r
123	\r
124	// If length is less than 32 then disable optim\r
125	it ls\r
126	movls r0, #0\r
127	\r
128	cmp r12, #0\r
129	\r
130	// Optimized Overlapped - Write 32 bytes\r
131	sub r10, r10, #32\r
132	stmia r10, {r2-r9}\r
133	\r
134	// while (length != 0)\r
135	bne memcopy_overlapped\r
136	b memcopy_end\r
137	\r
138	memcopy_default_non_optim:\r
139	// Byte copy\r
140	ldrb r3, [r14], #1\r
141	sub r12, r12, #1\r
142	strb r3, [r10], #1\r
143	\r
144	memcopy_default:\r
145	cmp r12, #0\r
146	beq memcopy_end\r
147	\r
148	// r10 = dest, r14 = source\r
149	memcopy_default_loop:\r
150	cmp r0, #0\r
151	beq memcopy_default_non_optim\r
152	\r
153	// Optimized memcopy - Read 32 Bytes\r
154	sub r12, r12, #32\r
155	cmp r12, #31\r
156	ldmia r14!, {r2-r9}\r
157	\r
158	// If length is less than 32 then disable optim\r
159	it ls\r
160	movls r0, #0\r
161	\r
162	cmp r12, #0\r
163	\r
164	// Optimized memcopy - Write 32 Bytes\r
165	stmia r10!, {r2-r9}\r
166	\r
167	// while (length != 0)\r
168	bne memcopy_default_loop\r
169	\r
170	memcopy_end:\r
171	mov r0, r11\r
172	pop {r4-r11, pc}\r