]> git.proxmox.com Git - mirror_edk2.git/blob - MdePkg/Library/BaseMemoryLibOptDxe/Arm/SetMem.S
MdePkg/BaseMemoryLibOptDxe: add accelerated ARM routines
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / SetMem.S
1 #------------------------------------------------------------------------------
2 #
3 # Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
4 #
5 # This program and the accompanying materials are licensed and made available
6 # under the terms and conditions of the BSD License which accompanies this
7 # distribution. The full text of the license may be found at
8 # http://opensource.org/licenses/bsd-license.php
9 #
10 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12 #
13 #------------------------------------------------------------------------------
14
//------------------------------------------------------------------------------
// Memory fill routines for ARM (Thumb-2, unified syntax, GNU as).
//
// Five exported entry points normalise their arguments and then share one
// store routine:
//
//   InternalMemZeroMem  (Buffer, Length)          Length = byte count
//   InternalMemSetMem   (Buffer, Length, Value)   Length = byte count
//   InternalMemSetMem16 (Buffer, Length, Value)   Length = count of 16-bit values
//   InternalMemSetMem32 (Buffer, Length, Value)   Length = count of 32-bit values
//   InternalMemSetMem64 (Buffer, Length, Value)   Length = count of 64-bit values
//
// (Length semantics follow the BaseMemoryLib internal interface, where the
// public SetMem16/32/64 wrappers pass Length / sizeof (Value).)
//
// In:    r0      = Buffer
//        r1      = Length (see above)
//        r2      = Value (r2:r3 = low:high word for InternalMemSetMem64)
// Out:   r0      = Buffer (r0 is never written)
// Clobb: r1, r2, r3, lr, flags
// Stack: 8 bytes (r4 and the return address are pushed and popped)
//
// Notes:
//  - The head and tail of the buffer are written with word/halfword stores
//    that may be unaligned, so unaligned accesses must be permitted.
//  - The bulk loop writes the pattern starting at a 16-byte aligned address
//    with r2 at offset 0. For the 16/32/64-bit variants this matches the head
//    stores only when Buffer is aligned to the element size.
//    NOTE(review): the public wrappers are expected to assert this alignment;
//    confirm against the callers.
//------------------------------------------------------------------------------

    .text
    .thumb
    .syntax unified
    .align  5

    // Mark every exported label as a function so that the symbol carries the
    // Thumb attribute and interworking calls enter in the right state.
    .type   ASM_PFX(InternalMemSetMem16), %function
ASM_GLOBAL ASM_PFX(InternalMemSetMem16)
ASM_PFX(InternalMemSetMem16):
    uxth    r2, r2                  // keep only the 16-bit value
    lsl     r1, r1, #1              // element count -> byte count
    orr     r2, r2, r2, lsl #16     // replicate into both halfwords
    b       6f

    .type   ASM_PFX(InternalMemSetMem32), %function
ASM_GLOBAL ASM_PFX(InternalMemSetMem32)
ASM_PFX(InternalMemSetMem32):
    lsl     r1, r1, #2              // element count -> byte count
    b       6f

    .type   ASM_PFX(InternalMemSetMem64), %function
ASM_GLOBAL ASM_PFX(InternalMemSetMem64)
ASM_PFX(InternalMemSetMem64):
    lsl     r1, r1, #3              // element count -> byte count
    b       7f                      // r2:r3 already hold the full pattern

    .align  5                       // byte-wise entry points and the shared
                                    // body start on a 32-byte boundary
    .type   ASM_PFX(InternalMemSetMem), %function
ASM_GLOBAL ASM_PFX(InternalMemSetMem)
ASM_PFX(InternalMemSetMem):
    uxtb    r2, r2                  // keep only the 8-bit value
    orr     r2, r2, r2, lsl #8      // replicate into a halfword
    orr     r2, r2, r2, lsl #16     // replicate into a word
    b       6f

    .type   ASM_PFX(InternalMemZeroMem), %function
ASM_GLOBAL ASM_PFX(InternalMemZeroMem)
ASM_PFX(InternalMemZeroMem):
    movs    r2, #0                  // fill pattern is all zeroes

6:  mov     r3, r2                  // high word of the pattern := low word

    //
    // Shared body. Invariants on entry:
    //   r0 = dst, r1 = length in bytes, r2:r3 = 8-byte fill pattern
    //
7:  push    {r4, lr}
    cmp     r1, #16                 // fewer than 16 bytes of input?
    add     r1, r1, r0              // r1 := dst + length (flags preserved)
    add     lr, r0, #16
    blt     2f
    bic     lr, lr, #15             // align output pointer

    // Head: 16 bytes at dst, covering everything below the aligned pointer.
    str     r2, [r0]                // potentially unaligned store of 4 bytes
    str     r3, [r0, #4]            // potentially unaligned store of 4 bytes
    str     r2, [r0, #8]            // potentially unaligned store of 4 bytes
    str     r3, [r0, #12]           // potentially unaligned store of 4 bytes
    beq     1f                      // length was exactly 16: done

0:  add     lr, lr, #16             // advance the output pointer by 16 bytes
    subs    r4, r1, lr              // past the output?
    blt     3f                      // break out of the loop
    strd    r2, r3, [lr, #-16]      // aligned store of 16 bytes
    strd    r2, r3, [lr, #-8]
    bne     0b                      // goto beginning of loop
1:  pop     {r4, pc}

    // Tail: r4 := bytes still to be written starting at lr - 16.
2:  subs    r4, r1, lr
3:  adds    r4, r4, #16
    subs    r1, r1, #8              // r1 := end - 8
    cmp     r4, #4                  // between 4 and 15 bytes?
    blt     4f
    cmp     r4, #8                  // between 8 and 15 bytes?
    str     r2, [lr, #-16]          // overlapping store of 4 + (4 + 4) + 4 bytes
    itt     gt
    strgt   r3, [lr, #-12]
    strgt   r2, [r1]
    str     r3, [r1, #4]            // last 4 bytes of the buffer
    pop     {r4, pc}

4:  cbz     r4, 5f                  // zero-length request: store nothing
    cmp     r4, #2                  // 2 or 3 bytes?
    strb    r2, [lr, #-16]          // store 1 byte
    it      ge
    strhge  r2, [r1, #6]            // store 2 bytes (the last 2 of the buffer)
5:  pop     {r4, pc}