]> git.proxmox.com Git - mirror_edk2.git/blob - MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
UefiCpuPkg: Move AsmRelocateApLoopStart from Mpfuncs.nasm to AmdSev.nasm
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / CompareMem.asm
1 ;
2 ; Copyright (c) 2013 - 2016, Linaro Limited
3 ; All rights reserved.
4 ; SPDX-License-Identifier: BSD-2-Clause-Patent
5 ;
6
; Register aliases.  These are C preprocessor macros, so every use of a
; name below is replaced textually by the register it stands for.

; Incoming arguments and the returned value.  The return value is
; delivered in the same register that carries the first argument.
#define result    r0
#define src1      r0
#define src2      r1
#define limit     r2

; Working registers, listed by register number.  r14 is the link
; register; it is used here as an ordinary scratch register.
#define data1     r3
#define data2     r4
#define limit_wd  r5
#define diff      r6
#define tmp1      r7
#define pos       r8
#define tmp2      r12
#define mask      r14

    EXPORT  InternalMemCompareMem
    THUMB
    AREA    CompareMem, CODE, READONLY
26
;-----------------------------------------------------------------------
; InternalMemCompareMem
;
; Compares two memory buffers and reports the first mismatch.
;
; In:    src1  (r0) = address of the first buffer
;        src2  (r1) = address of the second buffer
;        limit (r2) = number of bytes to compare
; Out:   result (r0) = 0 if all compared bytes are equal.  Otherwise
;        negative if the first differing byte in src1 is lower
;        (unsigned) than the one in src2, positive if it is higher.
;        The word-wise paths subtract 4-bit fields aligned on the first
;        differing bit and the byte-wise path subtracts whole bytes, so
;        only the sign of a non-zero result is consistent.
; Saved: r4-r8 are pushed on entry and restored on return.
; Clobb: r1-r3, r12, r14 (return is via the pushed lr), flags.
;
; Notes: - Little-endian byte order is assumed by the word-wise paths.
;        - The word-wise paths load whole aligned words, so up to three
;          bytes before the start and after the end of each buffer are
;          read.  They are masked out of the comparison.
;        - NOTE(review): a zero limit is not handled by the aligned and
;          byte-wise paths (the counters wrap).  Presumably the caller
;          filters out zero lengths -- confirm.
;-----------------------------------------------------------------------
InternalMemCompareMem
    push    {r4-r8, lr}
    eor     tmp1, src1, src2
    tst     tmp1, #3
    bne     Lmisaligned4                ; Low address bits differ: go byte-wise.
    ands    tmp1, src1, #3
    bne     Lmutual_align               ; Same non-zero misalignment in both.
    add     limit_wd, limit, #3
    nop.w                               ; Padding; presumably positions the loop below.
    lsr     limit_wd, limit_wd, #2      ; Number of words, rounded up.

    ; Start of performance-critical section  -- one 32B cache line.
Lloop_aligned
    ldr     data1, [src1], #4
    ldr     data2, [src2], #4
Lstart_realigned
    subs    limit_wd, limit_wd, #1      ; Flags are consumed by the bne below.
    eor     diff, data1, data2          ; Non-zero if differences found.
    cbnz    diff, L0
    bne     Lloop_aligned
    ; End of performance-critical section  -- one 32B cache line.

    ; Reached either because a difference was found or because the
    ; word count ran out (fall-through with diff == 0).
L0
    cbnz    limit_wd, Lnot_limit        ; Words remain: the whole word counts.

    ; This was the last word.  Limit % 4 == 0 => all bytes significant.
    ands    limit, limit, #3
    beq     Lnot_limit

    lsl     limit, limit, #3            ; Bytes -> bits.
    mov     mask, #~0
    lsl     mask, mask, limit           ; Ones over the bytes past the limit.
    bic     data1, data1, mask
    bic     data2, data2, mask

    orr     diff, diff, mask            ; Marks the end of the significant data.

Lnot_limit
    ; Byte-reverse so that the lowest-addressed byte is most significant.
    rev     diff, diff
    rev     data1, data1
    rev     data2, data2

    ; The MS-non-zero bit of DIFF marks either the first bit
    ; that is different, or the end of the significant data.
    ; Shifting left now will bring the critical information into the
    ; top bits.
    clz     pos, diff
    lsl     data1, data1, pos
    lsl     data2, data2, pos

    ; Zero-extend the top four bits of each value and subtract them as
    ; signed 32-bit quantities.
    lsr     data1, data1, #28
    sub     result, data1, data2, lsr #28
    pop     {r4-r8, pc}

Lmutual_align
    ; Sources are mutually aligned, but are not currently at an
    ; alignment boundary.  Round down the addresses and then mask off
    ; the bytes that precede the start point.
    ;
    ; tmp1 holds the 1..3 bytes beyond the alignment boundary.  The mask
    ; shift count is formed explicitly as 32 - 8 * tmp1, because a
    ; register-specified LSR takes its count from the low byte of the
    ; register and produces 0 for any count of 32 or more.  A negated
    ; count would therefore leave the mask empty.
    bic     src1, src1, #3
    bic     src2, src2, #3
    add     limit, limit, tmp1          ; Adjust the limit for the extra.
    lsl     tmp1, tmp1, #3              ; Bytes beyond alignment -> bits.
    ldr     data1, [src1], #4
    rsb     tmp1, tmp1, #32             ; Shift count: 24, 16 or 8.
    ldr     data2, [src2], #4
    mov     tmp2, #~0

    ; Little-endian.  Early bytes are at LSB.
    lsr     tmp2, tmp2, tmp1            ; Ones over the preceding bytes only.
    add     limit_wd, limit, #3
    orr     data1, data1, tmp2          ; Force the preceding bytes equal.
    orr     data2, data2, tmp2
    lsr     limit_wd, limit_wd, #2
    b       Lstart_realigned

Lmisaligned4
    ; The buffers have different alignment: compare one byte at a time.
    sub     limit, limit, #1            ; Count down from limit - 1.
L1
    ; Perhaps we can do better than this.
    ldrb    data1, [src1], #1
    ldrb    data2, [src2], #1
    subs    limit, limit, #1            ; Borrow (carry clear) on the last byte.
    it      cs
    cmpcs   data1, data2                ; Compare only while bytes remain.
    beq     L1
    sub     result, data1, data2
    pop     {r4-r8, pc}

    END