]> git.proxmox.com Git - mirror_edk2.git/blob - MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
MdePkg/BaseMemoryLibOptDxe ARM: fix arithmetic bugs in CompareMem()
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / CompareMem.asm
1 ;
2 ; Copyright (c) 2013 - 2016, Linaro Limited
3 ; All rights reserved.
4 ;
5 ; Redistribution and use in source and binary forms, with or without
6 ; modification, are permitted provided that the following conditions are met:
7 ; * Redistributions of source code must retain the above copyright
8 ; notice, this list of conditions and the following disclaimer.
9 ; * Redistributions in binary form must reproduce the above copyright
10 ; notice, this list of conditions and the following disclaimer in the
11 ; documentation and/or other materials provided with the distribution.
12 ; * Neither the name of the Linaro nor the
13 ; names of its contributors may be used to endorse or promote products
14 ; derived from this software without specific prior written permission.
15 ;
16 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 ; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 ;
28
29 ; Parameters and result.
30 #define src1 r0
31 #define src2 r1
32 #define limit r2
33 #define result r0
34
35 ; Internal variables.
36 #define data1 r3
37 #define data2 r4
38 #define limit_wd r5
39 #define diff r6
40 #define tmp1 r7
41 #define tmp2 r12
42 #define pos r8
43 #define mask r14
44
45 EXPORT InternalMemCompareMem
46 THUMB
47 AREA CompareMem, CODE, READONLY
48
49 InternalMemCompareMem
50 push {r4-r8, lr}
51 eor tmp1, src1, src2
52 tst tmp1, #3
53 bne Lmisaligned4
54 ands tmp1, src1, #3
55 bne Lmutual_align
56 add limit_wd, limit, #3
57 nop.w
58 lsr limit_wd, limit_wd, #2
59
60 ; Start of performance-critical section -- one 32B cache line.
61 Lloop_aligned
62 ldr data1, [src1], #4
63 ldr data2, [src2], #4
64 Lstart_realigned
65 subs limit_wd, limit_wd, #1
66 eor diff, data1, data2 ; Non-zero if differences found.
67 cbnz diff, L0
68 bne Lloop_aligned
69 ; End of performance-critical section -- one 32B cache line.
70
71 ; Not reached the limit, must have found a diff.
72 L0
73 cbnz limit_wd, Lnot_limit
74
75 // Limit % 4 == 0 => all bytes significant.
76 ands limit, limit, #3
77 beq Lnot_limit
78
79 lsl limit, limit, #3 // Bits -> bytes.
80 mov mask, #~0
81 lsl mask, mask, limit
82 bic data1, data1, mask
83 bic data2, data2, mask
84
85 orr diff, diff, mask
86
87 Lnot_limit
88 rev diff, diff
89 rev data1, data1
90 rev data2, data2
91
92 ; The MS-non-zero bit of DIFF marks either the first bit
93 ; that is different, or the end of the significant data.
94 ; Shifting left now will bring the critical information into the
95 ; top bits.
96 clz pos, diff
97 lsl data1, data1, pos
98 lsl data2, data2, pos
99
100 ; But we need to zero-extend (char is unsigned) the value and then
101 ; perform a signed 32-bit subtraction.
102 lsr data1, data1, #28
103 sub result, data1, data2, lsr #28
104 pop {r4-r8, pc}
105
106 Lmutual_align
107 ; Sources are mutually aligned, but are not currently at an
108 ; alignment boundary. Round down the addresses and then mask off
109 ; the bytes that precede the start point.
110 bic src1, src1, #3
111 bic src2, src2, #3
112 add limit, limit, tmp1 ; Adjust the limit for the extra.
113 lsl tmp1, tmp1, #2 ; Bytes beyond alignment -> bits.
114 ldr data1, [src1], #4
115 neg tmp1, tmp1 ; Bits to alignment -32.
116 ldr data2, [src2], #4
117 mov tmp2, #~0
118
119 ; Little-endian. Early bytes are at LSB.
120 lsr tmp2, tmp2, tmp1 ; Shift (tmp1 & 31).
121 add limit_wd, limit, #3
122 orr data1, data1, tmp2
123 orr data2, data2, tmp2
124 lsr limit_wd, limit_wd, #2
125 b Lstart_realigned
126
127 Lmisaligned4
128 sub limit, limit, #1
129 L1
130 // Perhaps we can do better than this.
131 ldrb data1, [src1], #1
132 ldrb data2, [src2], #1
133 subs limit, limit, #1
134 it cs
135 cmpcs data1, data2
136 beq L1
137 sub result, data1, data2
138 pop {r4-r8, pc}
139
140 END