MdePkg/BaseMemoryLibOptDxe ARM: add missing function annotations
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / CompareMem.S
1 //
2 // Copyright (c) 2013 - 2016, Linaro Limited
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright
10 // notice, this list of conditions and the following disclaimer in the
11 // documentation and/or other materials provided with the distribution.
12 // * Neither the name of the Linaro nor the
13 // names of its contributors may be used to endorse or promote products
14 // derived from this software without specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 //
28
29 // Parameters and result.
// src1 and src2 are used as load base addresses and limit as a length in
// bytes. The result is written to r0, the same register as src1.
// NOTE(review): r0-r2 in / r0 out presumably follows the AAPCS argument
// and return registers -- confirm against the C prototype.
30 #define src1 r0
31 #define src2 r1
32 #define limit r2
33 #define result r0
34
35 // Internal variables.
// data1/data2 - the word (or byte) most recently loaded from src1/src2
// limit_wd    - limit converted to a count of words, rounded up
// diff        - data1 XOR data2, non-zero where the two words differ
// tmp1/tmp2   - scratch registers for the alignment handling
// pos         - leading-zero count of the byte-reversed diff
// mask        - covers the bytes of the final word that lie past limit
// mask lives in r14, i.e. lr; the routine pushes lr on entry, which is
// what makes it usable as a scratch register.
36 #define data1 r3
37 #define data2 r4
38 #define limit_wd r5
39 #define diff r6
40 #define tmp1 r7
41 #define tmp2 r12
42 #define pos r8
43 #define mask r14
44
45 .text
46 .thumb
47 .syntax unified
48 .align 5
//
// InternalMemCompareMem
//
// Compares the limit bytes starting at src1 with the limit bytes starting
// at src2.
//
// In:  r0 = src1, r1 = src2, r2 = limit (length in bytes).
// Out: r0 = result. Zero when every compared byte matches. Otherwise the
//      sign reflects the first mismatch: negative when src1 holds the
//      smaller unsigned value there, positive when it holds the larger.
//      Only the byte-wise path returns the exact byte difference; the
//      word-wise paths return the difference of two 4-bit fields.
// Stack: r4-r8 and lr are pushed on entry; every exit restores r4-r8 and
//      pops the saved lr straight into pc to return.
//
// One of three paths is chosen from the low two bits of the addresses:
//   - both word aligned                   -> word loop (.Lloop_aligned)
//   - same non-zero offset within a word  -> .Lmutual_align, then word loop
//   - different offsets within a word     -> byte loop (.Lmisaligned4)
//
// The word-wise paths load whole aligned words, so they can read up to
// three bytes before or after the requested range, always within an
// aligned word that also holds requested bytes.
//
// NOTE(review): limit == 0 does not look supported -- the counters wrap
// below zero and the loops keep reading. Presumably callers filter out a
// zero length; confirm.
//
49 .type ASM_PFX(InternalMemCompareMem), %function
50 ASM_GLOBAL ASM_PFX(InternalMemCompareMem)
51 ASM_PFX(InternalMemCompareMem):
52 push {r4-r8, lr} // Save the registers used as scratch below, plus lr.
53 eor tmp1, src1, src2
54 tst tmp1, #3 // Do the low two address bits differ?
55 bne .Lmisaligned4 // Yes: the pointers can never be word aligned together.
56 ands tmp1, src1, #3 // tmp1 = offset of src1 (and src2) within its word.
57 bne .Lmutual_align
58 add limit_wd, limit, #3 // With the lsr below: limit_wd = (limit + 3) / 4.
59 nop.w // NOTE(review): presumably padding that places the loop below on a cache-line boundary -- confirm.
60 lsr limit_wd, limit_wd, #2
61
62 // Start of performance-critical section -- one 32B cache line.
63 .Lloop_aligned:
64 ldr data1, [src1], #4 // Load one word from each buffer, post-incrementing.
65 ldr data2, [src2], #4
66 .Lstart_realigned:
67 subs limit_wd, limit_wd, #1 // Sets Z for the bne below; eor and cbnz leave the flags alone.
68 eor diff, data1, data2 // Non-zero if differences found.
69 cbnz diff, 0f
70 bne .Lloop_aligned // Words equal and more words remain.
71 // End of performance-critical section -- one 32B cache line.
72
73 // Not reached the limit, must have found a diff.
74 0: cbnz limit_wd, .Lnot_limit
75
76 // This was the last word. Limit % 4 == 0 => all bytes significant.
77 ands limit, limit, #3
78 beq .Lnot_limit
79
80 lsl limit, limit, #3 // Bytes -> bits.
81 mov mask, #~0
82 lsl mask, mask, limit // mask = the bits of the bytes that lie past the limit.
83 bic data1, data1, mask // Clear those bytes in both words ...
84 bic data2, data2, mask
85
86 orr diff, diff, mask // ... and mark them in diff as the end of the data.
87
88 .Lnot_limit:
// Byte-reverse so that the byte at the lowest address becomes the most
// significant one.
89 rev diff, diff
90 rev data1, data1
91 rev data2, data2
92
93 // The MS-non-zero bit of DIFF marks either the first bit
94 // that is different, or the end of the significant data.
95 // Shifting left now will bring the critical information into the
96 // top bits.
97 clz pos, diff
98 lsl data1, data1, pos
99 lsl data2, data2, pos
100
101 // But we need to zero-extend (char is unsigned) the value and then
102 // perform a signed 32-bit subtraction.
// Only the top four bits of each shifted word take part in the result.
103 lsr data1, data1, #28
104 sub result, data1, data2, lsr #28
105 pop {r4-r8, pc} // Restore r4-r8 and return.
106
107 .Lmutual_align:
108 // Sources are mutually aligned, but are not currently at an
109 // alignment boundary. Round down the addresses and then mask off
110 // the bytes that precede the start point.
111 bic src1, src1, #3
112 bic src2, src2, #3
113 add limit, limit, tmp1 // Adjust the limit for the extra.
114 lsl tmp1, tmp1, #3 // Bytes beyond alignment -> bits.
115 ldr data1, [src1], #4
116 rsb tmp1, tmp1, #32 // tmp1 = 32 - (bits before the start point).
117 ldr data2, [src2], #4
118 mov tmp2, #~0
119
120 // Little-endian. Early bytes are at LSB.
121 lsr tmp2, tmp2, tmp1 // tmp2 = ones over the bytes that precede the start point.
122 add limit_wd, limit, #3 // With the lsr below: limit_wd = (limit + 3) / 4.
123 orr data1, data1, tmp2 // Force the leading bytes to 0xff in both words so they compare equal.
124 orr data2, data2, tmp2
125 lsr limit_wd, limit_wd, #2
126 b .Lstart_realigned // Enter the word loop with the first words already loaded.
127
128 .Lmisaligned4:
// Byte-at-a-time comparison for pointers whose low two bits differ.
129 sub limit, limit, #1 // Pre-decrement so the subs below borrows on the last byte.
130 1:
131 // Perhaps we can do better than this.
132 ldrb data1, [src1], #1
133 ldrb data2, [src2], #1
134 subs limit, limit, #1 // Carry clear (borrow) => this was the last byte.
135 it cs
136 cmpcs.n data1, data2 // Compare only while bytes remain; otherwise the subs flags (Z clear) end the loop.
137 beq 1b
138 sub result, data1, data2 // Difference of the last pair of bytes loaded.
139 pop {r4-r8, pc} // Restore r4-r8 and return.
138 sub result, data1, data2
139 pop {r4-r8, pc}