]>
Commit | Line | Data |
---|---|---|
a37f6605 AB |
1 | //\r |
2 | // Copyright (c) 2013 - 2016, Linaro Limited\r | |
3 | // All rights reserved.\r | |
4 | //\r | |
5 | // Redistribution and use in source and binary forms, with or without\r | |
6 | // modification, are permitted provided that the following conditions are met:\r | |
7 | // * Redistributions of source code must retain the above copyright\r | |
8 | // notice, this list of conditions and the following disclaimer.\r | |
9 | // * Redistributions in binary form must reproduce the above copyright\r | |
10 | // notice, this list of conditions and the following disclaimer in the\r | |
11 | // documentation and/or other materials provided with the distribution.\r | |
12 | // * Neither the name of the Linaro nor the\r | |
13 | // names of its contributors may be used to endorse or promote products\r | |
14 | // derived from this software without specific prior written permission.\r | |
15 | //\r | |
16 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r | |
17 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r | |
18 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r | |
19 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\r | |
20 | // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r | |
21 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r | |
22 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\r | |
23 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\r | |
24 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r | |
25 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r | |
26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r | |
27 | //\r | |
28 | \r | |
29 | // Parameters and result: r0-r2 carry src1, src2 and limit; result shares r0 with src1.\r |
30 | #define src1 r0\r |
31 | #define src2 r1\r | |
32 | #define limit r2\r | |
33 | #define result r0\r | |
34 | \r | |
35 | // Internal variables. r4-r8 and r14 (lr) are saved by the push in InternalMemCompareMem; r3 and r12 are used without being saved.\r | |
36 | #define data1 r3\r | |
37 | #define data2 r4\r | |
38 | #define limit_wd r5\r | |
39 | #define diff r6\r | |
40 | #define tmp1 r7\r | |
41 | #define tmp2 r12\r | |
42 | #define pos r8\r | |
43 | #define mask r14\r | |
44 | \r | |
// InternalMemCompareMem -- compare two buffers in memory order.
//   In:  src1 (r0), src2 (r1) = buffers to compare; limit (r2) = byte count.
//   Out: result (r0) = 0 when no difference is found within `limit` bytes,
//        otherwise non-zero with the sign of (src1 byte - src2 byte) at the
//        first difference.
//   Three paths, selected from the low two bits of the addresses:
//     - both word aligned:      word-at-a-time loop (.Lloop_aligned);
//     - same misalignment:      round down and neutralise the leading bytes
//                               (.Lmutual_align), then join the word loop;
//     - different misalignment: byte-at-a-time loop (.Lmisaligned4).
//   The word paths treat the lowest-addressed byte as the LSB of a loaded
//   word, i.e. they are written for little-endian loads.
//   NOTE(review): there is no limit == 0 check in this routine; callers
//   presumably guarantee a non-zero length -- confirm.
45 | .text\r |
46 | .thumb\r | |
47 | .syntax unified\r | |
48 | .align 5\r | |
decaac5d | 49 | .type ASM_PFX(InternalMemCompareMem), %function\r |
a37f6605 AB |
50 | ASM_GLOBAL ASM_PFX(InternalMemCompareMem)\r |
51 | ASM_PFX(InternalMemCompareMem):\r | |
// Save r4-r8 and lr. lr (r14) is reused as `mask`; each exit pops the saved
// lr straight into pc to return.
52 | push {r4-r8, lr}\r | |
// Low two bits of (src1 ^ src2) non-zero => the two pointers can never be
// word aligned at the same time.
53 | eor tmp1, src1, src2\r | |
54 | tst tmp1, #3\r | |
55 | bne .Lmisaligned4\r | |
// tmp1 = src1 & 3. Non-zero => both pointers share the same misalignment;
// .Lmutual_align consumes this value.
56 | ands tmp1, src1, #3\r | |
57 | bne .Lmutual_align\r | |
// limit_wd = number of words needed to cover `limit` bytes, rounded up.
// NOTE(review): the nop.w looks like padding so that the loop below starts on
// a 32-byte boundary (see .align 5 above) -- confirm before removing it.
58 | add limit_wd, limit, #3\r | |
59 | nop.w\r | |
60 | lsr limit_wd, limit_wd, #2\r | |
61 | \r | |
62 | // Start of performance-critical section -- one 32B cache line.\r | |
63 | .Lloop_aligned:\r | |
64 | ldr data1, [src1], #4\r | |
65 | ldr data2, [src2], #4\r | |
66 | .Lstart_realigned:\r | |
// The flags set by subs are the ones tested by the bne below; the eor (no S
// suffix) and the cbnz in between leave them untouched.
67 | subs limit_wd, limit_wd, #1\r | |
68 | eor diff, data1, data2 // Non-zero if differences found.\r | |
69 | cbnz diff, 0f\r | |
70 | bne .Lloop_aligned\r | |
71 | // End of performance-critical section -- one 32B cache line.\r | |
72 | \r | |
73 | // Words remain (limit_wd != 0): the limit was not reached, so the loop exited on a diff.\r | |
74 | 0: cbnz limit_wd, .Lnot_limit\r | |
75 | \r | |
76 | // This was the last word. Limit % 4 == 0 => all of its bytes are significant.\r | |
77 | ands limit, limit, #3\r | |
78 | beq .Lnot_limit\r | |
79 | \r | |
// Otherwise only (limit % 4) bytes of the last word count: clear the bytes
// beyond the limit in both words and mark them in diff as end-of-data.
80 | lsl limit, limit, #3 // Bytes -> bits.\r | |
81 | mov mask, #~0\r | |
82 | lsl mask, mask, limit\r | |
83 | bic data1, data1, mask\r | |
84 | bic data2, data2, mask\r | |
85 | \r | |
86 | orr diff, diff, mask\r | |
87 | \r | |
88 | .Lnot_limit:\r | |
// Byte-reverse so that the lowest-addressed byte becomes the most
// significant one.
89 | rev diff, diff\r | |
90 | rev data1, data1\r | |
91 | rev data2, data2\r | |
92 | \r | |
93 | // The MS-non-zero bit of DIFF marks either the first bit\r | |
94 | // that is different, or the end of the significant data.\r | |
95 | // Shifting left now will bring the critical information into the\r | |
96 | // top bits.\r | |
// If diff is zero (buffers equal, all bytes significant) clz yields 32 and
// both register shifts produce 0, so the result below is 0.
97 | clz pos, diff\r | |
98 | lsl data1, data1, pos\r | |
99 | lsl data2, data2, pos\r | |
100 | \r | |
101 | // But we need to zero-extend (char is unsigned) the value and then\r | |
102 | // perform a signed 32-bit subtraction.\r | |
// NOTE(review): only the top 4 bits of each shifted word take part
// (lsr #28), so the sign and zero-ness of the result are meaningful but its
// magnitude is not the byte difference -- confirm callers rely on sign only.
103 | lsr data1, data1, #28\r | |
104 | sub result, data1, data2, lsr #28\r | |
105 | pop {r4-r8, pc}\r | |
106 | \r | |
107 | .Lmutual_align:\r | |
108 | // Sources are mutually aligned, but are not currently at an\r | |
109 | // alignment boundary. Round down the addresses and then mask off\r | |
110 | // the bytes that precede the start point.\r | |
// On entry tmp1 = src1 & 3 (1..3), the number of bytes past the boundary.
111 | bic src1, src1, #3\r | |
112 | bic src2, src2, #3\r | |
113 | add limit, limit, tmp1 // Count the bytes rounded down over in the limit.\r | |
60fe5e8a | 114 | lsl tmp1, tmp1, #3 // Bytes beyond alignment -> bits.\r |
a37f6605 | 115 | ldr data1, [src1], #4\r |
60fe5e8a | 116 | rsb tmp1, tmp1, #32 // Bits from the start point up to the next word boundary.\r |
a37f6605 AB |
117 | ldr data2, [src2], #4\r |
118 | mov tmp2, #~0\r | |
119 | \r | |
120 | // Little-endian. Early bytes are at LSB.\r | |
121 | lsr tmp2, tmp2, tmp1 // Ones in the bytes that precede the start point (shift is 8, 16 or 24).\r | |
122 | add limit_wd, limit, #3\r | |
// Force the preceding bytes to 0xFF in both words so they compare equal.
123 | orr data1, data1, tmp2\r | |
124 | orr data2, data2, tmp2\r | |
125 | lsr limit_wd, limit_wd, #2\r | |
126 | b .Lstart_realigned\r | |
127 | \r | |
128 | .Lmisaligned4:\r | |
// Byte-at-a-time path. limit is pre-decremented so that the subs in the loop
// borrows (carry clear) exactly when the byte pair just loaded is the last
// one. In that case the conditional compare is skipped and Z is clear from
// the subs, which ends the loop; otherwise the loop continues while the two
// bytes are equal.
129 | sub limit, limit, #1\r | |
130 | 1:\r | |
131 | // Perhaps we can do better than this.\r | |
132 | ldrb data1, [src1], #1\r | |
133 | ldrb data2, [src2], #1\r | |
134 | subs limit, limit, #1\r | |
135 | it cs\r | |
eab26788 | 136 | cmpcs.n data1, data2\r |
a37f6605 AB |
137 | beq 1b\r |
// result = difference of the last byte pair loaded (both zero-extended by
// ldrb): zero if every byte matched, otherwise the first mismatch.
138 | sub result, data1, data2\r | |
139 | pop {r4-r8, pc}\r |