]> git.proxmox.com Git - mirror_edk2.git/blame - MdePkg/Library/BaseMemoryLibOptDxe/Arm/CompareMem.asm
MdePkg/BaseMemoryLibOptDxe: add accelerated ARM routines
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / CompareMem.asm
CommitLineData
a37f6605
AB
1;\r
2; Copyright (c) 2013 - 2016, Linaro Limited\r
3; All rights reserved.\r
4;\r
5; Redistribution and use in source and binary forms, with or without\r
6; modification, are permitted provided that the following conditions are met:\r
7; * Redistributions of source code must retain the above copyright\r
8; notice, this list of conditions and the following disclaimer.\r
9; * Redistributions in binary form must reproduce the above copyright\r
10; notice, this list of conditions and the following disclaimer in the\r
11; documentation and/or other materials provided with the distribution.\r
12; * Neither the name of the Linaro nor the\r
13; names of its contributors may be used to endorse or promote products\r
14; derived from this software without specific prior written permission.\r
15;\r
16; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
17; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
18; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
19; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\r
20; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
21; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r
22; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\r
23; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\r
24; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r
25; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r
26; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
27;\r
28\r
; Register aliases for InternalMemCompareMem. They are plain preprocessor
; macros, so this file has to pass through the C preprocessor before it
; is assembled. Keep comments off the define lines themselves: anything
; after the register name would become part of the macro expansion.

; Incoming arguments and the return value (result reuses r0).
#define src1      r0
#define src2      r1
#define limit     r2
#define result    r0

; Working registers, listed in register order. r4-r8 and r14 (lr) are
; pushed on entry to the routine and restored on exit.
#define data1     r3
#define data2     r4
#define limit_wd  r5
#define diff      r6
#define tmp1      r7
#define pos       r8
#define tmp2      r12
#define mask      r14
44\r
;-----------------------------------------------------------------------
; InternalMemCompareMem
;
; Compares limit bytes starting at src1 with limit bytes starting at
; src2. Thumb-2 code; the word-at-a-time path assumes little-endian
; byte order.
;
; In:    r0 = src1    first buffer
;        r1 = src2    second buffer
;        r2 = limit   number of bytes to compare, must be non-zero
; Out:   r0 = result  0 when every compared byte matches, otherwise
;                     (byte from src1) - (byte from src2) at the first
;                     offset where the buffers differ
; Saved: r4-r8 and lr are pushed on entry and popped on exit.
; Clobb: r1, r2, r3, r12, flags
;
; When both pointers share the same offset within a 32-bit word, whole
; aligned words are loaded. This can read up to 3 bytes in front of and
; up to 3 bytes past the compared range, always inside an aligned word
; that also holds at least one compared byte.
;
; NOTE(review): limit == 0 is not handled. Both paths decrement their
; counter before testing it, so a zero count wraps instead of stopping.
; Presumably the caller filters out zero lengths; confirm.
;-----------------------------------------------------------------------
    EXPORT  InternalMemCompareMem
    THUMB
    AREA    CompareMem, CODE, READONLY

InternalMemCompareMem
    push    {r4-r8, lr}
    eor     tmp1, src1, src2
    tst     tmp1, #3
    bne     Lmisaligned4                ; Different word offsets: byte loop.
    ands    tmp1, src1, #3
    bne     Lmutual_align               ; Same, but non-zero, word offset.
    add     limit_wd, limit, #3
    nop.w                               ; Presumably padding that positions
                                        ; the loop below; confirm.
    lsr     limit_wd, limit_wd, #2      ; Words to compare, rounded up.

    ; Start of performance-critical section  -- one 32B cache line.
Lloop_aligned
    ldr     data1, [src1], #4
    ldr     data2, [src2], #4
Lstart_realigned
    subs    limit_wd, limit_wd, #1
    eor     diff, data1, data2          ; Non-zero if differences found.
    cbnz    diff, L0                    ; cbnz leaves the flags from subs.
    bne     Lloop_aligned
    ; End of performance-critical section  -- one 32B cache line.

    ; Either a difference was found or the last word has been loaded.
L0
    cbnz    limit_wd, Lnot_limit        ; Words remain: genuine difference.

    ; Last word. limit % 4 == 0 => all bytes significant.
    ands    limit, limit, #3
    beq     Lnot_limit

    ; Only the low (limit % 4) bytes of the last word are significant.
    ; Clear the rest in both data words and mark them in diff so that
    ; the scan below stops at the end of the significant data.
    lsl     limit, limit, #3            ; Bytes -> bits.
    mov     mask, #~0
    lsl     mask, mask, limit
    bic     data1, data1, mask
    bic     data2, data2, mask

    orr     diff, diff, mask

Lnot_limit
    ; Byte-reverse so that the lowest-addressed byte becomes the most
    ; significant one.
    rev     diff, diff
    rev     data1, data1
    rev     data2, data2

    ; The MS-non-zero bit of DIFF marks either the first bit
    ; that is different, or the end of the significant data.
    ; Round its position down to a byte boundary so that the shift
    ; below brings the whole byte containing it into the top 8 bits.
    ; If diff is zero, pos is 32 and both data words shift out to zero.
    clz     pos, diff
    bic     pos, pos, #7
    lsl     data1, data1, pos
    lsl     data2, data2, pos

    ; Zero-extend the two bytes (char is unsigned) and perform a signed
    ; 32-bit subtraction, yielding the same byte difference that the
    ; byte-wise path returns.
    lsr     data1, data1, #24
    sub     result, data1, data2, lsr #24
    pop     {r4-r8, pc}

Lmutual_align
    ; Sources are mutually aligned, but are not currently at an
    ; alignment boundary.  Round down the addresses and then mask off
    ; the bytes that precede the start point.
    ; On entry tmp1 = src1 & 3, i.e. 1..3 bytes precede the start.
    bic     src1, src1, #3
    bic     src2, src2, #3
    add     limit, limit, tmp1          ; Adjust the limit for the extra.
    lsl     tmp1, tmp1, #3              ; Bytes beyond alignment -> bits.
    ldr     data1, [src1], #4
    ; A register-specified shift uses the low byte of the count and
    ; gives 0 for counts of 32 or more, so the count must be the
    ; explicit value 32 - bits (8..24), not a negated bit count.
    rsb     tmp1, tmp1, #32
    ldr     data2, [src2], #4
    mov     tmp2, #~0

    ; Little-endian.  Early bytes are at LSB.
    lsr     tmp2, tmp2, tmp1            ; Ones over the preceding bytes.
    add     limit_wd, limit, #3
    orr     data1, data1, tmp2          ; Force the preceding bytes to
    orr     data2, data2, tmp2          ; compare equal.
    lsr     limit_wd, limit_wd, #2
    b       Lstart_realigned

Lmisaligned4
    ; Byte-at-a-time comparison. limit is biased by one so that the
    ; subs below clears the carry flag on the final byte.
    sub     limit, limit, #1
L1
    ; Perhaps we can do better than this.
    ldrb    data1, [src1], #1
    ldrb    data2, [src2], #1
    subs    limit, limit, #1
    it      cs
    cmpcs   data1, data2                ; Skipped on the final byte, where
                                        ; subs leaves Z clear and ends the loop.
    beq     L1
    sub     result, data1, data2
    pop     {r4-r8, pc}

    END