]> git.proxmox.com Git - mirror_edk2.git/blame - MdePkg/Library/BaseMemoryLibOptDxe/Arm/ScanMem.S
MdePkg/BaseMemoryLibOptDxe ARM: fix Thumb-2 bug in ScanMem()
[mirror_edk2.git] / MdePkg / Library / BaseMemoryLibOptDxe / Arm / ScanMem.S
CommitLineData
a37f6605
AB
1// Copyright (c) 2010-2011, Linaro Limited\r
2// All rights reserved.\r
3//\r
4// Redistribution and use in source and binary forms, with or without\r
5// modification, are permitted provided that the following conditions\r
6// are met:\r
7//\r
8// * Redistributions of source code must retain the above copyright\r
9// notice, this list of conditions and the following disclaimer.\r
10//\r
11// * Redistributions in binary form must reproduce the above copyright\r
12// notice, this list of conditions and the following disclaimer in the\r
13// documentation and/or other materials provided with the distribution.\r
14//\r
15// * Neither the name of Linaro Limited nor the names of its\r
16// contributors may be used to endorse or promote products derived\r
17// from this software without specific prior written permission.\r
18//\r
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\r
23// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\r
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\r
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
30//\r
31\r
32//\r
33// Written by Dave Gilbert <david.gilbert@linaro.org>\r
34//\r
// This memchr routine is optimised on a Cortex-A9 and should work on
// all ARMv7 processors. It has a fast path for short sizes, and has
// an optimised path for large data sets; the worst case is finding the
// match early in a large data set.
39//\r
40\r
41\r
42// 2011-02-07 david.gilbert@linaro.org\r
43// Extracted from local git a5b438d861\r
44// 2011-07-14 david.gilbert@linaro.org\r
45// Import endianness fix from local git ea786f1b\r
46// 2011-12-07 david.gilbert@linaro.org\r
47// Removed unneeded cbz from align loop\r
48\r
// CHARTSTMASK(c): mask with the lowest bit of byte lane c set, where lane 0
// is the least significant byte of the word.  It is used to test a single
// lane of a word whose lanes are each 00 or ff.
// NOTE(review): only this (little-endian lane order) definition is visible
// here; confirm whether a big-endian variant is required.
// The expansion is fully parenthesised so it stays correct for compound
// arguments and inside larger expressions.
#define CHARTSTMASK(c) (1 << ((c) * 8))

    .text
    .thumb
    .syntax unified
55\r
//
// InternalMemScanMem8 - locate the first occurrence of a byte in a buffer.
//
// In:    r0 = start of memory to scan
//        r1 = length in bytes (unsigned)
//        r2 = character to look for (only the low 8 bits are used)
// Out:   r0 = pointer to the first matching byte, or 0 (NULL) if not found
// Clobb: r1, r2, r3, condition flags and the APSR.GE bits.
//        r4-r7 are used by the doubleword loop and are saved/restored.
//
// Strategy: buffers shorter than 16 bytes are scanned one byte at a time.
// Longer buffers are first scanned bytewise up to an 8-byte boundary, then
// two words per iteration using UADD8/SEL to build a 00/ff per-byte match
// mask, and finally the (at most 7) trailing bytes are scanned bytewise.
//
    .type   ASM_PFX(InternalMemScanMem8), %function
ASM_GLOBAL ASM_PFX(InternalMemScanMem8)
ASM_PFX(InternalMemScanMem8):
    uxtb    r2, r2              // Don't trust the caller to actually pass a char

    cmp     r1, #16             // If it's short don't bother with anything clever
    blo     20f                 // unsigned compare: the length is not a signed value

    tst     r0, #7              // If it's already aligned skip the next bit
    beq     10f

    // Work up to an aligned point (at most 7 bytes, so >= 9 bytes remain after)
5:
    ldrb    r3, [r0], #1
    subs    r1, r1, #1
    cmp     r3, r2
    beq     50f                 // If it matches exit found
    tst     r0, #7
    bne     5b                  // If not aligned yet then do next byte

10:
    // At this point we are 8-byte aligned and have at least 8 bytes to work with
    push    {r4-r7}
    orr     r2, r2, r2, lsl #8  // expand the match byte across all four byte lanes
    orr     r2, r2, r2, lsl #16
    bic     r4, r1, #7          // r4 = byte count covered by whole doublewords
    mvns    r7, #0              // r7 = all F's
    movs    r3, #0              // r3 = all 0's

15:
    ldmia   r0!, {r5, r6}       // load two words, r0 now points past them
    subs    r4, r4, #8          // flags from here are consumed by the bne below
    eor     r5, r5, r2          // r5, r6 get 00 in every lane that matches the target
    eor     r6, r6, r2
    uadd8   r5, r5, r7          // Parallel add 0xff - sets the GE bit of every lane that wasn't 00
    sel     r5, r3, r7          // lanes become 00 for non-match, ff for match - NOTE INVERSION
    uadd8   r6, r6, r7          // Same for the second word
    sel     r6, r5, r7          // chained: non-matching lanes inherit r5's lane, matching lanes are ff
    cbnz    r6, 60f             // any hit in either word (cbnz leaves the flags alone)
    bne     15b                 // (Flags from the subs above) more doublewords to go

    pop     {r4-r7}
    and     r2, r2, #0xff       // Get r2 back to a single character from the expansion above
    and     r1, r1, #7          // r1 = bytes left over after the doublewords

20:
    cbz     r1, 40f             // 0 length or hit the end already: not found

21: // Post aligned section, or just a short call
    ldrb    r3, [r0], #1
    subs    r1, r1, #1
    eor     r3, r3, r2          // r3 = 0 if match - doesn't disturb the flags from subs
    cbz     r3, 50f
    bne     21b                 // on the r1 flags from subs

40:
    movs    r0, #0              // not found
    bx      lr

50:
    subs    r0, r0, #1          // found: undo the post-increment of the byte load
    bx      lr

60: // The fast path found a hit - now track down exactly which byte it was.
    // r0 points to the start of the doubleword after the one that was tested.
    // r5 has the 00/ff pattern for the first word, r6 has the chained value.
    cmp     r5, #0
    itte    eq
    moveq   r5, r6              // no hit in the 1st word, so the hit is in the 2nd word
    subeq   r0, r0, #3          // r0 = address of the 2nd byte of the 2nd word
    subne   r0, r0, #7          // r0 = address of the 2nd byte of the 1st word

    // r0 currently points to the 2nd byte of the word containing the hit,
    // i.e. one past the candidate; the final subs at 61 compensates for that.
    tst     r5, #CHARTSTMASK(0) // 1st character
    bne     61f
    adds    r0, r0, #1
    tst     r5, #CHARTSTMASK(1) // 2nd character
    bne     61f
    adds    r0, r0, #1
    // Bits 15 and 16 straddle the 2nd and 3rd byte lanes.  The 2nd lane is
    // known to be 00 at this point, so the result is non-zero only when the
    // 3rd lane is ff.
    tst     r5, #(3 << 15)      // 2nd & 3rd character
    // If not the 3rd must be the last one
    it      eq
    addeq.n r0, r0, #1

61:
    pop     {r4-r7}
    subs    r0, r0, #1
    bx      lr