]>
Commit | Line | Data |
---|---|---|
a37f6605 AB |
1 | // Copyright (c) 2010-2011, Linaro Limited\r |
2 | // All rights reserved.\r | |
3 | //\r | |
4 | // Redistribution and use in source and binary forms, with or without\r | |
5 | // modification, are permitted provided that the following conditions\r | |
6 | // are met:\r | |
7 | //\r | |
8 | // * Redistributions of source code must retain the above copyright\r | |
9 | // notice, this list of conditions and the following disclaimer.\r | |
10 | //\r | |
11 | // * Redistributions in binary form must reproduce the above copyright\r | |
12 | // notice, this list of conditions and the following disclaimer in the\r | |
13 | // documentation and/or other materials provided with the distribution.\r | |
14 | //\r | |
15 | // * Neither the name of Linaro Limited nor the names of its\r | |
16 | // contributors may be used to endorse or promote products derived\r | |
17 | // from this software without specific prior written permission.\r | |
18 | //\r | |
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\r | |
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\r | |
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\r | |
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\r | |
23 | // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r | |
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\r | |
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\r | |
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\r | |
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r | |
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r | |
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r | |
30 | //\r | |
31 | \r | |
32 | //\r | |
33 | // Written by Dave Gilbert <david.gilbert@linaro.org>\r | |
34 | //\r | |
35 | // This memchr routine is optimised on a Cortex-A9 and should work on\r | |
36 | // all ARMv7 processors. It has a fast path for short sizes, and has\r | |
37 | // an optimised path for large data sets; the worst case is finding the\r | |
38 | // match early in a large data set.\r | |
39 | //\r | |
40 | \r | |
41 | \r | |
42 | // 2011-02-07 david.gilbert@linaro.org\r | |
43 | // Extracted from local git a5b438d861\r | |
44 | // 2011-07-14 david.gilbert@linaro.org\r | |
45 | // Import endianness fix from local git ea786f1b\r | |
46 | // 2011-12-07 david.gilbert@linaro.org\r | |
47 | // Removed unneeded cbz from align loop\r | |
48 | \r | |
// CHARTSTMASK(c) is the lowest bit of byte lane c (lane 0 = least significant
// byte) of a word whose bytes are each 0x00 or 0xff, so a single tst tells
// whether lane c is flagged. The code below treats lane 0 as the
// lowest-addressed byte of a loaded word, i.e. it assumes little-endian loads.
// Fully parenthesised so it expands safely for any argument or context.
#define CHARTSTMASK(c) (1 << ((c) * 8))

    .text
    .thumb
    .syntax unified

//
// InternalMemScanMem8 - find the first occurrence of a byte in a buffer.
//
// In:    r0 = start of memory to scan
//        r1 = length in bytes (treated as an unsigned count)
//        r2 = character to look for (only the low 8 bits are used)
// Out:   r0 = pointer to the first matching byte, or 0 (NULL) if not found
// Uses:  r1-r3 and the flags as scratch; r4-r7 are pushed and popped around
//        the doubleword loop so they are unchanged on return.
//
// Strategy: lengths below 16 go straight to a byte loop. Otherwise step byte
// by byte up to an 8-byte boundary, then test 8 bytes per iteration with
// uadd8/sel, and finish any remaining 0-7 bytes in the byte loop.
//
    .type   ASM_PFX(InternalMemScanMem8), %function
ASM_GLOBAL ASM_PFX(InternalMemScanMem8)
ASM_PFX(InternalMemScanMem8):
    uxtb    r2, r2                  // Don't trust the caller to actually pass a char

    cmp     r1, #16                 // If it's short don't bother with anything clever
    // Unsigned compare: every other use of r1 counts it down to zero, so a
    // signed blt here would send lengths >= 0x80000000 down the slow byte loop.
    // NOTE(review): assumes callers pass an unsigned length - confirm.
    blo     20f

    tst     r0, #7                  // If it's already aligned skip the next bit
    beq     10f

    // Work up to an aligned point (at most 7 bytes, so at least 9 remain after)
5:
    ldrb    r3, [r0], #1
    subs    r1, r1, #1
    cmp     r3, r2
    beq     50f                     // If it matches exit found
    tst     r0, #7
    bne     5b                      // If not aligned yet then do next byte

10:
    // At this point we are aligned and have at least 8 bytes to work with
    push    {r4-r7}
    orr     r2, r2, r2, lsl #8      // expand the match byte across all 4 lanes
    orr     r2, r2, r2, lsl #16
    bic     r4, r1, #7              // byte count covered by whole doublewords (multiple of 8)
    mvns    r7, #0                  // all F's
    movs    r3, #0

15:
    ldmia   r0!, {r5, r6}
    subs    r4, r4, #8              // Z flag from here is consumed by the bne below;
                                    // nothing in between writes N/Z/C/V
    eor     r5, r5, r2              // r5,r6 get 00 in every lane that matches the target
    eor     r6, r6, r2
    uadd8   r5, r5, r7              // Parallel add 0xff - sets the GE bit for each lane that wasn't 0
    sel     r5, r3, r7              // lane = 00 where no match, ff where match - NOTE INVERSION
    uadd8   r6, r6, r7              // Same for the second word
    sel     r6, r5, r7              // chained: non-matching lanes inherit r5's lane, matching lanes are ff
    cbnz    r6, 60f                 // any flagged lane in either word -> locate it
    bne     15b                     // (flags from the subs above) more doublewords to go

    pop     {r4-r7}
    and     r2, r2, #0xff           // Get r2 back to a single character from the expansion above
    and     r1, r1, #7              // bytes left over after the whole doublewords

20:
    cbz     r1, 40f                 // 0 length or hit the end already then not found

21: // Post aligned section, or just a short call
    ldrb    r3, [r0], #1
    subs    r1, r1, #1
    eor     r3, r3, r2              // r3 = 0 if match - doesn't disturb the flags from subs
    cbz     r3, 50f
    bne     21b                     // on r1 flags

40:
    movs    r0, #0                  // not found
    bx      lr

50:
    subs    r0, r0, #1              // found: undo the post-increment of the load
    bx      lr

60: // The doubleword loop found a hit - now track down exactly which byte it was.
    // r0 points to the start of the doubleword after the one that was tested.
    // r5 has the 00/ff pattern for the first word, r6 has the chained value.
    subs    r0, r0, #3              // assume 2nd word: r0 = offset 1 within the 2nd word
    cmp     r5, #0
    it      eq
    moveq.n r5, r6                  // nothing flagged in the 1st word: the hit is in the 2nd
    it      ne
    subne.n r0, r0, #4              // hit is in the 1st word: r0 = offset 1 within the 1st word

    // r0 now points one byte past the start of the word containing the hit;
    // the subs at 61 removes that bias, so each test below leaves r0 at
    // (matching lane + 1).
    tst     r5, #CHARTSTMASK(0)     // 1st character
    bne     61f
    adds    r0, r0, #1
    tst     r5, #CHARTSTMASK(1)     // 2nd character
    bne     61f
    adds    r0, r0, #1
    tst     r5, #(3 << 15)          // top bit of lane 1 and bottom bit of lane 2;
                                    // lane 1 is known clear here, so this tests the 3rd character
    // If not the 3rd it must be the last one
    it      eq
    addeq.n r0, r0, #1

61:
    pop     {r4-r7}
    subs    r0, r0, #1
    bx      lr