2 // Copyright (c) 2014, ARM Limited
3 // All rights Reserved.
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright
10 // notice, this list of conditions and the following disclaimer in the
11 // documentation and/or other materials provided with the distribution.
12 // * Neither the name of the company nor the names of its contributors
13 // may be used to endorse or promote products derived from this
14 // software without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 // Arguments and results.
60 // For each 32-byte chunk we calculate a 64-bit syndrome value, with two bits
61 // per byte. For each tuple, bit 0 is set if the relevant byte matched the
62 // requested character and bit 1 is not used (faster than using a 32-bit
63 // syndrome). Since the bits in the syndrome reflect exactly the order in which
64 // things occur in the original string, counting trailing zeros allows us to
65 // identify exactly which byte has matched.
68 ASM_GLOBAL ASM_PFX(InternalMemScanMem8)
69 ASM_PFX(InternalMemScanMem8):
70 // Do not dereference srcin if no bytes to compare.
71 cbz cntin, .Lzero_length
73 // Magic constant 0x40100401 allows us to identify which lane matches
74 // the requested byte.
77 movk wtmp2, #0x4010, lsl #16
78 dup vrepchr.16b, chrin
79 // Work with aligned 32-byte chunks
81 dup vrepmask.4s, wtmp2
83 and cntrem, cntin, #31
87 // Input string is not 32-byte aligned. We calculate the syndrome
88 // value for the aligned 32 bytes block containing the first bytes
89 // and mask the irrelevant part.
92 ld1 {vdata1.16b, vdata2.16b}, [src], #32
94 adds cntin, cntin, tmp
95 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
96 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
97 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
98 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
99 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
100 addp vend.16b, vend.16b, vend.16b // 128->64
102 // Clear the soff*2 lower bits
106 // The first block can also be the last
108 // Have we found something already?
112 ld1 {vdata1.16b, vdata2.16b}, [src], #32
113 subs cntin, cntin, #32
114 cmeq vhas_chr1.16b, vdata1.16b, vrepchr.16b
115 cmeq vhas_chr2.16b, vdata2.16b, vrepchr.16b
116 // If we're out of data we finish regardless of the result
118 // Use a fast check for the termination condition
119 orr vend.16b, vhas_chr1.16b, vhas_chr2.16b
120 addp vend.2d, vend.2d, vend.2d
122 // We're not out of data, loop if we haven't found the character
126 // Termination condition found, let's calculate the syndrome value
127 and vhas_chr1.16b, vhas_chr1.16b, vrepmask.16b
128 and vhas_chr2.16b, vhas_chr2.16b, vrepmask.16b
129 addp vend.16b, vhas_chr1.16b, vhas_chr2.16b // 256->128
130 addp vend.16b, vend.16b, vend.16b // 128->64
132 // Only do the clear for the last possible block
136 // Clear the (32 - ((cntrem + soff) % 32)) * 2 upper bits
137 add tmp, cntrem, soff
145 // Count the trailing zeros using bit reversing
147 // Compensate the last post-increment
149 // Check that we have found a character
151 // And count the leading zeros
153 // Compute the potential result
154 add result, src, synd, lsr #1
155 // Select result or NULL
156 csel result, xzr, result, eq