]>
git.proxmox.com Git - cargo.git/blob - vendor/memchr/src/memchr/fallback.rs
1 // This module defines pure Rust platform independent implementations of all
2 // the memchr routines. We do our best to make them fast. Some of them may even
3 // get auto-vectorized.
5 use core
::{cmp, usize}
;
7 #[cfg(target_pointer_width = "16")]
8 const USIZE_BYTES
: usize = 2;
10 #[cfg(target_pointer_width = "32")]
11 const USIZE_BYTES
: usize = 4;
13 #[cfg(target_pointer_width = "64")]
14 const USIZE_BYTES
: usize = 8;
16 // The number of bytes to loop at in one iteration of memchr/memrchr.
17 const LOOP_SIZE
: usize = 2 * USIZE_BYTES
;
19 /// Return `true` if `x` contains any zero byte.
21 /// From *Matters Computational*, J. Arndt
23 /// "The idea is to subtract one from each of the bytes and then look for
24 /// bytes where the borrow propagated all the way to the most significant
27 fn contains_zero_byte(x
: usize) -> bool
{
28 const LO_U64
: u64 = 0x0101010101010101;
29 const HI_U64
: u64 = 0x8080808080808080;
31 const LO_USIZE
: usize = LO_U64
as usize;
32 const HI_USIZE
: usize = HI_U64
as usize;
34 x
.wrapping_sub(LO_USIZE
) & !x
& HI_USIZE
!= 0
37 /// Repeat the given byte into a word size number. That is, every 8 bits
38 /// is equivalent to the given byte. For example, if `b` is `\x4E` or
39 /// `01001110` in binary, then the returned value on a 32-bit system would be:
40 /// `01001110_01001110_01001110_01001110`.
42 fn repeat_byte(b
: u8) -> usize {
43 (b
as usize) * (usize::MAX
/ 255)
46 pub fn memchr(n1
: u8, haystack
: &[u8]) -> Option
<usize> {
47 let vn1
= repeat_byte(n1
);
48 let confirm
= |byte
| byte
== n1
;
49 let loop_size
= cmp
::min(LOOP_SIZE
, haystack
.len());
50 let align
= USIZE_BYTES
- 1;
51 let start_ptr
= haystack
.as_ptr();
52 let mut ptr
= start_ptr
;
55 let end_ptr
= start_ptr
.add(haystack
.len());
56 if haystack
.len() < USIZE_BYTES
{
57 return forward_search(start_ptr
, end_ptr
, ptr
, confirm
);
60 let chunk
= (ptr
as *const usize).read_unaligned();
61 if contains_zero_byte(chunk ^ vn1
) {
62 return forward_search(start_ptr
, end_ptr
, ptr
, confirm
);
65 ptr
= ptr
.add(USIZE_BYTES
- (start_ptr
as usize & align
));
66 debug_assert
!(ptr
> start_ptr
);
67 debug_assert
!(end_ptr
.sub(USIZE_BYTES
) >= start_ptr
);
68 while loop_size
== LOOP_SIZE
&& ptr
<= end_ptr
.sub(loop_size
) {
69 debug_assert_eq
!(0, (ptr
as usize) % USIZE_BYTES
);
71 let a
= *(ptr
as *const usize);
72 let b
= *(ptr
.add(USIZE_BYTES
) as *const usize);
73 let eqa
= contains_zero_byte(a ^ vn1
);
74 let eqb
= contains_zero_byte(b ^ vn1
);
78 ptr
= ptr
.add(LOOP_SIZE
);
80 forward_search(start_ptr
, end_ptr
, ptr
, confirm
)
84 /// Like `memchr`, but searches for two bytes instead of one.
85 pub fn memchr2(n1
: u8, n2
: u8, haystack
: &[u8]) -> Option
<usize> {
86 let vn1
= repeat_byte(n1
);
87 let vn2
= repeat_byte(n2
);
88 let confirm
= |byte
| byte
== n1
|| byte
== n2
;
89 let align
= USIZE_BYTES
- 1;
90 let start_ptr
= haystack
.as_ptr();
91 let mut ptr
= start_ptr
;
94 let end_ptr
= start_ptr
.add(haystack
.len());
95 if haystack
.len() < USIZE_BYTES
{
96 return forward_search(start_ptr
, end_ptr
, ptr
, confirm
);
99 let chunk
= (ptr
as *const usize).read_unaligned();
100 let eq1
= contains_zero_byte(chunk ^ vn1
);
101 let eq2
= contains_zero_byte(chunk ^ vn2
);
103 return forward_search(start_ptr
, end_ptr
, ptr
, confirm
);
106 ptr
= ptr
.add(USIZE_BYTES
- (start_ptr
as usize & align
));
107 debug_assert
!(ptr
> start_ptr
);
108 debug_assert
!(end_ptr
.sub(USIZE_BYTES
) >= start_ptr
);
109 while ptr
<= end_ptr
.sub(USIZE_BYTES
) {
110 debug_assert_eq
!(0, (ptr
as usize) % USIZE_BYTES
);
112 let chunk
= *(ptr
as *const usize);
113 let eq1
= contains_zero_byte(chunk ^ vn1
);
114 let eq2
= contains_zero_byte(chunk ^ vn2
);
118 ptr
= ptr
.add(USIZE_BYTES
);
120 forward_search(start_ptr
, end_ptr
, ptr
, confirm
)
124 /// Like `memchr`, but searches for three bytes instead of one.
125 pub fn memchr3(n1
: u8, n2
: u8, n3
: u8, haystack
: &[u8]) -> Option
<usize> {
126 let vn1
= repeat_byte(n1
);
127 let vn2
= repeat_byte(n2
);
128 let vn3
= repeat_byte(n3
);
129 let confirm
= |byte
| byte
== n1
|| byte
== n2
|| byte
== n3
;
130 let align
= USIZE_BYTES
- 1;
131 let start_ptr
= haystack
.as_ptr();
132 let mut ptr
= start_ptr
;
135 let end_ptr
= start_ptr
.add(haystack
.len());
136 if haystack
.len() < USIZE_BYTES
{
137 return forward_search(start_ptr
, end_ptr
, ptr
, confirm
);
140 let chunk
= (ptr
as *const usize).read_unaligned();
141 let eq1
= contains_zero_byte(chunk ^ vn1
);
142 let eq2
= contains_zero_byte(chunk ^ vn2
);
143 let eq3
= contains_zero_byte(chunk ^ vn3
);
144 if eq1
|| eq2
|| eq3
{
145 return forward_search(start_ptr
, end_ptr
, ptr
, confirm
);
148 ptr
= ptr
.add(USIZE_BYTES
- (start_ptr
as usize & align
));
149 debug_assert
!(ptr
> start_ptr
);
150 debug_assert
!(end_ptr
.sub(USIZE_BYTES
) >= start_ptr
);
151 while ptr
<= end_ptr
.sub(USIZE_BYTES
) {
152 debug_assert_eq
!(0, (ptr
as usize) % USIZE_BYTES
);
154 let chunk
= *(ptr
as *const usize);
155 let eq1
= contains_zero_byte(chunk ^ vn1
);
156 let eq2
= contains_zero_byte(chunk ^ vn2
);
157 let eq3
= contains_zero_byte(chunk ^ vn3
);
158 if eq1
|| eq2
|| eq3
{
161 ptr
= ptr
.add(USIZE_BYTES
);
163 forward_search(start_ptr
, end_ptr
, ptr
, confirm
)
167 /// Return the last index matching the byte `x` in `text`.
168 pub fn memrchr(n1
: u8, haystack
: &[u8]) -> Option
<usize> {
169 let vn1
= repeat_byte(n1
);
170 let confirm
= |byte
| byte
== n1
;
171 let loop_size
= cmp
::min(LOOP_SIZE
, haystack
.len());
172 let align
= USIZE_BYTES
- 1;
173 let start_ptr
= haystack
.as_ptr();
176 let end_ptr
= start_ptr
.add(haystack
.len());
177 let mut ptr
= end_ptr
;
178 if haystack
.len() < USIZE_BYTES
{
179 return reverse_search(start_ptr
, end_ptr
, ptr
, confirm
);
182 let chunk
= (ptr
.sub(USIZE_BYTES
) as *const usize).read_unaligned();
183 if contains_zero_byte(chunk ^ vn1
) {
184 return reverse_search(start_ptr
, end_ptr
, ptr
, confirm
);
187 ptr
= (end_ptr
as usize & !align
) as *const u8;
188 debug_assert
!(start_ptr
<= ptr
&& ptr
<= end_ptr
);
189 while loop_size
== LOOP_SIZE
&& ptr
>= start_ptr
.add(loop_size
) {
190 debug_assert_eq
!(0, (ptr
as usize) % USIZE_BYTES
);
192 let a
= *(ptr
.sub(2 * USIZE_BYTES
) as *const usize);
193 let b
= *(ptr
.sub(1 * USIZE_BYTES
) as *const usize);
194 let eqa
= contains_zero_byte(a ^ vn1
);
195 let eqb
= contains_zero_byte(b ^ vn1
);
199 ptr
= ptr
.sub(loop_size
);
201 reverse_search(start_ptr
, end_ptr
, ptr
, confirm
)
205 /// Like `memrchr`, but searches for two bytes instead of one.
206 pub fn memrchr2(n1
: u8, n2
: u8, haystack
: &[u8]) -> Option
<usize> {
207 let vn1
= repeat_byte(n1
);
208 let vn2
= repeat_byte(n2
);
209 let confirm
= |byte
| byte
== n1
|| byte
== n2
;
210 let align
= USIZE_BYTES
- 1;
211 let start_ptr
= haystack
.as_ptr();
214 let end_ptr
= start_ptr
.add(haystack
.len());
215 let mut ptr
= end_ptr
;
216 if haystack
.len() < USIZE_BYTES
{
217 return reverse_search(start_ptr
, end_ptr
, ptr
, confirm
);
220 let chunk
= (ptr
.sub(USIZE_BYTES
) as *const usize).read_unaligned();
221 let eq1
= contains_zero_byte(chunk ^ vn1
);
222 let eq2
= contains_zero_byte(chunk ^ vn2
);
224 return reverse_search(start_ptr
, end_ptr
, ptr
, confirm
);
227 ptr
= (end_ptr
as usize & !align
) as *const u8;
228 debug_assert
!(start_ptr
<= ptr
&& ptr
<= end_ptr
);
229 while ptr
>= start_ptr
.add(USIZE_BYTES
) {
230 debug_assert_eq
!(0, (ptr
as usize) % USIZE_BYTES
);
232 let chunk
= *(ptr
.sub(USIZE_BYTES
) as *const usize);
233 let eq1
= contains_zero_byte(chunk ^ vn1
);
234 let eq2
= contains_zero_byte(chunk ^ vn2
);
238 ptr
= ptr
.sub(USIZE_BYTES
);
240 reverse_search(start_ptr
, end_ptr
, ptr
, confirm
)
244 /// Like `memrchr`, but searches for three bytes instead of one.
245 pub fn memrchr3(n1
: u8, n2
: u8, n3
: u8, haystack
: &[u8]) -> Option
<usize> {
246 let vn1
= repeat_byte(n1
);
247 let vn2
= repeat_byte(n2
);
248 let vn3
= repeat_byte(n3
);
249 let confirm
= |byte
| byte
== n1
|| byte
== n2
|| byte
== n3
;
250 let align
= USIZE_BYTES
- 1;
251 let start_ptr
= haystack
.as_ptr();
254 let end_ptr
= start_ptr
.add(haystack
.len());
255 let mut ptr
= end_ptr
;
256 if haystack
.len() < USIZE_BYTES
{
257 return reverse_search(start_ptr
, end_ptr
, ptr
, confirm
);
260 let chunk
= (ptr
.sub(USIZE_BYTES
) as *const usize).read_unaligned();
261 let eq1
= contains_zero_byte(chunk ^ vn1
);
262 let eq2
= contains_zero_byte(chunk ^ vn2
);
263 let eq3
= contains_zero_byte(chunk ^ vn3
);
264 if eq1
|| eq2
|| eq3
{
265 return reverse_search(start_ptr
, end_ptr
, ptr
, confirm
);
268 ptr
= (end_ptr
as usize & !align
) as *const u8;
269 debug_assert
!(start_ptr
<= ptr
&& ptr
<= end_ptr
);
270 while ptr
>= start_ptr
.add(USIZE_BYTES
) {
271 debug_assert_eq
!(0, (ptr
as usize) % USIZE_BYTES
);
273 let chunk
= *(ptr
.sub(USIZE_BYTES
) as *const usize);
274 let eq1
= contains_zero_byte(chunk ^ vn1
);
275 let eq2
= contains_zero_byte(chunk ^ vn2
);
276 let eq3
= contains_zero_byte(chunk ^ vn3
);
277 if eq1
|| eq2
|| eq3
{
280 ptr
= ptr
.sub(USIZE_BYTES
);
282 reverse_search(start_ptr
, end_ptr
, ptr
, confirm
)
287 unsafe fn forward_search
<F
: Fn(u8) -> bool
>(
288 start_ptr
: *const u8,
293 debug_assert
!(start_ptr
<= ptr
);
294 debug_assert
!(ptr
<= end_ptr
);
296 while ptr
< end_ptr
{
298 return Some(sub(ptr
, start_ptr
));
306 unsafe fn reverse_search
<F
: Fn(u8) -> bool
>(
307 start_ptr
: *const u8,
312 debug_assert
!(start_ptr
<= ptr
);
313 debug_assert
!(ptr
<= end_ptr
);
315 while ptr
> start_ptr
{
316 ptr
= ptr
.offset(-1);
318 return Some(sub(ptr
, start_ptr
));
324 /// Subtract `b` from `a` and return the difference. `a` should be greater than
326 fn sub(a
: *const u8, b
: *const u8) -> usize {
327 debug_assert
!(a
>= b
);
328 (a
as usize) - (b
as usize)