1 // Original implementation taken from rust-memchr.
2 // Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
4 // ignore-tidy-undocumented-unsafe
9 const LO_U64
: u64 = 0x0101010101010101;
10 const HI_U64
: u64 = 0x8080808080808080;
13 const LO_USIZE
: usize = LO_U64
as usize;
14 const HI_USIZE
: usize = HI_U64
as usize;
16 /// Returns `true` if `x` contains any zero byte.
18 /// From *Matters Computational*, J. Arndt:
20 /// "The idea is to subtract one from each of the bytes and then look for
21 /// bytes where the borrow propagated all the way to the most significant
24 fn contains_zero_byte(x
: usize) -> bool
{
25 x
.wrapping_sub(LO_USIZE
) & !x
& HI_USIZE
!= 0
28 #[cfg(target_pointer_width = "16")]
30 fn repeat_byte(b
: u8) -> usize {
31 (b
as usize) << 8 | b
as usize
34 #[cfg(not(target_pointer_width = "16"))]
36 fn repeat_byte(b
: u8) -> usize {
37 (b
as usize) * (crate::usize::MAX
/ 255)
40 /// Returns the first index matching the byte `x` in `text`.
41 pub fn memchr(x
: u8, text
: &[u8]) -> Option
<usize> {
42 // Scan for a single byte value by reading two `usize` words at a time.
44 // Split `text` in three parts
45 // - unaligned initial part, before the first word aligned address in text
46 // - body, scan by 2 words at a time
47 // - the last remaining part, < 2 word size
49 let ptr
= text
.as_ptr();
50 let usize_bytes
= mem
::size_of
::<usize>();
52 // search up to an aligned boundary
53 let mut offset
= ptr
.align_offset(usize_bytes
);
55 offset
= cmp
::min(offset
, len
);
56 if let Some(index
) = text
[..offset
].iter().position(|elt
| *elt
== x
) {
61 // search the body of the text
62 let repeated_x
= repeat_byte(x
);
64 if len
>= 2 * usize_bytes
{
65 while offset
<= len
- 2 * usize_bytes
{
67 let u
= *(ptr
.add(offset
) as *const usize);
68 let v
= *(ptr
.add(offset
+ usize_bytes
) as *const usize);
70 // break if there is a matching byte
71 let zu
= contains_zero_byte(u ^ repeated_x
);
72 let zv
= contains_zero_byte(v ^ repeated_x
);
77 offset
+= usize_bytes
* 2;
81 // Find the byte after the point the body loop stopped.
82 text
[offset
..].iter().position(|elt
| *elt
== x
).map(|i
| offset
+ i
)
85 /// Returns the last index matching the byte `x` in `text`.
86 pub fn memrchr(x
: u8, text
: &[u8]) -> Option
<usize> {
87 // Scan for a single byte value by reading two `usize` words at a time.
89 // Split `text` in three parts:
90 // - unaligned tail, after the last word aligned address in text,
91 // - body, scanned by 2 words at a time,
92 // - the first remaining bytes, < 2 word size.
94 let ptr
= text
.as_ptr();
97 let (min_aligned_offset
, max_aligned_offset
) = {
98 // We call this just to obtain the length of the prefix and suffix.
99 // In the middle we always process two chunks at once.
100 let (prefix
, _
, suffix
) = unsafe { text.align_to::<(Chunk, Chunk)>() }
;
101 (prefix
.len(), len
- suffix
.len())
104 let mut offset
= max_aligned_offset
;
105 if let Some(index
) = text
[offset
..].iter().rposition(|elt
| *elt
== x
) {
106 return Some(offset
+ index
);
109 // Search the body of the text, make sure we don't cross min_aligned_offset.
110 // offset is always aligned, so just testing `>` is sufficient and avoids possible
112 let repeated_x
= repeat_byte(x
);
113 let chunk_bytes
= mem
::size_of
::<Chunk
>();
115 while offset
> min_aligned_offset
{
117 let u
= *(ptr
.offset(offset
as isize - 2 * chunk_bytes
as isize) as *const Chunk
);
118 let v
= *(ptr
.offset(offset
as isize - chunk_bytes
as isize) as *const Chunk
);
120 // Break if there is a matching byte.
121 let zu
= contains_zero_byte(u ^ repeated_x
);
122 let zv
= contains_zero_byte(v ^ repeated_x
);
127 offset
-= 2 * chunk_bytes
;
130 // Find the byte before the point the body loop stopped.
131 text
[..offset
].iter().rposition(|elt
| *elt
== x
)