src/libstd/sys_common/memchr.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10 //
  11 // Original implementation taken from rust-memchr
  12 // Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
  13
  14 #[allow(dead_code)]
  15 pub mod fallback {
  16     use cmp;
  17     use mem;
  18
  19     const LO_U64: u64 = 0x0101010101010101;
  20     const HI_U64: u64 = 0x8080808080808080;
  21
  22     // use truncation
  23     const LO_USIZE: usize = LO_U64 as usize;
  24     const HI_USIZE: usize = HI_U64 as usize;
  25
  26     /// Return `true` if `x` contains any zero byte.
  27     ///
  28     /// From *Matters Computational*, J. Arndt
  29     ///
  30     /// "The idea is to subtract one from each of the bytes and then look for
  31     /// bytes where the borrow propagated all the way to the most significant
  32     /// bit."
  33     #[inline]
  34     fn contains_zero_byte(x: usize) -> bool {
  35         x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
  36     }
  37
  38     #[cfg(target_pointer_width = "32")]
  39     #[inline]
  40     fn repeat_byte(b: u8) -> usize {
  41         let mut rep = (b as usize) << 8 | b as usize;
  42         rep = rep << 16 | rep;
  43         rep
  44     }
  45
  46     #[cfg(target_pointer_width = "64")]
  47     #[inline]
  48     fn repeat_byte(b: u8) -> usize {
  49         let mut rep = (b as usize) << 8 | b as usize;
  50         rep = rep << 16 | rep;
  51         rep = rep << 32 | rep;
  52         rep
  53     }
  54
  55     /// Return the first index matching the byte `a` in `text`.
  56     pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
  57         // Scan for a single byte value by reading two `usize` words at a time.
  58         //
  59         // Split `text` in three parts
  60         // - unaligned initial part, before the first word aligned address in text
  61         // - body, scan by 2 words at a time
  62         // - the last remaining part, < 2 word size
  63         let len = text.len();
  64         let ptr = text.as_ptr();
  65         let usize_bytes = mem::size_of::<usize>();
  66
  67         // search up to an aligned boundary
  68         let align = (ptr as usize) & (usize_bytes- 1);
  69         let mut offset;
  70         if align > 0 {
  71             offset = cmp::min(usize_bytes - align, len);
  72             if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
  73                 return Some(index);
  74             }
  75         } else {
  76             offset = 0;
  77         }
  78
  79         // search the body of the text
  80         let repeated_x = repeat_byte(x);
  81
  82         if len >= 2 * usize_bytes {
  83             while offset <= len - 2 * usize_bytes {
  84                 unsafe {
  85                     let u = *(ptr.offset(offset as isize) as *const usize);
  86                     let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);
  87
  88                     // break if there is a matching byte
  89                     let zu = contains_zero_byte(u ^ repeated_x);
  90                     let zv = contains_zero_byte(v ^ repeated_x);
  91                     if zu || zv {
  92                         break;
  93                     }
  94                 }
  95                 offset += usize_bytes * 2;
  96             }
  97         }
  98
  99         // find the byte after the point the body loop stopped
 100         text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i)
 101     }
 102
 103     /// Return the last index matching the byte `a` in `text`.
 104     pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> {
 105         // Scan for a single byte value by reading two `usize` words at a time.
 106         //
 107         // Split `text` in three parts
 108         // - unaligned tail, after the last word aligned address in text
 109         // - body, scan by 2 words at a time
 110         // - the first remaining bytes, < 2 word size
 111         let len = text.len();
 112         let ptr = text.as_ptr();
 113         let usize_bytes = mem::size_of::<usize>();
 114
 115         // search to an aligned boundary
 116         let end_align = (ptr as usize + len) & (usize_bytes - 1);
 117         let mut offset;
 118         if end_align > 0 {
 119             offset = if end_align >= len { 0 } else { len - end_align };
 120             if let Some(index) = text[offset..].iter().rposition(|elt| *elt == x) {
 121                 return Some(offset + index);
 122             }
 123         } else {
 124             offset = len;
 125         }
 126
 127         // search the body of the text
 128         let repeated_x = repeat_byte(x);
 129
 130         while offset >= 2 * usize_bytes {
 131             unsafe {
 132                 let u = *(ptr.offset(offset as isize - 2 * usize_bytes as isize) as *const usize);
 133                 let v = *(ptr.offset(offset as isize - usize_bytes as isize) as *const usize);
 134
 135                 // break if there is a matching byte
 136                 let zu = contains_zero_byte(u ^ repeated_x);
 137                 let zv = contains_zero_byte(v ^ repeated_x);
 138                 if zu || zv {
 139                     break;
 140                 }
 141             }
 142             offset -= 2 * usize_bytes;
 143         }
 144
 145         // find the byte before the point the body loop stopped
 146         text[..offset].iter().rposition(|elt| *elt == x)
 147     }
 148
 149     // test fallback implementations on all platforms
 150     #[test]
 151     fn matches_one() {
 152         assert_eq!(Some(0), memchr(b'a', b"a"));
 153     }
 154
 155     #[test]
 156     fn matches_begin() {
 157         assert_eq!(Some(0), memchr(b'a', b"aaaa"));
 158     }
 159
 160     #[test]
 161     fn matches_end() {
 162         assert_eq!(Some(4), memchr(b'z', b"aaaaz"));
 163     }
 164
 165     #[test]
 166     fn matches_nul() {
 167         assert_eq!(Some(4), memchr(b'\x00', b"aaaa\x00"));
 168     }
 169
 170     #[test]
 171     fn matches_past_nul() {
 172         assert_eq!(Some(5), memchr(b'z', b"aaaa\x00z"));
 173     }
 174
 175     #[test]
 176     fn no_match_empty() {
 177         assert_eq!(None, memchr(b'a', b""));
 178     }
 179
 180     #[test]
 181     fn no_match() {
 182         assert_eq!(None, memchr(b'a', b"xyz"));
 183     }
 184
 185     #[test]
 186     fn matches_one_reversed() {
 187         assert_eq!(Some(0), memrchr(b'a', b"a"));
 188     }
 189
 190     #[test]
 191     fn matches_begin_reversed() {
 192         assert_eq!(Some(3), memrchr(b'a', b"aaaa"));
 193     }
 194
 195     #[test]
 196     fn matches_end_reversed() {
 197         assert_eq!(Some(0), memrchr(b'z', b"zaaaa"));
 198     }
 199
 200     #[test]
 201     fn matches_nul_reversed() {
 202         assert_eq!(Some(4), memrchr(b'\x00', b"aaaa\x00"));
 203     }
 204
 205     #[test]
 206     fn matches_past_nul_reversed() {
 207         assert_eq!(Some(0), memrchr(b'z', b"z\x00aaaa"));
 208     }
 209
 210     #[test]
 211     fn no_match_empty_reversed() {
 212         assert_eq!(None, memrchr(b'a', b""));
 213     }
 214
 215     #[test]
 216     fn no_match_reversed() {
 217         assert_eq!(None, memrchr(b'a', b"xyz"));
 218     }
 219
 220     #[test]
 221     fn each_alignment_reversed() {
 222         let mut data = [1u8; 64];
 223         let needle = 2;
 224         let pos = 40;
 225         data[pos] = needle;
 226         for start in 0..16 {
 227             assert_eq!(Some(pos - start), memrchr(needle, &data[start..]));
 228         }
 229     }
 230 }