]>
Commit | Line | Data |
---|---|---|
f20569fa XL |
1 | extern crate packed_simd; |
2 | ||
3 | #[cfg(not(feature = "runtime-dispatch-simd"))] | |
4 | use core::mem; | |
5 | #[cfg(feature = "runtime-dispatch-simd")] | |
6 | use std::mem; | |
7 | ||
8 | use self::packed_simd::{u8x32, u8x64, FromCast}; | |
9 | ||
// Sliding-window mask table: 32 bytes of 0x00 followed by 32 bytes of 0xFF.
// Loading 32 bytes starting at index `r` (1 <= r <= 31) yields a vector whose
// first `32 - r` lanes are 0x00 and whose last `r` lanes are 0xFF; the chunk
// functions use this to keep only the final `len % 32` lanes of the
// overlapping tail load.
const MASK: [u8; 64] = [
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
];
16 | ||
17 | unsafe fn u8x64_from_offset(slice: &[u8], offset: usize) -> u8x64 { | |
18 | u8x64::from_slice_unaligned_unchecked(slice.get_unchecked(offset..)) | |
19 | } | |
20 | unsafe fn u8x32_from_offset(slice: &[u8], offset: usize) -> u8x32 { | |
21 | u8x32::from_slice_unaligned_unchecked(slice.get_unchecked(offset..)) | |
22 | } | |
23 | ||
24 | fn sum_x64(u8s: &u8x64) -> usize { | |
25 | let mut store = [0; mem::size_of::<u8x64>()]; | |
26 | u8s.write_to_slice_unaligned(&mut store); | |
27 | store.iter().map(|&e| e as usize).sum() | |
28 | } | |
29 | fn sum_x32(u8s: &u8x32) -> usize { | |
30 | let mut store = [0; mem::size_of::<u8x32>()]; | |
31 | u8s.write_to_slice_unaligned(&mut store); | |
32 | store.iter().map(|&e| e as usize).sum() | |
33 | } | |
34 | ||
35 | pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { | |
36 | assert!(haystack.len() >= 32); | |
37 | ||
38 | unsafe { | |
39 | let mut offset = 0; | |
40 | let mut count = 0; | |
41 | ||
42 | let needles_x64 = u8x64::splat(needle); | |
43 | ||
44 | // 16320 | |
45 | while haystack.len() >= offset + 64 * 255 { | |
46 | let mut counts = u8x64::splat(0); | |
47 | for _ in 0..255 { | |
48 | counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64)); | |
49 | offset += 64; | |
50 | } | |
51 | count += sum_x64(&counts); | |
52 | } | |
53 | ||
54 | // 8192 | |
55 | if haystack.len() >= offset + 64 * 128 { | |
56 | let mut counts = u8x64::splat(0); | |
57 | for _ in 0..128 { | |
58 | counts -= u8x64::from_cast(u8x64_from_offset(haystack, offset).eq(needles_x64)); | |
59 | offset += 64; | |
60 | } | |
61 | count += sum_x64(&counts); | |
62 | } | |
63 | ||
64 | let needles_x32 = u8x32::splat(needle); | |
65 | ||
66 | // 32 | |
67 | let mut counts = u8x32::splat(0); | |
68 | for i in 0..(haystack.len() - offset) / 32 { | |
69 | counts -= u8x32::from_cast(u8x32_from_offset(haystack, offset + i * 32).eq(needles_x32)); | |
70 | } | |
71 | if haystack.len() % 32 != 0 { | |
72 | counts -= u8x32::from_cast(u8x32_from_offset(haystack, haystack.len() - 32).eq(needles_x32)) & | |
73 | u8x32_from_offset(&MASK, haystack.len() % 32); | |
74 | } | |
75 | count += sum_x32(&counts); | |
76 | ||
77 | count | |
78 | } | |
79 | } | |
80 | ||
81 | fn is_leading_utf8_byte_x64(u8s: u8x64) -> u8x64 { | |
82 | u8x64::from_cast((u8s & u8x64::splat(0b1100_0000)).ne(u8x64::splat(0b1000_0000))) | |
83 | } | |
84 | ||
85 | fn is_leading_utf8_byte_x32(u8s: u8x32) -> u8x32 { | |
86 | u8x32::from_cast((u8s & u8x32::splat(0b1100_0000)).ne(u8x32::splat(0b1000_0000))) | |
87 | } | |
88 | ||
89 | pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize { | |
90 | assert!(utf8_chars.len() >= 32); | |
91 | ||
92 | unsafe { | |
93 | let mut offset = 0; | |
94 | let mut count = 0; | |
95 | ||
96 | // 16320 | |
97 | while utf8_chars.len() >= offset + 64 * 255 { | |
98 | let mut counts = u8x64::splat(0);; | |
99 | for _ in 0..255 { | |
100 | counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset)); | |
101 | offset += 64; | |
102 | } | |
103 | count += sum_x64(&counts); | |
104 | } | |
105 | ||
106 | // 8192 | |
107 | if utf8_chars.len() >= offset + 64 * 128 { | |
108 | let mut counts = u8x64::splat(0);; | |
109 | for _ in 0..128 { | |
110 | counts -= is_leading_utf8_byte_x64(u8x64_from_offset(utf8_chars, offset)); | |
111 | offset += 64; | |
112 | } | |
113 | count += sum_x64(&counts); | |
114 | } | |
115 | ||
116 | // 32 | |
117 | let mut counts = u8x32::splat(0); | |
118 | for i in 0..(utf8_chars.len() - offset) / 32 { | |
119 | counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, offset + i * 32)); | |
120 | } | |
121 | if utf8_chars.len() % 32 != 0 { | |
122 | counts -= is_leading_utf8_byte_x32(u8x32_from_offset(utf8_chars, utf8_chars.len() - 32)) & | |
123 | u8x32_from_offset(&MASK, utf8_chars.len() % 32); | |
124 | } | |
125 | count += sum_x32(&counts); | |
126 | ||
127 | count | |
128 | } | |
129 | } |