]>
Commit | Line | Data |
---|---|---|
353b0b11 FG |
1 | use std::ffi::OsStr; |
2 | ||
3 | pub trait OsStrExt: private::Sealed { | |
4 | /// Converts to a string slice. | |
5 | fn try_str(&self) -> Result<&str, std::str::Utf8Error>; | |
6 | /// Returns `true` if the given pattern matches a sub-slice of | |
7 | /// this string slice. | |
8 | /// | |
9 | /// Returns `false` if it does not. | |
10 | /// | |
11 | /// # Examples | |
12 | /// | |
13 | /// ```rust | |
14 | /// use clap_lex::OsStrExt as _; | |
15 | /// let bananas = std::ffi::OsStr::new("bananas"); | |
16 | /// | |
17 | /// assert!(bananas.contains("nana")); | |
18 | /// assert!(!bananas.contains("apples")); | |
19 | /// ``` | |
20 | fn contains(&self, needle: &str) -> bool; | |
21 | /// Returns the byte index of the first character of this string slice that | |
22 | /// matches the pattern. | |
23 | /// | |
24 | /// Returns [`None`] if the pattern doesn't match. | |
25 | /// | |
26 | /// # Examples | |
27 | /// | |
28 | /// ```rust | |
29 | /// use clap_lex::OsStrExt as _; | |
30 | /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard Gepardi"); | |
31 | /// | |
32 | /// assert_eq!(s.find("L"), Some(0)); | |
33 | /// assert_eq!(s.find("é"), Some(14)); | |
34 | /// assert_eq!(s.find("par"), Some(17)); | |
35 | /// ``` | |
36 | /// | |
37 | /// Not finding the pattern: | |
38 | /// | |
39 | /// ```rust | |
40 | /// use clap_lex::OsStrExt as _; | |
41 | /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard"); | |
42 | /// | |
43 | /// assert_eq!(s.find("1"), None); | |
44 | /// ``` | |
45 | fn find(&self, needle: &str) -> Option<usize>; | |
46 | /// Returns a string slice with the prefix removed. | |
47 | /// | |
48 | /// If the string starts with the pattern `prefix`, returns substring after the prefix, wrapped | |
49 | /// in `Some`. | |
50 | /// | |
51 | /// If the string does not start with `prefix`, returns `None`. | |
52 | /// | |
53 | /// # Examples | |
54 | /// | |
55 | /// ``` | |
56 | /// use std::ffi::OsStr; | |
57 | /// use clap_lex::OsStrExt as _; | |
58 | /// assert_eq!(OsStr::new("foo:bar").strip_prefix("foo:"), Some(OsStr::new("bar"))); | |
59 | /// assert_eq!(OsStr::new("foo:bar").strip_prefix("bar"), None); | |
60 | /// assert_eq!(OsStr::new("foofoo").strip_prefix("foo"), Some(OsStr::new("foo"))); | |
61 | /// ``` | |
62 | fn strip_prefix(&self, prefix: &str) -> Option<&OsStr>; | |
63 | /// Returns `true` if the given pattern matches a prefix of this | |
64 | /// string slice. | |
65 | /// | |
66 | /// Returns `false` if it does not. | |
67 | /// | |
68 | /// # Examples | |
69 | /// | |
70 | /// ``` | |
71 | /// use clap_lex::OsStrExt as _; | |
72 | /// let bananas = std::ffi::OsStr::new("bananas"); | |
73 | /// | |
74 | /// assert!(bananas.starts_with("bana")); | |
75 | /// assert!(!bananas.starts_with("nana")); | |
76 | /// ``` | |
77 | fn starts_with(&self, prefix: &str) -> bool; | |
78 | /// An iterator over substrings of this string slice, separated by | |
79 | /// characters matched by a pattern. | |
80 | /// | |
81 | /// # Examples | |
82 | /// | |
83 | /// Simple patterns: | |
84 | /// | |
85 | /// ``` | |
86 | /// use std::ffi::OsStr; | |
87 | /// use clap_lex::OsStrExt as _; | |
88 | /// let v: Vec<_> = OsStr::new("Mary had a little lamb").split(" ").collect(); | |
89 | /// assert_eq!(v, [OsStr::new("Mary"), OsStr::new("had"), OsStr::new("a"), OsStr::new("little"), OsStr::new("lamb")]); | |
90 | /// | |
91 | /// let v: Vec<_> = OsStr::new("").split("X").collect(); | |
92 | /// assert_eq!(v, [OsStr::new("")]); | |
93 | /// | |
94 | /// let v: Vec<_> = OsStr::new("lionXXtigerXleopard").split("X").collect(); | |
95 | /// assert_eq!(v, [OsStr::new("lion"), OsStr::new(""), OsStr::new("tiger"), OsStr::new("leopard")]); | |
96 | /// | |
97 | /// let v: Vec<_> = OsStr::new("lion::tiger::leopard").split("::").collect(); | |
98 | /// assert_eq!(v, [OsStr::new("lion"), OsStr::new("tiger"), OsStr::new("leopard")]); | |
99 | /// ``` | |
100 | /// | |
101 | /// If a string contains multiple contiguous separators, you will end up | |
102 | /// with empty strings in the output: | |
103 | /// | |
104 | /// ``` | |
105 | /// use std::ffi::OsStr; | |
106 | /// use clap_lex::OsStrExt as _; | |
107 | /// let x = OsStr::new("||||a||b|c"); | |
108 | /// let d: Vec<_> = x.split("|").collect(); | |
109 | /// | |
110 | /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]); | |
111 | /// ``` | |
112 | /// | |
113 | /// Contiguous separators are separated by the empty string. | |
114 | /// | |
115 | /// ``` | |
116 | /// use std::ffi::OsStr; | |
117 | /// use clap_lex::OsStrExt as _; | |
118 | /// let x = OsStr::new("(///)"); | |
119 | /// let d: Vec<_> = x.split("/").collect(); | |
120 | /// | |
121 | /// assert_eq!(d, &[OsStr::new("("), OsStr::new(""), OsStr::new(""), OsStr::new(")")]); | |
122 | /// ``` | |
123 | /// | |
124 | /// Separators at the start or end of a string are neighbored | |
125 | /// by empty strings. | |
126 | /// | |
127 | /// ``` | |
128 | /// use std::ffi::OsStr; | |
129 | /// use clap_lex::OsStrExt as _; | |
130 | /// let d: Vec<_> = OsStr::new("010").split("0").collect(); | |
131 | /// assert_eq!(d, &[OsStr::new(""), OsStr::new("1"), OsStr::new("")]); | |
132 | /// ``` | |
133 | /// | |
134 | /// When the empty string is used as a separator, it panics | |
135 | /// | |
136 | /// ```should_panic | |
137 | /// use std::ffi::OsStr; | |
138 | /// use clap_lex::OsStrExt as _; | |
139 | /// let f: Vec<_> = OsStr::new("rust").split("").collect(); | |
140 | /// assert_eq!(f, &[OsStr::new(""), OsStr::new("r"), OsStr::new("u"), OsStr::new("s"), OsStr::new("t"), OsStr::new("")]); | |
141 | /// ``` | |
142 | /// | |
143 | /// Contiguous separators can lead to possibly surprising behavior | |
144 | /// when whitespace is used as the separator. This code is correct: | |
145 | /// | |
146 | /// ``` | |
147 | /// use std::ffi::OsStr; | |
148 | /// use clap_lex::OsStrExt as _; | |
149 | /// let x = OsStr::new(" a b c"); | |
150 | /// let d: Vec<_> = x.split(" ").collect(); | |
151 | /// | |
152 | /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]); | |
153 | /// ``` | |
154 | /// | |
155 | /// It does _not_ give you: | |
156 | /// | |
157 | /// ```,ignore | |
158 | /// assert_eq!(d, &[OsStr::new("a"), OsStr::new("b"), OsStr::new("c")]); | |
159 | /// ``` | |
160 | /// | |
161 | /// Use [`split_whitespace`] for this behavior. | |
162 | /// | |
163 | /// [`split_whitespace`]: str::split_whitespace | |
164 | fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n>; | |
353b0b11 FG |
165 | /// Splits the string on the first occurrence of the specified delimiter and |
166 | /// returns prefix before delimiter and suffix after delimiter. | |
167 | /// | |
168 | /// # Examples | |
169 | /// | |
170 | /// ``` | |
171 | /// use std::ffi::OsStr; | |
172 | /// use clap_lex::OsStrExt as _; | |
173 | /// assert_eq!(OsStr::new("cfg").split_once("="), None); | |
174 | /// assert_eq!(OsStr::new("cfg=").split_once("="), Some((OsStr::new("cfg"), OsStr::new("")))); | |
175 | /// assert_eq!(OsStr::new("cfg=foo").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo")))); | |
176 | /// assert_eq!(OsStr::new("cfg=foo=bar").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo=bar")))); | |
177 | /// ``` | |
178 | fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)>; | |
179 | } | |
180 | ||
181 | impl OsStrExt for OsStr { | |
182 | fn try_str(&self) -> Result<&str, std::str::Utf8Error> { | |
183 | let bytes = to_bytes(self); | |
184 | std::str::from_utf8(bytes) | |
185 | } | |
186 | ||
187 | fn contains(&self, needle: &str) -> bool { | |
188 | self.find(needle).is_some() | |
189 | } | |
190 | ||
191 | fn find(&self, needle: &str) -> Option<usize> { | |
192 | let bytes = to_bytes(self); | |
193 | (0..=self.len().checked_sub(needle.len())?) | |
194 | .find(|&x| bytes[x..].starts_with(needle.as_bytes())) | |
195 | } | |
196 | ||
197 | fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> { | |
198 | let bytes = to_bytes(self); | |
199 | bytes.strip_prefix(prefix.as_bytes()).map(|s| { | |
200 | // SAFETY: | |
201 | // - This came from `to_bytes` | |
202 | // - Since `prefix` is `&str`, any split will be along UTF-8 boundarie | |
203 | unsafe { to_os_str_unchecked(s) } | |
204 | }) | |
205 | } | |
206 | fn starts_with(&self, prefix: &str) -> bool { | |
207 | let bytes = to_bytes(self); | |
208 | bytes.starts_with(prefix.as_bytes()) | |
209 | } | |
210 | ||
211 | fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> { | |
212 | assert_ne!(needle, ""); | |
213 | Split { | |
214 | haystack: Some(self), | |
215 | needle, | |
216 | } | |
217 | } | |
218 | ||
353b0b11 FG |
219 | fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> { |
220 | let start = self.find(needle)?; | |
221 | let end = start + needle.len(); | |
222 | let haystack = to_bytes(self); | |
223 | let first = &haystack[0..start]; | |
224 | let second = &haystack[end..]; | |
225 | // SAFETY: | |
226 | // - This came from `to_bytes` | |
227 | // - Since `needle` is `&str`, any split will be along UTF-8 boundarie | |
228 | unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) } | |
229 | } | |
230 | } | |
231 | ||
/// Private module holding the sealing marker trait used as a supertrait of `OsStrExt`.
mod private {
    use std::ffi::OsStr;

    /// Marker trait; `OsStrExt` can only be implemented for types that implement it.
    pub trait Sealed {}

    impl Sealed for OsStr {}
}
237 | ||
/// Allow access to raw bytes
///
/// As the non-UTF-8 encoding is not defined, the bytes only make sense when compared with
/// 7-bit ASCII or `&str`.
///
/// # Compatibility
///
/// There is no guarantee how non-UTF-8 bytes will be encoded, even within versions of this crate
/// (since it's dependent on rustc).
fn to_bytes(s: &OsStr) -> &[u8] {
    // SAFETY:
    // - Lifetimes are the same
    // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
    // - The primary contract is that the encoding for invalid surrogate code points is not
    //   guaranteed, which isn't a problem here
    //
    // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
    // but it's in limbo
    //
    // Source and target types are spelled out so the transmute cannot silently start converting
    // something else if the signature ever changes.
    unsafe { std::mem::transmute::<&OsStr, &[u8]>(s) }
}
258 | ||
/// Restore raw bytes as `OsStr`
///
/// # Safety
///
/// - `&[u8]` must either be a `&str` or have originated with `to_bytes` within the same binary
/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
    // SAFETY:
    // - Lifetimes are the same
    // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
    // - The primary contract is that the encoding for invalid surrogate code points is not
    //   guaranteed, which isn't a problem here
    //
    // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
    // but it's in limbo
    //
    // The explicit block keeps the unsafe operation visible even though the function itself is
    // `unsafe`, and the annotated types pin down exactly what is being reinterpreted.
    unsafe { std::mem::transmute::<&[u8], &OsStr>(s) }
}
276 | ||
/// Iterator over the pieces of an [`OsStr`] that are separated by a `&str` needle.
///
/// Created by [`OsStrExt::split`].
#[derive(Clone, Debug)]
pub struct Split<'s, 'n> {
    /// Input that has not been yielded yet; `None` once the final piece has been returned.
    haystack: Option<&'s OsStr>,
    /// Separator searched for in `haystack`.
    needle: &'n str,
}
281 | ||
282 | impl<'s, 'n> Iterator for Split<'s, 'n> { | |
283 | type Item = &'s OsStr; | |
284 | ||
285 | fn next(&mut self) -> Option<Self::Item> { | |
286 | let haystack = self.haystack?; | |
287 | match haystack.split_once(self.needle) { | |
288 | Some((first, second)) => { | |
289 | if !haystack.is_empty() { | |
290 | debug_assert_ne!(haystack, second); | |
291 | } | |
292 | self.haystack = Some(second); | |
293 | Some(first) | |
294 | } | |
295 | None => { | |
296 | self.haystack = None; | |
297 | Some(haystack) | |
298 | } | |
299 | } | |
300 | } | |
301 | } | |
302 | ||
303 | /// Split an `OsStr` | |
304 | /// | |
305 | /// # Safety | |
306 | /// | |
307 | /// `index` must be at a valid UTF-8 boundary | |
308 | pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) { | |
309 | let bytes = to_bytes(os); | |
310 | let (first, second) = bytes.split_at(index); | |
311 | (to_os_str_unchecked(first), to_os_str_unchecked(second)) | |
312 | } |