]> git.proxmox.com Git - rustc.git/blob - vendor/fst/src/raw/tests.rs
New upstream version 1.48.0+dfsg1
[rustc.git] / vendor / fst / src / raw / tests.rs
1 use crate::automaton::AlwaysMatch;
2 use crate::error::Error;
3 use crate::raw::{self, Bound, Builder, Fst, Output, Stream, VERSION};
4 use crate::stream::Streamer;
5
6 const TEXT: &'static str = include_str!("./../../data/words-100000");
7
8 pub fn fst_set<I, S>(ss: I) -> Fst<Vec<u8>>
9 where
10 I: IntoIterator<Item = S>,
11 S: AsRef<[u8]>,
12 {
13 let mut bfst = Builder::memory();
14 let mut ss: Vec<Vec<u8>> =
15 ss.into_iter().map(|s| s.as_ref().to_vec()).collect();
16 ss.sort();
17 ss.dedup();
18 for s in ss.iter().into_iter() {
19 bfst.add(s).unwrap();
20 }
21 let fst = bfst.into_fst();
22 assert_eq!(fst.len(), ss.len());
23 fst
24 }
25
26 pub fn fst_map<I, S>(ss: I) -> Fst<Vec<u8>>
27 where
28 I: IntoIterator<Item = (S, u64)>,
29 S: AsRef<[u8]>,
30 {
31 let mut bfst = Builder::memory();
32 let mut ss: Vec<(Vec<u8>, u64)> =
33 ss.into_iter().map(|(s, o)| (s.as_ref().to_vec(), o)).collect();
34 ss.sort();
35 ss.dedup();
36 for (s, o) in ss.into_iter() {
37 bfst.insert(s, o).unwrap();
38 }
39 bfst.into_fst()
40 }
41
42 pub fn fst_inputs<D: AsRef<[u8]>>(fst: &Fst<D>) -> Vec<Vec<u8>> {
43 let mut words = vec![];
44 let mut rdr = fst.stream();
45 while let Some((word, _)) = rdr.next() {
46 words.push(word.to_vec());
47 }
48 words
49 }
50
51 pub fn fst_inputs_outputs<D: AsRef<[u8]>>(
52 fst: &Fst<D>,
53 ) -> Vec<(Vec<u8>, u64)> {
54 let mut words = vec![];
55 let mut rdr = fst.stream();
56 while let Some((word, out)) = rdr.next() {
57 words.push((word.to_vec(), out.value()));
58 }
59 words
60 }
61
62 macro_rules! test_set {
63 ($name:ident, $($s:expr),+) => {
64 #[test]
65 fn $name() {
66 let mut items = vec![$($s),*];
67 let fst = fst_set(&items);
68 let mut rdr = fst.stream();
69 items.sort();
70 items.dedup();
71 for item in &items {
72 assert_eq!(rdr.next().unwrap().0, item.as_bytes());
73 }
74 assert_eq!(rdr.next(), None);
75 for item in &items {
76 assert!(fst.get(item).is_some());
77 }
78 }
79 }
80 }
81
82 macro_rules! test_set_fail {
83 ($name:ident, $($s:expr),+) => {
84 #[test]
85 #[should_panic]
86 fn $name() {
87 let mut bfst = Builder::memory();
88 $(bfst.add($s).unwrap();)*
89 }
90 }
91 }
92
93 test_set!(fst_set_only_empty, "");
94 test_set!(fst_set_one, "a");
95 test_set!(fst_set_dupe_empty, "", "");
96 test_set!(fst_set_dupe1, "a", "a");
97 test_set!(fst_set_dupe2, "a", "b", "b");
98 test_set!(fst_set_two1, "a", "b");
99 test_set!(fst_set_two2, "a", "ab");
100 test_set!(fst_set_jan, "jam", "jbm", "jcm", "jdm", "jem", "jfm", "jgm");
101
102 test_set_fail!(fst_set_order1, "b", "a");
103 test_set_fail!(fst_set_order2, "a", "b", "c", "a");
104
105 #[test]
106 fn fst_set_100000() {
107 let words: Vec<Vec<u8>> =
108 TEXT.lines().map(|s| s.as_bytes().to_vec()).collect();
109 let fst = fst_set(words.clone());
110 assert_eq!(words, fst_inputs(&fst));
111 for word in &words {
112 assert!(
113 fst.get(word).is_some(),
114 "failed to find word: {}",
115 std::str::from_utf8(word).unwrap()
116 );
117 }
118 }
119
120 macro_rules! test_map {
121 ($name:ident, $($s:expr, $o:expr),+) => {
122 #[test]
123 fn $name() {
124 let fst = fst_map(vec![$(($s, $o)),*]);
125 let mut rdr = fst.stream();
126 $({
127 let (s, o) = rdr.next().unwrap();
128 assert_eq!((s, o.value()), ($s.as_bytes(), $o));
129 })*
130 assert_eq!(rdr.next(), None);
131 $({
132 assert_eq!(fst.get($s.as_bytes()), Some(Output::new($o)));
133 })*
134 }
135 }
136 }
137
138 macro_rules! test_map_fail {
139 ($name:ident, $($s:expr, $o:expr),+) => {
140 #[test]
141 #[should_panic]
142 fn $name() {
143 let mut bfst = Builder::memory();
144 $(bfst.insert($s, $o).unwrap();)*
145 }
146 }
147 }
148
149 test_map!(fst_map_only_empty1, "", 0);
150 test_map!(fst_map_only_empty2, "", 100);
151 test_map!(fst_map_only_empty3, "", 9999999999);
152 test_map!(fst_map_one1, "a", 0);
153 test_map!(fst_map_one2, "a", 100);
154 test_map!(fst_map_one3, "a", 999999999);
155 test_map!(fst_map_two, "a", 1, "b", 2);
156 test_map!(fst_map_many1, "a", 34786, "ab", 26);
157 test_map!(
158 fst_map_many2,
159 "a",
160 34786,
161 "ab",
162 26,
163 "abc",
164 58976,
165 "abcd",
166 25,
167 "z",
168 58,
169 "zabc",
170 6798
171 );
172 test_map!(fst_map_many3, "a", 1, "ab", 0, "abc", 0);
173
174 test_map_fail!(fst_map_dupe_empty, "", 0, "", 0);
175 test_map_fail!(fst_map_dupe1, "a", 0, "a", 0);
176 test_map_fail!(fst_map_dupe2, "a", 0, "b", 0, "b", 0);
177 test_map_fail!(fst_map_order1, "b", 0, "a", 0);
178 test_map_fail!(fst_map_order2, "a", 0, "b", 0, "c", 0, "a", 0);
179
180 #[test]
181 fn fst_map_100000_increments() {
182 let words: Vec<(Vec<u8>, u64)> = TEXT
183 .lines()
184 .enumerate()
185 .map(|(i, s)| (s.as_bytes().to_vec(), i as u64))
186 .collect();
187 let fst = fst_map(words.clone());
188 assert_eq!(words, fst_inputs_outputs(&fst));
189 for &(ref word, out) in &words {
190 assert_eq!(fst.get(word), Some(Output::new(out)));
191 }
192 }
193
194 #[test]
195 fn fst_map_100000_lengths() {
196 let words: Vec<(Vec<u8>, u64)> = TEXT
197 .lines()
198 .map(|s| (s.as_bytes().to_vec(), s.len() as u64))
199 .collect();
200 let fst = fst_map(words.clone());
201 assert_eq!(words, fst_inputs_outputs(&fst));
202 for &(ref word, out) in &words {
203 assert_eq!(fst.get(word), Some(Output::new(out)));
204 }
205 }
206
207 #[test]
208 fn invalid_version() {
209 match Fst::new(vec![0; 36]) {
210 Err(Error::Fst(raw::Error::Version { got, .. })) => assert_eq!(got, 0),
211 Err(err) => panic!("expected version error, got {:?}", err),
212 Ok(_) => panic!("expected version error, got FST"),
213 }
214 }
215
216 #[test]
217 fn invalid_version_crate_too_old() {
218 let mut buf = vec![0; 36];
219 crate::bytes::write_u64_le(VERSION + 1, &mut buf);
220 match Fst::new(buf) {
221 Err(Error::Fst(raw::Error::Version { got, .. })) => {
222 assert_eq!(got, VERSION + 1);
223 }
224 Err(err) => panic!("expected version error, got {:?}", err),
225 Ok(_) => panic!("expected version error, got FST"),
226 }
227 }
228
229 #[test]
230 fn invalid_format() {
231 match Fst::new(vec![0; 0]) {
232 Err(Error::Fst(raw::Error::Format { .. })) => {}
233 Err(err) => panic!("expected format error, got {:?}", err),
234 Ok(_) => panic!("expected format error, got FST"),
235 }
236 }
237
238 #[test]
239 fn fst_set_zero() {
240 let fst = fst_set::<_, String>(vec![]);
241 let mut rdr = fst.stream();
242 assert_eq!(rdr.next(), None);
243 }
244
245 macro_rules! test_range {
246 (
247 $name:ident,
248 min: $min:expr,
249 max: $max:expr,
250 imin: $imin:expr,
251 imax: $imax:expr,
252 $($s:expr),*
253 ) => {
254 #[test]
255 fn $name() {
256 let items: Vec<&'static str> = vec![$($s),*];
257 let items: Vec<_> = items
258 .into_iter()
259 .enumerate()
260 .map(|(i, k)| (k, i as u64))
261 .collect();
262 let fst = fst_map(items.clone());
263 let mut rdr = Stream::new(fst.as_ref(), AlwaysMatch, $min, $max);
264 for i in $imin..$imax {
265 assert_eq!(
266 rdr.next().unwrap(),
267 (items[i].0.as_bytes(), Output::new(items[i].1)),
268 );
269 }
270 assert_eq!(rdr.next(), None);
271 }
272 }
273 }
274
275 test_range! {
276 fst_range_empty_1,
277 min: Bound::Unbounded, max: Bound::Unbounded,
278 imin: 0, imax: 0,
279 }
280
281 test_range! {
282 fst_range_empty_2,
283 min: Bound::Unbounded, max: Bound::Unbounded,
284 imin: 0, imax: 1,
285 ""
286 }
287
288 test_range! {
289 fst_range_empty_3,
290 min: Bound::Included(vec![]), max: Bound::Unbounded,
291 imin: 0, imax: 1,
292 ""
293 }
294
295 test_range! {
296 fst_range_empty_4,
297 min: Bound::Excluded(vec![]), max: Bound::Unbounded,
298 imin: 0, imax: 0,
299 ""
300 }
301
302 test_range! {
303 fst_range_empty_5,
304 min: Bound::Included(vec![]), max: Bound::Unbounded,
305 imin: 0, imax: 2,
306 "", "a"
307 }
308
309 test_range! {
310 fst_range_empty_6,
311 min: Bound::Excluded(vec![]), max: Bound::Unbounded,
312 imin: 1, imax: 2,
313 "", "a"
314 }
315
316 test_range! {
317 fst_range_empty_7,
318 min: Bound::Unbounded, max: Bound::Unbounded,
319 imin: 0, imax: 2,
320 "", "a"
321 }
322
323 test_range! {
324 fst_range_empty_8,
325 min: Bound::Unbounded, max: Bound::Included(vec![]),
326 imin: 0, imax: 1,
327 ""
328 }
329
330 test_range! {
331 fst_range_empty_9,
332 min: Bound::Unbounded, max: Bound::Excluded(vec![]),
333 imin: 0, imax: 0,
334 ""
335 }
336
337 test_range! {
338 fst_range_empty_10,
339 min: Bound::Unbounded, max: Bound::Included(vec![]),
340 imin: 0, imax: 1,
341 "", "a"
342 }
343
344 test_range! {
345 fst_range_empty_11,
346 min: Bound::Included(vec![]), max: Bound::Included(vec![]),
347 imin: 0, imax: 1,
348 ""
349 }
350
351 test_range! {
352 fst_range_1,
353 min: Bound::Included(vec![b'a']), max: Bound::Included(vec![b'z']),
354 imin: 0, imax: 4,
355 "a", "b", "y", "z"
356 }
357
358 test_range! {
359 fst_range_2,
360 min: Bound::Excluded(vec![b'a']), max: Bound::Included(vec![b'y']),
361 imin: 1, imax: 3,
362 "a", "b", "y", "z"
363 }
364
365 test_range! {
366 fst_range_3,
367 min: Bound::Excluded(vec![b'a']), max: Bound::Excluded(vec![b'y']),
368 imin: 1, imax: 2,
369 "a", "b", "y", "z"
370 }
371
372 test_range! {
373 fst_range_4,
374 min: Bound::Unbounded, max: Bound::Unbounded,
375 imin: 0, imax: 4,
376 "a", "b", "y", "z"
377 }
378
379 test_range! {
380 fst_range_5,
381 min: Bound::Included(b"abd".to_vec()), max: Bound::Unbounded,
382 imin: 0, imax: 0,
383 "a", "ab", "abc", "abcd", "abcde"
384 }
385
386 test_range! {
387 fst_range_6,
388 min: Bound::Included(b"abd".to_vec()), max: Bound::Unbounded,
389 imin: 5, imax: 6,
390 "a", "ab", "abc", "abcd", "abcde", "abe"
391 }
392
393 test_range! {
394 fst_range_7,
395 min: Bound::Excluded(b"abd".to_vec()), max: Bound::Unbounded,
396 imin: 5, imax: 6,
397 "a", "ab", "abc", "abcd", "abcde", "abe"
398 }
399
400 test_range! {
401 fst_range_8,
402 min: Bound::Included(b"abd".to_vec()), max: Bound::Unbounded,
403 imin: 5, imax: 6,
404 "a", "ab", "abc", "abcd", "abcde", "xyz"
405 }
406
407 test_range! {
408 fst_range_9,
409 min: Bound::Unbounded, max: Bound::Included(b"abd".to_vec()),
410 imin: 0, imax: 5,
411 "a", "ab", "abc", "abcd", "abcde", "abe"
412 }
413
414 test_range! {
415 fst_range_10,
416 min: Bound::Unbounded, max: Bound::Included(b"abd".to_vec()),
417 imin: 0, imax: 6,
418 "a", "ab", "abc", "abcd", "abcde", "abd"
419 }
420
421 test_range! {
422 fst_range_11,
423 min: Bound::Unbounded, max: Bound::Included(b"abd".to_vec()),
424 imin: 0, imax: 6,
425 "a", "ab", "abc", "abcd", "abcde", "abd", "abdx"
426 }
427
428 test_range! {
429 fst_range_12,
430 min: Bound::Unbounded, max: Bound::Excluded(b"abd".to_vec()),
431 imin: 0, imax: 5,
432 "a", "ab", "abc", "abcd", "abcde", "abe"
433 }
434
435 test_range! {
436 fst_range_13,
437 min: Bound::Unbounded, max: Bound::Excluded(b"abd".to_vec()),
438 imin: 0, imax: 5,
439 "a", "ab", "abc", "abcd", "abcde", "abd"
440 }
441
442 test_range! {
443 fst_range_14,
444 min: Bound::Unbounded, max: Bound::Excluded(b"abd".to_vec()),
445 imin: 0, imax: 5,
446 "a", "ab", "abc", "abcd", "abcde", "abd", "abdx"
447 }
448
449 test_range! {
450 fst_range_15,
451 min: Bound::Included(vec![b'd']), max: Bound::Included(vec![b'c']),
452 imin: 0, imax: 0,
453 "a", "b", "c", "d", "e", "f"
454 }
455
456 test_range! {
457 fst_range_16,
458 min: Bound::Included(vec![b'c']), max: Bound::Included(vec![b'c']),
459 imin: 2, imax: 3,
460 "a", "b", "c", "d", "e", "f"
461 }
462
463 test_range! {
464 fst_range_17,
465 min: Bound::Excluded(vec![b'c']), max: Bound::Excluded(vec![b'c']),
466 imin: 0, imax: 0,
467 "a", "b", "c", "d", "e", "f"
468 }
469
470 test_range! {
471 fst_range_18,
472 min: Bound::Included(vec![b'c']), max: Bound::Excluded(vec![b'c']),
473 imin: 0, imax: 0,
474 "a", "b", "c", "d", "e", "f"
475 }
476
477 test_range! {
478 fst_range_19,
479 min: Bound::Included(vec![b'c']), max: Bound::Excluded(vec![b'd']),
480 imin: 2, imax: 3,
481 "a", "b", "c", "d", "e", "f"
482 }
483
484 #[test]
485 fn one_vec_multiple_fsts() {
486 let mut bfst1 = Builder::memory();
487 bfst1.add(b"bar").unwrap();
488 bfst1.add(b"baz").unwrap();
489 let bytes = bfst1.into_inner().unwrap();
490 let fst1_len = bytes.len();
491
492 let mut bfst2 = Builder::new(bytes).unwrap();
493 bfst2.add(b"bar").unwrap();
494 bfst2.add(b"foo").unwrap();
495
496 let bytes = bfst2.into_inner().unwrap();
497 let slice1 = &bytes[0..fst1_len];
498 let slice2 = &bytes[fst1_len..bytes.len()];
499
500 let fst1 = Fst::new(slice1).unwrap();
501 let fst2 = Fst::new(slice2).unwrap();
502
503 assert_eq!(fst_inputs(&fst1), vec![b"bar".to_vec(), b"baz".to_vec()]);
504 assert_eq!(fst_inputs(&fst2), vec![b"bar".to_vec(), b"foo".to_vec()]);
505 }
506
507 #[test]
508 fn bytes_written() {
509 let mut bfst1 = Builder::memory();
510 bfst1.add(b"bar").unwrap();
511 bfst1.add(b"baz").unwrap();
512 let counted_len = bfst1.bytes_written();
513 let bytes = bfst1.into_inner().unwrap();
514 let fst1_len = bytes.len() as u64;
515 let footer_size = 28;
516 assert_eq!(counted_len + footer_size, fst1_len);
517 }
518
519 #[test]
520 fn get_key_simple() {
521 let map = fst_map(vec![("abc", 2), ("xyz", 3)]);
522 assert_eq!(map.get_key(0), None);
523 assert_eq!(map.get_key(1), None);
524 assert_eq!(map.get_key(2), Some(b"abc".to_vec()));
525 assert_eq!(map.get_key(3), Some(b"xyz".to_vec()));
526 assert_eq!(map.get_key(4), None);
527 }
528
529 #[test]
530 fn get_key_words() {
531 let words: Vec<(Vec<u8>, u64)> = TEXT
532 .lines()
533 .enumerate()
534 .map(|(i, line)| (line.as_bytes().to_vec(), i as u64))
535 .collect();
536 let map = fst_map(words.clone());
537 for (key, value) in words {
538 assert_eq!(map.get_key(value), Some(key));
539 }
540 }
541
542 #[test]
543 fn get_key_words_discontiguous() {
544 let words: Vec<(Vec<u8>, u64)> = TEXT
545 .lines()
546 .enumerate()
547 .map(|(i, line)| (line.as_bytes().to_vec(), i as u64 * 2))
548 .collect();
549 let map = fst_map(words.clone());
550 for (key, value) in words {
551 assert_eq!(map.get_key(value), Some(key));
552 }
553 }
554
555 #[test]
556 fn verify_ok_nonempty() {
557 let words: Vec<(Vec<u8>, u64)> = TEXT
558 .lines()
559 .enumerate()
560 .map(|(i, line)| (line.as_bytes().to_vec(), i as u64 * 2))
561 .collect();
562 let map = fst_map(words.clone());
563 assert!(map.verify().is_ok());
564 }
565
566 #[test]
567 fn verify_ok_empty() {
568 let map = fst_map(Vec::<(&str, u64)>::new());
569 assert!(map.verify().is_ok());
570 }
571
572 #[test]
573 fn verify_err() {
574 let mut b = Builder::memory();
575 b.add(b"bar").unwrap();
576 b.add(b"baz").unwrap();
577 let mut bytes = b.into_inner().unwrap();
578
579 {
580 let fst = Fst::new(&bytes).unwrap();
581 assert!(fst.verify().is_ok());
582 }
583
584 bytes[17] = b'\xFF';
585 {
586 let fst = Fst::new(&bytes).unwrap();
587 assert!(fst.verify().is_err());
588 }
589 }