1 use crate::automaton
::AlwaysMatch
;
2 use crate::error
::Error
;
3 use crate::raw
::{self, Bound, Builder, Fst, Output, Stream, VERSION}
;
4 use crate::stream
::Streamer
;
6 const TEXT
: &'
static str = include_str
!("./../../data/words-100000");
8 pub fn fst_set
<I
, S
>(ss
: I
) -> Fst
<Vec
<u8>>
10 I
: IntoIterator
<Item
= S
>,
13 let mut bfst
= Builder
::memory();
14 let mut ss
: Vec
<Vec
<u8>> =
15 ss
.into_iter().map(|s
| s
.as_ref().to_vec()).collect();
18 for s
in ss
.iter().into_iter() {
21 let fst
= bfst
.into_fst();
22 assert_eq
!(fst
.len(), ss
.len());
26 pub fn fst_map
<I
, S
>(ss
: I
) -> Fst
<Vec
<u8>>
28 I
: IntoIterator
<Item
= (S
, u64)>,
31 let mut bfst
= Builder
::memory();
32 let mut ss
: Vec
<(Vec
<u8>, u64)> =
33 ss
.into_iter().map(|(s
, o
)| (s
.as_ref().to_vec(), o
)).collect();
36 for (s
, o
) in ss
.into_iter() {
37 bfst
.insert(s
, o
).unwrap();
42 pub fn fst_inputs
<D
: AsRef
<[u8]>>(fst
: &Fst
<D
>) -> Vec
<Vec
<u8>> {
43 let mut words
= vec
![];
44 let mut rdr
= fst
.stream();
45 while let Some((word
, _
)) = rdr
.next() {
46 words
.push(word
.to_vec());
51 pub fn fst_inputs_outputs
<D
: AsRef
<[u8]>>(
53 ) -> Vec
<(Vec
<u8>, u64)> {
54 let mut words
= vec
![];
55 let mut rdr
= fst
.stream();
56 while let Some((word
, out
)) = rdr
.next() {
57 words
.push((word
.to_vec(), out
.value()));
62 macro_rules
! test_set
{
63 ($name
:ident
, $
($s
:expr
),+) => {
66 let mut items
= vec
![$
($s
),*];
67 let fst
= fst_set(&items
);
68 let mut rdr
= fst
.stream();
72 assert_eq
!(rdr
.next().unwrap().0, item
.as_bytes());
74 assert_eq
!(rdr
.next(), None
);
76 assert
!(fst
.get(item
).is_some());
82 macro_rules
! test_set_fail
{
83 ($name
:ident
, $
($s
:expr
),+) => {
87 let mut bfst
= Builder
::memory();
88 $
(bfst
.add($s
).unwrap();)*
93 test_set
!(fst_set_only_empty
, "");
94 test_set
!(fst_set_one
, "a");
95 test_set
!(fst_set_dupe_empty
, "", "");
96 test_set
!(fst_set_dupe1
, "a", "a");
97 test_set
!(fst_set_dupe2
, "a", "b", "b");
98 test_set
!(fst_set_two1
, "a", "b");
99 test_set
!(fst_set_two2
, "a", "ab");
100 test_set
!(fst_set_jan
, "jam", "jbm", "jcm", "jdm", "jem", "jfm", "jgm");
102 test_set_fail
!(fst_set_order1
, "b", "a");
103 test_set_fail
!(fst_set_order2
, "a", "b", "c", "a");
106 fn fst_set_100000() {
107 let words
: Vec
<Vec
<u8>> =
108 TEXT
.lines().map(|s
| s
.as_bytes().to_vec()).collect();
109 let fst
= fst_set(words
.clone());
110 assert_eq
!(words
, fst_inputs(&fst
));
113 fst
.get(word
).is_some(),
114 "failed to find word: {}",
115 std
::str::from_utf8(word
).unwrap()
120 macro_rules
! test_map
{
121 ($name
:ident
, $
($s
:expr
, $o
:expr
),+) => {
124 let fst
= fst_map(vec
![$
(($s
, $o
)),*]);
125 let mut rdr
= fst
.stream();
127 let (s
, o
) = rdr
.next().unwrap();
128 assert_eq
!((s
, o
.value()), ($s
.as_bytes(), $o
));
130 assert_eq
!(rdr
.next(), None
);
132 assert_eq
!(fst
.get($s
.as_bytes()), Some(Output
::new($o
)));
138 macro_rules
! test_map_fail
{
139 ($name
:ident
, $
($s
:expr
, $o
:expr
),+) => {
143 let mut bfst
= Builder
::memory();
144 $
(bfst
.insert($s
, $o
).unwrap();)*
149 test_map
!(fst_map_only_empty1
, "", 0);
150 test_map
!(fst_map_only_empty2
, "", 100);
151 test_map
!(fst_map_only_empty3
, "", 9999999999);
152 test_map
!(fst_map_one1
, "a", 0);
153 test_map
!(fst_map_one2
, "a", 100);
154 test_map
!(fst_map_one3
, "a", 999999999);
155 test_map
!(fst_map_two
, "a", 1, "b", 2);
156 test_map
!(fst_map_many1
, "a", 34786, "ab", 26);
172 test_map
!(fst_map_many3
, "a", 1, "ab", 0, "abc", 0);
174 test_map_fail
!(fst_map_dupe_empty
, "", 0, "", 0);
175 test_map_fail
!(fst_map_dupe1
, "a", 0, "a", 0);
176 test_map_fail
!(fst_map_dupe2
, "a", 0, "b", 0, "b", 0);
177 test_map_fail
!(fst_map_order1
, "b", 0, "a", 0);
178 test_map_fail
!(fst_map_order2
, "a", 0, "b", 0, "c", 0, "a", 0);
181 fn fst_map_100000_increments() {
182 let words
: Vec
<(Vec
<u8>, u64)> = TEXT
185 .map(|(i
, s
)| (s
.as_bytes().to_vec(), i
as u64))
187 let fst
= fst_map(words
.clone());
188 assert_eq
!(words
, fst_inputs_outputs(&fst
));
189 for &(ref word
, out
) in &words
{
190 assert_eq
!(fst
.get(word
), Some(Output
::new(out
)));
195 fn fst_map_100000_lengths() {
196 let words
: Vec
<(Vec
<u8>, u64)> = TEXT
198 .map(|s
| (s
.as_bytes().to_vec(), s
.len() as u64))
200 let fst
= fst_map(words
.clone());
201 assert_eq
!(words
, fst_inputs_outputs(&fst
));
202 for &(ref word
, out
) in &words
{
203 assert_eq
!(fst
.get(word
), Some(Output
::new(out
)));
208 fn invalid_version() {
209 match Fst
::new(vec
![0; 36]) {
210 Err(Error
::Fst(raw
::Error
::Version { got, .. }
)) => assert_eq
!(got
, 0),
211 Err(err
) => panic
!("expected version error, got {:?}", err
),
212 Ok(_
) => panic
!("expected version error, got FST"),
217 fn invalid_version_crate_too_old() {
218 let mut buf
= vec
![0; 36];
219 crate::bytes
::write_u64_le(VERSION
+ 1, &mut buf
);
220 match Fst
::new(buf
) {
221 Err(Error
::Fst(raw
::Error
::Version { got, .. }
)) => {
222 assert_eq
!(got
, VERSION
+ 1);
224 Err(err
) => panic
!("expected version error, got {:?}", err
),
225 Ok(_
) => panic
!("expected version error, got FST"),
230 fn invalid_format() {
231 match Fst
::new(vec
![0; 0]) {
232 Err(Error
::Fst(raw
::Error
::Format { .. }
)) => {}
233 Err(err
) => panic
!("expected format error, got {:?}", err
),
234 Ok(_
) => panic
!("expected format error, got FST"),
240 let fst
= fst_set
::<_
, String
>(vec
![]);
241 let mut rdr
= fst
.stream();
242 assert_eq
!(rdr
.next(), None
);
245 macro_rules
! test_range
{
256 let items
: Vec
<&'
static str> = vec
![$
($s
),*];
257 let items
: Vec
<_
> = items
260 .map(|(i
, k
)| (k
, i
as u64))
262 let fst
= fst_map(items
.clone());
263 let mut rdr
= Stream
::new(fst
.as_ref(), AlwaysMatch
, $min
, $max
);
264 for i
in $imin
..$imax
{
267 (items
[i
].0.as_bytes(), Output
::new(items
[i
].1)),
270 assert_eq
!(rdr
.next(), None
);
277 min
: Bound
::Unbounded
, max
: Bound
::Unbounded
,
283 min
: Bound
::Unbounded
, max
: Bound
::Unbounded
,
290 min
: Bound
::Included(vec
![]), max
: Bound
::Unbounded
,
297 min
: Bound
::Excluded(vec
![]), max
: Bound
::Unbounded
,
304 min
: Bound
::Included(vec
![]), max
: Bound
::Unbounded
,
311 min
: Bound
::Excluded(vec
![]), max
: Bound
::Unbounded
,
318 min
: Bound
::Unbounded
, max
: Bound
::Unbounded
,
325 min
: Bound
::Unbounded
, max
: Bound
::Included(vec
![]),
332 min
: Bound
::Unbounded
, max
: Bound
::Excluded(vec
![]),
339 min
: Bound
::Unbounded
, max
: Bound
::Included(vec
![]),
346 min
: Bound
::Included(vec
![]), max
: Bound
::Included(vec
![]),
353 min
: Bound
::Included(vec
![b'a'
]), max
: Bound
::Included(vec
![b'z'
]),
360 min
: Bound
::Excluded(vec
![b'a'
]), max
: Bound
::Included(vec
![b'y'
]),
367 min
: Bound
::Excluded(vec
![b'a'
]), max
: Bound
::Excluded(vec
![b'y'
]),
374 min
: Bound
::Unbounded
, max
: Bound
::Unbounded
,
381 min
: Bound
::Included(b
"abd".to_vec()), max
: Bound
::Unbounded
,
383 "a", "ab", "abc", "abcd", "abcde"
388 min
: Bound
::Included(b
"abd".to_vec()), max
: Bound
::Unbounded
,
390 "a", "ab", "abc", "abcd", "abcde", "abe"
395 min
: Bound
::Excluded(b
"abd".to_vec()), max
: Bound
::Unbounded
,
397 "a", "ab", "abc", "abcd", "abcde", "abe"
402 min
: Bound
::Included(b
"abd".to_vec()), max
: Bound
::Unbounded
,
404 "a", "ab", "abc", "abcd", "abcde", "xyz"
409 min
: Bound
::Unbounded
, max
: Bound
::Included(b
"abd".to_vec()),
411 "a", "ab", "abc", "abcd", "abcde", "abe"
416 min
: Bound
::Unbounded
, max
: Bound
::Included(b
"abd".to_vec()),
418 "a", "ab", "abc", "abcd", "abcde", "abd"
423 min
: Bound
::Unbounded
, max
: Bound
::Included(b
"abd".to_vec()),
425 "a", "ab", "abc", "abcd", "abcde", "abd", "abdx"
430 min
: Bound
::Unbounded
, max
: Bound
::Excluded(b
"abd".to_vec()),
432 "a", "ab", "abc", "abcd", "abcde", "abe"
437 min
: Bound
::Unbounded
, max
: Bound
::Excluded(b
"abd".to_vec()),
439 "a", "ab", "abc", "abcd", "abcde", "abd"
444 min
: Bound
::Unbounded
, max
: Bound
::Excluded(b
"abd".to_vec()),
446 "a", "ab", "abc", "abcd", "abcde", "abd", "abdx"
451 min
: Bound
::Included(vec
![b'd'
]), max
: Bound
::Included(vec
![b'c'
]),
453 "a", "b", "c", "d", "e", "f"
458 min
: Bound
::Included(vec
![b'c'
]), max
: Bound
::Included(vec
![b'c'
]),
460 "a", "b", "c", "d", "e", "f"
465 min
: Bound
::Excluded(vec
![b'c'
]), max
: Bound
::Excluded(vec
![b'c'
]),
467 "a", "b", "c", "d", "e", "f"
472 min
: Bound
::Included(vec
![b'c'
]), max
: Bound
::Excluded(vec
![b'c'
]),
474 "a", "b", "c", "d", "e", "f"
479 min
: Bound
::Included(vec
![b'c'
]), max
: Bound
::Excluded(vec
![b'd'
]),
481 "a", "b", "c", "d", "e", "f"
485 fn one_vec_multiple_fsts() {
486 let mut bfst1
= Builder
::memory();
487 bfst1
.add(b
"bar").unwrap();
488 bfst1
.add(b
"baz").unwrap();
489 let bytes
= bfst1
.into_inner().unwrap();
490 let fst1_len
= bytes
.len();
492 let mut bfst2
= Builder
::new(bytes
).unwrap();
493 bfst2
.add(b
"bar").unwrap();
494 bfst2
.add(b
"foo").unwrap();
496 let bytes
= bfst2
.into_inner().unwrap();
497 let slice1
= &bytes
[0..fst1_len
];
498 let slice2
= &bytes
[fst1_len
..bytes
.len()];
500 let fst1
= Fst
::new(slice1
).unwrap();
501 let fst2
= Fst
::new(slice2
).unwrap();
503 assert_eq
!(fst_inputs(&fst1
), vec
![b
"bar".to_vec(), b
"baz".to_vec()]);
504 assert_eq
!(fst_inputs(&fst2
), vec
![b
"bar".to_vec(), b
"foo".to_vec()]);
509 let mut bfst1
= Builder
::memory();
510 bfst1
.add(b
"bar").unwrap();
511 bfst1
.add(b
"baz").unwrap();
512 let counted_len
= bfst1
.bytes_written();
513 let bytes
= bfst1
.into_inner().unwrap();
514 let fst1_len
= bytes
.len() as u64;
515 let footer_size
= 28;
516 assert_eq
!(counted_len
+ footer_size
, fst1_len
);
520 fn get_key_simple() {
521 let map
= fst_map(vec
![("abc", 2), ("xyz", 3)]);
522 assert_eq
!(map
.get_key(0), None
);
523 assert_eq
!(map
.get_key(1), None
);
524 assert_eq
!(map
.get_key(2), Some(b
"abc".to_vec()));
525 assert_eq
!(map
.get_key(3), Some(b
"xyz".to_vec()));
526 assert_eq
!(map
.get_key(4), None
);
531 let words
: Vec
<(Vec
<u8>, u64)> = TEXT
534 .map(|(i
, line
)| (line
.as_bytes().to_vec(), i
as u64))
536 let map
= fst_map(words
.clone());
537 for (key
, value
) in words
{
538 assert_eq
!(map
.get_key(value
), Some(key
));
543 fn get_key_words_discontiguous() {
544 let words
: Vec
<(Vec
<u8>, u64)> = TEXT
547 .map(|(i
, line
)| (line
.as_bytes().to_vec(), i
as u64 * 2))
549 let map
= fst_map(words
.clone());
550 for (key
, value
) in words
{
551 assert_eq
!(map
.get_key(value
), Some(key
));
556 fn verify_ok_nonempty() {
557 let words
: Vec
<(Vec
<u8>, u64)> = TEXT
560 .map(|(i
, line
)| (line
.as_bytes().to_vec(), i
as u64 * 2))
562 let map
= fst_map(words
.clone());
563 assert
!(map
.verify().is_ok());
567 fn verify_ok_empty() {
568 let map
= fst_map(Vec
::<(&str, u64)>::new());
569 assert
!(map
.verify().is_ok());
574 let mut b
= Builder
::memory();
575 b
.add(b
"bar").unwrap();
576 b
.add(b
"baz").unwrap();
577 let mut bytes
= b
.into_inner().unwrap();
580 let fst
= Fst
::new(&bytes
).unwrap();
581 assert
!(fst
.verify().is_ok());
586 let fst
= Fst
::new(&bytes
).unwrap();
587 assert
!(fst
.verify().is_err());