]> git.proxmox.com Git - rustc.git/blob - vendor/encode_unicode/tests/iterators.rs
New upstream version 1.46.0~beta.2+dfsg1
[rustc.git] / vendor / encode_unicode / tests / iterators.rs
1 /* Copyright 2018 The encode_unicode Developers
2 *
3 * Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4 * http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5 * http://opensource.org/licenses/MIT>, at your option. This file may not be
6 * copied, modified, or distributed except according to those terms.
7 */
8
9 //! Iterator tests
10
11 #![cfg(feature="std")]
12
13 extern crate encode_unicode;
14
15 use encode_unicode::{IterExt, SliceExt, CharExt};
16 use encode_unicode::iterator::Utf8CharSplitter;
17 use encode_unicode::error::InvalidUtf8Slice::*;
18 use encode_unicode::error::InvalidUtf8::*;
19 use encode_unicode::error::InvalidUtf8FirstByte::*;
20 use encode_unicode::error::InvalidCodepoint::*;
21 use encode_unicode::error::Utf16PairError::*;
22 use std::io::Read;
23 use std::cmp::min;
24
/// Exercises `to_utf8chars()` on a byte sequence whose four-byte lead (0xf0)
/// is interrupted by an ASCII byte, checking the size hints, the `Debug`
/// representation and the reported errors after each step.
#[test] fn utf8charmerger() {
    let bytes = b"\xf0\xa1\x92X\xcc\xbb";
    let mut merger = bytes.iter().to_utf8chars();

    // Before anything is pulled, nothing is buffered and the inner iterator
    // still covers all six bytes.
    let untouched = format!("Utf8CharMerger {{ buffered: [], inner: {:?} }}", bytes.iter());
    assert_eq!(merger.size_hint(), (1, Some(6)));
    assert_eq!(format!("{:?}", merger), untouched);

    // The byte at index 3 is 'X' instead of a continuation byte.
    let first = merger.next();
    assert_eq!(first, Some(Err(Utf8(NotAContinuationByte(3)))));
    assert_eq!(merger.size_hint(), (0, Some(5)));
    // The three bytes read after the lead byte stay buffered (161, 146, 88).
    let after_error = format!(
        "Utf8CharMerger {{ buffered: [161, 146, 88], inner: {:?} }}",
        bytes[4..].iter()
    );
    assert_eq!(format!("{:?}", merger), after_error);

    // The first buffered byte (0xa1) cannot start a sequence.
    let second = merger.next();
    assert_eq!(second, Some(Err(Utf8(FirstByte(ContinuationByte)))));

    // The wrapped iterator has not advanced past 0xcc.
    let mut rest = merger.into_inner();
    assert_eq!(rest.next(), Some(&b'\xcc'));
}
42
/// Exercises `utf8char_indices()` on a slice that starts with a well-formed
/// four-byte sequence encoding a too-high codepoint, checking the size hints,
/// the `Debug` representation and the number of items that follow.
#[test] fn utf8chardecoder() {
    let bytes = b"\xf4\xbf\x80\x80XY\xcc\xbbZ_";
    let mut decoder = bytes.utf8char_indices();

    let at_start = format!("Utf8CharDecoder {{ bytes[0..]: {:?} }}", &bytes);
    assert_eq!(decoder.size_hint(), (2, Some(10)));
    assert_eq!(format!("{:?}", decoder), at_start);

    // The error is reported at index 0 and only one byte is skipped.
    let first = decoder.next();
    assert_eq!(first, Some((0, Err(Codepoint(TooHigh)), 1)));
    let after_first = format!("Utf8CharDecoder {{ bytes[1..]: {:?} }}", &bytes[1..]);
    assert_eq!(format!("{:?}", decoder), after_first);
    assert_eq!(decoder.size_hint(), (2, Some(9)));

    // Remaining items: three stray continuation bytes, 'X', 'Y', one
    // two-byte character, 'Z' and '_'.
    let remaining = decoder.count();
    assert_eq!(remaining, 8);
}
60
/// Exercises `to_utf16chars()` on units that begin with an unpaired leading
/// surrogate, checking the size hints, the `Debug` representation and that
/// `into_inner()` returns the wrapped iterator at the expected position.
#[test] fn utf16charmerger() {
    let units = [0xd800, 'x' as u16, 0xd900, 0xdfff, 'λ' as u16];
    let mut merger = units.iter().to_utf16chars();

    let untouched = format!("Utf16CharMerger {{ buffered: None, inner: {:?} }}", units.iter());
    assert_eq!(merger.size_hint(), (2, Some(5)));
    assert_eq!(format!("{:?}", merger), untouched);

    // 0xd800 is followed by 'x', which is not a trailing surrogate.
    let first = merger.next();
    assert_eq!(first, Some(Err(UnmatchedLeadingSurrogate)));
    assert_eq!(merger.size_hint(), (1, Some(4)));
    // The unit read while looking for the pair ('x' == 120) stays buffered.
    let after_error = format!(
        "Utf16CharMerger {{ buffered: Some(120), inner: {:?} }}",
        units[2..].iter()
    );
    assert_eq!(format!("{:?}", merger), after_error);

    let mut rest = merger.into_inner();
    assert_eq!(rest.next(), Some(&0xd900));
}
77
/// Exercises `utf16char_indices()` on units that begin with an unpaired
/// leading surrogate, checking the size hints, the `Debug` representation
/// and the number of items that follow the first error.
#[test] fn utf16chardecoder() {
    let units = [0xd800, 'x' as u16, 0xd900, 0xdfff, 'λ' as u16];
    let mut decoder = units.utf16char_indices();

    let at_start = format!("Utf16CharDecoder {{ units[0..]: {:?} }}", &units);
    assert_eq!(decoder.size_hint(), (2, Some(5)));
    assert_eq!(format!("{:?}", decoder), at_start);

    // The error is reported at index 0 and only one unit is skipped.
    let first = decoder.next();
    assert_eq!(first, Some((0, Err(UnmatchedLeadingSurrogate), 1)));
    let after_first = format!("Utf16CharDecoder {{ units[1..]: {:?} }}", &units[1..]);
    assert_eq!(format!("{:?}", decoder), after_first);
    assert_eq!(decoder.size_hint(), (2, Some(4)));

    // Remaining items: 'x', the surrogate pair and 'λ'.
    let remaining = decoder.count();
    assert_eq!(remaining, 3);
}
95
96
97
98 /// Tests for ensuring that iterators which also implement Read support
99 /// interleaving calls of `read()` and `next()`, and that they implement Read
100 /// correctly (support any buffer size at any time).
101
#[test] fn read_single_ascii() {
    // 'a' encodes to exactly one byte, so after the first successful read
    // every further read must report zero bytes.
    let ascii = 'a'.to_utf8();
    assert_eq!(ascii.len(), 1);
    let expected = AsRef::<[u8]>::as_ref(&ascii);

    for chunk in 1..5 {
        let mut buf = [b'E'; 6];
        let mut reader = ascii.into_iter();
        let mut total = 0;
        for _ in 0..4 {
            // Reading into an empty buffer must succeed and read nothing.
            assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0);

            let end = total + chunk;
            let got = reader.read(&mut buf[total..end]).unwrap();
            assert_eq!(got, min(1 - total, chunk));
            total += got;

            // Nothing beyond the reported length may have been overwritten,
            // and everything before it must match the encoded character.
            assert!(buf[total..].iter().all(|&b| b == b'E'));
            assert_eq!(buf[..total], expected[..total]);
        }
        assert_eq!(total, 1);
    }
}
120
/// Reads the two-byte encoding of 'ä' through `Read` with request sizes of
/// 1 to 4 bytes, interleaved with zero-length reads, and checks that each
/// call reports and writes exactly the bytes that are still outstanding.
#[test] fn read_single_nonascii() {
    let two_byte = 'ä'.to_utf8();
    assert_eq!(two_byte.len(), 2);
    let expected = AsRef::<[u8]>::as_ref(&two_byte);

    for chunk in 1..5 {
        let mut buf = [b'E'; 6];
        let mut reader = two_byte.into_iter();
        let mut total = 0;
        for _ in 0..4 {
            // Reading into an empty buffer must succeed and read nothing.
            assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0);

            let end = total + chunk;
            let got = reader.read(&mut buf[total..end]).unwrap();
            assert_eq!(got, min(2 - total, chunk));
            total += got;

            // The tail of the buffer must still hold the filler byte and the
            // head must match the encoded character so far.
            assert!(buf[total..].iter().all(|&b| b == b'E'));
            assert_eq!(buf[..total], expected[..total]);
        }
        assert_eq!(total, 2);
    }
}
139
140
/// Reads a mixed-width string through `Utf8CharSplitter`'s `Read`
/// implementation in fixed chunks of 2 to 5 bytes and checks, after every
/// call, both the reported byte count and the bytes written so far.
#[test] fn utf8charsplitter_read_all_sizes() {
    let s = "1111\u{104444}\u{222}1\u{833}1111\u{100004}";
    assert_eq!(s.len() % 3, 1);
    // NOTE(review): the original carried a commented-out upper bound of
    // `s.len()+4` for this range; only sizes 2..6 are exercised.
    for size in 2..6 {
        // Use a fresh filler-initialised buffer for every chunk size.
        // Reusing one buffer across sizes would leave the bytes of `s` from
        // the previous pass in place, so the content comparison below could
        // succeed even if `read()` reported a count without writing anything.
        let mut buf = vec![b'E'; s.len()+6];
        let mut reader = Utf8CharSplitter::from(s.chars().map(|c| c.to_utf8() ));
        for (offset, part) in s.as_bytes().chunks(size).enumerate() {
            let start = offset*size;
            // For the final, shorter chunk offer the whole rest of the
            // buffer: the reader must stop by itself at the end of the input.
            let read_to = if part.len() == size {start+size} else {buf.len()};
            assert_eq!(reader.read(&mut buf[start..read_to]).unwrap(), part.len());
            let filled = start+part.len();
            assert_eq!(&buf[..filled], &s.as_bytes()[..filled]);
        }
        // Once exhausted, the reader reports end of input and the padding
        // after the string is still untouched.
        assert_eq!(reader.read(&mut buf[..]).unwrap(), 0);
        assert!(buf[s.len()..].iter().all(|&b| b==b'E' ));
    }
}
156
/// Walks through every byte of a mixed-width string, alternating between
/// `Iterator::next()` and a one-byte `Read::read()` on the same
/// `Utf8CharSplitter`, in two runs that swap which positions use which
/// call, and checks that the bytes come out in order either way.
#[test] fn utf8charsplitter_alternate_iter_read() {
    let s = "1111\u{104444}\u{222}1\u{833}1111\u{100004}";
    let mut buf = [b'0'; 10];
    for n in 0..2 {
        // need to collect to test size_hint()
        // because chars().size_hint() returns ((bytes+3)/4, Some(bytes))
        let u8chars: Vec<_> = s.chars().map(|ch| ch.to_utf8()).collect();
        let mut splitter = Utf8CharSplitter::from(u8chars.into_iter());

        for (idx, expected) in s.bytes().enumerate() {
            // Number of continuation bytes (0b10xxxxxx) from here until the
            // next character starts; only used for the progress output.
            let until_next = s.as_bytes()[idx..]
                .iter()
                .take_while(|b| (**b >> 6) == 0b10u8)
                .count();
            let remaining_chars = s[idx+until_next..].chars().count();
            println!("{}. run: byte {:02} of {}, remaining: {:02}+{}: 0b{:08b} = {:?}",
                     n, idx, s.len(), remaining_chars, until_next, expected, expected as char);

            // A zero-length read must never consume anything.
            let mut empty: [u8; 0] = [];
            assert_eq!(splitter.read(&mut empty).unwrap(), 0);

            if idx % 2 != n {
                assert_eq!(splitter.read(&mut buf[..1]).unwrap(), 1);
                assert_eq!(buf[0], expected);
            } else {
                assert_eq!(splitter.next(), Some(expected));
            }
        }

        // Everything has been consumed through both interfaces.
        assert_eq!(splitter.size_hint(), (0, Some(0)));
        assert_eq!(splitter.next(), None);
        assert_eq!(splitter.read(&mut buf[..]).unwrap(), 0);
    }
}
182 }