/* Copyright 2018 The encode_unicode Developers
 *
 * Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
 * http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
 * http://opensource.org/licenses/MIT>, at your option. This file may not be
 * copied, modified, or distributed except according to those terms.
 */
#![cfg(feature="std")]

extern crate encode_unicode;

use encode_unicode::{IterExt, SliceExt, CharExt};
use encode_unicode::iterator::Utf8CharSplitter;
use encode_unicode::error::InvalidUtf8Slice::*;
use encode_unicode::error::InvalidUtf8::*;
use encode_unicode::error::InvalidUtf8FirstByte::*;
use encode_unicode::error::InvalidCodepoint::*;
use encode_unicode::error::Utf16PairError::*;
use std::cmp::min;
use std::io::Read;
/// Feeds malformed UTF-8 bytes through `to_utf8chars()` and checks the
/// reported errors, `size_hint()`, the `Debug` output and `into_inner()`.
#[test]
fn utf8charmerger() {
    // 0xf0 announces a four-byte sequence, but the byte at index 3 is ASCII
    // 'X' instead of a continuation byte; a two-byte sequence follows.
    let slice = b"\xf0\xa1\x92X\xcc\xbb";
    let mut iter = slice.iter().to_utf8chars();
    assert_eq!(iter.size_hint(), (1, Some(6)));
    assert_eq!(
        format!("{:?}", &iter),
        format!("Utf8CharMerger {{ buffered: [], inner: {:?} }}", slice.iter())
    );

    assert_eq!(iter.next(), Some(Err(Utf8(NotAContinuationByte(3)))));
    assert_eq!(iter.size_hint(), (0, Some(5)));
    // The three bytes after the rejected first byte (0xa1, 0x92, b'X') show
    // up as buffered, while the inner iterator has advanced to index 4.
    assert_eq!(
        format!("{:?}", &iter),
        format!("Utf8CharMerger {{ buffered: [161, 146, 88], inner: {:?} }}", slice[4..].iter())
    );

    assert_eq!(iter.next(), Some(Err(Utf8(FirstByte(ContinuationByte)))));
    // Unwrapping hands back the inner iterator positioned at the 0xcc byte.
    assert_eq!(iter.into_inner().next(), Some(&b'\xcc'));
}
/// Decodes a byte slice that starts with an out-of-range sequence via
/// `utf8char_indices()` and checks the first item, `size_hint()`, the `Debug`
/// output and the number of remaining items.
#[test]
fn utf8chardecoder() {
    let slice = b"\xf4\xbf\x80\x80XY\xcc\xbbZ_";
    let mut iter = slice.utf8char_indices();
    assert_eq!(iter.size_hint(), (2, Some(10)));
    assert_eq!(
        format!("{:?}", &iter),
        format!("Utf8CharDecoder {{ bytes[0..]: {:?} }}", &slice)
    );

    // NOTE(review): the item looks like (start index, result, bytes consumed);
    // the Debug output below confirms only one byte was skipped.
    assert_eq!(iter.next(), Some((0, Err(Codepoint(TooHigh)), 1)));
    assert_eq!(
        format!("{:?}", &iter),
        format!("Utf8CharDecoder {{ bytes[1..]: {:?} }}", &slice[1..])
    );
    assert_eq!(iter.size_hint(), (2, Some(9)));
    assert_eq!(iter.count(), 8);
}
/// Feeds UTF-16 units containing an unpaired leading surrogate through
/// `to_utf16chars()` and checks the error, `size_hint()`, the `Debug` output
/// and `into_inner()`.
#[test]
fn utf16charmerger() {
    // 0xd800 is a leading surrogate that is followed by 'x' rather than a
    // trailing surrogate; 0xd900 0xdfff afterwards forms a proper pair.
    let slice = [0xd800, 'x' as u16, 0xd900, 0xdfff, 'λ' as u16];
    let mut iter = slice.iter().to_utf16chars();
    assert_eq!(iter.size_hint(), (2, Some(5)));
    assert_eq!(
        format!("{:?}", &iter),
        format!("Utf16CharMerger {{ buffered: None, inner: {:?} }}", slice.iter())
    );

    assert_eq!(iter.next(), Some(Err(UnmatchedLeadingSurrogate)));
    assert_eq!(iter.size_hint(), (1, Some(4)));
    // The unit that failed to pair up ('x' == 120) is kept as buffered.
    assert_eq!(
        format!("{:?}", &iter),
        format!("Utf16CharMerger {{ buffered: Some(120), inner: {:?} }}", slice[2..].iter())
    );

    assert_eq!(iter.into_inner().next(), Some(&0xd900));
}
/// Decodes a `u16` slice containing an unpaired leading surrogate via
/// `utf16char_indices()` and checks the first item, `size_hint()`, the
/// `Debug` output and the number of remaining items.
#[test]
fn utf16chardecoder() {
    let slice = [0xd800, 'x' as u16, 0xd900, 0xdfff, 'λ' as u16];
    let mut iter = slice.utf16char_indices();
    assert_eq!(iter.size_hint(), (2, Some(5)));
    assert_eq!(
        format!("{:?}", &iter),
        format!("Utf16CharDecoder {{ units[0..]: {:?} }}", &slice)
    );

    // NOTE(review): the item looks like (start index, result, units consumed);
    // the Debug output below confirms only one unit was skipped.
    assert_eq!(iter.next(), Some((0, Err(UnmatchedLeadingSurrogate), 1)));
    assert_eq!(
        format!("{:?}", &iter),
        format!("Utf16CharDecoder {{ units[1..]: {:?} }}", &slice[1..])
    );
    assert_eq!(iter.size_hint(), (2, Some(4)));
    assert_eq!(iter.count(), 3);
}
// Tests for ensuring that iterators which also implement `Read` support
// interleaving calls of `read()` and `next()`, and that they implement `Read`
// correctly (support any buffer size at any time).

/// Reads the one-byte encoding of `'a'` through the `Read` impl of its byte
/// iterator, using several destination sizes, and checks the byte counts and
/// that untouched buffer positions keep their fill value.
#[test]
fn read_single_ascii() {
    let uc = 'a'.to_utf8();
    assert_eq!(uc.len(), 1);
    // NOTE(review): the body refers to `chunk` and `written`, but the loop
    // headers and the bookkeeping for `written` were absent from the reviewed
    // text. The ranges below are a reconstruction, picked so that
    // `written + chunk` stays within `buf.len()` — confirm against upstream.
    for chunk in 1..5 {
        let mut buf = [b'E'; 6];
        let mut iter = uc.into_iter();
        let mut written = 0;
        for _ in 0..4 {
            // An empty destination must report zero bytes.
            assert_eq!(iter.read(&mut buf[..0]).unwrap(), 0);
            let wrote = iter.read(&mut buf[written..written + chunk]).unwrap();
            assert_eq!(wrote, min(1 - written, chunk));
            written += wrote;
            // Nothing beyond the bytes read so far may have been overwritten.
            for &b in &buf[written..] {
                assert_eq!(b, b'E');
            }
            assert_eq!(buf[..written], AsRef::<[u8]>::as_ref(&uc)[..written]);
        }
        assert_eq!(written, 1);
    }
}
/// Reads the two-byte encoding of `'ä'` through the `Read` impl of its byte
/// iterator, using several destination sizes, and checks the byte counts and
/// that untouched buffer positions keep their fill value.
#[test]
fn read_single_nonascii() {
    let uc = 'ä'.to_utf8();
    assert_eq!(uc.len(), 2);
    // NOTE(review): the body refers to `chunk` and `written`, but the loop
    // headers and the bookkeeping for `written` were absent from the reviewed
    // text. The ranges below are a reconstruction, picked so that
    // `written + chunk` stays within `buf.len()` — confirm against upstream.
    for chunk in 1..5 {
        let mut buf = [b'E'; 6];
        let mut iter = uc.into_iter();
        let mut written = 0;
        for _ in 0..4 {
            // An empty destination must report zero bytes.
            assert_eq!(iter.read(&mut buf[..0]).unwrap(), 0);
            let wrote = iter.read(&mut buf[written..written + chunk]).unwrap();
            assert_eq!(wrote, min(2 - written, chunk));
            written += wrote;
            // Nothing beyond the bytes read so far may have been overwritten.
            for &b in &buf[written..] {
                assert_eq!(b, b'E');
            }
            assert_eq!(buf[..written], AsRef::<[u8]>::as_ref(&uc)[..written]);
        }
        assert_eq!(written, 2);
    }
}
/// Reads a mixed-width string out of a `Utf8CharSplitter` in fixed-size
/// pieces and checks that every `read()` fills exactly the expected bytes and
/// never writes past them.
#[test]
fn utf8charsplitter_read_all_sizes() {
    let s = "1111\u{104444}\u{222}1\u{833}1111\u{100004}";
    assert!(s.len() % 3 == 1);
    // Six spare bytes of fill value allow detecting writes past the string.
    let mut buf = vec![b'E'; s.len() + 6];
    // Only small piece sizes are exercised; an `s.len()+4` upper bound was
    // left disabled in the original.
    for size in 2..6 {
        let mut reader = Utf8CharSplitter::from(s.chars().map(|c| c.to_utf8()));
        for (offset, part) in s.as_bytes().chunks(size).enumerate() {
            // A short final piece gets the whole rest of `buf` as destination,
            // so the reader itself has to stop at the end of the input.
            let read_to = if part.len() == size {
                (offset + 1) * size
            } else {
                buf.len()
            };
            assert_eq!(reader.read(&mut buf[offset * size..read_to]).unwrap(), part.len());
            assert_eq!(
                &buf[..offset * size + part.len()],
                &s.as_bytes()[..offset * size + part.len()]
            );
        }
        assert_eq!(reader.read(&mut buf[..]).unwrap(), 0);
        assert!(buf[s.len()..].iter().all(|&b| b == b'E'));
    }
}
/// Drains a `Utf8CharSplitter` one byte at a time, mixing `Iterator::next()`
/// with `Read::read()`, and checks that every byte comes out in order and
/// that the splitter reports exhaustion afterwards.
#[test]
fn utf8charsplitter_alternate_iter_read() {
    let s = "1111\u{104444}\u{222}1\u{833}1111\u{100004}";
    let mut buf = [b'0'; 10];
    // NOTE(review): `n` is printed below, but the header binding it and the
    // condition choosing between `next()` and `read()` were absent from the
    // reviewed text. Two runs with `i % 2 == n` are a reconstruction that
    // makes each byte go through both code paths — confirm against upstream.
    for n in 0..2 {
        // need to collect to test size_hint()
        // because chars().size_hint() returns ((bytes+3)/4, Some(bytes))
        let u8chars = s.chars().map(|c| c.to_utf8()).collect::<Vec<_>>();
        let mut iter: Utf8CharSplitter<_, _> = u8chars.into_iter().into();
        for (i, byte) in s.bytes().enumerate() {
            // Number of continuation bytes (0b10xxxxxx) before the next char.
            let until_next = s.as_bytes()[i..]
                .iter()
                .take_while(|&b| (b >> 6) == 0b10u8)
                .count();
            let remaining_chars = s[i + until_next..].chars().count();
            println!(
                "{}. run: byte {:02} of {}, remaining: {:02}+{}: 0b{:08b} = {:?}",
                n, i, s.len(), remaining_chars, until_next, byte, byte as char
            );
            // An empty destination must report zero bytes.
            assert_eq!(iter.read(&mut [][..]).unwrap(), 0);
            if i % 2 == n {
                assert_eq!(iter.next(), Some(byte));
            } else {
                assert_eq!(iter.read(&mut buf[..1]).unwrap(), 1);
                assert_eq!(buf[0], byte);
            }
        }
        assert_eq!(iter.size_hint(), (0, Some(0)));
        assert_eq!(iter.next(), None);
        assert_eq!(iter.read(&mut buf[..]).unwrap(), 0);
    }
}