use core::str::next_code_point;
-use borrow::Cow;
-use char;
-use fmt;
-use hash::{Hash, Hasher};
-use iter::FromIterator;
-use mem;
-use ops;
-use rc::Rc;
-use slice;
-use str;
-use sync::Arc;
-use sys_common::AsInner;
+use crate::borrow::Cow;
+use crate::char;
+use crate::fmt;
+use crate::hash::{Hash, Hasher};
+use crate::iter::FromIterator;
+use crate::mem;
+use crate::ops;
+use crate::rc::Rc;
+use crate::slice;
+use crate::str;
+use crate::sync::Arc;
+use crate::sys_common::AsInner;
const UTF8_REPLACEMENT_CHARACTER: &str = "\u{FFFD}";
/// a code point that is not a surrogate (U+D800 to U+DFFF).
#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Copy)]
pub struct CodePoint {
// Raw code point number. `from_u32` in this file only builds a `CodePoint`
// for values in `0..=0x10FFFF`, a range that includes the surrogates.
- value: u32
+ value: u32,
}
/// Format the code point as `U+` followed by four to six hexadecimal digits.
/// Example: `U+1F4A9`
impl fmt::Debug for CodePoint {
#[inline]
- fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
// `{:04X}` prints uppercase hex, zero-padded to at least four digits.
write!(formatter, "U+{:04X}", self.value)
}
}
// Checked constructor: yields `Some` for any value in `0..=0x10FFFF`
// (the surrogates U+D800..=U+DFFF lie inside that range and are accepted)
// and `None` for anything larger.
#[inline]
pub fn from_u32(value: u32) -> Option<CodePoint> {
match value {
- 0 ..= 0x10FFFF => Some(CodePoint { value }),
- _ => None
+ 0..=0x10FFFF => Some(CodePoint { value }),
+ _ => None,
}
}
// Converts to a `char`, or returns `None` when the value is a surrogate
// (U+D800..=U+DFFF).
#[inline]
pub fn to_char(&self) -> Option<char> {
// NOTE(review): the unchecked conversion below is only reached for
// non-surrogate values; it also relies on `self.value <= 0x10FFFF`, which
// `from_u32` enforces — TODO confirm every other constructor upholds it.
match self.value {
- 0xD800 ..= 0xDFFF => None,
- _ => Some(unsafe { char::from_u32_unchecked(self.value) })
+ 0xD800..=0xDFFF => None,
+ _ => Some(unsafe { char::from_u32_unchecked(self.value) }),
}
}
/// if they’re not in a surrogate pair.
#[derive(Eq, PartialEq, Ord, PartialOrd, Clone)]
pub struct Wtf8Buf {
// Owned encoded bytes; the tests in this file compare this field directly
// against WTF-8 byte-string literals.
- bytes: Vec<u8>
+ bytes: Vec<u8>,
}
impl ops::Deref for Wtf8Buf {
/// Example: `"a\u{d800}"` for a string with code points [U+0061, U+D800]
impl fmt::Debug for Wtf8Buf {
#[inline]
- fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
// `&**self` goes through the `ops::Deref` impl for `Wtf8Buf`, so the output
// is produced by the `Debug` impl of the deref target.
fmt::Debug::fmt(&**self, formatter)
}
}
Wtf8Buf { bytes: Vec::new() }
}
- /// Creates a new, empty WTF-8 string with pre-allocated capacity for `n` bytes.
+ /// Creates a new, empty WTF-8 string with pre-allocated capacity for `capacity` bytes.
// The argument is handed straight to `Vec::with_capacity`; no bytes are
// written, so the new string has length 0.
#[inline]
- pub fn with_capacity(n: usize) -> Wtf8Buf {
- Wtf8Buf { bytes: Vec::with_capacity(n) }
+ pub fn with_capacity(capacity: usize) -> Wtf8Buf {
+ Wtf8Buf { bytes: Vec::with_capacity(capacity) }
}
/// Creates a WTF-8 string from a UTF-8 `String`.
Err(surrogate) => {
let surrogate = surrogate.unpaired_surrogate();
// Surrogates are known to be in the code point range.
- let code_point = unsafe {
- CodePoint::from_u32_unchecked(surrogate as u32)
- };
+ let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) };
// Skip the WTF-8 concatenation check,
// surrogate pairs are already decoded by decode_utf16
string.push_code_point_unchecked(code_point)
/// Copied from String::push
/// This does **not** include the WTF-8 concatenation check.
fn push_code_point_unchecked(&mut self, code_point: CodePoint) {
- let c = unsafe {
- char::from_u32_unchecked(code_point.value)
- };
+ let c = unsafe { char::from_u32_unchecked(code_point.value) };
let mut bytes = [0; 4];
let bytes = c.encode_utf8(&mut bytes).as_bytes();
self.bytes.extend_from_slice(bytes)
self.push_char(decode_surrogate_pair(lead, trail));
self.bytes.extend_from_slice(other_without_trail_surrogate);
}
- _ => self.bytes.extend_from_slice(&other.bytes)
+ _ => self.bytes.extend_from_slice(&other.bytes),
}
}
let len_without_lead_surrogate = self.len() - 3;
self.bytes.truncate(len_without_lead_surrogate);
self.push_char(decode_surrogate_pair(lead, trail as u16));
- return
+ return;
}
}
pos = surrogate_pos + 3;
self.bytes[surrogate_pos..pos]
.copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
- },
- None => return unsafe { String::from_utf8_unchecked(self.bytes) }
+ }
+ None => return unsafe { String::from_utf8_unchecked(self.bytes) },
}
}
}
/// This replaces surrogate code point pairs with supplementary code points,
/// like concatenating ill-formed UTF-16 strings effectively would.
impl FromIterator<CodePoint> for Wtf8Buf {
- fn from_iter<T: IntoIterator<Item=CodePoint>>(iter: T) -> Wtf8Buf {
+ fn from_iter<T: IntoIterator<Item = CodePoint>>(iter: T) -> Wtf8Buf {
let mut string = Wtf8Buf::new();
string.extend(iter);
string
/// This replaces surrogate code point pairs with supplementary code points,
/// like concatenating ill-formed UTF-16 strings effectively would.
impl Extend<CodePoint> for Wtf8Buf {
- fn extend<T: IntoIterator<Item=CodePoint>>(&mut self, iter: T) {
+ fn extend<T: IntoIterator<Item = CodePoint>>(&mut self, iter: T) {
let iterator = iter.into_iter();
// Only the lower bound of the size hint is used; the upper bound is ignored.
let (low, _high) = iterator.size_hint();
// Lower bound of one byte per code point (ASCII only)
self.bytes.reserve(low);
// Every code point goes through `self.push`; the loop form and the
// `for_each` form feed it the same items in the same order.
- for code_point in iterator {
- self.push(code_point);
- }
+ iterator.for_each(move |code_point| self.push(code_point));
}
}
/// if they’re not in a surrogate pair.
#[derive(Eq, Ord, PartialEq, PartialOrd)]
pub struct Wtf8 {
// Unsized byte slice, which makes `Wtf8` itself unsized; the functions in
// this file take it by reference (e.g. `&Wtf8` in `slice_unchecked`).
- bytes: [u8]
+ bytes: [u8],
}
// Exposes the underlying bytes as a borrowed `&[u8]`; nothing is copied.
impl AsInner<[u8]> for Wtf8 {
- fn as_inner(&self) -> &[u8] { &self.bytes }
+ fn as_inner(&self) -> &[u8] {
+ &self.bytes
+ }
}
/// Format the slice with double quotes,
/// and surrogates as `\u{…}` escapes holding their lowercase hexadecimal digits.
/// Example: `"a\u{d800}"` for a slice with code points [U+0061, U+D800]
impl fmt::Debug for Wtf8 {
- fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
- fn write_str_escaped(f: &mut fmt::Formatter, s: &str) -> fmt::Result {
- use fmt::Write;
+ fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
+ use crate::fmt::Write;
for c in s.chars().flat_map(|c| c.escape_debug()) {
f.write_char(c)?
}
formatter.write_str("\"")?;
let mut pos = 0;
while let Some((surrogate_pos, surrogate)) = self.next_surrogate(pos) {
- write_str_escaped(
- formatter,
- unsafe { str::from_utf8_unchecked(
- &self.bytes[pos .. surrogate_pos]
- )},
- )?;
+ write_str_escaped(formatter, unsafe {
+ str::from_utf8_unchecked(&self.bytes[pos..surrogate_pos])
+ })?;
write!(formatter, "\\u{{{:x}}}", surrogate)?;
pos = surrogate_pos + 3;
}
- write_str_escaped(
- formatter,
- unsafe { str::from_utf8_unchecked(&self.bytes[pos..]) },
- )?;
+ write_str_escaped(formatter, unsafe { str::from_utf8_unchecked(&self.bytes[pos..]) })?;
formatter.write_str("\"")
}
}
impl fmt::Display for Wtf8 {
- fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
let wtf8_bytes = &self.bytes;
let mut pos = 0;
loop {
match self.next_surrogate(pos) {
Some((surrogate_pos, _)) => {
formatter.write_str(unsafe {
- str::from_utf8_unchecked(&wtf8_bytes[pos .. surrogate_pos])
+ str::from_utf8_unchecked(&wtf8_bytes[pos..surrogate_pos])
})?;
formatter.write_str(UTF8_REPLACEMENT_CHARACTER)?;
pos = surrogate_pos + 3;
- },
+ }
None => {
- let s = unsafe {
- str::from_utf8_unchecked(&wtf8_bytes[pos..])
- };
- if pos == 0 {
- return s.fmt(formatter)
- } else {
- return formatter.write_str(s)
- }
+ let s = unsafe { str::from_utf8_unchecked(&wtf8_bytes[pos..]) };
+ if pos == 0 { return s.fmt(formatter) } else { return formatter.write_str(s) }
}
}
}
// Returns the byte at `position` when it is ASCII (0x00..=0x7F) and the
// sentinel 0xFF for any other byte. The slice index panics if `position`
// is out of bounds.
#[inline]
pub fn ascii_byte_at(&self, position: usize) -> u8 {
match self.bytes[position] {
- ascii_byte @ 0x00 ..= 0x7F => ascii_byte,
- _ => 0xFF
+ ascii_byte @ 0x00..=0x7F => ascii_byte,
+ _ => 0xFF,
}
}
/// Returns an iterator for the string’s code points.
#[inline]
- pub fn code_points(&self) -> Wtf8CodePoints {
+ pub fn code_points(&self) -> Wtf8CodePoints<'_> {
// Only wraps a borrowed byte iterator over `self.bytes`; nothing is decoded here.
Wtf8CodePoints { bytes: self.bytes.iter() }
}
/// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”).
///
/// This only copies the data if necessary (if it contains any surrogate).
- pub fn to_string_lossy(&self) -> Cow<str> {
+ pub fn to_string_lossy(&self) -> Cow<'_, str> {
let surrogate_pos = match self.next_surrogate(0) {
None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(&self.bytes) }),
Some((pos, _)) => pos,
loop {
match self.next_surrogate(pos) {
Some((surrogate_pos, _)) => {
- utf8_bytes.extend_from_slice(&wtf8_bytes[pos .. surrogate_pos]);
+ utf8_bytes.extend_from_slice(&wtf8_bytes[pos..surrogate_pos]);
utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes());
pos = surrogate_pos + 3;
- },
+ }
None => {
utf8_bytes.extend_from_slice(&wtf8_bytes[pos..]);
- return Cow::Owned(unsafe { String::from_utf8_unchecked(utf8_bytes) })
+ return Cow::Owned(unsafe { String::from_utf8_unchecked(utf8_bytes) });
}
}
}
/// calling `Wtf8Buf::from_ill_formed_utf16` on the resulting code units
/// would always return the original WTF-8 string.
#[inline]
- pub fn encode_wide(&self) -> EncodeWide {
+ pub fn encode_wide(&self) -> EncodeWide<'_> {
// `extra` starts at 0; the iterator later stores the second UTF-16 unit of
// a two-unit code point there (`self.extra = buf[1]`), so 0 presumably
// means "nothing pending" — TODO confirm against `EncodeWide::next`.
EncodeWide { code_points: self.code_points(), extra: 0 }
}
} else if b == 0xED {
match (iter.next(), iter.next()) {
(Some(&b2), Some(&b3)) if b2 >= 0xA0 => {
- return Some((pos, decode_surrogate(b2, b3)))
+ return Some((pos, decode_surrogate(b2, b3)));
}
- _ => pos += 3
+ _ => pos += 3,
}
} else if b < 0xF0 {
iter.next();
// Returns the surrogate decoded from the last three bytes when they match
// 0xED, a second byte in 0xA0..=0xAF, then any third byte; `None` otherwise.
fn final_lead_surrogate(&self) -> Option<u16> {
let len = self.len();
// Fewer than three bytes cannot hold the pattern; this guard also keeps
// `len - 3` below from underflowing.
if len < 3 {
- return None
+ return None;
}
match &self.bytes[(len - 3)..] {
&[0xED, b2 @ 0xA0..=0xAF, b3] => Some(decode_surrogate(b2, b3)),
- _ => None
+ _ => None,
}
}
// Returns the surrogate decoded from the first three bytes when they match
// 0xED, a second byte in 0xB0..=0xBF, then any third byte; `None` otherwise.
fn initial_trail_surrogate(&self) -> Option<u16> {
let len = self.len();
// Fewer than three bytes cannot hold the pattern; this guard also keeps the
// `..3` slice below in bounds.
if len < 3 {
- return None
+ return None;
}
match &self.bytes[..3] {
&[0xED, b2 @ 0xB0..=0xBF, b3] => Some(decode_surrogate(b2, b3)),
- _ => None
+ _ => None,
}
}
}
}
-
/// Returns a slice of the given string for the byte range [`begin`..`end`).
///
/// # Panics
#[inline]
fn index(&self, range: ops::Range<usize>) -> &Wtf8 {
// is_code_point_boundary checks that the index is in [0, .len()]
- if range.start <= range.end &&
- is_code_point_boundary(self, range.start) &&
- is_code_point_boundary(self, range.end) {
+ if range.start <= range.end
+ && is_code_point_boundary(self, range.start)
+ && is_code_point_boundary(self, range.end)
+ {
unsafe { slice_unchecked(self, range.start, range.end) }
} else {
slice_error_fail(self, range.start, range.end)
/// Copied from core::str::StrPrelude::is_char_boundary
#[inline]
pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool {
- if index == slice.len() { return true; }
+ if index == slice.len() {
+ return true;
+ }
match slice.bytes.get(index) {
None => false,
Some(&b) => b < 128 || b >= 192,
// Builds a `&Wtf8` over bytes `begin..end` of `s` without any checks.
// The pointer arithmetic requires `begin <= end` and both within `s.bytes`;
// the `index` impl in this file additionally verifies that both ends are
// code point boundaries before calling this.
#[inline]
pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 {
// memory layout of an &[u8] and &Wtf8 are the same
- Wtf8::from_bytes_unchecked(slice::from_raw_parts(
- s.bytes.as_ptr().add(begin),
- end - begin
- ))
+ Wtf8::from_bytes_unchecked(slice::from_raw_parts(s.bytes.as_ptr().add(begin), end - begin))
}
/// Copied from core::str::raw::slice_error_fail
#[inline(never)]
pub fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! {
// Never returns: either this assertion fails or the `panic!` below fires.
// The message renders `s` with `{:?}`, i.e. through its `Debug` impl.
assert!(begin <= end);
- panic!("index {} and/or {} in `{:?}` do not lie on character boundary",
- begin, end, s);
+ panic!("index {} and/or {} in `{:?}` do not lie on character boundary", begin, end, s);
}
/// Iterator for the code points of a WTF-8 string.
/// Created with the method `.code_points()`.
#[derive(Clone)]
pub struct Wtf8CodePoints<'a> {
// Byte iterator borrowed from the source string for `'a`; `code_points`
// fills it with `self.bytes.iter()`.
- bytes: slice::Iter<'a, u8>
+ bytes: slice::Iter<'a, u8>,
}
impl<'a> Iterator for Wtf8CodePoints<'a> {
#[derive(Clone)]
pub struct EncodeWide<'a> {
code_points: Wtf8CodePoints<'a>,
// Receives `buf[1]` (the second UTF-16 unit) when a code point encodes to
// two units; `encode_wide` initialises it to 0.
- extra: u16
+ extra: u16,
}
// Copied from libunicode/u_str.rs
let mut buf = [0; 2];
self.code_points.next().map(|code_point| {
- let c = unsafe {
- char::from_u32_unchecked(code_point.value)
- };
+ let c = unsafe { char::from_u32_unchecked(code_point.value) };
let n = c.encode_utf16(&mut buf).len();
if n == 2 {
self.extra = buf[1];
}
impl Wtf8 {
// Uppercases in place by delegating to `make_ascii_uppercase` on the
// underlying byte slice.
- pub fn make_ascii_uppercase(&mut self) { self.bytes.make_ascii_uppercase() }
+ pub fn make_ascii_uppercase(&mut self) {
+ self.bytes.make_ascii_uppercase()
+ }
}
#[cfg(test)]
mod tests {
- use borrow::Cow;
use super::*;
+ use crate::borrow::Cow;
#[test]
fn code_point_from_u32() {
#[test]
fn code_point_to_u32() {
- fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
assert_eq!(c(0).to_u32(), 0);
assert_eq!(c(0xD800).to_u32(), 0xD800);
assert_eq!(c(0x10FFFF).to_u32(), 0x10FFFF);
#[test]
fn code_point_to_char() {
- fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
assert_eq!(c(0x61).to_char(), Some('a'));
assert_eq!(c(0x1F4A9).to_char(), Some('💩'));
assert_eq!(c(0xD800).to_char(), None);
#[test]
fn code_point_to_char_lossy() {
- fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
assert_eq!(c(0x61).to_char_lossy(), 'a');
assert_eq!(c(0x1F4A9).to_char_lossy(), '💩');
assert_eq!(c(0xD800).to_char_lossy(), '\u{FFFD}');
// Checks that `Wtf8Buf::from_str` stores the input's UTF-8 bytes unchanged,
// for both the empty string and a string with 1-, 2- and 4-byte characters.
#[test]
fn wtf8buf_from_str() {
assert_eq!(Wtf8Buf::from_str("").bytes, b"");
- assert_eq!(Wtf8Buf::from_str("aé 💩").bytes,
- b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert_eq!(Wtf8Buf::from_str("aé 💩").bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
}
#[test]
fn wtf8buf_from_string() {
assert_eq!(Wtf8Buf::from_string(String::from("")).bytes, b"");
- assert_eq!(Wtf8Buf::from_string(String::from("aé 💩")).bytes,
- b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert_eq!(
+ Wtf8Buf::from_string(String::from("aé 💩")).bytes,
+ b"a\xC3\xA9 \xF0\x9F\x92\xA9"
+ );
}
// 0xD83D appears twice in the input: the first one has no trail surrogate
// after it and is expected as the three bytes ED A0 BD, while the second one
// pairs with 0xDCA9 and is expected as the four-byte sequence F0 9F 92 A9.
#[test]
fn wtf8buf_from_wide() {
assert_eq!(Wtf8Buf::from_wide(&[]).bytes, b"");
- assert_eq!(Wtf8Buf::from_wide(
- &[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes,
- b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9");
+ assert_eq!(
+ Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]).bytes,
+ b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9"
+ );
}
#[test]
string.push(CodePoint::from_char('💩'));
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
- fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
let mut string = Wtf8Buf::new();
- string.push(c(0xD83D)); // lead
- string.push(c(0xDCA9)); // trail
- assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
+ string.push(c(0xD83D)); // lead
+ string.push(c(0xDCA9)); // trail
+ assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
let mut string = Wtf8Buf::new();
- string.push(c(0xD83D)); // lead
- string.push(c(0x20)); // not surrogate
- string.push(c(0xDCA9)); // trail
+ string.push(c(0xD83D)); // lead
+ string.push(c(0x20)); // not surrogate
+ string.push(c(0xDCA9)); // trail
assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
let mut string = Wtf8Buf::new();
- string.push(c(0xD800)); // lead
- string.push(c(0xDBFF)); // lead
+ string.push(c(0xD800)); // lead
+ string.push(c(0xDBFF)); // lead
assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
let mut string = Wtf8Buf::new();
- string.push(c(0xD800)); // lead
- string.push(c(0xE000)); // not surrogate
+ string.push(c(0xD800)); // lead
+ string.push(c(0xE000)); // not surrogate
assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
let mut string = Wtf8Buf::new();
- string.push(c(0xD7FF)); // not surrogate
- string.push(c(0xDC00)); // trail
+ string.push(c(0xD7FF)); // not surrogate
+ string.push(c(0xDC00)); // trail
assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
let mut string = Wtf8Buf::new();
- string.push(c(0x61)); // not surrogate, < 3 bytes
- string.push(c(0xDC00)); // trail
+ string.push(c(0x61)); // not surrogate, < 3 bytes
+ string.push(c(0xDC00)); // trail
assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
let mut string = Wtf8Buf::new();
- string.push(c(0xDC00)); // trail
+ string.push(c(0xDC00)); // trail
assert_eq!(string.bytes, b"\xED\xB0\x80");
}
string.push_wtf8(Wtf8::from_str(" 💩"));
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
- fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } }
+ fn w(v: &[u8]) -> &Wtf8 {
+ unsafe { Wtf8::from_bytes_unchecked(v) }
+ }
let mut string = Wtf8Buf::new();
- string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
- string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
- assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
+ string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
+ string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
+ assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic!
let mut string = Wtf8Buf::new();
- string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
- string.push_wtf8(w(b" ")); // not surrogate
- string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
+ string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
+ string.push_wtf8(w(b" ")); // not surrogate
+ string.push_wtf8(w(b"\xED\xB2\xA9")); // trail
assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
let mut string = Wtf8Buf::new();
- string.push_wtf8(w(b"\xED\xA0\x80")); // lead
- string.push_wtf8(w(b"\xED\xAF\xBF")); // lead
+ string.push_wtf8(w(b"\xED\xA0\x80")); // lead
+ string.push_wtf8(w(b"\xED\xAF\xBF")); // lead
assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF");
let mut string = Wtf8Buf::new();
- string.push_wtf8(w(b"\xED\xA0\x80")); // lead
- string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate
+ string.push_wtf8(w(b"\xED\xA0\x80")); // lead
+ string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate
assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80");
let mut string = Wtf8Buf::new();
- string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate
- string.push_wtf8(w(b"\xED\xB0\x80")); // trail
+ string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate
+ string.push_wtf8(w(b"\xED\xB0\x80")); // trail
assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80");
let mut string = Wtf8Buf::new();
- string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes
- string.push_wtf8(w(b"\xED\xB0\x80")); // trail
+ string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes
+ string.push_wtf8(w(b"\xED\xB0\x80")); // trail
assert_eq!(string.bytes, b"\x61\xED\xB0\x80");
let mut string = Wtf8Buf::new();
- string.push_wtf8(w(b"\xED\xB0\x80")); // trail
+ string.push_wtf8(w(b"\xED\xB0\x80")); // trail
assert_eq!(string.bytes, b"\xED\xB0\x80");
}
}
assert_eq!(f(&[0x61, 0xE9, 0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
- assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
+ assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
assert_eq!(f(&[0xD83D, 0x20, 0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
assert_eq!(f(&[0xD800, 0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
assert_eq!(f(&[0xD800, 0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
#[test]
fn wtf8buf_extend() {
fn e(initial: &[u32], extended: &[u32]) -> Wtf8Buf {
- fn c(value: &u32) -> CodePoint { CodePoint::from_u32(*value).unwrap() }
+ fn c(value: &u32) -> CodePoint {
+ CodePoint::from_u32(*value).unwrap()
+ }
let mut string = initial.iter().map(c).collect::<Wtf8Buf>();
string.extend(extended.iter().map(c));
string
}
- assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes,
- b"a\xC3\xA9 \xF0\x9F\x92\xA9");
+ assert_eq!(e(&[0x61, 0xE9], &[0x20, 0x1F4A9]).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
- assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
+ assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic!
assert_eq!(e(&[0xD83D, 0x20], &[0xDCA9]).bytes, b"\xED\xA0\xBD \xED\xB2\xA9");
assert_eq!(e(&[0xD800], &[0xDBFF]).bytes, b"\xED\xA0\x80\xED\xAF\xBF");
assert_eq!(e(&[0xD800], &[0xE000]).bytes, b"\xED\xA0\x80\xEE\x80\x80");
// Byte range 1..4 covers the two bytes of `é` (C3 A9) and the following space.
#[test]
fn wtf8_slice() {
- assert_eq!(&Wtf8::from_str("aé 💩")[1.. 4].bytes, b"\xC3\xA9 ");
+ assert_eq!(&Wtf8::from_str("aé 💩")[1..4].bytes, b"\xC3\xA9 ");
}
// Byte 2 is the second byte of `é` (0xA9), so the range start is not a code
// point boundary and the index operation is expected to panic.
#[test]
#[should_panic]
fn wtf8_slice_not_code_point_boundary() {
- &Wtf8::from_str("aé 💩")[2.. 4];
+ &Wtf8::from_str("aé 💩")[2..4];
}
#[test]
#[test]
fn wtf8_code_points() {
- fn c(value: u32) -> CodePoint { CodePoint::from_u32(value).unwrap() }
+ fn c(value: u32) -> CodePoint {
+ CodePoint::from_u32(value).unwrap()
+ }
fn cp(string: &Wtf8Buf) -> Vec<Option<char>> {
string.code_points().map(|c| c.to_char()).collect::<Vec<_>>()
}
assert_eq!(Wtf8::from_str("aé 💩").to_string_lossy(), Cow::Borrowed("aé 💩"));
let mut string = Wtf8Buf::from_str("aé 💩");
string.push(CodePoint::from_u32(0xD800).unwrap());
- let expected: Cow<str> = Cow::Owned(String::from("aé 💩�"));
+ let expected: Cow<'_, str> = Cow::Owned(String::from("aé 💩�"));
assert_eq!(string.to_string_lossy(), expected);
}
let mut string = Wtf8Buf::from_str("aé ");
string.push(CodePoint::from_u32(0xD83D).unwrap());
string.push_char('💩');
- assert_eq!(string.encode_wide().collect::<Vec<_>>(),
- vec![0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]);
+ assert_eq!(
+ string.encode_wide().collect::<Vec<_>>(),
+ vec![0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]
+ );
}
}