//! Private utility functions
/// Marker bits OR-ed into every continuation (non-leading) byte.
const TAG_CONT: u8 = 0b1000_0000;
/// Marker bits OR-ed into the leading byte of a two-byte sequence.
const TAG_TWO_B: u8 = 0b1100_0000;
/// Marker bits OR-ed into the leading byte of a three-byte sequence.
const TAG_THREE_B: u8 = 0b1110_0000;
/// Marker bits OR-ed into the leading byte of a four-byte sequence.
const TAG_FOUR_B: u8 = 0b1111_0000;
/// Code points below this value are encoded in one byte.
const MAX_ONE_B: u32 = 0x80;
/// Code points below this value are encoded in at most two bytes.
const MAX_TWO_B: u32 = 0x800;
/// Code points below this value are encoded in at most three bytes.
const MAX_THREE_B: u32 = 0x10000;

/// Encodes `c` as UTF-8 into a small inline buffer.
///
/// The returned [`EncodeUtf8`] owns the bytes; call [`EncodeUtf8::as_str`] to
/// view them as a string slice.
pub fn encode_utf8(c: char) -> EncodeUtf8 {
    let code = c as u32;
    let mut buf = [0u8; 4];
    // The encoded bytes are right-aligned in `buf`, so the sequence always
    // ends at index 3 and `pos` is the index of its first byte.
    let pos = if code < MAX_ONE_B {
        buf[3] = code as u8;
        3
    } else if code < MAX_TWO_B {
        buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
        buf[3] = (code & 0x3F) as u8 | TAG_CONT;
        2
    } else if code < MAX_THREE_B {
        buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
        buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
        buf[3] = (code & 0x3F) as u8 | TAG_CONT;
        1
    } else {
        buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
        buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
        buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT;
        buf[3] = (code & 0x3F) as u8 | TAG_CONT;
        0
    };
    EncodeUtf8 { buf: buf, pos: pos }
}

/// UTF-8 bytes of a single `char`, as produced by [`encode_utf8`].
///
/// The bytes occupy `buf[pos..]`; anything before `pos` is unused padding.
pub struct EncodeUtf8 {
    buf: [u8; 4],
    pos: usize,
}

impl EncodeUtf8 {
    /// Returns the encoded character as a string slice.
    // FIXME: use this from_utf8_unchecked, since we know it can never fail
    pub fn as_str(&self) -> &str {
        ::core::str::from_utf8(&self.buf[self.pos..]).unwrap()
    }
}
/// Inclusive `(low, high)` code-point ranges making up the
/// `Pattern_White_Space` set, sorted in ascending order so the table can be
/// binary-searched.
// NOTE(review): the U+0020 and U+0085 rows were missing from the damaged
// source and are restored from the Unicode definition of Pattern_White_Space;
// confirm against the upstream table.
#[allow(non_upper_case_globals)]
const Pattern_White_Space_table: &'static [(char, char)] = &[
    ('\u{9}', '\u{d}'),
    ('\u{20}', '\u{20}'),
    ('\u{85}', '\u{85}'),
    ('\u{200e}', '\u{200f}'),
    ('\u{2028}', '\u{2029}'),
];
/// Returns `true` if `c` falls inside any inclusive `(low, high)` range of `r`.
///
/// `r` must be sorted in ascending order with non-overlapping ranges, since
/// the lookup is a binary search.
fn bsearch_range_table(c: char, r: &'static [(char, char)]) -> bool {
    use core::cmp::Ordering::{Equal, Less, Greater};
    // The closure reports how the probed range orders relative to `c`:
    // a range lying entirely above `c` is `Greater`, one entirely below is `Less`.
    r.binary_search_by(|&(lo, hi)| if c < lo {
        Greater
    } else if hi < c {
        Less
    } else {
        Equal
    }).is_ok()
}
71 #[allow(non_snake_case)]
72 pub fn Pattern_White_Space(c
: char) -> bool
{
73 bsearch_range_table(c
, Pattern_White_Space_table
)