// Copyright 2013 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
//!
//! Since Punycode fundamentally works on Unicode code points,
//! `encode` and `decode` take and return slices and vectors of `char`.
//! `encode_str` and `decode_to_string` provide convenience wrappers
//! that convert from and to Rust’s UTF-8 based `str` and `String` types.
use std::ascii::AsciiExt;
use std::char;
// Bootstring parameters for Punycode (RFC 3492, section 5).
// Declared as `const` rather than `static`: they are plain compile-time
// values and nothing needs them to have a fixed address.
const BASE: u32 = 36;
const T_MIN: u32 = 1;
const T_MAX: u32 = 26;
const SKEW: u32 = 38;
const DAMP: u32 = 700;
const INITIAL_BIAS: u32 = 72;
const INITIAL_N: u32 = 0x80;
const DELIMITER: char = '-';
32 fn adapt(mut delta
: u32, num_points
: u32, first_time
: bool
) -> u32 {
33 delta
/= if first_time { DAMP }
else { 2 }
;
34 delta
+= delta
/ num_points
;
36 while delta
> ((BASE
- T_MIN
) * T_MAX
) / 2 {
37 delta
/= BASE
- T_MIN
;
40 k
+ (((BASE
- T_MIN
+ 1) * delta
) / (delta
+ SKEW
))
44 /// Convert Punycode to an Unicode `String`.
46 /// This is a convenience wrapper around `decode`.
48 pub fn decode_to_string(input
: &str) -> Option
<String
> {
49 decode(input
).map(|chars
| chars
.into_iter().collect())
53 /// Convert Punycode to Unicode.
55 /// Return None on malformed input or overflow.
56 /// Overflow can only happen on inputs that take more than
57 /// 63 encoded bytes, the DNS limit on domain name labels.
58 pub fn decode(input
: &str) -> Option
<Vec
<char>> {
59 // Handle "basic" (ASCII) code points.
60 // They are encoded as-is before the last delimiter, if any.
61 let (mut output
, input
) = match input
.rfind(DELIMITER
) {
62 None
=> (Vec
::new(), input
),
64 input
[..position
].chars().collect(),
65 if position
> 0 { &input[position + 1..] }
else { input }
68 let mut code_point
= INITIAL_N
;
69 let mut bias
= INITIAL_BIAS
;
71 let mut iter
= input
.bytes();
76 let mut byte
= match iter
.next() {
80 // Decode a generalized variable-length integer into delta,
81 // which gets added to i.
83 let digit
= match byte
{
84 byte @ b'
0'
... b'
9'
=> byte
- b'
0'
+ 26,
85 byte @ b'A'
... b'Z'
=> byte
- b'A'
,
86 byte @ b'a'
... b'z'
=> byte
- b'a'
,
89 if digit
> (u32::MAX
- i
) / weight
{
90 return None
// Overflow
93 let t
= if k
<= bias { T_MIN }
94 else if k
>= bias
+ T_MAX { T_MAX }
99 if weight
> u32::MAX
/ (BASE
- t
) {
100 return None
// Overflow
104 byte
= match iter
.next() {
105 None
=> return None
, // End of input before the end of this delta
109 let length
= output
.len() as u32;
110 bias
= adapt(i
- previous_i
, length
+ 1, previous_i
== 0);
111 if i
/ (length
+ 1) > u32::MAX
- code_point
{
112 return None
// Overflow
114 // i was supposed to wrap around from length+1 to 0,
115 // incrementing code_point each time.
116 code_point
+= i
/ (length
+ 1);
118 let c
= match char::from_u32(code_point
) {
122 output
.insert(i
as usize, c
);
129 /// Convert an Unicode `str` to Punycode.
131 /// This is a convenience wrapper around `encode`.
133 pub fn encode_str(input
: &str) -> Option
<String
> {
134 encode(&input
.chars().collect
::<Vec
<char>>())
138 /// Convert Unicode to Punycode.
140 /// Return None on overflow, which can only happen on inputs that would take more than
141 /// 63 encoded bytes, the DNS limit on domain name labels.
142 pub fn encode(input
: &[char]) -> Option
<String
> {
143 // Handle "basic" (ASCII) code points. They are encoded as-is.
144 let output_bytes
= input
.iter().filter_map(|&c
|
145 if c
.is_ascii() { Some(c as u8) }
else { None }
147 let mut output
= unsafe { String::from_utf8_unchecked(output_bytes) }
;
148 let basic_length
= output
.len() as u32;
149 if basic_length
> 0 {
152 let mut code_point
= INITIAL_N
;
154 let mut bias
= INITIAL_BIAS
;
155 let mut processed
= basic_length
;
156 let input_length
= input
.len() as u32;
157 while processed
< input_length
{
158 // All code points < code_point have been handled already.
159 // Find the next larger one.
160 let min_code_point
= input
.iter().map(|&c
| c
as u32)
161 .filter(|&c
| c
>= code_point
).min().unwrap();
162 if min_code_point
- code_point
> (u32::MAX
- delta
) / (processed
+ 1) {
163 return None
// Overflow
165 // Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
166 delta
+= (min_code_point
- code_point
) * (processed
+ 1);
167 code_point
= min_code_point
;
173 return None
// Overflow
177 // Represent delta as a generalized variable-length integer:
181 let t
= if k
<= bias { T_MIN }
182 else if k
>= bias
+ T_MAX { T_MAX }
187 let value
= t
+ ((q
- t
) % (BASE
- t
));
188 output
.push(value_to_digit(value
));
189 q
= (q
- t
) / (BASE
- t
);
192 output
.push(value_to_digit(q
));
193 bias
= adapt(delta
, processed
+ 1, processed
== basic_length
);
/// Map a base-36 digit value to its Punycode character.
///
/// Values 0 to 25 map to `'a'..='z'` and values 26 to 35 map to `'0'..='9'`.
///
/// # Panics
///
/// Panics if `value` is 36 or greater.
fn value_to_digit(value: u32) -> char {
    match value {
        0..=25 => (value as u8 + b'a') as char,       // a..z
        26..=35 => (value as u8 - 26 + b'0') as char, // 0..9
        _ => panic!("value_to_digit: {} is not a base-36 digit value", value),
    }
}