// Copyright 2013 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
//!
//! Since Punycode fundamentally works on Unicode code points,
//! `encode` and `decode` take and return slices and vectors of `char`.
//! `encode_str` and `decode_to_string` provide convenience wrappers
//! that convert from and to Rust’s UTF-8 based `str` and `String` types.
18 #[allow(unused_imports, deprecated)]
19 use std
::ascii
::AsciiExt
;
// Bootstring parameters for Punycode (RFC 3492, section 5).

/// Radix of the variable-length integer digits (`a`-`z` then `0`-`9`).
const BASE: u32 = 36;
/// Lower clamp for the per-digit threshold `t`.
const T_MIN: u32 = 1;
/// Upper clamp for the per-digit threshold `t`.
const T_MAX: u32 = 26;
/// Skew term used by the bias adaptation function.
const SKEW: u32 = 38;
/// Damping divisor applied to the very first delta during bias adaptation.
const DAMP: u32 = 700;
/// Bias in effect before any delta has been processed.
const INITIAL_BIAS: u32 = 72;
/// First code point handled by the delta encoding (the first non-ASCII one).
const INITIAL_N: u32 = 0x80;
/// Separates the literal ASCII prefix from the encoded deltas.
const DELIMITER: char = '-';
33 fn adapt(mut delta
: u32, num_points
: u32, first_time
: bool
) -> u32 {
34 delta
/= if first_time { DAMP }
else { 2 }
;
35 delta
+= delta
/ num_points
;
37 while delta
> ((BASE
- T_MIN
) * T_MAX
) / 2 {
38 delta
/= BASE
- T_MIN
;
41 k
+ (((BASE
- T_MIN
+ 1) * delta
) / (delta
+ SKEW
))
45 /// Convert Punycode to an Unicode `String`.
47 /// This is a convenience wrapper around `decode`.
49 pub fn decode_to_string(input
: &str) -> Option
<String
> {
50 decode(input
).map(|chars
| chars
.into_iter().collect())
54 /// Convert Punycode to Unicode.
56 /// Return None on malformed input or overflow.
57 /// Overflow can only happen on inputs that take more than
58 /// 63 encoded bytes, the DNS limit on domain name labels.
59 pub fn decode(input
: &str) -> Option
<Vec
<char>> {
60 // Handle "basic" (ASCII) code points.
61 // They are encoded as-is before the last delimiter, if any.
62 let (mut output
, input
) = match input
.rfind(DELIMITER
) {
63 None
=> (Vec
::new(), input
),
65 input
[..position
].chars().collect(),
66 if position
> 0 { &input[position + 1..] }
else { input }
69 let mut code_point
= INITIAL_N
;
70 let mut bias
= INITIAL_BIAS
;
72 let mut iter
= input
.bytes();
77 let mut byte
= match iter
.next() {
81 // Decode a generalized variable-length integer into delta,
82 // which gets added to i.
84 let digit
= match byte
{
85 byte @ b'
0'
... b'
9'
=> byte
- b'
0'
+ 26,
86 byte @ b'A'
... b'Z'
=> byte
- b'A'
,
87 byte @ b'a'
... b'z'
=> byte
- b'a'
,
90 if digit
> (u32::MAX
- i
) / weight
{
91 return None
// Overflow
94 let t
= if k
<= bias { T_MIN }
95 else if k
>= bias
+ T_MAX { T_MAX }
100 if weight
> u32::MAX
/ (BASE
- t
) {
101 return None
// Overflow
105 byte
= match iter
.next() {
106 None
=> return None
, // End of input before the end of this delta
110 let length
= output
.len() as u32;
111 bias
= adapt(i
- previous_i
, length
+ 1, previous_i
== 0);
112 if i
/ (length
+ 1) > u32::MAX
- code_point
{
113 return None
// Overflow
115 // i was supposed to wrap around from length+1 to 0,
116 // incrementing code_point each time.
117 code_point
+= i
/ (length
+ 1);
119 let c
= match char::from_u32(code_point
) {
123 output
.insert(i
as usize, c
);
130 /// Convert an Unicode `str` to Punycode.
132 /// This is a convenience wrapper around `encode`.
134 pub fn encode_str(input
: &str) -> Option
<String
> {
135 encode(&input
.chars().collect
::<Vec
<char>>())
139 /// Convert Unicode to Punycode.
141 /// Return None on overflow, which can only happen on inputs that would take more than
142 /// 63 encoded bytes, the DNS limit on domain name labels.
143 pub fn encode(input
: &[char]) -> Option
<String
> {
144 // Handle "basic" (ASCII) code points. They are encoded as-is.
145 let output_bytes
= input
.iter().filter_map(|&c
|
146 if c
.is_ascii() { Some(c as u8) }
else { None }
148 let mut output
= unsafe { String::from_utf8_unchecked(output_bytes) }
;
149 let basic_length
= output
.len() as u32;
150 if basic_length
> 0 {
153 let mut code_point
= INITIAL_N
;
155 let mut bias
= INITIAL_BIAS
;
156 let mut processed
= basic_length
;
157 let input_length
= input
.len() as u32;
158 while processed
< input_length
{
159 // All code points < code_point have been handled already.
160 // Find the next larger one.
161 let min_code_point
= input
.iter().map(|&c
| c
as u32)
162 .filter(|&c
| c
>= code_point
).min().unwrap();
163 if min_code_point
- code_point
> (u32::MAX
- delta
) / (processed
+ 1) {
164 return None
// Overflow
166 // Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
167 delta
+= (min_code_point
- code_point
) * (processed
+ 1);
168 code_point
= min_code_point
;
174 return None
// Overflow
178 // Represent delta as a generalized variable-length integer:
182 let t
= if k
<= bias { T_MIN }
183 else if k
>= bias
+ T_MAX { T_MAX }
188 let value
= t
+ ((q
- t
) % (BASE
- t
));
189 output
.push(value_to_digit(value
));
190 q
= (q
- t
) / (BASE
- t
);
193 output
.push(value_to_digit(q
));
194 bias
= adapt(delta
, processed
+ 1, processed
== basic_length
);
/// Map a digit value in `0..=35` to its Punycode character:
/// `0..=25` become `'a'..='z'` and `26..=35` become `'0'..='9'`.
///
/// # Panics
///
/// Panics if `value` is 36 or greater, which is not a valid base-36 digit.
fn value_to_digit(value: u32) -> char {
    match value {
        0..=25 => (b'a' + value as u8) as char,         // a..z
        26..=35 => (b'0' + (value as u8 - 26)) as char, // 0..9
        _ => panic!("value_to_digit: {} is not a base-36 digit value", value),
    }
}