]>
Commit | Line | Data |
---|---|---|
476ff2be SL |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | // | |
11 | // ignore-lexer-test FIXME #15679 | |
12 | ||
13 | //! Base64 binary-to-text encoding | |
14 | ||
15 | pub use self::FromBase64Error::*; | |
16 | pub use self::CharacterSet::*; | |
17 | ||
18 | use std::fmt; | |
19 | use std::error; | |
20 | ||
/// Available encoding character sets.
///
/// The two alphabets are identical except for the characters that stand for
/// the values 62 and 63.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CharacterSet {
    /// The standard character set (uses `+` and `/`)
    Standard,
    /// The URL safe character set (uses `-` and `_`)
    UrlSafe,
}
29 | ||
/// Available newline types, used when the encoder wraps its output.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Newline {
    /// A linefeed (i.e. Unix-style newline)
    LF,
    /// A carriage return and a linefeed (i.e. Windows-style newline)
    CRLF,
}
38 | ||
39 | /// Contains configuration parameters for `to_base64`. | |
40 | #[derive(Clone, Copy, Debug)] | |
41 | pub struct Config { | |
42 | /// Character set to use | |
43 | pub char_set: CharacterSet, | |
44 | /// Newline to use | |
45 | pub newline: Newline, | |
46 | /// True to pad output with `=` characters | |
47 | pub pad: bool, | |
48 | /// `Some(len)` to wrap lines at `len`, `None` to disable line wrapping | |
49 | pub line_length: Option<usize> | |
50 | } | |
51 | ||
52 | /// Configuration for RFC 4648 standard base64 encoding | |
53 | pub static STANDARD: Config = | |
54 | Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: None}; | |
55 | ||
56 | /// Configuration for RFC 4648 base64url encoding | |
57 | pub static URL_SAFE: Config = | |
58 | Config {char_set: UrlSafe, newline: Newline::CRLF, pad: false, line_length: None}; | |
59 | ||
60 | /// Configuration for RFC 2045 MIME base64 encoding | |
61 | pub static MIME: Config = | |
62 | Config {char_set: Standard, newline: Newline::CRLF, pad: true, line_length: Some(76)}; | |
63 | ||
/// Encoding alphabet for `CharacterSet::Standard`, indexed by 6-bit value.
static STANDARD_CHARS: &'static [u8] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
67 | ||
/// Encoding alphabet for `CharacterSet::UrlSafe`, indexed by 6-bit value.
static URLSAFE_CHARS: &'static [u8] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
71 | ||
72 | /// A trait for converting a value to base64 encoding. | |
73 | pub trait ToBase64 { | |
74 | /// Converts the value of `self` to a base64 value following the specified | |
75 | /// format configuration, returning the owned string. | |
76 | fn to_base64(&self, config: Config) -> String; | |
77 | } | |
78 | ||
79 | impl ToBase64 for [u8] { | |
80 | /// Turn a vector of `u8` bytes into a base64 string. | |
81 | /// | |
82 | /// # Example | |
83 | /// | |
84 | /// ```rust | |
85 | /// extern crate rustc_serialize; | |
86 | /// use rustc_serialize::base64::{ToBase64, STANDARD}; | |
87 | /// | |
88 | /// fn main () { | |
89 | /// let str = [52,32].to_base64(STANDARD); | |
90 | /// println!("base 64 output: {:?}", str); | |
91 | /// } | |
92 | /// ``` | |
93 | fn to_base64(&self, config: Config) -> String { | |
94 | let bytes = match config.char_set { | |
95 | Standard => STANDARD_CHARS, | |
96 | UrlSafe => URLSAFE_CHARS | |
97 | }; | |
98 | ||
99 | let len = self.len(); | |
100 | let newline = match config.newline { | |
101 | Newline::LF => "\n", | |
102 | Newline::CRLF => "\r\n", | |
103 | }; | |
104 | ||
105 | // Preallocate memory. | |
106 | let mut prealloc_len = (len + 2) / 3 * 4; | |
107 | if let Some(line_length) = config.line_length { | |
108 | let num_lines = match prealloc_len { | |
109 | 0 => 0, | |
110 | n => (n - 1) / line_length | |
111 | }; | |
112 | prealloc_len += num_lines * newline.bytes().count(); | |
113 | } | |
114 | ||
115 | let mut out_bytes = vec![b'='; prealloc_len]; | |
116 | ||
117 | // Deal with padding bytes | |
118 | let mod_len = len % 3; | |
119 | ||
120 | // Use iterators to reduce branching | |
121 | { | |
122 | let mut cur_length = 0; | |
123 | ||
124 | let mut s_in = self[..len - mod_len].iter().map(|&x| x as u32); | |
125 | let mut s_out = out_bytes.iter_mut(); | |
126 | ||
127 | // Convenient shorthand | |
128 | let enc = |val| bytes[val as usize]; | |
129 | let mut write = |val| *s_out.next().unwrap() = val; | |
130 | ||
131 | // Iterate though blocks of 4 | |
132 | while let (Some(first), Some(second), Some(third)) = | |
133 | (s_in.next(), s_in.next(), s_in.next()) { | |
134 | ||
135 | // Line break if needed | |
136 | if let Some(line_length) = config.line_length { | |
137 | if cur_length >= line_length { | |
138 | for b in newline.bytes() { write(b) }; | |
139 | cur_length = 0; | |
140 | } | |
141 | } | |
142 | ||
143 | let n = first << 16 | second << 8 | third; | |
144 | ||
145 | // This 24-bit number gets separated into four 6-bit numbers. | |
146 | write(enc((n >> 18) & 63)); | |
147 | write(enc((n >> 12) & 63)); | |
148 | write(enc((n >> 6 ) & 63)); | |
149 | write(enc((n >> 0 ) & 63)); | |
150 | ||
151 | cur_length += 4; | |
152 | } | |
153 | ||
154 | // Line break only needed if padding is required | |
155 | if mod_len != 0 { | |
156 | if let Some(line_length) = config.line_length { | |
157 | if cur_length >= line_length { | |
158 | for b in newline.bytes() { write(b) }; | |
159 | } | |
160 | } | |
161 | } | |
162 | ||
163 | // Heh, would be cool if we knew this was exhaustive | |
164 | // (the dream of bounded integer types) | |
165 | match mod_len { | |
166 | 0 => (), | |
167 | 1 => { | |
168 | let n = (self[len-1] as u32) << 16; | |
169 | write(enc((n >> 18) & 63)); | |
170 | write(enc((n >> 12) & 63)); | |
171 | } | |
172 | 2 => { | |
173 | let n = (self[len-2] as u32) << 16 | | |
174 | (self[len-1] as u32) << 8; | |
175 | write(enc((n >> 18) & 63)); | |
176 | write(enc((n >> 12) & 63)); | |
177 | write(enc((n >> 6 ) & 63)); | |
178 | } | |
179 | _ => panic!("Algebra is broken, please alert the math police") | |
180 | } | |
181 | } | |
182 | ||
183 | // We get padding for "free", so only have to drop it if unwanted. | |
184 | if !config.pad { | |
185 | while let Some(&b'=') = out_bytes.last() { | |
186 | out_bytes.pop(); | |
187 | } | |
188 | } | |
189 | ||
190 | unsafe { String::from_utf8_unchecked(out_bytes) } | |
191 | } | |
192 | } | |
193 | ||
8bb4bdeb XL |
194 | impl<'a, T: ?Sized + ToBase64> ToBase64 for &'a T { |
195 | fn to_base64(&self, config: Config) -> String { | |
196 | (**self).to_base64(config) | |
197 | } | |
198 | } | |
199 | ||
476ff2be SL |
200 | /// A trait for converting from base64 encoded values. |
201 | pub trait FromBase64 { | |
202 | /// Converts the value of `self`, interpreted as base64 encoded data, into | |
203 | /// an owned vector of bytes, returning the vector. | |
204 | fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error>; | |
205 | } | |
206 | ||
/// Errors that can occur when decoding a base64 encoded string
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum FromBase64Error {
    /// The input contained a character not part of the base64 format.
    ///
    /// Carries the offending byte and its offset within the input.
    InvalidBase64Byte(u8, usize),
    /// The input had an invalid length
    InvalidBase64Length,
}
215 | ||
216 | impl fmt::Debug for FromBase64Error { | |
217 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
218 | match *self { | |
219 | InvalidBase64Byte(ch, idx) => | |
220 | write!(f, "Invalid character '{}' at position {}", ch, idx), | |
221 | InvalidBase64Length => write!(f, "Invalid length"), | |
222 | } | |
223 | } | |
224 | } | |
225 | ||
226 | impl error::Error for FromBase64Error { | |
227 | fn description(&self) -> &str { | |
228 | match *self { | |
229 | InvalidBase64Byte(_, _) => "invalid character", | |
230 | InvalidBase64Length => "invalid length", | |
231 | } | |
232 | } | |
233 | } | |
234 | ||
235 | impl fmt::Display for FromBase64Error { | |
236 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
237 | fmt::Debug::fmt(&self, f) | |
238 | } | |
239 | } | |
240 | ||
241 | impl FromBase64 for str { | |
242 | /// Convert any base64 encoded string (literal, `@`, `&`, or `~`) | |
243 | /// to the byte values it encodes. | |
244 | /// | |
245 | /// You can use the `String::from_utf8` function to turn a `Vec<u8>` into a | |
246 | /// string with characters corresponding to those values. | |
247 | /// | |
248 | /// # Example | |
249 | /// | |
250 | /// This converts a string literal to base64 and back. | |
251 | /// | |
252 | /// ```rust | |
253 | /// extern crate rustc_serialize; | |
254 | /// use rustc_serialize::base64::{ToBase64, FromBase64, STANDARD}; | |
255 | /// | |
256 | /// fn main () { | |
257 | /// let hello_str = b"Hello, World".to_base64(STANDARD); | |
258 | /// println!("base64 output: {}", hello_str); | |
259 | /// let res = hello_str.from_base64(); | |
260 | /// if res.is_ok() { | |
261 | /// let opt_bytes = String::from_utf8(res.unwrap()); | |
262 | /// if opt_bytes.is_ok() { | |
263 | /// println!("decoded from base64: {:?}", opt_bytes.unwrap()); | |
264 | /// } | |
265 | /// } | |
266 | /// } | |
267 | /// ``` | |
268 | #[inline] | |
269 | fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> { | |
270 | self.as_bytes().from_base64() | |
271 | } | |
272 | } | |
273 | ||
274 | impl FromBase64 for [u8] { | |
275 | fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> { | |
276 | let mut r = Vec::with_capacity(self.len()); | |
277 | let mut buf: u32 = 0; | |
278 | let mut modulus = 0; | |
279 | ||
280 | let mut it = self.iter(); | |
281 | for byte in it.by_ref() { | |
282 | let code = DECODE_TABLE[*byte as usize]; | |
283 | if code >= SPECIAL_CODES_START { | |
284 | match code { | |
285 | NEWLINE_CODE => continue, | |
286 | EQUALS_CODE => break, | |
287 | INVALID_CODE => return Err(InvalidBase64Byte( | |
288 | *byte, (byte as *const _ as usize) - self.as_ptr() as usize)), | |
289 | _ => unreachable!(), | |
290 | } | |
291 | } | |
292 | buf = (buf | code as u32) << 6; | |
293 | modulus += 1; | |
294 | if modulus == 4 { | |
295 | modulus = 0; | |
296 | r.push((buf >> 22) as u8); | |
297 | r.push((buf >> 14) as u8); | |
298 | r.push((buf >> 6 ) as u8); | |
299 | } | |
300 | } | |
301 | ||
302 | for byte in it { | |
303 | match *byte { | |
304 | b'=' | b'\r' | b'\n' => continue, | |
305 | _ => return Err(InvalidBase64Byte( | |
306 | *byte, (byte as *const _ as usize) - self.as_ptr() as usize)), | |
307 | } | |
308 | } | |
309 | ||
310 | match modulus { | |
311 | 2 => { | |
312 | r.push((buf >> 10) as u8); | |
313 | } | |
314 | 3 => { | |
315 | r.push((buf >> 16) as u8); | |
316 | r.push((buf >> 8 ) as u8); | |
317 | } | |
318 | 0 => (), | |
319 | _ => return Err(InvalidBase64Length), | |
320 | } | |
321 | ||
322 | Ok(r) | |
323 | } | |
324 | } | |
325 | ||
8bb4bdeb XL |
326 | impl<'a, T: ?Sized + FromBase64> FromBase64 for &'a T { |
327 | fn from_base64(&self) -> Result<Vec<u8>, FromBase64Error> { | |
328 | (**self).from_base64() | |
329 | } | |
330 | } | |
331 | ||
/// Table code for a byte that is not part of any base64 alphabet.
const INVALID_CODE: u8 = 0xFF;
/// Table code for the padding character `=`.
const EQUALS_CODE: u8 = 0xFE;
/// Table code for the line-break bytes `\r` and `\n`.
const NEWLINE_CODE: u8 = 0xFD;
/// Smallest special code; every table entry at or above it is not a 6-bit value.
const SPECIAL_CODES_START: u8 = NEWLINE_CODE;

/// Base64 decoding lookup table, indexed by input byte.
///
/// Each entry is either the 6-bit value of the byte or one of the special
/// codes above:
///
/// * `A`-`Z` map to `0x00`-`0x19`, `a`-`z` to `0x1A`-`0x33`, `0`-`9` to `0x34`-`0x3D`
/// * `+` and `-` map to `0x3E`; `/` and `_` map to `0x3F`
/// * `=` maps to `EQUALS_CODE`; `\r` and `\n` map to `NEWLINE_CODE`
/// * every other byte maps to `INVALID_CODE`
const DECODE_TABLE: [u8; 256] = [
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF, 0xFD, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0x3E, 0xFF, 0x3F,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF,
    0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
    0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0x3F,
    0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
];
377 | ||
#[cfg(test)]
mod tests {
    use base64::{Config, Newline, FromBase64, ToBase64, STANDARD, URL_SAFE};

    /// `(plain text, padded standard encoding)` pairs shared by the basic
    /// encoding and decoding tests.
    const VECTORS: [(&'static str, &'static str); 7] = [
        ("", ""),
        ("f", "Zg=="),
        ("fo", "Zm8="),
        ("foo", "Zm9v"),
        ("foob", "Zm9vYg=="),
        ("fooba", "Zm9vYmE="),
        ("foobar", "Zm9vYmFy"),
    ];

    #[test]
    fn test_to_base64_basic() {
        for &(plain, encoded) in VECTORS.iter() {
            assert_eq!(plain.as_bytes().to_base64(STANDARD), encoded);
        }
    }

    #[test]
    fn test_to_base64_crlf_line_break() {
        let unwrapped = Config { line_length: None, ..STANDARD };
        let zeros = [0u8; 1000];
        assert!(!zeros.to_base64(unwrapped).contains("\r\n"));

        let wrapped = Config { line_length: Some(4), ..STANDARD };
        assert_eq!(b"foobar".to_base64(wrapped), "Zm9v\r\nYmFy");
    }

    #[test]
    fn test_to_base64_lf_line_break() {
        let unwrapped = Config { line_length: None, newline: Newline::LF, ..STANDARD };
        let zeros = [0u8; 1000];
        assert!(!zeros.to_base64(unwrapped).contains("\n"));

        let wrapped = Config { line_length: Some(4), newline: Newline::LF, ..STANDARD };
        assert_eq!(b"foobar".to_base64(wrapped), "Zm9v\nYmFy");
    }

    #[test]
    fn test_to_base64_padding() {
        let unpadded = Config { pad: false, ..STANDARD };
        assert_eq!("f".as_bytes().to_base64(unpadded), "Zg");
        assert_eq!("fo".as_bytes().to_base64(unpadded), "Zm8");
    }

    #[test]
    fn test_to_base64_url_safe() {
        let input = [251u8, 255];
        assert_eq!(input.to_base64(URL_SAFE), "-_8");
        assert_eq!(input.to_base64(STANDARD), "+/8=");
    }

    #[test]
    fn test_to_base64_empty_line_length() {
        // Only checks that encoding empty input with wrapping enabled does not panic.
        let empty: [u8; 0] = [];
        empty.to_base64(Config { line_length: Some(72), ..STANDARD });
    }

    #[test]
    fn test_from_base64_basic() {
        for &(plain, encoded) in VECTORS.iter() {
            assert_eq!(encoded.from_base64().unwrap(), plain.as_bytes());
        }
    }

    #[test]
    fn test_from_base64_bytes() {
        let decoded = b"Zm9vYmFy".from_base64().unwrap();
        assert_eq!(decoded, b"foobar");
    }

    #[test]
    fn test_from_base64_newlines() {
        let cases = [
            ("Zm9v\r\nYmFy", "foobar"),
            ("Zm9vYg==\r\n", "foob"),
            ("Zm9v\nYmFy", "foobar"),
            ("Zm9vYg==\n", "foob"),
        ];
        for &(encoded, plain) in cases.iter() {
            assert_eq!(encoded.from_base64().unwrap(), plain.as_bytes());
        }
    }

    #[test]
    fn test_from_base64_urlsafe() {
        let url_safe = "-_8".from_base64().unwrap();
        let standard = "+/8=".from_base64().unwrap();
        assert_eq!(url_safe, standard);
    }

    #[test]
    fn test_from_base64_invalid_char() {
        for bad in ["Zm$=", "Zg==$"].iter() {
            assert!(bad.from_base64().is_err());
        }
    }

    #[test]
    fn test_from_base64_invalid_padding() {
        let result = "Z===".from_base64();
        assert!(result.is_err());
    }

    #[test]
    fn test_base64_random() {
        use rand::{thread_rng, Rng};

        // Round-trips 1000 random buffers of 1 to 99 bytes.
        for _ in 0..1000 {
            let times = thread_rng().gen_range(1, 100);
            let original = thread_rng().gen_iter::<u8>().take(times).collect::<Vec<_>>();
            let round_tripped = original.to_base64(STANDARD).from_base64().unwrap();
            assert_eq!(round_tripped, original);
        }
    }
}