]>
Commit | Line | Data |
---|---|---|
041b39d2 XL |
1 | // Copyright 2017 Serde Developers |
2 | // | |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
6 | // option. This file may not be copied, modified, or distributed | |
7 | // except according to those terms. | |
8 | ||
041b39d2 | 9 | use std::ops::Deref; |
83c7162d | 10 | use std::{char, cmp, io, str}; |
7cac9316 | 11 | |
0bf4aa26 XL |
12 | #[cfg(feature = "raw_value")] |
13 | use serde::de::Visitor; | |
14 | ||
041b39d2 | 15 | use iter::LineColIterator; |
7cac9316 | 16 | |
0bf4aa26 XL |
17 | use error::{Error, ErrorCode, Result}; |
18 | ||
19 | #[cfg(feature = "raw_value")] | |
20 | use raw::{BorrowedRawDeserializer, OwnedRawDeserializer}; | |
7cac9316 XL |
21 | |
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over &[u8]. Once feature(specialization) is
/// stable we can use actual specialization.
///
/// This trait is sealed and cannot be implemented for types outside of
/// `serde_json`.
pub trait Read<'de>: private::Sealed {
    /// Consumes and returns the next byte of input, or `None` once the input
    /// is exhausted.
    #[doc(hidden)]
    fn next(&mut self) -> Result<Option<u8>>;
    /// Returns the next byte of input without consuming it, or `None` once
    /// the input is exhausted.
    #[doc(hidden)]
    fn peek(&mut self) -> Result<Option<u8>>;

    /// Only valid after a call to peek(). Discards the peeked byte.
    #[doc(hidden)]
    fn discard(&mut self);

    /// Position of the most recent call to next().
    ///
    /// The most recent call was probably next() and not peek(), but this method
    /// should try to return a sensible result if the most recent call was
    /// actually peek() because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn position(&self) -> Position;

    /// Position of the most recent call to peek().
    ///
    /// The most recent call was probably peek() and not next(), but this method
    /// should try to return a sensible result if the most recent call was
    /// actually next() because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn peek_position(&self) -> Position;

    /// Offset from the beginning of the input to the next byte that would be
    /// returned by next() or peek().
    #[doc(hidden)]
    fn byte_offset(&self) -> usize;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    #[doc(hidden)]
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    ///
    /// This function returns the raw bytes in the string with escape sequences
    /// expanded but without performing unicode validation.
    #[doc(hidden)]
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark but discards the data.
    #[doc(hidden)]
    fn ignore_str(&mut self) -> Result<()>;

    /// Assumes the previous bytes were the start of a hex escape sequence
    /// (`\u`) in a string. Parses the four hexadecimal digits that follow and
    /// returns their value.
    #[doc(hidden)]
    fn decode_hex_escape(&mut self) -> Result<u16>;

    /// Switch raw buffering mode on.
    ///
    /// This is used when deserializing `RawValue`.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn begin_raw_buffering(&mut self);

    /// Switch raw buffering mode off and provides the raw buffered data to the
    /// given visitor.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>;
}
106 | ||
/// A line/column location in the input, attached to syntax errors.
pub struct Position {
    /// Line number. `SliceRead` starts counting at 1 and adds one for every
    /// `\n` byte before the position.
    pub line: usize,
    /// Column within the line. `SliceRead` counts the bytes seen since the most
    /// recent `\n`, so this is a byte count rather than a character count.
    pub column: usize,
}
111 | ||
041b39d2 XL |
/// Result of parsing a string: either a slice borrowed directly from the
/// input (lifetime `'b`) or a slice of data that had to be copied into a
/// shorter-lived buffer such as the scratch space (lifetime `'c`).
pub enum Reference<'b, 'c, T: ?Sized + 'static> {
    /// Data that lives as long as the original input.
    Borrowed(&'b T),
    /// Data that lives only as long as the temporary buffer it was copied into.
    Copied(&'c T),
}

impl<'b, 'c, T: ?Sized + 'static> Deref for Reference<'b, 'c, T> {
    type Target = T;

    /// Gives access to the referenced data regardless of where it is stored.
    fn deref(&self) -> &Self::Target {
        match self {
            &Reference::Borrowed(from_input) => from_input,
            &Reference::Copied(from_buffer) => from_buffer,
        }
    }
}
127 | ||
/// JSON input source that reads from a std::io input stream.
pub struct IoRead<R>
where
    R: io::Read,
{
    /// Byte iterator over the reader. It is also where this type gets the
    /// line, column and byte offset it reports.
    iter: LineColIterator<io::Bytes<R>>,
    /// Temporary storage of peeked byte.
    ch: Option<u8>,
    /// Bytes consumed since `begin_raw_buffering` was called, or `None` while
    /// raw buffering is off.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
139 | ||
/// JSON input source that reads from a slice of bytes.
//
// This is more efficient than other iterators because peek() can be read-only
// and we can compute line/col position only if an error happens.
pub struct SliceRead<'a> {
    /// The complete input.
    slice: &'a [u8],
    /// Index of the *next* byte that will be returned by next() or peek().
    index: usize,
    /// Value of `index` at the moment `begin_raw_buffering` was last called;
    /// marks the start of the raw slice handed out by `end_raw_buffering`.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
151 | ||
/// JSON input source that reads from a UTF-8 string.
//
// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
pub struct StrRead<'a> {
    /// Byte-level reader over the string's bytes; does all the actual parsing.
    delegate: SliceRead<'a>,
    /// The original string, kept so that raw values can be handed out as
    /// `&str` slices without re-validating UTF-8.
    #[cfg(feature = "raw_value")]
    data: &'a str,
}
160 | ||
// Prevent users from implementing the Read trait.
//
// `Read` has `private::Sealed` as a supertrait. Because this module is not
// public, code outside the crate cannot name `Sealed` and therefore cannot
// implement `Read` for its own types.
mod private {
    pub trait Sealed {}
}
165 | ||
166 | ////////////////////////////////////////////////////////////////////////////// | |
167 | ||
041b39d2 XL |
168 | impl<R> IoRead<R> |
169 | where | |
170 | R: io::Read, | |
7cac9316 | 171 | { |
041b39d2 XL |
172 | /// Create a JSON input source to read from a std::io input stream. |
173 | pub fn new(reader: R) -> Self { | |
0bf4aa26 XL |
174 | #[cfg(not(feature = "raw_value"))] |
175 | { | |
176 | IoRead { | |
177 | iter: LineColIterator::new(reader.bytes()), | |
178 | ch: None, | |
179 | } | |
180 | } | |
181 | #[cfg(feature = "raw_value")] | |
182 | { | |
183 | IoRead { | |
184 | iter: LineColIterator::new(reader.bytes()), | |
185 | ch: None, | |
186 | raw_buffer: None, | |
187 | } | |
7cac9316 XL |
188 | } |
189 | } | |
190 | } | |
191 | ||
// Marker impl required by the `private::Sealed` supertrait bound on `Read`.
impl<R> private::Sealed for IoRead<R> where R: io::Read {}
7cac9316 | 193 | |
041b39d2 XL |
194 | impl<R> IoRead<R> |
195 | where | |
196 | R: io::Read, | |
7cac9316 XL |
197 | { |
198 | fn parse_str_bytes<'s, T, F>( | |
199 | &'s mut self, | |
200 | scratch: &'s mut Vec<u8>, | |
201 | validate: bool, | |
041b39d2 | 202 | result: F, |
7cac9316 | 203 | ) -> Result<T> |
041b39d2 XL |
204 | where |
205 | T: 's, | |
206 | F: FnOnce(&'s Self, &'s [u8]) -> Result<T>, | |
7cac9316 XL |
207 | { |
208 | loop { | |
209 | let ch = try!(next_or_eof(self)); | |
210 | if !ESCAPE[ch as usize] { | |
211 | scratch.push(ch); | |
212 | continue; | |
213 | } | |
214 | match ch { | |
215 | b'"' => { | |
216 | return result(self, scratch); | |
217 | } | |
218 | b'\\' => { | |
219 | try!(parse_escape(self, scratch)); | |
220 | } | |
221 | _ => { | |
222 | if validate { | |
0531ce1d | 223 | return error(self, ErrorCode::ControlCharacterWhileParsingString); |
7cac9316 XL |
224 | } |
225 | scratch.push(ch); | |
226 | } | |
227 | } | |
228 | } | |
229 | } | |
230 | } | |
231 | ||
041b39d2 XL |
232 | impl<'de, R> Read<'de> for IoRead<R> |
233 | where | |
234 | R: io::Read, | |
7cac9316 XL |
235 | { |
236 | #[inline] | |
0731742a | 237 | fn next(&mut self) -> Result<Option<u8>> { |
7cac9316 | 238 | match self.ch.take() { |
0bf4aa26 XL |
239 | Some(ch) => { |
240 | #[cfg(feature = "raw_value")] | |
241 | { | |
242 | if let Some(ref mut buf) = self.raw_buffer { | |
243 | buf.push(ch); | |
244 | } | |
245 | } | |
246 | Ok(Some(ch)) | |
247 | } | |
83c7162d | 248 | None => match self.iter.next() { |
0731742a | 249 | Some(Err(err)) => Err(Error::io(err)), |
0bf4aa26 XL |
250 | Some(Ok(ch)) => { |
251 | #[cfg(feature = "raw_value")] | |
252 | { | |
253 | if let Some(ref mut buf) = self.raw_buffer { | |
254 | buf.push(ch); | |
255 | } | |
256 | } | |
257 | Ok(Some(ch)) | |
258 | } | |
83c7162d XL |
259 | None => Ok(None), |
260 | }, | |
7cac9316 XL |
261 | } |
262 | } | |
263 | ||
264 | #[inline] | |
0731742a | 265 | fn peek(&mut self) -> Result<Option<u8>> { |
7cac9316 XL |
266 | match self.ch { |
267 | Some(ch) => Ok(Some(ch)), | |
83c7162d | 268 | None => match self.iter.next() { |
0731742a | 269 | Some(Err(err)) => Err(Error::io(err)), |
83c7162d XL |
270 | Some(Ok(ch)) => { |
271 | self.ch = Some(ch); | |
272 | Ok(self.ch) | |
7cac9316 | 273 | } |
83c7162d XL |
274 | None => Ok(None), |
275 | }, | |
7cac9316 XL |
276 | } |
277 | } | |
278 | ||
0bf4aa26 | 279 | #[cfg(not(feature = "raw_value"))] |
7cac9316 XL |
280 | #[inline] |
281 | fn discard(&mut self) { | |
282 | self.ch = None; | |
283 | } | |
284 | ||
0bf4aa26 XL |
285 | #[cfg(feature = "raw_value")] |
286 | fn discard(&mut self) { | |
287 | if let Some(ch) = self.ch.take() { | |
288 | if let Some(ref mut buf) = self.raw_buffer { | |
289 | buf.push(ch); | |
290 | } | |
291 | } | |
292 | } | |
293 | ||
7cac9316 XL |
294 | fn position(&self) -> Position { |
295 | Position { | |
296 | line: self.iter.line(), | |
297 | column: self.iter.col(), | |
298 | } | |
299 | } | |
300 | ||
301 | fn peek_position(&self) -> Position { | |
302 | // The LineColIterator updates its position during peek() so it has the | |
303 | // right one here. | |
304 | self.position() | |
305 | } | |
306 | ||
041b39d2 XL |
307 | fn byte_offset(&self) -> usize { |
308 | match self.ch { | |
309 | Some(_) => self.iter.byte_offset() - 1, | |
310 | None => self.iter.byte_offset(), | |
7cac9316 XL |
311 | } |
312 | } | |
7cac9316 | 313 | |
041b39d2 XL |
314 | fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> { |
315 | self.parse_str_bytes(scratch, true, as_str) | |
316 | .map(Reference::Copied) | |
7cac9316 XL |
317 | } |
318 | ||
7cac9316 XL |
319 | fn parse_str_raw<'s>( |
320 | &'s mut self, | |
041b39d2 XL |
321 | scratch: &'s mut Vec<u8>, |
322 | ) -> Result<Reference<'de, 's, [u8]>> { | |
323 | self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)) | |
324 | .map(Reference::Copied) | |
7cac9316 | 325 | } |
ea8adc8c XL |
326 | |
327 | fn ignore_str(&mut self) -> Result<()> { | |
328 | loop { | |
329 | let ch = try!(next_or_eof(self)); | |
330 | if !ESCAPE[ch as usize] { | |
331 | continue; | |
332 | } | |
333 | match ch { | |
334 | b'"' => { | |
335 | return Ok(()); | |
336 | } | |
337 | b'\\' => { | |
338 | try!(ignore_escape(self)); | |
339 | } | |
340 | _ => { | |
0531ce1d | 341 | return error(self, ErrorCode::ControlCharacterWhileParsingString); |
ea8adc8c XL |
342 | } |
343 | } | |
344 | } | |
345 | } | |
0bf4aa26 XL |
346 | |
347 | fn decode_hex_escape(&mut self) -> Result<u16> { | |
348 | let mut n = 0; | |
349 | for _ in 0..4 { | |
350 | match decode_hex_val(try!(next_or_eof(self))) { | |
351 | None => return error(self, ErrorCode::InvalidEscape), | |
352 | Some(val) => { | |
353 | n = (n << 4) + val; | |
354 | } | |
355 | } | |
356 | } | |
357 | Ok(n) | |
358 | } | |
359 | ||
360 | #[cfg(feature = "raw_value")] | |
361 | fn begin_raw_buffering(&mut self) { | |
362 | self.raw_buffer = Some(Vec::new()); | |
363 | } | |
364 | ||
365 | #[cfg(feature = "raw_value")] | |
366 | fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> | |
367 | where | |
368 | V: Visitor<'de>, | |
369 | { | |
370 | let raw = self.raw_buffer.take().unwrap(); | |
371 | let raw = String::from_utf8(raw).unwrap(); | |
0731742a XL |
372 | visitor.visit_map(OwnedRawDeserializer { |
373 | raw_value: Some(raw), | |
374 | }) | |
0bf4aa26 | 375 | } |
7cac9316 XL |
376 | } |
377 | ||
378 | ////////////////////////////////////////////////////////////////////////////// | |
379 | ||
380 | impl<'a> SliceRead<'a> { | |
381 | /// Create a JSON input source to read from a slice of bytes. | |
382 | pub fn new(slice: &'a [u8]) -> Self { | |
0bf4aa26 XL |
383 | #[cfg(not(feature = "raw_value"))] |
384 | { | |
385 | SliceRead { | |
386 | slice: slice, | |
387 | index: 0, | |
388 | } | |
389 | } | |
390 | #[cfg(feature = "raw_value")] | |
391 | { | |
392 | SliceRead { | |
393 | slice: slice, | |
394 | index: 0, | |
395 | raw_buffering_start_index: 0, | |
396 | } | |
7cac9316 XL |
397 | } |
398 | } | |
399 | ||
400 | fn position_of_index(&self, i: usize) -> Position { | |
83c7162d | 401 | let mut position = Position { line: 1, column: 0 }; |
7cac9316 XL |
402 | for ch in &self.slice[..i] { |
403 | match *ch { | |
404 | b'\n' => { | |
83c7162d XL |
405 | position.line += 1; |
406 | position.column = 0; | |
7cac9316 XL |
407 | } |
408 | _ => { | |
83c7162d | 409 | position.column += 1; |
7cac9316 XL |
410 | } |
411 | } | |
412 | } | |
83c7162d | 413 | position |
7cac9316 XL |
414 | } |
415 | ||
041b39d2 XL |
416 | /// The big optimization here over IoRead is that if the string contains no |
417 | /// backslash escape sequences, the returned &str is a slice of the raw JSON | |
418 | /// data so we avoid copying into the scratch space. | |
419 | fn parse_str_bytes<'s, T: ?Sized, F>( | |
7cac9316 XL |
420 | &'s mut self, |
421 | scratch: &'s mut Vec<u8>, | |
422 | validate: bool, | |
041b39d2 XL |
423 | result: F, |
424 | ) -> Result<Reference<'a, 's, T>> | |
425 | where | |
426 | T: 's, | |
427 | F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>, | |
7cac9316 XL |
428 | { |
429 | // Index of the first byte not yet copied into the scratch space. | |
430 | let mut start = self.index; | |
431 | ||
432 | loop { | |
041b39d2 | 433 | while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] { |
7cac9316 XL |
434 | self.index += 1; |
435 | } | |
436 | if self.index == self.slice.len() { | |
437 | return error(self, ErrorCode::EofWhileParsingString); | |
438 | } | |
439 | match self.slice[self.index] { | |
440 | b'"' => { | |
041b39d2 | 441 | if scratch.is_empty() { |
7cac9316 XL |
442 | // Fast path: return a slice of the raw JSON without any |
443 | // copying. | |
041b39d2 XL |
444 | let borrowed = &self.slice[start..self.index]; |
445 | self.index += 1; | |
446 | return result(self, borrowed).map(Reference::Borrowed); | |
7cac9316 | 447 | } else { |
041b39d2 | 448 | scratch.extend_from_slice(&self.slice[start..self.index]); |
041b39d2 | 449 | self.index += 1; |
8faf50e0 | 450 | return result(self, scratch).map(Reference::Copied); |
041b39d2 | 451 | } |
7cac9316 XL |
452 | } |
453 | b'\\' => { | |
454 | scratch.extend_from_slice(&self.slice[start..self.index]); | |
455 | self.index += 1; | |
456 | try!(parse_escape(self, scratch)); | |
457 | start = self.index; | |
458 | } | |
459 | _ => { | |
0731742a | 460 | self.index += 1; |
7cac9316 | 461 | if validate { |
0531ce1d | 462 | return error(self, ErrorCode::ControlCharacterWhileParsingString); |
7cac9316 | 463 | } |
7cac9316 XL |
464 | } |
465 | } | |
466 | } | |
467 | } | |
468 | } | |
469 | ||
// Marker impl required by the `private::Sealed` supertrait bound on `Read`.
impl<'a> private::Sealed for SliceRead<'a> {}
471 | ||
041b39d2 | 472 | impl<'a> Read<'a> for SliceRead<'a> { |
7cac9316 | 473 | #[inline] |
0731742a | 474 | fn next(&mut self) -> Result<Option<u8>> { |
7cac9316 XL |
475 | // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))` |
476 | // is about 10% slower. | |
83c7162d XL |
477 | Ok(if self.index < self.slice.len() { |
478 | let ch = self.slice[self.index]; | |
479 | self.index += 1; | |
480 | Some(ch) | |
481 | } else { | |
482 | None | |
483 | }) | |
7cac9316 XL |
484 | } |
485 | ||
486 | #[inline] | |
0731742a | 487 | fn peek(&mut self) -> Result<Option<u8>> { |
7cac9316 XL |
488 | // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower |
489 | // for some reason. | |
83c7162d XL |
490 | Ok(if self.index < self.slice.len() { |
491 | Some(self.slice[self.index]) | |
492 | } else { | |
493 | None | |
494 | }) | |
7cac9316 XL |
495 | } |
496 | ||
497 | #[inline] | |
498 | fn discard(&mut self) { | |
499 | self.index += 1; | |
500 | } | |
501 | ||
502 | fn position(&self) -> Position { | |
503 | self.position_of_index(self.index) | |
504 | } | |
505 | ||
506 | fn peek_position(&self) -> Position { | |
507 | // Cap it at slice.len() just in case the most recent call was next() | |
508 | // and it returned the last byte. | |
509 | self.position_of_index(cmp::min(self.slice.len(), self.index + 1)) | |
510 | } | |
511 | ||
041b39d2 XL |
512 | fn byte_offset(&self) -> usize { |
513 | self.index | |
514 | } | |
515 | ||
516 | fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> { | |
7cac9316 XL |
517 | self.parse_str_bytes(scratch, true, as_str) |
518 | } | |
519 | ||
520 | fn parse_str_raw<'s>( | |
521 | &'s mut self, | |
041b39d2 XL |
522 | scratch: &'s mut Vec<u8>, |
523 | ) -> Result<Reference<'a, 's, [u8]>> { | |
7cac9316 XL |
524 | self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)) |
525 | } | |
ea8adc8c XL |
526 | |
527 | fn ignore_str(&mut self) -> Result<()> { | |
528 | loop { | |
529 | while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] { | |
530 | self.index += 1; | |
531 | } | |
532 | if self.index == self.slice.len() { | |
533 | return error(self, ErrorCode::EofWhileParsingString); | |
534 | } | |
535 | match self.slice[self.index] { | |
536 | b'"' => { | |
537 | self.index += 1; | |
538 | return Ok(()); | |
539 | } | |
540 | b'\\' => { | |
541 | self.index += 1; | |
542 | try!(ignore_escape(self)); | |
543 | } | |
544 | _ => { | |
0531ce1d | 545 | return error(self, ErrorCode::ControlCharacterWhileParsingString); |
ea8adc8c XL |
546 | } |
547 | } | |
548 | } | |
549 | } | |
0bf4aa26 XL |
550 | |
551 | fn decode_hex_escape(&mut self) -> Result<u16> { | |
552 | if self.index + 4 > self.slice.len() { | |
0731742a | 553 | self.index = self.slice.len(); |
0bf4aa26 XL |
554 | return error(self, ErrorCode::EofWhileParsingString); |
555 | } | |
0731742a | 556 | |
0bf4aa26 XL |
557 | let mut n = 0; |
558 | for _ in 0..4 { | |
0731742a XL |
559 | let ch = decode_hex_val(self.slice[self.index]); |
560 | self.index += 1; | |
561 | match ch { | |
0bf4aa26 XL |
562 | None => return error(self, ErrorCode::InvalidEscape), |
563 | Some(val) => { | |
564 | n = (n << 4) + val; | |
565 | } | |
566 | } | |
0bf4aa26 XL |
567 | } |
568 | Ok(n) | |
569 | } | |
570 | ||
571 | #[cfg(feature = "raw_value")] | |
572 | fn begin_raw_buffering(&mut self) { | |
573 | self.raw_buffering_start_index = self.index; | |
574 | } | |
575 | ||
576 | #[cfg(feature = "raw_value")] | |
577 | fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value> | |
578 | where | |
579 | V: Visitor<'a>, | |
580 | { | |
581 | let raw = &self.slice[self.raw_buffering_start_index..self.index]; | |
582 | let raw = str::from_utf8(raw).unwrap(); | |
0731742a XL |
583 | visitor.visit_map(BorrowedRawDeserializer { |
584 | raw_value: Some(raw), | |
585 | }) | |
0bf4aa26 | 586 | } |
7cac9316 XL |
587 | } |
588 | ||
589 | ////////////////////////////////////////////////////////////////////////////// | |
590 | ||
591 | impl<'a> StrRead<'a> { | |
592 | /// Create a JSON input source to read from a UTF-8 string. | |
593 | pub fn new(s: &'a str) -> Self { | |
0bf4aa26 XL |
594 | #[cfg(not(feature = "raw_value"))] |
595 | { | |
596 | StrRead { | |
597 | delegate: SliceRead::new(s.as_bytes()), | |
598 | } | |
599 | } | |
600 | #[cfg(feature = "raw_value")] | |
601 | { | |
602 | StrRead { | |
603 | delegate: SliceRead::new(s.as_bytes()), | |
604 | data: s, | |
605 | } | |
83c7162d | 606 | } |
7cac9316 XL |
607 | } |
608 | } | |
609 | ||
// Marker impl required by the `private::Sealed` supertrait bound on `Read`.
impl<'a> private::Sealed for StrRead<'a> {}
611 | ||
// `Read` implementation for string input. Everything is forwarded to the inner
// `SliceRead`; the only differences are that `parse_str` skips UTF-8
// validation and `end_raw_buffering` slices the original `&str` directly.
impl<'a> Read<'a> for StrRead<'a> {
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        self.delegate.next()
    }

    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        self.delegate.peek()
    }

    #[inline]
    fn discard(&mut self) {
        self.delegate.discard();
    }

    fn position(&self) -> Position {
        self.delegate.position()
    }

    fn peek_position(&self) -> Position {
        self.delegate.peek_position()
    }

    fn byte_offset(&self) -> usize {
        self.delegate.byte_offset()
    }

    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
        self.delegate.parse_str_bytes(scratch, true, |_, bytes| {
            // The input is assumed to be valid UTF-8 and the \u-escapes are
            // checked along the way, so don't need to check here.
            //
            // SAFETY: relies on the two facts above. The delegate was built
            // from `s.as_bytes()` of a `&str`, and `parse_escape` only appends
            // the UTF-8 encoding of a `char` or single ASCII bytes.
            Ok(unsafe { str::from_utf8_unchecked(bytes) })
        })
    }

    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'a, 's, [u8]>> {
        self.delegate.parse_str_raw(scratch)
    }

    fn ignore_str(&mut self) -> Result<()> {
        self.delegate.ignore_str()
    }

    fn decode_hex_escape(&mut self) -> Result<u16> {
        self.delegate.decode_hex_escape()
    }

    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.delegate.begin_raw_buffering()
    }

    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'a>,
    {
        // Slice the original `&str` rather than the delegate's bytes so no
        // UTF-8 re-validation is needed. Note that `str` indexing panics if
        // either index is not on a character boundary.
        // NOTE(review): presumably both indices always sit on JSON token
        // boundaries (ASCII) — confirm against the callers.
        let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
        visitor.visit_map(BorrowedRawDeserializer {
            raw_value: Some(raw),
        })
    }
}
679 | ||
680 | ////////////////////////////////////////////////////////////////////////////// | |
681 | ||
// Cell values for the ESCAPE table below. The three `true` constants differ
// only in name; the names document why each byte is flagged.
const CT: bool = true; // control character \x00...\x1F
const QU: bool = true; // quote \x22
const BS: bool = true; // backslash \x5C
const O: bool = false; // allow unescaped

// Lookup table of bytes that must be escaped. A value of true at index i means
// that byte i requires an escape sequence in the input.
//
// Rows are the high nibble and columns the low nibble of the byte value.
#[cfg_attr(rustfmt, rustfmt_skip)]
static ESCAPE: [bool; 256] = [
    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 0
    CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 1
     O,  O, QU,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // 2
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // 3
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // 4
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, BS,  O,  O,  O, // 5
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // 6
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // 7
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // 8
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // 9
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // A
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // B
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // C
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // D
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // E
     O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O,  O, // F
];
709 | ||
ea8adc8c | 710 | fn next_or_eof<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result<u8> { |
0731742a | 711 | match try!(read.next()) { |
7cac9316 XL |
712 | Some(b) => Ok(b), |
713 | None => error(read, ErrorCode::EofWhileParsingString), | |
714 | } | |
715 | } | |
716 | ||
ea8adc8c | 717 | fn error<'de, R: ?Sized + Read<'de>, T>(read: &R, reason: ErrorCode) -> Result<T> { |
83c7162d XL |
718 | let position = read.position(); |
719 | Err(Error::syntax(reason, position.line, position.column)) | |
7cac9316 XL |
720 | } |
721 | ||
041b39d2 XL |
722 | fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> { |
723 | str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint)) | |
7cac9316 XL |
724 | } |
725 | ||
726 | /// Parses a JSON escape sequence and appends it into the scratch space. Assumes | |
727 | /// the previous byte read was a backslash. | |
041b39d2 | 728 | fn parse_escape<'de, R: Read<'de>>(read: &mut R, scratch: &mut Vec<u8>) -> Result<()> { |
7cac9316 XL |
729 | let ch = try!(next_or_eof(read)); |
730 | ||
731 | match ch { | |
732 | b'"' => scratch.push(b'"'), | |
733 | b'\\' => scratch.push(b'\\'), | |
734 | b'/' => scratch.push(b'/'), | |
735 | b'b' => scratch.push(b'\x08'), | |
736 | b'f' => scratch.push(b'\x0c'), | |
737 | b'n' => scratch.push(b'\n'), | |
738 | b'r' => scratch.push(b'\r'), | |
739 | b't' => scratch.push(b'\t'), | |
740 | b'u' => { | |
0bf4aa26 | 741 | let c = match try!(read.decode_hex_escape()) { |
041b39d2 XL |
742 | 0xDC00...0xDFFF => { |
743 | return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape); | |
744 | } | |
7cac9316 | 745 | |
041b39d2 XL |
746 | // Non-BMP characters are encoded as a sequence of |
747 | // two hex escapes, representing UTF-16 surrogates. | |
748 | n1 @ 0xD800...0xDBFF => { | |
749 | if try!(next_or_eof(read)) != b'\\' { | |
750 | return error(read, ErrorCode::UnexpectedEndOfHexEscape); | |
751 | } | |
752 | if try!(next_or_eof(read)) != b'u' { | |
753 | return error(read, ErrorCode::UnexpectedEndOfHexEscape); | |
754 | } | |
7cac9316 | 755 | |
0bf4aa26 | 756 | let n2 = try!(read.decode_hex_escape()); |
7cac9316 | 757 | |
041b39d2 XL |
758 | if n2 < 0xDC00 || n2 > 0xDFFF { |
759 | return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape); | |
760 | } | |
7cac9316 | 761 | |
041b39d2 | 762 | let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000; |
7cac9316 | 763 | |
ea8adc8c | 764 | match char::from_u32(n) { |
041b39d2 XL |
765 | Some(c) => c, |
766 | None => { | |
767 | return error(read, ErrorCode::InvalidUnicodeCodePoint); | |
7cac9316 XL |
768 | } |
769 | } | |
041b39d2 | 770 | } |
7cac9316 | 771 | |
83c7162d XL |
772 | n => match char::from_u32(n as u32) { |
773 | Some(c) => c, | |
774 | None => { | |
775 | return error(read, ErrorCode::InvalidUnicodeCodePoint); | |
7cac9316 | 776 | } |
83c7162d | 777 | }, |
041b39d2 | 778 | }; |
7cac9316 | 779 | |
8faf50e0 | 780 | scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes()); |
7cac9316 XL |
781 | } |
782 | _ => { | |
783 | return error(read, ErrorCode::InvalidEscape); | |
784 | } | |
785 | } | |
786 | ||
787 | Ok(()) | |
788 | } | |
789 | ||
ea8adc8c XL |
790 | /// Parses a JSON escape sequence and discards the value. Assumes the previous |
791 | /// byte read was a backslash. | |
792 | fn ignore_escape<'de, R: ?Sized + Read<'de>>(read: &mut R) -> Result<()> { | |
793 | let ch = try!(next_or_eof(read)); | |
794 | ||
795 | match ch { | |
796 | b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {} | |
797 | b'u' => { | |
0bf4aa26 | 798 | let n = match try!(read.decode_hex_escape()) { |
ea8adc8c XL |
799 | 0xDC00...0xDFFF => { |
800 | return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape); | |
801 | } | |
802 | ||
803 | // Non-BMP characters are encoded as a sequence of | |
804 | // two hex escapes, representing UTF-16 surrogates. | |
805 | n1 @ 0xD800...0xDBFF => { | |
806 | if try!(next_or_eof(read)) != b'\\' { | |
807 | return error(read, ErrorCode::UnexpectedEndOfHexEscape); | |
808 | } | |
809 | if try!(next_or_eof(read)) != b'u' { | |
810 | return error(read, ErrorCode::UnexpectedEndOfHexEscape); | |
811 | } | |
812 | ||
0bf4aa26 | 813 | let n2 = try!(read.decode_hex_escape()); |
ea8adc8c XL |
814 | |
815 | if n2 < 0xDC00 || n2 > 0xDFFF { | |
816 | return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape); | |
817 | } | |
818 | ||
819 | (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000 | |
820 | } | |
821 | ||
822 | n => n as u32, | |
823 | }; | |
824 | ||
825 | if char::from_u32(n).is_none() { | |
826 | return error(read, ErrorCode::InvalidUnicodeCodePoint); | |
827 | } | |
828 | } | |
829 | _ => { | |
830 | return error(read, ErrorCode::InvalidEscape); | |
831 | } | |
832 | } | |
833 | ||
834 | Ok(()) | |
835 | } | |
836 | ||
0bf4aa26 XL |
// Lookup table from a byte to the value of the hexadecimal digit it spells.
// Rows are the high nibble and columns the low nibble of the byte; 255 marks
// bytes that are not hexadecimal digits.
#[cfg_attr(rustfmt, rustfmt_skip)]
static HEX: [u8; 256] = [
    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 1
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 2
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,255,255,255,255,255,255, // 3
    255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, // 4
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 5
    255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, // 6
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 7
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 8
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 9
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // A
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // B
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // C
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // D
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // E
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // F
];

/// Returns the numeric value (0 to 15) of an ASCII hexadecimal digit in either
/// case, or `None` if `val` is not a hexadecimal digit.
fn decode_hex_val(val: u8) -> Option<u16> {
    match HEX[val as usize] {
        255 => None,
        digit => Some(digit as u16),
    }
}