use scanner::*;
use std::collections::HashMap;
// use yaml::*;

#[derive(Clone, Copy, PartialEq, Debug, Eq)]
enum State {
    StreamStart,
    ImplicitDocumentStart,
    DocumentStart,
    DocumentContent,
    DocumentEnd,
    BlockNode,
    // BlockNodeOrIndentlessSequence,
    // FlowNode,
    BlockSequenceFirstEntry,
    BlockSequenceEntry,
    IndentlessSequenceEntry,
    BlockMappingFirstKey,
    BlockMappingKey,
    BlockMappingValue,
    FlowSequenceFirstEntry,
    FlowSequenceEntry,
    FlowSequenceEntryMappingKey,
    FlowSequenceEntryMappingValue,
    FlowSequenceEntryMappingEnd,
    FlowMappingFirstKey,
    FlowMappingKey,
    FlowMappingValue,
    FlowMappingEmptyValue,
    End
}

/// `Event` is used with the low-level event-based parsing API,
/// see the `EventReceiver` trait.
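///
/// A minimal sketch of driving the parser with a custom receiver; the
/// `EventCounter` type below is purely illustrative:
///
/// ```
/// use yaml_rust::parser::{Parser, Event, EventReceiver};
///
/// // Receiver that simply counts the events it is handed.
/// struct EventCounter(usize);
///
/// impl EventReceiver for EventCounter {
///     fn on_event(&mut self, _ev: &Event) {
///         self.0 += 1;
///     }
/// }
///
/// let mut counter = EventCounter(0);
/// let mut parser = Parser::new("foo: bar".chars());
/// parser.load(&mut counter, true).unwrap();
/// assert!(counter.0 > 0);
/// ```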
#[derive(Clone, PartialEq, Debug, Eq)]
pub enum Event {
    /// Reserved for internal use
    Nothing,
    StreamStart,
    StreamEnd,
    DocumentStart,
    DocumentEnd,
    /// Refer to an anchor ID
    Alias(usize),
    /// Value, style, anchor_id, tag
    Scalar(String, TScalarStyle, usize, Option<TokenType>),
    /// Anchor ID
    SequenceStart(usize),
    SequenceEnd,
    /// Anchor ID
    MappingStart(usize),
    MappingEnd
}

impl Event {
    fn empty_scalar() -> Event {
        // a null scalar
        Event::Scalar("~".to_owned(), TScalarStyle::Plain, 0, None)
    }

    fn empty_scalar_with_anchor(anchor: usize, tag: Option<TokenType>) -> Event {
        Event::Scalar("".to_owned(), TScalarStyle::Plain, anchor, tag)
    }
}

#[derive(Debug)]
pub struct Parser<T> {
    scanner: Scanner<T>,
    states: Vec<State>,
    state: State,
    marks: Vec<Marker>,
    token: Option<Token>,
    anchors: HashMap<String, usize>,
    anchor_id: usize,
}

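/// Receives each `Event` as the parser produces it; see `Parser::load`.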
pub trait EventReceiver {
    fn on_event(&mut self, ev: &Event);
}

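/// Like `EventReceiver`, but the handler is also given the `Marker`
/// locating the event in the input.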
pub trait MarkedEventReceiver {
    fn on_event(&mut self, ev: &Event, _mark: Marker);
}

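// Every `EventReceiver` can be used where a `MarkedEventReceiver` is
// expected; the marker is simply dropped.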
impl<R: EventReceiver> MarkedEventReceiver for R {
    fn on_event(&mut self, ev: &Event, _mark: Marker) {
        self.on_event(ev)
    }
}

pub type ParseResult = Result<(Event, Marker), ScanError>;

impl<T: Iterator<Item=char>> Parser<T> {
    pub fn new(src: T) -> Parser<T> {
        Parser {
            scanner: Scanner::new(src),
            states: Vec::new(),
            state: State::StreamStart,
            marks: Vec::new(),
            token: None,

            anchors: HashMap::new(),
            // valid anchor_id starts from 1
            anchor_id: 1,
        }
    }

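    // Return the current lookahead token, fetching the next one from the
    // scanner if needed; end of input without a scanner error is reported
    // as "unexpected eof".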
    fn peek(&mut self) -> Result<Token, ScanError> {
        if self.token.is_none() {
            self.token = self.scanner.next();
        }
        if self.token.is_none() {
            match self.scanner.get_error() {
                None =>
                    return Err(ScanError::new(self.scanner.mark(),
                          "unexpected eof")),
                Some(e) => return Err(e),
            }
        }
        // XXX better?
        Ok(self.token.clone().unwrap())
    }

    fn skip(&mut self) {
        self.token = None;
        //self.peek();
    }
    fn pop_state(&mut self) {
        self.state = self.states.pop().unwrap()
    }
    fn push_state(&mut self, state: State) {
        self.states.push(state);
    }

    fn parse<R: MarkedEventReceiver>(&mut self, recv: &mut R)
        -> Result<Event, ScanError> {
        if self.state == State::End {
            return Ok(Event::StreamEnd);
        }
        let (ev, mark) = try!(self.state_machine());
        // println!("EV {:?}", ev);
        recv.on_event(&ev, mark);
        Ok(ev)
    }

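    /// Parse the input and forward every event to `recv`. When `multi` is
    /// true, all documents in the stream are loaded; otherwise parsing
    /// stops after the first document.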
    pub fn load<R: MarkedEventReceiver>(&mut self, recv: &mut R, multi: bool)
        -> Result<(), ScanError> {
        if !self.scanner.stream_started() {
            let ev = try!(self.parse(recv));
            assert_eq!(ev, Event::StreamStart);
        }

        if self.scanner.stream_ended() {
            // XXX has parsed?
            recv.on_event(&Event::StreamEnd, self.scanner.mark());
            return Ok(());
        }
        loop {
            let ev = try!(self.parse(recv));
            if ev == Event::StreamEnd {
                recv.on_event(&Event::StreamEnd, self.scanner.mark());
                return Ok(());
            }
            // clear anchors before a new document
            self.anchors.clear();
            try!(self.load_document(&ev, recv));
            if !multi {
                break;
            }
        }
        Ok(())
    }

    fn load_document<R: MarkedEventReceiver>(&mut self, first_ev: &Event, recv: &mut R)
        -> Result<(), ScanError> {
        assert_eq!(first_ev, &Event::DocumentStart);

        let ev = try!(self.parse(recv));
        try!(self.load_node(&ev, recv));

        // DOCUMENT-END is expected.
        let ev = try!(self.parse(recv));
        assert_eq!(ev, Event::DocumentEnd);

        Ok(())
    }

    fn load_node<R: MarkedEventReceiver>(&mut self, first_ev: &Event, recv: &mut R)
        -> Result<(), ScanError> {
        match *first_ev {
            Event::Alias(..) | Event::Scalar(..) => {
                Ok(())
            },
            Event::SequenceStart(_) => {
                self.load_sequence(first_ev, recv)
            },
            Event::MappingStart(_) => {
                self.load_mapping(first_ev, recv)
            },
            _ => { println!("UNREACHABLE EVENT: {:?}", first_ev);
                unreachable!(); }
        }
    }

    fn load_mapping<R: MarkedEventReceiver>(&mut self, _first_ev: &Event, recv: &mut R)
        -> Result<(), ScanError> {
        let mut ev = try!(self.parse(recv));
        while ev != Event::MappingEnd {
            // key
            try!(self.load_node(&ev, recv));

            // value
            ev = try!(self.parse(recv));
            try!(self.load_node(&ev, recv));

            // next event
            ev = try!(self.parse(recv));
        }
        Ok(())
    }

    fn load_sequence<R: MarkedEventReceiver>(&mut self, _first_ev: &Event, recv: &mut R)
        -> Result<(), ScanError> {
        let mut ev = try!(self.parse(recv));
        while ev != Event::SequenceEnd {
            try!(self.load_node(&ev, recv));

            // next event
            ev = try!(self.parse(recv));
        }
        Ok(())
    }

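    // Run one step of the parser: dispatch on the current state and return
    // the next (event, marker) pair; each handler updates `self.state`.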
    fn state_machine(&mut self) -> ParseResult {
        // let next_tok = try!(self.peek());
        // println!("cur_state {:?}, next tok: {:?}", self.state, next_tok);
        match self.state {
            State::StreamStart => self.stream_start(),

            State::ImplicitDocumentStart => self.document_start(true),
            State::DocumentStart => self.document_start(false),
            State::DocumentContent => self.document_content(),
            State::DocumentEnd => self.document_end(),

            State::BlockNode => self.parse_node(true, false),
            // State::BlockNodeOrIndentlessSequence => self.parse_node(true, true),
            // State::FlowNode => self.parse_node(false, false),

            State::BlockMappingFirstKey => self.block_mapping_key(true),
            State::BlockMappingKey => self.block_mapping_key(false),
            State::BlockMappingValue => self.block_mapping_value(),

            State::BlockSequenceFirstEntry => self.block_sequence_entry(true),
            State::BlockSequenceEntry => self.block_sequence_entry(false),

            State::FlowSequenceFirstEntry => self.flow_sequence_entry(true),
            State::FlowSequenceEntry => self.flow_sequence_entry(false),

            State::FlowMappingFirstKey => self.flow_mapping_key(true),
            State::FlowMappingKey => self.flow_mapping_key(false),
            State::FlowMappingValue => self.flow_mapping_value(false),

            State::IndentlessSequenceEntry => self.indentless_sequence_entry(),

            State::FlowSequenceEntryMappingKey => self.flow_sequence_entry_mapping_key(),
            State::FlowSequenceEntryMappingValue => self.flow_sequence_entry_mapping_value(),
            State::FlowSequenceEntryMappingEnd => self.flow_sequence_entry_mapping_end(),
            State::FlowMappingEmptyValue => self.flow_mapping_value(true),

            /* impossible */
            State::End => unreachable!(),
        }
    }

    fn stream_start(&mut self) -> ParseResult {
        let tok = try!(self.peek());

        match tok.1 {
            TokenType::StreamStart(_) => {
                self.state = State::ImplicitDocumentStart;
                self.skip();
                Ok((Event::StreamStart, tok.0))
            },
            _ => Err(ScanError::new(tok.0,
                    "did not find expected <stream-start>")),
        }
    }

    fn document_start(&mut self, implicit: bool) -> ParseResult {
        let mut tok = try!(self.peek());
        if !implicit {
            while let TokenType::DocumentEnd = tok.1 {
                self.skip();
                tok = try!(self.peek());
            }
        }

        match tok.1 {
            TokenType::StreamEnd => {
                self.state = State::End;
                self.skip();
                Ok((Event::StreamEnd, tok.0))
            },
            TokenType::VersionDirective(..)
                | TokenType::TagDirective(..)
                | TokenType::DocumentStart => {
                    // explicit document
                    self._explicit_document_start()
                },
            _ if implicit => {
                try!(self.parser_process_directives());
                self.push_state(State::DocumentEnd);
                self.state = State::BlockNode;
                Ok((Event::DocumentStart, tok.0))
            },
            _ => {
                // explicit document
                self._explicit_document_start()
            }
        }
    }

    fn parser_process_directives(&mut self) -> Result<(), ScanError> {
        loop {
            let tok = try!(self.peek());
            match tok.1 {
                TokenType::VersionDirective(_, _) => {
                    // XXX parsing with warning according to spec
                    //if major != 1 || minor > 2 {
                    //    return Err(ScanError::new(tok.0,
                    //        "found incompatible YAML document"));
                    //}
                },
                TokenType::TagDirective(..) => {
                    // TODO add tag directive
                },
                _ => break
            }
            self.skip();
        }
        // TODO tag directive
        Ok(())
    }

    fn _explicit_document_start(&mut self) -> ParseResult {
        try!(self.parser_process_directives());
        let tok = try!(self.peek());
        if tok.1 != TokenType::DocumentStart {
            return Err(ScanError::new(tok.0, "did not find expected <document start>"));
        }
        self.push_state(State::DocumentEnd);
        self.state = State::DocumentContent;
        self.skip();
        Ok((Event::DocumentStart, tok.0))
    }

    fn document_content(&mut self) -> ParseResult {
        let tok = try!(self.peek());
        match tok.1 {
            TokenType::VersionDirective(..)
                | TokenType::TagDirective(..)
                | TokenType::DocumentStart
                | TokenType::DocumentEnd
                | TokenType::StreamEnd => {
                    self.pop_state();
                    // empty scalar
                    Ok((Event::empty_scalar(), tok.0))
                },
            _ => {
                self.parse_node(true, false)
            }
        }
    }

    fn document_end(&mut self) -> ParseResult {
        let mut _implicit = true;
        let tok = try!(self.peek());
        let _start_mark = tok.0;

        if let TokenType::DocumentEnd = tok.1 {
            self.skip();
            _implicit = false;
        }

        // TODO tag handling
        self.state = State::DocumentStart;
        Ok((Event::DocumentEnd, tok.0))
    }

    fn register_anchor(&mut self, name: &str, _: &Marker) -> Result<usize, ScanError> {
        // anchors can be overridden/reused
        // if self.anchors.contains_key(name) {
        //     return Err(ScanError::new(*mark,
        //         "while parsing anchor, found duplicated anchor"));
        // }
        let new_id = self.anchor_id;
        self.anchor_id += 1;
        self.anchors.insert(name.to_owned(), new_id);
        Ok(new_id)
    }

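    // Parse a single node: resolve an alias, register an anchor and/or
    // record a tag if present, then emit the event that starts the node
    // (scalar, sequence start, mapping start, or an empty scalar).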
    fn parse_node(&mut self, block: bool, indentless_sequence: bool) -> ParseResult {
        let mut tok = try!(self.peek());
        let mut anchor_id = 0;
        let mut tag = None;
        match tok.1 {
            TokenType::Alias(name) => {
                self.pop_state();
                self.skip();
                match self.anchors.get(&name) {
                    None => return Err(ScanError::new(tok.0, "while parsing node, found unknown anchor")),
                    Some(id) => return Ok((Event::Alias(*id), tok.0))
                }
            },
            TokenType::Anchor(name) => {
                anchor_id = try!(self.register_anchor(&name, &tok.0));
                self.skip();
                tok = try!(self.peek());
                if let TokenType::Tag(_, _) = tok.1 {
                    tag = Some(tok.1);
                    self.skip();
                    tok = try!(self.peek());
                }
            },
            TokenType::Tag(..) => {
                tag = Some(tok.1);
                self.skip();
                tok = try!(self.peek());
                if let TokenType::Anchor(name) = tok.1 {
                    anchor_id = try!(self.register_anchor(&name, &tok.0));
                    self.skip();
                    tok = try!(self.peek());
                }
            },
            _ => {}
        }
        match tok.1 {
            TokenType::BlockEntry if indentless_sequence => {
                self.state = State::IndentlessSequenceEntry;
                Ok((Event::SequenceStart(anchor_id), tok.0))
            },
            TokenType::Scalar(style, v) => {
                self.pop_state();
                self.skip();
                Ok((Event::Scalar(v, style, anchor_id, tag), tok.0))
            },
            TokenType::FlowSequenceStart => {
                self.state = State::FlowSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id), tok.0))
            },
            TokenType::FlowMappingStart => {
                self.state = State::FlowMappingFirstKey;
                Ok((Event::MappingStart(anchor_id), tok.0))
            },
            TokenType::BlockSequenceStart if block => {
                self.state = State::BlockSequenceFirstEntry;
                Ok((Event::SequenceStart(anchor_id), tok.0))
            },
            TokenType::BlockMappingStart if block => {
                self.state = State::BlockMappingFirstKey;
                Ok((Event::MappingStart(anchor_id), tok.0))
            },
            // ex 7.2, an empty scalar can follow a secondary tag
            _ if tag.is_some() || anchor_id > 0 => {
                self.pop_state();
                Ok((Event::empty_scalar_with_anchor(anchor_id, tag), tok.0))
            },
            _ => { Err(ScanError::new(tok.0, "while parsing a node, did not find expected node content")) }
        }
    }

    fn block_mapping_key(&mut self, first: bool) -> ParseResult {
        // skip BlockMappingStart
        if first {
            let _ = try!(self.peek());
            //self.marks.push(tok.0);
            self.skip();
        }
        let tok = try!(self.peek());
        match tok.1 {
            TokenType::Key => {
                self.skip();
                let tok = try!(self.peek());
                match tok.1 {
                    TokenType::Key
                        | TokenType::Value
                        | TokenType::BlockEnd
                        => {
                            self.state = State::BlockMappingValue;
                            // empty scalar
                            Ok((Event::empty_scalar(), tok.0))
                        }
                    _ => {
                        self.push_state(State::BlockMappingValue);
                        self.parse_node(true, true)
                    }
                }
            },
            // XXX(chenyh): libyaml failed to parse spec 1.2, ex8.18
            TokenType::Value => {
                self.state = State::BlockMappingValue;
                Ok((Event::empty_scalar(), tok.0))
            },
            TokenType::BlockEnd => {
                self.pop_state();
                self.skip();
                Ok((Event::MappingEnd, tok.0))
            },
            _ => {
                Err(ScanError::new(tok.0, "while parsing a block mapping, did not find expected key"))
            }
        }
    }

    fn block_mapping_value(&mut self) -> ParseResult {
        let tok = try!(self.peek());
        match tok.1 {
            TokenType::Value => {
                self.skip();
                let tok = try!(self.peek());
                match tok.1 {
                    TokenType::Key | TokenType::Value | TokenType::BlockEnd
                        => {
                            self.state = State::BlockMappingKey;
                            // empty scalar
                            Ok((Event::empty_scalar(), tok.0))
                        }
                    _ => {
                        self.push_state(State::BlockMappingKey);
                        self.parse_node(true, true)
                    }
                }
            },
            _ => {
                self.state = State::BlockMappingKey;
                // empty scalar
                Ok((Event::empty_scalar(), tok.0))
            }
        }
    }

    fn flow_mapping_key(&mut self, first: bool) -> ParseResult {
        if first {
            let _ = try!(self.peek());
            self.skip();
        }
        let mut tok = try!(self.peek());

        if tok.1 != TokenType::FlowMappingEnd {
            if !first {
                if tok.1 == TokenType::FlowEntry {
                    self.skip();
                    tok = try!(self.peek());
                } else {
                    return Err(ScanError::new(tok.0,
                        "while parsing a flow mapping, did not find expected ',' or '}'"));
                }
            }

            if tok.1 == TokenType::Key {
                self.skip();
                tok = try!(self.peek());
                match tok.1 {
                    TokenType::Value
                        | TokenType::FlowEntry
                        | TokenType::FlowMappingEnd => {
                            self.state = State::FlowMappingValue;
                            return Ok((Event::empty_scalar(), tok.0));
                        },
                    _ => {
                        self.push_state(State::FlowMappingValue);
                        return self.parse_node(false, false);
                    }
                }
            // XXX libyaml fail ex 7.3, empty key
            } else if tok.1 == TokenType::Value {
                self.state = State::FlowMappingValue;
                return Ok((Event::empty_scalar(), tok.0));
            } else if tok.1 != TokenType::FlowMappingEnd {
                self.push_state(State::FlowMappingEmptyValue);
                return self.parse_node(false, false);
            }
        }

        self.pop_state();
        self.skip();
        Ok((Event::MappingEnd, tok.0))
    }

    fn flow_mapping_value(&mut self, empty: bool) -> ParseResult {
        let tok = try!(self.peek());
        if empty {
            self.state = State::FlowMappingKey;
            return Ok((Event::empty_scalar(), tok.0));
        }

        if tok.1 == TokenType::Value {
            self.skip();
            let tok = try!(self.peek());
            match tok.1 {
                TokenType::FlowEntry
                    | TokenType::FlowMappingEnd => { },
                _ => {
                    self.push_state(State::FlowMappingKey);
                    return self.parse_node(false, false);
                }
            }
        }

        self.state = State::FlowMappingKey;
        Ok((Event::empty_scalar(), tok.0))
    }

    fn flow_sequence_entry(&mut self, first: bool) -> ParseResult {
        // skip FlowSequenceStart
        if first {
            let _ = try!(self.peek());
            //self.marks.push(tok.0);
            self.skip();
        }
        let mut tok = try!(self.peek());
        match tok.1 {
            TokenType::FlowSequenceEnd => {
                self.pop_state();
                self.skip();
                return Ok((Event::SequenceEnd, tok.0));
            },
            TokenType::FlowEntry if !first => {
                self.skip();
                tok = try!(self.peek());
            },
            _ if !first => {
                return Err(ScanError::new(tok.0,
                    "while parsing a flow sequence, expected ',' or ']'"));
            }
            _ => { /* next */ }
        }
        match tok.1 {
            TokenType::FlowSequenceEnd => {
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, tok.0))
            },
            TokenType::Key => {
                self.state = State::FlowSequenceEntryMappingKey;
                self.skip();
                Ok((Event::MappingStart(0), tok.0))
            }
            _ => {
                self.push_state(State::FlowSequenceEntry);
                self.parse_node(false, false)
            }
        }
    }

    fn indentless_sequence_entry(&mut self) -> ParseResult {
        let mut tok = try!(self.peek());
        if tok.1 != TokenType::BlockEntry {
            self.pop_state();
            return Ok((Event::SequenceEnd, tok.0));
        }

        self.skip();
        tok = try!(self.peek());
        match tok.1 {
            TokenType::BlockEntry
                | TokenType::Key
                | TokenType::Value
                | TokenType::BlockEnd => {
                    self.state = State::IndentlessSequenceEntry;
                    Ok((Event::empty_scalar(), tok.0))
                },
            _ => {
                self.push_state(State::IndentlessSequenceEntry);
                self.parse_node(true, false)
            }
        }
    }

    fn block_sequence_entry(&mut self, first: bool) -> ParseResult {
        // BLOCK-SEQUENCE-START
        if first {
            let _ = try!(self.peek());
            //self.marks.push(tok.0);
            self.skip();
        }
        let mut tok = try!(self.peek());
        match tok.1 {
            TokenType::BlockEnd => {
                self.pop_state();
                self.skip();
                Ok((Event::SequenceEnd, tok.0))
            },
            TokenType::BlockEntry => {
                self.skip();
                tok = try!(self.peek());
                match tok.1 {
                    TokenType::BlockEntry
                        | TokenType::BlockEnd => {
                            self.state = State::BlockSequenceEntry;
                            Ok((Event::empty_scalar(), tok.0))
                        },
                    _ => {
                        self.push_state(State::BlockSequenceEntry);
                        self.parse_node(true, false)
                    }
                }
            },
            _ => {
                Err(ScanError::new(tok.0,
                    "while parsing a block collection, did not find expected '-' indicator"))
            }
        }
    }

    fn flow_sequence_entry_mapping_key(&mut self) -> ParseResult {
        let tok = try!(self.peek());

        match tok.1 {
            TokenType::Value
                | TokenType::FlowEntry
                | TokenType::FlowSequenceEnd => {
                    self.skip();
                    self.state = State::FlowSequenceEntryMappingValue;
                    Ok((Event::empty_scalar(), tok.0))
                },
            _ => {
                self.push_state(State::FlowSequenceEntryMappingValue);
                self.parse_node(false, false)
            }
        }
    }

    fn flow_sequence_entry_mapping_value(&mut self) -> ParseResult {
        let tok = try!(self.peek());

        match tok.1 {
            TokenType::Value => {
                self.skip();
                let tok = try!(self.peek());
                self.state = State::FlowSequenceEntryMappingValue;
                match tok.1 {
                    TokenType::FlowEntry
                        | TokenType::FlowSequenceEnd => {
                            self.state = State::FlowSequenceEntryMappingEnd;
                            Ok((Event::empty_scalar(), tok.0))
                        },
                    _ => {
                        self.push_state(State::FlowSequenceEntryMappingEnd);
                        self.parse_node(false, false)
                    }
                }
            },
            _ => {
                self.state = State::FlowSequenceEntryMappingEnd;
                Ok((Event::empty_scalar(), tok.0))
            }
        }
    }

    fn flow_sequence_entry_mapping_end(&mut self) -> ParseResult {
        self.state = State::FlowSequenceEntry;
        Ok((Event::MappingEnd, self.scanner.mark()))
    }
}