]> git.proxmox.com Git - rustc.git/blob - vendor/xml5ever/src/driver.rs
New upstream version 1.61.0+dfsg1
[rustc.git] / vendor / xml5ever / src / driver.rs
1 // Copyright 2014-2017 The html5ever Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
3 //
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
9
10 use crate::tokenizer::{XmlTokenizer, XmlTokenizerOpts};
11 use crate::tree_builder::{TreeSink, XmlTreeBuilder, XmlTreeBuilderOpts};
12
13 use std::borrow::Cow;
14
15 use markup5ever::buffer_queue::BufferQueue;
16 use crate::tendril;
17 use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder};
18 use crate::tendril::StrTendril;
19
20 /// All-encompasing parser setting structure.
21 #[derive(Clone, Default)]
22 pub struct XmlParseOpts {
23 /// Xml tokenizer options.
24 pub tokenizer: XmlTokenizerOpts,
25 /// Xml tree builder .
26 pub tree_builder: XmlTreeBuilderOpts,
27 }
28
29 /// Parse and send results to a `TreeSink`.
30 ///
31 /// ## Example
32 ///
33 /// ```ignore
34 /// let mut sink = MySink;
35 /// parse_document(&mut sink, iter::once(my_str), Default::default());
36 /// ```
37 pub fn parse_document<Sink>(sink: Sink, opts: XmlParseOpts) -> XmlParser<Sink>
38 where
39 Sink: TreeSink,
40 {
41 let tb = XmlTreeBuilder::new(sink, opts.tree_builder);
42 let tok = XmlTokenizer::new(tb, opts.tokenizer);
43 XmlParser {
44 tokenizer: tok,
45 input_buffer: BufferQueue::new(),
46 }
47 }
48
49 /// An XML parser,
50 /// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods.
51 pub struct XmlParser<Sink>
52 where
53 Sink: TreeSink,
54 {
55 /// Tokenizer used by XmlParser.
56 pub tokenizer: XmlTokenizer<XmlTreeBuilder<Sink::Handle, Sink>>,
57 /// Input used by XmlParser.
58 pub input_buffer: BufferQueue,
59 }
60
61 impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for XmlParser<Sink> {
62 type Output = Sink::Output;
63
64 fn process(&mut self, t: StrTendril) {
65 self.input_buffer.push_back(t);
66 self.tokenizer.feed(&mut self.input_buffer);
67 }
68
69 // FIXME: Is it too noisy to report every character decoding error?
70 fn error(&mut self, desc: Cow<'static, str>) {
71 self.tokenizer.sink.sink.parse_error(desc)
72 }
73
74 fn finish(mut self) -> Self::Output {
75 self.tokenizer.end();
76 self.tokenizer.sink.sink.finish()
77 }
78 }
79
80 impl<Sink: TreeSink> XmlParser<Sink> {
81 /// Wrap this parser into a `TendrilSink` that accepts UTF-8 bytes.
82 ///
83 /// Use this when your input is bytes that are known to be in the UTF-8 encoding.
84 /// Decoding is lossy, like `String::from_utf8_lossy`.
85 pub fn from_utf8(self) -> Utf8LossyDecoder<Self> {
86 Utf8LossyDecoder::new(self)
87 }
88 }