1 // Copyright 2014-2017 The html5ever Project Developers. See the
2 // COPYRIGHT file at the top-level directory of this distribution.
4 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. This file may not be copied, modified, or distributed
8 // except according to those terms.
10 use crate::tokenizer
::{XmlTokenizer, XmlTokenizerOpts}
;
11 use crate::tree_builder
::{TreeSink, XmlTreeBuilder, XmlTreeBuilderOpts}
;
15 use markup5ever
::buffer_queue
::BufferQueue
;
17 use crate::tendril
::stream
::{TendrilSink, Utf8LossyDecoder}
;
18 use crate::tendril
::StrTendril
;
20 /// All-encompasing parser setting structure.
21 #[derive(Clone, Default)]
22 pub struct XmlParseOpts
{
23 /// Xml tokenizer options.
24 pub tokenizer
: XmlTokenizerOpts
,
25 /// Xml tree builder .
26 pub tree_builder
: XmlTreeBuilderOpts
,
29 /// Parse and send results to a `TreeSink`.
34 /// let mut sink = MySink;
35 /// parse_document(&mut sink, iter::once(my_str), Default::default());
37 pub fn parse_document
<Sink
>(sink
: Sink
, opts
: XmlParseOpts
) -> XmlParser
<Sink
>
41 let tb
= XmlTreeBuilder
::new(sink
, opts
.tree_builder
);
42 let tok
= XmlTokenizer
::new(tb
, opts
.tokenizer
);
45 input_buffer
: BufferQueue
::new(),
50 /// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods.
51 pub struct XmlParser
<Sink
>
55 /// Tokenizer used by XmlParser.
56 pub tokenizer
: XmlTokenizer
<XmlTreeBuilder
<Sink
::Handle
, Sink
>>,
57 /// Input used by XmlParser.
58 pub input_buffer
: BufferQueue
,
61 impl<Sink
: TreeSink
> TendrilSink
<tendril
::fmt
::UTF8
> for XmlParser
<Sink
> {
62 type Output
= Sink
::Output
;
64 fn process(&mut self, t
: StrTendril
) {
65 self.input_buffer
.push_back(t
);
66 self.tokenizer
.feed(&mut self.input_buffer
);
69 // FIXME: Is it too noisy to report every character decoding error?
70 fn error(&mut self, desc
: Cow
<'
static, str>) {
71 self.tokenizer
.sink
.sink
.parse_error(desc
)
74 fn finish(mut self) -> Self::Output
{
76 self.tokenizer
.sink
.sink
.finish()
80 impl<Sink
: TreeSink
> XmlParser
<Sink
> {
81 /// Wrap this parser into a `TendrilSink` that accepts UTF-8 bytes.
83 /// Use this when your input is bytes that are known to be in the UTF-8 encoding.
84 /// Decoding is lossy, like `String::from_utf8_lossy`.
85 pub fn from_utf8(self) -> Utf8LossyDecoder
<Self> {
86 Utf8LossyDecoder
::new(self)