]>
Commit | Line | Data |
---|---|---|
83c7162d XL |
1 | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. | |
3 | // | |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
7 | // option. This file may not be copied, modified, or distributed | |
8 | // except according to those terms. | |
9 | ||
10 | extern crate html5ever; | |
11 | extern crate typed_arena; | |
12 | ||
13 | use html5ever::{parse_document, QualName, Attribute, ExpandedName}; | |
14 | use html5ever::tendril::{TendrilSink, StrTendril}; | |
15 | use html5ever::interface::tree_builder::{TreeSink, QuirksMode, NodeOrText, ElementFlags}; | |
16 | use std::borrow::Cow; | |
17 | use std::cell::{Cell, RefCell}; | |
18 | use std::collections::HashSet; | |
19 | use std::io::{self, Read}; | |
20 | use std::ptr; | |
21 | ||
22 | fn main() { | |
23 | let mut bytes = Vec::new(); | |
24 | io::stdin().read_to_end(&mut bytes).unwrap(); | |
25 | let arena = typed_arena::Arena::new(); | |
26 | html5ever_parse_slice_into_arena(&bytes, &arena); | |
27 | } | |
28 | ||
29 | fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> { | |
30 | let sink = Sink { | |
31 | arena: arena, | |
32 | document: arena.alloc(Node::new(NodeData::Document)), | |
33 | quirks_mode: QuirksMode::NoQuirks, | |
34 | }; | |
35 | parse_document(sink, Default::default()).from_utf8().one(bytes) | |
36 | } | |
37 | ||
38 | type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>; | |
39 | ||
40 | type Ref<'arena> = &'arena Node<'arena>; | |
41 | ||
42 | type Link<'arena> = Cell<Option<Ref<'arena>>>; | |
43 | ||
44 | struct Sink<'arena> { | |
45 | arena: Arena<'arena>, | |
46 | document: Ref<'arena>, | |
47 | quirks_mode: QuirksMode, | |
48 | } | |
49 | ||
50 | pub struct Node<'arena> { | |
51 | parent: Link<'arena>, | |
52 | next_sibling: Link<'arena>, | |
53 | previous_sibling: Link<'arena>, | |
54 | first_child: Link<'arena>, | |
55 | last_child: Link<'arena>, | |
56 | data: NodeData<'arena>, | |
57 | } | |
58 | ||
59 | pub enum NodeData<'arena> { | |
60 | Document, | |
61 | Doctype { | |
62 | name: StrTendril, | |
63 | public_id: StrTendril, | |
64 | system_id: StrTendril, | |
65 | }, | |
66 | Text { | |
67 | contents: RefCell<StrTendril>, | |
68 | }, | |
69 | Comment { | |
70 | contents: StrTendril, | |
71 | }, | |
72 | Element { | |
73 | name: QualName, | |
74 | attrs: RefCell<Vec<Attribute>>, | |
75 | template_contents: Option<Ref<'arena>>, | |
76 | mathml_annotation_xml_integration_point: bool, | |
77 | }, | |
78 | ProcessingInstruction { | |
79 | target: StrTendril, | |
80 | contents: StrTendril, | |
81 | }, | |
82 | } | |
83 | ||
84 | impl<'arena> Node<'arena> { | |
85 | fn new(data: NodeData<'arena>) -> Self { | |
86 | Node { | |
87 | parent: Cell::new(None), | |
88 | previous_sibling: Cell::new(None), | |
89 | next_sibling: Cell::new(None), | |
90 | first_child: Cell::new(None), | |
91 | last_child: Cell::new(None), | |
92 | data: data, | |
93 | } | |
94 | } | |
95 | ||
96 | fn detach(&self) { | |
97 | let parent = self.parent.take(); | |
98 | let previous_sibling = self.previous_sibling.take(); | |
99 | let next_sibling = self.next_sibling.take(); | |
100 | ||
101 | if let Some(next_sibling) = next_sibling { | |
102 | next_sibling.previous_sibling.set(previous_sibling); | |
103 | } else if let Some(parent) = parent { | |
104 | parent.last_child.set(previous_sibling); | |
105 | } | |
106 | ||
107 | if let Some(previous_sibling) = previous_sibling { | |
108 | previous_sibling.next_sibling.set(next_sibling); | |
109 | } else if let Some(parent) = parent { | |
110 | parent.first_child.set(next_sibling); | |
111 | } | |
112 | } | |
113 | ||
114 | fn append(&'arena self, new_child: &'arena Self) { | |
115 | new_child.detach(); | |
116 | new_child.parent.set(Some(self)); | |
117 | if let Some(last_child) = self.last_child.take() { | |
118 | new_child.previous_sibling.set(Some(last_child)); | |
119 | debug_assert!(last_child.next_sibling.get().is_none()); | |
120 | last_child.next_sibling.set(Some(new_child)); | |
121 | } else { | |
122 | debug_assert!(self.first_child.get().is_none()); | |
123 | self.first_child.set(Some(new_child)); | |
124 | } | |
125 | self.last_child.set(Some(new_child)); | |
126 | } | |
127 | ||
128 | fn insert_before(&'arena self, new_sibling: &'arena Self) { | |
129 | new_sibling.detach(); | |
130 | new_sibling.parent.set(self.parent.get()); | |
131 | new_sibling.next_sibling.set(Some(self)); | |
132 | if let Some(previous_sibling) = self.previous_sibling.take() { | |
133 | new_sibling.previous_sibling.set(Some(previous_sibling)); | |
134 | debug_assert!(ptr::eq::<Node>(previous_sibling.next_sibling.get().unwrap(), self)); | |
135 | previous_sibling.next_sibling.set(Some(new_sibling)); | |
136 | } else if let Some(parent) = self.parent.get() { | |
137 | debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self)); | |
138 | parent.first_child.set(Some(new_sibling)); | |
139 | } | |
140 | self.previous_sibling.set(Some(new_sibling)); | |
141 | } | |
142 | } | |
143 | ||
144 | impl<'arena> Sink<'arena> { | |
145 | fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> { | |
146 | self.arena.alloc(Node::new(data)) | |
147 | } | |
148 | ||
149 | fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A) | |
150 | where P: FnOnce() -> Option<Ref<'arena>>, | |
151 | A: FnOnce(Ref<'arena>), | |
152 | { | |
153 | let new_node = match child { | |
154 | NodeOrText::AppendText(text) => { | |
155 | // Append to an existing Text node if we have one. | |
156 | if let Some(&Node { data: NodeData::Text { ref contents }, .. }) = previous() { | |
157 | contents.borrow_mut().push_tendril(&text); | |
158 | return | |
159 | } | |
160 | self.new_node(NodeData::Text { contents: RefCell::new(text) }) | |
161 | } | |
162 | NodeOrText::AppendNode(node) => node | |
163 | }; | |
164 | ||
165 | append(new_node) | |
166 | } | |
167 | } | |
168 | ||
169 | impl<'arena> TreeSink for Sink<'arena> { | |
170 | type Handle = Ref<'arena>; | |
171 | type Output = Ref<'arena>; | |
172 | ||
173 | fn finish(self) -> Ref<'arena> { | |
174 | self.document | |
175 | } | |
176 | ||
177 | fn parse_error(&mut self, _: Cow<'static, str>) {} | |
178 | ||
179 | fn get_document(&mut self) -> Ref<'arena> { | |
180 | self.document | |
181 | } | |
182 | ||
183 | fn set_quirks_mode(&mut self, mode: QuirksMode) { | |
184 | self.quirks_mode = mode; | |
185 | } | |
186 | ||
187 | fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool { | |
188 | ptr::eq::<Node>(*x, *y) | |
189 | } | |
190 | ||
191 | fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> { | |
192 | match target.data { | |
193 | NodeData::Element { ref name, .. } => name.expanded(), | |
194 | _ => panic!("not an element!"), | |
195 | } | |
196 | } | |
197 | ||
198 | fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { | |
199 | if let NodeData::Element { template_contents: Some(ref contents), .. } = target.data { | |
200 | contents | |
201 | } else { | |
202 | panic!("not a template element!") | |
203 | } | |
204 | } | |
205 | ||
206 | fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool { | |
207 | if let NodeData::Element { mathml_annotation_xml_integration_point, .. } = target.data { | |
208 | mathml_annotation_xml_integration_point | |
209 | } else { | |
210 | panic!("not an element!") | |
211 | } | |
212 | } | |
213 | ||
214 | fn create_element(&mut self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Ref<'arena> { | |
215 | self.new_node(NodeData::Element { | |
216 | name: name, | |
217 | attrs: RefCell::new(attrs), | |
218 | template_contents: if flags.template { | |
219 | Some(self.new_node(NodeData::Document)) | |
220 | } else { | |
221 | None | |
222 | }, | |
223 | mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, | |
224 | ||
225 | }) | |
226 | } | |
227 | ||
228 | fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> { | |
229 | self.new_node(NodeData::Comment { contents: text }) | |
230 | } | |
231 | ||
232 | fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { | |
233 | self.new_node(NodeData::ProcessingInstruction { target: target, contents: data }) | |
234 | } | |
235 | ||
236 | fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { | |
237 | self.append_common( | |
238 | child, | |
239 | || parent.last_child.get(), | |
240 | |new_node| parent.append(new_node) | |
241 | ) | |
242 | } | |
243 | ||
244 | fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { | |
245 | self.append_common( | |
246 | child, | |
247 | || sibling.previous_sibling.get(), | |
248 | |new_node| sibling.insert_before(new_node) | |
249 | ) | |
250 | } | |
251 | ||
252 | fn append_based_on_parent_node(&mut self, element: &Ref<'arena>, | |
253 | prev_element: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { | |
254 | if element.parent.get().is_some() { | |
255 | self.append_before_sibling(element, child) | |
256 | } else { | |
257 | self.append(prev_element, child) | |
258 | } | |
259 | } | |
260 | ||
261 | fn append_doctype_to_document(&mut self, | |
262 | name: StrTendril, | |
263 | public_id: StrTendril, | |
264 | system_id: StrTendril) { | |
265 | self.document.append(self.new_node(NodeData::Doctype { | |
266 | name: name, | |
267 | public_id: public_id, | |
268 | system_id: system_id | |
269 | })) | |
270 | } | |
271 | ||
272 | fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) { | |
273 | let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { | |
274 | attrs.borrow_mut() | |
275 | } else { | |
276 | panic!("not an element") | |
277 | }; | |
278 | ||
279 | let existing_names = existing.iter().map(|e| e.name.clone()).collect::<HashSet<_>>(); | |
280 | existing.extend(attrs.into_iter().filter(|attr| { | |
281 | !existing_names.contains(&attr.name) | |
282 | })); | |
283 | } | |
284 | ||
285 | fn remove_from_parent(&mut self, target: &Ref<'arena>) { | |
286 | target.detach() | |
287 | } | |
288 | ||
289 | fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { | |
290 | let mut next_child = node.first_child.get(); | |
291 | while let Some(child) = next_child { | |
292 | debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node)); | |
293 | next_child = child.next_sibling.get(); | |
294 | new_parent.append(child) | |
295 | } | |
296 | } | |
297 | } |