]>
Commit | Line | Data |
---|---|---|
83c7162d XL |
1 | // Copyright 2014-2017 The html5ever Project Developers. See the |
2 | // COPYRIGHT file at the top-level directory of this distribution. | |
3 | // | |
4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
7 | // option. This file may not be copied, modified, or distributed | |
8 | // except according to those terms. | |
9 | ||
10 | extern crate html5ever; | |
11 | extern crate typed_arena; | |
12 | ||
dc9dc135 XL |
13 | use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; |
14 | use html5ever::tendril::{StrTendril, TendrilSink}; | |
15 | use html5ever::{parse_document, Attribute, ExpandedName, QualName}; | |
83c7162d XL |
16 | use std::borrow::Cow; |
17 | use std::cell::{Cell, RefCell}; | |
18 | use std::collections::HashSet; | |
19 | use std::io::{self, Read}; | |
20 | use std::ptr; | |
21 | ||
22 | fn main() { | |
23 | let mut bytes = Vec::new(); | |
24 | io::stdin().read_to_end(&mut bytes).unwrap(); | |
25 | let arena = typed_arena::Arena::new(); | |
26 | html5ever_parse_slice_into_arena(&bytes, &arena); | |
27 | } | |
28 | ||
29 | fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> { | |
30 | let sink = Sink { | |
31 | arena: arena, | |
32 | document: arena.alloc(Node::new(NodeData::Document)), | |
33 | quirks_mode: QuirksMode::NoQuirks, | |
34 | }; | |
dc9dc135 XL |
35 | parse_document(sink, Default::default()) |
36 | .from_utf8() | |
37 | .one(bytes) | |
83c7162d XL |
38 | } |
39 | ||
40 | type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>; | |
41 | ||
42 | type Ref<'arena> = &'arena Node<'arena>; | |
43 | ||
44 | type Link<'arena> = Cell<Option<Ref<'arena>>>; | |
45 | ||
46 | struct Sink<'arena> { | |
47 | arena: Arena<'arena>, | |
48 | document: Ref<'arena>, | |
49 | quirks_mode: QuirksMode, | |
50 | } | |
51 | ||
52 | pub struct Node<'arena> { | |
53 | parent: Link<'arena>, | |
54 | next_sibling: Link<'arena>, | |
55 | previous_sibling: Link<'arena>, | |
56 | first_child: Link<'arena>, | |
57 | last_child: Link<'arena>, | |
58 | data: NodeData<'arena>, | |
59 | } | |
60 | ||
61 | pub enum NodeData<'arena> { | |
62 | Document, | |
63 | Doctype { | |
64 | name: StrTendril, | |
65 | public_id: StrTendril, | |
66 | system_id: StrTendril, | |
67 | }, | |
68 | Text { | |
69 | contents: RefCell<StrTendril>, | |
70 | }, | |
71 | Comment { | |
72 | contents: StrTendril, | |
73 | }, | |
74 | Element { | |
75 | name: QualName, | |
76 | attrs: RefCell<Vec<Attribute>>, | |
77 | template_contents: Option<Ref<'arena>>, | |
78 | mathml_annotation_xml_integration_point: bool, | |
79 | }, | |
80 | ProcessingInstruction { | |
81 | target: StrTendril, | |
82 | contents: StrTendril, | |
83 | }, | |
84 | } | |
85 | ||
86 | impl<'arena> Node<'arena> { | |
87 | fn new(data: NodeData<'arena>) -> Self { | |
88 | Node { | |
89 | parent: Cell::new(None), | |
90 | previous_sibling: Cell::new(None), | |
91 | next_sibling: Cell::new(None), | |
92 | first_child: Cell::new(None), | |
93 | last_child: Cell::new(None), | |
94 | data: data, | |
95 | } | |
96 | } | |
97 | ||
98 | fn detach(&self) { | |
99 | let parent = self.parent.take(); | |
100 | let previous_sibling = self.previous_sibling.take(); | |
101 | let next_sibling = self.next_sibling.take(); | |
102 | ||
103 | if let Some(next_sibling) = next_sibling { | |
104 | next_sibling.previous_sibling.set(previous_sibling); | |
105 | } else if let Some(parent) = parent { | |
106 | parent.last_child.set(previous_sibling); | |
107 | } | |
108 | ||
109 | if let Some(previous_sibling) = previous_sibling { | |
110 | previous_sibling.next_sibling.set(next_sibling); | |
111 | } else if let Some(parent) = parent { | |
112 | parent.first_child.set(next_sibling); | |
113 | } | |
114 | } | |
115 | ||
116 | fn append(&'arena self, new_child: &'arena Self) { | |
117 | new_child.detach(); | |
118 | new_child.parent.set(Some(self)); | |
119 | if let Some(last_child) = self.last_child.take() { | |
120 | new_child.previous_sibling.set(Some(last_child)); | |
121 | debug_assert!(last_child.next_sibling.get().is_none()); | |
122 | last_child.next_sibling.set(Some(new_child)); | |
123 | } else { | |
124 | debug_assert!(self.first_child.get().is_none()); | |
125 | self.first_child.set(Some(new_child)); | |
126 | } | |
127 | self.last_child.set(Some(new_child)); | |
128 | } | |
129 | ||
130 | fn insert_before(&'arena self, new_sibling: &'arena Self) { | |
131 | new_sibling.detach(); | |
132 | new_sibling.parent.set(self.parent.get()); | |
133 | new_sibling.next_sibling.set(Some(self)); | |
134 | if let Some(previous_sibling) = self.previous_sibling.take() { | |
135 | new_sibling.previous_sibling.set(Some(previous_sibling)); | |
dc9dc135 XL |
136 | debug_assert!(ptr::eq::<Node>( |
137 | previous_sibling.next_sibling.get().unwrap(), | |
138 | self | |
139 | )); | |
83c7162d XL |
140 | previous_sibling.next_sibling.set(Some(new_sibling)); |
141 | } else if let Some(parent) = self.parent.get() { | |
142 | debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self)); | |
143 | parent.first_child.set(Some(new_sibling)); | |
144 | } | |
145 | self.previous_sibling.set(Some(new_sibling)); | |
146 | } | |
147 | } | |
148 | ||
149 | impl<'arena> Sink<'arena> { | |
150 | fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> { | |
151 | self.arena.alloc(Node::new(data)) | |
152 | } | |
153 | ||
154 | fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A) | |
dc9dc135 XL |
155 | where |
156 | P: FnOnce() -> Option<Ref<'arena>>, | |
157 | A: FnOnce(Ref<'arena>), | |
83c7162d XL |
158 | { |
159 | let new_node = match child { | |
160 | NodeOrText::AppendText(text) => { | |
161 | // Append to an existing Text node if we have one. | |
dc9dc135 XL |
162 | if let Some(&Node { |
163 | data: NodeData::Text { ref contents }, | |
164 | .. | |
165 | }) = previous() | |
166 | { | |
83c7162d | 167 | contents.borrow_mut().push_tendril(&text); |
dc9dc135 | 168 | return; |
83c7162d | 169 | } |
dc9dc135 XL |
170 | self.new_node(NodeData::Text { |
171 | contents: RefCell::new(text), | |
172 | }) | |
173 | }, | |
174 | NodeOrText::AppendNode(node) => node, | |
83c7162d XL |
175 | }; |
176 | ||
177 | append(new_node) | |
178 | } | |
179 | } | |
180 | ||
181 | impl<'arena> TreeSink for Sink<'arena> { | |
182 | type Handle = Ref<'arena>; | |
183 | type Output = Ref<'arena>; | |
184 | ||
185 | fn finish(self) -> Ref<'arena> { | |
186 | self.document | |
187 | } | |
188 | ||
189 | fn parse_error(&mut self, _: Cow<'static, str>) {} | |
190 | ||
191 | fn get_document(&mut self) -> Ref<'arena> { | |
192 | self.document | |
193 | } | |
194 | ||
195 | fn set_quirks_mode(&mut self, mode: QuirksMode) { | |
196 | self.quirks_mode = mode; | |
197 | } | |
198 | ||
199 | fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool { | |
200 | ptr::eq::<Node>(*x, *y) | |
201 | } | |
202 | ||
203 | fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> { | |
204 | match target.data { | |
205 | NodeData::Element { ref name, .. } => name.expanded(), | |
206 | _ => panic!("not an element!"), | |
207 | } | |
208 | } | |
209 | ||
210 | fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { | |
dc9dc135 XL |
211 | if let NodeData::Element { |
212 | template_contents: Some(ref contents), | |
213 | .. | |
214 | } = target.data | |
215 | { | |
83c7162d XL |
216 | contents |
217 | } else { | |
218 | panic!("not a template element!") | |
219 | } | |
220 | } | |
221 | ||
222 | fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool { | |
dc9dc135 XL |
223 | if let NodeData::Element { |
224 | mathml_annotation_xml_integration_point, | |
225 | .. | |
226 | } = target.data | |
227 | { | |
83c7162d XL |
228 | mathml_annotation_xml_integration_point |
229 | } else { | |
230 | panic!("not an element!") | |
231 | } | |
232 | } | |
233 | ||
dc9dc135 XL |
234 | fn create_element( |
235 | &mut self, | |
236 | name: QualName, | |
237 | attrs: Vec<Attribute>, | |
238 | flags: ElementFlags, | |
239 | ) -> Ref<'arena> { | |
83c7162d XL |
240 | self.new_node(NodeData::Element { |
241 | name: name, | |
242 | attrs: RefCell::new(attrs), | |
243 | template_contents: if flags.template { | |
244 | Some(self.new_node(NodeData::Document)) | |
245 | } else { | |
246 | None | |
247 | }, | |
248 | mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, | |
83c7162d XL |
249 | }) |
250 | } | |
251 | ||
252 | fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> { | |
253 | self.new_node(NodeData::Comment { contents: text }) | |
254 | } | |
255 | ||
256 | fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { | |
dc9dc135 XL |
257 | self.new_node(NodeData::ProcessingInstruction { |
258 | target: target, | |
259 | contents: data, | |
260 | }) | |
83c7162d XL |
261 | } |
262 | ||
263 | fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { | |
264 | self.append_common( | |
265 | child, | |
266 | || parent.last_child.get(), | |
dc9dc135 | 267 | |new_node| parent.append(new_node), |
83c7162d XL |
268 | ) |
269 | } | |
270 | ||
271 | fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { | |
272 | self.append_common( | |
273 | child, | |
274 | || sibling.previous_sibling.get(), | |
dc9dc135 | 275 | |new_node| sibling.insert_before(new_node), |
83c7162d XL |
276 | ) |
277 | } | |
278 | ||
dc9dc135 XL |
279 | fn append_based_on_parent_node( |
280 | &mut self, | |
281 | element: &Ref<'arena>, | |
282 | prev_element: &Ref<'arena>, | |
283 | child: NodeOrText<Ref<'arena>>, | |
284 | ) { | |
83c7162d XL |
285 | if element.parent.get().is_some() { |
286 | self.append_before_sibling(element, child) | |
287 | } else { | |
288 | self.append(prev_element, child) | |
289 | } | |
290 | } | |
291 | ||
dc9dc135 XL |
292 | fn append_doctype_to_document( |
293 | &mut self, | |
294 | name: StrTendril, | |
295 | public_id: StrTendril, | |
296 | system_id: StrTendril, | |
297 | ) { | |
83c7162d XL |
298 | self.document.append(self.new_node(NodeData::Doctype { |
299 | name: name, | |
300 | public_id: public_id, | |
dc9dc135 | 301 | system_id: system_id, |
83c7162d XL |
302 | })) |
303 | } | |
304 | ||
305 | fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) { | |
306 | let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { | |
307 | attrs.borrow_mut() | |
308 | } else { | |
309 | panic!("not an element") | |
310 | }; | |
311 | ||
dc9dc135 XL |
312 | let existing_names = existing |
313 | .iter() | |
314 | .map(|e| e.name.clone()) | |
315 | .collect::<HashSet<_>>(); | |
316 | existing.extend( | |
317 | attrs | |
318 | .into_iter() | |
319 | .filter(|attr| !existing_names.contains(&attr.name)), | |
320 | ); | |
83c7162d XL |
321 | } |
322 | ||
323 | fn remove_from_parent(&mut self, target: &Ref<'arena>) { | |
324 | target.detach() | |
325 | } | |
326 | ||
327 | fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { | |
328 | let mut next_child = node.first_child.get(); | |
329 | while let Some(child) = next_child { | |
330 | debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node)); | |
331 | next_child = child.next_sibling.get(); | |
332 | new_parent.append(child) | |
333 | } | |
334 | } | |
335 | } |