]>
Commit | Line | Data |
---|---|---|
064997fb FG |
1 | //! Serialization-friendly representation of `tt::Subtree`. |
2 | //! | |
3 | //! It is possible to serialize `Subtree` as is, as a tree, but using | |
4 | //! arbitrary-nested trees in JSON is problematic, as they can cause the JSON | |
5 | //! parser to overflow the stack. | |
6 | //! | |
7 | //! Additionally, such implementation would be pretty verbose, and we do care | |
8 | //! about performance here a bit. | |
9 | //! | |
10 | //! So what this module does is dumping a `tt::Subtree` into a bunch of flat | |
11 | //! array of numbers. See the test in the parent module to get an example | |
12 | //! output. | |
13 | //! | |
14 | //! ```json | |
15 | //! { | |
16 | //! // Array of subtrees, each subtree is represented by 4 numbers: | |
17 | //! // id of delimiter, delimiter kind, index of first child in `token_tree`, | |
18 | //! // index of last child in `token_tree` | |
19 | //! "subtree":[4294967295,0,0,5,2,2,5,5], | |
20 | //! // 2 ints per literal: [token id, index into `text`] | |
21 | //! "literal":[4294967295,1], | |
22 | //! // 3 ints per punct: [token id, char, spacing] | |
23 | //! "punct":[4294967295,64,1], | |
24 | //! // 2 ints per ident: [token id, index into `text`] | |
25 | //! "ident": [0,0,1,1], | |
26 | //! // children of all subtrees, concatenated. Each child is represented as `index << 2 | tag` | |
27 | //! // where tag denotes one of subtree, literal, punct or ident. | |
28 | //! "token_tree":[3,7,1,4], | |
29 | //! // Strings shared by idents and literals | |
30 | //! "text": ["struct","Foo"] | |
31 | //! } | |
32 | //! ``` | |
33 | //! | |
34 | //! We probably should replace most of the code here with bincode someday, but, | |
35 | //! as we don't have bincode in Cargo.toml yet, lets stick with serde_json for | |
36 | //! the time being. | |
37 | ||
f2b60f7d | 38 | use std::collections::{HashMap, VecDeque}; |
064997fb FG |
39 | |
40 | use serde::{Deserialize, Serialize}; | |
9ffffee4 FG |
41 | |
42 | use crate::tt::{self, TokenId}; | |
064997fb FG |
43 | |
44 | #[derive(Serialize, Deserialize, Debug)] | |
45 | pub struct FlatTree { | |
46 | subtree: Vec<u32>, | |
47 | literal: Vec<u32>, | |
48 | punct: Vec<u32>, | |
49 | ident: Vec<u32>, | |
50 | token_tree: Vec<u32>, | |
51 | text: Vec<String>, | |
52 | } | |
53 | ||
54 | struct SubtreeRepr { | |
55 | id: tt::TokenId, | |
9ffffee4 | 56 | kind: tt::DelimiterKind, |
064997fb FG |
57 | tt: [u32; 2], |
58 | } | |
59 | ||
60 | struct LiteralRepr { | |
61 | id: tt::TokenId, | |
62 | text: u32, | |
63 | } | |
64 | ||
65 | struct PunctRepr { | |
66 | id: tt::TokenId, | |
67 | char: char, | |
68 | spacing: tt::Spacing, | |
69 | } | |
70 | ||
71 | struct IdentRepr { | |
72 | id: tt::TokenId, | |
73 | text: u32, | |
74 | } | |
75 | ||
76 | impl FlatTree { | |
77 | pub fn new(subtree: &tt::Subtree) -> FlatTree { | |
78 | let mut w = Writer { | |
79 | string_table: HashMap::new(), | |
80 | work: VecDeque::new(), | |
81 | ||
82 | subtree: Vec::new(), | |
83 | literal: Vec::new(), | |
84 | punct: Vec::new(), | |
85 | ident: Vec::new(), | |
86 | token_tree: Vec::new(), | |
87 | text: Vec::new(), | |
88 | }; | |
89 | w.write(subtree); | |
90 | ||
91 | return FlatTree { | |
92 | subtree: write_vec(w.subtree, SubtreeRepr::write), | |
93 | literal: write_vec(w.literal, LiteralRepr::write), | |
94 | punct: write_vec(w.punct, PunctRepr::write), | |
95 | ident: write_vec(w.ident, IdentRepr::write), | |
96 | token_tree: w.token_tree, | |
97 | text: w.text, | |
98 | }; | |
99 | ||
100 | fn write_vec<T, F: Fn(T) -> [u32; N], const N: usize>(xs: Vec<T>, f: F) -> Vec<u32> { | |
101 | xs.into_iter().flat_map(f).collect() | |
102 | } | |
103 | } | |
104 | ||
105 | pub fn to_subtree(self) -> tt::Subtree { | |
106 | return Reader { | |
107 | subtree: read_vec(self.subtree, SubtreeRepr::read), | |
108 | literal: read_vec(self.literal, LiteralRepr::read), | |
109 | punct: read_vec(self.punct, PunctRepr::read), | |
110 | ident: read_vec(self.ident, IdentRepr::read), | |
111 | token_tree: self.token_tree, | |
112 | text: self.text, | |
113 | } | |
114 | .read(); | |
115 | ||
116 | fn read_vec<T, F: Fn([u32; N]) -> T, const N: usize>(xs: Vec<u32>, f: F) -> Vec<T> { | |
117 | let mut chunks = xs.chunks_exact(N); | |
118 | let res = chunks.by_ref().map(|chunk| f(chunk.try_into().unwrap())).collect(); | |
119 | assert!(chunks.remainder().is_empty()); | |
120 | res | |
121 | } | |
122 | } | |
123 | } | |
124 | ||
125 | impl SubtreeRepr { | |
126 | fn write(self) -> [u32; 4] { | |
127 | let kind = match self.kind { | |
9ffffee4 FG |
128 | tt::DelimiterKind::Invisible => 0, |
129 | tt::DelimiterKind::Parenthesis => 1, | |
130 | tt::DelimiterKind::Brace => 2, | |
131 | tt::DelimiterKind::Bracket => 3, | |
064997fb FG |
132 | }; |
133 | [self.id.0, kind, self.tt[0], self.tt[1]] | |
134 | } | |
135 | fn read([id, kind, lo, len]: [u32; 4]) -> SubtreeRepr { | |
136 | let kind = match kind { | |
9ffffee4 FG |
137 | 0 => tt::DelimiterKind::Invisible, |
138 | 1 => tt::DelimiterKind::Parenthesis, | |
139 | 2 => tt::DelimiterKind::Brace, | |
140 | 3 => tt::DelimiterKind::Bracket, | |
9c376795 | 141 | other => panic!("bad kind {other}"), |
064997fb FG |
142 | }; |
143 | SubtreeRepr { id: TokenId(id), kind, tt: [lo, len] } | |
144 | } | |
145 | } | |
146 | ||
147 | impl LiteralRepr { | |
148 | fn write(self) -> [u32; 2] { | |
149 | [self.id.0, self.text] | |
150 | } | |
151 | fn read([id, text]: [u32; 2]) -> LiteralRepr { | |
152 | LiteralRepr { id: TokenId(id), text } | |
153 | } | |
154 | } | |
155 | ||
156 | impl PunctRepr { | |
157 | fn write(self) -> [u32; 3] { | |
158 | let spacing = match self.spacing { | |
159 | tt::Spacing::Alone => 0, | |
160 | tt::Spacing::Joint => 1, | |
161 | }; | |
162 | [self.id.0, self.char as u32, spacing] | |
163 | } | |
164 | fn read([id, char, spacing]: [u32; 3]) -> PunctRepr { | |
165 | let spacing = match spacing { | |
166 | 0 => tt::Spacing::Alone, | |
167 | 1 => tt::Spacing::Joint, | |
9c376795 | 168 | other => panic!("bad spacing {other}"), |
064997fb FG |
169 | }; |
170 | PunctRepr { id: TokenId(id), char: char.try_into().unwrap(), spacing } | |
171 | } | |
172 | } | |
173 | ||
174 | impl IdentRepr { | |
175 | fn write(self) -> [u32; 2] { | |
176 | [self.id.0, self.text] | |
177 | } | |
178 | fn read(data: [u32; 2]) -> IdentRepr { | |
179 | IdentRepr { id: TokenId(data[0]), text: data[1] } | |
180 | } | |
181 | } | |
182 | ||
183 | struct Writer<'a> { | |
184 | work: VecDeque<(usize, &'a tt::Subtree)>, | |
185 | string_table: HashMap<&'a str, u32>, | |
186 | ||
187 | subtree: Vec<SubtreeRepr>, | |
188 | literal: Vec<LiteralRepr>, | |
189 | punct: Vec<PunctRepr>, | |
190 | ident: Vec<IdentRepr>, | |
191 | token_tree: Vec<u32>, | |
192 | text: Vec<String>, | |
193 | } | |
194 | ||
195 | impl<'a> Writer<'a> { | |
196 | fn write(&mut self, root: &'a tt::Subtree) { | |
197 | self.enqueue(root); | |
198 | while let Some((idx, subtree)) = self.work.pop_front() { | |
199 | self.subtree(idx, subtree); | |
200 | } | |
201 | } | |
202 | ||
203 | fn subtree(&mut self, idx: usize, subtree: &'a tt::Subtree) { | |
204 | let mut first_tt = self.token_tree.len(); | |
205 | let n_tt = subtree.token_trees.len(); | |
206 | self.token_tree.resize(first_tt + n_tt, !0); | |
207 | ||
208 | self.subtree[idx].tt = [first_tt as u32, (first_tt + n_tt) as u32]; | |
209 | ||
210 | for child in &subtree.token_trees { | |
211 | let idx_tag = match child { | |
212 | tt::TokenTree::Subtree(it) => { | |
213 | let idx = self.enqueue(it); | |
9c376795 | 214 | idx << 2 |
064997fb FG |
215 | } |
216 | tt::TokenTree::Leaf(leaf) => match leaf { | |
217 | tt::Leaf::Literal(lit) => { | |
218 | let idx = self.literal.len() as u32; | |
219 | let text = self.intern(&lit.text); | |
9ffffee4 | 220 | self.literal.push(LiteralRepr { id: lit.span, text }); |
064997fb FG |
221 | idx << 2 | 0b01 |
222 | } | |
223 | tt::Leaf::Punct(punct) => { | |
224 | let idx = self.punct.len() as u32; | |
225 | self.punct.push(PunctRepr { | |
226 | char: punct.char, | |
227 | spacing: punct.spacing, | |
9ffffee4 | 228 | id: punct.span, |
064997fb FG |
229 | }); |
230 | idx << 2 | 0b10 | |
231 | } | |
232 | tt::Leaf::Ident(ident) => { | |
233 | let idx = self.ident.len() as u32; | |
234 | let text = self.intern(&ident.text); | |
9ffffee4 | 235 | self.ident.push(IdentRepr { id: ident.span, text }); |
064997fb FG |
236 | idx << 2 | 0b11 |
237 | } | |
238 | }, | |
239 | }; | |
240 | self.token_tree[first_tt] = idx_tag; | |
241 | first_tt += 1; | |
242 | } | |
243 | } | |
244 | ||
245 | fn enqueue(&mut self, subtree: &'a tt::Subtree) -> u32 { | |
246 | let idx = self.subtree.len(); | |
9ffffee4 FG |
247 | let delimiter_id = subtree.delimiter.open; |
248 | let delimiter_kind = subtree.delimiter.kind; | |
064997fb FG |
249 | self.subtree.push(SubtreeRepr { id: delimiter_id, kind: delimiter_kind, tt: [!0, !0] }); |
250 | self.work.push_back((idx, subtree)); | |
251 | idx as u32 | |
252 | } | |
253 | ||
254 | pub(crate) fn intern(&mut self, text: &'a str) -> u32 { | |
255 | let table = &mut self.text; | |
256 | *self.string_table.entry(text).or_insert_with(|| { | |
257 | let idx = table.len(); | |
258 | table.push(text.to_string()); | |
259 | idx as u32 | |
260 | }) | |
261 | } | |
262 | } | |
263 | ||
264 | struct Reader { | |
265 | subtree: Vec<SubtreeRepr>, | |
266 | literal: Vec<LiteralRepr>, | |
267 | punct: Vec<PunctRepr>, | |
268 | ident: Vec<IdentRepr>, | |
269 | token_tree: Vec<u32>, | |
270 | text: Vec<String>, | |
271 | } | |
272 | ||
273 | impl Reader { | |
274 | pub(crate) fn read(self) -> tt::Subtree { | |
275 | let mut res: Vec<Option<tt::Subtree>> = vec![None; self.subtree.len()]; | |
276 | for i in (0..self.subtree.len()).rev() { | |
277 | let repr = &self.subtree[i]; | |
278 | let token_trees = &self.token_tree[repr.tt[0] as usize..repr.tt[1] as usize]; | |
279 | let s = tt::Subtree { | |
9ffffee4 FG |
280 | delimiter: tt::Delimiter { |
281 | open: repr.id, | |
282 | close: TokenId::UNSPECIFIED, | |
283 | kind: repr.kind, | |
284 | }, | |
064997fb FG |
285 | token_trees: token_trees |
286 | .iter() | |
287 | .copied() | |
288 | .map(|idx_tag| { | |
289 | let tag = idx_tag & 0b11; | |
290 | let idx = (idx_tag >> 2) as usize; | |
291 | match tag { | |
292 | // XXX: we iterate subtrees in reverse to guarantee | |
293 | // that this unwrap doesn't fire. | |
294 | 0b00 => res[idx].take().unwrap().into(), | |
295 | 0b01 => { | |
296 | let repr = &self.literal[idx]; | |
297 | tt::Leaf::Literal(tt::Literal { | |
298 | text: self.text[repr.text as usize].as_str().into(), | |
9ffffee4 | 299 | span: repr.id, |
064997fb FG |
300 | }) |
301 | .into() | |
302 | } | |
303 | 0b10 => { | |
304 | let repr = &self.punct[idx]; | |
305 | tt::Leaf::Punct(tt::Punct { | |
306 | char: repr.char, | |
307 | spacing: repr.spacing, | |
9ffffee4 | 308 | span: repr.id, |
064997fb FG |
309 | }) |
310 | .into() | |
311 | } | |
312 | 0b11 => { | |
313 | let repr = &self.ident[idx]; | |
314 | tt::Leaf::Ident(tt::Ident { | |
315 | text: self.text[repr.text as usize].as_str().into(), | |
9ffffee4 | 316 | span: repr.id, |
064997fb FG |
317 | }) |
318 | .into() | |
319 | } | |
9c376795 | 320 | other => panic!("bad tag: {other}"), |
064997fb FG |
321 | } |
322 | }) | |
323 | .collect(), | |
324 | }; | |
325 | res[i] = Some(s); | |
326 | } | |
327 | ||
328 | res[0].take().unwrap() | |
329 | } | |
330 | } |