]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | // Copyright 2012 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
85aaf69f | 11 | //! An "interner" is a data structure that associates values with usize tags and |
1a4d82fc JJ |
12 | //! allows bidirectional lookup; i.e. given a value, one can easily find the |
13 | //! type, and vice versa. | |
223e47cc | 14 | |
1a4d82fc JJ |
15 | use ast::Name; |
16 | ||
85aaf69f | 17 | use std::borrow::Borrow; |
1a4d82fc JJ |
18 | use std::cell::RefCell; |
19 | use std::cmp::Ordering; | |
20 | use std::collections::HashMap; | |
21 | use std::fmt; | |
22 | use std::hash::Hash; | |
1a4d82fc JJ |
23 | use std::ops::Deref; |
24 | use std::rc::Rc; | |
223e47cc LB |
25 | |
26 | pub struct Interner<T> { | |
1a4d82fc JJ |
27 | map: RefCell<HashMap<T, Name>>, |
28 | vect: RefCell<Vec<T> >, | |
223e47cc LB |
29 | } |
30 | ||
1a4d82fc | 31 | // when traits can extend traits, we should extend index<Name,T> to get [] |
85aaf69f SL |
32 | impl<T: Eq + Hash + Clone + 'static> Interner<T> { |
33 | pub fn new() -> Interner<T> { | |
34 | Interner { | |
35 | map: RefCell::new(HashMap::new()), | |
36 | vect: RefCell::new(Vec::new()), | |
37 | } | |
38 | } | |
39 | ||
40 | pub fn prefill(init: &[T]) -> Interner<T> { | |
41 | let rv = Interner::new(); | |
42 | for v in init { | |
43 | rv.intern((*v).clone()); | |
44 | } | |
45 | rv | |
46 | } | |
47 | ||
48 | pub fn intern(&self, val: T) -> Name { | |
49 | let mut map = self.map.borrow_mut(); | |
50 | match (*map).get(&val) { | |
51 | Some(&idx) => return idx, | |
52 | None => (), | |
53 | } | |
54 | ||
55 | let mut vect = self.vect.borrow_mut(); | |
56 | let new_idx = Name((*vect).len() as u32); | |
57 | (*map).insert(val.clone(), new_idx); | |
58 | (*vect).push(val); | |
59 | new_idx | |
60 | } | |
61 | ||
62 | pub fn gensym(&self, val: T) -> Name { | |
63 | let mut vect = self.vect.borrow_mut(); | |
64 | let new_idx = Name((*vect).len() as u32); | |
65 | // leave out of .map to avoid colliding | |
66 | (*vect).push(val); | |
67 | new_idx | |
68 | } | |
69 | ||
70 | pub fn get(&self, idx: Name) -> T { | |
71 | let vect = self.vect.borrow(); | |
72 | (*vect)[idx.usize()].clone() | |
73 | } | |
74 | ||
75 | pub fn len(&self) -> usize { | |
76 | let vect = self.vect.borrow(); | |
77 | (*vect).len() | |
78 | } | |
79 | ||
80 | pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name> | |
81 | where T: Borrow<Q>, Q: Eq + Hash { | |
1a4d82fc JJ |
82 | let map = self.map.borrow(); |
83 | match (*map).get(val) { | |
970d7e83 LB |
84 | Some(v) => Some(*v), |
85 | None => None, | |
86 | } | |
87 | } | |
1a4d82fc JJ |
88 | |
89 | pub fn clear(&self) { | |
90 | *self.map.borrow_mut() = HashMap::new(); | |
91 | *self.vect.borrow_mut() = Vec::new(); | |
92 | } | |
93 | } | |
94 | ||
95 | #[derive(Clone, PartialEq, Hash, PartialOrd)] | |
96 | pub struct RcStr { | |
97 | string: Rc<String>, | |
98 | } | |
99 | ||
100 | impl RcStr { | |
101 | pub fn new(string: &str) -> RcStr { | |
102 | RcStr { | |
103 | string: Rc::new(string.to_string()), | |
104 | } | |
105 | } | |
106 | } | |
107 | ||
108 | impl Eq for RcStr {} | |
109 | ||
110 | impl Ord for RcStr { | |
111 | fn cmp(&self, other: &RcStr) -> Ordering { | |
85aaf69f | 112 | self[..].cmp(&other[..]) |
1a4d82fc JJ |
113 | } |
114 | } | |
115 | ||
85aaf69f | 116 | impl fmt::Debug for RcStr { |
1a4d82fc | 117 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
85aaf69f SL |
118 | use std::fmt::Debug; |
119 | self[..].fmt(f) | |
1a4d82fc JJ |
120 | } |
121 | } | |
122 | ||
85aaf69f SL |
123 | impl fmt::Display for RcStr { |
124 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
125 | use std::fmt::Display; | |
126 | self[..].fmt(f) | |
127 | } | |
128 | } | |
129 | ||
130 | impl Borrow<str> for RcStr { | |
131 | fn borrow(&self) -> &str { | |
132 | &self.string[..] | |
1a4d82fc JJ |
133 | } |
134 | } | |
135 | ||
136 | impl Deref for RcStr { | |
137 | type Target = str; | |
138 | ||
85aaf69f | 139 | fn deref(&self) -> &str { &self.string[..] } |
970d7e83 LB |
140 | } |
141 | ||
1a4d82fc JJ |
142 | /// A StrInterner differs from Interner<String> in that it accepts |
143 | /// &str rather than RcStr, resulting in less allocation. | |
970d7e83 | 144 | pub struct StrInterner { |
1a4d82fc JJ |
145 | map: RefCell<HashMap<RcStr, Name>>, |
146 | vect: RefCell<Vec<RcStr> >, | |
970d7e83 LB |
147 | } |
148 | ||
1a4d82fc | 149 | /// When traits can extend traits, we should extend index<Name,T> to get [] |
970d7e83 LB |
150 | impl StrInterner { |
151 | pub fn new() -> StrInterner { | |
152 | StrInterner { | |
1a4d82fc JJ |
153 | map: RefCell::new(HashMap::new()), |
154 | vect: RefCell::new(Vec::new()), | |
970d7e83 LB |
155 | } |
156 | } | |
157 | ||
158 | pub fn prefill(init: &[&str]) -> StrInterner { | |
159 | let rv = StrInterner::new(); | |
85aaf69f | 160 | for &v in init { rv.intern(v); } |
970d7e83 LB |
161 | rv |
162 | } | |
163 | ||
1a4d82fc JJ |
164 | pub fn intern(&self, val: &str) -> Name { |
165 | let mut map = self.map.borrow_mut(); | |
166 | match map.get(val) { | |
970d7e83 LB |
167 | Some(&idx) => return idx, |
168 | None => (), | |
169 | } | |
170 | ||
1a4d82fc JJ |
171 | let new_idx = Name(self.len() as u32); |
172 | let val = RcStr::new(val); | |
173 | map.insert(val.clone(), new_idx); | |
174 | self.vect.borrow_mut().push(val); | |
970d7e83 LB |
175 | new_idx |
176 | } | |
177 | ||
1a4d82fc JJ |
178 | pub fn gensym(&self, val: &str) -> Name { |
179 | let new_idx = Name(self.len() as u32); | |
970d7e83 | 180 | // leave out of .map to avoid colliding |
1a4d82fc JJ |
181 | self.vect.borrow_mut().push(RcStr::new(val)); |
182 | new_idx | |
183 | } | |
184 | ||
185 | // I want these gensyms to share name pointers | |
186 | // with existing entries. This would be automatic, | |
187 | // except that the existing gensym creates its | |
188 | // own managed ptr using to_managed. I think that | |
189 | // adding this utility function is the most | |
190 | // lightweight way to get what I want, though not | |
191 | // necessarily the cleanest. | |
192 | ||
193 | /// Create a gensym with the same name as an existing | |
194 | /// entry. | |
195 | pub fn gensym_copy(&self, idx : Name) -> Name { | |
196 | let new_idx = Name(self.len() as u32); | |
197 | // leave out of map to avoid colliding | |
198 | let mut vect = self.vect.borrow_mut(); | |
85aaf69f | 199 | let existing = (*vect)[idx.usize()].clone(); |
1a4d82fc | 200 | vect.push(existing); |
970d7e83 LB |
201 | new_idx |
202 | } | |
203 | ||
1a4d82fc | 204 | pub fn get(&self, idx: Name) -> RcStr { |
85aaf69f | 205 | (*self.vect.borrow())[idx.usize()].clone() |
1a4d82fc | 206 | } |
223e47cc | 207 | |
85aaf69f | 208 | pub fn len(&self) -> usize { |
1a4d82fc JJ |
209 | self.vect.borrow().len() |
210 | } | |
223e47cc | 211 | |
1a4d82fc | 212 | pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name> |
85aaf69f | 213 | where RcStr: Borrow<Q>, Q: Eq + Hash { |
1a4d82fc | 214 | match (*self.map.borrow()).get(val) { |
970d7e83 LB |
215 | Some(v) => Some(*v), |
216 | None => None, | |
217 | } | |
218 | } | |
1a4d82fc JJ |
219 | |
220 | pub fn clear(&self) { | |
221 | *self.map.borrow_mut() = HashMap::new(); | |
222 | *self.vect.borrow_mut() = Vec::new(); | |
223 | } | |
224 | ||
225 | pub fn reset(&self, other: StrInterner) { | |
226 | *self.map.borrow_mut() = other.map.into_inner(); | |
227 | *self.vect.borrow_mut() = other.vect.into_inner(); | |
228 | } | |
223e47cc LB |
229 | } |
230 | ||
970d7e83 LB |
231 | #[cfg(test)] |
232 | mod tests { | |
233 | use super::*; | |
1a4d82fc JJ |
234 | use ast::Name; |
235 | ||
970d7e83 | 236 | #[test] |
c34b1796 | 237 | #[should_panic] |
970d7e83 | 238 | fn i1 () { |
1a4d82fc JJ |
239 | let i : Interner<RcStr> = Interner::new(); |
240 | i.get(Name(13)); | |
970d7e83 LB |
241 | } |
242 | ||
243 | #[test] | |
1a4d82fc JJ |
244 | fn interner_tests () { |
245 | let i : Interner<RcStr> = Interner::new(); | |
970d7e83 | 246 | // first one is zero: |
1a4d82fc | 247 | assert_eq!(i.intern(RcStr::new("dog")), Name(0)); |
970d7e83 | 248 | // re-use gets the same entry: |
1a4d82fc | 249 | assert_eq!(i.intern(RcStr::new("dog")), Name(0)); |
970d7e83 | 250 | // different string gets a different #: |
1a4d82fc JJ |
251 | assert_eq!(i.intern(RcStr::new("cat")), Name(1)); |
252 | assert_eq!(i.intern(RcStr::new("cat")), Name(1)); | |
970d7e83 | 253 | // dog is still at zero |
1a4d82fc | 254 | assert_eq!(i.intern(RcStr::new("dog")), Name(0)); |
970d7e83 | 255 | // gensym gets 3 |
1a4d82fc | 256 | assert_eq!(i.gensym(RcStr::new("zebra") ), Name(2)); |
970d7e83 | 257 | // gensym of same string gets new number : |
1a4d82fc | 258 | assert_eq!(i.gensym (RcStr::new("zebra") ), Name(3)); |
970d7e83 | 259 | // gensym of *existing* string gets new number: |
1a4d82fc JJ |
260 | assert_eq!(i.gensym(RcStr::new("dog")), Name(4)); |
261 | assert_eq!(i.get(Name(0)), RcStr::new("dog")); | |
262 | assert_eq!(i.get(Name(1)), RcStr::new("cat")); | |
263 | assert_eq!(i.get(Name(2)), RcStr::new("zebra")); | |
264 | assert_eq!(i.get(Name(3)), RcStr::new("zebra")); | |
265 | assert_eq!(i.get(Name(4)), RcStr::new("dog")); | |
970d7e83 | 266 | } |
223e47cc | 267 | |
970d7e83 LB |
268 | #[test] |
269 | fn i3 () { | |
1a4d82fc JJ |
270 | let i : Interner<RcStr> = Interner::prefill(&[ |
271 | RcStr::new("Alan"), | |
272 | RcStr::new("Bob"), | |
273 | RcStr::new("Carol") | |
274 | ]); | |
275 | assert_eq!(i.get(Name(0)), RcStr::new("Alan")); | |
276 | assert_eq!(i.get(Name(1)), RcStr::new("Bob")); | |
277 | assert_eq!(i.get(Name(2)), RcStr::new("Carol")); | |
278 | assert_eq!(i.intern(RcStr::new("Bob")), Name(1)); | |
279 | } | |
280 | ||
281 | #[test] | |
282 | fn string_interner_tests() { | |
283 | let i : StrInterner = StrInterner::new(); | |
284 | // first one is zero: | |
285 | assert_eq!(i.intern("dog"), Name(0)); | |
286 | // re-use gets the same entry: | |
287 | assert_eq!(i.intern ("dog"), Name(0)); | |
288 | // different string gets a different #: | |
289 | assert_eq!(i.intern("cat"), Name(1)); | |
290 | assert_eq!(i.intern("cat"), Name(1)); | |
291 | // dog is still at zero | |
292 | assert_eq!(i.intern("dog"), Name(0)); | |
293 | // gensym gets 3 | |
294 | assert_eq!(i.gensym("zebra"), Name(2)); | |
295 | // gensym of same string gets new number : | |
296 | assert_eq!(i.gensym("zebra"), Name(3)); | |
297 | // gensym of *existing* string gets new number: | |
298 | assert_eq!(i.gensym("dog"), Name(4)); | |
299 | // gensym tests again with gensym_copy: | |
300 | assert_eq!(i.gensym_copy(Name(2)), Name(5)); | |
301 | assert_eq!(i.get(Name(5)), RcStr::new("zebra")); | |
302 | assert_eq!(i.gensym_copy(Name(2)), Name(6)); | |
303 | assert_eq!(i.get(Name(6)), RcStr::new("zebra")); | |
304 | assert_eq!(i.get(Name(0)), RcStr::new("dog")); | |
305 | assert_eq!(i.get(Name(1)), RcStr::new("cat")); | |
306 | assert_eq!(i.get(Name(2)), RcStr::new("zebra")); | |
307 | assert_eq!(i.get(Name(3)), RcStr::new("zebra")); | |
308 | assert_eq!(i.get(Name(4)), RcStr::new("dog")); | |
970d7e83 | 309 | } |
223e47cc | 310 | } |