]> git.proxmox.com Git - rustc.git/blame - src/libsyntax/util/interner.rs
Imported Upstream version 1.0.0~beta
[rustc.git] / src / libsyntax / util / interner.rs
CommitLineData
223e47cc
LB
1// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
85aaf69f 11//! An "interner" is a data structure that associates values with usize tags and
1a4d82fc
JJ
12//! allows bidirectional lookup; i.e. given a value, one can easily find the
13//! type, and vice versa.
223e47cc 14
1a4d82fc
JJ
15use ast::Name;
16
85aaf69f 17use std::borrow::Borrow;
1a4d82fc
JJ
18use std::cell::RefCell;
19use std::cmp::Ordering;
20use std::collections::HashMap;
21use std::fmt;
22use std::hash::Hash;
1a4d82fc
JJ
23use std::ops::Deref;
24use std::rc::Rc;
223e47cc
LB
25
26pub struct Interner<T> {
1a4d82fc
JJ
27 map: RefCell<HashMap<T, Name>>,
28 vect: RefCell<Vec<T> >,
223e47cc
LB
29}
30
1a4d82fc 31// when traits can extend traits, we should extend index<Name,T> to get []
85aaf69f
SL
32impl<T: Eq + Hash + Clone + 'static> Interner<T> {
33 pub fn new() -> Interner<T> {
34 Interner {
35 map: RefCell::new(HashMap::new()),
36 vect: RefCell::new(Vec::new()),
37 }
38 }
39
40 pub fn prefill(init: &[T]) -> Interner<T> {
41 let rv = Interner::new();
42 for v in init {
43 rv.intern((*v).clone());
44 }
45 rv
46 }
47
48 pub fn intern(&self, val: T) -> Name {
49 let mut map = self.map.borrow_mut();
50 match (*map).get(&val) {
51 Some(&idx) => return idx,
52 None => (),
53 }
54
55 let mut vect = self.vect.borrow_mut();
56 let new_idx = Name((*vect).len() as u32);
57 (*map).insert(val.clone(), new_idx);
58 (*vect).push(val);
59 new_idx
60 }
61
62 pub fn gensym(&self, val: T) -> Name {
63 let mut vect = self.vect.borrow_mut();
64 let new_idx = Name((*vect).len() as u32);
65 // leave out of .map to avoid colliding
66 (*vect).push(val);
67 new_idx
68 }
69
70 pub fn get(&self, idx: Name) -> T {
71 let vect = self.vect.borrow();
72 (*vect)[idx.usize()].clone()
73 }
74
75 pub fn len(&self) -> usize {
76 let vect = self.vect.borrow();
77 (*vect).len()
78 }
79
80 pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name>
81 where T: Borrow<Q>, Q: Eq + Hash {
1a4d82fc
JJ
82 let map = self.map.borrow();
83 match (*map).get(val) {
970d7e83
LB
84 Some(v) => Some(*v),
85 None => None,
86 }
87 }
1a4d82fc
JJ
88
89 pub fn clear(&self) {
90 *self.map.borrow_mut() = HashMap::new();
91 *self.vect.borrow_mut() = Vec::new();
92 }
93}
94
95#[derive(Clone, PartialEq, Hash, PartialOrd)]
96pub struct RcStr {
97 string: Rc<String>,
98}
99
100impl RcStr {
101 pub fn new(string: &str) -> RcStr {
102 RcStr {
103 string: Rc::new(string.to_string()),
104 }
105 }
106}
107
108impl Eq for RcStr {}
109
110impl Ord for RcStr {
111 fn cmp(&self, other: &RcStr) -> Ordering {
85aaf69f 112 self[..].cmp(&other[..])
1a4d82fc
JJ
113 }
114}
115
85aaf69f 116impl fmt::Debug for RcStr {
1a4d82fc 117 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85aaf69f
SL
118 use std::fmt::Debug;
119 self[..].fmt(f)
1a4d82fc
JJ
120 }
121}
122
85aaf69f
SL
123impl fmt::Display for RcStr {
124 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
125 use std::fmt::Display;
126 self[..].fmt(f)
127 }
128}
129
130impl Borrow<str> for RcStr {
131 fn borrow(&self) -> &str {
132 &self.string[..]
1a4d82fc
JJ
133 }
134}
135
136impl Deref for RcStr {
137 type Target = str;
138
85aaf69f 139 fn deref(&self) -> &str { &self.string[..] }
970d7e83
LB
140}
141
1a4d82fc
JJ
142/// A StrInterner differs from Interner<String> in that it accepts
143/// &str rather than RcStr, resulting in less allocation.
970d7e83 144pub struct StrInterner {
1a4d82fc
JJ
145 map: RefCell<HashMap<RcStr, Name>>,
146 vect: RefCell<Vec<RcStr> >,
970d7e83
LB
147}
148
1a4d82fc 149/// When traits can extend traits, we should extend index<Name,T> to get []
970d7e83
LB
150impl StrInterner {
151 pub fn new() -> StrInterner {
152 StrInterner {
1a4d82fc
JJ
153 map: RefCell::new(HashMap::new()),
154 vect: RefCell::new(Vec::new()),
970d7e83
LB
155 }
156 }
157
158 pub fn prefill(init: &[&str]) -> StrInterner {
159 let rv = StrInterner::new();
85aaf69f 160 for &v in init { rv.intern(v); }
970d7e83
LB
161 rv
162 }
163
1a4d82fc
JJ
164 pub fn intern(&self, val: &str) -> Name {
165 let mut map = self.map.borrow_mut();
166 match map.get(val) {
970d7e83
LB
167 Some(&idx) => return idx,
168 None => (),
169 }
170
1a4d82fc
JJ
171 let new_idx = Name(self.len() as u32);
172 let val = RcStr::new(val);
173 map.insert(val.clone(), new_idx);
174 self.vect.borrow_mut().push(val);
970d7e83
LB
175 new_idx
176 }
177
1a4d82fc
JJ
178 pub fn gensym(&self, val: &str) -> Name {
179 let new_idx = Name(self.len() as u32);
970d7e83 180 // leave out of .map to avoid colliding
1a4d82fc
JJ
181 self.vect.borrow_mut().push(RcStr::new(val));
182 new_idx
183 }
184
185 // I want these gensyms to share name pointers
186 // with existing entries. This would be automatic,
187 // except that the existing gensym creates its
188 // own managed ptr using to_managed. I think that
189 // adding this utility function is the most
190 // lightweight way to get what I want, though not
191 // necessarily the cleanest.
192
193 /// Create a gensym with the same name as an existing
194 /// entry.
195 pub fn gensym_copy(&self, idx : Name) -> Name {
196 let new_idx = Name(self.len() as u32);
197 // leave out of map to avoid colliding
198 let mut vect = self.vect.borrow_mut();
85aaf69f 199 let existing = (*vect)[idx.usize()].clone();
1a4d82fc 200 vect.push(existing);
970d7e83
LB
201 new_idx
202 }
203
1a4d82fc 204 pub fn get(&self, idx: Name) -> RcStr {
85aaf69f 205 (*self.vect.borrow())[idx.usize()].clone()
1a4d82fc 206 }
223e47cc 207
85aaf69f 208 pub fn len(&self) -> usize {
1a4d82fc
JJ
209 self.vect.borrow().len()
210 }
223e47cc 211
1a4d82fc 212 pub fn find<Q: ?Sized>(&self, val: &Q) -> Option<Name>
85aaf69f 213 where RcStr: Borrow<Q>, Q: Eq + Hash {
1a4d82fc 214 match (*self.map.borrow()).get(val) {
970d7e83
LB
215 Some(v) => Some(*v),
216 None => None,
217 }
218 }
1a4d82fc
JJ
219
220 pub fn clear(&self) {
221 *self.map.borrow_mut() = HashMap::new();
222 *self.vect.borrow_mut() = Vec::new();
223 }
224
225 pub fn reset(&self, other: StrInterner) {
226 *self.map.borrow_mut() = other.map.into_inner();
227 *self.vect.borrow_mut() = other.vect.into_inner();
228 }
223e47cc
LB
229}
230
970d7e83
LB
231#[cfg(test)]
232mod tests {
233 use super::*;
1a4d82fc
JJ
234 use ast::Name;
235
970d7e83 236 #[test]
c34b1796 237 #[should_panic]
970d7e83 238 fn i1 () {
1a4d82fc
JJ
239 let i : Interner<RcStr> = Interner::new();
240 i.get(Name(13));
970d7e83
LB
241 }
242
243 #[test]
1a4d82fc
JJ
244 fn interner_tests () {
245 let i : Interner<RcStr> = Interner::new();
970d7e83 246 // first one is zero:
1a4d82fc 247 assert_eq!(i.intern(RcStr::new("dog")), Name(0));
970d7e83 248 // re-use gets the same entry:
1a4d82fc 249 assert_eq!(i.intern(RcStr::new("dog")), Name(0));
970d7e83 250 // different string gets a different #:
1a4d82fc
JJ
251 assert_eq!(i.intern(RcStr::new("cat")), Name(1));
252 assert_eq!(i.intern(RcStr::new("cat")), Name(1));
970d7e83 253 // dog is still at zero
1a4d82fc 254 assert_eq!(i.intern(RcStr::new("dog")), Name(0));
970d7e83 255 // gensym gets 3
1a4d82fc 256 assert_eq!(i.gensym(RcStr::new("zebra") ), Name(2));
970d7e83 257 // gensym of same string gets new number :
1a4d82fc 258 assert_eq!(i.gensym (RcStr::new("zebra") ), Name(3));
970d7e83 259 // gensym of *existing* string gets new number:
1a4d82fc
JJ
260 assert_eq!(i.gensym(RcStr::new("dog")), Name(4));
261 assert_eq!(i.get(Name(0)), RcStr::new("dog"));
262 assert_eq!(i.get(Name(1)), RcStr::new("cat"));
263 assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
264 assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
265 assert_eq!(i.get(Name(4)), RcStr::new("dog"));
970d7e83 266 }
223e47cc 267
970d7e83
LB
268 #[test]
269 fn i3 () {
1a4d82fc
JJ
270 let i : Interner<RcStr> = Interner::prefill(&[
271 RcStr::new("Alan"),
272 RcStr::new("Bob"),
273 RcStr::new("Carol")
274 ]);
275 assert_eq!(i.get(Name(0)), RcStr::new("Alan"));
276 assert_eq!(i.get(Name(1)), RcStr::new("Bob"));
277 assert_eq!(i.get(Name(2)), RcStr::new("Carol"));
278 assert_eq!(i.intern(RcStr::new("Bob")), Name(1));
279 }
280
281 #[test]
282 fn string_interner_tests() {
283 let i : StrInterner = StrInterner::new();
284 // first one is zero:
285 assert_eq!(i.intern("dog"), Name(0));
286 // re-use gets the same entry:
287 assert_eq!(i.intern ("dog"), Name(0));
288 // different string gets a different #:
289 assert_eq!(i.intern("cat"), Name(1));
290 assert_eq!(i.intern("cat"), Name(1));
291 // dog is still at zero
292 assert_eq!(i.intern("dog"), Name(0));
293 // gensym gets 3
294 assert_eq!(i.gensym("zebra"), Name(2));
295 // gensym of same string gets new number :
296 assert_eq!(i.gensym("zebra"), Name(3));
297 // gensym of *existing* string gets new number:
298 assert_eq!(i.gensym("dog"), Name(4));
299 // gensym tests again with gensym_copy:
300 assert_eq!(i.gensym_copy(Name(2)), Name(5));
301 assert_eq!(i.get(Name(5)), RcStr::new("zebra"));
302 assert_eq!(i.gensym_copy(Name(2)), Name(6));
303 assert_eq!(i.get(Name(6)), RcStr::new("zebra"));
304 assert_eq!(i.get(Name(0)), RcStr::new("dog"));
305 assert_eq!(i.get(Name(1)), RcStr::new("cat"));
306 assert_eq!(i.get(Name(2)), RcStr::new("zebra"));
307 assert_eq!(i.get(Name(3)), RcStr::new("zebra"));
308 assert_eq!(i.get(Name(4)), RcStr::new("dog"));
970d7e83 309 }
223e47cc 310}