]> git.proxmox.com Git - rustc.git/blob - src/librustc_back/svh.rs
Imported Upstream version 1.8.0+dfsg1
[rustc.git] / src / librustc_back / svh.rs
1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! Calculation and management of a Strict Version Hash for crates
12 //!
13 //! # Today's ABI problem
14 //!
15 //! In today's implementation of rustc, it is incredibly difficult to achieve
16 //! forward binary compatibility without resorting to C-like interfaces. Within
17 //! rust code itself, abi details such as symbol names suffer from a variety of
18 //! unrelated factors to code changing such as the "def id drift" problem. This
19 //! ends up yielding confusing error messages about metadata mismatches and
20 //! such.
21 //!
22 //! The core of this problem is when an upstream dependency changes and
23 //! downstream dependents are not recompiled. This causes compile errors because
24 //! the upstream crate's metadata has changed but the downstream crates are
25 //! still referencing the older crate's metadata.
26 //!
27 //! This problem exists for many reasons, the primary of which is that rust does
28 //! not currently support forwards ABI compatibility (in place upgrades of a
29 //! crate).
30 //!
31 //! # SVH and how it alleviates the problem
32 //!
33 //! With all of this knowledge on hand, this module contains the implementation
34 //! of a notion of a "Strict Version Hash" for a crate. This is essentially a
35 //! hash of all contents of a crate which can somehow be exposed to downstream
36 //! crates.
37 //!
38 //! This hash is currently calculated by just hashing the AST, but this is
39 //! obviously wrong (doc changes should not result in an incompatible ABI).
40 //! Implementation-wise, this is required at this moment in time.
41 //!
42 //! By encoding this strict version hash into all crate's metadata, stale crates
43 //! can be detected immediately and error'd about by rustc itself.
44 //!
45 //! # Relevant links
46 //!
47 //! Original issue: https://github.com/rust-lang/rust/issues/10207
48
49 use std::fmt;
50 use std::hash::{Hash, SipHasher, Hasher};
51 use rustc_front::hir;
52 use rustc_front::intravisit as visit;
53
54 #[derive(Clone, PartialEq, Debug)]
55 pub struct Svh {
56 hash: String,
57 }
58
59 impl Svh {
60 pub fn new(hash: &str) -> Svh {
61 assert!(hash.len() == 16);
62 Svh { hash: hash.to_string() }
63 }
64
65 pub fn as_str<'a>(&'a self) -> &'a str {
66 &self.hash
67 }
68
69 pub fn calculate(metadata: &Vec<String>, krate: &hir::Crate) -> Svh {
70 // FIXME (#14132): This is better than it used to be, but it still not
71 // ideal. We now attempt to hash only the relevant portions of the
72 // Crate AST as well as the top-level crate attributes. (However,
73 // the hashing of the crate attributes should be double-checked
74 // to ensure it is not incorporating implementation artifacts into
75 // the hash that are not otherwise visible.)
76
77 // FIXME: this should use SHA1, not SipHash. SipHash is not built to
78 // avoid collisions.
79 let mut state = SipHasher::new();
80
81 for data in metadata {
82 data.hash(&mut state);
83 }
84
85 {
86 let mut visit = svh_visitor::make(&mut state, krate);
87 visit::walk_crate(&mut visit, krate);
88 }
89
90 // FIXME (#14132): This hash is still sensitive to e.g. the
91 // spans of the crate Attributes and their underlying
92 // MetaItems; we should make ContentHashable impl for those
93 // types and then use hash_content. But, since all crate
94 // attributes should appear near beginning of the file, it is
95 // not such a big deal to be sensitive to their spans for now.
96 //
97 // We hash only the MetaItems instead of the entire Attribute
98 // to avoid hashing the AttrId
99 for attr in &krate.attrs {
100 attr.node.value.hash(&mut state);
101 }
102
103 let hash = state.finish();
104 return Svh {
105 hash: (0..64).step_by(4).map(|i| hex(hash >> i)).collect()
106 };
107
108 fn hex(b: u64) -> char {
109 let b = (b & 0xf) as u8;
110 let b = match b {
111 0 ... 9 => '0' as u8 + b,
112 _ => 'a' as u8 + b - 10,
113 };
114 b as char
115 }
116 }
117 }
118
119 impl fmt::Display for Svh {
120 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
121 f.pad(self.as_str())
122 }
123 }
124
125 // FIXME (#14132): Even this SVH computation still has implementation
126 // artifacts: namely, the order of item declaration will affect the
127 // hash computation, but for many kinds of items the order of
128 // declaration should be irrelevant to the ABI.
129
130 mod svh_visitor {
131 pub use self::SawExprComponent::*;
132 pub use self::SawStmtComponent::*;
133 use self::SawAbiComponent::*;
134 use syntax::ast::{self, Name, NodeId};
135 use syntax::codemap::Span;
136 use syntax::parse::token;
137 use rustc_front::intravisit as visit;
138 use rustc_front::intravisit::{Visitor, FnKind};
139 use rustc_front::hir::*;
140 use rustc_front::hir;
141
142 use std::hash::{Hash, SipHasher};
143
144 pub struct StrictVersionHashVisitor<'a> {
145 pub krate: &'a Crate,
146 pub st: &'a mut SipHasher,
147 }
148
149 pub fn make<'a>(st: &'a mut SipHasher, krate: &'a Crate) -> StrictVersionHashVisitor<'a> {
150 StrictVersionHashVisitor { st: st, krate: krate }
151 }
152
153 // To off-load the bulk of the hash-computation on #[derive(Hash)],
154 // we define a set of enums corresponding to the content that our
155 // crate visitor will encounter as it traverses the ast.
156 //
157 // The important invariant is that all of the Saw*Component enums
158 // do not carry any Spans, Names, or Idents.
159 //
160 // Not carrying any Names/Idents is the important fix for problem
161 // noted on PR #13948: using the ident.name as the basis for a
162 // hash leads to unstable SVH, because ident.name is just an index
163 // into intern table (i.e. essentially a random address), not
164 // computed from the name content.
165 //
166 // With the below enums, the SVH computation is not sensitive to
167 // artifacts of how rustc was invoked nor of how the source code
168 // was laid out. (Or at least it is *less* sensitive.)
169
170 // This enum represents the different potential bits of code the
171 // visitor could encounter that could affect the ABI for the crate,
172 // and assigns each a distinct tag to feed into the hash computation.
173 #[derive(Hash)]
174 enum SawAbiComponent<'a> {
175
176 // FIXME (#14132): should we include (some function of)
177 // ident.ctxt as well?
178 SawIdent(token::InternedString),
179 SawStructDef(token::InternedString),
180
181 SawLifetime(token::InternedString),
182 SawLifetimeDef(token::InternedString),
183
184 SawMod,
185 SawForeignItem,
186 SawItem,
187 SawDecl,
188 SawTy,
189 SawGenerics,
190 SawFn,
191 SawTraitItem,
192 SawImplItem,
193 SawStructField,
194 SawVariant,
195 SawExplicitSelf,
196 SawPath,
197 SawBlock,
198 SawPat,
199 SawLocal,
200 SawArm,
201 SawExpr(SawExprComponent<'a>),
202 SawStmt(SawStmtComponent),
203 }
204
205 /// SawExprComponent carries all of the information that we want
206 /// to include in the hash that *won't* be covered by the
207 /// subsequent recursive traversal of the expression's
208 /// substructure by the visitor.
209 ///
210 /// We know every Expr_ variant is covered by a variant because
211 /// `fn saw_expr` maps each to some case below. Ensuring that
212 /// each variant carries an appropriate payload has to be verified
213 /// by hand.
214 ///
215 /// (However, getting that *exactly* right is not so important
216 /// because the SVH is just a developer convenience; there is no
217 /// guarantee of collision-freedom, hash collisions are just
218 /// (hopefully) unlikely.)
219 #[derive(Hash)]
220 pub enum SawExprComponent<'a> {
221
222 SawExprLoop(Option<token::InternedString>),
223 SawExprField(token::InternedString),
224 SawExprTupField(usize),
225 SawExprBreak(Option<token::InternedString>),
226 SawExprAgain(Option<token::InternedString>),
227
228 SawExprBox,
229 SawExprVec,
230 SawExprCall,
231 SawExprMethodCall,
232 SawExprTup,
233 SawExprBinary(hir::BinOp_),
234 SawExprUnary(hir::UnOp),
235 SawExprLit(ast::LitKind),
236 SawExprCast,
237 SawExprType,
238 SawExprIf,
239 SawExprWhile,
240 SawExprMatch,
241 SawExprClosure,
242 SawExprBlock,
243 SawExprAssign,
244 SawExprAssignOp(hir::BinOp_),
245 SawExprIndex,
246 SawExprRange,
247 SawExprPath(Option<usize>),
248 SawExprAddrOf(hir::Mutability),
249 SawExprRet,
250 SawExprInlineAsm(&'a hir::InlineAsm),
251 SawExprStruct,
252 SawExprRepeat,
253 }
254
255 fn saw_expr<'a>(node: &'a Expr_) -> SawExprComponent<'a> {
256 match *node {
257 ExprBox(..) => SawExprBox,
258 ExprVec(..) => SawExprVec,
259 ExprCall(..) => SawExprCall,
260 ExprMethodCall(..) => SawExprMethodCall,
261 ExprTup(..) => SawExprTup,
262 ExprBinary(op, _, _) => SawExprBinary(op.node),
263 ExprUnary(op, _) => SawExprUnary(op),
264 ExprLit(ref lit) => SawExprLit(lit.node.clone()),
265 ExprCast(..) => SawExprCast,
266 ExprType(..) => SawExprType,
267 ExprIf(..) => SawExprIf,
268 ExprWhile(..) => SawExprWhile,
269 ExprLoop(_, id) => SawExprLoop(id.map(|id| id.name.as_str())),
270 ExprMatch(..) => SawExprMatch,
271 ExprClosure(..) => SawExprClosure,
272 ExprBlock(..) => SawExprBlock,
273 ExprAssign(..) => SawExprAssign,
274 ExprAssignOp(op, _, _) => SawExprAssignOp(op.node),
275 ExprField(_, name) => SawExprField(name.node.as_str()),
276 ExprTupField(_, id) => SawExprTupField(id.node),
277 ExprIndex(..) => SawExprIndex,
278 ExprRange(..) => SawExprRange,
279 ExprPath(ref qself, _) => SawExprPath(qself.as_ref().map(|q| q.position)),
280 ExprAddrOf(m, _) => SawExprAddrOf(m),
281 ExprBreak(id) => SawExprBreak(id.map(|id| id.node.name.as_str())),
282 ExprAgain(id) => SawExprAgain(id.map(|id| id.node.name.as_str())),
283 ExprRet(..) => SawExprRet,
284 ExprInlineAsm(ref asm) => SawExprInlineAsm(asm),
285 ExprStruct(..) => SawExprStruct,
286 ExprRepeat(..) => SawExprRepeat,
287 }
288 }
289
290 /// SawStmtComponent is analogous to SawExprComponent, but for statements.
291 #[derive(Hash)]
292 pub enum SawStmtComponent {
293 SawStmtDecl,
294 SawStmtExpr,
295 SawStmtSemi,
296 }
297
298 fn saw_stmt(node: &Stmt_) -> SawStmtComponent {
299 match *node {
300 StmtDecl(..) => SawStmtDecl,
301 StmtExpr(..) => SawStmtExpr,
302 StmtSemi(..) => SawStmtSemi,
303 }
304 }
305
306 impl<'a> Visitor<'a> for StrictVersionHashVisitor<'a> {
307 fn visit_nested_item(&mut self, item: ItemId) {
308 self.visit_item(self.krate.item(item.id))
309 }
310
311 fn visit_variant_data(&mut self, s: &'a VariantData, name: Name,
312 g: &'a Generics, _: NodeId, _: Span) {
313 SawStructDef(name.as_str()).hash(self.st);
314 visit::walk_generics(self, g);
315 visit::walk_struct_def(self, s)
316 }
317
318 fn visit_variant(&mut self, v: &'a Variant, g: &'a Generics, item_id: NodeId) {
319 SawVariant.hash(self.st);
320 // walk_variant does not call walk_generics, so do it here.
321 visit::walk_generics(self, g);
322 visit::walk_variant(self, v, g, item_id)
323 }
324
325 // All of the remaining methods just record (in the hash
326 // SipHasher) that the visitor saw that particular variant
327 // (with its payload), and continue walking as the default
328 // visitor would.
329 //
330 // Some of the implementations have some notes as to how one
331 // might try to make their SVH computation less discerning
332 // (e.g. by incorporating reachability analysis). But
333 // currently all of their implementations are uniform and
334 // uninteresting.
335 //
336 // (If you edit a method such that it deviates from the
337 // pattern, please move that method up above this comment.)
338
339 fn visit_name(&mut self, _: Span, name: Name) {
340 SawIdent(name.as_str()).hash(self.st);
341 }
342
343 fn visit_lifetime(&mut self, l: &'a Lifetime) {
344 SawLifetime(l.name.as_str()).hash(self.st);
345 }
346
347 fn visit_lifetime_def(&mut self, l: &'a LifetimeDef) {
348 SawLifetimeDef(l.lifetime.name.as_str()).hash(self.st);
349 }
350
351 // We do recursively walk the bodies of functions/methods
352 // (rather than omitting their bodies from the hash) since
353 // monomorphization and cross-crate inlining generally implies
354 // that a change to a crate body will require downstream
355 // crates to be recompiled.
356 fn visit_expr(&mut self, ex: &'a Expr) {
357 SawExpr(saw_expr(&ex.node)).hash(self.st); visit::walk_expr(self, ex)
358 }
359
360 fn visit_stmt(&mut self, s: &'a Stmt) {
361 SawStmt(saw_stmt(&s.node)).hash(self.st); visit::walk_stmt(self, s)
362 }
363
364 fn visit_foreign_item(&mut self, i: &'a ForeignItem) {
365 // FIXME (#14132) ideally we would incorporate privacy (or
366 // perhaps reachability) somewhere here, so foreign items
367 // that do not leak into downstream crates would not be
368 // part of the ABI.
369 SawForeignItem.hash(self.st); visit::walk_foreign_item(self, i)
370 }
371
372 fn visit_item(&mut self, i: &'a Item) {
373 // FIXME (#14132) ideally would incorporate reachability
374 // analysis somewhere here, so items that never leak into
375 // downstream crates (e.g. via monomorphisation or
376 // inlining) would not be part of the ABI.
377 SawItem.hash(self.st); visit::walk_item(self, i)
378 }
379
380 fn visit_mod(&mut self, m: &'a Mod, _s: Span, _n: NodeId) {
381 SawMod.hash(self.st); visit::walk_mod(self, m)
382 }
383
384 fn visit_decl(&mut self, d: &'a Decl) {
385 SawDecl.hash(self.st); visit::walk_decl(self, d)
386 }
387
388 fn visit_ty(&mut self, t: &'a Ty) {
389 SawTy.hash(self.st); visit::walk_ty(self, t)
390 }
391
392 fn visit_generics(&mut self, g: &'a Generics) {
393 SawGenerics.hash(self.st); visit::walk_generics(self, g)
394 }
395
396 fn visit_fn(&mut self, fk: FnKind<'a>, fd: &'a FnDecl,
397 b: &'a Block, s: Span, _: NodeId) {
398 SawFn.hash(self.st); visit::walk_fn(self, fk, fd, b, s)
399 }
400
401 fn visit_trait_item(&mut self, ti: &'a TraitItem) {
402 SawTraitItem.hash(self.st); visit::walk_trait_item(self, ti)
403 }
404
405 fn visit_impl_item(&mut self, ii: &'a ImplItem) {
406 SawImplItem.hash(self.st); visit::walk_impl_item(self, ii)
407 }
408
409 fn visit_struct_field(&mut self, s: &'a StructField) {
410 SawStructField.hash(self.st); visit::walk_struct_field(self, s)
411 }
412
413 fn visit_explicit_self(&mut self, es: &'a ExplicitSelf) {
414 SawExplicitSelf.hash(self.st); visit::walk_explicit_self(self, es)
415 }
416
417 fn visit_path(&mut self, path: &'a Path, _: ast::NodeId) {
418 SawPath.hash(self.st); visit::walk_path(self, path)
419 }
420
421 fn visit_path_list_item(&mut self, prefix: &'a Path, item: &'a PathListItem) {
422 SawPath.hash(self.st); visit::walk_path_list_item(self, prefix, item)
423 }
424
425 fn visit_block(&mut self, b: &'a Block) {
426 SawBlock.hash(self.st); visit::walk_block(self, b)
427 }
428
429 fn visit_pat(&mut self, p: &'a Pat) {
430 SawPat.hash(self.st); visit::walk_pat(self, p)
431 }
432
433 fn visit_local(&mut self, l: &'a Local) {
434 SawLocal.hash(self.st); visit::walk_local(self, l)
435 }
436
437 fn visit_arm(&mut self, a: &'a Arm) {
438 SawArm.hash(self.st); visit::walk_arm(self, a)
439 }
440 }
441 }