]> git.proxmox.com Git - rustc.git/blame - src/librustc_back/svh.rs
Imported Upstream version 1.3.0+dfsg1
[rustc.git] / src / librustc_back / svh.rs
CommitLineData
1a4d82fc
JJ
1// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! Calculation and management of a Strict Version Hash for crates
12//!
13//! # Today's ABI problem
14//!
15//! In today's implementation of rustc, it is incredibly difficult to achieve
16//! forward binary compatibility without resorting to C-like interfaces. Within
17//! rust code itself, abi details such as symbol names suffer from a variety of
18//! unrelated factors to code changing such as the "def id drift" problem. This
19//! ends up yielding confusing error messages about metadata mismatches and
20//! such.
21//!
22//! The core of this problem is when an upstream dependency changes and
23//! downstream dependents are not recompiled. This causes compile errors because
24//! the upstream crate's metadata has changed but the downstream crates are
25//! still referencing the older crate's metadata.
26//!
27//! This problem exists for many reasons, the primary of which is that rust does
28//! not currently support forwards ABI compatibility (in place upgrades of a
29//! crate).
30//!
31//! # SVH and how it alleviates the problem
32//!
33//! With all of this knowledge on hand, this module contains the implementation
34//! of a notion of a "Strict Version Hash" for a crate. This is essentially a
35//! hash of all contents of a crate which can somehow be exposed to downstream
36//! crates.
37//!
38//! This hash is currently calculated by just hashing the AST, but this is
39//! obviously wrong (doc changes should not result in an incompatible ABI).
40//! Implementation-wise, this is required at this moment in time.
41//!
42//! By encoding this strict version hash into all crate's metadata, stale crates
43//! can be detected immediately and error'd about by rustc itself.
44//!
45//! # Relevant links
46//!
47//! Original issue: https://github.com/rust-lang/rust/issues/10207
48
49use std::fmt;
50use std::hash::{Hash, SipHasher, Hasher};
1a4d82fc
JJ
51use syntax::ast;
52use syntax::visit;
53
85aaf69f 54#[derive(Clone, PartialEq, Debug)]
1a4d82fc
JJ
55pub struct Svh {
56 hash: String,
57}
58
59impl Svh {
60 pub fn new(hash: &str) -> Svh {
61 assert!(hash.len() == 16);
62 Svh { hash: hash.to_string() }
63 }
64
65 pub fn as_str<'a>(&'a self) -> &'a str {
c34b1796 66 &self.hash
1a4d82fc
JJ
67 }
68
69 pub fn calculate(metadata: &Vec<String>, krate: &ast::Crate) -> Svh {
70 // FIXME (#14132): This is better than it used to be, but it still not
71 // ideal. We now attempt to hash only the relevant portions of the
72 // Crate AST as well as the top-level crate attributes. (However,
73 // the hashing of the crate attributes should be double-checked
74 // to ensure it is not incorporating implementation artifacts into
75 // the hash that are not otherwise visible.)
76
77 // FIXME: this should use SHA1, not SipHash. SipHash is not built to
78 // avoid collisions.
79 let mut state = SipHasher::new();
80
85aaf69f 81 for data in metadata {
1a4d82fc
JJ
82 data.hash(&mut state);
83 }
84
85 {
86 let mut visit = svh_visitor::make(&mut state);
87 visit::walk_crate(&mut visit, krate);
88 }
89
90 // FIXME (#14132): This hash is still sensitive to e.g. the
91 // spans of the crate Attributes and their underlying
92 // MetaItems; we should make ContentHashable impl for those
93 // types and then use hash_content. But, since all crate
94 // attributes should appear near beginning of the file, it is
95 // not such a big deal to be sensitive to their spans for now.
96 //
97 // We hash only the MetaItems instead of the entire Attribute
98 // to avoid hashing the AttrId
85aaf69f 99 for attr in &krate.attrs {
1a4d82fc
JJ
100 attr.node.value.hash(&mut state);
101 }
102
103 let hash = state.finish();
104 return Svh {
c34b1796 105 hash: (0..64).step_by(4).map(|i| hex(hash >> i)).collect()
1a4d82fc
JJ
106 };
107
108 fn hex(b: u64) -> char {
109 let b = (b & 0xf) as u8;
110 let b = match b {
111 0 ... 9 => '0' as u8 + b,
112 _ => 'a' as u8 + b - 10,
113 };
114 b as char
115 }
116 }
117}
118
85aaf69f 119impl fmt::Display for Svh {
1a4d82fc
JJ
120 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
121 f.pad(self.as_str())
122 }
123}
124
125// FIXME (#14132): Even this SVH computation still has implementation
126// artifacts: namely, the order of item declaration will affect the
127// hash computation, but for many kinds of items the order of
128// declaration should be irrelevant to the ABI.
129
130mod svh_visitor {
131 pub use self::SawExprComponent::*;
132 pub use self::SawStmtComponent::*;
133 use self::SawAbiComponent::*;
134 use syntax::ast;
135 use syntax::ast::*;
136 use syntax::codemap::Span;
137 use syntax::parse::token;
138 use syntax::print::pprust;
139 use syntax::visit;
140 use syntax::visit::{Visitor, FnKind};
141
142 use std::hash::{Hash, SipHasher};
143
144 pub struct StrictVersionHashVisitor<'a> {
145 pub st: &'a mut SipHasher,
146 }
147
148 pub fn make<'a>(st: &'a mut SipHasher) -> StrictVersionHashVisitor<'a> {
149 StrictVersionHashVisitor { st: st }
150 }
151
85aaf69f 152 // To off-load the bulk of the hash-computation on #[derive(Hash)],
1a4d82fc
JJ
153 // we define a set of enums corresponding to the content that our
154 // crate visitor will encounter as it traverses the ast.
155 //
156 // The important invariant is that all of the Saw*Component enums
157 // do not carry any Spans, Names, or Idents.
158 //
159 // Not carrying any Names/Idents is the important fix for problem
160 // noted on PR #13948: using the ident.name as the basis for a
161 // hash leads to unstable SVH, because ident.name is just an index
162 // into intern table (i.e. essentially a random address), not
163 // computed from the name content.
164 //
165 // With the below enums, the SVH computation is not sensitive to
166 // artifacts of how rustc was invoked nor of how the source code
167 // was laid out. (Or at least it is *less* sensitive.)
168
169 // This enum represents the different potential bits of code the
170 // visitor could encounter that could affect the ABI for the crate,
171 // and assigns each a distinct tag to feed into the hash computation.
172 #[derive(Hash)]
173 enum SawAbiComponent<'a> {
174
175 // FIXME (#14132): should we include (some function of)
176 // ident.ctxt as well?
177 SawIdent(token::InternedString),
178 SawStructDef(token::InternedString),
179
180 SawLifetimeRef(token::InternedString),
181 SawLifetimeDef(token::InternedString),
182
183 SawMod,
1a4d82fc
JJ
184 SawForeignItem,
185 SawItem,
186 SawDecl,
187 SawTy,
188 SawGenerics,
189 SawFn,
c34b1796
AL
190 SawTraitItem,
191 SawImplItem,
1a4d82fc
JJ
192 SawStructField,
193 SawVariant,
194 SawExplicitSelf,
195 SawPath,
196 SawOptLifetimeRef,
197 SawBlock,
198 SawPat,
199 SawLocal,
200 SawArm,
201 SawExpr(SawExprComponent<'a>),
202 SawStmt(SawStmtComponent),
203 }
204
205 /// SawExprComponent carries all of the information that we want
206 /// to include in the hash that *won't* be covered by the
207 /// subsequent recursive traversal of the expression's
208 /// substructure by the visitor.
209 ///
210 /// We know every Expr_ variant is covered by a variant because
211 /// `fn saw_expr` maps each to some case below. Ensuring that
212 /// each variant carries an appropriate payload has to be verified
213 /// by hand.
214 ///
215 /// (However, getting that *exactly* right is not so important
216 /// because the SVH is just a developer convenience; there is no
217 /// guarantee of collision-freedom, hash collisions are just
218 /// (hopefully) unlikely.)
219 #[derive(Hash)]
220 pub enum SawExprComponent<'a> {
221
222 SawExprLoop(Option<token::InternedString>),
223 SawExprField(token::InternedString),
c34b1796 224 SawExprTupField(usize),
1a4d82fc
JJ
225 SawExprBreak(Option<token::InternedString>),
226 SawExprAgain(Option<token::InternedString>),
227
228 SawExprBox,
229 SawExprVec,
230 SawExprCall,
231 SawExprMethodCall,
232 SawExprTup,
85aaf69f 233 SawExprBinary(ast::BinOp_),
1a4d82fc
JJ
234 SawExprUnary(ast::UnOp),
235 SawExprLit(ast::Lit_),
236 SawExprCast,
237 SawExprIf,
238 SawExprWhile,
239 SawExprMatch,
240 SawExprClosure,
241 SawExprBlock,
242 SawExprAssign,
85aaf69f 243 SawExprAssignOp(ast::BinOp_),
1a4d82fc
JJ
244 SawExprIndex,
245 SawExprRange,
c34b1796 246 SawExprPath(Option<usize>),
1a4d82fc
JJ
247 SawExprAddrOf(ast::Mutability),
248 SawExprRet,
249 SawExprInlineAsm(&'a ast::InlineAsm),
250 SawExprStruct,
251 SawExprRepeat,
252 SawExprParen,
1a4d82fc
JJ
253 }
254
255 fn saw_expr<'a>(node: &'a Expr_) -> SawExprComponent<'a> {
256 match *node {
257 ExprBox(..) => SawExprBox,
258 ExprVec(..) => SawExprVec,
259 ExprCall(..) => SawExprCall,
260 ExprMethodCall(..) => SawExprMethodCall,
261 ExprTup(..) => SawExprTup,
85aaf69f 262 ExprBinary(op, _, _) => SawExprBinary(op.node),
1a4d82fc
JJ
263 ExprUnary(op, _) => SawExprUnary(op),
264 ExprLit(ref lit) => SawExprLit(lit.node.clone()),
265 ExprCast(..) => SawExprCast,
266 ExprIf(..) => SawExprIf,
267 ExprWhile(..) => SawExprWhile,
c1a9b12d 268 ExprLoop(_, id) => SawExprLoop(id.map(|id| id.name.as_str())),
1a4d82fc
JJ
269 ExprMatch(..) => SawExprMatch,
270 ExprClosure(..) => SawExprClosure,
271 ExprBlock(..) => SawExprBlock,
272 ExprAssign(..) => SawExprAssign,
85aaf69f 273 ExprAssignOp(op, _, _) => SawExprAssignOp(op.node),
c1a9b12d 274 ExprField(_, id) => SawExprField(id.node.name.as_str()),
1a4d82fc
JJ
275 ExprTupField(_, id) => SawExprTupField(id.node),
276 ExprIndex(..) => SawExprIndex,
277 ExprRange(..) => SawExprRange,
c34b1796 278 ExprPath(ref qself, _) => SawExprPath(qself.as_ref().map(|q| q.position)),
1a4d82fc 279 ExprAddrOf(m, _) => SawExprAddrOf(m),
c1a9b12d
SL
280 ExprBreak(id) => SawExprBreak(id.map(|id| id.name.as_str())),
281 ExprAgain(id) => SawExprAgain(id.map(|id| id.name.as_str())),
1a4d82fc
JJ
282 ExprRet(..) => SawExprRet,
283 ExprInlineAsm(ref asm) => SawExprInlineAsm(asm),
284 ExprStruct(..) => SawExprStruct,
285 ExprRepeat(..) => SawExprRepeat,
286 ExprParen(..) => SawExprParen,
1a4d82fc
JJ
287
288 // just syntactic artifacts, expanded away by time of SVH.
85aaf69f 289 ExprForLoop(..) => unreachable!(),
1a4d82fc
JJ
290 ExprIfLet(..) => unreachable!(),
291 ExprWhileLet(..) => unreachable!(),
292 ExprMac(..) => unreachable!(),
293 }
294 }
295
296 /// SawStmtComponent is analogous to SawExprComponent, but for statements.
297 #[derive(Hash)]
298 pub enum SawStmtComponent {
299 SawStmtDecl,
300 SawStmtExpr,
301 SawStmtSemi,
302 }
303
304 fn saw_stmt(node: &Stmt_) -> SawStmtComponent {
305 match *node {
306 StmtDecl(..) => SawStmtDecl,
307 StmtExpr(..) => SawStmtExpr,
308 StmtSemi(..) => SawStmtSemi,
309 StmtMac(..) => unreachable!(),
310 }
311 }
312
1a4d82fc
JJ
313 impl<'a, 'v> Visitor<'v> for StrictVersionHashVisitor<'a> {
314
315 fn visit_mac(&mut self, mac: &Mac) {
316 // macro invocations, namely macro_rules definitions,
317 // *can* appear as items, even in the expanded crate AST.
318
c34b1796 319 if &macro_name(mac)[..] == "macro_rules" {
1a4d82fc
JJ
320 // Pretty-printing definition to a string strips out
321 // surface artifacts (currently), such as the span
322 // information, yielding a content-based hash.
323
324 // FIXME (#14132): building temporary string is
325 // expensive; a direct content-based hash on token
326 // trees might be faster. Implementing this is far
327 // easier in short term.
328 let macro_defn_as_string = pprust::to_string(|pp_state| {
329 pp_state.print_mac(mac, token::Paren)
330 });
331 macro_defn_as_string.hash(self.st);
332 } else {
333 // It is not possible to observe any kind of macro
334 // invocation at this stage except `macro_rules!`.
335 panic!("reached macro somehow: {}",
336 pprust::to_string(|pp_state| {
337 pp_state.print_mac(mac, token::Paren)
338 }));
339 }
340
341 visit::walk_mac(self, mac);
342
343 fn macro_name(mac: &Mac) -> token::InternedString {
344 match &mac.node {
345 &MacInvocTT(ref path, ref _tts, ref _stx_ctxt) => {
c34b1796 346 let s = &path.segments;
1a4d82fc 347 assert_eq!(s.len(), 1);
c1a9b12d 348 s[0].identifier.name.as_str()
1a4d82fc
JJ
349 }
350 }
351 }
352 }
353
354 fn visit_struct_def(&mut self, s: &StructDef, ident: Ident,
355 g: &Generics, _: NodeId) {
c1a9b12d 356 SawStructDef(ident.name.as_str()).hash(self.st);
1a4d82fc
JJ
357 visit::walk_generics(self, g);
358 visit::walk_struct_def(self, s)
359 }
360
361 fn visit_variant(&mut self, v: &Variant, g: &Generics) {
362 SawVariant.hash(self.st);
363 // walk_variant does not call walk_generics, so do it here.
364 visit::walk_generics(self, g);
365 visit::walk_variant(self, v, g)
366 }
367
368 fn visit_opt_lifetime_ref(&mut self, _: Span, l: &Option<Lifetime>) {
369 SawOptLifetimeRef.hash(self.st);
370 // (This is a strange method in the visitor trait, in that
371 // it does not expose a walk function to do the subroutine
372 // calls.)
373 match *l {
374 Some(ref l) => self.visit_lifetime_ref(l),
375 None => ()
376 }
377 }
378
379 // All of the remaining methods just record (in the hash
380 // SipHasher) that the visitor saw that particular variant
381 // (with its payload), and continue walking as the default
382 // visitor would.
383 //
384 // Some of the implementations have some notes as to how one
385 // might try to make their SVH computation less discerning
386 // (e.g. by incorporating reachability analysis). But
387 // currently all of their implementations are uniform and
388 // uninteresting.
389 //
390 // (If you edit a method such that it deviates from the
391 // pattern, please move that method up above this comment.)
392
393 fn visit_ident(&mut self, _: Span, ident: Ident) {
c1a9b12d 394 SawIdent(ident.name.as_str()).hash(self.st);
1a4d82fc
JJ
395 }
396
397 fn visit_lifetime_ref(&mut self, l: &Lifetime) {
c1a9b12d 398 SawLifetimeRef(l.name.as_str()).hash(self.st);
1a4d82fc
JJ
399 }
400
401 fn visit_lifetime_def(&mut self, l: &LifetimeDef) {
c1a9b12d 402 SawLifetimeDef(l.lifetime.name.as_str()).hash(self.st);
1a4d82fc
JJ
403 }
404
405 // We do recursively walk the bodies of functions/methods
406 // (rather than omitting their bodies from the hash) since
407 // monomorphization and cross-crate inlining generally implies
408 // that a change to a crate body will require downstream
409 // crates to be recompiled.
410 fn visit_expr(&mut self, ex: &Expr) {
411 SawExpr(saw_expr(&ex.node)).hash(self.st); visit::walk_expr(self, ex)
412 }
413
414 fn visit_stmt(&mut self, s: &Stmt) {
415 SawStmt(saw_stmt(&s.node)).hash(self.st); visit::walk_stmt(self, s)
416 }
417
1a4d82fc
JJ
418 fn visit_foreign_item(&mut self, i: &ForeignItem) {
419 // FIXME (#14132) ideally we would incorporate privacy (or
420 // perhaps reachability) somewhere here, so foreign items
421 // that do not leak into downstream crates would not be
422 // part of the ABI.
423 SawForeignItem.hash(self.st); visit::walk_foreign_item(self, i)
424 }
425
426 fn visit_item(&mut self, i: &Item) {
427 // FIXME (#14132) ideally would incorporate reachability
428 // analysis somewhere here, so items that never leak into
429 // downstream crates (e.g. via monomorphisation or
430 // inlining) would not be part of the ABI.
431 SawItem.hash(self.st); visit::walk_item(self, i)
432 }
433
434 fn visit_mod(&mut self, m: &Mod, _s: Span, _n: NodeId) {
435 SawMod.hash(self.st); visit::walk_mod(self, m)
436 }
437
438 fn visit_decl(&mut self, d: &Decl) {
439 SawDecl.hash(self.st); visit::walk_decl(self, d)
440 }
441
442 fn visit_ty(&mut self, t: &Ty) {
443 SawTy.hash(self.st); visit::walk_ty(self, t)
444 }
445
446 fn visit_generics(&mut self, g: &Generics) {
447 SawGenerics.hash(self.st); visit::walk_generics(self, g)
448 }
449
450 fn visit_fn(&mut self, fk: FnKind<'v>, fd: &'v FnDecl,
451 b: &'v Block, s: Span, _: NodeId) {
452 SawFn.hash(self.st); visit::walk_fn(self, fk, fd, b, s)
453 }
454
c34b1796
AL
455 fn visit_trait_item(&mut self, ti: &TraitItem) {
456 SawTraitItem.hash(self.st); visit::walk_trait_item(self, ti)
1a4d82fc
JJ
457 }
458
c34b1796
AL
459 fn visit_impl_item(&mut self, ii: &ImplItem) {
460 SawImplItem.hash(self.st); visit::walk_impl_item(self, ii)
1a4d82fc
JJ
461 }
462
463 fn visit_struct_field(&mut self, s: &StructField) {
464 SawStructField.hash(self.st); visit::walk_struct_field(self, s)
465 }
466
467 fn visit_explicit_self(&mut self, es: &ExplicitSelf) {
468 SawExplicitSelf.hash(self.st); visit::walk_explicit_self(self, es)
469 }
470
471 fn visit_path(&mut self, path: &Path, _: ast::NodeId) {
472 SawPath.hash(self.st); visit::walk_path(self, path)
473 }
474
475 fn visit_block(&mut self, b: &Block) {
476 SawBlock.hash(self.st); visit::walk_block(self, b)
477 }
478
479 fn visit_pat(&mut self, p: &Pat) {
480 SawPat.hash(self.st); visit::walk_pat(self, p)
481 }
482
483 fn visit_local(&mut self, l: &Local) {
484 SawLocal.hash(self.st); visit::walk_local(self, l)
485 }
486
487 fn visit_arm(&mut self, a: &Arm) {
488 SawArm.hash(self.st); visit::walk_arm(self, a)
489 }
490 }
491}