]> git.proxmox.com Git - rustc.git/blob - src/librustc_back/svh.rs
Merge tag 'upstream/1.5.0+dfsg1'
[rustc.git] / src / librustc_back / svh.rs
1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! Calculation and management of a Strict Version Hash for crates
12 //!
13 //! # Today's ABI problem
14 //!
15 //! In today's implementation of rustc, it is incredibly difficult to achieve
16 //! forward binary compatibility without resorting to C-like interfaces. Within
17 //! rust code itself, abi details such as symbol names suffer from a variety of
18 //! unrelated factors to code changing such as the "def id drift" problem. This
19 //! ends up yielding confusing error messages about metadata mismatches and
20 //! such.
21 //!
22 //! The core of this problem is when an upstream dependency changes and
23 //! downstream dependents are not recompiled. This causes compile errors because
24 //! the upstream crate's metadata has changed but the downstream crates are
25 //! still referencing the older crate's metadata.
26 //!
27 //! This problem exists for many reasons, the primary of which is that rust does
28 //! not currently support forwards ABI compatibility (in place upgrades of a
29 //! crate).
30 //!
31 //! # SVH and how it alleviates the problem
32 //!
33 //! With all of this knowledge on hand, this module contains the implementation
34 //! of a notion of a "Strict Version Hash" for a crate. This is essentially a
35 //! hash of all contents of a crate which can somehow be exposed to downstream
36 //! crates.
37 //!
38 //! This hash is currently calculated by just hashing the AST, but this is
39 //! obviously wrong (doc changes should not result in an incompatible ABI).
40 //! Implementation-wise, this is required at this moment in time.
41 //!
42 //! By encoding this strict version hash into all crate's metadata, stale crates
43 //! can be detected immediately and error'd about by rustc itself.
44 //!
45 //! # Relevant links
46 //!
47 //! Original issue: https://github.com/rust-lang/rust/issues/10207
48
49 use std::fmt;
50 use std::hash::{Hash, SipHasher, Hasher};
51 use rustc_front::hir;
52 use rustc_front::visit;
53
54 #[derive(Clone, PartialEq, Debug)]
55 pub struct Svh {
56 hash: String,
57 }
58
59 impl Svh {
60 pub fn new(hash: &str) -> Svh {
61 assert!(hash.len() == 16);
62 Svh { hash: hash.to_string() }
63 }
64
65 pub fn as_str<'a>(&'a self) -> &'a str {
66 &self.hash
67 }
68
69 pub fn calculate(metadata: &Vec<String>, krate: &hir::Crate) -> Svh {
70 // FIXME (#14132): This is better than it used to be, but it still not
71 // ideal. We now attempt to hash only the relevant portions of the
72 // Crate AST as well as the top-level crate attributes. (However,
73 // the hashing of the crate attributes should be double-checked
74 // to ensure it is not incorporating implementation artifacts into
75 // the hash that are not otherwise visible.)
76
77 // FIXME: this should use SHA1, not SipHash. SipHash is not built to
78 // avoid collisions.
79 let mut state = SipHasher::new();
80
81 for data in metadata {
82 data.hash(&mut state);
83 }
84
85 {
86 let mut visit = svh_visitor::make(&mut state);
87 visit::walk_crate(&mut visit, krate);
88 }
89
90 // FIXME (#14132): This hash is still sensitive to e.g. the
91 // spans of the crate Attributes and their underlying
92 // MetaItems; we should make ContentHashable impl for those
93 // types and then use hash_content. But, since all crate
94 // attributes should appear near beginning of the file, it is
95 // not such a big deal to be sensitive to their spans for now.
96 //
97 // We hash only the MetaItems instead of the entire Attribute
98 // to avoid hashing the AttrId
99 for attr in &krate.attrs {
100 attr.node.value.hash(&mut state);
101 }
102
103 let hash = state.finish();
104 return Svh {
105 hash: (0..64).step_by(4).map(|i| hex(hash >> i)).collect()
106 };
107
108 fn hex(b: u64) -> char {
109 let b = (b & 0xf) as u8;
110 let b = match b {
111 0 ... 9 => '0' as u8 + b,
112 _ => 'a' as u8 + b - 10,
113 };
114 b as char
115 }
116 }
117 }
118
119 impl fmt::Display for Svh {
120 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
121 f.pad(self.as_str())
122 }
123 }
124
125 // FIXME (#14132): Even this SVH computation still has implementation
126 // artifacts: namely, the order of item declaration will affect the
127 // hash computation, but for many kinds of items the order of
128 // declaration should be irrelevant to the ABI.
129
130 mod svh_visitor {
131 pub use self::SawExprComponent::*;
132 pub use self::SawStmtComponent::*;
133 use self::SawAbiComponent::*;
134 use syntax::ast::{self, Name, NodeId};
135 use syntax::codemap::Span;
136 use syntax::parse::token;
137 use rustc_front::visit;
138 use rustc_front::visit::{Visitor, FnKind};
139 use rustc_front::hir::*;
140 use rustc_front::hir;
141
142 use std::hash::{Hash, SipHasher};
143
144 pub struct StrictVersionHashVisitor<'a> {
145 pub st: &'a mut SipHasher,
146 }
147
148 pub fn make<'a>(st: &'a mut SipHasher) -> StrictVersionHashVisitor<'a> {
149 StrictVersionHashVisitor { st: st }
150 }
151
152 // To off-load the bulk of the hash-computation on #[derive(Hash)],
153 // we define a set of enums corresponding to the content that our
154 // crate visitor will encounter as it traverses the ast.
155 //
156 // The important invariant is that all of the Saw*Component enums
157 // do not carry any Spans, Names, or Idents.
158 //
159 // Not carrying any Names/Idents is the important fix for problem
160 // noted on PR #13948: using the ident.name as the basis for a
161 // hash leads to unstable SVH, because ident.name is just an index
162 // into intern table (i.e. essentially a random address), not
163 // computed from the name content.
164 //
165 // With the below enums, the SVH computation is not sensitive to
166 // artifacts of how rustc was invoked nor of how the source code
167 // was laid out. (Or at least it is *less* sensitive.)
168
169 // This enum represents the different potential bits of code the
170 // visitor could encounter that could affect the ABI for the crate,
171 // and assigns each a distinct tag to feed into the hash computation.
172 #[derive(Hash)]
173 enum SawAbiComponent<'a> {
174
175 // FIXME (#14132): should we include (some function of)
176 // ident.ctxt as well?
177 SawIdent(token::InternedString),
178 SawStructDef(token::InternedString),
179
180 SawLifetime(token::InternedString),
181 SawLifetimeDef(token::InternedString),
182
183 SawMod,
184 SawForeignItem,
185 SawItem,
186 SawDecl,
187 SawTy,
188 SawGenerics,
189 SawFn,
190 SawTraitItem,
191 SawImplItem,
192 SawStructField,
193 SawVariant,
194 SawExplicitSelf,
195 SawPath,
196 SawBlock,
197 SawPat,
198 SawLocal,
199 SawArm,
200 SawExpr(SawExprComponent<'a>),
201 SawStmt(SawStmtComponent),
202 }
203
204 /// SawExprComponent carries all of the information that we want
205 /// to include in the hash that *won't* be covered by the
206 /// subsequent recursive traversal of the expression's
207 /// substructure by the visitor.
208 ///
209 /// We know every Expr_ variant is covered by a variant because
210 /// `fn saw_expr` maps each to some case below. Ensuring that
211 /// each variant carries an appropriate payload has to be verified
212 /// by hand.
213 ///
214 /// (However, getting that *exactly* right is not so important
215 /// because the SVH is just a developer convenience; there is no
216 /// guarantee of collision-freedom, hash collisions are just
217 /// (hopefully) unlikely.)
218 #[derive(Hash)]
219 pub enum SawExprComponent<'a> {
220
221 SawExprLoop(Option<token::InternedString>),
222 SawExprField(token::InternedString),
223 SawExprTupField(usize),
224 SawExprBreak(Option<token::InternedString>),
225 SawExprAgain(Option<token::InternedString>),
226
227 SawExprBox,
228 SawExprVec,
229 SawExprCall,
230 SawExprMethodCall,
231 SawExprTup,
232 SawExprBinary(hir::BinOp_),
233 SawExprUnary(hir::UnOp),
234 SawExprLit(ast::Lit_),
235 SawExprCast,
236 SawExprIf,
237 SawExprWhile,
238 SawExprMatch,
239 SawExprClosure,
240 SawExprBlock,
241 SawExprAssign,
242 SawExprAssignOp(hir::BinOp_),
243 SawExprIndex,
244 SawExprRange,
245 SawExprPath(Option<usize>),
246 SawExprAddrOf(hir::Mutability),
247 SawExprRet,
248 SawExprInlineAsm(&'a hir::InlineAsm),
249 SawExprStruct,
250 SawExprRepeat,
251 }
252
253 fn saw_expr<'a>(node: &'a Expr_) -> SawExprComponent<'a> {
254 match *node {
255 ExprBox(..) => SawExprBox,
256 ExprVec(..) => SawExprVec,
257 ExprCall(..) => SawExprCall,
258 ExprMethodCall(..) => SawExprMethodCall,
259 ExprTup(..) => SawExprTup,
260 ExprBinary(op, _, _) => SawExprBinary(op.node),
261 ExprUnary(op, _) => SawExprUnary(op),
262 ExprLit(ref lit) => SawExprLit(lit.node.clone()),
263 ExprCast(..) => SawExprCast,
264 ExprIf(..) => SawExprIf,
265 ExprWhile(..) => SawExprWhile,
266 ExprLoop(_, id) => SawExprLoop(id.map(|id| id.name.as_str())),
267 ExprMatch(..) => SawExprMatch,
268 ExprClosure(..) => SawExprClosure,
269 ExprBlock(..) => SawExprBlock,
270 ExprAssign(..) => SawExprAssign,
271 ExprAssignOp(op, _, _) => SawExprAssignOp(op.node),
272 ExprField(_, name) => SawExprField(name.node.as_str()),
273 ExprTupField(_, id) => SawExprTupField(id.node),
274 ExprIndex(..) => SawExprIndex,
275 ExprRange(..) => SawExprRange,
276 ExprPath(ref qself, _) => SawExprPath(qself.as_ref().map(|q| q.position)),
277 ExprAddrOf(m, _) => SawExprAddrOf(m),
278 ExprBreak(id) => SawExprBreak(id.map(|id| id.node.name.as_str())),
279 ExprAgain(id) => SawExprAgain(id.map(|id| id.node.name.as_str())),
280 ExprRet(..) => SawExprRet,
281 ExprInlineAsm(ref asm) => SawExprInlineAsm(asm),
282 ExprStruct(..) => SawExprStruct,
283 ExprRepeat(..) => SawExprRepeat,
284 }
285 }
286
287 /// SawStmtComponent is analogous to SawExprComponent, but for statements.
288 #[derive(Hash)]
289 pub enum SawStmtComponent {
290 SawStmtDecl,
291 SawStmtExpr,
292 SawStmtSemi,
293 }
294
295 fn saw_stmt(node: &Stmt_) -> SawStmtComponent {
296 match *node {
297 StmtDecl(..) => SawStmtDecl,
298 StmtExpr(..) => SawStmtExpr,
299 StmtSemi(..) => SawStmtSemi,
300 }
301 }
302
303 impl<'a, 'v> Visitor<'v> for StrictVersionHashVisitor<'a> {
304 fn visit_variant_data(&mut self, s: &VariantData, name: Name,
305 g: &Generics, _: NodeId, _: Span) {
306 SawStructDef(name.as_str()).hash(self.st);
307 visit::walk_generics(self, g);
308 visit::walk_struct_def(self, s)
309 }
310
311 fn visit_variant(&mut self, v: &Variant, g: &Generics, item_id: NodeId) {
312 SawVariant.hash(self.st);
313 // walk_variant does not call walk_generics, so do it here.
314 visit::walk_generics(self, g);
315 visit::walk_variant(self, v, g, item_id)
316 }
317
318 // All of the remaining methods just record (in the hash
319 // SipHasher) that the visitor saw that particular variant
320 // (with its payload), and continue walking as the default
321 // visitor would.
322 //
323 // Some of the implementations have some notes as to how one
324 // might try to make their SVH computation less discerning
325 // (e.g. by incorporating reachability analysis). But
326 // currently all of their implementations are uniform and
327 // uninteresting.
328 //
329 // (If you edit a method such that it deviates from the
330 // pattern, please move that method up above this comment.)
331
332 fn visit_name(&mut self, _: Span, name: Name) {
333 SawIdent(name.as_str()).hash(self.st);
334 }
335
336 fn visit_lifetime(&mut self, l: &Lifetime) {
337 SawLifetime(l.name.as_str()).hash(self.st);
338 }
339
340 fn visit_lifetime_def(&mut self, l: &LifetimeDef) {
341 SawLifetimeDef(l.lifetime.name.as_str()).hash(self.st);
342 }
343
344 // We do recursively walk the bodies of functions/methods
345 // (rather than omitting their bodies from the hash) since
346 // monomorphization and cross-crate inlining generally implies
347 // that a change to a crate body will require downstream
348 // crates to be recompiled.
349 fn visit_expr(&mut self, ex: &Expr) {
350 SawExpr(saw_expr(&ex.node)).hash(self.st); visit::walk_expr(self, ex)
351 }
352
353 fn visit_stmt(&mut self, s: &Stmt) {
354 SawStmt(saw_stmt(&s.node)).hash(self.st); visit::walk_stmt(self, s)
355 }
356
357 fn visit_foreign_item(&mut self, i: &ForeignItem) {
358 // FIXME (#14132) ideally we would incorporate privacy (or
359 // perhaps reachability) somewhere here, so foreign items
360 // that do not leak into downstream crates would not be
361 // part of the ABI.
362 SawForeignItem.hash(self.st); visit::walk_foreign_item(self, i)
363 }
364
365 fn visit_item(&mut self, i: &Item) {
366 // FIXME (#14132) ideally would incorporate reachability
367 // analysis somewhere here, so items that never leak into
368 // downstream crates (e.g. via monomorphisation or
369 // inlining) would not be part of the ABI.
370 SawItem.hash(self.st); visit::walk_item(self, i)
371 }
372
373 fn visit_mod(&mut self, m: &Mod, _s: Span, _n: NodeId) {
374 SawMod.hash(self.st); visit::walk_mod(self, m)
375 }
376
377 fn visit_decl(&mut self, d: &Decl) {
378 SawDecl.hash(self.st); visit::walk_decl(self, d)
379 }
380
381 fn visit_ty(&mut self, t: &Ty) {
382 SawTy.hash(self.st); visit::walk_ty(self, t)
383 }
384
385 fn visit_generics(&mut self, g: &Generics) {
386 SawGenerics.hash(self.st); visit::walk_generics(self, g)
387 }
388
389 fn visit_fn(&mut self, fk: FnKind<'v>, fd: &'v FnDecl,
390 b: &'v Block, s: Span, _: NodeId) {
391 SawFn.hash(self.st); visit::walk_fn(self, fk, fd, b, s)
392 }
393
394 fn visit_trait_item(&mut self, ti: &TraitItem) {
395 SawTraitItem.hash(self.st); visit::walk_trait_item(self, ti)
396 }
397
398 fn visit_impl_item(&mut self, ii: &ImplItem) {
399 SawImplItem.hash(self.st); visit::walk_impl_item(self, ii)
400 }
401
402 fn visit_struct_field(&mut self, s: &StructField) {
403 SawStructField.hash(self.st); visit::walk_struct_field(self, s)
404 }
405
406 fn visit_explicit_self(&mut self, es: &ExplicitSelf) {
407 SawExplicitSelf.hash(self.st); visit::walk_explicit_self(self, es)
408 }
409
410 fn visit_path(&mut self, path: &Path, _: ast::NodeId) {
411 SawPath.hash(self.st); visit::walk_path(self, path)
412 }
413
414 fn visit_path_list_item(&mut self, prefix: &Path, item: &'v PathListItem) {
415 SawPath.hash(self.st); visit::walk_path_list_item(self, prefix, item)
416 }
417
418 fn visit_block(&mut self, b: &Block) {
419 SawBlock.hash(self.st); visit::walk_block(self, b)
420 }
421
422 fn visit_pat(&mut self, p: &Pat) {
423 SawPat.hash(self.st); visit::walk_pat(self, p)
424 }
425
426 fn visit_local(&mut self, l: &Local) {
427 SawLocal.hash(self.st); visit::walk_local(self, l)
428 }
429
430 fn visit_arm(&mut self, a: &Arm) {
431 SawArm.hash(self.st); visit::walk_arm(self, a)
432 }
433 }
434 }