]>
Commit | Line | Data |
---|---|---|
1a4d82fc JJ |
1 | // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | //! Calculation and management of a Strict Version Hash for crates | |
12 | //! | |
13 | //! # Today's ABI problem | |
14 | //! | |
15 | //! In today's implementation of rustc, it is incredibly difficult to achieve | |
16 | //! forward binary compatibility without resorting to C-like interfaces. Within | |
17 | //! rust code itself, abi details such as symbol names suffer from a variety of | |
18 | //! unrelated factors to code changing such as the "def id drift" problem. This | |
19 | //! ends up yielding confusing error messages about metadata mismatches and | |
20 | //! such. | |
21 | //! | |
22 | //! The core of this problem is when an upstream dependency changes and | |
23 | //! downstream dependents are not recompiled. This causes compile errors because | |
24 | //! the upstream crate's metadata has changed but the downstream crates are | |
25 | //! still referencing the older crate's metadata. | |
26 | //! | |
27 | //! This problem exists for many reasons, the primary of which is that rust does | |
28 | //! not currently support forwards ABI compatibility (in place upgrades of a | |
29 | //! crate). | |
30 | //! | |
31 | //! # SVH and how it alleviates the problem | |
32 | //! | |
33 | //! With all of this knowledge on hand, this module contains the implementation | |
34 | //! of a notion of a "Strict Version Hash" for a crate. This is essentially a | |
35 | //! hash of all contents of a crate which can somehow be exposed to downstream | |
36 | //! crates. | |
37 | //! | |
38 | //! This hash is currently calculated by just hashing the AST, but this is | |
39 | //! obviously wrong (doc changes should not result in an incompatible ABI). | |
40 | //! Implementation-wise, this is required at this moment in time. | |
41 | //! | |
42 | //! By encoding this strict version hash into all crate's metadata, stale crates | |
43 | //! can be detected immediately and error'd about by rustc itself. | |
44 | //! | |
45 | //! # Relevant links | |
46 | //! | |
47 | //! Original issue: https://github.com/rust-lang/rust/issues/10207 | |
48 | ||
49 | use std::fmt; | |
50 | use std::hash::{Hash, SipHasher, Hasher}; | |
1a4d82fc JJ |
51 | use syntax::ast; |
52 | use syntax::visit; | |
53 | ||
85aaf69f | 54 | #[derive(Clone, PartialEq, Debug)] |
1a4d82fc JJ |
55 | pub struct Svh { |
56 | hash: String, | |
57 | } | |
58 | ||
59 | impl Svh { | |
60 | pub fn new(hash: &str) -> Svh { | |
61 | assert!(hash.len() == 16); | |
62 | Svh { hash: hash.to_string() } | |
63 | } | |
64 | ||
65 | pub fn as_str<'a>(&'a self) -> &'a str { | |
c34b1796 | 66 | &self.hash |
1a4d82fc JJ |
67 | } |
68 | ||
69 | pub fn calculate(metadata: &Vec<String>, krate: &ast::Crate) -> Svh { | |
70 | // FIXME (#14132): This is better than it used to be, but it still not | |
71 | // ideal. We now attempt to hash only the relevant portions of the | |
72 | // Crate AST as well as the top-level crate attributes. (However, | |
73 | // the hashing of the crate attributes should be double-checked | |
74 | // to ensure it is not incorporating implementation artifacts into | |
75 | // the hash that are not otherwise visible.) | |
76 | ||
77 | // FIXME: this should use SHA1, not SipHash. SipHash is not built to | |
78 | // avoid collisions. | |
79 | let mut state = SipHasher::new(); | |
80 | ||
85aaf69f | 81 | for data in metadata { |
1a4d82fc JJ |
82 | data.hash(&mut state); |
83 | } | |
84 | ||
85 | { | |
86 | let mut visit = svh_visitor::make(&mut state); | |
87 | visit::walk_crate(&mut visit, krate); | |
88 | } | |
89 | ||
90 | // FIXME (#14132): This hash is still sensitive to e.g. the | |
91 | // spans of the crate Attributes and their underlying | |
92 | // MetaItems; we should make ContentHashable impl for those | |
93 | // types and then use hash_content. But, since all crate | |
94 | // attributes should appear near beginning of the file, it is | |
95 | // not such a big deal to be sensitive to their spans for now. | |
96 | // | |
97 | // We hash only the MetaItems instead of the entire Attribute | |
98 | // to avoid hashing the AttrId | |
85aaf69f | 99 | for attr in &krate.attrs { |
1a4d82fc JJ |
100 | attr.node.value.hash(&mut state); |
101 | } | |
102 | ||
103 | let hash = state.finish(); | |
104 | return Svh { | |
c34b1796 | 105 | hash: (0..64).step_by(4).map(|i| hex(hash >> i)).collect() |
1a4d82fc JJ |
106 | }; |
107 | ||
108 | fn hex(b: u64) -> char { | |
109 | let b = (b & 0xf) as u8; | |
110 | let b = match b { | |
111 | 0 ... 9 => '0' as u8 + b, | |
112 | _ => 'a' as u8 + b - 10, | |
113 | }; | |
114 | b as char | |
115 | } | |
116 | } | |
117 | } | |
118 | ||
85aaf69f | 119 | impl fmt::Display for Svh { |
1a4d82fc JJ |
120 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
121 | f.pad(self.as_str()) | |
122 | } | |
123 | } | |
124 | ||
125 | // FIXME (#14132): Even this SVH computation still has implementation | |
126 | // artifacts: namely, the order of item declaration will affect the | |
127 | // hash computation, but for many kinds of items the order of | |
128 | // declaration should be irrelevant to the ABI. | |
129 | ||
130 | mod svh_visitor { | |
131 | pub use self::SawExprComponent::*; | |
132 | pub use self::SawStmtComponent::*; | |
133 | use self::SawAbiComponent::*; | |
134 | use syntax::ast; | |
135 | use syntax::ast::*; | |
136 | use syntax::codemap::Span; | |
137 | use syntax::parse::token; | |
138 | use syntax::print::pprust; | |
139 | use syntax::visit; | |
140 | use syntax::visit::{Visitor, FnKind}; | |
141 | ||
142 | use std::hash::{Hash, SipHasher}; | |
143 | ||
144 | pub struct StrictVersionHashVisitor<'a> { | |
145 | pub st: &'a mut SipHasher, | |
146 | } | |
147 | ||
148 | pub fn make<'a>(st: &'a mut SipHasher) -> StrictVersionHashVisitor<'a> { | |
149 | StrictVersionHashVisitor { st: st } | |
150 | } | |
151 | ||
85aaf69f | 152 | // To off-load the bulk of the hash-computation on #[derive(Hash)], |
1a4d82fc JJ |
153 | // we define a set of enums corresponding to the content that our |
154 | // crate visitor will encounter as it traverses the ast. | |
155 | // | |
156 | // The important invariant is that all of the Saw*Component enums | |
157 | // do not carry any Spans, Names, or Idents. | |
158 | // | |
159 | // Not carrying any Names/Idents is the important fix for problem | |
160 | // noted on PR #13948: using the ident.name as the basis for a | |
161 | // hash leads to unstable SVH, because ident.name is just an index | |
162 | // into intern table (i.e. essentially a random address), not | |
163 | // computed from the name content. | |
164 | // | |
165 | // With the below enums, the SVH computation is not sensitive to | |
166 | // artifacts of how rustc was invoked nor of how the source code | |
167 | // was laid out. (Or at least it is *less* sensitive.) | |
168 | ||
169 | // This enum represents the different potential bits of code the | |
170 | // visitor could encounter that could affect the ABI for the crate, | |
171 | // and assigns each a distinct tag to feed into the hash computation. | |
172 | #[derive(Hash)] | |
173 | enum SawAbiComponent<'a> { | |
174 | ||
175 | // FIXME (#14132): should we include (some function of) | |
176 | // ident.ctxt as well? | |
177 | SawIdent(token::InternedString), | |
178 | SawStructDef(token::InternedString), | |
179 | ||
180 | SawLifetimeRef(token::InternedString), | |
181 | SawLifetimeDef(token::InternedString), | |
182 | ||
183 | SawMod, | |
1a4d82fc JJ |
184 | SawForeignItem, |
185 | SawItem, | |
186 | SawDecl, | |
187 | SawTy, | |
188 | SawGenerics, | |
189 | SawFn, | |
c34b1796 AL |
190 | SawTraitItem, |
191 | SawImplItem, | |
1a4d82fc JJ |
192 | SawStructField, |
193 | SawVariant, | |
194 | SawExplicitSelf, | |
195 | SawPath, | |
196 | SawOptLifetimeRef, | |
197 | SawBlock, | |
198 | SawPat, | |
199 | SawLocal, | |
200 | SawArm, | |
201 | SawExpr(SawExprComponent<'a>), | |
202 | SawStmt(SawStmtComponent), | |
203 | } | |
204 | ||
205 | /// SawExprComponent carries all of the information that we want | |
206 | /// to include in the hash that *won't* be covered by the | |
207 | /// subsequent recursive traversal of the expression's | |
208 | /// substructure by the visitor. | |
209 | /// | |
210 | /// We know every Expr_ variant is covered by a variant because | |
211 | /// `fn saw_expr` maps each to some case below. Ensuring that | |
212 | /// each variant carries an appropriate payload has to be verified | |
213 | /// by hand. | |
214 | /// | |
215 | /// (However, getting that *exactly* right is not so important | |
216 | /// because the SVH is just a developer convenience; there is no | |
217 | /// guarantee of collision-freedom, hash collisions are just | |
218 | /// (hopefully) unlikely.) | |
219 | #[derive(Hash)] | |
220 | pub enum SawExprComponent<'a> { | |
221 | ||
222 | SawExprLoop(Option<token::InternedString>), | |
223 | SawExprField(token::InternedString), | |
c34b1796 | 224 | SawExprTupField(usize), |
1a4d82fc JJ |
225 | SawExprBreak(Option<token::InternedString>), |
226 | SawExprAgain(Option<token::InternedString>), | |
227 | ||
228 | SawExprBox, | |
229 | SawExprVec, | |
230 | SawExprCall, | |
231 | SawExprMethodCall, | |
232 | SawExprTup, | |
85aaf69f | 233 | SawExprBinary(ast::BinOp_), |
1a4d82fc JJ |
234 | SawExprUnary(ast::UnOp), |
235 | SawExprLit(ast::Lit_), | |
236 | SawExprCast, | |
237 | SawExprIf, | |
238 | SawExprWhile, | |
239 | SawExprMatch, | |
240 | SawExprClosure, | |
241 | SawExprBlock, | |
242 | SawExprAssign, | |
85aaf69f | 243 | SawExprAssignOp(ast::BinOp_), |
1a4d82fc JJ |
244 | SawExprIndex, |
245 | SawExprRange, | |
c34b1796 | 246 | SawExprPath(Option<usize>), |
1a4d82fc JJ |
247 | SawExprAddrOf(ast::Mutability), |
248 | SawExprRet, | |
249 | SawExprInlineAsm(&'a ast::InlineAsm), | |
250 | SawExprStruct, | |
251 | SawExprRepeat, | |
252 | SawExprParen, | |
1a4d82fc JJ |
253 | } |
254 | ||
255 | fn saw_expr<'a>(node: &'a Expr_) -> SawExprComponent<'a> { | |
256 | match *node { | |
257 | ExprBox(..) => SawExprBox, | |
258 | ExprVec(..) => SawExprVec, | |
259 | ExprCall(..) => SawExprCall, | |
260 | ExprMethodCall(..) => SawExprMethodCall, | |
261 | ExprTup(..) => SawExprTup, | |
85aaf69f | 262 | ExprBinary(op, _, _) => SawExprBinary(op.node), |
1a4d82fc JJ |
263 | ExprUnary(op, _) => SawExprUnary(op), |
264 | ExprLit(ref lit) => SawExprLit(lit.node.clone()), | |
265 | ExprCast(..) => SawExprCast, | |
266 | ExprIf(..) => SawExprIf, | |
267 | ExprWhile(..) => SawExprWhile, | |
c1a9b12d | 268 | ExprLoop(_, id) => SawExprLoop(id.map(|id| id.name.as_str())), |
1a4d82fc JJ |
269 | ExprMatch(..) => SawExprMatch, |
270 | ExprClosure(..) => SawExprClosure, | |
271 | ExprBlock(..) => SawExprBlock, | |
272 | ExprAssign(..) => SawExprAssign, | |
85aaf69f | 273 | ExprAssignOp(op, _, _) => SawExprAssignOp(op.node), |
c1a9b12d | 274 | ExprField(_, id) => SawExprField(id.node.name.as_str()), |
1a4d82fc JJ |
275 | ExprTupField(_, id) => SawExprTupField(id.node), |
276 | ExprIndex(..) => SawExprIndex, | |
277 | ExprRange(..) => SawExprRange, | |
c34b1796 | 278 | ExprPath(ref qself, _) => SawExprPath(qself.as_ref().map(|q| q.position)), |
1a4d82fc | 279 | ExprAddrOf(m, _) => SawExprAddrOf(m), |
c1a9b12d SL |
280 | ExprBreak(id) => SawExprBreak(id.map(|id| id.name.as_str())), |
281 | ExprAgain(id) => SawExprAgain(id.map(|id| id.name.as_str())), | |
1a4d82fc JJ |
282 | ExprRet(..) => SawExprRet, |
283 | ExprInlineAsm(ref asm) => SawExprInlineAsm(asm), | |
284 | ExprStruct(..) => SawExprStruct, | |
285 | ExprRepeat(..) => SawExprRepeat, | |
286 | ExprParen(..) => SawExprParen, | |
1a4d82fc JJ |
287 | |
288 | // just syntactic artifacts, expanded away by time of SVH. | |
85aaf69f | 289 | ExprForLoop(..) => unreachable!(), |
1a4d82fc JJ |
290 | ExprIfLet(..) => unreachable!(), |
291 | ExprWhileLet(..) => unreachable!(), | |
292 | ExprMac(..) => unreachable!(), | |
293 | } | |
294 | } | |
295 | ||
296 | /// SawStmtComponent is analogous to SawExprComponent, but for statements. | |
297 | #[derive(Hash)] | |
298 | pub enum SawStmtComponent { | |
299 | SawStmtDecl, | |
300 | SawStmtExpr, | |
301 | SawStmtSemi, | |
302 | } | |
303 | ||
304 | fn saw_stmt(node: &Stmt_) -> SawStmtComponent { | |
305 | match *node { | |
306 | StmtDecl(..) => SawStmtDecl, | |
307 | StmtExpr(..) => SawStmtExpr, | |
308 | StmtSemi(..) => SawStmtSemi, | |
309 | StmtMac(..) => unreachable!(), | |
310 | } | |
311 | } | |
312 | ||
1a4d82fc JJ |
313 | impl<'a, 'v> Visitor<'v> for StrictVersionHashVisitor<'a> { |
314 | ||
315 | fn visit_mac(&mut self, mac: &Mac) { | |
316 | // macro invocations, namely macro_rules definitions, | |
317 | // *can* appear as items, even in the expanded crate AST. | |
318 | ||
c34b1796 | 319 | if ¯o_name(mac)[..] == "macro_rules" { |
1a4d82fc JJ |
320 | // Pretty-printing definition to a string strips out |
321 | // surface artifacts (currently), such as the span | |
322 | // information, yielding a content-based hash. | |
323 | ||
324 | // FIXME (#14132): building temporary string is | |
325 | // expensive; a direct content-based hash on token | |
326 | // trees might be faster. Implementing this is far | |
327 | // easier in short term. | |
328 | let macro_defn_as_string = pprust::to_string(|pp_state| { | |
329 | pp_state.print_mac(mac, token::Paren) | |
330 | }); | |
331 | macro_defn_as_string.hash(self.st); | |
332 | } else { | |
333 | // It is not possible to observe any kind of macro | |
334 | // invocation at this stage except `macro_rules!`. | |
335 | panic!("reached macro somehow: {}", | |
336 | pprust::to_string(|pp_state| { | |
337 | pp_state.print_mac(mac, token::Paren) | |
338 | })); | |
339 | } | |
340 | ||
341 | visit::walk_mac(self, mac); | |
342 | ||
343 | fn macro_name(mac: &Mac) -> token::InternedString { | |
344 | match &mac.node { | |
345 | &MacInvocTT(ref path, ref _tts, ref _stx_ctxt) => { | |
c34b1796 | 346 | let s = &path.segments; |
1a4d82fc | 347 | assert_eq!(s.len(), 1); |
c1a9b12d | 348 | s[0].identifier.name.as_str() |
1a4d82fc JJ |
349 | } |
350 | } | |
351 | } | |
352 | } | |
353 | ||
354 | fn visit_struct_def(&mut self, s: &StructDef, ident: Ident, | |
355 | g: &Generics, _: NodeId) { | |
c1a9b12d | 356 | SawStructDef(ident.name.as_str()).hash(self.st); |
1a4d82fc JJ |
357 | visit::walk_generics(self, g); |
358 | visit::walk_struct_def(self, s) | |
359 | } | |
360 | ||
361 | fn visit_variant(&mut self, v: &Variant, g: &Generics) { | |
362 | SawVariant.hash(self.st); | |
363 | // walk_variant does not call walk_generics, so do it here. | |
364 | visit::walk_generics(self, g); | |
365 | visit::walk_variant(self, v, g) | |
366 | } | |
367 | ||
368 | fn visit_opt_lifetime_ref(&mut self, _: Span, l: &Option<Lifetime>) { | |
369 | SawOptLifetimeRef.hash(self.st); | |
370 | // (This is a strange method in the visitor trait, in that | |
371 | // it does not expose a walk function to do the subroutine | |
372 | // calls.) | |
373 | match *l { | |
374 | Some(ref l) => self.visit_lifetime_ref(l), | |
375 | None => () | |
376 | } | |
377 | } | |
378 | ||
379 | // All of the remaining methods just record (in the hash | |
380 | // SipHasher) that the visitor saw that particular variant | |
381 | // (with its payload), and continue walking as the default | |
382 | // visitor would. | |
383 | // | |
384 | // Some of the implementations have some notes as to how one | |
385 | // might try to make their SVH computation less discerning | |
386 | // (e.g. by incorporating reachability analysis). But | |
387 | // currently all of their implementations are uniform and | |
388 | // uninteresting. | |
389 | // | |
390 | // (If you edit a method such that it deviates from the | |
391 | // pattern, please move that method up above this comment.) | |
392 | ||
393 | fn visit_ident(&mut self, _: Span, ident: Ident) { | |
c1a9b12d | 394 | SawIdent(ident.name.as_str()).hash(self.st); |
1a4d82fc JJ |
395 | } |
396 | ||
397 | fn visit_lifetime_ref(&mut self, l: &Lifetime) { | |
c1a9b12d | 398 | SawLifetimeRef(l.name.as_str()).hash(self.st); |
1a4d82fc JJ |
399 | } |
400 | ||
401 | fn visit_lifetime_def(&mut self, l: &LifetimeDef) { | |
c1a9b12d | 402 | SawLifetimeDef(l.lifetime.name.as_str()).hash(self.st); |
1a4d82fc JJ |
403 | } |
404 | ||
405 | // We do recursively walk the bodies of functions/methods | |
406 | // (rather than omitting their bodies from the hash) since | |
407 | // monomorphization and cross-crate inlining generally implies | |
408 | // that a change to a crate body will require downstream | |
409 | // crates to be recompiled. | |
410 | fn visit_expr(&mut self, ex: &Expr) { | |
411 | SawExpr(saw_expr(&ex.node)).hash(self.st); visit::walk_expr(self, ex) | |
412 | } | |
413 | ||
414 | fn visit_stmt(&mut self, s: &Stmt) { | |
415 | SawStmt(saw_stmt(&s.node)).hash(self.st); visit::walk_stmt(self, s) | |
416 | } | |
417 | ||
1a4d82fc JJ |
418 | fn visit_foreign_item(&mut self, i: &ForeignItem) { |
419 | // FIXME (#14132) ideally we would incorporate privacy (or | |
420 | // perhaps reachability) somewhere here, so foreign items | |
421 | // that do not leak into downstream crates would not be | |
422 | // part of the ABI. | |
423 | SawForeignItem.hash(self.st); visit::walk_foreign_item(self, i) | |
424 | } | |
425 | ||
426 | fn visit_item(&mut self, i: &Item) { | |
427 | // FIXME (#14132) ideally would incorporate reachability | |
428 | // analysis somewhere here, so items that never leak into | |
429 | // downstream crates (e.g. via monomorphisation or | |
430 | // inlining) would not be part of the ABI. | |
431 | SawItem.hash(self.st); visit::walk_item(self, i) | |
432 | } | |
433 | ||
434 | fn visit_mod(&mut self, m: &Mod, _s: Span, _n: NodeId) { | |
435 | SawMod.hash(self.st); visit::walk_mod(self, m) | |
436 | } | |
437 | ||
438 | fn visit_decl(&mut self, d: &Decl) { | |
439 | SawDecl.hash(self.st); visit::walk_decl(self, d) | |
440 | } | |
441 | ||
442 | fn visit_ty(&mut self, t: &Ty) { | |
443 | SawTy.hash(self.st); visit::walk_ty(self, t) | |
444 | } | |
445 | ||
446 | fn visit_generics(&mut self, g: &Generics) { | |
447 | SawGenerics.hash(self.st); visit::walk_generics(self, g) | |
448 | } | |
449 | ||
450 | fn visit_fn(&mut self, fk: FnKind<'v>, fd: &'v FnDecl, | |
451 | b: &'v Block, s: Span, _: NodeId) { | |
452 | SawFn.hash(self.st); visit::walk_fn(self, fk, fd, b, s) | |
453 | } | |
454 | ||
c34b1796 AL |
455 | fn visit_trait_item(&mut self, ti: &TraitItem) { |
456 | SawTraitItem.hash(self.st); visit::walk_trait_item(self, ti) | |
1a4d82fc JJ |
457 | } |
458 | ||
c34b1796 AL |
459 | fn visit_impl_item(&mut self, ii: &ImplItem) { |
460 | SawImplItem.hash(self.st); visit::walk_impl_item(self, ii) | |
1a4d82fc JJ |
461 | } |
462 | ||
463 | fn visit_struct_field(&mut self, s: &StructField) { | |
464 | SawStructField.hash(self.st); visit::walk_struct_field(self, s) | |
465 | } | |
466 | ||
467 | fn visit_explicit_self(&mut self, es: &ExplicitSelf) { | |
468 | SawExplicitSelf.hash(self.st); visit::walk_explicit_self(self, es) | |
469 | } | |
470 | ||
471 | fn visit_path(&mut self, path: &Path, _: ast::NodeId) { | |
472 | SawPath.hash(self.st); visit::walk_path(self, path) | |
473 | } | |
474 | ||
475 | fn visit_block(&mut self, b: &Block) { | |
476 | SawBlock.hash(self.st); visit::walk_block(self, b) | |
477 | } | |
478 | ||
479 | fn visit_pat(&mut self, p: &Pat) { | |
480 | SawPat.hash(self.st); visit::walk_pat(self, p) | |
481 | } | |
482 | ||
483 | fn visit_local(&mut self, l: &Local) { | |
484 | SawLocal.hash(self.st); visit::walk_local(self, l) | |
485 | } | |
486 | ||
487 | fn visit_arm(&mut self, a: &Arm) { | |
488 | SawArm.hash(self.st); visit::walk_arm(self, a) | |
489 | } | |
490 | } | |
491 | } |