1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Calculation and management of a Strict Version Hash for crates
13 //! # Today's ABI problem
15 //! In today's implementation of rustc, it is incredibly difficult to achieve
16 //! forward binary compatibility without resorting to C-like interfaces. Within
17 //! rust code itself, abi details such as symbol names suffer from a variety of
18 //! unrelated factors to code changing such as the "def id drift" problem. This
19 //! ends up yielding confusing error messages about metadata mismatches and
22 //! The core of this problem is when an upstream dependency changes and
23 //! downstream dependents are not recompiled. This causes compile errors because
24 //! the upstream crate's metadata has changed but the downstream crates are
25 //! still referencing the older crate's metadata.
27 //! This problem exists for many reasons, the primary of which is that rust does
28 //! not currently support forwards ABI compatibility (in place upgrades of a
31 //! # SVH and how it alleviates the problem
33 //! With all of this knowledge on hand, this module contains the implementation
34 //! of a notion of a "Strict Version Hash" for a crate. This is essentially a
35 //! hash of all contents of a crate which can somehow be exposed to downstream
38 //! This hash is currently calculated by just hashing the AST, but this is
39 //! obviously wrong (doc changes should not result in an incompatible ABI).
40 //! Implementation-wise, this is required at this moment in time.
42 //! By encoding this strict version hash into all crate's metadata, stale crates
43 //! can be detected immediately and error'd about by rustc itself.
47 //! Original issue: https://github.com/rust-lang/rust/issues/10207
50 use std
::hash
::{Hash, SipHasher, Hasher}
;
52 use rustc_front
::intravisit
as visit
;
54 #[derive(Clone, PartialEq, Debug)]
60 pub fn new(hash
: &str) -> Svh
{
61 assert
!(hash
.len() == 16);
62 Svh { hash: hash.to_string() }
65 pub fn as_str
<'a
>(&'a
self) -> &'a
str {
69 pub fn calculate(metadata
: &Vec
<String
>, krate
: &hir
::Crate
) -> Svh
{
70 // FIXME (#14132): This is better than it used to be, but it still not
71 // ideal. We now attempt to hash only the relevant portions of the
72 // Crate AST as well as the top-level crate attributes. (However,
73 // the hashing of the crate attributes should be double-checked
74 // to ensure it is not incorporating implementation artifacts into
75 // the hash that are not otherwise visible.)
77 // FIXME: this should use SHA1, not SipHash. SipHash is not built to
79 let mut state
= SipHasher
::new();
81 for data
in metadata
{
82 data
.hash(&mut state
);
86 let mut visit
= svh_visitor
::make(&mut state
, krate
);
87 visit
::walk_crate(&mut visit
, krate
);
90 // FIXME (#14132): This hash is still sensitive to e.g. the
91 // spans of the crate Attributes and their underlying
92 // MetaItems; we should make ContentHashable impl for those
93 // types and then use hash_content. But, since all crate
94 // attributes should appear near beginning of the file, it is
95 // not such a big deal to be sensitive to their spans for now.
97 // We hash only the MetaItems instead of the entire Attribute
98 // to avoid hashing the AttrId
99 for attr
in &krate
.attrs
{
100 attr
.node
.value
.hash(&mut state
);
103 let hash
= state
.finish();
105 hash
: (0..64).step_by(4).map(|i
| hex(hash
>> i
)).collect()
108 fn hex(b
: u64) -> char {
109 let b
= (b
& 0xf) as u8;
111 0 ... 9 => '
0'
as u8 + b
,
112 _
=> 'a'
as u8 + b
- 10,
119 impl fmt
::Display
for Svh
{
120 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
125 // FIXME (#14132): Even this SVH computation still has implementation
126 // artifacts: namely, the order of item declaration will affect the
127 // hash computation, but for many kinds of items the order of
128 // declaration should be irrelevant to the ABI.
131 pub use self::SawExprComponent
::*;
132 pub use self::SawStmtComponent
::*;
133 use self::SawAbiComponent
::*;
134 use syntax
::ast
::{self, Name, NodeId}
;
135 use syntax
::codemap
::Span
;
136 use syntax
::parse
::token
;
137 use rustc_front
::intravisit
as visit
;
138 use rustc_front
::intravisit
::{Visitor, FnKind}
;
139 use rustc_front
::hir
::*;
140 use rustc_front
::hir
;
142 use std
::hash
::{Hash, SipHasher}
;
144 pub struct StrictVersionHashVisitor
<'a
> {
145 pub krate
: &'a Crate
,
146 pub st
: &'a
mut SipHasher
,
149 pub fn make
<'a
>(st
: &'a
mut SipHasher
, krate
: &'a Crate
) -> StrictVersionHashVisitor
<'a
> {
150 StrictVersionHashVisitor { st: st, krate: krate }
153 // To off-load the bulk of the hash-computation on #[derive(Hash)],
154 // we define a set of enums corresponding to the content that our
155 // crate visitor will encounter as it traverses the ast.
157 // The important invariant is that all of the Saw*Component enums
158 // do not carry any Spans, Names, or Idents.
160 // Not carrying any Names/Idents is the important fix for problem
161 // noted on PR #13948: using the ident.name as the basis for a
162 // hash leads to unstable SVH, because ident.name is just an index
163 // into intern table (i.e. essentially a random address), not
164 // computed from the name content.
166 // With the below enums, the SVH computation is not sensitive to
167 // artifacts of how rustc was invoked nor of how the source code
168 // was laid out. (Or at least it is *less* sensitive.)
170 // This enum represents the different potential bits of code the
171 // visitor could encounter that could affect the ABI for the crate,
172 // and assigns each a distinct tag to feed into the hash computation.
174 enum SawAbiComponent
<'a
> {
176 // FIXME (#14132): should we include (some function of)
177 // ident.ctxt as well?
178 SawIdent(token
::InternedString
),
179 SawStructDef(token
::InternedString
),
181 SawLifetime(token
::InternedString
),
182 SawLifetimeDef(token
::InternedString
),
201 SawExpr(SawExprComponent
<'a
>),
202 SawStmt(SawStmtComponent
),
205 /// SawExprComponent carries all of the information that we want
206 /// to include in the hash that *won't* be covered by the
207 /// subsequent recursive traversal of the expression's
208 /// substructure by the visitor.
210 /// We know every Expr_ variant is covered by a variant because
211 /// `fn saw_expr` maps each to some case below. Ensuring that
212 /// each variant carries an appropriate payload has to be verified
215 /// (However, getting that *exactly* right is not so important
216 /// because the SVH is just a developer convenience; there is no
217 /// guarantee of collision-freedom, hash collisions are just
218 /// (hopefully) unlikely.)
220 pub enum SawExprComponent
<'a
> {
222 SawExprLoop(Option
<token
::InternedString
>),
223 SawExprField(token
::InternedString
),
224 SawExprTupField(usize),
225 SawExprBreak(Option
<token
::InternedString
>),
226 SawExprAgain(Option
<token
::InternedString
>),
233 SawExprBinary(hir
::BinOp_
),
234 SawExprUnary(hir
::UnOp
),
235 SawExprLit(ast
::LitKind
),
244 SawExprAssignOp(hir
::BinOp_
),
247 SawExprPath(Option
<usize>),
248 SawExprAddrOf(hir
::Mutability
),
250 SawExprInlineAsm(&'a hir
::InlineAsm
),
255 fn saw_expr
<'a
>(node
: &'a Expr_
) -> SawExprComponent
<'a
> {
257 ExprBox(..) => SawExprBox
,
258 ExprVec(..) => SawExprVec
,
259 ExprCall(..) => SawExprCall
,
260 ExprMethodCall(..) => SawExprMethodCall
,
261 ExprTup(..) => SawExprTup
,
262 ExprBinary(op
, _
, _
) => SawExprBinary(op
.node
),
263 ExprUnary(op
, _
) => SawExprUnary(op
),
264 ExprLit(ref lit
) => SawExprLit(lit
.node
.clone()),
265 ExprCast(..) => SawExprCast
,
266 ExprType(..) => SawExprType
,
267 ExprIf(..) => SawExprIf
,
268 ExprWhile(..) => SawExprWhile
,
269 ExprLoop(_
, id
) => SawExprLoop(id
.map(|id
| id
.name
.as_str())),
270 ExprMatch(..) => SawExprMatch
,
271 ExprClosure(..) => SawExprClosure
,
272 ExprBlock(..) => SawExprBlock
,
273 ExprAssign(..) => SawExprAssign
,
274 ExprAssignOp(op
, _
, _
) => SawExprAssignOp(op
.node
),
275 ExprField(_
, name
) => SawExprField(name
.node
.as_str()),
276 ExprTupField(_
, id
) => SawExprTupField(id
.node
),
277 ExprIndex(..) => SawExprIndex
,
278 ExprRange(..) => SawExprRange
,
279 ExprPath(ref qself
, _
) => SawExprPath(qself
.as_ref().map(|q
| q
.position
)),
280 ExprAddrOf(m
, _
) => SawExprAddrOf(m
),
281 ExprBreak(id
) => SawExprBreak(id
.map(|id
| id
.node
.name
.as_str())),
282 ExprAgain(id
) => SawExprAgain(id
.map(|id
| id
.node
.name
.as_str())),
283 ExprRet(..) => SawExprRet
,
284 ExprInlineAsm(ref asm
) => SawExprInlineAsm(asm
),
285 ExprStruct(..) => SawExprStruct
,
286 ExprRepeat(..) => SawExprRepeat
,
290 /// SawStmtComponent is analogous to SawExprComponent, but for statements.
292 pub enum SawStmtComponent
{
298 fn saw_stmt(node
: &Stmt_
) -> SawStmtComponent
{
300 StmtDecl(..) => SawStmtDecl
,
301 StmtExpr(..) => SawStmtExpr
,
302 StmtSemi(..) => SawStmtSemi
,
306 impl<'a
> Visitor
<'a
> for StrictVersionHashVisitor
<'a
> {
307 fn visit_nested_item(&mut self, item
: ItemId
) {
308 self.visit_item(self.krate
.item(item
.id
))
311 fn visit_variant_data(&mut self, s
: &'a VariantData
, name
: Name
,
312 g
: &'a Generics
, _
: NodeId
, _
: Span
) {
313 SawStructDef(name
.as_str()).hash(self.st
);
314 visit
::walk_generics(self, g
);
315 visit
::walk_struct_def(self, s
)
318 fn visit_variant(&mut self, v
: &'a Variant
, g
: &'a Generics
, item_id
: NodeId
) {
319 SawVariant
.hash(self.st
);
320 // walk_variant does not call walk_generics, so do it here.
321 visit
::walk_generics(self, g
);
322 visit
::walk_variant(self, v
, g
, item_id
)
325 // All of the remaining methods just record (in the hash
326 // SipHasher) that the visitor saw that particular variant
327 // (with its payload), and continue walking as the default
330 // Some of the implementations have some notes as to how one
331 // might try to make their SVH computation less discerning
332 // (e.g. by incorporating reachability analysis). But
333 // currently all of their implementations are uniform and
336 // (If you edit a method such that it deviates from the
337 // pattern, please move that method up above this comment.)
339 fn visit_name(&mut self, _
: Span
, name
: Name
) {
340 SawIdent(name
.as_str()).hash(self.st
);
343 fn visit_lifetime(&mut self, l
: &'a Lifetime
) {
344 SawLifetime(l
.name
.as_str()).hash(self.st
);
347 fn visit_lifetime_def(&mut self, l
: &'a LifetimeDef
) {
348 SawLifetimeDef(l
.lifetime
.name
.as_str()).hash(self.st
);
351 // We do recursively walk the bodies of functions/methods
352 // (rather than omitting their bodies from the hash) since
353 // monomorphization and cross-crate inlining generally implies
354 // that a change to a crate body will require downstream
355 // crates to be recompiled.
356 fn visit_expr(&mut self, ex
: &'a Expr
) {
357 SawExpr(saw_expr(&ex
.node
)).hash(self.st
); visit
::walk_expr(self, ex
)
360 fn visit_stmt(&mut self, s
: &'a Stmt
) {
361 SawStmt(saw_stmt(&s
.node
)).hash(self.st
); visit
::walk_stmt(self, s
)
364 fn visit_foreign_item(&mut self, i
: &'a ForeignItem
) {
365 // FIXME (#14132) ideally we would incorporate privacy (or
366 // perhaps reachability) somewhere here, so foreign items
367 // that do not leak into downstream crates would not be
369 SawForeignItem
.hash(self.st
); visit
::walk_foreign_item(self, i
)
372 fn visit_item(&mut self, i
: &'a Item
) {
373 // FIXME (#14132) ideally would incorporate reachability
374 // analysis somewhere here, so items that never leak into
375 // downstream crates (e.g. via monomorphisation or
376 // inlining) would not be part of the ABI.
377 SawItem
.hash(self.st
); visit
::walk_item(self, i
)
380 fn visit_mod(&mut self, m
: &'a Mod
, _s
: Span
, _n
: NodeId
) {
381 SawMod
.hash(self.st
); visit
::walk_mod(self, m
)
384 fn visit_decl(&mut self, d
: &'a Decl
) {
385 SawDecl
.hash(self.st
); visit
::walk_decl(self, d
)
388 fn visit_ty(&mut self, t
: &'a Ty
) {
389 SawTy
.hash(self.st
); visit
::walk_ty(self, t
)
392 fn visit_generics(&mut self, g
: &'a Generics
) {
393 SawGenerics
.hash(self.st
); visit
::walk_generics(self, g
)
396 fn visit_fn(&mut self, fk
: FnKind
<'a
>, fd
: &'a FnDecl
,
397 b
: &'a Block
, s
: Span
, _
: NodeId
) {
398 SawFn
.hash(self.st
); visit
::walk_fn(self, fk
, fd
, b
, s
)
401 fn visit_trait_item(&mut self, ti
: &'a TraitItem
) {
402 SawTraitItem
.hash(self.st
); visit
::walk_trait_item(self, ti
)
405 fn visit_impl_item(&mut self, ii
: &'a ImplItem
) {
406 SawImplItem
.hash(self.st
); visit
::walk_impl_item(self, ii
)
409 fn visit_struct_field(&mut self, s
: &'a StructField
) {
410 SawStructField
.hash(self.st
); visit
::walk_struct_field(self, s
)
413 fn visit_explicit_self(&mut self, es
: &'a ExplicitSelf
) {
414 SawExplicitSelf
.hash(self.st
); visit
::walk_explicit_self(self, es
)
417 fn visit_path(&mut self, path
: &'a Path
, _
: ast
::NodeId
) {
418 SawPath
.hash(self.st
); visit
::walk_path(self, path
)
421 fn visit_path_list_item(&mut self, prefix
: &'a Path
, item
: &'a PathListItem
) {
422 SawPath
.hash(self.st
); visit
::walk_path_list_item(self, prefix
, item
)
425 fn visit_block(&mut self, b
: &'a Block
) {
426 SawBlock
.hash(self.st
); visit
::walk_block(self, b
)
429 fn visit_pat(&mut self, p
: &'a Pat
) {
430 SawPat
.hash(self.st
); visit
::walk_pat(self, p
)
433 fn visit_local(&mut self, l
: &'a Local
) {
434 SawLocal
.hash(self.st
); visit
::walk_local(self, l
)
437 fn visit_arm(&mut self, a
: &'a Arm
) {
438 SawArm
.hash(self.st
); visit
::walk_arm(self, a
)