]>
Commit | Line | Data |
---|---|---|
9e0c209e SL |
1 | // Copyright 2016 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | ||
12 | //! This module manages how the incremental compilation cache is represented in | |
13 | //! the file system. | |
14 | //! | |
15 | //! Incremental compilation caches are managed according to a copy-on-write | |
16 | //! strategy: Once a complete, consistent cache version is finalized, it is | |
17 | //! never modified. Instead, when a subsequent compilation session is started, | |
18 | //! the compiler will allocate a new version of the cache that starts out as | |
19 | //! a copy of the previous version. Then only this new copy is modified and it | |
20 | //! will not be visible to other processes until it is finalized. This ensures | |
21 | //! that multiple compiler processes can be executed concurrently for the same | |
22 | //! crate without interfering with each other or blocking each other. | |
23 | //! | |
24 | //! More concretely this is implemented via the following protocol: | |
25 | //! | |
26 | //! 1. For a newly started compilation session, the compiler allocates a | |
27 | //! new `session` directory within the incremental compilation directory. | |
28 | //! This session directory will have a unique name that ends with the suffix | |
29 | //! "-working" and that contains a creation timestamp. | |
30 | //! 2. Next, the compiler looks for the newest finalized session directory, | |
31 | //! that is, a session directory from a previous compilation session that | |
32 | //! has been marked as valid and consistent. A session directory is | |
33 | //! considered finalized if the "-working" suffix in the directory name has | |
34 | //! been replaced by the SVH of the crate. | |
35 | //! 3. Once the compiler has found a valid, finalized session directory, it will | |
36 | //! hard-link/copy its contents into the new "-working" directory. If all | |
37 | //! goes well, it will have its own, private copy of the source directory and | |
38 | //! subsequently not have to worry about synchronizing with other compiler | |
39 | //! processes. | |
40 | //! 4. Now the compiler can do its normal compilation process, which involves | |
41 | //! reading and updating its private session directory. | |
42 | //! 5. When compilation finishes without errors, the private session directory | |
43 | //! will be in a state where it can be used as input for other compilation | |
44 | //! sessions. That is, it will contain a dependency graph and cache artifacts | |
45 | //! that are consistent with the state of the source code it was compiled | |
46 | //! from, with no need to change them ever again. At this point, the compiler | |
47 | //! finalizes and "publishes" its private session directory by renaming it | |
48 | //! from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}". | |
49 | //! 6. At this point the "old" session directory that we copied our data from | |
50 | //! at the beginning of the session has become obsolete because we have just | |
51 | //! published a more current version. Thus the compiler will delete it. | |
52 | //! | |
53 | //! ## Garbage Collection | |
54 | //! | |
55 | //! Naively following the above protocol might lead to old session directories | |
56 | //! piling up if a compiler instance crashes for some reason before it's able to | |
57 | //! remove its private session directory. In order to avoid wasting disk space, | |
58 | //! the compiler also does some garbage collection each time it is started in | |
59 | //! incremental compilation mode. Specifically, it will scan the incremental | |
60 | //! compilation directory for private session directories that are not in use | |
61 | //! any more and will delete those. It will also delete any finalized session | |
62 | //! directories for a given crate except for the most recent one. | |
63 | //! | |
64 | //! ## Synchronization | |
65 | //! | |
66 | //! There is some synchronization needed in order for the compiler to be able to | |
67 | //! determine whether a given private session directory is not in use any more. | |
68 | //! This is done by creating a lock file for each session directory and | |
69 | //! locking it while the directory is still being used. Since file locks have | |
70 | //! operating system support, we can rely on the lock being released if the | |
71 | //! compiler process dies for some unexpected reason. Thus, when garbage | |
72 | //! collecting private session directories, the collecting process can determine | |
73 | //! whether the directory is still in use by trying to acquire a lock on the | |
74 | //! file. If locking the file fails, the original process must still be alive. | |
75 | //! If locking the file succeeds, we know that the owning process is not alive | |
76 | //! any more and we can safely delete the directory. | |
77 | //! There is still a small time window between the original process creating the | |
78 | //! lock file and actually locking it. In order to minimize the chance that | |
79 | //! another process tries to acquire the lock in just that instant, only | |
80 | //! session directories that are older than a few seconds are considered for | |
81 | //! garbage collection. | |
82 | //! | |
83 | //! Another case that has to be considered is what happens if one process | |
84 | //! deletes a finalized session directory that another process is currently | |
85 | //! trying to copy from. This case is also handled via the lock file. Before | |
86 | //! a process starts copying a finalized session directory, it will acquire a | |
87 | //! shared lock on the directory's lock file. Any garbage collecting process, | |
88 | //! on the other hand, will acquire an exclusive lock on the lock file. | |
89 | //! Thus, if a directory is being collected, any reader process will fail | |
90 | //! acquiring the shared lock and will leave the directory alone. Conversely, | |
91 | //! if a collecting process can't acquire the exclusive lock because the | |
92 | //! directory is currently being read from, it will leave collecting that | |
93 | //! directory to another process at a later point in time. | |
94 | //! The exact same scheme is also used when reading the metadata hashes file | |
95 | //! from an extern crate. When a crate is compiled, the hash values of its | |
96 | //! metadata are stored in a file in its session directory. When the | |
97 | //! compilation session of another crate imports the first crate's metadata, | |
98 | //! it also has to read in the accompanying metadata hashes. It thus will access | |
99 | //! the finalized session directory of all crates it links to and while doing | |
100 | //! so, it will also place a read lock on the respective session directory | |
101 | //! so that it won't be deleted while the metadata hashes are loaded. | |
102 | //! | |
103 | //! ## Preconditions | |
104 | //! | |
105 | //! This system relies on two features being available in the file system in | |
106 | //! order to work really well: file locking and hard linking. | |
107 | //! If hard linking is not available (like on FAT) the data in the cache | |
108 | //! actually has to be copied at the beginning of each session. | |
109 | //! If file locking does not work reliably (like on NFS), some of the | |
110 | //! synchronization will go haywire. | |
111 | //! In both cases we recommend to locate the incremental compilation directory | |
112 | //! on a file system that supports these things. | |
113 | //! It might be a good idea though to try and detect whether we are on an | |
114 | //! unsupported file system and emit a warning in that case. This is not yet | |
115 | //! implemented. | |
116 | ||
abe05a73 | 117 | use rustc::session::{Session, CrateDisambiguator}; |
b7449926 | 118 | use rustc_fs_util::{link_or_copy, LinkOrCopy}; |
476ff2be SL |
119 | use rustc_data_structures::{flock, base_n}; |
120 | use rustc_data_structures::fx::{FxHashSet, FxHashMap}; | |
b7449926 | 121 | use rustc_data_structures::svh::Svh; |
9e0c209e | 122 | |
9e0c209e SL |
123 | use std::fs as std_fs; |
124 | use std::io; | |
125 | use std::mem; | |
126 | use std::path::{Path, PathBuf}; | |
127 | use std::time::{UNIX_EPOCH, SystemTime, Duration}; | |
abe05a73 | 128 | |
b7449926 | 129 | use rand::{RngCore, thread_rng}; |
9e0c209e SL |
130 | |
131 | const LOCK_FILE_EXT: &'static str = ".lock"; | |
132 | const DEP_GRAPH_FILENAME: &'static str = "dep-graph.bin"; | |
133 | const WORK_PRODUCTS_FILENAME: &'static str = "work-products.bin"; | |
abe05a73 | 134 | const QUERY_CACHE_FILENAME: &'static str = "query-cache.bin"; |
9e0c209e | 135 | |
476ff2be SL |
136 | // We encode integers using the following base, so they are shorter than decimal |
137 | // or hexadecimal numbers (we want short file and directory names). Since these | |
138 | // numbers will be used in file names, we choose an encoding that is not | |
139 | // case-sensitive (as opposed to base64, for example). | |
ff7c6d11 | 140 | const INT_ENCODE_BASE: usize = base_n::CASE_INSENSITIVE; |
476ff2be | 141 | |
9e0c209e SL |
142 | pub fn dep_graph_path(sess: &Session) -> PathBuf { |
143 | in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME) | |
144 | } | |
ff7c6d11 XL |
145 | pub fn dep_graph_path_from(incr_comp_session_dir: &Path) -> PathBuf { |
146 | in_incr_comp_dir(incr_comp_session_dir, DEP_GRAPH_FILENAME) | |
147 | } | |
9e0c209e SL |
148 | |
149 | pub fn work_products_path(sess: &Session) -> PathBuf { | |
150 | in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME) | |
151 | } | |
152 | ||
abe05a73 XL |
153 | pub fn query_cache_path(sess: &Session) -> PathBuf { |
154 | in_incr_comp_dir_sess(sess, QUERY_CACHE_FILENAME) | |
155 | } | |
156 | ||
9e0c209e SL |
157 | pub fn lock_file_path(session_dir: &Path) -> PathBuf { |
158 | let crate_dir = session_dir.parent().unwrap(); | |
159 | ||
160 | let directory_name = session_dir.file_name().unwrap().to_string_lossy(); | |
161 | assert_no_characters_lost(&directory_name); | |
162 | ||
163 | let dash_indices: Vec<_> = directory_name.match_indices("-") | |
164 | .map(|(idx, _)| idx) | |
165 | .collect(); | |
166 | if dash_indices.len() != 3 { | |
167 | bug!("Encountered incremental compilation session directory with \ | |
168 | malformed name: {}", | |
169 | session_dir.display()) | |
170 | } | |
171 | ||
172 | crate_dir.join(&directory_name[0 .. dash_indices[2]]) | |
173 | .with_extension(&LOCK_FILE_EXT[1..]) | |
174 | } | |
175 | ||
176 | pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf { | |
177 | in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name) | |
178 | } | |
179 | ||
/// Returns the path of `file_name` inside the given session directory.
pub fn in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf {
    let mut path = incr_comp_session_dir.to_path_buf();
    path.push(file_name);
    path
}
183 | ||
184 | /// Allocates the private session directory. The boolean in the Ok() result | |
185 | /// indicates whether we should try loading a dep graph from the successfully | |
186 | /// initialized directory, or not. | |
187 | /// The post-condition of this fn is that we have a valid incremental | |
188 | /// compilation session directory, if the result is `Ok`. A valid session | |
189 | /// directory is one that contains a locked lock file. It may or may not contain | |
190 | /// a dep-graph and work products from a previous session. | |
191 | /// If the call fails, the fn may leave behind an invalid session directory. | |
192 | /// The garbage collection will take care of it. | |
ea8adc8c XL |
193 | pub fn prepare_session_directory(sess: &Session, |
194 | crate_name: &str, | |
abe05a73 | 195 | crate_disambiguator: CrateDisambiguator) { |
ea8adc8c XL |
196 | if sess.opts.incremental.is_none() { |
197 | return | |
198 | } | |
199 | ||
9e0c209e SL |
200 | debug!("prepare_session_directory"); |
201 | ||
202 | // {incr-comp-dir}/{crate-name-and-disambiguator} | |
ea8adc8c | 203 | let crate_dir = crate_path(sess, crate_name, crate_disambiguator); |
9e0c209e | 204 | debug!("crate-dir: {}", crate_dir.display()); |
ea8adc8c XL |
205 | if create_dir(sess, &crate_dir, "crate").is_err() { |
206 | return | |
207 | } | |
9e0c209e | 208 | |
476ff2be SL |
209 | // Hack: canonicalize the path *after creating the directory* |
210 | // because, on windows, long paths can cause problems; | |
211 | // canonicalization inserts this weird prefix that makes windows | |
212 | // tolerate long paths. | |
213 | let crate_dir = match crate_dir.canonicalize() { | |
214 | Ok(v) => v, | |
215 | Err(err) => { | |
ea8adc8c XL |
216 | sess.err(&format!("incremental compilation: error canonicalizing path `{}`: {}", |
217 | crate_dir.display(), err)); | |
218 | return | |
476ff2be SL |
219 | } |
220 | }; | |
221 | ||
0bf4aa26 | 222 | let mut source_directories_already_tried = FxHashSet::default(); |
9e0c209e SL |
223 | |
224 | loop { | |
225 | // Generate a session directory of the form: | |
226 | // | |
227 | // {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working | |
228 | let session_dir = generate_session_dir_path(&crate_dir); | |
229 | debug!("session-dir: {}", session_dir.display()); | |
230 | ||
231 | // Lock the new session directory. If this fails, return an | |
232 | // error without retrying | |
ea8adc8c XL |
233 | let (directory_lock, lock_file_path) = match lock_directory(sess, &session_dir) { |
234 | Ok(e) => e, | |
235 | Err(_) => return, | |
236 | }; | |
9e0c209e SL |
237 | |
238 | // Now that we have the lock, we can actually create the session | |
239 | // directory | |
ea8adc8c XL |
240 | if create_dir(sess, &session_dir, "session").is_err() { |
241 | return | |
242 | } | |
9e0c209e SL |
243 | |
244 | // Find a suitable source directory to copy from. Ignore those that we | |
245 | // have already tried before. | |
246 | let source_directory = find_source_directory(&crate_dir, | |
247 | &source_directories_already_tried); | |
248 | ||
249 | let source_directory = if let Some(dir) = source_directory { | |
250 | dir | |
251 | } else { | |
252 | // There's nowhere to copy from, we're done | |
253 | debug!("no source directory found. Continuing with empty session \ | |
254 | directory."); | |
255 | ||
ea8adc8c XL |
256 | sess.init_incr_comp_session(session_dir, directory_lock, false); |
257 | return | |
9e0c209e SL |
258 | }; |
259 | ||
260 | debug!("attempting to copy data from source: {}", | |
261 | source_directory.display()); | |
262 | ||
abe05a73 | 263 | |
9e0c209e SL |
264 | |
265 | // Try copying over all files from the source directory | |
abe05a73 XL |
266 | if let Ok(allows_links) = copy_files(sess, |
267 | &session_dir, | |
268 | &source_directory) { | |
9e0c209e SL |
269 | debug!("successfully copied data from: {}", |
270 | source_directory.display()); | |
271 | ||
c30ab7b3 | 272 | if !allows_links { |
ea8adc8c | 273 | sess.warn(&format!("Hard linking files in the incremental \ |
c30ab7b3 SL |
274 | compilation cache failed. Copying files \ |
275 | instead. Consider moving the cache \ | |
276 | directory to a file system which supports \ | |
277 | hard linking in session dir `{}`", | |
278 | session_dir.display()) | |
279 | ); | |
280 | } | |
281 | ||
ea8adc8c XL |
282 | sess.init_incr_comp_session(session_dir, directory_lock, true); |
283 | return | |
9e0c209e SL |
284 | } else { |
285 | debug!("copying failed - trying next directory"); | |
286 | ||
287 | // Something went wrong while trying to copy/link files from the | |
288 | // source directory. Try again with a different one. | |
289 | source_directories_already_tried.insert(source_directory); | |
290 | ||
291 | // Try to remove the session directory we just allocated. We don't | |
292 | // know if there's any garbage in it from the failed copy action. | |
293 | if let Err(err) = safe_remove_dir_all(&session_dir) { | |
ea8adc8c XL |
294 | sess.warn(&format!("Failed to delete partly initialized \ |
295 | session dir `{}`: {}", | |
296 | session_dir.display(), | |
297 | err)); | |
9e0c209e SL |
298 | } |
299 | ||
ea8adc8c | 300 | delete_session_dir_lock_file(sess, &lock_file_path); |
9e0c209e SL |
301 | mem::drop(directory_lock); |
302 | } | |
303 | } | |
304 | } | |
305 | ||
306 | ||
307 | /// This function finalizes and thus 'publishes' the session directory by | |
308 | /// renaming it to `s-{timestamp}-{svh}` and releasing the file lock. | |
309 | /// If there have been compilation errors, however, this function will just | |
310 | /// delete the presumably invalid session directory. | |
311 | pub fn finalize_session_directory(sess: &Session, svh: Svh) { | |
312 | if sess.opts.incremental.is_none() { | |
313 | return; | |
314 | } | |
315 | ||
316 | let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone(); | |
317 | ||
318 | if sess.has_errors() { | |
319 | // If there have been any errors during compilation, we don't want to | |
320 | // publish this session directory. Rather, we'll just delete it. | |
321 | ||
322 | debug!("finalize_session_directory() - invalidating session directory: {}", | |
323 | incr_comp_session_dir.display()); | |
324 | ||
325 | if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) { | |
326 | sess.warn(&format!("Error deleting incremental compilation \ | |
327 | session directory `{}`: {}", | |
328 | incr_comp_session_dir.display(), | |
329 | err)); | |
330 | } | |
331 | ||
332 | let lock_file_path = lock_file_path(&*incr_comp_session_dir); | |
333 | delete_session_dir_lock_file(sess, &lock_file_path); | |
334 | sess.mark_incr_comp_session_as_invalid(); | |
335 | } | |
336 | ||
337 | debug!("finalize_session_directory() - session directory: {}", | |
338 | incr_comp_session_dir.display()); | |
339 | ||
340 | let old_sub_dir_name = incr_comp_session_dir.file_name() | |
341 | .unwrap() | |
342 | .to_string_lossy(); | |
343 | assert_no_characters_lost(&old_sub_dir_name); | |
344 | ||
345 | // Keep the 's-{timestamp}-{random-number}' prefix, but replace the | |
346 | // '-working' part with the SVH of the crate | |
347 | let dash_indices: Vec<_> = old_sub_dir_name.match_indices("-") | |
348 | .map(|(idx, _)| idx) | |
349 | .collect(); | |
350 | if dash_indices.len() != 3 { | |
351 | bug!("Encountered incremental compilation session directory with \ | |
352 | malformed name: {}", | |
353 | incr_comp_session_dir.display()) | |
354 | } | |
355 | ||
356 | // State: "s-{timestamp}-{random-number}-" | |
357 | let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]); | |
358 | ||
359 | // Append the svh | |
ff7c6d11 | 360 | base_n::push_str(svh.as_u64() as u128, INT_ENCODE_BASE, &mut new_sub_dir_name); |
9e0c209e SL |
361 | |
362 | // Create the full path | |
363 | let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name); | |
364 | debug!("finalize_session_directory() - new path: {}", new_path.display()); | |
365 | ||
366 | match std_fs::rename(&*incr_comp_session_dir, &new_path) { | |
367 | Ok(_) => { | |
368 | debug!("finalize_session_directory() - directory renamed successfully"); | |
369 | ||
370 | // This unlocks the directory | |
371 | sess.finalize_incr_comp_session(new_path); | |
372 | } | |
373 | Err(e) => { | |
374 | // Warn about the error. However, no need to abort compilation now. | |
375 | sess.warn(&format!("Error finalizing incremental compilation \ | |
376 | session directory `{}`: {}", | |
377 | incr_comp_session_dir.display(), | |
378 | e)); | |
379 | ||
380 | debug!("finalize_session_directory() - error, marking as invalid"); | |
381 | // Drop the file lock, so we can garage collect | |
382 | sess.mark_incr_comp_session_as_invalid(); | |
383 | } | |
384 | } | |
385 | ||
386 | let _ = garbage_collect_session_directories(sess); | |
387 | } | |
388 | ||
389 | pub fn delete_all_session_dir_contents(sess: &Session) -> io::Result<()> { | |
390 | let sess_dir_iterator = sess.incr_comp_session_dir().read_dir()?; | |
391 | for entry in sess_dir_iterator { | |
392 | let entry = entry?; | |
393 | safe_remove_file(&entry.path())? | |
394 | } | |
395 | Ok(()) | |
396 | } | |
397 | ||
abe05a73 XL |
398 | fn copy_files(sess: &Session, |
399 | target_dir: &Path, | |
400 | source_dir: &Path) | |
c30ab7b3 | 401 | -> Result<bool, ()> { |
9e0c209e SL |
402 | // We acquire a shared lock on the lock file of the directory, so that |
403 | // nobody deletes it out from under us while we are reading from it. | |
404 | let lock_file_path = lock_file_path(source_dir); | |
405 | let _lock = if let Ok(lock) = flock::Lock::new(&lock_file_path, | |
406 | false, // don't wait, | |
407 | false, // don't create | |
408 | false) { // not exclusive | |
409 | lock | |
410 | } else { | |
411 | // Could not acquire the lock, don't try to copy from here | |
412 | return Err(()) | |
413 | }; | |
414 | ||
415 | let source_dir_iterator = match source_dir.read_dir() { | |
416 | Ok(it) => it, | |
417 | Err(_) => return Err(()) | |
418 | }; | |
419 | ||
420 | let mut files_linked = 0; | |
421 | let mut files_copied = 0; | |
422 | ||
423 | for entry in source_dir_iterator { | |
424 | match entry { | |
425 | Ok(entry) => { | |
426 | let file_name = entry.file_name(); | |
427 | ||
428 | let target_file_path = target_dir.join(file_name); | |
429 | let source_path = entry.path(); | |
430 | ||
431 | debug!("copying into session dir: {}", source_path.display()); | |
b7449926 XL |
432 | match link_or_copy(source_path, target_file_path) { |
433 | Ok(LinkOrCopy::Link) => { | |
9e0c209e SL |
434 | files_linked += 1 |
435 | } | |
b7449926 | 436 | Ok(LinkOrCopy::Copy) => { |
9e0c209e SL |
437 | files_copied += 1 |
438 | } | |
439 | Err(_) => return Err(()) | |
440 | } | |
441 | } | |
442 | Err(_) => { | |
443 | return Err(()) | |
444 | } | |
445 | } | |
446 | } | |
447 | ||
abe05a73 XL |
448 | if sess.opts.debugging_opts.incremental_info { |
449 | println!("[incremental] session directory: \ | |
450 | {} files hard-linked", files_linked); | |
451 | println!("[incremental] session directory: \ | |
452 | {} files copied", files_copied); | |
9e0c209e SL |
453 | } |
454 | ||
c30ab7b3 | 455 | Ok(files_linked > 0 || files_copied == 0) |
9e0c209e SL |
456 | } |
457 | ||
458 | /// Generate unique directory path of the form: | |
459 | /// {crate_dir}/s-{timestamp}-{random-number}-working | |
460 | fn generate_session_dir_path(crate_dir: &Path) -> PathBuf { | |
461 | let timestamp = timestamp_to_string(SystemTime::now()); | |
462 | debug!("generate_session_dir_path: timestamp = {}", timestamp); | |
463 | let random_number = thread_rng().next_u32(); | |
464 | debug!("generate_session_dir_path: random_number = {}", random_number); | |
465 | ||
466 | let directory_name = format!("s-{}-{}-working", | |
467 | timestamp, | |
ff7c6d11 | 468 | base_n::encode(random_number as u128, |
476ff2be | 469 | INT_ENCODE_BASE)); |
9e0c209e SL |
470 | debug!("generate_session_dir_path: directory_name = {}", directory_name); |
471 | let directory_path = crate_dir.join(directory_name); | |
472 | debug!("generate_session_dir_path: directory_path = {}", directory_path.display()); | |
473 | directory_path | |
474 | } | |
475 | ||
476 | fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(),()> { | |
cc61c64b | 477 | match std_fs::create_dir_all(path) { |
9e0c209e SL |
478 | Ok(()) => { |
479 | debug!("{} directory created successfully", dir_tag); | |
480 | Ok(()) | |
481 | } | |
482 | Err(err) => { | |
483 | sess.err(&format!("Could not create incremental compilation {} \ | |
484 | directory `{}`: {}", | |
485 | dir_tag, | |
486 | path.display(), | |
487 | err)); | |
488 | Err(()) | |
489 | } | |
490 | } | |
491 | } | |
492 | ||
493 | /// Allocate a the lock-file and lock it. | |
494 | fn lock_directory(sess: &Session, | |
495 | session_dir: &Path) | |
496 | -> Result<(flock::Lock, PathBuf), ()> { | |
497 | let lock_file_path = lock_file_path(session_dir); | |
498 | debug!("lock_directory() - lock_file: {}", lock_file_path.display()); | |
499 | ||
500 | match flock::Lock::new(&lock_file_path, | |
501 | false, // don't wait | |
502 | true, // create the lock file | |
503 | true) { // the lock should be exclusive | |
504 | Ok(lock) => Ok((lock, lock_file_path)), | |
505 | Err(err) => { | |
506 | sess.err(&format!("incremental compilation: could not create \ | |
507 | session directory lock file: {}", err)); | |
508 | Err(()) | |
509 | } | |
510 | } | |
511 | } | |
512 | ||
513 | fn delete_session_dir_lock_file(sess: &Session, | |
514 | lock_file_path: &Path) { | |
515 | if let Err(err) = safe_remove_file(&lock_file_path) { | |
516 | sess.warn(&format!("Error deleting lock file for incremental \ | |
517 | compilation session directory `{}`: {}", | |
518 | lock_file_path.display(), | |
519 | err)); | |
520 | } | |
521 | } | |
522 | ||
523 | /// Find the most recent published session directory that is not in the | |
524 | /// ignore-list. | |
525 | fn find_source_directory(crate_dir: &Path, | |
476ff2be | 526 | source_directories_already_tried: &FxHashSet<PathBuf>) |
9e0c209e SL |
527 | -> Option<PathBuf> { |
528 | let iter = crate_dir.read_dir() | |
529 | .unwrap() // FIXME | |
530 | .filter_map(|e| e.ok().map(|e| e.path())); | |
531 | ||
532 | find_source_directory_in_iter(iter, source_directories_already_tried) | |
533 | } | |
534 | ||
535 | fn find_source_directory_in_iter<I>(iter: I, | |
476ff2be | 536 | source_directories_already_tried: &FxHashSet<PathBuf>) |
9e0c209e SL |
537 | -> Option<PathBuf> |
538 | where I: Iterator<Item=PathBuf> | |
539 | { | |
540 | let mut best_candidate = (UNIX_EPOCH, None); | |
541 | ||
542 | for session_dir in iter { | |
543 | debug!("find_source_directory_in_iter - inspecting `{}`", | |
544 | session_dir.display()); | |
545 | ||
546 | let directory_name = session_dir.file_name().unwrap().to_string_lossy(); | |
547 | assert_no_characters_lost(&directory_name); | |
548 | ||
549 | if source_directories_already_tried.contains(&session_dir) || | |
550 | !is_session_directory(&directory_name) || | |
551 | !is_finalized(&directory_name) { | |
552 | debug!("find_source_directory_in_iter - ignoring."); | |
553 | continue | |
554 | } | |
555 | ||
556 | let timestamp = extract_timestamp_from_session_dir(&directory_name) | |
557 | .unwrap_or_else(|_| { | |
558 | bug!("unexpected incr-comp session dir: {}", session_dir.display()) | |
559 | }); | |
560 | ||
561 | if timestamp > best_candidate.0 { | |
562 | best_candidate = (timestamp, Some(session_dir.clone())); | |
563 | } | |
564 | } | |
565 | ||
566 | best_candidate.1 | |
567 | } | |
568 | ||
/// Returns `true` unless `directory_name` still carries the `-working`
/// suffix of a private session directory.
fn is_finalized(directory_name: &str) -> bool {
    let still_working = directory_name.ends_with("-working");
    !still_working
}
572 | ||
573 | fn is_session_directory(directory_name: &str) -> bool { | |
574 | directory_name.starts_with("s-") && | |
575 | !directory_name.ends_with(LOCK_FILE_EXT) | |
576 | } | |
577 | ||
578 | fn is_session_directory_lock_file(file_name: &str) -> bool { | |
579 | file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT) | |
580 | } | |
581 | ||
582 | fn extract_timestamp_from_session_dir(directory_name: &str) | |
583 | -> Result<SystemTime, ()> { | |
584 | if !is_session_directory(directory_name) { | |
585 | return Err(()) | |
586 | } | |
587 | ||
588 | let dash_indices: Vec<_> = directory_name.match_indices("-") | |
589 | .map(|(idx, _)| idx) | |
590 | .collect(); | |
591 | if dash_indices.len() != 3 { | |
592 | return Err(()) | |
593 | } | |
594 | ||
595 | string_to_timestamp(&directory_name[dash_indices[0]+1 .. dash_indices[1]]) | |
596 | } | |
597 | ||
9e0c209e SL |
598 | fn timestamp_to_string(timestamp: SystemTime) -> String { |
599 | let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); | |
600 | let micros = duration.as_secs() * 1_000_000 + | |
601 | (duration.subsec_nanos() as u64) / 1000; | |
ff7c6d11 | 602 | base_n::encode(micros as u128, INT_ENCODE_BASE) |
9e0c209e SL |
603 | } |
604 | ||
605 | fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> { | |
2c00a5a8 | 606 | let micros_since_unix_epoch = u64::from_str_radix(s, INT_ENCODE_BASE as u32); |
9e0c209e SL |
607 | |
608 | if micros_since_unix_epoch.is_err() { | |
609 | return Err(()) | |
610 | } | |
611 | ||
612 | let micros_since_unix_epoch = micros_since_unix_epoch.unwrap(); | |
613 | ||
614 | let duration = Duration::new(micros_since_unix_epoch / 1_000_000, | |
615 | 1000 * (micros_since_unix_epoch % 1_000_000) as u32); | |
616 | Ok(UNIX_EPOCH + duration) | |
617 | } | |
618 | ||
9e0c209e SL |
619 | fn crate_path(sess: &Session, |
620 | crate_name: &str, | |
abe05a73 | 621 | crate_disambiguator: CrateDisambiguator) |
9e0c209e | 622 | -> PathBuf { |
9e0c209e SL |
623 | |
624 | let incr_dir = sess.opts.incremental.as_ref().unwrap().clone(); | |
625 | ||
abe05a73 | 626 | // The full crate disambiguator is really long. 64 bits of it should be |
9e0c209e | 627 | // sufficient. |
abe05a73 | 628 | let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash(); |
ff7c6d11 XL |
629 | let crate_disambiguator = base_n::encode(crate_disambiguator as u128, |
630 | INT_ENCODE_BASE); | |
9e0c209e | 631 | |
abe05a73 | 632 | let crate_name = format!("{}-{}", crate_name, crate_disambiguator); |
9e0c209e SL |
633 | incr_dir.join(crate_name) |
634 | } | |
635 | ||
636 | fn assert_no_characters_lost(s: &str) { | |
637 | if s.contains('\u{FFFD}') { | |
638 | bug!("Could not losslessly convert '{}'.", s) | |
639 | } | |
640 | } | |
641 | ||
/// Returns `true` if `timestamp` lies more than ten seconds in the past,
/// measured against the current system time.
fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool {
    let cutoff = SystemTime::now() - Duration::from_secs(10);
    cutoff > timestamp
}
645 | ||
646 | pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> { | |
647 | debug!("garbage_collect_session_directories() - begin"); | |
648 | ||
649 | let session_directory = sess.incr_comp_session_dir(); | |
650 | debug!("garbage_collect_session_directories() - session directory: {}", | |
651 | session_directory.display()); | |
652 | ||
653 | let crate_directory = session_directory.parent().unwrap(); | |
654 | debug!("garbage_collect_session_directories() - crate directory: {}", | |
655 | crate_directory.display()); | |
656 | ||
657 | // First do a pass over the crate directory, collecting lock files and | |
658 | // session directories | |
0bf4aa26 XL |
659 | let mut session_directories = FxHashSet::default(); |
660 | let mut lock_files = FxHashSet::default(); | |
9e0c209e SL |
661 | |
662 | for dir_entry in try!(crate_directory.read_dir()) { | |
663 | let dir_entry = match dir_entry { | |
664 | Ok(dir_entry) => dir_entry, | |
665 | _ => { | |
666 | // Ignore any errors | |
667 | continue | |
668 | } | |
669 | }; | |
670 | ||
671 | let entry_name = dir_entry.file_name(); | |
672 | let entry_name = entry_name.to_string_lossy(); | |
673 | ||
674 | if is_session_directory_lock_file(&entry_name) { | |
675 | assert_no_characters_lost(&entry_name); | |
676 | lock_files.insert(entry_name.into_owned()); | |
677 | } else if is_session_directory(&entry_name) { | |
678 | assert_no_characters_lost(&entry_name); | |
679 | session_directories.insert(entry_name.into_owned()); | |
680 | } else { | |
681 | // This is something we don't know, leave it alone | |
682 | } | |
683 | } | |
684 | ||
685 | // Now map from lock files to session directories | |
476ff2be | 686 | let lock_file_to_session_dir: FxHashMap<String, Option<String>> = |
9e0c209e SL |
687 | lock_files.into_iter() |
688 | .map(|lock_file_name| { | |
689 | assert!(lock_file_name.ends_with(LOCK_FILE_EXT)); | |
690 | let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len(); | |
691 | let session_dir = { | |
692 | let dir_prefix = &lock_file_name[0 .. dir_prefix_end]; | |
693 | session_directories.iter() | |
694 | .find(|dir_name| dir_name.starts_with(dir_prefix)) | |
695 | }; | |
696 | (lock_file_name, session_dir.map(String::clone)) | |
697 | }) | |
698 | .collect(); | |
699 | ||
700 | // Delete all lock files, that don't have an associated directory. They must | |
701 | // be some kind of leftover | |
702 | for (lock_file_name, directory_name) in &lock_file_to_session_dir { | |
703 | if directory_name.is_none() { | |
704 | let timestamp = match extract_timestamp_from_session_dir(lock_file_name) { | |
705 | Ok(timestamp) => timestamp, | |
706 | Err(()) => { | |
707 | debug!("Found lock-file with malformed timestamp: {}", | |
708 | crate_directory.join(&lock_file_name).display()); | |
709 | // Ignore it | |
710 | continue | |
711 | } | |
712 | }; | |
713 | ||
714 | let lock_file_path = crate_directory.join(&**lock_file_name); | |
715 | ||
716 | if is_old_enough_to_be_collected(timestamp) { | |
717 | debug!("garbage_collect_session_directories() - deleting \ | |
718 | garbage lock file: {}", lock_file_path.display()); | |
719 | delete_session_dir_lock_file(sess, &lock_file_path); | |
720 | } else { | |
721 | debug!("garbage_collect_session_directories() - lock file with \ | |
722 | no session dir not old enough to be collected: {}", | |
723 | lock_file_path.display()); | |
724 | } | |
725 | } | |
726 | } | |
727 | ||
728 | // Filter out `None` directories | |
476ff2be | 729 | let lock_file_to_session_dir: FxHashMap<String, String> = |
9e0c209e SL |
730 | lock_file_to_session_dir.into_iter() |
731 | .filter_map(|(lock_file_name, directory_name)| { | |
732 | directory_name.map(|n| (lock_file_name, n)) | |
733 | }) | |
734 | .collect(); | |
735 | ||
2c00a5a8 XL |
736 | // Delete all session directories that don't have a lock file. |
737 | for directory_name in session_directories { | |
738 | if !lock_file_to_session_dir.values().any(|dir| *dir == directory_name) { | |
739 | let path = crate_directory.join(directory_name); | |
740 | if let Err(err) = safe_remove_dir_all(&path) { | |
741 | sess.warn(&format!("Failed to garbage collect invalid incremental \ | |
742 | compilation session directory `{}`: {}", | |
743 | path.display(), | |
744 | err)); | |
745 | } | |
746 | } | |
747 | } | |
748 | ||
749 | // Now garbage collect the valid session directories. | |
9e0c209e SL |
750 | let mut deletion_candidates = vec![]; |
751 | let mut definitely_delete = vec![]; | |
752 | ||
753 | for (lock_file_name, directory_name) in &lock_file_to_session_dir { | |
754 | debug!("garbage_collect_session_directories() - inspecting: {}", | |
755 | directory_name); | |
756 | ||
757 | let timestamp = match extract_timestamp_from_session_dir(directory_name) { | |
758 | Ok(timestamp) => timestamp, | |
759 | Err(()) => { | |
760 | debug!("Found session-dir with malformed timestamp: {}", | |
761 | crate_directory.join(directory_name).display()); | |
762 | // Ignore it | |
763 | continue | |
764 | } | |
765 | }; | |
766 | ||
767 | if is_finalized(directory_name) { | |
768 | let lock_file_path = crate_directory.join(lock_file_name); | |
769 | match flock::Lock::new(&lock_file_path, | |
770 | false, // don't wait | |
771 | false, // don't create the lock-file | |
772 | true) { // get an exclusive lock | |
773 | Ok(lock) => { | |
774 | debug!("garbage_collect_session_directories() - \ | |
775 | successfully acquired lock"); | |
776 | debug!("garbage_collect_session_directories() - adding \ | |
777 | deletion candidate: {}", directory_name); | |
778 | ||
779 | // Note that we are holding on to the lock | |
780 | deletion_candidates.push((timestamp, | |
781 | crate_directory.join(directory_name), | |
782 | Some(lock))); | |
783 | } | |
784 | Err(_) => { | |
785 | debug!("garbage_collect_session_directories() - \ | |
786 | not collecting, still in use"); | |
787 | } | |
788 | } | |
789 | } else if is_old_enough_to_be_collected(timestamp) { | |
790 | // When cleaning out "-working" session directories, i.e. | |
791 | // session directories that might still be in use by another | |
792 | // compiler instance, we only look a directories that are | |
793 | // at least ten seconds old. This is supposed to reduce the | |
794 | // chance of deleting a directory in the time window where | |
795 | // the process has allocated the directory but has not yet | |
796 | // acquired the file-lock on it. | |
797 | ||
798 | // Try to acquire the directory lock. If we can't, it | |
799 | // means that the owning process is still alive and we | |
800 | // leave this directory alone. | |
801 | let lock_file_path = crate_directory.join(lock_file_name); | |
802 | match flock::Lock::new(&lock_file_path, | |
803 | false, // don't wait | |
804 | false, // don't create the lock-file | |
805 | true) { // get an exclusive lock | |
806 | Ok(lock) => { | |
807 | debug!("garbage_collect_session_directories() - \ | |
808 | successfully acquired lock"); | |
809 | ||
810 | // Note that we are holding on to the lock | |
811 | definitely_delete.push((crate_directory.join(directory_name), | |
812 | Some(lock))); | |
813 | } | |
814 | Err(_) => { | |
815 | debug!("garbage_collect_session_directories() - \ | |
816 | not collecting, still in use"); | |
817 | } | |
818 | } | |
819 | } else { | |
820 | debug!("garbage_collect_session_directories() - not finalized, not \ | |
821 | old enough"); | |
822 | } | |
823 | } | |
824 | ||
825 | // Delete all but the most recent of the candidates | |
826 | for (path, lock) in all_except_most_recent(deletion_candidates) { | |
827 | debug!("garbage_collect_session_directories() - deleting `{}`", | |
828 | path.display()); | |
829 | ||
830 | if let Err(err) = safe_remove_dir_all(&path) { | |
831 | sess.warn(&format!("Failed to garbage collect finalized incremental \ | |
832 | compilation session directory `{}`: {}", | |
833 | path.display(), | |
834 | err)); | |
835 | } else { | |
836 | delete_session_dir_lock_file(sess, &lock_file_path(&path)); | |
837 | } | |
838 | ||
839 | ||
840 | // Let's make it explicit that the file lock is released at this point, | |
841 | // or rather, that we held on to it until here | |
842 | mem::drop(lock); | |
843 | } | |
844 | ||
845 | for (path, lock) in definitely_delete { | |
846 | debug!("garbage_collect_session_directories() - deleting `{}`", | |
847 | path.display()); | |
848 | ||
849 | if let Err(err) = safe_remove_dir_all(&path) { | |
850 | sess.warn(&format!("Failed to garbage collect incremental \ | |
851 | compilation session directory `{}`: {}", | |
852 | path.display(), | |
853 | err)); | |
854 | } else { | |
855 | delete_session_dir_lock_file(sess, &lock_file_path(&path)); | |
856 | } | |
857 | ||
858 | // Let's make it explicit that the file lock is released at this point, | |
859 | // or rather, that we held on to it until here | |
860 | mem::drop(lock); | |
861 | } | |
862 | ||
863 | Ok(()) | |
864 | } | |
865 | ||
866 | fn all_except_most_recent(deletion_candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)>) | |
476ff2be | 867 | -> FxHashMap<PathBuf, Option<flock::Lock>> { |
9e0c209e SL |
868 | let most_recent = deletion_candidates.iter() |
869 | .map(|&(timestamp, ..)| timestamp) | |
870 | .max(); | |
871 | ||
872 | if let Some(most_recent) = most_recent { | |
873 | deletion_candidates.into_iter() | |
874 | .filter(|&(timestamp, ..)| timestamp != most_recent) | |
875 | .map(|(_, path, lock)| (path, lock)) | |
876 | .collect() | |
877 | } else { | |
0bf4aa26 | 878 | FxHashMap::default() |
9e0c209e SL |
879 | } |
880 | } | |
881 | ||
/// Since paths of artifacts within session directories can get quite long, we
/// need to support deleting files with very long paths. The regular
/// WinApi functions only support paths up to 260 characters, however. In order
/// to circumvent this limitation, we canonicalize the path of the directory
/// before passing it to std::fs::remove_dir_all(). This will convert the path
/// into the '\\?\' format, which supports much longer paths.
///
/// A directory that does not exist is treated as already removed and yields
/// `Ok(())`. This is decided from the result of the canonicalization itself
/// rather than from a separate existence check, so a directory that vanishes
/// concurrently (e.g. because another compiler process collected it) is not
/// reported as an error.
///
/// # Errors
///
/// Returns any error from canonicalizing `p` other than "not found", and any
/// error from removing the directory tree.
fn safe_remove_dir_all(p: &Path) -> io::Result<()> {
    let canonicalized = match p.canonicalize() {
        Ok(canonicalized) => canonicalized,
        // Nothing there (anymore): there is nothing left to remove.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err),
    };

    std_fs::remove_dir_all(canonicalized)
}
896 | ||
/// Removes the file at `p`, canonicalizing the path before handing it to
/// std::fs::remove_file().
///
/// A file that does not exist is treated as already removed and yields
/// `Ok(())`. This is decided from the result of the canonicalization itself
/// rather than from a separate existence check, so a file that vanishes
/// concurrently is not reported as an error.
///
/// # Errors
///
/// Returns any error from canonicalizing `p` other than "not found", and any
/// error from removing the file.
fn safe_remove_file(p: &Path) -> io::Result<()> {
    let canonicalized = match p.canonicalize() {
        Ok(canonicalized) => canonicalized,
        // Nothing there (anymore): there is nothing left to remove.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err),
    };

    std_fs::remove_file(canonicalized)
}
905 | ||
#[test]
fn test_all_except_most_recent() {
    // Builds a lock-less deletion candidate whose path is named after its
    // timestamp (in seconds since the epoch).
    let candidate = |secs: u64| {
        (UNIX_EPOCH + Duration::new(secs, 0), PathBuf::from(secs.to_string()), None)
    };

    // Everything but the newest candidate ("5") must be returned.
    let unordered_candidates = vec![
        candidate(4),
        candidate(1),
        candidate(5),
        candidate(3),
        candidate(2),
    ];
    let remaining: FxHashSet<PathBuf> = all_except_most_recent(unordered_candidates)
        .keys()
        .cloned()
        .collect();
    let expected: FxHashSet<PathBuf> = vec!["1", "2", "3", "4"]
        .into_iter()
        .map(PathBuf::from)
        .collect();
    assert_eq!(remaining, expected);

    // No candidates in, no candidates out.
    let remaining_of_nothing: FxHashSet<PathBuf> = all_except_most_recent(vec![])
        .keys()
        .cloned()
        .collect();
    assert_eq!(remaining_of_nothing, FxHashSet::default());
}
930 | ||
#[test]
fn test_timestamp_serialization() {
    // Round-trip a spread of timestamps, each with a whole-second and a
    // sub-second component, through the string encoding.
    (0 .. 1_000u64)
        .map(|step| {
            let secs = step * 1_434_578;
            let nanos = (step as u32) * 239_000;
            UNIX_EPOCH + Duration::new(secs, nanos)
        })
        .for_each(|original| {
            let encoded = timestamp_to_string(original);
            let decoded = string_to_timestamp(&encoded);
            assert_eq!(Ok(original), decoded);
        });
}
939 | ||
#[test]
fn test_find_source_directory_in_iter() {
    let already_visited: FxHashSet<PathBuf> = FxHashSet::default();

    // Runs the search over the given directory names, with no directory
    // marked as already visited.
    let search = |dirs: Vec<&str>| {
        find_source_directory_in_iter(dirs.into_iter().map(PathBuf::from), &already_visited)
    };

    // Find newest
    let newest = search(vec![
        "crate-dir/s-3234-0000-svh",
        "crate-dir/s-2234-0000-svh",
        "crate-dir/s-1234-0000-svh",
    ]);
    assert_eq!(newest, Some(PathBuf::from("crate-dir/s-3234-0000-svh")));

    // Filter out "-working"
    let newest_finalized = search(vec![
        "crate-dir/s-3234-0000-working",
        "crate-dir/s-2234-0000-svh",
        "crate-dir/s-1234-0000-svh",
    ]);
    assert_eq!(newest_finalized, Some(PathBuf::from("crate-dir/s-2234-0000-svh")));

    // Handle empty
    assert_eq!(search(vec![]), None);

    // Handle only working
    let only_working = search(vec![
        "crate-dir/s-3234-0000-working",
        "crate-dir/s-2234-0000-working",
        "crate-dir/s-1234-0000-working",
    ]);
    assert_eq!(only_working, None);
}