]>
Commit | Line | Data |
---|---|---|
9e0c209e SL |
1 | // Copyright 2016 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | ||
12 | //! This module manages how the incremental compilation cache is represented in | |
13 | //! the file system. | |
14 | //! | |
15 | //! Incremental compilation caches are managed according to a copy-on-write | |
16 | //! strategy: Once a complete, consistent cache version is finalized, it is | |
17 | //! never modified. Instead, when a subsequent compilation session is started, | |
18 | //! the compiler will allocate a new version of the cache that starts out as | |
19 | //! a copy of the previous version. Then only this new copy is modified and it | |
20 | //! will not be visible to other processes until it is finalized. This ensures | |
21 | //! that multiple compiler processes can be executed concurrently for the same | |
22 | //! crate without interfering with each other or blocking each other. | |
23 | //! | |
24 | //! More concretely this is implemented via the following protocol: | |
25 | //! | |
26 | //! 1. For a newly started compilation session, the compiler allocates a | |
27 | //! new `session` directory within the incremental compilation directory. | |
28 | //! This session directory will have a unique name that ends with the suffix | |
29 | //! "-working" and that contains a creation timestamp. | |
30 | //! 2. Next, the compiler looks for the newest finalized session directory, | |
31 | //! that is, a session directory from a previous compilation session that | |
32 | //! has been marked as valid and consistent. A session directory is | |
33 | //! considered finalized if the "-working" suffix in the directory name has | |
34 | //! been replaced by the SVH of the crate. | |
35 | //! 3. Once the compiler has found a valid, finalized session directory, it will | |
36 | //! hard-link/copy its contents into the new "-working" directory. If all | |
37 | //! goes well, it will have its own, private copy of the source directory and | |
38 | //! subsequently not have to worry about synchronizing with other compiler | |
39 | //! processes. | |
40 | //! 4. Now the compiler can do its normal compilation process, which involves | |
41 | //! reading and updating its private session directory. | |
42 | //! 5. When compilation finishes without errors, the private session directory | |
43 | //! will be in a state where it can be used as input for other compilation | |
44 | //! sessions. That is, it will contain a dependency graph and cache artifacts | |
45 | //! that are consistent with the state of the source code it was compiled | |
46 | //! from, with no need to change them ever again. At this point, the compiler | |
47 | //! finalizes and "publishes" its private session directory by renaming it | |
48 | //! from "s-{timestamp}-{random}-working" to "s-{timestamp}-{SVH}". | |
49 | //! 6. At this point the "old" session directory that we copied our data from | |
50 | //! at the beginning of the session has become obsolete because we have just | |
51 | //! published a more current version. Thus the compiler will delete it. | |
52 | //! | |
53 | //! ## Garbage Collection | |
54 | //! | |
55 | //! Naively following the above protocol might lead to old session directories | |
56 | //! piling up if a compiler instance crashes for some reason before it's able to | |
57 | //! remove its private session directory. In order to avoid wasting disk space, | |
58 | //! the compiler also does some garbage collection each time it is started in | |
59 | //! incremental compilation mode. Specifically, it will scan the incremental | |
60 | //! compilation directory for private session directories that are not in use | |
61 | //! any more and will delete those. It will also delete any finalized session | |
62 | //! directories for a given crate except for the most recent one. | |
63 | //! | |
64 | //! ## Synchronization | |
65 | //! | |
66 | //! There is some synchronization needed in order for the compiler to be able to | |
67 | //! determine whether a given private session directory is not in use any more. | |
68 | //! This is done by creating a lock file for each session directory and | |
69 | //! locking it while the directory is still being used. Since file locks have | |
70 | //! operating system support, we can rely on the lock being released if the | |
71 | //! compiler process dies for some unexpected reason. Thus, when garbage | |
72 | //! collecting private session directories, the collecting process can determine | |
73 | //! whether the directory is still in use by trying to acquire a lock on the | |
74 | //! file. If locking the file fails, the original process must still be alive. | |
75 | //! If locking the file succeeds, we know that the owning process is not alive | |
76 | //! any more and we can safely delete the directory. | |
77 | //! There is still a small time window between the original process creating the | |
78 | //! lock file and actually locking it. In order to minimize the chance that | |
79 | //! another process tries to acquire the lock in just that instant, only | |
80 | //! session directories that are older than a few seconds are considered for | |
81 | //! garbage collection. | |
82 | //! | |
83 | //! Another case that has to be considered is what happens if one process | |
84 | //! deletes a finalized session directory that another process is currently | |
85 | //! trying to copy from. This case is also handled via the lock file. Before | |
86 | //! a process starts copying a finalized session directory, it will acquire a | |
87 | //! shared lock on the directory's lock file. Any garbage collecting process, | |
88 | //! on the other hand, will acquire an exclusive lock on the lock file. | |
89 | //! Thus, if a directory is being collected, any reader process will fail | |
90 | //! acquiring the shared lock and will leave the directory alone. Conversely, | |
91 | //! if a collecting process can't acquire the exclusive lock because the | |
92 | //! directory is currently being read from, it will leave collecting that | |
93 | //! directory to another process at a later point in time. | |
94 | //! The exact same scheme is also used when reading the metadata hashes file | |
95 | //! from an extern crate. When a crate is compiled, the hash values of its | |
96 | //! metadata are stored in a file in its session directory. When the | |
97 | //! compilation session of another crate imports the first crate's metadata, | |
98 | //! it also has to read in the accompanying metadata hashes. It thus will access | |
99 | //! the finalized session directory of all crates it links to and while doing | |
100 | //! so, it will also place a read lock on the respective session directory | |
101 | //! so that it won't be deleted while the metadata hashes are loaded. | |
102 | //! | |
103 | //! ## Preconditions | |
104 | //! | |
105 | //! This system relies on two features being available in the file system in | |
106 | //! order to work really well: file locking and hard linking. | |
107 | //! If hard linking is not available (like on FAT) the data in the cache | |
108 | //! actually has to be copied at the beginning of each session. | |
109 | //! If file locking does not work reliably (like on NFS), some of the | |
110 | //! synchronization will go haywire. | |
111 | //! In both cases we recommend to locate the incremental compilation directory | |
112 | //! on a file system that supports these things. | |
113 | //! It might be a good idea though to try and detect whether we are on an | |
114 | //! unsupported file system and emit a warning in that case. This is not yet | |
115 | //! implemented. | |
116 | ||
9e0c209e | 117 | use rustc::hir::svh::Svh; |
abe05a73 | 118 | use rustc::session::{Session, CrateDisambiguator}; |
9e0c209e | 119 | use rustc::util::fs as fs_util; |
476ff2be SL |
120 | use rustc_data_structures::{flock, base_n}; |
121 | use rustc_data_structures::fx::{FxHashSet, FxHashMap}; | |
9e0c209e | 122 | |
9e0c209e SL |
123 | use std::fs as std_fs; |
124 | use std::io; | |
125 | use std::mem; | |
126 | use std::path::{Path, PathBuf}; | |
127 | use std::time::{UNIX_EPOCH, SystemTime, Duration}; | |
abe05a73 XL |
128 | |
129 | use rand::{thread_rng, Rng}; | |
9e0c209e SL |
130 | |
/// Extension (including the leading dot) of a session directory's lock file.
const LOCK_FILE_EXT: &str = ".lock";
/// Name of the dependency graph file inside a session directory.
const DEP_GRAPH_FILENAME: &str = "dep-graph.bin";
/// Name of the work products file inside a session directory.
const WORK_PRODUCTS_FILENAME: &str = "work-products.bin";
/// Name of the metadata hashes file inside a session directory.
const METADATA_HASHES_FILENAME: &str = "metadata.bin";
/// Name of the query cache file inside a session directory.
const QUERY_CACHE_FILENAME: &str = "query-cache.bin";
9e0c209e | 136 | |
476ff2be SL |
/// Radix used when encoding integers into file and directory names.
///
/// Base 36 produces shorter names than decimal or hexadecimal would. Because
/// the encoded numbers end up in file names, the encoding must not be
/// case-sensitive (which rules out base64, for example).
const INT_ENCODE_BASE: u64 = 36;
142 | ||
9e0c209e SL |
143 | pub fn dep_graph_path(sess: &Session) -> PathBuf { |
144 | in_incr_comp_dir_sess(sess, DEP_GRAPH_FILENAME) | |
145 | } | |
146 | ||
147 | pub fn work_products_path(sess: &Session) -> PathBuf { | |
148 | in_incr_comp_dir_sess(sess, WORK_PRODUCTS_FILENAME) | |
149 | } | |
150 | ||
151 | pub fn metadata_hash_export_path(sess: &Session) -> PathBuf { | |
152 | in_incr_comp_dir_sess(sess, METADATA_HASHES_FILENAME) | |
153 | } | |
154 | ||
abe05a73 XL |
155 | pub fn query_cache_path(sess: &Session) -> PathBuf { |
156 | in_incr_comp_dir_sess(sess, QUERY_CACHE_FILENAME) | |
157 | } | |
158 | ||
9e0c209e SL |
159 | pub fn lock_file_path(session_dir: &Path) -> PathBuf { |
160 | let crate_dir = session_dir.parent().unwrap(); | |
161 | ||
162 | let directory_name = session_dir.file_name().unwrap().to_string_lossy(); | |
163 | assert_no_characters_lost(&directory_name); | |
164 | ||
165 | let dash_indices: Vec<_> = directory_name.match_indices("-") | |
166 | .map(|(idx, _)| idx) | |
167 | .collect(); | |
168 | if dash_indices.len() != 3 { | |
169 | bug!("Encountered incremental compilation session directory with \ | |
170 | malformed name: {}", | |
171 | session_dir.display()) | |
172 | } | |
173 | ||
174 | crate_dir.join(&directory_name[0 .. dash_indices[2]]) | |
175 | .with_extension(&LOCK_FILE_EXT[1..]) | |
176 | } | |
177 | ||
178 | pub fn in_incr_comp_dir_sess(sess: &Session, file_name: &str) -> PathBuf { | |
179 | in_incr_comp_dir(&sess.incr_comp_session_dir(), file_name) | |
180 | } | |
181 | ||
/// Returns the path of `file_name` inside the given session directory.
pub fn in_incr_comp_dir(incr_comp_session_dir: &Path, file_name: &str) -> PathBuf {
    let mut file_path = incr_comp_session_dir.to_path_buf();
    file_path.push(file_name);
    file_path
}
185 | ||
186 | /// Allocates the private session directory. The boolean in the Ok() result | |
187 | /// indicates whether we should try loading a dep graph from the successfully | |
188 | /// initialized directory, or not. | |
189 | /// The post-condition of this fn is that we have a valid incremental | |
190 | /// compilation session directory, if the result is `Ok`. A valid session | |
191 | /// directory is one that contains a locked lock file. It may or may not contain | |
192 | /// a dep-graph and work products from a previous session. | |
193 | /// If the call fails, the fn may leave behind an invalid session directory. | |
194 | /// The garbage collection will take care of it. | |
ea8adc8c XL |
195 | pub fn prepare_session_directory(sess: &Session, |
196 | crate_name: &str, | |
abe05a73 | 197 | crate_disambiguator: CrateDisambiguator) { |
ea8adc8c XL |
198 | if sess.opts.incremental.is_none() { |
199 | return | |
200 | } | |
201 | ||
9e0c209e SL |
202 | debug!("prepare_session_directory"); |
203 | ||
204 | // {incr-comp-dir}/{crate-name-and-disambiguator} | |
ea8adc8c | 205 | let crate_dir = crate_path(sess, crate_name, crate_disambiguator); |
9e0c209e | 206 | debug!("crate-dir: {}", crate_dir.display()); |
ea8adc8c XL |
207 | if create_dir(sess, &crate_dir, "crate").is_err() { |
208 | return | |
209 | } | |
9e0c209e | 210 | |
476ff2be SL |
211 | // Hack: canonicalize the path *after creating the directory* |
212 | // because, on windows, long paths can cause problems; | |
213 | // canonicalization inserts this weird prefix that makes windows | |
214 | // tolerate long paths. | |
215 | let crate_dir = match crate_dir.canonicalize() { | |
216 | Ok(v) => v, | |
217 | Err(err) => { | |
ea8adc8c XL |
218 | sess.err(&format!("incremental compilation: error canonicalizing path `{}`: {}", |
219 | crate_dir.display(), err)); | |
220 | return | |
476ff2be SL |
221 | } |
222 | }; | |
223 | ||
224 | let mut source_directories_already_tried = FxHashSet(); | |
9e0c209e SL |
225 | |
226 | loop { | |
227 | // Generate a session directory of the form: | |
228 | // | |
229 | // {incr-comp-dir}/{crate-name-and-disambiguator}/s-{timestamp}-{random}-working | |
230 | let session_dir = generate_session_dir_path(&crate_dir); | |
231 | debug!("session-dir: {}", session_dir.display()); | |
232 | ||
233 | // Lock the new session directory. If this fails, return an | |
234 | // error without retrying | |
ea8adc8c XL |
235 | let (directory_lock, lock_file_path) = match lock_directory(sess, &session_dir) { |
236 | Ok(e) => e, | |
237 | Err(_) => return, | |
238 | }; | |
9e0c209e SL |
239 | |
240 | // Now that we have the lock, we can actually create the session | |
241 | // directory | |
ea8adc8c XL |
242 | if create_dir(sess, &session_dir, "session").is_err() { |
243 | return | |
244 | } | |
9e0c209e SL |
245 | |
246 | // Find a suitable source directory to copy from. Ignore those that we | |
247 | // have already tried before. | |
248 | let source_directory = find_source_directory(&crate_dir, | |
249 | &source_directories_already_tried); | |
250 | ||
251 | let source_directory = if let Some(dir) = source_directory { | |
252 | dir | |
253 | } else { | |
254 | // There's nowhere to copy from, we're done | |
255 | debug!("no source directory found. Continuing with empty session \ | |
256 | directory."); | |
257 | ||
ea8adc8c XL |
258 | sess.init_incr_comp_session(session_dir, directory_lock, false); |
259 | return | |
9e0c209e SL |
260 | }; |
261 | ||
262 | debug!("attempting to copy data from source: {}", | |
263 | source_directory.display()); | |
264 | ||
abe05a73 | 265 | |
9e0c209e SL |
266 | |
267 | // Try copying over all files from the source directory | |
abe05a73 XL |
268 | if let Ok(allows_links) = copy_files(sess, |
269 | &session_dir, | |
270 | &source_directory) { | |
9e0c209e SL |
271 | debug!("successfully copied data from: {}", |
272 | source_directory.display()); | |
273 | ||
c30ab7b3 | 274 | if !allows_links { |
ea8adc8c | 275 | sess.warn(&format!("Hard linking files in the incremental \ |
c30ab7b3 SL |
276 | compilation cache failed. Copying files \ |
277 | instead. Consider moving the cache \ | |
278 | directory to a file system which supports \ | |
279 | hard linking in session dir `{}`", | |
280 | session_dir.display()) | |
281 | ); | |
282 | } | |
283 | ||
ea8adc8c XL |
284 | sess.init_incr_comp_session(session_dir, directory_lock, true); |
285 | return | |
9e0c209e SL |
286 | } else { |
287 | debug!("copying failed - trying next directory"); | |
288 | ||
289 | // Something went wrong while trying to copy/link files from the | |
290 | // source directory. Try again with a different one. | |
291 | source_directories_already_tried.insert(source_directory); | |
292 | ||
293 | // Try to remove the session directory we just allocated. We don't | |
294 | // know if there's any garbage in it from the failed copy action. | |
295 | if let Err(err) = safe_remove_dir_all(&session_dir) { | |
ea8adc8c XL |
296 | sess.warn(&format!("Failed to delete partly initialized \ |
297 | session dir `{}`: {}", | |
298 | session_dir.display(), | |
299 | err)); | |
9e0c209e SL |
300 | } |
301 | ||
ea8adc8c | 302 | delete_session_dir_lock_file(sess, &lock_file_path); |
9e0c209e SL |
303 | mem::drop(directory_lock); |
304 | } | |
305 | } | |
306 | } | |
307 | ||
308 | ||
309 | /// This function finalizes and thus 'publishes' the session directory by | |
310 | /// renaming it to `s-{timestamp}-{svh}` and releasing the file lock. | |
311 | /// If there have been compilation errors, however, this function will just | |
312 | /// delete the presumably invalid session directory. | |
313 | pub fn finalize_session_directory(sess: &Session, svh: Svh) { | |
314 | if sess.opts.incremental.is_none() { | |
315 | return; | |
316 | } | |
317 | ||
318 | let incr_comp_session_dir: PathBuf = sess.incr_comp_session_dir().clone(); | |
319 | ||
320 | if sess.has_errors() { | |
321 | // If there have been any errors during compilation, we don't want to | |
322 | // publish this session directory. Rather, we'll just delete it. | |
323 | ||
324 | debug!("finalize_session_directory() - invalidating session directory: {}", | |
325 | incr_comp_session_dir.display()); | |
326 | ||
327 | if let Err(err) = safe_remove_dir_all(&*incr_comp_session_dir) { | |
328 | sess.warn(&format!("Error deleting incremental compilation \ | |
329 | session directory `{}`: {}", | |
330 | incr_comp_session_dir.display(), | |
331 | err)); | |
332 | } | |
333 | ||
334 | let lock_file_path = lock_file_path(&*incr_comp_session_dir); | |
335 | delete_session_dir_lock_file(sess, &lock_file_path); | |
336 | sess.mark_incr_comp_session_as_invalid(); | |
337 | } | |
338 | ||
339 | debug!("finalize_session_directory() - session directory: {}", | |
340 | incr_comp_session_dir.display()); | |
341 | ||
342 | let old_sub_dir_name = incr_comp_session_dir.file_name() | |
343 | .unwrap() | |
344 | .to_string_lossy(); | |
345 | assert_no_characters_lost(&old_sub_dir_name); | |
346 | ||
347 | // Keep the 's-{timestamp}-{random-number}' prefix, but replace the | |
348 | // '-working' part with the SVH of the crate | |
349 | let dash_indices: Vec<_> = old_sub_dir_name.match_indices("-") | |
350 | .map(|(idx, _)| idx) | |
351 | .collect(); | |
352 | if dash_indices.len() != 3 { | |
353 | bug!("Encountered incremental compilation session directory with \ | |
354 | malformed name: {}", | |
355 | incr_comp_session_dir.display()) | |
356 | } | |
357 | ||
358 | // State: "s-{timestamp}-{random-number}-" | |
359 | let mut new_sub_dir_name = String::from(&old_sub_dir_name[.. dash_indices[2] + 1]); | |
360 | ||
361 | // Append the svh | |
476ff2be | 362 | base_n::push_str(svh.as_u64(), INT_ENCODE_BASE, &mut new_sub_dir_name); |
9e0c209e SL |
363 | |
364 | // Create the full path | |
365 | let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name); | |
366 | debug!("finalize_session_directory() - new path: {}", new_path.display()); | |
367 | ||
368 | match std_fs::rename(&*incr_comp_session_dir, &new_path) { | |
369 | Ok(_) => { | |
370 | debug!("finalize_session_directory() - directory renamed successfully"); | |
371 | ||
372 | // This unlocks the directory | |
373 | sess.finalize_incr_comp_session(new_path); | |
374 | } | |
375 | Err(e) => { | |
376 | // Warn about the error. However, no need to abort compilation now. | |
377 | sess.warn(&format!("Error finalizing incremental compilation \ | |
378 | session directory `{}`: {}", | |
379 | incr_comp_session_dir.display(), | |
380 | e)); | |
381 | ||
382 | debug!("finalize_session_directory() - error, marking as invalid"); | |
383 | // Drop the file lock, so we can garage collect | |
384 | sess.mark_incr_comp_session_as_invalid(); | |
385 | } | |
386 | } | |
387 | ||
388 | let _ = garbage_collect_session_directories(sess); | |
389 | } | |
390 | ||
391 | pub fn delete_all_session_dir_contents(sess: &Session) -> io::Result<()> { | |
392 | let sess_dir_iterator = sess.incr_comp_session_dir().read_dir()?; | |
393 | for entry in sess_dir_iterator { | |
394 | let entry = entry?; | |
395 | safe_remove_file(&entry.path())? | |
396 | } | |
397 | Ok(()) | |
398 | } | |
399 | ||
abe05a73 XL |
400 | fn copy_files(sess: &Session, |
401 | target_dir: &Path, | |
402 | source_dir: &Path) | |
c30ab7b3 | 403 | -> Result<bool, ()> { |
9e0c209e SL |
404 | // We acquire a shared lock on the lock file of the directory, so that |
405 | // nobody deletes it out from under us while we are reading from it. | |
406 | let lock_file_path = lock_file_path(source_dir); | |
407 | let _lock = if let Ok(lock) = flock::Lock::new(&lock_file_path, | |
408 | false, // don't wait, | |
409 | false, // don't create | |
410 | false) { // not exclusive | |
411 | lock | |
412 | } else { | |
413 | // Could not acquire the lock, don't try to copy from here | |
414 | return Err(()) | |
415 | }; | |
416 | ||
417 | let source_dir_iterator = match source_dir.read_dir() { | |
418 | Ok(it) => it, | |
419 | Err(_) => return Err(()) | |
420 | }; | |
421 | ||
422 | let mut files_linked = 0; | |
423 | let mut files_copied = 0; | |
424 | ||
425 | for entry in source_dir_iterator { | |
426 | match entry { | |
427 | Ok(entry) => { | |
428 | let file_name = entry.file_name(); | |
429 | ||
430 | let target_file_path = target_dir.join(file_name); | |
431 | let source_path = entry.path(); | |
432 | ||
433 | debug!("copying into session dir: {}", source_path.display()); | |
434 | match fs_util::link_or_copy(source_path, target_file_path) { | |
435 | Ok(fs_util::LinkOrCopy::Link) => { | |
436 | files_linked += 1 | |
437 | } | |
438 | Ok(fs_util::LinkOrCopy::Copy) => { | |
439 | files_copied += 1 | |
440 | } | |
441 | Err(_) => return Err(()) | |
442 | } | |
443 | } | |
444 | Err(_) => { | |
445 | return Err(()) | |
446 | } | |
447 | } | |
448 | } | |
449 | ||
abe05a73 XL |
450 | if sess.opts.debugging_opts.incremental_info { |
451 | println!("[incremental] session directory: \ | |
452 | {} files hard-linked", files_linked); | |
453 | println!("[incremental] session directory: \ | |
454 | {} files copied", files_copied); | |
9e0c209e SL |
455 | } |
456 | ||
c30ab7b3 | 457 | Ok(files_linked > 0 || files_copied == 0) |
9e0c209e SL |
458 | } |
459 | ||
460 | /// Generate unique directory path of the form: | |
461 | /// {crate_dir}/s-{timestamp}-{random-number}-working | |
462 | fn generate_session_dir_path(crate_dir: &Path) -> PathBuf { | |
463 | let timestamp = timestamp_to_string(SystemTime::now()); | |
464 | debug!("generate_session_dir_path: timestamp = {}", timestamp); | |
465 | let random_number = thread_rng().next_u32(); | |
466 | debug!("generate_session_dir_path: random_number = {}", random_number); | |
467 | ||
468 | let directory_name = format!("s-{}-{}-working", | |
469 | timestamp, | |
476ff2be SL |
470 | base_n::encode(random_number as u64, |
471 | INT_ENCODE_BASE)); | |
9e0c209e SL |
472 | debug!("generate_session_dir_path: directory_name = {}", directory_name); |
473 | let directory_path = crate_dir.join(directory_name); | |
474 | debug!("generate_session_dir_path: directory_path = {}", directory_path.display()); | |
475 | directory_path | |
476 | } | |
477 | ||
478 | fn create_dir(sess: &Session, path: &Path, dir_tag: &str) -> Result<(),()> { | |
cc61c64b | 479 | match std_fs::create_dir_all(path) { |
9e0c209e SL |
480 | Ok(()) => { |
481 | debug!("{} directory created successfully", dir_tag); | |
482 | Ok(()) | |
483 | } | |
484 | Err(err) => { | |
485 | sess.err(&format!("Could not create incremental compilation {} \ | |
486 | directory `{}`: {}", | |
487 | dir_tag, | |
488 | path.display(), | |
489 | err)); | |
490 | Err(()) | |
491 | } | |
492 | } | |
493 | } | |
494 | ||
495 | /// Allocate a the lock-file and lock it. | |
496 | fn lock_directory(sess: &Session, | |
497 | session_dir: &Path) | |
498 | -> Result<(flock::Lock, PathBuf), ()> { | |
499 | let lock_file_path = lock_file_path(session_dir); | |
500 | debug!("lock_directory() - lock_file: {}", lock_file_path.display()); | |
501 | ||
502 | match flock::Lock::new(&lock_file_path, | |
503 | false, // don't wait | |
504 | true, // create the lock file | |
505 | true) { // the lock should be exclusive | |
506 | Ok(lock) => Ok((lock, lock_file_path)), | |
507 | Err(err) => { | |
508 | sess.err(&format!("incremental compilation: could not create \ | |
509 | session directory lock file: {}", err)); | |
510 | Err(()) | |
511 | } | |
512 | } | |
513 | } | |
514 | ||
515 | fn delete_session_dir_lock_file(sess: &Session, | |
516 | lock_file_path: &Path) { | |
517 | if let Err(err) = safe_remove_file(&lock_file_path) { | |
518 | sess.warn(&format!("Error deleting lock file for incremental \ | |
519 | compilation session directory `{}`: {}", | |
520 | lock_file_path.display(), | |
521 | err)); | |
522 | } | |
523 | } | |
524 | ||
525 | /// Find the most recent published session directory that is not in the | |
526 | /// ignore-list. | |
527 | fn find_source_directory(crate_dir: &Path, | |
476ff2be | 528 | source_directories_already_tried: &FxHashSet<PathBuf>) |
9e0c209e SL |
529 | -> Option<PathBuf> { |
530 | let iter = crate_dir.read_dir() | |
531 | .unwrap() // FIXME | |
532 | .filter_map(|e| e.ok().map(|e| e.path())); | |
533 | ||
534 | find_source_directory_in_iter(iter, source_directories_already_tried) | |
535 | } | |
536 | ||
537 | fn find_source_directory_in_iter<I>(iter: I, | |
476ff2be | 538 | source_directories_already_tried: &FxHashSet<PathBuf>) |
9e0c209e SL |
539 | -> Option<PathBuf> |
540 | where I: Iterator<Item=PathBuf> | |
541 | { | |
542 | let mut best_candidate = (UNIX_EPOCH, None); | |
543 | ||
544 | for session_dir in iter { | |
545 | debug!("find_source_directory_in_iter - inspecting `{}`", | |
546 | session_dir.display()); | |
547 | ||
548 | let directory_name = session_dir.file_name().unwrap().to_string_lossy(); | |
549 | assert_no_characters_lost(&directory_name); | |
550 | ||
551 | if source_directories_already_tried.contains(&session_dir) || | |
552 | !is_session_directory(&directory_name) || | |
553 | !is_finalized(&directory_name) { | |
554 | debug!("find_source_directory_in_iter - ignoring."); | |
555 | continue | |
556 | } | |
557 | ||
558 | let timestamp = extract_timestamp_from_session_dir(&directory_name) | |
559 | .unwrap_or_else(|_| { | |
560 | bug!("unexpected incr-comp session dir: {}", session_dir.display()) | |
561 | }); | |
562 | ||
563 | if timestamp > best_candidate.0 { | |
564 | best_candidate = (timestamp, Some(session_dir.clone())); | |
565 | } | |
566 | } | |
567 | ||
568 | best_candidate.1 | |
569 | } | |
570 | ||
/// Returns `true` unless `directory_name` ends with the "-working" suffix
/// that marks a session directory as still private.
fn is_finalized(directory_name: &str) -> bool {
    let still_working = directory_name.ends_with("-working");
    !still_working
}
574 | ||
575 | fn is_session_directory(directory_name: &str) -> bool { | |
576 | directory_name.starts_with("s-") && | |
577 | !directory_name.ends_with(LOCK_FILE_EXT) | |
578 | } | |
579 | ||
580 | fn is_session_directory_lock_file(file_name: &str) -> bool { | |
581 | file_name.starts_with("s-") && file_name.ends_with(LOCK_FILE_EXT) | |
582 | } | |
583 | ||
584 | fn extract_timestamp_from_session_dir(directory_name: &str) | |
585 | -> Result<SystemTime, ()> { | |
586 | if !is_session_directory(directory_name) { | |
587 | return Err(()) | |
588 | } | |
589 | ||
590 | let dash_indices: Vec<_> = directory_name.match_indices("-") | |
591 | .map(|(idx, _)| idx) | |
592 | .collect(); | |
593 | if dash_indices.len() != 3 { | |
594 | return Err(()) | |
595 | } | |
596 | ||
597 | string_to_timestamp(&directory_name[dash_indices[0]+1 .. dash_indices[1]]) | |
598 | } | |
599 | ||
9e0c209e SL |
600 | fn timestamp_to_string(timestamp: SystemTime) -> String { |
601 | let duration = timestamp.duration_since(UNIX_EPOCH).unwrap(); | |
602 | let micros = duration.as_secs() * 1_000_000 + | |
603 | (duration.subsec_nanos() as u64) / 1000; | |
476ff2be | 604 | base_n::encode(micros, INT_ENCODE_BASE) |
9e0c209e SL |
605 | } |
606 | ||
/// Decodes a base-36 string holding the number of microseconds since the
/// UNIX epoch into a `SystemTime`.
///
/// Returns `Err(())` if `s` is not a valid base-36 `u64`.
fn string_to_timestamp(s: &str) -> Result<SystemTime, ()> {
    let micros_since_unix_epoch = match u64::from_str_radix(s, 36) {
        Ok(micros) => micros,
        Err(_) => return Err(()),
    };

    let whole_seconds = micros_since_unix_epoch / 1_000_000;
    // The remainder is below one million, so it fits into a `u32` and so does
    // the corresponding nanosecond count.
    let subsecond_nanos = (micros_since_unix_epoch % 1_000_000) as u32 * 1000;

    Ok(UNIX_EPOCH + Duration::new(whole_seconds, subsecond_nanos))
}
620 | ||
9e0c209e SL |
621 | fn crate_path(sess: &Session, |
622 | crate_name: &str, | |
abe05a73 | 623 | crate_disambiguator: CrateDisambiguator) |
9e0c209e | 624 | -> PathBuf { |
9e0c209e SL |
625 | |
626 | let incr_dir = sess.opts.incremental.as_ref().unwrap().clone(); | |
627 | ||
abe05a73 | 628 | // The full crate disambiguator is really long. 64 bits of it should be |
9e0c209e | 629 | // sufficient. |
abe05a73 XL |
630 | let crate_disambiguator = crate_disambiguator.to_fingerprint().to_smaller_hash(); |
631 | let crate_disambiguator = base_n::encode(crate_disambiguator, INT_ENCODE_BASE); | |
9e0c209e | 632 | |
abe05a73 | 633 | let crate_name = format!("{}-{}", crate_name, crate_disambiguator); |
9e0c209e SL |
634 | incr_dir.join(crate_name) |
635 | } | |
636 | ||
637 | fn assert_no_characters_lost(s: &str) { | |
638 | if s.contains('\u{FFFD}') { | |
639 | bug!("Could not losslessly convert '{}'.", s) | |
640 | } | |
641 | } | |
642 | ||
/// Returns `true` if `timestamp` lies more than ten seconds in the past.
fn is_old_enough_to_be_collected(timestamp: SystemTime) -> bool {
    let grace_period = Duration::from_secs(10);
    let cutoff = SystemTime::now() - grace_period;
    timestamp < cutoff
}
646 | ||
647 | pub fn garbage_collect_session_directories(sess: &Session) -> io::Result<()> { | |
648 | debug!("garbage_collect_session_directories() - begin"); | |
649 | ||
650 | let session_directory = sess.incr_comp_session_dir(); | |
651 | debug!("garbage_collect_session_directories() - session directory: {}", | |
652 | session_directory.display()); | |
653 | ||
654 | let crate_directory = session_directory.parent().unwrap(); | |
655 | debug!("garbage_collect_session_directories() - crate directory: {}", | |
656 | crate_directory.display()); | |
657 | ||
658 | // First do a pass over the crate directory, collecting lock files and | |
659 | // session directories | |
476ff2be SL |
660 | let mut session_directories = FxHashSet(); |
661 | let mut lock_files = FxHashSet(); | |
9e0c209e SL |
662 | |
663 | for dir_entry in try!(crate_directory.read_dir()) { | |
664 | let dir_entry = match dir_entry { | |
665 | Ok(dir_entry) => dir_entry, | |
666 | _ => { | |
667 | // Ignore any errors | |
668 | continue | |
669 | } | |
670 | }; | |
671 | ||
672 | let entry_name = dir_entry.file_name(); | |
673 | let entry_name = entry_name.to_string_lossy(); | |
674 | ||
675 | if is_session_directory_lock_file(&entry_name) { | |
676 | assert_no_characters_lost(&entry_name); | |
677 | lock_files.insert(entry_name.into_owned()); | |
678 | } else if is_session_directory(&entry_name) { | |
679 | assert_no_characters_lost(&entry_name); | |
680 | session_directories.insert(entry_name.into_owned()); | |
681 | } else { | |
682 | // This is something we don't know, leave it alone | |
683 | } | |
684 | } | |
685 | ||
686 | // Now map from lock files to session directories | |
476ff2be | 687 | let lock_file_to_session_dir: FxHashMap<String, Option<String>> = |
9e0c209e SL |
688 | lock_files.into_iter() |
689 | .map(|lock_file_name| { | |
690 | assert!(lock_file_name.ends_with(LOCK_FILE_EXT)); | |
691 | let dir_prefix_end = lock_file_name.len() - LOCK_FILE_EXT.len(); | |
692 | let session_dir = { | |
693 | let dir_prefix = &lock_file_name[0 .. dir_prefix_end]; | |
694 | session_directories.iter() | |
695 | .find(|dir_name| dir_name.starts_with(dir_prefix)) | |
696 | }; | |
697 | (lock_file_name, session_dir.map(String::clone)) | |
698 | }) | |
699 | .collect(); | |
700 | ||
701 | // Delete all lock files, that don't have an associated directory. They must | |
702 | // be some kind of leftover | |
703 | for (lock_file_name, directory_name) in &lock_file_to_session_dir { | |
704 | if directory_name.is_none() { | |
705 | let timestamp = match extract_timestamp_from_session_dir(lock_file_name) { | |
706 | Ok(timestamp) => timestamp, | |
707 | Err(()) => { | |
708 | debug!("Found lock-file with malformed timestamp: {}", | |
709 | crate_directory.join(&lock_file_name).display()); | |
710 | // Ignore it | |
711 | continue | |
712 | } | |
713 | }; | |
714 | ||
715 | let lock_file_path = crate_directory.join(&**lock_file_name); | |
716 | ||
717 | if is_old_enough_to_be_collected(timestamp) { | |
718 | debug!("garbage_collect_session_directories() - deleting \ | |
719 | garbage lock file: {}", lock_file_path.display()); | |
720 | delete_session_dir_lock_file(sess, &lock_file_path); | |
721 | } else { | |
722 | debug!("garbage_collect_session_directories() - lock file with \ | |
723 | no session dir not old enough to be collected: {}", | |
724 | lock_file_path.display()); | |
725 | } | |
726 | } | |
727 | } | |
728 | ||
729 | // Filter out `None` directories | |
476ff2be | 730 | let lock_file_to_session_dir: FxHashMap<String, String> = |
9e0c209e SL |
731 | lock_file_to_session_dir.into_iter() |
732 | .filter_map(|(lock_file_name, directory_name)| { | |
733 | directory_name.map(|n| (lock_file_name, n)) | |
734 | }) | |
735 | .collect(); | |
736 | ||
737 | let mut deletion_candidates = vec![]; | |
738 | let mut definitely_delete = vec![]; | |
739 | ||
740 | for (lock_file_name, directory_name) in &lock_file_to_session_dir { | |
741 | debug!("garbage_collect_session_directories() - inspecting: {}", | |
742 | directory_name); | |
743 | ||
744 | let timestamp = match extract_timestamp_from_session_dir(directory_name) { | |
745 | Ok(timestamp) => timestamp, | |
746 | Err(()) => { | |
747 | debug!("Found session-dir with malformed timestamp: {}", | |
748 | crate_directory.join(directory_name).display()); | |
749 | // Ignore it | |
750 | continue | |
751 | } | |
752 | }; | |
753 | ||
754 | if is_finalized(directory_name) { | |
755 | let lock_file_path = crate_directory.join(lock_file_name); | |
756 | match flock::Lock::new(&lock_file_path, | |
757 | false, // don't wait | |
758 | false, // don't create the lock-file | |
759 | true) { // get an exclusive lock | |
760 | Ok(lock) => { | |
761 | debug!("garbage_collect_session_directories() - \ | |
762 | successfully acquired lock"); | |
763 | debug!("garbage_collect_session_directories() - adding \ | |
764 | deletion candidate: {}", directory_name); | |
765 | ||
766 | // Note that we are holding on to the lock | |
767 | deletion_candidates.push((timestamp, | |
768 | crate_directory.join(directory_name), | |
769 | Some(lock))); | |
770 | } | |
771 | Err(_) => { | |
772 | debug!("garbage_collect_session_directories() - \ | |
773 | not collecting, still in use"); | |
774 | } | |
775 | } | |
776 | } else if is_old_enough_to_be_collected(timestamp) { | |
777 | // When cleaning out "-working" session directories, i.e. | |
778 | // session directories that might still be in use by another | |
779 | // compiler instance, we only look at directories that are | |
780 | // at least ten seconds old. This is supposed to reduce the | |
781 | // chance of deleting a directory in the time window where | |
782 | // the process has allocated the directory but has not yet | |
783 | // acquired the file-lock on it. | |
784 | ||
785 | // Try to acquire the directory lock. If we can't, it | |
786 | // means that the owning process is still alive and we | |
787 | // leave this directory alone. | |
788 | let lock_file_path = crate_directory.join(lock_file_name); | |
789 | match flock::Lock::new(&lock_file_path, | |
790 | false, // don't wait | |
791 | false, // don't create the lock-file | |
792 | true) { // get an exclusive lock | |
793 | Ok(lock) => { | |
794 | debug!("garbage_collect_session_directories() - \ | |
795 | successfully acquired lock"); | |
796 | ||
797 | // Note that we are holding on to the lock | |
798 | definitely_delete.push((crate_directory.join(directory_name), | |
799 | Some(lock))); | |
800 | } | |
801 | Err(_) => { | |
802 | debug!("garbage_collect_session_directories() - \ | |
803 | not collecting, still in use"); | |
804 | } | |
805 | } | |
806 | } else { | |
807 | debug!("garbage_collect_session_directories() - not finalized, not \ | |
808 | old enough"); | |
809 | } | |
810 | } | |
811 | ||
812 | // Delete all but the most recent of the candidates | |
813 | for (path, lock) in all_except_most_recent(deletion_candidates) { | |
814 | debug!("garbage_collect_session_directories() - deleting `{}`", | |
815 | path.display()); | |
816 | ||
817 | if let Err(err) = safe_remove_dir_all(&path) { | |
818 | sess.warn(&format!("Failed to garbage collect finalized incremental \ | |
819 | compilation session directory `{}`: {}", | |
820 | path.display(), | |
821 | err)); | |
822 | } else { | |
823 | delete_session_dir_lock_file(sess, &lock_file_path(&path)); | |
824 | } | |
825 | ||
826 | ||
827 | // Let's make it explicit that the file lock is released at this point, | |
828 | // or rather, that we held on to it until here | |
829 | mem::drop(lock); | |
830 | } | |
831 | ||
832 | for (path, lock) in definitely_delete { | |
833 | debug!("garbage_collect_session_directories() - deleting `{}`", | |
834 | path.display()); | |
835 | ||
836 | if let Err(err) = safe_remove_dir_all(&path) { | |
837 | sess.warn(&format!("Failed to garbage collect incremental \ | |
838 | compilation session directory `{}`: {}", | |
839 | path.display(), | |
840 | err)); | |
841 | } else { | |
842 | delete_session_dir_lock_file(sess, &lock_file_path(&path)); | |
843 | } | |
844 | ||
845 | // Let's make it explicit that the file lock is released at this point, | |
846 | // or rather, that we held on to it until here | |
847 | mem::drop(lock); | |
848 | } | |
849 | ||
850 | Ok(()) | |
851 | } | |
852 | ||
853 | fn all_except_most_recent(deletion_candidates: Vec<(SystemTime, PathBuf, Option<flock::Lock>)>) | |
476ff2be | 854 | -> FxHashMap<PathBuf, Option<flock::Lock>> { |
9e0c209e SL |
855 | let most_recent = deletion_candidates.iter() |
856 | .map(|&(timestamp, ..)| timestamp) | |
857 | .max(); | |
858 | ||
859 | if let Some(most_recent) = most_recent { | |
860 | deletion_candidates.into_iter() | |
861 | .filter(|&(timestamp, ..)| timestamp != most_recent) | |
862 | .map(|(_, path, lock)| (path, lock)) | |
863 | .collect() | |
864 | } else { | |
476ff2be | 865 | FxHashMap() |
9e0c209e SL |
866 | } |
867 | } | |
868 | ||
/// Recursively removes the directory `p`, succeeding if it does not exist.
///
/// Since paths of artifacts within session directories can get quite long, we
/// need to support deleting files with very long paths. The regular
/// WinApi functions only support paths up to 260 characters, however. In order
/// to circumvent this limitation, we canonicalize the path of the directory
/// before passing it to std::fs::remove_dir_all(). This will convert the path
/// into the '\\?\' format, which supports much longer paths.
///
/// A missing directory is detected through the `NotFound` error reported by
/// canonicalization rather than through a separate existence check. That way
/// a directory removed concurrently by another process is not reported as a
/// failure, while any other I/O error (e.g. missing permissions) is returned
/// to the caller instead of being mistaken for "nothing to delete".
fn safe_remove_dir_all(p: &Path) -> io::Result<()> {
    let canonicalized = match p.canonicalize() {
        Ok(canonicalized) => canonicalized,
        // Already gone: nothing left to delete.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err),
    };

    std_fs::remove_dir_all(canonicalized)
}
883 | ||
/// Removes the file `p`, succeeding if it does not exist.
///
/// The path is canonicalized before it is handed to std::fs::remove_file().
///
/// A missing file is detected through the `NotFound` error reported by
/// canonicalization rather than through a separate existence check. That way
/// a file removed concurrently by another process is not reported as a
/// failure, while any other I/O error (e.g. missing permissions) is returned
/// to the caller instead of being mistaken for "nothing to delete".
fn safe_remove_file(p: &Path) -> io::Result<()> {
    let canonicalized = match p.canonicalize() {
        Ok(canonicalized) => canonicalized,
        // Already gone: nothing left to delete.
        Err(ref err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err),
    };

    std_fs::remove_file(canonicalized)
}
892 | ||
// Checks that only the candidate with the newest timestamp is spared and that
// an empty candidate list produces an empty result.
#[test]
fn test_all_except_most_recent() {
    let at = |secs: u64| UNIX_EPOCH + Duration::new(secs, 0);

    let candidates = vec![
        (at(4), PathBuf::from("4"), None),
        (at(1), PathBuf::from("1"), None),
        (at(5), PathBuf::from("5"), None),
        (at(3), PathBuf::from("3"), None),
        (at(2), PathBuf::from("2"), None),
    ];
    let remaining: FxHashSet<PathBuf> =
        all_except_most_recent(candidates).keys().cloned().collect();
    let expected: FxHashSet<PathBuf> = ["1", "2", "3", "4"]
        .iter()
        .map(|name| PathBuf::from(*name))
        .collect();
    assert_eq!(remaining, expected);

    let remaining: FxHashSet<PathBuf> =
        all_except_most_recent(vec![]).keys().cloned().collect();
    assert_eq!(remaining, FxHashSet());
}
917 | ||
// Round-trips a spread of timestamps through their string representation and
// checks that parsing gives back the exact same point in time.
#[test]
fn test_timestamp_serialization() {
    for step in 0 .. 1_000u64 {
        let secs = step * 1_434_578;
        let nanos = (step as u32) * 239_000;
        let original = UNIX_EPOCH + Duration::new(secs, nanos);

        let round_tripped = string_to_timestamp(&timestamp_to_string(original));
        assert_eq!(Ok(original), round_tripped);
    }
}
926 | ||
// Exercises the source-directory lookup with finalized and "-working"
// directory names, as well as with no candidates at all.
#[test]
fn test_find_source_directory_in_iter() {
    let already_visited = FxHashSet();

    // Runs the lookup over the given directory names, in the given order.
    let lookup = |names: &[&str]| {
        let candidates: Vec<PathBuf> = names.iter()
                                            .map(|name| PathBuf::from(*name))
                                            .collect();
        find_source_directory_in_iter(candidates.into_iter(), &already_visited)
    };

    // Find newest
    assert_eq!(lookup(&["crate-dir/s-3234-0000-svh",
                        "crate-dir/s-2234-0000-svh",
                        "crate-dir/s-1234-0000-svh"]),
               Some(PathBuf::from("crate-dir/s-3234-0000-svh")));

    // Filter out "-working"
    assert_eq!(lookup(&["crate-dir/s-3234-0000-working",
                        "crate-dir/s-2234-0000-svh",
                        "crate-dir/s-1234-0000-svh"]),
               Some(PathBuf::from("crate-dir/s-2234-0000-svh")));

    // Handle empty
    assert_eq!(lookup(&[]), None);

    // Handle only working
    assert_eq!(lookup(&["crate-dir/s-3234-0000-working",
                        "crate-dir/s-2234-0000-working",
                        "crate-dir/s-1234-0000-working"]),
               None);
}