]>
Commit | Line | Data |
---|---|---|
e735f4d4 MP |
1 | /*** |
2 | This file is part of systemd. | |
3 | ||
4 | Copyright 2013 Lennart Poettering | |
5 | ||
6 | systemd is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU Lesser General Public License as published by | |
8 | the Free Software Foundation; either version 2.1 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | systemd is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public License | |
17 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
18 | ***/ | |
19 | ||
4c89c718 MP |
20 | #include <dirent.h> |
21 | #include <errno.h> | |
e735f4d4 | 22 | #include <fcntl.h> |
4c89c718 MP |
23 | #include <stdio.h> |
24 | #include <stdlib.h> | |
25 | #include <string.h> | |
26 | #include <sys/stat.h> | |
27 | #include <unistd.h> | |
6300502b | 28 | #include <linux/fs.h> |
db2df898 | 29 | #include "alloc-util.h" |
e735f4d4 | 30 | #include "btrfs-util.h" |
db2df898 | 31 | #include "chattr-util.h" |
e735f4d4 | 32 | #include "copy.h" |
db2df898 MP |
33 | #include "dirent-util.h" |
34 | #include "fd-util.h" | |
35 | #include "fs-util.h" | |
4c89c718 MP |
36 | #include "hashmap.h" |
37 | #include "lockfile-util.h" | |
38 | #include "log.h" | |
39 | #include "macro.h" | |
db2df898 | 40 | #include "machine-image.h" |
e735f4d4 | 41 | #include "mkdir.h" |
6300502b | 42 | #include "path-util.h" |
e3bff60a | 43 | #include "rm-rf.h" |
db2df898 MP |
44 | #include "string-table.h" |
45 | #include "string-util.h" | |
6300502b | 46 | #include "strv.h" |
4c89c718 | 47 | #include "time-util.h" |
6300502b | 48 | #include "utf8.h" |
4c89c718 | 49 | #include "util.h" |
db2df898 | 50 | #include "xattr-util.h" |
e735f4d4 MP |
51 | |
/* NUL-separated list of the directories that are scanned for machine images.
 * The list is walked front to back with NULSTR_FOREACH(), so entries that
 * appear earlier take precedence over later ones. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0" /* legacy */
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
57 | ||
58 | Image *image_unref(Image *i) { | |
59 | if (!i) | |
60 | return NULL; | |
61 | ||
62 | free(i->name); | |
63 | free(i->path); | |
64 | free(i); | |
65 | return NULL; | |
66 | } | |
67 | ||
6300502b MP |
68 | static char **image_settings_path(Image *image) { |
69 | _cleanup_strv_free_ char **l = NULL; | |
70 | char **ret; | |
71 | const char *fn, *s; | |
72 | unsigned i = 0; | |
73 | ||
74 | assert(image); | |
75 | ||
76 | l = new0(char*, 4); | |
77 | if (!l) | |
78 | return NULL; | |
79 | ||
80 | fn = strjoina(image->name, ".nspawn"); | |
81 | ||
82 | FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") { | |
83 | l[i] = strappend(s, fn); | |
84 | if (!l[i]) | |
85 | return NULL; | |
86 | ||
87 | i++; | |
88 | } | |
89 | ||
90 | l[i] = file_in_same_dir(image->path, fn); | |
91 | if (!l[i]) | |
92 | return NULL; | |
93 | ||
94 | ret = l; | |
95 | l = NULL; | |
96 | ||
97 | return ret; | |
98 | } | |
99 | ||
e735f4d4 MP |
100 | static int image_new( |
101 | ImageType t, | |
102 | const char *pretty, | |
103 | const char *path, | |
104 | const char *filename, | |
105 | bool read_only, | |
106 | usec_t crtime, | |
107 | usec_t mtime, | |
108 | Image **ret) { | |
109 | ||
110 | _cleanup_(image_unrefp) Image *i = NULL; | |
111 | ||
112 | assert(t >= 0); | |
113 | assert(t < _IMAGE_TYPE_MAX); | |
114 | assert(pretty); | |
115 | assert(filename); | |
116 | assert(ret); | |
117 | ||
118 | i = new0(Image, 1); | |
119 | if (!i) | |
120 | return -ENOMEM; | |
121 | ||
122 | i->type = t; | |
123 | i->read_only = read_only; | |
124 | i->crtime = crtime; | |
125 | i->mtime = mtime; | |
126 | i->usage = i->usage_exclusive = (uint64_t) -1; | |
127 | i->limit = i->limit_exclusive = (uint64_t) -1; | |
128 | ||
129 | i->name = strdup(pretty); | |
130 | if (!i->name) | |
131 | return -ENOMEM; | |
132 | ||
133 | if (path) | |
134 | i->path = strjoin(path, "/", filename, NULL); | |
135 | else | |
136 | i->path = strdup(filename); | |
137 | ||
138 | if (!i->path) | |
139 | return -ENOMEM; | |
140 | ||
141 | path_kill_slashes(i->path); | |
142 | ||
143 | *ret = i; | |
144 | i = NULL; | |
145 | ||
146 | return 0; | |
147 | } | |
148 | ||
149 | static int image_make( | |
150 | const char *pretty, | |
151 | int dfd, | |
152 | const char *path, | |
153 | const char *filename, | |
154 | Image **ret) { | |
155 | ||
156 | struct stat st; | |
157 | bool read_only; | |
158 | int r; | |
159 | ||
160 | assert(filename); | |
161 | ||
162 | /* We explicitly *do* follow symlinks here, since we want to | |
163 | * allow symlinking trees into /var/lib/machines/, and treat | |
164 | * them normally. */ | |
165 | ||
166 | if (fstatat(dfd, filename, &st, 0) < 0) | |
167 | return -errno; | |
168 | ||
169 | read_only = | |
170 | (path && path_startswith(path, "/usr")) || | |
171 | (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS); | |
172 | ||
173 | if (S_ISDIR(st.st_mode)) { | |
174 | _cleanup_close_ int fd = -1; | |
175 | unsigned file_attr = 0; | |
176 | ||
177 | if (!ret) | |
178 | return 1; | |
179 | ||
180 | if (!pretty) | |
181 | pretty = filename; | |
182 | ||
183 | fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY); | |
184 | if (fd < 0) | |
185 | return -errno; | |
186 | ||
187 | /* btrfs subvolumes have inode 256 */ | |
188 | if (st.st_ino == 256) { | |
e735f4d4 | 189 | |
e3bff60a MP |
190 | r = btrfs_is_filesystem(fd); |
191 | if (r < 0) | |
192 | return r; | |
193 | if (r) { | |
e735f4d4 | 194 | BtrfsSubvolInfo info; |
e735f4d4 MP |
195 | |
196 | /* It's a btrfs subvolume */ | |
197 | ||
db2df898 | 198 | r = btrfs_subvol_get_info_fd(fd, 0, &info); |
e735f4d4 MP |
199 | if (r < 0) |
200 | return r; | |
201 | ||
202 | r = image_new(IMAGE_SUBVOLUME, | |
203 | pretty, | |
204 | path, | |
205 | filename, | |
206 | info.read_only || read_only, | |
207 | info.otime, | |
208 | 0, | |
209 | ret); | |
210 | if (r < 0) | |
211 | return r; | |
212 | ||
db2df898 MP |
213 | if (btrfs_quota_scan_ongoing(fd) == 0) { |
214 | BtrfsQuotaInfo quota; | |
e735f4d4 | 215 | |
db2df898 MP |
216 | r = btrfs_subvol_get_subtree_quota_fd(fd, 0, "a); |
217 | if (r >= 0) { | |
218 | (*ret)->usage = quota.referenced; | |
219 | (*ret)->usage_exclusive = quota.exclusive; | |
220 | ||
221 | (*ret)->limit = quota.referenced_max; | |
222 | (*ret)->limit_exclusive = quota.exclusive_max; | |
223 | } | |
e735f4d4 MP |
224 | } |
225 | ||
226 | return 1; | |
227 | } | |
228 | } | |
229 | ||
230 | /* If the IMMUTABLE bit is set, we consider the | |
231 | * directory read-only. Since the ioctl is not | |
232 | * supported everywhere we ignore failures. */ | |
233 | (void) read_attr_fd(fd, &file_attr); | |
234 | ||
235 | /* It's just a normal directory. */ | |
236 | r = image_new(IMAGE_DIRECTORY, | |
237 | pretty, | |
238 | path, | |
239 | filename, | |
240 | read_only || (file_attr & FS_IMMUTABLE_FL), | |
241 | 0, | |
242 | 0, | |
243 | ret); | |
244 | if (r < 0) | |
245 | return r; | |
246 | ||
247 | return 1; | |
248 | ||
249 | } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) { | |
250 | usec_t crtime = 0; | |
251 | ||
252 | /* It's a RAW disk image */ | |
253 | ||
254 | if (!ret) | |
255 | return 1; | |
256 | ||
257 | fd_getcrtime_at(dfd, filename, &crtime, 0); | |
258 | ||
259 | if (!pretty) | |
260 | pretty = strndupa(filename, strlen(filename) - 4); | |
261 | ||
262 | r = image_new(IMAGE_RAW, | |
263 | pretty, | |
264 | path, | |
265 | filename, | |
266 | !(st.st_mode & 0222) || read_only, | |
267 | crtime, | |
268 | timespec_load(&st.st_mtim), | |
269 | ret); | |
270 | if (r < 0) | |
271 | return r; | |
272 | ||
273 | (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512; | |
274 | (*ret)->limit = (*ret)->limit_exclusive = st.st_size; | |
275 | ||
276 | return 1; | |
277 | } | |
278 | ||
279 | return 0; | |
280 | } | |
281 | ||
282 | int image_find(const char *name, Image **ret) { | |
283 | const char *path; | |
284 | int r; | |
285 | ||
286 | assert(name); | |
287 | ||
288 | /* There are no images with invalid names */ | |
289 | if (!image_name_is_valid(name)) | |
290 | return 0; | |
291 | ||
292 | NULSTR_FOREACH(path, image_search_path) { | |
293 | _cleanup_closedir_ DIR *d = NULL; | |
294 | ||
295 | d = opendir(path); | |
296 | if (!d) { | |
297 | if (errno == ENOENT) | |
298 | continue; | |
299 | ||
300 | return -errno; | |
301 | } | |
302 | ||
303 | r = image_make(NULL, dirfd(d), path, name, ret); | |
304 | if (r == 0 || r == -ENOENT) { | |
305 | _cleanup_free_ char *raw = NULL; | |
306 | ||
307 | raw = strappend(name, ".raw"); | |
308 | if (!raw) | |
309 | return -ENOMEM; | |
310 | ||
311 | r = image_make(NULL, dirfd(d), path, raw, ret); | |
312 | if (r == 0 || r == -ENOENT) | |
313 | continue; | |
314 | } | |
315 | if (r < 0) | |
316 | return r; | |
317 | ||
318 | return 1; | |
319 | } | |
320 | ||
321 | if (streq(name, ".host")) | |
322 | return image_make(".host", AT_FDCWD, NULL, "/", ret); | |
323 | ||
324 | return 0; | |
325 | }; | |
326 | ||
327 | int image_discover(Hashmap *h) { | |
328 | const char *path; | |
329 | int r; | |
330 | ||
331 | assert(h); | |
332 | ||
333 | NULSTR_FOREACH(path, image_search_path) { | |
334 | _cleanup_closedir_ DIR *d = NULL; | |
335 | struct dirent *de; | |
336 | ||
337 | d = opendir(path); | |
338 | if (!d) { | |
339 | if (errno == ENOENT) | |
340 | continue; | |
341 | ||
342 | return -errno; | |
343 | } | |
344 | ||
345 | FOREACH_DIRENT_ALL(de, d, return -errno) { | |
346 | _cleanup_(image_unrefp) Image *image = NULL; | |
347 | ||
348 | if (!image_name_is_valid(de->d_name)) | |
349 | continue; | |
350 | ||
351 | if (hashmap_contains(h, de->d_name)) | |
352 | continue; | |
353 | ||
354 | r = image_make(NULL, dirfd(d), path, de->d_name, &image); | |
355 | if (r == 0 || r == -ENOENT) | |
356 | continue; | |
357 | if (r < 0) | |
358 | return r; | |
359 | ||
360 | r = hashmap_put(h, image->name, image); | |
361 | if (r < 0) | |
362 | return r; | |
363 | ||
364 | image = NULL; | |
365 | } | |
366 | } | |
367 | ||
368 | if (!hashmap_contains(h, ".host")) { | |
369 | _cleanup_(image_unrefp) Image *image = NULL; | |
370 | ||
371 | r = image_make(".host", AT_FDCWD, NULL, "/", &image); | |
372 | if (r < 0) | |
373 | return r; | |
374 | ||
375 | r = hashmap_put(h, image->name, image); | |
376 | if (r < 0) | |
377 | return r; | |
378 | ||
379 | image = NULL; | |
380 | ||
381 | } | |
382 | ||
383 | return 0; | |
384 | } | |
385 | ||
386 | void image_hashmap_free(Hashmap *map) { | |
387 | Image *i; | |
388 | ||
389 | while ((i = hashmap_steal_first(map))) | |
390 | image_unref(i); | |
391 | ||
392 | hashmap_free(map); | |
393 | } | |
394 | ||
395 | int image_remove(Image *i) { | |
396 | _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; | |
6300502b MP |
397 | _cleanup_strv_free_ char **settings = NULL; |
398 | char **j; | |
e735f4d4 MP |
399 | int r; |
400 | ||
401 | assert(i); | |
402 | ||
403 | if (path_equal(i->path, "/") || | |
404 | path_startswith(i->path, "/usr")) | |
405 | return -EROFS; | |
406 | ||
6300502b MP |
407 | settings = image_settings_path(i); |
408 | if (!settings) | |
409 | return -ENOMEM; | |
410 | ||
e735f4d4 MP |
411 | /* Make sure we don't interfere with a running nspawn */ |
412 | r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); | |
413 | if (r < 0) | |
414 | return r; | |
415 | ||
416 | switch (i->type) { | |
417 | ||
418 | case IMAGE_SUBVOLUME: | |
db2df898 | 419 | r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); |
6300502b MP |
420 | if (r < 0) |
421 | return r; | |
422 | break; | |
e735f4d4 MP |
423 | |
424 | case IMAGE_DIRECTORY: | |
425 | /* Allow deletion of read-only directories */ | |
426 | (void) chattr_path(i->path, false, FS_IMMUTABLE_FL); | |
6300502b MP |
427 | r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); |
428 | if (r < 0) | |
429 | return r; | |
430 | ||
431 | break; | |
e735f4d4 MP |
432 | |
433 | case IMAGE_RAW: | |
e3bff60a MP |
434 | if (unlink(i->path) < 0) |
435 | return -errno; | |
6300502b | 436 | break; |
e735f4d4 MP |
437 | |
438 | default: | |
e3bff60a | 439 | return -EOPNOTSUPP; |
e735f4d4 | 440 | } |
6300502b MP |
441 | |
442 | STRV_FOREACH(j, settings) { | |
443 | if (unlink(*j) < 0 && errno != ENOENT) | |
444 | log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j); | |
445 | } | |
446 | ||
447 | return 0; | |
448 | } | |
449 | ||
450 | static int rename_settings_file(const char *path, const char *new_name) { | |
451 | _cleanup_free_ char *rs = NULL; | |
452 | const char *fn; | |
453 | ||
454 | fn = strjoina(new_name, ".nspawn"); | |
455 | ||
456 | rs = file_in_same_dir(path, fn); | |
457 | if (!rs) | |
458 | return -ENOMEM; | |
459 | ||
460 | return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs); | |
e735f4d4 MP |
461 | } |
462 | ||
463 | int image_rename(Image *i, const char *new_name) { | |
464 | _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT; | |
465 | _cleanup_free_ char *new_path = NULL, *nn = NULL; | |
6300502b | 466 | _cleanup_strv_free_ char **settings = NULL; |
e735f4d4 | 467 | unsigned file_attr = 0; |
6300502b | 468 | char **j; |
e735f4d4 MP |
469 | int r; |
470 | ||
471 | assert(i); | |
472 | ||
473 | if (!image_name_is_valid(new_name)) | |
474 | return -EINVAL; | |
475 | ||
476 | if (path_equal(i->path, "/") || | |
477 | path_startswith(i->path, "/usr")) | |
478 | return -EROFS; | |
479 | ||
6300502b MP |
480 | settings = image_settings_path(i); |
481 | if (!settings) | |
482 | return -ENOMEM; | |
483 | ||
e735f4d4 MP |
484 | /* Make sure we don't interfere with a running nspawn */ |
485 | r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); | |
486 | if (r < 0) | |
487 | return r; | |
488 | ||
489 | /* Make sure nobody takes the new name, between the time we | |
490 | * checked it is currently unused in all search paths, and the | |
491 | * time we take possesion of it */ | |
492 | r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); | |
493 | if (r < 0) | |
494 | return r; | |
495 | ||
496 | r = image_find(new_name, NULL); | |
497 | if (r < 0) | |
498 | return r; | |
499 | if (r > 0) | |
500 | return -EEXIST; | |
501 | ||
502 | switch (i->type) { | |
503 | ||
504 | case IMAGE_DIRECTORY: | |
505 | /* Turn of the immutable bit while we rename the image, so that we can rename it */ | |
506 | (void) read_attr_path(i->path, &file_attr); | |
507 | ||
508 | if (file_attr & FS_IMMUTABLE_FL) | |
509 | (void) chattr_path(i->path, false, FS_IMMUTABLE_FL); | |
510 | ||
511 | /* fall through */ | |
512 | ||
513 | case IMAGE_SUBVOLUME: | |
514 | new_path = file_in_same_dir(i->path, new_name); | |
515 | break; | |
516 | ||
517 | case IMAGE_RAW: { | |
518 | const char *fn; | |
519 | ||
520 | fn = strjoina(new_name, ".raw"); | |
521 | new_path = file_in_same_dir(i->path, fn); | |
522 | break; | |
523 | } | |
524 | ||
525 | default: | |
e3bff60a | 526 | return -EOPNOTSUPP; |
e735f4d4 MP |
527 | } |
528 | ||
529 | if (!new_path) | |
530 | return -ENOMEM; | |
531 | ||
532 | nn = strdup(new_name); | |
533 | if (!nn) | |
534 | return -ENOMEM; | |
535 | ||
e3bff60a MP |
536 | r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path); |
537 | if (r < 0) | |
538 | return r; | |
e735f4d4 MP |
539 | |
540 | /* Restore the immutable bit, if it was set before */ | |
541 | if (file_attr & FS_IMMUTABLE_FL) | |
542 | (void) chattr_path(new_path, true, FS_IMMUTABLE_FL); | |
543 | ||
544 | free(i->path); | |
545 | i->path = new_path; | |
546 | new_path = NULL; | |
547 | ||
548 | free(i->name); | |
549 | i->name = nn; | |
550 | nn = NULL; | |
551 | ||
6300502b MP |
552 | STRV_FOREACH(j, settings) { |
553 | r = rename_settings_file(*j, new_name); | |
554 | if (r < 0 && r != -ENOENT) | |
555 | log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j); | |
556 | } | |
557 | ||
e735f4d4 MP |
558 | return 0; |
559 | } | |
560 | ||
6300502b MP |
561 | static int clone_settings_file(const char *path, const char *new_name) { |
562 | _cleanup_free_ char *rs = NULL; | |
563 | const char *fn; | |
564 | ||
565 | fn = strjoina(new_name, ".nspawn"); | |
566 | ||
567 | rs = file_in_same_dir(path, fn); | |
568 | if (!rs) | |
569 | return -ENOMEM; | |
570 | ||
571 | return copy_file_atomic(path, rs, 0664, false, 0); | |
572 | } | |
573 | ||
e735f4d4 MP |
574 | int image_clone(Image *i, const char *new_name, bool read_only) { |
575 | _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT; | |
6300502b | 576 | _cleanup_strv_free_ char **settings = NULL; |
e735f4d4 | 577 | const char *new_path; |
6300502b | 578 | char **j; |
e735f4d4 MP |
579 | int r; |
580 | ||
581 | assert(i); | |
582 | ||
583 | if (!image_name_is_valid(new_name)) | |
584 | return -EINVAL; | |
585 | ||
6300502b MP |
586 | settings = image_settings_path(i); |
587 | if (!settings) | |
588 | return -ENOMEM; | |
589 | ||
e735f4d4 MP |
590 | /* Make sure nobody takes the new name, between the time we |
591 | * checked it is currently unused in all search paths, and the | |
592 | * time we take possesion of it */ | |
593 | r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock); | |
594 | if (r < 0) | |
595 | return r; | |
596 | ||
597 | r = image_find(new_name, NULL); | |
598 | if (r < 0) | |
599 | return r; | |
600 | if (r > 0) | |
601 | return -EEXIST; | |
602 | ||
603 | switch (i->type) { | |
604 | ||
605 | case IMAGE_SUBVOLUME: | |
606 | case IMAGE_DIRECTORY: | |
607 | new_path = strjoina("/var/lib/machines/", new_name); | |
608 | ||
db2df898 MP |
609 | r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); |
610 | ||
611 | /* Enable "subtree" quotas for the copy, if we didn't | |
612 | * copy any quota from the source. */ | |
613 | (void) btrfs_subvol_auto_qgroup(i->path, 0, true); | |
614 | ||
e735f4d4 MP |
615 | break; |
616 | ||
617 | case IMAGE_RAW: | |
618 | new_path = strjoina("/var/lib/machines/", new_name, ".raw"); | |
619 | ||
620 | r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL); | |
621 | break; | |
622 | ||
623 | default: | |
e3bff60a | 624 | return -EOPNOTSUPP; |
e735f4d4 MP |
625 | } |
626 | ||
627 | if (r < 0) | |
628 | return r; | |
629 | ||
6300502b MP |
630 | STRV_FOREACH(j, settings) { |
631 | r = clone_settings_file(*j, new_name); | |
632 | if (r < 0 && r != -ENOENT) | |
633 | log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j); | |
634 | } | |
635 | ||
e735f4d4 MP |
636 | return 0; |
637 | } | |
638 | ||
639 | int image_read_only(Image *i, bool b) { | |
640 | _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT; | |
641 | int r; | |
642 | assert(i); | |
643 | ||
644 | if (path_equal(i->path, "/") || | |
645 | path_startswith(i->path, "/usr")) | |
646 | return -EROFS; | |
647 | ||
648 | /* Make sure we don't interfere with a running nspawn */ | |
649 | r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock); | |
650 | if (r < 0) | |
651 | return r; | |
652 | ||
653 | switch (i->type) { | |
654 | ||
655 | case IMAGE_SUBVOLUME: | |
db2df898 MP |
656 | |
657 | /* Note that we set the flag only on the top-level | |
658 | * subvolume of the image. */ | |
659 | ||
e735f4d4 MP |
660 | r = btrfs_subvol_set_read_only(i->path, b); |
661 | if (r < 0) | |
662 | return r; | |
663 | ||
664 | break; | |
665 | ||
666 | case IMAGE_DIRECTORY: | |
667 | /* For simple directory trees we cannot use the access | |
668 | mode of the top-level directory, since it has an | |
669 | effect on the container itself. However, we can | |
670 | use the "immutable" flag, to at least make the | |
671 | top-level directory read-only. It's not as good as | |
672 | a read-only subvolume, but at least something, and | |
673 | we can read the value back.*/ | |
674 | ||
675 | r = chattr_path(i->path, b, FS_IMMUTABLE_FL); | |
676 | if (r < 0) | |
677 | return r; | |
678 | ||
679 | break; | |
680 | ||
681 | case IMAGE_RAW: { | |
682 | struct stat st; | |
683 | ||
684 | if (stat(i->path, &st) < 0) | |
685 | return -errno; | |
686 | ||
687 | if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0) | |
688 | return -errno; | |
689 | ||
690 | /* If the images is now read-only, it's a good time to | |
691 | * defrag it, given that no write patterns will | |
692 | * fragment it again. */ | |
693 | if (b) | |
694 | (void) btrfs_defrag(i->path); | |
695 | break; | |
696 | } | |
697 | ||
698 | default: | |
e3bff60a | 699 | return -EOPNOTSUPP; |
e735f4d4 MP |
700 | } |
701 | ||
702 | return 0; | |
703 | } | |
704 | ||
705 | int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) { | |
706 | _cleanup_free_ char *p = NULL; | |
707 | LockFile t = LOCK_FILE_INIT; | |
708 | struct stat st; | |
709 | int r; | |
710 | ||
711 | assert(path); | |
712 | assert(global); | |
713 | assert(local); | |
714 | ||
715 | /* Locks an image path. This actually creates two locks: one | |
716 | * "local" one, next to the image path itself, which might be | |
717 | * shared via NFS. And another "global" one, in /run, that | |
718 | * uses the device/inode number. This has the benefit that we | |
719 | * can even lock a tree that is a mount point, correctly. */ | |
720 | ||
721 | if (path_equal(path, "/")) | |
722 | return -EBUSY; | |
723 | ||
724 | if (!path_is_absolute(path)) | |
725 | return -EINVAL; | |
726 | ||
727 | if (stat(path, &st) >= 0) { | |
728 | if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0) | |
729 | return -ENOMEM; | |
730 | } | |
731 | ||
732 | r = make_lock_file_for(path, operation, &t); | |
733 | if (r < 0) | |
734 | return r; | |
735 | ||
736 | if (p) { | |
e3bff60a | 737 | mkdir_p("/run/systemd/nspawn/locks", 0700); |
e735f4d4 MP |
738 | |
739 | r = make_lock_file(p, operation, global); | |
740 | if (r < 0) { | |
741 | release_lock_file(&t); | |
742 | return r; | |
743 | } | |
744 | } | |
745 | ||
746 | *local = t; | |
747 | return 0; | |
748 | } | |
749 | ||
e3bff60a MP |
750 | int image_set_limit(Image *i, uint64_t referenced_max) { |
751 | assert(i); | |
752 | ||
753 | if (path_equal(i->path, "/") || | |
754 | path_startswith(i->path, "/usr")) | |
755 | return -EROFS; | |
756 | ||
757 | if (i->type != IMAGE_SUBVOLUME) | |
758 | return -EOPNOTSUPP; | |
759 | ||
db2df898 MP |
760 | /* We set the quota both for the subvolume as well as for the |
761 | * subtree. The latter is mostly for historical reasons, since | |
762 | * we didn't use to have a concept of subtree quota, and hence | |
763 | * only modified the subvolume quota. */ | |
764 | ||
765 | (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max); | |
766 | (void) btrfs_subvol_auto_qgroup(i->path, 0, true); | |
767 | return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max); | |
e3bff60a MP |
768 | } |
769 | ||
e735f4d4 MP |
770 | int image_name_lock(const char *name, int operation, LockFile *ret) { |
771 | const char *p; | |
772 | ||
773 | assert(name); | |
774 | assert(ret); | |
775 | ||
776 | /* Locks an image name, regardless of the precise path used. */ | |
777 | ||
778 | if (!image_name_is_valid(name)) | |
779 | return -EINVAL; | |
780 | ||
781 | if (streq(name, ".host")) | |
782 | return -EBUSY; | |
783 | ||
e3bff60a | 784 | mkdir_p("/run/systemd/nspawn/locks", 0700); |
e735f4d4 MP |
785 | p = strjoina("/run/systemd/nspawn/locks/name-", name); |
786 | ||
787 | return make_lock_file(p, operation, ret); | |
788 | } | |
789 | ||
/* Checks whether a string qualifies as image name: it has to be a valid file
 * name, must not contain control characters, has to be valid UTF-8, and must
 * not start with ".#". */
bool image_name_is_valid(const char *s) {
        if (!filename_is_valid(s) ||
            string_has_cc(s, NULL) ||
            !utf8_is_valid(s))
                return false;

        /* Temporary files for atomically creating new files */
        return !startswith(s, ".#");
}
806 | ||
807 | static const char* const image_type_table[_IMAGE_TYPE_MAX] = { | |
808 | [IMAGE_DIRECTORY] = "directory", | |
809 | [IMAGE_SUBVOLUME] = "subvolume", | |
810 | [IMAGE_RAW] = "raw", | |
811 | }; | |
812 | ||
813 | DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType); |