]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/storage/storage_utils.c
Merge pull request #2008 from tych0/share-ns-in-execute
[mirror_lxc.git] / src / lxc / storage / storage_utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * Copyright © 2017 Canonical Ltd.
5 *
6 * Authors:
7 * Christian Brauner <christian.brauner@ubuntu.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this program; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #define _GNU_SOURCE
25 #include <dirent.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <grp.h>
29 #include <inttypes.h>
30 #include <libgen.h>
31 #include <sched.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <sys/mount.h>
37 #include <sys/prctl.h>
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <sys/wait.h>
41
42 #include "log.h"
43 #include "nbd.h"
44 #include "parse.h"
45 #include "storage.h"
46 #include "storage_utils.h"
47 #include "utils.h"
48
49 #ifndef BLKGETSIZE64
50 #define BLKGETSIZE64 _IOR(0x12, 114, size_t)
51 #endif
52
53 lxc_log_define(storage_utils, lxc);
54
/*
 * Build a copy of @src adapted to a renamed and/or relocated container.
 *
 * If @src begins with @oldpath, that leading part is replaced by @lxcpath.
 * In the remainder of the string every occurrence of @oldname is replaced
 * by @name. An empty @oldname matches nothing, so in that case only the
 * path prefix is rewritten.
 *
 * Returns a newly allocated string that the caller must free(), or NULL if
 * the allocation fails.
 *
 * The bulk of this needs to become a common helper.
 */
char *dir_new_path(char *src, const char *oldname, const char *name,
		   const char *oldpath, const char *lxcpath)
{
	const char *rest, *scan, *hit;
	char *ret, *out;
	size_t oldpath_len = strlen(oldpath);
	size_t oldname_len = strlen(oldname);
	size_t name_len = strlen(name);
	size_t prefix_len = 0, nmatches = 0, total;

	/* If src starts with oldpath, look for oldname only after that path. */
	rest = src;
	if (strncmp(src, oldpath, oldpath_len) == 0) {
		rest += oldpath_len;
		prefix_len = strlen(lxcpath);
	}

	/*
	 * Count the replacements up front so the result can be allocated in
	 * one go. strstr() reports an empty needle at the current position
	 * without ever advancing, so an empty oldname has to be skipped
	 * explicitly or the loop would never terminate.
	 */
	if (oldname_len > 0) {
		scan = rest;
		while ((hit = strstr(scan, oldname)) != NULL) {
			nmatches++;
			scan = hit + oldname_len;
		}
	}

	/* Matches do not overlap, so the subtraction cannot underflow. */
	total = prefix_len + strlen(rest) - nmatches * oldname_len +
		nmatches * name_len + 1;

	ret = malloc(total);
	if (!ret)
		return NULL;

	/* prefix_len is 0 when src did not start with oldpath */
	out = ret;
	memcpy(out, lxcpath, prefix_len);
	out += prefix_len;

	scan = rest;
	if (oldname_len > 0) {
		while ((hit = strstr(scan, oldname)) != NULL) {
			size_t chunk = (size_t)(hit - scan);

			/* copy text up to oldname */
			memcpy(out, scan, chunk);
			out += chunk;
			/* put the new name in place of oldname */
			memcpy(out, name, name_len);
			out += name_len;
			/* continue after the end of oldname */
			scan = hit + oldname_len;
		}
	}

	/* copy the rest of src, including the terminating NUL */
	memcpy(out, scan, strlen(scan) + 1);
	return ret;
}
101
102 /*
103 * attach_block_device returns true if all went well,
104 * meaning either a block device was attached or was not
105 * needed. It returns false if something went wrong and
106 * container startup should be stopped.
107 */
108 bool attach_block_device(struct lxc_conf *conf)
109 {
110 char *path;
111
112 if (!conf->rootfs.path)
113 return true;
114
115 path = conf->rootfs.path;
116 if (!requires_nbd(path))
117 return true;
118
119 path = strchr(path, ':');
120 if (!path)
121 return false;
122
123 path++;
124 if (!attach_nbd(path, conf))
125 return false;
126
127 return true;
128 }
129
130 /*
131 * return block size of dev->src in units of bytes
132 */
133 int blk_getsize(struct lxc_storage *bdev, uint64_t *size)
134 {
135 int fd, ret;
136 const char *src;
137
138 src = lxc_storage_get_path(bdev->src, bdev->type);
139 fd = open(src, O_RDONLY);
140 if (fd < 0)
141 return -1;
142
143 /* size of device in bytes */
144 ret = ioctl(fd, BLKGETSIZE64, size);
145 close(fd);
146 return ret;
147 }
148
149 void detach_block_device(struct lxc_conf *conf)
150 {
151 if (conf->nbd_idx != -1)
152 detach_nbd_idx(conf->nbd_idx);
153 }
154
155 /*
156 * Given a lxc_storage (presumably blockdev-based), detect the fstype
157 * by trying mounting (in a private mntns) it.
158 * @lxc_storage: bdev to investigate
159 * @type: preallocated char* in which to write the fstype
160 * @len: length of passed in char*
161 * Returns length of fstype, of -1 on error
162 */
163 int detect_fs(struct lxc_storage *bdev, char *type, int len)
164 {
165 int ret;
166 int p[2];
167 size_t linelen;
168 pid_t pid;
169 FILE *f;
170 char *sp1, *sp2, *sp3;
171 const char *l, *srcdev;
172 char devpath[MAXPATHLEN];
173 char *line = NULL;
174
175 if (!bdev || !bdev->src || !bdev->dest)
176 return -1;
177
178 srcdev = lxc_storage_get_path(bdev->src, bdev->type);
179
180 ret = pipe(p);
181 if (ret < 0)
182 return -1;
183
184 if ((pid = fork()) < 0)
185 return -1;
186
187 if (pid > 0) {
188 int status;
189 close(p[1]);
190 memset(type, 0, len);
191 ret = read(p[0], type, len - 1);
192 close(p[0]);
193 if (ret < 0) {
194 SYSERROR("error reading from pipe");
195 wait(&status);
196 return -1;
197 } else if (ret == 0) {
198 ERROR("child exited early - fstype not found");
199 wait(&status);
200 return -1;
201 }
202 wait(&status);
203 type[len - 1] = '\0';
204 INFO("detected fstype %s for %s", type, srcdev);
205 return ret;
206 }
207
208 if (unshare(CLONE_NEWNS) < 0)
209 exit(1);
210
211 if (detect_shared_rootfs()) {
212 if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL)) {
213 SYSERROR("Failed to make / rslave");
214 ERROR("Continuing...");
215 }
216 }
217
218 ret = mount_unknown_fs(srcdev, bdev->dest, bdev->mntopts);
219 if (ret < 0) {
220 ERROR("failed mounting %s onto %s to detect fstype", srcdev,
221 bdev->dest);
222 exit(1);
223 }
224
225 l = linkderef(srcdev, devpath);
226 if (!l)
227 exit(1);
228 f = fopen("/proc/self/mounts", "r");
229 if (!f)
230 exit(1);
231
232 while (getline(&line, &linelen, f) != -1) {
233 sp1 = strchr(line, ' ');
234 if (!sp1)
235 exit(1);
236 *sp1 = '\0';
237 if (strcmp(line, l))
238 continue;
239 sp2 = strchr(sp1 + 1, ' ');
240 if (!sp2)
241 exit(1);
242 *sp2 = '\0';
243 sp3 = strchr(sp2 + 1, ' ');
244 if (!sp3)
245 exit(1);
246 *sp3 = '\0';
247 sp2++;
248 if (write(p[1], sp2, strlen(sp2)) != strlen(sp2))
249 exit(1);
250
251 exit(0);
252 }
253
254 exit(1);
255 }
256
/*
 * Exec the "mkfs.<fstype>" program.
 * @args: char ** whose first element is the filesystem type and whose second
 *        element is passed to the mkfs program as its only argument
 * Only returns when something failed, in which case the result is -1.
 */
int do_mkfs_exec_wrapper(void *args)
{
	char **argv = args;
	const char *fstype = argv[0];
	const char *target = argv[1];
	/* "mkfs." + fstype + terminating \0 */
	size_t bufsz = strlen("mkfs.") + strlen(fstype) + 1;
	char *prog;
	int written;

	prog = malloc(bufsz);
	if (!prog)
		return -1;

	written = snprintf(prog, bufsz, "mkfs.%s", fstype);
	if (written < 0 || (size_t)written >= bufsz)
		goto on_error;

	TRACE("executing \"%s %s\"", prog, target);
	execlp(prog, prog, target, (char *)NULL);
	/* execlp() only comes back if it failed */
	SYSERROR("failed to run \"%s %s \"", prog, target);

on_error:
	free(prog);
	return -1;
}
286
287 /*
288 * This will return 1 for physical disks, qemu-nbd, loop, etc right now only lvm
289 * is a block device.
290 */
291 int is_blktype(struct lxc_storage *b)
292 {
293 if (strcmp(b->type, "lvm") == 0)
294 return 1;
295
296 return 0;
297 }
298
299 int mount_unknown_fs(const char *rootfs, const char *target,
300 const char *options)
301 {
302 size_t i;
303 int ret;
304 struct cbarg {
305 const char *rootfs;
306 const char *target;
307 const char *options;
308 } cbarg = {
309 .rootfs = rootfs,
310 .target = target,
311 .options = options,
312 };
313
314 /*
315 * find the filesystem type with brute force:
316 * first we check with /etc/filesystems, in case the modules
317 * are auto-loaded and fall back to the supported kernel fs
318 */
319 char *fsfile[] = {
320 "/etc/filesystems",
321 "/proc/filesystems",
322 };
323
324 for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) {
325 if (access(fsfile[i], F_OK))
326 continue;
327
328 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
329 if (ret < 0) {
330 ERROR("failed to parse '%s'", fsfile[i]);
331 return -1;
332 }
333
334 if (ret)
335 return 0;
336 }
337
338 ERROR("failed to determine fs type for '%s'", rootfs);
339 return -1;
340 }
341
/*
 * Per-line callback used by mount_unknown_fs(): try to mount with the
 * filesystem type named on one line of a filesystems list.
 * @buffer: one line of the list; it is modified in place
 * @data:   struct cbarg carrying source, mount point and mount options
 * Returns 1 if the mount succeeded and 0 if this line was skipped or the
 * attempt failed, so that the next line gets tried.
 *
 * These are copied from conf.c. However as conf.c will be moved to using
 * the callback system, they can be pulled from there eventually, so we
 * don't need to pollute utils.c with these low level functions
 */
int find_fstype_cb(char *buffer, void *data)
{
	struct cbarg {
		const char *rootfs;
		const char *target;
		const char *options;
	} *cbarg = data;

	/*
	 * Initialized here so that the free() on the parse_mntopts() failure
	 * path is well-defined even if that function bails out before
	 * storing anything.
	 */
	unsigned long mntflags = 0;
	char *mntdata = NULL;
	char *fstype;

	/* we don't try 'nodev' entries */
	if (strstr(buffer, "nodev"))
		return 0;

	/* presumably strips surrounding whitespace -- confirm in utils.c */
	fstype = buffer;
	fstype += lxc_char_left_gc(fstype, strlen(fstype));
	fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';

	DEBUG("trying to mount '%s'->'%s' with fstype '%s'", cbarg->rootfs,
	      cbarg->target, fstype);

	if (parse_mntopts(cbarg->options, &mntflags, &mntdata) < 0) {
		free(mntdata);
		return 0;
	}

	if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
		/* log first: free() must not get a chance to disturb errno */
		DEBUG("mount failed with error: %s", strerror(errno));
		free(mntdata);
		return 0;
	}

	free(mntdata);

	INFO("mounted '%s' on '%s', with fstype '%s'", cbarg->rootfs,
	     cbarg->target, fstype);

	return 1;
}
388
/*
 * Return the name to use for @path, reading its link content if stat()
 * reports it as a symbolic link.
 * @path: path to inspect
 * @dest: caller-provided buffer of at least MAXPATHLEN bytes that receives
 *        the link content
 * Returns @path itself if it is not reported as a symlink, @dest holding the
 * NUL-terminated link content if it is, and NULL if stat() or readlink()
 * fails or the link content does not fit.
 *
 * NOTE(review): stat() follows symbolic links, so S_ISLNK() is not expected
 * to be true here and the readlink() branch looks unreachable. detect_fs()
 * compares the returned string with the source names in /proc/self/mounts,
 * so switching to lstat() would change what gets matched -- confirm the
 * intended behaviour before touching this.
 */
const char *linkderef(const char *path, char *dest)
{
	struct stat st;
	ssize_t n;

	if (stat(path, &st) < 0)
		return NULL;

	if (S_ISLNK(st.st_mode)) {
		n = readlink(path, dest, MAXPATHLEN);
		if (n < 0) {
			SYSERROR("error reading link %s", path);
			return NULL;
		}

		/* a completely filled buffer leaves no room for the NUL */
		if (n >= MAXPATHLEN) {
			ERROR("link in %s too long", path);
			return NULL;
		}

		dest[n] = '\0';
		return dest;
	}

	return path;
}
413
414 /*
415 * is an unprivileged user allowed to make this kind of snapshot
416 */
417 bool unpriv_snap_allowed(struct lxc_storage *b, const char *t, bool snap,
418 bool maybesnap)
419 {
420 if (!t) {
421 /* New type will be same as original (unless snap && b->type ==
422 * dir, in which case it will be overlayfs -- which is also
423 * allowed).
424 */
425 if (strcmp(b->type, "dir") == 0 ||
426 strcmp(b->type, "aufs") == 0 ||
427 strcmp(b->type, "overlay") == 0 ||
428 strcmp(b->type, "overlayfs") == 0 ||
429 strcmp(b->type, "btrfs") == 0 ||
430 strcmp(b->type, "loop") == 0)
431 return true;
432
433 return false;
434 }
435
436 /* Unprivileged users can copy and snapshot dir, overlayfs, and loop.
437 * In particular, not zfs, btrfs, or lvm.
438 */
439 if (strcmp(t, "dir") == 0 ||
440 strcmp(t, "aufs") == 0 ||
441 strcmp(t, "overlay") == 0 ||
442 strcmp(t, "overlayfs") == 0 ||
443 strcmp(t, "btrfs") == 0 ||
444 strcmp(t, "loop") == 0)
445 return true;
446
447 return false;
448 }
449
/*
 * Check whether @type names one of the known storage backends. The
 * comparison is exact and case-sensitive.
 */
bool is_valid_storage_type(const char *type)
{
	static const char *const known[] = {
		"dir", "btrfs", "aufs", "loop", "lvm",
		"nbd", "overlay", "overlayfs", "rbd", "zfs",
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (strcmp(type, known[i]) == 0)
			return true;
	}

	return false;
}
466
/*
 * Switch to gid 0 and uid 0, then destroy the storage described by @data.
 * @data: the struct lxc_conf * passed on to storage_destroy()
 * Returns 0 on success and -1 if changing gid or uid failed or if
 * storage_destroy() reported failure. Failing to clear the supplementary
 * groups is only warned about.
 */
int storage_destroy_wrapper(void *data)
{
	struct lxc_conf *conf = data;
	int rc;

	rc = setgid(0);
	if (rc < 0) {
		ERROR("Failed to setgid to 0");
		return -1;
	}

	/* best effort: carry on even if the groups cannot be dropped */
	rc = setgroups(0, NULL);
	if (rc < 0)
		WARN("Failed to clear groups");

	rc = setuid(0);
	if (rc < 0) {
		ERROR("Failed to setuid to 0");
		return -1;
	}

	return storage_destroy(conf) ? 0 : -1;
}