src/lxc/storage/storage_utils.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #include "config.h"
   4
   5 #include <ctype.h>
   6 #include <dirent.h>
   7 #include <errno.h>
   8 #include <fcntl.h>
   9 #include <grp.h>
  10 #include <inttypes.h>
  11 #include <libgen.h>
  12 #include <sched.h>
  13 #include <stdint.h>
  14 #include <stdio.h>
  15 #include <string.h>
  16 #include <sys/mount.h>
  17 #include <sys/prctl.h>
  18 #include <sys/stat.h>
  19 #include <sys/types.h>
  20 #include <sys/wait.h>
  21 #include <unistd.h>
  22
  23 #include "log.h"
  24 #include "nbd.h"
  25 #include "parse.h"
  26 #include "storage.h"
  27 #include "storage_utils.h"
  28 #include "syscall_wrappers.h"
  29 #include "utils.h"
  30
  31 #if !HAVE_STRLCPY
  32 #include "strlcpy.h"
  33 #endif
  34
  35 #ifndef BLKGETSIZE64
  36 #define BLKGETSIZE64 _IOR(0x12, 114, size_t)
  37 #endif
  38
  39 lxc_log_define(storage_utils, lxc);
  40
  41 /*
  42  * attach_block_device returns true if all went well,
  43  * meaning either a block device was attached or was not
  44  * needed.  It returns false if something went wrong and
  45  * container startup should be stopped.
  46  */
  47 bool attach_block_device(struct lxc_conf *conf)
  48 {
  49         char *path;
  50
  51         if (!conf->rootfs.path)
  52                 return true;
  53
  54         path = conf->rootfs.path;
  55         if (!requires_nbd(path))
  56                 return true;
  57
  58         path = strchr(path, ':');
  59         if (!path)
  60                 return false;
  61
  62         path++;
  63         if (!attach_nbd(path, conf))
  64                 return false;
  65
  66         return true;
  67 }
  68
  69 /*
  70  * return block size of dev->src in units of bytes
  71  */
  72 int blk_getsize(struct lxc_storage *bdev, uint64_t *size)
  73 {
  74         int fd, ret;
  75         const char *src;
  76
  77         src = lxc_storage_get_path(bdev->src, bdev->type);
  78
  79         fd = open(src, O_RDONLY | O_CLOEXEC);
  80         if (fd < 0) {
  81                 SYSERROR("Failed to open \"%s\"", src);
  82                 return -1;
  83         }
  84
  85         /* size of device in bytes */
  86         ret = ioctl(fd, BLKGETSIZE64, size);
  87         if (ret < 0)
  88                 SYSERROR("Failed to get block size of dev-src");
  89
  90         close(fd);
  91         return ret;
  92 }
  93
  94 void detach_block_device(struct lxc_conf *conf)
  95 {
  96         if (conf->nbd_idx != -1)
  97                 detach_nbd_idx(conf->nbd_idx);
  98 }
  99
 100 /*
 101  * Given a lxc_storage (presumably blockdev-based), detect the fstype
 102  * by trying mounting (in a private mntns) it.
 103  * @lxc_storage: bdev to investigate
 104  * @type: preallocated char* in which to write the fstype
 105  * @len: length of passed in char*
 106  * Returns length of fstype, of -1 on error
 107  */
 108 int detect_fs(struct lxc_storage *bdev, char *type, int len)
 109 {
 110         int ret;
 111         int p[2];
 112         size_t linelen;
 113         pid_t pid;
 114         FILE *f;
 115         char *sp1, *sp2, *sp3;
 116         const char *l, *srcdev;
 117         char devpath[PATH_MAX];
 118         char *line = NULL;
 119
 120         if (!bdev || !bdev->src || !bdev->dest)
 121                 return -1;
 122
 123         srcdev = lxc_storage_get_path(bdev->src, bdev->type);
 124
 125         ret = pipe(p);
 126         if (ret < 0) {
 127                 SYSERROR("Failed to create pipe");
 128                 return -1;
 129         }
 130
 131         pid = fork();
 132         if (pid < 0) {
 133                 SYSERROR("Failed to fork process");
 134                 return -1;
 135         }
 136
 137         if (pid > 0) {
 138                 int status;
 139
 140                 close(p[1]);
 141                 memset(type, 0, len);
 142
 143                 ret = read(p[0], type, len - 1);
 144                 if (ret < 0) {
 145                         SYSERROR("Failed to read FSType from pipe");
 146                 } else if (ret == 0) {
 147                         ERROR("FSType not found - child exited early");
 148                         ret = -1;
 149                 }
 150
 151                 close(p[0]);
 152                 wait(&status);
 153
 154                 if (ret < 0)
 155                         return ret;
 156
 157                 type[len - 1] = '\0';
 158                 INFO("Detected FSType \"%s\" for \"%s\"", type, srcdev);
 159
 160                 return ret;
 161         }
 162
 163         if (unshare(CLONE_NEWNS) < 0)
 164                 _exit(EXIT_FAILURE);
 165
 166         if (detect_shared_rootfs() && mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL))
 167                 SYSERROR("Failed to recursively turn root mount tree into dependent mount. Continuing...");
 168
 169         ret = mount_unknown_fs(srcdev, bdev->dest, bdev->mntopts);
 170         if (ret < 0) {
 171                 ERROR("Failed to mount \"%s\" onto \"%s\" to detect FSType", srcdev,
 172                       bdev->dest);
 173                 _exit(EXIT_FAILURE);
 174         }
 175
 176         l = linkderef(srcdev, devpath);
 177         if (!l)
 178                 _exit(EXIT_FAILURE);
 179
 180         f = fopen("/proc/self/mounts", "r");
 181         if (!f)
 182                 _exit(EXIT_FAILURE);
 183
 184         while (getline(&line, &linelen, f) != -1) {
 185                 ssize_t nbytes;
 186
 187                 sp1 = strchr(line, ' ');
 188                 if (!sp1)
 189                         _exit(EXIT_FAILURE);
 190
 191                 *sp1 = '\0';
 192                 if (strcmp(line, l))
 193                         continue;
 194
 195                 sp2 = strchr(sp1 + 1, ' ');
 196                 if (!sp2)
 197                         _exit(EXIT_FAILURE);
 198                 *sp2 = '\0';
 199
 200                 sp3 = strchr(sp2 + 1, ' ');
 201                 if (!sp3)
 202                         _exit(EXIT_FAILURE);
 203                 *sp3 = '\0';
 204
 205                 sp2++;
 206                 nbytes = write(p[1], sp2, strlen(sp2));
 207                 if (nbytes < 0 || (size_t)nbytes != strlen(sp2))
 208                         _exit(EXIT_FAILURE);
 209
 210                 _exit(EXIT_SUCCESS);
 211         }
 212
 213         _exit(EXIT_FAILURE);
 214 }
 215
 216 int do_mkfs_exec_wrapper(void *args)
 217 {
 218         int ret;
 219         char *mkfs;
 220         char **data = args;
 221         /* strlen("mkfs.")
 222          * +
 223          * strlen(data[0])
 224          * +
 225          * \0
 226          */
 227         size_t len = 5 + strlen(data[0]) + 1;
 228
 229         mkfs = malloc(len);
 230         if (!mkfs)
 231                 return -1;
 232
 233         ret = snprintf(mkfs, len, "mkfs.%s", data[0]);
 234         if (ret < 0 || (size_t)ret >= len) {
 235                 free(mkfs);
 236                 return -1;
 237         }
 238
 239         TRACE("Executing \"%s %s\"", mkfs, data[1]);
 240         execlp(mkfs, mkfs, data[1], (char *)NULL);
 241
 242         SYSERROR("Failed to run \"%s %s\"", mkfs, data[1]);
 243         free(mkfs);
 244
 245         return -1;
 246 }
 247
 248 /*
 249  * This will return 1 for physical disks, qemu-nbd, loop, etc right now only lvm
 250  * is a block device.
 251  */
 252 int is_blktype(struct lxc_storage *b)
 253 {
 254         if (strcmp(b->type, "lvm") == 0)
 255                 return 1;
 256
 257         return 0;
 258 }
 259
 260 int mount_unknown_fs(const char *rootfs, const char *target,
 261                      const char *options)
 262 {
 263         size_t i;
 264         int ret;
 265         struct cbarg {
 266                 const char *rootfs;
 267                 const char *target;
 268                 const char *options;
 269         } cbarg = {
 270             .rootfs = rootfs,
 271             .target = target,
 272             .options = options,
 273         };
 274
 275         /*
 276          * find the filesystem type with brute force:
 277          * first we check with /etc/filesystems, in case the modules
 278          * are auto-loaded and fall back to the supported kernel fs
 279          */
 280         char *fsfile[] = {
 281             "/etc/filesystems",
 282             "/proc/filesystems",
 283         };
 284
 285         for (i = 0; i < sizeof(fsfile) / sizeof(fsfile[0]); i++) {
 286                 if (access(fsfile[i], F_OK))
 287                         continue;
 288
 289                 ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg);
 290                 if (ret < 0) {
 291                         ERROR("Failed to parse \"%s\"", fsfile[i]);
 292                         return -1;
 293                 }
 294
 295                 if (ret)
 296                         return 0;
 297         }
 298
 299         ERROR("Failed to determine FSType for \"%s\"", rootfs);
 300
 301         return -1;
 302 }
 303
 304 /*
 305  * These are copied from conf.c.  However as conf.c will be moved to using
 306  * the callback system, they can be pulled from there eventually, so we
 307  * don't need to pollute utils.c with these low level functions
 308  */
 309 int find_fstype_cb(char *buffer, void *data)
 310 {
 311         struct cbarg {
 312                 const char *rootfs;
 313                 const char *target;
 314                 const char *options;
 315         } *cbarg = data;
 316         unsigned long mntflags = 0;
 317         char *mntdata = NULL;
 318         char *fstype;
 319
 320         /* we don't try 'nodev' entries */
 321         if (strstr(buffer, "nodev"))
 322                 return 0;
 323
 324         fstype = buffer;
 325         fstype += lxc_char_left_gc(fstype, strlen(fstype));
 326         fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0';
 327
 328         DEBUG("Trying to mount \"%s\"->\"%s\" with FSType \"%s\"", cbarg->rootfs,
 329               cbarg->target, fstype);
 330
 331         if (parse_mntopts_legacy(cbarg->options, &mntflags, &mntdata) < 0) {
 332                 free(mntdata);
 333                 return 0;
 334         }
 335
 336         if (mount(cbarg->rootfs, cbarg->target, fstype, mntflags, mntdata)) {
 337                 SYSDEBUG("Failed to mount");
 338                 free(mntdata);
 339                 return 0;
 340         }
 341
 342         free(mntdata);
 343
 344         INFO("Mounted \"%s\" on \"%s\", with FSType \"%s\"", cbarg->rootfs,
 345              cbarg->target, fstype);
 346
 347         return 1;
 348 }
 349
 350 const char *linkderef(const char *path, char *dest)
 351 {
 352         struct stat sbuf;
 353         ssize_t ret;
 354
 355         ret = stat(path, &sbuf);
 356         if (ret < 0) {
 357                 SYSERROR("Failed to get status of file - \"%s\"", path);
 358                 return NULL;
 359         }
 360
 361         if (!S_ISLNK(sbuf.st_mode))
 362                 return path;
 363
 364         ret = readlink(path, dest, PATH_MAX);
 365         if (ret < 0) {
 366                 SYSERROR("Failed to read link of \"%s\"", path);
 367                 return NULL;
 368         } else if (ret >= PATH_MAX) {
 369                 ERROR("The name of link of \"%s\" is too long", path);
 370                 return NULL;
 371         }
 372         dest[ret] = '\0';
 373
 374         return dest;
 375 }
 376
 377 /*
 378  * is an unprivileged user allowed to make this kind of snapshot
 379  */
 380 bool unpriv_snap_allowed(struct lxc_storage *b, const char *t, bool snap,
 381                          bool maybesnap)
 382 {
 383         if (!t) {
 384                 /* New type will be same as original (unless snap && b->type ==
 385                  * dir, in which case it will be overlayfs -- which is also
 386                  * allowed).
 387                  */
 388                 if (strcmp(b->type, "dir") == 0 ||
 389                     strcmp(b->type, "overlay") == 0 ||
 390                     strcmp(b->type, "overlayfs") == 0 ||
 391                     strcmp(b->type, "btrfs") == 0 ||
 392                     strcmp(b->type, "loop") == 0)
 393                         return true;
 394
 395                 return false;
 396         }
 397
 398         /* Unprivileged users can copy and snapshot dir, overlayfs, and loop.
 399          * In particular, not zfs, btrfs, or lvm.
 400          */
 401         if (strcmp(t, "dir") == 0 ||
 402             strcmp(t, "overlay") == 0 ||
 403             strcmp(t, "overlayfs") == 0 ||
 404             strcmp(t, "btrfs") == 0 ||
 405             strcmp(t, "loop") == 0)
 406                 return true;
 407
 408         return false;
 409 }
 410
 411 uint64_t get_fssize(char *s)
 412 {
 413         uint64_t ret;
 414         char *end;
 415
 416         ret = strtoull(s, &end, 0);
 417         if (end == s) {
 418                 ERROR("Invalid blockdev size '%s', using default size", s);
 419                 return 0;
 420         }
 421
 422         while (isblank(*end))
 423                 end++;
 424
 425         if (*end == '\0') {
 426                 ret *= 1024ULL * 1024ULL; /* MB by default */
 427         } else if (*end == 'b' || *end == 'B') {
 428                 ret *= 1ULL;
 429         } else if (*end == 'k' || *end == 'K') {
 430                 ret *= 1024ULL;
 431         } else if (*end == 'm' || *end == 'M') {
 432                 ret *= 1024ULL * 1024ULL;
 433         } else if (*end == 'g' || *end == 'G') {
 434                 ret *= 1024ULL * 1024ULL * 1024ULL;
 435         } else if (*end == 't' || *end == 'T') {
 436                 ret *= 1024ULL * 1024ULL * 1024ULL * 1024ULL;
 437         } else {
 438                 ERROR("Invalid blockdev unit size '%c' in '%s', using default size", *end, s);
 439                 return 0;
 440         }
 441
 442         return ret;
 443 }
 444
 445 bool is_valid_storage_type(const char *type)
 446 {
 447         if (strcmp(type, "dir") == 0 ||
 448             strcmp(type, "btrfs") == 0 ||
 449             strcmp(type, "loop") == 0 ||
 450             strcmp(type, "lvm") == 0 ||
 451             strcmp(type, "nbd") == 0 ||
 452             strcmp(type, "overlay") == 0 ||
 453             strcmp(type, "overlayfs") == 0 ||
 454             strcmp(type, "rbd") == 0 ||
 455             strcmp(type, "zfs") == 0)
 456                 return true;
 457
 458         return false;
 459 }
 460
 461 int storage_destroy_wrapper(void *data)
 462 {
 463         struct lxc_conf *conf = data;
 464
 465         (void)lxc_drop_groups();
 466
 467         if (setgid(0) < 0) {
 468                 SYSERROR("Failed to setgid to 0");
 469                 return -1;
 470         }
 471
 472         if (setuid(0) < 0) {
 473                 SYSERROR("Failed to setuid to 0");
 474                 return -1;
 475         }
 476
 477         if (!storage_destroy(conf)) {
 478                 ERROR("Failed to destroy storage");
 479                 return -1;
 480         }
 481
 482         return 0;
 483 }