]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.h
utils: move recursive_destroy() from cfgsng to utils.
[mirror_lxc.git] / src / lxc / utils.h
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23 #ifndef __LXC_UTILS_H
24 #define __LXC_UTILS_H
25
26 /* Properly support loop devices on 32bit systems. */
27 #define _FILE_OFFSET_BITS 64
28
29 #include "config.h"
30
31 #include <errno.h>
32 #include <stdarg.h>
33 #include <stdio.h>
34 #include <stdbool.h>
35 #include <unistd.h>
36 #include <linux/loop.h>
37 #include <linux/magic.h>
38 #include <linux/types.h>
39 #include <sys/syscall.h>
40 #include <sys/types.h>
41 #include <sys/vfs.h>
42
43 #ifdef HAVE_LINUX_MEMFD_H
44 #include <linux/memfd.h>
45 #endif
46
47 #include "initutils.h"
48
49 /* Define __S_ISTYPE if missing from the C library. */
50 #ifndef __S_ISTYPE
51 #define __S_ISTYPE(mode, mask) (((mode)&S_IFMT) == (mask))
52 #endif
53
54 #if HAVE_LIBCAP
55 #ifndef CAP_SETFCAP
56 #define CAP_SETFCAP 31
57 #endif
58
59 #ifndef CAP_MAC_OVERRIDE
60 #define CAP_MAC_OVERRIDE 32
61 #endif
62
63 #ifndef CAP_MAC_ADMIN
64 #define CAP_MAC_ADMIN 33
65 #endif
66 #endif
67
68 #ifndef PR_CAPBSET_DROP
69 #define PR_CAPBSET_DROP 24
70 #endif
71
72 #ifndef LO_FLAGS_AUTOCLEAR
73 #define LO_FLAGS_AUTOCLEAR 4
74 #endif
75
76 #ifndef CAP_SETUID
77 #define CAP_SETUID 7
78 #endif
79
80 #ifndef CAP_SETGID
81 #define CAP_SETGID 6
82 #endif
83
84 /* needed for cgroup automount checks, regardless of whether we
85 * have included linux/capability.h or not */
86 #ifndef CAP_SYS_ADMIN
87 #define CAP_SYS_ADMIN 21
88 #endif
89
90 #ifndef CGROUP_SUPER_MAGIC
91 #define CGROUP_SUPER_MAGIC 0x27e0eb
92 #endif
93
94 #ifndef CGROUP2_SUPER_MAGIC
95 #define CGROUP2_SUPER_MAGIC 0x63677270
96 #endif
97
98 /* Useful macros */
/* The largest 64 bit integer, 2^64 - 1, is 20 decimal digits long; LXC_NUMSTRLEN64 allows one extra character: 21 */
100 #define LXC_NUMSTRLEN64 21
101 #define LXC_LINELEN 4096
102 #define LXC_IDMAPLEN 4096
103 #define LXC_MAX_BUFFER 4096
104 /* /proc/ = 6
105 * +
106 * <pid-as-str> = LXC_NUMSTRLEN64
107 * +
108 * /fd/ = 4
109 * +
110 * <fd-as-str> = LXC_NUMSTRLEN64
111 * +
112 * \0 = 1
113 */
114 #define LXC_PROC_PID_FD_LEN (6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1)
115
116 /* returns 1 on success, 0 if there were any failures */
117 extern int lxc_rmdir_onedev(const char *path, const char *exclude);
118 extern int get_u16(unsigned short *val, const char *arg, int base);
119 extern int mkdir_p(const char *dir, mode_t mode);
120 extern char *get_rundir(void);
121
122 /* Define getline() if missing from the C library */
123 #ifndef HAVE_GETLINE
124 #ifdef HAVE_FGETLN
125 #include <../include/getline.h>
126 #endif
127 #endif
128
129 #if !defined(__NR_setns) && !defined(__NR_set_ns)
130 #if defined(__x86_64__)
131 #define __NR_setns 308
132 #elif defined(__i386__)
133 #define __NR_setns 346
134 #elif defined(__arm__)
135 #define __NR_setns 375
136 #elif defined(__aarch64__)
137 #define __NR_setns 375
138 #elif defined(__powerpc__)
139 #define __NR_setns 350
140 #elif defined(__s390__)
141 #define __NR_setns 339
142 #endif
143 #endif
144
145 /* Define setns() if missing from the C library */
146 #ifndef HAVE_SETNS
/* Fallback setns(): issue the raw system call when a syscall number is known
 * at compile time (either spelling of the macro), otherwise fail with errno
 * set to ENOSYS and return -1.
 */
static inline int setns(int fd, int nstype)
{
#if defined(__NR_setns)
	const long nr = __NR_setns;
#elif defined(__NR_set_ns)
	const long nr = __NR_set_ns;
#endif

#if defined(__NR_setns) || defined(__NR_set_ns)
	return syscall(nr, fd, nstype);
#else
	errno = ENOSYS;
	return -1;
#endif
}
158 #endif
159
160 /* Define sethostname() if missing from the C library */
161 #ifndef HAVE_SETHOSTNAME
/* Fallback sethostname(): forwards name and len to the raw system call when
 * __NR_sethostname is known; without a syscall number it sets errno to ENOSYS
 * and returns -1.
 */
static inline int sethostname(const char *name, size_t len)
{
#ifndef __NR_sethostname
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_sethostname, name, len);
#endif
}
171 #endif
172
173 /* Define unshare() if missing from the C library */
174 #ifndef HAVE_UNSHARE
/* Fallback unshare(): returns the result of the raw system call when
 * __NR_unshare is known; otherwise returns -1 with errno set to ENOSYS.
 */
static inline int unshare(int flags)
{
	int ret = -1;

#ifdef __NR_unshare
	ret = syscall(__NR_unshare, flags);
#else
	errno = ENOSYS;
#endif

	return ret;
}
184 #else
185 extern int unshare(int);
186 #endif
187
188 /* Define signalfd() if missing from the C library */
189 #ifdef HAVE_SYS_SIGNALFD_H
190 # include <sys/signalfd.h>
191 #else
192 /* assume kernel headers are too old */
193 #include <stdint.h>
/* Local definition used only when <sys/signalfd.h> is not available.
 * Field order and widths must not change: twelve 32-bit members, four 64-bit
 * members and 48 bytes of padding, 128 bytes in total.
 */
struct signalfd_siginfo
{
	uint32_t ssi_signo;
	int32_t ssi_errno, ssi_code;
	uint32_t ssi_pid, ssi_uid;
	int32_t ssi_fd;
	uint32_t ssi_tid, ssi_band, ssi_overrun, ssi_trapno;
	int32_t ssi_status, ssi_int;
	uint64_t ssi_ptr, ssi_utime, ssi_stime, ssi_addr;
	uint8_t __pad[48]; /* pads the structure out to 128 bytes */
};
214
215 # ifndef __NR_signalfd4
216 /* assume kernel headers are too old */
217 # if __i386__
218 # define __NR_signalfd4 327
219 # elif __x86_64__
220 # define __NR_signalfd4 289
221 # elif __powerpc__
222 # define __NR_signalfd4 313
223 # elif __s390x__
224 # define __NR_signalfd4 322
225 # elif __arm__
226 # define __NR_signalfd4 355
227 # elif __mips__ && _MIPS_SIM == _ABIO32
228 # define __NR_signalfd4 4324
229 # elif __mips__ && _MIPS_SIM == _ABI64
230 # define __NR_signalfd4 5283
231 # elif __mips__ && _MIPS_SIM == _ABIN32
232 # define __NR_signalfd4 6287
233 # endif
234 #endif
235
236 # ifndef __NR_signalfd
237 /* assume kernel headers are too old */
238 # if __i386__
239 # define __NR_signalfd 321
240 # elif __x86_64__
241 # define __NR_signalfd 282
242 # elif __powerpc__
243 # define __NR_signalfd 305
244 # elif __s390x__
245 # define __NR_signalfd 316
246 # elif __arm__
247 # define __NR_signalfd 349
248 # elif __mips__ && _MIPS_SIM == _ABIO32
249 # define __NR_signalfd 4317
250 # elif __mips__ && _MIPS_SIM == _ABI64
251 # define __NR_signalfd 5276
252 # elif __mips__ && _MIPS_SIM == _ABIN32
253 # define __NR_signalfd 6280
254 # endif
255 #endif
256
/* Fallback signalfd() built on the raw system calls.
 *
 * @param fd     Passed through to the kernel unchanged.
 * @param mask   Signal set passed through to the kernel.
 * @param flags  Passed to signalfd4; the legacy call takes no flags.
 *
 * Tries signalfd4 first. Only when that call FAILED with ENOSYS and no flags
 * were requested is the legacy signalfd call attempted.
 *
 * Returns the system call's result: a negative value on failure, with errno
 * set by the failing call.
 */
static inline int signalfd(int fd, const sigset_t *mask, int flags)
{
	int retval;

	retval = syscall(__NR_signalfd4, fd, mask, _NSIG / 8, flags);

	/* errno is only meaningful after a failure. Without the retval check a
	 * stale ENOSYS left by an earlier call would trigger a second syscall
	 * and leak the descriptor signalfd4 just created.
	 */
	if (retval < 0 && errno == ENOSYS && flags == 0)
		retval = syscall(__NR_signalfd, fd, mask, _NSIG / 8);

	return retval;
}
266 #endif
267
268 /* loop devices */
269 #ifndef LO_FLAGS_AUTOCLEAR
270 #define LO_FLAGS_AUTOCLEAR 4
271 #endif
272
273 #ifndef LOOP_CTL_GET_FREE
274 #define LOOP_CTL_GET_FREE 0x4C82
275 #endif
276
277 /* memfd_create() */
278 #ifndef MFD_CLOEXEC
279 #define MFD_CLOEXEC 0x0001U
280 #endif
281
282 #ifndef MFD_ALLOW_SEALING
283 #define MFD_ALLOW_SEALING 0x0002U
284 #endif
285
286 #ifndef HAVE_MEMFD_CREATE
/* Per-architecture fallback numbers for the memfd_create system call, used
 * only when the system headers do not already provide __NR_memfd_create.
 */
#ifndef __NR_memfd_create
#if defined(__i386__)
#define __NR_memfd_create 356
#elif defined(__x86_64__)
#define __NR_memfd_create 319
#elif defined(__arm__)
#define __NR_memfd_create 385
#elif defined(__aarch64__)
#define __NR_memfd_create 279
#elif defined(__s390__)
#define __NR_memfd_create 350
#elif defined(__powerpc__)
#define __NR_memfd_create 360
#elif defined(__sparc__)
#define __NR_memfd_create 348
#elif defined(__blackfin__)
#define __NR_memfd_create 390
#elif defined(__ia64__)
#define __NR_memfd_create 1340
#elif defined(_MIPS_SIM)
#if _MIPS_SIM == _MIPS_SIM_ABI32
#define __NR_memfd_create 4354
#endif
#if _MIPS_SIM == _MIPS_SIM_NABI32
#define __NR_memfd_create 6318
#endif
#if _MIPS_SIM == _MIPS_SIM_ABI64
#define __NR_memfd_create 5314
#endif
#endif
#endif

/* Fallback memfd_create(): forwards name and flags to the raw system call.
 * When no syscall number is known for this architecture it returns -1 with
 * errno set to ENOSYS.
 */
static inline int memfd_create(const char *name, unsigned int flags)
{
#ifndef __NR_memfd_create
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_memfd_create, name, flags);
#endif
}
326 #else
327 extern int memfd_create(const char *name, unsigned int flags);
328 #endif
329
/* Set the descriptor flags of fd to FD_CLOEXEC via fcntl(F_SETFD).
 * Returns whatever fcntl() returns.
 */
static inline int lxc_set_cloexec(int fd)
{
	const int fd_flags = FD_CLOEXEC;

	return fcntl(fd, F_SETFD, fd_flags);
}
334
335 /* Struct to carry child pid from lxc_popen() to lxc_pclose().
336 * Not an opaque struct to allow direct access to the underlying FILE *
337 * (i.e., struct lxc_popen_FILE *file; fgets(buf, sizeof(buf), file->f))
338 * without additional wrappers.
339 */
struct lxc_popen_FILE
{
	/* NOTE(review): presumably the pipe descriptor backing f; confirm
	 * against lxc_popen().
	 */
	int pipe;

	/* Stream callers may use directly, e.g. with fgets(). */
	FILE *f;

	/* Pid of the child, carried from lxc_popen() to lxc_pclose(). */
	pid_t child_pid;
};
345
346 /* popen(command, "re") replacement that restores default signal mask
347 * via sigprocmask(2) (unblocks all signals) after fork(2) but prior to calling exec(3).
348 * In short, popen(command, "re") does pipe() + fork() + exec()
349 * while lxc_popen(command) does pipe() + fork() + sigprocmask() + exec().
350 * Returns pointer to struct lxc_popen_FILE, that should be freed with lxc_pclose().
351 * On error returns NULL.
352 */
353 extern struct lxc_popen_FILE *lxc_popen(const char *command);
354
355 /* pclose() replacement to be used on struct lxc_popen_FILE *,
356 * returned by lxc_popen().
357 * Waits for associated process to terminate, returns its exit status and
358 * frees resources, pointed to by struct lxc_popen_FILE *.
359 */
360 extern int lxc_pclose(struct lxc_popen_FILE *fp);
361
362 /**
363 * BUILD_BUG_ON - break compile if a condition is true.
364 * @condition: the condition which the compiler should know is false.
365 *
366 * If you have some code which relies on certain constants being equal, or
367 * other compile-time-evaluated condition, you should use BUILD_BUG_ON to
368 * detect if someone changes it.
369 *
370 * The implementation uses gcc's reluctance to create a negative array, but
371 * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments
372 * to inline functions). So as a fallback we use the optimizer; if it can't
373 * prove the condition is false, it will cause a link error on the undefined
374 * "__build_bug_on_failed". This error message can be harder to track down
375 * though, hence the two different methods.
376 */
#ifdef __OPTIMIZE__
/* Never defined anywhere: a reference that survives optimization fails at
 * link time.
 */
extern int __build_bug_on_failed;
#define BUILD_BUG_ON(condition)                                  \
	do {                                                     \
		((void)sizeof(char[1 - 2 * !!(condition)]));     \
		if (condition)                                   \
			__build_bug_on_failed = 1;               \
	} while (0)
#else
/* Unoptimized builds rely on the negative array size alone. */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))
#endif
387
388 /*
389 * wait on a child we forked
390 */
391 extern int wait_for_pid(pid_t pid);
392 extern int lxc_wait_for_pid_status(pid_t pid);
393
394 /* send and receive buffers completely */
395 extern ssize_t lxc_write_nointr(int fd, const void *buf, size_t count);
396 extern ssize_t lxc_read_nointr(int fd, void *buf, size_t count);
397 extern ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count,
398 const void *expected_buf);
399 #if HAVE_LIBGNUTLS
400 #define SHA_DIGEST_LENGTH 20
401 extern int sha1sum_file(char *fnam, unsigned char *md_value);
402 #endif
403
404 /* read and write whole files */
405 extern int lxc_write_to_file(const char *filename, const void *buf,
406 size_t count, bool add_newline, mode_t mode);
407 extern int lxc_read_from_file(const char *filename, void *buf, size_t count);
408
409 /* convert variadic argument lists to arrays (for execl type argument lists) */
410 extern char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup);
411 extern const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip);
412
413 /* Some simple string functions; if they return pointers, they are allocated
414 * buffers.
415 */
416 extern char *lxc_string_replace(const char *needle, const char *replacement,
417 const char *haystack);
418 extern bool lxc_string_in_array(const char *needle, const char **haystack);
419 extern char *lxc_string_join(const char *sep, const char **parts,
420 bool use_as_prefix);
421 /* Normalize and split path: Leading and trailing / are removed, multiple
422 * / are compactified, .. and . are resolved (.. on the top level is considered
423 * identical to .).
424 * Examples:
425 * / -> { NULL }
426 * foo/../bar -> { bar, NULL }
427 * ../../ -> { NULL }
428 * ./bar/baz/.. -> { bar, NULL }
429 * foo//bar -> { foo, bar, NULL }
430 */
431 extern char **lxc_normalize_path(const char *path);
432 /* remove multiple slashes from the path, e.g. ///foo//bar -> /foo/bar */
433 extern char *lxc_deslashify(const char *path);
434 extern char *lxc_append_paths(const char *first, const char *second);
435 /* Note: the following two functions use strtok(), so they will never
436 * consider an empty element, even if two delimiters are next to
437 * each other.
438 */
439 extern bool lxc_string_in_list(const char *needle, const char *haystack,
440 char sep);
441 extern char **lxc_string_split(const char *string, char sep);
442 extern char **lxc_string_split_and_trim(const char *string, char sep);
443 extern char **lxc_string_split_quoted(char *string);
444 /* Append string to NULL-terminated string array. */
445 extern int lxc_append_string(char ***list, char *entry);
446
447 /* some simple array manipulation utilities */
448 typedef void (*lxc_free_fn)(void *);
449 typedef void *(*lxc_dup_fn)(void *);
450 extern int lxc_grow_array(void ***array, size_t *capacity, size_t new_size,
451 size_t capacity_increment);
452 extern void lxc_free_array(void **array, lxc_free_fn element_free_fn);
453 extern size_t lxc_array_len(void **array);
454
455 extern void **lxc_append_null_to_array(void **array, size_t count);
456 extern void remove_trailing_newlines(char *l);
457
458 /* initialize rand with urandom */
459 extern int randseed(bool);
460
461 /* are we unprivileged with respect to our namespaces */
/* True when the effective uid of the calling process is not 0. */
static inline bool am_guest_unpriv(void)
{
	const uid_t euid = geteuid();

	return euid != 0;
}
465
466 /* are we unprivileged with respect to init_user_ns */
/* Returns true when the caller's effective uid is non-zero, or when the first
 * line of /proc/self/uid_map is anything other than "0 0 UINT32_MAX".
 * Returns false when the map cannot be opened or its first line does not
 * parse as three numbers.
 */
static inline bool am_host_unpriv(void)
{
	uid_t user, host, count;
	FILE *map;
	int matched;

	if (geteuid() != 0)
		return true;

	/* Effective root: check whether we are inside a user namespace,
	 * because then we are unprivileged as well.
	 */
	map = fopen("/proc/self/uid_map", "r");
	if (!map)
		return false;

	matched = fscanf(map, "%u %u %u", &user, &host, &count);
	fclose(map);
	if (matched != 3)
		return false;

	/* Only the identity mapping over the full range is treated as the
	 * privileged case.
	 */
	return !(user == 0 && host == 0 && count == UINT32_MAX);
}
494
495 /*
496 * parse /proc/self/uid_map to find what @orig maps to
497 */
498 extern uid_t get_ns_uid(uid_t orig);
499
500 extern bool dir_exists(const char *path);
501
502 #define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)
503 extern uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval);
504
505 extern int detect_shared_rootfs(void);
506 extern bool detect_ramfs_rootfs(void);
507 extern char *on_path(const char *cmd, const char *rootfs);
508 extern bool file_exists(const char *f);
509 extern bool cgns_supported(void);
510 extern char *choose_init(const char *rootfs);
511 extern int print_to_file(const char *file, const char *content);
512 extern bool switch_to_ns(pid_t pid, const char *ns);
513 extern int is_dir(const char *path);
514 extern char *get_template_path(const char *t);
515 extern int safe_mount(const char *src, const char *dest, const char *fstype,
516 unsigned long flags, const void *data,
517 const char *rootfs);
518 extern int lxc_mount_proc_if_needed(const char *rootfs);
519 extern int open_devnull(void);
520 extern int set_stdfds(int fd);
521 extern int null_stdfds(void);
522 extern int lxc_count_file_lines(const char *fn);
523 extern int lxc_preserve_ns(const int pid, const char *ns);
524
525 /* Check whether a signal is blocked by a process. */
526 extern bool task_blocks_signal(pid_t pid, int signal);
527
528 /* Helper functions to parse numbers. */
529 extern int lxc_safe_uint(const char *numstr, unsigned int *converted);
530 extern int lxc_safe_int(const char *numstr, int *converted);
531 extern int lxc_safe_long(const char *numstr, long int *converted);
532 extern int lxc_safe_long_long(const char *numstr, long long int *converted);
533 extern int lxc_safe_ulong(const char *numstr, unsigned long *converted);
534 extern int lxc_safe_uint64(const char *numstr, uint64_t *converted, int base);
535 /* Handles B, kb, MB, GB. Detects overflows and reports -ERANGE. */
536 extern int parse_byte_size_string(const char *s, int64_t *converted);
537
538 /* Switch to a new uid and gid. */
539 extern int lxc_switch_uid_gid(uid_t uid, gid_t gid);
540 extern int lxc_setgroups(int size, gid_t list[]);
541
542 /* Find an unused loop device and associate it with source. */
543 extern int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags);
544
545 /* Clear all mounts on a given node.
546 * >= 0 successfully cleared. The number returned is the number of umounts
547 * performed.
548 * < 0 error umounting. Return -errno.
549 */
550 extern int lxc_unstack_mountpoint(const char *path, bool lazy);
551
552 /*
 * run_command runs a command and collects its std{err,out} output in buf.
 *
 * @param[out] buf The buffer where the command's std{err,out} output will be
 * read into. If no output was produced, buf will be memset
 * to 0.
558 * @param[in] buf_size The size of buf. This function will reserve one byte for
559 * \0-termination.
560 * @param[in] child_fn The function to be run in the child process. This
561 * function must exec.
562 * @param[in] args Arguments to be passed to child_fn.
563 */
564 extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *),
565 void *args);
566
567 /* Concatenate all passed-in strings into one path. Do not fail. If any piece
568 * is not prefixed with '/', add a '/'.
569 */
570 __attribute__((sentinel)) extern char *must_make_path(const char *first, ...);
571 __attribute__((sentinel)) extern char *must_append_path(char *first, ...);
572
573 /* return copy of string @entry; do not fail. */
574 extern char *must_copy_string(const char *entry);
575
/* Re-allocate a pointer, do not fail */
577 extern void *must_realloc(void *orig, size_t sz);
578
579 /* __typeof__ should be safe to use with all compilers. */
580 typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
581 extern bool has_fs_type(const char *path, fs_type_magic magic_val);
582 extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val);
583 extern bool lxc_nic_exists(char *nic);
584 extern int lxc_make_tmpfile(char *template, bool rm);
585
/* Page size as reported by sysconf(_SC_PAGESIZE); falls back to 4096 (1 << 12)
 * when sysconf() reports zero or a negative value.
 */
static inline uint64_t lxc_getpagesize(void)
{
	int64_t pgsz = sysconf(_SC_PAGESIZE);

	return pgsz > 0 ? pgsz : (1 << 12);
}
596
/* If n is not a power of 2 this function will return the next power of 2
 * greater than that number. Note that this function always returns the *next*
 * power of 2 *greater* than that number, not the *nearest*. For example,
 * passing 1025 as argument this function will return 2048 although the closest
 * power of 2 would be 1024.
 * If the caller passes in 0 they will receive 0 in return since this is invalid
 * input and 0 is not a power of 2.
 */
604 */
605 extern uint64_t lxc_find_next_power2(uint64_t n);
606
/* Thread id obtained straight from the gettid system call. When SYS_gettid is
 * not defined, the result of lxc_raw_getpid() (declared outside this block) is
 * returned instead.
 */
static inline pid_t lxc_raw_gettid(void)
{
#ifndef SYS_gettid
	return lxc_raw_getpid();
#else
	return syscall(SYS_gettid);
#endif
}
615
616 /* Set a signal the child process will receive after the parent has died. */
617 extern int lxc_set_death_signal(int signal);
618 extern int fd_cloexec(int fd, bool cloexec);
619 extern int recursive_destroy(char *dirname);
620
621 #endif /* __LXC_UTILS_H */