]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/utils.h
seccomp: filter syscalls based on arguments
[mirror_lxc.git] / src / lxc / utils.h
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
f1a4a029
ÇO
23#ifndef __LXC_UTILS_H
24#define __LXC_UTILS_H
0ad19a3f 25
c6868a1f
CB
26/* Properly support loop devices on 32bit systems. */
27#define _FILE_OFFSET_BITS 64
28
a1e5280d
CB
29#include "config.h"
30
6a44839f 31#include <errno.h>
61a1d519 32#include <stdarg.h>
d0386d66 33#include <stdio.h>
502657d5 34#include <stdbool.h>
c6868a1f
CB
35#include <unistd.h>
36#include <linux/loop.h>
29a11a7f 37#include <linux/magic.h>
ec346ea1 38#include <sys/syscall.h>
c797a220 39#include <sys/types.h>
a035c53a 40#include <sys/vfs.h>
f2363e38 41
f749d524
CB
42#ifdef HAVE_LINUX_MEMFD_H
43#include <linux/memfd.h>
44#endif
45
4295c5de 46#include "initutils.h"
c797a220 47
a394f952
CB
48/* Define __S_ISTYPE if missing from the C library. */
49#ifndef __S_ISTYPE
50#define __S_ISTYPE(mode, mask) (((mode)&S_IFMT) == (mask))
51#endif
52
478fb51d
CB
53#if HAVE_LIBCAP
54#ifndef CAP_SETFCAP
55#define CAP_SETFCAP 31
56#endif
57
58#ifndef CAP_MAC_OVERRIDE
59#define CAP_MAC_OVERRIDE 32
60#endif
61
62#ifndef CAP_MAC_ADMIN
63#define CAP_MAC_ADMIN 33
64#endif
65#endif
66
67#ifndef PR_CAPBSET_DROP
68#define PR_CAPBSET_DROP 24
69#endif
70
71#ifndef LO_FLAGS_AUTOCLEAR
72#define LO_FLAGS_AUTOCLEAR 4
73#endif
74
75#ifndef CAP_SETUID
76#define CAP_SETUID 7
77#endif
78
79#ifndef CAP_SETGID
80#define CAP_SETGID 6
81#endif
82
83/* needed for cgroup automount checks, regardless of whether we
84 * have included linux/capability.h or not */
85#ifndef CAP_SYS_ADMIN
86#define CAP_SYS_ADMIN 21
87#endif
88
b07511df
CB
89/* Useful macros */
90/* The largest 64-bit integer, 2^64 - 1 = 18446744073709551615, has 20 decimal digits; 21 bytes hold it plus the terminating \0. */
eab15c1e
CB
91#define LXC_NUMSTRLEN64 21
92#define LXC_LINELEN 4096
4bc3b759 93#define LXC_IDMAPLEN 4096
b07511df 94
60bf62d4 95/* returns 1 on success, 0 if there were any failures */
18aa217b 96extern int lxc_rmdir_onedev(char *path, const char *exclude);
7c11d57a 97extern int get_u16(unsigned short *val, const char *arg, int base);
1b09f2c0 98extern int mkdir_p(const char *dir, mode_t mode);
fd8c2777 99extern char *get_rundir(void);
9e60f51d 100
6a44839f
DE
101/* Define getline() if missing from the C library */
102#ifndef HAVE_GETLINE
103#ifdef HAVE_FGETLN
104#include <../include/getline.h>
105#endif
106#endif
107
108/* Define setns() if missing from the C library */
109#ifndef HAVE_SETNS
/*
 * setns() - fallback for C libraries that do not provide a wrapper.
 *
 * Issues the raw syscall under whichever number the kernel headers expose
 * (__NR_setns, or the alternative spelling __NR_set_ns). If neither is
 * defined the call fails with errno set to ENOSYS.
 *
 * Returns the result of the syscall, or -1 when it is unsupported.
 */
static inline int setns(int fd, int nstype)
{
	int ret = -1;

#if defined(__NR_setns)
	ret = syscall(__NR_setns, fd, nstype);
#elif defined(__NR_set_ns)
	ret = syscall(__NR_set_ns, fd, nstype);
#else
	(void)fd;
	(void)nstype;
	errno = ENOSYS;
#endif

	return ret;
}
121#endif
122
123/* Define unshare() if missing from the C library */
124#ifndef HAVE_UNSHARE
/*
 * unshare() - fallback for C libraries that do not provide a wrapper.
 *
 * Forwards @flags to the raw syscall when __NR_unshare is known; otherwise
 * fails with errno set to ENOSYS.
 *
 * Returns the result of the syscall, or -1 when it is unsupported.
 */
static inline int unshare(int flags)
{
	int ret = -1;

#if defined(__NR_unshare)
	ret = syscall(__NR_unshare, flags);
#else
	(void)flags;
	errno = ENOSYS;
#endif

	return ret;
}
134#else
1a0e70ac 135extern int unshare(int);
6a44839f
DE
136#endif
137
b5159817
DE
138/* Define signalfd() if missing from the C library */
139#ifdef HAVE_SYS_SIGNALFD_H
140# include <sys/signalfd.h>
141#else
142/* assume kernel headers are too old */
143#include <stdint.h>
/*
 * Local definition of struct signalfd_siginfo, used only when
 * <sys/signalfd.h> is not available. Field order and widths must not change:
 * the structure is 128 bytes in total, with the trailing __pad filling the
 * remainder after the named members.
 */
struct signalfd_siginfo {
	uint32_t ssi_signo;
	int32_t ssi_errno, ssi_code;
	uint32_t ssi_pid, ssi_uid;
	int32_t ssi_fd;
	uint32_t ssi_tid, ssi_band, ssi_overrun, ssi_trapno;
	int32_t ssi_status, ssi_int;
	uint64_t ssi_ptr, ssi_utime, ssi_stime, ssi_addr;
	uint8_t __pad[48];
};
164
/*
 * Fallback syscall numbers for signalfd4(), used when the kernel headers are
 * too old to define __NR_signalfd4. Architectures that are not listed leave
 * the macro undefined.
 */
#ifndef __NR_signalfd4
# if defined(__i386__)
#  define __NR_signalfd4 327
# elif defined(__x86_64__)
#  define __NR_signalfd4 289
# elif defined(__powerpc__)
#  define __NR_signalfd4 313
# elif defined(__s390x__)
#  define __NR_signalfd4 322
# elif defined(__arm__)
#  define __NR_signalfd4 355
# elif defined(__mips__)
/* The MIPS number depends on the ABI selected at compile time. */
#  if _MIPS_SIM == _ABIO32
#   define __NR_signalfd4 4324
#  elif _MIPS_SIM == _ABI64
#   define __NR_signalfd4 5283
#  elif _MIPS_SIM == _ABIN32
#   define __NR_signalfd4 6287
#  endif
# endif
#endif
185
/*
 * Fallback syscall numbers for the legacy signalfd() call, used when the
 * kernel headers are too old to define __NR_signalfd. Architectures that are
 * not listed leave the macro undefined.
 */
#ifndef __NR_signalfd
# if defined(__i386__)
#  define __NR_signalfd 321
# elif defined(__x86_64__)
#  define __NR_signalfd 282
# elif defined(__powerpc__)
#  define __NR_signalfd 305
# elif defined(__s390x__)
#  define __NR_signalfd 316
# elif defined(__arm__)
#  define __NR_signalfd 349
# elif defined(__mips__)
/* The MIPS number depends on the ABI selected at compile time. */
#  if _MIPS_SIM == _ABIO32
#   define __NR_signalfd 4317
#  elif _MIPS_SIM == _ABI64
#   define __NR_signalfd 5276
#  elif _MIPS_SIM == _ABIN32
#   define __NR_signalfd 6280
#  endif
# endif
#endif
206
/*
 * signalfd() - fallback for C libraries that do not provide a wrapper.
 *
 * @fd:    forwarded unchanged to the kernel.
 * @mask:  signal set forwarded to the kernel; its size is passed as _NSIG / 8.
 * @flags: forwarded to signalfd4(); the legacy signalfd() call takes no flags
 *         and is therefore only tried when @flags is 0.
 *
 * signalfd4() is tried first. The legacy call is used only when signalfd4()
 * actually failed with ENOSYS: errno is left untouched by a successful
 * syscall, so it must not be inspected unless the call returned an error.
 * Otherwise a stale ENOSYS would trigger a second call and leak the first
 * descriptor.
 *
 * Returns the result of the syscall, or -1 with errno set to ENOSYS when
 * neither syscall can be used.
 */
static inline int signalfd(int fd, const sigset_t *mask, int flags)
{
	int retval = -1;

#ifdef __NR_signalfd4
	retval = syscall(__NR_signalfd4, fd, mask, _NSIG / 8, flags);
	if (retval >= 0 || errno != ENOSYS)
		return retval;
#endif

#ifdef __NR_signalfd
	/* The legacy syscall cannot express flags, so only use it without. */
	if (flags == 0)
		return syscall(__NR_signalfd, fd, mask, _NSIG / 8);
#endif

	errno = ENOSYS;
	return retval;
}
216#endif
217
c6868a1f
CB
218/* loop devices */
219#ifndef LO_FLAGS_AUTOCLEAR
220#define LO_FLAGS_AUTOCLEAR 4
221#endif
222
223#ifndef LOOP_CTL_GET_FREE
224#define LOOP_CTL_GET_FREE 0x4C82
225#endif
226
f749d524
CB
227/* memfd_create() */
228#ifndef MFD_CLOEXEC
229#define MFD_CLOEXEC 0x0001U
230#endif
231
232#ifndef MFD_ALLOW_SEALING
233#define MFD_ALLOW_SEALING 0x0002U
234#endif
235
236#ifndef HAVE_MEMFD_CREATE
/*
 * memfd_create() - fallback for C libraries that do not provide a wrapper.
 *
 * When the kernel headers do not define __NR_memfd_create, a per-architecture
 * syscall number is supplied below. If no number is known for the target, the
 * call fails with errno set to ENOSYS.
 *
 * Returns the result of the syscall, or -1 when it is unsupported.
 */
static inline int memfd_create(const char *name, unsigned int flags)
{
#ifndef __NR_memfd_create
# if defined(__i386__)
#  define __NR_memfd_create 356
# elif defined(__x86_64__)
#  define __NR_memfd_create 319
# elif defined(__arm__)
#  define __NR_memfd_create 385
# elif defined(__aarch64__)
#  define __NR_memfd_create 279
# elif defined(__s390__)
#  define __NR_memfd_create 350
# elif defined(__powerpc__)
#  define __NR_memfd_create 360
# elif defined(__sparc__)
#  define __NR_memfd_create 348
# elif defined(__blackfin__)
#  define __NR_memfd_create 390
# elif defined(__ia64__)
#  define __NR_memfd_create 1340
# elif defined(_MIPS_SIM)
/* The MIPS number depends on the ABI selected at compile time. */
#  if _MIPS_SIM == _MIPS_SIM_ABI32
#   define __NR_memfd_create 4354
#  endif
#  if _MIPS_SIM == _MIPS_SIM_NABI32
#   define __NR_memfd_create 6318
#  endif
#  if _MIPS_SIM == _MIPS_SIM_ABI64
#   define __NR_memfd_create 5314
#  endif
# endif
#endif

#ifndef __NR_memfd_create
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_memfd_create, name, flags);
#endif
}
276#else
277extern int memfd_create(const char *name, unsigned int flags);
278#endif
279
b499121f
CB
/*
 * Set the file descriptor flags of @fd to FD_CLOEXEC via fcntl(F_SETFD).
 * Returns whatever fcntl() returns.
 */
static inline int lxc_set_cloexec(int fd)
{
	const int fd_flags = FD_CLOEXEC;

	return fcntl(fd, F_SETFD, fd_flags);
}
284
ebec9176
AM
285/* Struct to carry child pid from lxc_popen() to lxc_pclose().
286 * Not an opaque struct to allow direct access to the underlying FILE *
287 * (i.e., struct lxc_popen_FILE *file; fgets(buf, sizeof(buf), file->f))
288 * without additional wrappers.
289 */
struct lxc_popen_FILE {
	/* NOTE(review): presumably the pipe descriptor shared with the child;
	 * confirm against lxc_popen() in utils.c.
	 */
	int pipe;

	/* Stream callers may use directly, e.g. fgets(buf, sizeof(buf), file->f). */
	FILE *f;

	/* Child pid carried from lxc_popen() to lxc_pclose(). */
	pid_t child_pid;
};
295
296/* popen(command, "re") replacement that restores default signal mask
297 * via sigprocmask(2) (unblocks all signals) after fork(2) but prior to calling exec(3).
298 * In short, popen(command, "re") does pipe() + fork() + exec()
299 * while lxc_popen(command) does pipe() + fork() + sigprocmask() + exec().
ebec9176
AM
300 * Returns pointer to struct lxc_popen_FILE, that should be freed with lxc_pclose().
301 * On error returns NULL.
302 */
303extern struct lxc_popen_FILE *lxc_popen(const char *command);
304
305/* pclose() replacement to be used on struct lxc_popen_FILE *,
306 * returned by lxc_popen().
307 * Waits for associated process to terminate, returns its exit status and
308 * frees resources, pointed to by struct lxc_popen_FILE *.
ebec9176
AM
309 */
310extern int lxc_pclose(struct lxc_popen_FILE *fp);
311
e51d4895
DE
312/**
313 * BUILD_BUG_ON - break compile if a condition is true.
314 * @condition: the condition which the compiler should know is false.
315 *
316 * If you have some code which relies on certain constants being equal, or
317 * other compile-time-evaluated condition, you should use BUILD_BUG_ON to
318 * detect if someone changes it.
319 *
320 * The implementation uses gcc's reluctance to create a negative array, but
321 * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments
322 * to inline functions). So as a fallback we use the optimizer; if it can't
323 * prove the condition is false, it will cause a link error on the undefined
324 * "__build_bug_on_failed". This error message can be harder to track down
325 * though, hence the two different methods.
326 */
/*
 * With optimization enabled the negative-array-size check is combined with a
 * store to the never-defined __build_bug_on_failed, so a condition the
 * optimizer cannot prove false fails at link time. Without optimization only
 * the negative-array-size check is used.
 */
#ifdef __OPTIMIZE__
extern int __build_bug_on_failed;
#define BUILD_BUG_ON(condition)                              \
	do {                                                 \
		((void)sizeof(char[1 - 2 * !!(condition)])); \
		if (condition)                               \
			__build_bug_on_failed = 1;           \
	} while (0)
#else
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))
#endif
337
9be53773
SH
338/*
339 * wait on a child we forked
340 */
341extern int wait_for_pid(pid_t pid);
c797a220 342extern int lxc_wait_for_pid_status(pid_t pid);
9be53773 343
92f023dc 344/* send and receive buffers completely */
650468bb
CS
345extern ssize_t lxc_write_nointr(int fd, const void* buf, size_t count);
346extern ssize_t lxc_read_nointr(int fd, void* buf, size_t count);
1a0e70ac
CB
347extern ssize_t lxc_read_nointr_expect(int fd, void *buf, size_t count,
348 const void *expected_buf);
3ce74686
SH
349#if HAVE_LIBGNUTLS
350#define SHA_DIGEST_LENGTH 20
351extern int sha1sum_file(char *fnam, unsigned char *md_value);
352#endif
92f023dc 353
0e95426b 354/* read and write whole files */
1a0e70ac
CB
355extern int lxc_write_to_file(const char *filename, const void *buf,
356 size_t count, bool add_newline);
0e95426b 357extern int lxc_read_from_file(const char *filename, void* buf, size_t count);
0e95426b 358
61a1d519
CS
359/* convert variadic argument lists to arrays (for execl type argument lists) */
360extern char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup);
361extern const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip);
362
1a0e70ac
CB
363/* Some simple string functions; if they return pointers, they are allocated
364 * buffers.
365 */
366extern char *lxc_string_replace(const char *needle, const char *replacement,
367 const char *haystack);
502657d5 368extern bool lxc_string_in_array(const char *needle, const char **haystack);
1a0e70ac
CB
369extern char *lxc_string_join(const char *sep, const char **parts,
370 bool use_as_prefix);
502657d5
CS
371/* Normalize and split path: Leading and trailing / are removed, multiple
372 * / are compactified, .. and . are resolved (.. on the top level is considered
373 * identical to .).
374 * Examples:
375 * / -> { NULL }
376 * foo/../bar -> { bar, NULL }
377 * ../../ -> { NULL }
378 * ./bar/baz/.. -> { bar, NULL }
379 * foo//bar -> { foo, bar, NULL }
380 */
381extern char **lxc_normalize_path(const char *path);
aeb3682f 382/* remove multiple slashes from the path, e.g. ///foo//bar -> /foo/bar */
eda0afd4 383extern char *lxc_deslashify(const char *path);
24b51482 384extern char *lxc_append_paths(const char *first, const char *second);
502657d5
CS
385/* Note: the following two functions use strtok(), so they will never
386 * consider an empty element, even if two delimiters are next to
387 * each other.
388 */
1a0e70ac
CB
389extern bool lxc_string_in_list(const char *needle, const char *haystack,
390 char sep);
502657d5
CS
391extern char **lxc_string_split(const char *string, char sep);
392extern char **lxc_string_split_and_trim(const char *string, char sep);
3dca1af0 393extern char **lxc_string_split_quoted(char *string);
000dfda7
CB
394/* Append string to NULL-terminated string array. */
395extern int lxc_append_string(char ***list, char *entry);
502657d5
CS
396
397/* some simple array manipulation utilities */
398typedef void (*lxc_free_fn)(void *);
399typedef void *(*lxc_dup_fn)(void *);
1a0e70ac
CB
400extern int lxc_grow_array(void ***array, size_t *capacity, size_t new_size,
401 size_t capacity_increment);
502657d5
CS
402extern void lxc_free_array(void **array, lxc_free_fn element_free_fn);
403extern size_t lxc_array_len(void **array);
502657d5 404
799f29ab 405extern void **lxc_append_null_to_array(void **array, size_t count);
1adbd020 406
a1e5280d 407/* mmap() wrapper. lxc_strmmap() will take care to \0-terminate files so that
1adbd020 408 * normal string-handling functions can be used on the buffer. */
25086a5f
CB
409extern void *lxc_strmmap(void *addr, size_t length, int prot, int flags, int fd,
410 off_t offset);
a1e5280d 411/* munmap() wrapper. Use it to free memory mmap()ed with lxc_strmmap(). */
25086a5f 412extern int lxc_strmunmap(void *addr, size_t length);
1adbd020 413
1a0e70ac 414/* initialize rand with urandom */
508c263e 415extern int randseed(bool);
052616eb 416
1354955b
SH
/* Report whether the effective uid of the calling process is non-zero. */
static inline bool am_unpriv(void)
{
	uid_t euid = geteuid();

	return euid != 0;
}
5d897655
SH
420
421/*
422 * parse /proc/self/uid_map to find what @orig maps to
423 */
424extern uid_t get_ns_uid(uid_t orig);
c476bdce
SH
425
426extern bool dir_exists(const char *path);
93c379f0
ÇO
427
428#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)
1a0e70ac
CB
429extern uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval);
430
431extern int detect_shared_rootfs(void);
432extern bool detect_ramfs_rootfs(void);
433extern char *on_path(const char *cmd, const char *rootfs);
434extern bool file_exists(const char *f);
435extern bool cgns_supported(void);
436extern char *choose_init(const char *rootfs);
437extern int print_to_file(const char *file, const char *content);
438extern bool switch_to_ns(pid_t pid, const char *ns);
439extern int is_dir(const char *path);
440extern char *get_template_path(const char *t);
1a0e70ac
CB
441extern int safe_mount(const char *src, const char *dest, const char *fstype,
442 unsigned long flags, const void *data,
443 const char *rootfs);
444extern int lxc_mount_proc_if_needed(const char *rootfs);
445extern int open_devnull(void);
446extern int set_stdfds(int fd);
447extern int null_stdfds(void);
448extern int lxc_count_file_lines(const char *fn);
449extern int lxc_preserve_ns(const int pid, const char *ns);
330ae3d3
CB
450
451/* Check whether a signal is blocked by a process. */
1a0e70ac 452extern bool task_blocking_signal(pid_t pid, int signal);
6bc2eafe
CB
453
454/* Helper functions to parse numbers. */
1a0e70ac
CB
455extern int lxc_safe_uint(const char *numstr, unsigned int *converted);
456extern int lxc_safe_int(const char *numstr, int *converted);
457extern int lxc_safe_long(const char *numstr, long int *converted);
b037bc67 458extern int lxc_safe_long_long(const char *numstr, long long int *converted);
1a0e70ac 459extern int lxc_safe_ulong(const char *numstr, unsigned long *converted);
e3db0162
CB
460/* Handles B, kb, MB, GB. Detects overflows and reports -ERANGE. */
461extern int parse_byte_size_string(const char *s, int64_t *converted);
6bc2eafe 462
dbaf55a3 463/* Switch to a new uid and gid. */
1a0e70ac
CB
464extern int lxc_switch_uid_gid(uid_t uid, gid_t gid);
465extern int lxc_setgroups(int size, gid_t list[]);
dbaf55a3 466
c6868a1f 467/* Find an unused loop device and associate it with source. */
1a0e70ac 468extern int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags);
c6868a1f 469
74251e49
CB
470/* Clear all mounts on a given node.
471 * >= 0 successfully cleared. The number returned is the number of umounts
472 * performed.
473 * < 0 error umounting. Return -errno.
474 */
1a0e70ac 475extern int lxc_unstack_mountpoint(const char *path, bool lazy);
74251e49 476
ea3a694f
CB
477/*
478 * run_command runs a command and collects its std{err,out} output in buf.
479 *
480 * @param[out] buf The buffer where the command's std{err,out} output will be
481 * read into. If no output was produced, buf will be memset
482 * to 0.
483 * @param[in] buf_size The size of buf. This function will reserve one byte for
484 * \0-termination.
485 * @param[in] child_fn The function to be run in the child process. This
486 * function must exec.
487 * @param[in] args Arguments to be passed to child_fn.
488 */
1a0e70ac
CB
489extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *),
490 void *args);
ea3a694f 491
04ad7ffe
CB
492/* Concatenate all passed-in strings into one path. Do not fail. If any piece
493 * is not prefixed with '/', add a '/'.
494 */
1a0e70ac 495extern char *must_make_path(const char *first, ...) __attribute__((sentinel));
04ad7ffe
CB
496
497/* return copy of string @entry; do not fail. */
1a0e70ac 498extern char *must_copy_string(const char *entry);
04ad7ffe
CB
499
500/* Re-allocate a pointer, do not fail */
1a0e70ac 501extern void *must_realloc(void *orig, size_t sz);
04ad7ffe 502
a035c53a
CB
503/* __typeof__ should be safe to use with all compilers. */
504typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
1a0e70ac
CB
505extern bool has_fs_type(const char *path, fs_type_magic magic_val);
506extern bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val);
d75c14e2 507extern bool lxc_nic_exists(char *nic);
127c6e70 508extern int lxc_make_tmpfile(char *template, bool rm);
e4636123 509extern uint64_t lxc_getpagesize(void);
a035c53a 510
6222c3f4
CB
511/* If n is not a power of 2 this function will return the next power of 2
512 * greater than that number. Note that this function always returns the *next*
513 * power of 2 *greater* than that number, not the *nearest*. For example, passing 1025
514 * as argument this function will return 2048 although the closest power of 2
515 * would be 1024.
516 * If the caller passes in 0 they will receive 0 in return since this is invalid
517 * input and 0 is not a power of 2.
518 */
519extern uint64_t lxc_find_next_power2(uint64_t n);
520
5b72de5f 521#endif /* __LXC_UTILS_H */