#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif
-#include <alloca.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include "config.h"
#include "log.h"
+#include "memory_utils.h"
#include "namespace.h"
#include "utils.h"
return clone_arg->fn(clone_arg->arg);
}
-pid_t lxc_clone(int (*fn)(void *), void *arg, int flags)
+/*
+ * Size of the heap-allocated stack handed to the cloned child. The stack now
+ * lives in a malloc()ed chunk, so running off its low end would silently
+ * corrupt neighbouring heap memory in the child; keep it generously sized.
+ */
+#define __LXC_STACK_SIZE (8 * 1024 * 1024)
+
+/*
+ * lxc_clone - create a child with clone() that runs fn(arg).
+ *
+ * @fn:    function executed in the child; its return value is the child's
+ *         return value from the clone entry point.
+ * @arg:   opaque pointer passed to @fn.
+ * @flags: clone flags; SIGCHLD is always OR-ed in.
+ * @pidfd: forwarded unchanged to clone() as its first optional argument
+ *         (the location the kernel fills in when CLONE_PIDFD is requested,
+ *         see clone(2)).
+ *
+ * Returns the value of clone() (the child's pid in the parent, negative on
+ * failure) or -ENOMEM when the child stack cannot be allocated.
+ */
+pid_t lxc_clone(int (*fn)(void *), void *arg, int flags, int *pidfd)
{
+ pid_t ret;
struct clone_arg clone_arg = {
- .fn = fn,
- .arg = arg,
+ .fn = fn,
+ .arg = arg,
};
+ void *stack;
- size_t stack_size = lxc_getpagesize();
- void *stack = alloca(stack_size);
- pid_t ret;
+ stack = malloc(__LXC_STACK_SIZE);
+ if (!stack) {
+ SYSERROR("Failed to allocate clone stack");
+ return -ENOMEM;
+ }
+ /*
+  * The child must enter through do_clone(): &clone_arg is a
+  * struct clone_arg wrapper, not the caller's arg, so handing it to fn
+  * directly would make fn misinterpret its argument.
+  */
#ifdef __ia64__
- ret = __clone2(do_clone, stack, stack_size, flags | SIGCHLD, &clone_arg);
+ ret = __clone2(do_clone, stack, __LXC_STACK_SIZE, flags | SIGCHLD, &clone_arg, pidfd);
#else
- ret = clone(do_clone, stack + stack_size, flags | SIGCHLD, &clone_arg);
+ /* The stack grows down here, so pass the address just past the top. */
+ ret = clone(do_clone, (char *)stack + __LXC_STACK_SIZE, flags | SIGCHLD, &clone_arg, pidfd);
#endif
if (ret < 0)
SYSERROR("Failed to clone (%#x)", flags);
+ /*
+  * Release the parent's stack allocation whenever that is safe: always
+  * when clone() failed (no child exists), and on success unless CLONE_VM
+  * was requested, because then the child executes on this very memory.
+  * Done after the error is logged so errno is reported intact.
+  */
+ if (ret < 0 || !(flags & CLONE_VM))
+ free(stack);
return ret;
}
-/**
- * This is based on raw_clone in systemd but adapted to our needs. This uses
- * copy on write semantics and doesn't pass a stack. CLONE_VM is tricky and
- * doesn't really matter to us so disallow it.
- *
- * The nice thing about this is that we get fork() behavior. That is
- * lxc_raw_clone() returns 0 in the child and the child pid in the parent.
- */
-pid_t lxc_raw_clone(unsigned long flags)
-{
-
- /* These flags don't interest at all so we don't jump through any hoopes
- * of retrieving them and passing them to the kernel.
- */
- errno = EINVAL;
- if ((flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
- CLONE_CHILD_CLEARTID | CLONE_SETTLS)))
- return -EINVAL;
-
-#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
- /* On s390/s390x and cris the order of the first and second arguments
- * of the system call is reversed.
- */
- return (int)syscall(__NR_clone, NULL, flags | SIGCHLD);
-#elif defined(__sparc__) && defined(__arch64__)
- {
- /**
- * sparc64 always returns the other process id in %o0, and
- * a boolean flag whether this is the child or the parent in
- * %o1. Inline assembly is needed to get the flag returned
- * in %o1.
- */
- int in_child;
- int child_pid;
- asm volatile("mov %2, %%g1\n\t"
- "mov %3, %%o0\n\t"
- "mov 0 , %%o1\n\t"
- "t 0x6d\n\t"
- "mov %%o1, %0\n\t"
- "mov %%o0, %1"
- : "=r"(in_child), "=r"(child_pid)
- : "i"(__NR_clone), "r"(flags | SIGCHLD)
- : "%o1", "%o0", "%g1");
-
- if (in_child)
- return 0;
- else
- return child_pid;
- }
-#elif defined(__ia64__)
- /* On ia64 the stack and stack size are passed as separate arguments. */
- return (int)syscall(__NR_clone, flags | SIGCHLD, NULL, 0);
-#else
- return (int)syscall(__NR_clone, flags | SIGCHLD, NULL);
-#endif
-}
-
-pid_t lxc_raw_clone_cb(int (*fn)(void *), void *args, unsigned long flags)
-{
- pid_t pid;
-
- pid = lxc_raw_clone(flags);
- if (pid < 0)
- return -1;
-
- /* exit() is not thread-safe and might mess with the parent's signal
- * handlers and other stuff when exec() fails.
- */
- if (pid == 0)
- _exit(fn(args));
-
- return pid;
-}
-
/* Leave the user namespace at the first position in the array of structs so
* that we always attach to it first when iterating over the struct and using
* setns() to switch namespaces. This especially affects lxc_attach(): Suppose