]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - kernel/fork.c
reset: remove remaining WARN_ON() in <linux/reset.h>
[mirror_ubuntu-bionic-kernel.git] / kernel / fork.c
index 2295fc69717f6c3d877ef3cac15b55336d7746c6..cc2e7cfb312c40f7f3ff519a7b41eac89af08804 100644 (file)
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/task.h>
+#ifdef CONFIG_USER_NS
+extern int unprivileged_userns_clone; /* defined in another translation unit; when zero, the CLONE_NEWUSER checks in this file demand CAP_SYS_ADMIN */
+#else
+#define unprivileged_userns_clone 0 /* without CONFIG_USER_NS the knob is a constant 0, so those checks always demand CAP_SYS_ADMIN */
+#endif
 
 /*
  * Minimum number of threads to boot the kernel
@@ -215,10 +220,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
                if (!s)
                        continue;
 
-#ifdef CONFIG_DEBUG_KMEMLEAK
                /* Clear stale pointers from reused stack. */
                memset(s->addr, 0, THREAD_SIZE);
-#endif
+
                tsk->stack_vm_area = s;
                return s->addr;
        }
@@ -302,11 +306,38 @@ struct kmem_cache *files_cachep;
 struct kmem_cache *fs_cachep;
 
 /* SLAB cache for vm_area_struct structures */
-struct kmem_cache *vm_area_cachep;
+static struct kmem_cache *vm_area_cachep;
 
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
+{ /* Allocate a zero-filled vma from vm_area_cachep and attach it to @mm; returns NULL on allocation failure. */
+       struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+
+       if (vma) { /* only initialise the vma when the allocation succeeded */
+               vma->vm_mm = mm;
+               INIT_LIST_HEAD(&vma->anon_vma_chain); /* a zeroed list_head is not a valid empty list, so set it up explicitly */
+       }
+       return vma; /* NULL here means the slab allocation failed */
+}
+
+struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
+{ /* Allocate a new vma from vm_area_cachep and copy @orig into it; returns NULL on allocation failure. */
+       struct vm_area_struct *new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+
+       if (new) { /* non-zeroing allocation is fine: every field is overwritten by the copy below */
+               *new = *orig; /* plain struct copy: pointer members still refer to the same objects as @orig */
+               INIT_LIST_HEAD(&new->anon_vma_chain); /* replace the copied list links with an empty list owned by the new vma */
+       }
+       return new; /* NULL here means the slab allocation failed */
+}
+
+void vm_area_free(struct vm_area_struct *vma)
+{ /* Return @vma to vm_area_cachep; nothing the vma points to is released here. */
+       kmem_cache_free(vm_area_cachep, vma);
+}
+
 static void account_kernel_stack(struct task_struct *tsk, int account)
 {
        void *stack = task_stack_page(tsk);
@@ -643,6 +674,14 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                        continue;
                }
                charge = 0;
+               /*
+                * Don't duplicate many vmas if we've been oom-killed (for
+                * example)
+                */
+               if (fatal_signal_pending(current)) {
+                       retval = -EINTR;
+                       goto out;
+               }
                if (mpnt->vm_flags & VM_ACCOUNT) {
                        unsigned long len = vma_pages(mpnt);
 
@@ -650,11 +689,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                                goto fail_nomem;
                        charge = len;
                }
-               tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+               tmp = vm_area_dup(mpnt);
                if (!tmp)
                        goto fail_nomem;
-               *tmp = *mpnt;
-               INIT_LIST_HEAD(&tmp->anon_vma_chain);
                retval = vma_dup_policy(mpnt, tmp);
                if (retval)
                        goto fail_nomem_policy;
@@ -676,7 +713,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                        struct inode *inode = file_inode(file);
                        struct address_space *mapping = file->f_mapping;
 
-                       get_file(file);
+                       vma_get_file(tmp);
                        if (tmp->vm_flags & VM_DENYWRITE)
                                atomic_dec(&inode->i_writecount);
                        i_mmap_lock_write(mapping);
@@ -733,7 +770,7 @@ fail_uprobe_end:
 fail_nomem_anon_vma_fork:
        mpol_put(vma_policy(tmp));
 fail_nomem_policy:
-       kmem_cache_free(vm_area_cachep, tmp);
+       vm_area_free(tmp);
 fail_nomem:
        retval = -ENOMEM;
        vm_unacct_memory(charge);
@@ -1347,7 +1384,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
                return -ENOMEM;
 
        atomic_set(&sig->count, 1);
+       spin_lock_irq(&current->sighand->siglock);
        memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+       spin_unlock_irq(&current->sighand->siglock);
        return 0;
 }
 
@@ -1550,6 +1589,10 @@ static __latent_entropy struct task_struct *copy_process(
        if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
                return ERR_PTR(-EINVAL);
 
+       if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+               if (!capable(CAP_SYS_ADMIN))
+                       return ERR_PTR(-EPERM);
+
        /*
         * Thread groups must share signals as well, and detached threads
         * can only be started up within the thread group.
@@ -1667,8 +1710,6 @@ static __latent_entropy struct task_struct *copy_process(
 
        posix_cpu_timers_init(p);
 
-       p->start_time = ktime_get_ns();
-       p->real_start_time = ktime_get_boot_ns();
        p->io_context = NULL;
        p->audit_context = NULL;
        cgroup_fork(p);
@@ -1717,6 +1758,9 @@ static __latent_entropy struct task_struct *copy_process(
        p->sequential_io        = 0;
        p->sequential_io_avg    = 0;
 #endif
+#ifdef CONFIG_SECURITY
+       p->security = NULL;
+#endif
 
        /* Perform scheduler related setup. Assign this task to a CPU. */
        retval = sched_fork(clone_flags, p);
@@ -1832,6 +1876,17 @@ static __latent_entropy struct task_struct *copy_process(
        if (retval)
                goto bad_fork_free_pid;
 
+       /*
+        * From this point on we must avoid any synchronous user-space
+        * communication until we take the tasklist-lock. In particular, we do
+        * not want user-space to be able to predict the process start-time by
+        * stalling fork(2) after we recorded the start_time but before it is
+        * visible to the system.
+        */
+
+       p->start_time = ktime_get_ns();
+       p->real_start_time = ktime_get_boot_ns();
+
        /*
         * Make it visible to the rest of the system, but dont wake it up yet.
         * Need tasklist lock for parent etc handling!
@@ -2343,6 +2398,12 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
        if (unshare_flags & CLONE_NEWNS)
                unshare_flags |= CLONE_FS;
 
+       if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+               err = -EPERM;
+               if (!capable(CAP_SYS_ADMIN))
+                       goto bad_unshare_out;
+       }
+
        err = check_unshare_flags(unshare_flags);
        if (err)
                goto bad_unshare_out;