Merge branch 'for-4.5-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 10 Feb 2016 19:36:19 +0000 (11:36 -0800)

committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 10 Feb 2016 19:36:19 +0000 (11:36 -0800)
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt

index e8d25e78421454635236295d0d63ad76fd604af4..ff49cf901148d895b765800ec6ddb79c0e38ed53 100644 (file)
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -7,7 +7,7 @@ This is the authoritative documentation on the design, interface and
  conventions of cgroup v2.  It describes all userland-visible aspects
  of cgroup including core and specific controller behaviors.  All
  future changes must be reflected in this document.  Documentation for
-v1 is available under Documentation/cgroup-legacy/.
+v1 is available under Documentation/cgroup-v1/.
  
  CONTENTS
  
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h

index 7f540f7f588d8c8461af975a5ebd21a08e6cf14b..789471dba6fb30f15c752fbca46ddbefe5bfe850 100644 (file)
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -127,6 +127,12 @@ struct cgroup_subsys_state {
          */
         u64 serial_nr;
  
+       /*
+        * Incremented by online self and children.  Used to guarantee that
+        * parents are not offlined before their children.
+        */
+       atomic_t online_cnt;
+
         /* percpu_ref killing and RCU release */
         struct rcu_head rcu_head;
         struct work_struct destroy_work;
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h

index 85a868ccb4931d374a1ee9fb4e4036bb84399561..fea160ee5803fd121d0493f622e240b4c35da480 100644 (file)
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -137,6 +137,8 @@ static inline void set_mems_allowed(nodemask_t nodemask)
         task_unlock(current);
  }
  
+extern void cpuset_post_attach_flush(void);
+
  #else /* !CONFIG_CPUSETS */
  
  static inline bool cpusets_enabled(void) { return false; }
@@ -243,6 +245,10 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
         return false;
  }
  
+static inline void cpuset_post_attach_flush(void)
+{
+}
+
  #endif /* !CONFIG_CPUSETS */
  
  #endif /* _LINUX_CPUSET_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index c03a640ef6da265db01b93c2970ab6b2da7abd67..d27904c193daa1d8a8680522093254cfde376177 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -58,6 +58,7 @@
  #include <linux/kthread.h>
  #include <linux/delay.h>
  #include <linux/atomic.h>
+#include <linux/cpuset.h>
  #include <net/sock.h>
  
  /*
@@ -2739,6 +2740,7 @@ out_unlock_rcu:
  out_unlock_threadgroup:
         percpu_up_write(&cgroup_threadgroup_rwsem);
         cgroup_kn_unlock(of->kn);
+       cpuset_post_attach_flush();
         return ret ?: nbytes;
  }
  
@@ -4655,14 +4657,15 @@ static void css_free_work_fn(struct work_struct *work)
  
         if (ss) {
                 /* css free path */
+               struct cgroup_subsys_state *parent = css->parent;
                 int id = css->id;
  
-               if (css->parent)
-                       css_put(css->parent);
-
                 ss->css_free(css);
                 cgroup_idr_remove(&ss->css_idr, id);
                 cgroup_put(cgrp);
+
+               if (parent)
+                       css_put(parent);
         } else {
                 /* cgroup free path */
                 atomic_dec(&cgrp->root->nr_cgrps);
@@ -4758,6 +4761,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
         INIT_LIST_HEAD(&css->sibling);
         INIT_LIST_HEAD(&css->children);
         css->serial_nr = css_serial_nr_next++;
+       atomic_set(&css->online_cnt, 0);
  
         if (cgroup_parent(cgrp)) {
                 css->parent = cgroup_css(cgroup_parent(cgrp), ss);
@@ -4780,6 +4784,10 @@ static int online_css(struct cgroup_subsys_state *css)
         if (!ret) {
                 css->flags |= CSS_ONLINE;
                 rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
+
+               atomic_inc(&css->online_cnt);
+               if (css->parent)
+                       atomic_inc(&css->parent->online_cnt);
         }
         return ret;
  }
@@ -5017,10 +5025,15 @@ static void css_killed_work_fn(struct work_struct *work)
                 container_of(work, struct cgroup_subsys_state, destroy_work);
  
         mutex_lock(&cgroup_mutex);
-       offline_css(css);
-       mutex_unlock(&cgroup_mutex);
  
-       css_put(css);
+       do {
+               offline_css(css);
+               css_put(css);
+               /* @css can't go away while we're holding cgroup_mutex */
+               css = css->parent;
+       } while (css && atomic_dec_and_test(&css->online_cnt));
+
+       mutex_unlock(&cgroup_mutex);
  }
  
  /* css kill confirmation processing requires process context, bounce */
@@ -5029,8 +5042,10 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
         struct cgroup_subsys_state *css =
                 container_of(ref, struct cgroup_subsys_state, refcnt);
  
-       INIT_WORK(&css->destroy_work, css_killed_work_fn);
-       queue_work(cgroup_destroy_wq, &css->destroy_work);
+       if (atomic_dec_and_test(&css->online_cnt)) {
+               INIT_WORK(&css->destroy_work, css_killed_work_fn);
+               queue_work(cgroup_destroy_wq, &css->destroy_work);
+       }
  }
  
  /**
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index 3e945fcd81796f954a7e1c81ae95e78bd1a91dba..41989ab4db571cbf93d1a12738bc9afc3411e019 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -287,6 +287,8 @@ static struct cpuset top_cpuset = {
  static DEFINE_MUTEX(cpuset_mutex);
  static DEFINE_SPINLOCK(callback_lock);
  
+static struct workqueue_struct *cpuset_migrate_mm_wq;
+
  /*
   * CPU / memory hotplug is handled asynchronously.
   */
@@ -972,31 +974,51 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
  }
  
  /*
- * cpuset_migrate_mm
- *
- *    Migrate memory region from one set of nodes to another.
- *
- *    Temporarilly set tasks mems_allowed to target nodes of migration,
- *    so that the migration code can allocate pages on these nodes.
- *
- *    While the mm_struct we are migrating is typically from some
- *    other task, the task_struct mems_allowed that we are hacking
- *    is for our current task, which must allocate new pages for that
- *    migrating memory region.
+ * Migrate memory region from one set of nodes to another.  This is
+ * performed asynchronously as it can be called from process migration path
+ * holding locks involved in process management.  All mm migrations are
+ * performed in the queued order and can be waited for by flushing
+ * cpuset_migrate_mm_wq.
   */
  
+struct cpuset_migrate_mm_work {
+       struct work_struct      work;
+       struct mm_struct        *mm;
+       nodemask_t              from;
+       nodemask_t              to;
+};
+
+static void cpuset_migrate_mm_workfn(struct work_struct *work)
+{
+       struct cpuset_migrate_mm_work *mwork =
+               container_of(work, struct cpuset_migrate_mm_work, work);
+
+       /* on a wq worker, no need to worry about %current's mems_allowed */
+       do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
+       mmput(mwork->mm);
+       kfree(mwork);
+}
+
  static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
                                                         const nodemask_t *to)
  {
-       struct task_struct *tsk = current;
-
-       tsk->mems_allowed = *to;
+       struct cpuset_migrate_mm_work *mwork;
  
-       do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+       mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
+       if (mwork) {
+               mwork->mm = mm;
+               mwork->from = *from;
+               mwork->to = *to;
+               INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
+               queue_work(cpuset_migrate_mm_wq, &mwork->work);
+       } else {
+               mmput(mm);
+       }
+}
  
-       rcu_read_lock();
-       guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
-       rcu_read_unlock();
+void cpuset_post_attach_flush(void)
+{
+       flush_workqueue(cpuset_migrate_mm_wq);
  }
  
  /*
@@ -1097,7 +1119,8 @@ static void update_tasks_nodemask(struct cpuset *cs)
                 mpol_rebind_mm(mm, &cs->mems_allowed);
                 if (migrate)
                         cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
-               mmput(mm);
+               else
+                       mmput(mm);
         }
         css_task_iter_end(&it);
  
@@ -1545,11 +1568,11 @@ static void cpuset_attach(struct cgroup_taskset *tset)
                          * @old_mems_allowed is the right nodesets that we
                          * migrate mm from.
                          */
-                       if (is_memory_migrate(cs)) {
+                       if (is_memory_migrate(cs))
                                 cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
                                                   &cpuset_attach_nodemask_to);
-                       }
-                       mmput(mm);
+                       else
+                               mmput(mm);
                 }
         }
  
@@ -1714,6 +1737,7 @@ out_unlock:
         mutex_unlock(&cpuset_mutex);
         kernfs_unbreak_active_protection(of->kn);
         css_put(&cs->css);
+       flush_workqueue(cpuset_migrate_mm_wq);
         return retval ?: nbytes;
  }
  
@@ -2359,6 +2383,9 @@ void __init cpuset_init_smp(void)
         top_cpuset.effective_mems = node_states[N_MEMORY];
  
         register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
+
+       cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
+       BUG_ON(!cpuset_migrate_mm_wq);
  }
  
  /**
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 10 Feb 2016 19:36:19 +0000 (11:36 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 10 Feb 2016 19:36:19 +0000 (11:36 -0800)
Documentation/cgroup-v2.txt		patch | blob | blame | history
include/linux/cgroup-defs.h		patch | blob | blame | history
include/linux/cpuset.h		patch | blob | blame | history
kernel/cgroup.c		patch | blob | blame | history
kernel/cpuset.c		patch | blob | blame | history