Merge tag 'for-linus-20181102' of git://git.kernel.dk/linux-block

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 2 Nov 2018 18:25:48 +0000 (11:25 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 2 Nov 2018 18:25:48 +0000 (11:25 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Nov 2018 18:25:48 +0000 (11:25 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Nov 2018 18:25:48 +0000 (11:25 -0700)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst

index 8384c681a4b2e0cb88595b0e95eb6660ab5d7857..476722b7b6367ca38bf0e3263f3e132b515dcfd6 100644 (file)
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1879,10 +1879,8 @@ following two functions.
  
    wbc_init_bio(@wbc, @bio)
         Should be called for each bio carrying writeback data and
-       associates the bio with the inode's owner cgroup and the
-       corresponding request queue.  This must be called after
-       a queue (device) has been associated with the bio and
-       before submission.
+       associates the bio with the inode's owner cgroup.  Can be
+       called anytime between bio allocation and submission.
  
    wbc_account_io(@wbc, @page, @bytes)
         Should be called for each data segment being written out.
@@ -1901,7 +1899,7 @@ the configuration, the bio may be executed at a lower priority and if
  the writeback session is holding shared resources, e.g. a journal
  entry, may lead to priority inversion.  There is no one easy solution
  for the problem.  Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_create_blkg()
+cases by skipping wbc_init_bio() or using bio_associate_blkcg()
  directly.
  
  
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c

index d9a7916ff0ab6474a6f4abac2873a6685ad4d467..9fe5952d117d553f12f32055fde8683c554b06a8 100644 (file)
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
         uint64_t serial_nr;
  
         rcu_read_lock();
-       serial_nr = __bio_blkcg(bio)->css.serial_nr;
+       serial_nr = bio_blkcg(bio)->css.serial_nr;
  
         /*
          * Check whether blkcg has changed.  The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
         if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
                 goto out;
  
-       bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+       bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
         /*
          * Update blkg_path for bfq_log_* functions. We cache this
          * path, and update it here, for the following
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c

index 6075100f03a50a73da838b19891b923d0ad422a7..3a27d31fcda60250854ced98c68fe3db32ac587c 100644 (file)
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
  
         rcu_read_lock();
  
-       bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
+       bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
         if (!bfqg) {
                 bfqq = &bfqd->oom_bfqq;
                 goto out;
diff --git a/block/bio.c b/block/bio.c

index c27f77befbacc70a36e194f5f2427e20cde123af..d5368a4455613452972e0b8b06ea3e1ceacb335e 100644 (file)
--- a/block/bio.c
+++ b/block/bio.c
@@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
         bio->bi_iter = bio_src->bi_iter;
         bio->bi_io_vec = bio_src->bi_io_vec;
  
-       bio_clone_blkg_association(bio, bio_src);
-
-       blkcg_bio_issue_init(bio);
+       bio_clone_blkcg_association(bio, bio_src);
  }
  EXPORT_SYMBOL(__bio_clone_fast);
  
@@ -1956,151 +1954,69 @@ EXPORT_SYMBOL(bioset_init_from_src);
  
  #ifdef CONFIG_BLK_CGROUP
  
-/**
- * bio_associate_blkg - associate a bio with the a blkg
- * @bio: target bio
- * @blkg: the blkg to associate
- *
- * This tries to associate @bio with the specified blkg.  Association failure
- * is handled by walking up the blkg tree.  Therefore, the blkg associated can
- * be anything between @blkg and the root_blkg.  This situation only happens
- * when a cgroup is dying and then the remaining bios will spill to the closest
- * alive blkg.
- *
- * A reference will be taken on the @blkg and will be released when @bio is
- * freed.
- */
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
-{
-       if (unlikely(bio->bi_blkg))
-               return -EBUSY;
-       bio->bi_blkg = blkg_tryget_closest(blkg);
-       return 0;
-}
-
-/**
- * __bio_associate_blkg_from_css - internal blkg association function
- *
- * This in the core association function that all association paths rely on.
- * A blkg reference is taken which is released upon freeing of the bio.
- */
-static int __bio_associate_blkg_from_css(struct bio *bio,
-                                        struct cgroup_subsys_state *css)
-{
-       struct request_queue *q = bio->bi_disk->queue;
-       struct blkcg_gq *blkg;
-       int ret;
-
-       rcu_read_lock();
-
-       if (!css || !css->parent)
-               blkg = q->root_blkg;
-       else
-               blkg = blkg_lookup_create(css_to_blkcg(css), q);
-
-       ret = bio_associate_blkg(bio, blkg);
-
-       rcu_read_unlock();
-       return ret;
-}
-
-/**
- * bio_associate_blkg_from_css - associate a bio with a specified css
- * @bio: target bio
- * @css: target css
- *
- * Associate @bio with the blkg found by combining the css's blkg and the
- * request_queue of the @bio.  This falls back to the queue's root_blkg if
- * the association fails with the css.
- */
-int bio_associate_blkg_from_css(struct bio *bio,
-                               struct cgroup_subsys_state *css)
-{
-       if (unlikely(bio->bi_blkg))
-               return -EBUSY;
-       return __bio_associate_blkg_from_css(bio, css);
-}
-EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
-
  #ifdef CONFIG_MEMCG
  /**
- * bio_associate_blkg_from_page - associate a bio with the page's blkg
+ * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
   * @bio: target bio
   * @page: the page to lookup the blkcg from
   *
- * Associate @bio with the blkg from @page's owning memcg and the respective
- * request_queue.  If cgroup_e_css returns NULL, fall back to the queue's
- * root_blkg.
- *
- * Note: this must be called after bio has an associated device.
+ * Associate @bio with the blkcg from @page's owning memcg.  This works like
+ * every other associate function wrt references.
   */
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
  {
-       struct cgroup_subsys_state *css;
-       int ret;
+       struct cgroup_subsys_state *blkcg_css;
  
-       if (unlikely(bio->bi_blkg))
+       if (unlikely(bio->bi_css))
                 return -EBUSY;
         if (!page->mem_cgroup)
                 return 0;
-
-       rcu_read_lock();
-
-       css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
-
-       ret = __bio_associate_blkg_from_css(bio, css);
-
-       rcu_read_unlock();
-       return ret;
+       blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
+                                    &io_cgrp_subsys);
+       bio->bi_css = blkcg_css;
+       return 0;
  }
  #endif /* CONFIG_MEMCG */
  
  /**
- * bio_associate_create_blkg - associate a bio with a blkg from q
- * @q: request_queue where bio is going
+ * bio_associate_blkcg - associate a bio with the specified blkcg
   * @bio: target bio
+ * @blkcg_css: css of the blkcg to associate
+ *
+ * Associate @bio with the blkcg specified by @blkcg_css.  Block layer will
+ * treat @bio as if it were issued by a task which belongs to the blkcg.
   *
- * Associate @bio with the blkg found from the bio's css and the request_queue.
- * If one is not found, bio_lookup_blkg creates the blkg.  This falls back to
- * the queue's root_blkg if association fails.
+ * This function takes an extra reference of @blkcg_css which will be put
+ * when @bio is released.  The caller must own @bio and is responsible for
+ * synchronizing calls to this function.
   */
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
  {
-       struct cgroup_subsys_state *css;
-       int ret = 0;
-
-       /* someone has already associated this bio with a blkg */
-       if (bio->bi_blkg)
-               return ret;
-
-       rcu_read_lock();
-
-       css = blkcg_css();
-
-       ret = __bio_associate_blkg_from_css(bio, css);
-
-       rcu_read_unlock();
-       return ret;
+       if (unlikely(bio->bi_css))
+               return -EBUSY;
+       css_get(blkcg_css);
+       bio->bi_css = blkcg_css;
+       return 0;
  }
+EXPORT_SYMBOL_GPL(bio_associate_blkcg);
  
  /**
- * bio_reassociate_blkg - reassociate a bio with a blkg from q
- * @q: request_queue where bio is going
+ * bio_associate_blkg - associate a bio with the specified blkg
   * @bio: target bio
+ * @blkg: the blkg to associate
   *
- * When submitting a bio, multiple recursive calls to make_request() may occur.
- * This causes the initial associate done in blkcg_bio_issue_check() to be
- * incorrect and reference the prior request_queue.  This performs reassociation
- * when this situation happens.
+ * Associate @bio with the blkg specified by @blkg.  This is the queue specific
+ * blkcg information associated with the @bio, a reference will be taken on the
+ * @blkg and will be freed when the bio is freed.
   */
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
  {
-       if (bio->bi_blkg) {
-               blkg_put(bio->bi_blkg);
-               bio->bi_blkg = NULL;
-       }
-
-       return bio_associate_create_blkg(q, bio);
+       if (unlikely(bio->bi_blkg))
+               return -EBUSY;
+       if (!blkg_try_get(blkg))
+               return -ENODEV;
+       bio->bi_blkg = blkg;
+       return 0;
  }
  
  /**
@@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio)
                 put_io_context(bio->bi_ioc);
                 bio->bi_ioc = NULL;
         }
+       if (bio->bi_css) {
+               css_put(bio->bi_css);
+               bio->bi_css = NULL;
+       }
         if (bio->bi_blkg) {
                 blkg_put(bio->bi_blkg);
                 bio->bi_blkg = NULL;
@@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio)
  }
  
  /**
- * bio_clone_blkg_association - clone blkg association from src to dst bio
+ * bio_clone_blkcg_association - clone blkcg association from src to dst bio
   * @dst: destination bio
   * @src: source bio
   */
-void bio_clone_blkg_association(struct bio *dst, struct bio *src)
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
  {
-       if (src->bi_blkg)
-               bio_associate_blkg(dst, src->bi_blkg);
+       if (src->bi_css)
+               WARN_ON(bio_associate_blkcg(dst, src->bi_css));
  }
-EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
+EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
  #endif /* CONFIG_BLK_CGROUP */
  
  static void __init biovec_init_slabs(void)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c

index 992da5592c6ed14208116b794975c75c2b3986a1..c630e02836a80d7d406778208c659aebda8fcf06 100644 (file)
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg)
         kfree(blkg);
  }
  
-static void __blkg_release(struct rcu_head *rcu)
-{
-       struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
-
-       percpu_ref_exit(&blkg->refcnt);
-
-       /* release the blkcg and parent blkg refs this blkg has been holding */
-       css_put(&blkg->blkcg->css);
-       if (blkg->parent)
-               blkg_put(blkg->parent);
-
-       wb_congested_put(blkg->wb_congested);
-
-       blkg_free(blkg);
-}
-
-/*
- * A group is RCU protected, but having an rcu lock does not mean that one
- * can access all the fields of blkg and assume these are valid.  For
- * example, don't try to follow throtl_data and request queue links.
- *
- * Having a reference to blkg under an rcu allows accesses to only values
- * local to groups like group stats and group rate limits.
- */
-static void blkg_release(struct percpu_ref *ref)
-{
-       struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
-
-       call_rcu(&blkg->rcu_head, __blkg_release);
-}
-
  /**
   * blkg_alloc - allocate a blkg
   * @blkcg: block cgroup the new blkg is associated with
@@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
         blkg->q = q;
         INIT_LIST_HEAD(&blkg->q_node);
         blkg->blkcg = blkcg;
+       atomic_set(&blkg->refcnt, 1);
  
         /* root blkg uses @q->root_rl, init rl only for !root blkgs */
         if (blkcg != &blkcg_root) {
@@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
                 blkg_get(blkg->parent);
         }
  
-       ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
-                             GFP_NOWAIT | __GFP_NOWARN);
-       if (ret)
-               goto err_cancel_ref;
-
         /* invoke per-policy init */
         for (i = 0; i < BLKCG_MAX_POLS; i++) {
                 struct blkcg_policy *pol = blkcg_policy[i];
@@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
         blkg_put(blkg);
         return ERR_PTR(ret);
  
-err_cancel_ref:
-       percpu_ref_exit(&blkg->refcnt);
  err_put_congested:
         wb_congested_put(wb_congested);
  err_put_css:
@@ -296,7 +259,7 @@ err_free_blkg:
  }
  
  /**
- * __blkg_lookup_create - lookup blkg, try to create one if not there
+ * blkg_lookup_create - lookup blkg, try to create one if not there
   * @blkcg: blkcg of interest
   * @q: request_queue of interest
   *
@@ -305,11 +268,12 @@ err_free_blkg:
   * that all non-root blkg's have access to the parent blkg.  This function
   * should be called under RCU read lock and @q->queue_lock.
   *
- * Returns the blkg or the closest blkg if blkg_create fails as it walks
- * down from root.
+ * Returns pointer to the looked up or created blkg on success, ERR_PTR()
+ * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
+ * dead and bypassing, returns ERR_PTR(-EBUSY).
   */
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-                                     struct request_queue *q)
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+                                   struct request_queue *q)
  {
         struct blkcg_gq *blkg;
  
@@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
          * we shouldn't allow anything to go through for a bypassing queue.
          */
         if (unlikely(blk_queue_bypass(q)))
-               return q->root_blkg;
+               return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
  
         blkg = __blkg_lookup(blkcg, q, true);
         if (blkg)
@@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
  
         /*
          * Create blkgs walking down from blkcg_root to @blkcg, so that all
-        * non-root blkgs have access to their parents.  Returns the closest
-        * blkg to the intended blkg should blkg_create() fail.
+        * non-root blkgs have access to their parents.
          */
         while (true) {
                 struct blkcg *pos = blkcg;
                 struct blkcg *parent = blkcg_parent(blkcg);
-               struct blkcg_gq *ret_blkg = q->root_blkg;
-
-               while (parent) {
-                       blkg = __blkg_lookup(parent, q, false);
-                       if (blkg) {
-                               /* remember closest blkg */
-                               ret_blkg = blkg;
-                               break;
-                       }
+
+               while (parent && !__blkg_lookup(parent, q, false)) {
                         pos = parent;
                         parent = blkcg_parent(parent);
                 }
  
                 blkg = blkg_create(pos, q, NULL);
-               if (IS_ERR(blkg))
-                       return ret_blkg;
-               if (pos == blkcg)
+               if (pos == blkcg || IS_ERR(blkg))
                         return blkg;
         }
  }
  
-/**
- * blkg_lookup_create - find or create a blkg
- * @blkcg: target block cgroup
- * @q: target request_queue
- *
- * This looks up or creates the blkg representing the unique pair
- * of the blkcg and the request_queue.
- */
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
-                                   struct request_queue *q)
-{
-       struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
-       unsigned long flags;
-
-       if (unlikely(!blkg)) {
-               spin_lock_irqsave(q->queue_lock, flags);
-
-               blkg = __blkg_lookup_create(blkcg, q);
-
-               spin_unlock_irqrestore(q->queue_lock, flags);
-       }
-
-       return blkg;
-}
-
  static void blkg_destroy(struct blkcg_gq *blkg)
  {
         struct blkcg *blkcg = blkg->blkcg;
@@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
          * Put the reference taken at the time of creation so that when all
          * queues are gone, group can be destroyed.
          */
-       percpu_ref_kill(&blkg->refcnt);
+       blkg_put(blkg);
  }
  
  /**
@@ -451,6 +380,29 @@ static void blkg_destroy_all(struct request_queue *q)
         q->root_rl.blkg = NULL;
  }
  
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
+{
+       struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+
+       /* release the blkcg and parent blkg refs this blkg has been holding */
+       css_put(&blkg->blkcg->css);
+       if (blkg->parent)
+               blkg_put(blkg->parent);
+
+       wb_congested_put(blkg->wb_congested);
+
+       blkg_free(blkg);
+}
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
+
  /*
   * The next function used by blk_queue_for_each_rl().  It's a bit tricky
   * because the root blkg uses @q->root_rl instead of its own rl.
@@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void)
         blkg = blkg_lookup(blkcg, q);
         if (!blkg)
                 goto out;
-       if (!blkg_tryget(blkg))
+       blkg = blkg_try_get(blkg);
+       if (!blkg)
                 goto out;
         rcu_read_unlock();
  
diff --git a/block/blk-core.c b/block/blk-core.c

index bc6ea87d10e02cffcaedec7cc9d4567d88cdd6b6..ce12515f9b9b9930da4515ed8f70a4cf5f5b946f 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -785,6 +785,9 @@ void blk_cleanup_queue(struct request_queue *q)
          * prevent that q->request_fn() gets invoked after draining finished.
          */
         blk_freeze_queue(q);
+
+       rq_qos_exit(q);
+
         spin_lock_irq(lock);
         queue_flag_set(QUEUE_FLAG_DEAD, q);
         spin_unlock_irq(lock);
@@ -2432,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio)
                         if (q)
                                 blk_queue_exit(q);
                         q = bio->bi_disk->queue;
-                       bio_reassociate_blkg(q, bio);
                         flags = 0;
                         if (bio->bi_opf & REQ_NOWAIT)
                                 flags = BLK_MQ_REQ_NOWAIT;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c

index 28f80d22752858a2b1fcdfefb5f079469ec480ee..38c35c32aff2dcf3fc0e9ac294a649f0be4a1cb1 100644 (file)
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -482,12 +482,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
                                      spinlock_t *lock)
  {
         struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
-       struct blkcg_gq *blkg = bio->bi_blkg;
+       struct blkcg *blkcg;
+       struct blkcg_gq *blkg;
+       struct request_queue *q = rqos->q;
         bool issue_as_root = bio_issue_as_root_blkg(bio);
  
         if (!blk_iolatency_enabled(blkiolat))
                 return;
  
+       rcu_read_lock();
+       blkcg = bio_blkcg(bio);
+       bio_associate_blkcg(bio, &blkcg->css);
+       blkg = blkg_lookup(blkcg, q);
+       if (unlikely(!blkg)) {
+               if (!lock)
+                       spin_lock_irq(q->queue_lock);
+               blkg = blkg_lookup_create(blkcg, q);
+               if (IS_ERR(blkg))
+                       blkg = NULL;
+               if (!lock)
+                       spin_unlock_irq(q->queue_lock);
+       }
+       if (!blkg)
+               goto out;
+
+       bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+       bio_associate_blkg(bio, blkg);
+out:
+       rcu_read_unlock();
         while (blkg && blkg->parent) {
                 struct iolatency_grp *iolat = blkg_to_lat(blkg);
                 if (!iolat) {
@@ -708,7 +730,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
                  * We could be exiting, don't access the pd unless we have a
                  * ref on the blkg.
                  */
-               if (!blkg_tryget(blkg))
+               if (!blkg_try_get(blkg))
                         continue;
  
                 iolat = blkg_to_lat(blkg);
diff --git a/block/blk-merge.c b/block/blk-merge.c

index 42a46744c11b45e4970bbe8a918fcf8b29d895d8..6b5ad275ed565de274746b1ef473f46a3a20a621 100644 (file)
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -714,6 +714,31 @@ static void blk_account_io_merge(struct request *req)
                 part_stat_unlock();
         }
  }
+/*
+ * Two cases of handling DISCARD merge:
+ * If max_discard_segments > 1, the driver takes every bio
+ * as a range and send them to controller together. The ranges
+ * needn't to be contiguous.
+ * Otherwise, the bios/requests will be handled as same as
+ * others which should be contiguous.
+ */
+static inline bool blk_discard_mergable(struct request *req)
+{
+       if (req_op(req) == REQ_OP_DISCARD &&
+           queue_max_discard_segments(req->q) > 1)
+               return true;
+       return false;
+}
+
+enum elv_merge blk_try_req_merge(struct request *req, struct request *next)
+{
+       if (blk_discard_mergable(req))
+               return ELEVATOR_DISCARD_MERGE;
+       else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
+               return ELEVATOR_BACK_MERGE;
+
+       return ELEVATOR_NO_MERGE;
+}
  
  /*
   * For non-mq, this has to be called with the request spinlock acquired.
@@ -731,12 +756,6 @@ static struct request *attempt_merge(struct request_queue *q,
         if (req_op(req) != req_op(next))
                 return NULL;
  
-       /*
-        * not contiguous
-        */
-       if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
-               return NULL;
-
         if (rq_data_dir(req) != rq_data_dir(next)
             || req->rq_disk != next->rq_disk
             || req_no_special_merge(next))
@@ -760,11 +779,19 @@ static struct request *attempt_merge(struct request_queue *q,
          * counts here. Handle DISCARDs separately, as they
          * have separate settings.
          */
-       if (req_op(req) == REQ_OP_DISCARD) {
+
+       switch (blk_try_req_merge(req, next)) {
+       case ELEVATOR_DISCARD_MERGE:
                 if (!req_attempt_discard_merge(q, req, next))
                         return NULL;
-       } else if (!ll_merge_requests_fn(q, req, next))
+               break;
+       case ELEVATOR_BACK_MERGE:
+               if (!ll_merge_requests_fn(q, req, next))
+                       return NULL;
+               break;
+       default:
                 return NULL;
+       }
  
         /*
          * If failfast settings disagree or any of the two is already
@@ -888,8 +915,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
  
  enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
  {
-       if (req_op(rq) == REQ_OP_DISCARD &&
-           queue_max_discard_segments(rq->q) > 1)
+       if (blk_discard_mergable(rq))
                 return ELEVATOR_DISCARD_MERGE;
         else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
                 return ELEVATOR_BACK_MERGE;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c

index 0641533597f1b2cc389e6579a986d6df17115646..844a454a7b3a60a0c3186b589ab12daad0e9b3b8 100644 (file)
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -1007,8 +1007,6 @@ void blk_unregister_queue(struct gendisk *disk)
         kobject_del(&q->kobj);
         blk_trace_remove_sysfs(disk_to_dev(disk));
  
-       rq_qos_exit(q);
-
         mutex_lock(&q->sysfs_lock);
         if (q->request_fn || (q->mq_ops && q->elevator))
                 elv_unregister_queue(q);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c

index 4bda70e8db48a9150880dc04a8a1f3fcb8844ac6..db1a3a2ae00617fbe1e4804bbfd327e37ce55737 100644 (file)
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
  }
  #endif
  
+static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
+{
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+       /* fallback to root_blkg if we fail to get a blkg ref */
+       if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
+               bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
+       bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+#endif
+}
+
  bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
                     struct bio *bio)
  {
         struct throtl_qnode *qn = NULL;
-       struct throtl_grp *tg = blkg_to_tg(blkg);
+       struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
         struct throtl_service_queue *sq;
         bool rw = bio_data_dir(bio);
         bool throttled = false;
@@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
         if (unlikely(blk_queue_bypass(q)))
                 goto out_unlock;
  
+       blk_throtl_assoc_bio(tg, bio);
         blk_throtl_update_idletime(tg);
  
         sq = &tg->service_queue;
diff --git a/block/bounce.c b/block/bounce.c

index cf49fe02f65cd017eb2132fd3475ba29a5d3cf75..36869afc258ccf6ea609e0e74db6cea56e6d2c34 100644 (file)
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
                 }
         }
  
-       bio_clone_blkg_association(bio, bio_src);
-
-       blkcg_bio_issue_init(bio);
+       bio_clone_blkcg_association(bio, bio_src);
  
         return bio;
  }
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c

index 6a3d87dd3c1ac42abf04223ea14eb673e7c5ec5c..ed41aa978c4abc66cd2845f4c18177c97387a881 100644 (file)
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
         uint64_t serial_nr;
  
         rcu_read_lock();
-       serial_nr = __bio_blkcg(bio)->css.serial_nr;
+       serial_nr = bio_blkcg(bio)->css.serial_nr;
         rcu_read_unlock();
  
         /*
@@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
         struct cfq_group *cfqg;
  
         rcu_read_lock();
-       cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
+       cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
         if (!cfqg) {
                 cfqq = &cfqd->oom_cfqq;
                 goto out;
diff --git a/drivers/block/brd.c b/drivers/block/brd.c

index df8103dd40ac2d1f6e5c29ef7790160a3335b35b..c18586fccb6f2b25c3d0ef535144deeabf640006 100644 (file)
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -396,15 +396,14 @@ static struct brd_device *brd_alloc(int i)
         disk->first_minor       = i * max_part;
         disk->fops              = &brd_fops;
         disk->private_data      = brd;
-       disk->queue             = brd->brd_queue;
         disk->flags             = GENHD_FL_EXT_DEVT;
         sprintf(disk->disk_name, "ram%d", i);
         set_capacity(disk, rd_size * 2);
-       disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
+       brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
  
         /* Tell the block layer that this is not a rotational device */
-       blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-       blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+       blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
+       blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
  
         return brd;
  
@@ -436,6 +435,7 @@ static struct brd_device *brd_init_one(int i, bool *new)
  
         brd = brd_alloc(i);
         if (brd) {
+               brd->brd_disk->queue = brd->brd_queue;
                 add_disk(brd->brd_disk);
                 list_add_tail(&brd->brd_list, &brd_devices);
         }
@@ -503,8 +503,14 @@ static int __init brd_init(void)
  
         /* point of no return */
  
-       list_for_each_entry(brd, &brd_devices, brd_list)
+       list_for_each_entry(brd, &brd_devices, brd_list) {
+               /*
+                * associate with queue just before adding disk for
+                * avoiding to mess up failure path
+                */
+               brd->brd_disk->queue = brd->brd_queue;
                 add_disk(brd->brd_disk);
+       }
  
         blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
                                   THIS_MODULE, brd_probe, NULL, NULL);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c

index e6273ae85246029358272406bfb00cca07226455..cb0cc868507620513d3de7658ed1f6999ceaa965 100644 (file)
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -77,7 +77,6 @@
  #include <linux/falloc.h>
  #include <linux/uio.h>
  #include <linux/ioprio.h>
-#include <linux/blk-cgroup.h>
  
  #include "loop.h"
  
@@ -1760,8 +1759,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
  
         /* always use the first bio's css */
  #ifdef CONFIG_BLK_CGROUP
-       if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
-               cmd->css = &bio_blkcg(rq->bio)->css;
+       if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
+               cmd->css = rq->bio->bi_css;
                 css_get(cmd->css);
         } else
  #endif
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c

index dfc8de6ce5254872a8b96ec364d7cd3136f4f7d7..a7daa8acbab3a53feda3e1e92dcc51d86a32ba71 100644 (file)
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -1942,8 +1942,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
                                 dev_warn(&dd->pdev->dev,
                                         "data movement but "
                                         "sect_count is 0\n");
-                                       err = -EINVAL;
-                                       goto abort;
+                               err = -EINVAL;
+                               goto abort;
                         }
                 }
         }
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c

index f3fb5bb8c82a1cfe861aef1af22b581709815817..ac1cffd2a09b05f5f5217e579c9e87ea80efce84 100644 (file)
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
                     !discard_bio)
                         continue;
                 bio_chain(discard_bio, bio);
-               bio_clone_blkg_association(discard_bio, bio);
+               bio_clone_blkcg_association(discard_bio, bio);
                 if (mddev->gendisk)
                         trace_block_bio_remap(bdev_get_queue(rdev->bdev),
                                 discard_bio, disk_devt(mddev->gendisk),
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c

index e52b9d3c0bd6cc22e1ac6b97736426267bb761f6..0b70c8bab045ac1a544cf3c72eb1a8766152fcd2 100644 (file)
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
         op->fcp_req.rspaddr = &op->rsp_iu;
         op->fcp_req.rsplen = sizeof(op->rsp_iu);
         op->fcp_req.done = nvme_fc_fcpio_done;
-       op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
         op->ctrl = ctrl;
         op->queue = queue;
         op->rq = rq;
@@ -1752,6 +1751,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
         if (res)
                 return res;
         op->op.fcp_req.first_sgl = &op->sgl[0];
+       op->op.fcp_req.private = &op->priv[0];
         return res;
  }
  
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c

index f30031945ee4b671033a0d80583d05c0724a096e..c33bb201b8846fae0319e1c575f9737af79a1944 100644 (file)
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1663,6 +1663,9 @@ static void nvme_map_cmb(struct nvme_dev *dev)
         struct pci_dev *pdev = to_pci_dev(dev->dev);
         int bar;
  
+       if (dev->cmb_size)
+               return;
+
         dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
         if (!dev->cmbsz)
                 return;
@@ -2147,7 +2150,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
  {
         struct pci_dev *pdev = to_pci_dev(dev->dev);
  
-       nvme_release_cmb(dev);
         pci_free_irq_vectors(pdev);
  
         if (pci_is_enabled(pdev)) {
@@ -2595,6 +2597,7 @@ static void nvme_remove(struct pci_dev *pdev)
         nvme_stop_ctrl(&dev->ctrl);
         nvme_remove_namespaces(&dev->ctrl);
         nvme_dev_disable(dev, true);
+       nvme_release_cmb(dev);
         nvme_free_host_mem(dev);
         nvme_dev_remove_admin(dev);
         nvme_free_queues(dev, 0);
diff --git a/fs/buffer.c b/fs/buffer.c

index d60d61e8ed7de495bddd0bc799f16c2606a4c68b..1286c2b95498de47d2ba08b57a93901bdf4367bd 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
          */
         bio = bio_alloc(GFP_NOIO, 1);
  
+       if (wbc) {
+               wbc_init_bio(wbc, bio);
+               wbc_account_io(wbc, bh->b_page, bh->b_size);
+       }
+
         bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
         bio_set_dev(bio, bh->b_bdev);
         bio->bi_write_hint = write_hint;
@@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
                 op_flags |= REQ_PRIO;
         bio_set_op_attrs(bio, op, op_flags);
  
-       if (wbc) {
-               wbc_init_bio(wbc, bio);
-               wbc_account_io(wbc, bh->b_page, bh->b_size);
-       }
-
         submit_bio(bio);
         return 0;
  }
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c

index 2aa62d58d8dd87e095bcb61f84aa78ef755463a1..db7590178dfcf1a4b59ee3c44deaa89a21de8ca6 100644 (file)
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
         bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
         if (!bio)
                 return -ENOMEM;
+       wbc_init_bio(io->io_wbc, bio);
         bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
         bio_set_dev(bio, bh->b_bdev);
         bio->bi_end_io = ext4_end_bio;
         bio->bi_private = ext4_get_io_end(io->io_end);
         io->io_bio = bio;
         io->io_next_block = bh->b_blocknr;
-       wbc_init_bio(io->io_wbc, bio);
         return 0;
  }
  
diff --git a/include/linux/bio.h b/include/linux/bio.h

index b47c7f716731fc5ebcdf28f03db3b5eadada59e0..056fb627edb3e82779d0d4f92b27be43a423ce15 100644 (file)
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -503,31 +503,23 @@ do {                                              \
         disk_devt((bio)->bi_disk)
  
  #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page);
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
  #else
-static inline int bio_associate_blkg_from_page(struct bio *bio,
-                                              struct page *page) { return 0; }
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+                                               struct page *page) {  return 0; }
  #endif
  
  #ifdef CONFIG_BLK_CGROUP
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
  int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
-int bio_associate_blkg_from_css(struct bio *bio,
-                               struct cgroup_subsys_state *css);
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
  void bio_disassociate_task(struct bio *bio);
-void bio_clone_blkg_association(struct bio *dst, struct bio *src);
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
  #else  /* CONFIG_BLK_CGROUP */
-static inline int bio_associate_blkg_from_css(struct bio *bio,
-                                             struct cgroup_subsys_state *css)
-{ return 0; }
-static inline int bio_associate_create_blkg(struct request_queue *q,
-                                           struct bio *bio) { return 0; }
-static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
-{ return 0; }
+static inline int bio_associate_blkcg(struct bio *bio,
+                       struct cgroup_subsys_state *blkcg_css) { return 0; }
  static inline void bio_disassociate_task(struct bio *bio) { }
-static inline void bio_clone_blkg_association(struct bio *dst,
-                                             struct bio *src) { }
+static inline void bio_clone_blkcg_association(struct bio *dst,
+                       struct bio *src) { }
  #endif /* CONFIG_BLK_CGROUP */
  
  #ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h

index 1e76ceebeb5dc58c7f98e9f1d18d65fbe62477ef..6d766a19f2bbb2b62facc79ff3871aa81be68534 100644 (file)
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -126,7 +126,7 @@ struct blkcg_gq {
         struct request_list             rl;
  
         /* reference count */
-       struct percpu_ref               refcnt;
+       atomic_t                        refcnt;
  
         /* is this blkg online? protected by both blkcg and q locks */
         bool                            online;
@@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
  
  struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
                                       struct request_queue *q, bool update_hint);
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-                                     struct request_queue *q);
  struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
                                     struct request_queue *q);
  int blkcg_init_queue(struct request_queue *q);
@@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
                    char *input, struct blkg_conf_ctx *ctx);
  void blkg_conf_finish(struct blkg_conf_ctx *ctx);
  
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
-       struct cgroup_subsys_state *css;
-
-       css = kthread_blkcg();
-       if (css)
-               return css;
-       return task_css(current, io_cgrp_id);
-}
  
  static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
  {
         return css ? container_of(css, struct blkcg, css) : NULL;
  }
  
-/**
- * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
- *
- * DO NOT USE.
- * There is a flaw using this version of the function.  In particular, this was
- * used in a broken paradigm where association was called on the given css.  It
- * is possible though that the returned css from task_css() is in the process
- * of dying due to migration of the current task.  So it is improper to assume
- * *_get() is going to succeed.  Both BFQ and CFQ rely on this logic and will
- * take additional work to handle more gracefully.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
-       if (bio && bio->bi_blkg)
-               return bio->bi_blkg->blkcg;
-       return css_to_blkcg(blkcg_css());
-}
-
-/**
- * bio_blkcg - grab the blkcg associated with a bio
- * @bio: target bio
- *
- * This returns the blkcg associated with a bio, NULL if not associated.
- * Callers are expected to either handle NULL or know association has been
- * done prior to calling this.
- */
  static inline struct blkcg *bio_blkcg(struct bio *bio)
  {
-       if (bio && bio->bi_blkg)
-               return bio->bi_blkg->blkcg;
-       return NULL;
+       struct cgroup_subsys_state *css;
+
+       if (bio && bio->bi_css)
+               return css_to_blkcg(bio->bi_css);
+       css = kthread_blkcg();
+       if (css)
+               return css_to_blkcg(css);
+       return css_to_blkcg(task_css(current, io_cgrp_id));
  }
  
  static inline bool blk_cgroup_congested(void)
@@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
   */
  static inline void blkg_get(struct blkcg_gq *blkg)
  {
-       percpu_ref_get(&blkg->refcnt);
+       WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+       atomic_inc(&blkg->refcnt);
  }
  
  /**
- * blkg_tryget - try and get a blkg reference
+ * blkg_try_get - try and get a blkg reference
   * @blkg: blkg to get
   *
   * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
   * of freeing this blkg, so we can only use it if the refcnt is not zero.
   */
-static inline bool blkg_tryget(struct blkcg_gq *blkg)
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
  {
-       return percpu_ref_tryget(&blkg->refcnt);
+       if (atomic_inc_not_zero(&blkg->refcnt))
+               return blkg;
+       return NULL;
  }
  
-/**
- * blkg_tryget_closest - try and get a blkg ref on the closet blkg
- * @blkg: blkg to get
- *
- * This walks up the blkg tree to find the closest non-dying blkg and returns
- * the blkg that it did association with as it may not be the passed in blkg.
- */
-static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
-{
-       while (!percpu_ref_tryget(&blkg->refcnt))
-               blkg = blkg->parent;
  
-       return blkg;
-}
+void __blkg_release_rcu(struct rcu_head *rcu);
  
  /**
   * blkg_put - put a blkg reference
@@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
   */
  static inline void blkg_put(struct blkcg_gq *blkg)
  {
-       percpu_ref_put(&blkg->refcnt);
+       WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+       if (atomic_dec_and_test(&blkg->refcnt))
+               call_rcu(&blkg->rcu_head, __blkg_release_rcu);
  }
  
  /**
@@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
  
         rcu_read_lock();
  
-       if (bio && bio->bi_blkg) {
-               blkcg = bio->bi_blkg->blkcg;
-               if (blkcg == &blkcg_root)
-                       goto rl_use_root;
-
-               blkg_get(bio->bi_blkg);
-               rcu_read_unlock();
-               return &bio->bi_blkg->rl;
-       }
+       blkcg = bio_blkcg(bio);
  
-       blkcg = css_to_blkcg(blkcg_css());
+       /* bypass blkg lookup and use @q->root_rl directly for root */
         if (blkcg == &blkcg_root)
-               goto rl_use_root;
+               goto root_rl;
  
+       /*
+        * Try to use blkg->rl.  blkg lookup may fail under memory pressure
+        * or if either the blkcg or queue is going away.  Fall back to
+        * root_rl in such cases.
+        */
         blkg = blkg_lookup(blkcg, q);
         if (unlikely(!blkg))
-               blkg = __blkg_lookup_create(blkcg, q);
-
-       if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
-               goto rl_use_root;
+               goto root_rl;
  
+       blkg_get(blkg);
         rcu_read_unlock();
         return &blkg->rl;
-
-       /*
-        * Each blkg has its own request_list, however, the root blkcg
-        * uses the request_queue's root_rl.  This is to avoid most
-        * overhead for the root blkcg.
-        */
-rl_use_root:
+root_rl:
         rcu_read_unlock();
         return &q->root_rl;
  }
@@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
                                   struct bio *bio) { return false; }
  #endif
  
-
-static inline void blkcg_bio_issue_init(struct bio *bio)
-{
-       bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-}
-
  static inline bool blkcg_bio_issue_check(struct request_queue *q,
                                          struct bio *bio)
  {
+       struct blkcg *blkcg;
         struct blkcg_gq *blkg;
         bool throtl = false;
  
         rcu_read_lock();
+       blkcg = bio_blkcg(bio);
+
+       /* associate blkcg if bio hasn't attached one */
+       bio_associate_blkcg(bio, &blkcg->css);
  
-       bio_associate_create_blkg(q, bio);
-       blkg = bio->bi_blkg;
+       blkg = blkg_lookup(blkcg, q);
+       if (unlikely(!blkg)) {
+               spin_lock_irq(q->queue_lock);
+               blkg = blkg_lookup_create(blkcg, q);
+               if (IS_ERR(blkg))
+                       blkg = NULL;
+               spin_unlock_irq(q->queue_lock);
+       }
  
         throtl = blk_throtl_bio(q, blkg, bio);
  
         if (!throtl) {
+               blkg = blkg ?: q->root_blkg;
                 /*
                  * If the bio is flagged with BIO_QUEUE_ENTERED it means this
                  * is a split bio and we would have already accounted for the
@@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
                 blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
         }
  
-       blkcg_bio_issue_init(bio);
-
         rcu_read_unlock();
         return !throtl;
  }
@@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
  static inline void blkcg_deactivate_policy(struct request_queue *q,
                                            const struct blkcg_policy *pol) { }
  
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
  static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
  
  static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { }
  static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
  static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
  
-static inline void blkcg_bio_issue_init(struct bio *bio) { }
  static inline bool blkcg_bio_issue_check(struct request_queue *q,
                                          struct bio *bio) { return true; }
  
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h

index 093a818c5b684754dbe8877721389947d74674ac..1dcf652ba0aa3e989a7b93e78e4303aa920a1dab 100644 (file)
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,6 +178,7 @@ struct bio {
          * release.  Read comment on top of bio_associate_current().
          */
         struct io_context       *bi_ioc;
+       struct cgroup_subsys_state *bi_css;
         struct blkcg_gq         *bi_blkg;
         struct bio_issue        bi_issue;
  #endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h

index 9968332cceed0e64e5fc9bdb814507b0bf67451b..9d12757a65b01846486341c5d31d398ee51d89b4 100644 (file)
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,8 +93,6 @@ extern struct css_set init_css_set;
  
  bool css_has_online_children(struct cgroup_subsys_state *css);
  struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
-                                        struct cgroup_subsys *ss);
  struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
                                              struct cgroup_subsys *ss);
  struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h

index 738a0c24874f0bfd308fad36057cae71401f71f0..fdfd04e348f698b3d108228868866072164d31b7 100644 (file)
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
   *
   * @bio is a part of the writeback in progress controlled by @wbc.  Perform
   * writeback specific initialization.  This is used to apply the cgroup
- * writeback context.  Must be called after the bio has been associated with
- * a device.
+ * writeback context.
   */
  static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
  {
@@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
          * regular writeback instead of writing things out itself.
          */
         if (wbc->wb)
-               bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
+               bio_associate_blkcg(bio, wbc->wb->blkcg_css);
  }
  
  #else  /* CONFIG_CGROUP_WRITEBACK */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c

index 8b79318810ad5c63d9e70cd634f6d6bc928659ef..6aaf5dd5383bba294719772bc76b7c1664d54ea9 100644 (file)
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
  }
  
  /**
- * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
   * @cgrp: the cgroup of interest
   * @ss: the subsystem of interest (%NULL returns @cgrp->self)
   *
@@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
   * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
   * function is guaranteed to return non-NULL css.
   */
-static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
-                                                       struct cgroup_subsys *ss)
+static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+                                               struct cgroup_subsys *ss)
  {
         lockdep_assert_held(&cgroup_mutex);
  
@@ -523,35 +523,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
         return cgroup_css(cgrp, ss);
  }
  
-/**
- * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest
- *
- * Find and get the effective css of @cgrp for @ss.  The effective css is
- * defined as the matching css of the nearest ancestor including self which
- * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
- * the root css is returned, so this function always returns a valid css.
- *
- * The returned css is not guaranteed to be online, and therefore it is the
- * callers responsiblity to tryget a reference for it.
- */
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
-                                        struct cgroup_subsys *ss)
-{
-       struct cgroup_subsys_state *css;
-
-       do {
-               css = cgroup_css(cgrp, ss);
-
-               if (css)
-                       return css;
-               cgrp = cgroup_parent(cgrp);
-       } while (cgrp);
-
-       return init_css_set.subsys[ss->id];
-}
-
  /**
   * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
   * @cgrp: the cgroup of interest
@@ -634,11 +605,10 @@ EXPORT_SYMBOL_GPL(of_css);
   *
   * Should be called under cgroup_[tree_]mutex.
   */
-#define for_each_e_css(css, ssid, cgrp)                                            \
-       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)            \
-               if (!((css) = cgroup_e_css_by_mask(cgrp,                    \
-                                                  cgroup_subsys[(ssid)]))) \
-                       ;                                                   \
+#define for_each_e_css(css, ssid, cgrp)                                        \
+       for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)        \
+               if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
+                       ;                                               \
                 else
  
  /**
@@ -1037,7 +1007,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
                          * @ss is in this hierarchy, so we want the
                          * effective css from @cgrp.
                          */
-                       template[i] = cgroup_e_css_by_mask(cgrp, ss);
+                       template[i] = cgroup_e_css(cgrp, ss);
                 } else {
                         /*
                          * @ss is not in this hierarchy, so we don't want
@@ -3054,7 +3024,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
                 return ret;
  
         /*
-        * At this point, cgroup_e_css_by_mask() results reflect the new csses
+        * At this point, cgroup_e_css() results reflect the new csses
          * making the following cgroup_update_dfl_csses() properly update
          * css associations of all tasks in the subtree.
          */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c

index fac0ddf8a8e22505749be3064e6b964ba12d4930..2868d85f1fb1d3286984c4727f0519957ac069a9 100644 (file)
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
         if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
                 return NULL;
  
-       if (!bio->bi_blkg)
+       if (!bio->bi_css)
                 return NULL;
-       return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
+       return cgroup_get_kernfs_id(bio->bi_css->cgroup);
  }
  #else
  static union kernfs_node_id *
diff --git a/mm/page_io.c b/mm/page_io.c

index 27b8357284420c21fe9016452ada1075dfdd71c5..d4d1c89bcdddcef43dfa04fa02926f21fba228fb 100644 (file)
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
                 goto out;
         }
         bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
-       bio_associate_blkg_from_page(bio, page);
+       bio_associate_blkcg_from_page(bio, page);
         count_swpout_vm_event(page);
         set_page_writeback(page);
         unlock_page(page);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 2 Nov 2018 18:25:48 +0000 (11:25 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 2 Nov 2018 18:25:48 +0000 (11:25 -0700)
Documentation/admin-guide/cgroup-v2.rst		patch \| blob \| blame \| history
block/bfq-cgroup.c		patch \| blob \| blame \| history
block/bfq-iosched.c		patch \| blob \| blame \| history
block/bio.c		patch \| blob \| blame \| history
block/blk-cgroup.c		patch \| blob \| blame \| history
block/blk-core.c		patch \| blob \| blame \| history
block/blk-iolatency.c		patch \| blob \| blame \| history
block/blk-merge.c		patch \| blob \| blame \| history
block/blk-sysfs.c		patch \| blob \| blame \| history
block/blk-throttle.c		patch \| blob \| blame \| history
block/bounce.c		patch \| blob \| blame \| history
block/cfq-iosched.c		patch \| blob \| blame \| history
drivers/block/brd.c		patch \| blob \| blame \| history
drivers/block/loop.c		patch \| blob \| blame \| history
drivers/block/mtip32xx/mtip32xx.c		patch \| blob \| blame \| history
drivers/md/raid0.c		patch \| blob \| blame \| history
drivers/nvme/host/fc.c		patch \| blob \| blame \| history
drivers/nvme/host/pci.c		patch \| blob \| blame \| history
fs/buffer.c		patch \| blob \| blame \| history
fs/ext4/page-io.c		patch \| blob \| blame \| history
include/linux/bio.h		patch \| blob \| blame \| history
include/linux/blk-cgroup.h		patch \| blob \| blame \| history
include/linux/blk_types.h		patch \| blob \| blame \| history
include/linux/cgroup.h		patch \| blob \| blame \| history
include/linux/writeback.h		patch \| blob \| blame \| history
kernel/cgroup/cgroup.c		patch \| blob \| blame \| history
kernel/trace/blktrace.c		patch \| blob \| blame \| history
mm/page_io.c		patch \| blob \| blame \| history