diff --git a/block/blk-core.c b/block/blk-core.c
index 6d82c4f7fadd9466f84a2405352f8296597eaa09..abcb8684ba6771a63561d08c5728690ed272e921 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -71,6 +71,78 @@ struct kmem_cache *blk_requestq_cachep;
  */
 static struct workqueue_struct *kblockd_workqueue;
 
+/**
+ * blk_queue_flag_set - atomically set a queue flag
+ * @flag: flag to be set
+ * @q: request queue
+ */
+void blk_queue_flag_set(unsigned int flag, struct request_queue *q)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       queue_flag_set(flag, q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL(blk_queue_flag_set);
+
+/**
+ * blk_queue_flag_clear - atomically clear a queue flag
+ * @flag: flag to be cleared
+ * @q: request queue
+ */
+void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       queue_flag_clear(flag, q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+}
+EXPORT_SYMBOL(blk_queue_flag_clear);
+
+/**
+ * blk_queue_flag_test_and_set - atomically test and set a queue flag
+ * @flag: flag to be set
+ * @q: request queue
+ *
+ * Returns the previous value of @flag - 0 if the flag was not set and 1 if
+ * the flag was already set.
+ */
+bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
+{
+       unsigned long flags;
+       bool res;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       res = queue_flag_test_and_set(flag, q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+
+       return res;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
+
+/**
+ * blk_queue_flag_test_and_clear - atomically test and clear a queue flag
+ * @flag: flag to be cleared
+ * @q: request queue
+ *
+ * Returns the previous value of @flag - 0 if the flag was not set and 1 if
+ * the flag was set.
+ */
+bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q)
+{
+       unsigned long flags;
+       bool res;
+
+       spin_lock_irqsave(q->queue_lock, flags);
+       res = queue_flag_test_and_clear(flag, q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+
+       return res;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_clear);
+
 static void blk_clear_congested(struct request_list *rl, int sync)
 {
 #ifdef CONFIG_CGROUP_WRITEBACK
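
A quick usage sketch for the four helpers added above (hypothetical driver code, not part of this diff; mydrv_tune_queue is made up and assumes the declarations this series adds to <linux/blkdev.h>):

#include <linux/blkdev.h>

/* Hypothetical helper: advertise SSD-like behaviour for a live queue. */
static void mydrv_tune_queue(struct request_queue *q)
{
        /* Each call takes q->queue_lock internally with interrupts disabled. */
        blk_queue_flag_set(QUEUE_FLAG_NONROT, q);       /* non-rotational device */
        blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); /* don't feed the entropy pool */
}
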
@@ -361,25 +433,14 @@ EXPORT_SYMBOL(blk_sync_queue);
  */
 int blk_set_preempt_only(struct request_queue *q)
 {
-       unsigned long flags;
-       int res;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       res = queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
-       spin_unlock_irqrestore(q->queue_lock, flags);
-
-       return res;
+       return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
 }
 EXPORT_SYMBOL_GPL(blk_set_preempt_only);
 
 void blk_clear_preempt_only(struct request_queue *q)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
+       blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
        wake_up_all(&q->mq_freeze_wq);
-       spin_unlock_irqrestore(q->queue_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
 
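
The test-and-set variant returns the previous flag value, which is exactly what lets blk_set_preempt_only() above shrink to a single line. An illustrative caller (hypothetical; mydrv_enter_preempt_only is not in the tree):

/* Illustrative caller showing the return-value semantics. */
static void mydrv_enter_preempt_only(struct request_queue *q)
{
        if (blk_set_preempt_only(q))
                return;                 /* flag was already set elsewhere */

        /* ... issue the requests that are allowed in preempt-only mode ... */

        blk_clear_preempt_only(q);      /* clears the flag and wakes q->mq_freeze_wq waiters */
}
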
@@ -629,9 +690,7 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
 
 void blk_set_queue_dying(struct request_queue *q)
 {
-       spin_lock_irq(q->queue_lock);
-       queue_flag_set(QUEUE_FLAG_DYING, q);
-       spin_unlock_irq(q->queue_lock);
+       blk_queue_flag_set(QUEUE_FLAG_DYING, q);
 
        /*
         * When queue DYING flag is set, we need to block new req
@@ -719,6 +778,37 @@ void blk_cleanup_queue(struct request_queue *q)
        del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
        blk_sync_queue(q);
 
+       /*
+        * I/O scheduler exit is only safe after the sysfs scheduler attribute
+        * has been removed.
+        */
+       WARN_ON_ONCE(q->kobj.state_in_sysfs);
+
+       /*
+        * Since the I/O scheduler exit code may access cgroup information,
+        * perform I/O scheduler exit before disassociating from the block
+        * cgroup controller.
+        */
+       if (q->elevator) {
+               ioc_clear_queue(q);
+               elevator_exit(q, q->elevator);
+               q->elevator = NULL;
+       }
+
+       /*
+        * Remove all references to @q from the block cgroup controller before
+        * restoring @q->queue_lock to avoid that restoring this pointer causes
+        * e.g. blkcg_print_blkgs() to crash.
+        */
+       blkcg_exit_queue(q);
+
+       /*
+        * Since the cgroup code may dereference the @q->backing_dev_info
+        * pointer, only decrease its reference count after having removed the
+        * association with the block cgroup controller.
+        */
+       bdi_put(q->backing_dev_info);
+
        if (q->mq_ops)
                blk_mq_free_queue(q);
        percpu_ref_exit(&q->q_usage_counter);
@@ -810,7 +900,7 @@ void blk_exit_rl(struct request_queue *q, struct request_list *rl)
 
 struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 {
-       return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE);
+       return blk_alloc_queue_node(gfp_mask, NUMA_NO_NODE, NULL);
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
@@ -827,7 +917,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
                bool success = false;
                int ret;
 
-               rcu_read_lock_sched();
+               rcu_read_lock();
                if (percpu_ref_tryget_live(&q->q_usage_counter)) {
                        /*
                         * The code that sets the PREEMPT_ONLY flag is
@@ -840,7 +930,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
                                percpu_ref_put(&q->q_usage_counter);
                        }
                }
-               rcu_read_unlock_sched();
+               rcu_read_unlock();
 
                if (success)
                        return 0;
@@ -888,7 +978,21 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
        kblockd_schedule_work(&q->timeout_work);
 }
 
-struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
+/**
+ * blk_alloc_queue_node - allocate a request queue
+ * @gfp_mask: memory allocation flags
+ * @node_id: NUMA node to allocate memory from
+ * @lock: For legacy queues, pointer to a spinlock that will be used to e.g.
+ *        serialize calls to the legacy .request_fn() callback. Ignored for
+ *        blk-mq request queues.
+ *
+ * Note: pass the queue lock as the third argument to this function instead of
+ * setting the queue lock pointer explicitly, to avoid triggering a sporadic
+ * crash in the blkcg code. This is necessary because this function calls
+ * blkcg_init_queue(), and the queue lock pointer must already be set when
+ * blkcg_init_queue() runs.
+ */
+struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
+                                          spinlock_t *lock)
 {
        struct request_queue *q;
 
@@ -939,11 +1043,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        mutex_init(&q->sysfs_lock);
        spin_lock_init(&q->__queue_lock);
 
-       /*
-        * By default initialize queue_lock to internal lock and driver can
-        * override it later if need be.
-        */
-       q->queue_lock = &q->__queue_lock;
+       if (!q->mq_ops)
+               q->queue_lock = lock ? : &q->__queue_lock;
 
        /*
         * A queue starts its life with bypass turned on to avoid
@@ -952,7 +1053,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
         * registered by blk_register_queue().
         */
        q->bypass_depth = 1;
-       __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
+       queue_flag_set_unlocked(QUEUE_FLAG_BYPASS, q);
 
        init_waitqueue_head(&q->mq_freeze_wq);
 
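
A hedged call pattern for the new three-argument blk_alloc_queue_node() (hypothetical driver code; mydrv_lock and mydrv_alloc_legacy_queue are made up):

#include <linux/blkdev.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(mydrv_lock);             /* hypothetical driver lock */

static struct request_queue *mydrv_alloc_legacy_queue(void)
{
        /* The driver lock is installed as q->queue_lock before
         * blkcg_init_queue() runs inside the allocation path. Bio-based
         * callers pass NULL and fall back to the internal __queue_lock;
         * for blk-mq queues the argument is ignored entirely. */
        return blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &mydrv_lock);
}
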
@@ -1030,13 +1131,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 {
        struct request_queue *q;
 
-       q = blk_alloc_queue_node(GFP_KERNEL, node_id);
+       q = blk_alloc_queue_node(GFP_KERNEL, node_id, lock);
        if (!q)
                return NULL;
 
        q->request_fn = rfn;
-       if (lock)
-               q->queue_lock = lock;
        if (blk_init_allocated_queue(q) < 0) {
                blk_cleanup_queue(q);
                return NULL;
@@ -2023,7 +2122,7 @@ out_unlock:
        return BLK_QC_T_NONE;
 }
 
-static void handle_bad_sector(struct bio *bio)
+static void handle_bad_sector(struct bio *bio, sector_t maxsector)
 {
        char b[BDEVNAME_SIZE];
 
@@ -2031,7 +2130,7 @@ static void handle_bad_sector(struct bio *bio)
        printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
                        bio_devname(bio, b), bio->bi_opf,
                        (unsigned long long)bio_end_sector(bio),
-                       (long long)get_capacity(bio->bi_disk));
+                       (long long)maxsector);
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -2092,68 +2191,59 @@ static noinline int should_fail_bio(struct bio *bio)
 }
 ALLOW_ERROR_INJECTION(should_fail_bio, ERRNO);
 
+/*
+ * Check whether this bio extends beyond the end of the device or partition.
+ * This may well happen - the kernel calls bread() without checking the size of
+ * the device, e.g., when mounting a file system.
+ */
+static inline int bio_check_eod(struct bio *bio, sector_t maxsector)
+{
+       unsigned int nr_sectors = bio_sectors(bio);
+
+       if (nr_sectors && maxsector &&
+           (nr_sectors > maxsector ||
+            bio->bi_iter.bi_sector > maxsector - nr_sectors)) {
+               handle_bad_sector(bio, maxsector);
+               return -EIO;
+       }
+       return 0;
+}
+
 /*
  * Remap block n of partition p to block n+start(p) of the disk.
  */
 static inline int blk_partition_remap(struct bio *bio)
 {
        struct hd_struct *p;
-       int ret = 0;
+       int ret = -EIO;
 
        rcu_read_lock();
        p = __disk_get_part(bio->bi_disk, bio->bi_partno);
-       if (unlikely(!p || should_fail_request(p, bio->bi_iter.bi_size) ||
-                    bio_check_ro(bio, p))) {
-               ret = -EIO;
+       if (unlikely(!p))
+               goto out;
+       if (unlikely(should_fail_request(p, bio->bi_iter.bi_size)))
+               goto out;
+       if (unlikely(bio_check_ro(bio, p)))
                goto out;
-       }
 
        /*
         * Zone reset does not include bi_size so bio_sectors() is always 0.
         * Include a test for the reset op code and perform the remap if needed.
         */
-       if (!bio_sectors(bio) && bio_op(bio) != REQ_OP_ZONE_RESET)
-               goto out;
-
-       bio->bi_iter.bi_sector += p->start_sect;
-       bio->bi_partno = 0;
-       trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
-                             bio->bi_iter.bi_sector - p->start_sect);
-
+       if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
+               if (bio_check_eod(bio, part_nr_sects_read(p)))
+                       goto out;
+               bio->bi_iter.bi_sector += p->start_sect;
+               bio->bi_partno = 0;
+               trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p),
+                                     bio->bi_iter.bi_sector - p->start_sect);
+       }
+       ret = 0;
 out:
        rcu_read_unlock();
        return ret;
 }
 
-/*
- * Check whether this bio extends beyond the end of the device.
- */
-static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
-{
-       sector_t maxsector;
-
-       if (!nr_sectors)
-               return 0;
-
-       /* Test device or partition size, when known. */
-       maxsector = get_capacity(bio->bi_disk);
-       if (maxsector) {
-               sector_t sector = bio->bi_iter.bi_sector;
-
-               if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
-                       /*
-                        * This may well happen - the kernel calls bread()
-                        * without checking the size of the device, e.g., when
-                        * mounting a device.
-                        */
-                       handle_bad_sector(bio);
-                       return 1;
-               }
-       }
-
-       return 0;
-}
-
 static noinline_for_stack bool
 generic_make_request_checks(struct bio *bio)
 {
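
For the relocated bio_check_eod() in the hunk above, the comparison is easiest to read with concrete, made-up numbers (illustrative only; none of these values come from the patch):

/*
 * maxsector = 1000 (a 1000-sector partition), bi_iter.bi_sector = 996,
 * nr_sectors = 8:
 *
 *     nr_sectors > maxsector              ->    8 > 1000   : false
 *     bi_sector > maxsector - nr_sectors  ->  996 >  992   : true
 *
 * The second test fires, handle_bad_sector() logs the offending range and
 * bio_check_eod() returns -EIO.  Writing the check as a subtraction rather
 * than "bi_sector + nr_sectors > maxsector" keeps the arithmetic from
 * wrapping sector_t for bios that end near the top of the address space,
 * and the leading "nr_sectors > maxsector" test guards the subtraction
 * against underflow.
 */
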
@@ -2164,9 +2254,6 @@ generic_make_request_checks(struct bio *bio)
 
        might_sleep();
 
-       if (bio_check_eod(bio, nr_sectors))
-               goto end_io;
-
        q = bio->bi_disk->queue;
        if (unlikely(!q)) {
                printk(KERN_ERR
@@ -2186,17 +2273,16 @@ generic_make_request_checks(struct bio *bio)
        if (should_fail_bio(bio))
                goto end_io;
 
-       if (!bio->bi_partno) {
-               if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
+       if (bio->bi_partno) {
+               if (unlikely(blk_partition_remap(bio)))
                        goto end_io;
        } else {
-               if (blk_partition_remap(bio))
+               if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
+                       goto end_io;
+               if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
                        goto end_io;
        }
 
-       if (bio_check_eod(bio, nr_sectors))
-               goto end_io;
-
        /*
         * Filter flush bio's early so that make_request based
         * drivers without flush support don't have to worry