]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - block/blk-barrier.c
block: drop barrier ordering by queue draining
[mirror_ubuntu-artful-kernel.git] / block / blk-barrier.c
1 /*
2 * Functions related to barrier IO handling
3 */
4 #include <linux/kernel.h>
5 #include <linux/module.h>
6 #include <linux/bio.h>
7 #include <linux/blkdev.h>
8 #include <linux/gfp.h>
9
10 #include "blk.h"
11
12 static struct request *queue_next_ordseq(struct request_queue *q);
13
14 /*
15 * Cache flushing for ordered writes handling
16 */
17 unsigned blk_ordered_cur_seq(struct request_queue *q)
18 {
19 if (!q->ordseq)
20 return 0;
21 return 1 << ffz(q->ordseq);
22 }
23
24 static struct request *blk_ordered_complete_seq(struct request_queue *q,
25 unsigned seq, int error)
26 {
27 struct request *next_rq = NULL;
28
29 if (error && !q->orderr)
30 q->orderr = error;
31
32 BUG_ON(q->ordseq & seq);
33 q->ordseq |= seq;
34
35 if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) {
36 /* not complete yet, queue the next ordered sequence */
37 next_rq = queue_next_ordseq(q);
38 } else {
39 /* complete this barrier request */
40 __blk_end_request_all(q->orig_bar_rq, q->orderr);
41 q->orig_bar_rq = NULL;
42 q->ordseq = 0;
43
44 /* dispatch the next barrier if there's one */
45 if (!list_empty(&q->pending_barriers)) {
46 next_rq = list_entry_rq(q->pending_barriers.next);
47 list_move(&next_rq->queuelist, &q->queue_head);
48 }
49 }
50 return next_rq;
51 }
52
53 static void pre_flush_end_io(struct request *rq, int error)
54 {
55 elv_completed_request(rq->q, rq);
56 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
57 }
58
59 static void bar_end_io(struct request *rq, int error)
60 {
61 elv_completed_request(rq->q, rq);
62 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
63 }
64
65 static void post_flush_end_io(struct request *rq, int error)
66 {
67 elv_completed_request(rq->q, rq);
68 blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
69 }
70
71 static void queue_flush(struct request_queue *q, struct request *rq,
72 rq_end_io_fn *end_io)
73 {
74 blk_rq_init(q, rq);
75 rq->cmd_type = REQ_TYPE_FS;
76 rq->cmd_flags = REQ_FLUSH;
77 rq->rq_disk = q->orig_bar_rq->rq_disk;
78 rq->end_io = end_io;
79
80 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
81 }
82
83 static struct request *queue_next_ordseq(struct request_queue *q)
84 {
85 struct request *rq = &q->bar_rq;
86
87 switch (blk_ordered_cur_seq(q)) {
88 case QUEUE_ORDSEQ_PREFLUSH:
89 queue_flush(q, rq, pre_flush_end_io);
90 break;
91
92 case QUEUE_ORDSEQ_BAR:
93 /* initialize proxy request and queue it */
94 blk_rq_init(q, rq);
95 init_request_from_bio(rq, q->orig_bar_rq->bio);
96 rq->cmd_flags &= ~REQ_HARDBARRIER;
97 if (q->ordered & QUEUE_ORDERED_DO_FUA)
98 rq->cmd_flags |= REQ_FUA;
99 rq->end_io = bar_end_io;
100
101 elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
102 break;
103
104 case QUEUE_ORDSEQ_POSTFLUSH:
105 queue_flush(q, rq, post_flush_end_io);
106 break;
107
108 default:
109 BUG();
110 }
111 return rq;
112 }
113
114 struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
115 {
116 unsigned skip = 0;
117
118 if (!(rq->cmd_flags & REQ_HARDBARRIER))
119 return rq;
120
121 if (q->ordseq) {
122 /*
123 * Barrier is already in progress and they can't be
124 * processed in parallel. Queue for later processing.
125 */
126 list_move_tail(&rq->queuelist, &q->pending_barriers);
127 return NULL;
128 }
129
130 if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
131 /*
132 * Queue ordering not supported. Terminate
133 * with prejudice.
134 */
135 blk_dequeue_request(rq);
136 __blk_end_request_all(rq, -EOPNOTSUPP);
137 return NULL;
138 }
139
140 /*
141 * Start a new ordered sequence
142 */
143 q->orderr = 0;
144 q->ordered = q->next_ordered;
145 q->ordseq |= QUEUE_ORDSEQ_STARTED;
146
147 /*
148 * For an empty barrier, there's no actual BAR request, which
149 * in turn makes POSTFLUSH unnecessary. Mask them off.
150 */
151 if (!blk_rq_sectors(rq))
152 q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
153 QUEUE_ORDERED_DO_POSTFLUSH);
154
155 /* stash away the original request */
156 blk_dequeue_request(rq);
157 q->orig_bar_rq = rq;
158
159 if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
160 skip |= QUEUE_ORDSEQ_PREFLUSH;
161
162 if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
163 skip |= QUEUE_ORDSEQ_BAR;
164
165 if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
166 skip |= QUEUE_ORDSEQ_POSTFLUSH;
167
168 /* complete skipped sequences and return the first sequence */
169 return blk_ordered_complete_seq(q, skip, 0);
170 }
171
172 static void bio_end_empty_barrier(struct bio *bio, int err)
173 {
174 if (err) {
175 if (err == -EOPNOTSUPP)
176 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
177 clear_bit(BIO_UPTODATE, &bio->bi_flags);
178 }
179 if (bio->bi_private)
180 complete(bio->bi_private);
181 bio_put(bio);
182 }
183
184 /**
185 * blkdev_issue_flush - queue a flush
186 * @bdev: blockdev to issue flush for
187 * @gfp_mask: memory allocation flags (for bio_alloc)
188 * @error_sector: error sector
189 * @flags: BLKDEV_IFL_* flags to control behaviour
190 *
191 * Description:
192 * Issue a flush for the block device in question. Caller can supply
193 * room for storing the error offset in case of a flush error, if they
194 * wish to. If WAIT flag is not passed then caller may check only what
195 * request was pushed in some internal queue for later handling.
196 */
197 int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
198 sector_t *error_sector, unsigned long flags)
199 {
200 DECLARE_COMPLETION_ONSTACK(wait);
201 struct request_queue *q;
202 struct bio *bio;
203 int ret = 0;
204
205 if (bdev->bd_disk == NULL)
206 return -ENXIO;
207
208 q = bdev_get_queue(bdev);
209 if (!q)
210 return -ENXIO;
211
212 /*
213 * some block devices may not have their queue correctly set up here
214 * (e.g. loop device without a backing file) and so issuing a flush
215 * here will panic. Ensure there is a request function before issuing
216 * the barrier.
217 */
218 if (!q->make_request_fn)
219 return -ENXIO;
220
221 bio = bio_alloc(gfp_mask, 0);
222 bio->bi_end_io = bio_end_empty_barrier;
223 bio->bi_bdev = bdev;
224 if (test_bit(BLKDEV_WAIT, &flags))
225 bio->bi_private = &wait;
226
227 bio_get(bio);
228 submit_bio(WRITE_BARRIER, bio);
229 if (test_bit(BLKDEV_WAIT, &flags)) {
230 wait_for_completion(&wait);
231 /*
232 * The driver must store the error location in ->bi_sector, if
233 * it supports it. For non-stacked drivers, this should be
234 * copied from blk_rq_pos(rq).
235 */
236 if (error_sector)
237 *error_sector = bio->bi_sector;
238 }
239
240 if (bio_flagged(bio, BIO_EOPNOTSUPP))
241 ret = -EOPNOTSUPP;
242 else if (!bio_flagged(bio, BIO_UPTODATE))
243 ret = -EIO;
244
245 bio_put(bio);
246 return ret;
247 }
248 EXPORT_SYMBOL(blkdev_issue_flush);