]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - block/blk-lib.c
block: introduce BLKDEV_DISCARD_ZERO to fix zeroout
[mirror_ubuntu-bionic-kernel.git] / block / blk-lib.c
CommitLineData
f31e7e40
DM
1/*
2 * Generic block layer helper functions (discard, write same, zeroout)
3 */
4#include <linux/kernel.h>
5#include <linux/module.h>
6#include <linux/bio.h>
7#include <linux/blkdev.h>
8#include <linux/scatterlist.h>
9
10#include "blk.h"
11
4e49ea4a 12static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
9082e87b 13 gfp_t gfp)
f31e7e40 14{
9082e87b
CH
15 struct bio *new = bio_alloc(gfp, nr_pages);
16
17 if (bio) {
18 bio_chain(bio, new);
4e49ea4a 19 submit_bio(bio);
9082e87b 20 }
5dba3089 21
9082e87b 22 return new;
f31e7e40
DM
23}
24
38f25255 25int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
288dab8a 26 sector_t nr_sects, gfp_t gfp_mask, int flags,
469e3216 27 struct bio **biop)
f31e7e40 28{
f31e7e40 29 struct request_queue *q = bdev_get_queue(bdev);
38f25255 30 struct bio *bio = *biop;
a22c4d7e 31 unsigned int granularity;
288dab8a 32 enum req_op op;
a22c4d7e 33 int alignment;
f31e7e40
DM
34
35 if (!q)
36 return -ENXIO;
288dab8a
CH
37
38 if (flags & BLKDEV_DISCARD_SECURE) {
e950fdf7
CH
39 if (flags & BLKDEV_DISCARD_ZERO)
40 return -EOPNOTSUPP;
288dab8a
CH
41 if (!blk_queue_secure_erase(q))
42 return -EOPNOTSUPP;
43 op = REQ_OP_SECURE_ERASE;
44 } else {
45 if (!blk_queue_discard(q))
46 return -EOPNOTSUPP;
e950fdf7
CH
47 if ((flags & BLKDEV_DISCARD_ZERO) &&
48 !q->limits.discard_zeroes_data)
49 return -EOPNOTSUPP;
288dab8a
CH
50 op = REQ_OP_DISCARD;
51 }
f31e7e40 52
a22c4d7e
ML
53 /* Zero-sector (unknown) and one-sector granularities are the same. */
54 granularity = max(q->limits.discard_granularity >> 9, 1U);
55 alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
56
5dba3089 57 while (nr_sects) {
c6e66634 58 unsigned int req_sects;
a22c4d7e 59 sector_t end_sect, tmp;
c6e66634 60
a22c4d7e
ML
61 /* Make sure bi_size doesn't overflow */
62 req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
63
9082e87b 64 /**
a22c4d7e
ML
65 * If splitting a request, and the next starting sector would be
66 * misaligned, stop the discard at the previous aligned sector.
67 */
c6e66634 68 end_sect = sector + req_sects;
a22c4d7e
ML
69 tmp = end_sect;
70 if (req_sects < nr_sects &&
71 sector_div(tmp, granularity) != alignment) {
72 end_sect = end_sect - alignment;
73 sector_div(end_sect, granularity);
74 end_sect = end_sect * granularity + alignment;
75 req_sects = end_sect - sector;
76 }
c6e66634 77
4e49ea4a 78 bio = next_bio(bio, 1, gfp_mask);
4f024f37 79 bio->bi_iter.bi_sector = sector;
f31e7e40 80 bio->bi_bdev = bdev;
288dab8a 81 bio_set_op_attrs(bio, op, 0);
f31e7e40 82
4f024f37 83 bio->bi_iter.bi_size = req_sects << 9;
c6e66634
PB
84 nr_sects -= req_sects;
85 sector = end_sect;
f31e7e40 86
c8123f8c
JA
87 /*
88 * We can loop for a long time in here, if someone does
89 * full device discards (like mkfs). Be nice and allow
90 * us to schedule out to avoid softlocking if preempt
91 * is disabled.
92 */
93 cond_resched();
5dba3089 94 }
38f25255
CH
95
96 *biop = bio;
97 return 0;
98}
99EXPORT_SYMBOL(__blkdev_issue_discard);
100
101/**
102 * blkdev_issue_discard - queue a discard
103 * @bdev: blockdev to issue discard for
104 * @sector: start sector
105 * @nr_sects: number of sectors to discard
106 * @gfp_mask: memory allocation flags (for bio_alloc)
107 * @flags: BLKDEV_IFL_* flags to control behaviour
108 *
109 * Description:
110 * Issue a discard request for the sectors in question.
111 */
112int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
113 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
114{
38f25255
CH
115 struct bio *bio = NULL;
116 struct blk_plug plug;
117 int ret;
118
38f25255 119 blk_start_plug(&plug);
288dab8a 120 ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
38f25255 121 &bio);
bbd848e0 122 if (!ret && bio) {
4e49ea4a 123 ret = submit_bio_wait(bio);
e950fdf7 124 if (ret == -EOPNOTSUPP && !(flags & BLKDEV_DISCARD_ZERO))
bbd848e0
MS
125 ret = 0;
126 }
0cfbcafc 127 blk_finish_plug(&plug);
f31e7e40 128
bbd848e0 129 return ret;
f31e7e40
DM
130}
131EXPORT_SYMBOL(blkdev_issue_discard);
3f14d792 132
4363ac7c
MP
133/**
134 * blkdev_issue_write_same - queue a write same operation
135 * @bdev: target blockdev
136 * @sector: start sector
137 * @nr_sects: number of sectors to write
138 * @gfp_mask: memory allocation flags (for bio_alloc)
139 * @page: page containing data to write
140 *
141 * Description:
142 * Issue a write same request for the sectors in question.
143 */
144int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
145 sector_t nr_sects, gfp_t gfp_mask,
146 struct page *page)
147{
4363ac7c
MP
148 struct request_queue *q = bdev_get_queue(bdev);
149 unsigned int max_write_same_sectors;
9082e87b 150 struct bio *bio = NULL;
4363ac7c
MP
151 int ret = 0;
152
153 if (!q)
154 return -ENXIO;
155
b49a0871
ML
156 /* Ensure that max_write_same_sectors doesn't overflow bi_size */
157 max_write_same_sectors = UINT_MAX >> 9;
4363ac7c 158
4363ac7c 159 while (nr_sects) {
4e49ea4a 160 bio = next_bio(bio, 1, gfp_mask);
4f024f37 161 bio->bi_iter.bi_sector = sector;
4363ac7c 162 bio->bi_bdev = bdev;
4363ac7c
MP
163 bio->bi_vcnt = 1;
164 bio->bi_io_vec->bv_page = page;
165 bio->bi_io_vec->bv_offset = 0;
166 bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
95fe6c1a 167 bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
4363ac7c
MP
168
169 if (nr_sects > max_write_same_sectors) {
4f024f37 170 bio->bi_iter.bi_size = max_write_same_sectors << 9;
4363ac7c
MP
171 nr_sects -= max_write_same_sectors;
172 sector += max_write_same_sectors;
173 } else {
4f024f37 174 bio->bi_iter.bi_size = nr_sects << 9;
4363ac7c
MP
175 nr_sects = 0;
176 }
4363ac7c
MP
177 }
178
9082e87b 179 if (bio)
4e49ea4a 180 ret = submit_bio_wait(bio);
9082e87b 181 return ret != -EOPNOTSUPP ? ret : 0;
4363ac7c
MP
182}
183EXPORT_SYMBOL(blkdev_issue_write_same);
184
3f14d792 185/**
291d24f6 186 * blkdev_issue_zeroout - generate number of zero filed write bios
3f14d792
DM
187 * @bdev: blockdev to issue
188 * @sector: start sector
189 * @nr_sects: number of sectors to write
190 * @gfp_mask: memory allocation flags (for bio_alloc)
3f14d792
DM
191 *
192 * Description:
193 * Generate and issue number of bios with zerofiled pages.
3f14d792
DM
194 */
195
35086784
FF
196static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
197 sector_t nr_sects, gfp_t gfp_mask)
3f14d792 198{
18edc8ea 199 int ret;
9082e87b 200 struct bio *bio = NULL;
0aeea189 201 unsigned int sz;
3f14d792 202
3f14d792 203 while (nr_sects != 0) {
4e49ea4a 204 bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
9082e87b 205 gfp_mask);
4f024f37 206 bio->bi_iter.bi_sector = sector;
3f14d792 207 bio->bi_bdev = bdev;
95fe6c1a 208 bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
3f14d792 209
0341aafb
JA
210 while (nr_sects != 0) {
211 sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
3f14d792
DM
212 ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
213 nr_sects -= ret >> 9;
214 sector += ret >> 9;
215 if (ret < (sz << 9))
216 break;
217 }
3f14d792 218 }
3f14d792 219
9082e87b 220 if (bio)
4e49ea4a 221 return submit_bio_wait(bio);
9082e87b 222 return 0;
3f14d792 223}
579e8f3c
MP
224
225/**
226 * blkdev_issue_zeroout - zero-fill a block range
227 * @bdev: blockdev to write
228 * @sector: start sector
229 * @nr_sects: number of sectors to write
230 * @gfp_mask: memory allocation flags (for bio_alloc)
d93ba7a5 231 * @discard: whether to discard the block range
579e8f3c
MP
232 *
233 * Description:
d93ba7a5
MP
234 * Zero-fill a block range. If the discard flag is set and the block
235 * device guarantees that subsequent READ operations to the block range
236 * in question will return zeroes, the blocks will be discarded. Should
237 * the discard request fail, if the discard flag is not set, or if
238 * discard_zeroes_data is not supported, this function will resort to
239 * zeroing the blocks manually, thus provisioning (allocating,
240 * anchoring) them. If the block device supports the WRITE SAME command
241 * blkdev_issue_zeroout() will use it to optimize the process of
242 * clearing the block range. Otherwise the zeroing will be performed
243 * using regular WRITE calls.
579e8f3c
MP
244 */
245
246int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
d93ba7a5 247 sector_t nr_sects, gfp_t gfp_mask, bool discard)
579e8f3c 248{
e950fdf7
CH
249 if (discard) {
250 if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
251 BLKDEV_DISCARD_ZERO))
252 return 0;
253 }
d93ba7a5 254
9f9ee1f2
MP
255 if (bdev_write_same(bdev) &&
256 blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
257 ZERO_PAGE(0)) == 0)
258 return 0;
579e8f3c
MP
259
260 return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
261}
3f14d792 262EXPORT_SYMBOL(blkdev_issue_zeroout);