block/blk-map.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Functions related to mapping data to requests
4 */
5 #include <linux/kernel.h>
6 #include <linux/sched/task_stack.h>
7 #include <linux/module.h>
8 #include <linux/bio.h>
9 #include <linux/blkdev.h>
10 #include <linux/uio.h>
11
12 #include "blk.h"
13
14 struct bio_map_data {
15 int is_our_pages;
16 struct iov_iter iter;
17 struct iovec iov[];
18 };
19
20 static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
21 gfp_t gfp_mask)
22 {
23 struct bio_map_data *bmd;
24
25 if (data->nr_segs > UIO_MAXIOV)
26 return NULL;
27
28 bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
29 if (!bmd)
30 return NULL;
31 memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
32 bmd->iter = *data;
33 bmd->iter.iov = bmd->iov;
34 return bmd;
35 }
36
37 /**
38 * bio_copy_from_iter - copy all pages from iov_iter to bio
39 * @bio: The &struct bio which describes the I/O as destination
40 * @iter: iov_iter as source
41 *
42 * Copy all pages from iov_iter to bio.
43 * Returns 0 on success, or error on failure.
44 */
45 static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
46 {
47 struct bio_vec *bvec;
48 struct bvec_iter_all iter_all;
49
50 bio_for_each_segment_all(bvec, bio, iter_all) {
51 ssize_t ret;
52
53 ret = copy_page_from_iter(bvec->bv_page,
54 bvec->bv_offset,
55 bvec->bv_len,
56 iter);
57
58 if (!iov_iter_count(iter))
59 break;
60
61 if (ret < bvec->bv_len)
62 return -EFAULT;
63 }
64
65 return 0;
66 }
67
68 /**
69 * bio_copy_to_iter - copy all pages from bio to iov_iter
70 * @bio: The &struct bio which describes the I/O as source
71 * @iter: iov_iter as destination
72 *
73 * Copy all pages from bio to iov_iter.
74 * Returns 0 on success, or error on failure.
75 */
76 static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
77 {
78 struct bio_vec *bvec;
79 struct bvec_iter_all iter_all;
80
81 bio_for_each_segment_all(bvec, bio, iter_all) {
82 ssize_t ret;
83
84 ret = copy_page_to_iter(bvec->bv_page,
85 bvec->bv_offset,
86 bvec->bv_len,
87 &iter);
88
89 if (!iov_iter_count(&iter))
90 break;
91
92 if (ret < bvec->bv_len)
93 return -EFAULT;
94 }
95
96 return 0;
97 }
98
99 /**
100 * bio_uncopy_user - finish previously mapped bio
101 * @bio: bio being terminated
102 *
103 * Free pages allocated from bio_copy_user_iov() and write back data
104 * to user space in case of a read.
105 */
106 static int bio_uncopy_user(struct bio *bio)
107 {
108 struct bio_map_data *bmd = bio->bi_private;
109 int ret = 0;
110
111 if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
112 /*
113 * if we're in a workqueue, the request is orphaned, so
114 * don't copy into a random user address space, just free
115 * and return -EINTR so user space doesn't expect any data.
116 */
117 if (!current->mm)
118 ret = -EINTR;
119 else if (bio_data_dir(bio) == READ)
120 ret = bio_copy_to_iter(bio, bmd->iter);
121 if (bmd->is_our_pages)
122 bio_free_pages(bio);
123 }
124 kfree(bmd);
125 bio_put(bio);
126 return ret;
127 }
128
129 /**
130 * bio_copy_user_iov - copy user data to bio
131 * @q: destination block queue
132 * @map_data: pointer to the rq_map_data holding pages (if necessary)
133 * @iter: iovec iterator
134 * @gfp_mask: memory allocation flags
135 *
136 * Prepares and returns a bio for indirect user io, bouncing data
137 * to/from kernel pages as necessary. Must be paired with a call
138 * to bio_uncopy_user() on io completion.
139 */
140 static struct bio *bio_copy_user_iov(struct request_queue *q,
141 struct rq_map_data *map_data, struct iov_iter *iter,
142 gfp_t gfp_mask)
143 {
144 struct bio_map_data *bmd;
145 struct page *page;
146 struct bio *bio;
147 int i = 0, ret;
148 int nr_pages;
149 unsigned int len = iter->count;
150 unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
151
152 bmd = bio_alloc_map_data(iter, gfp_mask);
153 if (!bmd)
154 return ERR_PTR(-ENOMEM);
155
156 /*
157 * We need to do a deep copy of the iov_iter including the iovecs.
158 * The caller-provided iov might point to an on-stack or otherwise
159 * short-lived one.
160 */
161 bmd->is_our_pages = map_data ? 0 : 1;
162
163 nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
164 if (nr_pages > BIO_MAX_PAGES)
165 nr_pages = BIO_MAX_PAGES;
166
167 ret = -ENOMEM;
168 bio = bio_kmalloc(gfp_mask, nr_pages);
169 if (!bio)
170 goto out_bmd;
171
172 ret = 0;
173
174 if (map_data) {
175 nr_pages = 1 << map_data->page_order;
176 i = map_data->offset / PAGE_SIZE;
177 }
178 while (len) {
179 unsigned int bytes = PAGE_SIZE;
180
181 bytes -= offset;
182
183 if (bytes > len)
184 bytes = len;
185
186 if (map_data) {
187 if (i == map_data->nr_entries * nr_pages) {
188 ret = -ENOMEM;
189 break;
190 }
191
192 page = map_data->pages[i / nr_pages];
193 page += (i % nr_pages);
194
195 i++;
196 } else {
197 page = alloc_page(q->bounce_gfp | gfp_mask);
198 if (!page) {
199 ret = -ENOMEM;
200 break;
201 }
202 }
203
204 if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
205 if (!map_data)
206 __free_page(page);
207 break;
208 }
209
210 len -= bytes;
211 offset = 0;
212 }
213
214 if (ret)
215 goto cleanup;
216
217 if (map_data)
218 map_data->offset += bio->bi_iter.bi_size;
219
220 /*
221 * success
222 */
223 if ((iov_iter_rw(iter) == WRITE &&
224 (!map_data || !map_data->null_mapped)) ||
225 (map_data && map_data->from_user)) {
226 ret = bio_copy_from_iter(bio, iter);
227 if (ret)
228 goto cleanup;
229 } else {
230 if (bmd->is_our_pages)
231 zero_fill_bio(bio);
232 iov_iter_advance(iter, bio->bi_iter.bi_size);
233 }
234
235 bio->bi_private = bmd;
236 if (map_data && map_data->null_mapped)
237 bio_set_flag(bio, BIO_NULL_MAPPED);
238 return bio;
239 cleanup:
240 if (!map_data)
241 bio_free_pages(bio);
242 bio_put(bio);
243 out_bmd:
244 kfree(bmd);
245 return ERR_PTR(ret);
246 }
247
248 /**
249 * bio_map_user_iov - map user iovec into bio
250 * @q: the struct request_queue for the bio
251 * @iter: iovec iterator
252 * @gfp_mask: memory allocation flags
253 *
254 * Map the user space address into a bio suitable for io to a block
255 * device. Returns an error pointer in case of error.
256 */
257 static struct bio *bio_map_user_iov(struct request_queue *q,
258 struct iov_iter *iter, gfp_t gfp_mask)
259 {
260 int j;
261 struct bio *bio;
262 int ret;
263
264 if (!iov_iter_count(iter))
265 return ERR_PTR(-EINVAL);
266
267 bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
268 if (!bio)
269 return ERR_PTR(-ENOMEM);
270
271 while (iov_iter_count(iter)) {
272 struct page **pages;
273 ssize_t bytes;
274 size_t offs, added = 0;
275 int npages;
276
277 bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
278 if (unlikely(bytes <= 0)) {
279 ret = bytes ? bytes : -EFAULT;
280 goto out_unmap;
281 }
282
283 npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);
284
285 if (unlikely(offs & queue_dma_alignment(q))) {
286 ret = -EINVAL;
287 j = 0;
288 } else {
289 for (j = 0; j < npages; j++) {
290 struct page *page = pages[j];
291 unsigned int n = PAGE_SIZE - offs;
292 bool same_page = false;
293
294 if (n > bytes)
295 n = bytes;
296
297 if (!__bio_add_pc_page(q, bio, page, n, offs,
298 &same_page)) {
299 if (same_page)
300 put_page(page);
301 break;
302 }
303
304 added += n;
305 bytes -= n;
306 offs = 0;
307 }
308 iov_iter_advance(iter, added);
309 }
310 /*
311 * release the pages we didn't map into the bio, if any
312 */
313 while (j < npages)
314 put_page(pages[j++]);
315 kvfree(pages);
316 /* couldn't stuff something into bio? */
317 if (bytes)
318 break;
319 }
320
321 bio_set_flag(bio, BIO_USER_MAPPED);
322
323 /*
324 * subtle -- if bio_map_user_iov() ended up bouncing a bio,
325 * it would normally disappear when its bi_end_io is run.
326 * however, we need it for the unmap, so grab an extra
327 * reference to it
328 */
329 bio_get(bio);
330 return bio;
331
332 out_unmap:
333 bio_release_pages(bio, false);
334 bio_put(bio);
335 return ERR_PTR(ret);
336 }
337
338 /**
339 * bio_unmap_user - unmap a bio
340 * @bio: the bio being unmapped
341 *
342 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
343 * process context.
344 *
345 * bio_unmap_user() may sleep.
346 */
347 static void bio_unmap_user(struct bio *bio)
348 {
349 bio_release_pages(bio, bio_data_dir(bio) == READ);
350 bio_put(bio);
351 bio_put(bio);
352 }
353
354 static void bio_invalidate_vmalloc_pages(struct bio *bio)
355 {
356 #ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
357 if (bio->bi_private && !op_is_write(bio_op(bio))) {
358 unsigned long i, len = 0;
359
360 for (i = 0; i < bio->bi_vcnt; i++)
361 len += bio->bi_io_vec[i].bv_len;
362 invalidate_kernel_vmap_range(bio->bi_private, len);
363 }
364 #endif
365 }
366
367 static void bio_map_kern_endio(struct bio *bio)
368 {
369 bio_invalidate_vmalloc_pages(bio);
370 bio_put(bio);
371 }
372
373 /**
374 * bio_map_kern - map kernel address into bio
375 * @q: the struct request_queue for the bio
376 * @data: pointer to buffer to map
377 * @len: length in bytes
378 * @gfp_mask: allocation flags for bio allocation
379 *
380 * Map the kernel address into a bio suitable for io to a block
381 * device. Returns an error pointer in case of error.
382 */
383 static struct bio *bio_map_kern(struct request_queue *q, void *data,
384 unsigned int len, gfp_t gfp_mask)
385 {
386 unsigned long kaddr = (unsigned long)data;
387 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
388 unsigned long start = kaddr >> PAGE_SHIFT;
389 const int nr_pages = end - start;
390 bool is_vmalloc = is_vmalloc_addr(data);
391 struct page *page;
392 int offset, i;
393 struct bio *bio;
394
395 bio = bio_kmalloc(gfp_mask, nr_pages);
396 if (!bio)
397 return ERR_PTR(-ENOMEM);
398
399 if (is_vmalloc) {
400 flush_kernel_vmap_range(data, len);
401 bio->bi_private = data;
402 }
403
404 offset = offset_in_page(kaddr);
405 for (i = 0; i < nr_pages; i++) {
406 unsigned int bytes = PAGE_SIZE - offset;
407
408 if (len <= 0)
409 break;
410
411 if (bytes > len)
412 bytes = len;
413
414 if (!is_vmalloc)
415 page = virt_to_page(data);
416 else
417 page = vmalloc_to_page(data);
418 if (bio_add_pc_page(q, bio, page, bytes,
419 offset) < bytes) {
420 /* we don't support partial mappings */
421 bio_put(bio);
422 return ERR_PTR(-EINVAL);
423 }
424
425 data += bytes;
426 len -= bytes;
427 offset = 0;
428 }
429
430 bio->bi_end_io = bio_map_kern_endio;
431 return bio;
432 }
433
434 static void bio_copy_kern_endio(struct bio *bio)
435 {
436 bio_free_pages(bio);
437 bio_put(bio);
438 }
439
440 static void bio_copy_kern_endio_read(struct bio *bio)
441 {
442 char *p = bio->bi_private;
443 struct bio_vec *bvec;
444 struct bvec_iter_all iter_all;
445
446 bio_for_each_segment_all(bvec, bio, iter_all) {
447 memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
448 p += bvec->bv_len;
449 }
450
451 bio_copy_kern_endio(bio);
452 }
453
454 /**
455 * bio_copy_kern - copy kernel address into bio
456 * @q: the struct request_queue for the bio
457 * @data: pointer to buffer to copy
458 * @len: length in bytes
459 * @gfp_mask: allocation flags for bio and page allocation
460 * @reading: data direction is READ
461 *
462 * Copy the kernel address into a bio suitable for io to a block
463 * device. Returns an error pointer in case of error.
464 */
465 static struct bio *bio_copy_kern(struct request_queue *q, void *data,
466 unsigned int len, gfp_t gfp_mask, int reading)
467 {
468 unsigned long kaddr = (unsigned long)data;
469 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
470 unsigned long start = kaddr >> PAGE_SHIFT;
471 struct bio *bio;
472 void *p = data;
473 int nr_pages = 0;
474
475 /*
476 * Overflow, abort
477 */
478 if (end < start)
479 return ERR_PTR(-EINVAL);
480
481 nr_pages = end - start;
482 bio = bio_kmalloc(gfp_mask, nr_pages);
483 if (!bio)
484 return ERR_PTR(-ENOMEM);
485
486 while (len) {
487 struct page *page;
488 unsigned int bytes = PAGE_SIZE;
489
490 if (bytes > len)
491 bytes = len;
492
493 page = alloc_page(q->bounce_gfp | gfp_mask);
494 if (!page)
495 goto cleanup;
496
497 if (!reading)
498 memcpy(page_address(page), p, bytes);
499
500 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
501 break;
502
503 len -= bytes;
504 p += bytes;
505 }
506
507 if (reading) {
508 bio->bi_end_io = bio_copy_kern_endio_read;
509 bio->bi_private = data;
510 } else {
511 bio->bi_end_io = bio_copy_kern_endio;
512 }
513
514 return bio;
515
516 cleanup:
517 bio_free_pages(bio);
518 bio_put(bio);
519 return ERR_PTR(-ENOMEM);
520 }
521
522 /*
523 * Append a bio to a passthrough request. Only works if the bio can be merged
524 * into the request based on the driver constraints.
525 */
526 int blk_rq_append_bio(struct request *rq, struct bio **bio)
527 {
528 struct bio *orig_bio = *bio;
529 struct bvec_iter iter;
530 struct bio_vec bv;
531 unsigned int nr_segs = 0;
532
533 blk_queue_bounce(rq->q, bio);
534
535 bio_for_each_bvec(bv, *bio, iter)
536 nr_segs++;
537
538 if (!rq->bio) {
539 blk_rq_bio_prep(rq, *bio, nr_segs);
540 } else {
541 if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
542 if (orig_bio != *bio) {
543 bio_put(*bio);
544 *bio = orig_bio;
545 }
546 return -EINVAL;
547 }
548
549 rq->biotail->bi_next = *bio;
550 rq->biotail = *bio;
551 rq->__data_len += (*bio)->bi_iter.bi_size;
552 }
553
554 return 0;
555 }
556 EXPORT_SYMBOL(blk_rq_append_bio);
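/*
 * Illustrative sketch, not part of the original file: the calling
 * convention of blk_rq_append_bio().  A pointer to the bio pointer is
 * passed because blk_queue_bounce() may swap in a bounce bio; in this
 * version, a failed back-merge restores the caller's original bio
 * before returning.  __blk_rq_map_user_iov() below is an in-tree
 * caller.  'example_append' is a made-up name.
 */
#if 0	/* example only, not compiled */
static int example_append(struct request *rq, struct bio *bio)
{
	int ret;

	/* blk_queue_bounce() may replace 'bio' with a bounce clone */
	ret = blk_rq_append_bio(rq, &bio);
	if (ret)
		return ret;	/* 'bio' is the original bio again */

	/* on success, 'bio' is whatever was actually linked into rq */
	return 0;
}
#endif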
557
558 static int __blk_rq_unmap_user(struct bio *bio)
559 {
560 int ret = 0;
561
562 if (bio) {
563 if (bio_flagged(bio, BIO_USER_MAPPED))
564 bio_unmap_user(bio);
565 else
566 ret = bio_uncopy_user(bio);
567 }
568
569 return ret;
570 }
571
572 static int __blk_rq_map_user_iov(struct request *rq,
573 struct rq_map_data *map_data, struct iov_iter *iter,
574 gfp_t gfp_mask, bool copy)
575 {
576 struct request_queue *q = rq->q;
577 struct bio *bio, *orig_bio;
578 int ret;
579
580 if (copy)
581 bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
582 else
583 bio = bio_map_user_iov(q, iter, gfp_mask);
584
585 if (IS_ERR(bio))
586 return PTR_ERR(bio);
587
588 bio->bi_opf &= ~REQ_OP_MASK;
589 bio->bi_opf |= req_op(rq);
590
591 orig_bio = bio;
592
593 /*
594 * We link the bounce buffer in and could have to traverse it
595 * later so we have to get a ref to prevent it from being freed
596 */
597 ret = blk_rq_append_bio(rq, &bio);
598 if (ret) {
599 __blk_rq_unmap_user(orig_bio);
600 return ret;
601 }
602 bio_get(bio);
603
604 return 0;
605 }
606
607 /**
608 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
609 * @q: request queue where request should be inserted
610 * @rq: request to map data to
611 * @map_data: pointer to the rq_map_data holding pages (if necessary)
612 * @iter: iovec iterator
613 * @gfp_mask: memory allocation flags
614 *
615 * Description:
616 * Data will be mapped directly for zero copy I/O, if possible. Otherwise
617 * a kernel bounce buffer is used.
618 *
619 * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
620 * still in process context.
621 *
622 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
623 * before being submitted to the device, as pages mapped may be out of
624 * reach. It's the caller's responsibility to make sure this happens. The
625 * original bio must be passed back in to blk_rq_unmap_user() for proper
626 * unmapping.
627 */
628 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
629 struct rq_map_data *map_data,
630 const struct iov_iter *iter, gfp_t gfp_mask)
631 {
632 bool copy = false;
633 unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
634 struct bio *bio = NULL;
635 struct iov_iter i;
636 int ret = -EINVAL;
637
638 if (!iter_is_iovec(iter))
639 goto fail;
640
641 if (map_data)
642 copy = true;
643 else if (iov_iter_alignment(iter) & align)
644 copy = true;
645 else if (queue_virt_boundary(q))
646 copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);
647
648 i = *iter;
649 do {
650 ret = __blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
651 if (ret)
652 goto unmap_rq;
653 if (!bio)
654 bio = rq->bio;
655 } while (iov_iter_count(&i));
656
657 if (!bio_flagged(bio, BIO_USER_MAPPED))
658 rq->rq_flags |= RQF_COPY_USER;
659 return 0;
660
661 unmap_rq:
662 blk_rq_unmap_user(bio);
663 fail:
664 rq->bio = NULL;
665 return ret;
666 }
667 EXPORT_SYMBOL(blk_rq_map_user_iov);
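/*
 * Illustrative sketch, not part of the original file: a typical
 * passthrough-ioctl style caller of blk_rq_map_user_iov(), modelled on
 * in-tree users such as the SCSI ioctl path.  The request is assumed to
 * be set up elsewhere; 'example_map_user_iovec' and its parameters are
 * made-up names.
 */
#if 0	/* example only, not compiled */
static int example_map_user_iovec(struct request *rq,
				  const struct iovec __user *uvec,
				  unsigned int nr_segs, struct bio **bio)
{
	struct iovec *iov = NULL;
	struct iov_iter i;
	ssize_t bytes;
	int ret;

	bytes = import_iovec(rq_data_dir(rq), uvec, nr_segs, 0, &iov, &i);
	if (bytes < 0)
		return bytes;

	ret = blk_rq_map_user_iov(rq->q, rq, NULL, &i, GFP_KERNEL);
	kfree(iov);		/* import_iovec() may have allocated it */
	if (ret)
		return ret;

	/* save the original bio; blk_rq_unmap_user() will need it */
	*bio = rq->bio;
	return 0;
}
#endif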
668
669 int blk_rq_map_user(struct request_queue *q, struct request *rq,
670 struct rq_map_data *map_data, void __user *ubuf,
671 unsigned long len, gfp_t gfp_mask)
672 {
673 struct iovec iov;
674 struct iov_iter i;
675 int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);
676
677 if (unlikely(ret < 0))
678 return ret;
679
680 return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
681 }
682 EXPORT_SYMBOL(blk_rq_map_user);
683
684 /**
685 * blk_rq_unmap_user - unmap a request with user data
686 * @bio: start of bio list
687 *
688 * Description:
689 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must
690 * supply the original rq->bio from the blk_rq_map_user() return, since
691 * the I/O completion may have changed rq->bio.
692 */
693 int blk_rq_unmap_user(struct bio *bio)
694 {
695 struct bio *mapped_bio;
696 int ret = 0, ret2;
697
698 while (bio) {
699 mapped_bio = bio;
700 if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
701 mapped_bio = bio->bi_private;
702
703 ret2 = __blk_rq_unmap_user(mapped_bio);
704 if (ret2 && !ret)
705 ret = ret2;
706
707 mapped_bio = bio;
708 bio = bio->bi_next;
709 bio_put(mapped_bio);
710 }
711
712 return ret;
713 }
714 EXPORT_SYMBOL(blk_rq_unmap_user);
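/*
 * Illustrative sketch, not part of the original file: the usual
 * map / execute / unmap cycle for a single user buffer, modelled on
 * in-tree passthrough callers.  Timeout handling and result decoding
 * are omitted, and REQ_OP_DRV_IN is only a placeholder for whatever
 * passthrough op a real driver would issue.
 */
#if 0	/* example only, not compiled */
static int example_user_passthrough(struct request_queue *q,
				    void __user *ubuf, unsigned long len)
{
	struct request *rq;
	struct bio *bio;
	int ret;

	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret)
		goto out_put;

	bio = rq->bio;	/* keep the original bio for unmapping */
	blk_execute_rq(q, NULL, rq, 0);

	/* copy back and release the mapping even if the I/O failed */
	ret = blk_rq_unmap_user(bio);
out_put:
	blk_put_request(rq);
	return ret;
}
#endif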
715
716 /**
717 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
718 * @q: request queue where request should be inserted
719 * @rq: request to fill
720 * @kbuf: the kernel buffer
721 * @len: length of kernel data
722 * @gfp_mask: memory allocation flags
723 *
724 * Description:
725 * Data will be mapped directly if possible. Otherwise a bounce
726 * buffer is used. Can be called multiple times to append multiple
727 * buffers.
728 */
729 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
730 unsigned int len, gfp_t gfp_mask)
731 {
732 int reading = rq_data_dir(rq) == READ;
733 unsigned long addr = (unsigned long) kbuf;
734 int do_copy = 0;
735 struct bio *bio, *orig_bio;
736 int ret;
737
738 if (len > (queue_max_hw_sectors(q) << 9))
739 return -EINVAL;
740 if (!len || !kbuf)
741 return -EINVAL;
742
743 do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);
744 if (do_copy)
745 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
746 else
747 bio = bio_map_kern(q, kbuf, len, gfp_mask);
748
749 if (IS_ERR(bio))
750 return PTR_ERR(bio);
751
752 bio->bi_opf &= ~REQ_OP_MASK;
753 bio->bi_opf |= req_op(rq);
754
755 if (do_copy)
756 rq->rq_flags |= RQF_COPY_USER;
757
758 orig_bio = bio;
759 ret = blk_rq_append_bio(rq, &bio);
760 if (unlikely(ret)) {
761 /* request is too big */
762 bio_put(orig_bio);
763 return ret;
764 }
765
766 return 0;
767 }
768 EXPORT_SYMBOL(blk_rq_map_kern);
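/*
 * Illustrative sketch, not part of the original file: attaching a
 * kernel buffer to a passthrough request with blk_rq_map_kern().  Note
 * that a misaligned or on-stack buffer is silently bounced through a
 * copy (see do_copy above).  REQ_OP_DRV_OUT and the missing command
 * setup are placeholders for driver-specific details.
 */
#if 0	/* example only, not compiled */
static int example_kern_passthrough(struct request_queue *q,
				    void *buf, unsigned int len)
{
	struct request *rq;
	int ret;

	rq = blk_get_request(q, REQ_OP_DRV_OUT, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	ret = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
	if (ret)
		goto out_put;

	blk_execute_rq(q, NULL, rq, 0);
	/* a real caller would inspect the request's result here */
out_put:
	blk_put_request(rq);
	return ret;
}
#endif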