/*
 * mm/iov_iter.c
 * (as of commit "iov_iter.c: handle ITER_KVEC directly")
 */
#include <linux/export.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

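/*
 * The iterate_{iovec,kvec,bvec} macros below walk the segments of an
 * iov_iter, presenting each chunk to the caller-supplied STEP expression
 * through __v (a per-chunk iovec/kvec/bio_vec view).  __p tracks the
 * current segment and skip the offset into it.  The iovec variant lets
 * STEP report how many bytes it could not copy (a user fault) and stops
 * early; the kvec and bvec variants cannot fault, so STEP's value is
 * discarded.
 */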
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->bvec;					\
	__v.bv_len = min_t(size_t, n, __p->bv_len - skip);	\
	if (likely(__v.bv_len)) {			\
		__v.bv_page = __p->bv_page;		\
		__v.bv_offset = __p->bv_offset + skip;	\
		(void)(STEP);				\
		skip += __v.bv_len;			\
		n -= __v.bv_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.bv_len = min_t(size_t, n, __p->bv_len);	\
		if (unlikely(!__v.bv_len))		\
			continue;			\
		__v.bv_page = __p->bv_page;		\
		__v.bv_offset = __p->bv_offset;		\
		(void)(STEP);				\
		skip = __v.bv_len;			\
		n -= __v.bv_len;			\
	}						\
	n = wanted;					\
}

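/*
 * iterate_all_kinds() dispatches on i->type and runs exactly one of the
 * I/B/K step expressions per chunk, leaving the iterator untouched.
 * iterate_and_advance() does the same walk but then folds the result
 * back into the iterator: it advances i->iov/kvec/bvec and i->nr_segs
 * past the segments that were fully consumed, and updates i->count and
 * i->iov_offset.
 */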
#define iterate_all_kinds(i, n, v, I, B, K) {		\
	size_t skip = i->iov_offset;			\
	if (unlikely(i->type & ITER_BVEC)) {		\
		const struct bio_vec *bvec;		\
		struct bio_vec v;			\
		iterate_bvec(i, n, v, bvec, skip, (B))	\
	} else if (unlikely(i->type & ITER_KVEC)) {	\
		const struct kvec *kvec;		\
		struct kvec v;				\
		iterate_kvec(i, n, v, kvec, skip, (K))	\
	} else {					\
		const struct iovec *iov;		\
		struct iovec v;				\
		iterate_iovec(i, n, v, iov, skip, (I))	\
	}						\
}

#define iterate_and_advance(i, n, v, I, B, K) {		\
	size_t skip = i->iov_offset;			\
	if (unlikely(i->type & ITER_BVEC)) {		\
		const struct bio_vec *bvec;		\
		struct bio_vec v;			\
		iterate_bvec(i, n, v, bvec, skip, (B))	\
		if (skip == bvec->bv_len) {		\
			bvec++;				\
			skip = 0;			\
		}					\
		i->nr_segs -= bvec - i->bvec;		\
		i->bvec = bvec;				\
	} else if (unlikely(i->type & ITER_KVEC)) {	\
		const struct kvec *kvec;		\
		struct kvec v;				\
		iterate_kvec(i, n, v, kvec, skip, (K))	\
		if (skip == kvec->iov_len) {		\
			kvec++;				\
			skip = 0;			\
		}					\
		i->nr_segs -= kvec - i->kvec;		\
		i->kvec = kvec;				\
	} else {					\
		const struct iovec *iov;		\
		struct iovec v;				\
		iterate_iovec(i, n, v, iov, skip, (I))	\
		if (skip == iov->iov_len) {		\
			iov++;				\
			skip = 0;			\
		}					\
		i->nr_segs -= iov - i->iov;		\
		i->iov = iov;				\
	}						\
	i->count -= n;					\
	i->iov_offset = skip;				\
}

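/*
 * Slow path for copying a page to or from a user-backed (ITER_IOVEC)
 * iterator.  The data is copied under kmap_atomic() after pre-faulting
 * the user buffer; if a fault happens anyway, we fall back to a
 * sleeping kmap() and the faulting __copy_{to,from}_user().  Both
 * copy_page_to_iter_iovec() and its from_iter counterpart below advance
 * the iterator by the number of bytes actually copied.
 */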
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (!fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_to_user_inatomic(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_to_user_inatomic(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */
	kaddr = kmap(page);
	from = kaddr + offset;
	left = __copy_to_user(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_to_user(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);
done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (!fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_from_user_inatomic(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_from_user_inatomic(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */
	kaddr = kmap(page);
	to = kaddr + offset;
	left = __copy_from_user(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_from_user(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);
done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

/*
 * Fault in the first iovec of the given iov_iter, to a maximum length
 * of bytes. Returns 0 on success, or non-zero if the memory could not be
 * accessed (ie. because it is an invalid address).
 *
 * writev-intensive code may want this to prefault several iovecs -- that
 * would be possible (callers must not rely on the fact that _only_ the
 * first iovec will be faulted with the current implementation).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		char __user *buf = i->iov->iov_base + i->iov_offset;
		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
		return fault_in_pages_readable(buf, bytes);
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

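/*
 * When the caller is running under set_fs(KERNEL_DS), the "user"
 * pointers in the iovec array are really kernel pointers, so the
 * iterator is flagged ITER_KVEC and the array is reinterpreted as an
 * array of struct kvec (which has the same layout).
 */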
void iov_iter_init(struct iov_iter *i, int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	/* It will get better. Eventually... */
	if (segment_eq(get_fs(), KERNEL_DS)) {
		direction |= ITER_KVEC;
		i->type = direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

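/*
 * copy_to_iter() / copy_from_iter() copy between a kernel buffer and the
 * iterator, advancing the iterator as they go.  Each uses
 * iterate_and_advance() with one step expression per iterator flavour:
 * __copy_{to,from}_user() for user iovecs, a kmap_atomic() memcpy for
 * bvec pages, and a plain memcpy() for kvecs.
 *
 * Hypothetical usage sketch (not part of this file): a read-style path
 * that has generated data in a kernel buffer would do something like
 *
 *	struct iov_iter iter;
 *	iov_iter_init(&iter, READ, uvec, nr_segs, total_len);
 *	copied = copy_to_iter(kbuf, len, &iter);
 *
 * where uvec, nr_segs and total_len describe the caller's iovec array.
 */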
size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *from = addr;
	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	iterate_and_advance(i, bytes, v,
		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
			       v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_to_iter);

size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	iterate_and_advance(i, bytes, v,
		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
				 v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter);

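/*
 * copy_page_{to,from}_iter(): for ITER_BVEC and ITER_KVEC iterators the
 * page can simply be kmapped and handed to copy_{to,from}_iter(), since
 * nothing can fault; user-backed iterators go through the slower
 * copy_page_{to,from}_iter_iovec() paths above.
 */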
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_to_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	iterate_and_advance(i, bytes, v,
		__clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

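/*
 * Copy from the iterator into a page without advancing the iterator;
 * this uses iterate_all_kinds() rather than iterate_and_advance(), so
 * the caller decides afterwards how far to advance (typically by however
 * much was actually usable).  Runs entirely under kmap_atomic(), with
 * the non-faulting __copy_from_user_inatomic() for user segments.
 */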
size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	iterate_all_kinds(i, bytes, v,
		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
					  v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

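/*
 * Advance the iterator by size bytes: the step expressions are all
 * no-ops, so iterate_and_advance() only does the bookkeeping.
 */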
void iov_iter_advance(struct iov_iter *i, size_t size)
{
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (i->nr_segs == 1)
		return i->count;
	else if (i->type & ITER_BVEC)
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

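/*
 * OR together the base address and length of every chunk; a caller that
 * needs, say, 512-byte alignment can then just test
 *
 *	if (iov_iter_alignment(i) & 511)
 *		/* ...fall back to a bounce buffer... */
 *
 * since any misaligned start or length leaves low bits set in the mask.
 */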
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (!size)
		return 0;

	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

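/*
 * Pin the pages backing the first segment of the iterator, up to
 * maxsize bytes / maxpages pages.  *start receives the offset into the
 * first returned page.  For user iovecs this goes through
 * get_user_pages_fast(); for a bvec the (already resident) page is just
 * referenced and returned.  ITER_KVEC iterators are not supported here
 * and get -EFAULT.
 */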
ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

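/*
 * Allocate an array of n page pointers, preferring kmalloc() and
 * falling back to vmalloc() for large n; the array must be released
 * with kvfree().
 */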
static struct page **get_pages_array(size_t n)
{
	struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
	if (!p)
		p = vmalloc(n * sizeof(struct page *));
	return p;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

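/*
 * Upper bound on the number of pages spanned by the remaining data in
 * the iterator, clamped to maxpages.  A bvec chunk counts as one page;
 * iovec and kvec chunks are rounded out to page boundaries.
 */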
int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;

	iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);