]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - lib/iov_iter.c
[iov_iter] new primitives - copy_from_iter_full() and friends
[mirror_ubuntu-artful-kernel.git] / lib / iov_iter.c
CommitLineData
4f18cd31
AV
1#include <linux/export.h>
2#include <linux/uio.h>
3#include <linux/pagemap.h>
91f79c43
AV
4#include <linux/slab.h>
5#include <linux/vmalloc.h>
241699cd 6#include <linux/splice.h>
a604ec7e 7#include <net/checksum.h>
4f18cd31 8
241699cd
AV
9#define PIPE_PARANOIA /* for now */
10
04a31165
AV
/*
 * Walk the user iovec array of iterator @i for up to @n bytes, starting
 * @skip bytes into the first segment.  For every non-empty chunk, @__v is
 * set up to describe it and STEP is evaluated; STEP yields the number of
 * bytes it failed to process.  The walk stops at the first short STEP.
 * On exit @n holds the number of bytes actually processed, @__p points at
 * the last segment visited and @skip is the offset reached inside it.
 */
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left = 0;				\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}
38
a280455f
AV
/*
 * Walk the kvec array of iterator @i for exactly @n bytes, starting @skip
 * bytes into the first segment.  STEP is evaluated for every non-empty
 * chunk with @__v describing it; its value is discarded, so the walk never
 * stops short.  On exit @n is restored to its initial value, @__p points at
 * the last segment visited and @skip is the offset reached inside it.
 */
#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		n -= __v.iov_len;			\
		skip += __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		n -= __v.iov_len;			\
		skip = __v.iov_len;			\
	}						\
	n = wanted;					\
}
61
1bdc76ae
ML
/*
 * Walk the bio_vec array of iterator @i for @n bytes, starting @skip bytes
 * into the first segment.  STEP is evaluated (value discarded) for every
 * non-empty chunk, with @__v describing the chunk and @__bi tracking the
 * position.
 */
#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start = {			\
		.bi_size = n,				\
		.bi_bvec_done = skip,			\
		.bi_idx = 0,				\
	};						\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (__v.bv_len)				\
			(void)(STEP);			\
	}						\
}
73
/*
 * Dispatch on the iterator flavour and walk @n bytes of @i without
 * modifying the iterator itself.  @v names the per-chunk descriptor that
 * the step expressions see: I is used for user iovecs, B for bio_vecs and
 * K for kvecs.
 */
#define iterate_all_kinds(i, n, v, I, B, K) {			\
	size_t skip = i->iov_offset;				\
	if (unlikely(i->type & ITER_BVEC)) {			\
		struct bvec_iter __bi;				\
		struct bio_vec v;				\
		iterate_bvec(i, n, v, __bi, skip, (B))		\
	} else if (unlikely(i->type & ITER_KVEC)) {		\
		const struct kvec *kvec;			\
		struct kvec v;					\
		iterate_kvec(i, n, v, kvec, skip, (K))		\
	} else {						\
		const struct iovec *iov;			\
		struct iovec v;					\
		iterate_iovec(i, n, v, iov, skip, (I))		\
	}							\
}
90
/*
 * Like iterate_all_kinds(), but @n is first clamped to i->count and the
 * iterator is moved past whatever was processed: the segment pointer,
 * nr_segs, iov_offset and count are all updated.  Nothing happens when the
 * iterator is already empty.
 */
#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bvec_iter __bi;			\
			struct bio_vec v;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			/* segment fully consumed: step past it */	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			/* segment fully consumed: step past it */	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
129
62a8067a 130static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
4f18cd31
AV
131 struct iov_iter *i)
132{
133 size_t skip, copy, left, wanted;
134 const struct iovec *iov;
135 char __user *buf;
136 void *kaddr, *from;
137
138 if (unlikely(bytes > i->count))
139 bytes = i->count;
140
141 if (unlikely(!bytes))
142 return 0;
143
144 wanted = bytes;
145 iov = i->iov;
146 skip = i->iov_offset;
147 buf = iov->iov_base + skip;
148 copy = min(bytes, iov->iov_len - skip);
149
3fa6c507 150 if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
4f18cd31
AV
151 kaddr = kmap_atomic(page);
152 from = kaddr + offset;
153
154 /* first chunk, usually the only one */
155 left = __copy_to_user_inatomic(buf, from, copy);
156 copy -= left;
157 skip += copy;
158 from += copy;
159 bytes -= copy;
160
161 while (unlikely(!left && bytes)) {
162 iov++;
163 buf = iov->iov_base;
164 copy = min(bytes, iov->iov_len);
165 left = __copy_to_user_inatomic(buf, from, copy);
166 copy -= left;
167 skip = copy;
168 from += copy;
169 bytes -= copy;
170 }
171 if (likely(!bytes)) {
172 kunmap_atomic(kaddr);
173 goto done;
174 }
175 offset = from - kaddr;
176 buf += copy;
177 kunmap_atomic(kaddr);
178 copy = min(bytes, iov->iov_len - skip);
179 }
180 /* Too bad - revert to non-atomic kmap */
3fa6c507 181
4f18cd31
AV
182 kaddr = kmap(page);
183 from = kaddr + offset;
184 left = __copy_to_user(buf, from, copy);
185 copy -= left;
186 skip += copy;
187 from += copy;
188 bytes -= copy;
189 while (unlikely(!left && bytes)) {
190 iov++;
191 buf = iov->iov_base;
192 copy = min(bytes, iov->iov_len);
193 left = __copy_to_user(buf, from, copy);
194 copy -= left;
195 skip = copy;
196 from += copy;
197 bytes -= copy;
198 }
199 kunmap(page);
3fa6c507 200
4f18cd31 201done:
81055e58
AV
202 if (skip == iov->iov_len) {
203 iov++;
204 skip = 0;
205 }
4f18cd31
AV
206 i->count -= wanted - bytes;
207 i->nr_segs -= iov - i->iov;
208 i->iov = iov;
209 i->iov_offset = skip;
210 return wanted - bytes;
211}
4f18cd31 212
62a8067a 213static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
f0d1bec9
AV
214 struct iov_iter *i)
215{
216 size_t skip, copy, left, wanted;
217 const struct iovec *iov;
218 char __user *buf;
219 void *kaddr, *to;
220
221 if (unlikely(bytes > i->count))
222 bytes = i->count;
223
224 if (unlikely(!bytes))
225 return 0;
226
227 wanted = bytes;
228 iov = i->iov;
229 skip = i->iov_offset;
230 buf = iov->iov_base + skip;
231 copy = min(bytes, iov->iov_len - skip);
232
3fa6c507 233 if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
f0d1bec9
AV
234 kaddr = kmap_atomic(page);
235 to = kaddr + offset;
236
237 /* first chunk, usually the only one */
238 left = __copy_from_user_inatomic(to, buf, copy);
239 copy -= left;
240 skip += copy;
241 to += copy;
242 bytes -= copy;
243
244 while (unlikely(!left && bytes)) {
245 iov++;
246 buf = iov->iov_base;
247 copy = min(bytes, iov->iov_len);
248 left = __copy_from_user_inatomic(to, buf, copy);
249 copy -= left;
250 skip = copy;
251 to += copy;
252 bytes -= copy;
253 }
254 if (likely(!bytes)) {
255 kunmap_atomic(kaddr);
256 goto done;
257 }
258 offset = to - kaddr;
259 buf += copy;
260 kunmap_atomic(kaddr);
261 copy = min(bytes, iov->iov_len - skip);
262 }
263 /* Too bad - revert to non-atomic kmap */
3fa6c507 264
f0d1bec9
AV
265 kaddr = kmap(page);
266 to = kaddr + offset;
267 left = __copy_from_user(to, buf, copy);
268 copy -= left;
269 skip += copy;
270 to += copy;
271 bytes -= copy;
272 while (unlikely(!left && bytes)) {
273 iov++;
274 buf = iov->iov_base;
275 copy = min(bytes, iov->iov_len);
276 left = __copy_from_user(to, buf, copy);
277 copy -= left;
278 skip = copy;
279 to += copy;
280 bytes -= copy;
281 }
282 kunmap(page);
3fa6c507 283
f0d1bec9 284done:
81055e58
AV
285 if (skip == iov->iov_len) {
286 iov++;
287 skip = 0;
288 }
f0d1bec9
AV
289 i->count -= wanted - bytes;
290 i->nr_segs -= iov - i->iov;
291 i->iov = iov;
292 i->iov_offset = skip;
293 return wanted - bytes;
294}
f0d1bec9 295
241699cd
AV
#ifdef PIPE_PARANOIA
/*
 * Consistency check for a pipe-backed iterator.
 *
 * With a non-zero iov_offset the iterator must sit on the last occupied
 * buffer of a non-empty pipe, exactly at the end of that buffer's data.
 * With a zero iov_offset it must sit on the slot right after the last
 * occupied buffer.  On violation the pipe state is dumped, a warning is
 * raised and false is returned.
 */
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int mask = pipe->buffers - 1;
	int next = pipe->curbuf + pipe->nrbufs;
	int idx = i->idx;
	bool ok;

	if (i->iov_offset) {
		struct pipe_buffer *last = &pipe->bufs[idx];

		ok = likely(pipe->nrbufs) &&
		     likely(idx == ((next - 1) & mask)) &&
		     likely(last->offset + last->len == i->iov_offset);
	} else {
		ok = (idx == (next & mask));
	}
	if (ok)
		return true;

	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++) {
		struct pipe_buffer *b = &pipe->bufs[idx];

		printk(KERN_ERR "[%p %p %d %d]\n",
			b->ops,
			b->page,
			b->offset,
			b->len);
	}
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif
333
334static inline int next_idx(int idx, struct pipe_inode_info *pipe)
335{
336 return (idx + 1) & (pipe->buffers - 1);
337}
338
339static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
340 struct iov_iter *i)
341{
342 struct pipe_inode_info *pipe = i->pipe;
343 struct pipe_buffer *buf;
344 size_t off;
345 int idx;
346
347 if (unlikely(bytes > i->count))
348 bytes = i->count;
349
350 if (unlikely(!bytes))
351 return 0;
352
353 if (!sanity(i))
354 return 0;
355
356 off = i->iov_offset;
357 idx = i->idx;
358 buf = &pipe->bufs[idx];
359 if (off) {
360 if (offset == off && buf->page == page) {
361 /* merge with the last one */
362 buf->len += bytes;
363 i->iov_offset += bytes;
364 goto out;
365 }
366 idx = next_idx(idx, pipe);
367 buf = &pipe->bufs[idx];
368 }
369 if (idx == pipe->curbuf && pipe->nrbufs)
370 return 0;
371 pipe->nrbufs++;
372 buf->ops = &page_cache_pipe_buf_ops;
373 get_page(buf->page = page);
374 buf->offset = offset;
375 buf->len = bytes;
376 i->iov_offset = offset + bytes;
377 i->idx = idx;
378out:
379 i->count -= bytes;
380 return bytes;
381}
382
171a0203
AA
383/*
384 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
385 * bytes. For each iovec, fault in each page that constitutes the iovec.
386 *
387 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
388 * because it is an invalid address).
389 */
d4690f1e 390int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
171a0203
AA
391{
392 size_t skip = i->iov_offset;
393 const struct iovec *iov;
394 int err;
395 struct iovec v;
396
397 if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
398 iterate_iovec(i, bytes, v, iov, skip, ({
4bce9f6e 399 err = fault_in_pages_readable(v.iov_base, v.iov_len);
171a0203
AA
400 if (unlikely(err))
401 return err;
402 0;}))
403 }
404 return 0;
405}
d4690f1e 406EXPORT_SYMBOL(iov_iter_fault_in_readable);
171a0203 407
71d8e532
AV
408void iov_iter_init(struct iov_iter *i, int direction,
409 const struct iovec *iov, unsigned long nr_segs,
410 size_t count)
411{
412 /* It will get better. Eventually... */
a280455f 413 if (segment_eq(get_fs(), KERNEL_DS)) {
62a8067a 414 direction |= ITER_KVEC;
a280455f
AV
415 i->type = direction;
416 i->kvec = (struct kvec *)iov;
417 } else {
418 i->type = direction;
419 i->iov = iov;
420 }
71d8e532
AV
421 i->nr_segs = nr_segs;
422 i->iov_offset = 0;
423 i->count = count;
424}
425EXPORT_SYMBOL(iov_iter_init);
7b2c99d1 426
62a8067a
AV
427static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
428{
429 char *from = kmap_atomic(page);
430 memcpy(to, from + offset, len);
431 kunmap_atomic(from);
432}
433
36f7a8a4 434static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
62a8067a
AV
435{
436 char *to = kmap_atomic(page);
437 memcpy(to + offset, from, len);
438 kunmap_atomic(to);
439}
440
c35e0248
MW
441static void memzero_page(struct page *page, size_t offset, size_t len)
442{
443 char *addr = kmap_atomic(page);
444 memset(addr + offset, 0, len);
445 kunmap_atomic(addr);
446}
447
241699cd
AV
448static inline bool allocated(struct pipe_buffer *buf)
449{
450 return buf->ops == &default_pipe_buf_ops;
451}
452
453static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
454{
455 size_t off = i->iov_offset;
456 int idx = i->idx;
457 if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
458 idx = next_idx(idx, i->pipe);
459 off = 0;
460 }
461 *idxp = idx;
462 *offp = off;
463}
464
465static size_t push_pipe(struct iov_iter *i, size_t size,
466 int *idxp, size_t *offp)
467{
468 struct pipe_inode_info *pipe = i->pipe;
469 size_t off;
470 int idx;
471 ssize_t left;
472
473 if (unlikely(size > i->count))
474 size = i->count;
475 if (unlikely(!size))
476 return 0;
477
478 left = size;
479 data_start(i, &idx, &off);
480 *idxp = idx;
481 *offp = off;
482 if (off) {
483 left -= PAGE_SIZE - off;
484 if (left <= 0) {
485 pipe->bufs[idx].len += size;
486 return size;
487 }
488 pipe->bufs[idx].len = PAGE_SIZE;
489 idx = next_idx(idx, pipe);
490 }
491 while (idx != pipe->curbuf || !pipe->nrbufs) {
492 struct page *page = alloc_page(GFP_USER);
493 if (!page)
494 break;
495 pipe->nrbufs++;
496 pipe->bufs[idx].ops = &default_pipe_buf_ops;
497 pipe->bufs[idx].page = page;
498 pipe->bufs[idx].offset = 0;
499 if (left <= PAGE_SIZE) {
500 pipe->bufs[idx].len = left;
501 return size;
502 }
503 pipe->bufs[idx].len = PAGE_SIZE;
504 left -= PAGE_SIZE;
505 idx = next_idx(idx, pipe);
506 }
507 return size - left;
508}
509
510static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
511 struct iov_iter *i)
512{
513 struct pipe_inode_info *pipe = i->pipe;
514 size_t n, off;
515 int idx;
516
517 if (!sanity(i))
518 return 0;
519
520 bytes = n = push_pipe(i, bytes, &idx, &off);
521 if (unlikely(!n))
522 return 0;
523 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
524 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
525 memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
526 i->idx = idx;
527 i->iov_offset = off + chunk;
528 n -= chunk;
529 addr += chunk;
530 }
531 i->count -= bytes;
532 return bytes;
533}
534
36f7a8a4 535size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
62a8067a 536{
36f7a8a4 537 const char *from = addr;
241699cd
AV
538 if (unlikely(i->type & ITER_PIPE))
539 return copy_pipe_to_iter(addr, bytes, i);
3d4d3e48
AV
540 iterate_and_advance(i, bytes, v,
541 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
542 v.iov_len),
543 memcpy_to_page(v.bv_page, v.bv_offset,
a280455f
AV
544 (from += v.bv_len) - v.bv_len, v.bv_len),
545 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
3d4d3e48 546 )
62a8067a 547
3d4d3e48 548 return bytes;
c35e0248 549}
d271524a 550EXPORT_SYMBOL(copy_to_iter);
c35e0248 551
d271524a 552size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
c35e0248 553{
0dbca9a4 554 char *to = addr;
241699cd
AV
555 if (unlikely(i->type & ITER_PIPE)) {
556 WARN_ON(1);
557 return 0;
558 }
0dbca9a4
AV
559 iterate_and_advance(i, bytes, v,
560 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
561 v.iov_len),
562 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
a280455f
AV
563 v.bv_offset, v.bv_len),
564 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
0dbca9a4
AV
565 )
566
567 return bytes;
c35e0248 568}
d271524a 569EXPORT_SYMBOL(copy_from_iter);
c35e0248 570
cbbd26b8
AV
571bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
572{
573 char *to = addr;
574 if (unlikely(i->type & ITER_PIPE)) {
575 WARN_ON(1);
576 return false;
577 }
578 if (unlikely(i->count < bytes)) \
579 return false;
580
581 iterate_all_kinds(i, bytes, v, ({
582 if (__copy_from_user((to += v.iov_len) - v.iov_len,
583 v.iov_base, v.iov_len))
584 return false;
585 0;}),
586 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
587 v.bv_offset, v.bv_len),
588 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
589 )
590
591 iov_iter_advance(i, bytes);
592 return true;
593}
594EXPORT_SYMBOL(copy_from_iter_full);
595
aa583096
AV
596size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
597{
598 char *to = addr;
241699cd
AV
599 if (unlikely(i->type & ITER_PIPE)) {
600 WARN_ON(1);
601 return 0;
602 }
aa583096
AV
603 iterate_and_advance(i, bytes, v,
604 __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
605 v.iov_base, v.iov_len),
606 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
607 v.bv_offset, v.bv_len),
608 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
609 )
610
611 return bytes;
612}
613EXPORT_SYMBOL(copy_from_iter_nocache);
614
cbbd26b8
AV
615bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
616{
617 char *to = addr;
618 if (unlikely(i->type & ITER_PIPE)) {
619 WARN_ON(1);
620 return false;
621 }
622 if (unlikely(i->count < bytes)) \
623 return false;
624 iterate_all_kinds(i, bytes, v, ({
625 if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
626 v.iov_base, v.iov_len))
627 return false;
628 0;}),
629 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
630 v.bv_offset, v.bv_len),
631 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
632 )
633
634 iov_iter_advance(i, bytes);
635 return true;
636}
637EXPORT_SYMBOL(copy_from_iter_full_nocache);
638
62a8067a
AV
639size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
640 struct iov_iter *i)
641{
d271524a
AV
642 if (i->type & (ITER_BVEC|ITER_KVEC)) {
643 void *kaddr = kmap_atomic(page);
644 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
645 kunmap_atomic(kaddr);
646 return wanted;
241699cd 647 } else if (likely(!(i->type & ITER_PIPE)))
62a8067a 648 return copy_page_to_iter_iovec(page, offset, bytes, i);
241699cd
AV
649 else
650 return copy_page_to_iter_pipe(page, offset, bytes, i);
62a8067a
AV
651}
652EXPORT_SYMBOL(copy_page_to_iter);
653
654size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
655 struct iov_iter *i)
656{
241699cd
AV
657 if (unlikely(i->type & ITER_PIPE)) {
658 WARN_ON(1);
659 return 0;
660 }
a280455f 661 if (i->type & (ITER_BVEC|ITER_KVEC)) {
d271524a
AV
662 void *kaddr = kmap_atomic(page);
663 size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
664 kunmap_atomic(kaddr);
665 return wanted;
666 } else
62a8067a
AV
667 return copy_page_from_iter_iovec(page, offset, bytes, i);
668}
669EXPORT_SYMBOL(copy_page_from_iter);
670
241699cd
AV
671static size_t pipe_zero(size_t bytes, struct iov_iter *i)
672{
673 struct pipe_inode_info *pipe = i->pipe;
674 size_t n, off;
675 int idx;
676
677 if (!sanity(i))
678 return 0;
679
680 bytes = n = push_pipe(i, bytes, &idx, &off);
681 if (unlikely(!n))
682 return 0;
683
684 for ( ; n; idx = next_idx(idx, pipe), off = 0) {
685 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
686 memzero_page(pipe->bufs[idx].page, off, chunk);
687 i->idx = idx;
688 i->iov_offset = off + chunk;
689 n -= chunk;
690 }
691 i->count -= bytes;
692 return bytes;
693}
694
c35e0248
MW
695size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
696{
241699cd
AV
697 if (unlikely(i->type & ITER_PIPE))
698 return pipe_zero(bytes, i);
8442fa46
AV
699 iterate_and_advance(i, bytes, v,
700 __clear_user(v.iov_base, v.iov_len),
a280455f
AV
701 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
702 memset(v.iov_base, 0, v.iov_len)
8442fa46
AV
703 )
704
705 return bytes;
c35e0248
MW
706}
707EXPORT_SYMBOL(iov_iter_zero);
708
62a8067a
AV
709size_t iov_iter_copy_from_user_atomic(struct page *page,
710 struct iov_iter *i, unsigned long offset, size_t bytes)
711{
04a31165 712 char *kaddr = kmap_atomic(page), *p = kaddr + offset;
241699cd
AV
713 if (unlikely(i->type & ITER_PIPE)) {
714 kunmap_atomic(kaddr);
715 WARN_ON(1);
716 return 0;
717 }
04a31165
AV
718 iterate_all_kinds(i, bytes, v,
719 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
720 v.iov_base, v.iov_len),
721 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
a280455f
AV
722 v.bv_offset, v.bv_len),
723 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
04a31165
AV
724 )
725 kunmap_atomic(kaddr);
726 return bytes;
62a8067a
AV
727}
728EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
729
241699cd
AV
730static void pipe_advance(struct iov_iter *i, size_t size)
731{
732 struct pipe_inode_info *pipe = i->pipe;
733 struct pipe_buffer *buf;
734 int idx = i->idx;
680bb946 735 size_t off = i->iov_offset, orig_sz;
241699cd
AV
736
737 if (unlikely(i->count < size))
738 size = i->count;
680bb946 739 orig_sz = size;
241699cd
AV
740
741 if (size) {
742 if (off) /* make it relative to the beginning of buffer */
743 size += off - pipe->bufs[idx].offset;
744 while (1) {
745 buf = &pipe->bufs[idx];
746 if (size <= buf->len)
747 break;
748 size -= buf->len;
749 idx = next_idx(idx, pipe);
750 }
751 buf->len = size;
752 i->idx = idx;
753 off = i->iov_offset = buf->offset + size;
754 }
755 if (off)
756 idx = next_idx(idx, pipe);
757 if (pipe->nrbufs) {
758 int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
759 /* [curbuf,unused) is in use. Free [idx,unused) */
760 while (idx != unused) {
a779638c 761 pipe_buf_release(pipe, &pipe->bufs[idx]);
241699cd
AV
762 idx = next_idx(idx, pipe);
763 pipe->nrbufs--;
764 }
765 }
680bb946 766 i->count -= orig_sz;
241699cd
AV
767}
768
62a8067a
AV
769void iov_iter_advance(struct iov_iter *i, size_t size)
770{
241699cd
AV
771 if (unlikely(i->type & ITER_PIPE)) {
772 pipe_advance(i, size);
773 return;
774 }
a280455f 775 iterate_and_advance(i, size, v, 0, 0, 0)
62a8067a
AV
776}
777EXPORT_SYMBOL(iov_iter_advance);
778
779/*
780 * Return the count of just the current iov_iter segment.
781 */
782size_t iov_iter_single_seg_count(const struct iov_iter *i)
783{
241699cd
AV
784 if (unlikely(i->type & ITER_PIPE))
785 return i->count; // it is a silly place, anyway
62a8067a
AV
786 if (i->nr_segs == 1)
787 return i->count;
788 else if (i->type & ITER_BVEC)
62a8067a 789 return min(i->count, i->bvec->bv_len - i->iov_offset);
ad0eab92
PM
790 else
791 return min(i->count, i->iov->iov_len - i->iov_offset);
62a8067a
AV
792}
793EXPORT_SYMBOL(iov_iter_single_seg_count);
794
abb78f87 795void iov_iter_kvec(struct iov_iter *i, int direction,
05afcb77 796 const struct kvec *kvec, unsigned long nr_segs,
abb78f87
AV
797 size_t count)
798{
799 BUG_ON(!(direction & ITER_KVEC));
800 i->type = direction;
05afcb77 801 i->kvec = kvec;
abb78f87
AV
802 i->nr_segs = nr_segs;
803 i->iov_offset = 0;
804 i->count = count;
805}
806EXPORT_SYMBOL(iov_iter_kvec);
807
05afcb77
AV
808void iov_iter_bvec(struct iov_iter *i, int direction,
809 const struct bio_vec *bvec, unsigned long nr_segs,
810 size_t count)
811{
812 BUG_ON(!(direction & ITER_BVEC));
813 i->type = direction;
814 i->bvec = bvec;
815 i->nr_segs = nr_segs;
816 i->iov_offset = 0;
817 i->count = count;
818}
819EXPORT_SYMBOL(iov_iter_bvec);
820
241699cd
AV
821void iov_iter_pipe(struct iov_iter *i, int direction,
822 struct pipe_inode_info *pipe,
823 size_t count)
824{
825 BUG_ON(direction != ITER_PIPE);
826 i->type = direction;
827 i->pipe = pipe;
828 i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
829 i->iov_offset = 0;
830 i->count = count;
831}
832EXPORT_SYMBOL(iov_iter_pipe);
833
62a8067a
AV
834unsigned long iov_iter_alignment(const struct iov_iter *i)
835{
04a31165
AV
836 unsigned long res = 0;
837 size_t size = i->count;
838
839 if (!size)
840 return 0;
841
241699cd
AV
842 if (unlikely(i->type & ITER_PIPE)) {
843 if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
844 return size | i->iov_offset;
845 return size;
846 }
04a31165
AV
847 iterate_all_kinds(i, size, v,
848 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
a280455f
AV
849 res |= v.bv_offset | v.bv_len,
850 res |= (unsigned long)v.iov_base | v.iov_len
04a31165
AV
851 )
852 return res;
62a8067a
AV
853}
854EXPORT_SYMBOL(iov_iter_alignment);
855
357f435d
AV
856unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
857{
858 unsigned long res = 0;
859 size_t size = i->count;
860 if (!size)
861 return 0;
862
241699cd
AV
863 if (unlikely(i->type & ITER_PIPE)) {
864 WARN_ON(1);
865 return ~0U;
866 }
867
357f435d
AV
868 iterate_all_kinds(i, size, v,
869 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
870 (size != v.iov_len ? size : 0), 0),
871 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
872 (size != v.bv_len ? size : 0)),
873 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
874 (size != v.iov_len ? size : 0))
875 );
876 return res;
877}
878EXPORT_SYMBOL(iov_iter_gap_alignment);
879
241699cd
AV
880static inline size_t __pipe_get_pages(struct iov_iter *i,
881 size_t maxsize,
882 struct page **pages,
883 int idx,
884 size_t *start)
885{
886 struct pipe_inode_info *pipe = i->pipe;
1689c73a 887 ssize_t n = push_pipe(i, maxsize, &idx, start);
241699cd
AV
888 if (!n)
889 return -EFAULT;
890
891 maxsize = n;
892 n += *start;
1689c73a 893 while (n > 0) {
241699cd
AV
894 get_page(*pages++ = pipe->bufs[idx].page);
895 idx = next_idx(idx, pipe);
896 n -= PAGE_SIZE;
897 }
898
899 return maxsize;
900}
901
902static ssize_t pipe_get_pages(struct iov_iter *i,
903 struct page **pages, size_t maxsize, unsigned maxpages,
904 size_t *start)
905{
906 unsigned npages;
907 size_t capacity;
908 int idx;
909
910 if (!sanity(i))
911 return -EFAULT;
912
913 data_start(i, &idx, start);
914 /* some of this one + all after this one */
915 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
916 capacity = min(npages,maxpages) * PAGE_SIZE - *start;
917
918 return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
919}
920
62a8067a 921ssize_t iov_iter_get_pages(struct iov_iter *i,
2c80929c 922 struct page **pages, size_t maxsize, unsigned maxpages,
62a8067a
AV
923 size_t *start)
924{
e5393fae
AV
925 if (maxsize > i->count)
926 maxsize = i->count;
927
928 if (!maxsize)
929 return 0;
930
241699cd
AV
931 if (unlikely(i->type & ITER_PIPE))
932 return pipe_get_pages(i, pages, maxsize, maxpages, start);
e5393fae
AV
933 iterate_all_kinds(i, maxsize, v, ({
934 unsigned long addr = (unsigned long)v.iov_base;
935 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
936 int n;
937 int res;
938
939 if (len > maxpages * PAGE_SIZE)
940 len = maxpages * PAGE_SIZE;
941 addr &= ~(PAGE_SIZE - 1);
942 n = DIV_ROUND_UP(len, PAGE_SIZE);
943 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
944 if (unlikely(res < 0))
945 return res;
946 return (res == n ? len : res * PAGE_SIZE) - *start;
947 0;}),({
948 /* can't be more than PAGE_SIZE */
949 *start = v.bv_offset;
950 get_page(*pages = v.bv_page);
951 return v.bv_len;
a280455f
AV
952 }),({
953 return -EFAULT;
e5393fae
AV
954 })
955 )
956 return 0;
62a8067a
AV
957}
958EXPORT_SYMBOL(iov_iter_get_pages);
959
1b17f1f2
AV
960static struct page **get_pages_array(size_t n)
961{
962 struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
963 if (!p)
964 p = vmalloc(n * sizeof(struct page *));
965 return p;
966}
967
241699cd
AV
968static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
969 struct page ***pages, size_t maxsize,
970 size_t *start)
971{
972 struct page **p;
973 size_t n;
974 int idx;
975 int npages;
976
977 if (!sanity(i))
978 return -EFAULT;
979
980 data_start(i, &idx, start);
981 /* some of this one + all after this one */
982 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
983 n = npages * PAGE_SIZE - *start;
984 if (maxsize > n)
985 maxsize = n;
986 else
987 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
988 p = get_pages_array(npages);
989 if (!p)
990 return -ENOMEM;
991 n = __pipe_get_pages(i, maxsize, p, idx, start);
992 if (n > 0)
993 *pages = p;
994 else
995 kvfree(p);
996 return n;
997}
998
62a8067a
AV
999ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1000 struct page ***pages, size_t maxsize,
1001 size_t *start)
1002{
1b17f1f2
AV
1003 struct page **p;
1004
1005 if (maxsize > i->count)
1006 maxsize = i->count;
1007
1008 if (!maxsize)
1009 return 0;
1010
241699cd
AV
1011 if (unlikely(i->type & ITER_PIPE))
1012 return pipe_get_pages_alloc(i, pages, maxsize, start);
1b17f1f2
AV
1013 iterate_all_kinds(i, maxsize, v, ({
1014 unsigned long addr = (unsigned long)v.iov_base;
1015 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1016 int n;
1017 int res;
1018
1019 addr &= ~(PAGE_SIZE - 1);
1020 n = DIV_ROUND_UP(len, PAGE_SIZE);
1021 p = get_pages_array(n);
1022 if (!p)
1023 return -ENOMEM;
1024 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
1025 if (unlikely(res < 0)) {
1026 kvfree(p);
1027 return res;
1028 }
1029 *pages = p;
1030 return (res == n ? len : res * PAGE_SIZE) - *start;
1031 0;}),({
1032 /* can't be more than PAGE_SIZE */
1033 *start = v.bv_offset;
1034 *pages = p = get_pages_array(1);
1035 if (!p)
1036 return -ENOMEM;
1037 get_page(*p = v.bv_page);
1038 return v.bv_len;
a280455f
AV
1039 }),({
1040 return -EFAULT;
1b17f1f2
AV
1041 })
1042 )
1043 return 0;
62a8067a
AV
1044}
1045EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1046
a604ec7e
AV
1047size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1048 struct iov_iter *i)
1049{
1050 char *to = addr;
1051 __wsum sum, next;
1052 size_t off = 0;
a604ec7e 1053 sum = *csum;
241699cd
AV
1054 if (unlikely(i->type & ITER_PIPE)) {
1055 WARN_ON(1);
1056 return 0;
1057 }
a604ec7e
AV
1058 iterate_and_advance(i, bytes, v, ({
1059 int err = 0;
cbbd26b8 1060 next = csum_and_copy_from_user(v.iov_base,
a604ec7e
AV
1061 (to += v.iov_len) - v.iov_len,
1062 v.iov_len, 0, &err);
1063 if (!err) {
1064 sum = csum_block_add(sum, next, off);
1065 off += v.iov_len;
1066 }
1067 err ? v.iov_len : 0;
1068 }), ({
1069 char *p = kmap_atomic(v.bv_page);
1070 next = csum_partial_copy_nocheck(p + v.bv_offset,
1071 (to += v.bv_len) - v.bv_len,
1072 v.bv_len, 0);
1073 kunmap_atomic(p);
1074 sum = csum_block_add(sum, next, off);
1075 off += v.bv_len;
1076 }),({
1077 next = csum_partial_copy_nocheck(v.iov_base,
1078 (to += v.iov_len) - v.iov_len,
1079 v.iov_len, 0);
1080 sum = csum_block_add(sum, next, off);
1081 off += v.iov_len;
1082 })
1083 )
1084 *csum = sum;
1085 return bytes;
1086}
1087EXPORT_SYMBOL(csum_and_copy_from_iter);
1088
cbbd26b8
AV
1089bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1090 struct iov_iter *i)
1091{
1092 char *to = addr;
1093 __wsum sum, next;
1094 size_t off = 0;
1095 sum = *csum;
1096 if (unlikely(i->type & ITER_PIPE)) {
1097 WARN_ON(1);
1098 return false;
1099 }
1100 if (unlikely(i->count < bytes))
1101 return false;
1102 iterate_all_kinds(i, bytes, v, ({
1103 int err = 0;
1104 next = csum_and_copy_from_user(v.iov_base,
1105 (to += v.iov_len) - v.iov_len,
1106 v.iov_len, 0, &err);
1107 if (err)
1108 return false;
1109 sum = csum_block_add(sum, next, off);
1110 off += v.iov_len;
1111 0;
1112 }), ({
1113 char *p = kmap_atomic(v.bv_page);
1114 next = csum_partial_copy_nocheck(p + v.bv_offset,
1115 (to += v.bv_len) - v.bv_len,
1116 v.bv_len, 0);
1117 kunmap_atomic(p);
1118 sum = csum_block_add(sum, next, off);
1119 off += v.bv_len;
1120 }),({
1121 next = csum_partial_copy_nocheck(v.iov_base,
1122 (to += v.iov_len) - v.iov_len,
1123 v.iov_len, 0);
1124 sum = csum_block_add(sum, next, off);
1125 off += v.iov_len;
1126 })
1127 )
1128 *csum = sum;
1129 iov_iter_advance(i, bytes);
1130 return true;
1131}
1132EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1133
36f7a8a4 1134size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
a604ec7e
AV
1135 struct iov_iter *i)
1136{
36f7a8a4 1137 const char *from = addr;
a604ec7e
AV
1138 __wsum sum, next;
1139 size_t off = 0;
a604ec7e 1140 sum = *csum;
241699cd
AV
1141 if (unlikely(i->type & ITER_PIPE)) {
1142 WARN_ON(1); /* for now */
1143 return 0;
1144 }
a604ec7e
AV
1145 iterate_and_advance(i, bytes, v, ({
1146 int err = 0;
1147 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
cbbd26b8 1148 v.iov_base,
a604ec7e
AV
1149 v.iov_len, 0, &err);
1150 if (!err) {
1151 sum = csum_block_add(sum, next, off);
1152 off += v.iov_len;
1153 }
1154 err ? v.iov_len : 0;
1155 }), ({
1156 char *p = kmap_atomic(v.bv_page);
1157 next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
1158 p + v.bv_offset,
1159 v.bv_len, 0);
1160 kunmap_atomic(p);
1161 sum = csum_block_add(sum, next, off);
1162 off += v.bv_len;
1163 }),({
1164 next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
1165 v.iov_base,
1166 v.iov_len, 0);
1167 sum = csum_block_add(sum, next, off);
1168 off += v.iov_len;
1169 })
1170 )
1171 *csum = sum;
1172 return bytes;
1173}
1174EXPORT_SYMBOL(csum_and_copy_to_iter);
1175
62a8067a
AV
1176int iov_iter_npages(const struct iov_iter *i, int maxpages)
1177{
e0f2dc40
AV
1178 size_t size = i->count;
1179 int npages = 0;
1180
1181 if (!size)
1182 return 0;
1183
241699cd
AV
1184 if (unlikely(i->type & ITER_PIPE)) {
1185 struct pipe_inode_info *pipe = i->pipe;
1186 size_t off;
1187 int idx;
1188
1189 if (!sanity(i))
1190 return 0;
1191
1192 data_start(i, &idx, &off);
1193 /* some of this one + all after this one */
1194 npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
1195 if (npages >= maxpages)
1196 return maxpages;
1197 } else iterate_all_kinds(i, size, v, ({
e0f2dc40
AV
1198 unsigned long p = (unsigned long)v.iov_base;
1199 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1200 - p / PAGE_SIZE;
1201 if (npages >= maxpages)
1202 return maxpages;
1203 0;}),({
1204 npages++;
1205 if (npages >= maxpages)
1206 return maxpages;
a280455f
AV
1207 }),({
1208 unsigned long p = (unsigned long)v.iov_base;
1209 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1210 - p / PAGE_SIZE;
1211 if (npages >= maxpages)
1212 return maxpages;
e0f2dc40
AV
1213 })
1214 )
1215 return npages;
62a8067a 1216}
f67da30c 1217EXPORT_SYMBOL(iov_iter_npages);
4b8164b9
AV
1218
1219const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1220{
1221 *new = *old;
241699cd
AV
1222 if (unlikely(new->type & ITER_PIPE)) {
1223 WARN_ON(1);
1224 return NULL;
1225 }
4b8164b9
AV
1226 if (new->type & ITER_BVEC)
1227 return new->bvec = kmemdup(new->bvec,
1228 new->nr_segs * sizeof(struct bio_vec),
1229 flags);
1230 else
1231 /* iovec and kvec have identical layout */
1232 return new->iov = kmemdup(new->iov,
1233 new->nr_segs * sizeof(struct iovec),
1234 flags);
1235}
1236EXPORT_SYMBOL(dup_iter);
bc917be8 1237
ffecee4f
VN
1238/**
1239 * import_iovec() - Copy an array of &struct iovec from userspace
1240 * into the kernel, check that it is valid, and initialize a new
1241 * &struct iov_iter iterator to access it.
1242 *
1243 * @type: One of %READ or %WRITE.
1244 * @uvector: Pointer to the userspace array.
1245 * @nr_segs: Number of elements in userspace array.
1246 * @fast_segs: Number of elements in @iov.
1247 * @iov: (input and output parameter) Pointer to pointer to (usually small
1248 * on-stack) kernel array.
1249 * @i: Pointer to iterator that will be initialized on success.
1250 *
1251 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1252 * then this function places %NULL in *@iov on return. Otherwise, a new
1253 * array will be allocated and the result placed in *@iov. This means that
1254 * the caller may call kfree() on *@iov regardless of whether the small
1255 * on-stack array was used or not (and regardless of whether this function
1256 * returns an error or not).
1257 *
1258 * Return: 0 on success or negative error code on error.
1259 */
bc917be8
AV
1260int import_iovec(int type, const struct iovec __user * uvector,
1261 unsigned nr_segs, unsigned fast_segs,
1262 struct iovec **iov, struct iov_iter *i)
1263{
1264 ssize_t n;
1265 struct iovec *p;
1266 n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1267 *iov, &p);
1268 if (n < 0) {
1269 if (p != *iov)
1270 kfree(p);
1271 *iov = NULL;
1272 return n;
1273 }
1274 iov_iter_init(i, type, p, nr_segs, n);
1275 *iov = p == *iov ? NULL : p;
1276 return 0;
1277}
1278EXPORT_SYMBOL(import_iovec);
1279
1280#ifdef CONFIG_COMPAT
1281#include <linux/compat.h>
1282
1283int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
1284 unsigned nr_segs, unsigned fast_segs,
1285 struct iovec **iov, struct iov_iter *i)
1286{
1287 ssize_t n;
1288 struct iovec *p;
1289 n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
1290 *iov, &p);
1291 if (n < 0) {
1292 if (p != *iov)
1293 kfree(p);
1294 *iov = NULL;
1295 return n;
1296 }
1297 iov_iter_init(i, type, p, nr_segs, n);
1298 *iov = p == *iov ? NULL : p;
1299 return 0;
1300}
1301#endif
1302
1303int import_single_range(int rw, void __user *buf, size_t len,
1304 struct iovec *iov, struct iov_iter *i)
1305{
1306 if (len > MAX_RW_COUNT)
1307 len = MAX_RW_COUNT;
1308 if (unlikely(!access_ok(!rw, buf, len)))
1309 return -EFAULT;
1310
1311 iov->iov_base = buf;
1312 iov->iov_len = len;
1313 iov_iter_init(i, rw, iov, 1, len);
1314 return 0;
1315}
e1267585 1316EXPORT_SYMBOL(import_single_range);