#include <linux/export.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

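/*
 * iterate_iovec()/iterate_bvec() walk up to @n bytes of an iterator,
 * invoking STEP once per contiguous chunk, with __v describing the
 * current chunk.  For iovecs, STEP is expected to return the number of
 * bytes it could *not* process (as the uaccess primitives do), which
 * terminates the walk early on a fault; for bvecs, STEP cannot fail.
 * On exit, n holds the number of bytes actually processed.
 */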
#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
        size_t left; \
        size_t wanted = n; \
        __p = i->iov; \
        __v.iov_len = min(n, __p->iov_len - skip); \
        if (likely(__v.iov_len)) { \
                __v.iov_base = __p->iov_base + skip; \
                left = (STEP); \
                __v.iov_len -= left; \
                skip += __v.iov_len; \
                n -= __v.iov_len; \
        } else { \
                left = 0; \
        } \
        while (unlikely(!left && n)) { \
                __p++; \
                __v.iov_len = min(n, __p->iov_len); \
                if (unlikely(!__v.iov_len)) \
                        continue; \
                __v.iov_base = __p->iov_base; \
                left = (STEP); \
                __v.iov_len -= left; \
                skip = __v.iov_len; \
                n -= __v.iov_len; \
        } \
        n = wanted - n; \
}

#define iterate_bvec(i, n, __v, __p, skip, STEP) { \
        size_t wanted = n; \
        __p = i->bvec; \
        __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \
        if (likely(__v.bv_len)) { \
                __v.bv_page = __p->bv_page; \
                __v.bv_offset = __p->bv_offset + skip; \
                (void)(STEP); \
                skip += __v.bv_len; \
                n -= __v.bv_len; \
        } \
        while (unlikely(n)) { \
                __p++; \
                __v.bv_len = min_t(size_t, n, __p->bv_len); \
                if (unlikely(!__v.bv_len)) \
                        continue; \
                __v.bv_page = __p->bv_page; \
                __v.bv_offset = __p->bv_offset; \
                (void)(STEP); \
                skip = __v.bv_len; \
                n -= __v.bv_len; \
        } \
        n = wanted; \
}

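/*
 * iterate_all_kinds() dispatches on the iterator type, running I for
 * userspace iovecs or B for page-based bvecs.  iterate_and_advance()
 * does the same and then updates the iterator (count, segment array,
 * offset) to account for the bytes consumed.
 */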
#define iterate_all_kinds(i, n, v, I, B) { \
        size_t skip = i->iov_offset; \
        if (unlikely(i->type & ITER_BVEC)) { \
                const struct bio_vec *bvec; \
                struct bio_vec v; \
                iterate_bvec(i, n, v, bvec, skip, (B)) \
        } else { \
                const struct iovec *iov; \
                struct iovec v; \
                iterate_iovec(i, n, v, iov, skip, (I)) \
        } \
}

#define iterate_and_advance(i, n, v, I, B) { \
        size_t skip = i->iov_offset; \
        if (unlikely(i->type & ITER_BVEC)) { \
                const struct bio_vec *bvec; \
                struct bio_vec v; \
                iterate_bvec(i, n, v, bvec, skip, (B)) \
                if (skip == bvec->bv_len) { \
                        bvec++; \
                        skip = 0; \
                } \
                i->nr_segs -= bvec - i->bvec; \
                i->bvec = bvec; \
        } else { \
                const struct iovec *iov; \
                struct iovec v; \
                iterate_iovec(i, n, v, iov, skip, (I)) \
                if (skip == iov->iov_len) { \
                        iov++; \
                        skip = 0; \
                } \
                i->nr_segs -= iov - i->iov; \
                i->iov = iov; \
        } \
        i->count -= n; \
        i->iov_offset = skip; \
}

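/*
 * Copy from a kernel page into a userspace iovec.  The fast path
 * prefaults the first segment and then uses kmap_atomic() with the
 * inatomic uaccess primitives; if a segment still faults mid-copy,
 * we fall back to a sleeping kmap() and plain __copy_to_user() for
 * the remainder.
 */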
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t skip, copy, left, wanted;
        const struct iovec *iov;
        char __user *buf;
        void *kaddr, *from;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        wanted = bytes;
        iov = i->iov;
        skip = i->iov_offset;
        buf = iov->iov_base + skip;
        copy = min(bytes, iov->iov_len - skip);

        if (!fault_in_pages_writeable(buf, copy)) {
                kaddr = kmap_atomic(page);
                from = kaddr + offset;

                /* first chunk, usually the only one */
                left = __copy_to_user_inatomic(buf, from, copy);
                copy -= left;
                skip += copy;
                from += copy;
                bytes -= copy;

                while (unlikely(!left && bytes)) {
                        iov++;
                        buf = iov->iov_base;
                        copy = min(bytes, iov->iov_len);
                        left = __copy_to_user_inatomic(buf, from, copy);
                        copy -= left;
                        skip = copy;
                        from += copy;
                        bytes -= copy;
                }
                if (likely(!bytes)) {
                        kunmap_atomic(kaddr);
                        goto done;
                }
                offset = from - kaddr;
                buf += copy;
                kunmap_atomic(kaddr);
                copy = min(bytes, iov->iov_len - skip);
        }
        /* Too bad - revert to non-atomic kmap */
        kaddr = kmap(page);
        from = kaddr + offset;
        left = __copy_to_user(buf, from, copy);
        copy -= left;
        skip += copy;
        from += copy;
        bytes -= copy;
        while (unlikely(!left && bytes)) {
                iov++;
                buf = iov->iov_base;
                copy = min(bytes, iov->iov_len);
                left = __copy_to_user(buf, from, copy);
                copy -= left;
                skip = copy;
                from += copy;
                bytes -= copy;
        }
        kunmap(page);
done:
        if (skip == iov->iov_len) {
                iov++;
                skip = 0;
        }
        i->count -= wanted - bytes;
        i->nr_segs -= iov - i->iov;
        i->iov = iov;
        i->iov_offset = skip;
        return wanted - bytes;
}

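/*
 * Mirror image of copy_page_to_iter_iovec(): copy from a userspace
 * iovec into a kernel page, with the same atomic fast path and
 * non-atomic fallback.
 */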
static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t skip, copy, left, wanted;
        const struct iovec *iov;
        char __user *buf;
        void *kaddr, *to;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        wanted = bytes;
        iov = i->iov;
        skip = i->iov_offset;
        buf = iov->iov_base + skip;
        copy = min(bytes, iov->iov_len - skip);

        if (!fault_in_pages_readable(buf, copy)) {
                kaddr = kmap_atomic(page);
                to = kaddr + offset;

                /* first chunk, usually the only one */
                left = __copy_from_user_inatomic(to, buf, copy);
                copy -= left;
                skip += copy;
                to += copy;
                bytes -= copy;

                while (unlikely(!left && bytes)) {
                        iov++;
                        buf = iov->iov_base;
                        copy = min(bytes, iov->iov_len);
                        left = __copy_from_user_inatomic(to, buf, copy);
                        copy -= left;
                        skip = copy;
                        to += copy;
                        bytes -= copy;
                }
                if (likely(!bytes)) {
                        kunmap_atomic(kaddr);
                        goto done;
                }
                offset = to - kaddr;
                buf += copy;
                kunmap_atomic(kaddr);
                copy = min(bytes, iov->iov_len - skip);
        }
        /* Too bad - revert to non-atomic kmap */
        kaddr = kmap(page);
        to = kaddr + offset;
        left = __copy_from_user(to, buf, copy);
        copy -= left;
        skip += copy;
        to += copy;
        bytes -= copy;
        while (unlikely(!left && bytes)) {
                iov++;
                buf = iov->iov_base;
                copy = min(bytes, iov->iov_len);
                left = __copy_from_user(to, buf, copy);
                copy -= left;
                skip = copy;
                to += copy;
                bytes -= copy;
        }
        kunmap(page);
done:
        if (skip == iov->iov_len) {
                iov++;
                skip = 0;
        }
        i->count -= wanted - bytes;
        i->nr_segs -= iov - i->iov;
        i->iov = iov;
        i->iov_offset = skip;
        return wanted - bytes;
}

/*
 * Fault in the first iovec of the given iov_iter, to a maximum length
 * of bytes. Returns 0 on success, or non-zero if the memory could not be
 * accessed (i.e. because it is an invalid address).
 *
 * writev-intensive code may want this to prefault several iovecs -- that
 * would be possible (callers must not rely on the fact that _only_ the
 * first iovec will be faulted with the current implementation).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
        if (!(i->type & ITER_BVEC)) {
                char __user *buf = i->iov->iov_base + i->iov_offset;
                bytes = min(bytes, i->iov->iov_len - i->iov_offset);
                return fault_in_pages_readable(buf, bytes);
        }
        return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

void iov_iter_init(struct iov_iter *i, int direction,
                        const struct iovec *iov, unsigned long nr_segs,
                        size_t count)
{
        /* It will get better. Eventually... */
        if (segment_eq(get_fs(), KERNEL_DS))
                direction |= ITER_KVEC;
        i->type = direction;
        i->iov = iov;
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

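/*
 * Illustrative use (a sketch, not taken from this file): a read-side
 * caller handing the data in @page to the user buffers described by
 * @iov/@nr_segs might do
 *
 *        struct iov_iter iter;
 *
 *        iov_iter_init(&iter, READ, iov, nr_segs, count);
 *        copied = copy_page_to_iter(page, offset, bytes, &iter);
 *
 * where READ means the iovecs are the destination of the copy and
 * "copied" is a hypothetical local holding the bytes transferred.
 */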
static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
        char *from = kmap_atomic(page);
        memcpy(to, from + offset, len);
        kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
{
        char *to = kmap_atomic(page);
        memcpy(to + offset, from, len);
        kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
        char *addr = kmap_atomic(page);
        memset(addr + offset, 0, len);
        kunmap_atomic(addr);
}

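/*
 * Copy @bytes from the kernel buffer at @addr into the iterator,
 * advancing it.  Returns the number of bytes copied, which may be
 * short if the iterator is exhausted or, for userspace iovecs, if a
 * segment faults.
 */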
size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
        char *from = addr;
        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        iterate_and_advance(i, bytes, v,
                __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
                               v.iov_len),
                memcpy_to_page(v.bv_page, v.bv_offset,
                               (from += v.bv_len) - v.bv_len, v.bv_len)
        )

        return bytes;
}
EXPORT_SYMBOL(copy_to_iter);

size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        iterate_and_advance(i, bytes, v,
                __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
                                 v.iov_len),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len)
        )

        return bytes;
}
EXPORT_SYMBOL(copy_from_iter);

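/*
 * Page-based variants: when the iterator does not reference plain
 * userspace memory, the page is simply kmapped and fed to
 * copy_to_iter()/copy_from_iter(); userspace iovecs go through the
 * fault-handling slow paths above.
 */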
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
        } else
                return copy_page_to_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        if (i->type & ITER_BVEC) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
        } else
                return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        iterate_and_advance(i, bytes, v,
                __clear_user(v.iov_base, v.iov_len),
                memzero_page(v.bv_page, v.bv_offset, v.bv_len)
        )

        return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

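/*
 * Atomic-context copy into a page: no faulting allowed, so the copy
 * may come up short.  Returns the number of bytes copied and leaves
 * the iterator itself unadvanced; the caller is expected to advance
 * by the returned amount once it is safe to do so.
 */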
size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
{
        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
        iterate_all_kinds(i, bytes, v,
                __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
                                          v.iov_base, v.iov_len),
                memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len)
        )
        kunmap_atomic(kaddr);
        return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

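/*
 * Passing 0 as both step expressions makes iterate_and_advance() do
 * nothing except walk the segments and update the iterator state,
 * i.e. a pure advance by @size bytes.
 */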
void iov_iter_advance(struct iov_iter *i, size_t size)
{
        iterate_and_advance(i, size, v, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
        if (i->nr_segs == 1)
                return i->count;
        else if (i->type & ITER_BVEC)
                return min(i->count, i->bvec->bv_len - i->iov_offset);
        else
                return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

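/*
 * OR together the addresses/offsets and lengths of every segment the
 * remaining count covers.  A caller can then AND the result with an
 * alignment mask to check all segments at once (useful, for instance,
 * in direct I/O paths with block-size alignment requirements).
 */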
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
        unsigned long res = 0;
        size_t size = i->count;

        if (!size)
                return 0;

        iterate_all_kinds(i, size, v,
                (res |= (unsigned long)v.iov_base | v.iov_len, 0),
                res |= v.bv_offset | v.bv_len
        )
        return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

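/*
 * Pin the pages backing the first segment of the iterator (up to
 * @maxsize bytes and @maxpages pages) into @pages.  *start receives
 * the offset into the first page.  Returns the number of bytes covered
 * by the pinned pages; the iterator is not advanced.
 */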
ssize_t iov_iter_get_pages(struct iov_iter *i,
                   struct page **pages, size_t maxsize, unsigned maxpages,
                   size_t *start)
{
        if (maxsize > i->count)
                maxsize = i->count;

        if (!maxsize)
                return 0;

        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
                int n;
                int res;

                if (len > maxpages * PAGE_SIZE)
                        len = maxpages * PAGE_SIZE;
                addr &= ~(PAGE_SIZE - 1);
                n = DIV_ROUND_UP(len, PAGE_SIZE);
                res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
                if (unlikely(res < 0))
                        return res;
                return (res == n ? len : res * PAGE_SIZE) - *start;
        0;}),({
                /* can't be more than PAGE_SIZE */
                *start = v.bv_offset;
                get_page(*pages = v.bv_page);
                return v.bv_len;
        })
        )
        return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

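/* Try kmalloc first; fall back to vmalloc if that fails. */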
static struct page **get_pages_array(size_t n)
{
        struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
        if (!p)
                p = vmalloc(n * sizeof(struct page *));
        return p;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
                   struct page ***pages, size_t maxsize,
                   size_t *start)
{
        struct page **p;

        if (maxsize > i->count)
                maxsize = i->count;

        if (!maxsize)
                return 0;

        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
                int n;
                int res;

                addr &= ~(PAGE_SIZE - 1);
                n = DIV_ROUND_UP(len, PAGE_SIZE);
                p = get_pages_array(n);
                if (!p)
                        return -ENOMEM;
                res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
                if (unlikely(res < 0)) {
                        kvfree(p);
                        return res;
                }
                *pages = p;
                return (res == n ? len : res * PAGE_SIZE) - *start;
        0;}),({
                /* can't be more than PAGE_SIZE */
                *start = v.bv_offset;
                *pages = p = get_pages_array(1);
                if (!p)
                        return -ENOMEM;
                get_page(*p = v.bv_page);
                return v.bv_len;
        })
        )
        return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

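/*
 * Report how many pages the remaining data spans, capped at @maxpages.
 * Like iov_iter_get_pages(), this only inspects the iterator; nothing
 * is pinned or advanced.
 */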
int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
        size_t size = i->count;
        int npages = 0;

        if (!size)
                return 0;

        iterate_all_kinds(i, size, v, ({
                unsigned long p = (unsigned long)v.iov_base;
                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
                        - p / PAGE_SIZE;
                if (npages >= maxpages)
                        return maxpages;
        0;}),({
                npages++;
                if (npages >= maxpages)
                        return maxpages;
        })
        )
        return npages;
}
EXPORT_SYMBOL(iov_iter_npages);