/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_common_ops;
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;

static struct kmem_cache *nfs_rdata_cachep;

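/*
 * Allocate a zeroed read header from the nfs_rdata_cachep slab and
 * initialise the embedded pgio header (page and rpc lists, lock, refcount).
 */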
struct nfs_read_header *nfs_readhdr_alloc(void)
{
	struct nfs_read_header *rhdr;

	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
	if (rhdr) {
		struct nfs_pgio_header *hdr = &rhdr->header;

		INIT_LIST_HEAD(&hdr->pages);
		INIT_LIST_HEAD(&hdr->rpc_list);
		spin_lock_init(&hdr->lock);
		atomic_set(&hdr->refcnt, 0);
	}
	return rhdr;
}

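/*
 * Allocate an nfs_read_data able to hold @pagecount pages.  The rpc_data
 * embedded in the read header is handed out first; any further requests
 * against the same header fall back to kzalloc().  A successful allocation
 * takes a reference on @hdr, dropped again in nfs_readdata_release().
 */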
static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
						unsigned int pagecount)
{
	struct nfs_read_data *data, *prealloc;

	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
	if (prealloc->header == NULL)
		data = prealloc;
	else
		data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		goto out;

	if (nfs_pgarray_set(&data->pages, pagecount)) {
		data->header = hdr;
		atomic_inc(&hdr->refcnt);
	} else {
		if (data != prealloc)
			kfree(data);
		data = NULL;
	}
out:
	return data;
}

void nfs_readhdr_free(struct nfs_pgio_header *hdr)
{
	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);

	kmem_cache_free(nfs_rdata_cachep, rhdr);
}

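/*
 * Release an nfs_read_data: drop the open context, free a separately
 * allocated page vector and (unless it is the preallocated rpc_data) the
 * structure itself, then drop the header reference, running the header's
 * completion callback when the last reference goes away.
 */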
void nfs_readdata_release(struct nfs_read_data *rdata)
{
	struct nfs_pgio_header *hdr = rdata->header;
	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);

	put_nfs_open_context(rdata->args.context);
	if (rdata->pages.pagevec != rdata->pages.page_array)
		kfree(rdata->pages.pagevec);
	if (rdata != &read_header->rpc_data)
		kfree(rdata);
	else
		rdata->header = NULL;
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
}

static
int nfs_return_empty_page(struct page *page)
{
	zero_user(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
			  struct inode *inode,
			  const struct nfs_pgio_completion_ops *compl_ops)
{
	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
			NFS_SERVER(inode)->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);

void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
	pgio->pg_ops = &nfs_pageio_read_ops;
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		       struct page *page)
{
	struct nfs_page *new;
	unsigned int len;
	struct nfs_pageio_descriptor pgio;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);

	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
	nfs_pageio_add_request(&pgio, new);
	nfs_pageio_complete(&pgio);
	NFS_I(inode)->read_io += pgio.pg_bytes_written;
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	struct inode *d_inode = req->wb_context->dentry->d_inode;

	if (PageUptodate(req->wb_page))
		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);

	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_release_request(req);
}

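/*
 * Per-header completion: walk the request list, zeroing any part of a
 * page that lies beyond hdr->good_bytes when EOF was reached, marking
 * pages up to date (only those entirely within good_bytes if an error
 * occurred), and releasing each request.
 */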
/* Note io was page aligned */
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
		struct page *page = req->wb_page;

		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
			if (bytes > hdr->good_bytes)
				zero_user(page, 0, PAGE_SIZE);
			else if (hdr->good_bytes - bytes < PAGE_SIZE)
				zero_user_segment(page,
					hdr->good_bytes & ~PAGE_MASK,
					PAGE_SIZE);
		}
		bytes += req->wb_bytes;
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
			if (bytes <= hdr->good_bytes)
				SetPageUptodate(page);
		} else
			SetPageUptodate(page);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
out:
	hdr->release(hdr);
}

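/*
 * Set up and fire off an asynchronous READ rpc.  The call runs via
 * nfsiod_workqueue; completion is reported through @call_ops, and the
 * task reference is dropped immediately after submission.
 */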
int nfs_initiate_read(struct rpc_clnt *clnt,
		      struct nfs_read_data *data,
		      const struct rpc_call_ops *call_ops, int flags)
{
	struct inode *inode = data->header->inode;
	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->header->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC | swap_flags | flags,
	};

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->read_setup(data, &msg);

	dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
			"offset %llu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			data->args.count,
			(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_read);

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_read_data *data,
		unsigned int count, unsigned int offset)
{
	struct nfs_page *req = data->header->req;

	data->args.fh = NFS_FH(data->header->inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pages.pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.lock_context = req->wb_lock_context;

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.eof = 0;
	nfs_fattr_init(&data->fattr);
}

static int nfs_do_read(struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops)
{
	struct inode *inode = data->header->inode;

	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
}

static int
nfs_do_multiple_reads(struct list_head *head,
		const struct rpc_call_ops *call_ops)
{
	struct nfs_read_data *data;
	int ret = 0;

	while (!list_empty(head)) {
		int ret2;

		data = list_first_entry(head, struct nfs_read_data, list);
		list_del_init(&data->list);

		ret2 = nfs_do_read(data, call_ops);
		if (ret == 0)
			ret = ret2;
	}
	return ret;
}

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
}

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
	.error_cleanup = nfs_async_read_error,
	.completion = nfs_read_completion,
};

static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
		struct nfs_pgio_header *hdr)
{
	set_bit(NFS_IOHDR_REDO, &hdr->flags);
	while (!list_empty(&hdr->rpc_list)) {
		struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
				struct nfs_read_data, list);
		list_del(&data->list);
		nfs_readdata_release(data);
	}
	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
			    struct nfs_pgio_header *hdr)
{
	struct nfs_page *req = hdr->req;
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = desc->pg_bsize, nbytes;
	unsigned int offset;

	offset = 0;
	nbytes = desc->pg_count;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(hdr, 1);
		if (!data) {
			nfs_pagein_error(desc, hdr);
			return -ENOMEM;
		}
		data->pages.pagevec[0] = page;
		nfs_read_rpcsetup(data, len, offset);
		list_add(&data->list, &hdr->rpc_list);
		nbytes -= len;
		offset += len;
	} while (nbytes != 0);

	nfs_list_remove_request(req);
	nfs_list_add_request(req, &hdr->pages);
	desc->pg_rpc_callops = &nfs_read_common_ops;
	return 0;
}

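/*
 * Coalesce all requests on the descriptor's list into a single READ
 * covering the whole byte range, sharing one page vector.
 */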
static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
		struct nfs_pgio_header *hdr)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_read_data *data;
	struct list_head *head = &desc->pg_list;

	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
							  desc->pg_count));
	if (!data) {
		nfs_pagein_error(desc, hdr);
		return -ENOMEM;
	}

	pages = data->pages.pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &hdr->pages);
		*pages++ = req->wb_page;
	}

	nfs_read_rpcsetup(data, desc->pg_count, 0);
	list_add(&data->list, &hdr->rpc_list);
	desc->pg_rpc_callops = &nfs_read_common_ops;
	return 0;
}

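/*
 * Build the rpc list for this descriptor: split the I/O into rsize-sized
 * chunks when the server's rsize is smaller than a page, otherwise issue
 * one READ for the entire list of requests.
 */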
int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
		       struct nfs_pgio_header *hdr)
{
	if (desc->pg_bsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(desc, hdr);
	return nfs_pagein_one(desc, hdr);
}

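/*
 * pg_doio callback.  The extra reference taken on the header here keeps
 * the completion callback from running until every rpc on the list has
 * been submitted (or the whole batch has failed).
 */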
static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_read_header *rhdr;
	struct nfs_pgio_header *hdr;
	int ret;

	rhdr = nfs_readhdr_alloc();
	if (!rhdr) {
		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
		return -ENOMEM;
	}
	hdr = &rhdr->header;
	nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
	atomic_inc(&hdr->refcnt);
	ret = nfs_generic_pagein(desc, hdr);
	if (ret == 0)
		ret = nfs_do_multiple_reads(&hdr->rpc_list,
					    desc->pg_rpc_callops);
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
	return ret;
}

static const struct nfs_pageio_ops nfs_pageio_read_ops = {
	.pg_test = nfs_generic_pg_test,
	.pg_doio = nfs_generic_pg_readpages,
};

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	struct inode *inode = data->header->inode;
	int status;

	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
		nfs_mark_for_revalidate(inode);
	}
	return 0;
}

static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	/* This is a short read! */
	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0) {
		nfs_set_pgio_error(data->header, -EIO, argp->offset);
		return;
	}
	/* Yes, so retry the read at the end of the data */
	data->mds_offset += resp->count;
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	rpc_restart_call_prepare(task);
}

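/*
 * rpc_call_done callback: record rpc-level errors against the header,
 * trim good_bytes when the server reported EOF short of the request,
 * or restart the call after a short read.
 */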
static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_pgio_header *hdr = data->header;

	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
	if (nfs_readpage_result(task, data) != 0)
		return;
	if (task->tk_status < 0)
		nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
	else if (data->res.eof) {
		loff_t bound;

		bound = data->args.offset + data->res.count;
		spin_lock(&hdr->lock);
		if (bound < hdr->io_start + hdr->good_bytes) {
			set_bit(NFS_IOHDR_EOF, &hdr->flags);
			clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
			hdr->good_bytes = bound - hdr->io_start;
		}
		spin_unlock(&hdr->lock);
	} else if (data->res.count != data->args.count)
		nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_common(void *calldata)
{
	nfs_readdata_release(calldata);
}

void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
}

static const struct rpc_call_ops nfs_read_common_ops = {
	.rpc_call_prepare = nfs_read_prepare,
	.rpc_call_done = nfs_readpage_result_common,
	.rpc_release = nfs_readpage_release_common,
};

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file..
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_unlock;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_unlock;
	} else
		ctx = get_nfs_open_context(nfs_file_open_context(file));

	if (!IS_SYNC(inode)) {
		error = nfs_readpage_from_fscache(ctx, inode, page);
		if (error == 0)
			goto out;
	}

	error = nfs_readpage_async(ctx, inode, page);

out:
	put_nfs_open_context(ctx);
	return error;
out_unlock:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;
	struct nfs_open_context *ctx;
};

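/*
 * read_cache_pages() callback: wrap each page in an nfs_page request and
 * queue it on the pageio descriptor; pages that cannot be queued are
 * unlocked and the descriptor's pg_error is returned.
 */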
static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;
	int error;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);

	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new))
		goto out_error;

	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);
	if (!nfs_pageio_add_request(desc->pgio, new)) {
		error = desc->pgio->pg_error;
		goto out_unlock;
	}
	return 0;
out_error:
	error = PTR_ERR(new);
out_unlock:
	unlock_page(page);
	return error;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct nfs_pageio_descriptor pgio;
	struct nfs_readdesc desc = {
		.pgio = &pgio,
	};
	struct inode *inode = mapping->host;
	unsigned long npages;
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));

	/* attempt to read as many of the pages as possible from the cache
	 * - this returns -ENOBUFS immediately if the cookie is negative
	 */
	ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
					 pages, &nr_pages);
	if (ret == 0)
		goto read_complete; /* all pages were read */

	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);

	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

	nfs_pageio_complete(&pgio);
	NFS_I(inode)->read_io += pgio.pg_bytes_written;
	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}

int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	kmem_cache_destroy(nfs_rdata_cachep);
}