1/*
2 * GPL HEADER START
3 *
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
6a5b99a4 18 * http://www.gnu.org/licenses/gpl-2.0.html
d7e09d03 19 *
20 * GPL HEADER END
21 */
22/*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
25 *
1dc563a6 26 * Copyright (c) 2011, 2015, Intel Corporation.
27 */
28/*
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
31 *
32 * Implementation of cl_io for VVP layer.
33 *
34 * Author: Nikita Danilov <nikita.danilov@sun.com>
35 * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
36 */
37
38#define DEBUG_SUBSYSTEM S_LLITE
39
67a235f5 40#include "../include/obd.h"
d7e09d03 41
0d345656 42#include "llite_internal.h"
43#include "vvp_internal.h"
44
45static struct vvp_io *cl2vvp_io(const struct lu_env *env,
46 const struct cl_io_slice *slice)
47{
48 struct vvp_io *vio;
49
50 vio = container_of(slice, struct vvp_io, vui_cl);
51 LASSERT(vio == vvp_env_io(env));
52
53 return vio;
54}
55
d7e09d03 56/**
74c0da19 57 * True, if \a io is a normal io, False for splice_{read,write}
d7e09d03 58 */
fee6eb50 59static int cl_is_normalio(const struct lu_env *env, const struct cl_io *io)
60{
61 struct vvp_io *vio = vvp_env_io(env);
62
63 LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
64
e0a8144b 65 return vio->vui_io_subtype == IO_NORMAL;
66}
67
68/**
69 * For swapping layout. The file's layout may have changed.
70 * To avoid populating pages to a wrong stripe, we have to verify the
71 * correctness of layout. It works because swapping layout processes
72 * have to acquire group lock.
73 */
74static bool can_populate_pages(const struct lu_env *env, struct cl_io *io,
e15ba45d 75 struct inode *inode)
76{
77 struct ll_inode_info *lli = ll_i2info(inode);
e0a8144b 78 struct vvp_io *vio = vvp_env_io(env);
79 bool rc = true;
80
81 switch (io->ci_type) {
82 case CIT_READ:
83 case CIT_WRITE:
		/* No lock is needed here to check lli_layout_gen: we hold the
		 * extent lock, and the GROUP lock must be held to swap the
		 * layout.
		 */
e0a8144b 87 if (ll_layout_version_get(lli) != vio->vui_layout_gen) {
88 io->ci_need_restart = 1;
			/* this will return a short read/write to the application */
90 io->ci_continue = 0;
91 rc = false;
92 }
93 case CIT_FAULT:
		/* fault is okay because we already have the page. */
95 default:
96 break;
97 }
98
99 return rc;
100}
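/*
 * Illustrative sketch (not part of the original code): the restart flag set
 * above is typically consumed by the generic cl_io loop in the caller.
 * Assuming a caller along the lines of ll_file_io_generic(), the pattern
 * would look roughly like:
 *
 *	do {
 *		cl_io_init(env, io, CIT_READ, ...);
 *		rc = cl_io_loop(env, io);
 *		cl_io_fini(env, io);
 *	} while (unlikely(io->ci_need_restart));
 *
 * A short read/write is returned for the interrupted iteration because
 * ci_continue was cleared above.
 */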
101
102static void vvp_object_size_lock(struct cl_object *obj)
103{
104 struct inode *inode = vvp_object_inode(obj);
105
106 ll_inode_size_lock(inode);
107 cl_object_attr_lock(obj);
108}
109
110static void vvp_object_size_unlock(struct cl_object *obj)
111{
112 struct inode *inode = vvp_object_inode(obj);
113
114 cl_object_attr_unlock(obj);
115 ll_inode_size_unlock(inode);
116}
117
118/**
119 * Helper function that if necessary adjusts file size (inode->i_size), when
 * position at the offset \a pos is accessed. File size can be arbitrarily stale
121 * on a Lustre client, but client at least knows KMS. If accessed area is
122 * inside [0, KMS], set file size to KMS, otherwise glimpse file size.
123 *
124 * Locking: cl_isize_lock is used to serialize changes to inode size and to
125 * protect consistency between inode size and cl_object
126 * attributes. cl_object_size_lock() protects consistency between cl_attr's of
127 * top-object and sub-objects.
128 */
129static int vvp_prep_size(const struct lu_env *env, struct cl_object *obj,
130 struct cl_io *io, loff_t start, size_t count,
131 int *exceed)
132{
9acc4500 133 struct cl_attr *attr = vvp_env_thread_attr(env);
134 struct inode *inode = vvp_object_inode(obj);
135 loff_t pos = start + count - 1;
136 loff_t kms;
137 int result;
138
139 /*
140 * Consistency guarantees: following possibilities exist for the
141 * relation between region being accessed and real file size at this
142 * moment:
143 *
144 * (A): the region is completely inside of the file;
145 *
146 * (B-x): x bytes of region are inside of the file, the rest is
147 * outside;
148 *
149 * (C): the region is completely outside of the file.
150 *
151 * This classification is stable under DLM lock already acquired by
152 * the caller, because to change the class, other client has to take
153 * DLM lock conflicting with our lock. Also, any updates to ->i_size
154 * by other threads on this client are serialized by
155 * ll_inode_size_lock(). This guarantees that short reads are handled
156 * correctly in the face of concurrent writes and truncates.
157 */
158 vvp_object_size_lock(obj);
159 result = cl_object_attr_get(env, obj, attr);
160 if (result == 0) {
161 kms = attr->cat_kms;
162 if (pos > kms) {
163 /*
164 * A glimpse is necessary to determine whether we
165 * return a short read (B) or some zeroes at the end
166 * of the buffer (C)
167 */
168 vvp_object_size_unlock(obj);
169 result = cl_glimpse_lock(env, io, inode, obj, 0);
170 if (result == 0 && exceed) {
			/* If the target page index exceeds the end-of-file
			 * page index, return directly. Do not expect the
			 * kernel to check this case correctly;
			 * linux-2.6.18-128.1.1 fails to do so.
			 * --bug 17336
			 */
177 loff_t size = i_size_read(inode);
178 loff_t cur_index = start >> PAGE_SHIFT;
179 loff_t size_index = (size - 1) >> PAGE_SHIFT;
180
181 if ((size == 0 && cur_index != 0) ||
182 size_index < cur_index)
183 *exceed = 1;
184 }
185 return result;
186 }
187 /*
188 * region is within kms and, hence, within real file
189 * size (A). We need to increase i_size to cover the
190 * read region so that generic_file_read() will do its
191 * job, but that doesn't mean the kms size is
192 * _correct_, it is only the _minimum_ size. If
193 * someone does a stat they will get the correct size
194 * which will always be >= the kms value here.
195 * b=11081
196 */
197 if (i_size_read(inode) < kms) {
198 i_size_write(inode, kms);
199 CDEBUG(D_VFSTRACE, DFID " updating i_size %llu\n",
200 PFID(lu_object_fid(&obj->co_lu)),
201 (__u64)i_size_read(inode));
202 }
203 }
204
205 vvp_object_size_unlock(obj);
206
207 return result;
208}
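/*
 * A worked example of the three cases above, assuming a 4 KB page size and a
 * KMS (known minimum size) of 8192 bytes (purely illustrative numbers):
 *
 *   (A) read [0, 4096):      pos = 4095 <= kms, i_size is raised to kms if it
 *                            is smaller, no glimpse is needed;
 *   (B) read [4096, 12288):  pos = 12287 > kms, a glimpse lock is taken to
 *                            learn the real size and decide how short the
 *                            read will be;
 *   (C) read [16384, 20480): entirely beyond the file, *exceed is set so the
 *                            caller can return 0 immediately.
 */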
209
210/*****************************************************************************
211 *
212 * io operations.
213 *
214 */
215
216static int vvp_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
217 __u32 enqflags, enum cl_lock_mode mode,
218 pgoff_t start, pgoff_t end)
219{
220 struct vvp_io *vio = vvp_env_io(env);
221 struct cl_lock_descr *descr = &vio->vui_link.cill_descr;
222 struct cl_object *obj = io->ci_obj;
223
224 CLOBINVRNT(env, obj, vvp_object_invariant(obj));
225
226 CDEBUG(D_VFSTRACE, "lock: %d [%lu, %lu]\n", mode, start, end);
227
228 memset(&vio->vui_link, 0, sizeof(vio->vui_link));
229
230 if (vio->vui_fd && (vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
231 descr->cld_mode = CLM_GROUP;
98eae5e7 232 descr->cld_gid = vio->vui_fd->fd_grouplock.lg_gid;
233 } else {
234 descr->cld_mode = mode;
235 }
236 descr->cld_obj = obj;
237 descr->cld_start = start;
238 descr->cld_end = end;
239 descr->cld_enq_flags = enqflags;
240
241 cl_io_lock_add(env, io, &vio->vui_link);
242 return 0;
243}
244
245static int vvp_io_one_lock(const struct lu_env *env, struct cl_io *io,
246 __u32 enqflags, enum cl_lock_mode mode,
247 loff_t start, loff_t end)
248{
249 struct cl_object *obj = io->ci_obj;
250
251 return vvp_io_one_lock_index(env, io, enqflags, mode,
252 cl_index(obj, start), cl_index(obj, end));
253}
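/*
 * Usage sketch: callers pass byte offsets and vvp_io_one_lock() converts them
 * to page indices via cl_index(); vvp_io_read_lock() below, for example,
 * locks [crw_pos, crw_pos + crw_count - 1]. An append write instead locks
 * [0, OBD_OBJECT_EOF] because the final position is unknown (see
 * vvp_io_write_lock()). A group-locked file descriptor overrides the
 * requested mode with CLM_GROUP above.
 */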
254
255static int vvp_io_write_iter_init(const struct lu_env *env,
256 const struct cl_io_slice *ios)
257{
e0a8144b 258 struct vvp_io *vio = cl2vvp_io(env, ios);
77605e41 259
260 cl_page_list_init(&vio->u.write.vui_queue);
261 vio->u.write.vui_written = 0;
262 vio->u.write.vui_from = 0;
263 vio->u.write.vui_to = PAGE_SIZE;
264
265 return 0;
266}
267
268static void vvp_io_write_iter_fini(const struct lu_env *env,
269 const struct cl_io_slice *ios)
270{
e0a8144b 271 struct vvp_io *vio = cl2vvp_io(env, ios);
77605e41 272
e0a8144b 273 LASSERT(vio->u.write.vui_queue.pl_nr == 0);
274}
275
276static int vvp_io_fault_iter_init(const struct lu_env *env,
277 const struct cl_io_slice *ios)
278{
279 struct vvp_io *vio = cl2vvp_io(env, ios);
8c7b0e1a 280 struct inode *inode = vvp_object_inode(ios->cis_obj);
d7e09d03 281
e0a8144b 282 LASSERT(inode == file_inode(vio->vui_fd->fd_file));
46c360f9 283 vio->u.fault.ft_mtime = inode->i_mtime.tv_sec;
284 return 0;
285}
286
287static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
288{
289 struct cl_io *io = ios->cis_io;
290 struct cl_object *obj = io->ci_obj;
e0a8144b 291 struct vvp_io *vio = cl2vvp_io(env, ios);
1b1594da 292 struct inode *inode = vvp_object_inode(obj);
d7e09d03 293
8c7b0e1a 294 CLOBINVRNT(env, obj, vvp_object_invariant(obj));
d7e09d03 295
296 CDEBUG(D_VFSTRACE, DFID
297 " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
298 PFID(lu_object_fid(&obj->co_lu)),
299 io->ci_ignore_layout, io->ci_verify_layout,
e0a8144b 300 vio->vui_layout_gen, io->ci_restore_needed);
301
302 if (io->ci_restore_needed == 1) {
303 int rc;
304
		/* The file was detected as released; we need to restore it
		 * before finishing the I/O.
		 */
1b1594da 308 rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF);
5ea17d6c 309 /* if restore registration failed, no restart,
310 * we will return -ENODATA
311 */
		/* The layout will change after restore, so we need to block
		 * on the layout lock held by the MDT. As the MDT will not
		 * send the new layout in the lvb (see LU-3124), we have to
		 * fetch it explicitly; all of this is done by
		 * ll_layout_refresh().
		 */
318 if (rc == 0) {
319 io->ci_restore_needed = 0;
320 io->ci_need_restart = 1;
321 io->ci_verify_layout = 1;
322 } else {
323 io->ci_restore_needed = 1;
324 io->ci_need_restart = 0;
325 io->ci_verify_layout = 0;
326 io->ci_result = rc;
327 }
328 }
329
330 if (!io->ci_ignore_layout && io->ci_verify_layout) {
331 __u32 gen = 0;
332
333 /* check layout version */
1b1594da 334 ll_layout_refresh(inode, &gen);
e0a8144b 335 io->ci_need_restart = vio->vui_layout_gen != gen;
336 if (io->ci_need_restart) {
337 CDEBUG(D_VFSTRACE,
338 DFID" layout changed from %d to %d.\n",
339 PFID(lu_object_fid(&obj->co_lu)),
e0a8144b 340 vio->vui_layout_gen, gen);
c0894c6c 341 /* today successful restore is the only possible case */
5ea17d6c 342 /* restore was done, clear restoring state */
8c7b0e1a 343 ll_i2info(vvp_object_inode(obj))->lli_flags &=
344 ~LLIF_FILE_RESTORING;
345 }
346 }
347}
348
349static void vvp_io_fault_fini(const struct lu_env *env,
350 const struct cl_io_slice *ios)
351{
352 struct cl_io *io = ios->cis_io;
353 struct cl_page *page = io->u.ci_fault.ft_page;
354
8c7b0e1a 355 CLOBINVRNT(env, io->ci_obj, vvp_object_invariant(io->ci_obj));
d7e09d03 356
6e16818b 357 if (page) {
358 lu_ref_del(&page->cp_reference, "fault", io);
359 cl_page_put(env, page);
360 io->u.ci_fault.ft_page = NULL;
361 }
362 vvp_io_fini(env, ios);
363}
364
2d95f10e 365static enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma)
366{
367 /*
368 * we only want to hold PW locks if the mmap() can generate
369 * writes back to the file and that only happens in shared
370 * writable vmas
371 */
372 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
373 return CLM_WRITE;
374 return CLM_READ;
375}
376
377static int vvp_mmap_locks(const struct lu_env *env,
10cdef73 378 struct vvp_io *vio, struct cl_io *io)
d7e09d03 379{
9acc4500 380 struct vvp_thread_info *cti = vvp_env_info(env);
381 struct mm_struct *mm = current->mm;
382 struct vm_area_struct *vma;
9acc4500 383 struct cl_lock_descr *descr = &cti->vti_descr;
384 ldlm_policy_data_t policy;
385 unsigned long addr;
d7e09d03 386 ssize_t count;
06563b56 387 int result = 0;
388 struct iov_iter i;
389 struct iovec iov;
390
391 LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
392
393 if (!cl_is_normalio(env, io))
0a3bdb00 394 return 0;
d7e09d03 395
e0a8144b 396 if (!vio->vui_iter) /* nfs or loop back device write */
0a3bdb00 397 return 0;
398
	/* No MM (e.g. NFS)? Then no VMAs either. */
6e16818b 400 if (!mm)
0a3bdb00 401 return 0;
d7e09d03 402
e0a8144b 403 iov_for_each(iov, i, *vio->vui_iter) {
404 addr = (unsigned long)iov.iov_base;
405 count = iov.iov_len;
406 if (count == 0)
407 continue;
408
409 count += addr & (~PAGE_MASK);
410 addr &= PAGE_MASK;
411
412 down_read(&mm->mmap_sem);
a58a38ac 413 while ((vma = our_vma(mm, addr, count)) != NULL) {
2a8a3597 414 struct inode *inode = file_inode(vma->vm_file);
415 int flags = CEF_MUST;
416
417 if (ll_file_nolock(vma->vm_file)) {
418 /*
				 * mmap is not allowed in the no-lock case
d7e09d03 420 */
421 result = -EINVAL;
422 break;
423 }
424
425 /*
426 * XXX: Required lock mode can be weakened: CIT_WRITE
427 * io only ever reads user level buffer, and CIT_READ
428 * only writes on it.
429 */
430 policy_from_vma(&policy, vma, addr, count);
431 descr->cld_mode = vvp_mode_from_vma(vma);
432 descr->cld_obj = ll_i2info(inode)->lli_clob;
433 descr->cld_start = cl_index(descr->cld_obj,
434 policy.l_extent.start);
435 descr->cld_end = cl_index(descr->cld_obj,
436 policy.l_extent.end);
437 descr->cld_enq_flags = flags;
438 result = cl_io_lock_alloc_add(env, io, descr);
439
440 CDEBUG(D_VFSTRACE, "lock: %d: [%lu, %lu]\n",
441 descr->cld_mode, descr->cld_start,
442 descr->cld_end);
443
444 if (result < 0)
445 break;
446
447 if (vma->vm_end - addr >= count)
448 break;
449
450 count -= vma->vm_end - addr;
451 addr = vma->vm_end;
452 }
453 up_read(&mm->mmap_sem);
454 if (result < 0)
455 break;
d7e09d03 456 }
06563b56 457 return result;
458}
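/*
 * Example scenario (illustrative): a read() whose user buffer is itself an
 * mmap() of a Lustre file. Faulting on that buffer inside the read would
 * need a DLM extent lock on the mapped file; taking all such locks up front
 * here, through cl_io_lock_alloc_add(), keeps lock ordering consistent and
 * avoids deadlock between the I/O extent lock and the page-fault path.
 */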
459
460static void vvp_io_advance(const struct lu_env *env,
461 const struct cl_io_slice *ios,
462 size_t nob)
463{
464 struct vvp_io *vio = cl2vvp_io(env, ios);
465 struct cl_io *io = ios->cis_io;
466 struct cl_object *obj = ios->cis_io->ci_obj;
467
468 CLOBINVRNT(env, obj, vvp_object_invariant(obj));
469
470 if (!cl_is_normalio(env, io))
471 return;
472
473 iov_iter_reexpand(vio->vui_iter, vio->vui_tot_count -= nob);
474}
475
476static void vvp_io_update_iov(const struct lu_env *env,
477 struct vvp_io *vio, struct cl_io *io)
478{
479 size_t size = io->u.ci_rw.crw_count;
480
481 if (!cl_is_normalio(env, io) || !vio->vui_iter)
482 return;
483
484 iov_iter_truncate(vio->vui_iter, size);
485}
486
487static int vvp_io_rw_lock(const struct lu_env *env, struct cl_io *io,
488 enum cl_lock_mode mode, loff_t start, loff_t end)
489{
e0a8144b 490 struct vvp_io *vio = vvp_env_io(env);
491 int result;
492 int ast_flags = 0;
493
494 LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
d7e09d03 495
e0a8144b 496 vvp_io_update_iov(env, vio, io);
497
498 if (io->u.ci_rw.crw_nonblock)
499 ast_flags |= CEF_NONBLOCK;
e0a8144b 500 result = vvp_mmap_locks(env, vio, io);
d7e09d03 501 if (result == 0)
10cdef73 502 result = vvp_io_one_lock(env, io, ast_flags, mode, start, end);
0a3bdb00 503 return result;
504}
505
506static int vvp_io_read_lock(const struct lu_env *env,
507 const struct cl_io_slice *ios)
508{
509 struct cl_io *io = ios->cis_io;
510 struct cl_io_rw_common *rd = &io->u.ci_rd.rd;
511 int result;
512
513 result = vvp_io_rw_lock(env, io, CLM_READ, rd->crw_pos,
514 rd->crw_pos + rd->crw_count - 1);
515
0a3bdb00 516 return result;
517}
518
519static int vvp_io_fault_lock(const struct lu_env *env,
520 const struct cl_io_slice *ios)
521{
522 struct cl_io *io = ios->cis_io;
523 struct vvp_io *vio = cl2vvp_io(env, ios);
524 /*
525 * XXX LDLM_FL_CBPENDING
526 */
527 return vvp_io_one_lock_index(env,
528 io, 0,
529 vvp_mode_from_vma(vio->u.fault.ft_vma),
530 io->u.ci_fault.ft_index,
531 io->u.ci_fault.ft_index);
532}
533
534static int vvp_io_write_lock(const struct lu_env *env,
535 const struct cl_io_slice *ios)
536{
537 struct cl_io *io = ios->cis_io;
538 loff_t start;
539 loff_t end;
540
541 if (io->u.ci_wr.wr_append) {
542 start = 0;
543 end = OBD_OBJECT_EOF;
544 } else {
545 start = io->u.ci_wr.wr.crw_pos;
546 end = start + io->u.ci_wr.wr.crw_count - 1;
547 }
548 return vvp_io_rw_lock(env, io, CLM_WRITE, start, end);
549}
550
551static int vvp_io_setattr_iter_init(const struct lu_env *env,
552 const struct cl_io_slice *ios)
553{
554 return 0;
555}
556
557/**
 * Implementation of the cl_io_operations::cio_lock() method for CIT_SETATTR io.
559 *
560 * Handles "lockless io" mode when extent locking is done by server.
561 */
562static int vvp_io_setattr_lock(const struct lu_env *env,
563 const struct cl_io_slice *ios)
564{
565 struct cl_io *io = ios->cis_io;
566 __u64 new_size;
567 __u32 enqflags = 0;
568
569 if (cl_io_is_trunc(io)) {
570 new_size = io->u.ci_setattr.sa_attr.lvb_size;
571 if (new_size == 0)
572 enqflags = CEF_DISCARD_DATA;
573 } else {
574 if ((io->u.ci_setattr.sa_attr.lvb_mtime >=
575 io->u.ci_setattr.sa_attr.lvb_ctime) ||
576 (io->u.ci_setattr.sa_attr.lvb_atime >=
577 io->u.ci_setattr.sa_attr.lvb_ctime))
578 return 0;
579 new_size = 0;
580 }
e0a8144b 581
10cdef73 582 return vvp_io_one_lock(env, io, enqflags, CLM_WRITE,
583 new_size, OBD_OBJECT_EOF);
584}
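/*
 * Sketch of the cases handled above (illustrative):
 *   - truncate to size N: lock the extent [N, OBD_OBJECT_EOF] in CLM_WRITE
 *     mode, with CEF_DISCARD_DATA when N == 0 so cached data can simply be
 *     dropped;
 *   - time-only setattr: return 0 (no extent lock) when the new mtime or
 *     atime is not older than the new ctime, otherwise lock
 *     [0, OBD_OBJECT_EOF].
 */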
585
586static int vvp_do_vmtruncate(struct inode *inode, size_t size)
587{
588 int result;
589 /*
590 * Only ll_inode_size_lock is taken at this level.
591 */
592 ll_inode_size_lock(inode);
593 result = inode_newsize_ok(inode, size);
594 if (result < 0) {
595 ll_inode_size_unlock(inode);
596 return result;
597 }
598 truncate_setsize(inode, size);
599 ll_inode_size_unlock(inode);
600 return result;
601}
602
603static int vvp_io_setattr_trunc(const struct lu_env *env,
604 const struct cl_io_slice *ios,
605 struct inode *inode, loff_t size)
606{
607 inode_dio_wait(inode);
608 return 0;
609}
610
611static int vvp_io_setattr_time(const struct lu_env *env,
612 const struct cl_io_slice *ios)
613{
614 struct cl_io *io = ios->cis_io;
615 struct cl_object *obj = io->ci_obj;
9acc4500 616 struct cl_attr *attr = vvp_env_thread_attr(env);
617 int result;
618 unsigned valid = CAT_CTIME;
619
620 cl_object_attr_lock(obj);
621 attr->cat_ctime = io->u.ci_setattr.sa_attr.lvb_ctime;
622 if (io->u.ci_setattr.sa_valid & ATTR_ATIME_SET) {
623 attr->cat_atime = io->u.ci_setattr.sa_attr.lvb_atime;
624 valid |= CAT_ATIME;
625 }
626 if (io->u.ci_setattr.sa_valid & ATTR_MTIME_SET) {
627 attr->cat_mtime = io->u.ci_setattr.sa_attr.lvb_mtime;
628 valid |= CAT_MTIME;
629 }
630 result = cl_object_attr_set(env, obj, attr, valid);
631 cl_object_attr_unlock(obj);
632
633 return result;
634}
635
636static int vvp_io_setattr_start(const struct lu_env *env,
637 const struct cl_io_slice *ios)
638{
639 struct cl_io *io = ios->cis_io;
8c7b0e1a 640 struct inode *inode = vvp_object_inode(io->ci_obj);
5dd16419 641 int result = 0;
d7e09d03 642
5955102c 643 inode_lock(inode);
d7e09d03 644 if (cl_io_is_trunc(io))
645 result = vvp_io_setattr_trunc(env, ios, inode,
646 io->u.ci_setattr.sa_attr.lvb_size);
647 if (result == 0)
648 result = vvp_io_setattr_time(env, ios);
649 return result;
650}
651
652static void vvp_io_setattr_end(const struct lu_env *env,
653 const struct cl_io_slice *ios)
654{
655 struct cl_io *io = ios->cis_io;
8c7b0e1a 656 struct inode *inode = vvp_object_inode(io->ci_obj);
d7e09d03 657
81e053c7 658 if (cl_io_is_trunc(io))
d7e09d03 659 /* Truncate in memory pages - they must be clean pages
660 * because osc has already notified to destroy osc_extents.
661 */
d7e09d03 662 vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
81e053c7 663
5955102c 664 inode_unlock(inode);
665}
666
667static void vvp_io_setattr_fini(const struct lu_env *env,
668 const struct cl_io_slice *ios)
669{
670 vvp_io_fini(env, ios);
671}
672
673static int vvp_io_read_start(const struct lu_env *env,
674 const struct cl_io_slice *ios)
675{
676 struct vvp_io *vio = cl2vvp_io(env, ios);
677 struct cl_io *io = ios->cis_io;
678 struct cl_object *obj = io->ci_obj;
8c7b0e1a 679 struct inode *inode = vvp_object_inode(obj);
e0a8144b 680 struct file *file = vio->vui_fd->fd_file;
681
682 int result;
683 loff_t pos = io->u.ci_rd.rd.crw_pos;
684 long cnt = io->u.ci_rd.rd.crw_count;
e0a8144b 685 long tot = vio->vui_tot_count;
686 int exceed = 0;
687
8c7b0e1a 688 CLOBINVRNT(env, obj, vvp_object_invariant(obj));
689
690 CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);
691
692 if (!can_populate_pages(env, io, inode))
693 return 0;
694
fee6eb50 695 result = vvp_prep_size(env, obj, io, pos, tot, &exceed);
696 if (result != 0)
697 return result;
698 else if (exceed != 0)
699 goto out;
700
701 LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu,
702 "Read ino %lu, %lu bytes, offset %lld, size %llu\n",
703 inode->i_ino, cnt, pos, i_size_read(inode));
704
705 /* turn off the kernel's read-ahead */
e0a8144b 706 vio->vui_fd->fd_file->f_ra.ra_pages = 0;
707
708 /* initialize read-ahead window once per syscall */
709 if (!vio->vui_ra_valid) {
710 vio->vui_ra_valid = true;
711 vio->vui_ra_start = cl_index(obj, pos);
5f479924 712 vio->vui_ra_count = cl_index(obj, tot + PAGE_SIZE - 1);
bc4320a9 713 ll_ras_enter(file);
714 }
715
716 /* BUG: 5972 */
717 file_accessed(file);
e0a8144b 718 switch (vio->vui_io_subtype) {
d7e09d03 719 case IO_NORMAL:
720 LASSERT(vio->vui_iocb->ki_pos == pos);
721 result = generic_file_read_iter(vio->vui_iocb, vio->vui_iter);
74c0da19 722 break;
723 case IO_SPLICE:
724 result = generic_file_splice_read(file, &pos,
725 vio->u.splice.vui_pipe, cnt,
726 vio->u.splice.vui_flags);
		/* LU-1109: do the splice read stripe by stripe, otherwise it
		 * may make nfsd get stuck if this read occupies all of the
		 * internal pipe buffers.
		 */
731 io->ci_continue = 0;
732 break;
733 default:
e0a8144b 734 CERROR("Wrong IO type %u\n", vio->vui_io_subtype);
735 LBUG();
736 }
737
738out:
739 if (result >= 0) {
740 if (result < cnt)
741 io->ci_continue = 0;
742 io->ci_nob += result;
743 ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
e0a8144b 744 vio->vui_fd, pos, result, READ);
745 result = 0;
746 }
747 return result;
748}
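/*
 * Read-ahead window arithmetic above, with illustrative numbers and assuming
 * cl_index() maps a byte offset to offset >> PAGE_SHIFT for 4 KB pages:
 * a 2 MB read starting at pos = 1 MB gives
 *	vui_ra_start = cl_index(obj, 1 MB)        = page index 256,
 *	vui_ra_count = cl_index(obj, 2 MB + 4095) = 512 pages,
 * i.e. the window covers exactly the pages this syscall will touch, while
 * the kernel's own read-ahead was disabled by zeroing f_ra.ra_pages.
 */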
749
750static int vvp_io_commit_sync(const struct lu_env *env, struct cl_io *io,
751 struct cl_page_list *plist, int from, int to)
752{
753 struct cl_2queue *queue = &io->ci_queue;
754 struct cl_page *page;
755 unsigned int bytes = 0;
756 int rc = 0;
757
758 if (plist->pl_nr == 0)
759 return 0;
760
c11599b8 761 if (from > 0 || to != PAGE_SIZE) {
77605e41 762 page = cl_page_list_first(plist);
763 if (plist->pl_nr == 1) {
764 cl_page_clip(env, page, from, to);
c11599b8 765 } else {
766 if (from > 0)
767 cl_page_clip(env, page, from, PAGE_SIZE);
768 if (to != PAGE_SIZE) {
769 page = cl_page_list_last(plist);
770 cl_page_clip(env, page, 0, to);
771 }
772 }
c11599b8 773 }
774
775 cl_2queue_init(queue);
776 cl_page_list_splice(plist, &queue->c2_qin);
777 rc = cl_io_submit_sync(env, io, CRT_WRITE, queue, 0);
778
779 /* plist is not sorted any more */
780 cl_page_list_splice(&queue->c2_qin, plist);
781 cl_page_list_splice(&queue->c2_qout, plist);
782 cl_2queue_fini(env, queue);
783
784 if (rc == 0) {
785 /* calculate bytes */
786 bytes = plist->pl_nr << PAGE_SHIFT;
787 bytes -= from + PAGE_SIZE - to;
788
789 while (plist->pl_nr > 0) {
790 page = cl_page_list_first(plist);
791 cl_page_list_del(env, plist, page);
792
793 cl_page_clip(env, page, 0, PAGE_SIZE);
794
7addf402 795 SetPageUptodate(cl_page_vmpage(page));
796 cl_page_disown(env, io, page);
797
798 /* held in ll_cl_init() */
799 lu_ref_del(&page->cp_reference, "cl_io", io);
800 cl_page_put(env, page);
801 }
802 }
803
804 return bytes > 0 ? bytes : rc;
805}
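/*
 * Worked example for the byte accounting above (illustrative numbers,
 * PAGE_SIZE = 4096): three pages submitted with from = 512 and to = 2048
 * give
 *	bytes = 3 * 4096 - (512 + 4096 - 2048) = 12288 - 2560 = 9728,
 * i.e. the partial head of the first page and the partial tail of the last
 * page are subtracted from the full page span.
 */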
806
807static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
808 struct cl_page *page)
809{
3a52f803 810 struct vvp_page *vpg;
811 struct page *vmpage = page->cp_vmpage;
812 struct cl_object *clob = cl_io_top(io)->ci_obj;
813
814 SetPageUptodate(vmpage);
815 set_page_dirty(vmpage);
7addf402 816
817 vpg = cl2vvp_page(cl_object_page_slice(clob, page));
818 vvp_write_pending(cl2vvp(clob), vpg);
819
820 cl_page_disown(env, io, page);
821
822 /* held in ll_cl_init() */
058a6a42 823 lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
824 cl_page_put(env, page);
825}
826
827/* make sure the page list is contiguous */
828static bool page_list_sanity_check(struct cl_object *obj,
829 struct cl_page_list *plist)
830{
831 struct cl_page *page;
832 pgoff_t index = CL_PAGE_EOF;
833
834 cl_page_list_for_each(page, plist) {
3a52f803 835 struct vvp_page *vpg = cl_object_page_slice(obj, page);
7addf402 836
77605e41 837 if (index == CL_PAGE_EOF) {
3a52f803 838 index = vvp_index(vpg);
839 continue;
840 }
841
842 ++index;
3a52f803 843 if (index == vvp_index(vpg))
844 continue;
845
846 return false;
847 }
848 return true;
849}
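/*
 * Example (illustrative): a queue whose pages map to indices 7, 8, 9 passes
 * the check, while 7, 9 fails, because a gap would break the assumption in
 * vvp_io_write_commit() that the committed range is a single contiguous
 * chunk described by vui_from and vui_to alone.
 */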
850
851/* Return how many bytes have queued or written */
852int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io)
853{
854 struct cl_object *obj = io->ci_obj;
8c7b0e1a 855 struct inode *inode = vvp_object_inode(obj);
856 struct vvp_io *vio = vvp_env_io(env);
857 struct cl_page_list *queue = &vio->u.write.vui_queue;
858 struct cl_page *page;
859 int rc = 0;
860 int bytes = 0;
e0a8144b 861 unsigned int npages = vio->u.write.vui_queue.pl_nr;
862
863 if (npages == 0)
864 return 0;
865
866 CDEBUG(D_VFSTRACE, "commit async pages: %d, from %d, to %d\n",
e0a8144b 867 npages, vio->u.write.vui_from, vio->u.write.vui_to);
77605e41 868
7addf402 869 LASSERT(page_list_sanity_check(obj, queue));
870
871 /* submit IO with async write */
872 rc = cl_io_commit_async(env, io, queue,
e0a8144b 873 vio->u.write.vui_from, vio->u.write.vui_to,
874 write_commit_callback);
875 npages -= queue->pl_nr; /* already committed pages */
876 if (npages > 0) {
877 /* calculate how many bytes were written */
878 bytes = npages << PAGE_SHIFT;
879
880 /* first page */
e0a8144b 881 bytes -= vio->u.write.vui_from;
77605e41 882 if (queue->pl_nr == 0) /* last page */
e0a8144b 883 bytes -= PAGE_SIZE - vio->u.write.vui_to;
884 LASSERTF(bytes > 0, "bytes = %d, pages = %d\n", bytes, npages);
885
e0a8144b 886 vio->u.write.vui_written += bytes;
887
888 CDEBUG(D_VFSTRACE, "Committed %d pages %d bytes, tot: %ld\n",
e0a8144b 889 npages, bytes, vio->u.write.vui_written);
890
891 /* the first page must have been written. */
e0a8144b 892 vio->u.write.vui_from = 0;
77605e41 893 }
7addf402 894 LASSERT(page_list_sanity_check(obj, queue));
895 LASSERT(ergo(rc == 0, queue->pl_nr == 0));
896
897 /* out of quota, try sync write */
898 if (rc == -EDQUOT && !cl_io_is_mkwrite(io)) {
899 rc = vvp_io_commit_sync(env, io, queue,
900 vio->u.write.vui_from,
901 vio->u.write.vui_to);
77605e41 902 if (rc > 0) {
e0a8144b 903 vio->u.write.vui_written += rc;
904 rc = 0;
905 }
906 }
907
908 /* update inode size */
d2995737 909 ll_merge_attr(env, inode);
910
	/* The pages remaining in the queue failed to commit; discard them
912 * unless they were dirtied before.
913 */
914 while (queue->pl_nr > 0) {
915 page = cl_page_list_first(queue);
916 cl_page_list_del(env, queue, page);
917
7addf402 918 if (!PageDirty(cl_page_vmpage(page)))
919 cl_page_discard(env, io, page);
920
921 cl_page_disown(env, io, page);
922
923 /* held in ll_cl_init() */
924 lu_ref_del(&page->cp_reference, "cl_io", io);
925 cl_page_put(env, page);
926 }
927 cl_page_list_fini(env, queue);
928
929 return rc;
930}
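/*
 * Worked example for the accounting above (illustrative, PAGE_SIZE = 4096):
 * if 4 queued pages are all committed with vui_from = 100 and vui_to = 4096,
 * then
 *	bytes = 4 * 4096 - 100 = 16284
 * and, because the remaining queue is empty, nothing is subtracted for the
 * tail; vui_from is then reset to 0 since the first page has been written.
 */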
931
932static int vvp_io_write_start(const struct lu_env *env,
933 const struct cl_io_slice *ios)
934{
e0a8144b 935 struct vvp_io *vio = cl2vvp_io(env, ios);
936 struct cl_io *io = ios->cis_io;
937 struct cl_object *obj = io->ci_obj;
8c7b0e1a 938 struct inode *inode = vvp_object_inode(obj);
939 ssize_t result = 0;
940 loff_t pos = io->u.ci_wr.wr.crw_pos;
941 size_t cnt = io->u.ci_wr.wr.crw_count;
942
943 if (!can_populate_pages(env, io, inode))
944 return 0;
945
946 if (cl_io_is_append(io)) {
947 /*
948 * PARALLEL IO This has to be changed for parallel IO doing
949 * out-of-order writes.
950 */
06563b56 951 ll_merge_attr(env, inode);
952 pos = i_size_read(inode);
953 io->u.ci_wr.wr.crw_pos = pos;
e0a8144b 954 vio->vui_iocb->ki_pos = pos;
74c0da19 955 } else {
e0a8144b 956 LASSERT(vio->vui_iocb->ki_pos == pos);
957 }
958
959 CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
960
961 if (!vio->vui_iter) {
962 /* from a temp io in ll_cl_init(). */
d7e09d03 963 result = 0;
964 } else {
965 /*
966 * When using the locked AIO function (generic_file_aio_write())
967 * testing has shown the inode mutex to be a limiting factor
968 * with multi-threaded single shared file performance. To get
969 * around this, we now use the lockless version. To maintain
970 * consistency, proper locking to protect against writes,
		 * truncates, etc. is handled in the higher layers of lustre.
972 */
973 bool lock_node = !IS_NOSEC(inode);
974
975 if (lock_node)
976 inode_lock(inode);
977 result = __generic_file_write_iter(vio->vui_iocb,
978 vio->vui_iter);
979 if (lock_node)
980 inode_unlock(inode);
981
982 if (result > 0 || result == -EIOCBQUEUED)
983 result = generic_write_sync(vio->vui_iocb, result);
984 }
b42b15fd 985
d7e09d03 986 if (result > 0) {
77605e41 987 result = vvp_io_write_commit(env, io);
988 if (vio->u.write.vui_written > 0) {
989 result = vio->u.write.vui_written;
990 io->ci_nob += result;
991
992 CDEBUG(D_VFSTRACE, "write: nob %zd, result: %zd\n",
993 io->ci_nob, result);
994 }
995 }
996 if (result > 0) {
997 struct ll_inode_info *lli = ll_i2info(inode);
998
999 spin_lock(&lli->lli_lock);
1000 lli->lli_flags |= LLIF_DATA_MODIFIED;
1001 spin_unlock(&lli->lli_lock);
1002
1003 if (result < cnt)
1004 io->ci_continue = 0;
d7e09d03 1005 ll_rw_stats_tally(ll_i2sbi(inode), current->pid,
e0a8144b 1006 vio->vui_fd, pos, result, WRITE);
1007 result = 0;
1008 }
0a3bdb00 1009 return result;
1010}
1011
1012static int vvp_io_kernel_fault(struct vvp_fault_io *cfio)
1013{
10cdef73 1014 struct vm_fault *vmf = cfio->ft_vmf;
d7e09d03 1015
1016 cfio->ft_flags = filemap_fault(cfio->ft_vma, vmf);
1017 cfio->ft_flags_valid = 1;
1018
1019 if (vmf->page) {
1020 CDEBUG(D_PAGE,
1021 "page %p map %p index %lu flags %lx count %u priv %0lx: got addr %p type NOPAGE\n",
1022 vmf->page, vmf->page->mapping, vmf->page->index,
1023 (long)vmf->page->flags, page_count(vmf->page),
1024 page_private(vmf->page), vmf->virtual_address);
10cdef73 1025 if (unlikely(!(cfio->ft_flags & VM_FAULT_LOCKED))) {
d7e09d03 1026 lock_page(vmf->page);
10cdef73 1027 cfio->ft_flags |= VM_FAULT_LOCKED;
1028 }
1029
1030 cfio->ft_vmpage = vmf->page;
1031 return 0;
1032 }
1033
10cdef73 1034 if (cfio->ft_flags & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
1035 CDEBUG(D_PAGE, "got addr %p - SIGBUS\n", vmf->virtual_address);
1036 return -EFAULT;
1037 }
1038
10cdef73 1039 if (cfio->ft_flags & VM_FAULT_OOM) {
1040 CDEBUG(D_PAGE, "got addr %p - OOM\n", vmf->virtual_address);
1041 return -ENOMEM;
1042 }
1043
10cdef73 1044 if (cfio->ft_flags & VM_FAULT_RETRY)
1045 return -EAGAIN;
1046
10cdef73 1047 CERROR("Unknown error in page fault %d!\n", cfio->ft_flags);
1048 return -EINVAL;
1049}
1050
1051static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
1052 struct cl_page *page)
1053{
3a52f803 1054 struct vvp_page *vpg;
7addf402 1055 struct cl_object *clob = cl_io_top(io)->ci_obj;
77605e41 1056
7addf402 1057 set_page_dirty(page->cp_vmpage);
77605e41 1058
1059 vpg = cl2vvp_page(cl_object_page_slice(clob, page));
1060 vvp_write_pending(cl2vvp(clob), vpg);
1061}
1062
1063static int vvp_io_fault_start(const struct lu_env *env,
1064 const struct cl_io_slice *ios)
1065{
1066 struct vvp_io *vio = cl2vvp_io(env, ios);
1067 struct cl_io *io = ios->cis_io;
1068 struct cl_object *obj = io->ci_obj;
8c7b0e1a 1069 struct inode *inode = vvp_object_inode(obj);
1070 struct cl_fault_io *fio = &io->u.ci_fault;
1071 struct vvp_fault_io *cfio = &vio->u.fault;
1072 loff_t offset;
1073 int result = 0;
1074 struct page *vmpage = NULL;
1075 struct cl_page *page;
1076 loff_t size;
77605e41 1077 pgoff_t last_index;
1078
1079 if (fio->ft_executable &&
46c360f9 1080 inode->i_mtime.tv_sec != vio->u.fault.ft_mtime)
1081 CWARN("binary "DFID
1082 " changed while waiting for the page fault lock\n",
1083 PFID(lu_object_fid(&obj->co_lu)));
1084
1085 /* offset of the last byte on the page */
1086 offset = cl_offset(obj, fio->ft_index + 1) - 1;
1087 LASSERT(cl_index(obj, offset) == fio->ft_index);
fee6eb50 1088 result = vvp_prep_size(env, obj, io, 0, offset + 1, NULL);
1089 if (result != 0)
1090 return result;
1091
1092 /* must return locked page */
1093 if (fio->ft_mkwrite) {
6e16818b 1094 LASSERT(cfio->ft_vmpage);
1095 lock_page(cfio->ft_vmpage);
1096 } else {
1097 result = vvp_io_kernel_fault(cfio);
1098 if (result != 0)
1099 return result;
1100 }
1101
1102 vmpage = cfio->ft_vmpage;
1103 LASSERT(PageLocked(vmpage));
1104
1105 if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_FAULT_TRUNC_RACE))
1106 ll_invalidate_page(vmpage);
1107
1108 size = i_size_read(inode);
	/* Though we already hold a cl_lock on this page, it can still be
	 * truncated locally.
	 */
1112 if (unlikely((vmpage->mapping != inode->i_mapping) ||
1113 (page_offset(vmpage) > size))) {
1114 CDEBUG(D_PAGE, "llite: fault and truncate race happened!\n");
1115
1116 /* return +1 to stop cl_io_loop() and ll_fault() will catch
1117 * and retry.
1118 */
b2952d62 1119 result = 1;
34e1f2bb 1120 goto out;
1121 }
1122
1123 last_index = cl_index(obj, size - 1);
1124
557732ad 1125 if (fio->ft_mkwrite) {
		/*
		 * Capture the size while holding the lli_trunc_sem from above;
		 * we want to make sure that we complete the mkwrite action
		 * while holding this lock. We need to make sure that we are
		 * not past the end of the file.
		 */
1132 if (last_index < fio->ft_index) {
1133 CDEBUG(D_PAGE,
1134 "llite: mkwrite and truncate race happened: %p: 0x%lx 0x%lx\n",
1135 vmpage->mapping, fio->ft_index, last_index);
1136 /*
1137 * We need to return if we are
			 * past the end of the file. This will propagate
1139 * up the call stack to ll_page_mkwrite where
1140 * we will return VM_FAULT_NOPAGE. Any non-negative
1141 * value returned here will be silently
1142 * converted to 0. If the vmpage->mapping is null
1143 * the error code would be converted back to ENODATA
1144 * in ll_page_mkwrite0. Thus we return -ENODATA
1145 * to handle both cases
1146 */
1147 result = -ENODATA;
1148 goto out;
1149 }
1150 }
1151
1152 page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE);
1153 if (IS_ERR(page)) {
1154 result = PTR_ERR(page);
1155 goto out;
1156 }
1157
1158 /* if page is going to be written, we should add this page into cache
1159 * earlier.
1160 */
1161 if (fio->ft_mkwrite) {
1162 wait_on_page_writeback(vmpage);
1163 if (!PageDirty(vmpage)) {
1164 struct cl_page_list *plist = &io->ci_queue.c2_qin;
3a52f803 1165 struct vvp_page *vpg = cl_object_page_slice(obj, page);
77605e41 1166 int to = PAGE_SIZE;
1167
1168 /* vvp_page_assume() calls wait_on_page_writeback(). */
1169 cl_page_assume(env, io, page);
1170
1171 cl_page_list_init(plist);
1172 cl_page_list_add(plist, page);
1173
1174 /* size fixup */
3a52f803 1175 if (last_index == vvp_index(vpg))
77605e41 1176 to = size & ~PAGE_MASK;
1177
1178 /* Do not set Dirty bit here so that in case IO is
1179 * started before the page is really made dirty, we
1180 * still have chance to detect it.
1181 */
1182 result = cl_io_commit_async(env, io, plist, 0, to,
1183 mkwrite_commit_callback);
d7e09d03 1184 LASSERT(cl_page_is_owned(page, io));
77605e41 1185 cl_page_list_fini(env, plist);
1186
1187 vmpage = NULL;
1188 if (result < 0) {
1189 cl_page_discard(env, io, page);
1190 cl_page_disown(env, io, page);
1191
1192 cl_page_put(env, page);
1193
1194 /* we're in big trouble, what can we do now? */
1195 if (result == -EDQUOT)
1196 result = -ENOSPC;
34e1f2bb 1197 goto out;
da5ecb4d 1198 } else {
d7e09d03 1199 cl_page_disown(env, io, page);
da5ecb4d 1200 }
1201 }
1202 }
1203
1204 /*
1205 * The ft_index is only used in the case of
1206 * a mkwrite action. We need to check
1207 * our assertions are correct, since
1208 * we should have caught this above
1209 */
1210 LASSERT(!fio->ft_mkwrite || fio->ft_index <= last_index);
1211 if (fio->ft_index == last_index)
1212 /*
1213 * Last page is mapped partially.
1214 */
1215 fio->ft_nob = size - cl_offset(obj, fio->ft_index);
1216 else
1217 fio->ft_nob = cl_page_size(obj);
1218
1219 lu_ref_add(&page->cp_reference, "fault", io);
1220 fio->ft_page = page;
1221
1222out:
1223 /* return unlocked vmpage to avoid deadlocking */
6e16818b 1224 if (vmpage)
d7e09d03 1225 unlock_page(vmpage);
1226
1227 cfio->ft_flags &= ~VM_FAULT_LOCKED;
1228
1229 return result;
1230}
1231
1232static int vvp_io_fsync_start(const struct lu_env *env,
1233 const struct cl_io_slice *ios)
1234{
	/* We should mark the TOWRITE bit on each dirty page in the radix
	 * tree to verify that pages have been written, but this is
	 * difficult because of races.
	 */
1239 return 0;
1240}
1241
1242static int vvp_io_read_page(const struct lu_env *env,
1243 const struct cl_io_slice *ios,
1244 const struct cl_page_slice *slice)
1245{
1246 struct cl_io *io = ios->cis_io;
3a52f803 1247 struct vvp_page *vpg = cl2vvp_page(slice);
d7e09d03 1248 struct cl_page *page = slice->cpl_page;
8c7b0e1a 1249 struct inode *inode = vvp_object_inode(slice->cpl_obj);
d7e09d03 1250 struct ll_sb_info *sbi = ll_i2sbi(inode);
e0a8144b 1251 struct ll_file_data *fd = cl2vvp_io(env, ios)->vui_fd;
d7e09d03 1252 struct ll_readahead_state *ras = &fd->fd_ras;
d7e09d03 1253 struct cl_2queue *queue = &io->ci_queue;
d7e09d03 1254
1255 if (sbi->ll_ra_info.ra_max_pages_per_file &&
1256 sbi->ll_ra_info.ra_max_pages)
1257 ras_update(sbi, inode, ras, vvp_index(vpg),
1258 vpg->vpg_defer_uptodate);
d7e09d03 1259
1260 if (vpg->vpg_defer_uptodate) {
1261 vpg->vpg_ra_used = 1;
1262 cl_page_export(env, page, 1);
1263 }
1264 /*
1265 * Add page into the queue even when it is marked uptodate above.
1266 * this will unlock it automatically as part of cl_page_list_disown().
1267 */
fd7444fe 1268
53f1a127 1269 cl_page_list_add(&queue->c2_qin, page);
1270 if (sbi->ll_ra_info.ra_max_pages_per_file &&
1271 sbi->ll_ra_info.ra_max_pages)
fd7444fe 1272 ll_readahead(env, io, &queue->c2_qin, ras,
3a52f803 1273 vpg->vpg_defer_uptodate);
d7e09d03 1274
0a3bdb00 1275 return 0;
1276}
1277
e3c9078a 1278static void vvp_io_end(const struct lu_env *env, const struct cl_io_slice *ios)
1279{
1280 CLOBINVRNT(env, ios->cis_io->ci_obj,
1281 vvp_object_invariant(ios->cis_io->ci_obj));
1282}
1283
1284static const struct cl_io_operations vvp_io_ops = {
1285 .op = {
1286 [CIT_READ] = {
bc4320a9 1287 .cio_fini = vvp_io_fini,
1288 .cio_lock = vvp_io_read_lock,
1289 .cio_start = vvp_io_read_start,
10cdef73 1290 .cio_advance = vvp_io_advance,
1291 },
1292 [CIT_WRITE] = {
1293 .cio_fini = vvp_io_fini,
1294 .cio_iter_init = vvp_io_write_iter_init,
1295 .cio_iter_fini = vvp_io_write_iter_fini,
1296 .cio_lock = vvp_io_write_lock,
1297 .cio_start = vvp_io_write_start,
10cdef73 1298 .cio_advance = vvp_io_advance,
1299 },
1300 [CIT_SETATTR] = {
1301 .cio_fini = vvp_io_setattr_fini,
1302 .cio_iter_init = vvp_io_setattr_iter_init,
1303 .cio_lock = vvp_io_setattr_lock,
1304 .cio_start = vvp_io_setattr_start,
1305 .cio_end = vvp_io_setattr_end
1306 },
1307 [CIT_FAULT] = {
1308 .cio_fini = vvp_io_fault_fini,
1309 .cio_iter_init = vvp_io_fault_iter_init,
1310 .cio_lock = vvp_io_fault_lock,
1311 .cio_start = vvp_io_fault_start,
10cdef73 1312 .cio_end = vvp_io_end,
1313 },
1314 [CIT_FSYNC] = {
1315 .cio_start = vvp_io_fsync_start,
1316 .cio_fini = vvp_io_fini
1317 },
1318 [CIT_MISC] = {
1319 .cio_fini = vvp_io_fini
1320 }
1321 },
1322 .cio_read_page = vvp_io_read_page,
1323};
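/*
 * Dispatch sketch (illustrative): for a buffered read the generic cl_io
 * state machine walks this table roughly as
 *	vvp_io_read_lock() -> vvp_io_read_start() -> vvp_io_advance()
 *	-> vvp_io_fini(),
 * while a write additionally passes through vvp_io_write_iter_init()/
 * vvp_io_write_iter_fini() to manage the queue of pages committed
 * asynchronously above.
 */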
1324
1325int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
1326 struct cl_io *io)
1327{
1328 struct vvp_io *vio = vvp_env_io(env);
8c7b0e1a 1329 struct inode *inode = vvp_object_inode(obj);
1330 int result;
1331
8c7b0e1a 1332 CLOBINVRNT(env, obj, vvp_object_invariant(obj));
d7e09d03 1333
1334 CDEBUG(D_VFSTRACE, DFID
1335 " ignore/verify layout %d/%d, layout version %d restore needed %d\n",
1336 PFID(lu_object_fid(&obj->co_lu)),
1337 io->ci_ignore_layout, io->ci_verify_layout,
e0a8144b 1338 vio->vui_layout_gen, io->ci_restore_needed);
5ea17d6c 1339
1340 CL_IO_SLICE_CLEAN(vio, vui_cl);
1341 cl_io_slice_add(io, &vio->vui_cl, obj, &vvp_io_ops);
1342 vio->vui_ra_valid = false;
1343 result = 0;
1344 if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
1345 size_t count;
1346 struct ll_inode_info *lli = ll_i2info(inode);
1347
1348 count = io->u.ci_rw.crw_count;
1349 /* "If nbyte is 0, read() will return 0 and have no other
1350 * results." -- Single Unix Spec
1351 */
1352 if (count == 0)
1353 result = 1;
b42b15fd 1354 else
e0a8144b 1355 vio->vui_tot_count = count;
b42b15fd 1356
1357 /* for read/write, we store the jobid in the inode, and
1358 * it'll be fetched by osc when building RPC.
1359 *
1360 * it's not accurate if the file is shared by different
1361 * jobs.
1362 */
1363 lustre_get_jobid(lli->lli_jobid);
1364 } else if (io->ci_type == CIT_SETATTR) {
1365 if (!cl_io_is_trunc(io))
1366 io->ci_lockreq = CILR_MANDATORY;
1367 }
1368
1369 /* ignore layout change for generic CIT_MISC but not for glimpse.
1370 * io context for glimpse must set ci_verify_layout to true,
1371 * see cl_glimpse_size0() for details.
1372 */
1373 if (io->ci_type == CIT_MISC && !io->ci_verify_layout)
1374 io->ci_ignore_layout = 1;
1375
	/* Enqueue the layout lock and get the layout version. We need to do
	 * this even for operations that require an open file, such as read
	 * and write, because the layout lock might not be granted in IT_OPEN.
	 */
65fb55d1 1380 if (result == 0 && !io->ci_ignore_layout) {
e0a8144b 1381 result = ll_layout_refresh(inode, &vio->vui_layout_gen);
1382 if (result == -ENOENT)
			/* If the inode on the MDS has been removed, but the
			 * objects on the OSTs haven't been destroyed (async
			 * unlink), layout fetch will return -ENOENT; ignore
			 * this error and continue with the dirty flush.
			 * LU-3230.
			 */
1388 result = 0;
1389 if (result < 0)
1390 CERROR("%s: refresh file layout " DFID " error %d.\n",
1391 ll_get_fsname(inode->i_sb, NULL, 0),
1392 PFID(lu_object_fid(&obj->co_lu)), result);
65fb55d1 1393 }
d7e09d03 1394
0a3bdb00 1395 return result;
d7e09d03 1396}