/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */


#ifdef CONFIG_COMPAT
#include <linux/compat.h>
#endif
#include <sys/file.h>
#include <sys/dmu_objset.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_project.h>

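/*
 * Each entry point below brackets its zfs_*() call with crhold()/crfree()
 * on the caller's credentials and with an spl_fstrans_mark() cookie,
 * which prevents memory reclaim from re-entering the filesystem while
 * the call is in progress.
 */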
static int
zpl_open(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	error = generic_file_open(ip, filp);
	if (error)
		return (error);

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_release(struct inode *ip, struct file *filp)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	cookie = spl_fstrans_mark();
	if (ITOZ(ip)->z_atime_dirty)
		zfs_mark_inode_dirty(ip);

	crhold(cr);
	error = -zfs_close(ip, filp->f_flags, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

static int
zpl_iterate(struct file *filp, zpl_dir_context_t *ctx)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_readdir(file_inode(filp), ctx, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

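/*
 * On kernels that predate the .iterate()/.iterate_shared() directory
 * walk interfaces, emulate them on top of the legacy .readdir() hook by
 * packaging the filldir callback in a zpl_dir_context_t.
 */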
#if !defined(HAVE_VFS_ITERATE) && !defined(HAVE_VFS_ITERATE_SHARED)
static int
zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	zpl_dir_context_t ctx =
	    ZPL_DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
	int error;

	error = zpl_iterate(filp, &ctx);
	filp->f_pos = ctx.pos;

	return (error);
}
#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */

#if defined(HAVE_FSYNC_WITH_DENTRY)
/*
 * Linux 2.6.x - 2.6.34 API,
 * Through 2.6.34 the nfsd kernel server would pass a NULL 'struct file *'
 * to the fops->fsync() hook. For this reason, we must be careful not to
 * use filp unconditionally.
 */
static int
zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(dentry->d_inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	struct file *filp = kiocb->ki_filp;
	return (zpl_fsync(filp, file_dentry(filp), datasync));
}
#endif

#elif defined(HAVE_FSYNC_WITHOUT_DENTRY)
/*
 * Linux 2.6.35 - 3.0 API,
 * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed
 * redundant. The dentry is still accessible via filp->f_path.dentry,
 * and we are guaranteed that filp will never be NULL.
 */
static int
zpl_fsync(struct file *filp, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, datasync));
}
#endif

#elif defined(HAVE_FSYNC_RANGE)
/*
 * Linux 3.1 - 3.x API,
 * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
 * been pushed down into the .fsync() vfs hook. Additionally, the i_mutex
 * lock is no longer held by the caller. ZFS does not require the lock to
 * be held, so we do not acquire it.
 */
static int
zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = filp->f_mapping->host;
	cred_t *cr = CRED();
	int error;
	fstrans_cookie_t cookie;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return (error);

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_fsync(inode, datasync, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);
	ASSERT3S(error, <=, 0);

	return (error);
}

#ifdef HAVE_FILE_AIO_FSYNC
static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
	return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync));
}
#endif

#else
#error "Unsupported fops->fsync() implementation"
#endif

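/*
 * Common read path: package the caller's iovec array into a uio_t and
 * hand it to zfs_read().  Note the sign convention: zfs_read() returns a
 * positive errno which is negated here for the VFS, while a successful
 * (possibly short) read returns the number of bytes transferred.
 */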
static ssize_t
zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
	ssize_t read;
	uio_t uio;
	int error;
	fstrans_cookie_t cookie;

	uio.uio_iov = iovp;
	uio.uio_skip = skip;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	cookie = spl_fstrans_mark();
	error = -zfs_read(ip, &uio, flags, cr);
	spl_fstrans_unmark(cookie);
	if (error < 0)
		return (error);

	read = count - uio.uio_resid;
	*ppos += read;
	task_io_account_read(read);

	return (read);
}

inline ssize_t
zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr, 0));
}

static ssize_t
zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	ssize_t read;

	crhold(cr);
	read = zpl_read_common_iovec(filp->f_mapping->host, iovp, count,
	    nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
	crfree(cr);

	file_accessed(filp);
	return (read);
}

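/*
 * The uio segment type is derived from the iov_iter flavor: kernel
 * iovecs (ITER_KVEC) map to UIO_SYSSPACE, bio_vec based iterators
 * (ITER_BVEC) map to UIO_BVEC, and plain user iovecs default to
 * UIO_USERSPACE.
 */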
#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
{
	ssize_t ret;
	uio_seg_t seg = UIO_USERSPACE;
	if (to->type & ITER_KVEC)
		seg = UIO_SYSSPACE;
	if (to->type & ITER_BVEC)
		seg = UIO_BVEC;
	ret = zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
	    iov_iter_count(to), seg, to->iov_offset);
	if (ret > 0)
		iov_iter_advance(to, ret);
	return (ret);
}
#else
static ssize_t
zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	ssize_t ret;
	size_t count;

	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_WRITE);
	if (ret)
		return (ret);

	return (zpl_iter_read_common(kiocb, iovp, nr_segs, count,
	    UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

static ssize_t
zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
	ssize_t wrote;
	uio_t uio;
	int error;
	fstrans_cookie_t cookie;

	if (flags & O_APPEND)
		*ppos = i_size_read(ip);

	uio.uio_iov = iovp;
	uio.uio_skip = skip;
	uio.uio_resid = count;
	uio.uio_iovcnt = nr_segs;
	uio.uio_loffset = *ppos;
	uio.uio_limit = MAXOFFSET_T;
	uio.uio_segflg = segment;

	cookie = spl_fstrans_mark();
	error = -zfs_write(ip, &uio, flags, cr);
	spl_fstrans_unmark(cookie);
	if (error < 0)
		return (error);

	wrote = count - uio.uio_resid;
	*ppos += wrote;
	task_io_account_write(wrote);

	return (wrote);
}

inline ssize_t
zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
	struct iovec iov;

	iov.iov_base = (void *)buf;
	iov.iov_len = len;

	return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
	    flags, cr, 0));
}

static ssize_t
zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
	cred_t *cr = CRED();
	struct file *filp = kiocb->ki_filp;
	ssize_t wrote;

	crhold(cr);
	wrote = zpl_write_common_iovec(filp->f_mapping->host, iovp, count,
	    nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
	crfree(cr);

	return (wrote);
}

#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
{
	size_t count;
	ssize_t ret;
	uio_seg_t seg = UIO_USERSPACE;

#ifndef HAVE_GENERIC_WRITE_CHECKS_KIOCB
	struct file *file = kiocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *ip = mapping->host;
	int isblk = S_ISBLK(ip->i_mode);

	count = iov_iter_count(from);
	ret = generic_write_checks(file, &kiocb->ki_pos, &count, isblk);
	if (ret)
		return (ret);
#else
	/*
	 * XXX - ideally this check should be in the same lock region with
	 * write operations, so that there's no TOCTTOU race when doing an
	 * append while someone else grows the file.
	 */
	ret = generic_write_checks(kiocb, from);
	if (ret <= 0)
		return (ret);
	count = ret;
#endif

	if (from->type & ITER_KVEC)
		seg = UIO_SYSSPACE;
	if (from->type & ITER_BVEC)
		seg = UIO_BVEC;

	ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
	    count, seg, from->iov_offset);
	if (ret > 0)
		iov_iter_advance(from, ret);

	return (ret);
}
#else
static ssize_t
zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
	struct file *file = kiocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *ip = mapping->host;
	int isblk = S_ISBLK(ip->i_mode);
	size_t count;
	ssize_t ret;

	ret = generic_segment_checks(iovp, &nr_segs, &count, VERIFY_READ);
	if (ret)
		return (ret);

	ret = generic_write_checks(file, &pos, &count, isblk);
	if (ret)
		return (ret);

	return (zpl_iter_write_common(kiocb, iovp, nr_segs, count,
	    UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

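/*
 * Seek within a file.  SEEK_HOLE and SEEK_DATA are serviced by
 * zfs_holey() under a shared inode lock; every other whence value is
 * passed through to generic_file_llseek().  For example, userspace can
 * locate the first data region of a sparse file with
 * lseek(fd, 0, SEEK_DATA).
 */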
static loff_t
zpl_llseek(struct file *filp, loff_t offset, int whence)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
	fstrans_cookie_t cookie;

	if (whence == SEEK_DATA || whence == SEEK_HOLE) {
		struct inode *ip = filp->f_mapping->host;
		loff_t maxbytes = ip->i_sb->s_maxbytes;
		loff_t error;

		spl_inode_lock_shared(ip);
		cookie = spl_fstrans_mark();
		error = -zfs_holey(ip, whence, &offset);
		spl_fstrans_unmark(cookie);
		if (error == 0)
			error = lseek_execute(filp, ip, offset, maxbytes);
		spl_inode_unlock_shared(ip);

		return (error);
	}
#endif /* SEEK_HOLE && SEEK_DATA */

	return (generic_file_llseek(filp, offset, whence));
}

/*
 * It's worth taking a moment to describe how mmap is implemented
 * for zfs because it differs considerably from other Linux filesystems.
 * However, this issue is handled the same way under OpenSolaris.
 *
 * The issue is that by design zfs bypasses the Linux page cache and
 * leaves all caching up to the ARC. This has been shown to work
 * well for the common read(2)/write(2) case. However, mmap(2)
 * is a problem because it relies on being tightly integrated with the
 * page cache. To handle this we cache mmap'ed files twice, once in
 * the ARC and a second time in the page cache. The code is careful
 * to keep both copies synchronized.
 *
 * When a file with an mmap'ed region is written to using write(2)
 * both the data in the ARC and existing pages in the page cache
 * are updated. For a read(2) data will be read first from the page
 * cache then the ARC if needed. Neither a write(2) nor a read(2)
 * will ever result in new pages being added to the page cache.
 *
 * New pages are added to the page cache only via .readpage() which
 * is called when the vfs needs to read a page off disk to back the
 * virtual memory region. These pages may be modified without
 * notifying the ARC and will be written out periodically via
 * .writepage(). This will occur due to either a sync or the usual
 * page aging behavior. Note that because a read(2) of an mmap'ed file
 * will always check the page cache first, correct data will still be
 * returned even when the ARC is out of date.
 *
 * While this implementation ensures correct behavior it does have
 * some drawbacks. The most obvious of which is that it increases
 * the required memory footprint when accessing mmap'ed files. It
 * also adds additional complexity to the code, keeping both caches
 * synchronized.
 *
 * Longer term it may be possible to cleanly resolve this wart by
 * mapping page cache pages directly onto the ARC buffers. The
 * Linux address space operations are flexible enough to allow
 * selection of which pages back a particular index. The trick
 * would be working out the details of which subsystem is in
 * charge, the ARC, the page cache, or both. It may also prove
 * helpful to move the ARC buffers to scatter-gather lists
 * rather than a vmalloc'ed region.
 */
static int
zpl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct inode *ip = filp->f_mapping->host;
	znode_t *zp = ITOZ(ip);
	int error;
	fstrans_cookie_t cookie;

	cookie = spl_fstrans_mark();
	error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
	    (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
	spl_fstrans_unmark(cookie);
	if (error)
		return (error);

	error = generic_file_mmap(filp, vma);
	if (error)
		return (error);

	mutex_enter(&zp->z_lock);
	zp->z_is_mapped = B_TRUE;
	mutex_exit(&zp->z_lock);

	return (error);
}

/*
 * Populate a page with data for the Linux page cache. This function is
 * only used to support mmap(2). There will be an identical copy of the
 * data in the ARC which is kept up to date via .write() and .writepage().
 *
 * Currently this function relies on zpl_read_common() and the O_DIRECT
 * flag to read in a page. This works but the more correct way is to
 * update zfs_fillpage() to be Linux friendly and use that interface.
 */
static int
zpl_readpage(struct file *filp, struct page *pp)
{
	struct inode *ip;
	struct page *pl[1];
	int error = 0;
	fstrans_cookie_t cookie;

	ASSERT(PageLocked(pp));
	ip = pp->mapping->host;
	pl[0] = pp;

	cookie = spl_fstrans_mark();
	error = -zfs_getpage(ip, pl, 1);
	spl_fstrans_unmark(cookie);

	if (error) {
		SetPageError(pp);
		ClearPageUptodate(pp);
	} else {
		ClearPageError(pp);
		SetPageUptodate(pp);
		flush_dcache_page(pp);
	}

	unlock_page(pp);
	return (error);
}

/*
 * Populate a set of pages with data for the Linux page cache. This
 * function will only be called for read ahead and never for demand
 * paging. For simplicity, the code relies on read_cache_pages() to
 * correctly lock each page for IO and call zpl_readpage().
 */
static int
zpl_readpages(struct file *filp, struct address_space *mapping,
    struct list_head *pages, unsigned nr_pages)
{
	return (read_cache_pages(mapping, pages,
	    (filler_t *)zpl_readpage, filp));
}

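/*
 * Push a single dirty page to zfs_putpage().  This serves both as the
 * write_cache_pages() callback used by zpl_writepages() and as the
 * backend for zpl_writepage().
 */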
int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
	struct address_space *mapping = data;
	fstrans_cookie_t cookie;

	ASSERT(PageLocked(pp));
	ASSERT(!PageWriteback(pp));

	cookie = spl_fstrans_mark();
	(void) zfs_putpage(mapping->host, pp, wbc);
	spl_fstrans_unmark(cookie);

	return (0);
}

static int
zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	znode_t *zp = ITOZ(mapping->host);
	zfsvfs_t *zfsvfs = ITOZSB(mapping->host);
	enum writeback_sync_modes sync_mode;
	int result;

	ZFS_ENTER(zfsvfs);
	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;
	ZFS_EXIT(zfsvfs);
	sync_mode = wbc->sync_mode;

	/*
	 * We don't want to run write_cache_pages() in SYNC mode here, because
	 * that would make putpage() wait for a single page to be committed to
	 * disk every single time, resulting in atrocious performance. Instead
	 * we run it once in non-SYNC mode so that the ZIL gets all the data,
	 * and then we commit it all in one go.
	 */
	wbc->sync_mode = WB_SYNC_NONE;
	result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	if (sync_mode != wbc->sync_mode) {
		ZFS_ENTER(zfsvfs);
		ZFS_VERIFY_ZP(zp);
		if (zfsvfs->z_log != NULL)
			zil_commit(zfsvfs->z_log, zp->z_id);
		ZFS_EXIT(zfsvfs);

		/*
		 * We need to call write_cache_pages() again (we can't just
		 * return after the commit) because the previous call in
		 * non-SYNC mode does not guarantee that we got all the dirty
		 * pages (see the implementation of write_cache_pages() for
		 * details). That being said, this is a no-op in most cases.
		 */
		wbc->sync_mode = sync_mode;
		result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
	}
	return (result);
}

/*
 * Write out dirty pages to the ARC. This function is only required to
 * support mmap(2). Mapped pages may be dirtied by memory operations
 * which never call .write(). These dirty pages are kept in sync with
 * the ARC buffers via this hook.
 */
static int
zpl_writepage(struct page *pp, struct writeback_control *wbc)
{
	if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
		wbc->sync_mode = WB_SYNC_ALL;

	return (zpl_putpage(pp, wbc, pp->mapping));
}

/*
 * The only flag combination which matches the behavior of zfs_space()
 * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE
 * flag was introduced in the 2.6.38 kernel.
 */
#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
long
zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
{
	int error = -EOPNOTSUPP;

#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
	cred_t *cr = CRED();
	flock64_t bf;
	loff_t olen;
	fstrans_cookie_t cookie;

	if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return (error);

	if (offset < 0 || len <= 0)
		return (-EINVAL);

	spl_inode_lock(ip);
	olen = i_size_read(ip);

	if (offset > olen) {
		spl_inode_unlock(ip);
		return (0);
	}
	if (offset + len > olen)
		len = olen - offset;
	bf.l_type = F_WRLCK;
	bf.l_whence = 0;
	bf.l_start = offset;
	bf.l_len = len;
	bf.l_pid = 0;

	crhold(cr);
	cookie = spl_fstrans_mark();
	error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
	spl_fstrans_unmark(cookie);
	spl_inode_unlock(ip);

	crfree(cr);
#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */

	ASSERT3S(error, <=, 0);
	return (error);
}
#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */
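
/*
 * For example, the common path into zpl_fallocate_common() from
 * userspace is:
 *
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len);
 *
 * which deallocates the byte range [off, off + len) without changing
 * the file size.
 */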

#ifdef HAVE_FILE_FALLOCATE
static long
zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
{
	return zpl_fallocate_common(file_inode(filp),
	    mode, offset, len);
}
#endif /* HAVE_FILE_FALLOCATE */

#define	ZFS_FL_USER_VISIBLE	(FS_FL_USER_VISIBLE | ZFS_PROJINHERIT_FL)
#define	ZFS_FL_USER_MODIFIABLE	(FS_FL_USER_MODIFIABLE | ZFS_PROJINHERIT_FL)

static uint32_t
__zpl_ioctl_getflags(struct inode *ip)
{
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
	uint32_t ioctl_flags = 0;

	if (zfs_flags & ZFS_IMMUTABLE)
		ioctl_flags |= FS_IMMUTABLE_FL;

	if (zfs_flags & ZFS_APPENDONLY)
		ioctl_flags |= FS_APPEND_FL;

	if (zfs_flags & ZFS_NODUMP)
		ioctl_flags |= FS_NODUMP_FL;

	if (zfs_flags & ZFS_PROJINHERIT)
		ioctl_flags |= ZFS_PROJINHERIT_FL;

	return (ioctl_flags & ZFS_FL_USER_VISIBLE);
}

/*
 * Map zfs file z_pflags (xvattr_t) to linux file attributes. Only file
 * attributes common to both Linux and Solaris are mapped.
 */
static int
zpl_ioctl_getflags(struct file *filp, void __user *arg)
{
	uint32_t flags;
	int err;

	flags = __zpl_ioctl_getflags(file_inode(filp));
	err = copy_to_user(arg, &flags, sizeof (flags));

	return (err);
}

/*
 * fchange() is a helper macro to detect if we have been asked to change a
 * flag. This is ugly, but the requirement that we do this is a consequence
 * of how the Linux file attribute interface was designed. Another
 * consequence is that concurrent modification of files suffers from a
 * TOCTOU race. Neither are things we can fix without modifying the
 * kernel-userland interface, which is outside of our jurisdiction.
 */

#define	fchange(f0, f1, b0, b1)	(!((f0) & (b0)) != !((f1) & (b1)))

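/*
 * For example, fchange(ioctl_flags, zfs_flags, FS_APPEND_FL,
 * ZFS_APPENDONLY) evaluates to true only when exactly one of the two
 * flag words has its append-only bit set, i.e. when the caller is
 * actually toggling the flag.
 */
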
static int
__zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
{
	uint64_t zfs_flags = ITOZ(ip)->z_pflags;
	xoptattr_t *xoap;

	if (ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL |
	    ZFS_PROJINHERIT_FL))
		return (-EOPNOTSUPP);

	if (ioctl_flags & ~ZFS_FL_USER_MODIFIABLE)
		return (-EACCES);

	if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
	    fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) &&
	    !capable(CAP_LINUX_IMMUTABLE))
		return (-EACCES);

	if (!zpl_inode_owner_or_capable(ip))
		return (-EACCES);

	xva_init(xva);
	xoap = xva_getxoptattr(xva);

	XVA_SET_REQ(xva, XAT_IMMUTABLE);
	if (ioctl_flags & FS_IMMUTABLE_FL)
		xoap->xoa_immutable = B_TRUE;

	XVA_SET_REQ(xva, XAT_APPENDONLY);
	if (ioctl_flags & FS_APPEND_FL)
		xoap->xoa_appendonly = B_TRUE;

	XVA_SET_REQ(xva, XAT_NODUMP);
	if (ioctl_flags & FS_NODUMP_FL)
		xoap->xoa_nodump = B_TRUE;

	XVA_SET_REQ(xva, XAT_PROJINHERIT);
	if (ioctl_flags & ZFS_PROJINHERIT_FL)
		xoap->xoa_projinherit = B_TRUE;

	return (0);
}

static int
zpl_ioctl_setflags(struct file *filp, void __user *arg)
{
	struct inode *ip = file_inode(filp);
	uint32_t flags;
	cred_t *cr = CRED();
	xvattr_t xva;
	int err;
	fstrans_cookie_t cookie;

	if (copy_from_user(&flags, arg, sizeof (flags)))
		return (-EFAULT);

	err = __zpl_ioctl_setflags(ip, flags, &xva);
	if (err)
		return (err);

	crhold(cr);
	cookie = spl_fstrans_mark();
	err = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (err);
}

static int
zpl_ioctl_getxattr(struct file *filp, void __user *arg)
{
	zfsxattr_t fsx = { 0 };
	struct inode *ip = file_inode(filp);
	int err;

	fsx.fsx_xflags = __zpl_ioctl_getflags(ip);
	fsx.fsx_projid = ITOZ(ip)->z_projid;
	err = copy_to_user(arg, &fsx, sizeof (fsx));

	return (err);
}

static int
zpl_ioctl_setxattr(struct file *filp, void __user *arg)
{
	struct inode *ip = file_inode(filp);
	zfsxattr_t fsx;
	cred_t *cr = CRED();
	xvattr_t xva;
	xoptattr_t *xoap;
	int err;
	fstrans_cookie_t cookie;

	if (copy_from_user(&fsx, arg, sizeof (fsx)))
		return (-EFAULT);

	if (!zpl_is_valid_projid(fsx.fsx_projid))
		return (-EINVAL);

	err = __zpl_ioctl_setflags(ip, fsx.fsx_xflags, &xva);
	if (err)
		return (err);

	xoap = xva_getxoptattr(&xva);
	XVA_SET_REQ(&xva, XAT_PROJID);
	xoap->xoa_projid = fsx.fsx_projid;

	crhold(cr);
	cookie = spl_fstrans_mark();
	err = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
	spl_fstrans_unmark(cookie);
	crfree(cr);

	return (err);
}

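/*
 * ioctl(2) entry point.  For example, userspace reads the Linux file
 * attribute flags with:
 *
 *	ioctl(fd, FS_IOC_GETFLAGS, &flags);
 *
 * Unhandled commands return -ENOTTY per convention.
 */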
static long
zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC_GETFLAGS:
		return (zpl_ioctl_getflags(filp, (void *)arg));
	case FS_IOC_SETFLAGS:
		return (zpl_ioctl_setflags(filp, (void *)arg));
	case ZFS_IOC_FSGETXATTR:
		return (zpl_ioctl_getxattr(filp, (void *)arg));
	case ZFS_IOC_FSSETXATTR:
		return (zpl_ioctl_setxattr(filp, (void *)arg));
	default:
		return (-ENOTTY);
	}
}

#ifdef CONFIG_COMPAT
static long
zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case FS_IOC32_GETFLAGS:
		cmd = FS_IOC_GETFLAGS;
		break;
	case FS_IOC32_SETFLAGS:
		cmd = FS_IOC_SETFLAGS;
		break;
	default:
		return (-ENOTTY);
	}
	return (zpl_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)));
}
#endif /* CONFIG_COMPAT */


const struct address_space_operations zpl_address_space_operations = {
	.readpages	= zpl_readpages,
	.readpage	= zpl_readpage,
	.writepage	= zpl_writepage,
	.writepages	= zpl_writepages,
};

const struct file_operations zpl_file_operations = {
	.open		= zpl_open,
	.release	= zpl_release,
	.llseek		= zpl_llseek,
#ifdef HAVE_VFS_RW_ITERATE
#ifdef HAVE_NEW_SYNC_READ
	.read		= new_sync_read,
	.write		= new_sync_write,
#endif
	.read_iter	= zpl_iter_read,
	.write_iter	= zpl_iter_write,
#else
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= zpl_aio_read,
	.aio_write	= zpl_aio_write,
#endif
	.mmap		= zpl_mmap,
	.fsync		= zpl_fsync,
#ifdef HAVE_FILE_AIO_FSYNC
	.aio_fsync	= zpl_aio_fsync,
#endif
#ifdef HAVE_FILE_FALLOCATE
	.fallocate	= zpl_fallocate,
#endif /* HAVE_FILE_FALLOCATE */
	.unlocked_ioctl	= zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= zpl_compat_ioctl,
#endif
};

const struct file_operations zpl_dir_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
#if defined(HAVE_VFS_ITERATE_SHARED)
	.iterate_shared	= zpl_iterate,
#elif defined(HAVE_VFS_ITERATE)
	.iterate	= zpl_iterate,
#else
	.readdir	= zpl_readdir,
#endif
	.fsync		= zpl_fsync,
	.unlocked_ioctl	= zpl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= zpl_compat_ioctl,
#endif
};