/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
 * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 */

#include <sys/dmu_objset.h>
#include <sys/zfs_vfsops.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_znode.h>
#include <sys/zpl.h>

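/*
 * A note on the pattern used throughout this file: most entry points
 * take a hold on the current credentials (crhold()/crfree()) and
 * bracket the call into the ZFS common code with spl_fstrans_mark()/
 * spl_fstrans_unmark(). The fstrans cookie marks the task as being
 * inside a filesystem transaction so that memory reclaim occurring
 * while the transaction is open cannot re-enter the filesystem and
 * deadlock.
 */
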
static int
zpl_open(struct inode *ip, struct file *filp)
{
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        error = generic_file_open(ip, filp);
        if (error)
                return (error);

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

static int
zpl_release(struct inode *ip, struct file *filp)
{
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        cookie = spl_fstrans_mark();
        if (ITOZ(ip)->z_atime_dirty)
                zfs_mark_inode_dirty(ip);

        crhold(cr);
        error = -zfs_close(ip, filp->f_flags, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

static int
zpl_iterate(struct file *filp, struct dir_context *ctx)
{
        struct dentry *dentry = filp->f_path.dentry;
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_readdir(dentry->d_inode, ctx, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

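/*
 * For kernels without the ->iterate() directory hook, provide the
 * older ->readdir() entry point as a thin shim: the filldir callback
 * and file position are packed into a struct dir_context and forwarded
 * to zpl_iterate() above.
 */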
#if !defined(HAVE_VFS_ITERATE)
static int
zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
        struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
        int error;

        error = zpl_iterate(filp, &ctx);
        filp->f_pos = ctx.pos;

        return (error);
}
#endif /* !HAVE_VFS_ITERATE */

#if defined(HAVE_FSYNC_WITH_DENTRY)
/*
 * Linux 2.6.x - 2.6.34 API,
 * Through 2.6.34 the nfsd kernel server would pass a NULL 'struct file *'
 * to the fops->fsync() hook. For this reason, we must be careful not to
 * use filp unconditionally.
 */
static int
zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_fsync(dentry->d_inode, datasync, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
        struct file *filp = kiocb->ki_filp;

        return (zpl_fsync(filp, filp->f_path.dentry, datasync));
}
#elif defined(HAVE_FSYNC_WITHOUT_DENTRY)
/*
 * Linux 2.6.35 - 3.0 API,
 * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed
 * redundant. The dentry is still accessible via filp->f_path.dentry,
 * and we are guaranteed that filp will never be NULL.
 */
static int
zpl_fsync(struct file *filp, int datasync)
{
        struct inode *inode = filp->f_mapping->host;
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_fsync(inode, datasync, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
        return (zpl_fsync(kiocb->ki_filp, datasync));
}
#elif defined(HAVE_FSYNC_RANGE)
/*
 * Linux 3.1 - 3.x API,
 * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
 * been pushed down into the .fsync() vfs hook. Additionally, the i_mutex
 * lock is no longer held by the caller; zfs does not require the lock to
 * be held, so we do not acquire it.
 */
static int
zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
        struct inode *inode = filp->f_mapping->host;
        cred_t *cr = CRED();
        int error;
        fstrans_cookie_t cookie;

        error = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (error)
                return (error);

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_fsync(inode, datasync, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);
        ASSERT3S(error, <=, 0);

        return (error);
}

static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
        return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos, -1, datasync));
}
#else
#error "Unsupported fops->fsync() implementation"
#endif

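/*
 * Common read path: wrap the caller's iovec array in a uio_t and pass
 * it to zfs_read(). On success the number of bytes actually read is
 * returned, *ppos is advanced, and the bytes are charged to the task's
 * I/O accounting; on failure a negative errno is returned.
 */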
static ssize_t
zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
        ssize_t read;
        uio_t uio;
        int error;
        fstrans_cookie_t cookie;

        uio.uio_iov = iovp;
        uio.uio_skip = skip;
        uio.uio_resid = count;
        uio.uio_iovcnt = nr_segs;
        uio.uio_loffset = *ppos;
        uio.uio_limit = MAXOFFSET_T;
        uio.uio_segflg = segment;

        cookie = spl_fstrans_mark();
        error = -zfs_read(ip, &uio, flags, cr);
        spl_fstrans_unmark(cookie);
        if (error < 0)
                return (error);

        read = count - uio.uio_resid;
        *ppos += read;
        task_io_account_read(read);

        return (read);
}

inline ssize_t
zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
        struct iovec iov;

        iov.iov_base = (void *)buf;
        iov.iov_len = len;

        return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
            flags, cr, 0));
}

static ssize_t
zpl_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
        cred_t *cr = CRED();
        ssize_t read;

        crhold(cr);
        read = zpl_read_common(filp->f_mapping->host, buf, len, ppos,
            UIO_USERSPACE, filp->f_flags, cr);
        crfree(cr);

        return (read);
}

static ssize_t
zpl_iter_read_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
        cred_t *cr = CRED();
        struct file *filp = kiocb->ki_filp;
        ssize_t read;

        crhold(cr);
        read = zpl_read_common_iovec(filp->f_mapping->host, iovp, count,
            nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
        crfree(cr);

        return (read);
}

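/*
 * With the modern read_iter/write_iter interface the buffer being
 * transferred may live in user memory, kernel memory (ITER_KVEC), or
 * bio pages (ITER_BVEC); the iov_iter type is translated to the
 * corresponding uio segment flag before the request is passed down.
 */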
#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_read(struct kiocb *kiocb, struct iov_iter *to)
{
        ssize_t ret;
        uio_seg_t seg = UIO_USERSPACE;

        if (to->type & ITER_KVEC)
                seg = UIO_SYSSPACE;
        if (to->type & ITER_BVEC)
                seg = UIO_BVEC;

        ret = zpl_iter_read_common(kiocb, to->iov, to->nr_segs,
            iov_iter_count(to), seg, to->iov_offset);
        if (ret > 0)
                iov_iter_advance(to, ret);

        return (ret);
}
#else
static ssize_t
zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
        return (zpl_iter_read_common(kiocb, iovp, nr_segs, kiocb->ki_nbytes,
            UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

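/*
 * Common write path: mirrors the read path above, with the additional
 * detail that O_APPEND writes are positioned at the current end of file
 * before the uio is constructed.
 */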
static ssize_t
zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
    unsigned long nr_segs, loff_t *ppos, uio_seg_t segment, int flags,
    cred_t *cr, size_t skip)
{
        ssize_t wrote;
        uio_t uio;
        int error;
        fstrans_cookie_t cookie;

        if (flags & O_APPEND)
                *ppos = i_size_read(ip);

        uio.uio_iov = iovp;
        uio.uio_skip = skip;
        uio.uio_resid = count;
        uio.uio_iovcnt = nr_segs;
        uio.uio_loffset = *ppos;
        uio.uio_limit = MAXOFFSET_T;
        uio.uio_segflg = segment;

        cookie = spl_fstrans_mark();
        error = -zfs_write(ip, &uio, flags, cr);
        spl_fstrans_unmark(cookie);
        if (error < 0)
                return (error);

        wrote = count - uio.uio_resid;
        *ppos += wrote;
        task_io_account_write(wrote);

        return (wrote);
}

inline ssize_t
zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
    uio_seg_t segment, int flags, cred_t *cr)
{
        struct iovec iov;

        iov.iov_base = (void *)buf;
        iov.iov_len = len;

        return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
            flags, cr, 0));
}

static ssize_t
zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
        cred_t *cr = CRED();
        ssize_t wrote;

        crhold(cr);
        wrote = zpl_write_common(filp->f_mapping->host, buf, len, ppos,
            UIO_USERSPACE, filp->f_flags, cr);
        crfree(cr);

        return (wrote);
}

static ssize_t
zpl_iter_write_common(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, size_t count, uio_seg_t seg, size_t skip)
{
        cred_t *cr = CRED();
        struct file *filp = kiocb->ki_filp;
        ssize_t wrote;

        crhold(cr);
        wrote = zpl_write_common_iovec(filp->f_mapping->host, iovp, count,
            nr_segs, &kiocb->ki_pos, seg, filp->f_flags, cr, skip);
        crfree(cr);

        return (wrote);
}

#if defined(HAVE_VFS_RW_ITERATE)
static ssize_t
zpl_iter_write(struct kiocb *kiocb, struct iov_iter *from)
{
        ssize_t ret;
        uio_seg_t seg = UIO_USERSPACE;

        if (from->type & ITER_KVEC)
                seg = UIO_SYSSPACE;
        if (from->type & ITER_BVEC)
                seg = UIO_BVEC;

        ret = zpl_iter_write_common(kiocb, from->iov, from->nr_segs,
            iov_iter_count(from), seg, from->iov_offset);
        if (ret > 0)
                iov_iter_advance(from, ret);

        return (ret);
}
#else
static ssize_t
zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
    unsigned long nr_segs, loff_t pos)
{
        return (zpl_iter_write_common(kiocb, iovp, nr_segs, kiocb->ki_nbytes,
            UIO_USERSPACE, 0));
}
#endif /* HAVE_VFS_RW_ITERATE */

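/*
 * On kernels that define SEEK_HOLE and SEEK_DATA, hole and data seeks
 * are answered directly from the ZFS block layout via zfs_holey();
 * everything else falls through to generic_file_llseek().
 */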
static loff_t
zpl_llseek(struct file *filp, loff_t offset, int whence)
{
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
        fstrans_cookie_t cookie;

        if (whence == SEEK_DATA || whence == SEEK_HOLE) {
                struct inode *ip = filp->f_mapping->host;
                loff_t maxbytes = ip->i_sb->s_maxbytes;
                loff_t error;

                spl_inode_lock(ip);
                cookie = spl_fstrans_mark();
                error = -zfs_holey(ip, whence, &offset);
                spl_fstrans_unmark(cookie);
                if (error == 0)
                        error = lseek_execute(filp, ip, offset, maxbytes);
                spl_inode_unlock(ip);

                return (error);
        }
#endif /* SEEK_HOLE && SEEK_DATA */

        return (generic_file_llseek(filp, offset, whence));
}

/*
 * It's worth taking a moment to describe how mmap is implemented
 * for zfs because it differs considerably from other Linux filesystems.
 * However, this issue is handled the same way under OpenSolaris.
 *
 * The issue is that by design zfs bypasses the Linux page cache and
 * leaves all caching up to the ARC. This has been shown to work
 * well for the common read(2)/write(2) case. However, mmap(2)
 * is a problem because it relies on being tightly integrated with the
 * page cache. To handle this we cache mmap'ed files twice, once in
 * the ARC and a second time in the page cache. The code is careful
 * to keep both copies synchronized.
 *
 * When a file with an mmap'ed region is written to using write(2)
 * both the data in the ARC and existing pages in the page cache
 * are updated. For a read(2) data will be read first from the page
 * cache then the ARC if needed. Neither a write(2) nor a read(2)
 * will ever result in new pages being added to the page cache.
 *
 * New pages are added to the page cache only via .readpage() which
 * is called when the vfs needs to read a page off disk to back the
 * virtual memory region. These pages may be modified without
 * notifying the ARC and will be written out periodically via
 * .writepage(). This will occur due to either a sync or the usual
 * page aging behavior. Note that because a read(2) of an mmap'ed
 * file always checks the page cache first, correct data will still
 * be returned even when the ARC is out of date.
 *
 * While this implementation ensures correct behavior it does have
 * some drawbacks. The most obvious of which is that it increases
 * the required memory footprint when accessing mmap'ed files. It
 * also adds additional complexity to the code, keeping both caches
 * synchronized.
 *
 * Longer term it may be possible to cleanly resolve this wart by
 * mapping page cache pages directly onto the ARC buffers. The
 * Linux address space operations are flexible enough to allow
 * selection of which pages back a particular index. The trick
 * would be working out the details of which subsystem is in
 * charge: the ARC, the page cache, or both. It may also prove
 * helpful to move the ARC buffers to scatter-gather lists
 * rather than a vmalloc'ed region.
 */
static int
zpl_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct inode *ip = filp->f_mapping->host;
        znode_t *zp = ITOZ(ip);
        int error;
        fstrans_cookie_t cookie;

        cookie = spl_fstrans_mark();
        error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
            (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
        spl_fstrans_unmark(cookie);
        if (error)
                return (error);

        error = generic_file_mmap(filp, vma);
        if (error)
                return (error);

        mutex_enter(&zp->z_lock);
        zp->z_is_mapped = 1;
        mutex_exit(&zp->z_lock);

        return (error);
}

/*
 * Populate a page with data for the Linux page cache. This function is
 * only used to support mmap(2). There will be an identical copy of the
 * data in the ARC which is kept up to date via .write() and .writepage().
 * The page itself is filled in by zfs_getpage().
 */
static int
zpl_readpage(struct file *filp, struct page *pp)
{
        struct inode *ip;
        struct page *pl[1];
        int error = 0;
        fstrans_cookie_t cookie;

        ASSERT(PageLocked(pp));
        ip = pp->mapping->host;
        pl[0] = pp;

        cookie = spl_fstrans_mark();
        error = -zfs_getpage(ip, pl, 1);
        spl_fstrans_unmark(cookie);

        if (error) {
                SetPageError(pp);
                ClearPageUptodate(pp);
        } else {
                ClearPageError(pp);
                SetPageUptodate(pp);
                flush_dcache_page(pp);
        }

        unlock_page(pp);
        return (error);
}

/*
 * Populate a set of pages with data for the Linux page cache. This
 * function will only be called for read ahead and never for demand
 * paging. For simplicity, the code relies on read_cache_pages() to
 * correctly lock each page for IO and call zpl_readpage().
 */
static int
zpl_readpages(struct file *filp, struct address_space *mapping,
    struct list_head *pages, unsigned nr_pages)
{
        return (read_cache_pages(mapping, pages,
            (filler_t *)zpl_readpage, filp));
}

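/*
 * Write a single dirty page back through zfs_putpage(). This is also
 * the write_cache_pages() callback used by zpl_writepages() below, so
 * the address space is passed through the opaque data argument.
 */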
int
zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
{
        struct address_space *mapping = data;
        fstrans_cookie_t cookie;

        ASSERT(PageLocked(pp));
        ASSERT(!PageWriteback(pp));

        cookie = spl_fstrans_mark();
        (void) zfs_putpage(mapping->host, pp, wbc);
        spl_fstrans_unmark(cookie);

        return (0);
}

static int
zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
        znode_t *zp = ITOZ(mapping->host);
        zfs_sb_t *zsb = ITOZSB(mapping->host);
        enum writeback_sync_modes sync_mode;
        int result;

        ZFS_ENTER(zsb);
        if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
                wbc->sync_mode = WB_SYNC_ALL;
        ZFS_EXIT(zsb);
        sync_mode = wbc->sync_mode;

        /*
         * We don't want to run write_cache_pages() in SYNC mode here, because
         * that would make putpage() wait for a single page to be committed to
         * disk every single time, resulting in atrocious performance. Instead
         * we run it once in non-SYNC mode so that the ZIL gets all the data,
         * and then we commit it all in one go.
         */
        wbc->sync_mode = WB_SYNC_NONE;
        result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
        if (sync_mode != wbc->sync_mode) {
                ZFS_ENTER(zsb);
                ZFS_VERIFY_ZP(zp);
                if (zsb->z_log != NULL)
                        zil_commit(zsb->z_log, zp->z_id);
                ZFS_EXIT(zsb);

                /*
                 * We need to call write_cache_pages() again (we can't just
                 * return after the commit) because the previous call in
                 * non-SYNC mode does not guarantee that we got all the dirty
                 * pages (see the implementation of write_cache_pages() for
                 * details). That being said, this is a no-op in most cases.
                 */
                wbc->sync_mode = sync_mode;
                result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
        }
        return (result);
}

/*
 * Write out dirty pages to the ARC; this function is only required to
 * support mmap(2). Mapped pages may be dirtied by memory operations
 * which never call .write(). These dirty pages are kept in sync with
 * the ARC buffers via this hook.
 */
static int
zpl_writepage(struct page *pp, struct writeback_control *wbc)
{
        if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
                wbc->sync_mode = WB_SYNC_ALL;

        return (zpl_putpage(pp, wbc, pp->mapping));
}

/*
 * The only flag combination which matches the behavior of zfs_space()
 * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE. The FALLOC_FL_PUNCH_HOLE
 * flag was introduced in the 2.6.38 kernel.
 */
#if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
long
zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
{
        int error = -EOPNOTSUPP;

#if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
        cred_t *cr = CRED();
        flock64_t bf;
        loff_t olen;
        fstrans_cookie_t cookie;

        if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                return (error);

        if (offset < 0 || len <= 0)
                return (-EINVAL);

        spl_inode_lock(ip);
        olen = i_size_read(ip);

        if (offset > olen) {
                spl_inode_unlock(ip);
                return (0);
        }
        if (offset + len > olen)
                len = olen - offset;

        bf.l_type = F_WRLCK;
        bf.l_whence = 0;
        bf.l_start = offset;
        bf.l_len = len;
        bf.l_pid = 0;

        /*
         * Take the credential hold only once all argument checks have
         * passed, so the early returns above cannot leak it.
         */
        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
        spl_fstrans_unmark(cookie);
        spl_inode_unlock(ip);

        crfree(cr);
#endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */

        ASSERT3S(error, <=, 0);
        return (error);
}
#endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */

#ifdef HAVE_FILE_FALLOCATE
static long
zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
{
        return (zpl_fallocate_common(filp->f_path.dentry->d_inode,
            mode, offset, len));
}
#endif /* HAVE_FILE_FALLOCATE */

/*
 * Map zfs file z_pflags (xvattr_t) to Linux file attributes. Only file
 * attributes common to both Linux and Solaris are mapped.
 */
static int
zpl_ioctl_getflags(struct file *filp, void __user *arg)
{
        struct inode *ip = file_inode(filp);
        unsigned int ioctl_flags = 0;
        uint64_t zfs_flags = ITOZ(ip)->z_pflags;

        if (zfs_flags & ZFS_IMMUTABLE)
                ioctl_flags |= FS_IMMUTABLE_FL;

        if (zfs_flags & ZFS_APPENDONLY)
                ioctl_flags |= FS_APPEND_FL;

        if (zfs_flags & ZFS_NODUMP)
                ioctl_flags |= FS_NODUMP_FL;

        ioctl_flags &= FS_FL_USER_VISIBLE;

        /*
         * copy_to_user() returns the number of bytes it could not copy,
         * not an errno, so translate any failure to -EFAULT.
         */
        if (copy_to_user(arg, &ioctl_flags, sizeof (ioctl_flags)))
                return (-EFAULT);

        return (0);
}

/*
 * fchange() is a helper macro to detect if we have been asked to change a
 * flag. This is ugly, but the requirement that we do this is a consequence of
 * how the Linux file attribute interface was designed. Another consequence is
 * that concurrent modification of files suffers from a TOCTOU race. Neither
 * are things we can fix without modifying the kernel-userland interface, which
 * is outside of our jurisdiction.
 */
#define fchange(f0, f1, b0, b1) ((((f0) & (b0)) == (b0)) != \
        (((f1) & (b1)) == (b1)))
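/*
 * For example, fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL,
 * ZFS_IMMUTABLE) is true exactly when the requested FS_IMMUTABLE_FL
 * state differs from the file's current ZFS_IMMUTABLE state, i.e. when
 * the ioctl would actually toggle the flag.
 */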

static int
zpl_ioctl_setflags(struct file *filp, void __user *arg)
{
        struct inode *ip = file_inode(filp);
        uint64_t zfs_flags = ITOZ(ip)->z_pflags;
        unsigned int ioctl_flags;
        cred_t *cr = CRED();
        xvattr_t xva;
        xoptattr_t *xoap;
        int error;
        fstrans_cookie_t cookie;

        if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags)))
                return (-EFAULT);

        if ((ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL)))
                return (-EOPNOTSUPP);

        if ((ioctl_flags & ~(FS_FL_USER_MODIFIABLE)))
                return (-EACCES);

        if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
            fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) &&
            !capable(CAP_LINUX_IMMUTABLE))
                return (-EACCES);

        if (!zpl_inode_owner_or_capable(ip))
                return (-EACCES);

        xva_init(&xva);
        xoap = xva_getxoptattr(&xva);

        XVA_SET_REQ(&xva, XAT_IMMUTABLE);
        if (ioctl_flags & FS_IMMUTABLE_FL)
                xoap->xoa_immutable = B_TRUE;

        XVA_SET_REQ(&xva, XAT_APPENDONLY);
        if (ioctl_flags & FS_APPEND_FL)
                xoap->xoa_appendonly = B_TRUE;

        XVA_SET_REQ(&xva, XAT_NODUMP);
        if (ioctl_flags & FS_NODUMP_FL)
                xoap->xoa_nodump = B_TRUE;

        crhold(cr);
        cookie = spl_fstrans_mark();
        error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
        spl_fstrans_unmark(cookie);
        crfree(cr);

        return (error);
}

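/*
 * Top-level ioctl dispatch for files and directories; only the standard
 * attribute-flag ioctls are supported, everything else gets -ENOTTY.
 */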
static long
zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
        switch (cmd) {
        case FS_IOC_GETFLAGS:
                return (zpl_ioctl_getflags(filp, (void *)arg));
        case FS_IOC_SETFLAGS:
                return (zpl_ioctl_setflags(filp, (void *)arg));
        default:
                return (-ENOTTY);
        }
}

#ifdef CONFIG_COMPAT
static long
zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
        return (zpl_ioctl(filp, cmd, arg));
}
#endif /* CONFIG_COMPAT */

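/*
 * Operation tables registered with the VFS. The address space
 * operations provide the page cache integration required by mmap(2),
 * while the file and directory tables select between the legacy and
 * current kernel interfaces detected at configure time
 * (HAVE_VFS_RW_ITERATE, HAVE_VFS_ITERATE, and friends).
 */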
const struct address_space_operations zpl_address_space_operations = {
        .readpages      = zpl_readpages,
        .readpage       = zpl_readpage,
        .writepage      = zpl_writepage,
        .writepages     = zpl_writepages,
};

const struct file_operations zpl_file_operations = {
        .open           = zpl_open,
        .release        = zpl_release,
        .llseek         = zpl_llseek,
        .read           = zpl_read,
        .write          = zpl_write,
#ifdef HAVE_VFS_RW_ITERATE
        .read_iter      = zpl_iter_read,
        .write_iter     = zpl_iter_write,
#else
        .aio_read       = zpl_aio_read,
        .aio_write      = zpl_aio_write,
#endif
        .mmap           = zpl_mmap,
        .fsync          = zpl_fsync,
        .aio_fsync      = zpl_aio_fsync,
#ifdef HAVE_FILE_FALLOCATE
        .fallocate      = zpl_fallocate,
#endif /* HAVE_FILE_FALLOCATE */
        .unlocked_ioctl = zpl_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = zpl_compat_ioctl,
#endif
};

const struct file_operations zpl_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
#ifdef HAVE_VFS_ITERATE
        .iterate        = zpl_iterate,
#else
        .readdir        = zpl_readdir,
#endif
        .fsync          = zpl_fsync,
        .unlocked_ioctl = zpl_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = zpl_compat_ioctl,
#endif
};