module/zfs/zpl_file.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
  23  */
  24
  25
  26 #include <sys/dmu_objset.h>
  27 #include <sys/zfs_vfsops.h>
  28 #include <sys/zfs_vnops.h>
  29 #include <sys/zfs_znode.h>
  30 #include <sys/zpl.h>
  31
  32
  33 static int
  34 zpl_open(struct inode *ip, struct file *filp)
  35 {
  36         cred_t *cr = CRED();
  37         int error;
  38
  39         error = generic_file_open(ip, filp);
  40         if (error)
  41                 return (error);
  42
  43         crhold(cr);
  44         error = -zfs_open(ip, filp->f_mode, filp->f_flags, cr);
  45         crfree(cr);
  46         ASSERT3S(error, <=, 0);
  47
  48         return (error);
  49 }
  50
  51 static int
  52 zpl_release(struct inode *ip, struct file *filp)
  53 {
  54         cred_t *cr = CRED();
  55         int error;
  56
  57         if (ITOZ(ip)->z_atime_dirty)
  58                 zfs_mark_inode_dirty(ip);
  59
  60         crhold(cr);
  61         error = -zfs_close(ip, filp->f_flags, cr);
  62         crfree(cr);
  63         ASSERT3S(error, <=, 0);
  64
  65         return (error);
  66 }
  67
  68 static int
  69 zpl_iterate(struct file *filp, struct dir_context *ctx)
  70 {
  71         struct dentry *dentry = filp->f_path.dentry;
  72         cred_t *cr = CRED();
  73         int error;
  74
  75         crhold(cr);
  76         error = -zfs_readdir(dentry->d_inode, ctx, cr);
  77         crfree(cr);
  78         ASSERT3S(error, <=, 0);
  79
  80         return (error);
  81 }
  82
  83 #if !defined(HAVE_VFS_ITERATE)
  84 static int
  85 zpl_readdir(struct file *filp, void *dirent, filldir_t filldir)
  86 {
  87         struct dir_context ctx = DIR_CONTEXT_INIT(dirent, filldir, filp->f_pos);
  88         int error;
  89
  90         error = zpl_iterate(filp, &ctx);
  91         filp->f_pos = ctx.pos;
  92
  93         return (error);
  94 }
  95 #endif /* HAVE_VFS_ITERATE */
  96
  97 #if defined(HAVE_FSYNC_WITH_DENTRY)
  98 /*
  99  * Linux 2.6.x - 2.6.34 API,
 100  * Through 2.6.34 the nfsd kernel server would pass a NULL 'file struct *'
 101  * to the fops->fsync() hook.  For this reason, we must be careful not to
 102  * use filp unconditionally.
 103  */
 104 static int
 105 zpl_fsync(struct file *filp, struct dentry *dentry, int datasync)
 106 {
 107         cred_t *cr = CRED();
 108         int error;
 109
 110         crhold(cr);
 111         error = -zfs_fsync(dentry->d_inode, datasync, cr);
 112         crfree(cr);
 113         ASSERT3S(error, <=, 0);
 114
 115         return (error);
 116 }
 117
 118 static int
 119 zpl_aio_fsync(struct kiocb *kiocb, int datasync)
 120 {
 121         struct file *filp = kiocb->ki_filp;
 122         return (zpl_fsync(filp, filp->f_path.dentry, datasync));
 123 }
 124 #elif defined(HAVE_FSYNC_WITHOUT_DENTRY)
 125 /*
 126  * Linux 2.6.35 - 3.0 API,
 127  * As of 2.6.35 the dentry argument to the fops->fsync() hook was deemed
 128  * redundant.  The dentry is still accessible via filp->f_path.dentry,
 129  * and we are guaranteed that filp will never be NULL.
 130  */
 131 static int
 132 zpl_fsync(struct file *filp, int datasync)
 133 {
 134         struct inode *inode = filp->f_mapping->host;
 135         cred_t *cr = CRED();
 136         int error;
 137
 138         crhold(cr);
 139         error = -zfs_fsync(inode, datasync, cr);
 140         crfree(cr);
 141         ASSERT3S(error, <=, 0);
 142
 143         return (error);
 144 }
 145
 146 static int
 147 zpl_aio_fsync(struct kiocb *kiocb, int datasync)
 148 {
 149         return (zpl_fsync(kiocb->ki_filp, datasync));
 150 }
 151 #elif defined(HAVE_FSYNC_RANGE)
 152 /*
 153  * Linux 3.1 - 3.x API,
 154  * As of 3.1 the responsibility to call filemap_write_and_wait_range() has
 155  * been pushed down in to the .fsync() vfs hook.  Additionally, the i_mutex
 156  * lock is no longer held by the caller, for zfs we don't require the lock
 157  * to be held so we don't acquire it.
 158  */
 159 static int
 160 zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 161 {
 162         struct inode *inode = filp->f_mapping->host;
 163         cred_t *cr = CRED();
 164         int error;
 165
 166         error = filemap_write_and_wait_range(inode->i_mapping, start, end);
 167         if (error)
 168                 return (error);
 169
 170         crhold(cr);
 171         error = -zfs_fsync(inode, datasync, cr);
 172         crfree(cr);
 173         ASSERT3S(error, <=, 0);
 174
 175         return (error);
 176 }
 177
 178 static int
 179 zpl_aio_fsync(struct kiocb *kiocb, int datasync)
 180 {
 181         return (zpl_fsync(kiocb->ki_filp, kiocb->ki_pos,
 182             kiocb->ki_pos + kiocb->ki_nbytes, datasync));
 183 }
 184 #else
 185 #error "Unsupported fops->fsync() implementation"
 186 #endif
 187
 188 static inline ssize_t
 189 zpl_read_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
 190     unsigned long nr_segs, loff_t *ppos, uio_seg_t segment,
 191     int flags, cred_t *cr)
 192 {
 193         ssize_t read;
 194         uio_t uio;
 195         int error;
 196
 197         uio.uio_iov = (struct iovec *)iovp;
 198         uio.uio_resid = count;
 199         uio.uio_iovcnt = nr_segs;
 200         uio.uio_loffset = *ppos;
 201         uio.uio_limit = MAXOFFSET_T;
 202         uio.uio_segflg = segment;
 203
 204         error = -zfs_read(ip, &uio, flags, cr);
 205         if (error < 0)
 206                 return (error);
 207
 208         read = count - uio.uio_resid;
 209         *ppos += read;
 210         task_io_account_read(read);
 211
 212         return (read);
 213 }
 214
 215 inline ssize_t
 216 zpl_read_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
 217     uio_seg_t segment, int flags, cred_t *cr)
 218 {
 219         struct iovec iov;
 220
 221         iov.iov_base = (void *)buf;
 222         iov.iov_len = len;
 223
 224         return (zpl_read_common_iovec(ip, &iov, len, 1, ppos, segment,
 225             flags, cr));
 226 }
 227
 228 static ssize_t
 229 zpl_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 230 {
 231         cred_t *cr = CRED();
 232         ssize_t read;
 233
 234         crhold(cr);
 235         read = zpl_read_common(filp->f_mapping->host, buf, len, ppos,
 236             UIO_USERSPACE, filp->f_flags, cr);
 237         crfree(cr);
 238
 239         return (read);
 240 }
 241
 242 static ssize_t
 243 zpl_aio_read(struct kiocb *kiocb, const struct iovec *iovp,
 244         unsigned long nr_segs, loff_t pos)
 245 {
 246         cred_t *cr = CRED();
 247         struct file *filp = kiocb->ki_filp;
 248         size_t count = kiocb->ki_nbytes;
 249         ssize_t read;
 250         size_t alloc_size = sizeof (struct iovec) * nr_segs;
 251         struct iovec *iov_tmp = kmem_alloc(alloc_size, KM_SLEEP | KM_NODEBUG);
 252         bcopy(iovp, iov_tmp, alloc_size);
 253
 254         ASSERT(iovp);
 255
 256         crhold(cr);
 257         read = zpl_read_common_iovec(filp->f_mapping->host, iov_tmp, count,
 258             nr_segs, &kiocb->ki_pos, UIO_USERSPACE, filp->f_flags, cr);
 259         crfree(cr);
 260
 261         kmem_free(iov_tmp, alloc_size);
 262
 263         return (read);
 264 }
 265
 266 static inline ssize_t
 267 zpl_write_common_iovec(struct inode *ip, const struct iovec *iovp, size_t count,
 268     unsigned long nr_segs, loff_t *ppos, uio_seg_t segment,
 269     int flags, cred_t *cr)
 270 {
 271         ssize_t wrote;
 272         uio_t uio;
 273         int error;
 274
 275         uio.uio_iov = (struct iovec *)iovp;
 276         uio.uio_resid = count;
 277         uio.uio_iovcnt = nr_segs;
 278         uio.uio_loffset = *ppos;
 279         uio.uio_limit = MAXOFFSET_T;
 280         uio.uio_segflg = segment;
 281
 282         error = -zfs_write(ip, &uio, flags, cr);
 283         if (error < 0)
 284                 return (error);
 285
 286         wrote = count - uio.uio_resid;
 287         *ppos += wrote;
 288         task_io_account_write(wrote);
 289
 290         return (wrote);
 291 }
 292 inline ssize_t
 293 zpl_write_common(struct inode *ip, const char *buf, size_t len, loff_t *ppos,
 294     uio_seg_t segment, int flags, cred_t *cr)
 295 {
 296         struct iovec iov;
 297
 298         iov.iov_base = (void *)buf;
 299         iov.iov_len = len;
 300
 301         return (zpl_write_common_iovec(ip, &iov, len, 1, ppos, segment,
 302             flags, cr));
 303 }
 304
 305 static ssize_t
 306 zpl_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 307 {
 308         cred_t *cr = CRED();
 309         ssize_t wrote;
 310
 311         crhold(cr);
 312         wrote = zpl_write_common(filp->f_mapping->host, buf, len, ppos,
 313             UIO_USERSPACE, filp->f_flags, cr);
 314         crfree(cr);
 315
 316         return (wrote);
 317 }
 318
 319 static ssize_t
 320 zpl_aio_write(struct kiocb *kiocb, const struct iovec *iovp,
 321         unsigned long nr_segs, loff_t pos)
 322 {
 323         cred_t *cr = CRED();
 324         struct file *filp = kiocb->ki_filp;
 325         size_t count = kiocb->ki_nbytes;
 326         ssize_t wrote;
 327         size_t alloc_size = sizeof (struct iovec) * nr_segs;
 328         struct iovec *iov_tmp = kmem_alloc(alloc_size, KM_SLEEP | KM_NODEBUG);
 329         bcopy(iovp, iov_tmp, alloc_size);
 330
 331         ASSERT(iovp);
 332
 333         crhold(cr);
 334         wrote = zpl_write_common_iovec(filp->f_mapping->host, iov_tmp, count,
 335             nr_segs, &kiocb->ki_pos, UIO_USERSPACE, filp->f_flags, cr);
 336         crfree(cr);
 337
 338         kmem_free(iov_tmp, alloc_size);
 339
 340         return (wrote);
 341 }
 342
 343 static loff_t
 344 zpl_llseek(struct file *filp, loff_t offset, int whence)
 345 {
 346 #if defined(SEEK_HOLE) && defined(SEEK_DATA)
 347         if (whence == SEEK_DATA || whence == SEEK_HOLE) {
 348                 struct inode *ip = filp->f_mapping->host;
 349                 loff_t maxbytes = ip->i_sb->s_maxbytes;
 350                 loff_t error;
 351
 352                 spl_inode_lock(ip);
 353                 error = -zfs_holey(ip, whence, &offset);
 354                 if (error == 0)
 355                         error = lseek_execute(filp, ip, offset, maxbytes);
 356                 spl_inode_unlock(ip);
 357
 358                 return (error);
 359         }
 360 #endif /* SEEK_HOLE && SEEK_DATA */
 361
 362         return (generic_file_llseek(filp, offset, whence));
 363 }
 364
 365 /*
 366  * It's worth taking a moment to describe how mmap is implemented
 367  * for zfs because it differs considerably from other Linux filesystems.
 368  * However, this issue is handled the same way under OpenSolaris.
 369  *
 * The issue is that by design zfs bypasses the Linux page cache and
 * leaves all caching up to the ARC.  This has been shown to work
 * well for the common read(2)/write(2) case.  However, mmap(2)
 * is a problem because it relies on being tightly integrated with the
 * page cache.  To handle this we cache mmap'ed files twice, once in
 * the ARC and a second time in the page cache.  The code is careful
 * to keep both copies synchronized.
 377  *
 * When a file with an mmap'ed region is written to using write(2)
 * both the data in the ARC and existing pages in the page cache
 * are updated.  For a read(2) data will be read first from the page
 * cache then the ARC if needed.  Neither a write(2) nor a read(2)
 * will ever result in new pages being added to the page cache.
 383  *
 384  * New pages are added to the page cache only via .readpage() which
 385  * is called when the vfs needs to read a page off disk to back the
 386  * virtual memory region.  These pages may be modified without
 387  * notifying the ARC and will be written out periodically via
 388  * .writepage().  This will occur due to either a sync or the usual
 389  * page aging behavior.  Note because a read(2) of a mmap'ed file
 390  * will always check the page cache first even when the ARC is out
 391  * of date correct data will still be returned.
 392  *
 * While this implementation ensures correct behavior it does
 * have some drawbacks.  The most obvious of which is that it
 * increases the required memory footprint when accessing mmap'ed
 * files.  It also adds additional complexity to the code keeping
 * both caches synchronized.
 398  *
 * Longer term it may be possible to cleanly resolve this wart by
 * mapping page cache pages directly on to the ARC buffers.  The
 * Linux address space operations are flexible enough to allow
 * selection of which pages back a particular index.  The trick
 * would be working out the details of which subsystem is in
 * charge, the ARC, the page cache, or both.  It may also prove
 * helpful to move the ARC buffers to scatter-gather lists
 * rather than a vmalloc'ed region.
 407  */
 408 static int
 409 zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 410 {
 411         struct inode *ip = filp->f_mapping->host;
 412         znode_t *zp = ITOZ(ip);
 413         int error;
 414
 415         error = -zfs_map(ip, vma->vm_pgoff, (caddr_t *)vma->vm_start,
 416             (size_t)(vma->vm_end - vma->vm_start), vma->vm_flags);
 417         if (error)
 418                 return (error);
 419
 420         error = generic_file_mmap(filp, vma);
 421         if (error)
 422                 return (error);
 423
 424         mutex_enter(&zp->z_lock);
 425         zp->z_is_mapped = 1;
 426         mutex_exit(&zp->z_lock);
 427
 428         return (error);
 429 }
 430
 431 /*
 432  * Populate a page with data for the Linux page cache.  This function is
 433  * only used to support mmap(2).  There will be an identical copy of the
 434  * data in the ARC which is kept up to date via .write() and .writepage().
 435  *
 436  * Current this function relies on zpl_read_common() and the O_DIRECT
 437  * flag to read in a page.  This works but the more correct way is to
 438  * update zfs_fillpage() to be Linux friendly and use that interface.
 439  */
 440 static int
 441 zpl_readpage(struct file *filp, struct page *pp)
 442 {
 443         struct inode *ip;
 444         struct page *pl[1];
 445         int error = 0;
 446
 447         ASSERT(PageLocked(pp));
 448         ip = pp->mapping->host;
 449         pl[0] = pp;
 450
 451         error = -zfs_getpage(ip, pl, 1);
 452
 453         if (error) {
 454                 SetPageError(pp);
 455                 ClearPageUptodate(pp);
 456         } else {
 457                 ClearPageError(pp);
 458                 SetPageUptodate(pp);
 459                 flush_dcache_page(pp);
 460         }
 461
 462         unlock_page(pp);
 463         return (error);
 464 }
 465
 466 /*
 467  * Populate a set of pages with data for the Linux page cache.  This
 468  * function will only be called for read ahead and never for demand
 469  * paging.  For simplicity, the code relies on read_cache_pages() to
 470  * correctly lock each page for IO and call zpl_readpage().
 471  */
 472 static int
 473 zpl_readpages(struct file *filp, struct address_space *mapping,
 474         struct list_head *pages, unsigned nr_pages)
 475 {
 476         return (read_cache_pages(mapping, pages,
 477             (filler_t *)zpl_readpage, filp));
 478 }
 479
 480 int
 481 zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
 482 {
 483         struct address_space *mapping = data;
 484         fstrans_cookie_t cookie;
 485
 486         ASSERT(PageLocked(pp));
 487         ASSERT(!PageWriteback(pp));
 488
 489         cookie = spl_fstrans_mark();
 490         (void) zfs_putpage(mapping->host, pp, wbc);
 491         spl_fstrans_unmark(cookie);
 492
 493         return (0);
 494 }
 495
 496 static int
 497 zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
 498 {
 499         znode_t         *zp = ITOZ(mapping->host);
 500         zfs_sb_t        *zsb = ITOZSB(mapping->host);
 501         enum writeback_sync_modes sync_mode;
 502         int result;
 503
 504         ZFS_ENTER(zsb);
 505         if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
 506                 wbc->sync_mode = WB_SYNC_ALL;
 507         ZFS_EXIT(zsb);
 508         sync_mode = wbc->sync_mode;
 509
 510         /*
 511          * We don't want to run write_cache_pages() in SYNC mode here, because
 512          * that would make putpage() wait for a single page to be committed to
 513          * disk every single time, resulting in atrocious performance. Instead
 514          * we run it once in non-SYNC mode so that the ZIL gets all the data,
 515          * and then we commit it all in one go.
 516          */
 517         wbc->sync_mode = WB_SYNC_NONE;
 518         result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
 519         if (sync_mode != wbc->sync_mode) {
 520                 ZFS_ENTER(zsb);
 521                 ZFS_VERIFY_ZP(zp);
 522                 if (zsb->z_log != NULL)
 523                         zil_commit(zsb->z_log, zp->z_id);
 524                 ZFS_EXIT(zsb);
 525
 526                 /*
 527                  * We need to call write_cache_pages() again (we can't just
 528                  * return after the commit) because the previous call in
 529                  * non-SYNC mode does not guarantee that we got all the dirty
 530                  * pages (see the implementation of write_cache_pages() for
 531                  * details). That being said, this is a no-op in most cases.
 532                  */
 533                 wbc->sync_mode = sync_mode;
 534                 result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
 535         }
 536         return (result);
 537 }
 538
 539 /*
 540  * Write out dirty pages to the ARC, this function is only required to
 541  * support mmap(2).  Mapped pages may be dirtied by memory operations
 542  * which never call .write().  These dirty pages are kept in sync with
 543  * the ARC buffers via this hook.
 544  */
 545 static int
 546 zpl_writepage(struct page *pp, struct writeback_control *wbc)
 547 {
 548         if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
 549                 wbc->sync_mode = WB_SYNC_ALL;
 550
 551         return (zpl_putpage(pp, wbc, pp->mapping));
 552 }
 553
 554 /*
 555  * The only flag combination which matches the behavior of zfs_space()
 556  * is FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.  The FALLOC_FL_PUNCH_HOLE
 557  * flag was introduced in the 2.6.38 kernel.
 558  */
 559 #if defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE)
 560 long
 561 zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
 562 {
 563         int error = -EOPNOTSUPP;
 564
 565 #if defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
 566         cred_t *cr = CRED();
 567         flock64_t bf;
 568         loff_t olen;
 569
 570         if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 571                 return (error);
 572
 573         crhold(cr);
 574
 575         if (offset < 0 || len <= 0)
 576                 return (-EINVAL);
 577
 578         spl_inode_lock(ip);
 579         olen = i_size_read(ip);
 580
 581         if (offset > olen) {
 582                 spl_inode_unlock(ip);
 583                 return (0);
 584         }
 585         if (offset + len > olen)
 586                 len = olen - offset;
 587         bf.l_type = F_WRLCK;
 588         bf.l_whence = 0;
 589         bf.l_start = offset;
 590         bf.l_len = len;
 591         bf.l_pid = 0;
 592
 593         error = -zfs_space(ip, F_FREESP, &bf, FWRITE, offset, cr);
 594         spl_inode_unlock(ip);
 595
 596         crfree(cr);
 597 #endif /* defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE) */
 598
 599         ASSERT3S(error, <=, 0);
 600         return (error);
 601 }
 602 #endif /* defined(HAVE_FILE_FALLOCATE) || defined(HAVE_INODE_FALLOCATE) */
 603
 604 #ifdef HAVE_FILE_FALLOCATE
 605 static long
 606 zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
 607 {
 608         return zpl_fallocate_common(filp->f_path.dentry->d_inode,
 609             mode, offset, len);
 610 }
 611 #endif /* HAVE_FILE_FALLOCATE */
 612
 613 /*
 614  * Map zfs file z_pflags (xvattr_t) to linux file attributes. Only file
 615  * attributes common to both Linux and Solaris are mapped.
 616  */
 617 static int
 618 zpl_ioctl_getflags(struct file *filp, void __user *arg)
 619 {
 620         struct inode *ip = filp->f_dentry->d_inode;
 621         unsigned int ioctl_flags = 0;
 622         uint64_t zfs_flags = ITOZ(ip)->z_pflags;
 623         int error;
 624
 625         if (zfs_flags & ZFS_IMMUTABLE)
 626                 ioctl_flags |= FS_IMMUTABLE_FL;
 627
 628         if (zfs_flags & ZFS_APPENDONLY)
 629                 ioctl_flags |= FS_APPEND_FL;
 630
 631         if (zfs_flags & ZFS_NODUMP)
 632                 ioctl_flags |= FS_NODUMP_FL;
 633
 634         ioctl_flags &= FS_FL_USER_VISIBLE;
 635
 636         error = copy_to_user(arg, &ioctl_flags, sizeof (ioctl_flags));
 637
 638         return (error);
 639 }
 640
 641 /*
 642  * fchange() is a helper macro to detect if we have been asked to change a
 643  * flag. This is ugly, but the requirement that we do this is a consequence of
 644  * how the Linux file attribute interface was designed. Another consequence is
 645  * that concurrent modification of files suffers from a TOCTOU race. Neither
 646  * are things we can fix without modifying the kernel-userland interface, which
 647  * is outside of our jurisdiction.
 648  */
 649
 650 #define fchange(f0, f1, b0, b1) ((((f0) & (b0)) == (b0)) != \
 651         (((b1) & (f1)) == (f1)))
 652
 653 static int
 654 zpl_ioctl_setflags(struct file *filp, void __user *arg)
 655 {
 656         struct inode    *ip = filp->f_dentry->d_inode;
 657         uint64_t        zfs_flags = ITOZ(ip)->z_pflags;
 658         unsigned int    ioctl_flags;
 659         cred_t          *cr = CRED();
 660         xvattr_t        xva;
 661         xoptattr_t      *xoap;
 662         int             error;
 663
 664         if (copy_from_user(&ioctl_flags, arg, sizeof (ioctl_flags)))
 665                 return (-EFAULT);
 666
 667         if ((ioctl_flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL)))
 668                 return (-EOPNOTSUPP);
 669
 670         if ((ioctl_flags & ~(FS_FL_USER_MODIFIABLE)))
 671                 return (-EACCES);
 672
 673         if ((fchange(ioctl_flags, zfs_flags, FS_IMMUTABLE_FL, ZFS_IMMUTABLE) ||
 674             fchange(ioctl_flags, zfs_flags, FS_APPEND_FL, ZFS_APPENDONLY)) &&
 675             !capable(CAP_LINUX_IMMUTABLE))
 676                 return (-EACCES);
 677
 678         if (!zpl_inode_owner_or_capable(ip))
 679                 return (-EACCES);
 680
 681         xva_init(&xva);
 682         xoap = xva_getxoptattr(&xva);
 683
 684         XVA_SET_REQ(&xva, XAT_IMMUTABLE);
 685         if (ioctl_flags & FS_IMMUTABLE_FL)
 686                 xoap->xoa_immutable = B_TRUE;
 687
 688         XVA_SET_REQ(&xva, XAT_APPENDONLY);
 689         if (ioctl_flags & FS_APPEND_FL)
 690                 xoap->xoa_appendonly = B_TRUE;
 691
 692         XVA_SET_REQ(&xva, XAT_NODUMP);
 693         if (ioctl_flags & FS_NODUMP_FL)
 694                 xoap->xoa_nodump = B_TRUE;
 695
 696         crhold(cr);
 697         error = -zfs_setattr(ip, (vattr_t *)&xva, 0, cr);
 698         crfree(cr);
 699
 700         return (error);
 701 }
 702
 703 static long
 704 zpl_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 705 {
 706         switch (cmd) {
 707         case FS_IOC_GETFLAGS:
 708                 return (zpl_ioctl_getflags(filp, (void *)arg));
 709         case FS_IOC_SETFLAGS:
 710                 return (zpl_ioctl_setflags(filp, (void *)arg));
 711         default:
 712                 return (-ENOTTY);
 713         }
 714 }
 715
 716 #ifdef CONFIG_COMPAT
 717 static long
 718 zpl_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 719 {
 720         return (zpl_ioctl(filp, cmd, arg));
 721 }
 722 #endif /* CONFIG_COMPAT */
 723
 724
 725 const struct address_space_operations zpl_address_space_operations = {
 726         .readpages      = zpl_readpages,
 727         .readpage       = zpl_readpage,
 728         .writepage      = zpl_writepage,
 729         .writepages     = zpl_writepages,
 730 };
 731
 732 const struct file_operations zpl_file_operations = {
 733         .open           = zpl_open,
 734         .release        = zpl_release,
 735         .llseek         = zpl_llseek,
 736         .read           = zpl_read,
 737         .write          = zpl_write,
 738         .aio_read       = zpl_aio_read,
 739         .aio_write      = zpl_aio_write,
 740         .mmap           = zpl_mmap,
 741         .fsync          = zpl_fsync,
 742         .aio_fsync      = zpl_aio_fsync,
 743 #ifdef HAVE_FILE_FALLOCATE
 744         .fallocate      = zpl_fallocate,
 745 #endif /* HAVE_FILE_FALLOCATE */
 746         .unlocked_ioctl = zpl_ioctl,
 747 #ifdef CONFIG_COMPAT
 748         .compat_ioctl   = zpl_compat_ioctl,
 749 #endif
 750 };
 751
 752 const struct file_operations zpl_dir_file_operations = {
 753         .llseek         = generic_file_llseek,
 754         .read           = generic_read_dir,
 755 #ifdef HAVE_VFS_ITERATE
 756         .iterate        = zpl_iterate,
 757 #else
 758         .readdir        = zpl_readdir,
 759 #endif
 760         .fsync          = zpl_fsync,
 761         .unlocked_ioctl = zpl_ioctl,
 762 #ifdef CONFIG_COMPAT
 763         .compat_ioctl   = zpl_compat_ioctl,
 764 #endif
 765 };