Linux: Defer loading the object set in zfs_setattr()

[mirror_zfs.git] / module / os / linux / zfs / zfs_vnops_os.c
diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c

index 3be387a30e5c9e0881de17347a622bcc5ff08e40..2a766a585b708487301e73ac52f34d2e3f83f5b5 100644 (file)
--- a/module/os/linux/zfs/zfs_vnops_os.c
+++ b/module/os/linux/zfs/zfs_vnops_os.c
@@ -6,7 +6,7 @@
   * You may not use this file except in compliance with the License.
   *
   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
   * See the License for the specific language governing permissions
   * and limitations under the License.
   *
@@ -82,20 +82,23 @@
   * to freed memory.  The example below illustrates the following Big Rules:
   *
   *  (1) A check must be made in each zfs thread for a mounted file system.
- *     This is done avoiding races using ZFS_ENTER(zfsvfs).
- *      A ZFS_EXIT(zfsvfs) is needed before all returns.  Any znodes
- *      must be checked with ZFS_VERIFY_ZP(zp).  Both of these macros
+ *     This is done avoiding races using zfs_enter(zfsvfs).
+ *      A zfs_exit(zfsvfs) is needed before all returns.  Any znodes
+ *      must be checked with zfs_verify_zp(zp).  Both of these macros
   *      can return EIO from the calling function.
   *
- *  (2)        zrele() should always be the last thing except for zil_commit()
- *     (if necessary) and ZFS_EXIT(). This is for 3 reasons:
- *     First, if it's the last reference, the vnode/znode
- *     can be freed, so the zp may point to freed memory.  Second, the last
- *     reference will call zfs_zinactive(), which may induce a lot of work --
- *     pushing cached pages (which acquires range locks) and syncing out
- *     cached atime changes.  Third, zfs_zinactive() may require a new tx,
- *     which could deadlock the system if you were already holding one.
- *     If you must call zrele() within a tx then use zfs_zrele_async().
+ *  (2) zrele() should always be the last thing except for zil_commit() (if
+ *     necessary) and zfs_exit(). This is for 3 reasons: First, if it's the
+ *     last reference, the vnode/znode can be freed, so the zp may point to
+ *     freed memory.  Second, the last reference will call zfs_zinactive(),
+ *     which may induce a lot of work -- pushing cached pages (which acquires
+ *     range locks) and syncing out cached atime changes.  Third,
+ *     zfs_zinactive() may require a new tx, which could deadlock the system
+ *     if you were already holding one. This deadlock occurs because the tx
+ *     currently being operated on prevents a txg from syncing, which
+ *     prevents the new tx from progressing, resulting in a deadlock.  If you
+ *     must call zrele() within a tx, use zfs_zrele_async(). Note that iput()
+ *     is a synonym for zrele().
   *
   *  (3)        All range locks must be grabbed before calling dmu_tx_assign(),
   *     as they can span dmu_tx_assign() calls.
@@ -104,7 +107,7 @@
   *      dmu_tx_assign().  This is critical because we don't want to block
   *      while holding locks.
   *
- *     If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
+ *     If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT.  This
   *     reduces lock contention and CPU usage when we must wait (note that if
   *     throughput is constrained by the storage, nearly every transaction
   *     must wait).
@@ -139,7 +142,7 @@
   *
   * In general, this is how things should be ordered in each vnode op:
   *
- *     ZFS_ENTER(zfsvfs);              // exit if unmounted
+ *     zfs_enter(zfsvfs);              // exit if unmounted
   * top:
   *     zfs_dirent_lock(&dl, ...)       // lock directory entry (may igrab())
   *     rw_enter(...);                  // grab any other locks you need
@@ -157,7 +160,7 @@
   *                     goto top;
   *             }
   *             dmu_tx_abort(tx);       // abort DMU tx
- *             ZFS_EXIT(zfsvfs);       // finished in zfs
+ *             zfs_exit(zfsvfs);       // finished in zfs
   *             return (error);         // really out of space
   *     }
   *     error = do_real_work();         // do whatever this VOP does
@@ -168,116 +171,101 @@
   *     zfs_dirent_unlock(dl);          // unlock directory entry
   *     zrele(...);                     // release held znodes
   *     zil_commit(zilog, foid);        // synchronous when necessary
- *     ZFS_EXIT(zfsvfs);               // finished in zfs
+ *     zfs_exit(zfsvfs);               // finished in zfs
   *     return (error);                 // done, report error
   */
-
-/*
- * Virus scanning is unsupported.  It would be possible to add a hook
- * here to performance the required virus scan.  This could be done
- * entirely in the kernel or potentially as an update to invoke a
- * scanning utility.
- */
-static int
-zfs_vscan(struct inode *ip, cred_t *cr, int async)
-{
-       return (0);
-}
-
-/* ARGSUSED */
  int
  zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
  {
+       (void) cr;
         znode_t *zp = ITOZ(ip);
         zfsvfs_t *zfsvfs = ITOZSB(ip);
+       int error;
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (error);
  
         /* Honor ZFS_APPENDONLY file attribute */
-       if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
+       if (blk_mode_is_open_write(mode) && (zp->z_pflags & ZFS_APPENDONLY) &&
             ((flag & O_APPEND) == 0)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EPERM));
         }
  
-       /* Virus scan eligible files on open */
-       if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
-           !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
-               if (zfs_vscan(ip, cr, 0) != 0) {
-                       ZFS_EXIT(zfsvfs);
-                       return (SET_ERROR(EACCES));
-               }
+       /*
+        * Keep a count of the synchronous opens in the znode.  On first
+        * synchronous open we must convert all previous async transactions
+        * into sync to keep correct ordering.
+        */
+       if (flag & O_SYNC) {
+               if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
+                       zil_async_to_sync(zfsvfs->z_log, zp->z_id);
         }
  
-       /* Keep a count of the synchronous opens in the znode */
-       if (flag & O_SYNC)
-               atomic_inc_32(&zp->z_sync_cnt);
-
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (0);
  }
  
-/* ARGSUSED */
  int
  zfs_close(struct inode *ip, int flag, cred_t *cr)
  {
+       (void) cr;
         znode_t *zp = ITOZ(ip);
         zfsvfs_t *zfsvfs = ITOZSB(ip);
+       int error;
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (error);
  
         /* Decrement the synchronous opens in the znode */
         if (flag & O_SYNC)
                 atomic_dec_32(&zp->z_sync_cnt);
  
-       if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
-           !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
-               VERIFY(zfs_vscan(ip, cr, 1) == 0);
-
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (0);
  }
  
  #if defined(_KERNEL)
+
+static int zfs_fillpage(struct inode *ip, struct page *pp);
+
  /*
   * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages.  What this means:
- *
- * On Write:   If we find a memory mapped page, we write to *both*
- *             the page and the dmu buffer.
+ * between the DMU cache and the memory mapped pages.  Update all mapped
+ * pages with the contents of the corresponding dmu buffer.
   */
  void
  update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
  {
-       struct inode *ip = ZTOI(zp);
-       struct address_space *mp = ip->i_mapping;
-       struct page *pp;
-       uint64_t nbytes;
-       int64_t off;
-       void *pb;
+       struct address_space *mp = ZTOI(zp)->i_mapping;
+       int64_t off = start & (PAGE_SIZE - 1);
  
-       off = start & (PAGE_SIZE-1);
         for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
-               nbytes = MIN(PAGE_SIZE - off, len);
+               uint64_t nbytes = MIN(PAGE_SIZE - off, len);
  
-               pp = find_lock_page(mp, start >> PAGE_SHIFT);
+               struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
                 if (pp) {
                         if (mapping_writably_mapped(mp))
                                 flush_dcache_page(pp);
  
-                       pb = kmap(pp);
-                       (void) dmu_read(os, zp->z_id, start + off, nbytes,
-                           pb + off, DMU_READ_PREFETCH);
+                       void *pb = kmap(pp);
+                       int error = dmu_read(os, zp->z_id, start + off,
+                           nbytes, pb + off, DMU_READ_PREFETCH);
                         kunmap(pp);
  
-                       if (mapping_writably_mapped(mp))
-                               flush_dcache_page(pp);
+                       if (error) {
+                               SetPageError(pp);
+                               ClearPageUptodate(pp);
+                       } else {
+                               ClearPageError(pp);
+                               SetPageUptodate(pp);
+
+                               if (mapping_writably_mapped(mp))
+                                       flush_dcache_page(pp);
+
+                               mark_page_accessed(pp);
+                       }
  
-                       mark_page_accessed(pp);
-                       SetPageUptodate(pp);
-                       ClearPageError(pp);
                         unlock_page(pp);
                         put_page(pp);
                 }
@@ -288,39 +276,45 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
  }
  
  /*
- * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages.  What this means:
- *
- * On Read:    We "read" preferentially from memory mapped pages,
- *             else we default from the dmu buffer.
- *
- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
- *      the file is memory mapped.
+ * When a file is memory mapped, we must keep the I/O data synchronized
+ * between the DMU cache and the memory mapped pages.  Preferentially read
+ * from memory mapped pages, otherwise fall back to reading through the dmu.
   */
  int
-mappedread(znode_t *zp, int nbytes, uio_t *uio)
+mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
  {
         struct inode *ip = ZTOI(zp);
         struct address_space *mp = ip->i_mapping;
-       struct page *pp;
-       int64_t start, off;
-       uint64_t bytes;
+       int64_t start = uio->uio_loffset;
+       int64_t off = start & (PAGE_SIZE - 1);
         int len = nbytes;
         int error = 0;
-       void *pb;
  
-       start = uio->uio_loffset;
-       off = start & (PAGE_SIZE-1);
         for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
-               bytes = MIN(PAGE_SIZE - off, len);
+               uint64_t bytes = MIN(PAGE_SIZE - off, len);
  
-               pp = find_lock_page(mp, start >> PAGE_SHIFT);
+               struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
                 if (pp) {
-                       ASSERT(PageUptodate(pp));
+                       /*
+                        * If filemap_fault() retries there exists a window
+                        * where the page will be unlocked and not up to date.
+                        * In this case we must try and fill the page.
+                        */
+                       if (unlikely(!PageUptodate(pp))) {
+                               error = zfs_fillpage(ip, pp);
+                               if (error) {
+                                       unlock_page(pp);
+                                       put_page(pp);
+                                       return (error);
+                               }
+                       }
+
+                       ASSERT(PageUptodate(pp) || PageDirty(pp));
+
                         unlock_page(pp);
  
-                       pb = kmap(pp);
-                       error = uiomove(pb + off, bytes, UIO_READ, uio);
+                       void *pb = kmap(pp);
+                       error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
                         kunmap(pp);
  
                         if (mapping_writably_mapped(mp))
@@ -335,14 +329,16 @@ mappedread(znode_t *zp, int nbytes, uio_t *uio)
  
                 len -= bytes;
                 off = 0;
+
                 if (error)
                         break;
         }
+
         return (error);
  }
  #endif /* _KERNEL */
  
-unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
+static unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
  
  /*
   * Write the bytes to a file.
@@ -372,8 +368,8 @@ zfs_write_simple(znode_t *zp, const void *data, size_t len,
         iov.iov_base = (void *)data;
         iov.iov_len = len;
  
-       uio_t uio;
-       uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);
+       zfs_uio_t uio;
+       zfs_uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);
  
         cookie = spl_fstrans_mark();
         error = zfs_write(zp, &uio, 0, kcred);
@@ -381,14 +377,20 @@ zfs_write_simple(znode_t *zp, const void *data, size_t len,
  
         if (error == 0) {
                 if (residp != NULL)
-                       *residp = uio_resid(&uio);
-               else if (uio_resid(&uio) != 0)
+                       *residp = zfs_uio_resid(&uio);
+               else if (zfs_uio_resid(&uio) != 0)
                         error = SET_ERROR(EIO);
         }
  
         return (error);
  }
  
+static void
+zfs_rele_async_task(void *arg)
+{
+       iput(arg);
+}
+
  void
  zfs_zrele_async(znode_t *zp)
  {
@@ -398,11 +400,18 @@ zfs_zrele_async(znode_t *zp)
         ASSERT(atomic_read(&ip->i_count) > 0);
         ASSERT(os != NULL);
  
-       if (atomic_read(&ip->i_count) == 1)
+       /*
+        * If decrementing the count would put us at 0, we can't do it inline
+        * here, because that would be synchronous. Instead, dispatch an iput
+        * to run later.
+        *
+        * For more information on the dangers of a synchronous iput, see the
+        * header comment of this file.
+        */
+       if (!atomic_add_unless(&ip->i_count, -1, 1)) {
                 VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)),
-                   (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID);
-       else
-               zrele(zp);
+                   zfs_rele_async_task, ip, TQ_SLEEP) != TASKQID_INVALID);
+       }
  }
  
  
@@ -424,7 +433,6 @@ zfs_zrele_async(znode_t *zp)
   * Timestamps:
   *     NA
   */
-/* ARGSUSED */
  int
  zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
      int *direntflags, pathname_t *realpnp)
@@ -460,8 +468,8 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
                 }
         }
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zdp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
+               return (error);
  
         *zpp = NULL;
  
@@ -471,12 +479,12 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
                  * Maybe someday we will.
                  */
                 if (zdp->z_pflags & ZFS_XATTR) {
-                       ZFS_EXIT(zfsvfs);
+                       zfs_exit(zfsvfs, FTAG);
                         return (SET_ERROR(EINVAL));
                 }
  
                 if ((error = zfs_get_xattrdir(zdp, zpp, cr, flags))) {
-                       ZFS_EXIT(zfsvfs);
+                       zfs_exit(zfsvfs, FTAG);
                         return (error);
                 }
  
@@ -485,17 +493,17 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
                  */
  
                 if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0,
-                   B_FALSE, cr))) {
+                   B_TRUE, cr, zfs_init_idmap))) {
                         zrele(*zpp);
                         *zpp = NULL;
                 }
  
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
         if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(ENOTDIR));
         }
  
@@ -503,22 +511,23 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
          * Check accessibility of directory.
          */
  
-       if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
-               ZFS_EXIT(zfsvfs);
+       if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
+           zfs_init_idmap))) {
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
         if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
             NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EILSEQ));
         }
  
         error = zfs_dirlook(zdp, nm, zpp, flags, direntflags, realpnp);
         if ((error == 0) && (*zpp))
-               zfs_inode_update(*zpp);
+               zfs_znode_update_vfs(*zpp);
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
@@ -535,6 +544,7 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
   *             cr      - credentials of caller.
   *             flag    - file flag.
   *             vsecp   - ACL to be set
+ *             mnt_ns  - user namespace of the mount
   *
   *     OUT:    zpp     - znode of created or trunc'd entry.
   *
@@ -544,11 +554,10 @@ zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
   *     dzp - ctime|mtime updated if new entry created
   *      zp - ctime|mtime always, atime if new
   */
-
-/* ARGSUSED */
  int
  zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
-    int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
+    int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp,
+    zidmap_t *mnt_ns)
  {
         znode_t         *zp;
         zfsvfs_t        *zfsvfs = ZTOZSB(dzp);
@@ -563,6 +572,7 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
         boolean_t       fuid_dirtied;
         boolean_t       have_acl = B_FALSE;
         boolean_t       waited = B_FALSE;
+       boolean_t       skip_acl = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
  
         /*
          * If we have an ephemeral id, ACL, or XVATTR then
@@ -579,21 +589,21 @@ zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
         if (name == NULL)
                 return (SET_ERROR(EINVAL));
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(dzp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+               return (error);
         os = zfsvfs->z_os;
         zilog = zfsvfs->z_log;
  
         if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
             NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EILSEQ));
         }
  
         if (vap->va_mask & ATTR_XVATTR) {
                 if ((error = secpolicy_xvattr((xvattr_t *)vap,
                     crgetuid(cr), cr, vap->va_mode)) != 0) {
-                       ZFS_EXIT(zfsvfs);
+                       zfs_exit(zfsvfs, FTAG);
                         return (error);
                 }
         }
@@ -622,7 +632,7 @@ top:
                                 zfs_acl_ids_free(&acl_ids);
                         if (strcmp(name, "..") == 0)
                                 error = SET_ERROR(EISDIR);
-                       ZFS_EXIT(zfsvfs);
+                       zfs_exit(zfsvfs, FTAG);
                         return (error);
                 }
         }
@@ -635,7 +645,8 @@ top:
                  * Create a new file object and update the directory
                  * to reference it.
                  */
-               if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+               if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, skip_acl, cr,
+                   mnt_ns))) {
                         if (have_acl)
                                 zfs_acl_ids_free(&acl_ids);
                         goto out;
@@ -654,7 +665,7 @@ top:
                 }
  
                 if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
-                   cr, vsecp, &acl_ids)) != 0)
+                   cr, vsecp, &acl_ids, mnt_ns)) != 0)
                         goto out;
                 have_acl = B_TRUE;
  
@@ -694,7 +705,7 @@ top:
                         }
                         zfs_acl_ids_free(&acl_ids);
                         dmu_tx_abort(tx);
-                       ZFS_EXIT(zfsvfs);
+                       zfs_exit(zfsvfs, FTAG);
                         return (error);
                 }
                 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
@@ -727,7 +738,6 @@ top:
  
                 if (have_acl)
                         zfs_acl_ids_free(&acl_ids);
-               have_acl = B_FALSE;
  
                 /*
                  * A directory entry already exists for this name.
@@ -749,7 +759,8 @@ top:
                 /*
                  * Verify requested access to file.
                  */
-               if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
+               if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr,
+                   mnt_ns))) {
                         goto out;
                 }
  
@@ -779,23 +790,24 @@ out:
                 if (zp)
                         zrele(zp);
         } else {
-               zfs_inode_update(dzp);
-               zfs_inode_update(zp);
+               zfs_znode_update_vfs(dzp);
+               zfs_znode_update_vfs(zp);
                 *zpp = zp;
         }
  
         if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
                 zil_commit(zilog, 0);
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
-/* ARGSUSED */
  int
  zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
-    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
+    int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp,
+    zidmap_t *mnt_ns)
  {
+       (void) excl, (void) mode, (void) flag;
         znode_t         *zp = NULL, *dzp = ITOZ(dip);
         zfsvfs_t        *zfsvfs = ITOZSB(dip);
         objset_t        *os;
@@ -821,14 +833,14 @@ zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
             (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
                 return (SET_ERROR(EINVAL));
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(dzp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+               return (error);
         os = zfsvfs->z_os;
  
         if (vap->va_mask & ATTR_XVATTR) {
                 if ((error = secpolicy_xvattr((xvattr_t *)vap,
                     crgetuid(cr), cr, vap->va_mode)) != 0) {
-                       ZFS_EXIT(zfsvfs);
+                       zfs_exit(zfsvfs, FTAG);
                         return (error);
                 }
         }
@@ -840,14 +852,14 @@ top:
          * Create a new file object and update the directory
          * to reference it.
          */
-       if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+       if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
                 if (have_acl)
                         zfs_acl_ids_free(&acl_ids);
                 goto out;
         }
  
         if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
-           cr, vsecp, &acl_ids)) != 0)
+           cr, vsecp, &acl_ids, mnt_ns)) != 0)
                 goto out;
         have_acl = B_TRUE;
  
@@ -883,7 +895,7 @@ top:
                 }
                 zfs_acl_ids_free(&acl_ids);
                 dmu_tx_abort(tx);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
         zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
@@ -902,12 +914,12 @@ out:
                 if (zp)
                         zrele(zp);
         } else {
-               zfs_inode_update(dzp);
-               zfs_inode_update(zp);
+               zfs_znode_update_vfs(dzp);
+               zfs_znode_update_vfs(zp);
                 *ipp = ZTOI(zp);
         }
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
@@ -927,9 +939,8 @@ out:
   *      ip - ctime (if nlink > 0)
   */
  
-uint64_t null_xattr = 0;
+static uint64_t null_xattr = 0;
  
-/*ARGSUSED*/
  int
  zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
  {
@@ -955,8 +966,8 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
         if (name == NULL)
                 return (SET_ERROR(EINVAL));
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(dzp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+               return (error);
         zilog = zfsvfs->z_log;
  
         if (flags & FIGNORECASE) {
@@ -975,11 +986,11 @@ top:
             NULL, realnmp))) {
                 if (realnmp)
                         pn_free(realnmp);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
-       if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+       if ((error = zfs_zaccess_delete(dzp, zp, cr, zfs_init_idmap))) {
                 goto out;
         }
  
@@ -993,7 +1004,7 @@ top:
  
         mutex_enter(&zp->z_lock);
         may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
-           !(zp->z_is_mapped);
+           !zn_has_cached_data(zp, 0, LLONG_MAX);
         mutex_exit(&zp->z_lock);
  
         /*
@@ -1056,7 +1067,7 @@ top:
                 zrele(zp);
                 if (xzp)
                         zrele(xzp);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
@@ -1081,8 +1092,10 @@ top:
                     &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
                 delete_now = may_delete_now && !toobig &&
                     atomic_read(&ZTOI(zp)->i_count) == 1 &&
-                   !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked &&
+                   !zn_has_cached_data(zp, 0, LLONG_MAX) &&
+                   xattr_obj == xattr_obj_unlinked &&
                     zfs_external_acl(zp) == acl_obj;
+               VERIFY_IMPLY(xattr_obj_unlinked, xzp);
         }
  
         if (delete_now) {
@@ -1129,8 +1142,8 @@ out:
                 pn_free(realnmp);
  
         zfs_dirent_unlock(dl);
-       zfs_inode_update(dzp);
-       zfs_inode_update(zp);
+       zfs_znode_update_vfs(dzp);
+       zfs_znode_update_vfs(zp);
  
         if (delete_now)
                 zrele(zp);
@@ -1138,14 +1151,14 @@ out:
                 zfs_zrele_async(zp);
  
         if (xzp) {
-               zfs_inode_update(xzp);
+               zfs_znode_update_vfs(xzp);
                 zfs_zrele_async(xzp);
         }
  
         if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
                 zil_commit(zilog, 0);
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
@@ -1159,6 +1172,7 @@ out:
   *             cr      - credentials of caller.
   *             flags   - case flags.
   *             vsecp   - ACL to be set
+ *             mnt_ns  - user namespace of the mount
   *
   *     OUT:    zpp     - znode of created directory.
   *
@@ -1169,10 +1183,9 @@ out:
   *     dzp - ctime|mtime updated
   *     zpp - ctime|mtime|atime updated
   */
-/*ARGSUSED*/
  int
  zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
-    cred_t *cr, int flags, vsecattr_t *vsecp)
+    cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
  {
         znode_t         *zp;
         zfsvfs_t        *zfsvfs = ZTOZSB(dzp);
@@ -1203,18 +1216,18 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
         if (dirname == NULL)
                 return (SET_ERROR(EINVAL));
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(dzp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+               return (error);
         zilog = zfsvfs->z_log;
  
         if (dzp->z_pflags & ZFS_XATTR) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EINVAL));
         }
  
         if (zfsvfs->z_utf8 && u8_validate(dirname,
             strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EILSEQ));
         }
         if (flags & FIGNORECASE)
@@ -1223,14 +1236,14 @@ zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
         if (vap->va_mask & ATTR_XVATTR) {
                 if ((error = secpolicy_xvattr((xvattr_t *)vap,
                     crgetuid(cr), cr, vap->va_mode)) != 0) {
-                       ZFS_EXIT(zfsvfs);
+                       zfs_exit(zfsvfs, FTAG);
                         return (error);
                 }
         }
  
         if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
-           vsecp, &acl_ids)) != 0) {
-               ZFS_EXIT(zfsvfs);
+           vsecp, &acl_ids, mnt_ns)) != 0) {
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
         /*
@@ -1246,21 +1259,22 @@ top:
         if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
             NULL, NULL))) {
                 zfs_acl_ids_free(&acl_ids);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
-       if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
+       if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
+           mnt_ns))) {
                 zfs_acl_ids_free(&acl_ids);
                 zfs_dirent_unlock(dl);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
         if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
                 zfs_acl_ids_free(&acl_ids);
                 zfs_dirent_unlock(dl);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EDQUOT));
         }
  
@@ -1292,7 +1306,7 @@ top:
                 }
                 zfs_acl_ids_free(&acl_ids);
                 dmu_tx_abort(tx);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
@@ -1335,10 +1349,10 @@ out:
         if (error != 0) {
                 zrele(zp);
         } else {
-               zfs_inode_update(dzp);
-               zfs_inode_update(zp);
+               zfs_znode_update_vfs(dzp);
+               zfs_znode_update_vfs(zp);
         }
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
@@ -1358,7 +1372,6 @@ out:
   * Timestamps:
   *     dzp - ctime|mtime updated
   */
-/*ARGSUSED*/
  int
  zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
      int flags)
@@ -1375,8 +1388,8 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
         if (name == NULL)
                 return (SET_ERROR(EINVAL));
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(dzp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+               return (error);
         zilog = zfsvfs->z_log;
  
         if (flags & FIGNORECASE)
@@ -1389,11 +1402,11 @@ top:
          */
         if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
             NULL, NULL))) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
-       if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+       if ((error = zfs_zaccess_delete(dzp, zp, cr, zfs_init_idmap))) {
                 goto out;
         }
  
@@ -1440,7 +1453,7 @@ top:
                 }
                 dmu_tx_abort(tx);
                 zrele(zp);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
@@ -1461,14 +1474,14 @@ top:
  out:
         zfs_dirent_unlock(dl);
  
-       zfs_inode_update(dzp);
-       zfs_inode_update(zp);
+       zfs_znode_update_vfs(dzp);
+       zfs_znode_update_vfs(zp);
         zrele(zp);
  
         if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
                 zil_commit(zilog, 0);
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
@@ -1491,10 +1504,10 @@ out:
   * We use 0 for '.', and 1 for '..'.  If this is the root of the filesystem,
   * we use the offset 2 for the '.zfs' directory.
   */
-/* ARGSUSED */
  int
  zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
  {
+       (void) cr;
         znode_t         *zp = ITOZ(ip);
         zfsvfs_t        *zfsvfs = ITOZSB(ip);
         objset_t        *os;
@@ -1507,8 +1520,8 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
         uint64_t        parent;
         uint64_t        offset; /* must be unsigned; checks for < 1 */
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (error);
  
         if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
             &parent, sizeof (parent))) != 0)
@@ -1603,11 +1616,8 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
                 if (done)
                         break;
  
-               /* Prefetch znode */
-               if (prefetch) {
-                       dmu_prefetch(os, objnum, 0, 0, 0,
-                           ZIO_PRIORITY_SYNC_READ);
-               }
+               if (prefetch)
+                       dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
  
                 /*
                  * Move to the next entry, fill in the previous offset.
@@ -1627,7 +1637,7 @@ update:
         if (error == ENOENT)
                 error = 0;
  out:
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
  
         return (error);
  }
@@ -1644,21 +1654,30 @@ out:
   *
   *     RETURN: 0 (always succeeds)
   */
-/* ARGSUSED */
  int
-zfs_getattr_fast(struct inode *ip, struct kstat *sp)
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+zfs_getattr_fast(zidmap_t *user_ns, u32 request_mask, struct inode *ip,
+    struct kstat *sp)
+#else
+zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
+#endif
  {
         znode_t *zp = ITOZ(ip);
         zfsvfs_t *zfsvfs = ITOZSB(ip);
         uint32_t blksize;
         u_longlong_t nblocks;
+       int error;
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (error);
  
         mutex_enter(&zp->z_lock);
  
-       generic_fillattr(ip, sp);
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+       zpl_generic_fillattr(user_ns, request_mask, ip, sp);
+#else
+       zpl_generic_fillattr(user_ns, ip, sp);
+#endif
         /*
          * +1 link count for root inode with visible '.zfs' directory.
          */
@@ -1689,7 +1708,7 @@ zfs_getattr_fast(struct inode *ip, struct kstat *sp)
                             dmu_objset_id(zfsvfs->z_os);
         }
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
  
         return (0);
  }
@@ -1824,6 +1843,7 @@ next:
   *             flags   - ATTR_UTIME set if non-default time values provided.
   *                     - ATTR_NOACLCHECK (CIFS context only).
   *             cr      - credentials of caller.
+ *             mnt_ns  - user namespace of the mount
   *
   *     RETURN: 0 if success
   *             error code if failure
@@ -1831,13 +1851,12 @@ next:
   * Timestamps:
   *     ip - ctime updated, mtime updated if size changed.
   */
-/* ARGSUSED */
  int
-zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
+zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
  {
         struct inode    *ip;
         zfsvfs_t        *zfsvfs = ZTOZSB(zp);
-       objset_t        *os = zfsvfs->z_os;
+       objset_t        *os;
         zilog_t         *zilog;
         dmu_tx_t        *tx;
         vattr_t         oldva;
@@ -1866,9 +1885,10 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
         if (mask == 0)
                 return (0);
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (err);
         ip = ZTOI(zp);
+       os = zfsvfs->z_os;
  
         /*
          * If this is a xvattr_t, then get a pointer to the structure of
@@ -1879,13 +1899,13 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
                 if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
                         if (!dmu_objset_projectquota_enabled(os) ||
                             (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) {
-                               ZFS_EXIT(zfsvfs);
+                               zfs_exit(zfsvfs, FTAG);
                                 return (SET_ERROR(ENOTSUP));
                         }
  
                         projid = xoap->xoa_projid;
                         if (unlikely(projid == ZFS_INVALID_PROJID)) {
-                               ZFS_EXIT(zfsvfs);
+                               zfs_exit(zfsvfs, FTAG);
                                 return (SET_ERROR(EINVAL));
                         }
  
@@ -1900,7 +1920,7 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
                     ((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
                     (!dmu_objset_projectquota_enabled(os) ||
                     (!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) {
-                       ZFS_EXIT(zfsvfs);
+                       zfs_exit(zfsvfs, FTAG);
                         return (SET_ERROR(ENOTSUP));
                 }
         }
@@ -1916,17 +1936,17 @@ zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
             (((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
             ((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
             (mask & ATTR_XVATTR))) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EINVAL));
         }
  
         if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EISDIR));
         }
  
         if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EINVAL));
         }
  
@@ -1982,7 +2002,8 @@ top:
          */
  
         if (mask & ATTR_SIZE) {
-               err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
+               err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr,
+                   mnt_ns);
                 if (err)
                         goto out3;
  
@@ -2007,13 +2028,15 @@ top:
             XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
             XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
                 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
-                   skipaclchk, cr);
+                   skipaclchk, cr, mnt_ns);
         }
  
         if (mask & (ATTR_UID|ATTR_GID)) {
                 int     idmask = (mask & (ATTR_UID|ATTR_GID));
                 int     take_owner;
                 int     take_group;
+               uid_t   uid;
+               gid_t   gid;
  
                 /*
                  * NOTE: even if a new mode is being set,
@@ -2027,9 +2050,13 @@ top:
                  * Take ownership or chgrp to group we are a member of
                  */
  
-               take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr));
+               uid = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ip),
+                   vap->va_uid);
+               gid = zfs_gid_to_vfsgid(mnt_ns, zfs_i_user_ns(ip),
+                   vap->va_gid);
+               take_owner = (mask & ATTR_UID) && (uid == crgetuid(cr));
                 take_group = (mask & ATTR_GID) &&
-                   zfs_groupmember(zfsvfs, vap->va_gid, cr);
+                   zfs_groupmember(zfsvfs, gid, cr);
  
                 /*
                  * If both ATTR_UID and ATTR_GID are set then take_owner and
@@ -2045,7 +2072,7 @@ top:
                     ((idmask == ATTR_UID) && take_owner) ||
                     ((idmask == ATTR_GID) && take_group)) {
                         if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
-                           skipaclchk, cr) == 0) {
+                           skipaclchk, cr, mnt_ns) == 0) {
                                 /*
                                  * Remove setuid/setgid for non-privileged users
                                  */
@@ -2158,12 +2185,12 @@ top:
         mutex_exit(&zp->z_lock);
  
         if (mask & ATTR_MODE) {
-               if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
+               if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
+                   mnt_ns) == 0) {
                         err = secpolicy_setid_setsticky_clear(ip, vap,
-                           &oldva, cr);
+                           &oldva, cr, mnt_ns, zfs_i_user_ns(ip));
                         if (err)
                                 goto out3;
-
                         trim_mask |= ATTR_MODE;
                 } else {
                         need_policy = TRUE;
@@ -2184,7 +2211,7 @@ top:
                         vap->va_mask &= ~trim_mask;
                 }
                 err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags,
-                   (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
+                   zfs_zaccess_unix, zp);
                 if (err)
                         goto out3;
  
@@ -2412,15 +2439,17 @@ top:
  
         if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
                 zp->z_atime_dirty = B_FALSE;
-               ZFS_TIME_ENCODE(&ip->i_atime, atime);
+               inode_timespec_t tmp_atime;
+               ZFS_TIME_ENCODE(&tmp_atime, atime);
+               zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_atime);
                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
                     &atime, sizeof (atime));
         }
  
         if (mask & (ATTR_MTIME | ATTR_SIZE)) {
                 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
-               ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate(
-                   vap->va_mtime, ZTOI(zp));
+               zpl_inode_set_mtime_to_ts(ZTOI(zp),
+                   zpl_inode_timestamp_truncate(vap->va_mtime, ZTOI(zp)));
  
                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
                     mtime, sizeof (mtime));
@@ -2428,8 +2457,8 @@ top:
  
         if (mask & (ATTR_CTIME | ATTR_SIZE)) {
                 ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
-               ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime,
-                   ZTOI(zp));
+               zpl_inode_set_ctime_to_ts(ZTOI(zp),
+                   zpl_inode_timestamp_truncate(vap->va_ctime, ZTOI(zp)));
                 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
                     ctime, sizeof (ctime));
         }
@@ -2529,10 +2558,10 @@ out:
                 dmu_tx_commit(tx);
                 if (attrzp) {
                         if (err2 == 0 && handle_eadir)
-                               err2 = zfs_setattr_dir(attrzp);
+                               err = zfs_setattr_dir(attrzp);
                         zrele(attrzp);
                 }
-               zfs_inode_update(zp);
+               zfs_znode_update_vfs(zp);
         }
  
  out2:
@@ -2543,7 +2572,7 @@ out3:
         kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
         kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks);
         kmem_free(tmpxvattr, sizeof (xvattr_t));
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (err);
  }
  
@@ -2654,16 +2683,18 @@ zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
   *             tnm     - New entry name.
   *             cr      - credentials of caller.
   *             flags   - case flags
+ *             rflags  - RENAME_* flags
+ *             wo_vap  - attributes for RENAME_WHITEOUT (must be a char 0:0).
+ *             mnt_ns  - user namespace of the mount
   *
   *     RETURN: 0 on success, error code on failure.
   *
   * Timestamps:
   *     sdzp,tdzp - ctime|mtime updated
   */
-/*ARGSUSED*/
  int
  zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
-    cred_t *cr, int flags)
+    cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
  {
         znode_t         *szp, *tzp;
         zfsvfs_t        *zfsvfs = ZTOZSB(sdzp);
@@ -2675,15 +2706,41 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
         int             error = 0;
         int             zflg = 0;
         boolean_t       waited = B_FALSE;
+       /* Needed for whiteout inode creation. */
+       boolean_t       fuid_dirtied;
+       zfs_acl_ids_t   acl_ids;
+       boolean_t       have_acl = B_FALSE;
+       znode_t         *wzp = NULL;
+
  
         if (snm == NULL || tnm == NULL)
                 return (SET_ERROR(EINVAL));
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(sdzp);
+       if (rflags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
+               return (SET_ERROR(EINVAL));
+
+       /* Already checked by Linux VFS, but just to make sure. */
+       if (rflags & RENAME_EXCHANGE &&
+           (rflags & (RENAME_NOREPLACE | RENAME_WHITEOUT)))
+               return (SET_ERROR(EINVAL));
+
+       /*
+        * Make sure we get wo_vap if and only if RENAME_WHITEOUT is set, and
+        * that it's the right kind of vattr_t for the whiteout file. These are
+        * set internally by ZFS so should never be incorrect.
+        */
+       VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
+       VERIFY_IMPLY(wo_vap, wo_vap->va_mode == S_IFCHR);
+       VERIFY_IMPLY(wo_vap, wo_vap->va_rdev == makedevice(0, 0));
+
+       if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
+               return (error);
         zilog = zfsvfs->z_log;
  
-       ZFS_VERIFY_ZP(tdzp);
+       if ((error = zfs_verify_zp(tdzp)) != 0) {
+               zfs_exit(zfsvfs, FTAG);
+               return (error);
+       }
  
         /*
          * We check i_sb because snapshots and the ctldir must have different
@@ -2691,13 +2748,13 @@ zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
          */
         if (ZTOI(tdzp)->i_sb != ZTOI(sdzp)->i_sb ||
             zfsctl_is_node(ZTOI(tdzp))) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EXDEV));
         }
  
         if (zfsvfs->z_utf8 && u8_validate(tnm,
             strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EILSEQ));
         }
  
@@ -2715,7 +2772,7 @@ top:
          * See the comment in zfs_link() for why this is considered bad.
          */
         if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EINVAL));
         }
  
@@ -2745,7 +2802,7 @@ top:
                          * the rename() function shall return successfully
                          * and perform no other action."
                          */
-                       ZFS_EXIT(zfsvfs);
+                       zfs_exit(zfsvfs, FTAG);
                         return (0);
                 }
                 /*
@@ -2817,7 +2874,7 @@ top:
  
                 if (strcmp(snm, "..") == 0)
                         serr = EINVAL;
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (serr);
         }
         if (terr) {
@@ -2829,7 +2886,7 @@ top:
  
                 if (strcmp(tnm, "..") == 0)
                         terr = EINVAL;
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (terr);
         }
  
@@ -2852,8 +2909,7 @@ top:
          * Note that if target and source are the same, this can be
          * done in a single check.
          */
-
-       if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
+       if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, mnt_ns)))
                 goto out;
  
         if (S_ISDIR(ZTOI(szp)->i_mode)) {
@@ -2869,17 +2925,19 @@ top:
          * Does target exist?
          */
         if (tzp) {
+               if (rflags & RENAME_NOREPLACE) {
+                       error = SET_ERROR(EEXIST);
+                       goto out;
+               }
                 /*
-                * Source and target must be the same type.
+                * Source and target must be the same type (unless exchanging).
                  */
-               if (S_ISDIR(ZTOI(szp)->i_mode)) {
-                       if (!S_ISDIR(ZTOI(tzp)->i_mode)) {
-                               error = SET_ERROR(ENOTDIR);
-                               goto out;
-                       }
-               } else {
-                       if (S_ISDIR(ZTOI(tzp)->i_mode)) {
-                               error = SET_ERROR(EISDIR);
+               if (!(rflags & RENAME_EXCHANGE)) {
+                       boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0;
+                       boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0;
+
+                       if (s_is_dir != t_is_dir) {
+                               error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR);
                                 goto out;
                         }
                 }
@@ -2892,12 +2950,43 @@ top:
                         error = 0;
                         goto out;
                 }
+       } else if (rflags & RENAME_EXCHANGE) {
+               /* Target must exist for RENAME_EXCHANGE. */
+               error = SET_ERROR(ENOENT);
+               goto out;
+       }
+
+       /* Set up inode creation for RENAME_WHITEOUT. */
+       if (rflags & RENAME_WHITEOUT) {
+               /*
+                * Whiteout files are not regular files or directories, so to
+                * match zfs_create() we do not inherit the project id.
+                */
+               uint64_t wo_projid = ZFS_DEFAULT_PROJID;
+
+               error = zfs_zaccess(sdzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns);
+               if (error)
+                       goto out;
+
+               if (!have_acl) {
+                       error = zfs_acl_ids_create(sdzp, 0, wo_vap, cr, NULL,
+                           &acl_ids, mnt_ns);
+                       if (error)
+                               goto out;
+                       have_acl = B_TRUE;
+               }
+
+               if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, wo_projid)) {
+                       error = SET_ERROR(EDQUOT);
+                       goto out;
+               }
         }
  
         tx = dmu_tx_create(zfsvfs->z_os);
         dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
         dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
-       dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
+       dmu_tx_hold_zap(tx, sdzp->z_id,
+           (rflags & RENAME_EXCHANGE) ? TRUE : FALSE, snm);
         dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
         if (sdzp != tdzp) {
                 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
@@ -2907,7 +2996,21 @@ top:
                 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
                 zfs_sa_upgrade_txholds(tx, tzp);
         }
+       if (rflags & RENAME_WHITEOUT) {
+               dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+                   ZFS_SA_BASE_ATTR_SIZE);
  
+               dmu_tx_hold_zap(tx, sdzp->z_id, TRUE, snm);
+               dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
+               if (!zfsvfs->z_use_sa &&
+                   acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+                       dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+                           0, acl_ids.z_aclp->z_acl_bytes);
+               }
+       }
+       fuid_dirtied = zfsvfs->z_fuid_dirty;
+       if (fuid_dirtied)
+               zfs_fuid_txhold(zfsvfs, tx);
         zfs_sa_upgrade_txholds(tx, szp);
         dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
         error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
@@ -2933,82 +3036,175 @@ top:
                 zrele(szp);
                 if (tzp)
                         zrele(tzp);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
-       if (tzp)        /* Attempt to remove the existing target */
-               error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
+       /*
+        * Unlink the source.
+        */
+       szp->z_pflags |= ZFS_AV_MODIFIED;
+       if (tdzp->z_pflags & ZFS_PROJINHERIT)
+               szp->z_pflags |= ZFS_PROJINHERIT;
  
-       if (error == 0) {
-               error = zfs_link_create(tdl, szp, tx, ZRENAMING);
-               if (error == 0) {
-                       szp->z_pflags |= ZFS_AV_MODIFIED;
-                       if (tdzp->z_pflags & ZFS_PROJINHERIT)
-                               szp->z_pflags |= ZFS_PROJINHERIT;
-
-                       error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
-                           (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+       error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+           (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+       VERIFY0(error);
+
+       error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
+       if (error)
+               goto commit;
+
+       /*
+        * Unlink the target.
+        */
+       if (tzp) {
+               int tzflg = zflg;
+
+               if (rflags & RENAME_EXCHANGE) {
+                       /* This inode will be re-linked soon. */
+                       tzflg |= ZRENAMING;
+
+                       tzp->z_pflags |= ZFS_AV_MODIFIED;
+                       if (sdzp->z_pflags & ZFS_PROJINHERIT)
+                               tzp->z_pflags |= ZFS_PROJINHERIT;
+
+                       error = sa_update(tzp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+                           (void *)&tzp->z_pflags, sizeof (uint64_t), tx);
                         ASSERT0(error);
+               }
+               error = zfs_link_destroy(tdl, tzp, tx, tzflg, NULL);
+               if (error)
+                       goto commit_link_szp;
+       }
  
-                       error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
-                       if (error == 0) {
-                               zfs_log_rename(zilog, tx, TX_RENAME |
-                                   (flags & FIGNORECASE ? TX_CI : 0), sdzp,
-                                   sdl->dl_name, tdzp, tdl->dl_name, szp);
-                       } else {
-                               /*
-                                * At this point, we have successfully created
-                                * the target name, but have failed to remove
-                                * the source name.  Since the create was done
-                                * with the ZRENAMING flag, there are
-                                * complications; for one, the link count is
-                                * wrong.  The easiest way to deal with this
-                                * is to remove the newly created target, and
-                                * return the original error.  This must
-                                * succeed; fortunately, it is very unlikely to
-                                * fail, since we just created it.
-                                */
-                               VERIFY3U(zfs_link_destroy(tdl, szp, tx,
-                                   ZRENAMING, NULL), ==, 0);
-                       }
-               } else {
-                       /*
-                        * If we had removed the existing target, subsequent
-                        * call to zfs_link_create() to add back the same entry
-                        * but, the new dnode (szp) should not fail.
-                        */
-                       ASSERT(tzp == NULL);
+       /*
+        * Create the new target links:
+        *   * We always link the target.
+        *   * RENAME_EXCHANGE: Link the old target to the source.
+        *   * RENAME_WHITEOUT: Create a whiteout inode in place of the source.
+        */
+       error = zfs_link_create(tdl, szp, tx, ZRENAMING);
+       if (error) {
+               /*
+                * If we have removed the existing target, a subsequent call to
+                * zfs_link_create() to add back the same entry, but with a new
+                * dnode (szp), should not fail.
+                */
+               ASSERT3P(tzp, ==, NULL);
+               goto commit_link_tzp;
+       }
+
+       switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
+       case RENAME_EXCHANGE:
+               error = zfs_link_create(sdl, tzp, tx, ZRENAMING);
+               /*
+                * The same argument as zfs_link_create() failing for
+                * szp applies here, since the source directory must
+                * have had an entry we are replacing.
+                */
+               ASSERT0(error);
+               if (error)
+                       goto commit_unlink_td_szp;
+               break;
+       case RENAME_WHITEOUT:
+               zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids);
+               error = zfs_link_create(sdl, wzp, tx, ZNEW);
+               if (error) {
+                       zfs_znode_delete(wzp, tx);
+                       remove_inode_hash(ZTOI(wzp));
+                       goto commit_unlink_td_szp;
                 }
+               break;
         }
  
+       if (fuid_dirtied)
+               zfs_fuid_sync(zfsvfs, tx);
+
+       switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
+       case RENAME_EXCHANGE:
+               zfs_log_rename_exchange(zilog, tx,
+                   (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
+                   tdzp, tdl->dl_name, szp);
+               break;
+       case RENAME_WHITEOUT:
+               zfs_log_rename_whiteout(zilog, tx,
+                   (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
+                   tdzp, tdl->dl_name, szp, wzp);
+               break;
+       default:
+               ASSERT0(rflags & ~RENAME_NOREPLACE);
+               zfs_log_rename(zilog, tx, (flags & FIGNORECASE ? TX_CI : 0),
+                   sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp);
+               break;
+       }
+
+commit:
         dmu_tx_commit(tx);
  out:
-       if (zl != NULL)
-               zfs_rename_unlock(&zl);
-
-       zfs_dirent_unlock(sdl);
-       zfs_dirent_unlock(tdl);
+       if (have_acl)
+               zfs_acl_ids_free(&acl_ids);
  
-       zfs_inode_update(sdzp);
+       zfs_znode_update_vfs(sdzp);
         if (sdzp == tdzp)
                 rw_exit(&sdzp->z_name_lock);
  
         if (sdzp != tdzp)
-               zfs_inode_update(tdzp);
+               zfs_znode_update_vfs(tdzp);
  
-       zfs_inode_update(szp);
+       zfs_znode_update_vfs(szp);
         zrele(szp);
+       if (wzp) {
+               zfs_znode_update_vfs(wzp);
+               zrele(wzp);
+       }
         if (tzp) {
-               zfs_inode_update(tzp);
+               zfs_znode_update_vfs(tzp);
                 zrele(tzp);
         }
  
+       if (zl != NULL)
+               zfs_rename_unlock(&zl);
+
+       zfs_dirent_unlock(sdl);
+       zfs_dirent_unlock(tdl);
+
         if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
                 zil_commit(zilog, 0);
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
+
+       /*
+        * Clean-up path for broken link state.
+        *
+        * At this point we are in a (very) bad state, so we need to do our
+        * best to correct the state. In particular, all of the nlinks are
+        * wrong because we were destroying and creating links with ZRENAMING.
+        *
+        * In some form, all of these operations have to resolve the state:
+        *
+        *  * link_destroy() *must* succeed. Fortunately, this is very likely
+        *    since we only just created it.
+        *
+        *  * link_create()s are allowed to fail (though they shouldn't because
+        *    we only just unlinked them and are putting the entries back
+        *    during clean-up). But if they fail, we can just forcefully drop
+        *    the nlink value to (at the very least) avoid broken nlink values
+        *    -- though in the case of non-empty directories we will have to
+        *    panic (otherwise we'd have a leaked directory with a broken ..).
+        */
+commit_unlink_td_szp:
+       VERIFY0(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL));
+commit_link_tzp:
+       if (tzp) {
+               if (zfs_link_create(tdl, tzp, tx, ZRENAMING))
+                       VERIFY0(zfs_drop_nlink(tzp, tx, NULL));
+       }
+commit_link_szp:
+       if (zfs_link_create(sdl, szp, tx, ZRENAMING))
+               VERIFY0(zfs_drop_nlink(szp, tx, NULL));
+       goto commit;
  }
  
  /*
@@ -3020,6 +3216,7 @@ out:
   *             link    - Name for new symlink entry.
   *             cr      - credentials of caller.
   *             flags   - case flags
+ *             mnt_ns  - user namespace of the mount
   *
   *     OUT:    zpp     - Znode for new symbolic link.
   *
@@ -3028,10 +3225,9 @@ out:
   * Timestamps:
   *     dip - ctime|mtime updated
   */
-/*ARGSUSED*/
  int
  zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
-    znode_t **zpp, cred_t *cr, int flags)
+    znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
  {
         znode_t         *zp;
         zfs_dirlock_t   *dl;
@@ -3051,26 +3247,26 @@ zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
         if (name == NULL)
                 return (SET_ERROR(EINVAL));
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(dzp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+               return (error);
         zilog = zfsvfs->z_log;
  
         if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
             NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EILSEQ));
         }
         if (flags & FIGNORECASE)
                 zflg |= ZCILOOK;
  
         if (len > MAXPATHLEN) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(ENAMETOOLONG));
         }
  
         if ((error = zfs_acl_ids_create(dzp, 0,
-           vap, cr, NULL, &acl_ids)) != 0) {
-               ZFS_EXIT(zfsvfs);
+           vap, cr, NULL, &acl_ids, mnt_ns)) != 0) {
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  top:
@@ -3082,21 +3278,21 @@ top:
         error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
         if (error) {
                 zfs_acl_ids_free(&acl_ids);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
-       if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+       if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
                 zfs_acl_ids_free(&acl_ids);
                 zfs_dirent_unlock(dl);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
         if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
                 zfs_acl_ids_free(&acl_ids);
                 zfs_dirent_unlock(dl);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EDQUOT));
         }
         tx = dmu_tx_create(zfsvfs->z_os);
@@ -3123,13 +3319,13 @@ top:
                 }
                 zfs_acl_ids_free(&acl_ids);
                 dmu_tx_abort(tx);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
         /*
          * Create a new object for the symlink.
-        * for version 4 ZPL datsets the symlink will be an SA attribute
+        * for version 4 ZPL datasets the symlink will be an SA attribute
          */
         zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
  
@@ -3159,8 +3355,8 @@ top:
                         txtype |= TX_CI;
                 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
  
-               zfs_inode_update(dzp);
-               zfs_inode_update(zp);
+               zfs_znode_update_vfs(dzp);
+               zfs_znode_update_vfs(zp);
         }
  
         zfs_acl_ids_free(&acl_ids);
@@ -3178,7 +3374,7 @@ top:
                 zrele(zp);
         }
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
@@ -3196,16 +3392,16 @@ top:
   * Timestamps:
   *     ip - atime updated
   */
-/* ARGSUSED */
  int
-zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr)
+zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
  {
+       (void) cr;
         znode_t         *zp = ITOZ(ip);
         zfsvfs_t        *zfsvfs = ITOZSB(ip);
         int             error;
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (error);
  
         mutex_enter(&zp->z_lock);
         if (zp->z_is_sa)
@@ -3215,7 +3411,7 @@ zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr)
                 error = zfs_sa_readlink(zp, uio);
         mutex_exit(&zp->z_lock);
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
@@ -3235,7 +3431,6 @@ zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr)
   *     tdzp - ctime|mtime updated
   *      szp - ctime updated
   */
-/* ARGSUSED */
  int
  zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
      int flags)
@@ -3261,8 +3456,8 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
         if (name == NULL)
                 return (SET_ERROR(EINVAL));
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(tdzp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
+               return (error);
         zilog = zfsvfs->z_log;
  
         /*
@@ -3270,11 +3465,14 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
          * Better choices include ENOTSUP or EISDIR.
          */
         if (S_ISDIR(sip->i_mode)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EPERM));
         }
  
-       ZFS_VERIFY_ZP(szp);
+       if ((error = zfs_verify_zp(szp)) != 0) {
+               zfs_exit(zfsvfs, FTAG);
+               return (error);
+       }
  
         /*
          * If we are using project inheritance, means if the directory has
@@ -3285,7 +3483,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
          */
         if (tdzp->z_pflags & ZFS_PROJINHERIT &&
             tdzp->z_projid != szp->z_projid) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EXDEV));
         }
  
@@ -3294,7 +3492,7 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
          * super blocks.
          */
         if (sip->i_sb != ZTOI(tdzp)->i_sb || zfsctl_is_node(sip)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EXDEV));
         }
  
@@ -3302,17 +3500,17 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
  
         if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
             &parent, sizeof (uint64_t))) != 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
         if (parent == zfsvfs->z_shares_dir) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EPERM));
         }
  
         if (zfsvfs->z_utf8 && u8_validate(name,
             strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EILSEQ));
         }
         if (flags & FIGNORECASE)
@@ -3325,19 +3523,20 @@ zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
          * imposed in attribute space.
          */
         if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EINVAL));
         }
  
         owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
             cr, ZFS_OWNER);
         if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EPERM));
         }
  
-       if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
-               ZFS_EXIT(zfsvfs);
+       if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr,
+           zfs_init_idmap))) {
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
@@ -3347,7 +3546,7 @@ top:
          */
         error = zfs_dirent_lock(&dl, tdzp, name, &tzp, zf, NULL, NULL);
         if (error) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
@@ -3369,7 +3568,7 @@ top:
                         goto top;
                 }
                 dmu_tx_abort(tx);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
         /* unmark z_unlinked so zfs_link_create will not reject */
@@ -3409,14 +3608,14 @@ top:
         if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED)
                 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
  
-       zfs_inode_update(tdzp);
-       zfs_inode_update(szp);
-       ZFS_EXIT(zfsvfs);
+       zfs_znode_update_vfs(tdzp);
+       zfs_znode_update_vfs(szp);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
  static void
-zfs_putpage_commit_cb(void *arg)
+zfs_putpage_sync_commit_cb(void *arg)
  {
         struct page *pp = arg;
  
@@ -3424,13 +3623,26 @@ zfs_putpage_commit_cb(void *arg)
         end_page_writeback(pp);
  }
  
+/*
+ * Commit callback passed to zfs_log_write() by zfs_putpage() for pages
+ * written with for_sync == B_FALSE.  It ends writeback on the page and
+ * drops the z_async_writes_cnt count that zfs_putpage() took for it.
+ */
+static void
+zfs_putpage_async_commit_cb(void *arg)
+{
+       struct page *pp = arg;
+       znode_t *zp = ITOZ(pp->mapping->host);
+
+       ClearPageError(pp);
+       end_page_writeback(pp);
+       atomic_dec_32(&zp->z_async_writes_cnt);
+}
+
  /*
   * Push a page out to disk, once the page is on stable storage the
   * registered commit callback will be run as notification of completion.
   *
- *     IN:     ip      - page mapped for inode.
- *             pp      - page to push (page is locked)
- *             wbc     - writeback control data
+ *     IN:     ip       - page mapped for inode.
+ *             pp       - page to push (page is locked)
+ *             wbc      - writeback control data
+ *             for_sync - does the caller intend to wait synchronously for the
+ *                        page writeback to complete?
   *
   *     RETURN: 0 if success
   *             error code if failure
@@ -3438,9 +3650,9 @@ zfs_putpage_commit_cb(void *arg)
   * Timestamps:
   *     ip - ctime|mtime updated
   */
-/* ARGSUSED */
  int
-zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
+    boolean_t for_sync)
  {
         znode_t         *zp = ITOZ(ip);
         zfsvfs_t        *zfsvfs = ITOZSB(ip);
@@ -3451,12 +3663,13 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
         caddr_t         va;
         int             err = 0;
         uint64_t        mtime[2], ctime[2];
+       inode_timespec_t tmp_ts;
         sa_bulk_attr_t  bulk[3];
         int             cnt = 0;
         struct address_space *mapping;
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (err);
  
         ASSERT(PageLocked(pp));
  
@@ -3468,7 +3681,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
         /* Page is beyond end of file */
         if (pgoff >= offset) {
                 unlock_page(pp);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (0);
         }
  
@@ -3528,7 +3741,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
         if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
                 unlock_page(pp);
                 zfs_rangelock_exit(lr);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (0);
         }
  
@@ -3538,11 +3751,25 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
                 zfs_rangelock_exit(lr);
  
                 if (wbc->sync_mode != WB_SYNC_NONE) {
+                       /*
+                        * Speed up any non-sync page writebacks since
+                        * they may take several seconds to complete.
+                        * Refer to the comment in zpl_fsync() (when
+                        * HAVE_FSYNC_RANGE is defined) for details.
+                        */
+                       if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+                               zil_commit(zfsvfs->z_log, zp->z_id);
+                       }
+
                         if (PageWriteback(pp))
+#ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
+                               folio_wait_bit(page_folio(pp), PG_writeback);
+#else
                                 wait_on_page_bit(pp, PG_writeback);
+#endif
                 }
  
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (0);
         }
  
@@ -3550,7 +3777,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
         if (!clear_page_dirty_for_io(pp)) {
                 unlock_page(pp);
                 zfs_rangelock_exit(lr);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (0);
         }
  
@@ -3559,6 +3786,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
          * was in fact not skipped and should not be counted as if it were.
          */
         wbc->pages_skipped--;
+       if (!for_sync)
+               atomic_inc_32(&zp->z_async_writes_cnt);
         set_page_writeback(pp);
         unlock_page(pp);
  
@@ -3573,11 +3802,17 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
                         dmu_tx_wait(tx);
  
                 dmu_tx_abort(tx);
+#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
+               filemap_dirty_folio(page_mapping(pp), page_folio(pp));
+#else
                 __set_page_dirty_nobuffers(pp);
+#endif
                 ClearPageError(pp);
                 end_page_writeback(pp);
+               if (!for_sync)
+                       atomic_dec_32(&zp->z_async_writes_cnt);
                 zfs_rangelock_exit(lr);
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (err);
         }
  
@@ -3592,29 +3827,49 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
             &zp->z_pflags, 8);
  
         /* Preserve the mtime and ctime provided by the inode */
-       ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-       ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+       tmp_ts = zpl_inode_get_mtime(ip);
+       ZFS_TIME_ENCODE(&tmp_ts, mtime);
+       tmp_ts = zpl_inode_get_ctime(ip);
+       ZFS_TIME_ENCODE(&tmp_ts, ctime);
         zp->z_atime_dirty = B_FALSE;
         zp->z_seq++;
  
         err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
  
-       zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
-           zfs_putpage_commit_cb, pp);
-       dmu_tx_commit(tx);
-
-       zfs_rangelock_exit(lr);
-
+       boolean_t commit = B_FALSE;
         if (wbc->sync_mode != WB_SYNC_NONE) {
                 /*
                  * Note that this is rarely called under writepages(), because
                  * writepages() normally handles the entire commit for
                  * performance reasons.
                  */
-               zil_commit(zfsvfs->z_log, zp->z_id);
+               commit = B_TRUE;
+       } else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
+               /*
+                * If the caller does not intend to wait synchronously
+                * for this page writeback to complete and there are active
+                * synchronous calls on this file, do a commit so that
+                * the latter don't accidentally end up waiting for
+                * our writeback to complete. Refer to the comment in
+                * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
+                */
+               commit = B_TRUE;
         }
  
-       ZFS_EXIT(zfsvfs);
+       zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit,
+           for_sync ? zfs_putpage_sync_commit_cb :
+           zfs_putpage_async_commit_cb, pp);
+
+       dmu_tx_commit(tx);
+
+       zfs_rangelock_exit(lr);
+
+       if (commit)
+               zil_commit(zfsvfs->z_log, zp->z_id);
+
+       dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
+
+       zfs_exit(zfsvfs, FTAG);
         return (err);
  }
  
@@ -3629,6 +3884,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
         zfsvfs_t        *zfsvfs = ITOZSB(ip);
         dmu_tx_t        *tx;
         uint64_t        mode, atime[2], mtime[2], ctime[2];
+       inode_timespec_t tmp_ts;
         sa_bulk_attr_t  bulk[4];
         int             error = 0;
         int             cnt = 0;
@@ -3636,8 +3892,8 @@ zfs_dirty_inode(struct inode *ip, int flags)
         if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
                 return (0);
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (error);
  
  #ifdef I_DIRTY_TIME
         /*
@@ -3673,9 +3929,12 @@ zfs_dirty_inode(struct inode *ip, int flags)
         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
  
         /* Preserve the mode, atime, mtime and ctime provided by the inode */
-       ZFS_TIME_ENCODE(&ip->i_atime, atime);
-       ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
-       ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+       tmp_ts = zpl_inode_get_atime(ip);
+       ZFS_TIME_ENCODE(&tmp_ts, atime);
+       tmp_ts = zpl_inode_get_mtime(ip);
+       ZFS_TIME_ENCODE(&tmp_ts, mtime);
+       tmp_ts = zpl_inode_get_ctime(ip);
+       ZFS_TIME_ENCODE(&tmp_ts, ctime);
         mode = ip->i_mode;
  
         zp->z_mode = mode;
@@ -3685,11 +3944,10 @@ zfs_dirty_inode(struct inode *ip, int flags)
  
         dmu_tx_commit(tx);
  out:
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
-/*ARGSUSED*/
  void
  zfs_inactive(struct inode *ip)
  {
@@ -3719,7 +3977,9 @@ zfs_inactive(struct inode *ip)
                 if (error) {
                         dmu_tx_abort(tx);
                 } else {
-                       ZFS_TIME_ENCODE(&ip->i_atime, atime);
+                       inode_timespec_t tmp_atime;
+                       tmp_atime = zpl_inode_get_atime(ip);
+                       ZFS_TIME_ENCODE(&tmp_atime, atime);
                         mutex_enter(&zp->z_lock);
                         (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
                             (void *)&atime, sizeof (atime), tx);
@@ -3738,79 +3998,68 @@ zfs_inactive(struct inode *ip)
   * Fill pages with data from the disk.
   */
  static int
-zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_fillpage(struct inode *ip, struct page *pp)
  {
-       znode_t *zp = ITOZ(ip);
         zfsvfs_t *zfsvfs = ITOZSB(ip);
-       objset_t *os;
-       struct page *cur_pp;
-       u_offset_t io_off, total;
-       size_t io_len;
-       loff_t i_size;
-       unsigned page_idx;
-       int err;
+       loff_t i_size = i_size_read(ip);
+       u_offset_t io_off = page_offset(pp);
+       size_t io_len = PAGE_SIZE;
  
-       os = zfsvfs->z_os;
-       io_len = nr_pages << PAGE_SHIFT;
-       i_size = i_size_read(ip);
-       io_off = page_offset(pl[0]);
+       /* The page is expected to start before the end of the file. */
+       ASSERT3U(io_off, <, i_size);
  
+       /* Clamp the read so it does not run past the end of the file. */
         if (io_off + io_len > i_size)
                 io_len = i_size - io_off;
  
-       /*
-        * Iterate over list of pages and read each page individually.
-        */
-       page_idx = 0;
-       for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
-               caddr_t va;
+       /*
+        * Read io_len bytes of the object into the mapped page, then zero
+        * whatever part of the page lies beyond io_len.
+        */
+       void *va = kmap(pp);
+       int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off,
+           io_len, va, DMU_READ_PREFETCH);
+       if (io_len != PAGE_SIZE)
+               memset((char *)va + io_len, 0, PAGE_SIZE - io_len);
+       kunmap(pp);
  
-               cur_pp = pl[page_idx++];
-               va = kmap(cur_pp);
-               err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
-                   DMU_READ_PREFETCH);
-               kunmap(cur_pp);
-               if (err) {
-                       /* convert checksum errors into IO errors */
-                       if (err == ECKSUM)
-                               err = SET_ERROR(EIO);
-                       return (err);
-               }
+       /* Record the outcome of the read in the page's error/uptodate flags. */
+       if (error) {
+               /* convert checksum errors into IO errors */
+               if (error == ECKSUM)
+                       error = SET_ERROR(EIO);
+
+               SetPageError(pp);
+               ClearPageUptodate(pp);
+       } else {
+               ClearPageError(pp);
+               SetPageUptodate(pp);
         }
  
-       return (0);
+       return (error);
  }
  
  /*
- * Uses zfs_fillpage to read data from the file and fill the pages.
+ * Uses zfs_fillpage to read data from the file and fill the page.
   *
   *     IN:     ip       - inode of file to get data from.
- *             pl       - list of pages to read
- *             nr_pages - number of pages to read
+ *             pp       - page to read
   *
   *     RETURN: 0 on success, error code on failure.
   *
   * Timestamps:
   *     vp - atime updated
   */
-/* ARGSUSED */
  int
-zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_getpage(struct inode *ip, struct page *pp)
  {
-       znode_t  *zp  = ITOZ(ip);
         zfsvfs_t *zfsvfs = ITOZSB(ip);
-       int      err;
+       znode_t *zp = ITOZ(ip);
+       int error;
  
-       if (pl == NULL)
-               return (0);
+       /* Enter the filesystem and verify zp; paired with zfs_exit() below. */
+       if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (error);
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       /*
+        * Only successful reads are counted in the dataset read kstats, and
+        * a whole PAGE_SIZE is charged even when zfs_fillpage() clamps the
+        * read at end of file.
+        */
+       error = zfs_fillpage(ip, pp);
+       if (error == 0)
+               dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);
  
-       err = zfs_fillpage(ip, pl, nr_pages);
+       zfs_exit(zfsvfs, FTAG);
  
-       ZFS_EXIT(zfsvfs);
-       return (err);
+       return (error);
  }
  
  /*
@@ -3825,35 +4074,36 @@ zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
   *     RETURN: 0 if success
   *             error code if failure
   */
-/*ARGSUSED*/
  int
  zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
      unsigned long vm_flags)
  {
+       /*
+        * Validate a mapping request for ip.  Shared writable mappings of
+        * immutable, read-only or append-only files fail with EPERM, readable
+        * or executable mappings of quarantined files fail with EACCES, and
+        * out-of-range offsets fail with ENXIO.  This function only performs
+        * these checks; addrp is unused.
+        */
+       (void) addrp;
         znode_t  *zp = ITOZ(ip);
         zfsvfs_t *zfsvfs = ITOZSB(ip);
+       int error;
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (error);
  
-       if ((vm_flags & VM_WRITE) && (zp->z_pflags &
-           (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
-               ZFS_EXIT(zfsvfs);
+       /* A writable mapping without VM_SHARED is not rejected here. */
+       if ((vm_flags & VM_WRITE) && (vm_flags & VM_SHARED) &&
+           (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EPERM));
         }
  
         if ((vm_flags & (VM_READ | VM_EXEC)) &&
             (zp->z_pflags & ZFS_AV_QUARANTINED)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EACCES));
         }
  
+       /* Reject a negative offset or a range extending past MAXOFFSET_T. */
         if (off < 0 || len > MAXOFFSET_T - off) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(ENXIO));
         }
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (0);
  }
  
@@ -3875,20 +4125,20 @@ zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
   * Timestamps:
   *     zp - ctime|mtime updated
   */
-/* ARGSUSED */
  int
  zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
      offset_t offset, cred_t *cr)
  {
+       (void) offset;
         zfsvfs_t        *zfsvfs = ZTOZSB(zp);
         uint64_t        off, len;
         int             error;
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+               return (error);
  
         if (cmd != F_FREESP) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EINVAL));
         }
  
@@ -3897,12 +4147,12 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
          * so check it explicitly here.
          */
         if (zfs_is_readonly(zfsvfs)) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EROFS));
         }
  
         if (bfp->l_len < 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (SET_ERROR(EINVAL));
         }
  
@@ -3912,8 +4162,9 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
          * On Linux we can get here through truncate_range() which
          * operates directly on inodes, so we need to check access rights.
          */
-       if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
-               ZFS_EXIT(zfsvfs);
+       if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr,
+           zfs_init_idmap))) {
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
@@ -3922,11 +4173,10 @@ zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
  
         error = zfs_freesp(zp, off, len, flag, TRUE);
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (error);
  }
  
-/*ARGSUSED*/
  int
  zfs_fid(struct inode *ip, fid_t *fidp)
  {
@@ -3938,12 +4188,23 @@ zfs_fid(struct inode *ip, fid_t *fidp)
         zfid_short_t    *zfid;
         int             size, i, error;
  
-       ZFS_ENTER(zfsvfs);
-       ZFS_VERIFY_ZP(zp);
+       if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+               return (error);
+
+       if (fidp->fid_len < SHORT_FID_LEN) {
+               fidp->fid_len = SHORT_FID_LEN;
+               zfs_exit(zfsvfs, FTAG);
+               return (SET_ERROR(ENOSPC));
+       }
+
+       if ((error = zfs_verify_zp(zp)) != 0) {
+               zfs_exit(zfsvfs, FTAG);
+               return (error);
+       }
  
         if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
             &gen64, sizeof (uint64_t))) != 0) {
-               ZFS_EXIT(zfsvfs);
+               zfs_exit(zfsvfs, FTAG);
                 return (error);
         }
  
@@ -3964,7 +4225,7 @@ zfs_fid(struct inode *ip, fid_t *fidp)
         for (i = 0; i < sizeof (zfid->zf_gen); i++)
                 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
  
-       ZFS_EXIT(zfsvfs);
+       zfs_exit(zfsvfs, FTAG);
         return (0);
  }
  
@@ -3992,9 +4253,12 @@ EXPORT_SYMBOL(zfs_putpage);
  EXPORT_SYMBOL(zfs_dirty_inode);
  EXPORT_SYMBOL(zfs_map);
  
-/* BEGIN CSTYLED */
+/* CSTYLED */
  module_param(zfs_delete_blocks, ulong, 0644);
  MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
-/* END CSTYLED */
+
+/* CSTYLED */
+module_param(zfs_bclone_enabled, uint, 0644);
+MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
  
  #endif