* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* to freed memory. The example below illustrates the following Big Rules:
*
* (1) A check must be made in each zfs thread for a mounted file system.
- * This is done avoiding races using ZFS_ENTER(zfsvfs).
- * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes
- * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
+ * This is done avoiding races using zfs_enter(zfsvfs).
+ * A zfs_exit(zfsvfs) is needed before all returns. Any znodes
+ * must be checked with zfs_verify_zp(zp). Both of these macros
* can return EIO from the calling function.
*
- * (2) zrele() should always be the last thing except for zil_commit()
- * (if necessary) and ZFS_EXIT(). This is for 3 reasons:
- * First, if it's the last reference, the vnode/znode
- * can be freed, so the zp may point to freed memory. Second, the last
- * reference will call zfs_zinactive(), which may induce a lot of work --
- * pushing cached pages (which acquires range locks) and syncing out
- * cached atime changes. Third, zfs_zinactive() may require a new tx,
- * which could deadlock the system if you were already holding one.
- * If you must call zrele() within a tx then use zfs_zrele_async().
+ * (2) zrele() should always be the last thing except for zil_commit() (if
+ * necessary) and zfs_exit(). This is for 3 reasons: First, if it's the
+ * last reference, the vnode/znode can be freed, so the zp may point to
+ * freed memory. Second, the last reference will call zfs_zinactive(),
+ * which may induce a lot of work -- pushing cached pages (which acquires
+ * range locks) and syncing out cached atime changes. Third,
+ * zfs_zinactive() may require a new tx, which could deadlock the system
+ * if you were already holding one. This deadlock occurs because the tx
+ * currently being operated on prevents a txg from syncing, which
+ * prevents the new tx from progressing, resulting in a deadlock. If you
+ * must call zrele() within a tx, use zfs_zrele_async(). Note that iput()
+ * is a synonym for zrele().
*
* (3) All range locks must be grabbed before calling dmu_tx_assign(),
* as they can span dmu_tx_assign() calls.
* dmu_tx_assign(). This is critical because we don't want to block
* while holding locks.
*
- * If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT. This
+ * If no ZPL locks are held (aside from zfs_enter()), use TXG_WAIT. This
* reduces lock contention and CPU usage when we must wait (note that if
* throughput is constrained by the storage, nearly every transaction
* must wait).
*
* In general, this is how things should be ordered in each vnode op:
*
- * ZFS_ENTER(zfsvfs); // exit if unmounted
+ * zfs_enter(zfsvfs); // exit if unmounted
* top:
* zfs_dirent_lock(&dl, ...) // lock directory entry (may igrab())
* rw_enter(...); // grab any other locks you need
* goto top;
* }
* dmu_tx_abort(tx); // abort DMU tx
- * ZFS_EXIT(zfsvfs); // finished in zfs
+ * zfs_exit(zfsvfs); // finished in zfs
* return (error); // really out of space
* }
* error = do_real_work(); // do whatever this VOP does
* zfs_dirent_unlock(dl); // unlock directory entry
* zrele(...); // release held znodes
* zil_commit(zilog, foid); // synchronous when necessary
- * ZFS_EXIT(zfsvfs); // finished in zfs
+ * zfs_exit(zfsvfs); // finished in zfs
* return (error); // done, report error
*/
-
-/*
- * Virus scanning is unsupported. It would be possible to add a hook
- * here to performance the required virus scan. This could be done
- * entirely in the kernel or potentially as an update to invoke a
- * scanning utility.
- */
-static int
-zfs_vscan(struct inode *ip, cred_t *cr, int async)
-{
- return (0);
-}
-
-/* ARGSUSED */
int
zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
{
+ (void) cr;
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
+ int error;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
/* Honor ZFS_APPENDONLY file attribute */
- if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
+ if (blk_mode_is_open_write(mode) && (zp->z_pflags & ZFS_APPENDONLY) &&
((flag & O_APPEND) == 0)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EPERM));
}
- /* Virus scan eligible files on open */
- if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
- !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
- if (zfs_vscan(ip, cr, 0) != 0) {
- ZFS_EXIT(zfsvfs);
- return (SET_ERROR(EACCES));
- }
+ /*
+ * Keep a count of the synchronous opens in the znode. On first
+ * synchronous open we must convert all previous async transactions
+ * into sync to keep correct ordering.
+ */
+ if (flag & O_SYNC) {
+ if (atomic_inc_32_nv(&zp->z_sync_cnt) == 1)
+ zil_async_to_sync(zfsvfs->z_log, zp->z_id);
}
- /* Keep a count of the synchronous opens in the znode */
- if (flag & O_SYNC)
- atomic_inc_32(&zp->z_sync_cnt);
-
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
-/* ARGSUSED */
int
zfs_close(struct inode *ip, int flag, cred_t *cr)
{
+ (void) cr;
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
+ int error;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
/* Decrement the synchronous opens in the znode */
if (flag & O_SYNC)
atomic_dec_32(&zp->z_sync_cnt);
- if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
- !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
- VERIFY(zfs_vscan(ip, cr, 1) == 0);
-
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
#if defined(_KERNEL)
+
+static int zfs_fillpage(struct inode *ip, struct page *pp);
+
/*
* When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages. What this means:
- *
- * On Write: If we find a memory mapped page, we write to *both*
- * the page and the dmu buffer.
+ * between the DMU cache and the memory mapped pages. Update all mapped
+ * pages with the contents of the coresponding dmu buffer.
*/
void
update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
{
- struct inode *ip = ZTOI(zp);
- struct address_space *mp = ip->i_mapping;
- struct page *pp;
- uint64_t nbytes;
- int64_t off;
- void *pb;
+ struct address_space *mp = ZTOI(zp)->i_mapping;
+ int64_t off = start & (PAGE_SIZE - 1);
- off = start & (PAGE_SIZE-1);
for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
- nbytes = MIN(PAGE_SIZE - off, len);
+ uint64_t nbytes = MIN(PAGE_SIZE - off, len);
- pp = find_lock_page(mp, start >> PAGE_SHIFT);
+ struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
if (pp) {
if (mapping_writably_mapped(mp))
flush_dcache_page(pp);
- pb = kmap(pp);
- (void) dmu_read(os, zp->z_id, start + off, nbytes,
- pb + off, DMU_READ_PREFETCH);
+ void *pb = kmap(pp);
+ int error = dmu_read(os, zp->z_id, start + off,
+ nbytes, pb + off, DMU_READ_PREFETCH);
kunmap(pp);
- if (mapping_writably_mapped(mp))
- flush_dcache_page(pp);
+ if (error) {
+ SetPageError(pp);
+ ClearPageUptodate(pp);
+ } else {
+ ClearPageError(pp);
+ SetPageUptodate(pp);
+
+ if (mapping_writably_mapped(mp))
+ flush_dcache_page(pp);
+
+ mark_page_accessed(pp);
+ }
- mark_page_accessed(pp);
- SetPageUptodate(pp);
- ClearPageError(pp);
unlock_page(pp);
put_page(pp);
}
}
/*
- * When a file is memory mapped, we must keep the IO data synchronized
- * between the DMU cache and the memory mapped pages. What this means:
- *
- * On Read: We "read" preferentially from memory mapped pages,
- * else we default from the dmu buffer.
- *
- * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
- * the file is memory mapped.
+ * When a file is memory mapped, we must keep the I/O data synchronized
+ * between the DMU cache and the memory mapped pages. Preferentially read
+ * from memory mapped pages, otherwise fallback to reading through the dmu.
*/
int
-mappedread(znode_t *zp, int nbytes, uio_t *uio)
+mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
{
struct inode *ip = ZTOI(zp);
struct address_space *mp = ip->i_mapping;
- struct page *pp;
- int64_t start, off;
- uint64_t bytes;
+ int64_t start = uio->uio_loffset;
+ int64_t off = start & (PAGE_SIZE - 1);
int len = nbytes;
int error = 0;
- void *pb;
- start = uio->uio_loffset;
- off = start & (PAGE_SIZE-1);
for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
- bytes = MIN(PAGE_SIZE - off, len);
+ uint64_t bytes = MIN(PAGE_SIZE - off, len);
- pp = find_lock_page(mp, start >> PAGE_SHIFT);
+ struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
if (pp) {
- ASSERT(PageUptodate(pp));
+ /*
+ * If filemap_fault() retries there exists a window
+ * where the page will be unlocked and not up to date.
+ * In this case we must try and fill the page.
+ */
+ if (unlikely(!PageUptodate(pp))) {
+ error = zfs_fillpage(ip, pp);
+ if (error) {
+ unlock_page(pp);
+ put_page(pp);
+ return (error);
+ }
+ }
+
+ ASSERT(PageUptodate(pp) || PageDirty(pp));
+
unlock_page(pp);
- pb = kmap(pp);
- error = uiomove(pb + off, bytes, UIO_READ, uio);
+ void *pb = kmap(pp);
+ error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
kunmap(pp);
if (mapping_writably_mapped(mp))
len -= bytes;
off = 0;
+
if (error)
break;
}
+
return (error);
}
#endif /* _KERNEL */
-unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
+static unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
/*
* Write the bytes to a file.
iov.iov_base = (void *)data;
iov.iov_len = len;
- uio_t uio;
- uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);
+ zfs_uio_t uio;
+ zfs_uio_iovec_init(&uio, &iov, 1, pos, UIO_SYSSPACE, len, 0);
cookie = spl_fstrans_mark();
error = zfs_write(zp, &uio, 0, kcred);
if (error == 0) {
if (residp != NULL)
- *residp = uio_resid(&uio);
- else if (uio_resid(&uio) != 0)
+ *residp = zfs_uio_resid(&uio);
+ else if (zfs_uio_resid(&uio) != 0)
error = SET_ERROR(EIO);
}
return (error);
}
+static void
+zfs_rele_async_task(void *arg)
+{
+ iput(arg);
+}
+
void
zfs_zrele_async(znode_t *zp)
{
ASSERT(atomic_read(&ip->i_count) > 0);
ASSERT(os != NULL);
- if (atomic_read(&ip->i_count) == 1)
+ /*
+ * If decrementing the count would put us at 0, we can't do it inline
+ * here, because that would be synchronous. Instead, dispatch an iput
+ * to run later.
+ *
+ * For more information on the dangers of a synchronous iput, see the
+ * header comment of this file.
+ */
+ if (!atomic_add_unless(&ip->i_count, -1, 1)) {
VERIFY(taskq_dispatch(dsl_pool_zrele_taskq(dmu_objset_pool(os)),
- (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID);
- else
- zrele(zp);
+ zfs_rele_async_task, ip, TQ_SLEEP) != TASKQID_INVALID);
+ }
}
* Timestamps:
* NA
*/
-/* ARGSUSED */
int
zfs_lookup(znode_t *zdp, char *nm, znode_t **zpp, int flags, cred_t *cr,
int *direntflags, pathname_t *realpnp)
}
}
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zdp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zdp, FTAG)) != 0)
+ return (error);
*zpp = NULL;
* Maybe someday we will.
*/
if (zdp->z_pflags & ZFS_XATTR) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EINVAL));
}
if ((error = zfs_get_xattrdir(zdp, zpp, cr, flags))) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
*/
if ((error = zfs_zaccess(*zpp, ACE_EXECUTE, 0,
- B_FALSE, cr))) {
+ B_TRUE, cr, zfs_init_idmap))) {
zrele(*zpp);
*zpp = NULL;
}
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
if (!S_ISDIR(ZTOI(zdp)->i_mode)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(ENOTDIR));
}
* Check accessibility of directory.
*/
- if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
- ZFS_EXIT(zfsvfs);
+ if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr,
+ zfs_init_idmap))) {
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EILSEQ));
}
error = zfs_dirlook(zdp, nm, zpp, flags, direntflags, realpnp);
if ((error == 0) && (*zpp))
- zfs_inode_update(*zpp);
+ zfs_znode_update_vfs(*zpp);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
* cr - credentials of caller.
* flag - file flag.
* vsecp - ACL to be set
+ * mnt_ns - user namespace of the mount
*
* OUT: zpp - znode of created or trunc'd entry.
*
* dzp - ctime|mtime updated if new entry created
* zp - ctime|mtime always, atime if new
*/
-
-/* ARGSUSED */
int
zfs_create(znode_t *dzp, char *name, vattr_t *vap, int excl,
- int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp)
+ int mode, znode_t **zpp, cred_t *cr, int flag, vsecattr_t *vsecp,
+ zidmap_t *mnt_ns)
{
znode_t *zp;
zfsvfs_t *zfsvfs = ZTOZSB(dzp);
boolean_t fuid_dirtied;
boolean_t have_acl = B_FALSE;
boolean_t waited = B_FALSE;
+ boolean_t skip_acl = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
/*
* If we have an ephemeral id, ACL, or XVATTR then
if (name == NULL)
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(dzp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+ return (error);
os = zfsvfs->z_os;
zilog = zfsvfs->z_log;
if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EILSEQ));
}
if (vap->va_mask & ATTR_XVATTR) {
if ((error = secpolicy_xvattr((xvattr_t *)vap,
crgetuid(cr), cr, vap->va_mode)) != 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
}
zfs_acl_ids_free(&acl_ids);
if (strcmp(name, "..") == 0)
error = SET_ERROR(EISDIR);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
}
* Create a new file object and update the directory
* to reference it.
*/
- if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, skip_acl, cr,
+ mnt_ns))) {
if (have_acl)
zfs_acl_ids_free(&acl_ids);
goto out;
}
if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
- cr, vsecp, &acl_ids)) != 0)
+ cr, vsecp, &acl_ids, mnt_ns)) != 0)
goto out;
have_acl = B_TRUE;
}
zfs_acl_ids_free(&acl_ids);
dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
if (have_acl)
zfs_acl_ids_free(&acl_ids);
- have_acl = B_FALSE;
/*
* A directory entry already exists for this name.
/*
* Verify requested access to file.
*/
- if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
+ if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr,
+ mnt_ns))) {
goto out;
}
if (zp)
zrele(zp);
} else {
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
*zpp = zp;
}
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
-/* ARGSUSED */
int
zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
- int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
+ int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp,
+ zidmap_t *mnt_ns)
{
+ (void) excl, (void) mode, (void) flag;
znode_t *zp = NULL, *dzp = ITOZ(dip);
zfsvfs_t *zfsvfs = ITOZSB(dip);
objset_t *os;
(vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(dzp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+ return (error);
os = zfsvfs->z_os;
if (vap->va_mask & ATTR_XVATTR) {
if ((error = secpolicy_xvattr((xvattr_t *)vap,
crgetuid(cr), cr, vap->va_mode)) != 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
}
* Create a new file object and update the directory
* to reference it.
*/
- if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
if (have_acl)
zfs_acl_ids_free(&acl_ids);
goto out;
}
if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
- cr, vsecp, &acl_ids)) != 0)
+ cr, vsecp, &acl_ids, mnt_ns)) != 0)
goto out;
have_acl = B_TRUE;
}
zfs_acl_ids_free(&acl_ids);
dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
if (zp)
zrele(zp);
} else {
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
*ipp = ZTOI(zp);
}
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
* ip - ctime (if nlink > 0)
*/
-uint64_t null_xattr = 0;
+static uint64_t null_xattr = 0;
-/*ARGSUSED*/
int
zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
{
if (name == NULL)
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(dzp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+ return (error);
zilog = zfsvfs->z_log;
if (flags & FIGNORECASE) {
NULL, realnmp))) {
if (realnmp)
pn_free(realnmp);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
- if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+ if ((error = zfs_zaccess_delete(dzp, zp, cr, zfs_init_idmap))) {
goto out;
}
mutex_enter(&zp->z_lock);
may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
- !(zp->z_is_mapped);
+ !zn_has_cached_data(zp, 0, LLONG_MAX);
mutex_exit(&zp->z_lock);
/*
zrele(zp);
if (xzp)
zrele(xzp);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
&xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
delete_now = may_delete_now && !toobig &&
atomic_read(&ZTOI(zp)->i_count) == 1 &&
- !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked &&
+ !zn_has_cached_data(zp, 0, LLONG_MAX) &&
+ xattr_obj == xattr_obj_unlinked &&
zfs_external_acl(zp) == acl_obj;
+ VERIFY_IMPLY(xattr_obj_unlinked, xzp);
}
if (delete_now) {
pn_free(realnmp);
zfs_dirent_unlock(dl);
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
if (delete_now)
zrele(zp);
zfs_zrele_async(zp);
if (xzp) {
- zfs_inode_update(xzp);
+ zfs_znode_update_vfs(xzp);
zfs_zrele_async(xzp);
}
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
* cr - credentials of caller.
* flags - case flags.
* vsecp - ACL to be set
+ * mnt_ns - user namespace of the mount
*
* OUT: zpp - znode of created directory.
*
* dzp - ctime|mtime updated
* zpp - ctime|mtime|atime updated
*/
-/*ARGSUSED*/
int
zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap, znode_t **zpp,
- cred_t *cr, int flags, vsecattr_t *vsecp)
+ cred_t *cr, int flags, vsecattr_t *vsecp, zidmap_t *mnt_ns)
{
znode_t *zp;
zfsvfs_t *zfsvfs = ZTOZSB(dzp);
if (dirname == NULL)
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(dzp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+ return (error);
zilog = zfsvfs->z_log;
if (dzp->z_pflags & ZFS_XATTR) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EINVAL));
}
if (zfsvfs->z_utf8 && u8_validate(dirname,
strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EILSEQ));
}
if (flags & FIGNORECASE)
if (vap->va_mask & ATTR_XVATTR) {
if ((error = secpolicy_xvattr((xvattr_t *)vap,
crgetuid(cr), cr, vap->va_mode)) != 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
}
if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
- vsecp, &acl_ids)) != 0) {
- ZFS_EXIT(zfsvfs);
+ vsecp, &acl_ids, mnt_ns)) != 0) {
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
/*
if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
NULL, NULL))) {
zfs_acl_ids_free(&acl_ids);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
- if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr,
+ mnt_ns))) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, zfs_inherit_projid(dzp))) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EDQUOT));
}
}
zfs_acl_ids_free(&acl_ids);
dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
if (error != 0) {
zrele(zp);
} else {
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
}
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
* Timestamps:
* dzp - ctime|mtime updated
*/
-/*ARGSUSED*/
int
zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr,
int flags)
if (name == NULL)
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(dzp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+ return (error);
zilog = zfsvfs->z_log;
if (flags & FIGNORECASE)
*/
if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
NULL, NULL))) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
- if ((error = zfs_zaccess_delete(dzp, zp, cr))) {
+ if ((error = zfs_zaccess_delete(dzp, zp, cr, zfs_init_idmap))) {
goto out;
}
}
dmu_tx_abort(tx);
zrele(zp);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
out:
zfs_dirent_unlock(dl);
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
zrele(zp);
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
* We use 0 for '.', and 1 for '..'. If this is the root of the filesystem,
* we use the offset 2 for the '.zfs' directory.
*/
-/* ARGSUSED */
int
zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
{
+ (void) cr;
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
objset_t *os;
uint64_t parent;
uint64_t offset; /* must be unsigned; checks for < 1 */
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
&parent, sizeof (parent))) != 0)
if (done)
break;
- /* Prefetch znode */
- if (prefetch) {
- dmu_prefetch(os, objnum, 0, 0, 0,
- ZIO_PRIORITY_SYNC_READ);
- }
+ if (prefetch)
+ dmu_prefetch_dnode(os, objnum, ZIO_PRIORITY_SYNC_READ);
/*
* Move to the next entry, fill in the previous offset.
if (error == ENOENT)
error = 0;
out:
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
*
* RETURN: 0 (always succeeds)
*/
-/* ARGSUSED */
int
-zfs_getattr_fast(struct inode *ip, struct kstat *sp)
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+zfs_getattr_fast(zidmap_t *user_ns, u32 request_mask, struct inode *ip,
+ struct kstat *sp)
+#else
+zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
+#endif
{
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
uint32_t blksize;
u_longlong_t nblocks;
+ int error;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
mutex_enter(&zp->z_lock);
- generic_fillattr(ip, sp);
+#ifdef HAVE_GENERIC_FILLATTR_IDMAP_REQMASK
+ zpl_generic_fillattr(user_ns, request_mask, ip, sp);
+#else
+ zpl_generic_fillattr(user_ns, ip, sp);
+#endif
/*
* +1 link count for root inode with visible '.zfs' directory.
*/
dmu_objset_id(zfsvfs->z_os);
}
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
* flags - ATTR_UTIME set if non-default time values provided.
* - ATTR_NOACLCHECK (CIFS context only).
* cr - credentials of caller.
+ * mnt_ns - user namespace of the mount
*
* RETURN: 0 if success
* error code if failure
* Timestamps:
* ip - ctime updated, mtime updated if size changed.
*/
-/* ARGSUSED */
int
-zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr)
+zfs_setattr(znode_t *zp, vattr_t *vap, int flags, cred_t *cr, zidmap_t *mnt_ns)
{
struct inode *ip;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
- objset_t *os = zfsvfs->z_os;
+ objset_t *os;
zilog_t *zilog;
dmu_tx_t *tx;
vattr_t oldva;
if (mask == 0)
return (0);
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (err);
ip = ZTOI(zp);
+ os = zfsvfs->z_os;
/*
* If this is a xvattr_t, then get a pointer to the structure of
if (XVA_ISSET_REQ(xvap, XAT_PROJID)) {
if (!dmu_objset_projectquota_enabled(os) ||
(!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode))) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(ENOTSUP));
}
projid = xoap->xoa_projid;
if (unlikely(projid == ZFS_INVALID_PROJID)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EINVAL));
}
((zp->z_pflags & ZFS_PROJINHERIT) != 0)) &&
(!dmu_objset_projectquota_enabled(os) ||
(!S_ISREG(ip->i_mode) && !S_ISDIR(ip->i_mode)))) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(ENOTSUP));
}
}
(((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
(mask & ATTR_XVATTR))) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EINVAL));
}
if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EISDIR));
}
if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EINVAL));
}
*/
if (mask & ATTR_SIZE) {
- err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
+ err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr,
+ mnt_ns);
if (err)
goto out3;
XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
- skipaclchk, cr);
+ skipaclchk, cr, mnt_ns);
}
if (mask & (ATTR_UID|ATTR_GID)) {
int idmask = (mask & (ATTR_UID|ATTR_GID));
int take_owner;
int take_group;
+ uid_t uid;
+ gid_t gid;
/*
* NOTE: even if a new mode is being set,
* Take ownership or chgrp to group we are a member of
*/
- take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr));
+ uid = zfs_uid_to_vfsuid(mnt_ns, zfs_i_user_ns(ip),
+ vap->va_uid);
+ gid = zfs_gid_to_vfsgid(mnt_ns, zfs_i_user_ns(ip),
+ vap->va_gid);
+ take_owner = (mask & ATTR_UID) && (uid == crgetuid(cr));
take_group = (mask & ATTR_GID) &&
- zfs_groupmember(zfsvfs, vap->va_gid, cr);
+ zfs_groupmember(zfsvfs, gid, cr);
/*
* If both ATTR_UID and ATTR_GID are set then take_owner and
((idmask == ATTR_UID) && take_owner) ||
((idmask == ATTR_GID) && take_group)) {
if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
- skipaclchk, cr) == 0) {
+ skipaclchk, cr, mnt_ns) == 0) {
/*
* Remove setuid/setgid for non-privileged users
*/
mutex_exit(&zp->z_lock);
if (mask & ATTR_MODE) {
- if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
+ if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr,
+ mnt_ns) == 0) {
err = secpolicy_setid_setsticky_clear(ip, vap,
- &oldva, cr);
+ &oldva, cr, mnt_ns, zfs_i_user_ns(ip));
if (err)
goto out3;
-
trim_mask |= ATTR_MODE;
} else {
need_policy = TRUE;
vap->va_mask &= ~trim_mask;
}
err = secpolicy_vnode_setattr(cr, ip, vap, &oldva, flags,
- (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
+ zfs_zaccess_unix, zp);
if (err)
goto out3;
if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
zp->z_atime_dirty = B_FALSE;
- ZFS_TIME_ENCODE(&ip->i_atime, atime);
+ inode_timespec_t tmp_atime;
+ ZFS_TIME_ENCODE(&tmp_atime, atime);
+ zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_atime);
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
&atime, sizeof (atime));
}
if (mask & (ATTR_MTIME | ATTR_SIZE)) {
ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
- ZTOI(zp)->i_mtime = zpl_inode_timestamp_truncate(
- vap->va_mtime, ZTOI(zp));
+ zpl_inode_set_mtime_to_ts(ZTOI(zp),
+ zpl_inode_timestamp_truncate(vap->va_mtime, ZTOI(zp)));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
mtime, sizeof (mtime));
if (mask & (ATTR_CTIME | ATTR_SIZE)) {
ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
- ZTOI(zp)->i_ctime = zpl_inode_timestamp_truncate(vap->va_ctime,
- ZTOI(zp));
+ zpl_inode_set_ctime_to_ts(ZTOI(zp),
+ zpl_inode_timestamp_truncate(vap->va_ctime, ZTOI(zp)));
SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
ctime, sizeof (ctime));
}
dmu_tx_commit(tx);
if (attrzp) {
if (err2 == 0 && handle_eadir)
- err2 = zfs_setattr_dir(attrzp);
+ err = zfs_setattr_dir(attrzp);
zrele(attrzp);
}
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(zp);
}
out2:
kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * bulks);
kmem_free(bulk, sizeof (sa_bulk_attr_t) * bulks);
kmem_free(tmpxvattr, sizeof (xvattr_t));
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (err);
}
* tnm - New entry name.
* cr - credentials of caller.
* flags - case flags
+ * rflags - RENAME_* flags
+ * wa_vap - attributes for RENAME_WHITEOUT (must be a char 0:0).
+ * mnt_ns - user namespace of the mount
*
* RETURN: 0 on success, error code on failure.
*
* Timestamps:
* sdzp,tdzp - ctime|mtime updated
*/
-/*ARGSUSED*/
int
zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp, char *tnm,
- cred_t *cr, int flags)
+ cred_t *cr, int flags, uint64_t rflags, vattr_t *wo_vap, zidmap_t *mnt_ns)
{
znode_t *szp, *tzp;
zfsvfs_t *zfsvfs = ZTOZSB(sdzp);
int error = 0;
int zflg = 0;
boolean_t waited = B_FALSE;
+ /* Needed for whiteout inode creation. */
+ boolean_t fuid_dirtied;
+ zfs_acl_ids_t acl_ids;
+ boolean_t have_acl = B_FALSE;
+ znode_t *wzp = NULL;
+
if (snm == NULL || tnm == NULL)
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(sdzp);
+ if (rflags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
+ return (SET_ERROR(EINVAL));
+
+ /* Already checked by Linux VFS, but just to make sure. */
+ if (rflags & RENAME_EXCHANGE &&
+ (rflags & (RENAME_NOREPLACE | RENAME_WHITEOUT)))
+ return (SET_ERROR(EINVAL));
+
+ /*
+ * Make sure we only get wo_vap iff. RENAME_WHITEOUT and that it's the
+ * right kind of vattr_t for the whiteout file. These are set
+ * internally by ZFS so should never be incorrect.
+ */
+ VERIFY_EQUIV(rflags & RENAME_WHITEOUT, wo_vap != NULL);
+ VERIFY_IMPLY(wo_vap, wo_vap->va_mode == S_IFCHR);
+ VERIFY_IMPLY(wo_vap, wo_vap->va_rdev == makedevice(0, 0));
+
+ if ((error = zfs_enter_verify_zp(zfsvfs, sdzp, FTAG)) != 0)
+ return (error);
zilog = zfsvfs->z_log;
- ZFS_VERIFY_ZP(tdzp);
+ if ((error = zfs_verify_zp(tdzp)) != 0) {
+ zfs_exit(zfsvfs, FTAG);
+ return (error);
+ }
/*
* We check i_sb because snapshots and the ctldir must have different
*/
if (ZTOI(tdzp)->i_sb != ZTOI(sdzp)->i_sb ||
zfsctl_is_node(ZTOI(tdzp))) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EXDEV));
}
if (zfsvfs->z_utf8 && u8_validate(tnm,
strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EILSEQ));
}
* See the comment in zfs_link() for why this is considered bad.
*/
if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EINVAL));
}
* the rename() function shall return successfully
* and perform no other action."
*/
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
/*
if (strcmp(snm, "..") == 0)
serr = EINVAL;
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (serr);
}
if (terr) {
if (strcmp(tnm, "..") == 0)
terr = EINVAL;
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (terr);
}
* Note that if target and source are the same, this can be
* done in a single check.
*/
-
- if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr)))
+ if ((error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr, mnt_ns)))
goto out;
if (S_ISDIR(ZTOI(szp)->i_mode)) {
* Does target exist?
*/
if (tzp) {
+ if (rflags & RENAME_NOREPLACE) {
+ error = SET_ERROR(EEXIST);
+ goto out;
+ }
/*
- * Source and target must be the same type.
+ * Source and target must be the same type (unless exchanging).
*/
- if (S_ISDIR(ZTOI(szp)->i_mode)) {
- if (!S_ISDIR(ZTOI(tzp)->i_mode)) {
- error = SET_ERROR(ENOTDIR);
- goto out;
- }
- } else {
- if (S_ISDIR(ZTOI(tzp)->i_mode)) {
- error = SET_ERROR(EISDIR);
+ if (!(rflags & RENAME_EXCHANGE)) {
+ boolean_t s_is_dir = S_ISDIR(ZTOI(szp)->i_mode) != 0;
+ boolean_t t_is_dir = S_ISDIR(ZTOI(tzp)->i_mode) != 0;
+
+ if (s_is_dir != t_is_dir) {
+ error = SET_ERROR(s_is_dir ? ENOTDIR : EISDIR);
goto out;
}
}
error = 0;
goto out;
}
+ } else if (rflags & RENAME_EXCHANGE) {
+ /* Target must exist for RENAME_EXCHANGE. */
+ error = SET_ERROR(ENOENT);
+ goto out;
+ }
+
+ /* Set up inode creation for RENAME_WHITEOUT. */
+ if (rflags & RENAME_WHITEOUT) {
+ /*
+ * Whiteout files are not regular files or directories, so to
+ * match zfs_create() we do not inherit the project id.
+ */
+ uint64_t wo_projid = ZFS_DEFAULT_PROJID;
+
+ error = zfs_zaccess(sdzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns);
+ if (error)
+ goto out;
+
+ if (!have_acl) {
+ error = zfs_acl_ids_create(sdzp, 0, wo_vap, cr, NULL,
+ &acl_ids, mnt_ns);
+ if (error)
+ goto out;
+ have_acl = B_TRUE;
+ }
+
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, wo_projid)) {
+ error = SET_ERROR(EDQUOT);
+ goto out;
+ }
}
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
- dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
+ dmu_tx_hold_zap(tx, sdzp->z_id,
+ (rflags & RENAME_EXCHANGE) ? TRUE : FALSE, snm);
dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
if (sdzp != tdzp) {
dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, tzp);
}
+ if (rflags & RENAME_WHITEOUT) {
+ dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+ ZFS_SA_BASE_ATTR_SIZE);
+ dmu_tx_hold_zap(tx, sdzp->z_id, TRUE, snm);
+ dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
+ if (!zfsvfs->z_use_sa &&
+ acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+ dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+ 0, acl_ids.z_aclp->z_acl_bytes);
+ }
+ }
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
+ if (fuid_dirtied)
+ zfs_fuid_txhold(zfsvfs, tx);
zfs_sa_upgrade_txholds(tx, szp);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
zrele(szp);
if (tzp)
zrele(tzp);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
- if (tzp) /* Attempt to remove the existing target */
- error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
+ /*
+ * Unlink the source.
+ */
+ szp->z_pflags |= ZFS_AV_MODIFIED;
+ if (tdzp->z_pflags & ZFS_PROJINHERIT)
+ szp->z_pflags |= ZFS_PROJINHERIT;
- if (error == 0) {
- error = zfs_link_create(tdl, szp, tx, ZRENAMING);
- if (error == 0) {
- szp->z_pflags |= ZFS_AV_MODIFIED;
- if (tdzp->z_pflags & ZFS_PROJINHERIT)
- szp->z_pflags |= ZFS_PROJINHERIT;
-
- error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
- (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+ error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+ (void *)&szp->z_pflags, sizeof (uint64_t), tx);
+ VERIFY0(error);
+
+ error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
+ if (error)
+ goto commit;
+
+ /*
+ * Unlink the target.
+ */
+ if (tzp) {
+ int tzflg = zflg;
+
+ if (rflags & RENAME_EXCHANGE) {
+ /* This inode will be re-linked soon. */
+ tzflg |= ZRENAMING;
+
+ tzp->z_pflags |= ZFS_AV_MODIFIED;
+ if (sdzp->z_pflags & ZFS_PROJINHERIT)
+ tzp->z_pflags |= ZFS_PROJINHERIT;
+
+ error = sa_update(tzp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
+ (void *)&tzp->z_pflags, sizeof (uint64_t), tx);
ASSERT0(error);
+ }
+ error = zfs_link_destroy(tdl, tzp, tx, tzflg, NULL);
+ if (error)
+ goto commit_link_szp;
+ }
- error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
- if (error == 0) {
- zfs_log_rename(zilog, tx, TX_RENAME |
- (flags & FIGNORECASE ? TX_CI : 0), sdzp,
- sdl->dl_name, tdzp, tdl->dl_name, szp);
- } else {
- /*
- * At this point, we have successfully created
- * the target name, but have failed to remove
- * the source name. Since the create was done
- * with the ZRENAMING flag, there are
- * complications; for one, the link count is
- * wrong. The easiest way to deal with this
- * is to remove the newly created target, and
- * return the original error. This must
- * succeed; fortunately, it is very unlikely to
- * fail, since we just created it.
- */
- VERIFY3U(zfs_link_destroy(tdl, szp, tx,
- ZRENAMING, NULL), ==, 0);
- }
- } else {
- /*
- * If we had removed the existing target, subsequent
- * call to zfs_link_create() to add back the same entry
- * but, the new dnode (szp) should not fail.
- */
- ASSERT(tzp == NULL);
+ /*
+ * Create the new target links:
+ * * We always link the target.
+ * * RENAME_EXCHANGE: Link the old target to the source.
+ * * RENAME_WHITEOUT: Create a whiteout inode in-place of the source.
+ */
+ error = zfs_link_create(tdl, szp, tx, ZRENAMING);
+ if (error) {
+ /*
+ * If we have removed the existing target, a subsequent call to
+ * zfs_link_create() to add back the same entry, but with a new
+ * dnode (szp), should not fail.
+ */
+ ASSERT3P(tzp, ==, NULL);
+ goto commit_link_tzp;
+ }
+
+ switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
+ case RENAME_EXCHANGE:
+ error = zfs_link_create(sdl, tzp, tx, ZRENAMING);
+ /*
+ * The same argument as zfs_link_create() failing for
+ * szp applies here, since the source directory must
+ * have had an entry we are replacing.
+ */
+ ASSERT0(error);
+ if (error)
+ goto commit_unlink_td_szp;
+ break;
+ case RENAME_WHITEOUT:
+ zfs_mknode(sdzp, wo_vap, tx, cr, 0, &wzp, &acl_ids);
+ error = zfs_link_create(sdl, wzp, tx, ZNEW);
+ if (error) {
+ zfs_znode_delete(wzp, tx);
+ remove_inode_hash(ZTOI(wzp));
+ goto commit_unlink_td_szp;
}
+ break;
}
+ if (fuid_dirtied)
+ zfs_fuid_sync(zfsvfs, tx);
+
+ switch (rflags & (RENAME_EXCHANGE | RENAME_WHITEOUT)) {
+ case RENAME_EXCHANGE:
+ zfs_log_rename_exchange(zilog, tx,
+ (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
+ tdzp, tdl->dl_name, szp);
+ break;
+ case RENAME_WHITEOUT:
+ zfs_log_rename_whiteout(zilog, tx,
+ (flags & FIGNORECASE ? TX_CI : 0), sdzp, sdl->dl_name,
+ tdzp, tdl->dl_name, szp, wzp);
+ break;
+ default:
+ ASSERT0(rflags & ~RENAME_NOREPLACE);
+ zfs_log_rename(zilog, tx, (flags & FIGNORECASE ? TX_CI : 0),
+ sdzp, sdl->dl_name, tdzp, tdl->dl_name, szp);
+ break;
+ }
+
+commit:
dmu_tx_commit(tx);
out:
- if (zl != NULL)
- zfs_rename_unlock(&zl);
-
- zfs_dirent_unlock(sdl);
- zfs_dirent_unlock(tdl);
+ if (have_acl)
+ zfs_acl_ids_free(&acl_ids);
- zfs_inode_update(sdzp);
+ zfs_znode_update_vfs(sdzp);
if (sdzp == tdzp)
rw_exit(&sdzp->z_name_lock);
if (sdzp != tdzp)
- zfs_inode_update(tdzp);
+ zfs_znode_update_vfs(tdzp);
- zfs_inode_update(szp);
+ zfs_znode_update_vfs(szp);
zrele(szp);
+ if (wzp) {
+ zfs_znode_update_vfs(wzp);
+ zrele(wzp);
+ }
if (tzp) {
- zfs_inode_update(tzp);
+ zfs_znode_update_vfs(tzp);
zrele(tzp);
}
+ if (zl != NULL)
+ zfs_rename_unlock(&zl);
+
+ zfs_dirent_unlock(sdl);
+ zfs_dirent_unlock(tdl);
+
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
+
+ /*
+ * Clean-up path for broken link state.
+ *
+ * At this point we are in a (very) bad state, so we need to do our
+ * best to correct the state. In particular, all of the nlinks are
+ * wrong because we were destroying and creating links with ZRENAMING.
+ *
+ * In some form, all of these operations have to resolve the state:
+ *
+ * * link_destroy() *must* succeed. Fortunately, this is very likely
+ * since we only just created it.
+ *
+ * * link_create()s are allowed to fail (though they shouldn't because
+ * we only just unlinked them and are putting the entries back
+ * during clean-up). But if they fail, we can just forcefully drop
+ * the nlink value to (at the very least) avoid broken nlink values
+ * -- though in the case of non-empty directories we will have to
+ * panic (otherwise we'd have a leaked directory with a broken ..).
+ */
+commit_unlink_td_szp:
+ VERIFY0(zfs_link_destroy(tdl, szp, tx, ZRENAMING, NULL));
+commit_link_tzp:
+ if (tzp) {
+ if (zfs_link_create(tdl, tzp, tx, ZRENAMING))
+ VERIFY0(zfs_drop_nlink(tzp, tx, NULL));
+ }
+commit_link_szp:
+ if (zfs_link_create(sdl, szp, tx, ZRENAMING))
+ VERIFY0(zfs_drop_nlink(szp, tx, NULL));
+ goto commit;
}
/*
* link - Name for new symlink entry.
* cr - credentials of caller.
* flags - case flags
+ * mnt_ns - user namespace of the mount
*
* OUT: zpp - Znode for new symbolic link.
*
* Timestamps:
* dip - ctime|mtime updated
*/
-/*ARGSUSED*/
int
zfs_symlink(znode_t *dzp, char *name, vattr_t *vap, char *link,
- znode_t **zpp, cred_t *cr, int flags)
+ znode_t **zpp, cred_t *cr, int flags, zidmap_t *mnt_ns)
{
znode_t *zp;
zfs_dirlock_t *dl;
if (name == NULL)
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(dzp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, dzp, FTAG)) != 0)
+ return (error);
zilog = zfsvfs->z_log;
if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EILSEQ));
}
if (flags & FIGNORECASE)
zflg |= ZCILOOK;
if (len > MAXPATHLEN) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(ENAMETOOLONG));
}
if ((error = zfs_acl_ids_create(dzp, 0,
- vap, cr, NULL, &acl_ids)) != 0) {
- ZFS_EXIT(zfsvfs);
+ vap, cr, NULL, &acl_ids, mnt_ns)) != 0) {
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
top:
error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
if (error) {
zfs_acl_ids_free(&acl_ids);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
- if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr, mnt_ns))) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
if (zfs_acl_ids_overquota(zfsvfs, &acl_ids, ZFS_DEFAULT_PROJID)) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EDQUOT));
}
tx = dmu_tx_create(zfsvfs->z_os);
}
zfs_acl_ids_free(&acl_ids);
dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
/*
* Create a new object for the symlink.
- * for version 4 ZPL datsets the symlink will be an SA attribute
+ * for version 4 ZPL datasets the symlink will be an SA attribute
*/
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
txtype |= TX_CI;
zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
- zfs_inode_update(dzp);
- zfs_inode_update(zp);
+ zfs_znode_update_vfs(dzp);
+ zfs_znode_update_vfs(zp);
}
zfs_acl_ids_free(&acl_ids);
zrele(zp);
}
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
* Timestamps:
* ip - atime updated
*/
-/* ARGSUSED */
int
-zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr)
+zfs_readlink(struct inode *ip, zfs_uio_t *uio, cred_t *cr)
{
+ (void) cr;
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
int error;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
mutex_enter(&zp->z_lock);
if (zp->z_is_sa)
error = zfs_sa_readlink(zp, uio);
mutex_exit(&zp->z_lock);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
* tdzp - ctime|mtime updated
* szp - ctime updated
*/
-/* ARGSUSED */
int
zfs_link(znode_t *tdzp, znode_t *szp, char *name, cred_t *cr,
int flags)
if (name == NULL)
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(tdzp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, tdzp, FTAG)) != 0)
+ return (error);
zilog = zfsvfs->z_log;
/*
* Better choices include ENOTSUP or EISDIR.
*/
if (S_ISDIR(sip->i_mode)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EPERM));
}
- ZFS_VERIFY_ZP(szp);
+ if ((error = zfs_verify_zp(szp)) != 0) {
+ zfs_exit(zfsvfs, FTAG);
+ return (error);
+ }
/*
* If we are using project inheritance, means if the directory has
*/
if (tdzp->z_pflags & ZFS_PROJINHERIT &&
tdzp->z_projid != szp->z_projid) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EXDEV));
}
* super blocks.
*/
if (sip->i_sb != ZTOI(tdzp)->i_sb || zfsctl_is_node(sip)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EXDEV));
}
if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
&parent, sizeof (uint64_t))) != 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
if (parent == zfsvfs->z_shares_dir) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EPERM));
}
if (zfsvfs->z_utf8 && u8_validate(name,
strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EILSEQ));
}
if (flags & FIGNORECASE)
* imposed in attribute space.
*/
if ((szp->z_pflags & ZFS_XATTR) != (tdzp->z_pflags & ZFS_XATTR)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EINVAL));
}
owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
cr, ZFS_OWNER);
if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EPERM));
}
- if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
- ZFS_EXIT(zfsvfs);
+ if ((error = zfs_zaccess(tdzp, ACE_ADD_FILE, 0, B_FALSE, cr,
+ zfs_init_idmap))) {
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
*/
error = zfs_dirent_lock(&dl, tdzp, name, &tzp, zf, NULL, NULL);
if (error) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
goto top;
}
dmu_tx_abort(tx);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
/* unmark z_unlinked so zfs_link_create will not reject */
if (is_tmpfile && zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED)
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
- zfs_inode_update(tdzp);
- zfs_inode_update(szp);
- ZFS_EXIT(zfsvfs);
+ zfs_znode_update_vfs(tdzp);
+ zfs_znode_update_vfs(szp);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
static void
-zfs_putpage_commit_cb(void *arg)
+zfs_putpage_sync_commit_cb(void *arg)
{
struct page *pp = arg;
end_page_writeback(pp);
}
+static void
+zfs_putpage_async_commit_cb(void *arg)
+{
+ struct page *pp = arg;
+ znode_t *zp = ITOZ(pp->mapping->host);
+
+ ClearPageError(pp);
+ end_page_writeback(pp);
+ atomic_dec_32(&zp->z_async_writes_cnt);
+}
+
/*
* Push a page out to disk, once the page is on stable storage the
* registered commit callback will be run as notification of completion.
*
- * IN: ip - page mapped for inode.
- * pp - page to push (page is locked)
- * wbc - writeback control data
+ * IN: ip - page mapped for inode.
+ * pp - page to push (page is locked)
+ * wbc - writeback control data
+ * for_sync - does the caller intend to wait synchronously for the
+ * page writeback to complete?
*
* RETURN: 0 if success
* error code if failure
* Timestamps:
* ip - ctime|mtime updated
*/
-/* ARGSUSED */
int
-zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
+zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
+ boolean_t for_sync)
{
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
caddr_t va;
int err = 0;
uint64_t mtime[2], ctime[2];
+ inode_timespec_t tmp_ts;
sa_bulk_attr_t bulk[3];
int cnt = 0;
struct address_space *mapping;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((err = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (err);
ASSERT(PageLocked(pp));
/* Page is beyond end of file */
if (pgoff >= offset) {
unlock_page(pp);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
unlock_page(pp);
zfs_rangelock_exit(lr);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
zfs_rangelock_exit(lr);
if (wbc->sync_mode != WB_SYNC_NONE) {
+ /*
+ * Speed up any non-sync page writebacks since
+ * they may take several seconds to complete.
+ * Refer to the comment in zpl_fsync() (when
+ * HAVE_FSYNC_RANGE is defined) for details.
+ */
+ if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
+ zil_commit(zfsvfs->z_log, zp->z_id);
+ }
+
if (PageWriteback(pp))
+#ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
+ folio_wait_bit(page_folio(pp), PG_writeback);
+#else
wait_on_page_bit(pp, PG_writeback);
+#endif
}
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
if (!clear_page_dirty_for_io(pp)) {
unlock_page(pp);
zfs_rangelock_exit(lr);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
* was in fact not skipped and should not be counted as if it were.
*/
wbc->pages_skipped--;
+ if (!for_sync)
+ atomic_inc_32(&zp->z_async_writes_cnt);
set_page_writeback(pp);
unlock_page(pp);
dmu_tx_wait(tx);
dmu_tx_abort(tx);
+#ifdef HAVE_VFS_FILEMAP_DIRTY_FOLIO
+ filemap_dirty_folio(page_mapping(pp), page_folio(pp));
+#else
__set_page_dirty_nobuffers(pp);
+#endif
ClearPageError(pp);
end_page_writeback(pp);
+ if (!for_sync)
+ atomic_dec_32(&zp->z_async_writes_cnt);
zfs_rangelock_exit(lr);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (err);
}
&zp->z_pflags, 8);
/* Preserve the mtime and ctime provided by the inode */
- ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
- ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+ tmp_ts = zpl_inode_get_mtime(ip);
+ ZFS_TIME_ENCODE(&tmp_ts, mtime);
+ tmp_ts = zpl_inode_get_ctime(ip);
+ ZFS_TIME_ENCODE(&tmp_ts, ctime);
zp->z_atime_dirty = B_FALSE;
zp->z_seq++;
err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
- zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
- zfs_putpage_commit_cb, pp);
- dmu_tx_commit(tx);
-
- zfs_rangelock_exit(lr);
-
+ boolean_t commit = B_FALSE;
if (wbc->sync_mode != WB_SYNC_NONE) {
/*
* Note that this is rarely called under writepages(), because
* writepages() normally handles the entire commit for
* performance reasons.
*/
- zil_commit(zfsvfs->z_log, zp->z_id);
+ commit = B_TRUE;
+ } else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
+ /*
+ * If the caller does not intend to wait synchronously
+ * for this page writeback to complete and there are active
+ * synchronous calls on this file, do a commit so that
+ * the latter don't accidentally end up waiting for
+ * our writeback to complete. Refer to the comment in
+ * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
+ */
+ commit = B_TRUE;
}
- ZFS_EXIT(zfsvfs);
+ zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, commit,
+ for_sync ? zfs_putpage_sync_commit_cb :
+ zfs_putpage_async_commit_cb, pp);
+
+ dmu_tx_commit(tx);
+
+ zfs_rangelock_exit(lr);
+
+ if (commit)
+ zil_commit(zfsvfs->z_log, zp->z_id);
+
+ dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
+
+ zfs_exit(zfsvfs, FTAG);
return (err);
}
zfsvfs_t *zfsvfs = ITOZSB(ip);
dmu_tx_t *tx;
uint64_t mode, atime[2], mtime[2], ctime[2];
+ inode_timespec_t tmp_ts;
sa_bulk_attr_t bulk[4];
int error = 0;
int cnt = 0;
if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
return (0);
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
#ifdef I_DIRTY_TIME
/*
SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
/* Preserve the mode, mtime and ctime provided by the inode */
- ZFS_TIME_ENCODE(&ip->i_atime, atime);
- ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
- ZFS_TIME_ENCODE(&ip->i_ctime, ctime);
+ tmp_ts = zpl_inode_get_atime(ip);
+ ZFS_TIME_ENCODE(&tmp_ts, atime);
+ tmp_ts = zpl_inode_get_mtime(ip);
+ ZFS_TIME_ENCODE(&tmp_ts, mtime);
+ tmp_ts = zpl_inode_get_ctime(ip);
+ ZFS_TIME_ENCODE(&tmp_ts, ctime);
mode = ip->i_mode;
zp->z_mode = mode;
dmu_tx_commit(tx);
out:
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
-/*ARGSUSED*/
void
zfs_inactive(struct inode *ip)
{
if (error) {
dmu_tx_abort(tx);
} else {
- ZFS_TIME_ENCODE(&ip->i_atime, atime);
+ inode_timespec_t tmp_atime;
+ tmp_atime = zpl_inode_get_atime(ip);
+ ZFS_TIME_ENCODE(&tmp_atime, atime);
mutex_enter(&zp->z_lock);
(void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
(void *)&atime, sizeof (atime), tx);
* Fill pages with data from the disk.
*/
static int
-zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_fillpage(struct inode *ip, struct page *pp)
{
- znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
- objset_t *os;
- struct page *cur_pp;
- u_offset_t io_off, total;
- size_t io_len;
- loff_t i_size;
- unsigned page_idx;
- int err;
+ loff_t i_size = i_size_read(ip);
+ u_offset_t io_off = page_offset(pp);
+ size_t io_len = PAGE_SIZE;
- os = zfsvfs->z_os;
- io_len = nr_pages << PAGE_SHIFT;
- i_size = i_size_read(ip);
- io_off = page_offset(pl[0]);
+ ASSERT3U(io_off, <, i_size);
if (io_off + io_len > i_size)
io_len = i_size - io_off;
- /*
- * Iterate over list of pages and read each page individually.
- */
- page_idx = 0;
- for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
- caddr_t va;
+ void *va = kmap(pp);
+ int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off,
+ io_len, va, DMU_READ_PREFETCH);
+ if (io_len != PAGE_SIZE)
+ memset((char *)va + io_len, 0, PAGE_SIZE - io_len);
+ kunmap(pp);
- cur_pp = pl[page_idx++];
- va = kmap(cur_pp);
- err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
- DMU_READ_PREFETCH);
- kunmap(cur_pp);
- if (err) {
- /* convert checksum errors into IO errors */
- if (err == ECKSUM)
- err = SET_ERROR(EIO);
- return (err);
- }
+ if (error) {
+ /* convert checksum errors into IO errors */
+ if (error == ECKSUM)
+ error = SET_ERROR(EIO);
+
+ SetPageError(pp);
+ ClearPageUptodate(pp);
+ } else {
+ ClearPageError(pp);
+ SetPageUptodate(pp);
}
- return (0);
+ return (error);
}
/*
- * Uses zfs_fillpage to read data from the file and fill the pages.
+ * Uses zfs_fillpage to read data from the file and fill the page.
*
* IN: ip - inode of file to get data from.
- * pl - list of pages to read
- * nr_pages - number of pages to read
+ * pp - page to read
*
* RETURN: 0 on success, error code on failure.
*
* Timestamps:
* vp - atime updated
*/
-/* ARGSUSED */
int
-zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
+zfs_getpage(struct inode *ip, struct page *pp)
{
- znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
- int err;
+ znode_t *zp = ITOZ(ip);
+ int error;
- if (pl == NULL)
- return (0);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ error = zfs_fillpage(ip, pp);
+ if (error == 0)
+ dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);
- err = zfs_fillpage(ip, pl, nr_pages);
+ zfs_exit(zfsvfs, FTAG);
- ZFS_EXIT(zfsvfs);
- return (err);
+ return (error);
}
/*
* RETURN: 0 if success
* error code if failure
*/
-/*ARGSUSED*/
int
zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
unsigned long vm_flags)
{
+ (void) addrp;
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
+ int error;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
- if ((vm_flags & VM_WRITE) && (zp->z_pflags &
- (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
- ZFS_EXIT(zfsvfs);
+ if ((vm_flags & VM_WRITE) && (vm_flags & VM_SHARED) &&
+ (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EPERM));
}
if ((vm_flags & (VM_READ | VM_EXEC)) &&
(zp->z_pflags & ZFS_AV_QUARANTINED)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EACCES));
}
if (off < 0 || len > MAXOFFSET_T - off) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(ENXIO));
}
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
* Timestamps:
* zp - ctime|mtime updated
*/
-/* ARGSUSED */
int
zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
offset_t offset, cred_t *cr)
{
+ (void) offset;
zfsvfs_t *zfsvfs = ZTOZSB(zp);
uint64_t off, len;
int error;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
+ return (error);
if (cmd != F_FREESP) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EINVAL));
}
* so check it explicitly here.
*/
if (zfs_is_readonly(zfsvfs)) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EROFS));
}
if (bfp->l_len < 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (SET_ERROR(EINVAL));
}
* On Linux we can get here through truncate_range() which
* operates directly on inodes, so we need to check access rights.
*/
- if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
- ZFS_EXIT(zfsvfs);
+ if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr,
+ zfs_init_idmap))) {
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
error = zfs_freesp(zp, off, len, flag, TRUE);
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
-/*ARGSUSED*/
int
zfs_fid(struct inode *ip, fid_t *fidp)
{
zfid_short_t *zfid;
int size, i, error;
- ZFS_ENTER(zfsvfs);
- ZFS_VERIFY_ZP(zp);
+ if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
+ return (error);
+
+ if (fidp->fid_len < SHORT_FID_LEN) {
+ fidp->fid_len = SHORT_FID_LEN;
+ zfs_exit(zfsvfs, FTAG);
+ return (SET_ERROR(ENOSPC));
+ }
+
+ if ((error = zfs_verify_zp(zp)) != 0) {
+ zfs_exit(zfsvfs, FTAG);
+ return (error);
+ }
if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
&gen64, sizeof (uint64_t))) != 0) {
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (error);
}
for (i = 0; i < sizeof (zfid->zf_gen); i++)
zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
- ZFS_EXIT(zfsvfs);
+ zfs_exit(zfsvfs, FTAG);
return (0);
}
EXPORT_SYMBOL(zfs_dirty_inode);
EXPORT_SYMBOL(zfs_map);
-/* BEGIN CSTYLED */
+/* CSTYLED */
module_param(zfs_delete_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
-/* END CSTYLED */
+
+/* CSTYLED */
+module_param(zfs_bclone_enabled, uint, 0644);
+MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
#endif