*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
*/
/* Portions Copyright 2007 Jeremy Teo */
#include <sys/zfs_rlock.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_vnops.h>
+#include <sys/zfs_ctldir.h>
#include <sys/dnode.h>
#include <sys/fs/zfs.h>
#include <sys/kidmap.h>
rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
+ rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&zp->z_range_avl, zfs_range_compare,
zp->z_dirlocks = NULL;
zp->z_acl_cached = NULL;
+ zp->z_xattr_cached = NULL;
+ zp->z_xattr_parent = NULL;
zp->z_moved = 0;
return (0);
}
rw_destroy(&zp->z_parent_lock);
rw_destroy(&zp->z_name_lock);
mutex_destroy(&zp->z_acl_lock);
+ rw_destroy(&zp->z_xattr_lock);
avl_destroy(&zp->z_range_avl);
mutex_destroy(&zp->z_range_lock);
ASSERT(zp->z_dirlocks == NULL);
ASSERT(zp->z_acl_cached == NULL);
+ ASSERT(zp->z_xattr_cached == NULL);
+ ASSERT(zp->z_xattr_parent == NULL);
}
void
znode_t *zp = ITOZ(ip);
zfs_sb_t *zsb = ZTOZSB(zp);
+ if (zfsctl_is_node(ip))
+ zfsctl_inode_destroy(ip);
+
mutex_enter(&zsb->z_znodes_lock);
- list_remove(&zsb->z_all_znodes, zp);
+ if (list_link_active(&zp->z_link_node)) {
+ list_remove(&zsb->z_all_znodes, zp);
+ zsb->z_nr_znodes--;
+ }
mutex_exit(&zsb->z_znodes_lock);
if (zp->z_acl_cached) {
zp->z_acl_cached = NULL;
}
+ if (zp->z_xattr_cached) {
+ nvlist_free(zp->z_xattr_cached);
+ zp->z_xattr_cached = NULL;
+ }
+
+ if (zp->z_xattr_parent) {
+ iput(ZTOI(zp->z_xattr_parent));
+ zp->z_xattr_parent = NULL;
+ }
+
kmem_cache_free(znode_cache, zp);
}
static znode_t *
zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl,
- struct dentry *dentry, struct inode *dip)
+ struct inode *dip)
{
znode_t *zp;
struct inode *ip;
zp = ITOZ(ip);
ASSERT(zp->z_dirlocks == NULL);
+ ASSERT3P(zp->z_acl_cached, ==, NULL);
+ ASSERT3P(zp->z_xattr_cached, ==, NULL);
+ ASSERT3P(zp->z_xattr_parent, ==, NULL);
zp->z_moved = 0;
zp->z_sa_hdl = NULL;
zp->z_unlinked = 0;
zp->z_blksz = blksz;
zp->z_seq = 0x7A4653;
zp->z_sync_cnt = 0;
- zp->z_is_zvol = 0;
+ zp->z_is_zvol = B_FALSE;
+ zp->z_is_mapped = B_FALSE;
+ zp->z_is_ctldir = B_FALSE;
+ zp->z_is_stale = B_FALSE;
zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
goto error;
}
+ /*
+ * xattr znodes hold a reference on their unique parent
+ */
+ if (dip && zp->z_pflags & ZFS_XATTR) {
+ igrab(dip);
+ zp->z_xattr_parent = ITOZ(dip);
+ }
+
ip->i_ino = obj;
zfs_inode_update(zp);
zfs_inode_set_ops(zsb, ip);
- if (insert_inode_locked(ip))
- goto error;
-
- if (dentry) {
- if (zpl_xattr_security_init(ip, dip, &dentry->d_name))
- goto error;
-
- d_instantiate(dentry, ip);
- }
+ /*
+ * The only way insert_inode_locked() can fail is if the ip->i_ino
+ * number is already hashed for this super block. This can never
+ * happen because the inode numbers map 1:1 with the object numbers.
+ *
+ * The one exception is rolling back a mounted file system, but in
+ * this case all the active inodes are unhashed during the rollback.
+ */
+ VERIFY3S(insert_inode_locked(ip), ==, 0);
mutex_enter(&zsb->z_znodes_lock);
list_insert_tail(&zsb->z_all_znodes, zp);
+ zsb->z_nr_znodes++;
membar_producer();
mutex_exit(&zsb->z_znodes_lock);
error:
unlock_new_inode(ip);
iput(ip);
- return NULL;
+ return (NULL);
}
/*
zsb = ZTOZSB(zp);
ip = ZTOI(zp);
+ /* Skip .zfs control nodes which do not exist on disk. */
+ if (zfsctl_is_node(ip))
+ return;
+
sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16);
sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16);
sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16);
spin_lock(&ip->i_lock);
ip->i_generation = zp->z_gen;
- ip->i_uid = zp->z_uid;
- ip->i_gid = zp->z_gid;
- ip->i_nlink = zp->z_links;
+ ip->i_uid = SUID_TO_KUID(zp->z_uid);
+ ip->i_gid = SGID_TO_KGID(zp->z_gid);
+ set_nlink(ip, zp->z_links);
ip->i_mode = zp->z_mode;
ip->i_blkbits = SPA_MINBLOCKSHIFT;
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize,
err = zap_create_claim_norm(zsb->z_os, obj,
zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
obj_type, bonuslen, tx);
- ASSERT3U(err, ==, 0);
+ ASSERT0(err);
} else {
obj = zap_create_norm(zsb->z_os,
zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
err = dmu_object_claim(zsb->z_os, obj,
DMU_OT_PLAIN_FILE_CONTENTS, 0,
obj_type, bonuslen, tx);
- ASSERT3U(err, ==, 0);
+ ASSERT0(err);
} else {
obj = dmu_object_alloc(zsb->z_os,
DMU_OT_PLAIN_FILE_CONTENTS, 0,
* order for DMU_OT_ZNODE is critical since it needs to be constructed
* in the old znode_phys_t format. Don't change this ordering.
*/
- sa_attrs = kmem_alloc(sizeof(sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
+ sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_PUSHPAGE);
if (obj_type == DMU_OT_ZNODE) {
SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
if (!(flag & IS_ROOT_NODE)) {
*zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
- vap->va_dentry, ZTOI(dzp));
- ASSERT(*zpp != NULL);
- ASSERT(dzp != NULL);
+ ZTOI(dzp));
+ VERIFY(*zpp != NULL);
+ VERIFY(dzp != NULL);
} else {
/*
* If we are creating the root node, the "parent" we
if (obj_type == DMU_OT_ZNODE ||
acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
- ASSERT3S(err, ==, 0);
+ ASSERT0(err);
}
- kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * ZPL_END);
+ kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
ZFS_OBJ_HOLD_EXIT(zsb, obj);
}
/*
- * zfs_xvattr_set only updates the in-core attributes
- * it is assumed the caller will be doing an sa_bulk_update
- * to push the changes out
+ * Update in-core attributes. It is assumed the caller will be doing an
+ * sa_bulk_update to push the changes out.
*/
void
zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
iput(ip);
- return (EINVAL);
+ return (SET_ERROR(EINVAL));
}
hdl = dmu_buf_get_user(db);
mutex_enter(&zp->z_lock);
ASSERT3U(zp->z_id, ==, obj_num);
if (zp->z_unlinked) {
- err = ENOENT;
+ err = SET_ERROR(ENOENT);
} else {
igrab(ZTOI(zp));
*zpp = zp;
* bonus buffer.
*/
zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
- doi.doi_bonus_type, obj_num, NULL, NULL, NULL);
+ doi.doi_bonus_type, obj_num, NULL, NULL);
if (zp == NULL) {
- err = ENOENT;
+ err = SET_ERROR(ENOENT);
} else {
*zpp = zp;
}
zfs_acl_free(zp->z_acl_cached);
zp->z_acl_cached = NULL;
}
-
mutex_exit(&zp->z_acl_lock);
+
+ rw_enter(&zp->z_xattr_lock, RW_WRITER);
+ if (zp->z_xattr_cached) {
+ nvlist_free(zp->z_xattr_cached);
+ zp->z_xattr_cached = NULL;
+ }
+
+ if (zp->z_xattr_parent) {
+ iput(ZTOI(zp->z_xattr_parent));
+ zp->z_xattr_parent = NULL;
+ }
+ rw_exit(&zp->z_xattr_lock);
+
ASSERT(zp->z_sa_hdl == NULL);
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
- return (EINVAL);
+ return (SET_ERROR(EINVAL));
}
zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL);
if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
- return (EIO);
+ return (SET_ERROR(EIO));
}
zp->z_mode = mode;
if (gen != zp->z_gen) {
zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
- return (EIO);
+ return (SET_ERROR(EIO));
}
zp->z_unlinked = (zp->z_links == 0);
zp->z_blksz = doi.doi_data_block_size;
+ zfs_inode_update(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
if (flag & ATTR_ATIME) {
ZFS_TIME_ENCODE(&now, zp->z_atime);
+ ZTOI(zp)->i_atime.tv_sec = zp->z_atime[0];
+ ZTOI(zp)->i_atime.tv_nsec = zp->z_atime[1];
}
if (flag & ATTR_MTIME) {
if (error == ENOTSUP)
return;
- ASSERT3U(error, ==, 0);
+ ASSERT0(error);
/* What blocksize did we actually get? */
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
* IN: zp - znode of file to free data in.
* end - new end-of-file
*
- * RETURN: 0 if success
- * error code if failure
+ * RETURN: 0 on success, error code on failure
*/
static int
zfs_extend(znode_t *zp, uint64_t end)
zfs_range_unlock(rl);
return (0);
}
-top:
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
newblksz = 0;
}
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
- if (error == ERESTART) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
dmu_tx_abort(tx);
zfs_range_unlock(rl);
return (error);
* off - start of section to free.
* len - length of section to free.
*
- * RETURN: 0 if success
- * error code if failure
+ * RETURN: 0 on success, error code on failure
*/
static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
* IN: zp - znode of file to free data in.
* end - new end-of-file.
*
- * RETURN: 0 if success
- * error code if failure
+ * RETURN: 0 on success, error code on failure
*/
static int
zfs_trunc(znode_t *zp, uint64_t end)
* flag - current file open mode flags.
* log - TRUE if this action should be logged
*
- * RETURN: 0 if success
- * error code if failure
+ * RETURN: 0 on success, error code on failure
*/
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
if (ip->i_flock && mandatory_lock(ip)) {
uint64_t length = (len ? len : zp->z_size - off);
if (!lock_may_write(ip, off, length))
- return (EAGAIN);
+ return (SET_ERROR(EAGAIN));
}
if (len == 0) {
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
- if (error == ERESTART) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto log;
- }
dmu_tx_abort(tx);
return (error);
}
vattr.va_uid = crgetuid(cr);
vattr.va_gid = crgetgid(cr);
- rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+ rootzp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
rootzp->z_moved = 0;
rootzp->z_unlinked = 0;
rootzp->z_atime_dirty = 0;
rootzp->z_is_sa = USE_SA(version, os);
- zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
+ zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_PUSHPAGE | KM_NODEBUG);
zsb->z_os = os;
zsb->z_parent = zsb;
zsb->z_version = version;
zsb->z_use_sa = USE_SA(version, os);
zsb->z_norm = norm;
- sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
+ sb = kmem_zalloc(sizeof (struct super_block), KM_PUSHPAGE);
sb->s_fs_info = zsb;
ZTOI(rootzp)->i_sb = sb;
atomic_set(&ZTOI(rootzp)->i_count, 0);
sa_handle_destroy(rootzp->z_sa_hdl);
- kmem_free(sb, sizeof (struct super_block));
- kmem_free(zsb, sizeof (zfs_sb_t));
kmem_cache_free(znode_cache, rootzp);
/*
* Create shares directory
*/
-
error = zfs_create_share_dir(zsb, tx);
-
ASSERT(error == 0);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
mutex_destroy(&zsb->z_hold_mtx[i]);
+
+ kmem_free(sb, sizeof (struct super_block));
+ kmem_free(zsb, sizeof (zfs_sb_t));
}
#endif /* _KERNEL */
(doi.doi_bonus_type == DMU_OT_ZNODE &&
doi.doi_bonus_size < sizeof (znode_phys_t))) {
sa_buf_rele(*db, tag);
- return (ENOTSUP);
+ return (SET_ERROR(ENOTSUP));
}
error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
sa_hdl = hdl;
for (;;) {
- uint64_t pobj;
+ uint64_t pobj = 0;
char component[MAXNAMELEN + 2];
size_t complen;
- int is_xattrdir;
+ int is_xattrdir = 0;
if (prevdb)
zfs_release_sa_handle(prevhdl, prevdb, FTAG);