*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
*/
/* Portions Copyright 2007 Jeremy Teo */
#include <sys/zfs_rlock.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_vnops.h>
+#include <sys/zfs_ctldir.h>
#include <sys/dnode.h>
#include <sys/fs/zfs.h>
#include <sys/kidmap.h>
rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
+ rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&zp->z_range_avl, zfs_range_compare,
zp->z_dirlocks = NULL;
zp->z_acl_cached = NULL;
+ zp->z_xattr_cached = NULL;
+ zp->z_xattr_parent = NULL;
zp->z_moved = 0;
return (0);
}
rw_destroy(&zp->z_parent_lock);
rw_destroy(&zp->z_name_lock);
mutex_destroy(&zp->z_acl_lock);
+ rw_destroy(&zp->z_xattr_lock);
avl_destroy(&zp->z_range_avl);
mutex_destroy(&zp->z_range_lock);
ASSERT(zp->z_dirlocks == NULL);
ASSERT(zp->z_acl_cached == NULL);
+ ASSERT(zp->z_xattr_cached == NULL);
+ ASSERT(zp->z_xattr_parent == NULL);
}
void
int
zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
{
-#ifdef HAVE_SHARE
+#ifdef HAVE_SMB_SHARE
zfs_acl_ids_t acl_ids;
vattr_t vattr;
znode_t *sharezp;
vattr.va_uid = crgetuid(kcred);
vattr.va_gid = crgetgid(kcred);
- sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+ sharezp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
sharezp->z_moved = 0;
sharezp->z_unlinked = 0;
sharezp->z_atime_dirty = 0;
return (error);
#else
return (0);
-#endif /* HAVE_SHARE */
-}
-
-/*
- * define a couple of values we need available
- * for both 64 and 32 bit environments.
- */
-#ifndef NBITSMINOR64
-#define NBITSMINOR64 32
-#endif
-#ifndef MAXMAJ64
-#define MAXMAJ64 0xffffffffUL
-#endif
-#ifndef MAXMIN64
-#define MAXMIN64 0xffffffffUL
-#endif
-
-/*
- * Create special expldev for ZFS private use.
- * Can't use standard expldev since it doesn't do
- * what we want. The standard expldev() takes a
- * dev32_t in LP64 and expands it to a long dev_t.
- * We need an interface that takes a dev32_t in ILP32
- * and expands it to a long dev_t.
- */
-static uint64_t
-zfs_expldev(dev_t dev)
-{
-#ifndef _LP64
- major_t major = (major_t)dev >> NBITSMINOR32 & MAXMAJ32;
- return (((uint64_t)major << NBITSMINOR64) |
- ((minor_t)dev & MAXMIN32));
-#else
- return (dev);
-#endif
+#endif /* HAVE_SMB_SHARE */
}
static void
{
znode_t *zp;
- zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
+ zp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
*ip = ZTOI(zp);
return (0);
znode_t *zp = ITOZ(ip);
zfs_sb_t *zsb = ZTOZSB(zp);
+ if (zfsctl_is_node(ip))
+ zfsctl_inode_destroy(ip);
+
mutex_enter(&zsb->z_znodes_lock);
- list_remove(&zsb->z_all_znodes, zp);
+ if (list_link_active(&zp->z_link_node)) {
+ list_remove(&zsb->z_all_znodes, zp);
+ zsb->z_nr_znodes--;
+ }
mutex_exit(&zsb->z_znodes_lock);
if (zp->z_acl_cached) {
zp->z_acl_cached = NULL;
}
+ if (zp->z_xattr_cached) {
+ nvlist_free(zp->z_xattr_cached);
+ zp->z_xattr_cached = NULL;
+ }
+
+ if (zp->z_xattr_parent) {
+ iput(ZTOI(zp->z_xattr_parent));
+ zp->z_xattr_parent = NULL;
+ }
+
kmem_cache_free(znode_cache, zp);
}
static void
zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
{
- uint64_t rdev;
+ uint64_t rdev = 0;
switch (ip->i_mode & S_IFMT) {
case S_IFREG:
ip->i_op = &zpl_symlink_inode_operations;
break;
+ /*
+ * rdev is stored in a SA only for device files.
+ */
case S_IFCHR:
case S_IFBLK:
- case S_IFIFO:
- case S_IFSOCK:
VERIFY(sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb),
&rdev, sizeof (rdev)) == 0);
+ /*FALLTHROUGH*/
+ case S_IFIFO:
+ case S_IFSOCK:
init_special_inode(ip, ip->i_mode, rdev);
ip->i_op = &zpl_special_inode_operations;
break;
*/
static znode_t *
zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
- dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl)
+ dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl,
+ struct inode *dip)
{
znode_t *zp;
struct inode *ip;
zp = ITOZ(ip);
ASSERT(zp->z_dirlocks == NULL);
+ ASSERT3P(zp->z_acl_cached, ==, NULL);
+ ASSERT3P(zp->z_xattr_cached, ==, NULL);
+ ASSERT3P(zp->z_xattr_parent, ==, NULL);
zp->z_moved = 0;
zp->z_sa_hdl = NULL;
zp->z_unlinked = 0;
zp->z_blksz = blksz;
zp->z_seq = 0x7A4653;
zp->z_sync_cnt = 0;
+ zp->z_is_zvol = B_FALSE;
+ zp->z_is_mapped = B_FALSE;
+ zp->z_is_ctldir = B_FALSE;
+ zp->z_is_stale = B_FALSE;
zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
goto error;
}
+ /*
+ * xattr znodes hold a reference on their unique parent
+ */
+ if (dip && zp->z_pflags & ZFS_XATTR) {
+ igrab(dip);
+ zp->z_xattr_parent = ITOZ(dip);
+ }
+
ip->i_ino = obj;
- ip->i_mode = zp->z_mode;
- ip->i_mtime = ip->i_atime = ip->i_ctime = CURRENT_TIME_SEC;
+ zfs_inode_update(zp);
zfs_inode_set_ops(zsb, ip);
- if (insert_inode_locked(ip))
- goto error;
+ /*
+ * The only way insert_inode_locked() can fail is if the ip->i_ino
+ * number is already hashed for this super block. This can never
+ * happen because the inode numbers map 1:1 with the object numbers.
+ *
+ * The one exception is rolling back a mounted file system, but in
+ * this case all the active inodes are unhashed during the rollback.
+ */
+ VERIFY3S(insert_inode_locked(ip), ==, 0);
mutex_enter(&zsb->z_znodes_lock);
list_insert_tail(&zsb->z_all_znodes, zp);
+ zsb->z_nr_znodes++;
membar_producer();
mutex_exit(&zsb->z_znodes_lock);
error:
unlock_new_inode(ip);
iput(ip);
- return NULL;
+ return (NULL);
}
/*
zsb = ZTOZSB(zp);
ip = ZTOI(zp);
+ /* Skip .zfs control nodes which do not exist on disk. */
+ if (zfsctl_is_node(ip))
+ return;
+
sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16);
sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16);
sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16);
spin_lock(&ip->i_lock);
ip->i_generation = zp->z_gen;
- ip->i_uid = zp->z_uid;
- ip->i_gid = zp->z_gid;
- ip->i_nlink = zp->z_links;
+ ip->i_uid = SUID_TO_KUID(zp->z_uid);
+ ip->i_gid = SGID_TO_KGID(zp->z_gid);
+ set_nlink(ip, zp->z_links);
ip->i_mode = zp->z_mode;
ip->i_blkbits = SPA_MINBLOCKSHIFT;
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize,
err = zap_create_claim_norm(zsb->z_os, obj,
zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
obj_type, bonuslen, tx);
- ASSERT3U(err, ==, 0);
+ ASSERT0(err);
} else {
obj = zap_create_norm(zsb->z_os,
zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
err = dmu_object_claim(zsb->z_os, obj,
DMU_OT_PLAIN_FILE_CONTENTS, 0,
obj_type, bonuslen, tx);
- ASSERT3U(err, ==, 0);
+ ASSERT0(err);
} else {
obj = dmu_object_alloc(zsb->z_os,
DMU_OT_PLAIN_FILE_CONTENTS, 0,
size = links = 0;
}
- if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) ||
- S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))
- rdev = zfs_expldev(vap->va_rdev);
+ if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
+ rdev = vap->va_rdev;
parent = dzp->z_id;
mode = acl_ids->z_mode;
* order for DMU_OT_ZNODE is critical since it needs to be constructed
* in the old znode_phys_t format. Don't change this ordering
*/
- sa_attrs = kmem_alloc(sizeof(sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
+ sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_PUSHPAGE);
if (obj_type == DMU_OT_ZNODE) {
SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
&empty_xattr, 8);
}
if (obj_type == DMU_OT_ZNODE ||
- (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode) ||
- S_ISFIFO(vap->va_mode) || S_ISSOCK(vap->va_mode))) {
+ (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
NULL, &rdev, 8);
}
VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
if (!(flag & IS_ROOT_NODE)) {
- *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl);
- ASSERT(*zpp != NULL);
- ASSERT(dzp != NULL);
- err = zpl_xattr_security_init(ZTOI(*zpp), ZTOI(dzp));
- ASSERT3S(err, ==, 0);
+ *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl,
+ ZTOI(dzp));
+ VERIFY(*zpp != NULL);
+ VERIFY(dzp != NULL);
} else {
/*
* If we are creating the root node, the "parent" we
if (obj_type == DMU_OT_ZNODE ||
acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
- ASSERT3S(err, ==, 0);
+ ASSERT0(err);
}
- kmem_free(sa_attrs, sizeof(sa_bulk_attr_t) * ZPL_END);
+ kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
ZFS_OBJ_HOLD_EXIT(zsb, obj);
}
+/*
+ * Update in-core attributes from an optional-attribute request.
+ *
+ * Each attribute marked as requested in xvap (XVA_ISSET_REQ) is applied
+ * and then flagged as returned (XVA_SET_RTN).  The create time is written
+ * with sa_update(), the anti-virus scanstamp is handed to
+ * zfs_sa_set_scanstamp(), and every other attribute is passed to
+ * ZFS_ATTR_SET together with zp->z_pflags.
+ *
+ * It is assumed the caller will be doing an sa_bulk_update to push the
+ * changes out.
+ *
+ *	IN:	zp	- znode whose attributes are being set.
+ *		xvap	- attribute request; must carry an optional
+ *			  attribute block (asserted below).
+ *		tx	- transaction handed to the SA update calls.
+ */
+void
+zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
+{
+	xoptattr_t *xoap;
+
+	xoap = xva_getxoptattr(xvap);
+	ASSERT(xoap);
+
+	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
+		uint64_t times[2];
+		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
+		/* The sa_update() result is deliberately discarded. */
+		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
+		    &times, sizeof (times), tx);
+		XVA_SET_RTN(xvap, XAT_CREATETIME);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_READONLY);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
+		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_HIDDEN);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
+		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SYSTEM);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
+		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_ARCHIVE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
+		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
+		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NOUNLINK);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
+		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_APPENDONLY);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
+		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_NODUMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
+		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OPAQUE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
+		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
+		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
+		zfs_sa_set_scanstamp(zp, xvap, tx);
+		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_REPARSE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
+		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_OFFLINE);
+	}
+	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
+		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
+		    zp->z_pflags, tx);
+		XVA_SET_RTN(xvap, XAT_SPARSE);
+	}
+}
+
int
zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
{
znode_t *zp;
int err;
sa_handle_t *hdl;
+ struct inode *ip;
*zpp = NULL;
+again:
+ ip = ilookup(zsb->z_sb, obj_num);
+
ZFS_OBJ_HOLD_ENTER(zsb, obj_num);
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ iput(ip);
return (err);
}
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
- return (EINVAL);
+ iput(ip);
+ return (SET_ERROR(EINVAL));
}
hdl = dmu_buf_get_user(db);
if (hdl != NULL) {
- zp = sa_get_userdata(hdl);
+ if (ip == NULL) {
+ /*
+ * ilookup returned NULL, which means
+ * the znode is dying - but the SA handle isn't
+ * quite dead yet, we need to drop any locks
+ * we're holding, re-schedule the task and try again.
+ */
+ sa_buf_rele(db, NULL);
+ ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+
+ schedule();
+ goto again;
+ }
+ zp = sa_get_userdata(hdl);
/*
* Since "SA" does immediate eviction we
mutex_enter(&zp->z_lock);
ASSERT3U(zp->z_id, ==, obj_num);
if (zp->z_unlinked) {
- err = ENOENT;
+ err = SET_ERROR(ENOENT);
} else {
igrab(ZTOI(zp));
*zpp = zp;
sa_buf_rele(db, NULL);
mutex_exit(&zp->z_lock);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
+ iput(ip);
return (err);
}
+ ASSERT3P(ip, ==, NULL);
+
/*
* Not found create new znode/vnode but only if file exists.
*
* bonus buffer.
*/
zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
- doi.doi_bonus_type, obj_num, NULL);
+ doi.doi_bonus_type, obj_num, NULL, NULL);
if (zp == NULL) {
- err = ENOENT;
+ err = SET_ERROR(ENOENT);
} else {
*zpp = zp;
}
zfs_acl_free(zp->z_acl_cached);
zp->z_acl_cached = NULL;
}
-
mutex_exit(&zp->z_acl_lock);
+
+ rw_enter(&zp->z_xattr_lock, RW_WRITER);
+ if (zp->z_xattr_cached) {
+ nvlist_free(zp->z_xattr_cached);
+ zp->z_xattr_cached = NULL;
+ }
+
+ if (zp->z_xattr_parent) {
+ iput(ZTOI(zp->z_xattr_parent));
+ zp->z_xattr_parent = NULL;
+ }
+ rw_exit(&zp->z_xattr_lock);
+
ASSERT(zp->z_sa_hdl == NULL);
err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
if (err) {
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
- return (EINVAL);
+ return (SET_ERROR(EINVAL));
}
zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL);
if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
- return (EIO);
+ return (SET_ERROR(EIO));
}
zp->z_mode = mode;
if (gen != zp->z_gen) {
zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
- return (EIO);
+ return (SET_ERROR(EIO));
}
zp->z_unlinked = (zp->z_links == 0);
zp->z_blksz = doi.doi_data_block_size;
+ zfs_inode_update(zp);
ZFS_OBJ_HOLD_EXIT(zsb, obj_num);
{
zfs_sb_t *zsb = ZTOZSB(zp);
uint64_t z_id = zp->z_id;
+ boolean_t drop_mutex = 0;
ASSERT(zp->z_sa_hdl);
/*
- * Don't allow a zfs_zget() while were trying to release this znode
+ * Don't allow a zfs_zget() while we're trying to release this znode.
+ *
+ * Linux allows direct memory reclaim which means that any KM_SLEEP
+ * allocation may trigger inode eviction. This can lead to a deadlock
+ * through the ->shrink_icache_memory()->evict()->zfs_inactive()->
+ * zfs_zinactive() call path. To avoid this deadlock the process
+ * must not reacquire the mutex when it is already holding it.
*/
- ZFS_OBJ_HOLD_ENTER(zsb, z_id);
+ if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) {
+ ZFS_OBJ_HOLD_ENTER(zsb, z_id);
+ drop_mutex = 1;
+ }
+
mutex_enter(&zp->z_lock);
/*
*/
if (zp->z_unlinked) {
mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
+ if (drop_mutex)
+ ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
zfs_rmnode(zp);
return;
}
mutex_exit(&zp->z_lock);
zfs_znode_dmu_fini(zp);
- ZFS_OBJ_HOLD_EXIT(zsb, z_id);
+
+ if (drop_mutex)
+ ZFS_OBJ_HOLD_EXIT(zsb, z_id);
}
void
if (flag & ATTR_ATIME) {
ZFS_TIME_ENCODE(&now, zp->z_atime);
+ ZTOI(zp)->i_atime.tv_sec = zp->z_atime[0];
+ ZTOI(zp)->i_atime.tv_nsec = zp->z_atime[1];
}
if (flag & ATTR_MTIME) {
if (error == ENOTSUP)
return;
- ASSERT3U(error, ==, 0);
+ ASSERT0(error);
/* What blocksize did we actually get? */
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
}
-#ifdef HAVE_MMAP
-/*
- * This is a dummy interface used when pvn_vplist_dirty() should *not*
- * be calling back into the fs for a putpage(). E.g.: when truncating
- * a file, the pages being "thrown away* don't need to be written out.
- */
-/* ARGSUSED */
-static int
-zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
- int flags, cred_t *cr)
-{
- ASSERT(0);
- return (0);
-}
-#endif /* HAVE_MMAP */
-
/*
* Increase the file length
*
* IN: zp - znode of file to free data in.
* end - new end-of-file
*
- * RETURN: 0 if success
- * error code if failure
+ * RETURN: 0 on success, error code on failure
*/
static int
zfs_extend(znode_t *zp, uint64_t end)
zfs_range_unlock(rl);
return (0);
}
-top:
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
newblksz = 0;
}
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
- if (error == ERESTART) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto top;
- }
dmu_tx_abort(tx);
zfs_range_unlock(rl);
return (error);
* off - start of section to free.
* len - length of section to free.
*
- * RETURN: 0 if success
- * error code if failure
+ * RETURN: 0 on success, error code on failure
*/
static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
* IN: zp - znode of file to free data in.
* end - new end-of-file.
*
- * RETURN: 0 if success
- * error code if failure
+ * RETURN: 0 on success, error code on failure
*/
static int
zfs_trunc(znode_t *zp, uint64_t end)
* flag - current file open mode flags.
* log - TRUE if this action should be logged
*
- * RETURN: 0 if success
- * error code if failure
+ * RETURN: 0 on success, error code on failure
*/
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
-#ifdef HAVE_MANDLOCKS
struct inode *ip = ZTOI(zp);
-#endif /* HAVE_MANDLOCKS */
dmu_tx_t *tx;
zfs_sb_t *zsb = ZTOZSB(zp);
zilog_t *zilog = zsb->z_log;
return (error);
}
-#ifdef HAVE_MANDLOCKS
/*
* Check for any locks in the region to be freed.
*/
-
- if (MANDLOCK(ip, (mode_t)mode)) {
+ if (ip->i_flock && mandatory_lock(ip)) {
uint64_t length = (len ? len : zp->z_size - off);
- if (error = chklock(ip, FWRITE, off, length, flag, NULL))
- return (error);
+ if (!lock_may_write(ip, off, length))
+ return (SET_ERROR(EAGAIN));
}
-#endif /* HAVE_MANDLOCKS */
if (len == 0) {
error = zfs_trunc(zp, off);
tx = dmu_tx_create(zsb->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
- if (error == ERESTART) {
- dmu_tx_wait(tx);
- dmu_tx_abort(tx);
- goto log;
- }
dmu_tx_abort(tx);
return (error);
}
void
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
{
+ struct super_block *sb;
+ zfs_sb_t *zsb;
uint64_t moid, obj, sa_obj, version;
+ uint64_t sense = ZFS_CASE_SENSITIVE;
uint64_t norm = 0;
nvpair_t *elem;
int error;
- timestruc_t now;
- dmu_buf_t *db;
- znode_phys_t *pzp;
+ int i;
+ znode_t *rootzp = NULL;
+ vattr_t vattr;
+ znode_t *zp;
+ zfs_acl_ids_t acl_ids;
/*
* First attempt to create master node.
ASSERT(error == 0);
if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
norm = val;
+ else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
+ sense = val;
}
ASSERT(version != 0);
error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
ASSERT(error == 0);
/*
- * Create root znode with code free of VFS dependencies. This
- * is important because without a registered filesystem and super
- * block all the required VFS hooks will be missing. The critical
- * thing is to just crete the required root znode.
+ * Create root znode. Create minimal znode/inode/zsb/sb
+ * to allow zfs_mknode to work.
*/
- obj = zap_create_norm(os, norm, DMU_OT_DIRECTORY_CONTENTS,
- DMU_OT_ZNODE, sizeof (znode_phys_t), tx);
+ vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
+ vattr.va_mode = S_IFDIR|0755;
+ vattr.va_uid = crgetuid(cr);
+ vattr.va_gid = crgetgid(cr);
+
+ rootzp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
+ rootzp->z_moved = 0;
+ rootzp->z_unlinked = 0;
+ rootzp->z_atime_dirty = 0;
+ rootzp->z_is_sa = USE_SA(version, os);
+
+ zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_PUSHPAGE | KM_NODEBUG);
+ zsb->z_os = os;
+ zsb->z_parent = zsb;
+ zsb->z_version = version;
+ zsb->z_use_fuids = USE_FUIDS(version, os);
+ zsb->z_use_sa = USE_SA(version, os);
+ zsb->z_norm = norm;
+
+ sb = kmem_zalloc(sizeof (struct super_block), KM_PUSHPAGE);
+ sb->s_fs_info = zsb;
- VERIFY(0 == dmu_bonus_hold(os, obj, FTAG, &db));
- dmu_buf_will_dirty(db, tx);
+ ZTOI(rootzp)->i_sb = sb;
+
+ error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
+ &zsb->z_attr_table);
+
+ ASSERT(error == 0);
/*
- * Initialize the znode physical data to zero.
+ * Fold case on file systems that are always or sometimes case
+ * insensitive.
*/
- ASSERT(db->db_size >= sizeof (znode_phys_t));
- bzero(db->db_data, db->db_size);
- pzp = db->db_data;
+ if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
+ zsb->z_norm |= U8_TEXTPREP_TOUPPER;
- if (USE_FUIDS(version, os))
- pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
+ mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&zsb->z_all_znodes, sizeof (znode_t),
+ offsetof(znode_t, z_link_node));
- pzp->zp_size = 2; /* "." and ".." */
- pzp->zp_links = 2;
- pzp->zp_parent = obj;
- pzp->zp_gen = dmu_tx_get_txg(tx);
- pzp->zp_mode = S_IFDIR | 0755;
- pzp->zp_flags = ZFS_ACL_TRIVIAL;
+ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+ mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
- gethrestime(&now);
+ VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
+ cr, NULL, &acl_ids));
+ zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
+ ASSERT3P(zp, ==, rootzp);
+ error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
+ ASSERT(error == 0);
+ zfs_acl_ids_free(&acl_ids);
- ZFS_TIME_ENCODE(&now, pzp->zp_crtime);
- ZFS_TIME_ENCODE(&now, pzp->zp_ctime);
- ZFS_TIME_ENCODE(&now, pzp->zp_atime);
- ZFS_TIME_ENCODE(&now, pzp->zp_mtime);
+ atomic_set(&ZTOI(rootzp)->i_count, 0);
+ sa_handle_destroy(rootzp->z_sa_hdl);
+ kmem_cache_free(znode_cache, rootzp);
- error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &obj, tx);
+ /*
+ * Create shares directory
+ */
+ error = zfs_create_share_dir(zsb, tx);
ASSERT(error == 0);
- dmu_buf_rele(db, FTAG);
-}
+ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
+ mutex_destroy(&zsb->z_hold_mtx[i]);
+ kmem_free(sb, sizeof (struct super_block));
+ kmem_free(zsb, sizeof (zfs_sb_t));
+}
#endif /* _KERNEL */
static int
static int
zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
- dmu_buf_t **db)
+ dmu_buf_t **db, void *tag)
{
dmu_object_info_t doi;
int error;
- if ((error = sa_buf_hold(osp, obj, FTAG, db)) != 0)
+ if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
return (error);
dmu_object_info_from_db(*db, &doi);
doi.doi_bonus_type != DMU_OT_ZNODE) ||
(doi.doi_bonus_type == DMU_OT_ZNODE &&
doi.doi_bonus_size < sizeof (znode_phys_t))) {
- sa_buf_rele(*db, FTAG);
- return (ENOTSUP);
+ sa_buf_rele(*db, tag);
+ return (SET_ERROR(ENOTSUP));
}
error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
if (error != 0) {
- sa_buf_rele(*db, FTAG);
+ sa_buf_rele(*db, tag);
return (error);
}
}
void
-zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db)
+zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
{
sa_handle_destroy(hdl);
- sa_buf_rele(db, FTAG);
+ sa_buf_rele(db, tag);
}
/*
sa_hdl = hdl;
for (;;) {
- uint64_t pobj;
+ uint64_t pobj = 0;
char component[MAXNAMELEN + 2];
size_t complen;
- int is_xattrdir;
+ int is_xattrdir = 0;
if (prevdb)
- zfs_release_sa_handle(prevhdl, prevdb);
+ zfs_release_sa_handle(prevhdl, prevdb, FTAG);
if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj,
&is_xattrdir)) != 0)
prevhdl = sa_hdl;
prevdb = sa_db;
}
- error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db);
+ error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
if (error != 0) {
sa_hdl = prevhdl;
sa_db = prevdb;
if (sa_hdl != NULL && sa_hdl != hdl) {
ASSERT(sa_db != NULL);
- zfs_release_sa_handle(sa_hdl, sa_db);
+ zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
}
if (error == 0)
if (error != 0)
return (error);
- error = zfs_grab_sa_handle(osp, obj, &hdl, &db);
+ error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
if (error != 0)
return (error);
error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
- zfs_release_sa_handle(hdl, db);
+ zfs_release_sa_handle(hdl, db, FTAG);
return (error);
}
if (error != 0)
return (error);
- error = zfs_grab_sa_handle(osp, obj, &hdl, &db);
+ error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
if (error != 0)
return (error);
error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
if (error != 0) {
- zfs_release_sa_handle(hdl, db);
+ zfs_release_sa_handle(hdl, db, FTAG);
return (error);
}
error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
- zfs_release_sa_handle(hdl, db);
+ zfs_release_sa_handle(hdl, db, FTAG);
return (error);
}