*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
return (0);
unregister:
- /*
- * We may attempt to unregister some callbacks that are not
- * registered, but this is OK; it will simply return ENOMSG,
- * which we will ignore.
- */
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
- atime_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RELATIME),
- relatime_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
- xattr_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
- blksz_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
- readonly_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
- devices_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
- setuid_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
- exec_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
- snapdir_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLTYPE),
- acltype_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
- acl_inherit_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
- vscan_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_NBMAND),
- nbmand_changed_cb, zsb);
-
+ dsl_prop_unregister_all(ds, zsb);
return (error);
}
EXPORT_SYMBOL(zfs_register_callbacks);
{
switch (type) {
case ZFS_PROP_USERUSED:
+ case ZFS_PROP_USEROBJUSED:
return (DMU_USERUSED_OBJECT);
case ZFS_PROP_GROUPUSED:
+ case ZFS_PROP_GROUPOBJUSED:
return (DMU_GROUPUSED_OBJECT);
case ZFS_PROP_USERQUOTA:
return (zsb->z_userquota_obj);
case ZFS_PROP_GROUPQUOTA:
return (zsb->z_groupquota_obj);
+ case ZFS_PROP_USEROBJQUOTA:
+ return (zsb->z_userobjquota_obj);
+ case ZFS_PROP_GROUPOBJQUOTA:
+ return (zsb->z_groupobjquota_obj);
default:
- return (SET_ERROR(ENOTSUP));
+ return (ZFS_NO_OBJECT);
}
- return (0);
}
int
zap_attribute_t za;
zfs_useracct_t *buf = vbuf;
uint64_t obj;
+ int offset = 0;
if (!dmu_objset_userspace_present(zsb->z_os))
return (SET_ERROR(ENOTSUP));
+ if ((type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
+ type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA) &&
+ !dmu_objset_userobjspace_present(zsb->z_os))
+ return (SET_ERROR(ENOTSUP));
+
obj = zfs_userquota_prop_to_obj(zsb, type);
- if (obj == 0) {
+ if (obj == ZFS_NO_OBJECT) {
*bufsizep = 0;
return (0);
}
+ if (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED)
+ offset = DMU_OBJACCT_PREFIX_LEN;
+
for (zap_cursor_init_serialized(&zc, zsb->z_os, obj, *cookiep);
(error = zap_cursor_retrieve(&zc, &za)) == 0;
zap_cursor_advance(&zc)) {
*bufsizep)
break;
- fuidstr_to_sid(zsb, za.za_name,
+ /*
+ * skip object quota (with zap name prefix DMU_OBJACCT_PREFIX)
+ * when dealing with block quota and vice versa.
+ */
+ if ((offset > 0) != (strncmp(za.za_name, DMU_OBJACCT_PREFIX,
+ DMU_OBJACCT_PREFIX_LEN) == 0))
+ continue;
+
+ fuidstr_to_sid(zsb, za.za_name + offset,
buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
buf->zu_space = za.za_first_integer;
zfs_userspace_one(zfs_sb_t *zsb, zfs_userquota_prop_t type,
const char *domain, uint64_t rid, uint64_t *valp)
{
- char buf[32];
+ char buf[20 + DMU_OBJACCT_PREFIX_LEN];
+ int offset = 0;
int err;
uint64_t obj;
if (!dmu_objset_userspace_present(zsb->z_os))
return (SET_ERROR(ENOTSUP));
+ if ((type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
+ type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA) &&
+ !dmu_objset_userobjspace_present(zsb->z_os))
+ return (SET_ERROR(ENOTSUP));
+
obj = zfs_userquota_prop_to_obj(zsb, type);
- if (obj == 0)
+ if (obj == ZFS_NO_OBJECT)
return (0);
- err = id_to_fuidstr(zsb, domain, rid, buf, B_FALSE);
+ if (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED) {
+ strncpy(buf, DMU_OBJACCT_PREFIX, DMU_OBJACCT_PREFIX_LEN);
+ offset = DMU_OBJACCT_PREFIX_LEN;
+ }
+
+ err = id_to_fuidstr(zsb, domain, rid, buf + offset, B_FALSE);
if (err)
return (err);
uint64_t *objp;
boolean_t fuid_dirtied;
- if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
- return (SET_ERROR(EINVAL));
-
if (zsb->z_version < ZPL_VERSION_USERSPACE)
return (SET_ERROR(ENOTSUP));
- objp = (type == ZFS_PROP_USERQUOTA) ? &zsb->z_userquota_obj :
- &zsb->z_groupquota_obj;
+ switch (type) {
+ case ZFS_PROP_USERQUOTA:
+ objp = &zsb->z_userquota_obj;
+ break;
+ case ZFS_PROP_GROUPQUOTA:
+ objp = &zsb->z_groupquota_obj;
+ break;
+ case ZFS_PROP_USEROBJQUOTA:
+ objp = &zsb->z_userobjquota_obj;
+ break;
+ case ZFS_PROP_GROUPOBJQUOTA:
+ objp = &zsb->z_groupobjquota_obj;
+ break;
+ default:
+ return (SET_ERROR(EINVAL));
+ }
err = id_to_fuidstr(zsb, domain, rid, buf, B_TRUE);
if (err)
}
EXPORT_SYMBOL(zfs_set_userquota);
+boolean_t
+zfs_fuid_overobjquota(zfs_sb_t *zsb, boolean_t isgroup, uint64_t fuid)
+{
+ char buf[20 + DMU_OBJACCT_PREFIX_LEN];
+ uint64_t used, quota, usedobj, quotaobj;
+ int err;
+
+ if (!dmu_objset_userobjspace_present(zsb->z_os)) {
+ if (dmu_objset_userobjspace_upgradable(zsb->z_os))
+ dmu_objset_userobjspace_upgrade(zsb->z_os);
+ return (B_FALSE);
+ }
+
+ usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
+ quotaobj = isgroup ? zsb->z_groupobjquota_obj : zsb->z_userobjquota_obj;
+ if (quotaobj == 0 || zsb->z_replay)
+ return (B_FALSE);
+
+ (void) sprintf(buf, "%llx", (longlong_t)fuid);
+ err = zap_lookup(zsb->z_os, quotaobj, buf, 8, 1, "a);
+ if (err != 0)
+ return (B_FALSE);
+
+ (void) sprintf(buf, DMU_OBJACCT_PREFIX "%llx", (longlong_t)fuid);
+ err = zap_lookup(zsb->z_os, usedobj, buf, 8, 1, &used);
+ if (err != 0)
+ return (B_FALSE);
+ return (used >= quota);
+}
+
boolean_t
zfs_fuid_overquota(zfs_sb_t *zsb, boolean_t isgroup, uint64_t fuid)
{
- char buf[32];
+ char buf[20];
uint64_t used, quota, usedobj, quotaobj;
int err;
{
uint64_t fuid;
uint64_t quotaobj;
+ struct inode *ip = ZTOI(zp);
quotaobj = isgroup ? zsb->z_groupquota_obj : zsb->z_userquota_obj;
- fuid = isgroup ? zp->z_gid : zp->z_uid;
+ fuid = isgroup ? KGID_TO_SGID(ip->i_gid) : KUID_TO_SUID(ip->i_uid);
if (quotaobj == 0 || zsb->z_replay)
return (B_FALSE);
objset_t *os;
zfs_sb_t *zsb;
uint64_t zval;
- int i, error;
+ int i, size, error;
uint64_t sa_obj;
zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
/*
* Initialize the zfs-specific filesystem structure.
- * Should probably make this a kmem cache, shuffle fields,
- * and just bzero up to z_hold_mtx[].
+ * Should probably make this a kmem cache, shuffle fields.
*/
zsb->z_sb = NULL;
zsb->z_parent = zsb;
if (error && error != ENOENT)
goto out;
+ error = zap_lookup(os, MASTER_NODE_OBJ,
+ zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
+ 8, 1, &zsb->z_userobjquota_obj);
+ if (error && error != ENOENT)
+ goto out;
+
+ error = zap_lookup(os, MASTER_NODE_OBJ,
+ zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
+ 8, 1, &zsb->z_groupobjquota_obj);
+ if (error && error != ENOENT)
+ goto out;
+
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
&zsb->z_fuid_obj);
if (error && error != ENOENT)
rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL);
- zsb->z_hold_mtx = vmem_zalloc(sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ,
- KM_SLEEP);
- for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
- mutex_init(&zsb->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
+ size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
+ zsb->z_hold_size = size;
+ zsb->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, KM_SLEEP);
+ zsb->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
+ for (i = 0; i != size; i++) {
+ avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare,
+ sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
+ mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
+ }
*zsbp = zsb;
return (0);
dmu_objset_disown(os, zsb);
*zsbp = NULL;
- vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
kmem_free(zsb, sizeof (zfs_sb_t));
return (error);
}
void
zfs_sb_free(zfs_sb_t *zsb)
{
- int i;
+ int i, size = zsb->z_hold_size;
zfs_fuid_destroy(zsb);
rrm_destroy(&zsb->z_teardown_lock);
rw_destroy(&zsb->z_teardown_inactive_lock);
rw_destroy(&zsb->z_fuid_lock);
- for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
- mutex_destroy(&zsb->z_hold_mtx[i]);
- vmem_free(zsb->z_hold_mtx, sizeof (kmutex_t) * ZFS_OBJ_MTX_SZ);
+ for (i = 0; i != size; i++) {
+ avl_destroy(&zsb->z_hold_trees[i]);
+ mutex_destroy(&zsb->z_hold_locks[i]);
+ }
+ vmem_free(zsb->z_hold_trees, sizeof (avl_tree_t) * size);
+ vmem_free(zsb->z_hold_locks, sizeof (kmutex_t) * size);
zfs_mntopts_free(zsb->z_mntopts);
kmem_free(zsb, sizeof (zfs_sb_t));
}
zfs_unregister_callbacks(zfs_sb_t *zsb)
{
objset_t *os = zsb->z_os;
- struct dsl_dataset *ds;
-
- /*
- * Unregister properties.
- */
- if (!dmu_objset_is_snapshot(os)) {
- ds = dmu_objset_ds(os);
- VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "relatime", relatime_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
- zsb) == 0);
- VERIFY(dsl_prop_unregister(ds, "acltype", acltype_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "aclinherit",
- acl_inherit_changed_cb, zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "vscan",
- vscan_changed_cb, zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "nbmand",
- nbmand_changed_cb, zsb) == 0);
- }
+ if (!dmu_objset_is_snapshot(os))
+ dsl_prop_unregister_all(dmu_objset_ds(os), zsb);
}
EXPORT_SYMBOL(zfs_unregister_callbacks);
statp->f_fsid.val[0] = (uint32_t)fsid;
statp->f_fsid.val[1] = (uint32_t)(fsid >> 32);
statp->f_type = ZFS_SUPER_MAGIC;
- statp->f_namelen = ZFS_MAXNAMELEN;
+ statp->f_namelen = MAXNAMELEN - 1;
/*
* We have all of 40 characters to stuff a string here.
}
EXPORT_SYMBOL(zfs_root);
-#if !defined(HAVE_SPLIT_SHRINKER_CALLBACK) && !defined(HAVE_SHRINK) && \
- defined(HAVE_D_PRUNE_ALIASES)
+#ifdef HAVE_D_PRUNE_ALIASES
/*
* Linux kernels older than 3.1 do not support a per-filesystem shrinker.
* To accommodate this we must improvise and manually walk the list of znodes
} else {
*objects = (*shrinker->scan_objects)(shrinker, &sc);
}
+
#elif defined(HAVE_SPLIT_SHRINKER_CALLBACK)
*objects = (*shrinker->scan_objects)(shrinker, &sc);
#elif defined(HAVE_SHRINK)
*objects = (*shrinker->shrink)(shrinker, &sc);
#elif defined(HAVE_D_PRUNE_ALIASES)
+#define D_PRUNE_ALIASES_IS_DEFAULT
*objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
#else
#error "No available dentry and inode cache pruning mechanism."
#endif
+
+#if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT)
+#undef D_PRUNE_ALIASES_IS_DEFAULT
+ /*
+ * Fall back to zfs_sb_prune_aliases if the kernel's per-superblock
+ * shrinker couldn't free anything, possibly due to the inodes being
+ * allocated in a different memcg.
+ */
+ if (*objects == 0)
+ *objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
+#endif
+
ZFS_EXIT(zsb);
dprintf_ds(zsb->z_os->os_dsl_dataset,
dmu_objset_set_user(zsb->z_os, zsb);
mutex_exit(&zsb->z_os->os_user_ptr_lock);
} else {
- error = zfs_sb_setup(zsb, B_TRUE);
+ if ((error = zfs_sb_setup(zsb, B_TRUE)))
+ goto out;
}
/* Allocate a root inode for the filesystem. */
if (error) {
dmu_objset_disown(zsb->z_os, zsb);
zfs_sb_free(zsb);
+ /*
+ * make sure we don't have dangling sb->s_fs_info which
+ * zfs_preumount will use.
+ */
+ sb->s_fs_info = NULL;
}
return (error);
{
zfs_sb_t *zsb = sb->s_fs_info;
- if (zsb)
+ /* zsb is NULL when zfs_domount fails during mount */
+ if (zsb) {
zfsctl_destroy(sb->s_fs_info);
+ /*
+ * Wait for iput_async before entering evict_inodes in
+ * generic_shutdown_super. The reason we must finish before
+ * evict_inodes is when lazytime is on, or when zfs_purgedir
+ * calls zfs_zget, iput would bump i_count from 0 to 1. This
+ * would race with the i_count check in evict_inodes. This means
+ * it could destroy the inode while we are still using it.
+ *
+ * We wait for two passes. xattr directories in the first pass
+ * may add xattr entries in zfs_purgedir, so in the second pass
+ * we wait for them. We don't use taskq_wait here because it is
+ * a pool wide taskq. Other mounted filesystems can constantly
+ * do iput_async and there's no guarantee when taskq will be
+ * empty.
+ */
+ taskq_wait_outstanding(dsl_pool_iput_taskq(
+ dmu_objset_pool(zsb->z_os)), 0);
+ taskq_wait_outstanding(dsl_pool_iput_taskq(
+ dmu_objset_pool(zsb->z_os)), 0);
+ }
}
EXPORT_SYMBOL(zfs_preumount);
ZFS_EXIT(zsb);
return (err);
}
+
+ /* Don't export xattr stuff */
+ if (zp->z_pflags & ZFS_XATTR) {
+ iput(ZTOI(zp));
+ ZFS_EXIT(zsb);
+ return (SET_ERROR(ENOENT));
+ }
+
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), &zp_gen,
sizeof (uint64_t));
zp_gen = zp_gen & gen_mask;
if (zp_gen == 0)
zp_gen = 1;
+ if ((fid_gen == 0) && (zsb->z_root == object))
+ fid_gen = zp_gen;
if (zp->z_unlinked || zp_gen != fid_gen) {
dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
fid_gen);
iput(ZTOI(zp));
ZFS_EXIT(zsb);
- return (SET_ERROR(EINVAL));
+ return (SET_ERROR(ENOENT));
}
*ipp = ZTOI(zp);