git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/dmu_objset.c
Pool allocation classes
[mirror_zfs.git] / module / zfs / dmu_objset.c
index 609e43fe84bcd9a9d8cac502fcfb2622c39059bd..3c9a817f7bec6398e28c1d5a97bbf84c91d532d5 100644 (file)
@@ -28,6 +28,7 @@
  * Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
  */
 
 /* Portions Copyright 2010 Robert Milkowski */
 #include <sys/zfs_onexit.h>
 #include <sys/dsl_destroy.h>
 #include <sys/vdev.h>
+#include <sys/zfeature.h>
 #include <sys/policy.h>
 #include <sys/spa_impl.h>
 #include <sys/dmu_send.h>
+#include <sys/zfs_project.h>
+#include "zfs_namecheck.h"
 
 /*
  * Needed to close a window in dnode_move() that allows the objset to be freed
@@ -79,6 +83,8 @@ int dmu_find_threads = 0;
  */
 int dmu_rescan_dnode_threshold = 1 << DN_MAX_INDBLKSHIFT;
 
+static char *upgrade_tag = "upgrade_tag";
+
 static void dmu_objset_find_dp_cb(void *arg);
 
 static void dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb);
@@ -308,6 +314,20 @@ dnodesize_changed_cb(void *arg, uint64_t newval)
        }
 }
 
+/*
+ * Property-change callback registered for ZFS_PROP_SPECIAL_SMALL_BLOCKS.
+ * 'arg' is the objset_t handed to dsl_prop_register(); the new property
+ * value is cached in its os_zpl_special_smallblock field.
+ */
+static void
+smallblk_changed_cb(void *arg, uint64_t newval)
+{
+       objset_t *objset = arg;
+
+       /*
+        * The value is expected to have been inherited and range-checked
+        * before this callback runs, so it is only sanity-checked here.
+        */
+       ASSERT(newval <= SPA_OLD_MAXBLOCKSIZE);
+       ASSERT(ISP2(newval));
+
+       objset->os_zpl_special_smallblock = newval;
+}
+
 static void
 logbias_changed_cb(void *arg, uint64_t newval)
 {
@@ -333,14 +353,17 @@ dmu_objset_byteswap(void *buf, size_t size)
 {
        objset_phys_t *osp = buf;
 
-       ASSERT(size == OBJSET_OLD_PHYS_SIZE || size == sizeof (objset_phys_t));
+       ASSERT(size == OBJSET_PHYS_SIZE_V1 || size == OBJSET_PHYS_SIZE_V2 ||
+           size == sizeof (objset_phys_t));
        dnode_byteswap(&osp->os_meta_dnode);
        byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
        osp->os_type = BSWAP_64(osp->os_type);
        osp->os_flags = BSWAP_64(osp->os_flags);
-       if (size == sizeof (objset_phys_t)) {
+       if (size >= OBJSET_PHYS_SIZE_V2) {
                dnode_byteswap(&osp->os_userused_dnode);
                dnode_byteswap(&osp->os_groupused_dnode);
+               if (size >= sizeof (objset_phys_t))
+                       dnode_byteswap(&osp->os_projectused_dnode);
        }
 }
 
@@ -376,6 +399,10 @@ dnode_multilist_index_func(multilist_t *ml, void *obj)
            multilist_get_num_sublists(ml));
 }
 
+/*
+ * Instantiates the objset_t in-memory structure corresponding to the
+ * objset_phys_t that's pointed to by the specified blkptr_t.
+ */
 int
 dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
     objset_t **osp)
@@ -385,6 +412,17 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 
        ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
 
+       /*
+        * The $ORIGIN dataset (if it exists) doesn't have an associated
+        * objset, so there's no reason to open it. The $ORIGIN dataset
+        * will not exist on pools older than SPA_VERSION_ORIGIN.
+        */
+       if (ds != NULL && spa_get_dsl(spa) != NULL &&
+           spa_get_dsl(spa)->dp_origin_snap != NULL) {
+               ASSERT3P(ds->ds_dir, !=,
+                   spa_get_dsl(spa)->dp_origin_snap->ds_dir);
+       }
+
        os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
        os->os_dsl_dataset = ds;
        os->os_spa = spa;
@@ -392,6 +430,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
        if (!BP_IS_HOLE(os->os_rootbp)) {
                arc_flags_t aflags = ARC_FLAG_WAIT;
                zbookmark_phys_t zb;
+               int size;
                enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
                SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
                    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
@@ -417,12 +456,19 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
                        return (err);
                }
 
+               if (spa_version(spa) < SPA_VERSION_USERSPACE)
+                       size = OBJSET_PHYS_SIZE_V1;
+               else if (!spa_feature_is_enabled(spa,
+                   SPA_FEATURE_PROJECT_QUOTA))
+                       size = OBJSET_PHYS_SIZE_V2;
+               else
+                       size = sizeof (objset_phys_t);
+
                /* Increase the blocksize if we are permitted. */
-               if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
-                   arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
+               if (arc_buf_size(os->os_phys_buf) < size) {
                        arc_buf_t *buf = arc_alloc_buf(spa, &os->os_phys_buf,
-                           ARC_BUFC_METADATA, sizeof (objset_phys_t));
-                       bzero(buf->b_data, sizeof (objset_phys_t));
+                           ARC_BUFC_METADATA, size);
+                       bzero(buf->b_data, size);
                        bcopy(os->os_phys_buf->b_data, buf->b_data,
                            arc_buf_size(os->os_phys_buf));
                        arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
@@ -433,12 +479,20 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
                os->os_flags = os->os_phys->os_flags;
        } else {
                int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
-                   sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
+                   sizeof (objset_phys_t) : OBJSET_PHYS_SIZE_V1;
                os->os_phys_buf = arc_alloc_buf(spa, &os->os_phys_buf,
                    ARC_BUFC_METADATA, size);
                os->os_phys = os->os_phys_buf->b_data;
                bzero(os->os_phys, size);
        }
+       /*
+        * These properties will be filled in by the logic in zfs_get_zplprop()
+        * when they are queried for the first time.
+        */
+       os->os_version = OBJSET_PROP_UNINITIALIZED;
+       os->os_normalization = OBJSET_PROP_UNINITIALIZED;
+       os->os_utf8only = OBJSET_PROP_UNINITIALIZED;
+       os->os_casesensitivity = OBJSET_PROP_UNINITIALIZED;
 
        /*
         * Note: the changed_cb will be called once before the register
@@ -516,6 +570,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
                                    zfs_prop_to_name(ZFS_PROP_DNODESIZE),
                                    dnodesize_changed_cb, os);
                        }
+                       if (err == 0) {
+                               err = dsl_prop_register(ds,
+                                   zfs_prop_to_name(
+                                   ZFS_PROP_SPECIAL_SMALL_BLOCKS),
+                                   smallblk_changed_cb, os);
+                       }
                }
                if (needlock)
                        dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
@@ -565,11 +625,15 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
 
        dnode_special_open(os, &os->os_phys->os_meta_dnode,
            DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
-       if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
+       if (OBJSET_BUF_HAS_USERUSED(os->os_phys_buf)) {
                dnode_special_open(os, &os->os_phys->os_userused_dnode,
                    DMU_USERUSED_OBJECT, &os->os_userused_dnode);
                dnode_special_open(os, &os->os_phys->os_groupused_dnode,
                    DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
+               if (OBJSET_BUF_HAS_PROJECTUSED(os->os_phys_buf))
+                       dnode_special_open(os,
+                           &os->os_phys->os_projectused_dnode,
+                           DMU_PROJECTUSED_OBJECT, &os->os_projectused_dnode);
        }
 
        mutex_init(&os->os_upgrade_lock, NULL, MUTEX_DEFAULT, NULL);
@@ -660,12 +724,19 @@ dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
                return (SET_ERROR(EINVAL));
        } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
                return (SET_ERROR(EROFS));
+       } else if (!readonly && decrypt &&
+           dsl_dir_incompatible_encryption_version(ds->ds_dir)) {
+               return (SET_ERROR(EROFS));
        }
 
        /* if we are decrypting, we can now check MACs in os->os_phys_buf */
        if (decrypt && arc_is_unauthenticated((*osp)->os_phys_buf)) {
+               zbookmark_phys_t zb;
+
+               SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
+                   ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
                err = arc_untransform((*osp)->os_phys_buf, (*osp)->os_spa,
-                   ds->ds_object, B_FALSE);
+                   &zb, B_FALSE);
                if (err != 0)
                        return (err);
 
@@ -704,13 +775,19 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
                return (err);
        }
 
-       dsl_pool_rele(dp, FTAG);
-
-       /* user accounting requires the dataset to be decrypted */
-       if (dmu_objset_userobjspace_upgradable(*osp) &&
+       /*
+        * User accounting requires the dataset to be decrypted and rw.
+        * We also don't begin user accounting during claiming to help
+        * speed up pool import times and to keep this txg reserved
+        * completely for recovery work.
+        */
+       if ((dmu_objset_userobjspace_upgradable(*osp) ||
+           dmu_objset_projectquota_upgradable(*osp)) &&
+           !readonly && !dp->dp_spa->spa_claiming &&
            (ds->ds_dir->dd_crypto_obj == 0 || decrypt))
-               dmu_objset_userobjspace_upgrade(*osp);
+               dmu_objset_id_quota_upgrade(*osp);
 
+       dsl_pool_rele(dp, FTAG);
        return (0);
 }
 
@@ -763,24 +840,22 @@ dmu_objset_rele(objset_t *os, void *tag)
  * same name so that it can be partially torn down and reconstructed.
  */
 void
-dmu_objset_refresh_ownership(objset_t *os, boolean_t decrypt, void *tag)
+dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds,
+    boolean_t decrypt, void *tag)
 {
        dsl_pool_t *dp;
-       dsl_dataset_t *ds, *newds;
        char name[ZFS_MAX_DATASET_NAME_LEN];
 
-       ds = os->os_dsl_dataset;
        VERIFY3P(ds, !=, NULL);
        VERIFY3P(ds->ds_owner, ==, tag);
        VERIFY(dsl_dataset_long_held(ds));
 
        dsl_dataset_name(ds, name);
-       dp = dmu_objset_pool(os);
+       dp = ds->ds_dir->dd_pool;
        dsl_pool_config_enter(dp, FTAG);
-       dmu_objset_disown(os, decrypt, tag);
+       dsl_dataset_disown(ds, decrypt, tag);
        VERIFY0(dsl_dataset_own(dp, name,
-           (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, &newds));
-       VERIFY3P(newds, ==, os->os_dsl_dataset);
+           (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, newds));
        dsl_pool_config_exit(dp, FTAG);
 }
 
@@ -830,6 +905,8 @@ dmu_objset_evict_dbufs(objset_t *os)
        kmem_free(dn_marker, sizeof (dnode_t));
 
        if (DMU_USERUSED_DNODE(os) != NULL) {
+               if (DMU_PROJECTUSED_DNODE(os) != NULL)
+                       dnode_evict_dbufs(DMU_PROJECTUSED_DNODE(os));
                dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
                dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
        }
@@ -852,11 +929,9 @@ dmu_objset_evict_dbufs(objset_t *os)
 void
 dmu_objset_evict(objset_t *os)
 {
-       int t;
-
        dsl_dataset_t *ds = os->os_dsl_dataset;
 
-       for (t = 0; t < TXG_SIZE; t++)
+       for (int t = 0; t < TXG_SIZE; t++)
                ASSERT(!dmu_objset_is_dirty(os, t));
 
        if (ds)
@@ -886,6 +961,8 @@ dmu_objset_evict_done(objset_t *os)
 
        dnode_special_close(&os->os_meta_dnode);
        if (DMU_USERUSED_DNODE(os)) {
+               if (DMU_PROJECTUSED_DNODE(os))
+                       dnode_special_close(&os->os_projectused_dnode);
                dnode_special_close(&os->os_userused_dnode);
                dnode_special_close(&os->os_groupused_dnode);
        }
@@ -917,7 +994,7 @@ dmu_objset_evict_done(objset_t *os)
        kmem_free(os, sizeof (objset_t));
 }
 
-timestruc_t
+inode_timespec_t
 dmu_objset_snap_cmtime(objset_t *os)
 {
        return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
@@ -1001,6 +1078,12 @@ dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
                        os->os_phys->os_flags |=
                            OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
                }
+               if (dmu_objset_projectquota_enabled(os)) {
+                       ds->ds_feature_activation_needed[
+                           SPA_FEATURE_PROJECT_QUOTA] = B_TRUE;
+                       os->os_phys->os_flags |=
+                           OBJSET_FLAG_PROJECTQUOTA_COMPLETE;
+               }
                os->os_flags = os->os_phys->os_flags;
        }
 
@@ -1044,6 +1127,9 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
        if (strlen(doca->doca_name) >= ZFS_MAX_DATASET_NAME_LEN)
                return (SET_ERROR(ENAMETOOLONG));
 
+       if (dataset_nestcheck(doca->doca_name) != 0)
+               return (SET_ERROR(ENAMETOOLONG));
+
        error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
        if (error != 0)
                return (error);
@@ -1052,7 +1138,7 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
                return (SET_ERROR(EEXIST));
        }
 
-       error = dmu_objset_create_crypt_check(pdd, doca->doca_dcp);
+       error = dmu_objset_create_crypt_check(pdd, doca->doca_dcp, NULL);
        if (error != 0) {
                dsl_dir_rele(pdd, FTAG);
                return (error);
@@ -1169,7 +1255,7 @@ dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
         * allocated. Rather than adding NULL checks throughout this code
         * or adding dummy dcp's to all of the callers we simply create a
         * dummy one here and use that. This zero dcp will have the same
-        * effect as asking for inheritence of all encryption params.
+        * effect as asking for inheritance of all encryption params.
         */
        doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp;
 
@@ -1283,6 +1369,101 @@ dmu_objset_clone(const char *clone, const char *origin)
            6, ZFS_SPACE_CHECK_NORMAL));
 }
 
+/*
+ * Walk the objects of 'os' with dmu_object_next() and call
+ * dmu_object_remap_indirects() on each one, passing 'last_removed_txg'
+ * through unchanged.
+ *
+ * Returns 0 when the walk finishes with ESRCH (presumably the "no more
+ * objects" indication from dmu_object_next() -- confirm against its
+ * contract); otherwise returns the first error that stopped the walk.
+ */
+static int
+dmu_objset_remap_indirects_impl(objset_t *os, uint64_t last_removed_txg)
+{
+       uint64_t obj = 0;
+       int err;
+
+       for (;;) {
+               err = dmu_object_next(os, &obj, B_FALSE, 0);
+               if (err != 0)
+                       break;
+
+               err = dmu_object_remap_indirects(os, obj, last_removed_txg);
+
+               /*
+                * ENOENT means the ZPL removed the object before we managed
+                * to dnode_hold it; EEXIST means the ZPL declared its intent
+                * to remove it (dnode_free) before we got our hold. Neither
+                * should stop us from remapping the remaining objects. Any
+                * other failure ends the walk immediately.
+                */
+               if (err == 0 || err == ENOENT || err == EEXIST)
+                       continue;
+               break;
+       }
+
+       return (err == ESRCH ? 0 : err);
+}
+
+/*
+ * Remap the indirect blocks of every object in the dataset named 'fsname'
+ * following a device removal, then record the txg at which the remap began
+ * via dsl_dir_update_last_remap_txg().
+ *
+ * Returns 0 on success, or when there is nothing to do (no removal has
+ * happened, or the dataset was already remapped after the last removal).
+ * Returns ENOTSUP if SPA_FEATURE_OBSOLETE_COUNTS is not enabled and EINVAL
+ * if 'fsname' names a snapshot; any other value is the error reported by
+ * dmu_objset_hold(), the object walk, or dsl_dir_update_last_remap_txg().
+ */
+int
+dmu_objset_remap_indirects(const char *fsname)
+{
+       objset_t *os = NULL;
+       dsl_dataset_t *ds;
+       dsl_dir_t *dd;
+       spa_t *spa;
+       uint64_t last_removed_txg;
+       uint64_t last_remap_txg;
+       uint64_t remap_start_txg;
+       int error;
+
+       error = dmu_objset_hold(fsname, FTAG, &os);
+       if (error != 0)
+               return (error);
+
+       ds = dmu_objset_ds(os);
+       dd = ds->ds_dir;
+       spa = dmu_objset_spa(os);
+
+       if (!spa_feature_is_enabled(spa, SPA_FEATURE_OBSOLETE_COUNTS)) {
+               error = SET_ERROR(ENOTSUP);
+               goto out_rele;
+       }
+
+       if (dsl_dataset_is_snapshot(ds)) {
+               error = SET_ERROR(EINVAL);
+               goto out_rele;
+       }
+
+       /*
+        * Nothing to remap if no removal has ever taken place.
+        */
+       last_removed_txg = spa_get_last_removal_txg(spa);
+       if (last_removed_txg == -1ULL) {
+               error = 0;
+               goto out_rele;
+       }
+
+       /*
+        * Nothing to remap either if this dsl_dir already recorded a remap
+        * that is newer than the last removal. The lookup is only attempted
+        * on a zapified dsl_dir, and a failed lookup simply falls through
+        * to doing the remap.
+        */
+       if (dsl_dir_is_zapified(dd) &&
+           zap_lookup(spa_meta_objset(spa), dd->dd_object,
+           DD_FIELD_LAST_REMAP_TXG, sizeof (last_remap_txg), 1,
+           &last_remap_txg) == 0 &&
+           last_remap_txg > last_removed_txg) {
+               error = 0;
+               goto out_rele;
+       }
+
+       /*
+        * Take a long hold on the dataset and release the pool hold before
+        * walking the objects.
+        */
+       dsl_dataset_long_hold(ds, FTAG);
+       dsl_pool_rele(dmu_objset_pool(os), FTAG);
+
+       remap_start_txg = spa_last_synced_txg(spa);
+       error = dmu_objset_remap_indirects_impl(os, last_removed_txg);
+       if (error == 0) {
+               /*
+                * Record the txg sampled *before* the walk, so that every
+                * remappable block older than the stored last_remap_txg is
+                * guaranteed to have been remapped.
+                */
+               error = dsl_dir_update_last_remap_txg(dd, remap_start_txg);
+       }
+
+       /*
+        * The pool hold was released above, so only the long hold and the
+        * dataset hold remain to be dropped on this path.
+        */
+       dsl_dataset_long_rele(ds, FTAG);
+       dsl_dataset_rele(ds, FTAG);
+       return (error);
+
+out_rele:
+       /* Early exits release what dmu_objset_hold() acquired. */
+       dmu_objset_rele(os, FTAG);
+       return (error);
+}
+
 int
 dmu_objset_snapshot_one(const char *fsname, const char *snapname)
 {
@@ -1313,6 +1494,7 @@ dmu_objset_upgrade_task_cb(void *data)
        os->os_upgrade_exit = B_TRUE;
        os->os_upgrade_id = 0;
        mutex_exit(&os->os_upgrade_lock);
+       dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
 }
 
 static void
@@ -1321,6 +1503,9 @@ dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb)
        if (os->os_upgrade_id != 0)
                return;
 
+       ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
+       dsl_dataset_long_hold(dmu_objset_ds(os), upgrade_tag);
+
        mutex_enter(&os->os_upgrade_lock);
        if (os->os_upgrade_id == 0 && os->os_upgrade_status == 0) {
                os->os_upgrade_exit = B_FALSE;
@@ -1328,8 +1513,10 @@ dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb)
                os->os_upgrade_id = taskq_dispatch(
                    os->os_spa->spa_upgrade_taskq,
                    dmu_objset_upgrade_task_cb, os, TQ_SLEEP);
-               if (os->os_upgrade_id == TASKQID_INVALID)
+               if (os->os_upgrade_id == TASKQID_INVALID) {
+                       dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
                        os->os_upgrade_status = ENOMEM;
+               }
        }
        mutex_exit(&os->os_upgrade_lock);
 }
@@ -1345,7 +1532,9 @@ dmu_objset_upgrade_stop(objset_t *os)
                os->os_upgrade_id = 0;
                mutex_exit(&os->os_upgrade_lock);
 
-               taskq_cancel_id(os->os_spa->spa_upgrade_taskq, id);
+               if ((taskq_cancel_id(os->os_spa->spa_upgrade_taskq, id)) == 0) {
+                       dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
+               }
                txg_wait_synced(os->os_spa->spa_dsl_pool, 0);
        } else {
                mutex_exit(&os->os_upgrade_lock);
@@ -1370,10 +1559,23 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
                ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
                multilist_sublist_remove(list, dn);
 
+               /*
+                * If we are not doing useraccounting (os_synced_dnodes == NULL)
+                * we are done with this dnode for this txg. Unset dn_dirty_txg
+                * if later txgs aren't dirtying it so that future holders do
+                * not get a stale value. Otherwise, we will do this in
+                * userquota_updates_task() when processing has completely
+                * finished for this txg.
+                */
                multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
                if (newlist != NULL) {
                        (void) dnode_add_ref(dn, newlist);
                        multilist_insert(newlist, dn);
+               } else {
+                       mutex_enter(&dn->dn_mtx);
+                       if (dn->dn_dirty_txg == tx->tx_txg)
+                               dn->dn_dirty_txg = 0;
+                       mutex_exit(&dn->dn_mtx);
                }
 
                dnode_sync(dn, tx);
@@ -1384,8 +1586,6 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
 static void
 dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
 {
-       int i;
-
        blkptr_t *bp = zio->io_bp;
        objset_t *os = arg;
        dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
@@ -1399,9 +1599,9 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
         * Update rootbp fill count: it should be the number of objects
         * allocated in the object set (not counting the "special"
         * objects that are stored in the objset_phys_t -- the meta
-        * dnode and user/group accounting objects).
+        * dnode and user/group/project accounting objects).
         */
-       for (i = 0; i < dnp->dn_nblkptr; i++)
+       for (int i = 0; i < dnp->dn_nblkptr; i++)
                fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
 
        BP_SET_FILL(bp, fill);
@@ -1495,13 +1695,14 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
        dmu_write_policy(os, NULL, 0, 0, &zp);
 
        /*
-        * If we are either claiming the ZIL or doing a raw receive write out
-        * the os_phys_buf raw. Neither of these actions will effect the MAC
-        * at this point.
+        * If we are either claiming the ZIL or doing a raw receive, write
+        * out the os_phys_buf raw. Neither of these actions will affect the
+        * MAC at this point.
         */
-       if (arc_is_unauthenticated(os->os_phys_buf) || os->os_next_write_raw) {
+       if (os->os_raw_receive ||
+           os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
                ASSERT(os->os_encrypted);
-               os->os_next_write_raw = B_FALSE;
+               os->os_next_write_raw[tx->tx_txg & TXG_MASK] = B_FALSE;
                arc_convert_to_raw(os->os_phys_buf,
                    os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
                    DMU_OT_OBJSET, NULL, NULL, NULL);
@@ -1528,6 +1729,12 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
                dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
        }
 
+       if (DMU_PROJECTUSED_DNODE(os) &&
+           DMU_PROJECTUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
+               DMU_PROJECTUSED_DNODE(os)->dn_zio = zio;
+               dnode_sync(DMU_PROJECTUSED_DNODE(os), tx);
+       }
+
        txgoff = tx->tx_txg & TXG_MASK;
 
        if (dmu_objset_userused_enabled(os) &&
@@ -1611,6 +1818,14 @@ dmu_objset_userobjused_enabled(objset_t *os)
            spa_feature_is_enabled(os->os_spa, SPA_FEATURE_USEROBJ_ACCOUNTING));
 }
 
+/*
+ * Report whether project quota accounting can be used on 'os'. All three
+ * conditions must hold: a used-callback is registered for the objset's
+ * type, the objset has a project-used dnode, and the pool has
+ * SPA_FEATURE_PROJECT_QUOTA enabled.
+ */
+boolean_t
+dmu_objset_projectquota_enabled(objset_t *os)
+{
+       if (used_cbs[os->os_phys->os_type] == NULL)
+               return (B_FALSE);
+
+       if (DMU_PROJECTUSED_DNODE(os) == NULL)
+               return (B_FALSE);
+
+       return (spa_feature_is_enabled(os->os_spa,
+           SPA_FEATURE_PROJECT_QUOTA) ? B_TRUE : B_FALSE);
+}
+
 typedef struct userquota_node {
        /* must be in the first field, see userquota_update_cache() */
        char            uqn_id[20 + DMU_OBJACCT_PREFIX_LEN];
@@ -1621,6 +1836,7 @@ typedef struct userquota_node {
 typedef struct userquota_cache {
        avl_tree_t uqc_user_deltas;
        avl_tree_t uqc_group_deltas;
+       avl_tree_t uqc_project_deltas;
 } userquota_cache_t;
 
 static int
@@ -1673,6 +1889,19 @@ do_userquota_cacheflush(objset_t *os, userquota_cache_t *cache, dmu_tx_t *tx)
                kmem_free(uqn, sizeof (*uqn));
        }
        avl_destroy(&cache->uqc_group_deltas);
+
+       if (dmu_objset_projectquota_enabled(os)) {
+               cookie = NULL;
+               while ((uqn = avl_destroy_nodes(&cache->uqc_project_deltas,
+                   &cookie)) != NULL) {
+                       mutex_enter(&os->os_userused_lock);
+                       VERIFY0(zap_increment(os, DMU_PROJECTUSED_OBJECT,
+                           uqn->uqn_id, uqn->uqn_delta, tx));
+                       mutex_exit(&os->os_userused_lock);
+                       kmem_free(uqn, sizeof (*uqn));
+               }
+               avl_destroy(&cache->uqc_project_deltas);
+       }
 }
 
 static void
@@ -1697,10 +1926,11 @@ userquota_update_cache(avl_tree_t *avl, const char *id, int64_t delta)
 }
 
 static void
-do_userquota_update(userquota_cache_t *cache, uint64_t used, uint64_t flags,
-    uint64_t user, uint64_t group, boolean_t subtract)
+do_userquota_update(objset_t *os, userquota_cache_t *cache, uint64_t used,
+    uint64_t flags, uint64_t user, uint64_t group, uint64_t project,
+    boolean_t subtract)
 {
-       if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
+       if (flags & DNODE_FLAG_USERUSED_ACCOUNTED) {
                int64_t delta = DNODE_MIN_SIZE + used;
                char name[20];
 
@@ -1712,12 +1942,18 @@ do_userquota_update(userquota_cache_t *cache, uint64_t used, uint64_t flags,
 
                (void) sprintf(name, "%llx", (longlong_t)group);
                userquota_update_cache(&cache->uqc_group_deltas, name, delta);
+
+               if (dmu_objset_projectquota_enabled(os)) {
+                       (void) sprintf(name, "%llx", (longlong_t)project);
+                       userquota_update_cache(&cache->uqc_project_deltas,
+                           name, delta);
+               }
        }
 }
 
 static void
-do_userobjquota_update(userquota_cache_t *cache, uint64_t flags,
-    uint64_t user, uint64_t group, boolean_t subtract)
+do_userobjquota_update(objset_t *os, userquota_cache_t *cache, uint64_t flags,
+    uint64_t user, uint64_t group, uint64_t project, boolean_t subtract)
 {
        if (flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) {
                char name[20 + DMU_OBJACCT_PREFIX_LEN];
@@ -1730,6 +1966,13 @@ do_userobjquota_update(userquota_cache_t *cache, uint64_t flags,
                (void) snprintf(name, sizeof (name), DMU_OBJACCT_PREFIX "%llx",
                    (longlong_t)group);
                userquota_update_cache(&cache->uqc_group_deltas, name, delta);
+
+               if (dmu_objset_projectquota_enabled(os)) {
+                       (void) snprintf(name, sizeof (name),
+                           DMU_OBJACCT_PREFIX "%llx", (longlong_t)project);
+                       userquota_update_cache(&cache->uqc_project_deltas,
+                           name, delta);
+               }
        }
 }
 
@@ -1757,6 +2000,10 @@ userquota_updates_task(void *arg)
            sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
        avl_create(&cache.uqc_group_deltas, userquota_compare,
            sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
+       if (dmu_objset_projectquota_enabled(os))
+               avl_create(&cache.uqc_project_deltas, userquota_compare,
+                   sizeof (userquota_node_t), offsetof(userquota_node_t,
+                   uqn_node));
 
        while ((dn = multilist_sublist_head(list)) != NULL) {
                int flags;
@@ -1768,18 +2015,21 @@ userquota_updates_task(void *arg)
                flags = dn->dn_id_flags;
                ASSERT(flags);
                if (flags & DN_ID_OLD_EXIST)  {
-                       do_userquota_update(&cache,
-                           dn->dn_oldused, dn->dn_oldflags,
-                           dn->dn_olduid, dn->dn_oldgid, B_TRUE);
-                       do_userobjquota_update(&cache, dn->dn_oldflags,
-                           dn->dn_olduid, dn->dn_oldgid, B_TRUE);
+                       do_userquota_update(os, &cache, dn->dn_oldused,
+                           dn->dn_oldflags, dn->dn_olduid, dn->dn_oldgid,
+                           dn->dn_oldprojid, B_TRUE);
+                       do_userobjquota_update(os, &cache, dn->dn_oldflags,
+                           dn->dn_olduid, dn->dn_oldgid,
+                           dn->dn_oldprojid, B_TRUE);
                }
                if (flags & DN_ID_NEW_EXIST) {
-                       do_userquota_update(&cache,
+                       do_userquota_update(os, &cache,
                            DN_USED_BYTES(dn->dn_phys), dn->dn_phys->dn_flags,
-                           dn->dn_newuid, dn->dn_newgid, B_FALSE);
-                       do_userobjquota_update(&cache, dn->dn_phys->dn_flags,
-                           dn->dn_newuid, dn->dn_newgid, B_FALSE);
+                           dn->dn_newuid, dn->dn_newgid,
+                           dn->dn_newprojid, B_FALSE);
+                       do_userobjquota_update(os, &cache,
+                           dn->dn_phys->dn_flags, dn->dn_newuid, dn->dn_newgid,
+                           dn->dn_newprojid, B_FALSE);
                }
 
                mutex_enter(&dn->dn_mtx);
@@ -1788,6 +2038,7 @@ userquota_updates_task(void *arg)
                if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
                        dn->dn_olduid = dn->dn_newuid;
                        dn->dn_oldgid = dn->dn_newgid;
+                       dn->dn_oldprojid = dn->dn_newprojid;
                        dn->dn_id_flags |= DN_ID_OLD_EXIST;
                        if (dn->dn_bonuslen == 0)
                                dn->dn_id_flags |= DN_ID_CHKED_SPILL;
@@ -1795,6 +2046,8 @@ userquota_updates_task(void *arg)
                                dn->dn_id_flags |= DN_ID_CHKED_BONUS;
                }
                dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
+               if (dn->dn_dirty_txg == spa_syncing_txg(os->os_spa))
+                       dn->dn_dirty_txg = 0;
                mutex_exit(&dn->dn_mtx);
 
                multilist_sublist_remove(list, dn);
@@ -1811,11 +2064,20 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
        if (!dmu_objset_userused_enabled(os))
                return;
 
-       /* if this is a raw receive just return and handle accounting later */
+       /*
+        * If this is a raw receive just return and handle accounting
+        * later when we have the keys loaded. We also don't do user
+        * accounting during claiming since the datasets are not owned
+        * for the duration of claiming and this txg should only be
+        * used for recovery.
+        */
        if (os->os_encrypted && dmu_objset_is_receiving(os))
                return;
 
-       /* Allocate the user/groupused objects if necessary. */
+       if (tx->tx_txg <= os->os_spa->spa_claim_max_txg)
+               return;
+
+       /* Allocate the user/group/project used objects if necessary. */
        if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
                VERIFY0(zap_create_claim(os,
                    DMU_USERUSED_OBJECT,
@@ -1825,6 +2087,12 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
                    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
        }
 
+       if (dmu_objset_projectquota_enabled(os) &&
+           DMU_PROJECTUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
+               VERIFY0(zap_create_claim(os, DMU_PROJECTUSED_OBJECT,
+                   DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+       }
+
        for (int i = 0;
            i < multilist_get_num_sublists(os->os_synced_dnodes); i++) {
                userquota_updates_arg_t *uua =
@@ -1887,6 +2155,7 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
        dmu_buf_impl_t *db = NULL;
        uint64_t *user = NULL;
        uint64_t *group = NULL;
+       uint64_t *project = NULL;
        int flags = dn->dn_id_flags;
        int error;
        boolean_t have_spill = B_FALSE;
@@ -1944,9 +2213,11 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
                ASSERT(data);
                user = &dn->dn_olduid;
                group = &dn->dn_oldgid;
+               project = &dn->dn_oldprojid;
        } else if (data) {
                user = &dn->dn_newuid;
                group = &dn->dn_newgid;
+               project = &dn->dn_newprojid;
        }
 
        /*
@@ -1954,7 +2225,7 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
         * type has changed and that type isn't an object type to track
         */
        error = used_cbs[os->os_phys->os_type](dn->dn_bonustype, data,
-           user, group);
+           user, group, project);
 
        /*
         * Preserve existing uid/gid when the callback can't determine
@@ -1967,9 +2238,11 @@ dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
                if (flags & DN_ID_OLD_EXIST) {
                        dn->dn_newuid = dn->dn_olduid;
                        dn->dn_newgid = dn->dn_oldgid;
+                       dn->dn_newprojid = dn->dn_oldprojid;
                } else {
                        dn->dn_newuid = 0;
                        dn->dn_newgid = 0;
+                       dn->dn_newprojid = ZFS_DEFAULT_PROJID;
                }
                error = 0;
        }
@@ -2007,6 +2280,13 @@ dmu_objset_userobjspace_present(objset_t *os)
            OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE);
 }
 
+boolean_t
+dmu_objset_projectquota_present(objset_t *os)
+{
+       return (os->os_phys->os_flags &
+           OBJSET_FLAG_PROJECTQUOTA_COMPLETE);
+}
+
 static int
 dmu_objset_space_upgrade(objset_t *os)
 {
@@ -2076,33 +2356,43 @@ dmu_objset_userspace_upgrade(objset_t *os)
 }
 
 static int
-dmu_objset_userobjspace_upgrade_cb(objset_t *os)
+dmu_objset_id_quota_upgrade_cb(objset_t *os)
 {
        int err = 0;
 
-       if (dmu_objset_userobjspace_present(os))
+       if (dmu_objset_userobjspace_present(os) &&
+           dmu_objset_projectquota_present(os))
                return (0);
        if (dmu_objset_is_snapshot(os))
                return (SET_ERROR(EINVAL));
        if (!dmu_objset_userobjused_enabled(os))
                return (SET_ERROR(ENOTSUP));
+       if (!dmu_objset_projectquota_enabled(os) &&
+           dmu_objset_userobjspace_present(os))
+               return (SET_ERROR(ENOTSUP));
 
        dmu_objset_ds(os)->ds_feature_activation_needed[
            SPA_FEATURE_USEROBJ_ACCOUNTING] = B_TRUE;
+       if (dmu_objset_projectquota_enabled(os))
+               dmu_objset_ds(os)->ds_feature_activation_needed[
+                   SPA_FEATURE_PROJECT_QUOTA] = B_TRUE;
 
        err = dmu_objset_space_upgrade(os);
        if (err)
                return (err);
 
        os->os_flags |= OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
+       if (dmu_objset_projectquota_enabled(os))
+               os->os_flags |= OBJSET_FLAG_PROJECTQUOTA_COMPLETE;
+
        txg_wait_synced(dmu_objset_pool(os), 0);
        return (0);
 }
 
 void
-dmu_objset_userobjspace_upgrade(objset_t *os)
+dmu_objset_id_quota_upgrade(objset_t *os)
 {
-       dmu_objset_upgrade(os, dmu_objset_userobjspace_upgrade_cb);
+       dmu_objset_upgrade(os, dmu_objset_id_quota_upgrade_cb);
 }
 
 boolean_t
@@ -2114,6 +2404,15 @@ dmu_objset_userobjspace_upgradable(objset_t *os)
            !dmu_objset_userobjspace_present(os));
 }
 
+boolean_t
+dmu_objset_projectquota_upgradable(objset_t *os)
+{
+       return (dmu_objset_type(os) == DMU_OST_ZFS &&
+           !dmu_objset_is_snapshot(os) &&
+           dmu_objset_projectquota_enabled(os) &&
+           !dmu_objset_projectquota_present(os));
+}
+
 void
 dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
     uint64_t *usedobjsp, uint64_t *availobjsp)
@@ -2273,7 +2572,6 @@ static void
 dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
 {
        dsl_pool_t *dp = dcp->dc_dp;
-       dmu_objset_find_ctx_t *child_dcp;
        dsl_dir_t *dd;
        dsl_dataset_t *ds;
        zap_cursor_t zc;
@@ -2315,7 +2613,7 @@ dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
                            sizeof (uint64_t));
                        ASSERT3U(attr->za_num_integers, ==, 1);
 
-                       child_dcp =
+                       dmu_objset_find_ctx_t *child_dcp =
                            kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
                        *child_dcp = *dcp;
                        child_dcp->dc_ddobj = attr->za_first_integer;
@@ -2630,6 +2928,13 @@ dmu_objset_find(char *name, int func(const char *, void *), void *arg,
        return (error);
 }
 
+boolean_t
+dmu_objset_incompatible_encryption_version(objset_t *os)
+{
+       return (dsl_dir_incompatible_encryption_version(
+           os->os_dsl_dataset->ds_dir));
+}
+
 void
 dmu_objset_set_user(objset_t *os, void *user_ptr)
 {
@@ -2677,7 +2982,7 @@ dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx)
        }
 }
 
-#if defined(_KERNEL) && defined(HAVE_SPL)
+#if defined(_KERNEL)
 EXPORT_SYMBOL(dmu_objset_zil);
 EXPORT_SYMBOL(dmu_objset_pool);
 EXPORT_SYMBOL(dmu_objset_ds);
@@ -2716,7 +3021,10 @@ EXPORT_SYMBOL(dmu_objset_userused_enabled);
 EXPORT_SYMBOL(dmu_objset_userspace_upgrade);
 EXPORT_SYMBOL(dmu_objset_userspace_present);
 EXPORT_SYMBOL(dmu_objset_userobjused_enabled);
-EXPORT_SYMBOL(dmu_objset_userobjspace_upgrade);
 EXPORT_SYMBOL(dmu_objset_userobjspace_upgradable);
 EXPORT_SYMBOL(dmu_objset_userobjspace_present);
+EXPORT_SYMBOL(dmu_objset_projectquota_enabled);
+EXPORT_SYMBOL(dmu_objset_projectquota_present);
+EXPORT_SYMBOL(dmu_objset_projectquota_upgradable);
+EXPORT_SYMBOL(dmu_objset_id_quota_upgrade);
 #endif