]> git.proxmox.com Git - mirror_zfs.git/blobdiff - module/zfs/spa_misc.c
Pool allocation classes
[mirror_zfs.git] / module / zfs / spa_misc.c
index 44ceb42d46a215c5386dd5dfe294b940fa090732..2c500c010c35ceab917cf8434327bb343e515409 100644 (file)
@@ -25,6 +25,7 @@
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  * Copyright 2013 Saso Kiselkov. All rights reserved.
  * Copyright (c) 2017 Datto Inc.
+ * Copyright (c) 2017, Intel Corporation.
  */
 
 #include <sys/zfs_context.h>
@@ -408,6 +409,19 @@ spa_load_note(spa_t *spa, const char *fmt, ...)
            spa->spa_trust_config ? "trusted" : "untrusted", buf);
 }
 
+/*
+ * By default dedup and user data indirects land in the special class.
+ *
+ * Both flags are consulted by spa_preferred_class() and only matter when
+ * the special class has at least one group:
+ *  - zfs_ddt_data_is_special: DDT blocks may go to the special class when
+ *    the dedup class has no groups.
+ *  - zfs_user_indirect_is_special: indirect blocks (level > 0) of file
+ *    and zvol objects may go to the special class.
+ * When a flag is zero the corresponding blocks use the normal class.
+ */
+int zfs_ddt_data_is_special = B_TRUE;
+int zfs_user_indirect_is_special = B_TRUE;
+
+/*
+ * The percentage of the special class's space that is reserved for
+ * metadata only.  Small file blocks are admitted to the class only while
+ * its allocated space is below
+ * (100 - zfs_special_class_metadata_reserve_pct) percent of its space;
+ * once that much has been allocated we only let metadata into the class.
+ */
+int zfs_special_class_metadata_reserve_pct = 25;
+
 /*
  * ==========================================================================
  * SPA config locking
@@ -1159,6 +1173,8 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
         */
        ASSERT(metaslab_class_validate(spa_normal_class(spa)) == 0);
        ASSERT(metaslab_class_validate(spa_log_class(spa)) == 0);
+       ASSERT(metaslab_class_validate(spa_special_class(spa)) == 0);
+       ASSERT(metaslab_class_validate(spa_dedup_class(spa)) == 0);
 
        spa_config_exit(spa, SCL_ALL, spa);
 
@@ -1554,6 +1570,16 @@ zfs_strtonum(const char *str, char **nptr)
        return (val);
 }
 
+/*
+ * Take one more reference on the allocation classes feature as part of
+ * tx.  This is done once for each special vdev added to the pool; the
+ * feature is expected to be enabled already, which is asserted.
+ */
+void
+spa_activate_allocation_classes(spa_t *spa, dmu_tx_t *tx)
+{
+       ASSERT(spa_feature_is_enabled(spa, SPA_FEATURE_ALLOCATION_CLASSES));
+
+       spa_feature_incr(spa, SPA_FEATURE_ALLOCATION_CLASSES, tx);
+}
+
 /*
  * ==========================================================================
  * Accessor functions
@@ -1811,6 +1837,79 @@ spa_log_class(spa_t *spa)
        return (spa->spa_log_class);
 }
 
+/*
+ * Accessor for the pool's special metaslab class (spa_special_class).
+ */
+metaslab_class_t *
+spa_special_class(spa_t *spa)
+{
+       metaslab_class_t *mc = spa->spa_special_class;
+
+       return (mc);
+}
+
+/*
+ * Accessor for the pool's dedup metaslab class (spa_dedup_class).
+ */
+metaslab_class_t *
+spa_dedup_class(spa_t *spa)
+{
+       metaslab_class_t *mc = spa->spa_dedup_class;
+
+       return (mc);
+}
+
+/*
+ * Locate an appropriate allocation class.
+ *
+ *   spa               pool whose metaslab classes are consulted
+ *   size              block size, compared against special_smallblk
+ *   objtype           DMU object type the block belongs to
+ *   level             indirection level; 0 means a leaf block
+ *   special_smallblk  largest file block size (inclusive) admitted to the
+ *                     special class; 0 disables small-block placement
+ *
+ * The first matching rule decides the returned class:
+ *   1. ZIL blocks: the log class if it has groups, else normal.
+ *   2. DDT blocks: the dedup class if it has groups; else special when
+ *      it has groups and zfs_ddt_data_is_special is set; else normal.
+ *   3. Indirect blocks of file or zvol objects: special when it has
+ *      groups and zfs_user_indirect_is_special is set; else normal.
+ *   4. Remaining metadata and indirect blocks: special if it has groups,
+ *      else normal.
+ *   5. File blocks no larger than special_smallblk: special, provided
+ *      the metadata-only reserve of the special class stays untouched.
+ *   6. Everything else: normal.
+ */
+metaslab_class_t *
+spa_preferred_class(spa_t *spa, uint64_t size, dmu_object_type_t objtype,
+    uint_t level, uint_t special_smallblk)
+{
+       if (DMU_OT_IS_ZIL(objtype)) {
+               if (spa->spa_log_class->mc_groups != 0)
+                       return (spa_log_class(spa));
+               else
+                       return (spa_normal_class(spa));
+       }
+
+       boolean_t has_special_class = spa->spa_special_class->mc_groups != 0;
+
+       if (DMU_OT_IS_DDT(objtype)) {
+               if (spa->spa_dedup_class->mc_groups != 0)
+                       return (spa_dedup_class(spa));
+               else if (has_special_class && zfs_ddt_data_is_special)
+                       return (spa_special_class(spa));
+               else
+                       return (spa_normal_class(spa));
+       }
+
+       /* Indirect blocks for user data can land in special if allowed */
+       if (level > 0 && (DMU_OT_IS_FILE(objtype) || objtype == DMU_OT_ZVOL)) {
+               if (has_special_class && zfs_user_indirect_is_special)
+                       return (spa_special_class(spa));
+               else
+                       return (spa_normal_class(spa));
+       }
+
+       if (DMU_OT_IS_METADATA(objtype) || level > 0) {
+               if (has_special_class)
+                       return (spa_special_class(spa));
+               else
+                       return (spa_normal_class(spa));
+       }
+
+       /*
+        * Allow small file blocks in special class in some cases (like
+        * for the dRAID vdev feature). But always leave a reserve of
+        * zfs_special_class_metadata_reserve_pct exclusively for metadata.
+        *
+        * The threshold is inclusive: a block whose size equals
+        * special_smallblk qualifies.  A threshold of zero keeps all file
+        * blocks out of the special class.
+        */
+       if (DMU_OT_IS_FILE(objtype) && has_special_class &&
+           special_smallblk != 0 && size <= special_smallblk) {
+               metaslab_class_t *special = spa_special_class(spa);
+               uint64_t alloc = metaslab_class_get_alloc(special);
+               uint64_t space = metaslab_class_get_space(special);
+               /* Read the tunable once so the clamp below is stable. */
+               int reserve_pct = zfs_special_class_metadata_reserve_pct;
+
+               /*
+                * Clamp the tunable to a valid percentage.  Without this
+                * a value above 100 makes (100 - pct) negative, which
+                * wraps to a huge unsigned limit and defeats the reserve.
+                */
+               if (reserve_pct < 0)
+                       reserve_pct = 0;
+               else if (reserve_pct > 100)
+                       reserve_pct = 100;
+
+               uint64_t limit =
+                   (space * (uint64_t)(100 - reserve_pct)) / 100;
+
+               if (alloc < limit)
+                       return (special);
+       }
+
+       return (spa_normal_class(spa));
+}
+
 void
 spa_evicting_os_register(spa_t *spa, objset_t *os)
 {
@@ -2500,6 +2599,8 @@ EXPORT_SYMBOL(spa_update_dspace);
 EXPORT_SYMBOL(spa_deflate);
 EXPORT_SYMBOL(spa_normal_class);
 EXPORT_SYMBOL(spa_log_class);
+EXPORT_SYMBOL(spa_special_class);
+EXPORT_SYMBOL(spa_preferred_class);
 EXPORT_SYMBOL(spa_max_replication);
 EXPORT_SYMBOL(spa_prev_software_version);
 EXPORT_SYMBOL(spa_get_failmode);
@@ -2579,5 +2680,13 @@ MODULE_PARM_DESC(spa_asize_inflation,
 
 module_param(spa_slop_shift, int, 0644);
 MODULE_PARM_DESC(spa_slop_shift, "Reserved free space in pool");
+
+module_param(zfs_ddt_data_is_special, int, 0644);
+MODULE_PARM_DESC(zfs_ddt_data_is_special,
+       "Place DDT data into the special class");
+
+module_param(zfs_user_indirect_is_special, int, 0644);
+MODULE_PARM_DESC(zfs_user_indirect_is_special,
+       "Place user data indirect blocks into the special class");
 /* END CSTYLED */
 #endif