]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Introduce a tunable to exclude special class buffers from L2ARC
authorGeorge Amanakis <gamanakis@gmail.com>
Thu, 11 Nov 2021 20:52:16 +0000 (21:52 +0100)
committerTony Hutter <hutter2@llnl.gov>
Wed, 14 Sep 2022 18:27:00 +0000 (11:27 -0700)
Special allocation class or dedup vdevs may have roughly the same
performance as L2ARC vdevs. Introduce a new tunable to exclude those
buffers from being cacheable on L2ARC.

Reviewed-by: Don Brady <don.brady@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: George Amanakis <gamanakis@gmail.com>
Closes #11761
Closes #12285

include/sys/arc.h
include/sys/dbuf.h
include/sys/dmu_objset.h
man/man4/zfs.4
module/zfs/arc.c
module/zfs/dbuf.c
module/zfs/dmu.c
module/zfs/dmu_objset.c

index a3241f3685a660dbda3492c3f1410feb2c1aa7b6..5d8176894e60466f561570725cdfb19cfad73395 100644 (file)
@@ -85,6 +85,7 @@ typedef void arc_prune_func_t(int64_t bytes, void *priv);
 
 /* Shared module parameters */
 extern int zfs_arc_average_blocksize;
+extern int l2arc_exclude_special;
 
 /* generic arc_done_func_t's which you can use */
 arc_read_done_func_t arc_bcopy_func;
index 93d80066be82d7868dbacac0a14519de23e97d09..2e7385113ec51ef256abd84145ec5e1333fd1258 100644 (file)
@@ -441,16 +441,7 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
        (dbuf_is_metadata(_db) &&                                       \
        ((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
 
-#define        DBUF_IS_L2CACHEABLE(_db)                                        \
-       ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL ||       \
-       (dbuf_is_metadata(_db) &&                                       \
-       ((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
-
-#define        DNODE_LEVEL_IS_L2CACHEABLE(_dn, _level)                         \
-       ((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_ALL ||       \
-       (((_level) > 0 ||                                               \
-       DMU_OT_IS_METADATA((_dn)->dn_handle->dnh_dnode->dn_type)) &&    \
-       ((_dn)->dn_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
+boolean_t dbuf_is_l2cacheable(dmu_buf_impl_t *db);
 
 #ifdef ZFS_DEBUG
 
index e89ee64ea686afc250d61bc312168e74f866e484..7ade2dc91247eb5f9b34633bfc935bff8000a366 100644 (file)
@@ -200,10 +200,6 @@ struct objset {
 #define        DMU_GROUPUSED_DNODE(os) ((os)->os_groupused_dnode.dnh_dnode)
 #define        DMU_PROJECTUSED_DNODE(os) ((os)->os_projectused_dnode.dnh_dnode)
 
-#define        DMU_OS_IS_L2CACHEABLE(os)                               \
-       ((os)->os_secondary_cache == ZFS_CACHE_ALL ||           \
-       (os)->os_secondary_cache == ZFS_CACHE_METADATA)
-
 /* called from zpl */
 int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
 int dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
index 6495e9b4cd208ed66867ec9aa203fdccc478982d..c32dd4b1b27f844d376510c749dfc0f6f062d7ef 100644 (file)
@@ -109,6 +109,11 @@ A value of
 .Sy 100
 disables this feature.
 .
+.It Sy l2arc_exclude_special Ns = Ns Sy 0 Ns | Ns 1 Pq int
+Controls whether buffers present on special vdevs are eligible for caching
+into L2ARC.
+If set to 1, exclude dbufs on special vdevs from being cached to L2ARC.
+.
 .It Sy l2arc_mfuonly Ns = Ns Sy 0 Ns | Ns 1 Pq  int
 Controls whether only MFU metadata and data are cached from ARC into L2ARC.
 This may be desired to avoid wasting space on L2ARC when reading/writing large
index 215250ea6fec0cd4e5f5d8abae025cfa16890472..0ba366f1858f88a960ae5a153107b577938206ac 100644 (file)
@@ -877,6 +877,14 @@ static void l2arc_hdr_arcstats_update(arc_buf_hdr_t *hdr, boolean_t incr,
 #define        l2arc_hdr_arcstats_decrement_state(hdr) \
        l2arc_hdr_arcstats_update((hdr), B_FALSE, B_TRUE)
 
+/*
+ * l2arc_exclude_special : A ZFS module parameter that controls whether buffers
+ *             present on special vdevs are eligible for caching in L2ARC. If
+ *             set to 1, exclude dbufs on special vdevs from being cached to
+ *             L2ARC.
+ */
+int l2arc_exclude_special = 0;
+
 /*
  * l2arc_mfuonly : A ZFS module parameter that controls whether only MFU
  *             metadata and data are cached from ARC into L2ARC.
@@ -11136,6 +11144,10 @@ ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, rebuild_blocks_min_l2size, ULONG, ZMOD_RW,
 ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, mfuonly, INT, ZMOD_RW,
        "Cache only MFU data from ARC into L2ARC");
 
+ZFS_MODULE_PARAM(zfs_l2arc, l2arc_, exclude_special, INT, ZMOD_RW,
+       "If set to 1 exclude dbufs on special vdevs from being cached to "
+       "L2ARC.");
+
 ZFS_MODULE_PARAM_CALL(zfs_arc, zfs_arc_, lotsfree_percent, param_set_arc_int,
        param_get_int, ZMOD_RW, "System free memory I/O throttle in bytes");
 
index e687d96501ed075dc0cbd31a7add9ec740e9c722..1a022c8b8a0765d61da75e87504c4b21da8be790 100644 (file)
@@ -53,6 +53,7 @@
 #include <cityhash.h>
 #include <sys/spa_impl.h>
 #include <sys/wmsum.h>
+#include <sys/vdev_impl.h>
 
 kstat_t *dbuf_ksp;
 
@@ -594,6 +595,68 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
        }
 }
 
+/*
+ * We want to exclude buffers that are on a special allocation class from
+ * L2ARC.
+ */
+boolean_t
+dbuf_is_l2cacheable(dmu_buf_impl_t *db)
+{
+       vdev_t *vd = NULL;
+       zfs_cache_type_t cache = db->db_objset->os_secondary_cache;
+       blkptr_t *bp = db->db_blkptr;
+
+       if (bp != NULL && !BP_IS_HOLE(bp)) {
+               uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
+               vdev_t *rvd = db->db_objset->os_spa->spa_root_vdev;
+
+               if (vdev < rvd->vdev_children)
+                       vd = rvd->vdev_child[vdev];
+
+               if (cache == ZFS_CACHE_ALL ||
+                   (dbuf_is_metadata(db) && cache == ZFS_CACHE_METADATA)) {
+                       if (vd == NULL)
+                               return (B_TRUE);
+
+                       if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
+                           vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
+                           l2arc_exclude_special == 0)
+                               return (B_TRUE);
+               }
+       }
+
+       return (B_FALSE);
+}
+
+static inline boolean_t
+dnode_level_is_l2cacheable(blkptr_t *bp, dnode_t *dn, int64_t level)
+{
+       vdev_t *vd = NULL;
+       zfs_cache_type_t cache = dn->dn_objset->os_secondary_cache;
+
+       if (bp != NULL && !BP_IS_HOLE(bp)) {
+               uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
+               vdev_t *rvd = dn->dn_objset->os_spa->spa_root_vdev;
+
+               if (vdev < rvd->vdev_children)
+                       vd = rvd->vdev_child[vdev];
+
+               if (cache == ZFS_CACHE_ALL || ((level > 0 ||
+                   DMU_OT_IS_METADATA(dn->dn_handle->dnh_dnode->dn_type)) &&
+                   cache == ZFS_CACHE_METADATA)) {
+                       if (vd == NULL)
+                               return (B_TRUE);
+
+                       if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
+                           vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
+                           l2arc_exclude_special == 0)
+                               return (B_TRUE);
+               }
+       }
+
+       return (B_FALSE);
+}
+
 
 /*
  * This function *must* return indices evenly distributed between all
@@ -1523,7 +1586,7 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags,
        DTRACE_SET_STATE(db, "read issued");
        mutex_exit(&db->db_mtx);
 
-       if (DBUF_IS_L2CACHEABLE(db))
+       if (dbuf_is_l2cacheable(db))
                aflags |= ARC_FLAG_L2CACHE;
 
        dbuf_add_ref(db, NULL);
@@ -3372,7 +3435,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
        dpa->dpa_arg = arg;
 
        /* flag if L2ARC eligible, l2arc_noprefetch then decides */
-       if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
+       if (dnode_level_is_l2cacheable(&bp, dn, level))
                dpa->dpa_aflags |= ARC_FLAG_L2CACHE;
 
        /*
@@ -3390,7 +3453,7 @@ dbuf_prefetch_impl(dnode_t *dn, int64_t level, uint64_t blkid,
                zbookmark_phys_t zb;
 
                /* flag if L2ARC eligible, l2arc_noprefetch then decides */
-               if (DNODE_LEVEL_IS_L2CACHEABLE(dn, level))
+               if (dnode_level_is_l2cacheable(&bp, dn, level))
                        iter_aflags |= ARC_FLAG_L2CACHE;
 
                SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
@@ -4989,7 +5052,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
                        children_ready_cb = dbuf_write_children_ready;
 
                dr->dr_zio = arc_write(pio, os->os_spa, txg,
-                   &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
+                   &dr->dr_bp_copy, data, dbuf_is_l2cacheable(db),
                    &zp, dbuf_write_ready,
                    children_ready_cb, dbuf_write_physdone,
                    dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
index 4e7127bd1babe10c7423431db97358c3c2a0ca65..e38c9b452a284e3ddfa97c385c196ac5cba13962 100644 (file)
@@ -1846,7 +1846,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
        dsa->dsa_tx = NULL;
 
        zio_nowait(arc_write(pio, os->os_spa, txg,
-           zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
+           zgd->zgd_bp, dr->dt.dl.dr_data, dbuf_is_l2cacheable(db),
            &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
            ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
 
index b9380890230c40c755cdd68d08e4f5b5267e5cfe..a8975797e8af3304f7f6523c3719a665191eb2de 100644 (file)
@@ -63,6 +63,8 @@
 #include <sys/dmu_recv.h>
 #include <sys/zfs_project.h>
 #include "zfs_namecheck.h"
+#include <sys/vdev_impl.h>
+#include <sys/arc.h>
 
 /*
  * Needed to close a window in dnode_move() that allows the objset to be freed
@@ -411,6 +413,34 @@ dnode_multilist_index_func(multilist_t *ml, void *obj)
            multilist_get_num_sublists(ml));
 }
 
+static inline boolean_t
+dmu_os_is_l2cacheable(objset_t *os)
+{
+       vdev_t *vd = NULL;
+       zfs_cache_type_t cache = os->os_secondary_cache;
+       blkptr_t *bp = os->os_rootbp;
+
+       if (bp != NULL && !BP_IS_HOLE(bp)) {
+               uint64_t vdev = DVA_GET_VDEV(bp->blk_dva);
+               vdev_t *rvd = os->os_spa->spa_root_vdev;
+
+               if (vdev < rvd->vdev_children)
+                       vd = rvd->vdev_child[vdev];
+
+               if (cache == ZFS_CACHE_ALL || cache == ZFS_CACHE_METADATA) {
+                       if (vd == NULL)
+                               return (B_TRUE);
+
+                       if ((vd->vdev_alloc_bias != VDEV_BIAS_SPECIAL &&
+                           vd->vdev_alloc_bias != VDEV_BIAS_DEDUP) ||
+                           l2arc_exclude_special == 0)
+                               return (B_TRUE);
+               }
+       }
+
+       return (B_FALSE);
+}
+
 /*
  * Instantiates the objset_t in-memory structure corresponding to the
  * objset_phys_t that's pointed to by the specified blkptr_t.
@@ -453,7 +483,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
                SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
                    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
 
-               if (DMU_OS_IS_L2CACHEABLE(os))
+               if (dmu_os_is_l2cacheable(os))
                        aflags |= ARC_FLAG_L2CACHE;
 
                if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
@@ -1661,7 +1691,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
        }
 
        zio = arc_write(pio, os->os_spa, tx->tx_txg,
-           blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
+           blkptr_copy, os->os_phys_buf, dmu_os_is_l2cacheable(os),
            &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
            os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);