]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Fix ARC hit rate
authorBrian Behlendorf <behlendorf1@llnl.gov>
Mon, 8 Jan 2018 17:52:36 +0000 (09:52 -0800)
committerGitHub <noreply@github.com>
Mon, 8 Jan 2018 17:52:36 +0000 (09:52 -0800)
When the compressed ARC feature was added in commit d3c2ae1
the method of reference counting in the ARC was modified.  As
part of this accounting change the arc_buf_add_ref() function
was removed entirely.

This would have be fine but the arc_buf_add_ref() function
served a second undocumented purpose of updating the ARC access
information when taking a hold on a dbuf.  Without this logic
in place a cached dbuf would not migrate its associated
arc_buf_hdr_t to the MFU list.  This would negatively impact
the ARC hit rate, particularly on systems with a small ARC.

This change reinstates the missing call to arc_access() from
dbuf_hold() by implementing a new arc_buf_access() function.

Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Tim Chase <tim@chase2k.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #6171
Closes #6852
Closes #6989

include/sys/arc.h
module/zfs/arc.c
module/zfs/dbuf.c

index 0e7a85188b73a25e0b129ae5eeb77744282cd5b7..de280362d4d2160232a5134f5d1881e89494f6ca 100644 (file)
@@ -263,6 +263,7 @@ void arc_buf_destroy(arc_buf_t *buf, void *tag);
 void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
 uint64_t arc_buf_size(arc_buf_t *buf);
 uint64_t arc_buf_lsize(arc_buf_t *buf);
+void arc_buf_access(arc_buf_t *buf);
 void arc_release(arc_buf_t *buf, void *tag);
 int arc_released(arc_buf_t *buf);
 void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
index 476351eb42844130c860e6c6120d260ebcfa613f..45b0abe7fd6c28fc2507ea9604b7c3d559896bc5 100644 (file)
@@ -448,9 +448,14 @@ typedef struct arc_stats {
         * by multiple buffers.
         */
        kstat_named_t arcstat_mutex_miss;
+       /*
+        * Number of buffers skipped when updating the access state due to the
+        * header having already been released after acquiring the hash lock.
+        */
+       kstat_named_t arcstat_access_skip;
        /*
         * Number of buffers skipped because they have I/O in progress, are
-        * indrect prefetch buffers that have not lived long enough, or are
+        * indirect prefetch buffers that have not lived long enough, or are
         * not from the spa we're trying to evict from.
         */
        kstat_named_t arcstat_evict_skip;
@@ -688,6 +693,7 @@ static arc_stats_t arc_stats = {
        { "mfu_ghost_hits",             KSTAT_DATA_UINT64 },
        { "deleted",                    KSTAT_DATA_UINT64 },
        { "mutex_miss",                 KSTAT_DATA_UINT64 },
+       { "access_skip",                KSTAT_DATA_UINT64 },
        { "evict_skip",                 KSTAT_DATA_UINT64 },
        { "evict_not_enough",           KSTAT_DATA_UINT64 },
        { "evict_l2_cached",            KSTAT_DATA_UINT64 },
@@ -5611,6 +5617,50 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
        }
 }
 
+/*
+ * This routine is called by dbuf_hold() to update the arc_access() state
+ * which otherwise would be skipped for entries in the dbuf cache.
+ */
+void
+arc_buf_access(arc_buf_t *buf)
+{
+       mutex_enter(&buf->b_evict_lock);
+       arc_buf_hdr_t *hdr = buf->b_hdr;
+
+       /*
+        * Avoid taking the hash_lock when possible as an optimization.
+        * The header must be checked again under the hash_lock in order
+        * to handle the case where it is concurrently being released.
+        */
+       if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) {
+               mutex_exit(&buf->b_evict_lock);
+               return;
+       }
+
+       kmutex_t *hash_lock = HDR_LOCK(hdr);
+       mutex_enter(hash_lock);
+
+       if (hdr->b_l1hdr.b_state == arc_anon || HDR_EMPTY(hdr)) {
+               mutex_exit(hash_lock);
+               mutex_exit(&buf->b_evict_lock);
+               ARCSTAT_BUMP(arcstat_access_skip);
+               return;
+       }
+
+       mutex_exit(&buf->b_evict_lock);
+
+       ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
+           hdr->b_l1hdr.b_state == arc_mfu);
+
+       DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
+       arc_access(hdr, hash_lock);
+       mutex_exit(hash_lock);
+
+       ARCSTAT_BUMP(arcstat_hits);
+       ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr) && !HDR_PRESCIENT_PREFETCH(hdr),
+           demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, hits);
+}
+
 /* a generic arc_read_done_func_t which you can use */
 /* ARGSUSED */
 void
index 190d0656a42f8ae52d1ca3a0f251eedd4f3e206a..517a284de2be86cac3c451a1fb301845885a132a 100644 (file)
@@ -2821,8 +2821,10 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
                return (SET_ERROR(ENOENT));
        }
 
-       if (dh->dh_db->db_buf != NULL)
+       if (dh->dh_db->db_buf != NULL) {
+               arc_buf_access(dh->dh_db->db_buf);
                ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data);
+       }
 
        ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf));