]> git.proxmox.com Git - mirror_zfs-debian.git/blobdiff - include/sys/arc_impl.h
Imported Upstream version 0.6.5.3
[mirror_zfs-debian.git] / include / sys / arc_impl.h
index e7068ea188e37f65cca2bd4e6c90ed24f501836f..a9dbfc8dd73e29dbbbff3afc0aae8d096d7d2f06 100644 (file)
@@ -67,15 +67,25 @@ extern "C" {
  */
 
 typedef struct arc_state {
-       list_t  arcs_list[ARC_BUFC_NUMTYPES];   /* list of evictable buffers */
-       uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
-       uint64_t arcs_size;     /* total amount of data in this state */
-       kmutex_t arcs_mtx;
+       /*
+        * list of evictable buffers
+        */
+       multilist_t arcs_list[ARC_BUFC_NUMTYPES];
+       /*
+        * total amount of evictable data in this state
+        */
+       uint64_t arcs_lsize[ARC_BUFC_NUMTYPES];
+       /*
+        * total amount of data in this state; this includes: evictable,
+        * non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
+        */
+       refcount_t arcs_size;
+       /*
+        * supports the "dbufs" kstat
+        */
        arc_state_type_t arcs_state;
 } arc_state_t;
 
-typedef struct l2arc_buf_hdr l2arc_buf_hdr_t;
-
 typedef struct arc_callback arc_callback_t;
 
 struct arc_callback {
@@ -96,31 +106,49 @@ struct arc_write_callback {
        arc_buf_t       *awcb_buf;
 };
 
-struct arc_buf_hdr {
-       /* protected by hash lock */
-       dva_t                   b_dva;
-       uint64_t                b_birth;
-       uint64_t                b_cksum0;
-
+/*
+ * ARC buffers are separated into multiple structs as a memory saving measure:
+ *   - Common fields struct, always defined, and embedded within it:
+ *       - L2-only fields, always allocated but undefined when not in L2ARC
+ *       - L1-only fields, only allocated when in L1ARC
+ *
+ *           Buffer in L1                     Buffer only in L2
+ *    +------------------------+          +------------------------+
+ *    | arc_buf_hdr_t          |          | arc_buf_hdr_t          |
+ *    |                        |          |                        |
+ *    |                        |          |                        |
+ *    |                        |          |                        |
+ *    +------------------------+          +------------------------+
+ *    | l2arc_buf_hdr_t        |          | l2arc_buf_hdr_t        |
+ *    | (undefined if L1-only) |          |                        |
+ *    +------------------------+          +------------------------+
+ *    | l1arc_buf_hdr_t        |
+ *    |                        |
+ *    |                        |
+ *    |                        |
+ *    |                        |
+ *    +------------------------+
+ *
+ * Because it's possible for the L2ARC to become extremely large, we can wind
+ * up eating a lot of memory in L2ARC buffer headers, so the size of a header
+ * is minimized by only allocating the fields necessary for an L1-cached buffer
+ * when a header is actually in the L1 cache. The sub-headers (l1arc_buf_hdr and
+ * l2arc_buf_hdr) are embedded rather than allocated separately to save a couple
+ * words in pointers. arc_hdr_realloc() is used to switch a header between
+ * these two allocation states.
+ */
+typedef struct l1arc_buf_hdr {
        kmutex_t                b_freeze_lock;
-       zio_cksum_t             *b_freeze_cksum;
 
-       arc_buf_hdr_t           *b_hash_next;
        arc_buf_t               *b_buf;
-       uint32_t                b_flags;
        uint32_t                b_datacnt;
-
-       arc_callback_t          *b_acb;
+       /* for waiting on writes to complete */
        kcondvar_t              b_cv;
 
-       /* immutable */
-       arc_buf_contents_t      b_type;
-       uint64_t                b_size;
-       uint64_t                b_spa;
 
        /* protected by arc state mutex */
        arc_state_t             *b_state;
-       list_node_t             b_arc_node;
+       multilist_node_t        b_arc_node;
 
        /* updated atomically */
        clock_t                 b_arc_access;
@@ -133,9 +161,10 @@ struct arc_buf_hdr {
        /* self protecting */
        refcount_t              b_refcnt;
 
-       l2arc_buf_hdr_t         *b_l2hdr;
-       list_node_t             b_l2node;
-};
+       arc_callback_t          *b_acb;
+       /* temporary buffer holder for in-flight compressed data */
+       void                    *b_tmp_cdata;
+} l1arc_buf_hdr_t;
 
 typedef struct l2arc_dev {
        vdev_t                  *l2ad_vdev;     /* vdev */
@@ -143,18 +172,55 @@ typedef struct l2arc_dev {
        uint64_t                l2ad_hand;      /* next write location */
        uint64_t                l2ad_start;     /* first addr on device */
        uint64_t                l2ad_end;       /* last addr on device */
-       uint64_t                l2ad_evict;     /* last addr eviction reached */
        boolean_t               l2ad_first;     /* first sweep through */
        boolean_t               l2ad_writing;   /* currently writing */
-       list_t                  *l2ad_buflist;  /* buffer list */
+       kmutex_t                l2ad_mtx;       /* lock for buffer list */
+       list_t                  l2ad_buflist;   /* buffer list */
        list_node_t             l2ad_node;      /* device list node */
+       refcount_t              l2ad_alloc;     /* allocated bytes */
 } l2arc_dev_t;
 
+typedef struct l2arc_buf_hdr {
+       /* protected by arc_buf_hdr mutex */
+       l2arc_dev_t             *b_dev;         /* L2ARC device */
+       uint64_t                b_daddr;        /* disk address, offset byte */
+       /* real alloc'd buffer size depending on b_compress applied */
+       uint32_t                b_hits;
+       int32_t                 b_asize;
+       uint8_t                 b_compress;
+
+       list_node_t             b_l2node;
+} l2arc_buf_hdr_t;
+
 typedef struct l2arc_write_callback {
        l2arc_dev_t     *l2wcb_dev;             /* device info */
        arc_buf_hdr_t   *l2wcb_head;            /* head of write buflist */
 } l2arc_write_callback_t;
 
+struct arc_buf_hdr {
+       /* protected by hash lock */
+       dva_t                   b_dva;
+       uint64_t                b_birth;
+       /*
+        * Even though this checksum is only set/verified when a buffer is in
+        * the L1 cache, it needs to be in the set of common fields because it
+        * must be preserved from the time before a buffer is written out to
+        * L2ARC until after it is read back in.
+        */
+       zio_cksum_t             *b_freeze_cksum;
+
+       arc_buf_hdr_t           *b_hash_next;
+       arc_flags_t             b_flags;
+
+       /* immutable */
+       int32_t                 b_size;
+       uint64_t                b_spa;
+
+       /* L2ARC fields. Undefined when not in L2ARC. */
+       l2arc_buf_hdr_t         b_l2hdr;
+       /* L1ARC fields. Undefined when in l2arc_only state */
+       l1arc_buf_hdr_t         b_l1hdr;
+};
 #ifdef __cplusplus
 }
 #endif