]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
29809a6c | 23 | * Copyright (c) 2012 by Delphix. All rights reserved. |
34dc7c2f BB |
24 | */ |
25 | ||
428870ff BB |
26 | /* Portions Copyright 2010 Robert Milkowski */ |
27 | ||
34dc7c2f BB |
28 | #ifndef _SYS_ZIL_IMPL_H |
29 | #define _SYS_ZIL_IMPL_H | |
30 | ||
34dc7c2f BB |
31 | #include <sys/zil.h> |
32 | #include <sys/dmu_objset.h> | |
33 | ||
34 | #ifdef __cplusplus | |
35 | extern "C" { | |
36 | #endif | |
37 | ||
38 | /* |
39 | * Log write buffer (lwb): the buffer for one log block plus its on-disk address; linked on zilog->zl_lwb_list via lwb_node. | |
40 | */ | |
41 | typedef struct lwb { | |
42 | zilog_t *lwb_zilog; /* back pointer to log struct */ | |
43 | blkptr_t lwb_blk; /* on disk address of this log blk */ | |
d1d7e268 | 44 | boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */ |
1b7c1e5c | 45 | boolean_t lwb_slog; /* lwb_blk is on SLOG device */ |
34dc7c2f BB |
46 | int lwb_nused; /* # used bytes in buffer */ |
47 | int lwb_sz; /* size of block and buffer */ | |
48 | char *lwb_buf; /* log write buffer (lwb_sz in size, lwb_nused bytes used) */ | |
49 | zio_t *lwb_zio; /* zio for this buffer */ | |
428870ff | 50 | dmu_tx_t *lwb_tx; /* tx for log block allocation */ |
34dc7c2f | 51 | uint64_t lwb_max_txg; /* highest txg in this lwb */ |
34dc7c2f BB |
52 | list_node_t lwb_node; /* zilog->zl_lwb_list linkage */ |
53 | } lwb_t; | |
54 | ||
572e2857 BB |
55 | /* |
56 | * Intent log transaction lists | |
57 | */ | |
58 | typedef struct itxs { | |
59 | list_t i_sync_list; /* list of synchronous itxs */ | |
60 | avl_tree_t i_async_tree; /* AVL tree of per-foid lists of async itxs */ | |
61 | } itxs_t; | |
62 | ||
63 | typedef struct itxg { /* one itx chain; struct zilog holds TXG_SIZE of these in zl_itxg[] */ | |
64 | kmutex_t itxg_lock; /* lock for this structure */ | |
65 | uint64_t itxg_txg; /* txg for this chain */ | |
572e2857 BB |
66 | itxs_t *itxg_itxs; /* sync list and async tree (itxs_t) for this chain */ |
67 | } itxg_t; | |
68 | ||
69 | /* For async itxs we build up an AVL tree of nodes, one per file, each holding a list of that file's async itxs. */ | |
70 | typedef struct itx_async_node { | |
71 | uint64_t ia_foid; /* file object id */ | |
72 | list_t ia_list; /* list of async itxs for this foid */ | |
73 | avl_node_t ia_node; /* AVL tree linkage */ | |
74 | } itx_async_node_t; | |
75 | ||
34dc7c2f BB |
76 | /* |
77 | * Vdev flushing: during a zil_commit(), we build up an AVL tree of the vdevs | |
78 | * we've touched so we know which ones need a write cache flush at the end. | |
79 | */ | |
80 | typedef struct zil_vdev_node { | |
81 | uint64_t zv_vdev; /* vdev to be flushed */ | |
82 | avl_node_t zv_node; /* AVL tree linkage (presumably zilog zl_vdev_tree; confirm) */ | |
83 | } zil_vdev_node_t; | |
84 | ||
428870ff BB |
85 | #define ZIL_PREV_BLKS 16 /* number of entries in zilog zl_prev_blks[] */ |
86 | ||
34dc7c2f BB |
87 | /* |
88 | * Stable storage intent log management structure. One per dataset. | |
89 | */ | |
90 | struct zilog { | |
91 | kmutex_t zl_lock; /* protects most zilog_t fields */ | |
92 | struct dsl_pool *zl_dmu_pool; /* DSL pool */ | |
93 | spa_t *zl_spa; /* handle for read/write log */ | |
94 | const zil_header_t *zl_header; /* log header buffer */ | |
95 | objset_t *zl_os; /* object set we're logging */ | |
96 | zil_get_data_t *zl_get_data; /* callback to get object content */ | |
97 | zio_t *zl_root_zio; /* log writer root zio */ | |
428870ff | 98 | uint64_t zl_lr_seq; /* on-disk log record sequence number */ |
428870ff | 99 | uint64_t zl_commit_lr_seq; /* last committed on-disk lr seq */ |
34dc7c2f | 100 | uint64_t zl_destroy_txg; /* txg of last zil_destroy() */ |
fb5f0bc8 BB |
101 | uint64_t zl_replayed_seq[TXG_SIZE]; /* last replayed rec seq */ |
102 | uint64_t zl_replaying_seq; /* current replay seq number */ | |
34dc7c2f BB |
103 | uint32_t zl_suspend; /* log suspend count */ |
104 | kcondvar_t zl_cv_writer; /* log writer thread completion */ | |
105 | kcondvar_t zl_cv_suspend; /* log suspend completion */ | |
106 | uint8_t zl_suspending; /* log is currently suspending */ | |
107 | uint8_t zl_keep_first; /* keep first log block in destroy */ | |
fb5f0bc8 | 108 | uint8_t zl_replay; /* replaying records while set */ |
34dc7c2f BB |
109 | uint8_t zl_stop_sync; /* for debugging */ |
110 | uint8_t zl_writer; /* boolean: write setup in progress */ | |
428870ff BB |
111 | uint8_t zl_logbias; /* latency or throughput */ |
112 | uint8_t zl_sync; /* synchronous or asynchronous */ | |
113 | int zl_parse_error; /* last zil_parse() error */ | |
114 | uint64_t zl_parse_blk_seq; /* highest blk seq on last parse */ | |
115 | uint64_t zl_parse_lr_seq; /* highest lr seq on last parse */ | |
116 | uint64_t zl_parse_blk_count; /* number of blocks parsed */ | |
117 | uint64_t zl_parse_lr_count; /* number of log records parsed */ | |
572e2857 BB |
118 | uint64_t zl_next_batch; /* next batch number */ |
119 | uint64_t zl_com_batch; /* committed batch number */ | |
120 | kcondvar_t zl_cv_batch[2]; /* batch condition variables */ | |
121 | itxg_t zl_itxg[TXG_SIZE]; /* intent log txg chains */ | |
122 | list_t zl_itx_commit_list; /* itx list to be committed */ | |
34dc7c2f | 123 | uint64_t zl_cur_used; /* current commit log size used */ |
34dc7c2f BB |
124 | list_t zl_lwb_list; /* in-flight log write list (lwb_t, linked via lwb_node) */ |
125 | kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */ | |
126 | avl_tree_t zl_vdev_tree; /* vdevs to flush in zil_commit() */ | |
428870ff | 127 | avl_tree_t zl_bp_tree; /* track bps during log parse */ |
34dc7c2f BB |
128 | clock_t zl_replay_time; /* lbolt of when replay started */ |
129 | uint64_t zl_replay_blks; /* number of log blocks replayed */ | |
428870ff BB |
130 | zil_header_t zl_old_header; /* debugging aid */ |
131 | uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded; ZIL_PREV_BLKS entries */ | |
132 | uint_t zl_prev_rotor; /* rotor for zl_prev_blks[] */ | |
29809a6c | 133 | txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */ |
34dc7c2f BB |
134 | }; |
135 | ||
428870ff | 136 | typedef struct zil_bp_node { /* NOTE(review): presumably an entry in zilog zl_bp_tree; confirm */ |
34dc7c2f BB |
137 | dva_t zn_dva; /* DVA stored in this node */ |
138 | avl_node_t zn_node; /* AVL tree linkage */ | |
428870ff | 139 | } zil_bp_node_t; |
34dc7c2f | 140 | |
1b7c1e5c GDN |
141 | /* |
142 | * Maximum amount of write data that can be put into a single log block: SPA_OLD_MAXBLOCKSIZE less sizeof (zil_chain_t) and sizeof (lr_write_t). | |
143 | */ | |
f1512ee6 | 144 | #define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \ |
9babb374 BB |
145 | sizeof (lr_write_t)) |
146 | ||
1b7c1e5c GDN |
147 | /* |
148 | * Maximum amount of log space we agree to waste in order to reduce the | |
149 | * number of WR_NEED_COPY chunks, and with it zl_get_data() overhead: 1/8 of ZIL_MAX_LOG_DATA (~12%). | |
150 | */ | |
151 | #define ZIL_MAX_WASTE_SPACE (ZIL_MAX_LOG_DATA / 8) | |
152 | ||
153 | /* |
154 | * Maximum amount of write data for WR_COPIED. Fall back to WR_NEED_COPY, | |
155 | * which is more space efficient, if we can't fit at least two log records into | |
156 | * a maximum-sized log block. | |
157 | */ | |
158 | #define ZIL_MAX_COPIED_DATA ((SPA_OLD_MAXBLOCKSIZE - \ | |
159 | sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t)) | |
160 | ||
34dc7c2f BB |
161 | #ifdef __cplusplus |
162 | } | |
163 | #endif | |
164 | ||
165 | #endif /* _SYS_ZIL_IMPL_H */ |