]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/dsl_synctask.c
ZIL: Remove 128K into 2x68K LWB split optimization
[mirror_zfs.git] / module / zfs / dsl_synctask.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
1d3ba0bf 9 * or https://opensource.org/licenses/CDDL-1.0.
34dc7c2f
BB
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
428870ff 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
d2734cce 23 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
34dc7c2f
BB
24 */
25
34dc7c2f
BB
26#include <sys/dmu.h>
27#include <sys/dmu_tx.h>
28#include <sys/dsl_pool.h>
29#include <sys/dsl_dir.h>
30#include <sys/dsl_synctask.h>
428870ff 31#include <sys/metaslab.h>
34dc7c2f
BB
32
/*
 * log2 of the per-block byte size used to turn a caller's blocks_modified
 * estimate into the byte count stored in dst_space (1 << 14 bytes per block).
 */
#define	DST_AVG_BLKSHIFT 14
34
34dc7c2f 35static int
13fe0198 36dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
34dc7c2f 37{
14e4e3cb 38 (void) arg, (void) tx;
34dc7c2f
BB
39 return (0);
40}
41
d2734cce
SD
42static int
43dsl_sync_task_common(const char *pool, dsl_checkfunc_t *checkfunc,
186898bb 44 dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg,
d2734cce 45 int blocks_modified, zfs_space_check_t space_check, boolean_t early)
34dc7c2f 46{
13fe0198 47 spa_t *spa;
34dc7c2f 48 dmu_tx_t *tx;
13fe0198
MA
49 int err;
50 dsl_sync_task_t dst = { { { NULL } } };
51 dsl_pool_t *dp;
34dc7c2f 52
13fe0198
MA
53 err = spa_open(pool, &spa, FTAG);
54 if (err != 0)
55 return (err);
56 dp = spa_get_dsl(spa);
34dc7c2f 57
13fe0198
MA
58top:
59 tx = dmu_tx_create_dd(dp->dp_mos_dir);
60 VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
61
62 dst.dst_pool = dp;
63 dst.dst_txg = dmu_tx_get_txg(tx);
64 dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT;
3d45fdd6 65 dst.dst_space_check = space_check;
13fe0198
MA
66 dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc;
67 dst.dst_syncfunc = syncfunc;
68 dst.dst_arg = arg;
69 dst.dst_error = 0;
70 dst.dst_nowaiter = B_FALSE;
71
72 dsl_pool_config_enter(dp, FTAG);
73 err = dst.dst_checkfunc(arg, tx);
74 dsl_pool_config_exit(dp, FTAG);
75
76 if (err != 0) {
34dc7c2f 77 dmu_tx_commit(tx);
13fe0198
MA
78 spa_close(spa, FTAG);
79 return (err);
34dc7c2f
BB
80 }
81
d2734cce
SD
82 txg_list_t *task_list = (early) ?
83 &dp->dp_early_sync_tasks : &dp->dp_sync_tasks;
84 VERIFY(txg_list_add_tail(task_list, &dst, dst.dst_txg));
34dc7c2f
BB
85
86 dmu_tx_commit(tx);
87
186898bb
DB
88 if (sigfunc != NULL && txg_wait_synced_sig(dp, dst.dst_txg)) {
89 /* current contract is to call func once */
90 sigfunc(arg, tx);
91 sigfunc = NULL; /* in case we're performing an EAGAIN retry */
92 }
13fe0198 93 txg_wait_synced(dp, dst.dst_txg);
34dc7c2f 94
13fe0198
MA
95 if (dst.dst_error == EAGAIN) {
96 txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE);
34dc7c2f 97 goto top;
428870ff 98 }
34dc7c2f 99
13fe0198
MA
100 spa_close(spa, FTAG);
101 return (dst.dst_error);
34dc7c2f
BB
102}
103
d2734cce
SD
104/*
105 * Called from open context to perform a callback in syncing context. Waits
106 * for the operation to complete.
107 *
108 * The checkfunc will be called from open context as a preliminary check
109 * which can quickly fail. If it succeeds, it will be called again from
110 * syncing context. The checkfunc should generally be designed to work
111 * properly in either context, but if necessary it can check
112 * dmu_tx_is_syncing(tx).
113 *
114 * The synctask infrastructure enforces proper locking strategy with respect
115 * to the dp_config_rwlock -- the lock will always be held when the callbacks
116 * are called. It will be held for read during the open-context (preliminary)
117 * call to the checkfunc, and then held for write from syncing context during
118 * the calls to the check and sync funcs.
119 *
120 * A dataset or pool name can be passed as the first argument. Typically,
121 * the check func will hold, check the return value of the hold, and then
122 * release the dataset. The sync func will VERIFY0(hold()) the dataset.
123 * This is safe because no changes can be made between the check and sync funcs,
124 * and the sync func will only be called if the check func successfully opened
125 * the dataset.
126 */
127int
128dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
129 dsl_syncfunc_t *syncfunc, void *arg,
130 int blocks_modified, zfs_space_check_t space_check)
131{
186898bb 132 return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
d2734cce
SD
133 blocks_modified, space_check, B_FALSE));
134}
135
136/*
137 * An early synctask works exactly as a standard synctask with one important
138 * difference on the way it is handled during syncing context. Standard
139 * synctasks run after we've written out all the dirty blocks of dirty
140 * datasets. Early synctasks are executed before writing out any dirty data,
141 * and thus before standard synctasks.
142 *
143 * For that reason, early synctasks can affect the process of writing dirty
144 * changes to disk for the txg that they run and should be used with caution.
145 * In addition, early synctasks should not dirty any metaslabs as this would
e1cfd73f 146 * invalidate the precondition/invariant for subsequent early synctasks.
d2734cce
SD
147 * [see dsl_pool_sync() and dsl_early_sync_task_verify()]
148 */
149int
150dsl_early_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
151 dsl_syncfunc_t *syncfunc, void *arg,
152 int blocks_modified, zfs_space_check_t space_check)
153{
186898bb 154 return (dsl_sync_task_common(pool, checkfunc, syncfunc, NULL, arg,
d2734cce
SD
155 blocks_modified, space_check, B_TRUE));
156}
157
186898bb
DB
158/*
159 * A standard synctask that can be interrupted from a signal. The sigfunc
160 * is called once if a signal occurred while waiting for the task to sync.
161 */
162int
163dsl_sync_task_sig(const char *pool, dsl_checkfunc_t *checkfunc,
164 dsl_syncfunc_t *syncfunc, dsl_sigfunc_t *sigfunc, void *arg,
165 int blocks_modified, zfs_space_check_t space_check)
166{
167 return (dsl_sync_task_common(pool, checkfunc, syncfunc, sigfunc, arg,
168 blocks_modified, space_check, B_FALSE));
169}
170
d2734cce
SD
171static void
172dsl_sync_task_nowait_common(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
38080324 173 dmu_tx_t *tx, boolean_t early)
34dc7c2f 174{
13fe0198 175 dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
34dc7c2f 176
13fe0198
MA
177 dst->dst_pool = dp;
178 dst->dst_txg = dmu_tx_get_txg(tx);
38080324 179 dst->dst_space_check = ZFS_SPACE_CHECK_NONE;
13fe0198
MA
180 dst->dst_checkfunc = dsl_null_checkfunc;
181 dst->dst_syncfunc = syncfunc;
182 dst->dst_arg = arg;
183 dst->dst_error = 0;
184 dst->dst_nowaiter = B_TRUE;
34dc7c2f 185
d2734cce
SD
186 txg_list_t *task_list = (early) ?
187 &dp->dp_early_sync_tasks : &dp->dp_sync_tasks;
188 VERIFY(txg_list_add_tail(task_list, dst, dst->dst_txg));
189}
190
191void
192dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
38080324 193 dmu_tx_t *tx)
d2734cce 194{
38080324 195 dsl_sync_task_nowait_common(dp, syncfunc, arg, tx, B_FALSE);
d2734cce
SD
196}
197
198void
199dsl_early_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
38080324 200 dmu_tx_t *tx)
d2734cce 201{
38080324 202 dsl_sync_task_nowait_common(dp, syncfunc, arg, tx, B_TRUE);
34dc7c2f
BB
203}
204
13fe0198
MA
205/*
206 * Called in syncing context to execute the synctask.
207 */
34dc7c2f 208void
13fe0198 209dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx)
34dc7c2f 210{
13fe0198 211 dsl_pool_t *dp = dst->dst_pool;
34dc7c2f 212
13fe0198 213 ASSERT0(dst->dst_error);
34dc7c2f
BB
214
215 /*
3d45fdd6
MA
216 * Check for sufficient space.
217 *
218 * When the sync task was created, the caller specified the
219 * type of space checking required. See the comment in
220 * zfs_space_check_t for details on the semantics of each
221 * type of space checking.
222 *
223 * We just check against what's on-disk; we don't want any
224 * in-flight accounting to get in our way, because open context
225 * may have already used up various in-core limits
226 * (arc_tempreserve, dsl_pool_tempreserve).
34dc7c2f 227 */
3d45fdd6 228 if (dst->dst_space_check != ZFS_SPACE_CHECK_NONE) {
d2734cce
SD
229 uint64_t quota = dsl_pool_unreserved_space(dp,
230 dst->dst_space_check);
3d45fdd6 231 uint64_t used = dsl_dir_phys(dp->dp_root_dir)->dd_used_bytes;
d2734cce 232
3d45fdd6 233 /* MOS space is triple-dittoed, so we multiply by 3. */
d2734cce 234 if (used + dst->dst_space * 3 > quota) {
3d45fdd6
MA
235 dst->dst_error = SET_ERROR(ENOSPC);
236 if (dst->dst_nowaiter)
237 kmem_free(dst, sizeof (*dst));
238 return;
239 }
428870ff 240 }
34dc7c2f
BB
241
242 /*
13fe0198 243 * Check for errors by calling checkfunc.
34dc7c2f 244 */
13fe0198
MA
245 rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
246 dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx);
247 if (dst->dst_error == 0)
248 dst->dst_syncfunc(dst->dst_arg, tx);
249 rrw_exit(&dp->dp_config_rwlock, FTAG);
250 if (dst->dst_nowaiter)
251 kmem_free(dst, sizeof (*dst));
34dc7c2f 252}
c28b2279 253
/* Kernel builds only: export the blocking and non-blocking entry points. */
#ifdef _KERNEL
EXPORT_SYMBOL(dsl_sync_task);
EXPORT_SYMBOL(dsl_sync_task_nowait);
#endif	/* _KERNEL */