/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright 2011 Martin Matuska
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */
#include <sys/zfs_context.h>
#include <sys/txg_impl.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_scan.h>
#include <sys/callb.h>
#include <sys/spa_impl.h>

/*
 * Pool-wide transaction groups.
 */
static void txg_sync_thread(dsl_pool_t *dp);
static void txg_quiesce_thread(dsl_pool_t *dp);

int zfs_txg_timeout = 5;	/* max seconds worth of delta per txg */
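
/*
 * Each pool cycles transaction groups through three active states: open
 * (accepting new assignments), quiescing (waiting for existing holds to
 * be released), and syncing (being written out to disk).  The quiesce
 * and sync threads below coordinate the handoff between states through
 * the condition variables in tx_state_t.
 */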
/*
 * Prepare the txg subsystem.
 */
void
txg_init(dsl_pool_t *dp, uint64_t txg)
{
	tx_state_t *tx = &dp->dp_tx;
	int c;

	bzero(tx, sizeof (tx_state_t));

	tx->tx_cpu = vmem_zalloc(max_ncpus * sizeof (tx_cpu_t), KM_SLEEP);

	for (c = 0; c < max_ncpus; c++) {
		int i;

		mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL);
		for (i = 0; i < TXG_SIZE; i++) {
			cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT,
			    NULL);
			list_create(&tx->tx_cpu[c].tc_callbacks[i],
			    sizeof (dmu_tx_callback_t),
			    offsetof(dmu_tx_callback_t, dcb_node));
		}
	}

	mutex_init(&tx->tx_sync_lock, NULL, MUTEX_DEFAULT, NULL);

	cv_init(&tx->tx_sync_more_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&tx->tx_sync_done_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&tx->tx_quiesce_more_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&tx->tx_quiesce_done_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&tx->tx_exit_cv, NULL, CV_DEFAULT, NULL);

	tx->tx_open_txg = txg;
}
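
/*
 * A minimal usage sketch (illustrative only, compiled out): the expected
 * call order for bringing the txg machinery up and down around a pool's
 * lifetime, as implied by txg_init()/txg_fini() and the thread start/stop
 * routines in this file.  The function name and 'initial_txg' are
 * hypothetical placeholders.
 */
#if 0
static void
txg_lifecycle_sketch(dsl_pool_t *dp, uint64_t initial_txg)
{
	txg_init(dp, initial_txg);	/* zero state, allocate tx_cpu array */
	txg_sync_start(dp);		/* spawn quiesce and sync threads */

	/* ... pool is active; txgs open, quiesce, and sync ... */

	txg_sync_stop(dp);		/* sync outstanding work, reap threads */
	txg_fini(dp);			/* destroy locks, cvs, tx_cpu array */
}
#endif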
/*
 * Close down the txg subsystem.
 */
void
txg_fini(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;
	int c;

	ASSERT(tx->tx_threads == 0);

	mutex_destroy(&tx->tx_sync_lock);

	cv_destroy(&tx->tx_sync_more_cv);
	cv_destroy(&tx->tx_sync_done_cv);
	cv_destroy(&tx->tx_quiesce_more_cv);
	cv_destroy(&tx->tx_quiesce_done_cv);
	cv_destroy(&tx->tx_exit_cv);

	for (c = 0; c < max_ncpus; c++) {
		int i;

		mutex_destroy(&tx->tx_cpu[c].tc_lock);
		for (i = 0; i < TXG_SIZE; i++) {
			cv_destroy(&tx->tx_cpu[c].tc_cv[i]);
			list_destroy(&tx->tx_cpu[c].tc_callbacks[i]);
		}
	}

	if (tx->tx_commit_cb_taskq != NULL)
		taskq_destroy(tx->tx_commit_cb_taskq);

	vmem_free(tx->tx_cpu, max_ncpus * sizeof (tx_cpu_t));

	bzero(tx, sizeof (tx_state_t));
}
/*
 * Start syncing transaction groups.
 */
void
txg_sync_start(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;

	mutex_enter(&tx->tx_sync_lock);

	dprintf("pool %p\n", dp);

	ASSERT(tx->tx_threads == 0);

	tx->tx_threads = 2;

	tx->tx_quiesce_thread = thread_create(NULL, 0, txg_quiesce_thread,
	    dp, 0, &p0, TS_RUN, minclsyspri);

	/*
	 * The sync thread can need a larger-than-default stack size on
	 * 32-bit x86.  This is due in part to nested pools and
	 * scrub_visitbp() recursion.
	 */
	tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread,
	    dp, 0, &p0, TS_RUN, minclsyspri);

	mutex_exit(&tx->tx_sync_lock);
}
static void
txg_thread_enter(tx_state_t *tx, callb_cpr_t *cpr)
{
	CALLB_CPR_INIT(cpr, &tx->tx_sync_lock, callb_generic_cpr, FTAG);
	mutex_enter(&tx->tx_sync_lock);
}
static void
txg_thread_exit(tx_state_t *tx, callb_cpr_t *cpr, kthread_t **tpp)
{
	ASSERT(*tpp != NULL);
	*tpp = NULL;
	tx->tx_threads--;
	cv_broadcast(&tx->tx_exit_cv);
	CALLB_CPR_EXIT(cpr);		/* drops &tx->tx_sync_lock */
	thread_exit();
}
static void
txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, uint64_t time)
{
	CALLB_CPR_SAFE_BEGIN(cpr);

	if (time)
		(void) cv_timedwait_interruptible(cv, &tx->tx_sync_lock,
		    ddi_get_lbolt() + time);
	else
		cv_wait_interruptible(cv, &tx->tx_sync_lock);

	CALLB_CPR_SAFE_END(cpr, &tx->tx_sync_lock);
}
/*
 * Stop syncing transaction groups.
 */
void
txg_sync_stop(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;

	dprintf("pool %p\n", dp);
	/*
	 * Finish off any work in progress.
	 */
	ASSERT(tx->tx_threads == 2);

	/*
	 * We need to ensure that we've vacated the deferred space_maps.
	 */
	txg_wait_synced(dp, tx->tx_open_txg + TXG_DEFER_SIZE);

	/*
	 * Wake all sync threads and wait for them to die.
	 */
	mutex_enter(&tx->tx_sync_lock);

	ASSERT(tx->tx_threads == 2);

	tx->tx_exiting = 1;

	cv_broadcast(&tx->tx_quiesce_more_cv);
	cv_broadcast(&tx->tx_quiesce_done_cv);
	cv_broadcast(&tx->tx_sync_more_cv);

	while (tx->tx_threads != 0)
		cv_wait(&tx->tx_exit_cv, &tx->tx_sync_lock);

	tx->tx_exiting = 0;

	mutex_exit(&tx->tx_sync_lock);
}
uint64_t
txg_hold_open(dsl_pool_t *dp, txg_handle_t *th)
{
	tx_state_t *tx = &dp->dp_tx;
	tx_cpu_t *tc;
	uint64_t txg;

	/*
	 * It appears the processor id is simply used as a "random"
	 * number to index into the array; there isn't any other
	 * significance to the chosen tx_cpu.  Given that, why not use
	 * the current cpu to index into the array?
	 */
	tc = &tx->tx_cpu[CPU_SEQID];

	mutex_enter(&tc->tc_lock);

	txg = tx->tx_open_txg;
	tc->tc_count[txg & TXG_MASK]++;

	th->th_cpu = tc;
	th->th_txg = txg;

	return (txg);
}
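
/*
 * Note on indexing: per-txg state lives in rings of TXG_SIZE entries, and
 * a txg number maps to its slot with (txg & TXG_MASK).  Since TXG_SIZE is
 * a power of two, this is txg modulo TXG_SIZE; e.g. with TXG_SIZE == 4,
 * txg 7 lands in slot 3.  Slots can be reused safely because far fewer
 * than TXG_SIZE txgs are ever in flight at once.
 */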
void
txg_rele_to_quiesce(txg_handle_t *th)
{
	tx_cpu_t *tc = th->th_cpu;

	mutex_exit(&tc->tc_lock);
}
void
txg_register_callbacks(txg_handle_t *th, list_t *tx_callbacks)
{
	tx_cpu_t *tc = th->th_cpu;
	int g = th->th_txg & TXG_MASK;

	mutex_enter(&tc->tc_lock);
	list_move_tail(&tc->tc_callbacks[g], tx_callbacks);
	mutex_exit(&tc->tc_lock);
}
void
txg_rele_to_sync(txg_handle_t *th)
{
	tx_cpu_t *tc = th->th_cpu;
	int g = th->th_txg & TXG_MASK;

	mutex_enter(&tc->tc_lock);
	ASSERT(tc->tc_count[g] != 0);
	if (--tc->tc_count[g] == 0)
		cv_broadcast(&tc->tc_cv[g]);
	mutex_exit(&tc->tc_lock);

	th->th_cpu = NULL;	/* defensive */
}
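
/*
 * Illustrative sketch (compiled out): the hold/release protocol a writer
 * follows around the open txg.  txg_hold_open() takes a hold that keeps
 * the txg from quiescing; txg_rele_to_sync() drops it and wakes the
 * quiesce thread when the per-cpu count reaches zero.  The function name
 * and body here are hypothetical.
 */
#if 0
static void
txg_hold_sketch(dsl_pool_t *dp)
{
	txg_handle_t th;
	uint64_t txg;

	txg = txg_hold_open(dp, &th);	/* join the currently open txg */

	/* ... dirty in-core state that must be written with 'txg' ... */

	txg_rele_to_sync(&th);		/* allow the txg to quiesce */
}
#endif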
static void
txg_quiesce(dsl_pool_t *dp, uint64_t txg)
{
	hrtime_t start;
	txg_history_t *th;
	tx_state_t *tx = &dp->dp_tx;
	int g = txg & TXG_MASK;
	int c;

	/*
	 * Grab all tx_cpu locks so nobody else can get into this txg.
	 */
	for (c = 0; c < max_ncpus; c++)
		mutex_enter(&tx->tx_cpu[c].tc_lock);

	ASSERT(txg == tx->tx_open_txg);
	tx->tx_open_txg++;

	/*
	 * Measure how long the txg was open and replace the kstat.
	 */
	th = dsl_pool_txg_history_get(dp, txg);
	th->th_kstat.open_time = gethrtime() - th->th_kstat.birth;
	th->th_kstat.state = TXG_STATE_QUIESCING;
	dsl_pool_txg_history_put(th);
	dsl_pool_txg_history_add(dp, tx->tx_open_txg);

	/*
	 * Now that we've incremented tx_open_txg, we can let threads
	 * enter the next transaction group.
	 */
	for (c = 0; c < max_ncpus; c++)
		mutex_exit(&tx->tx_cpu[c].tc_lock);

	/*
	 * Quiesce the transaction group by waiting for everyone to txg_exit().
	 */
	start = gethrtime();

	for (c = 0; c < max_ncpus; c++) {
		tx_cpu_t *tc = &tx->tx_cpu[c];
		mutex_enter(&tc->tc_lock);
		while (tc->tc_count[g] != 0)
			cv_wait(&tc->tc_cv[g], &tc->tc_lock);
		mutex_exit(&tc->tc_lock);
	}

	/*
	 * Measure how long the txg took to quiesce.
	 */
	th = dsl_pool_txg_history_get(dp, txg);
	th->th_kstat.quiesce_time = gethrtime() - start;
	dsl_pool_txg_history_put(th);
}
static void
txg_do_callbacks(list_t *cb_list)
{
	dmu_tx_do_callbacks(cb_list, 0);

	list_destroy(cb_list);

	kmem_free(cb_list, sizeof (list_t));
}
/*
 * Dispatch the commit callbacks registered on this txg to worker threads.
 */
static void
txg_dispatch_callbacks(dsl_pool_t *dp, uint64_t txg)
{
	int c;
	tx_state_t *tx = &dp->dp_tx;
	list_t *cb_list;

	for (c = 0; c < max_ncpus; c++) {
		tx_cpu_t *tc = &tx->tx_cpu[c];
		/* No need to lock tx_cpu_t at this point */

		int g = txg & TXG_MASK;

		if (list_is_empty(&tc->tc_callbacks[g]))
			continue;

		if (tx->tx_commit_cb_taskq == NULL) {
			/*
			 * Commit callback taskq hasn't been created yet.
			 */
			tx->tx_commit_cb_taskq = taskq_create("tx_commit_cb",
			    100, minclsyspri, max_ncpus, INT_MAX,
			    TASKQ_THREADS_CPU_PCT | TASKQ_PREPOPULATE);
		}

		cb_list = kmem_alloc(sizeof (list_t), KM_PUSHPAGE);
		list_create(cb_list, sizeof (dmu_tx_callback_t),
		    offsetof(dmu_tx_callback_t, dcb_node));

		list_move_tail(cb_list, &tc->tc_callbacks[g]);

		(void) taskq_dispatch(tx->tx_commit_cb_taskq, (task_func_t *)
		    txg_do_callbacks, cb_list, TQ_SLEEP);
	}
}
/*
 * Wait for pending commit callbacks of already-synced transactions to finish
 * processing.
 * Calling this function from within a commit callback will deadlock.
 */
void
txg_wait_callbacks(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;

	if (tx->tx_commit_cb_taskq != NULL)
		taskq_wait(tx->tx_commit_cb_taskq);
}
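
/*
 * Illustrative sketch (compiled out): registering a commit callback and
 * waiting for it.  Callbacks registered through dmu_tx_callback_register()
 * (in dmu_tx.c) land on tc_callbacks[] via txg_register_callbacks() and
 * are fired by the taskq above once their txg has synced; the error
 * argument is nonzero if the txg was aborted.  The function names below
 * other than the two library calls are hypothetical.
 */
#if 0
static void
my_commit_cb(void *arg, int error)
{
	/* runs in taskq context after the txg syncs (or aborts) */
}

static void
txg_callback_sketch(dsl_pool_t *dp, dmu_tx_t *tx)
{
	dmu_tx_callback_register(tx, my_commit_cb, NULL);
	/* ... assign and commit the tx ... */
	txg_wait_callbacks(dp);	/* never call this from a callback */
}
#endif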
static void
txg_sync_thread(dsl_pool_t *dp)
{
	spa_t *spa = dp->dp_spa;
	tx_state_t *tx = &dp->dp_tx;
	callb_cpr_t cpr;
	uint64_t start, delta;

#ifdef _KERNEL
	/*
	 * Annotate this process with a flag that indicates that it is
	 * unsafe to use KM_SLEEP during memory allocations due to the
	 * potential for a deadlock.  KM_PUSHPAGE should be used instead.
	 */
	current->flags |= PF_NOFS;
#endif /* _KERNEL */

	txg_thread_enter(tx, &cpr);

	start = delta = 0;
	for (;;) {
		hrtime_t hrstart;
		txg_history_t *th;
		uint64_t txg;
		uint64_t timer, timeout;

		timeout = zfs_txg_timeout * hz;

		/*
		 * We sync when we're scanning, there's someone waiting
		 * on us, or the quiesce thread has handed off a txg to
		 * us, or we have reached our timeout.
		 */
		timer = (delta >= timeout ? 0 : timeout - delta);
		while (!dsl_scan_active(dp->dp_scan) &&
		    !tx->tx_exiting && timer > 0 &&
		    tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
		    tx->tx_quiesced_txg == 0) {
			dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
			    tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
			txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);
			delta = ddi_get_lbolt() - start;
			timer = (delta > timeout ? 0 : timeout - delta);
		}

		/*
		 * Wait until the quiesce thread hands off a txg to us,
		 * prompting it to do so if necessary.
		 */
		while (!tx->tx_exiting && tx->tx_quiesced_txg == 0) {
			if (tx->tx_quiesce_txg_waiting < tx->tx_open_txg+1)
				tx->tx_quiesce_txg_waiting = tx->tx_open_txg+1;
			cv_broadcast(&tx->tx_quiesce_more_cv);
			txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0);
		}

		if (tx->tx_exiting)
			txg_thread_exit(tx, &cpr, &tx->tx_sync_thread);

		/*
		 * Consume the quiesced txg which has been handed off to
		 * us.  This may cause the quiescing thread to now be
		 * able to quiesce another txg, so we must signal it.
		 */
		txg = tx->tx_quiesced_txg;
		tx->tx_quiesced_txg = 0;
		tx->tx_syncing_txg = txg;
		cv_broadcast(&tx->tx_quiesce_more_cv);

		th = dsl_pool_txg_history_get(dp, txg);
		th->th_kstat.state = TXG_STATE_SYNCING;
		vdev_get_stats(spa->spa_root_vdev, &th->th_vs1);
		dsl_pool_txg_history_put(th);

		dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
		    txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);

		mutex_exit(&tx->tx_sync_lock);

		start = ddi_get_lbolt();
		hrstart = gethrtime();
		spa_sync(spa, txg);
		delta = ddi_get_lbolt() - start;

		mutex_enter(&tx->tx_sync_lock);
		tx->tx_synced_txg = txg;
		tx->tx_syncing_txg = 0;
		cv_broadcast(&tx->tx_sync_done_cv);

		/*
		 * Dispatch commit callbacks to worker threads.
		 */
		txg_dispatch_callbacks(dp, txg);

		/*
		 * Measure the txg sync time and determine the amount of
		 * I/O done.
		 */
		th = dsl_pool_txg_history_get(dp, txg);
		vdev_get_stats(spa->spa_root_vdev, &th->th_vs2);
		th->th_kstat.sync_time = gethrtime() - hrstart;
		th->th_kstat.nread = th->th_vs2.vs_bytes[ZIO_TYPE_READ] -
		    th->th_vs1.vs_bytes[ZIO_TYPE_READ];
		th->th_kstat.nwritten = th->th_vs2.vs_bytes[ZIO_TYPE_WRITE] -
		    th->th_vs1.vs_bytes[ZIO_TYPE_WRITE];
		th->th_kstat.reads = th->th_vs2.vs_ops[ZIO_TYPE_READ] -
		    th->th_vs1.vs_ops[ZIO_TYPE_READ];
		th->th_kstat.writes = th->th_vs2.vs_ops[ZIO_TYPE_WRITE] -
		    th->th_vs1.vs_ops[ZIO_TYPE_WRITE];
		th->th_kstat.state = TXG_STATE_COMMITTED;
		dsl_pool_txg_history_put(th);
	}
}
static void
txg_quiesce_thread(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;
	callb_cpr_t cpr;

	txg_thread_enter(tx, &cpr);

	for (;;) {
		uint64_t txg;

		/*
		 * We quiesce when there's someone waiting on us.
		 * However, we can only have one txg in "quiescing" or
		 * "quiesced, waiting to sync" state.  So we wait until
		 * the "quiesced, waiting to sync" txg has been consumed
		 * by the sync thread.
		 */
		while (!tx->tx_exiting &&
		    (tx->tx_open_txg >= tx->tx_quiesce_txg_waiting ||
		    tx->tx_quiesced_txg != 0))
			txg_thread_wait(tx, &cpr, &tx->tx_quiesce_more_cv, 0);

		if (tx->tx_exiting)
			txg_thread_exit(tx, &cpr, &tx->tx_quiesce_thread);

		txg = tx->tx_open_txg;
		dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
		    txg, tx->tx_quiesce_txg_waiting,
		    tx->tx_sync_txg_waiting);
		mutex_exit(&tx->tx_sync_lock);
		txg_quiesce(dp, txg);
		mutex_enter(&tx->tx_sync_lock);

		/*
		 * Hand this txg off to the sync thread.
		 */
		dprintf("quiesce done, handing off txg %llu\n", txg);
		tx->tx_quiesced_txg = txg;
		cv_broadcast(&tx->tx_sync_more_cv);
		cv_broadcast(&tx->tx_quiesce_done_cv);
	}
}
/*
 * Delay this thread by 'ticks' if we are still in the open transaction
 * group and there is already a waiting txg quiescing or quiesced.  Abort
 * the delay if this txg stalls or enters the quiescing state.
 */
void
txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks)
{
	tx_state_t *tx = &dp->dp_tx;
	clock_t timeout = ddi_get_lbolt() + ticks;

	/* don't delay if this txg could transition to quiescing immediately */
	if (tx->tx_open_txg > txg ||
	    tx->tx_syncing_txg == txg-1 || tx->tx_synced_txg == txg-1)
		return;

	mutex_enter(&tx->tx_sync_lock);
	if (tx->tx_open_txg > txg || tx->tx_synced_txg == txg-1) {
		mutex_exit(&tx->tx_sync_lock);
		return;
	}

	while (ddi_get_lbolt() < timeout &&
	    tx->tx_syncing_txg < txg-1 && !txg_stalled(dp))
		(void) cv_timedwait(&tx->tx_quiesce_more_cv, &tx->tx_sync_lock,
		    timeout);

	DMU_TX_STAT_BUMP(dmu_tx_delay);

	mutex_exit(&tx->tx_sync_lock);
}
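
/*
 * txg_delay() serves as a throttling hook: write-path callers that are
 * dirtying data faster than it can be synced may sleep here briefly
 * rather than pile more work onto the open txg.  The dmu_tx_delay kstat
 * bumped above counts how often this happens.
 */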
void
txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
{
	tx_state_t *tx = &dp->dp_tx;

	mutex_enter(&tx->tx_sync_lock);
	ASSERT(tx->tx_threads == 2);
	if (txg == 0)
		txg = tx->tx_open_txg + TXG_DEFER_SIZE;
	if (tx->tx_sync_txg_waiting < txg)
		tx->tx_sync_txg_waiting = txg;
	dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
	    txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
	while (tx->tx_synced_txg < txg) {
		dprintf("broadcasting sync more "
		    "tx_synced=%llu waiting=%llu dp=%p\n",
		    tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
		cv_broadcast(&tx->tx_sync_more_cv);
		cv_wait(&tx->tx_sync_done_cv, &tx->tx_sync_lock);
	}
	mutex_exit(&tx->tx_sync_lock);
}
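
/*
 * Note: passing txg == 0 waits for everything outstanding; the target
 * becomes tx_open_txg + TXG_DEFER_SIZE, which also covers the deferred
 * window (compare the space_map comment in txg_sync_stop() above).
 */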
void
txg_wait_open(dsl_pool_t *dp, uint64_t txg)
{
	tx_state_t *tx = &dp->dp_tx;

	mutex_enter(&tx->tx_sync_lock);
	ASSERT(tx->tx_threads == 2);
	if (txg == 0)
		txg = tx->tx_open_txg + 1;
	if (tx->tx_quiesce_txg_waiting < txg)
		tx->tx_quiesce_txg_waiting = txg;
	dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
	    txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
	while (tx->tx_open_txg < txg) {
		cv_broadcast(&tx->tx_quiesce_more_cv);
		cv_wait(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
	}
	mutex_exit(&tx->tx_sync_lock);
}
boolean_t
txg_stalled(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;
	return (tx->tx_quiesce_txg_waiting > tx->tx_open_txg);
}
boolean_t
txg_sync_waiting(dsl_pool_t *dp)
{
	tx_state_t *tx = &dp->dp_tx;

	return (tx->tx_syncing_txg <= tx->tx_sync_txg_waiting ||
	    tx->tx_quiesced_txg != 0);
}
/*
 * Per-txg object lists.
 */
void
txg_list_create(txg_list_t *tl, size_t offset)
{
	int t;

	mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL);

	tl->tl_offset = offset;

	for (t = 0; t < TXG_SIZE; t++)
		tl->tl_head[t] = NULL;
}
void
txg_list_destroy(txg_list_t *tl)
{
	int t;

	for (t = 0; t < TXG_SIZE; t++)
		ASSERT(txg_list_empty(tl, t));

	mutex_destroy(&tl->tl_lock);
}
int
txg_list_empty(txg_list_t *tl, uint64_t txg)
{
	return (tl->tl_head[txg & TXG_MASK] == NULL);
}
/*
 * Add an entry to the list.
 * Returns 0 if it's a new entry, 1 if it's already there.
 */
int
txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
	int already_on_list;

	mutex_enter(&tl->tl_lock);
	already_on_list = tn->tn_member[t];
	if (!already_on_list) {
		tn->tn_member[t] = 1;
		tn->tn_next[t] = tl->tl_head[t];
		tl->tl_head[t] = tn;
	}
	mutex_exit(&tl->tl_lock);

	return (already_on_list);
}
/*
 * Add an entry to the end of the list (walks list to find end).
 * Returns 0 if it's a new entry, 1 if it's already there.
 */
int
txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
	int already_on_list;

	mutex_enter(&tl->tl_lock);
	already_on_list = tn->tn_member[t];
	if (!already_on_list) {
		txg_node_t **tp;

		for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t])
			continue;

		tn->tn_member[t] = 1;
		tn->tn_next[t] = NULL;
		*tp = tn;
	}
	mutex_exit(&tl->tl_lock);

	return (already_on_list);
}
/*
 * Remove the head of the list and return it.
 */
void *
txg_list_remove(txg_list_t *tl, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn;
	void *p = NULL;

	mutex_enter(&tl->tl_lock);
	if ((tn = tl->tl_head[t]) != NULL) {
		p = (char *)tn - tl->tl_offset;
		tl->tl_head[t] = tn->tn_next[t];
		tn->tn_next[t] = NULL;
		tn->tn_member[t] = 0;
	}
	mutex_exit(&tl->tl_lock);

	return (p);
}
/*
 * Remove a specific item from the list and return it.
 */
void *
txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn, **tp;

	mutex_enter(&tl->tl_lock);

	for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) {
		if ((char *)tn - tl->tl_offset == p) {
			*tp = tn->tn_next[t];
			tn->tn_next[t] = NULL;
			tn->tn_member[t] = 0;
			mutex_exit(&tl->tl_lock);
			return (p);
		}
	}

	mutex_exit(&tl->tl_lock);

	return (NULL);
}
int
txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);

	return (tn->tn_member[t]);
}
/*
 * Walk a txg list -- only safe if you know it's not changing.
 */
void *
txg_list_head(txg_list_t *tl, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn = tl->tl_head[t];

	return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
}
void *
txg_list_next(txg_list_t *tl, void *p, uint64_t txg)
{
	int t = txg & TXG_MASK;
	txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);

	tn = tn->tn_next[t];

	return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
}
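
/*
 * Illustrative sketch (compiled out): the per-txg lists above are
 * intrusive.  An object embeds a txg_node_t, and the list is created
 * with that member's offset, so a single object can sit on all TXG_SIZE
 * rings at once.  The struct and function here are hypothetical.
 */
#if 0
typedef struct my_obj {
	txg_node_t	mo_node;	/* embedded link, one slot per txg */
	int		mo_data;
} my_obj_t;

static void
txg_list_sketch(my_obj_t *obj, uint64_t txg)
{
	txg_list_t tl;

	txg_list_create(&tl, offsetof(my_obj_t, mo_node));
	(void) txg_list_add(&tl, obj, txg);	/* returns 1 if already there */

	while (txg_list_remove(&tl, txg) != NULL)
		continue;			/* drain this txg's ring */

	txg_list_destroy(&tl);
}
#endif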
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(txg_init);
EXPORT_SYMBOL(txg_fini);
EXPORT_SYMBOL(txg_sync_start);
EXPORT_SYMBOL(txg_sync_stop);
EXPORT_SYMBOL(txg_hold_open);
EXPORT_SYMBOL(txg_rele_to_quiesce);
EXPORT_SYMBOL(txg_rele_to_sync);
EXPORT_SYMBOL(txg_register_callbacks);
EXPORT_SYMBOL(txg_delay);
EXPORT_SYMBOL(txg_wait_synced);
EXPORT_SYMBOL(txg_wait_open);
EXPORT_SYMBOL(txg_wait_callbacks);
EXPORT_SYMBOL(txg_stalled);
EXPORT_SYMBOL(txg_sync_waiting);

module_param(zfs_txg_timeout, int, 0644);
MODULE_PARM_DESC(zfs_txg_timeout, "Max seconds worth of delta per txg");
#endif