4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
/*
 * Iterate over all children of the current object.  This includes the normal
 * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
 * walk all datasets in the pool, and construct a directed graph of the form:
 *
 *	@yesterday ----> foo
 *
 * In order to construct this graph, we have to walk every dataset in the pool,
 * because the clone parent is stored as a property of the child, not the
 * parent.  The parent only keeps track of the number of clones.
 *
 * In the normal case (without clones) this would be rather expensive.  To avoid
 * unnecessary computation, we first try a walk of the subtree hierarchy
 * starting from the initial node.  At each dataset, we construct a node in the
 * graph and an edge leading from its parent.  If we don't see any snapshots
 * with a non-zero clone count, then we are finished.
 *
 * If we do find a cloned snapshot, then we finish the walk of the current
 * subtree, but indicate that we need to do a complete walk.  We then perform a
 * global walk of all datasets, avoiding the subtree we already processed.
 *
 * At the end of this, we'll end up with a directed graph of all relevant (and
 * possibly some irrelevant) datasets in the system.  We need to both find our
 * limiting subgraph and determine a safe ordering in which to destroy the
 * datasets.  We do a topological ordering of our graph starting at our target
 * dataset, and then walk the results in reverse.
 *
 * It's possible for the graph to have cycles if, for example, the user renames
 * a clone to be the parent of its origin snapshot.  The user can request to
 * generate an error in this case, or ignore the cycle and continue.
 *
 * When removing datasets, we want to destroy the snapshots in chronological
 * order (because this is the most efficient method).  In order to accomplish
 * this, we store the creation transaction group with each vertex and keep each
 * vertex's edges sorted according to this value.  The topological sort will
 * automatically walk the snapshots in the correct order.
 */
82 #include "libzfs_impl.h"
83 #include "zfs_namecheck.h"
85 #define MIN_EDGECOUNT 4
88 * Vertex structure. Indexed by dataset name, this structure maintains a list
89 * of edges to other vertices.
92 typedef struct zfs_vertex
{
93 char zv_dataset
[ZFS_MAXNAMELEN
];
94 struct zfs_vertex
*zv_next
;
97 struct zfs_edge
**zv_edges
;
109 * Edge structure. Simply maintains a pointer to the destination vertex. There
110 * is no need to store the source vertex, since we only use edges in the context
111 * of the source vertex.
113 typedef struct zfs_edge
{
114 zfs_vertex_t
*ze_dest
;
115 struct zfs_edge
*ze_next
;
118 #define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */
121 * Graph structure. Vertices are maintained in a hash indexed by dataset name.
123 typedef struct zfs_graph
{
124 zfs_vertex_t
**zg_hash
;
132 * Allocate a new edge pointing to the target vertex.
135 zfs_edge_create(libzfs_handle_t
*hdl
, zfs_vertex_t
*dest
)
137 zfs_edge_t
*zep
= zfs_alloc(hdl
, sizeof (zfs_edge_t
));
151 zfs_edge_destroy(zfs_edge_t
*zep
)
157 * Allocate a new vertex with the given name.
159 static zfs_vertex_t
*
160 zfs_vertex_create(libzfs_handle_t
*hdl
, const char *dataset
)
162 zfs_vertex_t
*zvp
= zfs_alloc(hdl
, sizeof (zfs_vertex_t
));
167 assert(strlen(dataset
) < ZFS_MAXNAMELEN
);
169 (void) strlcpy(zvp
->zv_dataset
, dataset
, sizeof (zvp
->zv_dataset
));
171 if ((zvp
->zv_edges
= zfs_alloc(hdl
,
172 MIN_EDGECOUNT
* sizeof (void *))) == NULL
) {
177 zvp
->zv_edgealloc
= MIN_EDGECOUNT
;
183 * Destroy a vertex. Frees up any associated edges.
186 zfs_vertex_destroy(zfs_vertex_t
*zvp
)
190 for (i
= 0; i
< zvp
->zv_edgecount
; i
++)
191 zfs_edge_destroy(zvp
->zv_edges
[i
]);
198 * Given a vertex, add an edge to the destination vertex.
201 zfs_vertex_add_edge(libzfs_handle_t
*hdl
, zfs_vertex_t
*zvp
,
204 zfs_edge_t
*zep
= zfs_edge_create(hdl
, dest
);
209 if (zvp
->zv_edgecount
== zvp
->zv_edgealloc
) {
212 if ((ptr
= zfs_realloc(hdl
, zvp
->zv_edges
,
213 zvp
->zv_edgealloc
* sizeof (void *),
214 zvp
->zv_edgealloc
* 2 * sizeof (void *))) == NULL
)
218 zvp
->zv_edgealloc
*= 2;
221 zvp
->zv_edges
[zvp
->zv_edgecount
++] = zep
;
227 zfs_edge_compare(const void *a
, const void *b
)
229 const zfs_edge_t
*ea
= *((zfs_edge_t
**)a
);
230 const zfs_edge_t
*eb
= *((zfs_edge_t
**)b
);
232 if (ea
->ze_dest
->zv_txg
< eb
->ze_dest
->zv_txg
)
234 if (ea
->ze_dest
->zv_txg
> eb
->ze_dest
->zv_txg
)
240 * Sort the given vertex edges according to the creation txg of each vertex.
243 zfs_vertex_sort_edges(zfs_vertex_t
*zvp
)
245 if (zvp
->zv_edgecount
== 0)
248 qsort(zvp
->zv_edges
, zvp
->zv_edgecount
, sizeof (void *),
253 * Construct a new graph object. We allow the size to be specified as a
254 * parameter so in the future we can size the hash according to the number of
255 * datasets in the pool.
258 zfs_graph_create(libzfs_handle_t
*hdl
, const char *dataset
, size_t size
)
260 zfs_graph_t
*zgp
= zfs_alloc(hdl
, sizeof (zfs_graph_t
));
266 if ((zgp
->zg_hash
= zfs_alloc(hdl
,
267 size
* sizeof (zfs_vertex_t
*))) == NULL
) {
272 zgp
->zg_root
= dataset
;
273 zgp
->zg_clone_count
= 0;
279 * Destroy a graph object. We have to iterate over all the hash chains,
280 * destroying each vertex in the process.
283 zfs_graph_destroy(zfs_graph_t
*zgp
)
286 zfs_vertex_t
*current
, *next
;
288 for (i
= 0; i
< zgp
->zg_size
; i
++) {
289 current
= zgp
->zg_hash
[i
];
290 while (current
!= NULL
) {
291 next
= current
->zv_next
;
292 zfs_vertex_destroy(current
);
302 * Graph hash function. Classic bernstein k=33 hash function, taken from
303 * usr/src/cmd/sgs/tools/common/strhash.c
306 zfs_graph_hash(zfs_graph_t
*zgp
, const char *str
)
311 while ((c
= *str
++) != 0)
312 hash
= ((hash
<< 5) + hash
) + c
; /* hash * 33 + c */
314 return (hash
% zgp
->zg_size
);
318 * Given a dataset name, finds the associated vertex, creating it if necessary.
320 static zfs_vertex_t
*
321 zfs_graph_lookup(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *dataset
,
324 size_t idx
= zfs_graph_hash(zgp
, dataset
);
327 for (zvp
= zgp
->zg_hash
[idx
]; zvp
!= NULL
; zvp
= zvp
->zv_next
) {
328 if (strcmp(zvp
->zv_dataset
, dataset
) == 0) {
329 if (zvp
->zv_txg
== 0)
335 if ((zvp
= zfs_vertex_create(hdl
, dataset
)) == NULL
)
338 zvp
->zv_next
= zgp
->zg_hash
[idx
];
340 zgp
->zg_hash
[idx
] = zvp
;
347 * Given two dataset names, create an edge between them. For the source vertex,
348 * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
349 * created it as a destination of another edge. If 'dest' is NULL, then this
350 * is an individual vertex (i.e. the starting vertex), so don't add an edge.
353 zfs_graph_add(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *source
,
354 const char *dest
, uint64_t txg
)
356 zfs_vertex_t
*svp
, *dvp
;
358 if ((svp
= zfs_graph_lookup(hdl
, zgp
, source
, 0)) == NULL
)
360 svp
->zv_visited
= VISIT_SEEN
;
362 dvp
= zfs_graph_lookup(hdl
, zgp
, dest
, txg
);
365 if (zfs_vertex_add_edge(hdl
, svp
, dvp
) != 0)
373 * Iterate over all children of the given dataset, adding any vertices
374 * as necessary. Returns -1 if there was an error, or 0 otherwise.
375 * This is a simple recursive algorithm - the ZFS namespace typically
376 * is very flat. We manually invoke the necessary ioctl() calls to
377 * avoid the overhead and additional semantics of zfs_open().
380 iterate_children(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *dataset
)
382 zfs_cmd_t zc
= { "\0", "\0", "\0", "\0", 0 };
386 * Look up the source vertex, and avoid it if we've seen it before.
388 zvp
= zfs_graph_lookup(hdl
, zgp
, dataset
, 0);
391 if (zvp
->zv_visited
== VISIT_SEEN
)
395 * Iterate over all children
397 for ((void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
398 ioctl(hdl
->libzfs_fd
, ZFS_IOC_DATASET_LIST_NEXT
, &zc
) == 0;
399 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
))) {
401 * Get statistics for this dataset, to determine the type of the
402 * dataset and clone statistics. If this fails, the dataset has
403 * since been removed, and we're pretty much screwed anyway.
405 zc
.zc_objset_stats
.dds_origin
[0] = '\0';
406 if (ioctl(hdl
->libzfs_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) != 0)
409 if (zc
.zc_objset_stats
.dds_origin
[0] != '\0') {
410 if (zfs_graph_add(hdl
, zgp
,
411 zc
.zc_objset_stats
.dds_origin
, zc
.zc_name
,
412 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
415 * Count origins only if they are contained in the graph
417 if (isa_child_of(zc
.zc_objset_stats
.dds_origin
,
419 zgp
->zg_clone_count
--;
423 * Add an edge between the parent and the child.
425 if (zfs_graph_add(hdl
, zgp
, dataset
, zc
.zc_name
,
426 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
430 * Recursively visit child
432 if (iterate_children(hdl
, zgp
, zc
.zc_name
))
437 * Now iterate over all snapshots.
439 bzero(&zc
, sizeof (zc
));
441 for ((void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
442 ioctl(hdl
->libzfs_fd
, ZFS_IOC_SNAPSHOT_LIST_NEXT
, &zc
) == 0;
443 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
))) {
446 * Get statistics for this dataset, to determine the type of the
447 * dataset and clone statistics. If this fails, the dataset has
448 * since been removed, and we're pretty much screwed anyway.
450 if (ioctl(hdl
->libzfs_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) != 0)
454 * Add an edge between the parent and the child.
456 if (zfs_graph_add(hdl
, zgp
, dataset
, zc
.zc_name
,
457 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
460 zgp
->zg_clone_count
+= zc
.zc_objset_stats
.dds_num_clones
;
463 zvp
->zv_visited
= VISIT_SEEN
;
469 * Returns false if there are no snapshots with dependent clones in this
470 * subtree or if all of those clones are also in this subtree. Returns
471 * true if there is an error or there are external dependents.
474 external_dependents(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *dataset
)
476 zfs_cmd_t zc
= { "\0", "\0", "\0", "\0", 0 };
479 * Check whether this dataset is a clone or has clones since
480 * iterate_children() only checks the children.
482 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
483 if (ioctl(hdl
->libzfs_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) != 0)
486 if (zc
.zc_objset_stats
.dds_origin
[0] != '\0') {
487 if (zfs_graph_add(hdl
, zgp
,
488 zc
.zc_objset_stats
.dds_origin
, zc
.zc_name
,
489 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
491 if (isa_child_of(zc
.zc_objset_stats
.dds_origin
, dataset
))
492 zgp
->zg_clone_count
--;
495 if ((zc
.zc_objset_stats
.dds_num_clones
) ||
496 iterate_children(hdl
, zgp
, dataset
))
499 return (zgp
->zg_clone_count
!= 0);
503 * Construct a complete graph of all necessary vertices. First, iterate over
504 * only our object's children. If no cloned snapshots are found, or all of
505 * the cloned snapshots are in this subtree then return a graph of the subtree.
506 * Otherwise, start at the root of the pool and iterate over all datasets.
509 construct_graph(libzfs_handle_t
*hdl
, const char *dataset
)
511 zfs_graph_t
*zgp
= zfs_graph_create(hdl
, dataset
, ZFS_GRAPH_SIZE
);
517 if ((strchr(dataset
, '/') == NULL
) ||
518 (external_dependents(hdl
, zgp
, dataset
))) {
520 * Determine pool name and try again.
522 int len
= strcspn(dataset
, "/@") + 1;
523 char *pool
= zfs_alloc(hdl
, len
);
526 zfs_graph_destroy(zgp
);
529 (void) strlcpy(pool
, dataset
, len
);
531 if (iterate_children(hdl
, zgp
, pool
) == -1 ||
532 zfs_graph_add(hdl
, zgp
, pool
, NULL
, 0) != 0) {
534 zfs_graph_destroy(zgp
);
540 if (ret
== -1 || zfs_graph_add(hdl
, zgp
, dataset
, NULL
, 0) != 0) {
541 zfs_graph_destroy(zgp
);
549 * Given a graph, do a recursive topological sort into the given array. This is
550 * really just a depth first search, so that the deepest nodes appear first.
551 * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
554 topo_sort(libzfs_handle_t
*hdl
, boolean_t allowrecursion
, char **result
,
555 size_t *idx
, zfs_vertex_t
*zgv
)
559 if (zgv
->zv_visited
== VISIT_SORT_PRE
&& !allowrecursion
) {
561 * If we've already seen this vertex as part of our depth-first
562 * search, then we have a cyclic dependency, and we must return
565 zfs_error_aux(hdl
, dgettext(TEXT_DOMAIN
,
566 "recursive dependency at '%s'"),
568 return (zfs_error(hdl
, EZFS_RECURSIVE
,
569 dgettext(TEXT_DOMAIN
,
570 "cannot determine dependent datasets")));
571 } else if (zgv
->zv_visited
>= VISIT_SORT_PRE
) {
573 * If we've already processed this as part of the topological
574 * sort, then don't bother doing so again.
579 zgv
->zv_visited
= VISIT_SORT_PRE
;
581 /* avoid doing a search if we don't have to */
582 zfs_vertex_sort_edges(zgv
);
583 for (i
= 0; i
< zgv
->zv_edgecount
; i
++) {
584 if (topo_sort(hdl
, allowrecursion
, result
, idx
,
585 zgv
->zv_edges
[i
]->ze_dest
) != 0)
589 /* we may have visited this in the course of the above */
590 if (zgv
->zv_visited
== VISIT_SORT_POST
)
593 if ((result
[*idx
] = zfs_alloc(hdl
,
594 strlen(zgv
->zv_dataset
) + 1)) == NULL
)
597 (void) strcpy(result
[*idx
], zgv
->zv_dataset
);
599 zgv
->zv_visited
= VISIT_SORT_POST
;
604 * The only public interface for this file. Do the dirty work of constructing a
605 * child list for the given object. Construct the graph, do the toplogical
606 * sort, and then return the array of strings to the caller.
608 * The 'allowrecursion' parameter controls behavior when cycles are found. If
609 * it is set, the the cycle is ignored and the results returned as if the cycle
610 * did not exist. If it is not set, then the routine will generate an error if
614 get_dependents(libzfs_handle_t
*hdl
, boolean_t allowrecursion
,
615 const char *dataset
, char ***result
, size_t *count
)
620 if ((zgp
= construct_graph(hdl
, dataset
)) == NULL
)
623 if ((*result
= zfs_alloc(hdl
,
624 zgp
->zg_nvertex
* sizeof (char *))) == NULL
) {
625 zfs_graph_destroy(zgp
);
629 if ((zvp
= zfs_graph_lookup(hdl
, zgp
, dataset
, 0)) == NULL
) {
631 zfs_graph_destroy(zgp
);
636 if (topo_sort(hdl
, allowrecursion
, *result
, count
, zvp
) != 0) {
638 zfs_graph_destroy(zgp
);
643 * Get rid of the last entry, which is our starting vertex and not
644 * strictly a dependent.
647 free((*result
)[*count
- 1]);
650 zfs_graph_destroy(zgp
);