]> git.proxmox.com Git - mirror_zfs-debian.git/blob - lib/libzfs/libzfs_graph.c
Merge branch 'dracut'
[mirror_zfs-debian.git] / lib / libzfs / libzfs_graph.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Iterate over all children of the current object. This includes the normal
28 * dataset hierarchy, but also arbitrary hierarchies due to clones. We want to
29 * walk all datasets in the pool, and construct a directed graph of the form:
30 *
31 * home
32 * |
33 * +----+----+
34 * | |
35 * v v ws
36 * bar baz |
37 * | |
38 * v v
39 * @yesterday ----> foo
40 *
41 * In order to construct this graph, we have to walk every dataset in the pool,
42 * because the clone parent is stored as a property of the child, not the
43 * parent. The parent only keeps track of the number of clones.
44 *
45 * In the normal case (without clones) this would be rather expensive. To avoid
46 * unnecessary computation, we first try a walk of the subtree hierarchy
47 * starting from the initial node. At each dataset, we construct a node in the
48 * graph and an edge leading from its parent. If we don't see any snapshots
49 * with a non-zero clone count, then we are finished.
50 *
51 * If we do find a cloned snapshot, then we finish the walk of the current
52 * subtree, but indicate that we need to do a complete walk. We then perform a
53 * global walk of all datasets, avoiding the subtree we already processed.
54 *
55 * At the end of this, we'll end up with a directed graph of all relevant (and
56 * possible some irrelevant) datasets in the system. We need to both find our
57 * limiting subgraph and determine a safe ordering in which to destroy the
58 * datasets. We do a topological ordering of our graph starting at our target
59 * dataset, and then walk the results in reverse.
60 *
61 * It's possible for the graph to have cycles if, for example, the user renames
62 * a clone to be the parent of its origin snapshot. The user can request to
63 * generate an error in this case, or ignore the cycle and continue.
64 *
65 * When removing datasets, we want to destroy the snapshots in chronological
66 * order (because this is the most efficient method). In order to accomplish
67 * this, we store the creation transaction group with each vertex and keep each
68 * vertex's edges sorted according to this value. The topological sort will
69 * automatically walk the snapshots in the correct order.
70 */
71
72 #include <assert.h>
73 #include <libintl.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <strings.h>
78 #include <unistd.h>
79
80 #include <libzfs.h>
81
82 #include "libzfs_impl.h"
83 #include "zfs_namecheck.h"
84
85 #define MIN_EDGECOUNT 4
86
87 /*
88 * Vertex structure. Indexed by dataset name, this structure maintains a list
89 * of edges to other vertices.
90 */
91 struct zfs_edge;
92 typedef struct zfs_vertex {
93 char zv_dataset[ZFS_MAXNAMELEN];
94 struct zfs_vertex *zv_next;
95 int zv_visited;
96 uint64_t zv_txg;
97 struct zfs_edge **zv_edges;
98 int zv_edgecount;
99 int zv_edgealloc;
100 } zfs_vertex_t;
101
102 enum {
103 VISIT_SEEN = 1,
104 VISIT_SORT_PRE,
105 VISIT_SORT_POST
106 };
107
108 /*
109 * Edge structure. Simply maintains a pointer to the destination vertex. There
110 * is no need to store the source vertex, since we only use edges in the context
111 * of the source vertex.
112 */
113 typedef struct zfs_edge {
114 zfs_vertex_t *ze_dest;
115 struct zfs_edge *ze_next;
116 } zfs_edge_t;
117
118 #define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */
119
120 /*
121 * Graph structure. Vertices are maintained in a hash indexed by dataset name.
122 */
123 typedef struct zfs_graph {
124 zfs_vertex_t **zg_hash;
125 size_t zg_size;
126 size_t zg_nvertex;
127 const char *zg_root;
128 int zg_clone_count;
129 } zfs_graph_t;
130
131 /*
132 * Allocate a new edge pointing to the target vertex.
133 */
134 static zfs_edge_t *
135 zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
136 {
137 zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
138
139 if (zep == NULL)
140 return (NULL);
141
142 zep->ze_dest = dest;
143
144 return (zep);
145 }
146
147 /*
148 * Destroy an edge.
149 */
150 static void
151 zfs_edge_destroy(zfs_edge_t *zep)
152 {
153 free(zep);
154 }
155
156 /*
157 * Allocate a new vertex with the given name.
158 */
159 static zfs_vertex_t *
160 zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
161 {
162 zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
163
164 if (zvp == NULL)
165 return (NULL);
166
167 assert(strlen(dataset) < ZFS_MAXNAMELEN);
168
169 (void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
170
171 if ((zvp->zv_edges = zfs_alloc(hdl,
172 MIN_EDGECOUNT * sizeof (void *))) == NULL) {
173 free(zvp);
174 return (NULL);
175 }
176
177 zvp->zv_edgealloc = MIN_EDGECOUNT;
178
179 return (zvp);
180 }
181
182 /*
183 * Destroy a vertex. Frees up any associated edges.
184 */
185 static void
186 zfs_vertex_destroy(zfs_vertex_t *zvp)
187 {
188 int i;
189
190 for (i = 0; i < zvp->zv_edgecount; i++)
191 zfs_edge_destroy(zvp->zv_edges[i]);
192
193 free(zvp->zv_edges);
194 free(zvp);
195 }
196
197 /*
198 * Given a vertex, add an edge to the destination vertex.
199 */
200 static int
201 zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
202 zfs_vertex_t *dest)
203 {
204 zfs_edge_t *zep = zfs_edge_create(hdl, dest);
205
206 if (zep == NULL)
207 return (-1);
208
209 if (zvp->zv_edgecount == zvp->zv_edgealloc) {
210 void *ptr;
211
212 if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
213 zvp->zv_edgealloc * sizeof (void *),
214 zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL)
215 return (-1);
216
217 zvp->zv_edges = ptr;
218 zvp->zv_edgealloc *= 2;
219 }
220
221 zvp->zv_edges[zvp->zv_edgecount++] = zep;
222
223 return (0);
224 }
225
226 static int
227 zfs_edge_compare(const void *a, const void *b)
228 {
229 const zfs_edge_t *ea = *((zfs_edge_t **)a);
230 const zfs_edge_t *eb = *((zfs_edge_t **)b);
231
232 if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
233 return (-1);
234 if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
235 return (1);
236 return (0);
237 }
238
239 /*
240 * Sort the given vertex edges according to the creation txg of each vertex.
241 */
242 static void
243 zfs_vertex_sort_edges(zfs_vertex_t *zvp)
244 {
245 if (zvp->zv_edgecount == 0)
246 return;
247
248 qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
249 zfs_edge_compare);
250 }
251
252 /*
253 * Construct a new graph object. We allow the size to be specified as a
254 * parameter so in the future we can size the hash according to the number of
255 * datasets in the pool.
256 */
257 static zfs_graph_t *
258 zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size)
259 {
260 zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
261
262 if (zgp == NULL)
263 return (NULL);
264
265 zgp->zg_size = size;
266 if ((zgp->zg_hash = zfs_alloc(hdl,
267 size * sizeof (zfs_vertex_t *))) == NULL) {
268 free(zgp);
269 return (NULL);
270 }
271
272 zgp->zg_root = dataset;
273 zgp->zg_clone_count = 0;
274
275 return (zgp);
276 }
277
278 /*
279 * Destroy a graph object. We have to iterate over all the hash chains,
280 * destroying each vertex in the process.
281 */
282 static void
283 zfs_graph_destroy(zfs_graph_t *zgp)
284 {
285 int i;
286 zfs_vertex_t *current, *next;
287
288 for (i = 0; i < zgp->zg_size; i++) {
289 current = zgp->zg_hash[i];
290 while (current != NULL) {
291 next = current->zv_next;
292 zfs_vertex_destroy(current);
293 current = next;
294 }
295 }
296
297 free(zgp->zg_hash);
298 free(zgp);
299 }
300
301 /*
302 * Graph hash function. Classic bernstein k=33 hash function, taken from
303 * usr/src/cmd/sgs/tools/common/strhash.c
304 */
305 static size_t
306 zfs_graph_hash(zfs_graph_t *zgp, const char *str)
307 {
308 size_t hash = 5381;
309 int c;
310
311 while ((c = *str++) != 0)
312 hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
313
314 return (hash % zgp->zg_size);
315 }
316
317 /*
318 * Given a dataset name, finds the associated vertex, creating it if necessary.
319 */
320 static zfs_vertex_t *
321 zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
322 uint64_t txg)
323 {
324 size_t idx = zfs_graph_hash(zgp, dataset);
325 zfs_vertex_t *zvp;
326
327 for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
328 if (strcmp(zvp->zv_dataset, dataset) == 0) {
329 if (zvp->zv_txg == 0)
330 zvp->zv_txg = txg;
331 return (zvp);
332 }
333 }
334
335 if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
336 return (NULL);
337
338 zvp->zv_next = zgp->zg_hash[idx];
339 zvp->zv_txg = txg;
340 zgp->zg_hash[idx] = zvp;
341 zgp->zg_nvertex++;
342
343 return (zvp);
344 }
345
346 /*
347 * Given two dataset names, create an edge between them. For the source vertex,
348 * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
349 * created it as a destination of another edge. If 'dest' is NULL, then this
350 * is an individual vertex (i.e. the starting vertex), so don't add an edge.
351 */
352 static int
353 zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
354 const char *dest, uint64_t txg)
355 {
356 zfs_vertex_t *svp, *dvp;
357
358 if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
359 return (-1);
360 svp->zv_visited = VISIT_SEEN;
361 if (dest != NULL) {
362 dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
363 if (dvp == NULL)
364 return (-1);
365 if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
366 return (-1);
367 }
368
369 return (0);
370 }
371
372 /*
373 * Iterate over all children of the given dataset, adding any vertices
374 * as necessary. Returns -1 if there was an error, or 0 otherwise.
375 * This is a simple recursive algorithm - the ZFS namespace typically
376 * is very flat. We manually invoke the necessary ioctl() calls to
377 * avoid the overhead and additional semantics of zfs_open().
378 */
379 static int
380 iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
381 {
382 zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
383 zfs_vertex_t *zvp;
384
385 /*
386 * Look up the source vertex, and avoid it if we've seen it before.
387 */
388 zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
389 if (zvp == NULL)
390 return (-1);
391 if (zvp->zv_visited == VISIT_SEEN)
392 return (0);
393
394 /*
395 * Iterate over all children
396 */
397 for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
398 ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
399 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
400 /*
401 * Get statistics for this dataset, to determine the type of the
402 * dataset and clone statistics. If this fails, the dataset has
403 * since been removed, and we're pretty much screwed anyway.
404 */
405 zc.zc_objset_stats.dds_origin[0] = '\0';
406 if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
407 continue;
408
409 if (zc.zc_objset_stats.dds_origin[0] != '\0') {
410 if (zfs_graph_add(hdl, zgp,
411 zc.zc_objset_stats.dds_origin, zc.zc_name,
412 zc.zc_objset_stats.dds_creation_txg) != 0)
413 return (-1);
414 /*
415 * Count origins only if they are contained in the graph
416 */
417 if (isa_child_of(zc.zc_objset_stats.dds_origin,
418 zgp->zg_root))
419 zgp->zg_clone_count--;
420 }
421
422 /*
423 * Add an edge between the parent and the child.
424 */
425 if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
426 zc.zc_objset_stats.dds_creation_txg) != 0)
427 return (-1);
428
429 /*
430 * Recursively visit child
431 */
432 if (iterate_children(hdl, zgp, zc.zc_name))
433 return (-1);
434 }
435
436 /*
437 * Now iterate over all snapshots.
438 */
439 bzero(&zc, sizeof (zc));
440
441 for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
442 ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
443 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
444
445 /*
446 * Get statistics for this dataset, to determine the type of the
447 * dataset and clone statistics. If this fails, the dataset has
448 * since been removed, and we're pretty much screwed anyway.
449 */
450 if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
451 continue;
452
453 /*
454 * Add an edge between the parent and the child.
455 */
456 if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
457 zc.zc_objset_stats.dds_creation_txg) != 0)
458 return (-1);
459
460 zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones;
461 }
462
463 zvp->zv_visited = VISIT_SEEN;
464
465 return (0);
466 }
467
468 /*
469 * Returns false if there are no snapshots with dependent clones in this
470 * subtree or if all of those clones are also in this subtree. Returns
471 * true if there is an error or there are external dependents.
472 */
473 static boolean_t
474 external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
475 {
476 zfs_cmd_t zc = { "\0", "\0", "\0", "\0", 0 };
477
478 /*
479 * Check whether this dataset is a clone or has clones since
480 * iterate_children() only checks the children.
481 */
482 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
483 if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
484 return (B_TRUE);
485
486 if (zc.zc_objset_stats.dds_origin[0] != '\0') {
487 if (zfs_graph_add(hdl, zgp,
488 zc.zc_objset_stats.dds_origin, zc.zc_name,
489 zc.zc_objset_stats.dds_creation_txg) != 0)
490 return (B_TRUE);
491 if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset))
492 zgp->zg_clone_count--;
493 }
494
495 if ((zc.zc_objset_stats.dds_num_clones) ||
496 iterate_children(hdl, zgp, dataset))
497 return (B_TRUE);
498
499 return (zgp->zg_clone_count != 0);
500 }
501
502 /*
503 * Construct a complete graph of all necessary vertices. First, iterate over
504 * only our object's children. If no cloned snapshots are found, or all of
505 * the cloned snapshots are in this subtree then return a graph of the subtree.
506 * Otherwise, start at the root of the pool and iterate over all datasets.
507 */
508 static zfs_graph_t *
509 construct_graph(libzfs_handle_t *hdl, const char *dataset)
510 {
511 zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE);
512 int ret = 0;
513
514 if (zgp == NULL)
515 return (zgp);
516
517 if ((strchr(dataset, '/') == NULL) ||
518 (external_dependents(hdl, zgp, dataset))) {
519 /*
520 * Determine pool name and try again.
521 */
522 int len = strcspn(dataset, "/@") + 1;
523 char *pool = zfs_alloc(hdl, len);
524
525 if (pool == NULL) {
526 zfs_graph_destroy(zgp);
527 return (NULL);
528 }
529 (void) strlcpy(pool, dataset, len);
530
531 if (iterate_children(hdl, zgp, pool) == -1 ||
532 zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
533 free(pool);
534 zfs_graph_destroy(zgp);
535 return (NULL);
536 }
537 free(pool);
538 }
539
540 if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
541 zfs_graph_destroy(zgp);
542 return (NULL);
543 }
544
545 return (zgp);
546 }
547
548 /*
549 * Given a graph, do a recursive topological sort into the given array. This is
550 * really just a depth first search, so that the deepest nodes appear first.
551 * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
552 */
553 static int
554 topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
555 size_t *idx, zfs_vertex_t *zgv)
556 {
557 int i;
558
559 if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
560 /*
561 * If we've already seen this vertex as part of our depth-first
562 * search, then we have a cyclic dependency, and we must return
563 * an error.
564 */
565 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
566 "recursive dependency at '%s'"),
567 zgv->zv_dataset);
568 return (zfs_error(hdl, EZFS_RECURSIVE,
569 dgettext(TEXT_DOMAIN,
570 "cannot determine dependent datasets")));
571 } else if (zgv->zv_visited >= VISIT_SORT_PRE) {
572 /*
573 * If we've already processed this as part of the topological
574 * sort, then don't bother doing so again.
575 */
576 return (0);
577 }
578
579 zgv->zv_visited = VISIT_SORT_PRE;
580
581 /* avoid doing a search if we don't have to */
582 zfs_vertex_sort_edges(zgv);
583 for (i = 0; i < zgv->zv_edgecount; i++) {
584 if (topo_sort(hdl, allowrecursion, result, idx,
585 zgv->zv_edges[i]->ze_dest) != 0)
586 return (-1);
587 }
588
589 /* we may have visited this in the course of the above */
590 if (zgv->zv_visited == VISIT_SORT_POST)
591 return (0);
592
593 if ((result[*idx] = zfs_alloc(hdl,
594 strlen(zgv->zv_dataset) + 1)) == NULL)
595 return (-1);
596
597 (void) strcpy(result[*idx], zgv->zv_dataset);
598 *idx += 1;
599 zgv->zv_visited = VISIT_SORT_POST;
600 return (0);
601 }
602
603 /*
604 * The only public interface for this file. Do the dirty work of constructing a
605 * child list for the given object. Construct the graph, do the toplogical
606 * sort, and then return the array of strings to the caller.
607 *
608 * The 'allowrecursion' parameter controls behavior when cycles are found. If
609 * it is set, the the cycle is ignored and the results returned as if the cycle
610 * did not exist. If it is not set, then the routine will generate an error if
611 * a cycle is found.
612 */
613 int
614 get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
615 const char *dataset, char ***result, size_t *count)
616 {
617 zfs_graph_t *zgp;
618 zfs_vertex_t *zvp;
619
620 if ((zgp = construct_graph(hdl, dataset)) == NULL)
621 return (-1);
622
623 if ((*result = zfs_alloc(hdl,
624 zgp->zg_nvertex * sizeof (char *))) == NULL) {
625 zfs_graph_destroy(zgp);
626 return (-1);
627 }
628
629 if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
630 free(*result);
631 zfs_graph_destroy(zgp);
632 return (-1);
633 }
634
635 *count = 0;
636 if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
637 free(*result);
638 zfs_graph_destroy(zgp);
639 return (-1);
640 }
641
642 /*
643 * Get rid of the last entry, which is our starting vertex and not
644 * strictly a dependent.
645 */
646 assert(*count > 0);
647 free((*result)[*count - 1]);
648 (*count)--;
649
650 zfs_graph_destroy(zgp);
651
652 return (0);
653 }