4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
/*
 * Iterate over all children of the current object.  This includes the normal
 * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
 * walk all datasets in the pool, and construct a directed graph of the form:
 *
 *	@yesterday ----> foo
 *
 * In order to construct this graph, we have to walk every dataset in the pool,
 * because the clone parent is stored as a property of the child, not the
 * parent.  The parent only keeps track of the number of clones.
 *
 * In the normal case (without clones) this would be rather expensive.  To avoid
 * unnecessary computation, we first try a walk of the subtree hierarchy
 * starting from the initial node.  At each dataset, we construct a node in the
 * graph and an edge leading from its parent.  If we don't see any snapshots
 * with a non-zero clone count, then we are finished.
 *
 * If we do find a cloned snapshot, then we finish the walk of the current
 * subtree, but indicate that we need to do a complete walk.  We then perform a
 * global walk of all datasets, avoiding the subtree we already processed.
 *
 * At the end of this, we'll end up with a directed graph of all relevant (and
 * possibly some irrelevant) datasets in the system.  We need to both find our
 * limiting subgraph and determine a safe ordering in which to destroy the
 * datasets.  We do a topological ordering of our graph starting at our target
 * dataset, and then walk the results in reverse.
 *
 * It's possible for the graph to have cycles if, for example, the user renames
 * a clone to be the parent of its origin snapshot.  The user can request to
 * generate an error in this case, or ignore the cycle and continue.
 *
 * When removing datasets, we want to destroy the snapshots in chronological
 * order (because this is the most efficient method).  In order to accomplish
 * this, we store the creation transaction group with each vertex and keep each
 * vertex's edges sorted according to this value.  The topological sort will
 * automatically walk the snapshots in the correct order.
 */
82 #include "libzfs_impl.h"
83 #include "zfs_namecheck.h"
85 #define MIN_EDGECOUNT 4
88 * Vertex structure. Indexed by dataset name, this structure maintains a list
89 * of edges to other vertices.
92 typedef struct zfs_vertex
{
93 char zv_dataset
[ZFS_MAXNAMELEN
];
94 struct zfs_vertex
*zv_next
;
97 struct zfs_edge
**zv_edges
;
109 * Edge structure. Simply maintains a pointer to the destination vertex. There
110 * is no need to store the source vertex, since we only use edges in the context
111 * of the source vertex.
113 typedef struct zfs_edge
{
114 zfs_vertex_t
*ze_dest
;
115 struct zfs_edge
*ze_next
;
118 #define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */
121 * Graph structure. Vertices are maintained in a hash indexed by dataset name.
123 typedef struct zfs_graph
{
124 zfs_vertex_t
**zg_hash
;
132 * Allocate a new edge pointing to the target vertex.
135 zfs_edge_create(libzfs_handle_t
*hdl
, zfs_vertex_t
*dest
)
137 zfs_edge_t
*zep
= zfs_alloc(hdl
, sizeof (zfs_edge_t
));
151 zfs_edge_destroy(zfs_edge_t
*zep
)
157 * Allocate a new vertex with the given name.
159 static zfs_vertex_t
*
160 zfs_vertex_create(libzfs_handle_t
*hdl
, const char *dataset
)
162 zfs_vertex_t
*zvp
= zfs_alloc(hdl
, sizeof (zfs_vertex_t
));
167 assert(strlen(dataset
) < ZFS_MAXNAMELEN
);
169 (void) strlcpy(zvp
->zv_dataset
, dataset
, sizeof (zvp
->zv_dataset
));
171 if ((zvp
->zv_edges
= zfs_alloc(hdl
,
172 MIN_EDGECOUNT
* sizeof (void *))) == NULL
) {
177 zvp
->zv_edgealloc
= MIN_EDGECOUNT
;
183 * Destroy a vertex. Frees up any associated edges.
186 zfs_vertex_destroy(zfs_vertex_t
*zvp
)
190 for (i
= 0; i
< zvp
->zv_edgecount
; i
++)
191 zfs_edge_destroy(zvp
->zv_edges
[i
]);
198 * Given a vertex, add an edge to the destination vertex.
201 zfs_vertex_add_edge(libzfs_handle_t
*hdl
, zfs_vertex_t
*zvp
,
204 zfs_edge_t
*zep
= zfs_edge_create(hdl
, dest
);
209 if (zvp
->zv_edgecount
== zvp
->zv_edgealloc
) {
212 if ((ptr
= zfs_realloc(hdl
, zvp
->zv_edges
,
213 zvp
->zv_edgealloc
* sizeof (void *),
214 zvp
->zv_edgealloc
* 2 * sizeof (void *))) == NULL
)
218 zvp
->zv_edgealloc
*= 2;
221 zvp
->zv_edges
[zvp
->zv_edgecount
++] = zep
;
227 zfs_edge_compare(const void *a
, const void *b
)
229 const zfs_edge_t
*ea
= *((zfs_edge_t
**)a
);
230 const zfs_edge_t
*eb
= *((zfs_edge_t
**)b
);
232 if (ea
->ze_dest
->zv_txg
< eb
->ze_dest
->zv_txg
)
234 if (ea
->ze_dest
->zv_txg
> eb
->ze_dest
->zv_txg
)
240 * Sort the given vertex edges according to the creation txg of each vertex.
243 zfs_vertex_sort_edges(zfs_vertex_t
*zvp
)
245 if (zvp
->zv_edgecount
== 0)
248 qsort(zvp
->zv_edges
, zvp
->zv_edgecount
, sizeof (void *),
253 * Construct a new graph object. We allow the size to be specified as a
254 * parameter so in the future we can size the hash according to the number of
255 * datasets in the pool.
258 zfs_graph_create(libzfs_handle_t
*hdl
, const char *dataset
, size_t size
)
260 zfs_graph_t
*zgp
= zfs_alloc(hdl
, sizeof (zfs_graph_t
));
266 if ((zgp
->zg_hash
= zfs_alloc(hdl
,
267 size
* sizeof (zfs_vertex_t
*))) == NULL
) {
272 zgp
->zg_root
= dataset
;
273 zgp
->zg_clone_count
= 0;
279 * Destroy a graph object. We have to iterate over all the hash chains,
280 * destroying each vertex in the process.
283 zfs_graph_destroy(zfs_graph_t
*zgp
)
286 zfs_vertex_t
*current
, *next
;
288 for (i
= 0; i
< zgp
->zg_size
; i
++) {
289 current
= zgp
->zg_hash
[i
];
290 while (current
!= NULL
) {
291 next
= current
->zv_next
;
292 zfs_vertex_destroy(current
);
302 * Graph hash function. Classic bernstein k=33 hash function, taken from
303 * usr/src/cmd/sgs/tools/common/strhash.c
306 zfs_graph_hash(zfs_graph_t
*zgp
, const char *str
)
311 while ((c
= *str
++) != 0)
312 hash
= ((hash
<< 5) + hash
) + c
; /* hash * 33 + c */
314 return (hash
% zgp
->zg_size
);
318 * Given a dataset name, finds the associated vertex, creating it if necessary.
320 static zfs_vertex_t
*
321 zfs_graph_lookup(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *dataset
,
324 size_t idx
= zfs_graph_hash(zgp
, dataset
);
327 for (zvp
= zgp
->zg_hash
[idx
]; zvp
!= NULL
; zvp
= zvp
->zv_next
) {
328 if (strcmp(zvp
->zv_dataset
, dataset
) == 0) {
329 if (zvp
->zv_txg
== 0)
335 if ((zvp
= zfs_vertex_create(hdl
, dataset
)) == NULL
)
338 zvp
->zv_next
= zgp
->zg_hash
[idx
];
340 zgp
->zg_hash
[idx
] = zvp
;
347 * Given two dataset names, create an edge between them. For the source vertex,
348 * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
349 * created it as a destination of another edge. If 'dest' is NULL, then this
350 * is an individual vertex (i.e. the starting vertex), so don't add an edge.
353 zfs_graph_add(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *source
,
354 const char *dest
, uint64_t txg
)
356 zfs_vertex_t
*svp
, *dvp
;
358 if ((svp
= zfs_graph_lookup(hdl
, zgp
, source
, 0)) == NULL
)
360 svp
->zv_visited
= VISIT_SEEN
;
362 dvp
= zfs_graph_lookup(hdl
, zgp
, dest
, txg
);
365 if (zfs_vertex_add_edge(hdl
, svp
, dvp
) != 0)
373 * Iterate over all children of the given dataset, adding any vertices
374 * as necessary. Returns -1 if there was an error, or 0 otherwise.
375 * This is a simple recursive algorithm - the ZFS namespace typically
376 * is very flat. We manually invoke the necessary ioctl() calls to
377 * avoid the overhead and additional semantics of zfs_open().
380 iterate_children(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *dataset
)
382 zfs_cmd_t zc
= { "\0", "\0", "\0", "\0", 0 };
386 * Look up the source vertex, and avoid it if we've seen it before.
388 zvp
= zfs_graph_lookup(hdl
, zgp
, dataset
, 0);
391 if (zvp
->zv_visited
== VISIT_SEEN
)
395 * Iterate over all children
397 for ((void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
398 ioctl(hdl
->libzfs_fd
, ZFS_IOC_DATASET_LIST_NEXT
, &zc
) == 0;
399 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
))) {
401 * Get statistics for this dataset, to determine the type of the
402 * dataset and clone statistics. If this fails, the dataset has
403 * since been removed, and we're pretty much screwed anyway.
405 zc
.zc_objset_stats
.dds_origin
[0] = '\0';
406 if (ioctl(hdl
->libzfs_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) != 0)
409 if (zc
.zc_objset_stats
.dds_origin
[0] != '\0') {
410 if (zfs_graph_add(hdl
, zgp
,
411 zc
.zc_objset_stats
.dds_origin
, zc
.zc_name
,
412 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
415 * Count origins only if they are contained in the graph
417 if (isa_child_of(zc
.zc_objset_stats
.dds_origin
,
419 zgp
->zg_clone_count
--;
423 * Add an edge between the parent and the child.
425 if (zfs_graph_add(hdl
, zgp
, dataset
, zc
.zc_name
,
426 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
430 * Recursively visit child
432 if (iterate_children(hdl
, zgp
, zc
.zc_name
))
437 * Now iterate over all snapshots.
439 bzero(&zc
, sizeof (zc
));
441 for ((void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
442 ioctl(hdl
->libzfs_fd
, ZFS_IOC_SNAPSHOT_LIST_NEXT
, &zc
) == 0;
443 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
))) {
446 * Get statistics for this dataset, to determine the type of the
447 * dataset and clone statistics. If this fails, the dataset has
448 * since been removed, and we're pretty much screwed anyway.
450 if (ioctl(hdl
->libzfs_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) != 0)
454 * Add an edge between the parent and the child.
456 if (zfs_graph_add(hdl
, zgp
, dataset
, zc
.zc_name
,
457 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
460 zgp
->zg_clone_count
+= zc
.zc_objset_stats
.dds_num_clones
;
463 zvp
->zv_visited
= VISIT_SEEN
;
469 * Returns false if there are no snapshots with dependent clones in this
470 * subtree or if all of those clones are also in this subtree. Returns
471 * true if there is an error or there are external dependents.
474 external_dependents(libzfs_handle_t
*hdl
, zfs_graph_t
*zgp
, const char *dataset
)
476 zfs_cmd_t zc
= { "\0", "\0", "\0", "\0", 0 };
479 * Check whether this dataset is a clone or has clones since
480 * iterate_children() only checks the children.
482 (void) strlcpy(zc
.zc_name
, dataset
, sizeof (zc
.zc_name
));
483 if (ioctl(hdl
->libzfs_fd
, ZFS_IOC_OBJSET_STATS
, &zc
) != 0)
486 if (zc
.zc_objset_stats
.dds_origin
[0] != '\0') {
487 if (zfs_graph_add(hdl
, zgp
,
488 zc
.zc_objset_stats
.dds_origin
, zc
.zc_name
,
489 zc
.zc_objset_stats
.dds_creation_txg
) != 0)
491 if (isa_child_of(zc
.zc_objset_stats
.dds_origin
, dataset
))
492 zgp
->zg_clone_count
--;
495 if ((zc
.zc_objset_stats
.dds_num_clones
) ||
496 iterate_children(hdl
, zgp
, dataset
))
499 return (zgp
->zg_clone_count
!= 0);
503 * Construct a complete graph of all necessary vertices. First, iterate over
504 * only our object's children. If no cloned snapshots are found, or all of
505 * the cloned snapshots are in this subtree then return a graph of the subtree.
506 * Otherwise, start at the root of the pool and iterate over all datasets.
509 construct_graph(libzfs_handle_t
*hdl
, const char *dataset
)
511 zfs_graph_t
*zgp
= zfs_graph_create(hdl
, dataset
, ZFS_GRAPH_SIZE
);
517 if ((strchr(dataset
, '/') == NULL
) ||
518 (external_dependents(hdl
, zgp
, dataset
))) {
520 * Determine pool name and try again.
522 int len
= strcspn(dataset
, "/@") + 1;
523 char *pool
= zfs_alloc(hdl
, len
);
526 zfs_graph_destroy(zgp
);
529 (void) strlcpy(pool
, dataset
, len
);
531 if (iterate_children(hdl
, zgp
, pool
) == -1 ||
532 zfs_graph_add(hdl
, zgp
, pool
, NULL
, 0) != 0) {
534 zfs_graph_destroy(zgp
);
540 if (ret
== -1 || zfs_graph_add(hdl
, zgp
, dataset
, NULL
, 0) != 0) {
541 zfs_graph_destroy(zgp
);
549 * Given a graph, do a recursive topological sort into the given array. This is
550 * really just a depth first search, so that the deepest nodes appear first.
551 * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
554 topo_sort(libzfs_handle_t
*hdl
, boolean_t allowrecursion
, char **result
,
555 size_t *idx
, zfs_vertex_t
*zgv
)
559 if (zgv
->zv_visited
== VISIT_SORT_PRE
&& !allowrecursion
) {
561 * If we've already seen this vertex as part of our depth-first
562 * search, then we have a cyclic dependency, and we must return
565 zfs_error_aux(hdl
, dgettext(TEXT_DOMAIN
,
566 "recursive dependency at '%s'"),
568 return (zfs_error(hdl
, EZFS_RECURSIVE
,
569 dgettext(TEXT_DOMAIN
,
570 "cannot determine dependent datasets")));
571 } else if (zgv
->zv_visited
>= VISIT_SORT_PRE
) {
573 * If we've already processed this as part of the topological
574 * sort, then don't bother doing so again.
579 zgv
->zv_visited
= VISIT_SORT_PRE
;
581 /* avoid doing a search if we don't have to */
582 zfs_vertex_sort_edges(zgv
);
583 for (i
= 0; i
< zgv
->zv_edgecount
; i
++) {
584 if (topo_sort(hdl
, allowrecursion
, result
, idx
,
585 zgv
->zv_edges
[i
]->ze_dest
) != 0)
589 /* we may have visited this in the course of the above */
590 if (zgv
->zv_visited
== VISIT_SORT_POST
)
593 if ((result
[*idx
] = zfs_alloc(hdl
,
594 strlen(zgv
->zv_dataset
) + 1)) == NULL
)
597 (void) strcpy(result
[*idx
], zgv
->zv_dataset
);
599 zgv
->zv_visited
= VISIT_SORT_POST
;
604 * The only public interface for this file. Do the dirty work of constructing a
605 * child list for the given object. Construct the graph, do the toplogical
606 * sort, and then return the array of strings to the caller.
608 * The 'allowrecursion' parameter controls behavior when cycles are found. If
609 * it is set, the the cycle is ignored and the results returned as if the cycle
610 * did not exist. If it is not set, then the routine will generate an error if
614 get_dependents(libzfs_handle_t
*hdl
, boolean_t allowrecursion
,
615 const char *dataset
, char ***result
, size_t *count
)
620 if ((zgp
= construct_graph(hdl
, dataset
)) == NULL
)
623 if ((*result
= zfs_alloc(hdl
,
624 zgp
->zg_nvertex
* sizeof (char *))) == NULL
) {
625 zfs_graph_destroy(zgp
);
629 if ((zvp
= zfs_graph_lookup(hdl
, zgp
, dataset
, 0)) == NULL
) {
631 zfs_graph_destroy(zgp
);
636 if (topo_sort(hdl
, allowrecursion
, *result
, count
, zvp
) != 0) {
638 zfs_graph_destroy(zgp
);
643 * Get rid of the last entry, which is our starting vertex and not
644 * strictly a dependent.
647 free((*result
)[*count
- 1]);
650 zfs_graph_destroy(zgp
);