[mirror_zfs-debian.git] / zfs / lib / libzfs / libzfs_graph.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Iterate over all children of the current object.  This includes the normal
 * dataset hierarchy, but also arbitrary hierarchies due to clones.  We want to
 * walk all datasets in the pool, and construct a directed graph of the form:
 *
 * 			home
 *                        |
 *                   +----+----+
 *                   |         |
 *                   v         v             ws
 *                  bar       baz             |
 *                             |              |
 *                             v              v
 *                          @yesterday ----> foo
 *
 * In order to construct this graph, we have to walk every dataset in the pool,
 * because the clone parent is stored as a property of the child, not the
 * parent.  The parent only keeps track of the number of clones.
 *
 * In the normal case (without clones) this would be rather expensive.  To avoid
 * unnecessary computation, we first try a walk of the subtree hierarchy
 * starting from the initial node.  At each dataset, we construct a node in the
 * graph and an edge leading from its parent.  If we don't see any snapshots
 * with a non-zero clone count, then we are finished.
 *
 * If we do find a cloned snapshot, then we finish the walk of the current
 * subtree, but indicate that we need to do a complete walk.  We then perform a
 * global walk of all datasets, avoiding the subtree we already processed.
 *
 * At the end of this, we'll end up with a directed graph of all relevant (and
 * possible some irrelevant) datasets in the system.  We need to both find our
 * limiting subgraph and determine a safe ordering in which to destroy the
 * datasets.  We do a topological ordering of our graph starting at our target
 * dataset, and then walk the results in reverse.
 *
 * It's possible for the graph to have cycles if, for example, the user renames
 * a clone to be the parent of its origin snapshot.  The user can request to
 * generate an error in this case, or ignore the cycle and continue.
 *
 * When removing datasets, we want to destroy the snapshots in chronological
 * order (because this is the most efficient method).  In order to accomplish
 * this, we store the creation transaction group with each vertex and keep each
 * vertex's edges sorted according to this value.  The topological sort will
 * automatically walk the snapshots in the correct order.
 */

#include <assert.h>
#include <libintl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#include <libzfs.h>

#include "libzfs_impl.h"
#include "zfs_namecheck.h"

#define	MIN_EDGECOUNT	4

/*
 * Vertex structure.  Indexed by dataset name, this structure maintains a list
 * of edges to other vertices.
 */
struct zfs_edge;
typedef struct zfs_vertex {
	char			zv_dataset[ZFS_MAXNAMELEN];
	struct zfs_vertex	*zv_next;
	int			zv_visited;
	uint64_t		zv_txg;
	struct zfs_edge		**zv_edges;
	int			zv_edgecount;
	int			zv_edgealloc;
} zfs_vertex_t;

enum {
	VISIT_SEEN = 1,
	VISIT_SORT_PRE,
	VISIT_SORT_POST
};

/*
 * Edge structure.  Simply maintains a pointer to the destination vertex.  There
 * is no need to store the source vertex, since we only use edges in the context
 * of the source vertex.
 */
typedef struct zfs_edge {
	zfs_vertex_t		*ze_dest;
	struct zfs_edge		*ze_next;
} zfs_edge_t;

#define	ZFS_GRAPH_SIZE		1027	/* this could be dynamic some day */

/*
 * Graph structure.  Vertices are maintained in a hash indexed by dataset name.
 */
typedef struct zfs_graph {
	zfs_vertex_t		**zg_hash;
	size_t			zg_size;
	size_t			zg_nvertex;
	const char		*zg_root;
	int			zg_clone_count;
} zfs_graph_t;

/*
 * Allocate a new edge pointing to the target vertex.
 */
static zfs_edge_t *
zfs_edge_create(libzfs_handle_t *hdl, zfs_vertex_t *dest)
{
	zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));

	if (zep == NULL)
		return (NULL);

	zep->ze_dest = dest;

	return (zep);
}

/*
 * Destroy an edge.
 */
static void
zfs_edge_destroy(zfs_edge_t *zep)
{
	free(zep);
}

/*
 * Allocate a new vertex with the given name.
 */
static zfs_vertex_t *
zfs_vertex_create(libzfs_handle_t *hdl, const char *dataset)
{
	zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));

	if (zvp == NULL)
		return (NULL);

	assert(strlen(dataset) < ZFS_MAXNAMELEN);

	(void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));

	if ((zvp->zv_edges = zfs_alloc(hdl,
	    MIN_EDGECOUNT * sizeof (void *))) == NULL) {
		free(zvp);
		return (NULL);
	}

	zvp->zv_edgealloc = MIN_EDGECOUNT;

	return (zvp);
}

/*
 * Destroy a vertex.  Frees up any associated edges.
 */
static void
zfs_vertex_destroy(zfs_vertex_t *zvp)
{
	int i;

	for (i = 0; i < zvp->zv_edgecount; i++)
		zfs_edge_destroy(zvp->zv_edges[i]);

	free(zvp->zv_edges);
	free(zvp);
}

/*
 * Given a vertex, add an edge to the destination vertex.
 */
static int
zfs_vertex_add_edge(libzfs_handle_t *hdl, zfs_vertex_t *zvp,
    zfs_vertex_t *dest)
{
	zfs_edge_t *zep = zfs_edge_create(hdl, dest);

	if (zep == NULL)
		return (-1);

	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
		void *ptr;

		if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
		    zvp->zv_edgealloc * sizeof (void *),
		    zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL)
			return (-1);

		zvp->zv_edges = ptr;
		zvp->zv_edgealloc *= 2;
	}

	zvp->zv_edges[zvp->zv_edgecount++] = zep;

	return (0);
}

static int
zfs_edge_compare(const void *a, const void *b)
{
	const zfs_edge_t *ea = *((zfs_edge_t **)a);
	const zfs_edge_t *eb = *((zfs_edge_t **)b);

	if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
		return (-1);
	if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
		return (1);
	return (0);
}

/*
 * Sort the given vertex edges according to the creation txg of each vertex.
 */
static void
zfs_vertex_sort_edges(zfs_vertex_t *zvp)
{
	if (zvp->zv_edgecount == 0)
		return;

	qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
	    zfs_edge_compare);
}

/*
 * Construct a new graph object.  We allow the size to be specified as a
 * parameter so in the future we can size the hash according to the number of
 * datasets in the pool.
 */
static zfs_graph_t *
zfs_graph_create(libzfs_handle_t *hdl, const char *dataset, size_t size)
{
	zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));

	if (zgp == NULL)
		return (NULL);

	zgp->zg_size = size;
	if ((zgp->zg_hash = zfs_alloc(hdl,
	    size * sizeof (zfs_vertex_t *))) == NULL) {
		free(zgp);
		return (NULL);
	}

	zgp->zg_root = dataset;
	zgp->zg_clone_count = 0;

	return (zgp);
}

/*
 * Destroy a graph object.  We have to iterate over all the hash chains,
 * destroying each vertex in the process.
 */
static void
zfs_graph_destroy(zfs_graph_t *zgp)
{
	int i;
	zfs_vertex_t *current, *next;

	for (i = 0; i < zgp->zg_size; i++) {
		current = zgp->zg_hash[i];
		while (current != NULL) {
			next = current->zv_next;
			zfs_vertex_destroy(current);
			current = next;
		}
	}

	free(zgp->zg_hash);
	free(zgp);
}

/*
 * Graph hash function.  Classic bernstein k=33 hash function, taken from
 * usr/src/cmd/sgs/tools/common/strhash.c
 */
static size_t
zfs_graph_hash(zfs_graph_t *zgp, const char *str)
{
	size_t hash = 5381;
	int c;

	while ((c = *str++) != 0)
		hash = ((hash << 5) + hash) + c; /* hash * 33 + c */

	return (hash % zgp->zg_size);
}

/*
 * Given a dataset name, finds the associated vertex, creating it if necessary.
 */
static zfs_vertex_t *
zfs_graph_lookup(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset,
    uint64_t txg)
{
	size_t idx = zfs_graph_hash(zgp, dataset);
	zfs_vertex_t *zvp;

	for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
		if (strcmp(zvp->zv_dataset, dataset) == 0) {
			if (zvp->zv_txg == 0)
				zvp->zv_txg = txg;
			return (zvp);
		}
	}

	if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
		return (NULL);

	zvp->zv_next = zgp->zg_hash[idx];
	zvp->zv_txg = txg;
	zgp->zg_hash[idx] = zvp;
	zgp->zg_nvertex++;

	return (zvp);
}

/*
 * Given two dataset names, create an edge between them.  For the source vertex,
 * mark 'zv_visited' to indicate that we have seen this vertex, and not simply
 * created it as a destination of another edge.  If 'dest' is NULL, then this
 * is an individual vertex (i.e. the starting vertex), so don't add an edge.
 */
static int
zfs_graph_add(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *source,
    const char *dest, uint64_t txg)
{
	zfs_vertex_t *svp, *dvp;

	if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
		return (-1);
	svp->zv_visited = VISIT_SEEN;
	if (dest != NULL) {
		dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
		if (dvp == NULL)
			return (-1);
		if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
			return (-1);
	}

	return (0);
}

/*
 * Iterate over all children of the given dataset, adding any vertices
 * as necessary.  Returns -1 if there was an error, or 0 otherwise.
 * This is a simple recursive algorithm - the ZFS namespace typically
 * is very flat.  We manually invoke the necessary ioctl() calls to
 * avoid the overhead and additional semantics of zfs_open().
 */
static int
iterate_children(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
{
	zfs_cmd_t zc = { 0 };
	zfs_vertex_t *zvp;

	/*
	 * Look up the source vertex, and avoid it if we've seen it before.
	 */
	zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
	if (zvp == NULL)
		return (-1);
	if (zvp->zv_visited == VISIT_SEEN)
		return (0);

	/*
	 * Iterate over all children
	 */
	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
	    ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {

		/*
		 * Ignore private dataset names.
		 */
		if (dataset_name_hidden(zc.zc_name))
			continue;

		/*
		 * Get statistics for this dataset, to determine the type of the
		 * dataset and clone statistics.  If this fails, the dataset has
		 * since been removed, and we're pretty much screwed anyway.
		 */
		zc.zc_objset_stats.dds_origin[0] = '\0';
		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
			continue;

		if (zc.zc_objset_stats.dds_origin[0] != '\0') {
			if (zfs_graph_add(hdl, zgp,
			    zc.zc_objset_stats.dds_origin, zc.zc_name,
			    zc.zc_objset_stats.dds_creation_txg) != 0)
				return (-1);
			/*
			 * Count origins only if they are contained in the graph
			 */
			if (isa_child_of(zc.zc_objset_stats.dds_origin,
			    zgp->zg_root))
				zgp->zg_clone_count--;
		}

		/*
		 * Add an edge between the parent and the child.
		 */
		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
		    zc.zc_objset_stats.dds_creation_txg) != 0)
			return (-1);

		/*
		 * Recursively visit child
		 */
		if (iterate_children(hdl, zgp, zc.zc_name))
			return (-1);
	}

	/*
	 * Now iterate over all snapshots.
	 */
	bzero(&zc, sizeof (zc));

	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
	    ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
	    (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {

		/*
		 * Get statistics for this dataset, to determine the type of the
		 * dataset and clone statistics.  If this fails, the dataset has
		 * since been removed, and we're pretty much screwed anyway.
		 */
		if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
			continue;

		/*
		 * Add an edge between the parent and the child.
		 */
		if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
		    zc.zc_objset_stats.dds_creation_txg) != 0)
			return (-1);

		zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones;
	}

	zvp->zv_visited = VISIT_SEEN;

	return (0);
}

/*
 * Returns false if there are no snapshots with dependent clones in this
 * subtree or if all of those clones are also in this subtree.  Returns
 * true if there is an error or there are external dependents.
 */
static boolean_t
external_dependents(libzfs_handle_t *hdl, zfs_graph_t *zgp, const char *dataset)
{
	zfs_cmd_t zc = { 0 };

	/*
	 * Check whether this dataset is a clone or has clones since
	 * iterate_children() only checks the children.
	 */
	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
		return (B_TRUE);

	if (zc.zc_objset_stats.dds_origin[0] != '\0') {
		if (zfs_graph_add(hdl, zgp,
		    zc.zc_objset_stats.dds_origin, zc.zc_name,
		    zc.zc_objset_stats.dds_creation_txg) != 0)
			return (B_TRUE);
		if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset))
			zgp->zg_clone_count--;
	}

	if ((zc.zc_objset_stats.dds_num_clones) ||
	    iterate_children(hdl, zgp, dataset))
		return (B_TRUE);

	return (zgp->zg_clone_count != 0);
}

/*
 * Construct a complete graph of all necessary vertices.  First, iterate over
 * only our object's children.  If no cloned snapshots are found, or all of
 * the cloned snapshots are in this subtree then return a graph of the subtree.
 * Otherwise, start at the root of the pool and iterate over all datasets.
 */
static zfs_graph_t *
construct_graph(libzfs_handle_t *hdl, const char *dataset)
{
	zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE);
	int ret = 0;

	if (zgp == NULL)
		return (zgp);

	if ((strchr(dataset, '/') == NULL) ||
	    (external_dependents(hdl, zgp, dataset))) {
		/*
		 * Determine pool name and try again.
		 */
		int len = strcspn(dataset, "/@") + 1;
		char *pool = zfs_alloc(hdl, len);

		if (pool == NULL) {
			zfs_graph_destroy(zgp);
			return (NULL);
		}
		(void) strlcpy(pool, dataset, len);

		if (iterate_children(hdl, zgp, pool) == -1 ||
		    zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
			free(pool);
			zfs_graph_destroy(zgp);
			return (NULL);
		}
		free(pool);
	}

	if (ret == -1 || zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
		zfs_graph_destroy(zgp);
		return (NULL);
	}

	return (zgp);
}

/*
 * Given a graph, do a recursive topological sort into the given array.  This is
 * really just a depth first search, so that the deepest nodes appear first.
 * hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
 */
static int
topo_sort(libzfs_handle_t *hdl, boolean_t allowrecursion, char **result,
    size_t *idx, zfs_vertex_t *zgv)
{
	int i;

	if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
		/*
		 * If we've already seen this vertex as part of our depth-first
		 * search, then we have a cyclic dependency, and we must return
		 * an error.
		 */
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "recursive dependency at '%s'"),
		    zgv->zv_dataset);
		return (zfs_error(hdl, EZFS_RECURSIVE,
		    dgettext(TEXT_DOMAIN,
		    "cannot determine dependent datasets")));
	} else if (zgv->zv_visited >= VISIT_SORT_PRE) {
		/*
		 * If we've already processed this as part of the topological
		 * sort, then don't bother doing so again.
		 */
		return (0);
	}

	zgv->zv_visited = VISIT_SORT_PRE;

	/* avoid doing a search if we don't have to */
	zfs_vertex_sort_edges(zgv);
	for (i = 0; i < zgv->zv_edgecount; i++) {
		if (topo_sort(hdl, allowrecursion, result, idx,
		    zgv->zv_edges[i]->ze_dest) != 0)
			return (-1);
	}

	/* we may have visited this in the course of the above */
	if (zgv->zv_visited == VISIT_SORT_POST)
		return (0);

	if ((result[*idx] = zfs_alloc(hdl,
	    strlen(zgv->zv_dataset) + 1)) == NULL)
		return (-1);

	(void) strcpy(result[*idx], zgv->zv_dataset);
	*idx += 1;
	zgv->zv_visited = VISIT_SORT_POST;
	return (0);
}

/*
 * The only public interface for this file.  Do the dirty work of constructing a
 * child list for the given object.  Construct the graph, do the toplogical
 * sort, and then return the array of strings to the caller.
 *
 * The 'allowrecursion' parameter controls behavior when cycles are found.  If
 * it is set, the the cycle is ignored and the results returned as if the cycle
 * did not exist.  If it is not set, then the routine will generate an error if
 * a cycle is found.
 */
int
get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
    const char *dataset, char ***result, size_t *count)
{
	zfs_graph_t *zgp;
	zfs_vertex_t *zvp;

	if ((zgp = construct_graph(hdl, dataset)) == NULL)
		return (-1);

	if ((*result = zfs_alloc(hdl,
	    zgp->zg_nvertex * sizeof (char *))) == NULL) {
		zfs_graph_destroy(zgp);
		return (-1);
	}

	if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
		free(*result);
		zfs_graph_destroy(zgp);
		return (-1);
	}

	*count = 0;
	if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
		free(*result);
		zfs_graph_destroy(zgp);
		return (-1);
	}

	/*
	 * Get rid of the last entry, which is our starting vertex and not
	 * strictly a dependent.
	 */
	assert(*count > 0);
	free((*result)[*count - 1]);
	(*count)--;

	zfs_graph_destroy(zgp);

	return (0);
}
Commit	Line	Data
34dc7c2f BB	1	/*
	2	* CDDL HEADER START
	3	*
	4	* The contents of this file are subject to the terms of the
	5	* Common Development and Distribution License (the "License").
	6	* You may not use this file except in compliance with the License.
	7	*
	8	* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
	9	* or http://www.opensolaris.org/os/licensing.
	10	* See the License for the specific language governing permissions
	11	* and limitations under the License.
	12	*
	13	* When distributing Covered Code, include this CDDL HEADER in each
	14	* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
	15	* If applicable, add the following below this CDDL HEADER, with the
	16	* fields enclosed by brackets "[]" replaced with your own identifying
	17	* information: Portions Copyright [yyyy] [name of copyright owner]
	18	*
	19	* CDDL HEADER END
	20	*/
	21	/*
	22	* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
	23	* Use is subject to license terms.
	24	*/
	25
b128c09f	26	#pragma ident "%Z%%M% %I% %E% SMI"
34dc7c2f BB	27
	28	/*
	29	* Iterate over all children of the current object. This includes the normal
	30	* dataset hierarchy, but also arbitrary hierarchies due to clones. We want to
	31	* walk all datasets in the pool, and construct a directed graph of the form:
	32	*
	33	* home
	34	* \|
	35	* +----+----+
	36	* \| \|
	37	* v v ws
	38	* bar baz \|
	39	* \| \|
	40	* v v
	41	* @yesterday ----> foo
	42	*
	43	* In order to construct this graph, we have to walk every dataset in the pool,
	44	* because the clone parent is stored as a property of the child, not the
	45	* parent. The parent only keeps track of the number of clones.
	46	*
	47	* In the normal case (without clones) this would be rather expensive. To avoid
	48	* unnecessary computation, we first try a walk of the subtree hierarchy
	49	* starting from the initial node. At each dataset, we construct a node in the
	50	* graph and an edge leading from its parent. If we don't see any snapshots
	51	* with a non-zero clone count, then we are finished.
	52	*
	53	* If we do find a cloned snapshot, then we finish the walk of the current
	54	* subtree, but indicate that we need to do a complete walk. We then perform a
	55	* global walk of all datasets, avoiding the subtree we already processed.
	56	*
	57	* At the end of this, we'll end up with a directed graph of all relevant (and
	58	* possible some irrelevant) datasets in the system. We need to both find our
	59	* limiting subgraph and determine a safe ordering in which to destroy the
	60	* datasets. We do a topological ordering of our graph starting at our target
	61	* dataset, and then walk the results in reverse.
	62	*
	63	* It's possible for the graph to have cycles if, for example, the user renames
	64	* a clone to be the parent of its origin snapshot. The user can request to
	65	* generate an error in this case, or ignore the cycle and continue.
	66	*
	67	* When removing datasets, we want to destroy the snapshots in chronological
	68	* order (because this is the most efficient method). In order to accomplish
	69	* this, we store the creation transaction group with each vertex and keep each
	70	* vertex's edges sorted according to this value. The topological sort will
	71	* automatically walk the snapshots in the correct order.
	72	*/
	73
	74	#include <assert.h>
	75	#include <libintl.h>
	76	#include <stdio.h>
	77	#include <stdlib.h>
	78	#include <string.h>
	79	#include <strings.h>
	80	#include <unistd.h>
	81
	82	#include <libzfs.h>
	83
	84	#include "libzfs_impl.h"
	85	#include "zfs_namecheck.h"
	86
	87	#define MIN_EDGECOUNT 4
	88
	89	/*
	90	* Vertex structure. Indexed by dataset name, this structure maintains a list
91	* of edges to other vertices.
92	*/
93	struct zfs_edge;
94	typedef struct zfs_vertex {
95	char zv_dataset[ZFS_MAXNAMELEN];
96	struct zfs_vertex *zv_next;
97	int zv_visited;
98	uint64_t zv_txg;
99	struct zfs_edge **zv_edges;
100	int zv_edgecount;
101	int zv_edgealloc;
102	} zfs_vertex_t;
103
104	enum {
105	VISIT_SEEN = 1,
106	VISIT_SORT_PRE,
107	VISIT_SORT_POST
108	};
109
110	/*
111	* Edge structure. Simply maintains a pointer to the destination vertex. There
112	* is no need to store the source vertex, since we only use edges in the context
113	* of the source vertex.
114	*/
115	typedef struct zfs_edge {
116	zfs_vertex_t *ze_dest;
117	struct zfs_edge *ze_next;
118	} zfs_edge_t;
119
120	#define ZFS_GRAPH_SIZE 1027 /* this could be dynamic some day */
121
122	/*
123	* Graph structure. Vertices are maintained in a hash indexed by dataset name.
124	*/
125	typedef struct zfs_graph {
126	zfs_vertex_t **zg_hash;
127	size_t zg_size;
128	size_t zg_nvertex;
129	const char *zg_root;
130	int zg_clone_count;
131	} zfs_graph_t;
132
133	/*
134	* Allocate a new edge pointing to the target vertex.
135	*/
136	static zfs_edge_t *
137	zfs_edge_create(libzfs_handle_t hdl, zfs_vertex_t dest)
138	{
139	zfs_edge_t *zep = zfs_alloc(hdl, sizeof (zfs_edge_t));
140
141	if (zep == NULL)
142	return (NULL);
143
144	zep->ze_dest = dest;
145
146	return (zep);
147	}
148
149	/*
150	* Destroy an edge.
151	*/
152	static void
153	zfs_edge_destroy(zfs_edge_t *zep)
154	{
155	free(zep);
156	}
157
158	/*
159	* Allocate a new vertex with the given name.
160	*/
161	static zfs_vertex_t *
162	zfs_vertex_create(libzfs_handle_t hdl, const char dataset)
163	{
164	zfs_vertex_t *zvp = zfs_alloc(hdl, sizeof (zfs_vertex_t));
165
166	if (zvp == NULL)
167	return (NULL);
168
169	assert(strlen(dataset) < ZFS_MAXNAMELEN);
170
171	(void) strlcpy(zvp->zv_dataset, dataset, sizeof (zvp->zv_dataset));
172
173	if ((zvp->zv_edges = zfs_alloc(hdl,
174	MIN_EDGECOUNT * sizeof (void *))) == NULL) {
175	free(zvp);
176	return (NULL);
177	}
178
179	zvp->zv_edgealloc = MIN_EDGECOUNT;
180
181	return (zvp);
182	}
183
184	/*
185	* Destroy a vertex. Frees up any associated edges.
186	*/
187	static void
188	zfs_vertex_destroy(zfs_vertex_t *zvp)
189	{
190	int i;
191
192	for (i = 0; i < zvp->zv_edgecount; i++)
193	zfs_edge_destroy(zvp->zv_edges[i]);
194
195	free(zvp->zv_edges);
196	free(zvp);
197	}
198
199	/*
200	* Given a vertex, add an edge to the destination vertex.
201	*/
202	static int
203	zfs_vertex_add_edge(libzfs_handle_t hdl, zfs_vertex_t zvp,
204	zfs_vertex_t *dest)
205	{
206	zfs_edge_t *zep = zfs_edge_create(hdl, dest);
207
208	if (zep == NULL)
209	return (-1);
210
211	if (zvp->zv_edgecount == zvp->zv_edgealloc) {
212	void *ptr;
213
214	if ((ptr = zfs_realloc(hdl, zvp->zv_edges,
215	zvp->zv_edgealloc * sizeof (void *),
216	zvp->zv_edgealloc * 2 * sizeof (void *))) == NULL)
217	return (-1);
218
219	zvp->zv_edges = ptr;
220	zvp->zv_edgealloc *= 2;
221	}
222
223	zvp->zv_edges[zvp->zv_edgecount++] = zep;
224
225	return (0);
226	}
227
228	static int
229	zfs_edge_compare(const void a, const void b)
230	{
231	const zfs_edge_t ea = ((zfs_edge_t **)a);
232	const zfs_edge_t eb = ((zfs_edge_t **)b);
233
234	if (ea->ze_dest->zv_txg < eb->ze_dest->zv_txg)
235	return (-1);
236	if (ea->ze_dest->zv_txg > eb->ze_dest->zv_txg)
237	return (1);
238	return (0);
239	}
240
241	/*
242	* Sort the given vertex edges according to the creation txg of each vertex.
243	*/
244	static void
245	zfs_vertex_sort_edges(zfs_vertex_t *zvp)
246	{
247	if (zvp->zv_edgecount == 0)
248	return;
249
250	qsort(zvp->zv_edges, zvp->zv_edgecount, sizeof (void *),
251	zfs_edge_compare);
252	}
253
254	/*
255	* Construct a new graph object. We allow the size to be specified as a
256	* parameter so in the future we can size the hash according to the number of
257	* datasets in the pool.
258	*/
259	static zfs_graph_t *
260	zfs_graph_create(libzfs_handle_t hdl, const char dataset, size_t size)
261	{
262	zfs_graph_t *zgp = zfs_alloc(hdl, sizeof (zfs_graph_t));
263
264	if (zgp == NULL)
265	return (NULL);
266
267	zgp->zg_size = size;
268	if ((zgp->zg_hash = zfs_alloc(hdl,
269	size * sizeof (zfs_vertex_t *))) == NULL) {
270	free(zgp);
271	return (NULL);
272	}
273
274	zgp->zg_root = dataset;
275	zgp->zg_clone_count = 0;
276
277	return (zgp);
278	}
279
280	/*
281	* Destroy a graph object. We have to iterate over all the hash chains,
282	* destroying each vertex in the process.
283	*/
284	static void
285	zfs_graph_destroy(zfs_graph_t *zgp)
286	{
287	int i;
288	zfs_vertex_t current, next;
289
290	for (i = 0; i < zgp->zg_size; i++) {
291	current = zgp->zg_hash[i];
292	while (current != NULL) {
293	next = current->zv_next;
294	zfs_vertex_destroy(current);
295	current = next;
296	}
297	}
298
299	free(zgp->zg_hash);
300	free(zgp);
301	}
302
303	/*
304	* Graph hash function. Classic bernstein k=33 hash function, taken from
305	* usr/src/cmd/sgs/tools/common/strhash.c
306	*/
307	static size_t
308	zfs_graph_hash(zfs_graph_t zgp, const char str)
309	{
310	size_t hash = 5381;
311	int c;
312
313	while ((c = *str++) != 0)
314	hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
315
316	return (hash % zgp->zg_size);
317	}
318
319	/*
320	* Given a dataset name, finds the associated vertex, creating it if necessary.
321	*/
322	static zfs_vertex_t *
323	zfs_graph_lookup(libzfs_handle_t hdl, zfs_graph_t zgp, const char *dataset,
324	uint64_t txg)
325	{
326	size_t idx = zfs_graph_hash(zgp, dataset);
327	zfs_vertex_t *zvp;
328
329	for (zvp = zgp->zg_hash[idx]; zvp != NULL; zvp = zvp->zv_next) {
330	if (strcmp(zvp->zv_dataset, dataset) == 0) {
331	if (zvp->zv_txg == 0)
332	zvp->zv_txg = txg;
333	return (zvp);
334	}
335	}
336
337	if ((zvp = zfs_vertex_create(hdl, dataset)) == NULL)
338	return (NULL);
339
340	zvp->zv_next = zgp->zg_hash[idx];
341	zvp->zv_txg = txg;
342	zgp->zg_hash[idx] = zvp;
343	zgp->zg_nvertex++;
344
345	return (zvp);
346	}
347
348	/*
349	* Given two dataset names, create an edge between them. For the source vertex,
350	* mark 'zv_visited' to indicate that we have seen this vertex, and not simply
351	* created it as a destination of another edge. If 'dest' is NULL, then this
352	* is an individual vertex (i.e. the starting vertex), so don't add an edge.
353	*/
354	static int
355	zfs_graph_add(libzfs_handle_t hdl, zfs_graph_t zgp, const char *source,
356	const char *dest, uint64_t txg)
357	{
358	zfs_vertex_t svp, dvp;
359
360	if ((svp = zfs_graph_lookup(hdl, zgp, source, 0)) == NULL)
361	return (-1);
362	svp->zv_visited = VISIT_SEEN;
363	if (dest != NULL) {
364	dvp = zfs_graph_lookup(hdl, zgp, dest, txg);
365	if (dvp == NULL)
366	return (-1);
367	if (zfs_vertex_add_edge(hdl, svp, dvp) != 0)
368	return (-1);
369	}
370
371	return (0);
372	}
373
374	/*
375	* Iterate over all children of the given dataset, adding any vertices
376	* as necessary. Returns -1 if there was an error, or 0 otherwise.
377	* This is a simple recursive algorithm - the ZFS namespace typically
378	* is very flat. We manually invoke the necessary ioctl() calls to
379	* avoid the overhead and additional semantics of zfs_open().
380	*/
381	static int
382	iterate_children(libzfs_handle_t hdl, zfs_graph_t zgp, const char *dataset)
383	{
384	zfs_cmd_t zc = { 0 };
385	zfs_vertex_t *zvp;
386
387	/*
388	* Look up the source vertex, and avoid it if we've seen it before.
389	*/
390	zvp = zfs_graph_lookup(hdl, zgp, dataset, 0);
391	if (zvp == NULL)
392	return (-1);
393	if (zvp->zv_visited == VISIT_SEEN)
394	return (0);
395
396	/*
397	* Iterate over all children
398	*/
399	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
400	ioctl(hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
401	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
402
403	/*
404	* Ignore private dataset names.
405	*/
406	if (dataset_name_hidden(zc.zc_name))
407	continue;
408
409	/*
410	* Get statistics for this dataset, to determine the type of the
411	* dataset and clone statistics. If this fails, the dataset has
412	* since been removed, and we're pretty much screwed anyway.
413	*/
414	zc.zc_objset_stats.dds_origin[0] = '\0';
415	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
416	continue;
417
418	if (zc.zc_objset_stats.dds_origin[0] != '\0') {
419	if (zfs_graph_add(hdl, zgp,
420	zc.zc_objset_stats.dds_origin, zc.zc_name,
421	zc.zc_objset_stats.dds_creation_txg) != 0)
422	return (-1);
423	/*
424	* Count origins only if they are contained in the graph
425	*/
426	if (isa_child_of(zc.zc_objset_stats.dds_origin,
427	zgp->zg_root))
428	zgp->zg_clone_count--;
429	}
430
431	/*
432	* Add an edge between the parent and the child.
433	*/
434	if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
435	zc.zc_objset_stats.dds_creation_txg) != 0)
436	return (-1);
437
438	/*
439	* Recursively visit child
440	*/
441	if (iterate_children(hdl, zgp, zc.zc_name))
442	return (-1);
443	}
444
445	/*
446	* Now iterate over all snapshots.
447	*/
448	bzero(&zc, sizeof (zc));
449
450	for ((void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
451	ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT, &zc) == 0;
452	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name))) {
453
454	/*
455	* Get statistics for this dataset, to determine the type of the
456	* dataset and clone statistics. If this fails, the dataset has
457	* since been removed, and we're pretty much screwed anyway.
458	*/
459	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
460	continue;
461
462	/*
463	* Add an edge between the parent and the child.
464	*/
465	if (zfs_graph_add(hdl, zgp, dataset, zc.zc_name,
466	zc.zc_objset_stats.dds_creation_txg) != 0)
467	return (-1);
468
469	zgp->zg_clone_count += zc.zc_objset_stats.dds_num_clones;
470	}
471
472	zvp->zv_visited = VISIT_SEEN;
473
474	return (0);
475	}
476
477	/*
478	* Returns false if there are no snapshots with dependent clones in this
479	* subtree or if all of those clones are also in this subtree. Returns
480	* true if there is an error or there are external dependents.
481	*/
482	static boolean_t
483	external_dependents(libzfs_handle_t hdl, zfs_graph_t zgp, const char *dataset)
484	{
485	zfs_cmd_t zc = { 0 };
486
487	/*
488	* Check whether this dataset is a clone or has clones since
489	* iterate_children() only checks the children.
490	*/
491	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
492	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0)
493	return (B_TRUE);
494
495	if (zc.zc_objset_stats.dds_origin[0] != '\0') {
496	if (zfs_graph_add(hdl, zgp,
497	zc.zc_objset_stats.dds_origin, zc.zc_name,
498	zc.zc_objset_stats.dds_creation_txg) != 0)
499	return (B_TRUE);
500	if (isa_child_of(zc.zc_objset_stats.dds_origin, dataset))
501	zgp->zg_clone_count--;
502	}
503
504	if ((zc.zc_objset_stats.dds_num_clones) \|\|
505	iterate_children(hdl, zgp, dataset))
506	return (B_TRUE);
507
508	return (zgp->zg_clone_count != 0);
509	}
510
511	/*
512	* Construct a complete graph of all necessary vertices. First, iterate over
513	* only our object's children. If no cloned snapshots are found, or all of
514	* the cloned snapshots are in this subtree then return a graph of the subtree.
515	* Otherwise, start at the root of the pool and iterate over all datasets.
516	*/
517	static zfs_graph_t *
518	construct_graph(libzfs_handle_t hdl, const char dataset)
519	{
520	zfs_graph_t *zgp = zfs_graph_create(hdl, dataset, ZFS_GRAPH_SIZE);
521	int ret = 0;
522
523	if (zgp == NULL)
524	return (zgp);
525
526	if ((strchr(dataset, '/') == NULL) \|\|
527	(external_dependents(hdl, zgp, dataset))) {
528	/*
529	* Determine pool name and try again.
530	*/
531	int len = strcspn(dataset, "/@") + 1;
532	char *pool = zfs_alloc(hdl, len);
533
534	if (pool == NULL) {
535	zfs_graph_destroy(zgp);
536	return (NULL);
537	}
538	(void) strlcpy(pool, dataset, len);
539
540	if (iterate_children(hdl, zgp, pool) == -1 \|\|
541	zfs_graph_add(hdl, zgp, pool, NULL, 0) != 0) {
542	free(pool);
543	zfs_graph_destroy(zgp);
544	return (NULL);
545	}
546	free(pool);
547	}
548
549	if (ret == -1 \|\| zfs_graph_add(hdl, zgp, dataset, NULL, 0) != 0) {
550	zfs_graph_destroy(zgp);
551	return (NULL);
552	}
553
554	return (zgp);
555	}
556
557	/*
558	* Given a graph, do a recursive topological sort into the given array. This is
559	* really just a depth first search, so that the deepest nodes appear first.
560	* hijack the 'zv_visited' marker to avoid visiting the same vertex twice.
561	*/
562	static int
563	topo_sort(libzfs_handle_t hdl, boolean_t allowrecursion, char *result,
564	size_t idx, zfs_vertex_t zgv)
565	{
566	int i;
567
568	if (zgv->zv_visited == VISIT_SORT_PRE && !allowrecursion) {
569	/*
570	* If we've already seen this vertex as part of our depth-first
571	* search, then we have a cyclic dependency, and we must return
572	* an error.
573	*/
574	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
575	"recursive dependency at '%s'"),
576	zgv->zv_dataset);
577	return (zfs_error(hdl, EZFS_RECURSIVE,
578	dgettext(TEXT_DOMAIN,
579	"cannot determine dependent datasets")));
580	} else if (zgv->zv_visited >= VISIT_SORT_PRE) {
581	/*
582	* If we've already processed this as part of the topological
583	* sort, then don't bother doing so again.
584	*/
585	return (0);
586	}
587
588	zgv->zv_visited = VISIT_SORT_PRE;
589
590	/* avoid doing a search if we don't have to */
591	zfs_vertex_sort_edges(zgv);
592	for (i = 0; i < zgv->zv_edgecount; i++) {
593	if (topo_sort(hdl, allowrecursion, result, idx,
594	zgv->zv_edges[i]->ze_dest) != 0)
595	return (-1);
596	}
597
598	/* we may have visited this in the course of the above */
599	if (zgv->zv_visited == VISIT_SORT_POST)
600	return (0);
601
602	if ((result[*idx] = zfs_alloc(hdl,
603	strlen(zgv->zv_dataset) + 1)) == NULL)
604	return (-1);
605
606	(void) strcpy(result[*idx], zgv->zv_dataset);
607	*idx += 1;
608	zgv->zv_visited = VISIT_SORT_POST;
609	return (0);
610	}
611
612	/*
613	* The only public interface for this file. Do the dirty work of constructing a
614	* child list for the given object. Construct the graph, do the toplogical
615	* sort, and then return the array of strings to the caller.
616	*
617	* The 'allowrecursion' parameter controls behavior when cycles are found. If
618	* it is set, the the cycle is ignored and the results returned as if the cycle
619	* did not exist. If it is not set, then the routine will generate an error if
620	* a cycle is found.
621	*/
622	int
623	get_dependents(libzfs_handle_t *hdl, boolean_t allowrecursion,
624	const char dataset, char *result, size_t count)
625	{
626	zfs_graph_t *zgp;
627	zfs_vertex_t *zvp;
628
629	if ((zgp = construct_graph(hdl, dataset)) == NULL)
630	return (-1);
631
632	if ((*result = zfs_alloc(hdl,
633	zgp->zg_nvertex * sizeof (char *))) == NULL) {
634	zfs_graph_destroy(zgp);
635	return (-1);
636	}
637
638	if ((zvp = zfs_graph_lookup(hdl, zgp, dataset, 0)) == NULL) {
639	free(*result);
640	zfs_graph_destroy(zgp);
641	return (-1);
642	}
643
644	*count = 0;
645	if (topo_sort(hdl, allowrecursion, *result, count, zvp) != 0) {
646	free(*result);
647	zfs_graph_destroy(zgp);
648	return (-1);
649	}
650
651	/*
652	* Get rid of the last entry, which is our starting vertex and not
653	* strictly a dependent.
654	*/
655	assert(*count > 0);
656	free((result)[count - 1]);
657	(*count)--;
658
659	zfs_graph_destroy(zgp);
660
661	return (0);
662	}