4891 want zdb option to dump all metadata
Reviewed by: Sonu Pillai <sonu.pillai@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Approved by: Garrett D'Amore <garrett@damore.org>
We'd like a way for zdb to dump metadata in a machine-readable
format, so that we can bring that back from a customer site for
in-house diagnosis. Think of it as a crash dump for zpools,
which can be used for post-mortem analysis of a malfunctioning
pool.
References:
https://www.illumos.org/issues/4891
https://github.com/illumos/illumos-gate/commit/df15e41
Porting notes:
- [cmd/zdb/zdb.c]
- a5778ea zdb: Introduce -V for verbatim import
- In main() the getopt 'opts' variable was removed and the code was
brought back in line with illumos.
- [lib/libzpool/kernel.c]
- 1e33ac1 Fix Solaris thread dependency by using pthreads
- f0e324f Update utsname support
- 4d58b69 Fix vn_open/vn_rdwr error handling
- In vn_open() allocate 'dumppath' on heap instead of stack
- Properly handle 'dump_fd == -1' error path
- Free 'realpath' after the added vn_dumpdir code block
Ported-by: kernelOfTruth <kerneloftruth@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
{
(void) fprintf(stderr,
"Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
- "[-U config] [-I inflight I/Os] poolname [object...]\n"
+ "[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n"
" %s [-divPA] [-e -p path...] [-U config] dataset "
"[object...]\n"
" %s -mM [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
(void) fprintf(stderr, " -R read and display block from a "
"device\n\n");
(void) fprintf(stderr, " Below options are intended for use "
- "with other options (except -l):\n");
+ "with other options:\n");
(void) fprintf(stderr, " -A ignore assertions (-A), enable "
"panic recovery (-AA) or both (-AAA)\n");
(void) fprintf(stderr, " -F attempt automatic rewind within "
"has altroot/not in a cachefile\n");
(void) fprintf(stderr, " -p <path> -- use one or more with "
"-e to specify path to vdev dir\n");
+ (void) fprintf(stderr, " -x <dumpdir> -- "
+ "dump all read blocks into specified directory\n");
(void) fprintf(stderr, " -P print numbers in parseable form\n");
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
(void) fprintf(stderr, " -I <number of inflight I/Os> -- "
- "specify the maximum number of checksumming I/Os "
- "[default is 200]\n");
+ "specify the maximum number of "
+ "checksumming I/Os [default is 200]\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
int flags = ZFS_IMPORT_MISSING_LOG;
int rewind = ZPOOL_NEVER_REWIND;
char *spa_config_path_env;
- const char *opts = "bcdhilmMI:suCDRSAFLXevp:t:U:PV";
boolean_t target_is_spa = B_TRUE;
(void) setrlimit(RLIMIT_NOFILE, &rl);
if (spa_config_path_env != NULL)
spa_config_path = spa_config_path_env;
- while ((c = getopt(argc, argv, opts)) != -1) {
+ while ((c = getopt(argc, argv,
+ "bcdhilmMI:suCDRSAFLXx:evp:t:U:PV")) != -1) {
switch (c) {
case 'b':
case 'c':
}
searchdirs[nsearch++] = optarg;
break;
+ case 'x':
+ vn_dumpdir = optarg;
+ break;
case 't':
max_txg = strtoull(optarg, NULL, 0);
if (max_txg < TXG_INITIAL) {
uint64_t v_size;
int v_fd;
char *v_path;
+ int v_dump_fd;
} vnode_t;
+extern char *vn_dumpdir;
#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
typedef struct xoptattr {
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
+#include <libgen.h>
#include <sys/signal.h>
#include <sys/spa.h>
#include <sys/stat.h>
struct utsname hw_utsname;
vmem_t *zio_arena = NULL;
+/* If set, all blocks read will be copied to the specified directory. */
+char *vn_dumpdir = NULL;
+
/* this only exists to have its address taken */
struct proc p0;
vn_open(char *path, int x1, int flags, int mode, vnode_t **vpp, int x2, int x3)
{
int fd;
+ int dump_fd;
vnode_t *vp;
int old_umask = 0;
char *realpath;
* FREAD and FWRITE to the corresponding O_RDONLY, O_WRONLY, and O_RDWR.
*/
fd = open64(realpath, flags - FREAD, mode);
- free(realpath);
+ err = errno;
if (flags & FCREAT)
(void) umask(old_umask);
+ if (vn_dumpdir != NULL) {
+ char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
+ (void) snprintf(dumppath, MAXPATHLEN,
+ "%s/%s", vn_dumpdir, basename(realpath));
+ dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
+ umem_free(dumppath, MAXPATHLEN);
+ if (dump_fd == -1) {
+ err = errno;
+ free(realpath);
+ close(fd);
+ return (err);
+ }
+ } else {
+ dump_fd = -1;
+ }
+
+ free(realpath);
+
if (fd == -1)
- return (errno);
+ return (err);
if (fstat64_blk(fd, &st) == -1) {
err = errno;
vp->v_fd = fd;
vp->v_size = st.st_size;
vp->v_path = spa_strdup(path);
+ vp->v_dump_fd = dump_fd;
return (0);
}
if (uio == UIO_READ) {
rc = pread64(vp->v_fd, addr, len, offset);
+ if (vp->v_dump_fd != -1) {
+ int status =
+ pwrite64(vp->v_dump_fd, addr, rc, offset);
+ ASSERT(status != -1);
+ }
} else {
/*
* To simulate partial disk writes, we split writes into two
vn_close(vnode_t *vp)
{
close(vp->v_fd);
+ if (vp->v_dump_fd != -1)
+ close(vp->v_dump_fd);
spa_strfree(vp->v_path);
umem_free(vp, sizeof (vnode_t));
}
.\"
.\"
.\" Copyright 2012, Richard Lowe.
-.\" Copyright (c) 2012 by Delphix. All rights reserved.
+.\" Copyright (c) 2012, 2014 by Delphix. All rights reserved.
.\"
.TH "ZDB" "8" "February 15, 2012" "" ""
.SH "SYNOPSIS"
\fBzdb\fR [-CumdibcsDvhLMXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR]
- [-U \fIcache\fR] [-I \fIinflight I/Os\fR]
+ [-U \fIcache\fR] [-I \fIinflight I/Os\fR] [-x \fIdumpdir\fR]
[\fIpoolname\fR [\fIobject\fR ...]]
.P
\fB-p\fR flag specifies the path under which devices are to be searched.
.RE
+.sp
+.ne 2
+.na
+\fB-x\fR \fIdumpdir\fR
+.ad
+.sp .6
+.RS 4n
+All blocks accessed will be copied to files in the specified directory.
+The blocks will be placed in sparse files whose name is the same as
+that of the file or device read. zdb can then be run on the generated files.
+Note that the \fB-bbc\fR flags are sufficient to access (and thus copy)
+all metadata on the pool.
+.RE
+
.sp
.ne 2
.na