]> git.proxmox.com Git - mirror_zfs.git/blame - cmd/zinject/zinject.c
Update zfs destroy test scripts
[mirror_zfs.git] / cmd / zinject / zinject.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
428870ff 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
26ef0cc7 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
34dc7c2f
BB
24 */
25
34dc7c2f
BB
26/*
27 * ZFS Fault Injector
28 *
29 * This userland component takes a set of options and uses libzpool to translate
30 * from a user-visible object type and name to an internal representation.
31 * There are two basic types of faults: device faults and data faults.
32 *
33 *
34 * DEVICE FAULTS
35 *
36 * Errors can be injected into a particular vdev using the '-d' option. This
37 * option takes a path or vdev GUID to uniquely identify the device within a
38 * pool. There are two types of errors that can be injected, EIO and ENXIO,
b128c09f 39 * that can be controlled through the '-e' option. The default is ENXIO. For
34dc7c2f
BB
40 * EIO failures, any attempt to read data from the device will return EIO, but
41 * a subsequent attempt to reopen the device will succeed. For ENXIO failures,
42 * any attempt to read from the device will return EIO, but any attempt to
43 * reopen the device will also return ENXIO.
b128c09f 44 * For label faults, the -L option must be specified. This allows faults
428870ff
BB
45 * to be injected into either the nvlist, uberblock, pad1, or pad2 region
46 * of all the labels for the specified device.
34dc7c2f
BB
47 *
48 * This form of the command looks like:
49 *
428870ff 50 * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
34dc7c2f
BB
51 *
52 *
53 * DATA FAULTS
54 *
55 * We begin with a tuple of the form:
56 *
57 * <type,level,range,object>
58 *
59 * type A string describing the type of data to target. Each type
60 * implicitly describes how to interpret 'object'. Currently,
61 * the following values are supported:
62 *
63 * data User data for a file
64 * dnode Dnode for a file or directory
65 *
66 * The following MOS objects are special. Instead of injecting
67 * errors on a particular object or blkid, we inject errors across
68 * all objects of the given type.
69 *
70 * mos Any data in the MOS
71 * mosdir object directory
72 * config pool configuration
428870ff 73 * bpobj blkptr list
34dc7c2f
BB
74 * spacemap spacemap
75 * metaslab metaslab
76 * errlog persistent error log
77 *
78 * level Object level. Defaults to '0', not applicable to all types. If
79 * a range is given, this corresponds to the indirect block
80 * corresponding to the specific range.
81 *
82 * range A numerical range [start,end) within the object. Defaults to
83 * the full size of the file.
84 *
85 * object A string describing the logical location of the object. For
86 * files and directories (currently the only supported types),
87 * this is the path of the object on disk.
88 *
89 * This is translated, via libzpool, into the following internal representation:
90 *
91 * <type,objset,object,level,range>
92 *
93 * These types should be self-explanatory. This tuple is then passed to the
94 * kernel via a special ioctl() to initiate fault injection for the given
95 * object. Note that 'type' is not strictly necessary for fault injection, but
96 * is used when translating existing faults into a human-readable string.
97 *
98 *
99 * The command itself takes one of the forms:
100 *
101 * zinject
102 * zinject <-a | -u pool>
103 * zinject -c <id|all>
104 * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
105 * [-r range] <object>
106 * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:start:end pool
107 *
108 * With no arguments, the command prints all currently registered injection
109 * handlers, with their numeric identifiers.
110 *
111 * The '-c' option will clear the given handler, or all handlers if 'all' is
112 * specified.
113 *
114 * The '-e' option takes a string describing the errno to simulate. This must
115 * be either 'io' or 'checksum'. In most cases this will result in the same
116 * behavior, but RAID-Z will produce a different set of ereports for this
117 * situation.
118 *
119 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
120 * specified, then the ARC cache is flushed appropriately. If '-u' is
121 * specified, then the underlying SPA is unloaded. Either of these flags can be
122 * specified independently of any other handlers. The '-m' flag automatically
123 * does an unmount and remount of the underlying dataset to aid in flushing the
124 * cache.
125 *
126 * The '-f' flag controls the frequency of errors injected, expressed as an
127 * integer percentage between 1 and 100. The default is 100.
128 *
129 * The '-t' form is responsible for actually injecting the handler into the
130 * framework. It takes the arguments described above, translates them to the
131 * internal tuple using libzpool, and then issues an ioctl() to register the
132 * handler.
133 *
134 * The final form can target a specific bookmark, regardless of whether a
135 * human-readable interface has been designed. It allows developers to specify
136 * a particular block by number.
137 */
138
139#include <errno.h>
140#include <fcntl.h>
141#include <stdio.h>
142#include <stdlib.h>
143#include <strings.h>
144#include <unistd.h>
145
146#include <sys/fs/zfs.h>
147#include <sys/mount.h>
148
149#include <libzfs.h>
150
151#undef verify /* both libzfs.h and zfs_context.h want to define this */
152
153#include "zinject.h"
154
155libzfs_handle_t *g_zfs;
156int zfs_fd;
157
158#define ECKSUM EBADE
159
160static const char *errtable[TYPE_INVAL] = {
161 "data",
162 "dnode",
163 "mos",
164 "mosdir",
165 "metaslab",
166 "config",
428870ff 167 "bpobj",
34dc7c2f 168 "spacemap",
b128c09f
BB
169 "errlog",
170 "uber",
428870ff
BB
171 "nvlist",
172 "pad1",
173 "pad2"
34dc7c2f
BB
174};
175
176static err_type_t
177name_to_type(const char *arg)
178{
179 int i;
180 for (i = 0; i < TYPE_INVAL; i++)
181 if (strcmp(errtable[i], arg) == 0)
182 return (i);
183
184 return (TYPE_INVAL);
185}
186
187static const char *
188type_to_name(uint64_t type)
189{
190 switch (type) {
191 case DMU_OT_OBJECT_DIRECTORY:
192 return ("mosdir");
193 case DMU_OT_OBJECT_ARRAY:
194 return ("metaslab");
195 case DMU_OT_PACKED_NVLIST:
196 return ("config");
428870ff
BB
197 case DMU_OT_BPOBJ:
198 return ("bpobj");
34dc7c2f
BB
199 case DMU_OT_SPACE_MAP:
200 return ("spacemap");
201 case DMU_OT_ERROR_LOG:
202 return ("errlog");
203 default:
204 return ("-");
205 }
206}
207
208
/*
 * Print the usage message to stdout.
 *
 * The text covers every invocation form accepted by main(): listing,
 * cancelling (-c), panic injection (-p), device faults and actions (-d),
 * I/O delays (-D), ignored writes (-I), raw bookmarks (-b) and typed
 * object injection (-t).
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only \n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t [-T <read|write|free|claim|all>] pool\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
	    "\t\tPerform a specific action on a particular device.\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration. Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds. The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple. Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
	    "\t [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor. The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode. Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error. Must be either 'io' or\n"
	    "\t\t\t'checksum'. Default is 'io'.\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject. Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache. Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool. Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
	    "\t\t\ta percentage between 1 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile. The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory. The\n"
	    "\t\t\t'-r' option is incompatible with this mode. The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
325
326static int
327iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
328 void *data)
329{
d3773fda 330 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
331 int ret;
332
34dc7c2f
BB
333 while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
334 if ((ret = func((int)zc.zc_guid, zc.zc_name,
335 &zc.zc_inject_record, data)) != 0)
336 return (ret);
337
428870ff
BB
338 if (errno != ENOENT) {
339 (void) fprintf(stderr, "Unable to list handlers: %s\n",
340 strerror(errno));
341 return (-1);
342 }
343
34dc7c2f
BB
344 return (0);
345}
346
347static int
348print_data_handler(int id, const char *pool, zinject_record_t *record,
349 void *data)
350{
351 int *count = data;
352
428870ff 353 if (record->zi_guid != 0 || record->zi_func[0] != '\0')
34dc7c2f
BB
354 return (0);
355
356 if (*count == 0) {
357 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n",
358 "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE");
359 (void) printf("--- --------------- ------ "
360 "------ -------- --- ---------------\n");
361 }
362
363 *count += 1;
364
365 (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool,
366 (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
367 type_to_name(record->zi_type), record->zi_level);
368
369 if (record->zi_start == 0 &&
370 record->zi_end == -1ULL)
371 (void) printf("all\n");
372 else
373 (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
374 (u_longlong_t)record->zi_end);
375
376 return (0);
377}
378
379static int
380print_device_handler(int id, const char *pool, zinject_record_t *record,
381 void *data)
382{
383 int *count = data;
384
428870ff 385 if (record->zi_guid == 0 || record->zi_func[0] != '\0')
34dc7c2f
BB
386 return (0);
387
26ef0cc7
TH
388 if (record->zi_cmd == ZINJECT_DELAY_IO)
389 return (0);
390
34dc7c2f
BB
391 if (*count == 0) {
392 (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID");
393 (void) printf("--- --------------- ----------------\n");
394 }
395
396 *count += 1;
397
398 (void) printf("%3d %-15s %llx\n", id, pool,
399 (u_longlong_t)record->zi_guid);
400
401 return (0);
402}
403
26ef0cc7
TH
404static int
405print_delay_handler(int id, const char *pool, zinject_record_t *record,
406 void *data)
407{
408 int *count = data;
409
410 if (record->zi_guid == 0 || record->zi_func[0] != '\0')
411 return (0);
412
413 if (record->zi_cmd != ZINJECT_DELAY_IO)
414 return (0);
415
416 if (*count == 0) {
417 (void) printf("%3s %-15s %-15s %-15s %s\n",
418 "ID", "POOL", "DELAY (ms)", "LANES", "GUID");
419 (void) printf("--- --------------- --------------- "
420 "--------------- ----------------\n");
421 }
422
423 *count += 1;
424
425 (void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool,
426 (u_longlong_t)NSEC2MSEC(record->zi_timer),
427 (u_longlong_t)record->zi_nlanes,
428 (u_longlong_t)record->zi_guid);
429
430 return (0);
431}
432
428870ff
BB
433static int
434print_panic_handler(int id, const char *pool, zinject_record_t *record,
435 void *data)
436{
437 int *count = data;
438
439 if (record->zi_func[0] == '\0')
440 return (0);
441
442 if (*count == 0) {
443 (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION");
444 (void) printf("--- --------------- ----------------\n");
445 }
446
447 *count += 1;
448
449 (void) printf("%3d %-15s %s\n", id, pool, record->zi_func);
450
451 return (0);
452}
453
34dc7c2f
BB
454/*
455 * Print all registered error handlers. Returns the number of handlers
456 * registered.
457 */
458static int
459print_all_handlers(void)
460{
572e2857 461 int count = 0, total = 0;
34dc7c2f
BB
462
463 (void) iter_handlers(print_device_handler, &count);
572e2857
BB
464 if (count > 0) {
465 total += count;
466 (void) printf("\n");
467 count = 0;
468 }
469
26ef0cc7
TH
470 (void) iter_handlers(print_delay_handler, &count);
471 if (count > 0) {
472 total += count;
473 (void) printf("\n");
474 count = 0;
475 }
476
34dc7c2f 477 (void) iter_handlers(print_data_handler, &count);
572e2857
BB
478 if (count > 0) {
479 total += count;
480 (void) printf("\n");
481 count = 0;
482 }
483
428870ff 484 (void) iter_handlers(print_panic_handler, &count);
34dc7c2f 485
572e2857 486 return (count + total);
34dc7c2f
BB
487}
488
489/* ARGSUSED */
490static int
491cancel_one_handler(int id, const char *pool, zinject_record_t *record,
492 void *data)
493{
d3773fda 494 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
495
496 zc.zc_guid = (uint64_t)id;
497
498 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
499 (void) fprintf(stderr, "failed to remove handler %d: %s\n",
500 id, strerror(errno));
501 return (1);
502 }
503
504 return (0);
505}
506
507/*
508 * Remove all fault injection handlers.
509 */
510static int
511cancel_all_handlers(void)
512{
513 int ret = iter_handlers(cancel_one_handler, NULL);
514
428870ff
BB
515 if (ret == 0)
516 (void) printf("removed all registered handlers\n");
34dc7c2f
BB
517
518 return (ret);
519}
520
521/*
522 * Remove a specific fault injection handler.
523 */
524static int
525cancel_handler(int id)
526{
d3773fda 527 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
528
529 zc.zc_guid = (uint64_t)id;
530
531 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
532 (void) fprintf(stderr, "failed to remove handler %d: %s\n",
533 id, strerror(errno));
534 return (1);
535 }
536
537 (void) printf("removed handler %d\n", id);
538
539 return (0);
540}
541
542/*
543 * Register a new fault injection handler.
544 */
545static int
546register_handler(const char *pool, int flags, zinject_record_t *record,
547 int quiet)
548{
d3773fda 549 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
550
551 (void) strcpy(zc.zc_name, pool);
552 zc.zc_inject_record = *record;
553 zc.zc_guid = flags;
554
555 if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
556 (void) fprintf(stderr, "failed to add handler: %s\n",
557 strerror(errno));
558 return (1);
559 }
560
561 if (flags & ZINJECT_NULL)
562 return (0);
563
564 if (quiet) {
565 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
566 } else {
567 (void) printf("Added handler %llu with the following "
568 "properties:\n", (u_longlong_t)zc.zc_guid);
569 (void) printf(" pool: %s\n", pool);
570 if (record->zi_guid) {
571 (void) printf(" vdev: %llx\n",
572 (u_longlong_t)record->zi_guid);
428870ff
BB
573 } else if (record->zi_func[0] != '\0') {
574 (void) printf(" panic function: %s\n",
575 record->zi_func);
576 } else if (record->zi_duration > 0) {
577 (void) printf(" time: %lld seconds\n",
578 (u_longlong_t)record->zi_duration);
579 } else if (record->zi_duration < 0) {
580 (void) printf(" txgs: %lld \n",
581 (u_longlong_t)-record->zi_duration);
34dc7c2f
BB
582 } else {
583 (void) printf("objset: %llu\n",
584 (u_longlong_t)record->zi_objset);
585 (void) printf("object: %llu\n",
586 (u_longlong_t)record->zi_object);
587 (void) printf(" type: %llu\n",
588 (u_longlong_t)record->zi_type);
589 (void) printf(" level: %d\n", record->zi_level);
590 if (record->zi_start == 0 &&
591 record->zi_end == -1ULL)
592 (void) printf(" range: all\n");
593 else
594 (void) printf(" range: [%llu, %llu)\n",
595 (u_longlong_t)record->zi_start,
596 (u_longlong_t)record->zi_end);
597 }
598 }
599
600 return (0);
601}
602
428870ff
BB
603int
604perform_action(const char *pool, zinject_record_t *record, int cmd)
605{
d3773fda 606 zfs_cmd_t zc = {"\0"};
428870ff
BB
607
608 ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
609 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
610 zc.zc_guid = record->zi_guid;
611 zc.zc_cookie = cmd;
612
613 if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
614 return (0);
615
616 return (1);
617}
618
26ef0cc7
TH
/*
 * Parse a '-D' argument of the form "latency:lanes".
 *
 * str     the option argument; latency is given in milliseconds
 * delay   on success, receives the latency converted to nanoseconds
 * nlanes  on success, receives the number of lanes
 *
 * Returns 0 on success.  Returns 1, leaving the outputs untouched, if the
 * string is not exactly two unsigned numbers separated by ':' (anything
 * following the second number is rejected) or if the latency is zero.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	unsigned long scan_delay;
	unsigned long scan_nlanes;
	char trailing;

	/*
	 * The extra %c only converts if something follows the second
	 * number, so a result of exactly 2 means the whole string was
	 * consumed by "latency:lanes".
	 */
	if (sscanf(str, "%lu:%lu%c", &scan_delay, &scan_nlanes,
	    &trailing) != 2)
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0)
		return (1);

	/*
	 * The units for the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = MSEC2NSEC(scan_delay);
	*nlanes = scan_nlanes;

	return (0);
}
647
34dc7c2f
BB
648int
649main(int argc, char **argv)
650{
651 int c;
652 char *range = NULL;
653 char *cancel = NULL;
654 char *end;
655 char *raw = NULL;
656 char *device = NULL;
657 int level = 0;
658 int quiet = 0;
659 int error = 0;
660 int domount = 0;
428870ff
BB
661 int io_type = ZIO_TYPES;
662 int action = VDEV_STATE_UNKNOWN;
34dc7c2f 663 err_type_t type = TYPE_INVAL;
b128c09f 664 err_type_t label = TYPE_INVAL;
34dc7c2f 665 zinject_record_t record = { 0 };
a64f903b
GN
666 char pool[MAXNAMELEN] = "";
667 char dataset[MAXNAMELEN] = "";
149e873a 668 zfs_handle_t *zhp = NULL;
428870ff
BB
669 int nowrites = 0;
670 int dur_txg = 0;
671 int dur_secs = 0;
34dc7c2f
BB
672 int ret;
673 int flags = 0;
674
65037d9b
BB
675 if ((g_zfs = libzfs_init()) == NULL) {
676 (void) fprintf(stderr, "%s", libzfs_error_init(errno));
937210a5 677 return (1);
65037d9b 678 }
937210a5
BB
679
680 libzfs_print_on_error(g_zfs, B_TRUE);
681
682 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
683 (void) fprintf(stderr, "failed to open ZFS device\n");
684 return (1);
685 }
686
34dc7c2f
BB
687 if (argc == 1) {
688 /*
689 * No arguments. Print the available handlers. If there are no
690 * available handlers, direct the user to '-h' for help
691 * information.
692 */
693 if (print_all_handlers() == 0) {
694 (void) printf("No handlers registered.\n");
695 (void) printf("Run 'zinject -h' for usage "
696 "information.\n");
697 }
698
699 return (0);
700 }
701
428870ff 702 while ((c = getopt(argc, argv,
cc92e9d0 703 ":aA:b:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
34dc7c2f
BB
704 switch (c) {
705 case 'a':
706 flags |= ZINJECT_FLUSH_ARC;
707 break;
428870ff
BB
708 case 'A':
709 if (strcasecmp(optarg, "degrade") == 0) {
710 action = VDEV_STATE_DEGRADED;
711 } else if (strcasecmp(optarg, "fault") == 0) {
712 action = VDEV_STATE_FAULTED;
713 } else {
714 (void) fprintf(stderr, "invalid action '%s': "
715 "must be 'degrade' or 'fault'\n", optarg);
716 usage();
717 return (1);
718 }
719 break;
34dc7c2f
BB
720 case 'b':
721 raw = optarg;
722 break;
723 case 'c':
724 cancel = optarg;
725 break;
726 case 'd':
727 device = optarg;
728 break;
cc92e9d0
GW
729 case 'D':
730 errno = 0;
26ef0cc7
TH
731 ret = parse_delay(optarg, &record.zi_timer,
732 &record.zi_nlanes);
733 if (ret != 0) {
734
cc92e9d0
GW
735 (void) fprintf(stderr, "invalid i/o delay "
736 "value: '%s'\n", optarg);
737 usage();
738 return (1);
739 }
740 break;
34dc7c2f
BB
741 case 'e':
742 if (strcasecmp(optarg, "io") == 0) {
743 error = EIO;
744 } else if (strcasecmp(optarg, "checksum") == 0) {
745 error = ECKSUM;
746 } else if (strcasecmp(optarg, "nxio") == 0) {
747 error = ENXIO;
572e2857
BB
748 } else if (strcasecmp(optarg, "dtl") == 0) {
749 error = ECHILD;
34dc7c2f
BB
750 } else {
751 (void) fprintf(stderr, "invalid error type "
752 "'%s': must be 'io', 'checksum' or "
753 "'nxio'\n", optarg);
754 usage();
755 return (1);
756 }
757 break;
758 case 'f':
759 record.zi_freq = atoi(optarg);
760 if (record.zi_freq < 1 || record.zi_freq > 100) {
761 (void) fprintf(stderr, "frequency range must "
762 "be in the range (0, 100]\n");
763 return (1);
764 }
765 break;
9babb374
BB
766 case 'F':
767 record.zi_failfast = B_TRUE;
768 break;
428870ff
BB
769 case 'g':
770 dur_txg = 1;
771 record.zi_duration = (int)strtol(optarg, &end, 10);
772 if (record.zi_duration <= 0 || *end != '\0') {
773 (void) fprintf(stderr, "invalid duration '%s': "
774 "must be a positive integer\n", optarg);
775 usage();
776 return (1);
777 }
778 /* store duration of txgs as its negative */
779 record.zi_duration *= -1;
780 break;
34dc7c2f
BB
781 case 'h':
782 usage();
783 return (0);
428870ff
BB
784 case 'I':
785 /* default duration, if one hasn't yet been defined */
786 nowrites = 1;
787 if (dur_secs == 0 && dur_txg == 0)
788 record.zi_duration = 30;
789 break;
34dc7c2f
BB
790 case 'l':
791 level = (int)strtol(optarg, &end, 10);
792 if (*end != '\0') {
793 (void) fprintf(stderr, "invalid level '%s': "
794 "must be an integer\n", optarg);
795 usage();
796 return (1);
797 }
798 break;
799 case 'm':
800 domount = 1;
801 break;
428870ff
BB
802 case 'p':
803 (void) strlcpy(record.zi_func, optarg,
804 sizeof (record.zi_func));
cc92e9d0 805 record.zi_cmd = ZINJECT_PANIC;
428870ff 806 break;
34dc7c2f
BB
807 case 'q':
808 quiet = 1;
809 break;
810 case 'r':
811 range = optarg;
812 break;
428870ff
BB
813 case 's':
814 dur_secs = 1;
815 record.zi_duration = (int)strtol(optarg, &end, 10);
816 if (record.zi_duration <= 0 || *end != '\0') {
817 (void) fprintf(stderr, "invalid duration '%s': "
818 "must be a positive integer\n", optarg);
819 usage();
820 return (1);
821 }
822 break;
823 case 'T':
824 if (strcasecmp(optarg, "read") == 0) {
825 io_type = ZIO_TYPE_READ;
826 } else if (strcasecmp(optarg, "write") == 0) {
827 io_type = ZIO_TYPE_WRITE;
828 } else if (strcasecmp(optarg, "free") == 0) {
829 io_type = ZIO_TYPE_FREE;
830 } else if (strcasecmp(optarg, "claim") == 0) {
831 io_type = ZIO_TYPE_CLAIM;
832 } else if (strcasecmp(optarg, "all") == 0) {
833 io_type = ZIO_TYPES;
834 } else {
835 (void) fprintf(stderr, "invalid I/O type "
836 "'%s': must be 'read', 'write', 'free', "
837 "'claim' or 'all'\n", optarg);
838 usage();
839 return (1);
840 }
841 break;
34dc7c2f 842 case 't':
b128c09f
BB
843 if ((type = name_to_type(optarg)) == TYPE_INVAL &&
844 !MOS_TYPE(type)) {
34dc7c2f
BB
845 (void) fprintf(stderr, "invalid type '%s'\n",
846 optarg);
847 usage();
848 return (1);
849 }
850 break;
851 case 'u':
852 flags |= ZINJECT_UNLOAD_SPA;
853 break;
b128c09f
BB
854 case 'L':
855 if ((label = name_to_type(optarg)) == TYPE_INVAL &&
856 !LABEL_TYPE(type)) {
857 (void) fprintf(stderr, "invalid label type "
858 "'%s'\n", optarg);
859 usage();
860 return (1);
861 }
862 break;
34dc7c2f
BB
863 case ':':
864 (void) fprintf(stderr, "option -%c requires an "
865 "operand\n", optopt);
866 usage();
867 return (1);
868 case '?':
869 (void) fprintf(stderr, "invalid option '%c'\n",
870 optopt);
871 usage();
872 return (2);
873 }
874 }
875
876 argc -= optind;
877 argv += optind;
878
cc92e9d0
GW
879 if (record.zi_duration != 0)
880 record.zi_cmd = ZINJECT_IGNORED_WRITES;
881
34dc7c2f
BB
882 if (cancel != NULL) {
883 /*
884 * '-c' is invalid with any other options.
885 */
886 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
cc92e9d0 887 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
34dc7c2f
BB
888 (void) fprintf(stderr, "cancel (-c) incompatible with "
889 "any other options\n");
890 usage();
891 return (2);
892 }
893 if (argc != 0) {
894 (void) fprintf(stderr, "extraneous argument to '-c'\n");
895 usage();
896 return (2);
897 }
898
899 if (strcmp(cancel, "all") == 0) {
900 return (cancel_all_handlers());
901 } else {
902 int id = (int)strtol(cancel, &end, 10);
903 if (*end != '\0') {
904 (void) fprintf(stderr, "invalid handle id '%s':"
905 " must be an integer or 'all'\n", cancel);
906 usage();
907 return (1);
908 }
909 return (cancel_handler(id));
910 }
911 }
912
913 if (device != NULL) {
914 /*
915 * Device (-d) injection uses a completely different mechanism
916 * for doing injection, so handle it separately here.
917 */
918 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
cc92e9d0 919 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
34dc7c2f
BB
920 (void) fprintf(stderr, "device (-d) incompatible with "
921 "data error injection\n");
922 usage();
923 return (2);
924 }
925
926 if (argc != 1) {
927 (void) fprintf(stderr, "device (-d) injection requires "
928 "a single pool name\n");
929 usage();
930 return (2);
931 }
932
933 (void) strcpy(pool, argv[0]);
934 dataset[0] = '\0';
935
936 if (error == ECKSUM) {
937 (void) fprintf(stderr, "device error type must be "
938 "'io' or 'nxio'\n");
939 return (1);
940 }
941
428870ff 942 record.zi_iotype = io_type;
b128c09f 943 if (translate_device(pool, device, label, &record) != 0)
34dc7c2f
BB
944 return (1);
945 if (!error)
946 error = ENXIO;
428870ff
BB
947
948 if (action != VDEV_STATE_UNKNOWN)
949 return (perform_action(pool, &record, action));
950
34dc7c2f 951 } else if (raw != NULL) {
428870ff 952 if (range != NULL || type != TYPE_INVAL || level != 0 ||
cc92e9d0 953 record.zi_cmd != ZINJECT_UNINITIALIZED) {
34dc7c2f
BB
954 (void) fprintf(stderr, "raw (-b) format with "
955 "any other options\n");
956 usage();
957 return (2);
958 }
959
960 if (argc != 1) {
961 (void) fprintf(stderr, "raw (-b) format expects a "
962 "single pool name\n");
963 usage();
964 return (2);
965 }
966
967 (void) strcpy(pool, argv[0]);
968 dataset[0] = '\0';
969
970 if (error == ENXIO) {
971 (void) fprintf(stderr, "data error type must be "
972 "'checksum' or 'io'\n");
973 return (1);
974 }
975
cc92e9d0 976 record.zi_cmd = ZINJECT_DATA_FAULT;
34dc7c2f
BB
977 if (translate_raw(raw, &record) != 0)
978 return (1);
979 if (!error)
980 error = EIO;
cc92e9d0 981 } else if (record.zi_cmd == ZINJECT_PANIC) {
428870ff 982 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
cc92e9d0 983 level != 0 || device != NULL) {
428870ff
BB
984 (void) fprintf(stderr, "panic (-p) incompatible with "
985 "other options\n");
986 usage();
987 return (2);
988 }
989
990 if (argc < 1 || argc > 2) {
991 (void) fprintf(stderr, "panic (-p) injection requires "
992 "a single pool name and an optional id\n");
993 usage();
994 return (2);
995 }
996
997 (void) strcpy(pool, argv[0]);
998 if (argv[1] != NULL)
999 record.zi_type = atoi(argv[1]);
1000 dataset[0] = '\0';
cc92e9d0 1001 } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
428870ff
BB
1002 if (nowrites == 0) {
1003 (void) fprintf(stderr, "-s or -g meaningless "
1004 "without -I (ignore writes)\n");
1005 usage();
1006 return (2);
1007 } else if (dur_secs && dur_txg) {
1008 (void) fprintf(stderr, "choose a duration either "
1009 "in seconds (-s) or a number of txgs (-g) "
1010 "but not both\n");
1011 usage();
1012 return (2);
1013 } else if (argc != 1) {
1014 (void) fprintf(stderr, "ignore writes (-I) "
1015 "injection requires a single pool name\n");
1016 usage();
1017 return (2);
1018 }
1019
1020 (void) strcpy(pool, argv[0]);
1021 dataset[0] = '\0';
34dc7c2f
BB
1022 } else if (type == TYPE_INVAL) {
1023 if (flags == 0) {
1024 (void) fprintf(stderr, "at least one of '-b', '-d', "
428870ff
BB
1025 "'-t', '-a', '-p', '-I' or '-u' "
1026 "must be specified\n");
34dc7c2f
BB
1027 usage();
1028 return (2);
1029 }
1030
1031 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
1032 (void) strcpy(pool, argv[0]);
1033 dataset[0] = '\0';
1034 } else if (argc != 0) {
1035 (void) fprintf(stderr, "extraneous argument for "
1036 "'-f'\n");
1037 usage();
1038 return (2);
1039 }
1040
1041 flags |= ZINJECT_NULL;
1042 } else {
1043 if (argc != 1) {
1044 (void) fprintf(stderr, "missing object\n");
1045 usage();
1046 return (2);
1047 }
1048
1049 if (error == ENXIO) {
1050 (void) fprintf(stderr, "data error type must be "
1051 "'checksum' or 'io'\n");
1052 return (1);
1053 }
1054
cc92e9d0 1055 record.zi_cmd = ZINJECT_DATA_FAULT;
34dc7c2f
BB
1056 if (translate_record(type, argv[0], range, level, &record, pool,
1057 dataset) != 0)
1058 return (1);
1059 if (!error)
1060 error = EIO;
1061 }
1062
1063 /*
1064 * If this is pool-wide metadata, unmount everything. The ioctl() will
1065 * unload the pool, so that we trigger spa-wide reopen of metadata next
1066 * time we access the pool.
1067 */
1068 if (dataset[0] != '\0' && domount) {
1069 if ((zhp = zfs_open(g_zfs, dataset, ZFS_TYPE_DATASET)) == NULL)
1070 return (1);
34dc7c2f
BB
1071 if (zfs_unmount(zhp, NULL, 0) != 0)
1072 return (1);
1073 }
1074
1075 record.zi_error = error;
1076
1077 ret = register_handler(pool, flags, &record, quiet);
1078
1079 if (dataset[0] != '\0' && domount)
1080 ret = (zfs_mount(zhp, NULL, 0) != 0);
1081
1082 libzfs_fini(g_zfs);
1083
1084 return (ret);
1085}