]> git.proxmox.com Git - mirror_zfs.git/blame - cmd/zinject/zinject.c
Make zpool status counters match error events count
[mirror_zfs.git] / cmd / zinject / zinject.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
428870ff 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
26ef0cc7 23 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
0241e491 24 * Copyright (c) 2017, Intel Corporation.
34dc7c2f
BB
25 */
26
34dc7c2f
BB
27/*
28 * ZFS Fault Injector
29 *
30 * This userland component takes a set of options and uses libzpool to translate
31 * from a user-visible object type and name to an internal representation.
32 * There are two basic types of faults: device faults and data faults.
33 *
34 *
35 * DEVICE FAULTS
36 *
37 * Errors can be injected into a particular vdev using the '-d' option. This
38 * option takes a path or vdev GUID to uniquely identify the device within a
d977122d
DB
39 * pool. There are four types of errors that can be injected, IO, ENXIO,
40 * ECHILD, and EILSEQ. These can be controlled through the '-e' option and the
41 * default is ENXIO. For EIO failures, any attempt to read data from the device
42 * will return EIO, but a subsequent attempt to reopen the device will succeed.
43 * For ENXIO failures, any attempt to read from the device will return EIO, but
44 * any attempt to reopen the device will also return ENXIO. The EILSEQ failures
45 * only apply to read operations (-T read) and will flip a bit after the device
46 * has read the original data.
47 *
b128c09f 48 * For label faults, the -L option must be specified. This allows faults
428870ff
BB
49 * to be injected into either the nvlist, uberblock, pad1, or pad2 region
50 * of all the labels for the specified device.
34dc7c2f
BB
51 *
52 * This form of the command looks like:
53 *
428870ff 54 * zinject -d device [-e errno] [-L <uber | nvlist | pad1 | pad2>] pool
34dc7c2f
BB
55 *
56 *
57 * DATA FAULTS
58 *
59 * We begin with a tuple of the form:
60 *
61 * <type,level,range,object>
62 *
63 * type A string describing the type of data to target. Each type
64 * implicitly describes how to interpret 'object'. Currently,
65 * the following values are supported:
66 *
67 * data User data for a file
68 * dnode Dnode for a file or directory
69 *
70 * The following MOS objects are special. Instead of injecting
71 * errors on a particular object or blkid, we inject errors across
72 * all objects of the given type.
73 *
74 * mos Any data in the MOS
75 * mosdir object directory
76 * config pool configuration
428870ff 77 * bpobj blkptr list
34dc7c2f
BB
78 * spacemap spacemap
79 * metaslab metaslab
80 * errlog persistent error log
81 *
82 * level Object level. Defaults to '0', not applicable to all types. If
83 * a range is given, this corresponds to the indirect block
84 * corresponding to the specific range.
85 *
86 * range A numerical range [start,end) within the object. Defaults to
87 * the full size of the file.
88 *
89 * object A string describing the logical location of the object. For
90 * files and directories (currently the only supported types),
91 * this is the path of the object on disk.
92 *
93 * This is translated, via libzpool, into the following internal representation:
94 *
95 * <type,objset,object,level,range>
96 *
97 * These types should be self-explanatory. This tuple is then passed to the
98 * kernel via a special ioctl() to initiate fault injection for the given
99 * object. Note that 'type' is not strictly necessary for fault injection, but
100 * is used when translating existing faults into a human-readable string.
101 *
102 *
103 * The command itself takes one of the forms:
104 *
105 * zinject
106 * zinject <-a | -u pool>
107 * zinject -c <id|all>
108 * zinject [-q] <-t type> [-f freq] [-u] [-a] [-m] [-e errno] [-l level]
109 * [-r range] <object>
110 * zinject [-f freq] [-a] [-m] [-u] -b objset:object:level:blkid pool
111 *
112 * With no arguments, the command prints all currently registered injection
113 * handlers, with their numeric identifiers.
114 *
115 * The '-c' option will clear the given handler, or all handlers if 'all' is
116 * specified.
117 *
118 * The '-e' option takes a string describing the errno to simulate. This must
c3bd3fb4
TC
119 * be one of 'io', 'checksum', 'decompress', or 'decrypt'. In most cases this
120 * will result in the same behavior, but RAID-Z will produce a different set of
121 * ereports for this situation.
34dc7c2f
BB
122 *
123 * The '-a', '-u', and '-m' flags toggle internal flush behavior. If '-a' is
124 * specified, then the ARC cache is flushed appropriately. If '-u' is
125 * specified, then the underlying SPA is unloaded. Either of these flags can be
126 * specified independently of any other handlers. The '-m' flag automatically
127 * does an unmount and remount of the underlying dataset to aid in flushing the
128 * cache.
129 *
130 * The '-f' flag controls the frequency of errors injected, expressed as a
0241e491 131 * real number percentage between 0.0001 and 100. The default is 100.
34dc7c2f
BB
132 *
133 * This form is responsible for actually injecting the handler into the
134 * framework. It takes the arguments described above, translates them to the
135 * internal tuple using libzpool, and then issues an ioctl() to register the
136 * handler.
137 *
138 * The final form can target a specific bookmark, regardless of whether a
139 * human-readable interface has been designed. It allows developers to specify
140 * a particular block by number.
141 */
142
143#include <errno.h>
144#include <fcntl.h>
145#include <stdio.h>
146#include <stdlib.h>
147#include <strings.h>
148#include <unistd.h>
149
150#include <sys/fs/zfs.h>
151#include <sys/mount.h>
152
153#include <libzfs.h>
154
155#undef verify /* both libzfs.h and zfs_context.h want to define this */
156
157#include "zinject.h"
158
159libzfs_handle_t *g_zfs;
160int zfs_fd;
161
162#define ECKSUM EBADE
163
164static const char *errtable[TYPE_INVAL] = {
165 "data",
166 "dnode",
167 "mos",
168 "mosdir",
169 "metaslab",
170 "config",
428870ff 171 "bpobj",
34dc7c2f 172 "spacemap",
b128c09f
BB
173 "errlog",
174 "uber",
428870ff
BB
175 "nvlist",
176 "pad1",
177 "pad2"
34dc7c2f
BB
178};
179
180static err_type_t
181name_to_type(const char *arg)
182{
183 int i;
184 for (i = 0; i < TYPE_INVAL; i++)
185 if (strcmp(errtable[i], arg) == 0)
186 return (i);
187
188 return (TYPE_INVAL);
189}
190
191static const char *
192type_to_name(uint64_t type)
193{
194 switch (type) {
195 case DMU_OT_OBJECT_DIRECTORY:
196 return ("mosdir");
197 case DMU_OT_OBJECT_ARRAY:
198 return ("metaslab");
199 case DMU_OT_PACKED_NVLIST:
200 return ("config");
428870ff
BB
201 case DMU_OT_BPOBJ:
202 return ("bpobj");
34dc7c2f
BB
203 case DMU_OT_SPACE_MAP:
204 return ("spacemap");
205 case DMU_OT_ERROR_LOG:
206 return ("errlog");
207 default:
208 return ("-");
209 }
210}
211
212
/*
 * Print the usage message to stdout.
 *
 * The text is a single printf() of adjacent string literals, one literal
 * per output line, covering every supported invocation form.
 */
void
usage(void)
{
	(void) printf(
	    "usage:\n"
	    "\n"
	    "\tzinject\n"
	    "\n"
	    "\t\tList all active injection records.\n"
	    "\n"
	    "\tzinject -c <id|all>\n"
	    "\n"
	    "\t\tClear the particular record (if given a numeric ID), or\n"
	    "\t\tall records if 'all' is specified.\n"
	    "\n"
	    "\tzinject -p <function name> pool\n"
	    "\t\tInject a panic fault at the specified function. Only \n"
	    "\t\tfunctions which call spa_vdev_config_exit(), or \n"
	    "\t\tspa_vdev_exit() will trigger a panic.\n"
	    "\n"
	    "\tzinject -d device [-e errno] [-L <nvlist|uber|pad1|pad2>] [-F]\n"
	    "\t\t[-T <read|write|free|claim|all>] [-f frequency] pool\n\n"
	    "\t\tInject a fault into a particular device or the device's\n"
	    "\t\tlabel. Label injection can either be 'nvlist', 'uber',\n"
	    "\t\t'pad1', or 'pad2'.\n"
	    "\t\t'errno' can be 'nxio' (the default), 'io', 'dtl', or\n"
	    "\t\t'corrupt' (bit flip).\n"
	    "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n"
	    "\t\tdevice error injection to a percentage of the IOs.\n"
	    "\n"
	    "\tzinject -d device -A <degrade|fault> -D <delay secs> pool\n"
	    "\t\tPerform a specific action on a particular device.\n"
	    "\n"
	    "\tzinject -d device -D latency:lanes pool\n"
	    "\n"
	    "\t\tAdd an artificial delay to IO requests on a particular\n"
	    "\t\tdevice, such that the requests take a minimum of 'latency'\n"
	    "\t\tmilliseconds to complete. Each delay has an associated\n"
	    "\t\tnumber of 'lanes' which defines the number of concurrent\n"
	    "\t\tIO requests that can be processed.\n"
	    "\n"
	    "\t\tFor example, with a single lane delay of 10 ms (-D 10:1),\n"
	    "\t\tthe device will only be able to service a single IO request\n"
	    "\t\tat a time with each request taking 10 ms to complete. So,\n"
	    "\t\tif only a single request is submitted every 10 ms, the\n"
	    "\t\taverage latency will be 10 ms; but if more than one request\n"
	    "\t\tis submitted every 10 ms, the average latency will be more\n"
	    "\t\tthan 10 ms.\n"
	    "\n"
	    "\t\tSimilarly, if a delay of 10 ms is specified to have two\n"
	    "\t\tlanes (-D 10:2), then the device will be able to service\n"
	    "\t\ttwo requests at a time, each with a minimum latency of\n"
	    "\t\t10 ms. So, if two requests are submitted every 10 ms, then\n"
	    "\t\tthe average latency will be 10 ms; but if more than two\n"
	    "\t\trequests are submitted every 10 ms, the average latency\n"
	    "\t\twill be more than 10 ms.\n"
	    "\n"
	    "\t\tAlso note, these delays are additive. So two invocations\n"
	    "\t\tof '-D 10:1', is roughly equivalent to a single invocation\n"
	    "\t\tof '-D 10:2'. This also means, one can specify multiple\n"
	    "\t\tlanes with differing target latencies. For example, an\n"
	    "\t\tinvocation of '-D 10:1' followed by '-D 25:2' will\n"
	    "\t\tcreate 3 lanes on the device; one lane with a latency\n"
	    "\t\tof 10 ms and two lanes with a 25 ms latency.\n"
	    "\n"
	    "\tzinject -I [-s <seconds> | -g <txgs>] pool\n"
	    "\t\tCause the pool to stop writing blocks yet not\n"
	    "\t\treport errors for a duration. Simulates buggy hardware\n"
	    "\t\tthat fails to honor cache flush requests.\n"
	    "\t\tDefault duration is 30 seconds. The machine is panicked\n"
	    "\t\tat the end of the duration.\n"
	    "\n"
	    "\tzinject -b objset:object:level:blkid pool\n"
	    "\n"
	    "\t\tInject an error into pool 'pool' with the numeric bookmark\n"
	    "\t\tspecified by the remaining tuple. Each number is in\n"
	    "\t\thexadecimal, and only one block can be specified.\n"
	    "\n"
	    "\tzinject [-q] <-t type> [-e errno] [-l level] [-r range]\n"
	    "\t [-a] [-m] [-u] [-f freq] <object>\n"
	    "\n"
	    "\t\tInject an error into the object specified by the '-t' option\n"
	    "\t\tand the object descriptor. The 'object' parameter is\n"
	    "\t\tinterpreted depending on the '-t' option.\n"
	    "\n"
	    "\t\t-q\tQuiet mode. Only print out the handler number added.\n"
	    "\t\t-e\tInject a specific error. Must be one of 'io',\n"
	    "\t\t\t'checksum', 'decompress', or 'decrypt'. Default is 'io'.\n"
	    "\t\t-l\tInject error at a particular block level. Default is "
	    "0.\n"
	    "\t\t-m\tAutomatically remount underlying filesystem.\n"
	    "\t\t-r\tInject error over a particular logical range of an\n"
	    "\t\t\tobject. Will be translated to the appropriate blkid\n"
	    "\t\t\trange according to the object's properties.\n"
	    "\t\t-a\tFlush the ARC cache. Can be specified without any\n"
	    "\t\t\tassociated object.\n"
	    "\t\t-u\tUnload the associated pool. Can be specified with only\n"
	    "\t\t\ta pool object.\n"
	    "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n"
	    "\t\t\ta percentage between 0.0001 and 100.\n"
	    "\n"
	    "\t-t data\t\tInject an error into the plain file contents of a\n"
	    "\t\t\tfile. The object must be specified as a complete path\n"
	    "\t\t\tto a file on a ZFS filesystem.\n"
	    "\n"
	    "\t-t dnode\tInject an error into the metadnode in the block\n"
	    "\t\t\tcorresponding to the dnode for a file or directory. The\n"
	    "\t\t\t'-r' option is incompatible with this mode. The object\n"
	    "\t\t\tis specified as a complete path to a file or directory\n"
	    "\t\t\ton a ZFS filesystem.\n"
	    "\n"
	    "\t-t <mos>\tInject errors into the MOS for objects of the given\n"
	    "\t\t\ttype. Valid types are: mos, mosdir, config, bpobj,\n"
	    "\t\t\tspacemap, metaslab, errlog. The only valid <object> is\n"
	    "\t\t\tthe poolname.\n");
}
332
333static int
334iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
335 void *data)
336{
d3773fda 337 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
338 int ret;
339
34dc7c2f
BB
340 while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
341 if ((ret = func((int)zc.zc_guid, zc.zc_name,
342 &zc.zc_inject_record, data)) != 0)
343 return (ret);
344
428870ff
BB
345 if (errno != ENOENT) {
346 (void) fprintf(stderr, "Unable to list handlers: %s\n",
347 strerror(errno));
348 return (-1);
349 }
350
34dc7c2f
BB
351 return (0);
352}
353
354static int
355print_data_handler(int id, const char *pool, zinject_record_t *record,
356 void *data)
357{
358 int *count = data;
359
428870ff 360 if (record->zi_guid != 0 || record->zi_func[0] != '\0')
34dc7c2f
BB
361 return (0);
362
363 if (*count == 0) {
364 (void) printf("%3s %-15s %-6s %-6s %-8s %3s %-15s\n",
365 "ID", "POOL", "OBJSET", "OBJECT", "TYPE", "LVL", "RANGE");
366 (void) printf("--- --------------- ------ "
367 "------ -------- --- ---------------\n");
368 }
369
370 *count += 1;
371
372 (void) printf("%3d %-15s %-6llu %-6llu %-8s %3d ", id, pool,
373 (u_longlong_t)record->zi_objset, (u_longlong_t)record->zi_object,
374 type_to_name(record->zi_type), record->zi_level);
375
376 if (record->zi_start == 0 &&
377 record->zi_end == -1ULL)
378 (void) printf("all\n");
379 else
380 (void) printf("[%llu, %llu]\n", (u_longlong_t)record->zi_start,
381 (u_longlong_t)record->zi_end);
382
383 return (0);
384}
385
386static int
387print_device_handler(int id, const char *pool, zinject_record_t *record,
388 void *data)
389{
390 int *count = data;
391
428870ff 392 if (record->zi_guid == 0 || record->zi_func[0] != '\0')
34dc7c2f
BB
393 return (0);
394
26ef0cc7
TH
395 if (record->zi_cmd == ZINJECT_DELAY_IO)
396 return (0);
397
34dc7c2f
BB
398 if (*count == 0) {
399 (void) printf("%3s %-15s %s\n", "ID", "POOL", "GUID");
400 (void) printf("--- --------------- ----------------\n");
401 }
402
403 *count += 1;
404
405 (void) printf("%3d %-15s %llx\n", id, pool,
406 (u_longlong_t)record->zi_guid);
407
408 return (0);
409}
410
26ef0cc7
TH
411static int
412print_delay_handler(int id, const char *pool, zinject_record_t *record,
413 void *data)
414{
415 int *count = data;
416
417 if (record->zi_guid == 0 || record->zi_func[0] != '\0')
418 return (0);
419
420 if (record->zi_cmd != ZINJECT_DELAY_IO)
421 return (0);
422
423 if (*count == 0) {
424 (void) printf("%3s %-15s %-15s %-15s %s\n",
425 "ID", "POOL", "DELAY (ms)", "LANES", "GUID");
426 (void) printf("--- --------------- --------------- "
427 "--------------- ----------------\n");
428 }
429
430 *count += 1;
431
432 (void) printf("%3d %-15s %-15llu %-15llu %llx\n", id, pool,
433 (u_longlong_t)NSEC2MSEC(record->zi_timer),
434 (u_longlong_t)record->zi_nlanes,
435 (u_longlong_t)record->zi_guid);
436
437 return (0);
438}
439
428870ff
BB
440static int
441print_panic_handler(int id, const char *pool, zinject_record_t *record,
442 void *data)
443{
444 int *count = data;
445
446 if (record->zi_func[0] == '\0')
447 return (0);
448
449 if (*count == 0) {
450 (void) printf("%3s %-15s %s\n", "ID", "POOL", "FUNCTION");
451 (void) printf("--- --------------- ----------------\n");
452 }
453
454 *count += 1;
455
456 (void) printf("%3d %-15s %s\n", id, pool, record->zi_func);
457
458 return (0);
459}
460
34dc7c2f
BB
461/*
462 * Print all registered error handlers. Returns the number of handlers
463 * registered.
464 */
465static int
466print_all_handlers(void)
467{
572e2857 468 int count = 0, total = 0;
34dc7c2f
BB
469
470 (void) iter_handlers(print_device_handler, &count);
572e2857
BB
471 if (count > 0) {
472 total += count;
473 (void) printf("\n");
474 count = 0;
475 }
476
26ef0cc7
TH
477 (void) iter_handlers(print_delay_handler, &count);
478 if (count > 0) {
479 total += count;
480 (void) printf("\n");
481 count = 0;
482 }
483
34dc7c2f 484 (void) iter_handlers(print_data_handler, &count);
572e2857
BB
485 if (count > 0) {
486 total += count;
487 (void) printf("\n");
488 count = 0;
489 }
490
428870ff 491 (void) iter_handlers(print_panic_handler, &count);
34dc7c2f 492
572e2857 493 return (count + total);
34dc7c2f
BB
494}
495
496/* ARGSUSED */
497static int
498cancel_one_handler(int id, const char *pool, zinject_record_t *record,
499 void *data)
500{
d3773fda 501 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
502
503 zc.zc_guid = (uint64_t)id;
504
505 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
506 (void) fprintf(stderr, "failed to remove handler %d: %s\n",
507 id, strerror(errno));
508 return (1);
509 }
510
511 return (0);
512}
513
514/*
515 * Remove all fault injection handlers.
516 */
517static int
518cancel_all_handlers(void)
519{
520 int ret = iter_handlers(cancel_one_handler, NULL);
521
428870ff
BB
522 if (ret == 0)
523 (void) printf("removed all registered handlers\n");
34dc7c2f
BB
524
525 return (ret);
526}
527
528/*
529 * Remove a specific fault injection handler.
530 */
531static int
532cancel_handler(int id)
533{
d3773fda 534 zfs_cmd_t zc = {"\0"};
34dc7c2f
BB
535
536 zc.zc_guid = (uint64_t)id;
537
538 if (ioctl(zfs_fd, ZFS_IOC_CLEAR_FAULT, &zc) != 0) {
539 (void) fprintf(stderr, "failed to remove handler %d: %s\n",
540 id, strerror(errno));
541 return (1);
542 }
543
544 (void) printf("removed handler %d\n", id);
545
546 return (0);
547}
548
549/*
550 * Register a new fault injection handler.
551 */
552static int
553register_handler(const char *pool, int flags, zinject_record_t *record,
554 int quiet)
555{
d3773fda 556 zfs_cmd_t zc = {"\0"};
34dc7c2f 557
0b78aeae 558 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
34dc7c2f
BB
559 zc.zc_inject_record = *record;
560 zc.zc_guid = flags;
561
562 if (ioctl(zfs_fd, ZFS_IOC_INJECT_FAULT, &zc) != 0) {
563 (void) fprintf(stderr, "failed to add handler: %s\n",
e89f1295 564 errno == EDOM ? "block level exceeds max level of object" :
34dc7c2f
BB
565 strerror(errno));
566 return (1);
567 }
568
569 if (flags & ZINJECT_NULL)
570 return (0);
571
572 if (quiet) {
573 (void) printf("%llu\n", (u_longlong_t)zc.zc_guid);
574 } else {
575 (void) printf("Added handler %llu with the following "
576 "properties:\n", (u_longlong_t)zc.zc_guid);
577 (void) printf(" pool: %s\n", pool);
578 if (record->zi_guid) {
579 (void) printf(" vdev: %llx\n",
580 (u_longlong_t)record->zi_guid);
428870ff
BB
581 } else if (record->zi_func[0] != '\0') {
582 (void) printf(" panic function: %s\n",
583 record->zi_func);
584 } else if (record->zi_duration > 0) {
585 (void) printf(" time: %lld seconds\n",
586 (u_longlong_t)record->zi_duration);
587 } else if (record->zi_duration < 0) {
588 (void) printf(" txgs: %lld \n",
589 (u_longlong_t)-record->zi_duration);
34dc7c2f
BB
590 } else {
591 (void) printf("objset: %llu\n",
592 (u_longlong_t)record->zi_objset);
593 (void) printf("object: %llu\n",
594 (u_longlong_t)record->zi_object);
595 (void) printf(" type: %llu\n",
596 (u_longlong_t)record->zi_type);
597 (void) printf(" level: %d\n", record->zi_level);
598 if (record->zi_start == 0 &&
599 record->zi_end == -1ULL)
600 (void) printf(" range: all\n");
601 else
602 (void) printf(" range: [%llu, %llu)\n",
603 (u_longlong_t)record->zi_start,
604 (u_longlong_t)record->zi_end);
605 }
606 }
607
608 return (0);
609}
610
428870ff
BB
611int
612perform_action(const char *pool, zinject_record_t *record, int cmd)
613{
d3773fda 614 zfs_cmd_t zc = {"\0"};
428870ff
BB
615
616 ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
617 (void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
618 zc.zc_guid = record->zi_guid;
619 zc.zc_cookie = cmd;
620
621 if (ioctl(zfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
622 return (0);
623
624 return (1);
625}
626
26ef0cc7
TH
/*
 * Parse a '-D' argument of the form "latency:lanes".
 *
 * On success stores the latency, converted from milliseconds to
 * nanoseconds, in '*delay' and the lane count in '*nlanes', and returns 0.
 * Returns 1 if the string is not exactly two unsigned numbers separated by
 * a colon, or if the latency is zero.
 */
static int
parse_delay(char *str, uint64_t *delay, uint64_t *nlanes)
{
	unsigned long scan_delay;
	unsigned long scan_nlanes;
	int consumed = 0;

	/* %n records how much input was consumed; it is not counted. */
	if (sscanf(str, "%lu:%lu%n", &scan_delay, &scan_nlanes,
	    &consumed) != 2)
		return (1);

	/* Reject trailing characters after the lane count (e.g. "10:1x"). */
	if (str[consumed] != '\0')
		return (1);

	/*
	 * We explicitly disallow a delay of zero here, because we key
	 * off this value being non-zero in translate_device(), to
	 * determine if the fault is a ZINJECT_DELAY_IO fault or not.
	 */
	if (scan_delay == 0)
		return (1);

	/*
	 * The units for the CLI delay parameter is milliseconds, but
	 * the data passed to the kernel is interpreted as nanoseconds.
	 * Thus we scale the milliseconds to nanoseconds here, and this
	 * nanosecond value is used to pass the delay to the kernel.
	 */
	*delay = MSEC2NSEC(scan_delay);
	*nlanes = scan_nlanes;

	return (0);
}
655
0241e491
DB
656static int
657parse_frequency(const char *str, uint32_t *percent)
658{
659 double val;
660 char *post;
661
662 val = strtod(str, &post);
663 if (post == NULL || *post != '\0')
664 return (EINVAL);
665
666 /* valid range is [0.0001, 100.0] */
667 val /= 100.0f;
668 if (val < 0.000001f || val > 1.0f)
669 return (ERANGE);
670
671 /* convert to an integer for use by kernel */
672 *percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX));
673
674 return (0);
675}
676
34dc7c2f
BB
677int
678main(int argc, char **argv)
679{
680 int c;
681 char *range = NULL;
682 char *cancel = NULL;
683 char *end;
684 char *raw = NULL;
685 char *device = NULL;
686 int level = 0;
687 int quiet = 0;
688 int error = 0;
689 int domount = 0;
428870ff
BB
690 int io_type = ZIO_TYPES;
691 int action = VDEV_STATE_UNKNOWN;
34dc7c2f 692 err_type_t type = TYPE_INVAL;
b128c09f 693 err_type_t label = TYPE_INVAL;
34dc7c2f 694 zinject_record_t record = { 0 };
a64f903b
GN
695 char pool[MAXNAMELEN] = "";
696 char dataset[MAXNAMELEN] = "";
149e873a 697 zfs_handle_t *zhp = NULL;
428870ff
BB
698 int nowrites = 0;
699 int dur_txg = 0;
700 int dur_secs = 0;
34dc7c2f
BB
701 int ret;
702 int flags = 0;
703
65037d9b
BB
704 if ((g_zfs = libzfs_init()) == NULL) {
705 (void) fprintf(stderr, "%s", libzfs_error_init(errno));
937210a5 706 return (1);
65037d9b 707 }
937210a5
BB
708
709 libzfs_print_on_error(g_zfs, B_TRUE);
710
711 if ((zfs_fd = open(ZFS_DEV, O_RDWR)) < 0) {
712 (void) fprintf(stderr, "failed to open ZFS device\n");
5df39c1e 713 libzfs_fini(g_zfs);
937210a5
BB
714 return (1);
715 }
716
34dc7c2f
BB
717 if (argc == 1) {
718 /*
719 * No arguments. Print the available handlers. If there are no
720 * available handlers, direct the user to '-h' for help
721 * information.
722 */
723 if (print_all_handlers() == 0) {
724 (void) printf("No handlers registered.\n");
725 (void) printf("Run 'zinject -h' for usage "
726 "information.\n");
727 }
5df39c1e 728 libzfs_fini(g_zfs);
34dc7c2f
BB
729 return (0);
730 }
731
428870ff 732 while ((c = getopt(argc, argv,
cc92e9d0 733 ":aA:b:d:D:f:Fg:qhIc:t:T:l:mr:s:e:uL:p:")) != -1) {
34dc7c2f
BB
734 switch (c) {
735 case 'a':
736 flags |= ZINJECT_FLUSH_ARC;
737 break;
428870ff
BB
738 case 'A':
739 if (strcasecmp(optarg, "degrade") == 0) {
740 action = VDEV_STATE_DEGRADED;
741 } else if (strcasecmp(optarg, "fault") == 0) {
742 action = VDEV_STATE_FAULTED;
743 } else {
744 (void) fprintf(stderr, "invalid action '%s': "
745 "must be 'degrade' or 'fault'\n", optarg);
746 usage();
5df39c1e 747 libzfs_fini(g_zfs);
428870ff
BB
748 return (1);
749 }
750 break;
34dc7c2f
BB
751 case 'b':
752 raw = optarg;
753 break;
754 case 'c':
755 cancel = optarg;
756 break;
757 case 'd':
758 device = optarg;
759 break;
cc92e9d0
GW
760 case 'D':
761 errno = 0;
26ef0cc7
TH
762 ret = parse_delay(optarg, &record.zi_timer,
763 &record.zi_nlanes);
764 if (ret != 0) {
765
cc92e9d0
GW
766 (void) fprintf(stderr, "invalid i/o delay "
767 "value: '%s'\n", optarg);
768 usage();
5df39c1e 769 libzfs_fini(g_zfs);
cc92e9d0
GW
770 return (1);
771 }
772 break;
34dc7c2f
BB
773 case 'e':
774 if (strcasecmp(optarg, "io") == 0) {
775 error = EIO;
776 } else if (strcasecmp(optarg, "checksum") == 0) {
777 error = ECKSUM;
c3bd3fb4
TC
778 } else if (strcasecmp(optarg, "decompress") == 0) {
779 error = EINVAL;
be9a5c35
TC
780 } else if (strcasecmp(optarg, "decrypt") == 0) {
781 error = EACCES;
34dc7c2f
BB
782 } else if (strcasecmp(optarg, "nxio") == 0) {
783 error = ENXIO;
572e2857
BB
784 } else if (strcasecmp(optarg, "dtl") == 0) {
785 error = ECHILD;
d977122d
DB
786 } else if (strcasecmp(optarg, "corrupt") == 0) {
787 error = EILSEQ;
34dc7c2f
BB
788 } else {
789 (void) fprintf(stderr, "invalid error type "
790 "'%s': must be 'io', 'checksum' or "
791 "'nxio'\n", optarg);
792 usage();
5df39c1e 793 libzfs_fini(g_zfs);
34dc7c2f
BB
794 return (1);
795 }
796 break;
797 case 'f':
0241e491
DB
798 ret = parse_frequency(optarg, &record.zi_freq);
799 if (ret != 0) {
800 (void) fprintf(stderr, "%sfrequency value must "
801 "be in the range [0.0001, 100.0]\n",
802 ret == EINVAL ? "invalid value: " :
803 ret == ERANGE ? "out of range: " : "");
5df39c1e 804 libzfs_fini(g_zfs);
34dc7c2f
BB
805 return (1);
806 }
807 break;
9babb374
BB
808 case 'F':
809 record.zi_failfast = B_TRUE;
810 break;
428870ff
BB
811 case 'g':
812 dur_txg = 1;
813 record.zi_duration = (int)strtol(optarg, &end, 10);
814 if (record.zi_duration <= 0 || *end != '\0') {
815 (void) fprintf(stderr, "invalid duration '%s': "
816 "must be a positive integer\n", optarg);
817 usage();
5df39c1e 818 libzfs_fini(g_zfs);
428870ff
BB
819 return (1);
820 }
821 /* store duration of txgs as its negative */
822 record.zi_duration *= -1;
823 break;
34dc7c2f
BB
824 case 'h':
825 usage();
5df39c1e 826 libzfs_fini(g_zfs);
34dc7c2f 827 return (0);
428870ff
BB
828 case 'I':
829 /* default duration, if one hasn't yet been defined */
830 nowrites = 1;
831 if (dur_secs == 0 && dur_txg == 0)
832 record.zi_duration = 30;
833 break;
34dc7c2f
BB
834 case 'l':
835 level = (int)strtol(optarg, &end, 10);
836 if (*end != '\0') {
837 (void) fprintf(stderr, "invalid level '%s': "
838 "must be an integer\n", optarg);
839 usage();
5df39c1e 840 libzfs_fini(g_zfs);
34dc7c2f
BB
841 return (1);
842 }
843 break;
844 case 'm':
845 domount = 1;
846 break;
428870ff
BB
847 case 'p':
848 (void) strlcpy(record.zi_func, optarg,
849 sizeof (record.zi_func));
cc92e9d0 850 record.zi_cmd = ZINJECT_PANIC;
428870ff 851 break;
34dc7c2f
BB
852 case 'q':
853 quiet = 1;
854 break;
855 case 'r':
856 range = optarg;
e89f1295 857 flags |= ZINJECT_CALC_RANGE;
34dc7c2f 858 break;
428870ff
BB
859 case 's':
860 dur_secs = 1;
861 record.zi_duration = (int)strtol(optarg, &end, 10);
862 if (record.zi_duration <= 0 || *end != '\0') {
863 (void) fprintf(stderr, "invalid duration '%s': "
864 "must be a positive integer\n", optarg);
865 usage();
5df39c1e 866 libzfs_fini(g_zfs);
428870ff
BB
867 return (1);
868 }
869 break;
870 case 'T':
871 if (strcasecmp(optarg, "read") == 0) {
872 io_type = ZIO_TYPE_READ;
873 } else if (strcasecmp(optarg, "write") == 0) {
874 io_type = ZIO_TYPE_WRITE;
875 } else if (strcasecmp(optarg, "free") == 0) {
876 io_type = ZIO_TYPE_FREE;
877 } else if (strcasecmp(optarg, "claim") == 0) {
878 io_type = ZIO_TYPE_CLAIM;
879 } else if (strcasecmp(optarg, "all") == 0) {
880 io_type = ZIO_TYPES;
881 } else {
882 (void) fprintf(stderr, "invalid I/O type "
883 "'%s': must be 'read', 'write', 'free', "
884 "'claim' or 'all'\n", optarg);
885 usage();
5df39c1e 886 libzfs_fini(g_zfs);
428870ff
BB
887 return (1);
888 }
889 break;
34dc7c2f 890 case 't':
b128c09f
BB
891 if ((type = name_to_type(optarg)) == TYPE_INVAL &&
892 !MOS_TYPE(type)) {
34dc7c2f
BB
893 (void) fprintf(stderr, "invalid type '%s'\n",
894 optarg);
895 usage();
5df39c1e 896 libzfs_fini(g_zfs);
34dc7c2f
BB
897 return (1);
898 }
899 break;
900 case 'u':
901 flags |= ZINJECT_UNLOAD_SPA;
902 break;
b128c09f
BB
903 case 'L':
904 if ((label = name_to_type(optarg)) == TYPE_INVAL &&
905 !LABEL_TYPE(type)) {
906 (void) fprintf(stderr, "invalid label type "
907 "'%s'\n", optarg);
908 usage();
5df39c1e 909 libzfs_fini(g_zfs);
b128c09f
BB
910 return (1);
911 }
912 break;
34dc7c2f
BB
913 case ':':
914 (void) fprintf(stderr, "option -%c requires an "
915 "operand\n", optopt);
916 usage();
5df39c1e 917 libzfs_fini(g_zfs);
34dc7c2f
BB
918 return (1);
919 case '?':
920 (void) fprintf(stderr, "invalid option '%c'\n",
921 optopt);
922 usage();
5df39c1e 923 libzfs_fini(g_zfs);
34dc7c2f
BB
924 return (2);
925 }
926 }
927
928 argc -= optind;
929 argv += optind;
930
cc92e9d0
GW
931 if (record.zi_duration != 0)
932 record.zi_cmd = ZINJECT_IGNORED_WRITES;
933
34dc7c2f
BB
934 if (cancel != NULL) {
935 /*
936 * '-c' is invalid with any other options.
937 */
938 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
0241e491
DB
939 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED ||
940 record.zi_freq > 0) {
34dc7c2f
BB
941 (void) fprintf(stderr, "cancel (-c) incompatible with "
942 "any other options\n");
943 usage();
5df39c1e 944 libzfs_fini(g_zfs);
34dc7c2f
BB
945 return (2);
946 }
947 if (argc != 0) {
948 (void) fprintf(stderr, "extraneous argument to '-c'\n");
949 usage();
5df39c1e 950 libzfs_fini(g_zfs);
34dc7c2f
BB
951 return (2);
952 }
953
954 if (strcmp(cancel, "all") == 0) {
955 return (cancel_all_handlers());
956 } else {
957 int id = (int)strtol(cancel, &end, 10);
958 if (*end != '\0') {
959 (void) fprintf(stderr, "invalid handle id '%s':"
960 " must be an integer or 'all'\n", cancel);
961 usage();
5df39c1e 962 libzfs_fini(g_zfs);
34dc7c2f
BB
963 return (1);
964 }
965 return (cancel_handler(id));
966 }
967 }
968
969 if (device != NULL) {
970 /*
971 * Device (-d) injection uses a completely different mechanism
972 * for doing injection, so handle it separately here.
973 */
974 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
cc92e9d0 975 level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) {
34dc7c2f
BB
976 (void) fprintf(stderr, "device (-d) incompatible with "
977 "data error injection\n");
978 usage();
5df39c1e 979 libzfs_fini(g_zfs);
34dc7c2f
BB
980 return (2);
981 }
982
983 if (argc != 1) {
984 (void) fprintf(stderr, "device (-d) injection requires "
985 "a single pool name\n");
986 usage();
5df39c1e 987 libzfs_fini(g_zfs);
34dc7c2f
BB
988 return (2);
989 }
990
5df39c1e 991 (void) strlcpy(pool, argv[0], sizeof (pool));
34dc7c2f
BB
992 dataset[0] = '\0';
993
994 if (error == ECKSUM) {
995 (void) fprintf(stderr, "device error type must be "
d977122d
DB
996 "'io', 'nxio' or 'corrupt'\n");
997 libzfs_fini(g_zfs);
998 return (1);
999 }
1000
1001 if (error == EILSEQ &&
1002 (record.zi_freq == 0 || io_type != ZIO_TYPE_READ)) {
1003 (void) fprintf(stderr, "device corrupt errors require "
1004 "io type read and a frequency value\n");
5df39c1e 1005 libzfs_fini(g_zfs);
34dc7c2f
BB
1006 return (1);
1007 }
1008
428870ff 1009 record.zi_iotype = io_type;
5df39c1e 1010 if (translate_device(pool, device, label, &record) != 0) {
1011 libzfs_fini(g_zfs);
34dc7c2f 1012 return (1);
5df39c1e 1013 }
34dc7c2f
BB
1014 if (!error)
1015 error = ENXIO;
428870ff
BB
1016
1017 if (action != VDEV_STATE_UNKNOWN)
1018 return (perform_action(pool, &record, action));
1019
34dc7c2f 1020 } else if (raw != NULL) {
428870ff 1021 if (range != NULL || type != TYPE_INVAL || level != 0 ||
0241e491
DB
1022 record.zi_cmd != ZINJECT_UNINITIALIZED ||
1023 record.zi_freq > 0) {
34dc7c2f
BB
1024 (void) fprintf(stderr, "raw (-b) format with "
1025 "any other options\n");
1026 usage();
5df39c1e 1027 libzfs_fini(g_zfs);
34dc7c2f
BB
1028 return (2);
1029 }
1030
1031 if (argc != 1) {
1032 (void) fprintf(stderr, "raw (-b) format expects a "
1033 "single pool name\n");
1034 usage();
5df39c1e 1035 libzfs_fini(g_zfs);
34dc7c2f
BB
1036 return (2);
1037 }
1038
5df39c1e 1039 (void) strlcpy(pool, argv[0], sizeof (pool));
34dc7c2f
BB
1040 dataset[0] = '\0';
1041
1042 if (error == ENXIO) {
1043 (void) fprintf(stderr, "data error type must be "
1044 "'checksum' or 'io'\n");
5df39c1e 1045 libzfs_fini(g_zfs);
34dc7c2f
BB
1046 return (1);
1047 }
1048
cc92e9d0 1049 record.zi_cmd = ZINJECT_DATA_FAULT;
5df39c1e 1050 if (translate_raw(raw, &record) != 0) {
1051 libzfs_fini(g_zfs);
34dc7c2f 1052 return (1);
5df39c1e 1053 }
34dc7c2f
BB
1054 if (!error)
1055 error = EIO;
cc92e9d0 1056 } else if (record.zi_cmd == ZINJECT_PANIC) {
428870ff 1057 if (raw != NULL || range != NULL || type != TYPE_INVAL ||
0241e491 1058 level != 0 || device != NULL || record.zi_freq > 0) {
428870ff
BB
1059 (void) fprintf(stderr, "panic (-p) incompatible with "
1060 "other options\n");
1061 usage();
5df39c1e 1062 libzfs_fini(g_zfs);
428870ff
BB
1063 return (2);
1064 }
1065
1066 if (argc < 1 || argc > 2) {
1067 (void) fprintf(stderr, "panic (-p) injection requires "
1068 "a single pool name and an optional id\n");
1069 usage();
5df39c1e 1070 libzfs_fini(g_zfs);
428870ff
BB
1071 return (2);
1072 }
1073
5df39c1e 1074 (void) strlcpy(pool, argv[0], sizeof (pool));
428870ff
BB
1075 if (argv[1] != NULL)
1076 record.zi_type = atoi(argv[1]);
1077 dataset[0] = '\0';
cc92e9d0 1078 } else if (record.zi_cmd == ZINJECT_IGNORED_WRITES) {
428870ff
BB
1079 if (nowrites == 0) {
1080 (void) fprintf(stderr, "-s or -g meaningless "
1081 "without -I (ignore writes)\n");
1082 usage();
5df39c1e 1083 libzfs_fini(g_zfs);
428870ff
BB
1084 return (2);
1085 } else if (dur_secs && dur_txg) {
1086 (void) fprintf(stderr, "choose a duration either "
1087 "in seconds (-s) or a number of txgs (-g) "
1088 "but not both\n");
1089 usage();
5df39c1e 1090 libzfs_fini(g_zfs);
428870ff
BB
1091 return (2);
1092 } else if (argc != 1) {
1093 (void) fprintf(stderr, "ignore writes (-I) "
1094 "injection requires a single pool name\n");
1095 usage();
5df39c1e 1096 libzfs_fini(g_zfs);
428870ff
BB
1097 return (2);
1098 }
1099
5df39c1e 1100 (void) strlcpy(pool, argv[0], sizeof (pool));
428870ff 1101 dataset[0] = '\0';
34dc7c2f
BB
1102 } else if (type == TYPE_INVAL) {
1103 if (flags == 0) {
1104 (void) fprintf(stderr, "at least one of '-b', '-d', "
428870ff
BB
1105 "'-t', '-a', '-p', '-I' or '-u' "
1106 "must be specified\n");
34dc7c2f 1107 usage();
5df39c1e 1108 libzfs_fini(g_zfs);
34dc7c2f
BB
1109 return (2);
1110 }
1111
1112 if (argc == 1 && (flags & ZINJECT_UNLOAD_SPA)) {
5df39c1e 1113 (void) strlcpy(pool, argv[0], sizeof (pool));
34dc7c2f
BB
1114 dataset[0] = '\0';
1115 } else if (argc != 0) {
1116 (void) fprintf(stderr, "extraneous argument for "
1117 "'-f'\n");
1118 usage();
5df39c1e 1119 libzfs_fini(g_zfs);
34dc7c2f
BB
1120 return (2);
1121 }
1122
1123 flags |= ZINJECT_NULL;
1124 } else {
1125 if (argc != 1) {
1126 (void) fprintf(stderr, "missing object\n");
1127 usage();
5df39c1e 1128 libzfs_fini(g_zfs);
34dc7c2f
BB
1129 return (2);
1130 }
1131
d977122d 1132 if (error == ENXIO || error == EILSEQ) {
34dc7c2f
BB
1133 (void) fprintf(stderr, "data error type must be "
1134 "'checksum' or 'io'\n");
5df39c1e 1135 libzfs_fini(g_zfs);
34dc7c2f
BB
1136 return (1);
1137 }
1138
be9a5c35
TC
1139 if (error == EACCES) {
1140 if (type != TYPE_DATA) {
1141 (void) fprintf(stderr, "decryption errors "
1142 "may only be injected for 'data' types\n");
1143 libzfs_fini(g_zfs);
1144 return (1);
1145 }
1146
1147 record.zi_cmd = ZINJECT_DECRYPT_FAULT;
1148 /*
1149 * Internally, ZFS actually uses ECKSUM for decryption
1150 * errors since EACCES is used to indicate the key was
1151 * not found.
1152 */
1153 error = ECKSUM;
1154 } else {
1155 record.zi_cmd = ZINJECT_DATA_FAULT;
1156 }
1157
34dc7c2f 1158 if (translate_record(type, argv[0], range, level, &record, pool,
5df39c1e 1159 dataset) != 0) {
02730c33 1160 libzfs_fini(g_zfs);
34dc7c2f 1161 return (1);
5df39c1e 1162 }
34dc7c2f
BB
1163 if (!error)
1164 error = EIO;
1165 }
1166
1167 /*
1168 * If this is pool-wide metadata, unmount everything. The ioctl() will
1169 * unload the pool, so that we trigger spa-wide reopen of metadata next
1170 * time we access the pool.
1171 */
1172 if (dataset[0] != '\0' && domount) {
5df39c1e 1173 if ((zhp = zfs_open(g_zfs, dataset,
02730c33 1174 ZFS_TYPE_DATASET)) == NULL) {
5df39c1e 1175 libzfs_fini(g_zfs);
34dc7c2f 1176 return (1);
5df39c1e 1177 }
1178 if (zfs_unmount(zhp, NULL, 0) != 0) {
1179 libzfs_fini(g_zfs);
34dc7c2f 1180 return (1);
5df39c1e 1181 }
34dc7c2f
BB
1182 }
1183
1184 record.zi_error = error;
1185
1186 ret = register_handler(pool, flags, &record, quiet);
1187
1188 if (dataset[0] != '\0' && domount)
1189 ret = (zfs_mount(zhp, NULL, 0) != 0);
1190
1191 libzfs_fini(g_zfs);
1192
1193 return (ret);
1194}