]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/zfs_debug.c
Illumos 4390 - I/O errors can corrupt space map when deleting fs/vol
[mirror_zfs.git] / module / zfs / zfs_debug.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 */
25
26 #include <sys/zfs_context.h>
27
28 #if !defined(_KERNEL) || !defined(__linux__)
29 list_t zfs_dbgmsgs;
30 int zfs_dbgmsg_size;
31 kmutex_t zfs_dbgmsgs_lock;
32 int zfs_dbgmsg_maxsize = 4<<20; /* 4MB */
33 #endif
34
/*
 * Bitmask selecting which debugging features are enabled.  When it is
 * still zero at zfs_dbgmsg_init() time a build-dependent default is
 * installed there.
 */
int zfs_flags = 0;
39
40 /*
41 * zfs_recover can be set to nonzero to attempt to recover from
42 * otherwise-fatal errors, typically caused by on-disk corruption. When
43 * set, calls to zfs_panic_recover() will turn into warning messages.
44 * This should only be used as a last resort, as it typically results
45 * in leaked space, or worse.
46 */
47 int zfs_recover = B_FALSE;
48
49 /*
50 * If destroy encounters an EIO while reading metadata (e.g. indirect
51 * blocks), space referenced by the missing metadata can not be freed.
52 * Normally this causes the background destroy to become "stalled", as
53 * it is unable to make forward progress. While in this stalled state,
54 * all remaining space to free from the error-encountering filesystem is
55 * "temporarily leaked". Set this flag to cause it to ignore the EIO,
56 * permanently leak the space from indirect blocks that can not be read,
57 * and continue to free everything else that it can.
58 *
59 * The default, "stalling" behavior is useful if the storage partially
60 * fails (i.e. some but not all i/os fail), and then later recovers. In
61 * this case, we will be able to continue pool operations while it is
62 * partially failed, and when it recovers, we can continue to free the
63 * space, with no leaks. However, note that this case is actually
64 * fairly rare.
65 *
66 * Typically pools either (a) fail completely (but perhaps temporarily,
67 * e.g. a top-level vdev going offline), or (b) have localized,
68 * permanent errors (e.g. disk returns the wrong data due to bit flip or
69 * firmware bug). In case (a), this setting does not matter because the
70 * pool will be suspended and the sync thread will not be able to make
71 * forward progress regardless. In case (b), because the error is
72 * permanent, the best we can do is leak the minimum amount of space,
73 * which is what setting this flag will do. Therefore, it is reasonable
74 * for this flag to normally be set, but we chose the more conservative
75 * approach of not setting it, so that there is no possibility of
76 * leaking space in the "partial temporary" failure case.
77 */
78 int zfs_free_leak_on_eio = B_FALSE;
79
80
81 void
82 zfs_panic_recover(const char *fmt, ...)
83 {
84 va_list adx;
85
86 va_start(adx, fmt);
87 vcmn_err(zfs_recover ? CE_WARN : CE_PANIC, fmt, adx);
88 va_end(adx);
89 }
90
91 /*
92 * Debug logging is enabled by default for production kernel builds.
93 * The overhead for this is negligible and the logs can be valuable when
94 * debugging. For non-production user space builds all debugging except
95 * logging is enabled since performance is no longer a concern.
96 */
97 void
98 zfs_dbgmsg_init(void)
99 {
100 #if !defined(_KERNEL) || !defined(__linux__)
101 list_create(&zfs_dbgmsgs, sizeof (zfs_dbgmsg_t),
102 offsetof(zfs_dbgmsg_t, zdm_node));
103 mutex_init(&zfs_dbgmsgs_lock, NULL, MUTEX_DEFAULT, NULL);
104 #endif
105
106 if (zfs_flags == 0) {
107 #if defined(_KERNEL)
108 zfs_flags = ZFS_DEBUG_DPRINTF;
109 spl_debug_set_mask(spl_debug_get_mask() | SD_DPRINTF);
110 spl_debug_set_subsys(spl_debug_get_subsys() | SS_USER1);
111 #else
112 zfs_flags = ~ZFS_DEBUG_DPRINTF;
113 #endif /* _KERNEL */
114 }
115 }
116
117 void
118 zfs_dbgmsg_fini(void)
119 {
120 #if !defined(_KERNEL) || !defined(__linux__)
121 zfs_dbgmsg_t *zdm;
122
123 while ((zdm = list_remove_head(&zfs_dbgmsgs)) != NULL) {
124 int size = sizeof (zfs_dbgmsg_t) + strlen(zdm->zdm_msg);
125 kmem_free(zdm, size);
126 zfs_dbgmsg_size -= size;
127 }
128 mutex_destroy(&zfs_dbgmsgs_lock);
129 ASSERT0(zfs_dbgmsg_size);
130 #endif
131 }
132
133 #if !defined(_KERNEL) || !defined(__linux__)
134 /*
135 * Print these messages by running:
136 * echo ::zfs_dbgmsg | mdb -k
137 *
138 * Monitor these messages by running:
139 * dtrace -q -n 'zfs-dbgmsg{printf("%s\n", stringof(arg0))}'
140 */
141 void
142 zfs_dbgmsg(const char *fmt, ...)
143 {
144 int size;
145 va_list adx;
146 zfs_dbgmsg_t *zdm;
147
148 va_start(adx, fmt);
149 size = vsnprintf(NULL, 0, fmt, adx);
150 va_end(adx);
151
152 /*
153 * There is one byte of string in sizeof (zfs_dbgmsg_t), used
154 * for the terminating null.
155 */
156 zdm = kmem_alloc(sizeof (zfs_dbgmsg_t) + size, KM_SLEEP);
157 zdm->zdm_timestamp = gethrestime_sec();
158
159 va_start(adx, fmt);
160 (void) vsnprintf(zdm->zdm_msg, size + 1, fmt, adx);
161 va_end(adx);
162
163 DTRACE_PROBE1(zfs__dbgmsg, char *, zdm->zdm_msg);
164
165 mutex_enter(&zfs_dbgmsgs_lock);
166 list_insert_tail(&zfs_dbgmsgs, zdm);
167 zfs_dbgmsg_size += sizeof (zfs_dbgmsg_t) + size;
168 while (zfs_dbgmsg_size > zfs_dbgmsg_maxsize) {
169 zdm = list_remove_head(&zfs_dbgmsgs);
170 size = sizeof (zfs_dbgmsg_t) + strlen(zdm->zdm_msg);
171 kmem_free(zdm, size);
172 zfs_dbgmsg_size -= size;
173 }
174 mutex_exit(&zfs_dbgmsgs_lock);
175 }
176
177 void
178 zfs_dbgmsg_print(const char *tag)
179 {
180 zfs_dbgmsg_t *zdm;
181
182 (void) printf("ZFS_DBGMSG(%s):\n", tag);
183 mutex_enter(&zfs_dbgmsgs_lock);
184 for (zdm = list_head(&zfs_dbgmsgs); zdm;
185 zdm = list_next(&zfs_dbgmsgs, zdm))
186 (void) printf("%s\n", zdm->zdm_msg);
187 mutex_exit(&zfs_dbgmsgs_lock);
188 }
189 #endif
190
#if defined(_KERNEL)
/*
 * Kernel builds register the three tunables defined in this file as
 * int module parameters, each with mode 0644 and a short description.
 */

/* Debugging feature flags. */
module_param(zfs_flags, int, 0644);
MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");

/* Warn instead of panic in zfs_panic_recover(). */
module_param(zfs_recover, int, 0644);
MODULE_PARM_DESC(zfs_recover, "Set to attempt to recover from fatal errors");

/* Leak space rather than stall when a free hits an I/O error. */
module_param(zfs_free_leak_on_eio, int, 0644);
MODULE_PARM_DESC(zfs_free_leak_on_eio,
    "Set to ignore IO errors during free and permanently leak the space");
#endif /* _KERNEL */