/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2016, Intel Corporation.
 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
 */

#include <libnvpair.h>
#include <libzfs.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <pthread.h>
#include <unistd.h>

#include "zfs_agents.h"
#include "fmd_api.h"
#include "../zed_log.h"

/*
 * agent dispatch code
 */
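
/*
 * Events posted via zfs_agent_post_event() are queued on the
 * agent_events list and consumed one at a time by
 * zfs_agent_consumer_thread(), which hands each event to
 * zfs_agent_dispatch() for delivery to the subscribing agents.
 */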

static pthread_mutex_t	agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	agent_cond = PTHREAD_COND_INITIALIZER;
static list_t		agent_events;	/* list of pending events */
static int		agent_exiting;

typedef struct agent_event {
	char		ae_class[64];
	char		ae_subclass[32];
	nvlist_t	*ae_nvl;
	list_node_t	ae_node;
} agent_event_t;

pthread_t g_agents_tid;

libzfs_handle_t *g_zfs_hdl;

/* guid search data */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;
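
/*
 * The matched device type selects the vdev-type string attached to a
 * synthesized "removed" event in zfs_agent_post_event() below:
 * DEVICE_TYPE_L2ARC maps to VDEV_TYPE_L2CACHE, DEVICE_TYPE_SPARE to
 * VDEV_TYPE_SPARE, and DEVICE_TYPE_PRIMARY to VDEV_TYPE_DISK.
 */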

typedef struct guid_search {
	uint64_t	gs_pool_guid;
	uint64_t	gs_vdev_guid;
	char		*gs_devid;
	device_type_t	gs_vdev_type;
	uint64_t	gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;

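/*
 * A guid_search_t is zeroed, seeded with the devid taken from the disk
 * event, and then driven across every imported pool, for example (the
 * same pattern used in zfs_agent_post_event() below):
 *
 *	guid_search_t search = { 0 };
 *	(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &search.gs_devid);
 *	(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
 *
 * On return, gs_pool_guid and gs_vdev_guid identify the matching vdev,
 * or remain zero if no pool currently uses the device.
 */
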
/*
 * Walks the vdev tree recursively looking for a matching devid.
 * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
 */
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
	guid_search_t *gsp = arg;
	char *path = NULL;
	uint_t c, children;
	nvlist_t **child;

	/*
	 * First iterate over any children.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
				return (B_TRUE);
			}
		}
	}
	/*
	 * Iterate over any spares and cache devices.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
				return (B_TRUE);
			}
		}
	}
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
				return (B_TRUE);
			}
		}
	}
	/*
	 * On a devid match, grab the vdev guid and expansion time, if any.
	 */
	if (gsp->gs_devid != NULL &&
	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
	    (strcmp(gsp->gs_devid, path) == 0)) {
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
		    &gsp->gs_vdev_guid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}

	return (B_FALSE);
}

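/*
 * zpool_iter() callback: searches one pool's config for the devid in
 * *arg. Returning nonzero from a zpool_iter() callback stops the
 * iteration, so the walk ends at the first pool with a match. The pool
 * handle is closed here because zpool_iter() leaves that to the
 * callback.
 */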
static int
zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
{
	guid_search_t *gsp = arg;
	nvlist_t *config, *nvl;

	/*
	 * For each vdev in this pool, look for a match by devid.
	 */
	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvl) == 0) {
			(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
		}
	}
	/*
	 * If a match was found, grab the pool guid.
	 */
	if (gsp->gs_vdev_guid) {
		(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &gsp->gs_pool_guid);
	}

	zpool_close(zhp);
	return (gsp->gs_vdev_guid != 0);
}

void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
	agent_event_t *event;

	if (subclass == NULL)
		subclass = "";

	event = malloc(sizeof (agent_event_t));
	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
		if (event)
			free(event);
		return;
	}

	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
		class = EC_ZFS;
		subclass = ESC_ZFS_VDEV_CHECK;
	}

	/*
	 * On ZFS on Linux, we don't get the expected FM_RESOURCE_REMOVED
	 * ereport from the vdev_disk layer after a hot unplug. Fortunately,
	 * we do get an EC_DEV_REMOVE from our disk monitor, which is a
	 * suitable proxy, so we remap it here for the benefit of the
	 * diagnosis engine.
	 */
	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
	    (strcmp(subclass, ESC_DISK) == 0) &&
	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
		nvlist_t *payload = event->ae_nvl;
		struct timeval tv;
		int64_t tod[2];
		uint64_t pool_guid = 0, vdev_guid = 0;
		guid_search_t search = { 0 };
		device_type_t devtype = DEVICE_TYPE_PRIMARY;

		class = "resource.fs.zfs.removed";
		subclass = "";

		(void) nvlist_add_string(payload, FM_CLASS, class);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

		(void) gettimeofday(&tv, NULL);
		tod[0] = tv.tv_sec;
		tod[1] = tv.tv_usec;
		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

		/*
		 * For multipath, spare, and l2arc devices, ZFS_EV_VDEV_GUID
		 * or ZFS_EV_POOL_GUID may be missing, so look them up by
		 * devid instead.
		 */
		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
		    &search.gs_devid);
		(void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
		pool_guid = search.gs_pool_guid;
		vdev_guid = search.gs_vdev_guid;
		devtype = search.gs_vdev_type;

		/*
		 * We want to avoid reporting "remove" events coming from
		 * libudev for VDEVs which were expanded recently (10s) and
		 * avoid activating spares in response to partitions being
		 * deleted and created in rapid succession.
		 */
		if (search.gs_vdev_expandtime != 0 &&
		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
			    "for recently expanded device '%s'", EC_DEV_REMOVE,
			    search.gs_devid);
			nvlist_free(event->ae_nvl);
			free(event);
			goto out;
		}

		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
		switch (devtype) {
		case DEVICE_TYPE_L2ARC:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
			    VDEV_TYPE_L2CACHE);
			break;
		case DEVICE_TYPE_SPARE:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
			break;
		case DEVICE_TYPE_PRIMARY:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
			break;
		}

		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
		    EC_DEV_REMOVE, class);
	}

	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
	(void) strlcpy(event->ae_subclass, subclass,
	    sizeof (event->ae_subclass));

	(void) pthread_mutex_lock(&agent_lock);
	list_insert_tail(&agent_events, event);
	(void) pthread_mutex_unlock(&agent_lock);

out:
	(void) pthread_cond_signal(&agent_cond);
}
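
/*
 * For reference, a remapped removal event posted above carries roughly
 * the following payload (values illustrative), in addition to whatever
 * the original disk event contained, since the payload starts as a dup
 * of the incoming nvlist:
 *
 *	FM_CLASS = "resource.fs.zfs.removed"
 *	FM_EREPORT_TIME = [ tv_sec, tv_usec ]
 *	FM_EREPORT_PAYLOAD_ZFS_POOL_GUID = <pool guid, or 0 if unknown>
 *	FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID = <vdev guid, or 0 if unknown>
 *	FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE = "disk" | "spare" | "l2cache"
 */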

static void
zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
{
	/*
	 * The diagnosis engine subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
	 */
	if (strstr(class, "ereport.fs.zfs.") != NULL ||
	    strstr(class, "resource.fs.zfs.") != NULL ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
	}

	/*
	 * The retire agent subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 *	/usr/lib/fm/fmd/plugins/zfs-retire.conf
	 *
	 * NOTE: fault events come directly from our diagnosis engine
	 * and will not pass through the zfs kernel module.
	 */
	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
	}

	/*
	 * The SLM module only consumes disk events and vdev check events.
	 *
	 * NOTE: disk events come directly from the disk monitor and will
	 * not pass through the zfs kernel module.
	 */
	if (strstr(class, "EC_dev_") != NULL ||
	    strcmp(class, EC_ZFS) == 0) {
		(void) zfs_slm_event(class, subclass, nvl);
	}
}
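
/*
 * Note: FM_LIST_SUSPECT_CLASS is the FM protocol "list.suspect" class
 * published by the diagnosis engine when it closes a case, which is
 * how fault verdicts reach the retire agent above without ever passing
 * through the zfs kernel module.
 */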

/*
 * Events are consumed and dispatched from this thread. An agent can
 * also post an event, so the event list lock is not held when calling
 * an agent. One event is consumed at a time.
 */
static void *
zfs_agent_consumer_thread(void *arg)
{
	for (;;) {
		agent_event_t *event;

		(void) pthread_mutex_lock(&agent_lock);

		/* wait for an event to show up */
		while (!agent_exiting && list_is_empty(&agent_events))
			(void) pthread_cond_wait(&agent_cond, &agent_lock);

		if (agent_exiting) {
			(void) pthread_mutex_unlock(&agent_lock);
			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
			    "exiting");
			return (NULL);
		}

		if ((event = (list_head(&agent_events))) != NULL) {
			list_remove(&agent_events, event);

			(void) pthread_mutex_unlock(&agent_lock);

			/* dispatch to all event subscribers */
			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
			    event->ae_nvl);

			nvlist_free(event->ae_nvl);
			free(event);
			continue;
		}

		(void) pthread_mutex_unlock(&agent_lock);
	}

	return (NULL);
}

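/*
 * Lifecycle sketch (illustrative only; error handling elided): the
 * embedding daemon owns a libzfs handle and brackets its event loop
 * with the init/fini pair below, e.g.:
 *
 *	libzfs_handle_t *hdl = libzfs_init();
 *	zfs_agent_init(hdl);
 *	...	post events with zfs_agent_post_event()	...
 *	zfs_agent_fini();
 *	libzfs_fini(hdl);
 */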
void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
{
	fmd_hdl_t *hdl;

	g_zfs_hdl = zfs_hdl;

	if (zfs_slm_init() != 0)
		zed_log_die("Failed to initialize zfs slm");
	zed_log_msg(LOG_INFO, "Add Agent: init");

	hdl = fmd_module_hdl("zfs-diagnosis");
	_zfs_diagnosis_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs diagnosis");

	hdl = fmd_module_hdl("zfs-retire");
	_zfs_retire_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs retire");

	list_create(&agent_events, sizeof (agent_event_t),
	    offsetof(struct agent_event, ae_node));

	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
	    NULL) != 0) {
		list_destroy(&agent_events);
		zed_log_die("Failed to initialize agents");
	}
}

void
zfs_agent_fini(void)
{
	fmd_hdl_t *hdl;
	agent_event_t *event;

	agent_exiting = 1;
	(void) pthread_cond_signal(&agent_cond);

	/* wait for the agent consumer thread to complete */
	(void) pthread_join(g_agents_tid, NULL);

	/* drain any pending events */
	while ((event = (list_head(&agent_events))) != NULL) {
		list_remove(&agent_events, event);
		nvlist_free(event->ae_nvl);
		free(event);
	}

	list_destroy(&agent_events);

	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
		_zfs_retire_fini(hdl);
		fmd_hdl_unregister(hdl);
	}
	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
		_zfs_diagnosis_fini(hdl);
		fmd_hdl_unregister(hdl);
	}

	zed_log_msg(LOG_INFO, "Add Agent: fini");
	zfs_slm_fini();

	g_zfs_hdl = NULL;
}