/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
 * You can obtain a copy of the license from the top-level file
 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
 * You may not use this file except in compliance with the license.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2016, Intel Corporation.
 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
 * Copyright (c) 2021 Hewlett Packard Enterprise Development LP
 */

#include <libnvpair.h>
#include <libzfs.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>
#include <sys/fm/protocol.h>
#include <sys/fm/fs/zfs.h>
#include <pthread.h>
#include <unistd.h>

#include "zfs_agents.h"
#include "fmd_api.h"
#include "../zed_log.h"

/*
 * agent dispatch code
 */

static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER;
static list_t agent_events;	/* list of pending events */
static int agent_exiting;

typedef struct agent_event {
	char ae_class[64];
	char ae_subclass[32];
	nvlist_t *ae_nvl;
	list_node_t ae_node;
} agent_event_t;

pthread_t g_agents_tid;

libzfs_handle_t *g_zfs_hdl;

/* guid search data */
typedef enum device_type {
	DEVICE_TYPE_L2ARC,	/* l2arc device */
	DEVICE_TYPE_SPARE,	/* spare device */
	DEVICE_TYPE_PRIMARY	/* any primary pool storage device */
} device_type_t;

typedef struct guid_search {
	uint64_t gs_pool_guid;
	uint64_t gs_vdev_guid;
	char *gs_devid;
	device_type_t gs_vdev_type;
	uint64_t gs_vdev_expandtime;	/* vdev expansion time */
} guid_search_t;
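
/*
 * A search is seeded with either a devid (gs_devid) or a vdev guid
 * (gs_vdev_guid); iterating over the imported pools then fills in
 * whichever identifiers were missing, along with the pool guid,
 * vdev type, and expansion time.
 */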

/*
 * Walks the vdev tree recursively looking for a matching devid.
 * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
 */
static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
	guid_search_t *gsp = arg;
	char *path = NULL;
	uint_t c, children;
	nvlist_t **child;
	uint64_t vdev_guid;

	/*
	 * First iterate over any children.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
				return (B_TRUE);
			}
		}
	}
	/*
	 * Iterate over any spares and cache devices.
	 */
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
				return (B_TRUE);
			}
		}
	}
	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
	    &child, &children) == 0) {
		for (c = 0; c < children; c++) {
			if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
				gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
				return (B_TRUE);
			}
		}
	}
	/*
	 * On a devid match, grab the vdev guid and expansion time, if any.
	 */
	if (gsp->gs_devid != NULL &&
	    (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
	    (strcmp(gsp->gs_devid, path) == 0)) {
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
		    &gsp->gs_vdev_guid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}
	/*
	 * Otherwise, on a vdev guid match, grab the devid and expansion
	 * time. The devid might be missing on removal since it is not
	 * part of the blkid cache, and an L2ARC vdev does not carry the
	 * pool guid in its blkid label, so this is a special case for
	 * L2ARC vdevs.
	 */
	else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL &&
	    nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 &&
	    gsp->gs_vdev_guid == vdev_guid) {
		(void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID,
		    &gsp->gs_devid);
		(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
		    &gsp->gs_vdev_expandtime);
		return (B_TRUE);
	}

	return (B_FALSE);
}

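/*
 * zpool_iter() callback: search a single pool for the device described
 * by the guid_search_t argument. A non-zero return stops the pool
 * iteration, so the walk ends at the first pool containing a match.
 */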
static int
zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
{
	guid_search_t *gsp = arg;
	nvlist_t *config, *nvl;

	/*
	 * For each vdev in this pool, look for a match by devid.
	 */
	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
		    &nvl) == 0) {
			(void) zfs_agent_iter_vdev(zhp, nvl, gsp);
		}
	}
	/*
	 * If a match was found, grab the pool guid.
	 */
	if (gsp->gs_vdev_guid && gsp->gs_devid) {
		(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
		    &gsp->gs_pool_guid);
	}

	zpool_close(zhp);
	return (gsp->gs_vdev_guid != 0);
}

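/*
 * Post an event for the agents to consume, for example:
 *
 *	zfs_agent_post_event(EC_DEV_REMOVE, ESC_DISK, nvl);
 *
 * The event is queued here and dispatched asynchronously by the agent
 * consumer thread; the agents themselves may also post events. Disk
 * removal events are remapped to "resource.fs.zfs.removed" resource
 * events before queuing (see below).
 */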
void
zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
{
	agent_event_t *event;

	if (subclass == NULL)
		subclass = "";

	event = malloc(sizeof (agent_event_t));
	if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
		if (event)
			free(event);
		return;
	}

	if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
		class = EC_ZFS;
		subclass = ESC_ZFS_VDEV_CHECK;
	}

	/*
	 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
	 * from the vdev_disk layer after a hot unplug. Fortunately we do
	 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
	 * proxy, so we remap it here for the benefit of the diagnosis engine.
	 * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa
	 * layer. Processing multiple FM_RESOURCE_REMOVED events is not harmful.
	 */
	if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
	    (strcmp(subclass, ESC_DISK) == 0) &&
	    (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
	    nvlist_exists(nvl, DEV_IDENTIFIER))) {
		nvlist_t *payload = event->ae_nvl;
		struct timeval tv;
		int64_t tod[2];
		uint64_t pool_guid = 0, vdev_guid = 0;
		guid_search_t search = { 0 };
		device_type_t devtype = DEVICE_TYPE_PRIMARY;
		char *devid = NULL;

		class = "resource.fs.zfs.removed";
		subclass = "";

		(void) nvlist_add_string(payload, FM_CLASS, class);
		(void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
		(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);

		(void) gettimeofday(&tv, NULL);
		tod[0] = tv.tv_sec;
		tod[1] = tv.tv_usec;
		(void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);

		/*
		 * If the devid is missing but the vdev guid is available,
		 * look up the devid and pool guid from the vdev guid. For
		 * multipath, spare, and l2arc devices, ZFS_EV_VDEV_GUID or
		 * ZFS_EV_POOL_GUID may also be missing, so find those too.
		 */
		if (devid == NULL || pool_guid == 0 || vdev_guid == 0) {
			if (devid == NULL)
				search.gs_vdev_guid = vdev_guid;
			else
				search.gs_devid = devid;
			zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
			if (devid == NULL)
				devid = search.gs_devid;
			if (pool_guid == 0)
				pool_guid = search.gs_pool_guid;
			if (vdev_guid == 0)
				vdev_guid = search.gs_vdev_guid;
			devtype = search.gs_vdev_type;
		}

		/*
		 * We want to avoid reporting "remove" events coming from
		 * libudev for VDEVs which were expanded recently (10s) and
		 * avoid activating spares in response to partitions being
		 * deleted and created in rapid succession.
		 */
		if (search.gs_vdev_expandtime != 0 &&
		    search.gs_vdev_expandtime + 10 > tv.tv_sec) {
			zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
			    "for recently expanded device '%s'", EC_DEV_REMOVE,
			    devid);
			fnvlist_free(payload);
			free(event);
			goto out;
		}

		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
		(void) nvlist_add_uint64(payload,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
		switch (devtype) {
		case DEVICE_TYPE_L2ARC:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
			    VDEV_TYPE_L2CACHE);
			break;
		case DEVICE_TYPE_SPARE:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
			break;
		case DEVICE_TYPE_PRIMARY:
			(void) nvlist_add_string(payload,
			    FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
			break;
		}

		zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
		    EC_DEV_REMOVE, class);
	}

	(void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
	(void) strlcpy(event->ae_subclass, subclass,
	    sizeof (event->ae_subclass));

	(void) pthread_mutex_lock(&agent_lock);
	list_insert_tail(&agent_events, event);
	(void) pthread_mutex_unlock(&agent_lock);

out:
	(void) pthread_cond_signal(&agent_cond);
}

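/*
 * Deliver one event to every agent that subscribes to its class. The
 * subscriptions can overlap: a "resource.fs.zfs.removed" event, for
 * example, is received by both the diagnosis engine and the retire
 * agent below.
 */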
static void
zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
{
	/*
	 * The diagnosis engine subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 * /usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
	 */
	if (strstr(class, "ereport.fs.zfs.") != NULL ||
	    strstr(class, "resource.fs.zfs.") != NULL ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
	}

	/*
	 * The retire agent subscribes to the following events.
	 * On illumos these subscriptions reside in:
	 * /usr/lib/fm/fmd/plugins/zfs-retire.conf
	 *
	 * NOTE: fault events come directly from our diagnosis engine
	 * and will not pass through the zfs kernel module.
	 */
	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
	    strcmp(class, "resource.fs.zfs.removed") == 0 ||
	    strcmp(class, "resource.fs.zfs.statechange") == 0 ||
	    strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
		fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
	}

	/*
	 * The SLM module only consumes disk events and vdev check events.
	 *
	 * NOTE: disk events come directly from the disk monitor and will
	 * not pass through the zfs kernel module.
	 */
	if (strstr(class, "EC_dev_") != NULL ||
	    strcmp(class, EC_ZFS) == 0) {
		(void) zfs_slm_event(class, subclass, nvl);
	}
}

/*
 * Events are consumed and dispatched from this thread. An agent can
 * also post an event, so the event list lock is not held when calling
 * an agent. One event is consumed at a time.
 */
static void *
zfs_agent_consumer_thread(void *arg)
{
	(void) arg;

	for (;;) {
		agent_event_t *event;

		(void) pthread_mutex_lock(&agent_lock);

		/* wait for an event to show up */
		while (!agent_exiting && list_is_empty(&agent_events))
			(void) pthread_cond_wait(&agent_cond, &agent_lock);

		if (agent_exiting) {
			(void) pthread_mutex_unlock(&agent_lock);
			zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
			    "exiting");
			return (NULL);
		}

		if ((event = (list_head(&agent_events))) != NULL) {
			list_remove(&agent_events, event);

			(void) pthread_mutex_unlock(&agent_lock);

			/* dispatch to all event subscribers */
			zfs_agent_dispatch(event->ae_class, event->ae_subclass,
			    event->ae_nvl);

			nvlist_free(event->ae_nvl);
			free(event);
			continue;
		}

		(void) pthread_mutex_unlock(&agent_lock);
	}

	return (NULL);
}

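/*
 * Start the agents: initialize the SLM, diagnosis, and retire modules,
 * then create the consumer thread that feeds them queued events.
 */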
void
zfs_agent_init(libzfs_handle_t *zfs_hdl)
{
	fmd_hdl_t *hdl;

	g_zfs_hdl = zfs_hdl;

	if (zfs_slm_init() != 0)
		zed_log_die("Failed to initialize zfs slm");
	zed_log_msg(LOG_INFO, "Add Agent: init");

	hdl = fmd_module_hdl("zfs-diagnosis");
	_zfs_diagnosis_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs diagnosis");

	hdl = fmd_module_hdl("zfs-retire");
	_zfs_retire_init(hdl);
	if (!fmd_module_initialized(hdl))
		zed_log_die("Failed to initialize zfs retire");

	list_create(&agent_events, sizeof (agent_event_t),
	    offsetof(struct agent_event, ae_node));

	if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
	    NULL) != 0) {
		list_destroy(&agent_events);
		zed_log_die("Failed to initialize agents");
	}
	pthread_setname_np(g_agents_tid, "agents");
}

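/*
 * Stop the agents: signal the consumer thread to exit and wait for it,
 * drain any still-queued events, then tear down the retire and
 * diagnosis modules.
 */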
void
zfs_agent_fini(void)
{
	fmd_hdl_t *hdl;
	agent_event_t *event;

	agent_exiting = 1;
	(void) pthread_cond_signal(&agent_cond);

	/* wait for the agent consumer thread to complete */
	(void) pthread_join(g_agents_tid, NULL);

	/* drain any pending events */
	while ((event = (list_head(&agent_events))) != NULL) {
		list_remove(&agent_events, event);
		nvlist_free(event->ae_nvl);
		free(event);
	}

	list_destroy(&agent_events);

	if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
		_zfs_retire_fini(hdl);
		fmd_hdl_unregister(hdl);
	}
	if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
		_zfs_diagnosis_fini(hdl);
		fmd_hdl_unregister(hdl);
	}

	zed_log_msg(LOG_INFO, "Add Agent: fini");
	zfs_slm_fini();

	g_zfs_hdl = NULL;
}