]> git.proxmox.com Git - mirror_zfs.git/blame - cmd/zed/agents/zfs_agents.c
Various ZED fixes
[mirror_zfs.git] / cmd / zed / agents / zfs_agents.c
CommitLineData
976246fa
DB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
6 * You can obtain a copy of the license from the top-level file
7 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
8 * You may not use this file except in compliance with the license.
9 *
10 * CDDL HEADER END
11 */
12
13/*
14 * Copyright (c) 2016, Intel Corporation.
15 */
16
17#include <libnvpair.h>
18#include <libzfs.h>
19#include <stddef.h>
20#include <stdlib.h>
21#include <string.h>
22#include <sys/list.h>
23#include <sys/time.h>
24#include <sys/sysevent/eventdefs.h>
25#include <sys/sysevent/dev.h>
26#include <sys/fm/protocol.h>
27#include <sys/fm/fs/zfs.h>
28#include <pthread.h>
29#include <unistd.h>
30
31#include "zfs_agents.h"
32#include "fmd_api.h"
33#include "../zed_log.h"
34
35/*
36 * agent dispatch code
37 */
38
39static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER;
40static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER;
41static list_t agent_events; /* list of pending events */
42static int agent_exiting;
43
44typedef struct agent_event {
45 char ae_class[64];
46 char ae_subclass[32];
47 nvlist_t *ae_nvl;
48 list_node_t ae_node;
49} agent_event_t;
50
51pthread_t g_agents_tid;
52
53libzfs_handle_t *g_zfs_hdl;
54
55/* guid search data */
56typedef struct guid_search {
57 uint64_t gs_pool_guid;
58 uint64_t gs_vdev_guid;
59 char *gs_devid;
60} guid_search_t;
61
62static void
63zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
64{
65 guid_search_t *gsp = arg;
66 char *path = NULL;
67 uint_t c, children;
68 nvlist_t **child;
69
70 /*
71 * First iterate over any children.
72 */
73 if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
74 &child, &children) == 0) {
75 for (c = 0; c < children; c++)
76 zfs_agent_iter_vdev(zhp, child[c], gsp);
77 return;
78 }
79 /*
80 * On a devid match, grab the vdev guid
81 */
82 if ((gsp->gs_vdev_guid == 0) &&
83 (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
84 (strcmp(gsp->gs_devid, path) == 0)) {
85 (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
86 &gsp->gs_vdev_guid);
87 }
88}
89
90static int
91zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
92{
93 guid_search_t *gsp = arg;
94 nvlist_t *config, *nvl;
95
96 /*
97 * For each vdev in this pool, look for a match by devid
98 */
99 if ((config = zpool_get_config(zhp, NULL)) != NULL) {
100 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
101 &nvl) == 0) {
102 zfs_agent_iter_vdev(zhp, nvl, gsp);
103 }
104 }
105 /*
106 * if a match was found then grab the pool guid
107 */
108 if (gsp->gs_vdev_guid) {
109 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
110 &gsp->gs_pool_guid);
111 }
112
113 zpool_close(zhp);
114 return (gsp->gs_vdev_guid != 0);
115}
116
117void
118zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
119{
120 agent_event_t *event;
121
122 if (subclass == NULL)
123 subclass = "";
124
125 event = malloc(sizeof (agent_event_t));
126 if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) {
127 if (event)
128 free(event);
129 return;
130 }
131
132 if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
133 class = EC_ZFS;
134 subclass = ESC_ZFS_VDEV_CHECK;
135 }
136
137 /*
138 * On ZFS on Linux, we don't get the expected FM_RESOURCE_REMOVED
139 * ereport from vdev_disk layer after a hot unplug. Fortunately we
140 * get a EC_DEV_REMOVE from our disk monitor and it is a suitable
141 * proxy so we remap it here for the benefit of the diagnosis engine.
142 */
143 if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
144 (strcmp(subclass, ESC_DISK) == 0) &&
145 (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
146 nvlist_exists(nvl, DEV_IDENTIFIER))) {
147 nvlist_t *payload = event->ae_nvl;
148 struct timeval tv;
149 int64_t tod[2];
150 uint64_t pool_guid = 0, vdev_guid = 0;
151
152 class = "resource.fs.zfs.removed";
153 subclass = "";
154
155 (void) nvlist_add_string(payload, FM_CLASS, class);
156 (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
157 (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
158
159 /*
160 * For multipath, ZFS_EV_VDEV_GUID is missing so find it.
161 */
162 if (vdev_guid == 0) {
163 guid_search_t search = { 0 };
164
165 (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
166 &search.gs_devid);
167
168 (void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool,
169 &search);
170 pool_guid = search.gs_pool_guid;
171 vdev_guid = search.gs_vdev_guid;
172 }
173
174 (void) nvlist_add_uint64(payload,
175 FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
176 (void) nvlist_add_uint64(payload,
177 FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
178
179 (void) gettimeofday(&tv, NULL);
180 tod[0] = tv.tv_sec;
181 tod[1] = tv.tv_usec;
182 (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
183
184 zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
185 EC_DEV_REMOVE, class);
186 }
187
188 (void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
189 (void) strlcpy(event->ae_subclass, subclass,
190 sizeof (event->ae_subclass));
191
192 (void) pthread_mutex_lock(&agent_lock);
193 list_insert_tail(&agent_events, event);
194 (void) pthread_mutex_unlock(&agent_lock);
195
196 (void) pthread_cond_signal(&agent_cond);
197}
198
199static void
200zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
201{
202 /*
203 * The diagnosis engine subscribes to the following events.
204 * On illumos these subscriptions reside in:
205 * /usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
206 */
207 if (strstr(class, "ereport.fs.zfs.") != NULL ||
208 strstr(class, "resource.fs.zfs.") != NULL ||
209 strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
210 strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
211 strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
212 fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
213 }
214
215 /*
216 * The retire agent subscribes to the following events.
217 * On illumos these subscriptions reside in:
218 * /usr/lib/fm/fmd/plugins/zfs-retire.conf
219 *
4e33ba4c 220 * NOTE: faults events come directly from our diagnosis engine
976246fa
DB
221 * and will not pass through the zfs kernel module.
222 */
223 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
224 strcmp(class, "resource.fs.zfs.removed") == 0 ||
225 strcmp(class, "resource.fs.zfs.statechange") == 0 ||
226 strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
227 fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
228 }
229
230 /*
231 * The SLM module only consumes disk events and vdev check events
232 *
233 * NOTE: disk events come directly from disk monitor and will
234 * not pass through the zfs kernel module.
235 */
236 if (strstr(class, "EC_dev_") != NULL ||
237 strcmp(class, EC_ZFS) == 0) {
238 (void) zfs_slm_event(class, subclass, nvl);
239 }
240}
241
242/*
243 * Events are consumed and dispatched from this thread
244 * An agent can also post an event so event list lock
245 * is not held when calling an agent.
246 * One event is consumed at a time.
247 */
248static void *
249zfs_agent_consumer_thread(void *arg)
250{
251 for (;;) {
252 agent_event_t *event;
253
254 (void) pthread_mutex_lock(&agent_lock);
255
256 /* wait for an event to show up */
257 while (!agent_exiting && list_is_empty(&agent_events))
258 (void) pthread_cond_wait(&agent_cond, &agent_lock);
259
260 if (agent_exiting) {
261 (void) pthread_mutex_unlock(&agent_lock);
262 zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
263 "exiting");
264 return (NULL);
265 }
266
267 if ((event = (list_head(&agent_events))) != NULL) {
268 list_remove(&agent_events, event);
269
270 (void) pthread_mutex_unlock(&agent_lock);
271
272 /* dispatch to all event subscribers */
273 zfs_agent_dispatch(event->ae_class, event->ae_subclass,
274 event->ae_nvl);
275
276 nvlist_free(event->ae_nvl);
277 free(event);
278 continue;
279 }
280
281 (void) pthread_mutex_unlock(&agent_lock);
282 }
283
284 return (NULL);
285}
286
287void
288zfs_agent_init(libzfs_handle_t *zfs_hdl)
289{
290 fmd_hdl_t *hdl;
291
292 g_zfs_hdl = zfs_hdl;
293
294 if (zfs_slm_init() != 0)
295 zed_log_die("Failed to initialize zfs slm");
296 zed_log_msg(LOG_INFO, "Add Agent: init");
297
298 hdl = fmd_module_hdl("zfs-diagnosis");
299 _zfs_diagnosis_init(hdl);
300 if (!fmd_module_initialized(hdl))
301 zed_log_die("Failed to initialize zfs diagnosis");
302
303 hdl = fmd_module_hdl("zfs-retire");
304 _zfs_retire_init(hdl);
305 if (!fmd_module_initialized(hdl))
306 zed_log_die("Failed to initialize zfs retire");
307
308 list_create(&agent_events, sizeof (agent_event_t),
309 offsetof(struct agent_event, ae_node));
310
311 if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
312 NULL) != 0) {
313 list_destroy(&agent_events);
314 zed_log_die("Failed to initialize agents");
315 }
316}
317
318void
319zfs_agent_fini(void)
320{
321 fmd_hdl_t *hdl;
322 agent_event_t *event;
323
324 agent_exiting = 1;
325 (void) pthread_cond_signal(&agent_cond);
326
327 /* wait for zfs_enum_pools thread to complete */
328 (void) pthread_join(g_agents_tid, NULL);
329
330 /* drain any pending events */
331 while ((event = (list_head(&agent_events))) != NULL) {
332 list_remove(&agent_events, event);
333 nvlist_free(event->ae_nvl);
334 free(event);
335 }
336
337 list_destroy(&agent_events);
338
339 if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) {
340 _zfs_retire_fini(hdl);
341 fmd_hdl_unregister(hdl);
342 }
343 if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
344 _zfs_diagnosis_fini(hdl);
345 fmd_hdl_unregister(hdl);
346 }
347
348 zed_log_msg(LOG_INFO, "Add Agent: fini");
349 zfs_slm_fini();
350
351 g_zfs_hdl = NULL;
352}