]>
Commit | Line | Data |
---|---|---|
976246fa DB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License Version 1.0 (CDDL-1.0). | |
6 | * You can obtain a copy of the license from the top-level file | |
7 | * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. | |
8 | * You may not use this file except in compliance with the license. | |
9 | * | |
10 | * CDDL HEADER END | |
11 | */ | |
12 | ||
13 | /* | |
14 | * Copyright (c) 2016, Intel Corporation. | |
d48091de | 15 | * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com> |
976246fa DB |
16 | */ |
17 | ||
18 | #include <libnvpair.h> | |
19 | #include <libzfs.h> | |
20 | #include <stddef.h> | |
21 | #include <stdlib.h> | |
22 | #include <string.h> | |
23 | #include <sys/list.h> | |
24 | #include <sys/time.h> | |
25 | #include <sys/sysevent/eventdefs.h> | |
26 | #include <sys/sysevent/dev.h> | |
27 | #include <sys/fm/protocol.h> | |
28 | #include <sys/fm/fs/zfs.h> | |
29 | #include <pthread.h> | |
30 | #include <unistd.h> | |
31 | ||
32 | #include "zfs_agents.h" | |
33 | #include "fmd_api.h" | |
34 | #include "../zed_log.h" | |
35 | ||
36 | /* | |
37 | * agent dispatch code | |
38 | */ | |
39 | ||
40 | static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER; | |
41 | static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER; | |
42 | static list_t agent_events; /* list of pending events */ | |
43 | static int agent_exiting; | |
44 | ||
45 | typedef struct agent_event { | |
46 | char ae_class[64]; | |
47 | char ae_subclass[32]; | |
48 | nvlist_t *ae_nvl; | |
49 | list_node_t ae_node; | |
50 | } agent_event_t; | |
51 | ||
52 | pthread_t g_agents_tid; | |
53 | ||
54 | libzfs_handle_t *g_zfs_hdl; | |
55 | ||
56 | /* guid search data */ | |
/*
 * Role of the vdev that matched a devid search.  The numeric values are
 * spelled out because a zero-initialized guid_search_t starts out as
 * DEVICE_TYPE_L2ARC.
 */
typedef enum device_type {
	DEVICE_TYPE_L2ARC = 0,		/* l2arc device */
	DEVICE_TYPE_SPARE = 1,		/* spare device */
	DEVICE_TYPE_PRIMARY = 2		/* any primary pool storage device */
} device_type_t;
62 | ||
976246fa DB |
63 | typedef struct guid_search { |
64 | uint64_t gs_pool_guid; | |
65 | uint64_t gs_vdev_guid; | |
66 | char *gs_devid; | |
d48091de | 67 | device_type_t gs_vdev_type; |
68 | uint64_t gs_vdev_expandtime; /* vdev expansion time */ | |
976246fa DB |
69 | } guid_search_t; |
70 | ||
d48091de | 71 | /* |
72 | * Walks the vdev tree recursively looking for a matching devid. | |
73 | * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise. | |
74 | */ | |
75 | static boolean_t | |
976246fa DB |
76 | zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) |
77 | { | |
78 | guid_search_t *gsp = arg; | |
79 | char *path = NULL; | |
80 | uint_t c, children; | |
81 | nvlist_t **child; | |
82 | ||
83 | /* | |
84 | * First iterate over any children. | |
85 | */ | |
86 | if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, | |
87 | &child, &children) == 0) { | |
d48091de | 88 | for (c = 0; c < children; c++) { |
89 | if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { | |
90 | gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY; | |
91 | return (B_TRUE); | |
92 | } | |
93 | } | |
976246fa DB |
94 | } |
95 | /* | |
d48091de | 96 | * Iterate over any spares and cache devices |
976246fa | 97 | */ |
d48091de | 98 | if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES, |
99 | &child, &children) == 0) { | |
100 | for (c = 0; c < children; c++) { | |
101 | if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { | |
102 | gsp->gs_vdev_type = DEVICE_TYPE_L2ARC; | |
103 | return (B_TRUE); | |
104 | } | |
105 | } | |
106 | } | |
107 | if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE, | |
108 | &child, &children) == 0) { | |
109 | for (c = 0; c < children; c++) { | |
110 | if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { | |
111 | gsp->gs_vdev_type = DEVICE_TYPE_SPARE; | |
112 | return (B_TRUE); | |
113 | } | |
114 | } | |
115 | } | |
116 | /* | |
117 | * On a devid match, grab the vdev guid and expansion time, if any. | |
118 | */ | |
119 | if ((nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && | |
976246fa DB |
120 | (strcmp(gsp->gs_devid, path) == 0)) { |
121 | (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, | |
122 | &gsp->gs_vdev_guid); | |
d48091de | 123 | (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME, |
124 | &gsp->gs_vdev_expandtime); | |
125 | return (B_TRUE); | |
976246fa | 126 | } |
d48091de | 127 | |
128 | return (B_FALSE); | |
976246fa DB |
129 | } |
130 | ||
131 | static int | |
132 | zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg) | |
133 | { | |
134 | guid_search_t *gsp = arg; | |
135 | nvlist_t *config, *nvl; | |
136 | ||
137 | /* | |
138 | * For each vdev in this pool, look for a match by devid | |
139 | */ | |
140 | if ((config = zpool_get_config(zhp, NULL)) != NULL) { | |
141 | if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, | |
142 | &nvl) == 0) { | |
d48091de | 143 | (void) zfs_agent_iter_vdev(zhp, nvl, gsp); |
976246fa DB |
144 | } |
145 | } | |
146 | /* | |
147 | * if a match was found then grab the pool guid | |
148 | */ | |
149 | if (gsp->gs_vdev_guid) { | |
150 | (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, | |
151 | &gsp->gs_pool_guid); | |
152 | } | |
153 | ||
154 | zpool_close(zhp); | |
155 | return (gsp->gs_vdev_guid != 0); | |
156 | } | |
157 | ||
158 | void | |
159 | zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) | |
160 | { | |
161 | agent_event_t *event; | |
162 | ||
163 | if (subclass == NULL) | |
164 | subclass = ""; | |
165 | ||
166 | event = malloc(sizeof (agent_event_t)); | |
167 | if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) { | |
168 | if (event) | |
169 | free(event); | |
170 | return; | |
171 | } | |
172 | ||
173 | if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) { | |
174 | class = EC_ZFS; | |
175 | subclass = ESC_ZFS_VDEV_CHECK; | |
176 | } | |
177 | ||
178 | /* | |
179 | * On ZFS on Linux, we don't get the expected FM_RESOURCE_REMOVED | |
180 | * ereport from vdev_disk layer after a hot unplug. Fortunately we | |
181 | * get a EC_DEV_REMOVE from our disk monitor and it is a suitable | |
182 | * proxy so we remap it here for the benefit of the diagnosis engine. | |
183 | */ | |
184 | if ((strcmp(class, EC_DEV_REMOVE) == 0) && | |
185 | (strcmp(subclass, ESC_DISK) == 0) && | |
186 | (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) || | |
187 | nvlist_exists(nvl, DEV_IDENTIFIER))) { | |
188 | nvlist_t *payload = event->ae_nvl; | |
189 | struct timeval tv; | |
190 | int64_t tod[2]; | |
191 | uint64_t pool_guid = 0, vdev_guid = 0; | |
d48091de | 192 | guid_search_t search = { 0 }; |
193 | device_type_t devtype = DEVICE_TYPE_PRIMARY; | |
976246fa DB |
194 | |
195 | class = "resource.fs.zfs.removed"; | |
196 | subclass = ""; | |
197 | ||
198 | (void) nvlist_add_string(payload, FM_CLASS, class); | |
199 | (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid); | |
200 | (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid); | |
201 | ||
d48091de | 202 | (void) gettimeofday(&tv, NULL); |
203 | tod[0] = tv.tv_sec; | |
204 | tod[1] = tv.tv_usec; | |
205 | (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2); | |
206 | ||
976246fa | 207 | /* |
d48091de | 208 | * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or |
209 | * ZFS_EV_POOL_GUID may be missing so find them. | |
976246fa | 210 | */ |
d48091de | 211 | (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, |
212 | &search.gs_devid); | |
213 | (void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search); | |
214 | pool_guid = search.gs_pool_guid; | |
215 | vdev_guid = search.gs_vdev_guid; | |
216 | devtype = search.gs_vdev_type; | |
976246fa | 217 | |
d48091de | 218 | /* |
219 | * We want to avoid reporting "remove" events coming from | |
220 | * libudev for VDEVs which were expanded recently (10s) and | |
221 | * avoid activating spares in response to partitions being | |
222 | * deleted and created in rapid succession. | |
223 | */ | |
224 | if (search.gs_vdev_expandtime != 0 && | |
225 | search.gs_vdev_expandtime + 10 > tv.tv_sec) { | |
226 | zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' " | |
227 | "for recently expanded device '%s'", EC_DEV_REMOVE, | |
228 | search.gs_devid); | |
229 | goto out; | |
976246fa DB |
230 | } |
231 | ||
232 | (void) nvlist_add_uint64(payload, | |
233 | FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid); | |
234 | (void) nvlist_add_uint64(payload, | |
235 | FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid); | |
d48091de | 236 | switch (devtype) { |
237 | case DEVICE_TYPE_L2ARC: | |
238 | (void) nvlist_add_string(payload, | |
239 | FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, | |
240 | VDEV_TYPE_L2CACHE); | |
241 | break; | |
242 | case DEVICE_TYPE_SPARE: | |
243 | (void) nvlist_add_string(payload, | |
244 | FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE); | |
245 | break; | |
246 | case DEVICE_TYPE_PRIMARY: | |
247 | (void) nvlist_add_string(payload, | |
248 | FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK); | |
249 | break; | |
250 | } | |
976246fa DB |
251 | |
252 | zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'", | |
253 | EC_DEV_REMOVE, class); | |
254 | } | |
255 | ||
256 | (void) strlcpy(event->ae_class, class, sizeof (event->ae_class)); | |
257 | (void) strlcpy(event->ae_subclass, subclass, | |
258 | sizeof (event->ae_subclass)); | |
259 | ||
260 | (void) pthread_mutex_lock(&agent_lock); | |
261 | list_insert_tail(&agent_events, event); | |
262 | (void) pthread_mutex_unlock(&agent_lock); | |
263 | ||
d48091de | 264 | out: |
976246fa DB |
265 | (void) pthread_cond_signal(&agent_cond); |
266 | } | |
267 | ||
268 | static void | |
269 | zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl) | |
270 | { | |
271 | /* | |
272 | * The diagnosis engine subscribes to the following events. | |
273 | * On illumos these subscriptions reside in: | |
274 | * /usr/lib/fm/fmd/plugins/zfs-diagnosis.conf | |
275 | */ | |
276 | if (strstr(class, "ereport.fs.zfs.") != NULL || | |
277 | strstr(class, "resource.fs.zfs.") != NULL || | |
278 | strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 || | |
279 | strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 || | |
280 | strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) { | |
281 | fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class); | |
282 | } | |
283 | ||
284 | /* | |
285 | * The retire agent subscribes to the following events. | |
286 | * On illumos these subscriptions reside in: | |
287 | * /usr/lib/fm/fmd/plugins/zfs-retire.conf | |
288 | * | |
4e33ba4c | 289 | * NOTE: faults events come directly from our diagnosis engine |
976246fa DB |
290 | * and will not pass through the zfs kernel module. |
291 | */ | |
292 | if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 || | |
293 | strcmp(class, "resource.fs.zfs.removed") == 0 || | |
294 | strcmp(class, "resource.fs.zfs.statechange") == 0 || | |
295 | strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) { | |
296 | fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class); | |
297 | } | |
298 | ||
299 | /* | |
300 | * The SLM module only consumes disk events and vdev check events | |
301 | * | |
302 | * NOTE: disk events come directly from disk monitor and will | |
303 | * not pass through the zfs kernel module. | |
304 | */ | |
305 | if (strstr(class, "EC_dev_") != NULL || | |
306 | strcmp(class, EC_ZFS) == 0) { | |
307 | (void) zfs_slm_event(class, subclass, nvl); | |
308 | } | |
309 | } | |
310 | ||
311 | /* | |
312 | * Events are consumed and dispatched from this thread | |
313 | * An agent can also post an event so event list lock | |
314 | * is not held when calling an agent. | |
315 | * One event is consumed at a time. | |
316 | */ | |
317 | static void * | |
318 | zfs_agent_consumer_thread(void *arg) | |
319 | { | |
320 | for (;;) { | |
321 | agent_event_t *event; | |
322 | ||
323 | (void) pthread_mutex_lock(&agent_lock); | |
324 | ||
325 | /* wait for an event to show up */ | |
326 | while (!agent_exiting && list_is_empty(&agent_events)) | |
327 | (void) pthread_cond_wait(&agent_cond, &agent_lock); | |
328 | ||
329 | if (agent_exiting) { | |
330 | (void) pthread_mutex_unlock(&agent_lock); | |
331 | zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: " | |
332 | "exiting"); | |
333 | return (NULL); | |
334 | } | |
335 | ||
336 | if ((event = (list_head(&agent_events))) != NULL) { | |
337 | list_remove(&agent_events, event); | |
338 | ||
339 | (void) pthread_mutex_unlock(&agent_lock); | |
340 | ||
341 | /* dispatch to all event subscribers */ | |
342 | zfs_agent_dispatch(event->ae_class, event->ae_subclass, | |
343 | event->ae_nvl); | |
344 | ||
345 | nvlist_free(event->ae_nvl); | |
346 | free(event); | |
347 | continue; | |
348 | } | |
349 | ||
350 | (void) pthread_mutex_unlock(&agent_lock); | |
351 | } | |
352 | ||
353 | return (NULL); | |
354 | } | |
355 | ||
356 | void | |
357 | zfs_agent_init(libzfs_handle_t *zfs_hdl) | |
358 | { | |
359 | fmd_hdl_t *hdl; | |
360 | ||
361 | g_zfs_hdl = zfs_hdl; | |
362 | ||
363 | if (zfs_slm_init() != 0) | |
364 | zed_log_die("Failed to initialize zfs slm"); | |
365 | zed_log_msg(LOG_INFO, "Add Agent: init"); | |
366 | ||
367 | hdl = fmd_module_hdl("zfs-diagnosis"); | |
368 | _zfs_diagnosis_init(hdl); | |
369 | if (!fmd_module_initialized(hdl)) | |
370 | zed_log_die("Failed to initialize zfs diagnosis"); | |
371 | ||
372 | hdl = fmd_module_hdl("zfs-retire"); | |
373 | _zfs_retire_init(hdl); | |
374 | if (!fmd_module_initialized(hdl)) | |
375 | zed_log_die("Failed to initialize zfs retire"); | |
376 | ||
377 | list_create(&agent_events, sizeof (agent_event_t), | |
378 | offsetof(struct agent_event, ae_node)); | |
379 | ||
380 | if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread, | |
381 | NULL) != 0) { | |
382 | list_destroy(&agent_events); | |
383 | zed_log_die("Failed to initialize agents"); | |
384 | } | |
385 | } | |
386 | ||
387 | void | |
388 | zfs_agent_fini(void) | |
389 | { | |
390 | fmd_hdl_t *hdl; | |
391 | agent_event_t *event; | |
392 | ||
393 | agent_exiting = 1; | |
394 | (void) pthread_cond_signal(&agent_cond); | |
395 | ||
396 | /* wait for zfs_enum_pools thread to complete */ | |
397 | (void) pthread_join(g_agents_tid, NULL); | |
398 | ||
399 | /* drain any pending events */ | |
400 | while ((event = (list_head(&agent_events))) != NULL) { | |
401 | list_remove(&agent_events, event); | |
402 | nvlist_free(event->ae_nvl); | |
403 | free(event); | |
404 | } | |
405 | ||
406 | list_destroy(&agent_events); | |
407 | ||
408 | if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) { | |
409 | _zfs_retire_fini(hdl); | |
410 | fmd_hdl_unregister(hdl); | |
411 | } | |
412 | if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) { | |
413 | _zfs_diagnosis_fini(hdl); | |
414 | fmd_hdl_unregister(hdl); | |
415 | } | |
416 | ||
417 | zed_log_msg(LOG_INFO, "Add Agent: fini"); | |
418 | zfs_slm_fini(); | |
419 | ||
420 | g_zfs_hdl = NULL; | |
421 | } |