]>
Commit | Line | Data |
---|---|---|
976246fa DB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License Version 1.0 (CDDL-1.0). | |
6 | * You can obtain a copy of the license from the top-level file | |
7 | * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. | |
8 | * You may not use this file except in compliance with the license. | |
9 | * | |
10 | * CDDL HEADER END | |
11 | */ | |
12 | ||
13 | /* | |
14 | * Copyright (c) 2016, Intel Corporation. | |
d48091de | 15 | * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com> |
32366649 | 16 | * Copyright (c) 2021 Hewlett Packard Enterprise Development LP |
976246fa DB |
17 | */ |
18 | ||
19 | #include <libnvpair.h> | |
20 | #include <libzfs.h> | |
21 | #include <stddef.h> | |
22 | #include <stdlib.h> | |
23 | #include <string.h> | |
24 | #include <sys/list.h> | |
25 | #include <sys/time.h> | |
26 | #include <sys/sysevent/eventdefs.h> | |
27 | #include <sys/sysevent/dev.h> | |
28 | #include <sys/fm/protocol.h> | |
29 | #include <sys/fm/fs/zfs.h> | |
30 | #include <pthread.h> | |
31 | #include <unistd.h> | |
32 | ||
33 | #include "zfs_agents.h" | |
34 | #include "fmd_api.h" | |
35 | #include "../zed_log.h" | |
36 | ||
37 | /* | |
38 | * agent dispatch code | |
39 | */ | |
40 | ||
41 | static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER; /* held while agent_events is modified by the poster and the consumer thread */ | |
42 | static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER; /* signalled by zfs_agent_post_event() and zfs_agent_fini(); the consumer thread waits on it */ | |
43 | static list_t agent_events; /* list of pending events */ | |
44 | static int agent_exiting; /* set to 1 by zfs_agent_fini() so the consumer thread returns */ | |
45 | ||
46 | typedef struct agent_event { /* one queued event, linked on agent_events */ | |
47 | char ae_class[64]; /* event class, copied with strlcpy() */ | |
48 | char ae_subclass[32]; /* event subclass, copied with strlcpy() */ | |
49 | nvlist_t *ae_nvl; /* duplicate of the poster's nvlist; freed after dispatch */ | |
50 | list_node_t ae_node; /* linkage used by list_create() for agent_events */ | |
51 | } agent_event_t; | |
52 | ||
53 | pthread_t g_agents_tid; /* consumer thread created by zfs_agent_init() */ | |
54 | ||
55 | libzfs_handle_t *g_zfs_hdl; /* handle given to zfs_agent_init(); cleared by zfs_agent_fini() */ | |
56 | ||
57 | /* guid search data */ | |
d48091de | 58 | typedef enum device_type { /* category recorded in gs_vdev_type when a search matches */ |
59 | DEVICE_TYPE_L2ARC, /* l2arc device */ | |
60 | DEVICE_TYPE_SPARE, /* spare device */ | |
61 | DEVICE_TYPE_PRIMARY /* any primary pool storage device */ | |
62 | } device_type_t; | |
63 | ||
976246fa DB |
64 | typedef struct guid_search { /* search key and results shared by zfs_agent_iter_pool()/zfs_agent_iter_vdev() */ |
65 | uint64_t gs_pool_guid; /* out: guid of the pool holding the matched vdev */ | |
66 | uint64_t gs_vdev_guid; /* in: search key when gs_devid is NULL; out: guid of the vdev matched by devid */ | |
d1807f16 | 67 | const char *gs_devid; /* in: search key when non-NULL; out: devid of the vdev matched by guid */ |
d48091de | 68 | device_type_t gs_vdev_type; /* out: which vdev array the match was reached through */ |
69 | uint64_t gs_vdev_expandtime; /* vdev expansion time */ | |
976246fa DB |
70 | } guid_search_t; |
71 | ||
d48091de | 72 | /* |
73 | * Walks the vdev tree recursively looking for a matching devid. | |
74 | * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise. | |
75 | */ | |
76 | static boolean_t | |
976246fa DB |
77 | zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg) |
78 | { | |
79 | guid_search_t *gsp = arg; | |
d1807f16 | 80 | const char *path = NULL; |
976246fa DB |
81 | uint_t c, children; |
82 | nvlist_t **child; | |
55c12724 | 83 | uint64_t vdev_guid; |
976246fa DB |
84 | |
85 | /* | |
86 | * First iterate over any children. | |
87 | */ | |
88 | if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, | |
89 | &child, &children) == 0) { | |
d48091de | 90 | for (c = 0; c < children; c++) { |
91 | if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { | |
92 | gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY; | |
93 | return (B_TRUE); | |
94 | } | |
95 | } | |
976246fa DB |
96 | } |
97 | /* | |
d48091de | 98 | * Iterate over any spares and cache devices |
976246fa | 99 | */ |
d48091de | 100 | if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES, |
101 | &child, &children) == 0) { | |
102 | for (c = 0; c < children; c++) { | |
103 | if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { | |
55c12724 | 104 | gsp->gs_vdev_type = DEVICE_TYPE_SPARE; |
d48091de | 105 | return (B_TRUE); |
106 | } | |
107 | } | |
108 | } | |
109 | if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE, | |
110 | &child, &children) == 0) { | |
111 | for (c = 0; c < children; c++) { | |
112 | if (zfs_agent_iter_vdev(zhp, child[c], gsp)) { | |
55c12724 | 113 | gsp->gs_vdev_type = DEVICE_TYPE_L2ARC; |
d48091de | 114 | return (B_TRUE); |
115 | } | |
116 | } | |
117 | } | |
118 | /* | |
119 | * On a devid match, grab the vdev guid and expansion time, if any. | |
120 | */ | |
4b5c9d9f MA |
121 | if (gsp->gs_devid != NULL && |
122 | (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) && | |
976246fa DB |
123 | (strcmp(gsp->gs_devid, path) == 0)) { |
124 | (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, | |
125 | &gsp->gs_vdev_guid); | |
d48091de | 126 | (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME, |
127 | &gsp->gs_vdev_expandtime); | |
128 | return (B_TRUE); | |
976246fa | 129 | } |
55c12724 AH |
130 | /* |
131 | * Otherwise, on a vdev guid match, grab the devid and expansion | |
132 | * time. The devid might be missing on removal since its not part | |
133 | * of blkid cache and L2ARC VDEV does not contain pool guid in its | |
134 | * blkid, so this is a special case for L2ARC VDEV. | |
135 | */ | |
136 | else if (gsp->gs_vdev_guid != 0 && gsp->gs_devid == NULL && | |
137 | nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID, &vdev_guid) == 0 && | |
138 | gsp->gs_vdev_guid == vdev_guid) { | |
139 | (void) nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, | |
140 | &gsp->gs_devid); | |
141 | (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME, | |
142 | &gsp->gs_vdev_expandtime); | |
143 | return (B_TRUE); | |
144 | } | |
d48091de | 145 | |
146 | return (B_FALSE); | |
976246fa DB |
147 | } |
148 | ||
149 | static int | |
150 | zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg) | |
151 | { | |
152 | guid_search_t *gsp = arg; | |
153 | nvlist_t *config, *nvl; | |
154 | ||
155 | /* | |
156 | * For each vdev in this pool, look for a match by devid | |
157 | */ | |
158 | if ((config = zpool_get_config(zhp, NULL)) != NULL) { | |
159 | if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, | |
160 | &nvl) == 0) { | |
d48091de | 161 | (void) zfs_agent_iter_vdev(zhp, nvl, gsp); |
976246fa DB |
162 | } |
163 | } | |
164 | /* | |
165 | * if a match was found then grab the pool guid | |
166 | */ | |
55c12724 | 167 | if (gsp->gs_vdev_guid && gsp->gs_devid) { |
976246fa DB |
168 | (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, |
169 | &gsp->gs_pool_guid); | |
170 | } | |
171 | ||
172 | zpool_close(zhp); | |
5091867e | 173 | return (gsp->gs_devid != NULL && gsp->gs_vdev_guid != 0); |
976246fa DB |
174 | } |
175 | ||
176 | void | |
177 | zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl) | |
178 | { | |
179 | agent_event_t *event; | |
180 | ||
181 | if (subclass == NULL) | |
182 | subclass = ""; | |
183 | ||
184 | event = malloc(sizeof (agent_event_t)); | |
185 | if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) { | |
186 | if (event) | |
187 | free(event); | |
188 | return; | |
189 | } | |
190 | ||
191 | if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) { | |
192 | class = EC_ZFS; | |
193 | subclass = ESC_ZFS_VDEV_CHECK; | |
194 | } | |
195 | ||
196 | /* | |
d0249a4b BB |
197 | * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport |
198 | * from the vdev_disk layer after a hot unplug. Fortunately we do | |
199 | * get an EC_DEV_REMOVE from our disk monitor and it is a suitable | |
976246fa | 200 | * proxy so we remap it here for the benefit of the diagnosis engine. |
0aacde2e RM |
201 | * Starting in OpenZFS 2.0, we do get FM_RESOURCE_REMOVED from the spa |
202 | * layer. Processing multiple FM_RESOURCE_REMOVED events is not harmful. | |
976246fa DB |
203 | */ |
204 | if ((strcmp(class, EC_DEV_REMOVE) == 0) && | |
205 | (strcmp(subclass, ESC_DISK) == 0) && | |
206 | (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) || | |
207 | nvlist_exists(nvl, DEV_IDENTIFIER))) { | |
208 | nvlist_t *payload = event->ae_nvl; | |
209 | struct timeval tv; | |
210 | int64_t tod[2]; | |
211 | uint64_t pool_guid = 0, vdev_guid = 0; | |
d48091de | 212 | guid_search_t search = { 0 }; |
213 | device_type_t devtype = DEVICE_TYPE_PRIMARY; | |
d1807f16 | 214 | const char *devid = NULL; |
976246fa DB |
215 | |
216 | class = "resource.fs.zfs.removed"; | |
217 | subclass = ""; | |
218 | ||
219 | (void) nvlist_add_string(payload, FM_CLASS, class); | |
55c12724 | 220 | (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER, &devid); |
976246fa DB |
221 | (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid); |
222 | (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid); | |
223 | ||
d48091de | 224 | (void) gettimeofday(&tv, NULL); |
225 | tod[0] = tv.tv_sec; | |
226 | tod[1] = tv.tv_usec; | |
227 | (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2); | |
228 | ||
976246fa | 229 | /* |
55c12724 AH |
230 | * If devid is missing but vdev_guid is available, find devid |
231 | * and pool_guid from vdev_guid. | |
d48091de | 232 | * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or |
233 | * ZFS_EV_POOL_GUID may be missing so find them. | |
976246fa | 234 | */ |
55c12724 AH |
235 | if (devid == NULL || pool_guid == 0 || vdev_guid == 0) { |
236 | if (devid == NULL) | |
237 | search.gs_vdev_guid = vdev_guid; | |
238 | else | |
239 | search.gs_devid = devid; | |
240 | zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search); | |
241 | if (devid == NULL) | |
242 | devid = search.gs_devid; | |
243 | if (pool_guid == 0) | |
244 | pool_guid = search.gs_pool_guid; | |
245 | if (vdev_guid == 0) | |
246 | vdev_guid = search.gs_vdev_guid; | |
247 | devtype = search.gs_vdev_type; | |
32366649 | 248 | } |
976246fa | 249 | |
d48091de | 250 | /* |
251 | * We want to avoid reporting "remove" events coming from | |
252 | * libudev for VDEVs which were expanded recently (10s) and | |
253 | * avoid activating spares in response to partitions being | |
254 | * deleted and created in rapid succession. | |
255 | */ | |
256 | if (search.gs_vdev_expandtime != 0 && | |
257 | search.gs_vdev_expandtime + 10 > tv.tv_sec) { | |
258 | zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' " | |
259 | "for recently expanded device '%s'", EC_DEV_REMOVE, | |
55c12724 AH |
260 | devid); |
261 | fnvlist_free(payload); | |
262 | free(event); | |
d48091de | 263 | goto out; |
976246fa DB |
264 | } |
265 | ||
266 | (void) nvlist_add_uint64(payload, | |
267 | FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid); | |
268 | (void) nvlist_add_uint64(payload, | |
269 | FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid); | |
d48091de | 270 | switch (devtype) { |
271 | case DEVICE_TYPE_L2ARC: | |
272 | (void) nvlist_add_string(payload, | |
273 | FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, | |
274 | VDEV_TYPE_L2CACHE); | |
275 | break; | |
276 | case DEVICE_TYPE_SPARE: | |
277 | (void) nvlist_add_string(payload, | |
278 | FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE); | |
279 | break; | |
280 | case DEVICE_TYPE_PRIMARY: | |
281 | (void) nvlist_add_string(payload, | |
282 | FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK); | |
283 | break; | |
284 | } | |
976246fa DB |
285 | |
286 | zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'", | |
287 | EC_DEV_REMOVE, class); | |
288 | } | |
289 | ||
290 | (void) strlcpy(event->ae_class, class, sizeof (event->ae_class)); | |
291 | (void) strlcpy(event->ae_subclass, subclass, | |
292 | sizeof (event->ae_subclass)); | |
293 | ||
294 | (void) pthread_mutex_lock(&agent_lock); | |
295 | list_insert_tail(&agent_events, event); | |
296 | (void) pthread_mutex_unlock(&agent_lock); | |
297 | ||
d48091de | 298 | out: |
976246fa DB |
299 | (void) pthread_cond_signal(&agent_cond); |
300 | } | |
301 | ||
302 | static void | |
303 | zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl) | |
304 | { | |
305 | /* | |
306 | * The diagnosis engine subscribes to the following events. | |
307 | * On illumos these subscriptions reside in: | |
308 | * /usr/lib/fm/fmd/plugins/zfs-diagnosis.conf | |
309 | */ | |
310 | if (strstr(class, "ereport.fs.zfs.") != NULL || | |
311 | strstr(class, "resource.fs.zfs.") != NULL || | |
312 | strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 || | |
313 | strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 || | |
314 | strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) { | |
315 | fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class); | |
316 | } | |
317 | ||
318 | /* | |
319 | * The retire agent subscribes to the following events. | |
320 | * On illumos these subscriptions reside in: | |
321 | * /usr/lib/fm/fmd/plugins/zfs-retire.conf | |
322 | * | |
4e33ba4c | 323 | * NOTE: faults events come directly from our diagnosis engine |
976246fa DB |
324 | * and will not pass through the zfs kernel module. |
325 | */ | |
326 | if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 || | |
327 | strcmp(class, "resource.fs.zfs.removed") == 0 || | |
328 | strcmp(class, "resource.fs.zfs.statechange") == 0 || | |
329 | strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) { | |
330 | fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class); | |
331 | } | |
332 | ||
333 | /* | |
334 | * The SLM module only consumes disk events and vdev check events | |
335 | * | |
336 | * NOTE: disk events come directly from disk monitor and will | |
337 | * not pass through the zfs kernel module. | |
338 | */ | |
339 | if (strstr(class, "EC_dev_") != NULL || | |
340 | strcmp(class, EC_ZFS) == 0) { | |
341 | (void) zfs_slm_event(class, subclass, nvl); | |
342 | } | |
343 | } | |
344 | ||
345 | /* | |
346 | * Events are consumed and dispatched from this thread | |
347 | * An agent can also post an event so event list lock | |
348 | * is not held when calling an agent. | |
349 | * One event is consumed at a time. | |
350 | */ | |
351 | static void * | |
352 | zfs_agent_consumer_thread(void *arg) | |
353 | { | |
16529f30 AZ |
354 | (void) arg; |
355 | ||
976246fa DB |
356 | for (;;) { |
357 | agent_event_t *event; | |
358 | ||
359 | (void) pthread_mutex_lock(&agent_lock); | |
360 | ||
361 | /* wait for an event to show up */ | |
362 | while (!agent_exiting && list_is_empty(&agent_events)) | |
363 | (void) pthread_cond_wait(&agent_cond, &agent_lock); | |
364 | ||
365 | if (agent_exiting) { | |
366 | (void) pthread_mutex_unlock(&agent_lock); | |
367 | zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: " | |
368 | "exiting"); | |
369 | return (NULL); | |
370 | } | |
371 | ||
b3ad3f48 | 372 | if ((event = list_remove_head(&agent_events)) != NULL) { |
976246fa DB |
373 | (void) pthread_mutex_unlock(&agent_lock); |
374 | ||
375 | /* dispatch to all event subscribers */ | |
376 | zfs_agent_dispatch(event->ae_class, event->ae_subclass, | |
377 | event->ae_nvl); | |
378 | ||
379 | nvlist_free(event->ae_nvl); | |
380 | free(event); | |
381 | continue; | |
382 | } | |
383 | ||
384 | (void) pthread_mutex_unlock(&agent_lock); | |
385 | } | |
386 | ||
387 | return (NULL); | |
388 | } | |
389 | ||
390 | void | |
391 | zfs_agent_init(libzfs_handle_t *zfs_hdl) | |
392 | { | |
393 | fmd_hdl_t *hdl; | |
394 | ||
395 | g_zfs_hdl = zfs_hdl; | |
396 | ||
397 | if (zfs_slm_init() != 0) | |
398 | zed_log_die("Failed to initialize zfs slm"); | |
399 | zed_log_msg(LOG_INFO, "Add Agent: init"); | |
400 | ||
401 | hdl = fmd_module_hdl("zfs-diagnosis"); | |
402 | _zfs_diagnosis_init(hdl); | |
403 | if (!fmd_module_initialized(hdl)) | |
404 | zed_log_die("Failed to initialize zfs diagnosis"); | |
405 | ||
406 | hdl = fmd_module_hdl("zfs-retire"); | |
407 | _zfs_retire_init(hdl); | |
408 | if (!fmd_module_initialized(hdl)) | |
409 | zed_log_die("Failed to initialize zfs retire"); | |
410 | ||
411 | list_create(&agent_events, sizeof (agent_event_t), | |
412 | offsetof(struct agent_event, ae_node)); | |
413 | ||
414 | if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread, | |
415 | NULL) != 0) { | |
416 | list_destroy(&agent_events); | |
417 | zed_log_die("Failed to initialize agents"); | |
418 | } | |
3ef80eef | 419 | pthread_setname_np(g_agents_tid, "agents"); |
976246fa DB |
420 | } |
421 | ||
422 | void | |
423 | zfs_agent_fini(void) | |
424 | { | |
425 | fmd_hdl_t *hdl; | |
426 | agent_event_t *event; | |
427 | ||
428 | agent_exiting = 1; | |
429 | (void) pthread_cond_signal(&agent_cond); | |
430 | ||
431 | /* wait for zfs_enum_pools thread to complete */ | |
432 | (void) pthread_join(g_agents_tid, NULL); | |
433 | ||
434 | /* drain any pending events */ | |
b3ad3f48 | 435 | while ((event = list_remove_head(&agent_events)) != NULL) { |
976246fa DB |
436 | nvlist_free(event->ae_nvl); |
437 | free(event); | |
438 | } | |
439 | ||
440 | list_destroy(&agent_events); | |
441 | ||
442 | if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) { | |
443 | _zfs_retire_fini(hdl); | |
444 | fmd_hdl_unregister(hdl); | |
445 | } | |
446 | if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) { | |
447 | _zfs_diagnosis_fini(hdl); | |
448 | fmd_hdl_unregister(hdl); | |
449 | } | |
450 | ||
451 | zed_log_msg(LOG_INFO, "Add Agent: fini"); | |
452 | zfs_slm_fini(); | |
453 | ||
454 | g_zfs_hdl = NULL; | |
455 | } |