]>
Commit | Line | Data |
---|---|---|
d02ca379 DB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License Version 1.0 (CDDL-1.0). | |
6 | * You can obtain a copy of the license from the top-level file | |
7 | * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>. | |
8 | * You may not use this file except in compliance with the license. | |
9 | * | |
10 | * CDDL HEADER END | |
11 | */ | |
12 | ||
13 | /* | |
7a4500a1 | 14 | * Copyright (c) 2016, 2017, Intel Corporation. |
d02ca379 DB |
15 | */ |
16 | ||
17 | #ifdef HAVE_LIBUDEV | |
18 | ||
19 | #include <errno.h> | |
20 | #include <fcntl.h> | |
21 | #include <libnvpair.h> | |
22 | #include <libudev.h> | |
23 | #include <libzfs.h> | |
24 | #include <pthread.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
27 | ||
28 | #include <sys/sysevent/eventdefs.h> | |
29 | #include <sys/sysevent/dev.h> | |
30 | ||
31 | #include "zed_log.h" | |
32 | #include "zed_disk_event.h" | |
33 | #include "agents/zfs_agents.h" | |
34 | ||
35 | /* | |
36 | * Portions of ZED need to see disk events for disks belonging to ZFS pools. | |
37 | * A libudev monitor is established to monitor block device actions and pass | |
38 | * them on to internal ZED logic modules. Initially, zfs_mod.c is the only | |
39 | * consumer and is the Linux equivalent for the illumos syseventd ZFS SLM | |
aeb39df7 | 40 | * module responsible for handling disk events for ZFS. |
d02ca379 DB |
41 | */ |
42 | ||
/* libudev context; assigned from udev_new() in zed_disk_event_init() */
struct udev *g_udev;
/* udev monitor handle handed to the monitor thread as its argument */
struct udev_monitor *g_mon;
/* id of the thread running zed_udev_monitor() */
pthread_t g_mon_tid;

#define	DEV_BYID_PATH	"/dev/disk/by-id/"

/*
 * 64MB is minimum usable disk for ZFS
 * (131072 sectors; presumably 512-byte sectors -- TODO confirm)
 */
#define	MINIMUM_SECTORS		131072
52 | ||
53 | ||
54 | /* | |
55 | * Post disk event to SLM module | |
56 | * | |
57 | * occurs in the context of monitor thread | |
58 | */ | |
59 | static void | |
60 | zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl) | |
61 | { | |
62 | char *strval; | |
63 | uint64_t numval; | |
64 | ||
65 | zed_log_msg(LOG_INFO, "zed_disk_event:"); | |
66 | zed_log_msg(LOG_INFO, "\tclass: %s", class); | |
67 | zed_log_msg(LOG_INFO, "\tsubclass: %s", subclass); | |
68 | if (nvlist_lookup_string(nvl, DEV_NAME, &strval) == 0) | |
69 | zed_log_msg(LOG_INFO, "\t%s: %s", DEV_NAME, strval); | |
70 | if (nvlist_lookup_string(nvl, DEV_PATH, &strval) == 0) | |
71 | zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval); | |
72 | if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0) | |
73 | zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval); | |
74 | if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0) | |
75 | zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval); | |
76 | if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0) | |
77 | zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval); | |
78 | if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0) | |
79 | zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval); | |
80 | if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0) | |
81 | zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_VDEV_GUID, numval); | |
82 | ||
976246fa | 83 | (void) zfs_agent_post_event(class, subclass, nvl); |
d02ca379 DB |
84 | } |
85 | ||
86 | /* | |
87 | * dev_event_nvlist: place event schema into an nv pair list | |
88 | * | |
89 | * NAME VALUE (example) | |
90 | * -------------- -------------------------------------------------------- | |
91 | * DEV_NAME /dev/sdl | |
92 | * DEV_PATH /devices/pci0000:00/0000:00:03.0/0000:04:00.0/host0/... | |
93 | * DEV_IDENTIFIER ata-Hitachi_HTS725050A9A362_100601PCG420VLJ37DMC | |
94 | * DEV_PHYS_PATH pci-0000:04:00.0-sas-0x4433221101000000-lun-0 | |
95 | * DEV_IS_PART --- | |
96 | * DEV_SIZE 500107862016 | |
97 | * ZFS_EV_POOL_GUID 17523635698032189180 | |
98 | * ZFS_EV_VDEV_GUID 14663607734290803088 | |
99 | */ | |
100 | static nvlist_t * | |
101 | dev_event_nvlist(struct udev_device *dev) | |
102 | { | |
103 | nvlist_t *nvl; | |
104 | char strval[128]; | |
105 | const char *value, *path; | |
106 | uint64_t guid; | |
107 | ||
108 | if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) | |
109 | return (NULL); | |
110 | ||
111 | if (zfs_device_get_devid(dev, strval, sizeof (strval)) == 0) | |
112 | (void) nvlist_add_string(nvl, DEV_IDENTIFIER, strval); | |
113 | if (zfs_device_get_physical(dev, strval, sizeof (strval)) == 0) | |
114 | (void) nvlist_add_string(nvl, DEV_PHYS_PATH, strval); | |
115 | if ((path = udev_device_get_devnode(dev)) != NULL) | |
116 | (void) nvlist_add_string(nvl, DEV_NAME, path); | |
117 | if ((value = udev_device_get_devpath(dev)) != NULL) | |
118 | (void) nvlist_add_string(nvl, DEV_PATH, value); | |
119 | value = udev_device_get_devtype(dev); | |
120 | if ((value != NULL && strcmp("partition", value) == 0) || | |
121 | (udev_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER") | |
122 | != NULL)) { | |
123 | (void) nvlist_add_boolean(nvl, DEV_IS_PART); | |
124 | } | |
125 | if ((value = udev_device_get_sysattr_value(dev, "size")) != NULL) { | |
126 | uint64_t numval = DEV_BSIZE; | |
127 | ||
128 | numval *= strtoull(value, NULL, 10); | |
129 | (void) nvlist_add_uint64(nvl, DEV_SIZE, numval); | |
130 | } | |
131 | ||
132 | /* | |
133 | * Grab the pool and vdev guids from blkid cache | |
134 | */ | |
135 | value = udev_device_get_property_value(dev, "ID_FS_UUID"); | |
136 | if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0) | |
137 | (void) nvlist_add_uint64(nvl, ZFS_EV_POOL_GUID, guid); | |
138 | ||
139 | value = udev_device_get_property_value(dev, "ID_FS_UUID_SUB"); | |
140 | if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0) | |
141 | (void) nvlist_add_uint64(nvl, ZFS_EV_VDEV_GUID, guid); | |
142 | ||
143 | /* | |
144 | * Either a vdev guid or a devid must be present for matching | |
145 | */ | |
146 | if (!nvlist_exists(nvl, DEV_IDENTIFIER) && | |
147 | !nvlist_exists(nvl, ZFS_EV_VDEV_GUID)) { | |
148 | nvlist_free(nvl); | |
149 | return (NULL); | |
150 | } | |
151 | ||
152 | return (nvl); | |
153 | } | |
154 | ||
155 | /* | |
156 | * Listen for block device uevents | |
157 | */ | |
158 | static void * | |
159 | zed_udev_monitor(void *arg) | |
160 | { | |
161 | struct udev_monitor *mon = arg; | |
6078881a | 162 | char *tmp, *tmp2; |
d02ca379 | 163 | |
95401cb6 | 164 | zed_log_msg(LOG_INFO, "Waiting for new udev disk events..."); |
d02ca379 DB |
165 | |
166 | while (1) { | |
167 | struct udev_device *dev; | |
168 | const char *action, *type, *part, *sectors; | |
169 | const char *bus, *uuid; | |
170 | const char *class, *subclass; | |
171 | nvlist_t *nvl; | |
172 | boolean_t is_zfs = B_FALSE; | |
173 | ||
174 | /* allow a cancellation while blocked (recvmsg) */ | |
175 | pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); | |
176 | ||
177 | /* blocks at recvmsg until an event occurs */ | |
178 | if ((dev = udev_monitor_receive_device(mon)) == NULL) { | |
179 | zed_log_msg(LOG_WARNING, "zed_udev_monitor: receive " | |
180 | "device error %d", errno); | |
181 | continue; | |
182 | } | |
183 | ||
184 | /* allow all steps to complete before a cancellation */ | |
185 | pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); | |
186 | ||
187 | /* | |
4e33ba4c | 188 | * Strongly typed device is the preferred filter |
d02ca379 DB |
189 | */ |
190 | type = udev_device_get_property_value(dev, "ID_FS_TYPE"); | |
191 | if (type != NULL && type[0] != '\0') { | |
192 | if (strcmp(type, "zfs_member") == 0) { | |
193 | is_zfs = B_TRUE; | |
194 | } else { | |
195 | /* not ours, so skip */ | |
196 | zed_log_msg(LOG_INFO, "zed_udev_monitor: skip " | |
197 | "%s (in use by %s)", | |
198 | udev_device_get_devnode(dev), type); | |
199 | udev_device_unref(dev); | |
200 | continue; | |
201 | } | |
202 | } | |
203 | ||
204 | /* | |
205 | * if this is a disk and it is partitioned, then the | |
206 | * zfs label will reside in a DEVTYPE=partition and | |
207 | * we can skip passing this event | |
208 | */ | |
209 | type = udev_device_get_property_value(dev, "DEVTYPE"); | |
210 | part = udev_device_get_property_value(dev, | |
211 | "ID_PART_TABLE_TYPE"); | |
212 | if (type != NULL && type[0] != '\0' && | |
213 | strcmp(type, "disk") == 0 && | |
214 | part != NULL && part[0] != '\0') { | |
215 | /* skip and wait for partition event */ | |
d02ca379 DB |
216 | udev_device_unref(dev); |
217 | continue; | |
218 | } | |
219 | ||
220 | /* | |
221 | * ignore small partitions | |
222 | */ | |
223 | sectors = udev_device_get_property_value(dev, | |
224 | "ID_PART_ENTRY_SIZE"); | |
225 | if (sectors == NULL) | |
226 | sectors = udev_device_get_sysattr_value(dev, "size"); | |
227 | if (sectors != NULL && | |
228 | strtoull(sectors, NULL, 10) < MINIMUM_SECTORS) { | |
229 | udev_device_unref(dev); | |
230 | continue; | |
231 | } | |
232 | ||
233 | /* | |
234 | * If the blkid probe didn't find ZFS, then a persistent | |
235 | * device id string is required in the message schema | |
236 | * for matching with vdevs. Preflight here for expected | |
237 | * udev information. | |
238 | */ | |
239 | bus = udev_device_get_property_value(dev, "ID_BUS"); | |
240 | uuid = udev_device_get_property_value(dev, "DM_UUID"); | |
241 | if (!is_zfs && (bus == NULL && uuid == NULL)) { | |
242 | zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid " | |
243 | "source", udev_device_get_devnode(dev)); | |
244 | udev_device_unref(dev); | |
245 | continue; | |
246 | } | |
247 | ||
248 | action = udev_device_get_action(dev); | |
249 | if (strcmp(action, "add") == 0) { | |
250 | class = EC_DEV_ADD; | |
251 | subclass = ESC_DISK; | |
252 | } else if (strcmp(action, "remove") == 0) { | |
253 | class = EC_DEV_REMOVE; | |
254 | subclass = ESC_DISK; | |
255 | } else if (strcmp(action, "change") == 0) { | |
256 | class = EC_DEV_STATUS; | |
257 | subclass = ESC_DEV_DLE; | |
258 | } else { | |
259 | zed_log_msg(LOG_WARNING, "zed_udev_monitor: %s unknown", | |
260 | action); | |
261 | udev_device_unref(dev); | |
262 | continue; | |
263 | } | |
264 | ||
265 | /* | |
266 | * Special case an EC_DEV_ADD for multipath devices | |
267 | * | |
268 | * When a multipath device is created, udev reports the | |
269 | * following: | |
270 | * | |
271 | * 1. "add" event of the dm device for the multipath device | |
272 | * (like /dev/dm-3). | |
273 | * 2. "change" event to create the actual multipath device | |
274 | * symlink (like /dev/mapper/mpatha). The event also | |
275 | * passes back the relevant DM vars we care about, like | |
276 | * DM_UUID. | |
277 | * 3. Another "change" event identical to #2 (that we ignore). | |
278 | * | |
279 | * To get the behavior we want, we treat the "change" event | |
280 | * in #2 as a "add" event; as if "/dev/mapper/mpatha" was | |
281 | * a new disk being added. | |
282 | */ | |
283 | if (strcmp(class, EC_DEV_STATUS) == 0 && | |
284 | udev_device_get_property_value(dev, "DM_UUID") && | |
285 | udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) { | |
02730c33 | 286 | tmp = (char *)udev_device_get_devnode(dev); |
1bbd8770 | 287 | tmp2 = zfs_get_underlying_path(tmp); |
6078881a TH |
288 | if (tmp && tmp2 && (strcmp(tmp, tmp2) != 0)) { |
289 | /* | |
290 | * We have a real underlying device, which | |
291 | * means that this multipath "change" event is | |
292 | * an "add" event. | |
293 | * | |
294 | * If the multipath device and the underlying | |
295 | * dev are the same name (i.e. /dev/dm-5), then | |
296 | * there is no real underlying disk for this | |
297 | * multipath device, and so this "change" event | |
976246fa | 298 | * really is a multipath removal. |
6078881a TH |
299 | */ |
300 | class = EC_DEV_ADD; | |
301 | subclass = ESC_DISK; | |
302 | } else { | |
976246fa DB |
303 | tmp = (char *) |
304 | udev_device_get_property_value(dev, | |
305 | "DM_NR_VALID_PATHS"); | |
306 | /* treat as a multipath remove */ | |
307 | if (tmp != NULL && strcmp(tmp, "0") == 0) { | |
308 | class = EC_DEV_REMOVE; | |
309 | subclass = ESC_DISK; | |
310 | } | |
6078881a TH |
311 | } |
312 | free(tmp2); | |
d02ca379 DB |
313 | } |
314 | ||
7a4500a1 SV |
315 | /* |
316 | * Special case an EC_DEV_ADD for scsi_debug devices | |
317 | * | |
318 | * These devices require a udevadm trigger command after | |
319 | * creation in order to register the vdev_id scsidebug alias | |
320 | * rule (adds a persistent path (phys_path) used for fault | |
321 | * management automated tests in the ZFS test suite. | |
322 | * | |
323 | * After udevadm trigger command, event registers as a "change" | |
324 | * event but needs to instead be handled as another "add" event | |
325 | * to allow for disk labeling and partitioning to occur. | |
326 | */ | |
327 | if (strcmp(class, EC_DEV_STATUS) == 0 && | |
328 | udev_device_get_property_value(dev, "ID_VDEV") && | |
329 | udev_device_get_property_value(dev, "ID_MODEL")) { | |
330 | const char *id_model, *id_model_sd = "scsi_debug"; | |
331 | ||
332 | id_model = udev_device_get_property_value(dev, | |
333 | "ID_MODEL"); | |
334 | if (strcmp(id_model, id_model_sd) == 0) { | |
335 | class = EC_DEV_ADD; | |
336 | subclass = ESC_DISK; | |
337 | } | |
338 | } | |
339 | ||
d02ca379 DB |
340 | if ((nvl = dev_event_nvlist(dev)) != NULL) { |
341 | zed_udev_event(class, subclass, nvl); | |
342 | nvlist_free(nvl); | |
343 | } | |
344 | ||
345 | udev_device_unref(dev); | |
346 | } | |
347 | ||
348 | return (NULL); | |
349 | } | |
350 | ||
351 | int | |
352 | zed_disk_event_init() | |
353 | { | |
354 | int fd, fflags; | |
355 | ||
356 | if ((g_udev = udev_new()) == NULL) { | |
357 | zed_log_msg(LOG_WARNING, "udev_new failed (%d)", errno); | |
358 | return (-1); | |
359 | } | |
360 | ||
361 | /* Set up a udev monitor for block devices */ | |
362 | g_mon = udev_monitor_new_from_netlink(g_udev, "udev"); | |
363 | udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block", "disk"); | |
364 | udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block", | |
365 | "partition"); | |
366 | udev_monitor_enable_receiving(g_mon); | |
367 | ||
368 | /* Make sure monitoring socket is blocking */ | |
369 | fd = udev_monitor_get_fd(g_mon); | |
370 | if ((fflags = fcntl(fd, F_GETFL)) & O_NONBLOCK) | |
371 | (void) fcntl(fd, F_SETFL, fflags & ~O_NONBLOCK); | |
372 | ||
373 | /* spawn a thread to monitor events */ | |
374 | if (pthread_create(&g_mon_tid, NULL, zed_udev_monitor, g_mon) != 0) { | |
375 | udev_monitor_unref(g_mon); | |
376 | udev_unref(g_udev); | |
377 | zed_log_msg(LOG_WARNING, "pthread_create failed"); | |
378 | return (-1); | |
379 | } | |
380 | ||
381 | zed_log_msg(LOG_INFO, "zed_disk_event_init"); | |
382 | ||
383 | return (0); | |
384 | } | |
385 | ||
386 | void | |
387 | zed_disk_event_fini() | |
388 | { | |
389 | /* cancel monitor thread at recvmsg() */ | |
390 | (void) pthread_cancel(g_mon_tid); | |
391 | (void) pthread_join(g_mon_tid, NULL); | |
392 | ||
393 | /* cleanup udev resources */ | |
394 | udev_monitor_unref(g_mon); | |
395 | udev_unref(g_udev); | |
396 | ||
397 | zed_log_msg(LOG_INFO, "zed_disk_event_fini"); | |
398 | } | |
399 | ||
400 | #else | |
401 | ||
402 | #include "zed_disk_event.h" | |
403 | ||
/*
 * Stub used when ZED is built without libudev: there is nothing to
 * monitor, so initialization always reports success (0).
 */
int
zed_disk_event_init(void)
{
	return (0);
}
409 | ||
/*
 * Stub used when ZED is built without libudev: nothing was set up, so
 * there is nothing to tear down.
 */
void
zed_disk_event_fini(void)
{
}
414 | ||
415 | #endif /* HAVE_LIBUDEV */ |