]> git.proxmox.com Git - mirror_zfs.git/blame - cmd/zed/zed_disk_event.c
Enable remaining tests
[mirror_zfs.git] / cmd / zed / zed_disk_event.c
CommitLineData
d02ca379
DB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
6 * You can obtain a copy of the license from the top-level file
7 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
8 * You may not use this file except in compliance with the license.
9 *
10 * CDDL HEADER END
11 */
12
13/*
7a4500a1 14 * Copyright (c) 2016, 2017, Intel Corporation.
d02ca379
DB
15 */
16
17#ifdef HAVE_LIBUDEV
18
19#include <errno.h>
20#include <fcntl.h>
21#include <libnvpair.h>
22#include <libudev.h>
23#include <libzfs.h>
24#include <pthread.h>
25#include <stdlib.h>
26#include <string.h>
27
28#include <sys/sysevent/eventdefs.h>
29#include <sys/sysevent/dev.h>
30
31#include "zed_log.h"
32#include "zed_disk_event.h"
33#include "agents/zfs_agents.h"
34
/*
 * Portions of ZED need to see disk events for disks belonging to ZFS pools.
 * A libudev monitor is established to monitor block device actions and pass
 * them on to internal ZED logic modules. Initially, zfs_mod.c is the only
 * consumer and is the Linux equivalent for the illumos syseventd ZFS SLM
 * module responsible for handling disk events for ZFS.
 */
42
/* Id of the thread running zed_udev_monitor(); set by pthread_create() */
pthread_t g_mon_tid;
/* libudev context obtained from udev_new() in zed_disk_event_init() */
struct udev *g_udev;
/* udev netlink monitor that the monitor thread receives devices from */
struct udev_monitor *g_mon;


#define	DEV_BYID_PATH	"/dev/disk/by-id/"

/*
 * 64MB is minimum usable disk for ZFS
 * (131072 sectors, assuming 512-byte sectors)
 */
#define	MINIMUM_SECTORS	131072
52
53
54/*
55 * Post disk event to SLM module
56 *
57 * occurs in the context of monitor thread
58 */
59static void
60zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
61{
62 char *strval;
63 uint64_t numval;
64
65 zed_log_msg(LOG_INFO, "zed_disk_event:");
66 zed_log_msg(LOG_INFO, "\tclass: %s", class);
67 zed_log_msg(LOG_INFO, "\tsubclass: %s", subclass);
68 if (nvlist_lookup_string(nvl, DEV_NAME, &strval) == 0)
69 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_NAME, strval);
70 if (nvlist_lookup_string(nvl, DEV_PATH, &strval) == 0)
71 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
72 if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
73 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
74 if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
75 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
76 if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
77 zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
78 if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
79 zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
80 if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
81 zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_VDEV_GUID, numval);
82
976246fa 83 (void) zfs_agent_post_event(class, subclass, nvl);
d02ca379
DB
84}
85
86/*
87 * dev_event_nvlist: place event schema into an nv pair list
88 *
89 * NAME VALUE (example)
90 * -------------- --------------------------------------------------------
91 * DEV_NAME /dev/sdl
92 * DEV_PATH /devices/pci0000:00/0000:00:03.0/0000:04:00.0/host0/...
93 * DEV_IDENTIFIER ata-Hitachi_HTS725050A9A362_100601PCG420VLJ37DMC
94 * DEV_PHYS_PATH pci-0000:04:00.0-sas-0x4433221101000000-lun-0
95 * DEV_IS_PART ---
96 * DEV_SIZE 500107862016
97 * ZFS_EV_POOL_GUID 17523635698032189180
98 * ZFS_EV_VDEV_GUID 14663607734290803088
99 */
100static nvlist_t *
101dev_event_nvlist(struct udev_device *dev)
102{
103 nvlist_t *nvl;
104 char strval[128];
105 const char *value, *path;
106 uint64_t guid;
107
108 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
109 return (NULL);
110
111 if (zfs_device_get_devid(dev, strval, sizeof (strval)) == 0)
112 (void) nvlist_add_string(nvl, DEV_IDENTIFIER, strval);
113 if (zfs_device_get_physical(dev, strval, sizeof (strval)) == 0)
114 (void) nvlist_add_string(nvl, DEV_PHYS_PATH, strval);
115 if ((path = udev_device_get_devnode(dev)) != NULL)
116 (void) nvlist_add_string(nvl, DEV_NAME, path);
117 if ((value = udev_device_get_devpath(dev)) != NULL)
118 (void) nvlist_add_string(nvl, DEV_PATH, value);
119 value = udev_device_get_devtype(dev);
120 if ((value != NULL && strcmp("partition", value) == 0) ||
121 (udev_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER")
122 != NULL)) {
123 (void) nvlist_add_boolean(nvl, DEV_IS_PART);
124 }
125 if ((value = udev_device_get_sysattr_value(dev, "size")) != NULL) {
126 uint64_t numval = DEV_BSIZE;
127
128 numval *= strtoull(value, NULL, 10);
129 (void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
130 }
131
132 /*
133 * Grab the pool and vdev guids from blkid cache
134 */
135 value = udev_device_get_property_value(dev, "ID_FS_UUID");
136 if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
137 (void) nvlist_add_uint64(nvl, ZFS_EV_POOL_GUID, guid);
138
139 value = udev_device_get_property_value(dev, "ID_FS_UUID_SUB");
140 if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
141 (void) nvlist_add_uint64(nvl, ZFS_EV_VDEV_GUID, guid);
142
143 /*
144 * Either a vdev guid or a devid must be present for matching
145 */
146 if (!nvlist_exists(nvl, DEV_IDENTIFIER) &&
147 !nvlist_exists(nvl, ZFS_EV_VDEV_GUID)) {
148 nvlist_free(nvl);
149 return (NULL);
150 }
151
152 return (nvl);
153}
154
155/*
156 * Listen for block device uevents
157 */
158static void *
159zed_udev_monitor(void *arg)
160{
161 struct udev_monitor *mon = arg;
6078881a 162 char *tmp, *tmp2;
d02ca379 163
95401cb6 164 zed_log_msg(LOG_INFO, "Waiting for new udev disk events...");
d02ca379
DB
165
166 while (1) {
167 struct udev_device *dev;
168 const char *action, *type, *part, *sectors;
169 const char *bus, *uuid;
170 const char *class, *subclass;
171 nvlist_t *nvl;
172 boolean_t is_zfs = B_FALSE;
173
174 /* allow a cancellation while blocked (recvmsg) */
175 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
176
177 /* blocks at recvmsg until an event occurs */
178 if ((dev = udev_monitor_receive_device(mon)) == NULL) {
179 zed_log_msg(LOG_WARNING, "zed_udev_monitor: receive "
180 "device error %d", errno);
181 continue;
182 }
183
184 /* allow all steps to complete before a cancellation */
185 pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
186
187 /*
4e33ba4c 188 * Strongly typed device is the preferred filter
d02ca379
DB
189 */
190 type = udev_device_get_property_value(dev, "ID_FS_TYPE");
191 if (type != NULL && type[0] != '\0') {
192 if (strcmp(type, "zfs_member") == 0) {
193 is_zfs = B_TRUE;
194 } else {
195 /* not ours, so skip */
196 zed_log_msg(LOG_INFO, "zed_udev_monitor: skip "
197 "%s (in use by %s)",
198 udev_device_get_devnode(dev), type);
199 udev_device_unref(dev);
200 continue;
201 }
202 }
203
204 /*
205 * if this is a disk and it is partitioned, then the
206 * zfs label will reside in a DEVTYPE=partition and
207 * we can skip passing this event
208 */
209 type = udev_device_get_property_value(dev, "DEVTYPE");
210 part = udev_device_get_property_value(dev,
211 "ID_PART_TABLE_TYPE");
212 if (type != NULL && type[0] != '\0' &&
213 strcmp(type, "disk") == 0 &&
214 part != NULL && part[0] != '\0') {
215 /* skip and wait for partition event */
d02ca379
DB
216 udev_device_unref(dev);
217 continue;
218 }
219
220 /*
221 * ignore small partitions
222 */
223 sectors = udev_device_get_property_value(dev,
224 "ID_PART_ENTRY_SIZE");
225 if (sectors == NULL)
226 sectors = udev_device_get_sysattr_value(dev, "size");
227 if (sectors != NULL &&
228 strtoull(sectors, NULL, 10) < MINIMUM_SECTORS) {
229 udev_device_unref(dev);
230 continue;
231 }
232
233 /*
234 * If the blkid probe didn't find ZFS, then a persistent
235 * device id string is required in the message schema
236 * for matching with vdevs. Preflight here for expected
237 * udev information.
238 */
239 bus = udev_device_get_property_value(dev, "ID_BUS");
240 uuid = udev_device_get_property_value(dev, "DM_UUID");
241 if (!is_zfs && (bus == NULL && uuid == NULL)) {
242 zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid "
243 "source", udev_device_get_devnode(dev));
244 udev_device_unref(dev);
245 continue;
246 }
247
248 action = udev_device_get_action(dev);
249 if (strcmp(action, "add") == 0) {
250 class = EC_DEV_ADD;
251 subclass = ESC_DISK;
252 } else if (strcmp(action, "remove") == 0) {
253 class = EC_DEV_REMOVE;
254 subclass = ESC_DISK;
255 } else if (strcmp(action, "change") == 0) {
256 class = EC_DEV_STATUS;
257 subclass = ESC_DEV_DLE;
258 } else {
259 zed_log_msg(LOG_WARNING, "zed_udev_monitor: %s unknown",
260 action);
261 udev_device_unref(dev);
262 continue;
263 }
264
265 /*
266 * Special case an EC_DEV_ADD for multipath devices
267 *
268 * When a multipath device is created, udev reports the
269 * following:
270 *
271 * 1. "add" event of the dm device for the multipath device
272 * (like /dev/dm-3).
273 * 2. "change" event to create the actual multipath device
274 * symlink (like /dev/mapper/mpatha). The event also
275 * passes back the relevant DM vars we care about, like
276 * DM_UUID.
277 * 3. Another "change" event identical to #2 (that we ignore).
278 *
279 * To get the behavior we want, we treat the "change" event
280 * in #2 as a "add" event; as if "/dev/mapper/mpatha" was
281 * a new disk being added.
282 */
283 if (strcmp(class, EC_DEV_STATUS) == 0 &&
284 udev_device_get_property_value(dev, "DM_UUID") &&
285 udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) {
02730c33 286 tmp = (char *)udev_device_get_devnode(dev);
1bbd8770 287 tmp2 = zfs_get_underlying_path(tmp);
6078881a
TH
288 if (tmp && tmp2 && (strcmp(tmp, tmp2) != 0)) {
289 /*
290 * We have a real underlying device, which
291 * means that this multipath "change" event is
292 * an "add" event.
293 *
294 * If the multipath device and the underlying
295 * dev are the same name (i.e. /dev/dm-5), then
296 * there is no real underlying disk for this
297 * multipath device, and so this "change" event
976246fa 298 * really is a multipath removal.
6078881a
TH
299 */
300 class = EC_DEV_ADD;
301 subclass = ESC_DISK;
302 } else {
976246fa
DB
303 tmp = (char *)
304 udev_device_get_property_value(dev,
305 "DM_NR_VALID_PATHS");
306 /* treat as a multipath remove */
307 if (tmp != NULL && strcmp(tmp, "0") == 0) {
308 class = EC_DEV_REMOVE;
309 subclass = ESC_DISK;
310 }
6078881a
TH
311 }
312 free(tmp2);
d02ca379
DB
313 }
314
7a4500a1
SV
315 /*
316 * Special case an EC_DEV_ADD for scsi_debug devices
317 *
318 * These devices require a udevadm trigger command after
319 * creation in order to register the vdev_id scsidebug alias
320 * rule (adds a persistent path (phys_path) used for fault
321 * management automated tests in the ZFS test suite.
322 *
323 * After udevadm trigger command, event registers as a "change"
324 * event but needs to instead be handled as another "add" event
325 * to allow for disk labeling and partitioning to occur.
326 */
327 if (strcmp(class, EC_DEV_STATUS) == 0 &&
328 udev_device_get_property_value(dev, "ID_VDEV") &&
329 udev_device_get_property_value(dev, "ID_MODEL")) {
330 const char *id_model, *id_model_sd = "scsi_debug";
331
332 id_model = udev_device_get_property_value(dev,
333 "ID_MODEL");
334 if (strcmp(id_model, id_model_sd) == 0) {
335 class = EC_DEV_ADD;
336 subclass = ESC_DISK;
337 }
338 }
339
d02ca379
DB
340 if ((nvl = dev_event_nvlist(dev)) != NULL) {
341 zed_udev_event(class, subclass, nvl);
342 nvlist_free(nvl);
343 }
344
345 udev_device_unref(dev);
346 }
347
348 return (NULL);
349}
350
351int
352zed_disk_event_init()
353{
354 int fd, fflags;
355
356 if ((g_udev = udev_new()) == NULL) {
357 zed_log_msg(LOG_WARNING, "udev_new failed (%d)", errno);
358 return (-1);
359 }
360
361 /* Set up a udev monitor for block devices */
362 g_mon = udev_monitor_new_from_netlink(g_udev, "udev");
363 udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block", "disk");
364 udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block",
365 "partition");
366 udev_monitor_enable_receiving(g_mon);
367
368 /* Make sure monitoring socket is blocking */
369 fd = udev_monitor_get_fd(g_mon);
370 if ((fflags = fcntl(fd, F_GETFL)) & O_NONBLOCK)
371 (void) fcntl(fd, F_SETFL, fflags & ~O_NONBLOCK);
372
373 /* spawn a thread to monitor events */
374 if (pthread_create(&g_mon_tid, NULL, zed_udev_monitor, g_mon) != 0) {
375 udev_monitor_unref(g_mon);
376 udev_unref(g_udev);
377 zed_log_msg(LOG_WARNING, "pthread_create failed");
378 return (-1);
379 }
380
381 zed_log_msg(LOG_INFO, "zed_disk_event_init");
382
383 return (0);
384}
385
386void
387zed_disk_event_fini()
388{
389 /* cancel monitor thread at recvmsg() */
390 (void) pthread_cancel(g_mon_tid);
391 (void) pthread_join(g_mon_tid, NULL);
392
393 /* cleanup udev resources */
394 udev_monitor_unref(g_mon);
395 udev_unref(g_udev);
396
397 zed_log_msg(LOG_INFO, "zed_disk_event_fini");
398}
399
400#else
401
402#include "zed_disk_event.h"
403
/*
 * Built without libudev: there is no disk event monitor to start, so
 * initialization trivially succeeds and returns 0.
 */
int
zed_disk_event_init(void)
{
	return (0);
}
409
/*
 * Built without libudev: nothing was started, so there is nothing to
 * tear down.
 */
void
zed_disk_event_fini(void)
{
}
414
415#endif /* HAVE_LIBUDEV */