]> git.proxmox.com Git - systemd.git/blame - src/udev/udevd.c
Imported Upstream version 231
[systemd.git] / src / udev / udevd.c
CommitLineData
663996b3
MS
1/*
2 * Copyright (C) 2004-2012 Kay Sievers <kay@vrfy.org>
3 * Copyright (C) 2004 Chris Friesen <chris_friesen@sympatico.ca>
4 * Copyright (C) 2009 Canonical Ltd.
5 * Copyright (C) 2009 Scott James Remnant <scott@netsplit.com>
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
663996b3 21#include <errno.h>
6300502b
MP
22#include <fcntl.h>
23#include <getopt.h>
24#include <signal.h>
25#include <stdbool.h>
26#include <stddef.h>
663996b3
MS
27#include <stdio.h>
28#include <stdlib.h>
663996b3 29#include <string.h>
6300502b 30#include <sys/epoll.h>
60f067b4 31#include <sys/file.h>
6300502b
MP
32#include <sys/inotify.h>
33#include <sys/ioctl.h>
34#include <sys/mount.h>
663996b3 35#include <sys/prctl.h>
663996b3 36#include <sys/signalfd.h>
6300502b 37#include <sys/socket.h>
663996b3 38#include <sys/stat.h>
6300502b
MP
39#include <sys/time.h>
40#include <sys/wait.h>
41#include <unistd.h>
663996b3 42
663996b3 43#include "sd-daemon.h"
86f210e9
MP
44#include "sd-event.h"
45
db2df898 46#include "alloc-util.h"
663996b3 47#include "cgroup-util.h"
6300502b 48#include "cpu-set-util.h"
663996b3 49#include "dev-setup.h"
db2df898 50#include "fd-util.h"
663996b3 51#include "fileio.h"
e3bff60a 52#include "formats-util.h"
db2df898 53#include "fs-util.h"
e3bff60a 54#include "hashmap.h"
db2df898 55#include "io-util.h"
6300502b 56#include "netlink-util.h"
db2df898
MP
57#include "parse-util.h"
58#include "proc-cmdline.h"
6300502b
MP
59#include "process-util.h"
60#include "selinux-util.h"
61#include "signal-util.h"
db2df898
MP
62#include "socket-util.h"
63#include "string-util.h"
6300502b
MP
64#include "terminal-util.h"
65#include "udev-util.h"
66#include "udev.h"
db2df898 67#include "user-util.h"
663996b3 68
5eef597e
MP
69static bool arg_debug = false;
70static int arg_daemonize = false;
71static int arg_resolve_names = 1;
e3bff60a 72static unsigned arg_children_max;
5eef597e
MP
73static int arg_exec_delay;
74static usec_t arg_event_timeout_usec = 180 * USEC_PER_SEC;
75static usec_t arg_event_timeout_warn_usec = 180 * USEC_PER_SEC / 3;
e3bff60a
MP
76
77typedef struct Manager {
78 struct udev *udev;
86f210e9 79 sd_event *event;
e3bff60a
MP
80 Hashmap *workers;
81 struct udev_list_node events;
86f210e9 82 const char *cgroup;
e3bff60a 83 pid_t pid; /* the process that originally allocated the manager object */
e3bff60a
MP
84
85 struct udev_rules *rules;
86 struct udev_list properties;
87
88 struct udev_monitor *monitor;
89 struct udev_ctrl *ctrl;
90 struct udev_ctrl_connection *ctrl_conn_blocking;
e3bff60a 91 int fd_inotify;
e3bff60a
MP
92 int worker_watch[2];
93
86f210e9
MP
94 sd_event_source *ctrl_event;
95 sd_event_source *uevent_event;
96 sd_event_source *inotify_event;
97
98 usec_t last_usec;
99
e3bff60a 100 bool stop_exec_queue:1;
e3bff60a
MP
101 bool exit:1;
102} Manager;
663996b3
MS
103
/* Life cycle of a queued event. EVENT_UNDEF doubles as the "match anything"
 * wildcard for event_queue_cleanup(). */
enum event_state {
        EVENT_UNDEF,
        EVENT_QUEUED,
        EVENT_RUNNING,
};
109
110struct event {
111 struct udev_list_node node;
e3bff60a 112 Manager *manager;
663996b3
MS
113 struct udev *udev;
114 struct udev_device *dev;
e3bff60a
MP
115 struct udev_device *dev_kernel;
116 struct worker *worker;
663996b3 117 enum event_state state;
663996b3
MS
118 unsigned long long int delaying_seqnum;
119 unsigned long long int seqnum;
120 const char *devpath;
121 size_t devpath_len;
122 const char *devpath_old;
123 dev_t devnum;
124 int ifindex;
125 bool is_block;
86f210e9
MP
126 sd_event_source *timeout_warning;
127 sd_event_source *timeout;
663996b3
MS
128};
129
5eef597e 130static inline struct event *node_to_event(struct udev_list_node *node) {
663996b3
MS
131 return container_of(node, struct event, node);
132}
133
e3bff60a 134static void event_queue_cleanup(Manager *manager, enum event_state type);
663996b3
MS
135
/* Life cycle of a worker process as tracked by the main daemon. */
enum worker_state {
        WORKER_UNDEF,
        WORKER_RUNNING,
        WORKER_IDLE,
        WORKER_KILLED,
};
142
143struct worker {
e3bff60a 144 Manager *manager;
663996b3 145 struct udev_list_node node;
663996b3
MS
146 int refcount;
147 pid_t pid;
148 struct udev_monitor *monitor;
149 enum worker_state state;
150 struct event *event;
663996b3
MS
151};
152
/* Passed from a worker to the main process once an event has been handled.
 * The message carries no payload. */
struct worker_message {
};
156
e3bff60a
MP
157static void event_free(struct event *event) {
158 int r;
159
160 if (!event)
161 return;
663996b3 162
663996b3 163 udev_list_node_remove(&event->node);
663996b3 164 udev_device_unref(event->dev);
e3bff60a
MP
165 udev_device_unref(event->dev_kernel);
166
86f210e9
MP
167 sd_event_source_unref(event->timeout_warning);
168 sd_event_source_unref(event->timeout);
169
e3bff60a
MP
170 if (event->worker)
171 event->worker->event = NULL;
172
173 assert(event->manager);
174
175 if (udev_list_node_is_empty(&event->manager->events)) {
176 /* only clean up the queue from the process that created it */
177 if (event->manager->pid == getpid()) {
178 r = unlink("/run/udev/queue");
179 if (r < 0)
180 log_warning_errno(errno, "could not unlink /run/udev/queue: %m");
181 }
182 }
183
663996b3
MS
184 free(event);
185}
186
e3bff60a
MP
187static void worker_free(struct worker *worker) {
188 if (!worker)
189 return;
663996b3 190
e3bff60a
MP
191 assert(worker->manager);
192
db2df898 193 hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
663996b3 194 udev_monitor_unref(worker->monitor);
e3bff60a
MP
195 event_free(worker->event);
196
663996b3
MS
197 free(worker);
198}
199
e3bff60a
MP
200static void manager_workers_free(Manager *manager) {
201 struct worker *worker;
202 Iterator i;
203
204 assert(manager);
205
206 HASHMAP_FOREACH(worker, manager->workers, i)
207 worker_free(worker);
208
209 manager->workers = hashmap_free(manager->workers);
210}
211
212static int worker_new(struct worker **ret, Manager *manager, struct udev_monitor *worker_monitor, pid_t pid) {
213 _cleanup_free_ struct worker *worker = NULL;
214 int r;
215
216 assert(ret);
217 assert(manager);
218 assert(worker_monitor);
219 assert(pid > 1);
220
221 worker = new0(struct worker, 1);
222 if (!worker)
223 return -ENOMEM;
224
225 worker->refcount = 1;
226 worker->manager = manager;
227 /* close monitor, but keep address around */
228 udev_monitor_disconnect(worker_monitor);
229 worker->monitor = udev_monitor_ref(worker_monitor);
230 worker->pid = pid;
231
232 r = hashmap_ensure_allocated(&manager->workers, NULL);
233 if (r < 0)
234 return r;
235
db2df898 236 r = hashmap_put(manager->workers, PID_TO_PTR(pid), worker);
e3bff60a
MP
237 if (r < 0)
238 return r;
239
240 *ret = worker;
241 worker = NULL;
242
243 return 0;
244}
245
246static int on_event_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
247 struct event *event = userdata;
248
249 assert(event);
250 assert(event->worker);
251
252 kill_and_sigcont(event->worker->pid, SIGKILL);
253 event->worker->state = WORKER_KILLED;
254
255 log_error("seq %llu '%s' killed", udev_device_get_seqnum(event->dev), event->devpath);
256
257 return 1;
258}
259
260static int on_event_timeout_warning(sd_event_source *s, uint64_t usec, void *userdata) {
261 struct event *event = userdata;
262
263 assert(event);
264
265 log_warning("seq %llu '%s' is taking a long time", udev_device_get_seqnum(event->dev), event->devpath);
266
267 return 1;
268}
269
270static void worker_attach_event(struct worker *worker, struct event *event) {
86f210e9
MP
271 sd_event *e;
272 uint64_t usec;
86f210e9 273
e3bff60a 274 assert(worker);
86f210e9 275 assert(worker->manager);
e3bff60a
MP
276 assert(event);
277 assert(!event->worker);
278 assert(!worker->event);
279
280 worker->state = WORKER_RUNNING;
281 worker->event = event;
282 event->state = EVENT_RUNNING;
e3bff60a 283 event->worker = worker;
86f210e9
MP
284
285 e = worker->manager->event;
286
13d276d0 287 assert_se(sd_event_now(e, clock_boottime_or_monotonic(), &usec) >= 0);
86f210e9
MP
288
289 (void) sd_event_add_time(e, &event->timeout_warning, clock_boottime_or_monotonic(),
290 usec + arg_event_timeout_warn_usec, USEC_PER_SEC, on_event_timeout_warning, event);
291
292 (void) sd_event_add_time(e, &event->timeout, clock_boottime_or_monotonic(),
293 usec + arg_event_timeout_usec, USEC_PER_SEC, on_event_timeout, event);
e3bff60a
MP
294}
295
296static void manager_free(Manager *manager) {
297 if (!manager)
663996b3 298 return;
e3bff60a
MP
299
300 udev_builtin_exit(manager->udev);
301
86f210e9
MP
302 sd_event_source_unref(manager->ctrl_event);
303 sd_event_source_unref(manager->uevent_event);
304 sd_event_source_unref(manager->inotify_event);
305
e3bff60a 306 udev_unref(manager->udev);
86f210e9 307 sd_event_unref(manager->event);
e3bff60a
MP
308 manager_workers_free(manager);
309 event_queue_cleanup(manager, EVENT_UNDEF);
310
311 udev_monitor_unref(manager->monitor);
312 udev_ctrl_unref(manager->ctrl);
313 udev_ctrl_connection_unref(manager->ctrl_conn_blocking);
314
315 udev_list_cleanup(&manager->properties);
316 udev_rules_unref(manager->rules);
e3bff60a 317
e3bff60a
MP
318 safe_close(manager->fd_inotify);
319 safe_close_pair(manager->worker_watch);
320
321 free(manager);
663996b3
MS
322}
323
e3bff60a 324DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
663996b3 325
e3bff60a
MP
326static int worker_send_message(int fd) {
327 struct worker_message message = {};
663996b3 328
e3bff60a 329 return loop_write(fd, &message, sizeof(message), false);
663996b3
MS
330}
331
e3bff60a 332static void worker_spawn(Manager *manager, struct event *event) {
663996b3 333 struct udev *udev = event->udev;
e3bff60a 334 _cleanup_udev_monitor_unref_ struct udev_monitor *worker_monitor = NULL;
663996b3 335 pid_t pid;
fb183854 336 int r = 0;
663996b3
MS
337
338 /* listen for new events */
339 worker_monitor = udev_monitor_new_from_netlink(udev, NULL);
340 if (worker_monitor == NULL)
341 return;
342 /* allow the main daemon netlink address to send devices to the worker */
e3bff60a 343 udev_monitor_allow_unicast_sender(worker_monitor, manager->monitor);
fb183854
MP
344 r = udev_monitor_enable_receiving(worker_monitor);
345 if (r < 0)
346 log_error_errno(r, "worker: could not enable receiving of device: %m");
663996b3 347
663996b3
MS
348 pid = fork();
349 switch (pid) {
350 case 0: {
351 struct udev_device *dev = NULL;
4c89c718 352 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
663996b3 353 int fd_monitor;
e3bff60a 354 _cleanup_close_ int fd_signal = -1, fd_ep = -1;
86f210e9
MP
355 struct epoll_event ep_signal = { .events = EPOLLIN };
356 struct epoll_event ep_monitor = { .events = EPOLLIN };
663996b3 357 sigset_t mask;
663996b3
MS
358
359 /* take initial device from queue */
360 dev = event->dev;
361 event->dev = NULL;
362
86f210e9
MP
363 unsetenv("NOTIFY_SOCKET");
364
e3bff60a
MP
365 manager_workers_free(manager);
366 event_queue_cleanup(manager, EVENT_UNDEF);
86f210e9 367
e3bff60a 368 manager->monitor = udev_monitor_unref(manager->monitor);
86f210e9 369 manager->ctrl_conn_blocking = udev_ctrl_connection_unref(manager->ctrl_conn_blocking);
e3bff60a 370 manager->ctrl = udev_ctrl_unref(manager->ctrl);
e3bff60a 371 manager->worker_watch[READ_END] = safe_close(manager->worker_watch[READ_END]);
86f210e9
MP
372
373 manager->ctrl_event = sd_event_source_unref(manager->ctrl_event);
374 manager->uevent_event = sd_event_source_unref(manager->uevent_event);
375 manager->inotify_event = sd_event_source_unref(manager->inotify_event);
376
377 manager->event = sd_event_unref(manager->event);
663996b3
MS
378
379 sigfillset(&mask);
380 fd_signal = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
381 if (fd_signal < 0) {
e3bff60a 382 r = log_error_errno(errno, "error creating signalfd %m");
663996b3
MS
383 goto out;
384 }
86f210e9
MP
385 ep_signal.data.fd = fd_signal;
386
387 fd_monitor = udev_monitor_get_fd(worker_monitor);
388 ep_monitor.data.fd = fd_monitor;
663996b3
MS
389
390 fd_ep = epoll_create1(EPOLL_CLOEXEC);
391 if (fd_ep < 0) {
e3bff60a 392 r = log_error_errno(errno, "error creating epoll fd: %m");
663996b3
MS
393 goto out;
394 }
395
663996b3
MS
396 if (epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_signal, &ep_signal) < 0 ||
397 epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_monitor, &ep_monitor) < 0) {
e3bff60a 398 r = log_error_errno(errno, "fail to add fds to epoll: %m");
663996b3
MS
399 goto out;
400 }
401
aa27b158
MP
402 /* Request TERM signal if parent exits.
403 Ignore error, not much we can do in that case. */
404 (void) prctl(PR_SET_PDEATHSIG, SIGTERM);
663996b3 405
aa27b158 406 /* Reset OOM score, we only protect the main daemon. */
7035cd9e 407 write_string_file("/proc/self/oom_score_adj", "0", 0);
663996b3
MS
408
409 for (;;) {
410 struct udev_event *udev_event;
60f067b4 411 int fd_lock = -1;
663996b3 412
86f210e9
MP
413 assert(dev);
414
60f067b4 415 log_debug("seq %llu running", udev_device_get_seqnum(dev));
663996b3
MS
416 udev_event = udev_event_new(dev);
417 if (udev_event == NULL) {
e3bff60a 418 r = -ENOMEM;
663996b3
MS
419 goto out;
420 }
421
5eef597e
MP
422 if (arg_exec_delay > 0)
423 udev_event->exec_delay = arg_exec_delay;
663996b3 424
60f067b4 425 /*
5eef597e 426 * Take a shared lock on the device node; this establishes
60f067b4 427 * a concept of device "ownership" to serialize device
5eef597e 428 * access. External processes holding an exclusive lock will
60f067b4 429 * cause udev to skip the event handling; in the case udev
5eef597e 430 * acquired the lock, the external process can block until
60f067b4
JS
431 * udev has finished its event handling.
432 */
5eef597e
MP
433 if (!streq_ptr(udev_device_get_action(dev), "remove") &&
434 streq_ptr("block", udev_device_get_subsystem(dev)) &&
435 !startswith(udev_device_get_sysname(dev), "dm-") &&
436 !startswith(udev_device_get_sysname(dev), "md")) {
60f067b4
JS
437 struct udev_device *d = dev;
438
439 if (streq_ptr("partition", udev_device_get_devtype(d)))
440 d = udev_device_get_parent(d);
441
442 if (d) {
443 fd_lock = open(udev_device_get_devnode(d), O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
444 if (fd_lock >= 0 && flock(fd_lock, LOCK_SH|LOCK_NB) < 0) {
f47781d8 445 log_debug_errno(errno, "Unable to flock(%s), skipping event handling: %m", udev_device_get_devnode(d));
60f067b4
JS
446 fd_lock = safe_close(fd_lock);
447 goto skip;
448 }
449 }
450 }
451
5eef597e
MP
452 /* needed for renaming netifs */
453 udev_event->rtnl = rtnl;
454
663996b3 455 /* apply rules, create node, symlinks */
f47781d8
MP
456 udev_event_execute_rules(udev_event,
457 arg_event_timeout_usec, arg_event_timeout_warn_usec,
e3bff60a 458 &manager->properties,
86f210e9 459 manager->rules);
5eef597e 460
f47781d8 461 udev_event_execute_run(udev_event,
86f210e9 462 arg_event_timeout_usec, arg_event_timeout_warn_usec);
663996b3 463
f47781d8
MP
464 if (udev_event->rtnl)
465 /* in case rtnl was initialized */
86f210e9 466 rtnl = sd_netlink_ref(udev_event->rtnl);
663996b3
MS
467
468 /* apply/restore inotify watch */
60f067b4 469 if (udev_event->inotify_watch) {
663996b3
MS
470 udev_watch_begin(udev, dev);
471 udev_device_update_db(dev);
472 }
473
60f067b4
JS
474 safe_close(fd_lock);
475
663996b3
MS
476 /* send processed event back to libudev listeners */
477 udev_monitor_send_device(worker_monitor, NULL, dev);
478
60f067b4 479skip:
e3bff60a 480 log_debug("seq %llu processed", udev_device_get_seqnum(dev));
663996b3 481
e3bff60a
MP
482 /* send udevd the result of the event execution */
483 r = worker_send_message(manager->worker_watch[WRITE_END]);
484 if (r < 0)
485 log_error_errno(r, "failed to send result of seq %llu to main daemon: %m",
486 udev_device_get_seqnum(dev));
663996b3
MS
487
488 udev_device_unref(dev);
489 dev = NULL;
490
663996b3
MS
491 udev_event_unref(udev_event);
492
493 /* wait for more device messages from main udevd, or term signal */
494 while (dev == NULL) {
495 struct epoll_event ev[4];
496 int fdcount;
497 int i;
498
499 fdcount = epoll_wait(fd_ep, ev, ELEMENTSOF(ev), -1);
500 if (fdcount < 0) {
501 if (errno == EINTR)
502 continue;
e3bff60a 503 r = log_error_errno(errno, "failed to poll: %m");
663996b3
MS
504 goto out;
505 }
506
507 for (i = 0; i < fdcount; i++) {
508 if (ev[i].data.fd == fd_monitor && ev[i].events & EPOLLIN) {
509 dev = udev_monitor_receive_device(worker_monitor);
510 break;
511 } else if (ev[i].data.fd == fd_signal && ev[i].events & EPOLLIN) {
512 struct signalfd_siginfo fdsi;
513 ssize_t size;
514
515 size = read(fd_signal, &fdsi, sizeof(struct signalfd_siginfo));
516 if (size != sizeof(struct signalfd_siginfo))
517 continue;
518 switch (fdsi.ssi_signo) {
519 case SIGTERM:
520 goto out;
521 }
522 }
523 }
524 }
525 }
526out:
527 udev_device_unref(dev);
e3bff60a 528 manager_free(manager);
663996b3 529 log_close();
e3bff60a 530 _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
663996b3
MS
531 }
532 case -1:
663996b3 533 event->state = EVENT_QUEUED;
f47781d8 534 log_error_errno(errno, "fork of child failed: %m");
663996b3
MS
535 break;
536 default:
e3bff60a
MP
537 {
538 struct worker *worker;
e3bff60a
MP
539
540 r = worker_new(&worker, manager, worker_monitor, pid);
541 if (r < 0)
542 return;
543
544 worker_attach_event(worker, event);
545
e735f4d4 546 log_debug("seq %llu forked new worker ["PID_FMT"]", udev_device_get_seqnum(event->dev), pid);
663996b3
MS
547 break;
548 }
e3bff60a 549 }
663996b3
MS
550}
551
e3bff60a
MP
552static void event_run(Manager *manager, struct event *event) {
553 struct worker *worker;
554 Iterator i;
555
556 assert(manager);
557 assert(event);
663996b3 558
e3bff60a 559 HASHMAP_FOREACH(worker, manager->workers, i) {
663996b3
MS
560 ssize_t count;
561
562 if (worker->state != WORKER_IDLE)
563 continue;
564
e3bff60a 565 count = udev_monitor_send_device(manager->monitor, worker->monitor, event->dev);
663996b3 566 if (count < 0) {
e735f4d4
MP
567 log_error_errno(errno, "worker ["PID_FMT"] did not accept message %zi (%m), kill it",
568 worker->pid, count);
663996b3
MS
569 kill(worker->pid, SIGKILL);
570 worker->state = WORKER_KILLED;
571 continue;
572 }
e3bff60a 573 worker_attach_event(worker, event);
663996b3
MS
574 return;
575 }
576
e3bff60a 577 if (hashmap_size(manager->workers) >= arg_children_max) {
5eef597e 578 if (arg_children_max > 1)
e3bff60a 579 log_debug("maximum number (%i) of children reached", hashmap_size(manager->workers));
663996b3
MS
580 return;
581 }
582
583 /* start new worker and pass initial device */
e3bff60a 584 worker_spawn(manager, event);
663996b3
MS
585}
586
e3bff60a 587static int event_queue_insert(Manager *manager, struct udev_device *dev) {
663996b3 588 struct event *event;
e3bff60a
MP
589 int r;
590
591 assert(manager);
592 assert(dev);
593
86f210e9
MP
594 /* only one process can add events to the queue */
595 if (manager->pid == 0)
596 manager->pid = getpid();
597
e3bff60a 598 assert(manager->pid == getpid());
663996b3 599
60f067b4 600 event = new0(struct event, 1);
e3bff60a
MP
601 if (!event)
602 return -ENOMEM;
663996b3
MS
603
604 event->udev = udev_device_get_udev(dev);
e3bff60a 605 event->manager = manager;
663996b3 606 event->dev = dev;
e3bff60a
MP
607 event->dev_kernel = udev_device_shallow_clone(dev);
608 udev_device_copy_properties(event->dev_kernel, dev);
663996b3
MS
609 event->seqnum = udev_device_get_seqnum(dev);
610 event->devpath = udev_device_get_devpath(dev);
611 event->devpath_len = strlen(event->devpath);
612 event->devpath_old = udev_device_get_devpath_old(dev);
613 event->devnum = udev_device_get_devnum(dev);
614 event->is_block = streq("block", udev_device_get_subsystem(dev));
615 event->ifindex = udev_device_get_ifindex(dev);
663996b3 616
60f067b4 617 log_debug("seq %llu queued, '%s' '%s'", udev_device_get_seqnum(dev),
663996b3
MS
618 udev_device_get_action(dev), udev_device_get_subsystem(dev));
619
620 event->state = EVENT_QUEUED;
e3bff60a
MP
621
622 if (udev_list_node_is_empty(&manager->events)) {
623 r = touch("/run/udev/queue");
624 if (r < 0)
625 log_warning_errno(r, "could not touch /run/udev/queue: %m");
626 }
627
628 udev_list_node_append(&event->node, &manager->events);
629
663996b3
MS
630 return 0;
631}
632
e3bff60a
MP
633static void manager_kill_workers(Manager *manager) {
634 struct worker *worker;
635 Iterator i;
663996b3 636
e3bff60a 637 assert(manager);
663996b3 638
e3bff60a 639 HASHMAP_FOREACH(worker, manager->workers, i) {
663996b3
MS
640 if (worker->state == WORKER_KILLED)
641 continue;
642
643 worker->state = WORKER_KILLED;
644 kill(worker->pid, SIGTERM);
645 }
646}
647
648/* lookup event for identical, parent, child device */
e3bff60a 649static bool is_devpath_busy(Manager *manager, struct event *event) {
663996b3
MS
650 struct udev_list_node *loop;
651 size_t common;
652
653 /* check if queue contains events we depend on */
e3bff60a 654 udev_list_node_foreach(loop, &manager->events) {
663996b3
MS
655 struct event *loop_event = node_to_event(loop);
656
657 /* we already found a later event, earlier can not block us, no need to check again */
658 if (loop_event->seqnum < event->delaying_seqnum)
659 continue;
660
661 /* event we checked earlier still exists, no need to check again */
662 if (loop_event->seqnum == event->delaying_seqnum)
663 return true;
664
665 /* found ourself, no later event can block us */
666 if (loop_event->seqnum >= event->seqnum)
667 break;
668
669 /* check major/minor */
670 if (major(event->devnum) != 0 && event->devnum == loop_event->devnum && event->is_block == loop_event->is_block)
671 return true;
672
673 /* check network device ifindex */
674 if (event->ifindex != 0 && event->ifindex == loop_event->ifindex)
675 return true;
676
677 /* check our old name */
678 if (event->devpath_old != NULL && streq(loop_event->devpath, event->devpath_old)) {
679 event->delaying_seqnum = loop_event->seqnum;
680 return true;
681 }
682
683 /* compare devpath */
684 common = MIN(loop_event->devpath_len, event->devpath_len);
685
686 /* one devpath is contained in the other? */
687 if (memcmp(loop_event->devpath, event->devpath, common) != 0)
688 continue;
689
690 /* identical device event found */
691 if (loop_event->devpath_len == event->devpath_len) {
692 /* devices names might have changed/swapped in the meantime */
693 if (major(event->devnum) != 0 && (event->devnum != loop_event->devnum || event->is_block != loop_event->is_block))
694 continue;
695 if (event->ifindex != 0 && event->ifindex != loop_event->ifindex)
696 continue;
697 event->delaying_seqnum = loop_event->seqnum;
698 return true;
699 }
700
663996b3
MS
701 /* parent device event found */
702 if (event->devpath[common] == '/') {
703 event->delaying_seqnum = loop_event->seqnum;
704 return true;
705 }
706
707 /* child device event found */
708 if (loop_event->devpath[common] == '/') {
709 event->delaying_seqnum = loop_event->seqnum;
710 return true;
711 }
712
713 /* no matching device */
714 continue;
715 }
716
717 return false;
718}
719
86f210e9
MP
720static int on_exit_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
721 Manager *manager = userdata;
722
723 assert(manager);
724
725 log_error_errno(ETIMEDOUT, "giving up waiting for workers to finish");
726
727 sd_event_exit(manager->event, -ETIMEDOUT);
728
729 return 1;
730}
731
732static void manager_exit(Manager *manager) {
733 uint64_t usec;
734 int r;
735
736 assert(manager);
737
738 manager->exit = true;
739
740 sd_notify(false,
741 "STOPPING=1\n"
742 "STATUS=Starting shutdown...");
743
744 /* close sources of new events and discard buffered events */
745 manager->ctrl_event = sd_event_source_unref(manager->ctrl_event);
746 manager->ctrl = udev_ctrl_unref(manager->ctrl);
747
748 manager->inotify_event = sd_event_source_unref(manager->inotify_event);
749 manager->fd_inotify = safe_close(manager->fd_inotify);
750
751 manager->uevent_event = sd_event_source_unref(manager->uevent_event);
752 manager->monitor = udev_monitor_unref(manager->monitor);
753
754 /* discard queued events and kill workers */
755 event_queue_cleanup(manager, EVENT_QUEUED);
756 manager_kill_workers(manager);
757
13d276d0 758 assert_se(sd_event_now(manager->event, clock_boottime_or_monotonic(), &usec) >= 0);
86f210e9
MP
759
760 r = sd_event_add_time(manager->event, NULL, clock_boottime_or_monotonic(),
761 usec + 30 * USEC_PER_SEC, USEC_PER_SEC, on_exit_timeout, manager);
762 if (r < 0)
763 return;
764}
765
766/* reload requested, HUP signal received, rules changed, builtin changed */
767static void manager_reload(Manager *manager) {
768
769 assert(manager);
770
771 sd_notify(false,
772 "RELOADING=1\n"
773 "STATUS=Flushing configuration...");
774
775 manager_kill_workers(manager);
776 manager->rules = udev_rules_unref(manager->rules);
777 udev_builtin_exit(manager->udev);
778
779 sd_notify(false,
780 "READY=1\n"
781 "STATUS=Processing...");
782}
783
e3bff60a 784static void event_queue_start(Manager *manager) {
663996b3 785 struct udev_list_node *loop;
86f210e9 786 usec_t usec;
663996b3 787
e3bff60a
MP
788 assert(manager);
789
86f210e9
MP
790 if (udev_list_node_is_empty(&manager->events) ||
791 manager->exit || manager->stop_exec_queue)
792 return;
793
13d276d0
MP
794 assert_se(sd_event_now(manager->event, clock_boottime_or_monotonic(), &usec) >= 0);
795 /* check for changed config, every 3 seconds at most */
796 if (manager->last_usec == 0 ||
797 (usec - manager->last_usec) > 3 * USEC_PER_SEC) {
798 if (udev_rules_check_timestamp(manager->rules) ||
799 udev_builtin_validate(manager->udev))
800 manager_reload(manager);
86f210e9 801
13d276d0 802 manager->last_usec = usec;
86f210e9
MP
803 }
804
805 udev_builtin_init(manager->udev);
806
807 if (!manager->rules) {
808 manager->rules = udev_rules_new(manager->udev, arg_resolve_names);
809 if (!manager->rules)
810 return;
811 }
812
e3bff60a 813 udev_list_node_foreach(loop, &manager->events) {
663996b3
MS
814 struct event *event = node_to_event(loop);
815
816 if (event->state != EVENT_QUEUED)
817 continue;
818
819 /* do not start event if parent or child event is still running */
e3bff60a 820 if (is_devpath_busy(manager, event))
663996b3
MS
821 continue;
822
e3bff60a 823 event_run(manager, event);
663996b3
MS
824 }
825}
826
e3bff60a 827static void event_queue_cleanup(Manager *manager, enum event_state match_type) {
663996b3
MS
828 struct udev_list_node *loop, *tmp;
829
e3bff60a 830 udev_list_node_foreach_safe(loop, tmp, &manager->events) {
663996b3
MS
831 struct event *event = node_to_event(loop);
832
833 if (match_type != EVENT_UNDEF && match_type != event->state)
834 continue;
835
e3bff60a 836 event_free(event);
663996b3
MS
837 }
838}
839
e3bff60a
MP
840static int on_worker(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
841 Manager *manager = userdata;
842
843 assert(manager);
844
663996b3
MS
845 for (;;) {
846 struct worker_message msg;
e3bff60a
MP
847 struct iovec iovec = {
848 .iov_base = &msg,
849 .iov_len = sizeof(msg),
850 };
851 union {
852 struct cmsghdr cmsghdr;
853 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
854 } control = {};
855 struct msghdr msghdr = {
856 .msg_iov = &iovec,
857 .msg_iovlen = 1,
858 .msg_control = &control,
859 .msg_controllen = sizeof(control),
860 };
861 struct cmsghdr *cmsg;
663996b3 862 ssize_t size;
e3bff60a
MP
863 struct ucred *ucred = NULL;
864 struct worker *worker;
663996b3 865
e3bff60a
MP
866 size = recvmsg(fd, &msghdr, MSG_DONTWAIT);
867 if (size < 0) {
868 if (errno == EINTR)
869 continue;
870 else if (errno == EAGAIN)
871 /* nothing more to read */
872 break;
663996b3 873
e3bff60a
MP
874 return log_error_errno(errno, "failed to receive message: %m");
875 } else if (size != sizeof(struct worker_message)) {
876 log_warning_errno(EIO, "ignoring worker message with invalid size %zi bytes", size);
877 continue;
878 }
663996b3 879
86f210e9 880 CMSG_FOREACH(cmsg, &msghdr) {
e3bff60a
MP
881 if (cmsg->cmsg_level == SOL_SOCKET &&
882 cmsg->cmsg_type == SCM_CREDENTIALS &&
883 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
884 ucred = (struct ucred*) CMSG_DATA(cmsg);
885 }
663996b3 886
e3bff60a
MP
887 if (!ucred || ucred->pid <= 0) {
888 log_warning_errno(EIO, "ignoring worker message without valid PID");
889 continue;
663996b3 890 }
e3bff60a
MP
891
892 /* lookup worker who sent the signal */
db2df898 893 worker = hashmap_get(manager->workers, PID_TO_PTR(ucred->pid));
e3bff60a
MP
894 if (!worker) {
895 log_debug("worker ["PID_FMT"] returned, but is no longer tracked", ucred->pid);
896 continue;
897 }
898
899 if (worker->state != WORKER_KILLED)
900 worker->state = WORKER_IDLE;
901
902 /* worker returned */
903 event_free(worker->event);
663996b3 904 }
e3bff60a 905
86f210e9
MP
906 /* we have free workers, try to schedule events */
907 event_queue_start(manager);
908
e3bff60a
MP
909 return 1;
910}
911
912static int on_uevent(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
913 Manager *manager = userdata;
914 struct udev_device *dev;
915 int r;
916
917 assert(manager);
918
919 dev = udev_monitor_receive_device(manager->monitor);
920 if (dev) {
921 udev_device_ensure_usec_initialized(dev, NULL);
922 r = event_queue_insert(manager, dev);
923 if (r < 0)
924 udev_device_unref(dev);
86f210e9
MP
925 else
926 /* we have fresh events, try to schedule them */
927 event_queue_start(manager);
e3bff60a
MP
928 }
929
930 return 1;
663996b3
MS
931}
932
933/* receive the udevd message from userspace */
e3bff60a
MP
934static int on_ctrl_msg(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
935 Manager *manager = userdata;
936 _cleanup_udev_ctrl_connection_unref_ struct udev_ctrl_connection *ctrl_conn = NULL;
937 _cleanup_udev_ctrl_msg_unref_ struct udev_ctrl_msg *ctrl_msg = NULL;
663996b3
MS
938 const char *str;
939 int i;
940
e3bff60a
MP
941 assert(manager);
942
943 ctrl_conn = udev_ctrl_get_connection(manager->ctrl);
944 if (!ctrl_conn)
945 return 1;
663996b3
MS
946
947 ctrl_msg = udev_ctrl_receive_msg(ctrl_conn);
e3bff60a
MP
948 if (!ctrl_msg)
949 return 1;
663996b3
MS
950
951 i = udev_ctrl_get_set_log_level(ctrl_msg);
952 if (i >= 0) {
60f067b4 953 log_debug("udevd message (SET_LOG_LEVEL) received, log_priority=%i", i);
663996b3 954 log_set_max_level(i);
e3bff60a 955 manager_kill_workers(manager);
663996b3
MS
956 }
957
958 if (udev_ctrl_get_stop_exec_queue(ctrl_msg) > 0) {
60f067b4 959 log_debug("udevd message (STOP_EXEC_QUEUE) received");
e3bff60a 960 manager->stop_exec_queue = true;
663996b3
MS
961 }
962
963 if (udev_ctrl_get_start_exec_queue(ctrl_msg) > 0) {
60f067b4 964 log_debug("udevd message (START_EXEC_QUEUE) received");
e3bff60a 965 manager->stop_exec_queue = false;
86f210e9 966 event_queue_start(manager);
663996b3
MS
967 }
968
969 if (udev_ctrl_get_reload(ctrl_msg) > 0) {
60f067b4 970 log_debug("udevd message (RELOAD) received");
86f210e9 971 manager_reload(manager);
663996b3
MS
972 }
973
974 str = udev_ctrl_get_set_env(ctrl_msg);
975 if (str != NULL) {
e3bff60a 976 _cleanup_free_ char *key = NULL;
663996b3
MS
977
978 key = strdup(str);
e3bff60a 979 if (key) {
663996b3
MS
980 char *val;
981
982 val = strchr(key, '=');
983 if (val != NULL) {
984 val[0] = '\0';
985 val = &val[1];
986 if (val[0] == '\0') {
60f067b4 987 log_debug("udevd message (ENV) received, unset '%s'", key);
e3bff60a 988 udev_list_entry_add(&manager->properties, key, NULL);
663996b3 989 } else {
60f067b4 990 log_debug("udevd message (ENV) received, set '%s=%s'", key, val);
e3bff60a 991 udev_list_entry_add(&manager->properties, key, val);
663996b3 992 }
e3bff60a 993 } else
60f067b4 994 log_error("wrong key format '%s'", key);
663996b3 995 }
e3bff60a 996 manager_kill_workers(manager);
663996b3
MS
997 }
998
999 i = udev_ctrl_get_set_children_max(ctrl_msg);
1000 if (i >= 0) {
60f067b4 1001 log_debug("udevd message (SET_MAX_CHILDREN) received, children_max=%i", i);
5eef597e 1002 arg_children_max = i;
663996b3
MS
1003 }
1004
1005 if (udev_ctrl_get_ping(ctrl_msg) > 0)
60f067b4 1006 log_debug("udevd message (SYNC) received");
663996b3
MS
1007
1008 if (udev_ctrl_get_exit(ctrl_msg) > 0) {
60f067b4 1009 log_debug("udevd message (EXIT) received");
86f210e9 1010 manager_exit(manager);
e3bff60a
MP
1011 /* keep reference to block the client until we exit
1012 TODO: deal with several blocking exit requests */
1013 manager->ctrl_conn_blocking = udev_ctrl_connection_ref(ctrl_conn);
663996b3 1014 }
e3bff60a
MP
1015
1016 return 1;
663996b3
MS
1017}
1018
60f067b4
JS
1019static int synthesize_change(struct udev_device *dev) {
1020 char filename[UTIL_PATH_SIZE];
1021 int r;
1022
1023 if (streq_ptr("block", udev_device_get_subsystem(dev)) &&
1024 streq_ptr("disk", udev_device_get_devtype(dev)) &&
1025 !startswith(udev_device_get_sysname(dev), "dm-")) {
1026 bool part_table_read = false;
1027 bool has_partitions = false;
1028 int fd;
1029 struct udev *udev = udev_device_get_udev(dev);
1030 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
1031 struct udev_list_entry *item;
1032
1033 /*
1034 * Try to re-read the partition table. This only succeeds if
1035 * none of the devices is busy. The kernel returns 0 if no
1036 * partition table is found, and we will not get an event for
1037 * the disk.
1038 */
1039 fd = open(udev_device_get_devnode(dev), O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
1040 if (fd >= 0) {
1041 r = flock(fd, LOCK_EX|LOCK_NB);
1042 if (r >= 0)
1043 r = ioctl(fd, BLKRRPART, 0);
1044
1045 close(fd);
1046 if (r >= 0)
1047 part_table_read = true;
1048 }
1049
1050 /* search for partitions */
1051 e = udev_enumerate_new(udev);
1052 if (!e)
1053 return -ENOMEM;
1054
1055 r = udev_enumerate_add_match_parent(e, dev);
1056 if (r < 0)
1057 return r;
1058
1059 r = udev_enumerate_add_match_subsystem(e, "block");
1060 if (r < 0)
1061 return r;
1062
1063 r = udev_enumerate_scan_devices(e);
1064 if (r < 0)
1065 return r;
1066
1067 udev_list_entry_foreach(item, udev_enumerate_get_list_entry(e)) {
1068 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1069
1070 d = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
1071 if (!d)
1072 continue;
1073
1074 if (!streq_ptr("partition", udev_device_get_devtype(d)))
1075 continue;
1076
1077 has_partitions = true;
1078 break;
1079 }
1080
1081 /*
1082 * We have partitions and re-read the table, the kernel already sent
1083 * out a "change" event for the disk, and "remove/add" for all
1084 * partitions.
1085 */
1086 if (part_table_read && has_partitions)
1087 return 0;
1088
1089 /*
1090 * We have partitions but re-reading the partition table did not
1091 * work, synthesize "change" for the disk and all partitions.
1092 */
1093 log_debug("device %s closed, synthesising 'change'", udev_device_get_devnode(dev));
1094 strscpyl(filename, sizeof(filename), udev_device_get_syspath(dev), "/uevent", NULL);
7035cd9e 1095 write_string_file(filename, "change", WRITE_STRING_FILE_CREATE);
60f067b4
JS
1096
1097 udev_list_entry_foreach(item, udev_enumerate_get_list_entry(e)) {
1098 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1099
1100 d = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
1101 if (!d)
1102 continue;
1103
1104 if (!streq_ptr("partition", udev_device_get_devtype(d)))
1105 continue;
1106
1107 log_debug("device %s closed, synthesising partition '%s' 'change'",
1108 udev_device_get_devnode(dev), udev_device_get_devnode(d));
1109 strscpyl(filename, sizeof(filename), udev_device_get_syspath(d), "/uevent", NULL);
7035cd9e 1110 write_string_file(filename, "change", WRITE_STRING_FILE_CREATE);
60f067b4
JS
1111 }
1112
1113 return 0;
1114 }
1115
1116 log_debug("device %s closed, synthesising 'change'", udev_device_get_devnode(dev));
1117 strscpyl(filename, sizeof(filename), udev_device_get_syspath(dev), "/uevent", NULL);
7035cd9e 1118 write_string_file(filename, "change", WRITE_STRING_FILE_CREATE);
60f067b4
JS
1119
1120 return 0;
1121}
1122
e3bff60a
MP
1123static int on_inotify(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1124 Manager *manager = userdata;
e735f4d4 1125 union inotify_event_buffer buffer;
f47781d8
MP
1126 struct inotify_event *e;
1127 ssize_t l;
663996b3 1128
e3bff60a
MP
1129 assert(manager);
1130
1131 l = read(fd, &buffer, sizeof(buffer));
f47781d8
MP
1132 if (l < 0) {
1133 if (errno == EAGAIN || errno == EINTR)
e3bff60a 1134 return 1;
663996b3 1135
f47781d8 1136 return log_error_errno(errno, "Failed to read inotify fd: %m");
663996b3
MS
1137 }
1138
f47781d8 1139 FOREACH_INOTIFY_EVENT(e, buffer, l) {
e3bff60a 1140 _cleanup_udev_device_unref_ struct udev_device *dev = NULL;
663996b3 1141
e3bff60a 1142 dev = udev_watch_lookup(manager->udev, e->wd);
60f067b4
JS
1143 if (!dev)
1144 continue;
663996b3 1145
f47781d8 1146 log_debug("inotify event: %x for %s", e->mask, udev_device_get_devnode(dev));
e3bff60a 1147 if (e->mask & IN_CLOSE_WRITE) {
60f067b4 1148 synthesize_change(dev);
663996b3 1149
e3bff60a
MP
1150 /* settle might be waiting on us to determine the queue
1151 * state. If we just handled an inotify event, we might have
1152 * generated a "change" event, but we won't have queued up
1153 * the resultant uevent yet. Do that.
1154 */
1155 on_uevent(NULL, -1, 0, manager);
1156 } else if (e->mask & IN_IGNORED)
1157 udev_watch_end(manager->udev, dev);
663996b3
MS
1158 }
1159
e3bff60a 1160 return 1;
663996b3
MS
1161}
1162
e3bff60a
MP
1163static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1164 Manager *manager = userdata;
663996b3 1165
e3bff60a 1166 assert(manager);
663996b3 1167
86f210e9 1168 manager_exit(manager);
663996b3 1169
e3bff60a
MP
1170 return 1;
1171}
663996b3 1172
e3bff60a
MP
1173static int on_sighup(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1174 Manager *manager = userdata;
60f067b4 1175
e3bff60a
MP
1176 assert(manager);
1177
86f210e9 1178 manager_reload(manager);
e3bff60a
MP
1179
1180 return 1;
1181}
1182
1183static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1184 Manager *manager = userdata;
1185
1186 assert(manager);
1187
1188 for (;;) {
1189 pid_t pid;
1190 int status;
1191 struct worker *worker;
1192
1193 pid = waitpid(-1, &status, WNOHANG);
1194 if (pid <= 0)
1195 break;
1196
db2df898 1197 worker = hashmap_get(manager->workers, PID_TO_PTR(pid));
e3bff60a
MP
1198 if (!worker) {
1199 log_warning("worker ["PID_FMT"] is unknown, ignoring", pid);
1200 continue;
1201 }
1202
1203 if (WIFEXITED(status)) {
1204 if (WEXITSTATUS(status) == 0)
1205 log_debug("worker ["PID_FMT"] exited", pid);
1206 else
1207 log_warning("worker ["PID_FMT"] exited with return code %i", pid, WEXITSTATUS(status));
1208 } else if (WIFSIGNALED(status)) {
1209 log_warning("worker ["PID_FMT"] terminated by signal %i (%s)", pid, WTERMSIG(status), strsignal(WTERMSIG(status)));
1210 } else if (WIFSTOPPED(status)) {
1211 log_info("worker ["PID_FMT"] stopped", pid);
1212 continue;
1213 } else if (WIFCONTINUED(status)) {
1214 log_info("worker ["PID_FMT"] continued", pid);
1215 continue;
1216 } else
1217 log_warning("worker ["PID_FMT"] exit with status 0x%04x", pid, status);
1218
1219 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
1220 if (worker->event) {
1221 log_error("worker ["PID_FMT"] failed while handling '%s'", pid, worker->event->devpath);
1222 /* delete state from disk */
1223 udev_device_delete_db(worker->event->dev);
1224 udev_device_tag_index(worker->event->dev, NULL, false);
1225 /* forward kernel event without amending it */
1226 udev_monitor_send_device(manager->monitor, NULL, worker->event->dev_kernel);
663996b3
MS
1227 }
1228 }
e3bff60a
MP
1229
1230 worker_free(worker);
663996b3 1231 }
e3bff60a 1232
86f210e9
MP
1233 /* we can start new workers, try to schedule events */
1234 event_queue_start(manager);
1235
1236 return 1;
1237}
1238
1239static int on_post(sd_event_source *s, void *userdata) {
1240 Manager *manager = userdata;
1241 int r;
1242
1243 assert(manager);
1244
1245 if (udev_list_node_is_empty(&manager->events)) {
1246 /* no pending events */
1247 if (!hashmap_isempty(manager->workers)) {
1248 /* there are idle workers */
1249 log_debug("cleanup idle workers");
1250 manager_kill_workers(manager);
1251 } else {
1252 /* we are idle */
1253 if (manager->exit) {
1254 r = sd_event_exit(manager->event, 0);
1255 if (r < 0)
1256 return r;
1257 } else if (manager->cgroup)
1258 /* cleanup possible left-over processes in our cgroup */
5a920b42 1259 cg_kill(SYSTEMD_CGROUP_CONTROLLER, manager->cgroup, SIGKILL, CGROUP_IGNORE_SELF, NULL, NULL, NULL);
86f210e9
MP
1260 }
1261 }
1262
e3bff60a 1263 return 1;
663996b3
MS
1264}
1265
86f210e9
MP
1266static int listen_fds(int *rctrl, int *rnetlink) {
1267 _cleanup_udev_unref_ struct udev *udev = NULL;
1268 int ctrl_fd = -1, netlink_fd = -1;
1269 int fd, n, r;
1270
1271 assert(rctrl);
1272 assert(rnetlink);
663996b3
MS
1273
1274 n = sd_listen_fds(true);
86f210e9
MP
1275 if (n < 0)
1276 return n;
663996b3
MS
1277
1278 for (fd = SD_LISTEN_FDS_START; fd < n + SD_LISTEN_FDS_START; fd++) {
1279 if (sd_is_socket(fd, AF_LOCAL, SOCK_SEQPACKET, -1)) {
86f210e9
MP
1280 if (ctrl_fd >= 0)
1281 return -EINVAL;
1282 ctrl_fd = fd;
663996b3
MS
1283 continue;
1284 }
1285
1286 if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1)) {
86f210e9
MP
1287 if (netlink_fd >= 0)
1288 return -EINVAL;
1289 netlink_fd = fd;
663996b3
MS
1290 continue;
1291 }
1292
86f210e9
MP
1293 return -EINVAL;
1294 }
1295
1296 if (ctrl_fd < 0) {
1297 _cleanup_udev_ctrl_unref_ struct udev_ctrl *ctrl = NULL;
1298
1299 udev = udev_new();
1300 if (!udev)
1301 return -ENOMEM;
1302
1303 ctrl = udev_ctrl_new(udev);
1304 if (!ctrl)
1305 return log_error_errno(EINVAL, "error initializing udev control socket");
1306
1307 r = udev_ctrl_enable_receiving(ctrl);
1308 if (r < 0)
1309 return log_error_errno(EINVAL, "error binding udev control socket");
1310
1311 fd = udev_ctrl_get_fd(ctrl);
1312 if (fd < 0)
1313 return log_error_errno(EIO, "could not get ctrl fd");
1314
1315 ctrl_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
1316 if (ctrl_fd < 0)
1317 return log_error_errno(errno, "could not dup ctrl fd: %m");
1318 }
1319
1320 if (netlink_fd < 0) {
1321 _cleanup_udev_monitor_unref_ struct udev_monitor *monitor = NULL;
1322
1323 if (!udev) {
1324 udev = udev_new();
1325 if (!udev)
1326 return -ENOMEM;
1327 }
1328
1329 monitor = udev_monitor_new_from_netlink(udev, "kernel");
1330 if (!monitor)
1331 return log_error_errno(EINVAL, "error initializing netlink socket");
1332
1333 (void) udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024);
1334
1335 r = udev_monitor_enable_receiving(monitor);
1336 if (r < 0)
1337 return log_error_errno(EINVAL, "error binding netlink socket");
1338
1339 fd = udev_monitor_get_fd(monitor);
1340 if (fd < 0)
1341 return log_error_errno(netlink_fd, "could not get uevent fd: %m");
1342
1343 netlink_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
1344 if (ctrl_fd < 0)
1345 return log_error_errno(errno, "could not dup netlink fd: %m");
663996b3
MS
1346 }
1347
86f210e9
MP
1348 *rctrl = ctrl_fd;
1349 *rnetlink = netlink_fd;
663996b3 1350
663996b3
MS
1351 return 0;
1352}
1353
1354/*
f47781d8 1355 * read the kernel command line, in case we need to get into debug mode
e3bff60a
MP
1356 * udev.log-priority=<level> syslog priority
1357 * udev.children-max=<number of workers> events are fully serialized if set to 1
1358 * udev.exec-delay=<number of seconds> delay execution of every executed program
1359 * udev.event-timeout=<number of seconds> seconds to wait before terminating an event
663996b3 1360 */
e3bff60a 1361static int parse_proc_cmdline_item(const char *key, const char *value) {
7035cd9e 1362 const char *full_key = key;
60f067b4 1363 int r;
663996b3 1364
e3bff60a 1365 assert(key);
663996b3 1366
e3bff60a
MP
1367 if (!value)
1368 return 0;
663996b3 1369
e3bff60a
MP
1370 if (startswith(key, "rd."))
1371 key += strlen("rd.");
663996b3 1372
e3bff60a
MP
1373 if (startswith(key, "udev."))
1374 key += strlen("udev.");
1375 else
1376 return 0;
663996b3 1377
e3bff60a
MP
1378 if (streq(key, "log-priority")) {
1379 int prio;
663996b3 1380
e3bff60a 1381 prio = util_log_priority(value);
7035cd9e
MP
1382 if (prio < 0)
1383 goto invalid;
e3bff60a
MP
1384 log_set_max_level(prio);
1385 } else if (streq(key, "children-max")) {
1386 r = safe_atou(value, &arg_children_max);
1387 if (r < 0)
7035cd9e 1388 goto invalid;
e3bff60a
MP
1389 } else if (streq(key, "exec-delay")) {
1390 r = safe_atoi(value, &arg_exec_delay);
1391 if (r < 0)
7035cd9e 1392 goto invalid;
e3bff60a
MP
1393 } else if (streq(key, "event-timeout")) {
1394 r = safe_atou64(value, &arg_event_timeout_usec);
1395 if (r < 0)
7035cd9e
MP
1396 goto invalid;
1397 arg_event_timeout_usec *= USEC_PER_SEC;
1398 arg_event_timeout_warn_usec = (arg_event_timeout_usec / 3) ? : 1;
663996b3 1399 }
e3bff60a 1400
7035cd9e
MP
1401 return 0;
1402invalid:
1403 log_warning("invalid %s ignored: %s", full_key, value);
e3bff60a 1404 return 0;
663996b3
MS
1405}
1406
5eef597e
MP
1407static void help(void) {
1408 printf("%s [OPTIONS...]\n\n"
1409 "Manages devices.\n\n"
e735f4d4
MP
1410 " -h --help Print this message\n"
1411 " --version Print version of the program\n"
1412 " --daemon Detach and run in the background\n"
1413 " --debug Enable debug output\n"
1414 " --children-max=INT Set maximum number of workers\n"
1415 " --exec-delay=SECONDS Seconds to wait before executing RUN=\n"
1416 " --event-timeout=SECONDS Seconds to wait before terminating an event\n"
1417 " --resolve-names=early|late|never\n"
1418 " When to resolve users and groups\n"
5eef597e
MP
1419 , program_invocation_short_name);
1420}
1421
1422static int parse_argv(int argc, char *argv[]) {
663996b3 1423 static const struct option options[] = {
5eef597e
MP
1424 { "daemon", no_argument, NULL, 'd' },
1425 { "debug", no_argument, NULL, 'D' },
1426 { "children-max", required_argument, NULL, 'c' },
1427 { "exec-delay", required_argument, NULL, 'e' },
1428 { "event-timeout", required_argument, NULL, 't' },
1429 { "resolve-names", required_argument, NULL, 'N' },
1430 { "help", no_argument, NULL, 'h' },
1431 { "version", no_argument, NULL, 'V' },
663996b3
MS
1432 {}
1433 };
663996b3 1434
5eef597e 1435 int c;
663996b3 1436
5eef597e
MP
1437 assert(argc >= 0);
1438 assert(argv);
14228c0d 1439
7035cd9e 1440 while ((c = getopt_long(argc, argv, "c:de:Dt:N:hV", options, NULL)) >= 0) {
5eef597e 1441 int r;
14228c0d 1442
5eef597e 1443 switch (c) {
663996b3 1444
663996b3 1445 case 'd':
5eef597e 1446 arg_daemonize = true;
663996b3
MS
1447 break;
1448 case 'c':
e3bff60a 1449 r = safe_atou(optarg, &arg_children_max);
5eef597e
MP
1450 if (r < 0)
1451 log_warning("Invalid --children-max ignored: %s", optarg);
663996b3
MS
1452 break;
1453 case 'e':
5eef597e
MP
1454 r = safe_atoi(optarg, &arg_exec_delay);
1455 if (r < 0)
1456 log_warning("Invalid --exec-delay ignored: %s", optarg);
1457 break;
1458 case 't':
1459 r = safe_atou64(optarg, &arg_event_timeout_usec);
1460 if (r < 0)
1461 log_warning("Invalid --event-timeout ignored: %s", optarg);
1462 else {
1463 arg_event_timeout_usec *= USEC_PER_SEC;
1464 arg_event_timeout_warn_usec = (arg_event_timeout_usec / 3) ? : 1;
1465 }
663996b3
MS
1466 break;
1467 case 'D':
5eef597e 1468 arg_debug = true;
663996b3
MS
1469 break;
1470 case 'N':
1471 if (streq(optarg, "early")) {
5eef597e 1472 arg_resolve_names = 1;
663996b3 1473 } else if (streq(optarg, "late")) {
5eef597e 1474 arg_resolve_names = 0;
663996b3 1475 } else if (streq(optarg, "never")) {
5eef597e 1476 arg_resolve_names = -1;
663996b3 1477 } else {
60f067b4 1478 log_error("resolve-names must be early, late or never");
5eef597e 1479 return 0;
663996b3
MS
1480 }
1481 break;
1482 case 'h':
5eef597e
MP
1483 help();
1484 return 0;
663996b3
MS
1485 case 'V':
1486 printf("%s\n", VERSION);
5eef597e
MP
1487 return 0;
1488 case '?':
1489 return -EINVAL;
663996b3 1490 default:
5eef597e
MP
1491 assert_not_reached("Unhandled option");
1492
663996b3
MS
1493 }
1494 }
1495
5eef597e
MP
1496 return 1;
1497}
1498
86f210e9 1499static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
e3bff60a 1500 _cleanup_(manager_freep) Manager *manager = NULL;
86f210e9 1501 int r, fd_worker, one = 1;
5eef597e 1502
e3bff60a 1503 assert(ret);
86f210e9
MP
1504 assert(fd_ctrl >= 0);
1505 assert(fd_uevent >= 0);
e3bff60a
MP
1506
1507 manager = new0(Manager, 1);
1508 if (!manager)
1509 return log_oom();
1510
e3bff60a
MP
1511 manager->fd_inotify = -1;
1512 manager->worker_watch[WRITE_END] = -1;
1513 manager->worker_watch[READ_END] = -1;
1514
1515 manager->udev = udev_new();
1516 if (!manager->udev)
1517 return log_error_errno(errno, "could not allocate udev context: %m");
1518
1519 udev_builtin_init(manager->udev);
1520
1521 manager->rules = udev_rules_new(manager->udev, arg_resolve_names);
1522 if (!manager->rules)
1523 return log_error_errno(ENOMEM, "error reading rules");
1524
1525 udev_list_node_init(&manager->events);
1526 udev_list_init(manager->udev, &manager->properties, true);
1527
86f210e9 1528 manager->cgroup = cgroup;
e3bff60a 1529
86f210e9
MP
1530 manager->ctrl = udev_ctrl_new_from_fd(manager->udev, fd_ctrl);
1531 if (!manager->ctrl)
1532 return log_error_errno(EINVAL, "error taking over udev control socket");
e3bff60a 1533
86f210e9
MP
1534 manager->monitor = udev_monitor_new_from_netlink_fd(manager->udev, "kernel", fd_uevent);
1535 if (!manager->monitor)
1536 return log_error_errno(EINVAL, "error taking over netlink socket");
e3bff60a
MP
1537
1538 /* unnamed socket from workers to the main daemon */
1539 r = socketpair(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0, manager->worker_watch);
1540 if (r < 0)
1541 return log_error_errno(errno, "error creating socketpair: %m");
1542
86f210e9 1543 fd_worker = manager->worker_watch[READ_END];
e3bff60a 1544
86f210e9 1545 r = setsockopt(fd_worker, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
e3bff60a
MP
1546 if (r < 0)
1547 return log_error_errno(errno, "could not enable SO_PASSCRED: %m");
1548
1549 manager->fd_inotify = udev_watch_init(manager->udev);
1550 if (manager->fd_inotify < 0)
1551 return log_error_errno(ENOMEM, "error initializing inotify");
1552
1553 udev_watch_restore(manager->udev);
1554
1555 /* block and listen to all signals on signalfd */
86f210e9
MP
1556 assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGHUP, SIGCHLD, -1) >= 0);
1557
1558 r = sd_event_default(&manager->event);
1559 if (r < 0)
db2df898 1560 return log_error_errno(r, "could not allocate event loop: %m");
86f210e9
MP
1561
1562 r = sd_event_add_signal(manager->event, NULL, SIGINT, on_sigterm, manager);
1563 if (r < 0)
1564 return log_error_errno(r, "error creating sigint event source: %m");
1565
1566 r = sd_event_add_signal(manager->event, NULL, SIGTERM, on_sigterm, manager);
1567 if (r < 0)
1568 return log_error_errno(r, "error creating sigterm event source: %m");
1569
1570 r = sd_event_add_signal(manager->event, NULL, SIGHUP, on_sighup, manager);
1571 if (r < 0)
1572 return log_error_errno(r, "error creating sighup event source: %m");
1573
1574 r = sd_event_add_signal(manager->event, NULL, SIGCHLD, on_sigchld, manager);
1575 if (r < 0)
1576 return log_error_errno(r, "error creating sigchld event source: %m");
1577
1578 r = sd_event_set_watchdog(manager->event, true);
1579 if (r < 0)
1580 return log_error_errno(r, "error creating watchdog event source: %m");
1581
1582 r = sd_event_add_io(manager->event, &manager->ctrl_event, fd_ctrl, EPOLLIN, on_ctrl_msg, manager);
1583 if (r < 0)
1584 return log_error_errno(r, "error creating ctrl event source: %m");
1585
1586 /* This needs to be after the inotify and uevent handling, to make sure
1587 * that the ping is send back after fully processing the pending uevents
1588 * (including the synthetic ones we may create due to inotify events).
1589 */
1590 r = sd_event_source_set_priority(manager->ctrl_event, SD_EVENT_PRIORITY_IDLE);
1591 if (r < 0)
1592 return log_error_errno(r, "cold not set IDLE event priority for ctrl event source: %m");
1593
1594 r = sd_event_add_io(manager->event, &manager->inotify_event, manager->fd_inotify, EPOLLIN, on_inotify, manager);
1595 if (r < 0)
1596 return log_error_errno(r, "error creating inotify event source: %m");
1597
1598 r = sd_event_add_io(manager->event, &manager->uevent_event, fd_uevent, EPOLLIN, on_uevent, manager);
1599 if (r < 0)
1600 return log_error_errno(r, "error creating uevent event source: %m");
1601
1602 r = sd_event_add_io(manager->event, NULL, fd_worker, EPOLLIN, on_worker, manager);
1603 if (r < 0)
1604 return log_error_errno(r, "error creating worker event source: %m");
1605
1606 r = sd_event_add_post(manager->event, NULL, on_post, manager);
1607 if (r < 0)
1608 return log_error_errno(r, "error creating post event source: %m");
e3bff60a
MP
1609
1610 *ret = manager;
1611 manager = NULL;
1612
86f210e9 1613 return 0;
e3bff60a
MP
1614}
1615
fb183854 1616static int run(int fd_ctrl, int fd_uevent, const char *cgroup) {
e3bff60a 1617 _cleanup_(manager_freep) Manager *manager = NULL;
fb183854
MP
1618 int r;
1619
1620 r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
1621 if (r < 0) {
1622 r = log_error_errno(r, "failed to allocate manager object: %m");
1623 goto exit;
1624 }
1625
1626 r = udev_rules_apply_static_dev_perms(manager->rules);
1627 if (r < 0)
1628 log_error_errno(r, "failed to apply permissions on static device nodes: %m");
1629
1630 (void) sd_notify(false,
1631 "READY=1\n"
1632 "STATUS=Processing...");
1633
1634 r = sd_event_loop(manager->event);
1635 if (r < 0) {
1636 log_error_errno(r, "event loop failed: %m");
1637 goto exit;
1638 }
1639
1640 sd_event_get_exit_code(manager->event, &r);
1641
1642exit:
1643 sd_notify(false,
1644 "STOPPING=1\n"
1645 "STATUS=Shutting down...");
1646 if (manager)
1647 udev_ctrl_cleanup(manager->ctrl);
1648 return r;
1649}
1650
1651int main(int argc, char *argv[]) {
86f210e9 1652 _cleanup_free_ char *cgroup = NULL;
4c89c718
MP
1653 int fd_ctrl = -1, fd_uevent = -1;
1654 int r;
5eef597e
MP
1655
1656 log_set_target(LOG_TARGET_AUTO);
1657 log_parse_environment();
1658 log_open();
1659
5eef597e
MP
1660 r = parse_argv(argc, argv);
1661 if (r <= 0)
1662 goto exit;
1663
e3bff60a
MP
1664 r = parse_proc_cmdline(parse_proc_cmdline_item);
1665 if (r < 0)
1666 log_warning_errno(r, "failed to parse kernel command line, ignoring: %m");
663996b3 1667
fb183854
MP
1668 if (arg_debug) {
1669 log_set_target(LOG_TARGET_CONSOLE);
5eef597e 1670 log_set_max_level(LOG_DEBUG);
fb183854 1671 }
5eef597e 1672
663996b3 1673 if (getuid() != 0) {
e3bff60a 1674 r = log_error_errno(EPERM, "root privileges required");
5eef597e
MP
1675 goto exit;
1676 }
1677
e3bff60a
MP
1678 if (arg_children_max == 0) {
1679 cpu_set_t cpu_set;
5eef597e 1680
e3bff60a 1681 arg_children_max = 8;
663996b3 1682
6300502b 1683 if (sched_getaffinity(0, sizeof(cpu_set), &cpu_set) == 0)
86f210e9 1684 arg_children_max += CPU_COUNT(&cpu_set) * 2;
f47781d8 1685
e3bff60a 1686 log_debug("set children_max to %u", arg_children_max);
5eef597e 1687 }
663996b3 1688
e3bff60a
MP
1689 /* set umask before creating any file/directory */
1690 r = chdir("/");
1691 if (r < 0) {
1692 r = log_error_errno(errno, "could not change dir to /: %m");
1693 goto exit;
663996b3
MS
1694 }
1695
e3bff60a
MP
1696 umask(022);
1697
aa27b158 1698 r = mac_selinux_init();
e3bff60a
MP
1699 if (r < 0) {
1700 log_error_errno(r, "could not initialize labelling: %m");
663996b3
MS
1701 goto exit;
1702 }
1703
e3bff60a
MP
1704 r = mkdir("/run/udev", 0755);
1705 if (r < 0 && errno != EEXIST) {
1706 r = log_error_errno(errno, "could not create /run/udev: %m");
663996b3
MS
1707 goto exit;
1708 }
1709
e3bff60a 1710 dev_setup(NULL, UID_INVALID, GID_INVALID);
5eef597e 1711
86f210e9
MP
1712 if (getppid() == 1) {
1713 /* get our own cgroup, we regularly kill everything udev has left behind
1714 we only do this on systemd systems, and only if we are directly spawned
1715 by PID1. otherwise we are not guaranteed to have a dedicated cgroup */
1716 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
fb183854 1717 if (r < 0) {
aa27b158 1718 if (r == -ENOENT || r == -ENOMEDIUM)
fb183854
MP
1719 log_debug_errno(r, "did not find dedicated cgroup: %m");
1720 else
1721 log_warning_errno(r, "failed to get cgroup: %m");
1722 }
86f210e9 1723 }
e3bff60a 1724
86f210e9
MP
1725 r = listen_fds(&fd_ctrl, &fd_uevent);
1726 if (r < 0) {
1727 r = log_error_errno(r, "could not listen on fds: %m");
1728 goto exit;
1729 }
5eef597e
MP
1730
1731 if (arg_daemonize) {
663996b3
MS
1732 pid_t pid;
1733
86f210e9
MP
1734 log_info("starting version " VERSION);
1735
1736 /* connect /dev/null to stdin, stdout, stderr */
1737 if (log_get_max_level() < LOG_DEBUG)
1738 (void) make_null_stdio();
1739
663996b3
MS
1740 pid = fork();
1741 switch (pid) {
1742 case 0:
1743 break;
1744 case -1:
e3bff60a 1745 r = log_error_errno(errno, "fork of daemon failed: %m");
663996b3
MS
1746 goto exit;
1747 default:
e3bff60a
MP
1748 mac_selinux_finish();
1749 log_close();
1750 _exit(EXIT_SUCCESS);
663996b3
MS
1751 }
1752
1753 setsid();
1754
7035cd9e 1755 write_string_file("/proc/self/oom_score_adj", "-1000", 0);
86f210e9 1756 }
663996b3 1757
fb183854 1758 r = run(fd_ctrl, fd_uevent, cgroup);
86f210e9 1759
663996b3 1760exit:
5eef597e 1761 mac_selinux_finish();
663996b3 1762 log_close();
e3bff60a 1763 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
663996b3 1764}