]> git.proxmox.com Git - systemd.git/blame - src/udev/udevd.c
New upstream version 236
[systemd.git] / src / udev / udevd.c
CommitLineData
52ad194e 1/* SPDX-License-Identifier: GPL-2.0+ */
663996b3
MS
2/*
3 * Copyright (C) 2004-2012 Kay Sievers <kay@vrfy.org>
4 * Copyright (C) 2004 Chris Friesen <chris_friesen@sympatico.ca>
5 * Copyright (C) 2009 Canonical Ltd.
6 * Copyright (C) 2009 Scott James Remnant <scott@netsplit.com>
7 *
8 * This program is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21
663996b3 22#include <errno.h>
6300502b
MP
23#include <fcntl.h>
24#include <getopt.h>
25#include <signal.h>
26#include <stdbool.h>
27#include <stddef.h>
663996b3
MS
28#include <stdio.h>
29#include <stdlib.h>
663996b3 30#include <string.h>
6300502b 31#include <sys/epoll.h>
60f067b4 32#include <sys/file.h>
6300502b
MP
33#include <sys/inotify.h>
34#include <sys/ioctl.h>
35#include <sys/mount.h>
663996b3 36#include <sys/prctl.h>
663996b3 37#include <sys/signalfd.h>
6300502b 38#include <sys/socket.h>
663996b3 39#include <sys/stat.h>
6300502b
MP
40#include <sys/time.h>
41#include <sys/wait.h>
42#include <unistd.h>
663996b3 43
663996b3 44#include "sd-daemon.h"
86f210e9
MP
45#include "sd-event.h"
46
db2df898 47#include "alloc-util.h"
663996b3 48#include "cgroup-util.h"
6300502b 49#include "cpu-set-util.h"
663996b3 50#include "dev-setup.h"
db2df898 51#include "fd-util.h"
663996b3 52#include "fileio.h"
2897b343 53#include "format-util.h"
db2df898 54#include "fs-util.h"
e3bff60a 55#include "hashmap.h"
db2df898 56#include "io-util.h"
52ad194e 57#include "list.h"
6300502b 58#include "netlink-util.h"
db2df898
MP
59#include "parse-util.h"
60#include "proc-cmdline.h"
6300502b
MP
61#include "process-util.h"
62#include "selinux-util.h"
63#include "signal-util.h"
db2df898
MP
64#include "socket-util.h"
65#include "string-util.h"
6300502b
MP
66#include "terminal-util.h"
67#include "udev-util.h"
68#include "udev.h"
db2df898 69#include "user-util.h"
663996b3 70
5eef597e
MP
71static bool arg_debug = false;
72static int arg_daemonize = false;
73static int arg_resolve_names = 1;
e3bff60a 74static unsigned arg_children_max;
5eef597e
MP
75static int arg_exec_delay;
76static usec_t arg_event_timeout_usec = 180 * USEC_PER_SEC;
77static usec_t arg_event_timeout_warn_usec = 180 * USEC_PER_SEC / 3;
e3bff60a
MP
78
79typedef struct Manager {
80 struct udev *udev;
86f210e9 81 sd_event *event;
e3bff60a 82 Hashmap *workers;
52ad194e 83 LIST_HEAD(struct event, events);
86f210e9 84 const char *cgroup;
e3bff60a 85 pid_t pid; /* the process that originally allocated the manager object */
e3bff60a
MP
86
87 struct udev_rules *rules;
88 struct udev_list properties;
89
90 struct udev_monitor *monitor;
91 struct udev_ctrl *ctrl;
92 struct udev_ctrl_connection *ctrl_conn_blocking;
e3bff60a 93 int fd_inotify;
e3bff60a
MP
94 int worker_watch[2];
95
86f210e9
MP
96 sd_event_source *ctrl_event;
97 sd_event_source *uevent_event;
98 sd_event_source *inotify_event;
99
100 usec_t last_usec;
101
e3bff60a 102 bool stop_exec_queue:1;
e3bff60a
MP
103 bool exit:1;
104} Manager;
663996b3
MS
105
/* Life cycle of a queued uevent. */
enum event_state {
        EVENT_UNDEF   = 0,   /* also used as "match any state" by event_queue_cleanup() */
        EVENT_QUEUED  = 1,   /* waiting for a worker */
        EVENT_RUNNING = 2,   /* attached to a worker */
};
111
112struct event {
52ad194e 113 LIST_FIELDS(struct event, event);
e3bff60a 114 Manager *manager;
663996b3
MS
115 struct udev *udev;
116 struct udev_device *dev;
e3bff60a
MP
117 struct udev_device *dev_kernel;
118 struct worker *worker;
663996b3 119 enum event_state state;
663996b3
MS
120 unsigned long long int delaying_seqnum;
121 unsigned long long int seqnum;
122 const char *devpath;
123 size_t devpath_len;
124 const char *devpath_old;
125 dev_t devnum;
126 int ifindex;
127 bool is_block;
86f210e9
MP
128 sd_event_source *timeout_warning;
129 sd_event_source *timeout;
663996b3
MS
130};
131
e3bff60a 132static void event_queue_cleanup(Manager *manager, enum event_state type);
663996b3
MS
133
/* Life cycle of a worker process as tracked by the main daemon. */
enum worker_state {
        WORKER_UNDEF   = 0,
        WORKER_RUNNING = 1,   /* has an event attached */
        WORKER_IDLE    = 2,   /* reported back and may receive another event */
        WORKER_KILLED  = 3,   /* a signal was sent to it; it is not handed new events */
};
140
141struct worker {
e3bff60a 142 Manager *manager;
663996b3
MS
143 int refcount;
144 pid_t pid;
145 struct udev_monitor *monitor;
146 enum worker_state state;
147 struct event *event;
663996b3
MS
148};
149
/* Passed from a worker to the main process once an event has been handled.
 * It carries no fields; the receiver identifies the sender through the
 * SCM_CREDENTIALS control data attached to the message. */
struct worker_message { };
153
e3bff60a
MP
154static void event_free(struct event *event) {
155 int r;
156
157 if (!event)
158 return;
52ad194e 159 assert(event->manager);
663996b3 160
52ad194e 161 LIST_REMOVE(event, event->manager->events, event);
663996b3 162 udev_device_unref(event->dev);
e3bff60a
MP
163 udev_device_unref(event->dev_kernel);
164
86f210e9
MP
165 sd_event_source_unref(event->timeout_warning);
166 sd_event_source_unref(event->timeout);
167
e3bff60a
MP
168 if (event->worker)
169 event->worker->event = NULL;
170
52ad194e 171 if (LIST_IS_EMPTY(event->manager->events)) {
e3bff60a 172 /* only clean up the queue from the process that created it */
f5e65279 173 if (event->manager->pid == getpid_cached()) {
e3bff60a
MP
174 r = unlink("/run/udev/queue");
175 if (r < 0)
176 log_warning_errno(errno, "could not unlink /run/udev/queue: %m");
177 }
178 }
179
663996b3
MS
180 free(event);
181}
182
e3bff60a
MP
183static void worker_free(struct worker *worker) {
184 if (!worker)
185 return;
663996b3 186
e3bff60a
MP
187 assert(worker->manager);
188
db2df898 189 hashmap_remove(worker->manager->workers, PID_TO_PTR(worker->pid));
663996b3 190 udev_monitor_unref(worker->monitor);
e3bff60a
MP
191 event_free(worker->event);
192
663996b3
MS
193 free(worker);
194}
195
e3bff60a
MP
196static void manager_workers_free(Manager *manager) {
197 struct worker *worker;
198 Iterator i;
199
200 assert(manager);
201
202 HASHMAP_FOREACH(worker, manager->workers, i)
203 worker_free(worker);
204
205 manager->workers = hashmap_free(manager->workers);
206}
207
208static int worker_new(struct worker **ret, Manager *manager, struct udev_monitor *worker_monitor, pid_t pid) {
209 _cleanup_free_ struct worker *worker = NULL;
210 int r;
211
212 assert(ret);
213 assert(manager);
214 assert(worker_monitor);
215 assert(pid > 1);
216
217 worker = new0(struct worker, 1);
218 if (!worker)
219 return -ENOMEM;
220
221 worker->refcount = 1;
222 worker->manager = manager;
223 /* close monitor, but keep address around */
224 udev_monitor_disconnect(worker_monitor);
225 worker->monitor = udev_monitor_ref(worker_monitor);
226 worker->pid = pid;
227
228 r = hashmap_ensure_allocated(&manager->workers, NULL);
229 if (r < 0)
230 return r;
231
db2df898 232 r = hashmap_put(manager->workers, PID_TO_PTR(pid), worker);
e3bff60a
MP
233 if (r < 0)
234 return r;
235
236 *ret = worker;
237 worker = NULL;
238
239 return 0;
240}
241
242static int on_event_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
243 struct event *event = userdata;
244
245 assert(event);
246 assert(event->worker);
247
248 kill_and_sigcont(event->worker->pid, SIGKILL);
249 event->worker->state = WORKER_KILLED;
250
251 log_error("seq %llu '%s' killed", udev_device_get_seqnum(event->dev), event->devpath);
252
253 return 1;
254}
255
256static int on_event_timeout_warning(sd_event_source *s, uint64_t usec, void *userdata) {
257 struct event *event = userdata;
258
259 assert(event);
260
261 log_warning("seq %llu '%s' is taking a long time", udev_device_get_seqnum(event->dev), event->devpath);
262
263 return 1;
264}
265
266static void worker_attach_event(struct worker *worker, struct event *event) {
86f210e9
MP
267 sd_event *e;
268 uint64_t usec;
86f210e9 269
e3bff60a 270 assert(worker);
86f210e9 271 assert(worker->manager);
e3bff60a
MP
272 assert(event);
273 assert(!event->worker);
274 assert(!worker->event);
275
276 worker->state = WORKER_RUNNING;
277 worker->event = event;
278 event->state = EVENT_RUNNING;
e3bff60a 279 event->worker = worker;
86f210e9
MP
280
281 e = worker->manager->event;
282
81c58355 283 assert_se(sd_event_now(e, CLOCK_MONOTONIC, &usec) >= 0);
86f210e9 284
81c58355 285 (void) sd_event_add_time(e, &event->timeout_warning, CLOCK_MONOTONIC,
86f210e9
MP
286 usec + arg_event_timeout_warn_usec, USEC_PER_SEC, on_event_timeout_warning, event);
287
81c58355 288 (void) sd_event_add_time(e, &event->timeout, CLOCK_MONOTONIC,
86f210e9 289 usec + arg_event_timeout_usec, USEC_PER_SEC, on_event_timeout, event);
e3bff60a
MP
290}
291
292static void manager_free(Manager *manager) {
293 if (!manager)
663996b3 294 return;
e3bff60a
MP
295
296 udev_builtin_exit(manager->udev);
297
86f210e9
MP
298 sd_event_source_unref(manager->ctrl_event);
299 sd_event_source_unref(manager->uevent_event);
300 sd_event_source_unref(manager->inotify_event);
301
e3bff60a 302 udev_unref(manager->udev);
86f210e9 303 sd_event_unref(manager->event);
e3bff60a
MP
304 manager_workers_free(manager);
305 event_queue_cleanup(manager, EVENT_UNDEF);
306
307 udev_monitor_unref(manager->monitor);
308 udev_ctrl_unref(manager->ctrl);
309 udev_ctrl_connection_unref(manager->ctrl_conn_blocking);
310
311 udev_list_cleanup(&manager->properties);
312 udev_rules_unref(manager->rules);
e3bff60a 313
e3bff60a
MP
314 safe_close(manager->fd_inotify);
315 safe_close_pair(manager->worker_watch);
316
317 free(manager);
663996b3
MS
318}
319
e3bff60a 320DEFINE_TRIVIAL_CLEANUP_FUNC(Manager*, manager_free);
663996b3 321
e3bff60a
MP
322static int worker_send_message(int fd) {
323 struct worker_message message = {};
663996b3 324
e3bff60a 325 return loop_write(fd, &message, sizeof(message), false);
663996b3
MS
326}
327
e3bff60a 328static void worker_spawn(Manager *manager, struct event *event) {
663996b3 329 struct udev *udev = event->udev;
e3bff60a 330 _cleanup_udev_monitor_unref_ struct udev_monitor *worker_monitor = NULL;
663996b3 331 pid_t pid;
fb183854 332 int r = 0;
663996b3
MS
333
334 /* listen for new events */
335 worker_monitor = udev_monitor_new_from_netlink(udev, NULL);
336 if (worker_monitor == NULL)
337 return;
338 /* allow the main daemon netlink address to send devices to the worker */
e3bff60a 339 udev_monitor_allow_unicast_sender(worker_monitor, manager->monitor);
fb183854
MP
340 r = udev_monitor_enable_receiving(worker_monitor);
341 if (r < 0)
342 log_error_errno(r, "worker: could not enable receiving of device: %m");
663996b3 343
663996b3
MS
344 pid = fork();
345 switch (pid) {
346 case 0: {
347 struct udev_device *dev = NULL;
4c89c718 348 _cleanup_(sd_netlink_unrefp) sd_netlink *rtnl = NULL;
663996b3 349 int fd_monitor;
e3bff60a 350 _cleanup_close_ int fd_signal = -1, fd_ep = -1;
86f210e9
MP
351 struct epoll_event ep_signal = { .events = EPOLLIN };
352 struct epoll_event ep_monitor = { .events = EPOLLIN };
663996b3 353 sigset_t mask;
663996b3
MS
354
355 /* take initial device from queue */
356 dev = event->dev;
357 event->dev = NULL;
358
86f210e9
MP
359 unsetenv("NOTIFY_SOCKET");
360
e3bff60a
MP
361 manager_workers_free(manager);
362 event_queue_cleanup(manager, EVENT_UNDEF);
86f210e9 363
e3bff60a 364 manager->monitor = udev_monitor_unref(manager->monitor);
86f210e9 365 manager->ctrl_conn_blocking = udev_ctrl_connection_unref(manager->ctrl_conn_blocking);
e3bff60a 366 manager->ctrl = udev_ctrl_unref(manager->ctrl);
e3bff60a 367 manager->worker_watch[READ_END] = safe_close(manager->worker_watch[READ_END]);
86f210e9
MP
368
369 manager->ctrl_event = sd_event_source_unref(manager->ctrl_event);
370 manager->uevent_event = sd_event_source_unref(manager->uevent_event);
371 manager->inotify_event = sd_event_source_unref(manager->inotify_event);
372
373 manager->event = sd_event_unref(manager->event);
663996b3
MS
374
375 sigfillset(&mask);
376 fd_signal = signalfd(-1, &mask, SFD_NONBLOCK|SFD_CLOEXEC);
377 if (fd_signal < 0) {
e3bff60a 378 r = log_error_errno(errno, "error creating signalfd %m");
663996b3
MS
379 goto out;
380 }
86f210e9
MP
381 ep_signal.data.fd = fd_signal;
382
383 fd_monitor = udev_monitor_get_fd(worker_monitor);
384 ep_monitor.data.fd = fd_monitor;
663996b3
MS
385
386 fd_ep = epoll_create1(EPOLL_CLOEXEC);
387 if (fd_ep < 0) {
e3bff60a 388 r = log_error_errno(errno, "error creating epoll fd: %m");
663996b3
MS
389 goto out;
390 }
391
663996b3
MS
392 if (epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_signal, &ep_signal) < 0 ||
393 epoll_ctl(fd_ep, EPOLL_CTL_ADD, fd_monitor, &ep_monitor) < 0) {
e3bff60a 394 r = log_error_errno(errno, "fail to add fds to epoll: %m");
663996b3
MS
395 goto out;
396 }
397
aa27b158
MP
398 /* Request TERM signal if parent exits.
399 Ignore error, not much we can do in that case. */
400 (void) prctl(PR_SET_PDEATHSIG, SIGTERM);
663996b3 401
aa27b158 402 /* Reset OOM score, we only protect the main daemon. */
7035cd9e 403 write_string_file("/proc/self/oom_score_adj", "0", 0);
663996b3
MS
404
405 for (;;) {
406 struct udev_event *udev_event;
60f067b4 407 int fd_lock = -1;
663996b3 408
86f210e9
MP
409 assert(dev);
410
60f067b4 411 log_debug("seq %llu running", udev_device_get_seqnum(dev));
663996b3
MS
412 udev_event = udev_event_new(dev);
413 if (udev_event == NULL) {
e3bff60a 414 r = -ENOMEM;
663996b3
MS
415 goto out;
416 }
417
5eef597e
MP
418 if (arg_exec_delay > 0)
419 udev_event->exec_delay = arg_exec_delay;
663996b3 420
60f067b4 421 /*
5eef597e 422 * Take a shared lock on the device node; this establishes
60f067b4 423 * a concept of device "ownership" to serialize device
5eef597e 424 * access. External processes holding an exclusive lock will
60f067b4 425 * cause udev to skip the event handling; in the case udev
5eef597e 426 * acquired the lock, the external process can block until
60f067b4
JS
427 * udev has finished its event handling.
428 */
5eef597e
MP
429 if (!streq_ptr(udev_device_get_action(dev), "remove") &&
430 streq_ptr("block", udev_device_get_subsystem(dev)) &&
431 !startswith(udev_device_get_sysname(dev), "dm-") &&
432 !startswith(udev_device_get_sysname(dev), "md")) {
60f067b4
JS
433 struct udev_device *d = dev;
434
435 if (streq_ptr("partition", udev_device_get_devtype(d)))
436 d = udev_device_get_parent(d);
437
438 if (d) {
439 fd_lock = open(udev_device_get_devnode(d), O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
440 if (fd_lock >= 0 && flock(fd_lock, LOCK_SH|LOCK_NB) < 0) {
f47781d8 441 log_debug_errno(errno, "Unable to flock(%s), skipping event handling: %m", udev_device_get_devnode(d));
60f067b4
JS
442 fd_lock = safe_close(fd_lock);
443 goto skip;
444 }
445 }
446 }
447
5eef597e
MP
448 /* needed for renaming netifs */
449 udev_event->rtnl = rtnl;
450
663996b3 451 /* apply rules, create node, symlinks */
f47781d8
MP
452 udev_event_execute_rules(udev_event,
453 arg_event_timeout_usec, arg_event_timeout_warn_usec,
e3bff60a 454 &manager->properties,
86f210e9 455 manager->rules);
5eef597e 456
f47781d8 457 udev_event_execute_run(udev_event,
86f210e9 458 arg_event_timeout_usec, arg_event_timeout_warn_usec);
663996b3 459
f47781d8
MP
460 if (udev_event->rtnl)
461 /* in case rtnl was initialized */
86f210e9 462 rtnl = sd_netlink_ref(udev_event->rtnl);
663996b3
MS
463
464 /* apply/restore inotify watch */
60f067b4 465 if (udev_event->inotify_watch) {
663996b3
MS
466 udev_watch_begin(udev, dev);
467 udev_device_update_db(dev);
468 }
469
60f067b4
JS
470 safe_close(fd_lock);
471
663996b3
MS
472 /* send processed event back to libudev listeners */
473 udev_monitor_send_device(worker_monitor, NULL, dev);
474
60f067b4 475skip:
e3bff60a 476 log_debug("seq %llu processed", udev_device_get_seqnum(dev));
663996b3 477
e3bff60a
MP
478 /* send udevd the result of the event execution */
479 r = worker_send_message(manager->worker_watch[WRITE_END]);
480 if (r < 0)
481 log_error_errno(r, "failed to send result of seq %llu to main daemon: %m",
482 udev_device_get_seqnum(dev));
663996b3
MS
483
484 udev_device_unref(dev);
485 dev = NULL;
486
663996b3
MS
487 udev_event_unref(udev_event);
488
489 /* wait for more device messages from main udevd, or term signal */
490 while (dev == NULL) {
491 struct epoll_event ev[4];
492 int fdcount;
493 int i;
494
495 fdcount = epoll_wait(fd_ep, ev, ELEMENTSOF(ev), -1);
496 if (fdcount < 0) {
497 if (errno == EINTR)
498 continue;
e3bff60a 499 r = log_error_errno(errno, "failed to poll: %m");
663996b3
MS
500 goto out;
501 }
502
503 for (i = 0; i < fdcount; i++) {
504 if (ev[i].data.fd == fd_monitor && ev[i].events & EPOLLIN) {
505 dev = udev_monitor_receive_device(worker_monitor);
506 break;
507 } else if (ev[i].data.fd == fd_signal && ev[i].events & EPOLLIN) {
508 struct signalfd_siginfo fdsi;
509 ssize_t size;
510
511 size = read(fd_signal, &fdsi, sizeof(struct signalfd_siginfo));
512 if (size != sizeof(struct signalfd_siginfo))
513 continue;
514 switch (fdsi.ssi_signo) {
515 case SIGTERM:
516 goto out;
517 }
518 }
519 }
520 }
521 }
522out:
523 udev_device_unref(dev);
e3bff60a 524 manager_free(manager);
663996b3 525 log_close();
e3bff60a 526 _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
663996b3
MS
527 }
528 case -1:
663996b3 529 event->state = EVENT_QUEUED;
f47781d8 530 log_error_errno(errno, "fork of child failed: %m");
663996b3
MS
531 break;
532 default:
e3bff60a
MP
533 {
534 struct worker *worker;
e3bff60a
MP
535
536 r = worker_new(&worker, manager, worker_monitor, pid);
537 if (r < 0)
538 return;
539
540 worker_attach_event(worker, event);
541
e735f4d4 542 log_debug("seq %llu forked new worker ["PID_FMT"]", udev_device_get_seqnum(event->dev), pid);
663996b3
MS
543 break;
544 }
e3bff60a 545 }
663996b3
MS
546}
547
e3bff60a
MP
548static void event_run(Manager *manager, struct event *event) {
549 struct worker *worker;
550 Iterator i;
551
552 assert(manager);
553 assert(event);
663996b3 554
e3bff60a 555 HASHMAP_FOREACH(worker, manager->workers, i) {
663996b3
MS
556 ssize_t count;
557
558 if (worker->state != WORKER_IDLE)
559 continue;
560
e3bff60a 561 count = udev_monitor_send_device(manager->monitor, worker->monitor, event->dev);
663996b3 562 if (count < 0) {
e735f4d4
MP
563 log_error_errno(errno, "worker ["PID_FMT"] did not accept message %zi (%m), kill it",
564 worker->pid, count);
663996b3
MS
565 kill(worker->pid, SIGKILL);
566 worker->state = WORKER_KILLED;
567 continue;
568 }
e3bff60a 569 worker_attach_event(worker, event);
663996b3
MS
570 return;
571 }
572
e3bff60a 573 if (hashmap_size(manager->workers) >= arg_children_max) {
5eef597e 574 if (arg_children_max > 1)
e3bff60a 575 log_debug("maximum number (%i) of children reached", hashmap_size(manager->workers));
663996b3
MS
576 return;
577 }
578
579 /* start new worker and pass initial device */
e3bff60a 580 worker_spawn(manager, event);
663996b3
MS
581}
582
e3bff60a 583static int event_queue_insert(Manager *manager, struct udev_device *dev) {
663996b3 584 struct event *event;
e3bff60a
MP
585 int r;
586
587 assert(manager);
588 assert(dev);
589
86f210e9
MP
590 /* only one process can add events to the queue */
591 if (manager->pid == 0)
f5e65279 592 manager->pid = getpid_cached();
86f210e9 593
f5e65279 594 assert(manager->pid == getpid_cached());
663996b3 595
60f067b4 596 event = new0(struct event, 1);
e3bff60a
MP
597 if (!event)
598 return -ENOMEM;
663996b3
MS
599
600 event->udev = udev_device_get_udev(dev);
e3bff60a 601 event->manager = manager;
663996b3 602 event->dev = dev;
e3bff60a
MP
603 event->dev_kernel = udev_device_shallow_clone(dev);
604 udev_device_copy_properties(event->dev_kernel, dev);
663996b3
MS
605 event->seqnum = udev_device_get_seqnum(dev);
606 event->devpath = udev_device_get_devpath(dev);
607 event->devpath_len = strlen(event->devpath);
608 event->devpath_old = udev_device_get_devpath_old(dev);
609 event->devnum = udev_device_get_devnum(dev);
610 event->is_block = streq("block", udev_device_get_subsystem(dev));
611 event->ifindex = udev_device_get_ifindex(dev);
663996b3 612
60f067b4 613 log_debug("seq %llu queued, '%s' '%s'", udev_device_get_seqnum(dev),
663996b3
MS
614 udev_device_get_action(dev), udev_device_get_subsystem(dev));
615
616 event->state = EVENT_QUEUED;
e3bff60a 617
52ad194e 618 if (LIST_IS_EMPTY(manager->events)) {
e3bff60a
MP
619 r = touch("/run/udev/queue");
620 if (r < 0)
621 log_warning_errno(r, "could not touch /run/udev/queue: %m");
622 }
623
52ad194e 624 LIST_APPEND(event, manager->events, event);
e3bff60a 625
663996b3
MS
626 return 0;
627}
628
e3bff60a
MP
629static void manager_kill_workers(Manager *manager) {
630 struct worker *worker;
631 Iterator i;
663996b3 632
e3bff60a 633 assert(manager);
663996b3 634
e3bff60a 635 HASHMAP_FOREACH(worker, manager->workers, i) {
663996b3
MS
636 if (worker->state == WORKER_KILLED)
637 continue;
638
639 worker->state = WORKER_KILLED;
640 kill(worker->pid, SIGTERM);
641 }
642}
643
644/* lookup event for identical, parent, child device */
e3bff60a 645static bool is_devpath_busy(Manager *manager, struct event *event) {
52ad194e 646 struct event *loop_event;
663996b3
MS
647 size_t common;
648
649 /* check if queue contains events we depend on */
52ad194e 650 LIST_FOREACH(event, loop_event, manager->events) {
663996b3
MS
651 /* we already found a later event, earlier can not block us, no need to check again */
652 if (loop_event->seqnum < event->delaying_seqnum)
653 continue;
654
655 /* event we checked earlier still exists, no need to check again */
656 if (loop_event->seqnum == event->delaying_seqnum)
657 return true;
658
659 /* found ourself, no later event can block us */
660 if (loop_event->seqnum >= event->seqnum)
661 break;
662
663 /* check major/minor */
664 if (major(event->devnum) != 0 && event->devnum == loop_event->devnum && event->is_block == loop_event->is_block)
665 return true;
666
667 /* check network device ifindex */
668 if (event->ifindex != 0 && event->ifindex == loop_event->ifindex)
669 return true;
670
671 /* check our old name */
672 if (event->devpath_old != NULL && streq(loop_event->devpath, event->devpath_old)) {
673 event->delaying_seqnum = loop_event->seqnum;
674 return true;
675 }
676
677 /* compare devpath */
678 common = MIN(loop_event->devpath_len, event->devpath_len);
679
680 /* one devpath is contained in the other? */
681 if (memcmp(loop_event->devpath, event->devpath, common) != 0)
682 continue;
683
684 /* identical device event found */
685 if (loop_event->devpath_len == event->devpath_len) {
686 /* devices names might have changed/swapped in the meantime */
687 if (major(event->devnum) != 0 && (event->devnum != loop_event->devnum || event->is_block != loop_event->is_block))
688 continue;
689 if (event->ifindex != 0 && event->ifindex != loop_event->ifindex)
690 continue;
691 event->delaying_seqnum = loop_event->seqnum;
692 return true;
693 }
694
663996b3
MS
695 /* parent device event found */
696 if (event->devpath[common] == '/') {
697 event->delaying_seqnum = loop_event->seqnum;
698 return true;
699 }
700
701 /* child device event found */
702 if (loop_event->devpath[common] == '/') {
703 event->delaying_seqnum = loop_event->seqnum;
704 return true;
705 }
706
707 /* no matching device */
708 continue;
709 }
710
711 return false;
712}
713
86f210e9
MP
714static int on_exit_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
715 Manager *manager = userdata;
716
717 assert(manager);
718
719 log_error_errno(ETIMEDOUT, "giving up waiting for workers to finish");
720
721 sd_event_exit(manager->event, -ETIMEDOUT);
722
723 return 1;
724}
725
726static void manager_exit(Manager *manager) {
727 uint64_t usec;
728 int r;
729
730 assert(manager);
731
732 manager->exit = true;
733
734 sd_notify(false,
735 "STOPPING=1\n"
736 "STATUS=Starting shutdown...");
737
738 /* close sources of new events and discard buffered events */
739 manager->ctrl_event = sd_event_source_unref(manager->ctrl_event);
740 manager->ctrl = udev_ctrl_unref(manager->ctrl);
741
742 manager->inotify_event = sd_event_source_unref(manager->inotify_event);
743 manager->fd_inotify = safe_close(manager->fd_inotify);
744
745 manager->uevent_event = sd_event_source_unref(manager->uevent_event);
746 manager->monitor = udev_monitor_unref(manager->monitor);
747
748 /* discard queued events and kill workers */
749 event_queue_cleanup(manager, EVENT_QUEUED);
750 manager_kill_workers(manager);
751
81c58355 752 assert_se(sd_event_now(manager->event, CLOCK_MONOTONIC, &usec) >= 0);
86f210e9 753
81c58355 754 r = sd_event_add_time(manager->event, NULL, CLOCK_MONOTONIC,
86f210e9
MP
755 usec + 30 * USEC_PER_SEC, USEC_PER_SEC, on_exit_timeout, manager);
756 if (r < 0)
757 return;
758}
759
760/* reload requested, HUP signal received, rules changed, builtin changed */
761static void manager_reload(Manager *manager) {
762
763 assert(manager);
764
765 sd_notify(false,
766 "RELOADING=1\n"
767 "STATUS=Flushing configuration...");
768
769 manager_kill_workers(manager);
770 manager->rules = udev_rules_unref(manager->rules);
771 udev_builtin_exit(manager->udev);
772
8a584da2
MP
773 sd_notifyf(false,
774 "READY=1\n"
775 "STATUS=Processing with %u children at max", arg_children_max);
86f210e9
MP
776}
777
e3bff60a 778static void event_queue_start(Manager *manager) {
52ad194e 779 struct event *event;
86f210e9 780 usec_t usec;
663996b3 781
e3bff60a
MP
782 assert(manager);
783
52ad194e 784 if (LIST_IS_EMPTY(manager->events) ||
86f210e9
MP
785 manager->exit || manager->stop_exec_queue)
786 return;
787
81c58355 788 assert_se(sd_event_now(manager->event, CLOCK_MONOTONIC, &usec) >= 0);
13d276d0
MP
789 /* check for changed config, every 3 seconds at most */
790 if (manager->last_usec == 0 ||
791 (usec - manager->last_usec) > 3 * USEC_PER_SEC) {
792 if (udev_rules_check_timestamp(manager->rules) ||
793 udev_builtin_validate(manager->udev))
794 manager_reload(manager);
86f210e9 795
13d276d0 796 manager->last_usec = usec;
86f210e9
MP
797 }
798
799 udev_builtin_init(manager->udev);
800
801 if (!manager->rules) {
802 manager->rules = udev_rules_new(manager->udev, arg_resolve_names);
803 if (!manager->rules)
804 return;
805 }
806
52ad194e 807 LIST_FOREACH(event,event,manager->events) {
663996b3
MS
808 if (event->state != EVENT_QUEUED)
809 continue;
810
811 /* do not start event if parent or child event is still running */
e3bff60a 812 if (is_devpath_busy(manager, event))
663996b3
MS
813 continue;
814
e3bff60a 815 event_run(manager, event);
663996b3
MS
816 }
817}
818
e3bff60a 819static void event_queue_cleanup(Manager *manager, enum event_state match_type) {
52ad194e 820 struct event *event, *tmp;
663996b3 821
52ad194e 822 LIST_FOREACH_SAFE(event, event, tmp, manager->events) {
663996b3
MS
823 if (match_type != EVENT_UNDEF && match_type != event->state)
824 continue;
825
e3bff60a 826 event_free(event);
663996b3
MS
827 }
828}
829
e3bff60a
MP
830static int on_worker(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
831 Manager *manager = userdata;
832
833 assert(manager);
834
663996b3
MS
835 for (;;) {
836 struct worker_message msg;
e3bff60a
MP
837 struct iovec iovec = {
838 .iov_base = &msg,
839 .iov_len = sizeof(msg),
840 };
841 union {
842 struct cmsghdr cmsghdr;
843 uint8_t buf[CMSG_SPACE(sizeof(struct ucred))];
844 } control = {};
845 struct msghdr msghdr = {
846 .msg_iov = &iovec,
847 .msg_iovlen = 1,
848 .msg_control = &control,
849 .msg_controllen = sizeof(control),
850 };
851 struct cmsghdr *cmsg;
663996b3 852 ssize_t size;
e3bff60a
MP
853 struct ucred *ucred = NULL;
854 struct worker *worker;
663996b3 855
e3bff60a
MP
856 size = recvmsg(fd, &msghdr, MSG_DONTWAIT);
857 if (size < 0) {
858 if (errno == EINTR)
859 continue;
860 else if (errno == EAGAIN)
861 /* nothing more to read */
862 break;
663996b3 863
e3bff60a
MP
864 return log_error_errno(errno, "failed to receive message: %m");
865 } else if (size != sizeof(struct worker_message)) {
866 log_warning_errno(EIO, "ignoring worker message with invalid size %zi bytes", size);
867 continue;
868 }
663996b3 869
86f210e9 870 CMSG_FOREACH(cmsg, &msghdr) {
e3bff60a
MP
871 if (cmsg->cmsg_level == SOL_SOCKET &&
872 cmsg->cmsg_type == SCM_CREDENTIALS &&
873 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
874 ucred = (struct ucred*) CMSG_DATA(cmsg);
875 }
663996b3 876
e3bff60a
MP
877 if (!ucred || ucred->pid <= 0) {
878 log_warning_errno(EIO, "ignoring worker message without valid PID");
879 continue;
663996b3 880 }
e3bff60a
MP
881
882 /* lookup worker who sent the signal */
db2df898 883 worker = hashmap_get(manager->workers, PID_TO_PTR(ucred->pid));
e3bff60a
MP
884 if (!worker) {
885 log_debug("worker ["PID_FMT"] returned, but is no longer tracked", ucred->pid);
886 continue;
887 }
888
889 if (worker->state != WORKER_KILLED)
890 worker->state = WORKER_IDLE;
891
892 /* worker returned */
893 event_free(worker->event);
663996b3 894 }
e3bff60a 895
86f210e9
MP
896 /* we have free workers, try to schedule events */
897 event_queue_start(manager);
898
e3bff60a
MP
899 return 1;
900}
901
902static int on_uevent(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
903 Manager *manager = userdata;
904 struct udev_device *dev;
905 int r;
906
907 assert(manager);
908
909 dev = udev_monitor_receive_device(manager->monitor);
910 if (dev) {
911 udev_device_ensure_usec_initialized(dev, NULL);
912 r = event_queue_insert(manager, dev);
913 if (r < 0)
914 udev_device_unref(dev);
86f210e9
MP
915 else
916 /* we have fresh events, try to schedule them */
917 event_queue_start(manager);
e3bff60a
MP
918 }
919
920 return 1;
663996b3
MS
921}
922
923/* receive the udevd message from userspace */
e3bff60a
MP
924static int on_ctrl_msg(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
925 Manager *manager = userdata;
926 _cleanup_udev_ctrl_connection_unref_ struct udev_ctrl_connection *ctrl_conn = NULL;
927 _cleanup_udev_ctrl_msg_unref_ struct udev_ctrl_msg *ctrl_msg = NULL;
663996b3
MS
928 const char *str;
929 int i;
930
e3bff60a
MP
931 assert(manager);
932
933 ctrl_conn = udev_ctrl_get_connection(manager->ctrl);
934 if (!ctrl_conn)
935 return 1;
663996b3
MS
936
937 ctrl_msg = udev_ctrl_receive_msg(ctrl_conn);
e3bff60a
MP
938 if (!ctrl_msg)
939 return 1;
663996b3
MS
940
941 i = udev_ctrl_get_set_log_level(ctrl_msg);
942 if (i >= 0) {
60f067b4 943 log_debug("udevd message (SET_LOG_LEVEL) received, log_priority=%i", i);
663996b3 944 log_set_max_level(i);
e3bff60a 945 manager_kill_workers(manager);
663996b3
MS
946 }
947
948 if (udev_ctrl_get_stop_exec_queue(ctrl_msg) > 0) {
60f067b4 949 log_debug("udevd message (STOP_EXEC_QUEUE) received");
e3bff60a 950 manager->stop_exec_queue = true;
663996b3
MS
951 }
952
953 if (udev_ctrl_get_start_exec_queue(ctrl_msg) > 0) {
60f067b4 954 log_debug("udevd message (START_EXEC_QUEUE) received");
e3bff60a 955 manager->stop_exec_queue = false;
86f210e9 956 event_queue_start(manager);
663996b3
MS
957 }
958
959 if (udev_ctrl_get_reload(ctrl_msg) > 0) {
60f067b4 960 log_debug("udevd message (RELOAD) received");
86f210e9 961 manager_reload(manager);
663996b3
MS
962 }
963
964 str = udev_ctrl_get_set_env(ctrl_msg);
965 if (str != NULL) {
e3bff60a 966 _cleanup_free_ char *key = NULL;
663996b3
MS
967
968 key = strdup(str);
e3bff60a 969 if (key) {
663996b3
MS
970 char *val;
971
972 val = strchr(key, '=');
973 if (val != NULL) {
974 val[0] = '\0';
975 val = &val[1];
976 if (val[0] == '\0') {
60f067b4 977 log_debug("udevd message (ENV) received, unset '%s'", key);
e3bff60a 978 udev_list_entry_add(&manager->properties, key, NULL);
663996b3 979 } else {
60f067b4 980 log_debug("udevd message (ENV) received, set '%s=%s'", key, val);
e3bff60a 981 udev_list_entry_add(&manager->properties, key, val);
663996b3 982 }
e3bff60a 983 } else
60f067b4 984 log_error("wrong key format '%s'", key);
663996b3 985 }
e3bff60a 986 manager_kill_workers(manager);
663996b3
MS
987 }
988
989 i = udev_ctrl_get_set_children_max(ctrl_msg);
990 if (i >= 0) {
60f067b4 991 log_debug("udevd message (SET_MAX_CHILDREN) received, children_max=%i", i);
5eef597e 992 arg_children_max = i;
8a584da2
MP
993
994 (void) sd_notifyf(false,
995 "READY=1\n"
996 "STATUS=Processing with %u children at max", arg_children_max);
663996b3
MS
997 }
998
999 if (udev_ctrl_get_ping(ctrl_msg) > 0)
60f067b4 1000 log_debug("udevd message (SYNC) received");
663996b3
MS
1001
1002 if (udev_ctrl_get_exit(ctrl_msg) > 0) {
60f067b4 1003 log_debug("udevd message (EXIT) received");
86f210e9 1004 manager_exit(manager);
e3bff60a
MP
1005 /* keep reference to block the client until we exit
1006 TODO: deal with several blocking exit requests */
1007 manager->ctrl_conn_blocking = udev_ctrl_connection_ref(ctrl_conn);
663996b3 1008 }
e3bff60a
MP
1009
1010 return 1;
663996b3
MS
1011}
1012
60f067b4
JS
1013static int synthesize_change(struct udev_device *dev) {
1014 char filename[UTIL_PATH_SIZE];
1015 int r;
1016
1017 if (streq_ptr("block", udev_device_get_subsystem(dev)) &&
1018 streq_ptr("disk", udev_device_get_devtype(dev)) &&
1019 !startswith(udev_device_get_sysname(dev), "dm-")) {
1020 bool part_table_read = false;
1021 bool has_partitions = false;
1022 int fd;
1023 struct udev *udev = udev_device_get_udev(dev);
1024 _cleanup_udev_enumerate_unref_ struct udev_enumerate *e = NULL;
1025 struct udev_list_entry *item;
1026
1027 /*
1028 * Try to re-read the partition table. This only succeeds if
1029 * none of the devices is busy. The kernel returns 0 if no
1030 * partition table is found, and we will not get an event for
1031 * the disk.
1032 */
1033 fd = open(udev_device_get_devnode(dev), O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_NONBLOCK);
1034 if (fd >= 0) {
1035 r = flock(fd, LOCK_EX|LOCK_NB);
1036 if (r >= 0)
1037 r = ioctl(fd, BLKRRPART, 0);
1038
1039 close(fd);
1040 if (r >= 0)
1041 part_table_read = true;
1042 }
1043
1044 /* search for partitions */
1045 e = udev_enumerate_new(udev);
1046 if (!e)
1047 return -ENOMEM;
1048
1049 r = udev_enumerate_add_match_parent(e, dev);
1050 if (r < 0)
1051 return r;
1052
1053 r = udev_enumerate_add_match_subsystem(e, "block");
1054 if (r < 0)
1055 return r;
1056
1057 r = udev_enumerate_scan_devices(e);
1058 if (r < 0)
1059 return r;
1060
1061 udev_list_entry_foreach(item, udev_enumerate_get_list_entry(e)) {
1062 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1063
1064 d = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
1065 if (!d)
1066 continue;
1067
1068 if (!streq_ptr("partition", udev_device_get_devtype(d)))
1069 continue;
1070
1071 has_partitions = true;
1072 break;
1073 }
1074
1075 /*
1076 * We have partitions and re-read the table, the kernel already sent
1077 * out a "change" event for the disk, and "remove/add" for all
1078 * partitions.
1079 */
1080 if (part_table_read && has_partitions)
1081 return 0;
1082
1083 /*
1084 * We have partitions but re-reading the partition table did not
1085 * work, synthesize "change" for the disk and all partitions.
1086 */
1087 log_debug("device %s closed, synthesising 'change'", udev_device_get_devnode(dev));
1088 strscpyl(filename, sizeof(filename), udev_device_get_syspath(dev), "/uevent", NULL);
7035cd9e 1089 write_string_file(filename, "change", WRITE_STRING_FILE_CREATE);
60f067b4
JS
1090
1091 udev_list_entry_foreach(item, udev_enumerate_get_list_entry(e)) {
1092 _cleanup_udev_device_unref_ struct udev_device *d = NULL;
1093
1094 d = udev_device_new_from_syspath(udev, udev_list_entry_get_name(item));
1095 if (!d)
1096 continue;
1097
1098 if (!streq_ptr("partition", udev_device_get_devtype(d)))
1099 continue;
1100
1101 log_debug("device %s closed, synthesising partition '%s' 'change'",
1102 udev_device_get_devnode(dev), udev_device_get_devnode(d));
1103 strscpyl(filename, sizeof(filename), udev_device_get_syspath(d), "/uevent", NULL);
7035cd9e 1104 write_string_file(filename, "change", WRITE_STRING_FILE_CREATE);
60f067b4
JS
1105 }
1106
1107 return 0;
1108 }
1109
1110 log_debug("device %s closed, synthesising 'change'", udev_device_get_devnode(dev));
1111 strscpyl(filename, sizeof(filename), udev_device_get_syspath(dev), "/uevent", NULL);
7035cd9e 1112 write_string_file(filename, "change", WRITE_STRING_FILE_CREATE);
60f067b4
JS
1113
1114 return 0;
1115}
1116
e3bff60a
MP
1117static int on_inotify(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1118 Manager *manager = userdata;
e735f4d4 1119 union inotify_event_buffer buffer;
f47781d8
MP
1120 struct inotify_event *e;
1121 ssize_t l;
663996b3 1122
e3bff60a
MP
1123 assert(manager);
1124
1125 l = read(fd, &buffer, sizeof(buffer));
f47781d8 1126 if (l < 0) {
f5e65279 1127 if (IN_SET(errno, EAGAIN, EINTR))
e3bff60a 1128 return 1;
663996b3 1129
f47781d8 1130 return log_error_errno(errno, "Failed to read inotify fd: %m");
663996b3
MS
1131 }
1132
f47781d8 1133 FOREACH_INOTIFY_EVENT(e, buffer, l) {
e3bff60a 1134 _cleanup_udev_device_unref_ struct udev_device *dev = NULL;
663996b3 1135
e3bff60a 1136 dev = udev_watch_lookup(manager->udev, e->wd);
60f067b4
JS
1137 if (!dev)
1138 continue;
663996b3 1139
f47781d8 1140 log_debug("inotify event: %x for %s", e->mask, udev_device_get_devnode(dev));
e3bff60a 1141 if (e->mask & IN_CLOSE_WRITE) {
60f067b4 1142 synthesize_change(dev);
663996b3 1143
e3bff60a
MP
1144 /* settle might be waiting on us to determine the queue
1145 * state. If we just handled an inotify event, we might have
1146 * generated a "change" event, but we won't have queued up
1147 * the resultant uevent yet. Do that.
1148 */
1149 on_uevent(NULL, -1, 0, manager);
1150 } else if (e->mask & IN_IGNORED)
1151 udev_watch_end(manager->udev, dev);
663996b3
MS
1152 }
1153
e3bff60a 1154 return 1;
663996b3
MS
1155}
1156
e3bff60a
MP
1157static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1158 Manager *manager = userdata;
663996b3 1159
e3bff60a 1160 assert(manager);
663996b3 1161
86f210e9 1162 manager_exit(manager);
663996b3 1163
e3bff60a
MP
1164 return 1;
1165}
663996b3 1166
e3bff60a
MP
1167static int on_sighup(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1168 Manager *manager = userdata;
60f067b4 1169
e3bff60a
MP
1170 assert(manager);
1171
86f210e9 1172 manager_reload(manager);
e3bff60a
MP
1173
1174 return 1;
1175}
1176
1177static int on_sigchld(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1178 Manager *manager = userdata;
1179
1180 assert(manager);
1181
1182 for (;;) {
1183 pid_t pid;
1184 int status;
1185 struct worker *worker;
1186
1187 pid = waitpid(-1, &status, WNOHANG);
1188 if (pid <= 0)
1189 break;
1190
db2df898 1191 worker = hashmap_get(manager->workers, PID_TO_PTR(pid));
e3bff60a
MP
1192 if (!worker) {
1193 log_warning("worker ["PID_FMT"] is unknown, ignoring", pid);
1194 continue;
1195 }
1196
1197 if (WIFEXITED(status)) {
1198 if (WEXITSTATUS(status) == 0)
1199 log_debug("worker ["PID_FMT"] exited", pid);
1200 else
1201 log_warning("worker ["PID_FMT"] exited with return code %i", pid, WEXITSTATUS(status));
1202 } else if (WIFSIGNALED(status)) {
2897b343 1203 log_warning("worker ["PID_FMT"] terminated by signal %i (%s)", pid, WTERMSIG(status), signal_to_string(WTERMSIG(status)));
e3bff60a
MP
1204 } else if (WIFSTOPPED(status)) {
1205 log_info("worker ["PID_FMT"] stopped", pid);
1206 continue;
1207 } else if (WIFCONTINUED(status)) {
1208 log_info("worker ["PID_FMT"] continued", pid);
1209 continue;
1210 } else
1211 log_warning("worker ["PID_FMT"] exit with status 0x%04x", pid, status);
1212
1213 if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
1214 if (worker->event) {
1215 log_error("worker ["PID_FMT"] failed while handling '%s'", pid, worker->event->devpath);
1216 /* delete state from disk */
1217 udev_device_delete_db(worker->event->dev);
1218 udev_device_tag_index(worker->event->dev, NULL, false);
1219 /* forward kernel event without amending it */
1220 udev_monitor_send_device(manager->monitor, NULL, worker->event->dev_kernel);
663996b3
MS
1221 }
1222 }
e3bff60a
MP
1223
1224 worker_free(worker);
663996b3 1225 }
e3bff60a 1226
86f210e9
MP
1227 /* we can start new workers, try to schedule events */
1228 event_queue_start(manager);
1229
1230 return 1;
1231}
1232
1233static int on_post(sd_event_source *s, void *userdata) {
1234 Manager *manager = userdata;
1235 int r;
1236
1237 assert(manager);
1238
52ad194e 1239 if (LIST_IS_EMPTY(manager->events)) {
86f210e9
MP
1240 /* no pending events */
1241 if (!hashmap_isempty(manager->workers)) {
1242 /* there are idle workers */
1243 log_debug("cleanup idle workers");
1244 manager_kill_workers(manager);
1245 } else {
1246 /* we are idle */
1247 if (manager->exit) {
1248 r = sd_event_exit(manager->event, 0);
1249 if (r < 0)
1250 return r;
1251 } else if (manager->cgroup)
1252 /* cleanup possible left-over processes in our cgroup */
5a920b42 1253 cg_kill(SYSTEMD_CGROUP_CONTROLLER, manager->cgroup, SIGKILL, CGROUP_IGNORE_SELF, NULL, NULL, NULL);
86f210e9
MP
1254 }
1255 }
1256
e3bff60a 1257 return 1;
663996b3
MS
1258}
1259
86f210e9
MP
1260static int listen_fds(int *rctrl, int *rnetlink) {
1261 _cleanup_udev_unref_ struct udev *udev = NULL;
1262 int ctrl_fd = -1, netlink_fd = -1;
1263 int fd, n, r;
1264
1265 assert(rctrl);
1266 assert(rnetlink);
663996b3
MS
1267
1268 n = sd_listen_fds(true);
86f210e9
MP
1269 if (n < 0)
1270 return n;
663996b3
MS
1271
1272 for (fd = SD_LISTEN_FDS_START; fd < n + SD_LISTEN_FDS_START; fd++) {
1273 if (sd_is_socket(fd, AF_LOCAL, SOCK_SEQPACKET, -1)) {
86f210e9
MP
1274 if (ctrl_fd >= 0)
1275 return -EINVAL;
1276 ctrl_fd = fd;
663996b3
MS
1277 continue;
1278 }
1279
1280 if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1)) {
86f210e9
MP
1281 if (netlink_fd >= 0)
1282 return -EINVAL;
1283 netlink_fd = fd;
663996b3
MS
1284 continue;
1285 }
1286
86f210e9
MP
1287 return -EINVAL;
1288 }
1289
1290 if (ctrl_fd < 0) {
1291 _cleanup_udev_ctrl_unref_ struct udev_ctrl *ctrl = NULL;
1292
1293 udev = udev_new();
1294 if (!udev)
1295 return -ENOMEM;
1296
1297 ctrl = udev_ctrl_new(udev);
1298 if (!ctrl)
1299 return log_error_errno(EINVAL, "error initializing udev control socket");
1300
1301 r = udev_ctrl_enable_receiving(ctrl);
1302 if (r < 0)
1303 return log_error_errno(EINVAL, "error binding udev control socket");
1304
1305 fd = udev_ctrl_get_fd(ctrl);
1306 if (fd < 0)
1307 return log_error_errno(EIO, "could not get ctrl fd");
1308
1309 ctrl_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
1310 if (ctrl_fd < 0)
1311 return log_error_errno(errno, "could not dup ctrl fd: %m");
1312 }
1313
1314 if (netlink_fd < 0) {
1315 _cleanup_udev_monitor_unref_ struct udev_monitor *monitor = NULL;
1316
1317 if (!udev) {
1318 udev = udev_new();
1319 if (!udev)
1320 return -ENOMEM;
1321 }
1322
1323 monitor = udev_monitor_new_from_netlink(udev, "kernel");
1324 if (!monitor)
1325 return log_error_errno(EINVAL, "error initializing netlink socket");
1326
1327 (void) udev_monitor_set_receive_buffer_size(monitor, 128 * 1024 * 1024);
1328
1329 r = udev_monitor_enable_receiving(monitor);
1330 if (r < 0)
1331 return log_error_errno(EINVAL, "error binding netlink socket");
1332
1333 fd = udev_monitor_get_fd(monitor);
1334 if (fd < 0)
1335 return log_error_errno(netlink_fd, "could not get uevent fd: %m");
1336
1337 netlink_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
2897b343 1338 if (netlink_fd < 0)
86f210e9 1339 return log_error_errno(errno, "could not dup netlink fd: %m");
663996b3
MS
1340 }
1341
86f210e9
MP
1342 *rctrl = ctrl_fd;
1343 *rnetlink = netlink_fd;
663996b3 1344
663996b3
MS
1345 return 0;
1346}
1347
1348/*
f47781d8 1349 * read the kernel command line, in case we need to get into debug mode
2897b343
MP
1350 * udev.log_priority=<level> syslog priority
1351 * udev.children_max=<number of workers> events are fully serialized if set to 1
1352 * udev.exec_delay=<number of seconds> delay execution of every executed program
1353 * udev.event_timeout=<number of seconds> seconds to wait before terminating an event
663996b3 1354 */
8a584da2
MP
1355static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1356 int r = 0;
663996b3 1357
e3bff60a 1358 assert(key);
663996b3 1359
e3bff60a
MP
1360 if (!value)
1361 return 0;
663996b3 1362
2897b343
MP
1363 if (proc_cmdline_key_streq(key, "udev.log_priority")) {
1364
1365 if (proc_cmdline_value_missing(key, value))
1366 return 0;
1367
8a584da2
MP
1368 r = util_log_priority(value);
1369 if (r >= 0)
1370 log_set_max_level(r);
2897b343
MP
1371
1372 } else if (proc_cmdline_key_streq(key, "udev.event_timeout")) {
1373
1374 if (proc_cmdline_value_missing(key, value))
1375 return 0;
1376
8a584da2
MP
1377 r = safe_atou64(value, &arg_event_timeout_usec);
1378 if (r >= 0) {
1379 arg_event_timeout_usec *= USEC_PER_SEC;
1380 arg_event_timeout_warn_usec = (arg_event_timeout_usec / 3) ? : 1;
1381 }
2897b343
MP
1382
1383 } else if (proc_cmdline_key_streq(key, "udev.children_max")) {
1384
1385 if (proc_cmdline_value_missing(key, value))
1386 return 0;
1387
e3bff60a 1388 r = safe_atou(value, &arg_children_max);
2897b343
MP
1389
1390 } else if (proc_cmdline_key_streq(key, "udev.exec_delay")) {
1391
1392 if (proc_cmdline_value_missing(key, value))
1393 return 0;
1394
e3bff60a 1395 r = safe_atoi(value, &arg_exec_delay);
2897b343
MP
1396
1397 } else if (startswith(key, "udev."))
8a584da2 1398 log_warning("Unknown udev kernel command line option \"%s\"", key);
e3bff60a 1399
8a584da2
MP
1400 if (r < 0)
1401 log_warning_errno(r, "Failed to parse \"%s=%s\", ignoring: %m", key, value);
2897b343 1402
e3bff60a 1403 return 0;
663996b3
MS
1404}
1405
5eef597e
MP
1406static void help(void) {
1407 printf("%s [OPTIONS...]\n\n"
1408 "Manages devices.\n\n"
e735f4d4 1409 " -h --help Print this message\n"
52ad194e
MB
1410 " -V --version Print version of the program\n"
1411 " -d --daemon Detach and run in the background\n"
1412 " -D --debug Enable debug output\n"
1413 " -c --children-max=INT Set maximum number of workers\n"
1414 " -e --exec-delay=SECONDS Seconds to wait before executing RUN=\n"
1415 " -t --event-timeout=SECONDS Seconds to wait before terminating an event\n"
1416 " -N --resolve-names=early|late|never\n"
e735f4d4 1417 " When to resolve users and groups\n"
5eef597e
MP
1418 , program_invocation_short_name);
1419}
1420
1421static int parse_argv(int argc, char *argv[]) {
663996b3 1422 static const struct option options[] = {
5eef597e
MP
1423 { "daemon", no_argument, NULL, 'd' },
1424 { "debug", no_argument, NULL, 'D' },
1425 { "children-max", required_argument, NULL, 'c' },
1426 { "exec-delay", required_argument, NULL, 'e' },
1427 { "event-timeout", required_argument, NULL, 't' },
1428 { "resolve-names", required_argument, NULL, 'N' },
1429 { "help", no_argument, NULL, 'h' },
1430 { "version", no_argument, NULL, 'V' },
663996b3
MS
1431 {}
1432 };
663996b3 1433
5eef597e 1434 int c;
663996b3 1435
5eef597e
MP
1436 assert(argc >= 0);
1437 assert(argv);
14228c0d 1438
7035cd9e 1439 while ((c = getopt_long(argc, argv, "c:de:Dt:N:hV", options, NULL)) >= 0) {
5eef597e 1440 int r;
14228c0d 1441
5eef597e 1442 switch (c) {
663996b3 1443
663996b3 1444 case 'd':
5eef597e 1445 arg_daemonize = true;
663996b3
MS
1446 break;
1447 case 'c':
e3bff60a 1448 r = safe_atou(optarg, &arg_children_max);
5eef597e
MP
1449 if (r < 0)
1450 log_warning("Invalid --children-max ignored: %s", optarg);
663996b3
MS
1451 break;
1452 case 'e':
5eef597e
MP
1453 r = safe_atoi(optarg, &arg_exec_delay);
1454 if (r < 0)
1455 log_warning("Invalid --exec-delay ignored: %s", optarg);
1456 break;
1457 case 't':
1458 r = safe_atou64(optarg, &arg_event_timeout_usec);
1459 if (r < 0)
1460 log_warning("Invalid --event-timeout ignored: %s", optarg);
1461 else {
1462 arg_event_timeout_usec *= USEC_PER_SEC;
1463 arg_event_timeout_warn_usec = (arg_event_timeout_usec / 3) ? : 1;
1464 }
663996b3
MS
1465 break;
1466 case 'D':
5eef597e 1467 arg_debug = true;
663996b3
MS
1468 break;
1469 case 'N':
1470 if (streq(optarg, "early")) {
5eef597e 1471 arg_resolve_names = 1;
663996b3 1472 } else if (streq(optarg, "late")) {
5eef597e 1473 arg_resolve_names = 0;
663996b3 1474 } else if (streq(optarg, "never")) {
5eef597e 1475 arg_resolve_names = -1;
663996b3 1476 } else {
60f067b4 1477 log_error("resolve-names must be early, late or never");
5eef597e 1478 return 0;
663996b3
MS
1479 }
1480 break;
1481 case 'h':
5eef597e
MP
1482 help();
1483 return 0;
663996b3 1484 case 'V':
81c58355 1485 printf("%s\n", PACKAGE_VERSION);
5eef597e
MP
1486 return 0;
1487 case '?':
1488 return -EINVAL;
663996b3 1489 default:
5eef597e
MP
1490 assert_not_reached("Unhandled option");
1491
663996b3
MS
1492 }
1493 }
1494
5eef597e
MP
1495 return 1;
1496}
1497
86f210e9 1498static int manager_new(Manager **ret, int fd_ctrl, int fd_uevent, const char *cgroup) {
e3bff60a 1499 _cleanup_(manager_freep) Manager *manager = NULL;
86f210e9 1500 int r, fd_worker, one = 1;
5eef597e 1501
e3bff60a 1502 assert(ret);
86f210e9
MP
1503 assert(fd_ctrl >= 0);
1504 assert(fd_uevent >= 0);
e3bff60a
MP
1505
1506 manager = new0(Manager, 1);
1507 if (!manager)
1508 return log_oom();
1509
e3bff60a
MP
1510 manager->fd_inotify = -1;
1511 manager->worker_watch[WRITE_END] = -1;
1512 manager->worker_watch[READ_END] = -1;
1513
1514 manager->udev = udev_new();
1515 if (!manager->udev)
1516 return log_error_errno(errno, "could not allocate udev context: %m");
1517
1518 udev_builtin_init(manager->udev);
1519
1520 manager->rules = udev_rules_new(manager->udev, arg_resolve_names);
1521 if (!manager->rules)
1522 return log_error_errno(ENOMEM, "error reading rules");
1523
52ad194e 1524 LIST_HEAD_INIT(manager->events);
e3bff60a
MP
1525 udev_list_init(manager->udev, &manager->properties, true);
1526
86f210e9 1527 manager->cgroup = cgroup;
e3bff60a 1528
86f210e9
MP
1529 manager->ctrl = udev_ctrl_new_from_fd(manager->udev, fd_ctrl);
1530 if (!manager->ctrl)
1531 return log_error_errno(EINVAL, "error taking over udev control socket");
e3bff60a 1532
86f210e9
MP
1533 manager->monitor = udev_monitor_new_from_netlink_fd(manager->udev, "kernel", fd_uevent);
1534 if (!manager->monitor)
1535 return log_error_errno(EINVAL, "error taking over netlink socket");
e3bff60a
MP
1536
1537 /* unnamed socket from workers to the main daemon */
1538 r = socketpair(AF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0, manager->worker_watch);
1539 if (r < 0)
1540 return log_error_errno(errno, "error creating socketpair: %m");
1541
86f210e9 1542 fd_worker = manager->worker_watch[READ_END];
e3bff60a 1543
86f210e9 1544 r = setsockopt(fd_worker, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
e3bff60a
MP
1545 if (r < 0)
1546 return log_error_errno(errno, "could not enable SO_PASSCRED: %m");
1547
1548 manager->fd_inotify = udev_watch_init(manager->udev);
1549 if (manager->fd_inotify < 0)
1550 return log_error_errno(ENOMEM, "error initializing inotify");
1551
1552 udev_watch_restore(manager->udev);
1553
1554 /* block and listen to all signals on signalfd */
86f210e9
MP
1555 assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, SIGHUP, SIGCHLD, -1) >= 0);
1556
1557 r = sd_event_default(&manager->event);
1558 if (r < 0)
db2df898 1559 return log_error_errno(r, "could not allocate event loop: %m");
86f210e9
MP
1560
1561 r = sd_event_add_signal(manager->event, NULL, SIGINT, on_sigterm, manager);
1562 if (r < 0)
1563 return log_error_errno(r, "error creating sigint event source: %m");
1564
1565 r = sd_event_add_signal(manager->event, NULL, SIGTERM, on_sigterm, manager);
1566 if (r < 0)
1567 return log_error_errno(r, "error creating sigterm event source: %m");
1568
1569 r = sd_event_add_signal(manager->event, NULL, SIGHUP, on_sighup, manager);
1570 if (r < 0)
1571 return log_error_errno(r, "error creating sighup event source: %m");
1572
1573 r = sd_event_add_signal(manager->event, NULL, SIGCHLD, on_sigchld, manager);
1574 if (r < 0)
1575 return log_error_errno(r, "error creating sigchld event source: %m");
1576
1577 r = sd_event_set_watchdog(manager->event, true);
1578 if (r < 0)
1579 return log_error_errno(r, "error creating watchdog event source: %m");
1580
1581 r = sd_event_add_io(manager->event, &manager->ctrl_event, fd_ctrl, EPOLLIN, on_ctrl_msg, manager);
1582 if (r < 0)
1583 return log_error_errno(r, "error creating ctrl event source: %m");
1584
1585 /* This needs to be after the inotify and uevent handling, to make sure
1586 * that the ping is send back after fully processing the pending uevents
1587 * (including the synthetic ones we may create due to inotify events).
1588 */
1589 r = sd_event_source_set_priority(manager->ctrl_event, SD_EVENT_PRIORITY_IDLE);
1590 if (r < 0)
1591 return log_error_errno(r, "cold not set IDLE event priority for ctrl event source: %m");
1592
1593 r = sd_event_add_io(manager->event, &manager->inotify_event, manager->fd_inotify, EPOLLIN, on_inotify, manager);
1594 if (r < 0)
1595 return log_error_errno(r, "error creating inotify event source: %m");
1596
1597 r = sd_event_add_io(manager->event, &manager->uevent_event, fd_uevent, EPOLLIN, on_uevent, manager);
1598 if (r < 0)
1599 return log_error_errno(r, "error creating uevent event source: %m");
1600
1601 r = sd_event_add_io(manager->event, NULL, fd_worker, EPOLLIN, on_worker, manager);
1602 if (r < 0)
1603 return log_error_errno(r, "error creating worker event source: %m");
1604
1605 r = sd_event_add_post(manager->event, NULL, on_post, manager);
1606 if (r < 0)
1607 return log_error_errno(r, "error creating post event source: %m");
e3bff60a
MP
1608
1609 *ret = manager;
1610 manager = NULL;
1611
86f210e9 1612 return 0;
e3bff60a
MP
1613}
1614
fb183854 1615static int run(int fd_ctrl, int fd_uevent, const char *cgroup) {
e3bff60a 1616 _cleanup_(manager_freep) Manager *manager = NULL;
fb183854
MP
1617 int r;
1618
1619 r = manager_new(&manager, fd_ctrl, fd_uevent, cgroup);
1620 if (r < 0) {
1621 r = log_error_errno(r, "failed to allocate manager object: %m");
1622 goto exit;
1623 }
1624
1625 r = udev_rules_apply_static_dev_perms(manager->rules);
1626 if (r < 0)
1627 log_error_errno(r, "failed to apply permissions on static device nodes: %m");
1628
8a584da2
MP
1629 (void) sd_notifyf(false,
1630 "READY=1\n"
1631 "STATUS=Processing with %u children at max", arg_children_max);
fb183854
MP
1632
1633 r = sd_event_loop(manager->event);
1634 if (r < 0) {
1635 log_error_errno(r, "event loop failed: %m");
1636 goto exit;
1637 }
1638
1639 sd_event_get_exit_code(manager->event, &r);
1640
1641exit:
1642 sd_notify(false,
1643 "STOPPING=1\n"
1644 "STATUS=Shutting down...");
1645 if (manager)
1646 udev_ctrl_cleanup(manager->ctrl);
1647 return r;
1648}
1649
1650int main(int argc, char *argv[]) {
86f210e9 1651 _cleanup_free_ char *cgroup = NULL;
4c89c718
MP
1652 int fd_ctrl = -1, fd_uevent = -1;
1653 int r;
5eef597e
MP
1654
1655 log_set_target(LOG_TARGET_AUTO);
81c58355 1656 udev_parse_config();
5eef597e
MP
1657 log_parse_environment();
1658 log_open();
1659
5eef597e
MP
1660 r = parse_argv(argc, argv);
1661 if (r <= 0)
1662 goto exit;
1663
2897b343 1664 r = proc_cmdline_parse(parse_proc_cmdline_item, NULL, PROC_CMDLINE_STRIP_RD_PREFIX);
e3bff60a
MP
1665 if (r < 0)
1666 log_warning_errno(r, "failed to parse kernel command line, ignoring: %m");
663996b3 1667
fb183854
MP
1668 if (arg_debug) {
1669 log_set_target(LOG_TARGET_CONSOLE);
5eef597e 1670 log_set_max_level(LOG_DEBUG);
fb183854 1671 }
5eef597e 1672
52ad194e
MB
1673 r = must_be_root();
1674 if (r < 0)
5eef597e 1675 goto exit;
5eef597e 1676
e3bff60a
MP
1677 if (arg_children_max == 0) {
1678 cpu_set_t cpu_set;
5eef597e 1679
e3bff60a 1680 arg_children_max = 8;
663996b3 1681
6300502b 1682 if (sched_getaffinity(0, sizeof(cpu_set), &cpu_set) == 0)
86f210e9 1683 arg_children_max += CPU_COUNT(&cpu_set) * 2;
f47781d8 1684
e3bff60a 1685 log_debug("set children_max to %u", arg_children_max);
5eef597e 1686 }
663996b3 1687
e3bff60a
MP
1688 /* set umask before creating any file/directory */
1689 r = chdir("/");
1690 if (r < 0) {
1691 r = log_error_errno(errno, "could not change dir to /: %m");
1692 goto exit;
663996b3
MS
1693 }
1694
e3bff60a
MP
1695 umask(022);
1696
aa27b158 1697 r = mac_selinux_init();
e3bff60a
MP
1698 if (r < 0) {
1699 log_error_errno(r, "could not initialize labelling: %m");
663996b3
MS
1700 goto exit;
1701 }
1702
e3bff60a
MP
1703 r = mkdir("/run/udev", 0755);
1704 if (r < 0 && errno != EEXIST) {
1705 r = log_error_errno(errno, "could not create /run/udev: %m");
663996b3
MS
1706 goto exit;
1707 }
1708
e3bff60a 1709 dev_setup(NULL, UID_INVALID, GID_INVALID);
5eef597e 1710
86f210e9
MP
1711 if (getppid() == 1) {
1712 /* get our own cgroup, we regularly kill everything udev has left behind
1713 we only do this on systemd systems, and only if we are directly spawned
1714 by PID1. otherwise we are not guaranteed to have a dedicated cgroup */
1715 r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &cgroup);
fb183854 1716 if (r < 0) {
52ad194e 1717 if (IN_SET(r, -ENOENT, -ENOMEDIUM))
fb183854
MP
1718 log_debug_errno(r, "did not find dedicated cgroup: %m");
1719 else
1720 log_warning_errno(r, "failed to get cgroup: %m");
1721 }
86f210e9 1722 }
e3bff60a 1723
86f210e9
MP
1724 r = listen_fds(&fd_ctrl, &fd_uevent);
1725 if (r < 0) {
1726 r = log_error_errno(r, "could not listen on fds: %m");
1727 goto exit;
1728 }
5eef597e
MP
1729
1730 if (arg_daemonize) {
663996b3
MS
1731 pid_t pid;
1732
81c58355 1733 log_info("starting version " PACKAGE_VERSION);
86f210e9
MP
1734
1735 /* connect /dev/null to stdin, stdout, stderr */
8a584da2
MP
1736 if (log_get_max_level() < LOG_DEBUG) {
1737 r = make_null_stdio();
1738 if (r < 0)
1739 log_warning_errno(r, "Failed to redirect standard streams to /dev/null: %m");
1740 }
1741
1742
86f210e9 1743
663996b3
MS
1744 pid = fork();
1745 switch (pid) {
1746 case 0:
1747 break;
1748 case -1:
e3bff60a 1749 r = log_error_errno(errno, "fork of daemon failed: %m");
663996b3
MS
1750 goto exit;
1751 default:
e3bff60a
MP
1752 mac_selinux_finish();
1753 log_close();
1754 _exit(EXIT_SUCCESS);
663996b3
MS
1755 }
1756
1757 setsid();
1758
7035cd9e 1759 write_string_file("/proc/self/oom_score_adj", "-1000", 0);
86f210e9 1760 }
663996b3 1761
fb183854 1762 r = run(fd_ctrl, fd_uevent, cgroup);
86f210e9 1763
663996b3 1764exit:
5eef597e 1765 mac_selinux_finish();
663996b3 1766 log_close();
e3bff60a 1767 return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
663996b3 1768}