]> git.proxmox.com Git - systemd.git/blame - src/libsystemd/sd-event/sd-event.c
Imported Upstream version 231
[systemd.git] / src / libsystemd / sd-event / sd-event.c
CommitLineData
60f067b4
JS
1/***
2 This file is part of systemd.
3
4 Copyright 2013 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
20#include <sys/epoll.h>
21#include <sys/timerfd.h>
22#include <sys/wait.h>
60f067b4 23
60f067b4 24#include "sd-daemon.h"
db2df898
MP
25#include "sd-event.h"
26#include "sd-id128.h"
27
28#include "alloc-util.h"
29#include "fd-util.h"
60f067b4 30#include "hashmap.h"
db2df898
MP
31#include "list.h"
32#include "macro.h"
60f067b4 33#include "missing.h"
db2df898
MP
34#include "prioq.h"
35#include "process-util.h"
60f067b4 36#include "set.h"
86f210e9 37#include "signal-util.h"
4c89c718 38#include "string-table.h"
db2df898
MP
39#include "string-util.h"
40#include "time-util.h"
41#include "util.h"
60f067b4 42
60f067b4
JS
/* Accuracy substituted for timer event sources that were added with an accuracy of 0. */
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
44
/* Discriminator for the per-type union inside struct sd_event_source. The
 * values double as indexes into event_source_type_table[], hence they must
 * stay dense and start at zero. */
typedef enum EventSourceType {
        /* File descriptor watched via epoll */
        SOURCE_IO,

        /* Timers, one type per supported clock */
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,

        /* UNIX signals and child process state changes */
        SOURCE_SIGNAL,
        SOURCE_CHILD,

        /* Sources that are not backed by a kernel object */
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,

        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
61
4c89c718
MP
62static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
63 [SOURCE_IO] = "io",
64 [SOURCE_TIME_REALTIME] = "realtime",
65 [SOURCE_TIME_BOOTTIME] = "bootime",
66 [SOURCE_TIME_MONOTONIC] = "monotonic",
67 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
68 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
69 [SOURCE_SIGNAL] = "signal",
70 [SOURCE_CHILD] = "child",
71 [SOURCE_DEFER] = "defer",
72 [SOURCE_POST] = "post",
73 [SOURCE_EXIT] = "exit",
74 [SOURCE_WATCHDOG] = "watchdog",
75};
76
77DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
78
d9dfd233
MP
/* Every object whose address we hand to epoll as event data carries one of
 * these tags as its very first member. That way the dispatcher can look at
 * the pointer it gets back and tell which kind of object it refers to. */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE, /* struct sd_event_source */
        WAKEUP_CLOCK_DATA,   /* struct clock_data */
        WAKEUP_SIGNAL_DATA,  /* struct signal_data */
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;
89
/* True if the event source type t is one of the five timer types. */
#define EVENT_SOURCE_IS_TIME(t)                         \
        IN_SET((t),                                     \
               SOURCE_TIME_REALTIME,                    \
               SOURCE_TIME_BOOTTIME,                    \
               SOURCE_TIME_MONOTONIC,                   \
               SOURCE_TIME_REALTIME_ALARM,              \
               SOURCE_TIME_BOOTTIME_ALARM)
60f067b4
JS
91
92struct sd_event_source {
d9dfd233
MP
93 WakeupType wakeup;
94
60f067b4
JS
95 unsigned n_ref;
96
97 sd_event *event;
98 void *userdata;
99 sd_event_handler_t prepare;
100
f47781d8 101 char *description;
5eef597e 102
60f067b4
JS
103 EventSourceType type:5;
104 int enabled:3;
105 bool pending:1;
106 bool dispatching:1;
107 bool floating:1;
108
109 int64_t priority;
110 unsigned pending_index;
111 unsigned prepare_index;
5a920b42
MP
112 uint64_t pending_iteration;
113 uint64_t prepare_iteration;
60f067b4
JS
114
115 LIST_FIELDS(sd_event_source, sources);
116
117 union {
118 struct {
119 sd_event_io_handler_t callback;
120 int fd;
121 uint32_t events;
122 uint32_t revents;
123 bool registered:1;
124 } io;
125 struct {
126 sd_event_time_handler_t callback;
127 usec_t next, accuracy;
128 unsigned earliest_index;
129 unsigned latest_index;
130 } time;
131 struct {
132 sd_event_signal_handler_t callback;
133 struct signalfd_siginfo siginfo;
134 int sig;
135 } signal;
136 struct {
137 sd_event_child_handler_t callback;
138 siginfo_t siginfo;
139 pid_t pid;
140 int options;
141 } child;
142 struct {
143 sd_event_handler_t callback;
144 } defer;
145 struct {
146 sd_event_handler_t callback;
147 } post;
148 struct {
149 sd_event_handler_t callback;
150 unsigned prioq_index;
151 } exit;
152 };
153};
154
155struct clock_data {
d9dfd233 156 WakeupType wakeup;
60f067b4
JS
157 int fd;
158
159 /* For all clocks we maintain two priority queues each, one
160 * ordered for the earliest times the events may be
161 * dispatched, and one ordered by the latest times they must
162 * have been dispatched. The range between the top entries in
163 * the two prioqs is the time window we can freely schedule
164 * wakeups in */
165
166 Prioq *earliest;
167 Prioq *latest;
168 usec_t next;
5eef597e
MP
169
170 bool needs_rearm:1;
60f067b4
JS
171};
172
d9dfd233
MP
173struct signal_data {
174 WakeupType wakeup;
175
176 /* For each priority we maintain one signal fd, so that we
177 * only have to dequeue a single event per priority at a
178 * time. */
179
180 int fd;
181 int64_t priority;
182 sigset_t sigset;
183 sd_event_source *current;
184};
185
60f067b4
JS
186struct sd_event {
187 unsigned n_ref;
188
189 int epoll_fd;
60f067b4
JS
190 int watchdog_fd;
191
192 Prioq *pending;
193 Prioq *prepare;
194
5eef597e 195 /* timerfd_create() only supports these five clocks so far. We
60f067b4
JS
196 * can add support for more clocks when the kernel learns to
197 * deal with them, too. */
198 struct clock_data realtime;
5eef597e 199 struct clock_data boottime;
60f067b4
JS
200 struct clock_data monotonic;
201 struct clock_data realtime_alarm;
202 struct clock_data boottime_alarm;
203
204 usec_t perturb;
205
d9dfd233
MP
206 sd_event_source **signal_sources; /* indexed by signal number */
207 Hashmap *signal_data; /* indexed by priority */
60f067b4
JS
208
209 Hashmap *child_sources;
210 unsigned n_enabled_child_sources;
211
212 Set *post_sources;
213
214 Prioq *exit;
215
216 pid_t original_pid;
217
5a920b42
MP
218 uint64_t iteration;
219 triple_timestamp timestamp;
60f067b4
JS
220 int state;
221
222 bool exit_requested:1;
223 bool need_process_child:1;
224 bool watchdog:1;
4c89c718 225 bool profile_delays:1;
60f067b4
JS
226
227 int exit_code;
228
229 pid_t tid;
230 sd_event **default_event_ptr;
231
232 usec_t watchdog_last, watchdog_period;
233
234 unsigned n_sources;
235
236 LIST_HEAD(sd_event_source, sources);
4c89c718
MP
237
238 usec_t last_run, last_log;
239 unsigned delays[sizeof(usec_t) * 8];
60f067b4
JS
240};
241
242static void source_disconnect(sd_event_source *s);
243
244static int pending_prioq_compare(const void *a, const void *b) {
245 const sd_event_source *x = a, *y = b;
246
247 assert(x->pending);
248 assert(y->pending);
249
250 /* Enabled ones first */
251 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
252 return -1;
253 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
254 return 1;
255
256 /* Lower priority values first */
257 if (x->priority < y->priority)
258 return -1;
259 if (x->priority > y->priority)
260 return 1;
261
262 /* Older entries first */
263 if (x->pending_iteration < y->pending_iteration)
264 return -1;
265 if (x->pending_iteration > y->pending_iteration)
266 return 1;
267
60f067b4
JS
268 return 0;
269}
270
271static int prepare_prioq_compare(const void *a, const void *b) {
272 const sd_event_source *x = a, *y = b;
273
274 assert(x->prepare);
275 assert(y->prepare);
276
6300502b
MP
277 /* Enabled ones first */
278 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
279 return -1;
280 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
281 return 1;
282
60f067b4
JS
283 /* Move most recently prepared ones last, so that we can stop
284 * preparing as soon as we hit one that has already been
285 * prepared in the current iteration */
286 if (x->prepare_iteration < y->prepare_iteration)
287 return -1;
288 if (x->prepare_iteration > y->prepare_iteration)
289 return 1;
290
60f067b4
JS
291 /* Lower priority values first */
292 if (x->priority < y->priority)
293 return -1;
294 if (x->priority > y->priority)
295 return 1;
296
60f067b4
JS
297 return 0;
298}
299
300static int earliest_time_prioq_compare(const void *a, const void *b) {
301 const sd_event_source *x = a, *y = b;
302
303 assert(EVENT_SOURCE_IS_TIME(x->type));
304 assert(x->type == y->type);
305
306 /* Enabled ones first */
307 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
308 return -1;
309 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
310 return 1;
311
312 /* Move the pending ones to the end */
313 if (!x->pending && y->pending)
314 return -1;
315 if (x->pending && !y->pending)
316 return 1;
317
318 /* Order by time */
319 if (x->time.next < y->time.next)
320 return -1;
321 if (x->time.next > y->time.next)
322 return 1;
323
60f067b4
JS
324 return 0;
325}
326
4c89c718
MP
327static usec_t time_event_source_latest(const sd_event_source *s) {
328 return usec_add(s->time.next, s->time.accuracy);
329}
330
60f067b4
JS
331static int latest_time_prioq_compare(const void *a, const void *b) {
332 const sd_event_source *x = a, *y = b;
333
334 assert(EVENT_SOURCE_IS_TIME(x->type));
335 assert(x->type == y->type);
336
337 /* Enabled ones first */
338 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
339 return -1;
340 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
341 return 1;
342
343 /* Move the pending ones to the end */
344 if (!x->pending && y->pending)
345 return -1;
346 if (x->pending && !y->pending)
347 return 1;
348
349 /* Order by time */
4c89c718 350 if (time_event_source_latest(x) < time_event_source_latest(y))
60f067b4 351 return -1;
4c89c718 352 if (time_event_source_latest(x) > time_event_source_latest(y))
60f067b4
JS
353 return 1;
354
60f067b4
JS
355 return 0;
356}
357
358static int exit_prioq_compare(const void *a, const void *b) {
359 const sd_event_source *x = a, *y = b;
360
361 assert(x->type == SOURCE_EXIT);
362 assert(y->type == SOURCE_EXIT);
363
364 /* Enabled ones first */
365 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
366 return -1;
367 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
368 return 1;
369
370 /* Lower priority values first */
371 if (x->priority < y->priority)
372 return -1;
373 if (x->priority > y->priority)
374 return 1;
375
60f067b4
JS
376 return 0;
377}
378
379static void free_clock_data(struct clock_data *d) {
380 assert(d);
d9dfd233 381 assert(d->wakeup == WAKEUP_CLOCK_DATA);
60f067b4
JS
382
383 safe_close(d->fd);
384 prioq_free(d->earliest);
385 prioq_free(d->latest);
386}
387
388static void event_free(sd_event *e) {
389 sd_event_source *s;
390
391 assert(e);
392
393 while ((s = e->sources)) {
394 assert(s->floating);
395 source_disconnect(s);
396 sd_event_source_unref(s);
397 }
398
399 assert(e->n_sources == 0);
400
401 if (e->default_event_ptr)
402 *(e->default_event_ptr) = NULL;
403
404 safe_close(e->epoll_fd);
60f067b4
JS
405 safe_close(e->watchdog_fd);
406
407 free_clock_data(&e->realtime);
5eef597e 408 free_clock_data(&e->boottime);
60f067b4
JS
409 free_clock_data(&e->monotonic);
410 free_clock_data(&e->realtime_alarm);
411 free_clock_data(&e->boottime_alarm);
412
413 prioq_free(e->pending);
414 prioq_free(e->prepare);
415 prioq_free(e->exit);
416
417 free(e->signal_sources);
d9dfd233 418 hashmap_free(e->signal_data);
60f067b4
JS
419
420 hashmap_free(e->child_sources);
421 set_free(e->post_sources);
422 free(e);
423}
424
425_public_ int sd_event_new(sd_event** ret) {
426 sd_event *e;
427 int r;
428
429 assert_return(ret, -EINVAL);
430
431 e = new0(sd_event, 1);
432 if (!e)
433 return -ENOMEM;
434
435 e->n_ref = 1;
d9dfd233 436 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
5eef597e 437 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
d9dfd233 438 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
60f067b4 439 e->original_pid = getpid();
5eef597e 440 e->perturb = USEC_INFINITY;
60f067b4 441
4c89c718
MP
442 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
443 if (r < 0)
60f067b4 444 goto fail;
60f067b4
JS
445
446 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
447 if (e->epoll_fd < 0) {
448 r = -errno;
449 goto fail;
450 }
451
4c89c718
MP
452 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
453 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
454 e->profile_delays = true;
455 }
456
60f067b4
JS
457 *ret = e;
458 return 0;
459
460fail:
461 event_free(e);
462 return r;
463}
464
465_public_ sd_event* sd_event_ref(sd_event *e) {
4c89c718
MP
466
467 if (!e)
468 return NULL;
60f067b4
JS
469
470 assert(e->n_ref >= 1);
471 e->n_ref++;
472
473 return e;
474}
475
476_public_ sd_event* sd_event_unref(sd_event *e) {
477
478 if (!e)
479 return NULL;
480
481 assert(e->n_ref >= 1);
482 e->n_ref--;
483
484 if (e->n_ref <= 0)
485 event_free(e);
486
487 return NULL;
488}
489
490static bool event_pid_changed(sd_event *e) {
491 assert(e);
492
e3bff60a 493 /* We don't support people creating an event loop and keeping
60f067b4
JS
494 * it around over a fork(). Let's complain. */
495
496 return e->original_pid != getpid();
497}
498
86f210e9 499static void source_io_unregister(sd_event_source *s) {
60f067b4
JS
500 int r;
501
502 assert(s);
503 assert(s->type == SOURCE_IO);
504
86f210e9
MP
505 if (event_pid_changed(s->event))
506 return;
507
60f067b4 508 if (!s->io.registered)
86f210e9 509 return;
60f067b4
JS
510
511 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
d9dfd233 512 if (r < 0)
4c89c718
MP
513 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
514 strna(s->description), event_source_type_to_string(s->type));
60f067b4
JS
515
516 s->io.registered = false;
60f067b4
JS
517}
518
519static int source_io_register(
520 sd_event_source *s,
521 int enabled,
522 uint32_t events) {
523
524 struct epoll_event ev = {};
525 int r;
526
527 assert(s);
528 assert(s->type == SOURCE_IO);
529 assert(enabled != SD_EVENT_OFF);
530
531 ev.events = events;
532 ev.data.ptr = s;
533
534 if (enabled == SD_EVENT_ONESHOT)
535 ev.events |= EPOLLONESHOT;
536
537 if (s->io.registered)
538 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
539 else
540 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
60f067b4
JS
541 if (r < 0)
542 return -errno;
543
544 s->io.registered = true;
545
546 return 0;
547}
548
549static clockid_t event_source_type_to_clock(EventSourceType t) {
550
551 switch (t) {
552
553 case SOURCE_TIME_REALTIME:
554 return CLOCK_REALTIME;
555
5eef597e
MP
556 case SOURCE_TIME_BOOTTIME:
557 return CLOCK_BOOTTIME;
558
60f067b4
JS
559 case SOURCE_TIME_MONOTONIC:
560 return CLOCK_MONOTONIC;
561
562 case SOURCE_TIME_REALTIME_ALARM:
563 return CLOCK_REALTIME_ALARM;
564
565 case SOURCE_TIME_BOOTTIME_ALARM:
566 return CLOCK_BOOTTIME_ALARM;
567
568 default:
569 return (clockid_t) -1;
570 }
571}
572
573static EventSourceType clock_to_event_source_type(clockid_t clock) {
574
575 switch (clock) {
576
577 case CLOCK_REALTIME:
578 return SOURCE_TIME_REALTIME;
579
5eef597e
MP
580 case CLOCK_BOOTTIME:
581 return SOURCE_TIME_BOOTTIME;
582
60f067b4
JS
583 case CLOCK_MONOTONIC:
584 return SOURCE_TIME_MONOTONIC;
585
586 case CLOCK_REALTIME_ALARM:
587 return SOURCE_TIME_REALTIME_ALARM;
588
589 case CLOCK_BOOTTIME_ALARM:
590 return SOURCE_TIME_BOOTTIME_ALARM;
591
592 default:
593 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
594 }
595}
596
597static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
598 assert(e);
599
600 switch (t) {
601
602 case SOURCE_TIME_REALTIME:
603 return &e->realtime;
604
5eef597e
MP
605 case SOURCE_TIME_BOOTTIME:
606 return &e->boottime;
607
60f067b4
JS
608 case SOURCE_TIME_MONOTONIC:
609 return &e->monotonic;
610
611 case SOURCE_TIME_REALTIME_ALARM:
612 return &e->realtime_alarm;
613
614 case SOURCE_TIME_BOOTTIME_ALARM:
615 return &e->boottime_alarm;
616
617 default:
618 return NULL;
619 }
620}
621
d9dfd233
MP
622static int event_make_signal_data(
623 sd_event *e,
624 int sig,
625 struct signal_data **ret) {
5eef597e 626
5eef597e 627 struct epoll_event ev = {};
d9dfd233
MP
628 struct signal_data *d;
629 bool added = false;
630 sigset_t ss_copy;
631 int64_t priority;
5eef597e
MP
632 int r;
633
634 assert(e);
635
86f210e9 636 if (event_pid_changed(e))
d9dfd233 637 return -ECHILD;
86f210e9 638
d9dfd233
MP
639 if (e->signal_sources && e->signal_sources[sig])
640 priority = e->signal_sources[sig]->priority;
641 else
642 priority = 0;
5eef597e 643
d9dfd233
MP
644 d = hashmap_get(e->signal_data, &priority);
645 if (d) {
646 if (sigismember(&d->sigset, sig) > 0) {
647 if (ret)
648 *ret = d;
649 return 0;
650 }
651 } else {
652 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
653 if (r < 0)
654 return r;
655
656 d = new0(struct signal_data, 1);
657 if (!d)
658 return -ENOMEM;
659
660 d->wakeup = WAKEUP_SIGNAL_DATA;
661 d->fd = -1;
662 d->priority = priority;
663
664 r = hashmap_put(e->signal_data, &d->priority, d);
4c89c718
MP
665 if (r < 0) {
666 free(d);
d9dfd233 667 return r;
4c89c718 668 }
d9dfd233
MP
669
670 added = true;
671 }
672
673 ss_copy = d->sigset;
674 assert_se(sigaddset(&ss_copy, sig) >= 0);
675
676 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
677 if (r < 0) {
678 r = -errno;
679 goto fail;
680 }
5eef597e 681
d9dfd233 682 d->sigset = ss_copy;
5eef597e 683
d9dfd233
MP
684 if (d->fd >= 0) {
685 if (ret)
686 *ret = d;
5eef597e 687 return 0;
d9dfd233
MP
688 }
689
690 d->fd = r;
5eef597e
MP
691
692 ev.events = EPOLLIN;
d9dfd233 693 ev.data.ptr = d;
5eef597e 694
d9dfd233
MP
695 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
696 if (r < 0) {
697 r = -errno;
698 goto fail;
5eef597e
MP
699 }
700
d9dfd233
MP
701 if (ret)
702 *ret = d;
703
5eef597e 704 return 0;
d9dfd233
MP
705
706fail:
707 if (added) {
708 d->fd = safe_close(d->fd);
709 hashmap_remove(e->signal_data, &d->priority);
710 free(d);
711 }
712
713 return r;
714}
715
716static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
717 assert(e);
718 assert(d);
719
720 /* Turns off the specified signal in the signal data
721 * object. If the signal mask of the object becomes empty that
722 * way removes it. */
723
724 if (sigismember(&d->sigset, sig) == 0)
725 return;
726
727 assert_se(sigdelset(&d->sigset, sig) >= 0);
728
729 if (sigisemptyset(&d->sigset)) {
730
731 /* If all the mask is all-zero we can get rid of the structure */
732 hashmap_remove(e->signal_data, &d->priority);
733 assert(!d->current);
734 safe_close(d->fd);
735 free(d);
736 return;
737 }
738
739 assert(d->fd >= 0);
740
741 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
742 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
743}
744
745static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
746 struct signal_data *d;
747 static const int64_t zero_priority = 0;
748
749 assert(e);
750
751 /* Rechecks if the specified signal is still something we are
752 * interested in. If not, we'll unmask it, and possibly drop
753 * the signalfd for it. */
754
755 if (sig == SIGCHLD &&
756 e->n_enabled_child_sources > 0)
757 return;
758
759 if (e->signal_sources &&
760 e->signal_sources[sig] &&
761 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
762 return;
763
764 /*
765 * The specified signal might be enabled in three different queues:
766 *
767 * 1) the one that belongs to the priority passed (if it is non-NULL)
768 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
769 * 3) the 0 priority (to cover the SIGCHLD case)
770 *
771 * Hence, let's remove it from all three here.
772 */
773
774 if (priority) {
775 d = hashmap_get(e->signal_data, priority);
776 if (d)
777 event_unmask_signal_data(e, d, sig);
778 }
779
780 if (e->signal_sources && e->signal_sources[sig]) {
781 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
782 if (d)
783 event_unmask_signal_data(e, d, sig);
784 }
785
786 d = hashmap_get(e->signal_data, &zero_priority);
787 if (d)
788 event_unmask_signal_data(e, d, sig);
5eef597e
MP
789}
790
60f067b4
JS
791static void source_disconnect(sd_event_source *s) {
792 sd_event *event;
793
794 assert(s);
795
796 if (!s->event)
797 return;
798
799 assert(s->event->n_sources > 0);
800
801 switch (s->type) {
802
803 case SOURCE_IO:
804 if (s->io.fd >= 0)
805 source_io_unregister(s);
806
807 break;
808
809 case SOURCE_TIME_REALTIME:
5eef597e 810 case SOURCE_TIME_BOOTTIME:
60f067b4
JS
811 case SOURCE_TIME_MONOTONIC:
812 case SOURCE_TIME_REALTIME_ALARM:
813 case SOURCE_TIME_BOOTTIME_ALARM: {
814 struct clock_data *d;
815
816 d = event_get_clock_data(s->event, s->type);
817 assert(d);
818
819 prioq_remove(d->earliest, s, &s->time.earliest_index);
820 prioq_remove(d->latest, s, &s->time.latest_index);
5eef597e 821 d->needs_rearm = true;
60f067b4
JS
822 break;
823 }
824
825 case SOURCE_SIGNAL:
826 if (s->signal.sig > 0) {
d9dfd233 827
60f067b4
JS
828 if (s->event->signal_sources)
829 s->event->signal_sources[s->signal.sig] = NULL;
5eef597e 830
d9dfd233 831 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
60f067b4
JS
832 }
833
834 break;
835
836 case SOURCE_CHILD:
837 if (s->child.pid > 0) {
838 if (s->enabled != SD_EVENT_OFF) {
839 assert(s->event->n_enabled_child_sources > 0);
840 s->event->n_enabled_child_sources--;
5eef597e 841 }
60f067b4 842
db2df898 843 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
d9dfd233 844 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
60f067b4
JS
845 }
846
847 break;
848
849 case SOURCE_DEFER:
850 /* nothing */
851 break;
852
853 case SOURCE_POST:
854 set_remove(s->event->post_sources, s);
855 break;
856
857 case SOURCE_EXIT:
858 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
859 break;
860
861 default:
862 assert_not_reached("Wut? I shouldn't exist.");
863 }
864
865 if (s->pending)
866 prioq_remove(s->event->pending, s, &s->pending_index);
867
868 if (s->prepare)
869 prioq_remove(s->event->prepare, s, &s->prepare_index);
870
871 event = s->event;
872
873 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
874 s->event = NULL;
875 LIST_REMOVE(sources, event->sources, s);
876 event->n_sources--;
877
878 if (!s->floating)
879 sd_event_unref(event);
880}
881
882static void source_free(sd_event_source *s) {
883 assert(s);
884
885 source_disconnect(s);
f47781d8 886 free(s->description);
60f067b4
JS
887 free(s);
888}
889
890static int source_set_pending(sd_event_source *s, bool b) {
891 int r;
892
893 assert(s);
894 assert(s->type != SOURCE_EXIT);
895
896 if (s->pending == b)
897 return 0;
898
899 s->pending = b;
900
901 if (b) {
902 s->pending_iteration = s->event->iteration;
903
904 r = prioq_put(s->event->pending, s, &s->pending_index);
905 if (r < 0) {
906 s->pending = false;
907 return r;
908 }
909 } else
910 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
911
912 if (EVENT_SOURCE_IS_TIME(s->type)) {
913 struct clock_data *d;
914
915 d = event_get_clock_data(s->event, s->type);
916 assert(d);
917
918 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
919 prioq_reshuffle(d->latest, s, &s->time.latest_index);
5eef597e 920 d->needs_rearm = true;
60f067b4
JS
921 }
922
d9dfd233
MP
923 if (s->type == SOURCE_SIGNAL && !b) {
924 struct signal_data *d;
925
926 d = hashmap_get(s->event->signal_data, &s->priority);
927 if (d && d->current == s)
928 d->current = NULL;
929 }
930
60f067b4
JS
931 return 0;
932}
933
934static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
935 sd_event_source *s;
936
937 assert(e);
938
939 s = new0(sd_event_source, 1);
940 if (!s)
941 return NULL;
942
943 s->n_ref = 1;
944 s->event = e;
945 s->floating = floating;
946 s->type = type;
947 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
948
949 if (!floating)
950 sd_event_ref(e);
951
952 LIST_PREPEND(sources, e->sources, s);
aa27b158 953 e->n_sources++;
60f067b4
JS
954
955 return s;
956}
957
958_public_ int sd_event_add_io(
959 sd_event *e,
960 sd_event_source **ret,
961 int fd,
962 uint32_t events,
963 sd_event_io_handler_t callback,
964 void *userdata) {
965
966 sd_event_source *s;
967 int r;
968
969 assert_return(e, -EINVAL);
13d276d0 970 assert_return(fd >= 0, -EBADF);
60f067b4
JS
971 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
972 assert_return(callback, -EINVAL);
973 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
974 assert_return(!event_pid_changed(e), -ECHILD);
975
976 s = source_new(e, !ret, SOURCE_IO);
977 if (!s)
978 return -ENOMEM;
979
d9dfd233 980 s->wakeup = WAKEUP_EVENT_SOURCE;
60f067b4
JS
981 s->io.fd = fd;
982 s->io.events = events;
983 s->io.callback = callback;
984 s->userdata = userdata;
985 s->enabled = SD_EVENT_ON;
986
987 r = source_io_register(s, s->enabled, events);
988 if (r < 0) {
989 source_free(s);
5eef597e 990 return r;
60f067b4
JS
991 }
992
993 if (ret)
994 *ret = s;
995
996 return 0;
997}
998
999static void initialize_perturb(sd_event *e) {
1000 sd_id128_t bootid = {};
1001
1002 /* When we sleep for longer, we try to realign the wakeup to
1003 the same time wihtin each minute/second/250ms, so that
1004 events all across the system can be coalesced into a single
1005 CPU wakeup. However, let's take some system-specific
1006 randomness for this value, so that in a network of systems
1007 with synced clocks timer events are distributed a
1008 bit. Here, we calculate a perturbation usec offset from the
1009 boot ID. */
1010
5eef597e 1011 if (_likely_(e->perturb != USEC_INFINITY))
60f067b4
JS
1012 return;
1013
1014 if (sd_id128_get_boot(&bootid) >= 0)
1015 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1016}
1017
1018static int event_setup_timer_fd(
1019 sd_event *e,
1020 struct clock_data *d,
1021 clockid_t clock) {
1022
1023 struct epoll_event ev = {};
1024 int r, fd;
1025
1026 assert(e);
1027 assert(d);
1028
1029 if (_likely_(d->fd >= 0))
1030 return 0;
1031
1032 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1033 if (fd < 0)
1034 return -errno;
1035
1036 ev.events = EPOLLIN;
d9dfd233 1037 ev.data.ptr = d;
60f067b4
JS
1038
1039 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1040 if (r < 0) {
1041 safe_close(fd);
1042 return -errno;
1043 }
1044
1045 d->fd = fd;
1046 return 0;
1047}
1048
e735f4d4
MP
1049static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1050 assert(s);
1051
1052 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1053}
1054
60f067b4
JS
1055_public_ int sd_event_add_time(
1056 sd_event *e,
1057 sd_event_source **ret,
1058 clockid_t clock,
1059 uint64_t usec,
1060 uint64_t accuracy,
1061 sd_event_time_handler_t callback,
1062 void *userdata) {
1063
1064 EventSourceType type;
1065 sd_event_source *s;
1066 struct clock_data *d;
1067 int r;
1068
1069 assert_return(e, -EINVAL);
60f067b4 1070 assert_return(accuracy != (uint64_t) -1, -EINVAL);
60f067b4
JS
1071 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1072 assert_return(!event_pid_changed(e), -ECHILD);
1073
5a920b42
MP
1074 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1075 return -EOPNOTSUPP;
1076
1077 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1078 if (type < 0)
aa27b158
MP
1079 return -EOPNOTSUPP;
1080
e735f4d4
MP
1081 if (!callback)
1082 callback = time_exit_callback;
1083
60f067b4
JS
1084 d = event_get_clock_data(e, type);
1085 assert(d);
1086
4c89c718
MP
1087 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1088 if (r < 0)
1089 return r;
60f067b4 1090
4c89c718
MP
1091 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1092 if (r < 0)
1093 return r;
60f067b4
JS
1094
1095 if (d->fd < 0) {
1096 r = event_setup_timer_fd(e, d, clock);
1097 if (r < 0)
1098 return r;
1099 }
1100
1101 s = source_new(e, !ret, type);
1102 if (!s)
1103 return -ENOMEM;
1104
1105 s->time.next = usec;
1106 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1107 s->time.callback = callback;
1108 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1109 s->userdata = userdata;
1110 s->enabled = SD_EVENT_ONESHOT;
1111
5eef597e
MP
1112 d->needs_rearm = true;
1113
60f067b4
JS
1114 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1115 if (r < 0)
1116 goto fail;
1117
1118 r = prioq_put(d->latest, s, &s->time.latest_index);
1119 if (r < 0)
1120 goto fail;
1121
1122 if (ret)
1123 *ret = s;
1124
1125 return 0;
1126
1127fail:
1128 source_free(s);
1129 return r;
1130}
1131
60f067b4
JS
1132static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1133 assert(s);
1134
1135 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1136}
1137
1138_public_ int sd_event_add_signal(
1139 sd_event *e,
1140 sd_event_source **ret,
1141 int sig,
1142 sd_event_signal_handler_t callback,
1143 void *userdata) {
1144
1145 sd_event_source *s;
d9dfd233 1146 struct signal_data *d;
60f067b4
JS
1147 sigset_t ss;
1148 int r;
1149
1150 assert_return(e, -EINVAL);
aa27b158 1151 assert_return(SIGNAL_VALID(sig), -EINVAL);
60f067b4
JS
1152 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1153 assert_return(!event_pid_changed(e), -ECHILD);
1154
1155 if (!callback)
1156 callback = signal_exit_callback;
1157
1158 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
db2df898
MP
1159 if (r != 0)
1160 return -r;
60f067b4
JS
1161
1162 if (!sigismember(&ss, sig))
1163 return -EBUSY;
1164
1165 if (!e->signal_sources) {
1166 e->signal_sources = new0(sd_event_source*, _NSIG);
1167 if (!e->signal_sources)
1168 return -ENOMEM;
1169 } else if (e->signal_sources[sig])
1170 return -EBUSY;
1171
1172 s = source_new(e, !ret, SOURCE_SIGNAL);
1173 if (!s)
1174 return -ENOMEM;
1175
1176 s->signal.sig = sig;
1177 s->signal.callback = callback;
1178 s->userdata = userdata;
1179 s->enabled = SD_EVENT_ON;
1180
1181 e->signal_sources[sig] = s;
60f067b4 1182
d9dfd233
MP
1183 r = event_make_signal_data(e, sig, &d);
1184 if (r < 0) {
1185 source_free(s);
1186 return r;
60f067b4
JS
1187 }
1188
f47781d8
MP
1189 /* Use the signal name as description for the event source by default */
1190 (void) sd_event_source_set_description(s, signal_to_string(sig));
1191
60f067b4
JS
1192 if (ret)
1193 *ret = s;
1194
1195 return 0;
1196}
1197
1198_public_ int sd_event_add_child(
1199 sd_event *e,
1200 sd_event_source **ret,
1201 pid_t pid,
1202 int options,
1203 sd_event_child_handler_t callback,
1204 void *userdata) {
1205
1206 sd_event_source *s;
1207 int r;
1208
1209 assert_return(e, -EINVAL);
1210 assert_return(pid > 1, -EINVAL);
1211 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1212 assert_return(options != 0, -EINVAL);
1213 assert_return(callback, -EINVAL);
1214 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1215 assert_return(!event_pid_changed(e), -ECHILD);
1216
5eef597e 1217 r = hashmap_ensure_allocated(&e->child_sources, NULL);
60f067b4
JS
1218 if (r < 0)
1219 return r;
1220
db2df898 1221 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
60f067b4
JS
1222 return -EBUSY;
1223
1224 s = source_new(e, !ret, SOURCE_CHILD);
1225 if (!s)
1226 return -ENOMEM;
1227
1228 s->child.pid = pid;
1229 s->child.options = options;
1230 s->child.callback = callback;
1231 s->userdata = userdata;
1232 s->enabled = SD_EVENT_ONESHOT;
1233
db2df898 1234 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
60f067b4
JS
1235 if (r < 0) {
1236 source_free(s);
1237 return r;
1238 }
1239
aa27b158 1240 e->n_enabled_child_sources++;
60f067b4 1241
d9dfd233
MP
1242 r = event_make_signal_data(e, SIGCHLD, NULL);
1243 if (r < 0) {
1244 e->n_enabled_child_sources--;
1245 source_free(s);
1246 return r;
60f067b4
JS
1247 }
1248
1249 e->need_process_child = true;
1250
1251 if (ret)
1252 *ret = s;
1253
1254 return 0;
1255}
1256
1257_public_ int sd_event_add_defer(
1258 sd_event *e,
1259 sd_event_source **ret,
1260 sd_event_handler_t callback,
1261 void *userdata) {
1262
1263 sd_event_source *s;
1264 int r;
1265
1266 assert_return(e, -EINVAL);
1267 assert_return(callback, -EINVAL);
1268 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1269 assert_return(!event_pid_changed(e), -ECHILD);
1270
1271 s = source_new(e, !ret, SOURCE_DEFER);
1272 if (!s)
1273 return -ENOMEM;
1274
1275 s->defer.callback = callback;
1276 s->userdata = userdata;
1277 s->enabled = SD_EVENT_ONESHOT;
1278
1279 r = source_set_pending(s, true);
1280 if (r < 0) {
1281 source_free(s);
1282 return r;
1283 }
1284
1285 if (ret)
1286 *ret = s;
1287
1288 return 0;
1289}
1290
1291_public_ int sd_event_add_post(
1292 sd_event *e,
1293 sd_event_source **ret,
1294 sd_event_handler_t callback,
1295 void *userdata) {
1296
1297 sd_event_source *s;
1298 int r;
1299
1300 assert_return(e, -EINVAL);
1301 assert_return(callback, -EINVAL);
1302 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1303 assert_return(!event_pid_changed(e), -ECHILD);
1304
5eef597e 1305 r = set_ensure_allocated(&e->post_sources, NULL);
60f067b4
JS
1306 if (r < 0)
1307 return r;
1308
1309 s = source_new(e, !ret, SOURCE_POST);
1310 if (!s)
1311 return -ENOMEM;
1312
1313 s->post.callback = callback;
1314 s->userdata = userdata;
1315 s->enabled = SD_EVENT_ON;
1316
1317 r = set_put(e->post_sources, s);
1318 if (r < 0) {
1319 source_free(s);
1320 return r;
1321 }
1322
1323 if (ret)
1324 *ret = s;
1325
1326 return 0;
1327}
1328
1329_public_ int sd_event_add_exit(
1330 sd_event *e,
1331 sd_event_source **ret,
1332 sd_event_handler_t callback,
1333 void *userdata) {
1334
1335 sd_event_source *s;
1336 int r;
1337
1338 assert_return(e, -EINVAL);
1339 assert_return(callback, -EINVAL);
1340 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1341 assert_return(!event_pid_changed(e), -ECHILD);
1342
4c89c718
MP
1343 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1344 if (r < 0)
1345 return r;
60f067b4
JS
1346
1347 s = source_new(e, !ret, SOURCE_EXIT);
1348 if (!s)
1349 return -ENOMEM;
1350
1351 s->exit.callback = callback;
1352 s->userdata = userdata;
1353 s->exit.prioq_index = PRIOQ_IDX_NULL;
1354 s->enabled = SD_EVENT_ONESHOT;
1355
1356 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1357 if (r < 0) {
1358 source_free(s);
1359 return r;
1360 }
1361
1362 if (ret)
1363 *ret = s;
1364
1365 return 0;
1366}
1367
1368_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
4c89c718
MP
1369
1370 if (!s)
1371 return NULL;
60f067b4
JS
1372
1373 assert(s->n_ref >= 1);
1374 s->n_ref++;
1375
1376 return s;
1377}
1378
1379_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1380
1381 if (!s)
1382 return NULL;
1383
1384 assert(s->n_ref >= 1);
1385 s->n_ref--;
1386
1387 if (s->n_ref <= 0) {
1388 /* Here's a special hack: when we are called from a
1389 * dispatch handler we won't free the event source
1390 * immediately, but we will detach the fd from the
1391 * epoll. This way it is safe for the caller to unref
1392 * the event source and immediately close the fd, but
1393 * we still retain a valid event source object after
1394 * the callback. */
1395
1396 if (s->dispatching) {
1397 if (s->type == SOURCE_IO)
1398 source_io_unregister(s);
1399
1400 source_disconnect(s);
1401 } else
1402 source_free(s);
1403 }
1404
1405 return NULL;
1406}
1407
f47781d8 1408_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
5eef597e 1409 assert_return(s, -EINVAL);
f47781d8 1410 assert_return(!event_pid_changed(s->event), -ECHILD);
5eef597e 1411
f47781d8 1412 return free_and_strdup(&s->description, description);
5eef597e
MP
1413}
1414
f47781d8 1415_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
5eef597e 1416 assert_return(s, -EINVAL);
f47781d8
MP
1417 assert_return(description, -EINVAL);
1418 assert_return(s->description, -ENXIO);
1419 assert_return(!event_pid_changed(s->event), -ECHILD);
5eef597e 1420
f47781d8 1421 *description = s->description;
5eef597e
MP
1422 return 0;
1423}
1424
60f067b4
JS
1425_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1426 assert_return(s, NULL);
1427
1428 return s->event;
1429}
1430
1431_public_ int sd_event_source_get_pending(sd_event_source *s) {
1432 assert_return(s, -EINVAL);
1433 assert_return(s->type != SOURCE_EXIT, -EDOM);
1434 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1435 assert_return(!event_pid_changed(s->event), -ECHILD);
1436
1437 return s->pending;
1438}
1439
1440_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1441 assert_return(s, -EINVAL);
1442 assert_return(s->type == SOURCE_IO, -EDOM);
1443 assert_return(!event_pid_changed(s->event), -ECHILD);
1444
1445 return s->io.fd;
1446}
1447
1448_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1449 int r;
1450
1451 assert_return(s, -EINVAL);
13d276d0 1452 assert_return(fd >= 0, -EBADF);
60f067b4
JS
1453 assert_return(s->type == SOURCE_IO, -EDOM);
1454 assert_return(!event_pid_changed(s->event), -ECHILD);
1455
1456 if (s->io.fd == fd)
1457 return 0;
1458
1459 if (s->enabled == SD_EVENT_OFF) {
1460 s->io.fd = fd;
1461 s->io.registered = false;
1462 } else {
1463 int saved_fd;
1464
1465 saved_fd = s->io.fd;
1466 assert(s->io.registered);
1467
1468 s->io.fd = fd;
1469 s->io.registered = false;
1470
1471 r = source_io_register(s, s->enabled, s->io.events);
1472 if (r < 0) {
1473 s->io.fd = saved_fd;
1474 s->io.registered = true;
1475 return r;
1476 }
1477
1478 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1479 }
1480
1481 return 0;
1482}
1483
1484_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1485 assert_return(s, -EINVAL);
1486 assert_return(events, -EINVAL);
1487 assert_return(s->type == SOURCE_IO, -EDOM);
1488 assert_return(!event_pid_changed(s->event), -ECHILD);
1489
1490 *events = s->io.events;
1491 return 0;
1492}
1493
1494_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1495 int r;
1496
1497 assert_return(s, -EINVAL);
1498 assert_return(s->type == SOURCE_IO, -EDOM);
1499 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1500 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1501 assert_return(!event_pid_changed(s->event), -ECHILD);
1502
5eef597e
MP
1503 /* edge-triggered updates are never skipped, so we can reset edges */
1504 if (s->io.events == events && !(events & EPOLLET))
60f067b4
JS
1505 return 0;
1506
1507 if (s->enabled != SD_EVENT_OFF) {
1508 r = source_io_register(s, s->enabled, events);
1509 if (r < 0)
1510 return r;
1511 }
1512
1513 s->io.events = events;
1514 source_set_pending(s, false);
1515
1516 return 0;
1517}
1518
1519_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1520 assert_return(s, -EINVAL);
1521 assert_return(revents, -EINVAL);
1522 assert_return(s->type == SOURCE_IO, -EDOM);
1523 assert_return(s->pending, -ENODATA);
1524 assert_return(!event_pid_changed(s->event), -ECHILD);
1525
1526 *revents = s->io.revents;
1527 return 0;
1528}
1529
1530_public_ int sd_event_source_get_signal(sd_event_source *s) {
1531 assert_return(s, -EINVAL);
1532 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1533 assert_return(!event_pid_changed(s->event), -ECHILD);
1534
1535 return s->signal.sig;
1536}
1537
1538_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1539 assert_return(s, -EINVAL);
1540 assert_return(!event_pid_changed(s->event), -ECHILD);
1541
1542 return s->priority;
1543}
1544
1545_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
d9dfd233
MP
1546 int r;
1547
60f067b4
JS
1548 assert_return(s, -EINVAL);
1549 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1550 assert_return(!event_pid_changed(s->event), -ECHILD);
1551
1552 if (s->priority == priority)
1553 return 0;
1554
d9dfd233
MP
1555 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1556 struct signal_data *old, *d;
1557
1558 /* Move us from the signalfd belonging to the old
1559 * priority to the signalfd of the new priority */
1560
1561 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1562
1563 s->priority = priority;
1564
1565 r = event_make_signal_data(s->event, s->signal.sig, &d);
1566 if (r < 0) {
1567 s->priority = old->priority;
1568 return r;
1569 }
1570
1571 event_unmask_signal_data(s->event, old, s->signal.sig);
1572 } else
1573 s->priority = priority;
60f067b4
JS
1574
1575 if (s->pending)
1576 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1577
1578 if (s->prepare)
1579 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1580
1581 if (s->type == SOURCE_EXIT)
1582 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1583
1584 return 0;
1585}
1586
1587_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1588 assert_return(s, -EINVAL);
1589 assert_return(m, -EINVAL);
1590 assert_return(!event_pid_changed(s->event), -ECHILD);
1591
1592 *m = s->enabled;
1593 return 0;
1594}
1595
1596_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1597 int r;
1598
1599 assert_return(s, -EINVAL);
1600 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1601 assert_return(!event_pid_changed(s->event), -ECHILD);
1602
1603 /* If we are dead anyway, we are fine with turning off
1604 * sources, but everything else needs to fail. */
1605 if (s->event->state == SD_EVENT_FINISHED)
1606 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1607
1608 if (s->enabled == m)
1609 return 0;
1610
1611 if (m == SD_EVENT_OFF) {
1612
1613 switch (s->type) {
1614
1615 case SOURCE_IO:
86f210e9 1616 source_io_unregister(s);
60f067b4
JS
1617 s->enabled = m;
1618 break;
1619
1620 case SOURCE_TIME_REALTIME:
5eef597e 1621 case SOURCE_TIME_BOOTTIME:
60f067b4
JS
1622 case SOURCE_TIME_MONOTONIC:
1623 case SOURCE_TIME_REALTIME_ALARM:
1624 case SOURCE_TIME_BOOTTIME_ALARM: {
1625 struct clock_data *d;
1626
1627 s->enabled = m;
1628 d = event_get_clock_data(s->event, s->type);
1629 assert(d);
1630
1631 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1632 prioq_reshuffle(d->latest, s, &s->time.latest_index);
5eef597e 1633 d->needs_rearm = true;
60f067b4
JS
1634 break;
1635 }
1636
1637 case SOURCE_SIGNAL:
1638 s->enabled = m;
5eef597e 1639
d9dfd233 1640 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
60f067b4
JS
1641 break;
1642
1643 case SOURCE_CHILD:
1644 s->enabled = m;
1645
1646 assert(s->event->n_enabled_child_sources > 0);
1647 s->event->n_enabled_child_sources--;
1648
d9dfd233 1649 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
60f067b4
JS
1650 break;
1651
1652 case SOURCE_EXIT:
1653 s->enabled = m;
1654 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1655 break;
1656
1657 case SOURCE_DEFER:
1658 case SOURCE_POST:
1659 s->enabled = m;
1660 break;
1661
1662 default:
1663 assert_not_reached("Wut? I shouldn't exist.");
1664 }
1665
1666 } else {
1667 switch (s->type) {
1668
1669 case SOURCE_IO:
1670 r = source_io_register(s, m, s->io.events);
1671 if (r < 0)
1672 return r;
1673
1674 s->enabled = m;
1675 break;
1676
1677 case SOURCE_TIME_REALTIME:
5eef597e 1678 case SOURCE_TIME_BOOTTIME:
60f067b4
JS
1679 case SOURCE_TIME_MONOTONIC:
1680 case SOURCE_TIME_REALTIME_ALARM:
1681 case SOURCE_TIME_BOOTTIME_ALARM: {
1682 struct clock_data *d;
1683
1684 s->enabled = m;
1685 d = event_get_clock_data(s->event, s->type);
1686 assert(d);
1687
1688 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1689 prioq_reshuffle(d->latest, s, &s->time.latest_index);
5eef597e 1690 d->needs_rearm = true;
60f067b4
JS
1691 break;
1692 }
1693
1694 case SOURCE_SIGNAL:
5eef597e
MP
1695
1696 s->enabled = m;
d9dfd233
MP
1697
1698 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1699 if (r < 0) {
1700 s->enabled = SD_EVENT_OFF;
1701 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1702 return r;
1703 }
1704
60f067b4
JS
1705 break;
1706
1707 case SOURCE_CHILD:
5eef597e 1708
d9dfd233 1709 if (s->enabled == SD_EVENT_OFF)
5eef597e 1710 s->event->n_enabled_child_sources++;
60f067b4
JS
1711
1712 s->enabled = m;
d9dfd233
MP
1713
1714 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1715 if (r < 0) {
1716 s->enabled = SD_EVENT_OFF;
1717 s->event->n_enabled_child_sources--;
1718 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1719 return r;
1720 }
1721
60f067b4
JS
1722 break;
1723
1724 case SOURCE_EXIT:
1725 s->enabled = m;
1726 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1727 break;
1728
1729 case SOURCE_DEFER:
1730 case SOURCE_POST:
1731 s->enabled = m;
1732 break;
1733
1734 default:
1735 assert_not_reached("Wut? I shouldn't exist.");
1736 }
1737 }
1738
1739 if (s->pending)
1740 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1741
1742 if (s->prepare)
1743 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1744
1745 return 0;
1746}
1747
1748_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1749 assert_return(s, -EINVAL);
1750 assert_return(usec, -EINVAL);
1751 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1752 assert_return(!event_pid_changed(s->event), -ECHILD);
1753
1754 *usec = s->time.next;
1755 return 0;
1756}
1757
1758_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1759 struct clock_data *d;
1760
1761 assert_return(s, -EINVAL);
60f067b4
JS
1762 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1763 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1764 assert_return(!event_pid_changed(s->event), -ECHILD);
1765
1766 s->time.next = usec;
1767
1768 source_set_pending(s, false);
1769
1770 d = event_get_clock_data(s->event, s->type);
1771 assert(d);
1772
1773 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1774 prioq_reshuffle(d->latest, s, &s->time.latest_index);
5eef597e 1775 d->needs_rearm = true;
60f067b4
JS
1776
1777 return 0;
1778}
1779
1780_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1781 assert_return(s, -EINVAL);
1782 assert_return(usec, -EINVAL);
1783 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1784 assert_return(!event_pid_changed(s->event), -ECHILD);
1785
1786 *usec = s->time.accuracy;
1787 return 0;
1788}
1789
1790_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1791 struct clock_data *d;
1792
1793 assert_return(s, -EINVAL);
1794 assert_return(usec != (uint64_t) -1, -EINVAL);
1795 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1796 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1797 assert_return(!event_pid_changed(s->event), -ECHILD);
1798
1799 if (usec == 0)
1800 usec = DEFAULT_ACCURACY_USEC;
1801
1802 s->time.accuracy = usec;
1803
1804 source_set_pending(s, false);
1805
1806 d = event_get_clock_data(s->event, s->type);
1807 assert(d);
1808
1809 prioq_reshuffle(d->latest, s, &s->time.latest_index);
5eef597e 1810 d->needs_rearm = true;
60f067b4
JS
1811
1812 return 0;
1813}
1814
1815_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1816 assert_return(s, -EINVAL);
1817 assert_return(clock, -EINVAL);
1818 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1819 assert_return(!event_pid_changed(s->event), -ECHILD);
1820
1821 *clock = event_source_type_to_clock(s->type);
1822 return 0;
1823}
1824
1825_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1826 assert_return(s, -EINVAL);
1827 assert_return(pid, -EINVAL);
1828 assert_return(s->type == SOURCE_CHILD, -EDOM);
1829 assert_return(!event_pid_changed(s->event), -ECHILD);
1830
1831 *pid = s->child.pid;
1832 return 0;
1833}
1834
1835_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1836 int r;
1837
1838 assert_return(s, -EINVAL);
1839 assert_return(s->type != SOURCE_EXIT, -EDOM);
1840 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1841 assert_return(!event_pid_changed(s->event), -ECHILD);
1842
1843 if (s->prepare == callback)
1844 return 0;
1845
1846 if (callback && s->prepare) {
1847 s->prepare = callback;
1848 return 0;
1849 }
1850
1851 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1852 if (r < 0)
1853 return r;
1854
1855 s->prepare = callback;
1856
1857 if (callback) {
1858 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1859 if (r < 0)
1860 return r;
1861 } else
1862 prioq_remove(s->event->prepare, s, &s->prepare_index);
1863
1864 return 0;
1865}
1866
1867_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1868 assert_return(s, NULL);
1869
1870 return s->userdata;
1871}
1872
1873_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1874 void *ret;
1875
1876 assert_return(s, NULL);
1877
1878 ret = s->userdata;
1879 s->userdata = userdata;
1880
1881 return ret;
1882}
1883
1884static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1885 usec_t c;
1886 assert(e);
1887 assert(a <= b);
1888
1889 if (a <= 0)
1890 return 0;
4c89c718
MP
1891 if (a >= USEC_INFINITY)
1892 return USEC_INFINITY;
60f067b4
JS
1893
1894 if (b <= a + 1)
1895 return a;
1896
1897 initialize_perturb(e);
1898
1899 /*
1900 Find a good time to wake up again between times a and b. We
1901 have two goals here:
1902
1903 a) We want to wake up as seldom as possible, hence prefer
1904 later times over earlier times.
1905
1906 b) But if we have to wake up, then let's make sure to
1907 dispatch as much as possible on the entire system.
1908
1909 We implement this by waking up everywhere at the same time
1910 within any given minute if we can, synchronised via the
1911 perturbation value determined from the boot ID. If we can't,
1912 then we try to find the same spot in every 10s, then 1s and
1913 then 250ms step. Otherwise, we pick the last possible time
1914 to wake up.
1915 */
1916
1917 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1918 if (c >= b) {
1919 if (_unlikely_(c < USEC_PER_MINUTE))
1920 return b;
1921
1922 c -= USEC_PER_MINUTE;
1923 }
1924
1925 if (c >= a)
1926 return c;
1927
1928 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1929 if (c >= b) {
1930 if (_unlikely_(c < USEC_PER_SEC*10))
1931 return b;
1932
1933 c -= USEC_PER_SEC*10;
1934 }
1935
1936 if (c >= a)
1937 return c;
1938
1939 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1940 if (c >= b) {
1941 if (_unlikely_(c < USEC_PER_SEC))
1942 return b;
1943
1944 c -= USEC_PER_SEC;
1945 }
1946
1947 if (c >= a)
1948 return c;
1949
1950 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1951 if (c >= b) {
1952 if (_unlikely_(c < USEC_PER_MSEC*250))
1953 return b;
1954
1955 c -= USEC_PER_MSEC*250;
1956 }
1957
1958 if (c >= a)
1959 return c;
1960
1961 return b;
1962}
1963
1964static int event_arm_timer(
1965 sd_event *e,
1966 struct clock_data *d) {
1967
1968 struct itimerspec its = {};
1969 sd_event_source *a, *b;
1970 usec_t t;
1971 int r;
1972
1973 assert(e);
1974 assert(d);
1975
5eef597e
MP
1976 if (!d->needs_rearm)
1977 return 0;
1978 else
1979 d->needs_rearm = false;
1980
60f067b4 1981 a = prioq_peek(d->earliest);
4c89c718 1982 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
60f067b4
JS
1983
1984 if (d->fd < 0)
1985 return 0;
1986
5eef597e 1987 if (d->next == USEC_INFINITY)
60f067b4
JS
1988 return 0;
1989
1990 /* disarm */
1991 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1992 if (r < 0)
1993 return r;
1994
5eef597e 1995 d->next = USEC_INFINITY;
60f067b4
JS
1996 return 0;
1997 }
1998
1999 b = prioq_peek(d->latest);
2000 assert_se(b && b->enabled != SD_EVENT_OFF);
2001
4c89c718 2002 t = sleep_between(e, a->time.next, time_event_source_latest(b));
60f067b4
JS
2003 if (d->next == t)
2004 return 0;
2005
2006 assert_se(d->fd >= 0);
2007
2008 if (t == 0) {
2009 /* We don' want to disarm here, just mean some time looooong ago. */
2010 its.it_value.tv_sec = 0;
2011 its.it_value.tv_nsec = 1;
2012 } else
2013 timespec_store(&its.it_value, t);
2014
2015 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2016 if (r < 0)
2017 return -errno;
2018
2019 d->next = t;
2020 return 0;
2021}
2022
2023static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2024 assert(e);
2025 assert(s);
2026 assert(s->type == SOURCE_IO);
2027
2028 /* If the event source was already pending, we just OR in the
2029 * new revents, otherwise we reset the value. The ORing is
2030 * necessary to handle EPOLLONESHOT events properly where
2031 * readability might happen independently of writability, and
2032 * we need to keep track of both */
2033
2034 if (s->pending)
2035 s->io.revents |= revents;
2036 else
2037 s->io.revents = revents;
2038
2039 return source_set_pending(s, true);
2040}
2041
2042static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2043 uint64_t x;
2044 ssize_t ss;
2045
2046 assert(e);
2047 assert(fd >= 0);
2048
2049 assert_return(events == EPOLLIN, -EIO);
2050
2051 ss = read(fd, &x, sizeof(x));
2052 if (ss < 0) {
2053 if (errno == EAGAIN || errno == EINTR)
2054 return 0;
2055
2056 return -errno;
2057 }
2058
2059 if (_unlikely_(ss != sizeof(x)))
2060 return -EIO;
2061
2062 if (next)
5eef597e 2063 *next = USEC_INFINITY;
60f067b4
JS
2064
2065 return 0;
2066}
2067
2068static int process_timer(
2069 sd_event *e,
2070 usec_t n,
2071 struct clock_data *d) {
2072
2073 sd_event_source *s;
2074 int r;
2075
2076 assert(e);
2077 assert(d);
2078
2079 for (;;) {
2080 s = prioq_peek(d->earliest);
2081 if (!s ||
2082 s->time.next > n ||
2083 s->enabled == SD_EVENT_OFF ||
2084 s->pending)
2085 break;
2086
2087 r = source_set_pending(s, true);
2088 if (r < 0)
2089 return r;
2090
2091 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2092 prioq_reshuffle(d->latest, s, &s->time.latest_index);
5eef597e 2093 d->needs_rearm = true;
60f067b4
JS
2094 }
2095
2096 return 0;
2097}
2098
2099static int process_child(sd_event *e) {
2100 sd_event_source *s;
2101 Iterator i;
2102 int r;
2103
2104 assert(e);
2105
2106 e->need_process_child = false;
2107
2108 /*
2109 So, this is ugly. We iteratively invoke waitid() with P_PID
2110 + WNOHANG for each PID we wait for, instead of using
2111 P_ALL. This is because we only want to get child
2112 information of very specific child processes, and not all
2113 of them. We might not have processed the SIGCHLD even of a
2114 previous invocation and we don't want to maintain a
2115 unbounded *per-child* event queue, hence we really don't
2116 want anything flushed out of the kernel's queue that we
2117 don't care about. Since this is O(n) this means that if you
2118 have a lot of processes you probably want to handle SIGCHLD
2119 yourself.
2120
2121 We do not reap the children here (by using WNOWAIT), this
2122 is only done after the event source is dispatched so that
2123 the callback still sees the process as a zombie.
2124 */
2125
2126 HASHMAP_FOREACH(s, e->child_sources, i) {
2127 assert(s->type == SOURCE_CHILD);
2128
2129 if (s->pending)
2130 continue;
2131
2132 if (s->enabled == SD_EVENT_OFF)
2133 continue;
2134
2135 zero(s->child.siginfo);
2136 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2137 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2138 if (r < 0)
2139 return -errno;
2140
2141 if (s->child.siginfo.si_pid != 0) {
2142 bool zombie =
2143 s->child.siginfo.si_code == CLD_EXITED ||
2144 s->child.siginfo.si_code == CLD_KILLED ||
2145 s->child.siginfo.si_code == CLD_DUMPED;
2146
2147 if (!zombie && (s->child.options & WEXITED)) {
2148 /* If the child isn't dead then let's
2149 * immediately remove the state change
2150 * from the queue, since there's no
2151 * benefit in leaving it queued */
2152
2153 assert(s->child.options & (WSTOPPED|WCONTINUED));
2154 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2155 }
2156
2157 r = source_set_pending(s, true);
2158 if (r < 0)
2159 return r;
2160 }
2161 }
2162
2163 return 0;
2164}
2165
d9dfd233 2166static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
60f067b4
JS
2167 bool read_one = false;
2168 int r;
2169
2170 assert(e);
60f067b4
JS
2171 assert_return(events == EPOLLIN, -EIO);
2172
d9dfd233
MP
2173 /* If there's a signal queued on this priority and SIGCHLD is
2174 on this priority too, then make sure to recheck the
2175 children we watch. This is because we only ever dequeue
2176 the first signal per priority, and if we dequeue one, and
2177 SIGCHLD might be enqueued later we wouldn't know, but we
2178 might have higher priority children we care about hence we
2179 need to check that explicitly. */
2180
2181 if (sigismember(&d->sigset, SIGCHLD))
2182 e->need_process_child = true;
2183
2184 /* If there's already an event source pending for this
2185 * priority we don't read another */
2186 if (d->current)
2187 return 0;
2188
60f067b4
JS
2189 for (;;) {
2190 struct signalfd_siginfo si;
5eef597e
MP
2191 ssize_t n;
2192 sd_event_source *s = NULL;
60f067b4 2193
d9dfd233 2194 n = read(d->fd, &si, sizeof(si));
5eef597e 2195 if (n < 0) {
60f067b4
JS
2196 if (errno == EAGAIN || errno == EINTR)
2197 return read_one;
2198
2199 return -errno;
2200 }
2201
5eef597e 2202 if (_unlikely_(n != sizeof(si)))
60f067b4
JS
2203 return -EIO;
2204
aa27b158 2205 assert(SIGNAL_VALID(si.ssi_signo));
5eef597e 2206
60f067b4
JS
2207 read_one = true;
2208
5eef597e
MP
2209 if (e->signal_sources)
2210 s = e->signal_sources[si.ssi_signo];
5eef597e
MP
2211 if (!s)
2212 continue;
d9dfd233
MP
2213 if (s->pending)
2214 continue;
60f067b4
JS
2215
2216 s->signal.siginfo = si;
d9dfd233
MP
2217 d->current = s;
2218
60f067b4
JS
2219 r = source_set_pending(s, true);
2220 if (r < 0)
2221 return r;
d9dfd233
MP
2222
2223 return 1;
60f067b4
JS
2224 }
2225}
2226
2227static int source_dispatch(sd_event_source *s) {
2228 int r = 0;
2229
2230 assert(s);
2231 assert(s->pending || s->type == SOURCE_EXIT);
2232
2233 if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
2234 r = source_set_pending(s, false);
2235 if (r < 0)
2236 return r;
2237 }
2238
2239 if (s->type != SOURCE_POST) {
2240 sd_event_source *z;
2241 Iterator i;
2242
2243 /* If we execute a non-post source, let's mark all
2244 * post sources as pending */
2245
2246 SET_FOREACH(z, s->event->post_sources, i) {
2247 if (z->enabled == SD_EVENT_OFF)
2248 continue;
2249
2250 r = source_set_pending(z, true);
2251 if (r < 0)
2252 return r;
2253 }
2254 }
2255
2256 if (s->enabled == SD_EVENT_ONESHOT) {
2257 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2258 if (r < 0)
2259 return r;
2260 }
2261
2262 s->dispatching = true;
2263
2264 switch (s->type) {
2265
2266 case SOURCE_IO:
2267 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2268 break;
2269
2270 case SOURCE_TIME_REALTIME:
5eef597e 2271 case SOURCE_TIME_BOOTTIME:
60f067b4
JS
2272 case SOURCE_TIME_MONOTONIC:
2273 case SOURCE_TIME_REALTIME_ALARM:
2274 case SOURCE_TIME_BOOTTIME_ALARM:
2275 r = s->time.callback(s, s->time.next, s->userdata);
2276 break;
2277
2278 case SOURCE_SIGNAL:
2279 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2280 break;
2281
2282 case SOURCE_CHILD: {
2283 bool zombie;
2284
2285 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2286 s->child.siginfo.si_code == CLD_KILLED ||
2287 s->child.siginfo.si_code == CLD_DUMPED;
2288
2289 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2290
2291 /* Now, reap the PID for good. */
2292 if (zombie)
2293 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2294
2295 break;
2296 }
2297
2298 case SOURCE_DEFER:
2299 r = s->defer.callback(s, s->userdata);
2300 break;
2301
2302 case SOURCE_POST:
2303 r = s->post.callback(s, s->userdata);
2304 break;
2305
2306 case SOURCE_EXIT:
2307 r = s->exit.callback(s, s->userdata);
2308 break;
2309
2310 case SOURCE_WATCHDOG:
2311 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2312 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2313 assert_not_reached("Wut? I shouldn't exist.");
2314 }
2315
2316 s->dispatching = false;
2317
4c89c718
MP
2318 if (r < 0)
2319 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2320 strna(s->description), event_source_type_to_string(s->type));
60f067b4
JS
2321
2322 if (s->n_ref == 0)
2323 source_free(s);
2324 else if (r < 0)
2325 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2326
2327 return 1;
2328}
2329
2330static int event_prepare(sd_event *e) {
2331 int r;
2332
2333 assert(e);
2334
2335 for (;;) {
2336 sd_event_source *s;
2337
2338 s = prioq_peek(e->prepare);
2339 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2340 break;
2341
2342 s->prepare_iteration = e->iteration;
2343 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2344 if (r < 0)
2345 return r;
2346
2347 assert(s->prepare);
2348
2349 s->dispatching = true;
2350 r = s->prepare(s, s->userdata);
2351 s->dispatching = false;
2352
4c89c718
MP
2353 if (r < 0)
2354 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2355 strna(s->description), event_source_type_to_string(s->type));
60f067b4
JS
2356
2357 if (s->n_ref == 0)
2358 source_free(s);
2359 else if (r < 0)
2360 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2361 }
2362
2363 return 0;
2364}
2365
2366static int dispatch_exit(sd_event *e) {
2367 sd_event_source *p;
2368 int r;
2369
2370 assert(e);
2371
2372 p = prioq_peek(e->exit);
2373 if (!p || p->enabled == SD_EVENT_OFF) {
2374 e->state = SD_EVENT_FINISHED;
2375 return 0;
2376 }
2377
2378 sd_event_ref(e);
2379 e->iteration++;
2380 e->state = SD_EVENT_EXITING;
2381
2382 r = source_dispatch(p);
2383
e3bff60a 2384 e->state = SD_EVENT_INITIAL;
60f067b4
JS
2385 sd_event_unref(e);
2386
2387 return r;
2388}
2389
2390static sd_event_source* event_next_pending(sd_event *e) {
2391 sd_event_source *p;
2392
2393 assert(e);
2394
2395 p = prioq_peek(e->pending);
2396 if (!p)
2397 return NULL;
2398
2399 if (p->enabled == SD_EVENT_OFF)
2400 return NULL;
2401
2402 return p;
2403}
2404
2405static int arm_watchdog(sd_event *e) {
2406 struct itimerspec its = {};
2407 usec_t t;
2408 int r;
2409
2410 assert(e);
2411 assert(e->watchdog_fd >= 0);
2412
2413 t = sleep_between(e,
2414 e->watchdog_last + (e->watchdog_period / 2),
2415 e->watchdog_last + (e->watchdog_period * 3 / 4));
2416
2417 timespec_store(&its.it_value, t);
2418
2419 /* Make sure we never set the watchdog to 0, which tells the
2420 * kernel to disable it. */
2421 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2422 its.it_value.tv_nsec = 1;
2423
2424 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2425 if (r < 0)
2426 return -errno;
2427
2428 return 0;
2429}
2430
2431static int process_watchdog(sd_event *e) {
2432 assert(e);
2433
2434 if (!e->watchdog)
2435 return 0;
2436
2437 /* Don't notify watchdog too often */
2438 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2439 return 0;
2440
2441 sd_notify(false, "WATCHDOG=1");
2442 e->watchdog_last = e->timestamp.monotonic;
2443
2444 return arm_watchdog(e);
2445}
2446
5eef597e
MP
2447_public_ int sd_event_prepare(sd_event *e) {
2448 int r;
60f067b4
JS
2449
2450 assert_return(e, -EINVAL);
2451 assert_return(!event_pid_changed(e), -ECHILD);
2452 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
e3bff60a 2453 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
60f067b4
JS
2454
2455 if (e->exit_requested)
5eef597e 2456 goto pending;
60f067b4 2457
60f067b4 2458 e->iteration++;
60f067b4 2459
4c89c718 2460 e->state = SD_EVENT_PREPARING;
60f067b4 2461 r = event_prepare(e);
4c89c718 2462 e->state = SD_EVENT_INITIAL;
60f067b4 2463 if (r < 0)
5eef597e 2464 return r;
60f067b4
JS
2465
2466 r = event_arm_timer(e, &e->realtime);
2467 if (r < 0)
5eef597e
MP
2468 return r;
2469
2470 r = event_arm_timer(e, &e->boottime);
2471 if (r < 0)
2472 return r;
60f067b4
JS
2473
2474 r = event_arm_timer(e, &e->monotonic);
2475 if (r < 0)
5eef597e 2476 return r;
60f067b4
JS
2477
2478 r = event_arm_timer(e, &e->realtime_alarm);
2479 if (r < 0)
5eef597e 2480 return r;
60f067b4
JS
2481
2482 r = event_arm_timer(e, &e->boottime_alarm);
2483 if (r < 0)
5eef597e 2484 return r;
60f067b4
JS
2485
2486 if (event_next_pending(e) || e->need_process_child)
5eef597e
MP
2487 goto pending;
2488
e3bff60a 2489 e->state = SD_EVENT_ARMED;
5eef597e
MP
2490
2491 return 0;
2492
2493pending:
e3bff60a 2494 e->state = SD_EVENT_ARMED;
5eef597e
MP
2495 r = sd_event_wait(e, 0);
2496 if (r == 0)
e3bff60a 2497 e->state = SD_EVENT_ARMED;
5eef597e
MP
2498
2499 return r;
2500}
2501
2502_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2503 struct epoll_event *ev_queue;
2504 unsigned ev_queue_max;
2505 int r, m, i;
2506
2507 assert_return(e, -EINVAL);
2508 assert_return(!event_pid_changed(e), -ECHILD);
2509 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
e3bff60a 2510 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
5eef597e
MP
2511
2512 if (e->exit_requested) {
2513 e->state = SD_EVENT_PENDING;
2514 return 1;
2515 }
60f067b4 2516
e3bff60a 2517 ev_queue_max = MAX(e->n_sources, 1u);
60f067b4
JS
2518 ev_queue = newa(struct epoll_event, ev_queue_max);
2519
2520 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2521 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2522 if (m < 0) {
5eef597e
MP
2523 if (errno == EINTR) {
2524 e->state = SD_EVENT_PENDING;
2525 return 1;
2526 }
2527
2528 r = -errno;
60f067b4
JS
2529 goto finish;
2530 }
2531
5a920b42 2532 triple_timestamp_get(&e->timestamp);
60f067b4
JS
2533
2534 for (i = 0; i < m; i++) {
2535
d9dfd233 2536 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
60f067b4 2537 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
d9dfd233
MP
2538 else {
2539 WakeupType *t = ev_queue[i].data.ptr;
2540
2541 switch (*t) {
2542
2543 case WAKEUP_EVENT_SOURCE:
2544 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2545 break;
60f067b4 2546
d9dfd233
MP
2547 case WAKEUP_CLOCK_DATA: {
2548 struct clock_data *d = ev_queue[i].data.ptr;
2549 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2550 break;
2551 }
2552
2553 case WAKEUP_SIGNAL_DATA:
2554 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2555 break;
2556
2557 default:
2558 assert_not_reached("Invalid wake-up pointer");
2559 }
2560 }
60f067b4
JS
2561 if (r < 0)
2562 goto finish;
2563 }
2564
2565 r = process_watchdog(e);
2566 if (r < 0)
2567 goto finish;
2568
2569 r = process_timer(e, e->timestamp.realtime, &e->realtime);
2570 if (r < 0)
2571 goto finish;
2572
5a920b42 2573 r = process_timer(e, e->timestamp.boottime, &e->boottime);
5eef597e
MP
2574 if (r < 0)
2575 goto finish;
2576
60f067b4
JS
2577 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2578 if (r < 0)
2579 goto finish;
2580
2581 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2582 if (r < 0)
2583 goto finish;
2584
5a920b42 2585 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
60f067b4
JS
2586 if (r < 0)
2587 goto finish;
2588
2589 if (e->need_process_child) {
2590 r = process_child(e);
2591 if (r < 0)
2592 goto finish;
2593 }
2594
5eef597e
MP
2595 if (event_next_pending(e)) {
2596 e->state = SD_EVENT_PENDING;
2597
2598 return 1;
60f067b4
JS
2599 }
2600
5eef597e 2601 r = 0;
60f067b4
JS
2602
2603finish:
e3bff60a 2604 e->state = SD_EVENT_INITIAL;
60f067b4
JS
2605
2606 return r;
2607}
2608
5eef597e
MP
2609_public_ int sd_event_dispatch(sd_event *e) {
2610 sd_event_source *p;
2611 int r;
2612
2613 assert_return(e, -EINVAL);
2614 assert_return(!event_pid_changed(e), -ECHILD);
2615 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2616 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2617
2618 if (e->exit_requested)
2619 return dispatch_exit(e);
2620
2621 p = event_next_pending(e);
2622 if (p) {
2623 sd_event_ref(e);
2624
2625 e->state = SD_EVENT_RUNNING;
2626 r = source_dispatch(p);
e3bff60a 2627 e->state = SD_EVENT_INITIAL;
5eef597e
MP
2628
2629 sd_event_unref(e);
2630
2631 return r;
2632 }
2633
e3bff60a 2634 e->state = SD_EVENT_INITIAL;
5eef597e
MP
2635
2636 return 1;
2637}
2638
4c89c718
MP
2639static void event_log_delays(sd_event *e) {
2640 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2641 unsigned i;
2642 int o;
2643
2644 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2645 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2646 e->delays[i] = 0;
2647 }
2648 log_debug("Event loop iterations: %.*s", o, b);
2649}
2650
5eef597e
MP
2651_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2652 int r;
2653
2654 assert_return(e, -EINVAL);
2655 assert_return(!event_pid_changed(e), -ECHILD);
2656 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
e3bff60a 2657 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
5eef597e 2658
4c89c718
MP
2659 if (e->profile_delays && e->last_run) {
2660 usec_t this_run;
2661 unsigned l;
2662
2663 this_run = now(CLOCK_MONOTONIC);
2664
2665 l = u64log2(this_run - e->last_run);
2666 assert(l < sizeof(e->delays));
2667 e->delays[l]++;
2668
2669 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2670 event_log_delays(e);
2671 e->last_log = this_run;
2672 }
2673 }
2674
5eef597e 2675 r = sd_event_prepare(e);
e3bff60a
MP
2676 if (r == 0)
2677 /* There was nothing? Then wait... */
2678 r = sd_event_wait(e, timeout);
5eef597e 2679
4c89c718
MP
2680 if (e->profile_delays)
2681 e->last_run = now(CLOCK_MONOTONIC);
2682
e3bff60a
MP
2683 if (r > 0) {
2684 /* There's something now, then let's dispatch it */
2685 r = sd_event_dispatch(e);
2686 if (r < 0)
2687 return r;
2688
2689 return 1;
2690 }
2691
2692 return r;
5eef597e
MP
2693}
2694
60f067b4
JS
2695_public_ int sd_event_loop(sd_event *e) {
2696 int r;
2697
2698 assert_return(e, -EINVAL);
2699 assert_return(!event_pid_changed(e), -ECHILD);
e3bff60a 2700 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
60f067b4
JS
2701
2702 sd_event_ref(e);
2703
2704 while (e->state != SD_EVENT_FINISHED) {
2705 r = sd_event_run(e, (uint64_t) -1);
2706 if (r < 0)
2707 goto finish;
2708 }
2709
2710 r = e->exit_code;
2711
2712finish:
2713 sd_event_unref(e);
2714 return r;
2715}
2716
5eef597e
MP
2717_public_ int sd_event_get_fd(sd_event *e) {
2718
2719 assert_return(e, -EINVAL);
2720 assert_return(!event_pid_changed(e), -ECHILD);
2721
2722 return e->epoll_fd;
2723}
2724
60f067b4
JS
2725_public_ int sd_event_get_state(sd_event *e) {
2726 assert_return(e, -EINVAL);
2727 assert_return(!event_pid_changed(e), -ECHILD);
2728
2729 return e->state;
2730}
2731
2732_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2733 assert_return(e, -EINVAL);
2734 assert_return(code, -EINVAL);
2735 assert_return(!event_pid_changed(e), -ECHILD);
2736
2737 if (!e->exit_requested)
2738 return -ENODATA;
2739
2740 *code = e->exit_code;
2741 return 0;
2742}
2743
2744_public_ int sd_event_exit(sd_event *e, int code) {
2745 assert_return(e, -EINVAL);
2746 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2747 assert_return(!event_pid_changed(e), -ECHILD);
2748
2749 e->exit_requested = true;
2750 e->exit_code = code;
2751
2752 return 0;
2753}
2754
2755_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2756 assert_return(e, -EINVAL);
2757 assert_return(usec, -EINVAL);
2758 assert_return(!event_pid_changed(e), -ECHILD);
2759
5a920b42
MP
2760 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
2761 return -EOPNOTSUPP;
2762
2763 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here,
2764 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
2765 * the purpose of getting the time this doesn't matter. */
aa27b158
MP
2766 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
2767 return -EOPNOTSUPP;
2768
5a920b42 2769 if (!triple_timestamp_is_set(&e->timestamp)) {
13d276d0
MP
2770 /* Implicitly fall back to now() if we never ran
2771 * before and thus have no cached time. */
2772 *usec = now(clock);
2773 return 1;
2774 }
60f067b4 2775
5a920b42 2776 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
60f067b4
JS
2777 return 0;
2778}
2779
2780_public_ int sd_event_default(sd_event **ret) {
2781
2782 static thread_local sd_event *default_event = NULL;
2783 sd_event *e = NULL;
2784 int r;
2785
2786 if (!ret)
2787 return !!default_event;
2788
2789 if (default_event) {
2790 *ret = sd_event_ref(default_event);
2791 return 0;
2792 }
2793
2794 r = sd_event_new(&e);
2795 if (r < 0)
2796 return r;
2797
2798 e->default_event_ptr = &default_event;
2799 e->tid = gettid();
2800 default_event = e;
2801
2802 *ret = e;
2803 return 1;
2804}
2805
2806_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2807 assert_return(e, -EINVAL);
2808 assert_return(tid, -EINVAL);
2809 assert_return(!event_pid_changed(e), -ECHILD);
2810
2811 if (e->tid != 0) {
2812 *tid = e->tid;
2813 return 0;
2814 }
2815
2816 return -ENXIO;
2817}
2818
2819_public_ int sd_event_set_watchdog(sd_event *e, int b) {
2820 int r;
2821
2822 assert_return(e, -EINVAL);
2823 assert_return(!event_pid_changed(e), -ECHILD);
2824
2825 if (e->watchdog == !!b)
2826 return e->watchdog;
2827
2828 if (b) {
2829 struct epoll_event ev = {};
2830
2831 r = sd_watchdog_enabled(false, &e->watchdog_period);
2832 if (r <= 0)
2833 return r;
2834
2835 /* Issue first ping immediately */
2836 sd_notify(false, "WATCHDOG=1");
2837 e->watchdog_last = now(CLOCK_MONOTONIC);
2838
2839 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2840 if (e->watchdog_fd < 0)
2841 return -errno;
2842
2843 r = arm_watchdog(e);
2844 if (r < 0)
2845 goto fail;
2846
2847 ev.events = EPOLLIN;
2848 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2849
2850 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2851 if (r < 0) {
2852 r = -errno;
2853 goto fail;
2854 }
2855
2856 } else {
2857 if (e->watchdog_fd >= 0) {
2858 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2859 e->watchdog_fd = safe_close(e->watchdog_fd);
2860 }
2861 }
2862
2863 e->watchdog = !!b;
2864 return e->watchdog;
2865
2866fail:
2867 e->watchdog_fd = safe_close(e->watchdog_fd);
2868 return r;
2869}
2870
2871_public_ int sd_event_get_watchdog(sd_event *e) {
2872 assert_return(e, -EINVAL);
2873 assert_return(!event_pid_changed(e), -ECHILD);
2874
2875 return e->watchdog;
2876}
5a920b42
MP
2877
2878_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
2879 assert_return(e, -EINVAL);
2880 assert_return(!event_pid_changed(e), -ECHILD);
2881
2882 *ret = e->iteration;
2883 return 0;
2884}