/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
20 #include <sys/epoll.h>
21 #include <sys/timerfd.h>
24 #include "sd-daemon.h"
28 #include "alloc-util.h"
35 #include "process-util.h"
37 #include "signal-util.h"
38 #include "string-table.h"
39 #include "string-util.h"
40 #include "time-util.h"
43 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
/* Kind of an event source. The time-based entries must stay contiguous
 * so that EVENT_SOURCE_IS_TIME() can test them with IN_SET(). Extraction
 * dropped most enumerators and the closing brace; restored here. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;
62 static const char* const event_source_type_table
[_SOURCE_EVENT_SOURCE_TYPE_MAX
] = {
64 [SOURCE_TIME_REALTIME
] = "realtime",
65 [SOURCE_TIME_BOOTTIME
] = "bootime",
66 [SOURCE_TIME_MONOTONIC
] = "monotonic",
67 [SOURCE_TIME_REALTIME_ALARM
] = "realtime-alarm",
68 [SOURCE_TIME_BOOTTIME_ALARM
] = "boottime-alarm",
69 [SOURCE_SIGNAL
] = "signal",
70 [SOURCE_CHILD
] = "child",
71 [SOURCE_DEFER
] = "defer",
72 [SOURCE_POST
] = "post",
73 [SOURCE_EXIT
] = "exit",
74 [SOURCE_WATCHDOG
] = "watchdog",
77 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type
, int);
/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;
90 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
92 struct sd_event_source
{
99 sd_event_handler_t prepare
;
103 EventSourceType type
:5;
110 unsigned pending_index
;
111 unsigned prepare_index
;
112 unsigned pending_iteration
;
113 unsigned prepare_iteration
;
115 LIST_FIELDS(sd_event_source
, sources
);
119 sd_event_io_handler_t callback
;
126 sd_event_time_handler_t callback
;
127 usec_t next
, accuracy
;
128 unsigned earliest_index
;
129 unsigned latest_index
;
132 sd_event_signal_handler_t callback
;
133 struct signalfd_siginfo siginfo
;
137 sd_event_child_handler_t callback
;
143 sd_event_handler_t callback
;
146 sd_event_handler_t callback
;
149 sd_event_handler_t callback
;
150 unsigned prioq_index
;
159 /* For all clocks we maintain two priority queues each, one
160 * ordered for the earliest times the events may be
161 * dispatched, and one ordered by the latest times they must
162 * have been dispatched. The range between the top entries in
163 * the two prioqs is the time window we can freely schedule
176 /* For each priority we maintain one signal fd, so that we
177 * only have to dequeue a single event per priority at a
183 sd_event_source
*current
;
195 /* timerfd_create() only supports these five clocks so far. We
196 * can add support for more clocks when the kernel learns to
197 * deal with them, too. */
198 struct clock_data realtime
;
199 struct clock_data boottime
;
200 struct clock_data monotonic
;
201 struct clock_data realtime_alarm
;
202 struct clock_data boottime_alarm
;
206 sd_event_source
**signal_sources
; /* indexed by signal number */
207 Hashmap
*signal_data
; /* indexed by priority */
209 Hashmap
*child_sources
;
210 unsigned n_enabled_child_sources
;
219 dual_timestamp timestamp
;
220 usec_t timestamp_boottime
;
223 bool exit_requested
:1;
224 bool need_process_child
:1;
226 bool profile_delays
:1;
231 sd_event
**default_event_ptr
;
233 usec_t watchdog_last
, watchdog_period
;
237 LIST_HEAD(sd_event_source
, sources
);
239 usec_t last_run
, last_log
;
240 unsigned delays
[sizeof(usec_t
) * 8];
243 static void source_disconnect(sd_event_source
*s
);
245 static int pending_prioq_compare(const void *a
, const void *b
) {
246 const sd_event_source
*x
= a
, *y
= b
;
251 /* Enabled ones first */
252 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
254 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
257 /* Lower priority values first */
258 if (x
->priority
< y
->priority
)
260 if (x
->priority
> y
->priority
)
263 /* Older entries first */
264 if (x
->pending_iteration
< y
->pending_iteration
)
266 if (x
->pending_iteration
> y
->pending_iteration
)
272 static int prepare_prioq_compare(const void *a
, const void *b
) {
273 const sd_event_source
*x
= a
, *y
= b
;
278 /* Enabled ones first */
279 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
281 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
284 /* Move most recently prepared ones last, so that we can stop
285 * preparing as soon as we hit one that has already been
286 * prepared in the current iteration */
287 if (x
->prepare_iteration
< y
->prepare_iteration
)
289 if (x
->prepare_iteration
> y
->prepare_iteration
)
292 /* Lower priority values first */
293 if (x
->priority
< y
->priority
)
295 if (x
->priority
> y
->priority
)
301 static int earliest_time_prioq_compare(const void *a
, const void *b
) {
302 const sd_event_source
*x
= a
, *y
= b
;
304 assert(EVENT_SOURCE_IS_TIME(x
->type
));
305 assert(x
->type
== y
->type
);
307 /* Enabled ones first */
308 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
310 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
313 /* Move the pending ones to the end */
314 if (!x
->pending
&& y
->pending
)
316 if (x
->pending
&& !y
->pending
)
320 if (x
->time
.next
< y
->time
.next
)
322 if (x
->time
.next
> y
->time
.next
)
328 static usec_t
time_event_source_latest(const sd_event_source
*s
) {
329 return usec_add(s
->time
.next
, s
->time
.accuracy
);
332 static int latest_time_prioq_compare(const void *a
, const void *b
) {
333 const sd_event_source
*x
= a
, *y
= b
;
335 assert(EVENT_SOURCE_IS_TIME(x
->type
));
336 assert(x
->type
== y
->type
);
338 /* Enabled ones first */
339 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
341 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
344 /* Move the pending ones to the end */
345 if (!x
->pending
&& y
->pending
)
347 if (x
->pending
&& !y
->pending
)
351 if (time_event_source_latest(x
) < time_event_source_latest(y
))
353 if (time_event_source_latest(x
) > time_event_source_latest(y
))
359 static int exit_prioq_compare(const void *a
, const void *b
) {
360 const sd_event_source
*x
= a
, *y
= b
;
362 assert(x
->type
== SOURCE_EXIT
);
363 assert(y
->type
== SOURCE_EXIT
);
365 /* Enabled ones first */
366 if (x
->enabled
!= SD_EVENT_OFF
&& y
->enabled
== SD_EVENT_OFF
)
368 if (x
->enabled
== SD_EVENT_OFF
&& y
->enabled
!= SD_EVENT_OFF
)
371 /* Lower priority values first */
372 if (x
->priority
< y
->priority
)
374 if (x
->priority
> y
->priority
)
380 static void free_clock_data(struct clock_data
*d
) {
382 assert(d
->wakeup
== WAKEUP_CLOCK_DATA
);
385 prioq_free(d
->earliest
);
386 prioq_free(d
->latest
);
389 static void event_free(sd_event
*e
) {
394 while ((s
= e
->sources
)) {
396 source_disconnect(s
);
397 sd_event_source_unref(s
);
400 assert(e
->n_sources
== 0);
402 if (e
->default_event_ptr
)
403 *(e
->default_event_ptr
) = NULL
;
405 safe_close(e
->epoll_fd
);
406 safe_close(e
->watchdog_fd
);
408 free_clock_data(&e
->realtime
);
409 free_clock_data(&e
->boottime
);
410 free_clock_data(&e
->monotonic
);
411 free_clock_data(&e
->realtime_alarm
);
412 free_clock_data(&e
->boottime_alarm
);
414 prioq_free(e
->pending
);
415 prioq_free(e
->prepare
);
418 free(e
->signal_sources
);
419 hashmap_free(e
->signal_data
);
421 hashmap_free(e
->child_sources
);
422 set_free(e
->post_sources
);
426 _public_
int sd_event_new(sd_event
** ret
) {
430 assert_return(ret
, -EINVAL
);
432 e
= new0(sd_event
, 1);
437 e
->watchdog_fd
= e
->epoll_fd
= e
->realtime
.fd
= e
->boottime
.fd
= e
->monotonic
.fd
= e
->realtime_alarm
.fd
= e
->boottime_alarm
.fd
= -1;
438 e
->realtime
.next
= e
->boottime
.next
= e
->monotonic
.next
= e
->realtime_alarm
.next
= e
->boottime_alarm
.next
= USEC_INFINITY
;
439 e
->realtime
.wakeup
= e
->boottime
.wakeup
= e
->monotonic
.wakeup
= e
->realtime_alarm
.wakeup
= e
->boottime_alarm
.wakeup
= WAKEUP_CLOCK_DATA
;
440 e
->original_pid
= getpid();
441 e
->perturb
= USEC_INFINITY
;
443 r
= prioq_ensure_allocated(&e
->pending
, pending_prioq_compare
);
447 e
->epoll_fd
= epoll_create1(EPOLL_CLOEXEC
);
448 if (e
->epoll_fd
< 0) {
453 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
454 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
455 e
->profile_delays
= true;
466 _public_ sd_event
* sd_event_ref(sd_event
*e
) {
471 assert(e
->n_ref
>= 1);
477 _public_ sd_event
* sd_event_unref(sd_event
*e
) {
482 assert(e
->n_ref
>= 1);
491 static bool event_pid_changed(sd_event
*e
) {
494 /* We don't support people creating an event loop and keeping
495 * it around over a fork(). Let's complain. */
497 return e
->original_pid
!= getpid();
500 static void source_io_unregister(sd_event_source
*s
) {
504 assert(s
->type
== SOURCE_IO
);
506 if (event_pid_changed(s
->event
))
509 if (!s
->io
.registered
)
512 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, s
->io
.fd
, NULL
);
514 log_debug_errno(errno
, "Failed to remove source %s (type %s) from epoll: %m",
515 strna(s
->description
), event_source_type_to_string(s
->type
));
517 s
->io
.registered
= false;
520 static int source_io_register(
525 struct epoll_event ev
= {};
529 assert(s
->type
== SOURCE_IO
);
530 assert(enabled
!= SD_EVENT_OFF
);
535 if (enabled
== SD_EVENT_ONESHOT
)
536 ev
.events
|= EPOLLONESHOT
;
538 if (s
->io
.registered
)
539 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_MOD
, s
->io
.fd
, &ev
);
541 r
= epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_ADD
, s
->io
.fd
, &ev
);
545 s
->io
.registered
= true;
550 static clockid_t
event_source_type_to_clock(EventSourceType t
) {
554 case SOURCE_TIME_REALTIME
:
555 return CLOCK_REALTIME
;
557 case SOURCE_TIME_BOOTTIME
:
558 return CLOCK_BOOTTIME
;
560 case SOURCE_TIME_MONOTONIC
:
561 return CLOCK_MONOTONIC
;
563 case SOURCE_TIME_REALTIME_ALARM
:
564 return CLOCK_REALTIME_ALARM
;
566 case SOURCE_TIME_BOOTTIME_ALARM
:
567 return CLOCK_BOOTTIME_ALARM
;
570 return (clockid_t
) -1;
574 static EventSourceType
clock_to_event_source_type(clockid_t clock
) {
579 return SOURCE_TIME_REALTIME
;
582 return SOURCE_TIME_BOOTTIME
;
584 case CLOCK_MONOTONIC
:
585 return SOURCE_TIME_MONOTONIC
;
587 case CLOCK_REALTIME_ALARM
:
588 return SOURCE_TIME_REALTIME_ALARM
;
590 case CLOCK_BOOTTIME_ALARM
:
591 return SOURCE_TIME_BOOTTIME_ALARM
;
594 return _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
598 static struct clock_data
* event_get_clock_data(sd_event
*e
, EventSourceType t
) {
603 case SOURCE_TIME_REALTIME
:
606 case SOURCE_TIME_BOOTTIME
:
609 case SOURCE_TIME_MONOTONIC
:
610 return &e
->monotonic
;
612 case SOURCE_TIME_REALTIME_ALARM
:
613 return &e
->realtime_alarm
;
615 case SOURCE_TIME_BOOTTIME_ALARM
:
616 return &e
->boottime_alarm
;
623 static int event_make_signal_data(
626 struct signal_data
**ret
) {
628 struct epoll_event ev
= {};
629 struct signal_data
*d
;
637 if (event_pid_changed(e
))
640 if (e
->signal_sources
&& e
->signal_sources
[sig
])
641 priority
= e
->signal_sources
[sig
]->priority
;
645 d
= hashmap_get(e
->signal_data
, &priority
);
647 if (sigismember(&d
->sigset
, sig
) > 0) {
653 r
= hashmap_ensure_allocated(&e
->signal_data
, &uint64_hash_ops
);
657 d
= new0(struct signal_data
, 1);
661 d
->wakeup
= WAKEUP_SIGNAL_DATA
;
663 d
->priority
= priority
;
665 r
= hashmap_put(e
->signal_data
, &d
->priority
, d
);
675 assert_se(sigaddset(&ss_copy
, sig
) >= 0);
677 r
= signalfd(d
->fd
, &ss_copy
, SFD_NONBLOCK
|SFD_CLOEXEC
);
696 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, d
->fd
, &ev
);
709 d
->fd
= safe_close(d
->fd
);
710 hashmap_remove(e
->signal_data
, &d
->priority
);
717 static void event_unmask_signal_data(sd_event
*e
, struct signal_data
*d
, int sig
) {
721 /* Turns off the specified signal in the signal data
722 * object. If the signal mask of the object becomes empty that
725 if (sigismember(&d
->sigset
, sig
) == 0)
728 assert_se(sigdelset(&d
->sigset
, sig
) >= 0);
730 if (sigisemptyset(&d
->sigset
)) {
732 /* If all the mask is all-zero we can get rid of the structure */
733 hashmap_remove(e
->signal_data
, &d
->priority
);
742 if (signalfd(d
->fd
, &d
->sigset
, SFD_NONBLOCK
|SFD_CLOEXEC
) < 0)
743 log_debug_errno(errno
, "Failed to unset signal bit, ignoring: %m");
746 static void event_gc_signal_data(sd_event
*e
, const int64_t *priority
, int sig
) {
747 struct signal_data
*d
;
748 static const int64_t zero_priority
= 0;
752 /* Rechecks if the specified signal is still something we are
753 * interested in. If not, we'll unmask it, and possibly drop
754 * the signalfd for it. */
756 if (sig
== SIGCHLD
&&
757 e
->n_enabled_child_sources
> 0)
760 if (e
->signal_sources
&&
761 e
->signal_sources
[sig
] &&
762 e
->signal_sources
[sig
]->enabled
!= SD_EVENT_OFF
)
766 * The specified signal might be enabled in three different queues:
768 * 1) the one that belongs to the priority passed (if it is non-NULL)
769 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
770 * 3) the 0 priority (to cover the SIGCHLD case)
772 * Hence, let's remove it from all three here.
776 d
= hashmap_get(e
->signal_data
, priority
);
778 event_unmask_signal_data(e
, d
, sig
);
781 if (e
->signal_sources
&& e
->signal_sources
[sig
]) {
782 d
= hashmap_get(e
->signal_data
, &e
->signal_sources
[sig
]->priority
);
784 event_unmask_signal_data(e
, d
, sig
);
787 d
= hashmap_get(e
->signal_data
, &zero_priority
);
789 event_unmask_signal_data(e
, d
, sig
);
792 static void source_disconnect(sd_event_source
*s
) {
800 assert(s
->event
->n_sources
> 0);
806 source_io_unregister(s
);
810 case SOURCE_TIME_REALTIME
:
811 case SOURCE_TIME_BOOTTIME
:
812 case SOURCE_TIME_MONOTONIC
:
813 case SOURCE_TIME_REALTIME_ALARM
:
814 case SOURCE_TIME_BOOTTIME_ALARM
: {
815 struct clock_data
*d
;
817 d
= event_get_clock_data(s
->event
, s
->type
);
820 prioq_remove(d
->earliest
, s
, &s
->time
.earliest_index
);
821 prioq_remove(d
->latest
, s
, &s
->time
.latest_index
);
822 d
->needs_rearm
= true;
827 if (s
->signal
.sig
> 0) {
829 if (s
->event
->signal_sources
)
830 s
->event
->signal_sources
[s
->signal
.sig
] = NULL
;
832 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
838 if (s
->child
.pid
> 0) {
839 if (s
->enabled
!= SD_EVENT_OFF
) {
840 assert(s
->event
->n_enabled_child_sources
> 0);
841 s
->event
->n_enabled_child_sources
--;
844 (void) hashmap_remove(s
->event
->child_sources
, PID_TO_PTR(s
->child
.pid
));
845 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
855 set_remove(s
->event
->post_sources
, s
);
859 prioq_remove(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
863 assert_not_reached("Wut? I shouldn't exist.");
867 prioq_remove(s
->event
->pending
, s
, &s
->pending_index
);
870 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
874 s
->type
= _SOURCE_EVENT_SOURCE_TYPE_INVALID
;
876 LIST_REMOVE(sources
, event
->sources
, s
);
880 sd_event_unref(event
);
883 static void source_free(sd_event_source
*s
) {
886 source_disconnect(s
);
887 free(s
->description
);
891 static int source_set_pending(sd_event_source
*s
, bool b
) {
895 assert(s
->type
!= SOURCE_EXIT
);
903 s
->pending_iteration
= s
->event
->iteration
;
905 r
= prioq_put(s
->event
->pending
, s
, &s
->pending_index
);
911 assert_se(prioq_remove(s
->event
->pending
, s
, &s
->pending_index
));
913 if (EVENT_SOURCE_IS_TIME(s
->type
)) {
914 struct clock_data
*d
;
916 d
= event_get_clock_data(s
->event
, s
->type
);
919 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
920 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
921 d
->needs_rearm
= true;
924 if (s
->type
== SOURCE_SIGNAL
&& !b
) {
925 struct signal_data
*d
;
927 d
= hashmap_get(s
->event
->signal_data
, &s
->priority
);
928 if (d
&& d
->current
== s
)
935 static sd_event_source
*source_new(sd_event
*e
, bool floating
, EventSourceType type
) {
940 s
= new0(sd_event_source
, 1);
946 s
->floating
= floating
;
948 s
->pending_index
= s
->prepare_index
= PRIOQ_IDX_NULL
;
953 LIST_PREPEND(sources
, e
->sources
, s
);
959 _public_
int sd_event_add_io(
961 sd_event_source
**ret
,
964 sd_event_io_handler_t callback
,
970 assert_return(e
, -EINVAL
);
971 assert_return(fd
>= 0, -EBADF
);
972 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
973 assert_return(callback
, -EINVAL
);
974 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
975 assert_return(!event_pid_changed(e
), -ECHILD
);
977 s
= source_new(e
, !ret
, SOURCE_IO
);
981 s
->wakeup
= WAKEUP_EVENT_SOURCE
;
983 s
->io
.events
= events
;
984 s
->io
.callback
= callback
;
985 s
->userdata
= userdata
;
986 s
->enabled
= SD_EVENT_ON
;
988 r
= source_io_register(s
, s
->enabled
, events
);
1000 static void initialize_perturb(sd_event
*e
) {
1001 sd_id128_t bootid
= {};
1003 /* When we sleep for longer, we try to realign the wakeup to
1004 the same time wihtin each minute/second/250ms, so that
1005 events all across the system can be coalesced into a single
1006 CPU wakeup. However, let's take some system-specific
1007 randomness for this value, so that in a network of systems
1008 with synced clocks timer events are distributed a
1009 bit. Here, we calculate a perturbation usec offset from the
1012 if (_likely_(e
->perturb
!= USEC_INFINITY
))
1015 if (sd_id128_get_boot(&bootid
) >= 0)
1016 e
->perturb
= (bootid
.qwords
[0] ^ bootid
.qwords
[1]) % USEC_PER_MINUTE
;
1019 static int event_setup_timer_fd(
1021 struct clock_data
*d
,
1024 struct epoll_event ev
= {};
1030 if (_likely_(d
->fd
>= 0))
1033 fd
= timerfd_create(clock
, TFD_NONBLOCK
|TFD_CLOEXEC
);
1037 ev
.events
= EPOLLIN
;
1040 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, fd
, &ev
);
1050 static int time_exit_callback(sd_event_source
*s
, uint64_t usec
, void *userdata
) {
1053 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1056 _public_
int sd_event_add_time(
1058 sd_event_source
**ret
,
1062 sd_event_time_handler_t callback
,
1065 EventSourceType type
;
1067 struct clock_data
*d
;
1070 assert_return(e
, -EINVAL
);
1071 assert_return(accuracy
!= (uint64_t) -1, -EINVAL
);
1072 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1073 assert_return(!event_pid_changed(e
), -ECHILD
);
1076 callback
= time_exit_callback
;
1078 type
= clock_to_event_source_type(clock
);
1079 assert_return(type
>= 0, -EOPNOTSUPP
);
1081 d
= event_get_clock_data(e
, type
);
1084 r
= prioq_ensure_allocated(&d
->earliest
, earliest_time_prioq_compare
);
1088 r
= prioq_ensure_allocated(&d
->latest
, latest_time_prioq_compare
);
1093 r
= event_setup_timer_fd(e
, d
, clock
);
1098 s
= source_new(e
, !ret
, type
);
1102 s
->time
.next
= usec
;
1103 s
->time
.accuracy
= accuracy
== 0 ? DEFAULT_ACCURACY_USEC
: accuracy
;
1104 s
->time
.callback
= callback
;
1105 s
->time
.earliest_index
= s
->time
.latest_index
= PRIOQ_IDX_NULL
;
1106 s
->userdata
= userdata
;
1107 s
->enabled
= SD_EVENT_ONESHOT
;
1109 d
->needs_rearm
= true;
1111 r
= prioq_put(d
->earliest
, s
, &s
->time
.earliest_index
);
1115 r
= prioq_put(d
->latest
, s
, &s
->time
.latest_index
);
1129 static int signal_exit_callback(sd_event_source
*s
, const struct signalfd_siginfo
*si
, void *userdata
) {
1132 return sd_event_exit(sd_event_source_get_event(s
), PTR_TO_INT(userdata
));
1135 _public_
int sd_event_add_signal(
1137 sd_event_source
**ret
,
1139 sd_event_signal_handler_t callback
,
1143 struct signal_data
*d
;
1147 assert_return(e
, -EINVAL
);
1148 assert_return(sig
> 0, -EINVAL
);
1149 assert_return(sig
< _NSIG
, -EINVAL
);
1150 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1151 assert_return(!event_pid_changed(e
), -ECHILD
);
1154 callback
= signal_exit_callback
;
1156 r
= pthread_sigmask(SIG_SETMASK
, NULL
, &ss
);
1160 if (!sigismember(&ss
, sig
))
1163 if (!e
->signal_sources
) {
1164 e
->signal_sources
= new0(sd_event_source
*, _NSIG
);
1165 if (!e
->signal_sources
)
1167 } else if (e
->signal_sources
[sig
])
1170 s
= source_new(e
, !ret
, SOURCE_SIGNAL
);
1174 s
->signal
.sig
= sig
;
1175 s
->signal
.callback
= callback
;
1176 s
->userdata
= userdata
;
1177 s
->enabled
= SD_EVENT_ON
;
1179 e
->signal_sources
[sig
] = s
;
1181 r
= event_make_signal_data(e
, sig
, &d
);
1187 /* Use the signal name as description for the event source by default */
1188 (void) sd_event_source_set_description(s
, signal_to_string(sig
));
1196 _public_
int sd_event_add_child(
1198 sd_event_source
**ret
,
1201 sd_event_child_handler_t callback
,
1207 assert_return(e
, -EINVAL
);
1208 assert_return(pid
> 1, -EINVAL
);
1209 assert_return(!(options
& ~(WEXITED
|WSTOPPED
|WCONTINUED
)), -EINVAL
);
1210 assert_return(options
!= 0, -EINVAL
);
1211 assert_return(callback
, -EINVAL
);
1212 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1213 assert_return(!event_pid_changed(e
), -ECHILD
);
1215 r
= hashmap_ensure_allocated(&e
->child_sources
, NULL
);
1219 if (hashmap_contains(e
->child_sources
, PID_TO_PTR(pid
)))
1222 s
= source_new(e
, !ret
, SOURCE_CHILD
);
1227 s
->child
.options
= options
;
1228 s
->child
.callback
= callback
;
1229 s
->userdata
= userdata
;
1230 s
->enabled
= SD_EVENT_ONESHOT
;
1232 r
= hashmap_put(e
->child_sources
, PID_TO_PTR(pid
), s
);
1238 e
->n_enabled_child_sources
++;
1240 r
= event_make_signal_data(e
, SIGCHLD
, NULL
);
1242 e
->n_enabled_child_sources
--;
1247 e
->need_process_child
= true;
1255 _public_
int sd_event_add_defer(
1257 sd_event_source
**ret
,
1258 sd_event_handler_t callback
,
1264 assert_return(e
, -EINVAL
);
1265 assert_return(callback
, -EINVAL
);
1266 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1267 assert_return(!event_pid_changed(e
), -ECHILD
);
1269 s
= source_new(e
, !ret
, SOURCE_DEFER
);
1273 s
->defer
.callback
= callback
;
1274 s
->userdata
= userdata
;
1275 s
->enabled
= SD_EVENT_ONESHOT
;
1277 r
= source_set_pending(s
, true);
1289 _public_
int sd_event_add_post(
1291 sd_event_source
**ret
,
1292 sd_event_handler_t callback
,
1298 assert_return(e
, -EINVAL
);
1299 assert_return(callback
, -EINVAL
);
1300 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1301 assert_return(!event_pid_changed(e
), -ECHILD
);
1303 r
= set_ensure_allocated(&e
->post_sources
, NULL
);
1307 s
= source_new(e
, !ret
, SOURCE_POST
);
1311 s
->post
.callback
= callback
;
1312 s
->userdata
= userdata
;
1313 s
->enabled
= SD_EVENT_ON
;
1315 r
= set_put(e
->post_sources
, s
);
1327 _public_
int sd_event_add_exit(
1329 sd_event_source
**ret
,
1330 sd_event_handler_t callback
,
1336 assert_return(e
, -EINVAL
);
1337 assert_return(callback
, -EINVAL
);
1338 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1339 assert_return(!event_pid_changed(e
), -ECHILD
);
1341 r
= prioq_ensure_allocated(&e
->exit
, exit_prioq_compare
);
1345 s
= source_new(e
, !ret
, SOURCE_EXIT
);
1349 s
->exit
.callback
= callback
;
1350 s
->userdata
= userdata
;
1351 s
->exit
.prioq_index
= PRIOQ_IDX_NULL
;
1352 s
->enabled
= SD_EVENT_ONESHOT
;
1354 r
= prioq_put(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1366 _public_ sd_event_source
* sd_event_source_ref(sd_event_source
*s
) {
1371 assert(s
->n_ref
>= 1);
1377 _public_ sd_event_source
* sd_event_source_unref(sd_event_source
*s
) {
1382 assert(s
->n_ref
>= 1);
1385 if (s
->n_ref
<= 0) {
1386 /* Here's a special hack: when we are called from a
1387 * dispatch handler we won't free the event source
1388 * immediately, but we will detach the fd from the
1389 * epoll. This way it is safe for the caller to unref
1390 * the event source and immediately close the fd, but
1391 * we still retain a valid event source object after
1394 if (s
->dispatching
) {
1395 if (s
->type
== SOURCE_IO
)
1396 source_io_unregister(s
);
1398 source_disconnect(s
);
1406 _public_
int sd_event_source_set_description(sd_event_source
*s
, const char *description
) {
1407 assert_return(s
, -EINVAL
);
1408 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1410 return free_and_strdup(&s
->description
, description
);
1413 _public_
int sd_event_source_get_description(sd_event_source
*s
, const char **description
) {
1414 assert_return(s
, -EINVAL
);
1415 assert_return(description
, -EINVAL
);
1416 assert_return(s
->description
, -ENXIO
);
1417 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1419 *description
= s
->description
;
1423 _public_ sd_event
*sd_event_source_get_event(sd_event_source
*s
) {
1424 assert_return(s
, NULL
);
1429 _public_
int sd_event_source_get_pending(sd_event_source
*s
) {
1430 assert_return(s
, -EINVAL
);
1431 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1432 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1433 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1438 _public_
int sd_event_source_get_io_fd(sd_event_source
*s
) {
1439 assert_return(s
, -EINVAL
);
1440 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1441 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1446 _public_
int sd_event_source_set_io_fd(sd_event_source
*s
, int fd
) {
1449 assert_return(s
, -EINVAL
);
1450 assert_return(fd
>= 0, -EBADF
);
1451 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1452 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1457 if (s
->enabled
== SD_EVENT_OFF
) {
1459 s
->io
.registered
= false;
1463 saved_fd
= s
->io
.fd
;
1464 assert(s
->io
.registered
);
1467 s
->io
.registered
= false;
1469 r
= source_io_register(s
, s
->enabled
, s
->io
.events
);
1471 s
->io
.fd
= saved_fd
;
1472 s
->io
.registered
= true;
1476 epoll_ctl(s
->event
->epoll_fd
, EPOLL_CTL_DEL
, saved_fd
, NULL
);
1482 _public_
int sd_event_source_get_io_events(sd_event_source
*s
, uint32_t* events
) {
1483 assert_return(s
, -EINVAL
);
1484 assert_return(events
, -EINVAL
);
1485 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1486 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1488 *events
= s
->io
.events
;
1492 _public_
int sd_event_source_set_io_events(sd_event_source
*s
, uint32_t events
) {
1495 assert_return(s
, -EINVAL
);
1496 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1497 assert_return(!(events
& ~(EPOLLIN
|EPOLLOUT
|EPOLLRDHUP
|EPOLLPRI
|EPOLLERR
|EPOLLHUP
|EPOLLET
)), -EINVAL
);
1498 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1499 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1501 /* edge-triggered updates are never skipped, so we can reset edges */
1502 if (s
->io
.events
== events
&& !(events
& EPOLLET
))
1505 if (s
->enabled
!= SD_EVENT_OFF
) {
1506 r
= source_io_register(s
, s
->enabled
, events
);
1511 s
->io
.events
= events
;
1512 source_set_pending(s
, false);
1517 _public_
int sd_event_source_get_io_revents(sd_event_source
*s
, uint32_t* revents
) {
1518 assert_return(s
, -EINVAL
);
1519 assert_return(revents
, -EINVAL
);
1520 assert_return(s
->type
== SOURCE_IO
, -EDOM
);
1521 assert_return(s
->pending
, -ENODATA
);
1522 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1524 *revents
= s
->io
.revents
;
1528 _public_
int sd_event_source_get_signal(sd_event_source
*s
) {
1529 assert_return(s
, -EINVAL
);
1530 assert_return(s
->type
== SOURCE_SIGNAL
, -EDOM
);
1531 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1533 return s
->signal
.sig
;
1536 _public_
int sd_event_source_get_priority(sd_event_source
*s
, int64_t *priority
) {
1537 assert_return(s
, -EINVAL
);
1538 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1543 _public_
int sd_event_source_set_priority(sd_event_source
*s
, int64_t priority
) {
1546 assert_return(s
, -EINVAL
);
1547 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1548 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1550 if (s
->priority
== priority
)
1553 if (s
->type
== SOURCE_SIGNAL
&& s
->enabled
!= SD_EVENT_OFF
) {
1554 struct signal_data
*old
, *d
;
1556 /* Move us from the signalfd belonging to the old
1557 * priority to the signalfd of the new priority */
1559 assert_se(old
= hashmap_get(s
->event
->signal_data
, &s
->priority
));
1561 s
->priority
= priority
;
1563 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, &d
);
1565 s
->priority
= old
->priority
;
1569 event_unmask_signal_data(s
->event
, old
, s
->signal
.sig
);
1571 s
->priority
= priority
;
1574 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1577 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1579 if (s
->type
== SOURCE_EXIT
)
1580 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1585 _public_
int sd_event_source_get_enabled(sd_event_source
*s
, int *m
) {
1586 assert_return(s
, -EINVAL
);
1587 assert_return(m
, -EINVAL
);
1588 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1594 _public_
int sd_event_source_set_enabled(sd_event_source
*s
, int m
) {
1597 assert_return(s
, -EINVAL
);
1598 assert_return(m
== SD_EVENT_OFF
|| m
== SD_EVENT_ON
|| m
== SD_EVENT_ONESHOT
, -EINVAL
);
1599 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1601 /* If we are dead anyway, we are fine with turning off
1602 * sources, but everything else needs to fail. */
1603 if (s
->event
->state
== SD_EVENT_FINISHED
)
1604 return m
== SD_EVENT_OFF
? 0 : -ESTALE
;
1606 if (s
->enabled
== m
)
1609 if (m
== SD_EVENT_OFF
) {
1614 source_io_unregister(s
);
1618 case SOURCE_TIME_REALTIME
:
1619 case SOURCE_TIME_BOOTTIME
:
1620 case SOURCE_TIME_MONOTONIC
:
1621 case SOURCE_TIME_REALTIME_ALARM
:
1622 case SOURCE_TIME_BOOTTIME_ALARM
: {
1623 struct clock_data
*d
;
1626 d
= event_get_clock_data(s
->event
, s
->type
);
1629 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1630 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1631 d
->needs_rearm
= true;
1638 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1644 assert(s
->event
->n_enabled_child_sources
> 0);
1645 s
->event
->n_enabled_child_sources
--;
1647 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1652 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1661 assert_not_reached("Wut? I shouldn't exist.");
1668 r
= source_io_register(s
, m
, s
->io
.events
);
1675 case SOURCE_TIME_REALTIME
:
1676 case SOURCE_TIME_BOOTTIME
:
1677 case SOURCE_TIME_MONOTONIC
:
1678 case SOURCE_TIME_REALTIME_ALARM
:
1679 case SOURCE_TIME_BOOTTIME_ALARM
: {
1680 struct clock_data
*d
;
1683 d
= event_get_clock_data(s
->event
, s
->type
);
1686 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1687 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1688 d
->needs_rearm
= true;
1696 r
= event_make_signal_data(s
->event
, s
->signal
.sig
, NULL
);
1698 s
->enabled
= SD_EVENT_OFF
;
1699 event_gc_signal_data(s
->event
, &s
->priority
, s
->signal
.sig
);
1707 if (s
->enabled
== SD_EVENT_OFF
)
1708 s
->event
->n_enabled_child_sources
++;
1712 r
= event_make_signal_data(s
->event
, SIGCHLD
, NULL
);
1714 s
->enabled
= SD_EVENT_OFF
;
1715 s
->event
->n_enabled_child_sources
--;
1716 event_gc_signal_data(s
->event
, &s
->priority
, SIGCHLD
);
1724 prioq_reshuffle(s
->event
->exit
, s
, &s
->exit
.prioq_index
);
1733 assert_not_reached("Wut? I shouldn't exist.");
1738 prioq_reshuffle(s
->event
->pending
, s
, &s
->pending_index
);
1741 prioq_reshuffle(s
->event
->prepare
, s
, &s
->prepare_index
);
1746 _public_
int sd_event_source_get_time(sd_event_source
*s
, uint64_t *usec
) {
1747 assert_return(s
, -EINVAL
);
1748 assert_return(usec
, -EINVAL
);
1749 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1750 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1752 *usec
= s
->time
.next
;
1756 _public_
int sd_event_source_set_time(sd_event_source
*s
, uint64_t usec
) {
1757 struct clock_data
*d
;
1759 assert_return(s
, -EINVAL
);
1760 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1761 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1762 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1764 s
->time
.next
= usec
;
1766 source_set_pending(s
, false);
1768 d
= event_get_clock_data(s
->event
, s
->type
);
1771 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
1772 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1773 d
->needs_rearm
= true;
1778 _public_
int sd_event_source_get_time_accuracy(sd_event_source
*s
, uint64_t *usec
) {
1779 assert_return(s
, -EINVAL
);
1780 assert_return(usec
, -EINVAL
);
1781 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1782 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1784 *usec
= s
->time
.accuracy
;
1788 _public_
int sd_event_source_set_time_accuracy(sd_event_source
*s
, uint64_t usec
) {
1789 struct clock_data
*d
;
1791 assert_return(s
, -EINVAL
);
1792 assert_return(usec
!= (uint64_t) -1, -EINVAL
);
1793 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1794 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1795 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1798 usec
= DEFAULT_ACCURACY_USEC
;
1800 s
->time
.accuracy
= usec
;
1802 source_set_pending(s
, false);
1804 d
= event_get_clock_data(s
->event
, s
->type
);
1807 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
1808 d
->needs_rearm
= true;
1813 _public_
int sd_event_source_get_time_clock(sd_event_source
*s
, clockid_t
*clock
) {
1814 assert_return(s
, -EINVAL
);
1815 assert_return(clock
, -EINVAL
);
1816 assert_return(EVENT_SOURCE_IS_TIME(s
->type
), -EDOM
);
1817 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1819 *clock
= event_source_type_to_clock(s
->type
);
1823 _public_
int sd_event_source_get_child_pid(sd_event_source
*s
, pid_t
*pid
) {
1824 assert_return(s
, -EINVAL
);
1825 assert_return(pid
, -EINVAL
);
1826 assert_return(s
->type
== SOURCE_CHILD
, -EDOM
);
1827 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1829 *pid
= s
->child
.pid
;
1833 _public_
int sd_event_source_set_prepare(sd_event_source
*s
, sd_event_handler_t callback
) {
1836 assert_return(s
, -EINVAL
);
1837 assert_return(s
->type
!= SOURCE_EXIT
, -EDOM
);
1838 assert_return(s
->event
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
1839 assert_return(!event_pid_changed(s
->event
), -ECHILD
);
1841 if (s
->prepare
== callback
)
1844 if (callback
&& s
->prepare
) {
1845 s
->prepare
= callback
;
1849 r
= prioq_ensure_allocated(&s
->event
->prepare
, prepare_prioq_compare
);
1853 s
->prepare
= callback
;
1856 r
= prioq_put(s
->event
->prepare
, s
, &s
->prepare_index
);
1860 prioq_remove(s
->event
->prepare
, s
, &s
->prepare_index
);
1865 _public_
void* sd_event_source_get_userdata(sd_event_source
*s
) {
1866 assert_return(s
, NULL
);
1871 _public_
void *sd_event_source_set_userdata(sd_event_source
*s
, void *userdata
) {
1874 assert_return(s
, NULL
);
1877 s
->userdata
= userdata
;
1882 static usec_t
sleep_between(sd_event
*e
, usec_t a
, usec_t b
) {
1889 if (a
>= USEC_INFINITY
)
1890 return USEC_INFINITY
;
1895 initialize_perturb(e
);
1898 Find a good time to wake up again between times a and b. We
1899 have two goals here:
1901 a) We want to wake up as seldom as possible, hence prefer
1902 later times over earlier times.
1904 b) But if we have to wake up, then let's make sure to
1905 dispatch as much as possible on the entire system.
1907 We implement this by waking up everywhere at the same time
1908 within any given minute if we can, synchronised via the
1909 perturbation value determined from the boot ID. If we can't,
1910 then we try to find the same spot in every 10s, then 1s and
1911 then 250ms step. Otherwise, we pick the last possible time
1915 c
= (b
/ USEC_PER_MINUTE
) * USEC_PER_MINUTE
+ e
->perturb
;
1917 if (_unlikely_(c
< USEC_PER_MINUTE
))
1920 c
-= USEC_PER_MINUTE
;
1926 c
= (b
/ (USEC_PER_SEC
*10)) * (USEC_PER_SEC
*10) + (e
->perturb
% (USEC_PER_SEC
*10));
1928 if (_unlikely_(c
< USEC_PER_SEC
*10))
1931 c
-= USEC_PER_SEC
*10;
1937 c
= (b
/ USEC_PER_SEC
) * USEC_PER_SEC
+ (e
->perturb
% USEC_PER_SEC
);
1939 if (_unlikely_(c
< USEC_PER_SEC
))
1948 c
= (b
/ (USEC_PER_MSEC
*250)) * (USEC_PER_MSEC
*250) + (e
->perturb
% (USEC_PER_MSEC
*250));
1950 if (_unlikely_(c
< USEC_PER_MSEC
*250))
1953 c
-= USEC_PER_MSEC
*250;
1962 static int event_arm_timer(
1964 struct clock_data
*d
) {
1966 struct itimerspec its
= {};
1967 sd_event_source
*a
, *b
;
1974 if (!d
->needs_rearm
)
1977 d
->needs_rearm
= false;
1979 a
= prioq_peek(d
->earliest
);
1980 if (!a
|| a
->enabled
== SD_EVENT_OFF
|| a
->time
.next
== USEC_INFINITY
) {
1985 if (d
->next
== USEC_INFINITY
)
1989 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
1993 d
->next
= USEC_INFINITY
;
1997 b
= prioq_peek(d
->latest
);
1998 assert_se(b
&& b
->enabled
!= SD_EVENT_OFF
);
2000 t
= sleep_between(e
, a
->time
.next
, time_event_source_latest(b
));
2004 assert_se(d
->fd
>= 0);
2007 /* We don' want to disarm here, just mean some time looooong ago. */
2008 its
.it_value
.tv_sec
= 0;
2009 its
.it_value
.tv_nsec
= 1;
2011 timespec_store(&its
.it_value
, t
);
2013 r
= timerfd_settime(d
->fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
2021 static int process_io(sd_event
*e
, sd_event_source
*s
, uint32_t revents
) {
2024 assert(s
->type
== SOURCE_IO
);
2026 /* If the event source was already pending, we just OR in the
2027 * new revents, otherwise we reset the value. The ORing is
2028 * necessary to handle EPOLLONESHOT events properly where
2029 * readability might happen independently of writability, and
2030 * we need to keep track of both */
2033 s
->io
.revents
|= revents
;
2035 s
->io
.revents
= revents
;
2037 return source_set_pending(s
, true);
2040 static int flush_timer(sd_event
*e
, int fd
, uint32_t events
, usec_t
*next
) {
2047 assert_return(events
== EPOLLIN
, -EIO
);
2049 ss
= read(fd
, &x
, sizeof(x
));
2051 if (errno
== EAGAIN
|| errno
== EINTR
)
2057 if (_unlikely_(ss
!= sizeof(x
)))
2061 *next
= USEC_INFINITY
;
2066 static int process_timer(
2069 struct clock_data
*d
) {
2078 s
= prioq_peek(d
->earliest
);
2081 s
->enabled
== SD_EVENT_OFF
||
2085 r
= source_set_pending(s
, true);
2089 prioq_reshuffle(d
->earliest
, s
, &s
->time
.earliest_index
);
2090 prioq_reshuffle(d
->latest
, s
, &s
->time
.latest_index
);
2091 d
->needs_rearm
= true;
2097 static int process_child(sd_event
*e
) {
2104 e
->need_process_child
= false;
2107 So, this is ugly. We iteratively invoke waitid() with P_PID
2108 + WNOHANG for each PID we wait for, instead of using
2109 P_ALL. This is because we only want to get child
2110 information of very specific child processes, and not all
2111 of them. We might not have processed the SIGCHLD even of a
2112 previous invocation and we don't want to maintain a
2113 unbounded *per-child* event queue, hence we really don't
2114 want anything flushed out of the kernel's queue that we
2115 don't care about. Since this is O(n) this means that if you
2116 have a lot of processes you probably want to handle SIGCHLD
2119 We do not reap the children here (by using WNOWAIT), this
2120 is only done after the event source is dispatched so that
2121 the callback still sees the process as a zombie.
2124 HASHMAP_FOREACH(s
, e
->child_sources
, i
) {
2125 assert(s
->type
== SOURCE_CHILD
);
2130 if (s
->enabled
== SD_EVENT_OFF
)
2133 zero(s
->child
.siginfo
);
2134 r
= waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
,
2135 WNOHANG
| (s
->child
.options
& WEXITED
? WNOWAIT
: 0) | s
->child
.options
);
2139 if (s
->child
.siginfo
.si_pid
!= 0) {
2141 s
->child
.siginfo
.si_code
== CLD_EXITED
||
2142 s
->child
.siginfo
.si_code
== CLD_KILLED
||
2143 s
->child
.siginfo
.si_code
== CLD_DUMPED
;
2145 if (!zombie
&& (s
->child
.options
& WEXITED
)) {
2146 /* If the child isn't dead then let's
2147 * immediately remove the state change
2148 * from the queue, since there's no
2149 * benefit in leaving it queued */
2151 assert(s
->child
.options
& (WSTOPPED
|WCONTINUED
));
2152 waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|(s
->child
.options
& (WSTOPPED
|WCONTINUED
)));
2155 r
= source_set_pending(s
, true);
2164 static int process_signal(sd_event
*e
, struct signal_data
*d
, uint32_t events
) {
2165 bool read_one
= false;
2169 assert_return(events
== EPOLLIN
, -EIO
);
2171 /* If there's a signal queued on this priority and SIGCHLD is
2172 on this priority too, then make sure to recheck the
2173 children we watch. This is because we only ever dequeue
2174 the first signal per priority, and if we dequeue one, and
2175 SIGCHLD might be enqueued later we wouldn't know, but we
2176 might have higher priority children we care about hence we
2177 need to check that explicitly. */
2179 if (sigismember(&d
->sigset
, SIGCHLD
))
2180 e
->need_process_child
= true;
2182 /* If there's already an event source pending for this
2183 * priority we don't read another */
2188 struct signalfd_siginfo si
;
2190 sd_event_source
*s
= NULL
;
2192 n
= read(d
->fd
, &si
, sizeof(si
));
2194 if (errno
== EAGAIN
|| errno
== EINTR
)
2200 if (_unlikely_(n
!= sizeof(si
)))
2203 assert(si
.ssi_signo
< _NSIG
);
2207 if (e
->signal_sources
)
2208 s
= e
->signal_sources
[si
.ssi_signo
];
2214 s
->signal
.siginfo
= si
;
2217 r
= source_set_pending(s
, true);
2225 static int source_dispatch(sd_event_source
*s
) {
2229 assert(s
->pending
|| s
->type
== SOURCE_EXIT
);
2231 if (s
->type
!= SOURCE_DEFER
&& s
->type
!= SOURCE_EXIT
) {
2232 r
= source_set_pending(s
, false);
2237 if (s
->type
!= SOURCE_POST
) {
2241 /* If we execute a non-post source, let's mark all
2242 * post sources as pending */
2244 SET_FOREACH(z
, s
->event
->post_sources
, i
) {
2245 if (z
->enabled
== SD_EVENT_OFF
)
2248 r
= source_set_pending(z
, true);
2254 if (s
->enabled
== SD_EVENT_ONESHOT
) {
2255 r
= sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2260 s
->dispatching
= true;
2265 r
= s
->io
.callback(s
, s
->io
.fd
, s
->io
.revents
, s
->userdata
);
2268 case SOURCE_TIME_REALTIME
:
2269 case SOURCE_TIME_BOOTTIME
:
2270 case SOURCE_TIME_MONOTONIC
:
2271 case SOURCE_TIME_REALTIME_ALARM
:
2272 case SOURCE_TIME_BOOTTIME_ALARM
:
2273 r
= s
->time
.callback(s
, s
->time
.next
, s
->userdata
);
2277 r
= s
->signal
.callback(s
, &s
->signal
.siginfo
, s
->userdata
);
2280 case SOURCE_CHILD
: {
2283 zombie
= s
->child
.siginfo
.si_code
== CLD_EXITED
||
2284 s
->child
.siginfo
.si_code
== CLD_KILLED
||
2285 s
->child
.siginfo
.si_code
== CLD_DUMPED
;
2287 r
= s
->child
.callback(s
, &s
->child
.siginfo
, s
->userdata
);
2289 /* Now, reap the PID for good. */
2291 waitid(P_PID
, s
->child
.pid
, &s
->child
.siginfo
, WNOHANG
|WEXITED
);
2297 r
= s
->defer
.callback(s
, s
->userdata
);
2301 r
= s
->post
.callback(s
, s
->userdata
);
2305 r
= s
->exit
.callback(s
, s
->userdata
);
2308 case SOURCE_WATCHDOG
:
2309 case _SOURCE_EVENT_SOURCE_TYPE_MAX
:
2310 case _SOURCE_EVENT_SOURCE_TYPE_INVALID
:
2311 assert_not_reached("Wut? I shouldn't exist.");
2314 s
->dispatching
= false;
2317 log_debug_errno(r
, "Event source %s (type %s) returned error, disabling: %m",
2318 strna(s
->description
), event_source_type_to_string(s
->type
));
2323 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2328 static int event_prepare(sd_event
*e
) {
2336 s
= prioq_peek(e
->prepare
);
2337 if (!s
|| s
->prepare_iteration
== e
->iteration
|| s
->enabled
== SD_EVENT_OFF
)
2340 s
->prepare_iteration
= e
->iteration
;
2341 r
= prioq_reshuffle(e
->prepare
, s
, &s
->prepare_index
);
2347 s
->dispatching
= true;
2348 r
= s
->prepare(s
, s
->userdata
);
2349 s
->dispatching
= false;
2352 log_debug_errno(r
, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2353 strna(s
->description
), event_source_type_to_string(s
->type
));
2358 sd_event_source_set_enabled(s
, SD_EVENT_OFF
);
2364 static int dispatch_exit(sd_event
*e
) {
2370 p
= prioq_peek(e
->exit
);
2371 if (!p
|| p
->enabled
== SD_EVENT_OFF
) {
2372 e
->state
= SD_EVENT_FINISHED
;
2378 e
->state
= SD_EVENT_EXITING
;
2380 r
= source_dispatch(p
);
2382 e
->state
= SD_EVENT_INITIAL
;
2388 static sd_event_source
* event_next_pending(sd_event
*e
) {
2393 p
= prioq_peek(e
->pending
);
2397 if (p
->enabled
== SD_EVENT_OFF
)
2403 static int arm_watchdog(sd_event
*e
) {
2404 struct itimerspec its
= {};
2409 assert(e
->watchdog_fd
>= 0);
2411 t
= sleep_between(e
,
2412 e
->watchdog_last
+ (e
->watchdog_period
/ 2),
2413 e
->watchdog_last
+ (e
->watchdog_period
* 3 / 4));
2415 timespec_store(&its
.it_value
, t
);
2417 /* Make sure we never set the watchdog to 0, which tells the
2418 * kernel to disable it. */
2419 if (its
.it_value
.tv_sec
== 0 && its
.it_value
.tv_nsec
== 0)
2420 its
.it_value
.tv_nsec
= 1;
2422 r
= timerfd_settime(e
->watchdog_fd
, TFD_TIMER_ABSTIME
, &its
, NULL
);
2429 static int process_watchdog(sd_event
*e
) {
2435 /* Don't notify watchdog too often */
2436 if (e
->watchdog_last
+ e
->watchdog_period
/ 4 > e
->timestamp
.monotonic
)
2439 sd_notify(false, "WATCHDOG=1");
2440 e
->watchdog_last
= e
->timestamp
.monotonic
;
2442 return arm_watchdog(e
);
2445 _public_
int sd_event_prepare(sd_event
*e
) {
2448 assert_return(e
, -EINVAL
);
2449 assert_return(!event_pid_changed(e
), -ECHILD
);
2450 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2451 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2453 if (e
->exit_requested
)
2458 e
->state
= SD_EVENT_PREPARING
;
2459 r
= event_prepare(e
);
2460 e
->state
= SD_EVENT_INITIAL
;
2464 r
= event_arm_timer(e
, &e
->realtime
);
2468 r
= event_arm_timer(e
, &e
->boottime
);
2472 r
= event_arm_timer(e
, &e
->monotonic
);
2476 r
= event_arm_timer(e
, &e
->realtime_alarm
);
2480 r
= event_arm_timer(e
, &e
->boottime_alarm
);
2484 if (event_next_pending(e
) || e
->need_process_child
)
2487 e
->state
= SD_EVENT_ARMED
;
2492 e
->state
= SD_EVENT_ARMED
;
2493 r
= sd_event_wait(e
, 0);
2495 e
->state
= SD_EVENT_ARMED
;
2500 _public_
int sd_event_wait(sd_event
*e
, uint64_t timeout
) {
2501 struct epoll_event
*ev_queue
;
2502 unsigned ev_queue_max
;
2505 assert_return(e
, -EINVAL
);
2506 assert_return(!event_pid_changed(e
), -ECHILD
);
2507 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2508 assert_return(e
->state
== SD_EVENT_ARMED
, -EBUSY
);
2510 if (e
->exit_requested
) {
2511 e
->state
= SD_EVENT_PENDING
;
2515 ev_queue_max
= MAX(e
->n_sources
, 1u);
2516 ev_queue
= newa(struct epoll_event
, ev_queue_max
);
2518 m
= epoll_wait(e
->epoll_fd
, ev_queue
, ev_queue_max
,
2519 timeout
== (uint64_t) -1 ? -1 : (int) ((timeout
+ USEC_PER_MSEC
- 1) / USEC_PER_MSEC
));
2521 if (errno
== EINTR
) {
2522 e
->state
= SD_EVENT_PENDING
;
2530 dual_timestamp_get(&e
->timestamp
);
2531 e
->timestamp_boottime
= now(CLOCK_BOOTTIME
);
2533 for (i
= 0; i
< m
; i
++) {
2535 if (ev_queue
[i
].data
.ptr
== INT_TO_PTR(SOURCE_WATCHDOG
))
2536 r
= flush_timer(e
, e
->watchdog_fd
, ev_queue
[i
].events
, NULL
);
2538 WakeupType
*t
= ev_queue
[i
].data
.ptr
;
2542 case WAKEUP_EVENT_SOURCE
:
2543 r
= process_io(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
2546 case WAKEUP_CLOCK_DATA
: {
2547 struct clock_data
*d
= ev_queue
[i
].data
.ptr
;
2548 r
= flush_timer(e
, d
->fd
, ev_queue
[i
].events
, &d
->next
);
2552 case WAKEUP_SIGNAL_DATA
:
2553 r
= process_signal(e
, ev_queue
[i
].data
.ptr
, ev_queue
[i
].events
);
2557 assert_not_reached("Invalid wake-up pointer");
2564 r
= process_watchdog(e
);
2568 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime
);
2572 r
= process_timer(e
, e
->timestamp_boottime
, &e
->boottime
);
2576 r
= process_timer(e
, e
->timestamp
.monotonic
, &e
->monotonic
);
2580 r
= process_timer(e
, e
->timestamp
.realtime
, &e
->realtime_alarm
);
2584 r
= process_timer(e
, e
->timestamp_boottime
, &e
->boottime_alarm
);
2588 if (e
->need_process_child
) {
2589 r
= process_child(e
);
2594 if (event_next_pending(e
)) {
2595 e
->state
= SD_EVENT_PENDING
;
2603 e
->state
= SD_EVENT_INITIAL
;
2608 _public_
int sd_event_dispatch(sd_event
*e
) {
2612 assert_return(e
, -EINVAL
);
2613 assert_return(!event_pid_changed(e
), -ECHILD
);
2614 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2615 assert_return(e
->state
== SD_EVENT_PENDING
, -EBUSY
);
2617 if (e
->exit_requested
)
2618 return dispatch_exit(e
);
2620 p
= event_next_pending(e
);
2624 e
->state
= SD_EVENT_RUNNING
;
2625 r
= source_dispatch(p
);
2626 e
->state
= SD_EVENT_INITIAL
;
2633 e
->state
= SD_EVENT_INITIAL
;
2638 static void event_log_delays(sd_event
*e
) {
2639 char b
[ELEMENTSOF(e
->delays
) * DECIMAL_STR_MAX(unsigned) + 1];
2643 for (i
= o
= 0; i
< ELEMENTSOF(e
->delays
); i
++) {
2644 o
+= snprintf(&b
[o
], sizeof(b
) - o
, "%u ", e
->delays
[i
]);
2647 log_debug("Event loop iterations: %.*s", o
, b
);
2650 _public_
int sd_event_run(sd_event
*e
, uint64_t timeout
) {
2653 assert_return(e
, -EINVAL
);
2654 assert_return(!event_pid_changed(e
), -ECHILD
);
2655 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2656 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2658 if (e
->profile_delays
&& e
->last_run
) {
2662 this_run
= now(CLOCK_MONOTONIC
);
2664 l
= u64log2(this_run
- e
->last_run
);
2665 assert(l
< sizeof(e
->delays
));
2668 if (this_run
- e
->last_log
>= 5*USEC_PER_SEC
) {
2669 event_log_delays(e
);
2670 e
->last_log
= this_run
;
2674 r
= sd_event_prepare(e
);
2676 /* There was nothing? Then wait... */
2677 r
= sd_event_wait(e
, timeout
);
2679 if (e
->profile_delays
)
2680 e
->last_run
= now(CLOCK_MONOTONIC
);
2683 /* There's something now, then let's dispatch it */
2684 r
= sd_event_dispatch(e
);
2694 _public_
int sd_event_loop(sd_event
*e
) {
2697 assert_return(e
, -EINVAL
);
2698 assert_return(!event_pid_changed(e
), -ECHILD
);
2699 assert_return(e
->state
== SD_EVENT_INITIAL
, -EBUSY
);
2703 while (e
->state
!= SD_EVENT_FINISHED
) {
2704 r
= sd_event_run(e
, (uint64_t) -1);
2716 _public_
int sd_event_get_fd(sd_event
*e
) {
2718 assert_return(e
, -EINVAL
);
2719 assert_return(!event_pid_changed(e
), -ECHILD
);
2724 _public_
int sd_event_get_state(sd_event
*e
) {
2725 assert_return(e
, -EINVAL
);
2726 assert_return(!event_pid_changed(e
), -ECHILD
);
2731 _public_
int sd_event_get_exit_code(sd_event
*e
, int *code
) {
2732 assert_return(e
, -EINVAL
);
2733 assert_return(code
, -EINVAL
);
2734 assert_return(!event_pid_changed(e
), -ECHILD
);
2736 if (!e
->exit_requested
)
2739 *code
= e
->exit_code
;
2743 _public_
int sd_event_exit(sd_event
*e
, int code
) {
2744 assert_return(e
, -EINVAL
);
2745 assert_return(e
->state
!= SD_EVENT_FINISHED
, -ESTALE
);
2746 assert_return(!event_pid_changed(e
), -ECHILD
);
2748 e
->exit_requested
= true;
2749 e
->exit_code
= code
;
2754 _public_
int sd_event_now(sd_event
*e
, clockid_t clock
, uint64_t *usec
) {
2755 assert_return(e
, -EINVAL
);
2756 assert_return(usec
, -EINVAL
);
2757 assert_return(!event_pid_changed(e
), -ECHILD
);
2758 assert_return(IN_SET(clock
,
2760 CLOCK_REALTIME_ALARM
,
2763 CLOCK_BOOTTIME_ALARM
), -EOPNOTSUPP
);
2765 if (!dual_timestamp_is_set(&e
->timestamp
)) {
2766 /* Implicitly fall back to now() if we never ran
2767 * before and thus have no cached time. */
2774 case CLOCK_REALTIME
:
2775 case CLOCK_REALTIME_ALARM
:
2776 *usec
= e
->timestamp
.realtime
;
2779 case CLOCK_MONOTONIC
:
2780 *usec
= e
->timestamp
.monotonic
;
2784 *usec
= e
->timestamp_boottime
;
2791 _public_
int sd_event_default(sd_event
**ret
) {
2793 static thread_local sd_event
*default_event
= NULL
;
2798 return !!default_event
;
2800 if (default_event
) {
2801 *ret
= sd_event_ref(default_event
);
2805 r
= sd_event_new(&e
);
2809 e
->default_event_ptr
= &default_event
;
2817 _public_
int sd_event_get_tid(sd_event
*e
, pid_t
*tid
) {
2818 assert_return(e
, -EINVAL
);
2819 assert_return(tid
, -EINVAL
);
2820 assert_return(!event_pid_changed(e
), -ECHILD
);
2830 _public_
int sd_event_set_watchdog(sd_event
*e
, int b
) {
2833 assert_return(e
, -EINVAL
);
2834 assert_return(!event_pid_changed(e
), -ECHILD
);
2836 if (e
->watchdog
== !!b
)
2840 struct epoll_event ev
= {};
2842 r
= sd_watchdog_enabled(false, &e
->watchdog_period
);
2846 /* Issue first ping immediately */
2847 sd_notify(false, "WATCHDOG=1");
2848 e
->watchdog_last
= now(CLOCK_MONOTONIC
);
2850 e
->watchdog_fd
= timerfd_create(CLOCK_MONOTONIC
, TFD_NONBLOCK
|TFD_CLOEXEC
);
2851 if (e
->watchdog_fd
< 0)
2854 r
= arm_watchdog(e
);
2858 ev
.events
= EPOLLIN
;
2859 ev
.data
.ptr
= INT_TO_PTR(SOURCE_WATCHDOG
);
2861 r
= epoll_ctl(e
->epoll_fd
, EPOLL_CTL_ADD
, e
->watchdog_fd
, &ev
);
2868 if (e
->watchdog_fd
>= 0) {
2869 epoll_ctl(e
->epoll_fd
, EPOLL_CTL_DEL
, e
->watchdog_fd
, NULL
);
2870 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
2878 e
->watchdog_fd
= safe_close(e
->watchdog_fd
);
2882 _public_
int sd_event_get_watchdog(sd_event
*e
) {
2883 assert_return(e
, -EINVAL
);
2884 assert_return(!event_pid_changed(e
), -ECHILD
);