/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <sys/epoll.h>
#include <sys/timerfd.h>

#include "sd-daemon.h"

#include "alloc-util.h"
#include "process-util.h"
#include "signal-util.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

typedef enum EventSourceType {
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event_source {
        sd_event_handler_t prepare;

        EventSourceType type:5;

        unsigned pending_index;
        unsigned prepare_index;
        unsigned pending_iteration;
        unsigned prepare_iteration;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
        };
};

struct clock_data {
        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */
};

struct signal_data {
        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        sd_event_source *current;
};

struct sd_event {
        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        dual_timestamp timestamp;
        usec_t timestamp_boottime;

        bool exit_requested:1;
        bool need_process_child:1;

        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        LIST_HEAD(sd_event_source, sources);
};

static void source_disconnect(sd_event_source *s);

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
                return -1;
        if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
                return 1;

        return 0;
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}

static void free_clock_data(struct clock_data *d) {
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static void event_free(sd_event *e) {
        sd_event_source *s;

        while ((s = e->sources)) {
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

_public_ int sd_event_new(sd_event** ret) {
        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);

        e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
        e->original_pid = getpid();
        e->perturb = USEC_INFINITY;

        e->pending = prioq_new(pending_prioq_compare);

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {

_public_ sd_event* sd_event_ref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);

_public_ sd_event* sd_event_unref(sd_event *e) {
        assert(e->n_ref >= 1);

static bool event_pid_changed(sd_event *e) {

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s from epoll: %m", strna(s->description));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev = {};

        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        if (enabled == SD_EVENT_ONESHOT)
                ev.events |= EPOLLONESHOT;

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);

        s->io.registered = true;

        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev = {};
        struct signal_data *d;

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;

        d = hashmap_get(e->signal_data, &priority);
        if (sigismember(&d->sigset, sig) > 0) {

        r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);

        d = new0(struct signal_data, 1);

        d->wakeup = WAKEUP_SIGNAL_DATA;
        d->priority = priority;

        r = hashmap_put(e->signal_data, &d->priority, d);

        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);

        d->fd = safe_close(d->fd);
        hashmap_remove(e->signal_data, &d->priority);

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way we can get rid of it. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {

                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
        }

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void source_disconnect(sd_event_source *s) {
        assert(s->event->n_sources > 0);

        source_io_unregister(s);

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->signal.sig > 0) {
                if (s->event->signal_sources)
                        s->event->signal_sources[s->signal.sig] = NULL;

                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
        }

        if (s->child.pid > 0) {
                if (s->enabled != SD_EVENT_OFF) {
                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;
                }

                (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
        }

        set_remove(s->event->post_sources, s);

        prioq_remove(s->event->exit, s, &s->exit.prioq_index);

        assert_not_reached("Wut? I shouldn't exist.");

        prioq_remove(s->event->pending, s, &s->pending_index);

        prioq_remove(s->event->prepare, s, &s->prepare_index);

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;

        LIST_REMOVE(sources, event->sources, s);

        sd_event_unref(event);
}

static void source_free(sd_event_source *s) {
        source_disconnect(s);
        free(s->description);

static int source_set_pending(sd_event_source *s, bool b) {
        assert(s->type != SOURCE_EXIT);

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        s = new0(sd_event_source, 1);

        s->floating = floating;
        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

        LIST_PREPEND(sources, e->sources, s);

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);

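/* Illustrative only (not part of the original file): a minimal caller of
 * sd_event_add_io(), assuming a valid readable file descriptor "fd" and an
 * event loop obtained via sd_event_default(). The callback signature matches
 * sd_event_io_handler_t as used above; "on_io" is a hypothetical name.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     char buf[256];
 *                     (void) read(fd, buf, sizeof(buf));
 *             }
 *             return 0;
 *     }
 *
 *     sd_event *e = NULL;
 *     sd_event_source *src = NULL;
 *
 *     sd_event_default(&e);
 *     sd_event_add_io(e, &src, fd, EPOLLIN, on_io, NULL);
 *
 * Passing NULL for the source pointer instead of &src creates a "floating"
 * source owned by the loop, as source_new(e, !ret, ...) above implies.
 */
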
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev = {};

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);

        ev.events = EPOLLIN;

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        struct clock_data *d;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = time_exit_callback;

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -EOPNOTSUPP);

        d = event_get_clock_data(e, type);

        d->earliest = prioq_new(earliest_time_prioq_compare);

        d->latest = prioq_new(latest_time_prioq_compare);

        r = event_setup_timer_fd(e, d, clock);

        s = source_new(e, !ret, type);

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);

        r = prioq_put(d->latest, s, &s->time.latest_index);

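/* Illustrative only (not part of the original file): arming a one-shot timer
 * roughly five seconds from now on CLOCK_MONOTONIC. "on_time" is a
 * hypothetical sd_event_time_handler_t; sd_event_now() (defined further
 * below) returns the timestamp of the current loop iteration, and passing 0
 * as accuracy falls back to DEFAULT_ACCURACY_USEC, as implemented above.
 *
 *     uint64_t now_usec;
 *     sd_event_source *src = NULL;
 *
 *     sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     sd_event_add_time(e, &src, CLOCK_MONOTONIC,
 *                       now_usec + 5 * USEC_PER_SEC, 0,
 *                       on_time, NULL);
 */
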
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        struct signal_data *d;

        assert_return(e, -EINVAL);
        assert_return(sig > 0, -EINVAL);
        assert_return(sig < _NSIG, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

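/* Illustrative only (not part of the original file): as the sigismember()
 * check above implies, the signal has to be blocked in the calling thread
 * before it can be added as an event source. A typical caller might do:
 *
 *     sigset_t ss;
 *
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     sigaddset(&ss, SIGINT);
 *     pthread_sigmask(SIG_BLOCK, &ss, NULL);
 *
 *     sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 *     sd_event_add_signal(e, NULL, SIGINT, NULL, NULL);
 *
 * Passing a NULL callback falls back to signal_exit_callback(), i.e. the
 * loop exits when the signal arrives, as implemented above.
 */
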
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);

        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                return r;
        }

        e->need_process_child = true;

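/* Illustrative only (not part of the original file): watching a forked
 * child. SIGCHLD has to be blocked first (just like for
 * sd_event_add_signal() above), WEXITED is the usual option, and "on_child"
 * is a hypothetical sd_event_child_handler_t that inspects si->si_status.
 *
 *     pid_t pid = fork();
 *     if (pid == 0) {
 *             execl("/bin/true", "true", (char*) NULL);
 *             _exit(EXIT_FAILURE);
 *     }
 *
 *     sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 */
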
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);

        s = source_new(e, !ret, SOURCE_POST);

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit = prioq_new(exit_prioq_compare);

        s = source_new(e, !ret, SOURCE_EXIT);

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);

_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);

_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
        assert(s->n_ref >= 1);

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->enabled == SD_EVENT_OFF) {
                s->io.registered = false;
        } else {
                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
        }

        s->io.events = events;
        source_set_pending(s, false);

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                source_io_unregister(s);

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                }

                event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                assert(s->event->n_enabled_child_sources > 0);
                s->event->n_enabled_child_sources--;

                event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

                assert_not_reached("Wut? I shouldn't exist.");

        } else {

                r = source_io_register(s, m, s->io.events);

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        d = event_get_clock_data(s->event, s->type);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                }

                r = event_make_signal_data(s->event, s->signal.sig, NULL);

                s->enabled = SD_EVENT_OFF;
                event_gc_signal_data(s->event, &s->priority, s->signal.sig);

                if (s->enabled == SD_EVENT_OFF)
                        s->event->n_enabled_child_sources++;

                r = event_make_signal_data(s->event, SIGCHLD, NULL);

                s->enabled = SD_EVENT_OFF;
                s->event->n_enabled_child_sources--;
                event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

                assert_not_reached("Wut? I shouldn't exist.");
        }

        prioq_reshuffle(s->event->pending, s, &s->pending_index);

        prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);

        s->prepare = callback;

        if (callback)
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
        else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        assert_return(s, NULL);

        s->userdata = userdata;

static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (_unlikely_(c < USEC_PER_MINUTE))
                return b;

        c -= USEC_PER_MINUTE;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (_unlikely_(c < USEC_PER_SEC*10))
                return b;

        c -= USEC_PER_SEC*10;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (_unlikely_(c < USEC_PER_SEC))
                return b;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (_unlikely_(c < USEC_PER_MSEC*250))
                return b;

        c -= USEC_PER_MSEC*250;

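/* Illustrative only (not part of the original file): a worked example of the
 * rounding above. With e->perturb == 13 * USEC_PER_SEC and a latest allowed
 * wake-up time b corresponding to 10:07:42, the first candidate is
 *
 *     c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb
 *
 * i.e. 10:07:13, the same "13 seconds past the minute" spot that every
 * machine deriving the same perturbation from its boot ID would pick. Since
 * 10:07:13 is not later than the deadline b it is a usable candidate; if it
 * also lies at or after the earliest time a it can be used directly,
 * otherwise the code falls back to the 10s, 1s and 250ms steps shown above.
 */
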
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;

        if (!d->needs_rearm)
                return 0;

        d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF) {

                if (d->next == USEC_INFINITY)
                        return 0;

                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);

                d->next = USEC_INFINITY;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);

        assert_se(d->fd >= 0);

        /* We don't want to disarm here, just mean some time looooong ago. */
        its.it_value.tv_sec = 0;
        its.it_value.tv_nsec = 1;

        timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);

static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (errno == EAGAIN || errno == EINTR)
                return 0;

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

static int process_child(sd_event *e) {
        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                }
        }

static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;

        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, so if we dequeue one and
           SIGCHLD is enqueued later we wouldn't know, but we might
           have higher priority children we care about, hence we need
           to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */

        struct signalfd_siginfo si;

        sd_event_source *s = NULL;

        n = read(d->fd, &si, sizeof(si));
        if (errno == EAGAIN || errno == EINTR)
                return read_one;

        if (_unlikely_(n != sizeof(si)))
                return -EIO;

        assert(si.ssi_signo < _NSIG);

        if (e->signal_sources)
                s = e->signal_sources[si.ssi_signo];

        s->signal.siginfo = si;

        r = source_set_pending(s, true);

static int source_dispatch(sd_event_source *s) {
        assert(s->pending || s->type == SOURCE_EXIT);

        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
        }

        if (s->type != SOURCE_POST) {

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        s->dispatching = true;

        r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);

        r = s->signal.callback(s, &s->signal.siginfo, s->userdata);

        case SOURCE_CHILD: {
                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
        }

        r = s->defer.callback(s, s->userdata);

        r = s->post.callback(s, s->userdata);

        r = s->exit.callback(s, s->userdata);

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");

        s->dispatching = false;

        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        sd_event_source_set_enabled(s, SD_EVENT_OFF);

static int event_prepare(sd_event *e) {

        for (;;) {
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (s->description)
                        log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);

                sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

static int dispatch_exit(sd_event *e) {
        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        e->state = SD_EVENT_EXITING;

        r = source_dispatch(p);

        e->state = SD_EVENT_INITIAL;

static sd_event_source* event_next_pending(sd_event *e) {
        p = prioq_peek(e->pending);

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};

        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);

static int process_watchdog(sd_event *e) {

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

_public_ int sd_event_prepare(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        r = event_prepare(e);

        r = event_arm_timer(e, &e->realtime);

        r = event_arm_timer(e, &e->boottime);

        r = event_arm_timer(e, &e->monotonic);

        r = event_arm_timer(e, &e->realtime_alarm);

        r = event_arm_timer(e, &e->boottime_alarm);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (errno == EINTR) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
        }

        r = process_watchdog(e);

        r = process_timer(e, e->timestamp.realtime, &e->realtime);

        r = process_timer(e, e->timestamp_boottime, &e->boottime);

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);

        if (e->need_process_child) {
                r = process_child(e);
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        e->state = SD_EVENT_INITIAL;

_public_ int sd_event_dispatch(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (r > 0) {
                /* There's something now, then let's dispatch it */
                r = sd_event_dispatch(e);
        }

_public_ int sd_event_loop(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
        }

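/* Illustrative only (not part of the original file): the typical way these
 * entry points are combined by a caller. sd_event_loop() simply repeats the
 * prepare/wait/dispatch cycle of sd_event_run() until sd_event_exit() has
 * driven the loop into SD_EVENT_FINISHED. "fd" and "on_io" are assumed to
 * exist, and SIGTERM must be blocked first, as discussed at
 * sd_event_add_signal() above.
 *
 *     sd_event *e = NULL;
 *
 *     sd_event_default(&e);
 *     sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 *     sd_event_add_io(e, NULL, fd, EPOLLIN, on_io, NULL);
 *     sd_event_loop(e);
 *     sd_event_unref(e);
 */
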
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!dual_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        switch (clock) {

        case CLOCK_REALTIME:
        case CLOCK_REALTIME_ALARM:
                *usec = e->timestamp.realtime;
                break;

        case CLOCK_MONOTONIC:
                *usec = e->timestamp.monotonic;
                break;

        case CLOCK_BOOTTIME:
        case CLOCK_BOOTTIME_ALARM:
                *usec = e->timestamp_boottime;
                break;
        }

_public_ int sd_event_default(sd_event **ret) {

        static thread_local sd_event *default_event = NULL;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);

        e->default_event_ptr = &default_event;

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                r = sd_watchdog_enabled(false, &e->watchdog_period);

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);

                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);

_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}