src/basic/barrier.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <errno.h>
  21 #include <fcntl.h>
  22 #include <poll.h>
  23 #include <stdbool.h>
  24 #include <stdint.h>
  25 #include <stdlib.h>
  26 #include <sys/eventfd.h>
  27 #include <sys/types.h>
  28 #include <unistd.h>
  29
  30 #include "barrier.h"
  31 #include "fd-util.h"
  32 #include "macro.h"
  33
  34 /**
  35  * Barriers
  36  * This barrier implementation provides a simple synchronization method based
  37  * on file-descriptors that can safely be used between threads and processes. A
  38  * barrier object contains 2 shared counters based on eventfd. Both processes
  39  * can now place barriers and wait for the other end to reach a random or
  40  * specific barrier.
  41  * Barriers are numbered, so you can either wait for the other end to reach any
  42  * barrier or the last barrier that you placed. This way, you can use barriers
  43  * for one-way *and* full synchronization. Note that even though barriers are
  44  * numbered, these numbers are internal and recycled once both sides reached the
  45  * same barrier (implemented as a simple signed counter). It is thus not
  46  * possible to address barriers by their ID.
  47  *
  48  * Barrier-API: Both ends can place as many barriers via barrier_place() as
  49  * they want and each pair of barriers on both sides will be implicitly linked.
  50  * Each side can use the barrier_wait/sync_*() family of calls to wait for the
  51  * other side to place a specific barrier. barrier_wait_next() waits until the
  52  * other side calls barrier_place(). No links between the barriers are
  53  * considered and this simply serves as most basic asynchronous barrier.
  54  * barrier_sync_next() is like barrier_wait_next() and waits for the other side
  55  * to place their next barrier via barrier_place(). However, it only waits for
  56  * barriers that are linked to a barrier we already placed. If the other side
  57  * already placed more barriers than we did, barrier_sync_next() returns
  58  * immediately.
  59  * barrier_sync() extends barrier_sync_next() and waits until the other end
  60  * placed as many barriers via barrier_place() as we did. If they already placed
  61  * as many as we did (or more), it returns immediately.
  62  *
  63  * In addition to basic barriers, an abortion event is available.
  64  * barrier_abort() places an abortion event that cannot be undone. An abortion
  65  * immediately cancels all placed barriers and replaces them. Any running and
  66  * following wait/sync call besides barrier_wait_abortion() will immediately
  67  * return false on both sides (otherwise, they always return true).
  68  * barrier_abort() can be called multiple times on both ends and will be a
  69  * no-op if already called on this side.
  70  * barrier_wait_abortion() can be used to wait for the other side to call
  71  * barrier_abort() and is the only wait/sync call that does not return
  72  * immediately if we aborted ourselves. It only returns once the other side
  73  * called barrier_abort().
  74  *
  75  * Barriers can be used for in-process and inter-process synchronization.
  76  * However, for in-process synchronization you could just use mutexes.
  77  * Therefore, main target is IPC and we require both sides to *not* share the FD
  78  * table. If that's given, barriers provide target tracking: If the remote side
  79  * exit()s, an abortion event is implicitly queued on the other side. This way,
  80  * a sync/wait call will be woken up if the remote side crashed or exited
  81  * unexpectedly. However, note that these abortion events are only queued if the
  82  * barrier-queue has been drained. Therefore, it is safe to place a barrier and
  83  * exit. The other side can safely wait on the barrier even though the exit
  84  * queued an abortion event. Usually, the abortion event would overwrite the
  85  * barrier, however, that's not true for exit-abortion events. Those are only
  86  * queued if the barrier-queue is drained (thus, the receiving side has placed
  87  * more barriers than the remote side).
  88  */
  89
  90 /**
  91  * barrier_create() - Initialize a barrier object
  92  * @obj: barrier to initialize
  93  *
  94  * This initializes a barrier object. The caller is responsible of allocating
  95  * the memory and keeping it valid. The memory does not have to be zeroed
  96  * beforehand.
  97  * Two eventfd objects are allocated for each barrier. If allocation fails, an
  98  * error is returned.
  99  *
 100  * If this function fails, the barrier is reset to an invalid state so it is
 101  * safe to call barrier_destroy() on the object regardless whether the
 102  * initialization succeeded or not.
 103  *
 104  * The caller is responsible to destroy the object via barrier_destroy() before
 105  * releasing the underlying memory.
 106  *
 107  * Returns: 0 on success, negative error code on failure.
 108  */
 109 int barrier_create(Barrier *b) {
 110         _cleanup_(barrier_destroyp) Barrier *staging = b;
 111         int r;
 112
 113         assert(b);
 114
 115         b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 116         if (b->me < 0)
 117                 return -errno;
 118
 119         b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 120         if (b->them < 0)
 121                 return -errno;
 122
 123         r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
 124         if (r < 0)
 125                 return -errno;
 126
 127         staging = NULL;
 128         return 0;
 129 }
 130
 131 /**
 132  * barrier_destroy() - Destroy a barrier object
 133  * @b: barrier to destroy or NULL
 134  *
 135  * This destroys a barrier object that has previously been passed to
 136  * barrier_create(). The object is released and reset to invalid
 137  * state. Therefore, it is safe to call barrier_destroy() multiple
 138  * times or even if barrier_create() failed. However, barrier must be
 139  * always initialized with BARRIER_NULL.
 140  *
 141  * If @b is NULL, this is a no-op.
 142  */
 143 void barrier_destroy(Barrier *b) {
 144         if (!b)
 145                 return;
 146
 147         b->me = safe_close(b->me);
 148         b->them = safe_close(b->them);
 149         safe_close_pair(b->pipe);
 150         b->barriers = 0;
 151 }
 152
 153 /**
 154  * barrier_set_role() - Set the local role of the barrier
 155  * @b: barrier to operate on
 156  * @role: role to set on the barrier
 157  *
 158  * This sets the roles on a barrier object. This is needed to know
 159  * which side of the barrier you're on. Usually, the parent creates
 160  * the barrier via barrier_create() and then calls fork() or clone().
 161  * Therefore, the FDs are duplicated and the child retains the same
 162  * barrier object.
 163  *
 164  * Both sides need to call barrier_set_role() after fork() or clone()
 165  * are done. If this is not done, barriers will not work correctly.
 166  *
 167  * Note that barriers could be supported without fork() or clone(). However,
 168  * this is currently not needed so it hasn't been implemented.
 169  */
 170 void barrier_set_role(Barrier *b, unsigned int role) {
 171         int fd;
 172
 173         assert(b);
 174         assert(role == BARRIER_PARENT || role == BARRIER_CHILD);
 175         /* make sure this is only called once */
 176         assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
 177
 178         if (role == BARRIER_PARENT)
 179                 b->pipe[1] = safe_close(b->pipe[1]);
 180         else {
 181                 b->pipe[0] = safe_close(b->pipe[0]);
 182
 183                 /* swap me/them for children */
 184                 fd = b->me;
 185                 b->me = b->them;
 186                 b->them = fd;
 187         }
 188 }
 189
 190 /* places barrier; returns false if we aborted, otherwise true */
 191 static bool barrier_write(Barrier *b, uint64_t buf) {
 192         ssize_t len;
 193
 194         /* prevent new sync-points if we already aborted */
 195         if (barrier_i_aborted(b))
 196                 return false;
 197
 198         assert(b->me >= 0);
 199         do {
 200                 len = write(b->me, &buf, sizeof(buf));
 201         } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
 202
 203         if (len != sizeof(buf))
 204                 goto error;
 205
 206         /* lock if we aborted */
 207         if (buf >= (uint64_t)BARRIER_ABORTION) {
 208                 if (barrier_they_aborted(b))
 209                         b->barriers = BARRIER_WE_ABORTED;
 210                 else
 211                         b->barriers = BARRIER_I_ABORTED;
 212         } else if (!barrier_is_aborted(b))
 213                 b->barriers += buf;
 214
 215         return !barrier_i_aborted(b);
 216
 217 error:
 218         /* If there is an unexpected error, we have to make this fatal. There
 219          * is no way we can recover from sync-errors. Therefore, we close the
 220          * pipe-ends and treat this as abortion. The other end will notice the
 221          * pipe-close and treat it as abortion, too. */
 222
 223         safe_close_pair(b->pipe);
 224         b->barriers = BARRIER_WE_ABORTED;
 225         return false;
 226 }
 227
 228 /* waits for barriers; returns false if they aborted, otherwise true */
 229 static bool barrier_read(Barrier *b, int64_t comp) {
 230         if (barrier_they_aborted(b))
 231                 return false;
 232
 233         while (b->barriers > comp) {
 234                 struct pollfd pfd[2] = {
 235                         { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
 236                           .events = POLLHUP },
 237                         { .fd = b->them,
 238                           .events = POLLIN }};
 239                 uint64_t buf;
 240                 int r;
 241
 242                 r = poll(pfd, 2, -1);
 243                 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
 244                         continue;
 245                 else if (r < 0)
 246                         goto error;
 247
 248                 if (pfd[1].revents) {
 249                         ssize_t len;
 250
 251                         /* events on @them signal new data for us */
 252                         len = read(b->them, &buf, sizeof(buf));
 253                         if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
 254                                 continue;
 255
 256                         if (len != sizeof(buf))
 257                                 goto error;
 258                 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
 259                         /* POLLHUP on the pipe tells us the other side exited.
 260                          * We treat this as implicit abortion. But we only
 261                          * handle it if there's no event on the eventfd. This
 262                          * guarantees that exit-abortions do not overwrite real
 263                          * barriers. */
 264                         buf = BARRIER_ABORTION;
 265                 else
 266                         continue;
 267
 268                 /* lock if they aborted */
 269                 if (buf >= (uint64_t)BARRIER_ABORTION) {
 270                         if (barrier_i_aborted(b))
 271                                 b->barriers = BARRIER_WE_ABORTED;
 272                         else
 273                                 b->barriers = BARRIER_THEY_ABORTED;
 274                 } else if (!barrier_is_aborted(b))
 275                         b->barriers -= buf;
 276         }
 277
 278         return !barrier_they_aborted(b);
 279
 280 error:
 281         /* If there is an unexpected error, we have to make this fatal. There
 282          * is no way we can recover from sync-errors. Therefore, we close the
 283          * pipe-ends and treat this as abortion. The other end will notice the
 284          * pipe-close and treat it as abortion, too. */
 285
 286         safe_close_pair(b->pipe);
 287         b->barriers = BARRIER_WE_ABORTED;
 288         return false;
 289 }
 290
 291 /**
 292  * barrier_place() - Place a new barrier
 293  * @b: barrier object
 294  *
 295  * This places a new barrier on the barrier object. If either side already
 296  * aborted, this is a no-op and returns "false". Otherwise, the barrier is
 297  * placed and this returns "true".
 298  *
 299  * Returns: true if barrier was placed, false if either side aborted.
 300  */
 301 bool barrier_place(Barrier *b) {
 302         assert(b);
 303
 304         if (barrier_is_aborted(b))
 305                 return false;
 306
 307         barrier_write(b, BARRIER_SINGLE);
 308         return true;
 309 }
 310
 311 /**
 312  * barrier_abort() - Abort the synchronization
 313  * @b: barrier object to abort
 314  *
 315  * This aborts the barrier-synchronization. If barrier_abort() was already
 316  * called on this side, this is a no-op. Otherwise, the barrier is put into the
 317  * ABORT-state and will stay there. The other side is notified about the
 318  * abortion. Any following attempt to place normal barriers or to wait on normal
 319  * barriers will return immediately as "false".
 320  *
 321  * You can wait for the other side to call barrier_abort(), too. Use
 322  * barrier_wait_abortion() for that.
 323  *
 324  * Returns: false if the other side already aborted, true otherwise.
 325  */
 326 bool barrier_abort(Barrier *b) {
 327         assert(b);
 328
 329         barrier_write(b, BARRIER_ABORTION);
 330         return !barrier_they_aborted(b);
 331 }
 332
 333 /**
 334  * barrier_wait_next() - Wait for the next barrier of the other side
 335  * @b: barrier to operate on
 336  *
 337  * This waits until the other side places its next barrier. This is independent
 338  * of any barrier-links and just waits for any next barrier of the other side.
 339  *
 340  * If either side aborted, this returns false.
 341  *
 342  * Returns: false if either side aborted, true otherwise.
 343  */
 344 bool barrier_wait_next(Barrier *b) {
 345         assert(b);
 346
 347         if (barrier_is_aborted(b))
 348                 return false;
 349
 350         barrier_read(b, b->barriers - 1);
 351         return !barrier_is_aborted(b);
 352 }
 353
 354 /**
 355  * barrier_wait_abortion() - Wait for the other side to abort
 356  * @b: barrier to operate on
 357  *
 358  * This waits until the other side called barrier_abort(). This can be called
 359  * regardless whether the local side already called barrier_abort() or not.
 360  *
 361  * If the other side has already aborted, this returns immediately.
 362  *
 363  * Returns: false if the local side aborted, true otherwise.
 364  */
 365 bool barrier_wait_abortion(Barrier *b) {
 366         assert(b);
 367
 368         barrier_read(b, BARRIER_THEY_ABORTED);
 369         return !barrier_i_aborted(b);
 370 }
 371
 372 /**
 373  * barrier_sync_next() - Wait for the other side to place a next linked barrier
 374  * @b: barrier to operate on
 375  *
 376  * This is like barrier_wait_next() and waits for the other side to call
 377  * barrier_place(). However, this only waits for linked barriers. That means, if
 378  * the other side already placed more barriers than (or as much as) we did, this
 379  * returns immediately instead of waiting.
 380  *
 381  * If either side aborted, this returns false.
 382  *
 383  * Returns: false if either side aborted, true otherwise.
 384  */
 385 bool barrier_sync_next(Barrier *b) {
 386         assert(b);
 387
 388         if (barrier_is_aborted(b))
 389                 return false;
 390
 391         barrier_read(b, MAX((int64_t)0, b->barriers - 1));
 392         return !barrier_is_aborted(b);
 393 }
 394
 395 /**
 396  * barrier_sync() - Wait for the other side to place as many barriers as we did
 397  * @b: barrier to operate on
 398  *
 399  * This is like barrier_sync_next() but waits for the other side to call
 400  * barrier_place() as often as we did (in total). If they already placed as much
 401  * as we did (or more), this returns immediately instead of waiting.
 402  *
 403  * If either side aborted, this returns false.
 404  *
 405  * Returns: false if either side aborted, true otherwise.
 406  */
 407 bool barrier_sync(Barrier *b) {
 408         assert(b);
 409
 410         if (barrier_is_aborted(b))
 411                 return false;
 412
 413         barrier_read(b, 0);
 414         return !barrier_is_aborted(b);
 415 }