]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - ipc/sem.c
sysvipc/sem: mitigate semnum index against spectre v1
[mirror_ubuntu-bionic-kernel.git] / ipc / sem.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * linux/ipc/sem.c
4 * Copyright (C) 1992 Krishna Balasubramanian
5 * Copyright (C) 1995 Eric Schenk, Bruno Haible
6 *
7 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
8 *
9 * SMP-threaded, sysctl's added
10 * (c) 1999 Manfred Spraul <manfred@colorfullife.com>
11 * Enforced range limit on SEM_UNDO
12 * (c) 2001 Red Hat Inc
13 * Lockless wakeup
14 * (c) 2003 Manfred Spraul <manfred@colorfullife.com>
15 * (c) 2016 Davidlohr Bueso <dave@stgolabs.net>
16 * Further wakeup optimizations, documentation
17 * (c) 2010 Manfred Spraul <manfred@colorfullife.com>
18 *
19 * support for audit of ipc object properties and permission changes
20 * Dustin Kirkland <dustin.kirkland@us.ibm.com>
21 *
22 * namespaces support
23 * OpenVZ, SWsoft Inc.
24 * Pavel Emelianov <xemul@openvz.org>
25 *
26 * Implementation notes: (May 2010)
27 * This file implements System V semaphores.
28 *
29 * User space visible behavior:
30 * - FIFO ordering for semop() operations (just FIFO, not starvation
31 * protection)
32 * - multiple semaphore operations that alter the same semaphore in
33 * one semop() are handled.
34 * - sem_ctime (time of last semctl()) is updated in the IPC_SET, SETVAL and
35 * SETALL calls.
36 * - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
37 * - undo adjustments at process exit are limited to 0..SEMVMX.
38 * - namespaces are supported.
39 * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
40 * to /proc/sys/kernel/sem.
41 * - statistics about the usage are reported in /proc/sysvipc/sem.
42 *
43 * Internals:
44 * - scalability:
45 * - all global variables are read-mostly.
46 * - semop() calls and semctl(RMID) are synchronized by RCU.
47 * - most operations do write operations (actually: spin_lock calls) to
48 * the per-semaphore array structure.
49 * Thus: Perfect SMP scaling between independent semaphore arrays.
50 * If multiple semaphores in one array are used, then cache line
51 * thrashing on the semaphore array spinlock will limit the scaling.
52 * - semncnt and semzcnt are calculated on demand in count_semcnt()
53 * - the task that performs a successful semop() scans the list of all
54 * sleeping tasks and completes any pending operations that can be fulfilled.
55 * Semaphores are actively given to waiting tasks (necessary for FIFO).
56 * (see update_queue())
57 * - To improve the scalability, the actual wake-up calls are performed after
58 * dropping all locks. (see wake_up_sem_queue_prepare())
59 * - All work is done by the waker, the woken up task does not have to do
60 * anything - not even acquiring a lock or dropping a refcount.
61 * - A woken up task may not even touch the semaphore array anymore, it may
62 * have been destroyed already by a semctl(RMID).
63 * - UNDO values are stored in an array (one per process and per
64 * semaphore array, lazily allocated). For backwards compatibility, multiple
65 * modes for the UNDO variables are supported (per process, per thread)
66 * (see copy_semundo, CLONE_SYSVSEM)
67 * - There are two lists of the pending operations: a per-array list
68 * and per-semaphore list (stored in the array). This allows to achieve FIFO
69 * ordering without always scanning all pending operations.
70 * The worst-case behavior is nevertheless O(N^2) for N wakeups.
71 */
72
73 #include <linux/slab.h>
74 #include <linux/spinlock.h>
75 #include <linux/init.h>
76 #include <linux/proc_fs.h>
77 #include <linux/time.h>
78 #include <linux/security.h>
79 #include <linux/syscalls.h>
80 #include <linux/audit.h>
81 #include <linux/capability.h>
82 #include <linux/seq_file.h>
83 #include <linux/rwsem.h>
84 #include <linux/nsproxy.h>
85 #include <linux/ipc_namespace.h>
86 #include <linux/sched/wake_q.h>
87 #include <linux/nospec.h>
88
89 #include <linux/uaccess.h>
90 #include "util.h"
91
92
93 /* One queue for each sleeping process in the system. */
94 struct sem_queue {
95 struct list_head list; /* queue of pending operations */
96 struct task_struct *sleeper; /* this process */
97 struct sem_undo *undo; /* undo structure */
98 int pid; /* process id of requesting process */
99 int status; /* completion status of operation */
100 struct sembuf *sops; /* array of pending operations */
101 struct sembuf *blocking; /* the operation that blocked */
102 int nsops; /* number of operations */
103 bool alter; /* does *sops alter the array? */
104 bool dupsop; /* sops on more than one sem_num */
105 };
106
107 /* Each task has a list of undo requests. They are executed automatically
108 * when the process exits.
109 */
110 struct sem_undo {
111 struct list_head list_proc; /* per-process list: *
112 * all undos from one process
113 * rcu protected */
114 struct rcu_head rcu; /* rcu struct for sem_undo */
115 struct sem_undo_list *ulp; /* back ptr to sem_undo_list */
116 struct list_head list_id; /* per semaphore array list:
117 * all undos for one array */
118 int semid; /* semaphore set identifier */
119 short *semadj; /* array of adjustments */
120 /* one per semaphore */
121 };
122
123 /* sem_undo_list controls shared access to the list of sem_undo structures
124 * that may be shared among all a CLONE_SYSVSEM task group.
125 */
126 struct sem_undo_list {
127 refcount_t refcnt;
128 spinlock_t lock;
129 struct list_head list_proc;
130 };
131
132
/* Shorthand for the IPC_SEM_IDS entry of the ids[] array of namespace @ns. */
#define sem_ids(ns)	((ns)->ids[IPC_SEM_IDS])

/* Forward declarations of helpers that are referenced before their body. */
static int newary(struct ipc_namespace *ns, struct ipc_params *params);
static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * Going from the mode suitable for simple ops to the mode for complex ops
 * is costly. Therefore some hysteresis is applied before switching back.
 */
#define USE_GLOBAL_LOCK_HYSTERESIS	10
150
151 /*
152 * Locking:
153 * a) global sem_lock() for read/write
154 * sem_undo.id_next,
155 * sem_array.complex_count,
156 * sem_array.pending{_alter,_const},
157 * sem_array.sem_undo
158 *
159 * b) global or semaphore sem_lock() for read/write:
160 * sem_array.sems[i].pending_{const,alter}:
161 *
162 * c) special:
163 * sem_undo_list.list_proc:
164 * * undo_list->lock for write
165 * * rcu for read
166 * use_global_lock:
167 * * global sem_lock() for write
168 * * either local or global sem_lock() for read.
169 *
170 * Memory ordering:
171 * Most ordering is enforced by using spin_lock() and spin_unlock().
172 * The special case is use_global_lock:
173 * Setting it from non-zero to 0 is a RELEASE, this is ensured by
174 * using smp_store_release().
175 * Testing if it is non-zero is an ACQUIRE, this is ensured by using
176 * smp_load_acquire().
177 * Setting it from 0 to non-zero must be ordered with regards to
178 * this smp_load_acquire(), this is guaranteed because the smp_load_acquire()
179 * is inside a spin_lock() and after a write from 0 to non-zero a
180 * spin_lock()+spin_unlock() is done.
181 */
182
/* Named aliases for the four slots of the per-namespace sem_ctls[] array. */
#define sc_semmsl	sem_ctls[0]
#define sc_semmns	sem_ctls[1]
#define sc_semopm	sem_ctls[2]
#define sc_semmni	sem_ctls[3]
187
188 int sem_init_ns(struct ipc_namespace *ns)
189 {
190 ns->sc_semmsl = SEMMSL;
191 ns->sc_semmns = SEMMNS;
192 ns->sc_semopm = SEMOPM;
193 ns->sc_semmni = SEMMNI;
194 ns->used_sems = 0;
195 return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
196 }
197
#ifdef CONFIG_IPC_NS
/*
 * sem_exit_ns - release the semaphore state of namespace @ns.
 *
 * Hands every remaining array to freeary() via free_ipcs(), then destroys
 * the idr and the key hash table of the namespace's semaphore ids.
 */
void sem_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &sem_ids(ns), freeary);
	idr_destroy(&sem_ids(ns).ipcs_idr);
	rhashtable_destroy(&sem_ids(ns).key_ht);
}
#endif
206
207 int __init sem_init(void)
208 {
209 const int err = sem_init_ns(&init_ipc_ns);
210
211 ipc_init_proc_interface("sysvipc/sem",
212 " key semid perms nsems uid gid cuid cgid otime ctime\n",
213 IPC_SEM_IDS, sysvipc_sem_proc_show);
214 return err;
215 }
216
217 /**
218 * unmerge_queues - unmerge queues, if possible.
219 * @sma: semaphore array
220 *
221 * The function unmerges the wait queues if complex_count is 0.
222 * It must be called prior to dropping the global semaphore array lock.
223 */
224 static void unmerge_queues(struct sem_array *sma)
225 {
226 struct sem_queue *q, *tq;
227
228 /* complex operations still around? */
229 if (sma->complex_count)
230 return;
231 /*
232 * We will switch back to simple mode.
233 * Move all pending operation back into the per-semaphore
234 * queues.
235 */
236 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
237 struct sem *curr;
238 curr = &sma->sems[q->sops[0].sem_num];
239
240 list_add_tail(&q->list, &curr->pending_alter);
241 }
242 INIT_LIST_HEAD(&sma->pending_alter);
243 }
244
245 /**
246 * merge_queues - merge single semop queues into global queue
247 * @sma: semaphore array
248 *
249 * This function merges all per-semaphore queues into the global queue.
250 * It is necessary to achieve FIFO ordering for the pending single-sop
251 * operations when a multi-semop operation must sleep.
252 * Only the alter operations must be moved, the const operations can stay.
253 */
254 static void merge_queues(struct sem_array *sma)
255 {
256 int i;
257 for (i = 0; i < sma->sem_nsems; i++) {
258 struct sem *sem = &sma->sems[i];
259
260 list_splice_init(&sem->pending_alter, &sma->pending_alter);
261 }
262 }
263
264 static void sem_rcu_free(struct rcu_head *head)
265 {
266 struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu);
267 struct sem_array *sma = container_of(p, struct sem_array, sem_perm);
268
269 security_sem_free(sma);
270 kvfree(sma);
271 }
272
273 /*
274 * Enter the mode suitable for non-simple operations:
275 * Caller must own sem_perm.lock.
276 */
277 static void complexmode_enter(struct sem_array *sma)
278 {
279 int i;
280 struct sem *sem;
281
282 if (sma->use_global_lock > 0) {
283 /*
284 * We are already in global lock mode.
285 * Nothing to do, just reset the
286 * counter until we return to simple mode.
287 */
288 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
289 return;
290 }
291 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
292
293 for (i = 0; i < sma->sem_nsems; i++) {
294 sem = &sma->sems[i];
295 spin_lock(&sem->lock);
296 spin_unlock(&sem->lock);
297 }
298 }
299
300 /*
301 * Try to leave the mode that disallows simple operations:
302 * Caller must own sem_perm.lock.
303 */
304 static void complexmode_tryleave(struct sem_array *sma)
305 {
306 if (sma->complex_count) {
307 /* Complex ops are sleeping.
308 * We must stay in complex mode
309 */
310 return;
311 }
312 if (sma->use_global_lock == 1) {
313 /*
314 * Immediately after setting use_global_lock to 0,
315 * a simple op can start. Thus: all memory writes
316 * performed by the current operation must be visible
317 * before we set use_global_lock to 0.
318 */
319 smp_store_release(&sma->use_global_lock, 0);
320 } else {
321 sma->use_global_lock--;
322 }
323 }
324
325 #define SEM_GLOBAL_LOCK (-1)
326 /*
327 * If the request contains only one semaphore operation, and there are
328 * no complex transactions pending, lock only the semaphore involved.
329 * Otherwise, lock the entire semaphore array, since we either have
330 * multiple semaphores in our own semops, or we need to look at
331 * semaphores from other pending complex operations.
332 */
333 static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
334 int nsops)
335 {
336 struct sem *sem;
337 int idx;
338
339 if (nsops != 1) {
340 /* Complex operation - acquire a full lock */
341 ipc_lock_object(&sma->sem_perm);
342
343 /* Prevent parallel simple ops */
344 complexmode_enter(sma);
345 return SEM_GLOBAL_LOCK;
346 }
347
348 /*
349 * Only one semaphore affected - try to optimize locking.
350 * Optimized locking is possible if no complex operation
351 * is either enqueued or processed right now.
352 *
353 * Both facts are tracked by use_global_mode.
354 */
355 idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
356 sem = &sma->sems[idx];
357
358 /*
359 * Initial check for use_global_lock. Just an optimization,
360 * no locking, no memory barrier.
361 */
362 if (!sma->use_global_lock) {
363 /*
364 * It appears that no complex operation is around.
365 * Acquire the per-semaphore lock.
366 */
367 spin_lock(&sem->lock);
368
369 /* pairs with smp_store_release() */
370 if (!smp_load_acquire(&sma->use_global_lock)) {
371 /* fast path successful! */
372 return sops->sem_num;
373 }
374 spin_unlock(&sem->lock);
375 }
376
377 /* slow path: acquire the full lock */
378 ipc_lock_object(&sma->sem_perm);
379
380 if (sma->use_global_lock == 0) {
381 /*
382 * The use_global_lock mode ended while we waited for
383 * sma->sem_perm.lock. Thus we must switch to locking
384 * with sem->lock.
385 * Unlike in the fast path, there is no need to recheck
386 * sma->use_global_lock after we have acquired sem->lock:
387 * We own sma->sem_perm.lock, thus use_global_lock cannot
388 * change.
389 */
390 spin_lock(&sem->lock);
391
392 ipc_unlock_object(&sma->sem_perm);
393 return sops->sem_num;
394 } else {
395 /*
396 * Not a false alarm, thus continue to use the global lock
397 * mode. No need for complexmode_enter(), this was done by
398 * the caller that has set use_global_mode to non-zero.
399 */
400 return SEM_GLOBAL_LOCK;
401 }
402 }
403
404 static inline void sem_unlock(struct sem_array *sma, int locknum)
405 {
406 if (locknum == SEM_GLOBAL_LOCK) {
407 unmerge_queues(sma);
408 complexmode_tryleave(sma);
409 ipc_unlock_object(&sma->sem_perm);
410 } else {
411 struct sem *sem = &sma->sems[locknum];
412 spin_unlock(&sem->lock);
413 }
414 }
415
416 /*
417 * sem_lock_(check_) routines are called in the paths where the rwsem
418 * is not held.
419 *
420 * The caller holds the RCU read lock.
421 */
422 static inline struct sem_array *sem_obtain_object(struct ipc_namespace *ns, int id)
423 {
424 struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&sem_ids(ns), id);
425
426 if (IS_ERR(ipcp))
427 return ERR_CAST(ipcp);
428
429 return container_of(ipcp, struct sem_array, sem_perm);
430 }
431
432 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
433 int id)
434 {
435 struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&sem_ids(ns), id);
436
437 if (IS_ERR(ipcp))
438 return ERR_CAST(ipcp);
439
440 return container_of(ipcp, struct sem_array, sem_perm);
441 }
442
443 static inline void sem_lock_and_putref(struct sem_array *sma)
444 {
445 sem_lock(sma, NULL, -1);
446 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
447 }
448
449 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
450 {
451 ipc_rmid(&sem_ids(ns), &s->sem_perm);
452 }
453
454 static struct sem_array *sem_alloc(size_t nsems)
455 {
456 struct sem_array *sma;
457 size_t size;
458
459 if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
460 return NULL;
461
462 size = sizeof(*sma) + nsems * sizeof(sma->sems[0]);
463 sma = kvmalloc(size, GFP_KERNEL);
464 if (unlikely(!sma))
465 return NULL;
466
467 memset(sma, 0, size);
468
469 return sma;
470 }
471
472 /**
473 * newary - Create a new semaphore set
474 * @ns: namespace
475 * @params: ptr to the structure that contains key, semflg and nsems
476 *
477 * Called with sem_ids.rwsem held (as a writer)
478 */
479 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
480 {
481 int retval;
482 struct sem_array *sma;
483 key_t key = params->key;
484 int nsems = params->u.nsems;
485 int semflg = params->flg;
486 int i;
487
488 if (!nsems)
489 return -EINVAL;
490 if (ns->used_sems + nsems > ns->sc_semmns)
491 return -ENOSPC;
492
493 sma = sem_alloc(nsems);
494 if (!sma)
495 return -ENOMEM;
496
497 sma->sem_perm.mode = (semflg & S_IRWXUGO);
498 sma->sem_perm.key = key;
499
500 sma->sem_perm.security = NULL;
501 retval = security_sem_alloc(sma);
502 if (retval) {
503 kvfree(sma);
504 return retval;
505 }
506
507 for (i = 0; i < nsems; i++) {
508 INIT_LIST_HEAD(&sma->sems[i].pending_alter);
509 INIT_LIST_HEAD(&sma->sems[i].pending_const);
510 spin_lock_init(&sma->sems[i].lock);
511 }
512
513 sma->complex_count = 0;
514 sma->use_global_lock = USE_GLOBAL_LOCK_HYSTERESIS;
515 INIT_LIST_HEAD(&sma->pending_alter);
516 INIT_LIST_HEAD(&sma->pending_const);
517 INIT_LIST_HEAD(&sma->list_id);
518 sma->sem_nsems = nsems;
519 sma->sem_ctime = ktime_get_real_seconds();
520
521 /* ipc_addid() locks sma upon success. */
522 retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
523 if (retval < 0) {
524 call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
525 return retval;
526 }
527 ns->used_sems += nsems;
528
529 sem_unlock(sma, -1);
530 rcu_read_unlock();
531
532 return sma->sem_perm.id;
533 }
534
535
536 /*
537 * Called with sem_ids.rwsem and ipcp locked.
538 */
539 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
540 {
541 struct sem_array *sma;
542
543 sma = container_of(ipcp, struct sem_array, sem_perm);
544 return security_sem_associate(sma, semflg);
545 }
546
547 /*
548 * Called with sem_ids.rwsem and ipcp locked.
549 */
550 static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
551 struct ipc_params *params)
552 {
553 struct sem_array *sma;
554
555 sma = container_of(ipcp, struct sem_array, sem_perm);
556 if (params->u.nsems > sma->sem_nsems)
557 return -EINVAL;
558
559 return 0;
560 }
561
562 SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
563 {
564 struct ipc_namespace *ns;
565 static const struct ipc_ops sem_ops = {
566 .getnew = newary,
567 .associate = sem_security,
568 .more_checks = sem_more_checks,
569 };
570 struct ipc_params sem_params;
571
572 ns = current->nsproxy->ipc_ns;
573
574 if (nsems < 0 || nsems > ns->sc_semmsl)
575 return -EINVAL;
576
577 sem_params.key = key;
578 sem_params.flg = semflg;
579 sem_params.u.nsems = nsems;
580
581 return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
582 }
583
584 /**
585 * perform_atomic_semop[_slow] - Attempt to perform semaphore
586 * operations on a given array.
587 * @sma: semaphore array
588 * @q: struct sem_queue that describes the operation
589 *
590 * Caller blocking are as follows, based the value
591 * indicated by the semaphore operation (sem_op):
592 *
593 * (1) >0 never blocks.
594 * (2) 0 (wait-for-zero operation): semval is non-zero.
595 * (3) <0 attempting to decrement semval to a value smaller than zero.
596 *
597 * Returns 0 if the operation was possible.
598 * Returns 1 if the operation is impossible, the caller must sleep.
599 * Returns <0 for error codes.
600 */
601 static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q)
602 {
603 int result, sem_op, nsops, pid;
604 struct sembuf *sop;
605 struct sem *curr;
606 struct sembuf *sops;
607 struct sem_undo *un;
608
609 sops = q->sops;
610 nsops = q->nsops;
611 un = q->undo;
612
613 for (sop = sops; sop < sops + nsops; sop++) {
614 int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
615 curr = &sma->sems[idx];
616 sem_op = sop->sem_op;
617 result = curr->semval;
618
619 if (!sem_op && result)
620 goto would_block;
621
622 result += sem_op;
623 if (result < 0)
624 goto would_block;
625 if (result > SEMVMX)
626 goto out_of_range;
627
628 if (sop->sem_flg & SEM_UNDO) {
629 int undo = un->semadj[sop->sem_num] - sem_op;
630 /* Exceeding the undo range is an error. */
631 if (undo < (-SEMAEM - 1) || undo > SEMAEM)
632 goto out_of_range;
633 un->semadj[sop->sem_num] = undo;
634 }
635
636 curr->semval = result;
637 }
638
639 sop--;
640 pid = q->pid;
641 while (sop >= sops) {
642 sma->sems[sop->sem_num].sempid = pid;
643 sop--;
644 }
645
646 return 0;
647
648 out_of_range:
649 result = -ERANGE;
650 goto undo;
651
652 would_block:
653 q->blocking = sop;
654
655 if (sop->sem_flg & IPC_NOWAIT)
656 result = -EAGAIN;
657 else
658 result = 1;
659
660 undo:
661 sop--;
662 while (sop >= sops) {
663 sem_op = sop->sem_op;
664 sma->sems[sop->sem_num].semval -= sem_op;
665 if (sop->sem_flg & SEM_UNDO)
666 un->semadj[sop->sem_num] += sem_op;
667 sop--;
668 }
669
670 return result;
671 }
672
673 static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q)
674 {
675 int result, sem_op, nsops;
676 struct sembuf *sop;
677 struct sem *curr;
678 struct sembuf *sops;
679 struct sem_undo *un;
680
681 sops = q->sops;
682 nsops = q->nsops;
683 un = q->undo;
684
685 if (unlikely(q->dupsop))
686 return perform_atomic_semop_slow(sma, q);
687
688 /*
689 * We scan the semaphore set twice, first to ensure that the entire
690 * operation can succeed, therefore avoiding any pointless writes
691 * to shared memory and having to undo such changes in order to block
692 * until the operations can go through.
693 */
694 for (sop = sops; sop < sops + nsops; sop++) {
695 int idx = array_index_nospec(sop->sem_num, sma->sem_nsems);
696
697 curr = &sma->sems[idx];
698 sem_op = sop->sem_op;
699 result = curr->semval;
700
701 if (!sem_op && result)
702 goto would_block; /* wait-for-zero */
703
704 result += sem_op;
705 if (result < 0)
706 goto would_block;
707
708 if (result > SEMVMX)
709 return -ERANGE;
710
711 if (sop->sem_flg & SEM_UNDO) {
712 int undo = un->semadj[sop->sem_num] - sem_op;
713
714 /* Exceeding the undo range is an error. */
715 if (undo < (-SEMAEM - 1) || undo > SEMAEM)
716 return -ERANGE;
717 }
718 }
719
720 for (sop = sops; sop < sops + nsops; sop++) {
721 curr = &sma->sems[sop->sem_num];
722 sem_op = sop->sem_op;
723 result = curr->semval;
724
725 if (sop->sem_flg & SEM_UNDO) {
726 int undo = un->semadj[sop->sem_num] - sem_op;
727
728 un->semadj[sop->sem_num] = undo;
729 }
730 curr->semval += sem_op;
731 curr->sempid = q->pid;
732 }
733
734 return 0;
735
736 would_block:
737 q->blocking = sop;
738 return sop->sem_flg & IPC_NOWAIT ? -EAGAIN : 1;
739 }
740
741 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
742 struct wake_q_head *wake_q)
743 {
744 wake_q_add(wake_q, q->sleeper);
745 /*
746 * Rely on the above implicit barrier, such that we can
747 * ensure that we hold reference to the task before setting
748 * q->status. Otherwise we could race with do_exit if the
749 * task is awoken by an external event before calling
750 * wake_up_process().
751 */
752 WRITE_ONCE(q->status, error);
753 }
754
755 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
756 {
757 list_del(&q->list);
758 if (q->nsops > 1)
759 sma->complex_count--;
760 }
761
762 /** check_restart(sma, q)
763 * @sma: semaphore array
764 * @q: the operation that just completed
765 *
766 * update_queue is O(N^2) when it restarts scanning the whole queue of
767 * waiting operations. Therefore this function checks if the restart is
768 * really necessary. It is called after a previously waiting operation
769 * modified the array.
770 * Note that wait-for-zero operations are handled without restart.
771 */
772 static inline int check_restart(struct sem_array *sma, struct sem_queue *q)
773 {
774 /* pending complex alter operations are too difficult to analyse */
775 if (!list_empty(&sma->pending_alter))
776 return 1;
777
778 /* we were a sleeping complex operation. Too difficult */
779 if (q->nsops > 1)
780 return 1;
781
782 /* It is impossible that someone waits for the new value:
783 * - complex operations always restart.
784 * - wait-for-zero are handled seperately.
785 * - q is a previously sleeping simple operation that
786 * altered the array. It must be a decrement, because
787 * simple increments never sleep.
788 * - If there are older (higher priority) decrements
789 * in the queue, then they have observed the original
790 * semval value and couldn't proceed. The operation
791 * decremented to value - thus they won't proceed either.
792 */
793 return 0;
794 }
795
796 /**
797 * wake_const_ops - wake up non-alter tasks
798 * @sma: semaphore array.
799 * @semnum: semaphore that was modified.
800 * @wake_q: lockless wake-queue head.
801 *
802 * wake_const_ops must be called after a semaphore in a semaphore array
803 * was set to 0. If complex const operations are pending, wake_const_ops must
804 * be called with semnum = -1, as well as with the number of each modified
805 * semaphore.
806 * The tasks that must be woken up are added to @wake_q. The return code
807 * is stored in q->pid.
808 * The function returns 1 if at least one operation was completed successfully.
809 */
810 static int wake_const_ops(struct sem_array *sma, int semnum,
811 struct wake_q_head *wake_q)
812 {
813 struct sem_queue *q, *tmp;
814 struct list_head *pending_list;
815 int semop_completed = 0;
816
817 if (semnum == -1)
818 pending_list = &sma->pending_const;
819 else
820 pending_list = &sma->sems[semnum].pending_const;
821
822 list_for_each_entry_safe(q, tmp, pending_list, list) {
823 int error = perform_atomic_semop(sma, q);
824
825 if (error > 0)
826 continue;
827 /* operation completed, remove from queue & wakeup */
828 unlink_queue(sma, q);
829
830 wake_up_sem_queue_prepare(q, error, wake_q);
831 if (error == 0)
832 semop_completed = 1;
833 }
834
835 return semop_completed;
836 }
837
838 /**
839 * do_smart_wakeup_zero - wakeup all wait for zero tasks
840 * @sma: semaphore array
841 * @sops: operations that were performed
842 * @nsops: number of operations
843 * @wake_q: lockless wake-queue head
844 *
845 * Checks all required queue for wait-for-zero operations, based
846 * on the actual changes that were performed on the semaphore array.
847 * The function returns 1 if at least one operation was completed successfully.
848 */
849 static int do_smart_wakeup_zero(struct sem_array *sma, struct sembuf *sops,
850 int nsops, struct wake_q_head *wake_q)
851 {
852 int i;
853 int semop_completed = 0;
854 int got_zero = 0;
855
856 /* first: the per-semaphore queues, if known */
857 if (sops) {
858 for (i = 0; i < nsops; i++) {
859 int num = sops[i].sem_num;
860
861 if (sma->sems[num].semval == 0) {
862 got_zero = 1;
863 semop_completed |= wake_const_ops(sma, num, wake_q);
864 }
865 }
866 } else {
867 /*
868 * No sops means modified semaphores not known.
869 * Assume all were changed.
870 */
871 for (i = 0; i < sma->sem_nsems; i++) {
872 if (sma->sems[i].semval == 0) {
873 got_zero = 1;
874 semop_completed |= wake_const_ops(sma, i, wake_q);
875 }
876 }
877 }
878 /*
879 * If one of the modified semaphores got 0,
880 * then check the global queue, too.
881 */
882 if (got_zero)
883 semop_completed |= wake_const_ops(sma, -1, wake_q);
884
885 return semop_completed;
886 }
887
888
889 /**
890 * update_queue - look for tasks that can be completed.
891 * @sma: semaphore array.
892 * @semnum: semaphore that was modified.
893 * @wake_q: lockless wake-queue head.
894 *
895 * update_queue must be called after a semaphore in a semaphore array
896 * was modified. If multiple semaphores were modified, update_queue must
897 * be called with semnum = -1, as well as with the number of each modified
898 * semaphore.
899 * The tasks that must be woken up are added to @wake_q. The return code
900 * is stored in q->pid.
901 * The function internally checks if const operations can now succeed.
902 *
903 * The function return 1 if at least one semop was completed successfully.
904 */
905 static int update_queue(struct sem_array *sma, int semnum, struct wake_q_head *wake_q)
906 {
907 struct sem_queue *q, *tmp;
908 struct list_head *pending_list;
909 int semop_completed = 0;
910
911 if (semnum == -1)
912 pending_list = &sma->pending_alter;
913 else
914 pending_list = &sma->sems[semnum].pending_alter;
915
916 again:
917 list_for_each_entry_safe(q, tmp, pending_list, list) {
918 int error, restart;
919
920 /* If we are scanning the single sop, per-semaphore list of
921 * one semaphore and that semaphore is 0, then it is not
922 * necessary to scan further: simple increments
923 * that affect only one entry succeed immediately and cannot
924 * be in the per semaphore pending queue, and decrements
925 * cannot be successful if the value is already 0.
926 */
927 if (semnum != -1 && sma->sems[semnum].semval == 0)
928 break;
929
930 error = perform_atomic_semop(sma, q);
931
932 /* Does q->sleeper still need to sleep? */
933 if (error > 0)
934 continue;
935
936 unlink_queue(sma, q);
937
938 if (error) {
939 restart = 0;
940 } else {
941 semop_completed = 1;
942 do_smart_wakeup_zero(sma, q->sops, q->nsops, wake_q);
943 restart = check_restart(sma, q);
944 }
945
946 wake_up_sem_queue_prepare(q, error, wake_q);
947 if (restart)
948 goto again;
949 }
950 return semop_completed;
951 }
952
953 /**
954 * set_semotime - set sem_otime
955 * @sma: semaphore array
956 * @sops: operations that modified the array, may be NULL
957 *
958 * sem_otime is replicated to avoid cache line trashing.
959 * This function sets one instance to the current time.
960 */
961 static void set_semotime(struct sem_array *sma, struct sembuf *sops)
962 {
963 if (sops == NULL) {
964 sma->sems[0].sem_otime = get_seconds();
965 } else {
966 sma->sems[sops[0].sem_num].sem_otime =
967 get_seconds();
968 }
969 }
970
971 /**
972 * do_smart_update - optimized update_queue
973 * @sma: semaphore array
974 * @sops: operations that were performed
975 * @nsops: number of operations
976 * @otime: force setting otime
977 * @wake_q: lockless wake-queue head
978 *
979 * do_smart_update() does the required calls to update_queue and wakeup_zero,
980 * based on the actual changes that were performed on the semaphore array.
981 * Note that the function does not do the actual wake-up: the caller is
982 * responsible for calling wake_up_q().
983 * It is safe to perform this call after dropping all locks.
984 */
985 static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
986 int otime, struct wake_q_head *wake_q)
987 {
988 int i;
989
990 otime |= do_smart_wakeup_zero(sma, sops, nsops, wake_q);
991
992 if (!list_empty(&sma->pending_alter)) {
993 /* semaphore array uses the global queue - just process it. */
994 otime |= update_queue(sma, -1, wake_q);
995 } else {
996 if (!sops) {
997 /*
998 * No sops, thus the modified semaphores are not
999 * known. Check all.
1000 */
1001 for (i = 0; i < sma->sem_nsems; i++)
1002 otime |= update_queue(sma, i, wake_q);
1003 } else {
1004 /*
1005 * Check the semaphores that were increased:
1006 * - No complex ops, thus all sleeping ops are
1007 * decrease.
1008 * - if we decreased the value, then any sleeping
1009 * semaphore ops wont be able to run: If the
1010 * previous value was too small, then the new
1011 * value will be too small, too.
1012 */
1013 for (i = 0; i < nsops; i++) {
1014 if (sops[i].sem_op > 0) {
1015 otime |= update_queue(sma,
1016 sops[i].sem_num, wake_q);
1017 }
1018 }
1019 }
1020 }
1021 if (otime)
1022 set_semotime(sma, sops);
1023 }
1024
1025 /*
1026 * check_qop: Test if a queued operation sleeps on the semaphore semnum
1027 */
1028 static int check_qop(struct sem_array *sma, int semnum, struct sem_queue *q,
1029 bool count_zero)
1030 {
1031 struct sembuf *sop = q->blocking;
1032
1033 /*
1034 * Linux always (since 0.99.10) reported a task as sleeping on all
1035 * semaphores. This violates SUS, therefore it was changed to the
1036 * standard compliant behavior.
1037 * Give the administrators a chance to notice that an application
1038 * might misbehave because it relies on the Linux behavior.
1039 */
1040 pr_info_once("semctl(GETNCNT/GETZCNT) is since 3.16 Single Unix Specification compliant.\n"
1041 "The task %s (%d) triggered the difference, watch for misbehavior.\n",
1042 current->comm, task_pid_nr(current));
1043
1044 if (sop->sem_num != semnum)
1045 return 0;
1046
1047 if (count_zero && sop->sem_op == 0)
1048 return 1;
1049 if (!count_zero && sop->sem_op < 0)
1050 return 1;
1051
1052 return 0;
1053 }
1054
1055 /* The following counts are associated to each semaphore:
1056 * semncnt number of tasks waiting on semval being nonzero
1057 * semzcnt number of tasks waiting on semval being zero
1058 *
1059 * Per definition, a task waits only on the semaphore of the first semop
1060 * that cannot proceed, even if additional operation would block, too.
1061 */
1062 static int count_semcnt(struct sem_array *sma, ushort semnum,
1063 bool count_zero)
1064 {
1065 struct list_head *l;
1066 struct sem_queue *q;
1067 int semcnt;
1068
1069 semcnt = 0;
1070 /* First: check the simple operations. They are easy to evaluate */
1071 if (count_zero)
1072 l = &sma->sems[semnum].pending_const;
1073 else
1074 l = &sma->sems[semnum].pending_alter;
1075
1076 list_for_each_entry(q, l, list) {
1077 /* all task on a per-semaphore list sleep on exactly
1078 * that semaphore
1079 */
1080 semcnt++;
1081 }
1082
1083 /* Then: check the complex operations. */
1084 list_for_each_entry(q, &sma->pending_alter, list) {
1085 semcnt += check_qop(sma, semnum, q, count_zero);
1086 }
1087 if (count_zero) {
1088 list_for_each_entry(q, &sma->pending_const, list) {
1089 semcnt += check_qop(sma, semnum, q, count_zero);
1090 }
1091 }
1092 return semcnt;
1093 }
1094
1095 /* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
1096 * as a writer and the spinlock for this semaphore set hold. sem_ids.rwsem
1097 * remains locked on exit.
1098 */
1099 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
1100 {
1101 struct sem_undo *un, *tu;
1102 struct sem_queue *q, *tq;
1103 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
1104 int i;
1105 DEFINE_WAKE_Q(wake_q);
1106
1107 /* Free the existing undo structures for this semaphore set. */
1108 ipc_assert_locked_object(&sma->sem_perm);
1109 list_for_each_entry_safe(un, tu, &sma->list_id, list_id) {
1110 list_del(&un->list_id);
1111 spin_lock(&un->ulp->lock);
1112 un->semid = -1;
1113 list_del_rcu(&un->list_proc);
1114 spin_unlock(&un->ulp->lock);
1115 kfree_rcu(un, rcu);
1116 }
1117
1118 /* Wake up all pending processes and let them fail with EIDRM. */
1119 list_for_each_entry_safe(q, tq, &sma->pending_const, list) {
1120 unlink_queue(sma, q);
1121 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1122 }
1123
1124 list_for_each_entry_safe(q, tq, &sma->pending_alter, list) {
1125 unlink_queue(sma, q);
1126 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1127 }
1128 for (i = 0; i < sma->sem_nsems; i++) {
1129 struct sem *sem = &sma->sems[i];
1130 list_for_each_entry_safe(q, tq, &sem->pending_const, list) {
1131 unlink_queue(sma, q);
1132 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1133 }
1134 list_for_each_entry_safe(q, tq, &sem->pending_alter, list) {
1135 unlink_queue(sma, q);
1136 wake_up_sem_queue_prepare(q, -EIDRM, &wake_q);
1137 }
1138 }
1139
1140 /* Remove the semaphore set from the IDR */
1141 sem_rmid(ns, sma);
1142 sem_unlock(sma, -1);
1143 rcu_read_unlock();
1144
1145 wake_up_q(&wake_q);
1146 ns->used_sems -= sma->sem_nsems;
1147 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1148 }
1149
1150 static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
1151 {
1152 switch (version) {
1153 case IPC_64:
1154 return copy_to_user(buf, in, sizeof(*in));
1155 case IPC_OLD:
1156 {
1157 struct semid_ds out;
1158
1159 memset(&out, 0, sizeof(out));
1160
1161 ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);
1162
1163 out.sem_otime = in->sem_otime;
1164 out.sem_ctime = in->sem_ctime;
1165 out.sem_nsems = in->sem_nsems;
1166
1167 return copy_to_user(buf, &out, sizeof(out));
1168 }
1169 default:
1170 return -EINVAL;
1171 }
1172 }
1173
1174 static time64_t get_semotime(struct sem_array *sma)
1175 {
1176 int i;
1177 time64_t res;
1178
1179 res = sma->sems[0].sem_otime;
1180 for (i = 1; i < sma->sem_nsems; i++) {
1181 time64_t to = sma->sems[i].sem_otime;
1182
1183 if (to > res)
1184 res = to;
1185 }
1186 return res;
1187 }
1188
1189 static int semctl_stat(struct ipc_namespace *ns, int semid,
1190 int cmd, struct semid64_ds *semid64)
1191 {
1192 struct sem_array *sma;
1193 int id = 0;
1194 int err;
1195
1196 memset(semid64, 0, sizeof(*semid64));
1197
1198 rcu_read_lock();
1199 if (cmd == SEM_STAT) {
1200 sma = sem_obtain_object(ns, semid);
1201 if (IS_ERR(sma)) {
1202 err = PTR_ERR(sma);
1203 goto out_unlock;
1204 }
1205 id = sma->sem_perm.id;
1206 } else {
1207 sma = sem_obtain_object_check(ns, semid);
1208 if (IS_ERR(sma)) {
1209 err = PTR_ERR(sma);
1210 goto out_unlock;
1211 }
1212 }
1213
1214 err = -EACCES;
1215 if (ipcperms(ns, &sma->sem_perm, S_IRUGO))
1216 goto out_unlock;
1217
1218 err = security_sem_semctl(sma, cmd);
1219 if (err)
1220 goto out_unlock;
1221
1222 kernel_to_ipc64_perm(&sma->sem_perm, &semid64->sem_perm);
1223 semid64->sem_otime = get_semotime(sma);
1224 semid64->sem_ctime = sma->sem_ctime;
1225 semid64->sem_nsems = sma->sem_nsems;
1226 rcu_read_unlock();
1227 return id;
1228
1229 out_unlock:
1230 rcu_read_unlock();
1231 return err;
1232 }
1233
1234 static int semctl_info(struct ipc_namespace *ns, int semid,
1235 int cmd, void __user *p)
1236 {
1237 struct seminfo seminfo;
1238 int max_id;
1239 int err;
1240
1241 err = security_sem_semctl(NULL, cmd);
1242 if (err)
1243 return err;
1244
1245 memset(&seminfo, 0, sizeof(seminfo));
1246 seminfo.semmni = ns->sc_semmni;
1247 seminfo.semmns = ns->sc_semmns;
1248 seminfo.semmsl = ns->sc_semmsl;
1249 seminfo.semopm = ns->sc_semopm;
1250 seminfo.semvmx = SEMVMX;
1251 seminfo.semmnu = SEMMNU;
1252 seminfo.semmap = SEMMAP;
1253 seminfo.semume = SEMUME;
1254 down_read(&sem_ids(ns).rwsem);
1255 if (cmd == SEM_INFO) {
1256 seminfo.semusz = sem_ids(ns).in_use;
1257 seminfo.semaem = ns->used_sems;
1258 } else {
1259 seminfo.semusz = SEMUSZ;
1260 seminfo.semaem = SEMAEM;
1261 }
1262 max_id = ipc_get_maxid(&sem_ids(ns));
1263 up_read(&sem_ids(ns).rwsem);
1264 if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
1265 return -EFAULT;
1266 return (max_id < 0) ? 0 : max_id;
1267 }
1268
1269 static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
1270 int val)
1271 {
1272 struct sem_undo *un;
1273 struct sem_array *sma;
1274 struct sem *curr;
1275 int err;
1276 DEFINE_WAKE_Q(wake_q);
1277
1278 if (val > SEMVMX || val < 0)
1279 return -ERANGE;
1280
1281 rcu_read_lock();
1282 sma = sem_obtain_object_check(ns, semid);
1283 if (IS_ERR(sma)) {
1284 rcu_read_unlock();
1285 return PTR_ERR(sma);
1286 }
1287
1288 if (semnum < 0 || semnum >= sma->sem_nsems) {
1289 rcu_read_unlock();
1290 return -EINVAL;
1291 }
1292
1293
1294 if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
1295 rcu_read_unlock();
1296 return -EACCES;
1297 }
1298
1299 err = security_sem_semctl(sma, SETVAL);
1300 if (err) {
1301 rcu_read_unlock();
1302 return -EACCES;
1303 }
1304
1305 sem_lock(sma, NULL, -1);
1306
1307 if (!ipc_valid_object(&sma->sem_perm)) {
1308 sem_unlock(sma, -1);
1309 rcu_read_unlock();
1310 return -EIDRM;
1311 }
1312
1313 semnum = array_index_nospec(semnum, sma->sem_nsems);
1314 curr = &sma->sems[semnum];
1315
1316 ipc_assert_locked_object(&sma->sem_perm);
1317 list_for_each_entry(un, &sma->list_id, list_id)
1318 un->semadj[semnum] = 0;
1319
1320 curr->semval = val;
1321 curr->sempid = task_tgid_vnr(current);
1322 sma->sem_ctime = ktime_get_real_seconds();
1323 /* maybe some queued-up processes were waiting for this */
1324 do_smart_update(sma, NULL, 0, 0, &wake_q);
1325 sem_unlock(sma, -1);
1326 rcu_read_unlock();
1327 wake_up_q(&wake_q);
1328 return 0;
1329 }
1330
/*
 * semctl_main - implement GETALL, SETALL, GETVAL, GETPID, GETNCNT and
 * GETZCNT.
 *
 * The array is looked up under RCU; SETALL needs write permission, all
 * other commands read permission, and the security hook must agree.
 * GETALL and SETALL are handled inside the first switch and leave via
 * goto/return; the remaining commands fall out of it and work on the
 * single semaphore @semnum.
 *
 * Arrays of up to SEMMSL_FAST semaphores use the on-stack fast_sem_io;
 * larger ones use a kvmalloc_array() buffer that is released at out_free.
 *
 * Returns the requested value for GETVAL/GETPID/GETNCNT/GETZCNT, 0 for
 * GETALL/SETALL, or a negative errno.
 */
1331 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1332 int cmd, void __user *p)
1333 {
1334 struct sem_array *sma;
1335 struct sem *curr;
1336 int err, nsems;
1337 ushort fast_sem_io[SEMMSL_FAST];
1338 ushort *sem_io = fast_sem_io;
1339 DEFINE_WAKE_Q(wake_q);
1340
1341 rcu_read_lock();
1342 sma = sem_obtain_object_check(ns, semid);
1343 if (IS_ERR(sma)) {
1344 rcu_read_unlock();
1345 return PTR_ERR(sma);
1346 }
1347
1348 nsems = sma->sem_nsems;
1349
1350 err = -EACCES;
1351 if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO))
1352 goto out_rcu_wakeup;
1353
1354 err = security_sem_semctl(sma, cmd);
1355 if (err)
1356 goto out_rcu_wakeup;
1357
1358 err = -EACCES;
1359 switch (cmd) {
1360 case GETALL:
1361 {
1362 ushort __user *array = p;
1363 int i;
1364
1365 sem_lock(sma, NULL, -1);
1366 if (!ipc_valid_object(&sma->sem_perm)) {
1367 err = -EIDRM;
1368 goto out_unlock;
1369 }
/*
 * Large array: take a reference, drop the lock and the RCU read section
 * around the GFP_KERNEL allocation, then re-lock through
 * sem_lock_and_putref() and check again that the object is still valid.
 */
1370 if (nsems > SEMMSL_FAST) {
1371 if (!ipc_rcu_getref(&sma->sem_perm)) {
1372 err = -EIDRM;
1373 goto out_unlock;
1374 }
1375 sem_unlock(sma, -1);
1376 rcu_read_unlock();
1377 sem_io = kvmalloc_array(nsems, sizeof(ushort),
1378 GFP_KERNEL);
1379 if (sem_io == NULL) {
1380 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1381 return -ENOMEM;
1382 }
1383
1384 rcu_read_lock();
1385 sem_lock_and_putref(sma);
1386 if (!ipc_valid_object(&sma->sem_perm)) {
1387 err = -EIDRM;
1388 goto out_unlock;
1389 }
1390 }
/* Snapshot all values under the lock; copy to user space after unlocking. */
1391 for (i = 0; i < sma->sem_nsems; i++)
1392 sem_io[i] = sma->sems[i].semval;
1393 sem_unlock(sma, -1);
1394 rcu_read_unlock();
1395 err = 0;
1396 if (copy_to_user(array, sem_io, nsems*sizeof(ushort)))
1397 err = -EFAULT;
1398 goto out_free;
1399 }
1400 case SETALL:
1401 {
1402 int i;
1403 struct sem_undo *un;
1404
/*
 * The reference is taken before leaving the RCU read section; every
 * early exit below, up to sem_lock_and_putref(), drops it again with
 * ipc_rcu_putref().
 */
1405 if (!ipc_rcu_getref(&sma->sem_perm)) {
1406 err = -EIDRM;
1407 goto out_rcu_wakeup;
1408 }
1409 rcu_read_unlock();
1410
1411 if (nsems > SEMMSL_FAST) {
1412 sem_io = kvmalloc_array(nsems, sizeof(ushort),
1413 GFP_KERNEL);
1414 if (sem_io == NULL) {
1415 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1416 return -ENOMEM;
1417 }
1418 }
1419
1420 if (copy_from_user(sem_io, p, nsems*sizeof(ushort))) {
1421 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1422 err = -EFAULT;
1423 goto out_free;
1424 }
1425
/* Validate every new value before any semaphore is modified. */
1426 for (i = 0; i < nsems; i++) {
1427 if (sem_io[i] > SEMVMX) {
1428 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1429 err = -ERANGE;
1430 goto out_free;
1431 }
1432 }
1433 rcu_read_lock();
1434 sem_lock_and_putref(sma);
1435 if (!ipc_valid_object(&sma->sem_perm)) {
1436 err = -EIDRM;
1437 goto out_unlock;
1438 }
1439
1440 for (i = 0; i < nsems; i++) {
1441 sma->sems[i].semval = sem_io[i];
1442 sma->sems[i].sempid = task_tgid_vnr(current);
1443 }
1444
/* Clear the undo adjustments of all processes for every semaphore. */
1445 ipc_assert_locked_object(&sma->sem_perm);
1446 list_for_each_entry(un, &sma->list_id, list_id) {
1447 for (i = 0; i < nsems; i++)
1448 un->semadj[i] = 0;
1449 }
1450 sma->sem_ctime = ktime_get_real_seconds();
1451 /* maybe some queued-up processes were waiting for this */
1452 do_smart_update(sma, NULL, 0, 0, &wake_q);
1453 err = 0;
1454 goto out_unlock;
1455 }
1456 /* GETVAL, GETPID, GETNCNT, GETZCNT: fall-through */
1457 }
/* Single-semaphore commands: range-check semnum before taking the lock. */
1458 err = -EINVAL;
1459 if (semnum < 0 || semnum >= nsems)
1460 goto out_rcu_wakeup;
1461
1462 sem_lock(sma, NULL, -1);
1463 if (!ipc_valid_object(&sma->sem_perm)) {
1464 err = -EIDRM;
1465 goto out_unlock;
1466 }
1467
/* Clamp the already range-checked index against speculation. */
1468 semnum = array_index_nospec(semnum, nsems);
1469 curr = &sma->sems[semnum];
1470
/* If cmd matches no case, err keeps the -EINVAL assigned above. */
1471 switch (cmd) {
1472 case GETVAL:
1473 err = curr->semval;
1474 goto out_unlock;
1475 case GETPID:
1476 err = curr->sempid;
1477 goto out_unlock;
1478 case GETNCNT:
1479 err = count_semcnt(sma, semnum, 0);
1480 goto out_unlock;
1481 case GETZCNT:
1482 err = count_semcnt(sma, semnum, 1);
1483 goto out_unlock;
1484 }
1485
/*
 * Exit ladder: out_unlock expects the array lock and RCU held,
 * out_rcu_wakeup only RCU, out_free neither.
 */
1486 out_unlock:
1487 sem_unlock(sma, -1);
1488 out_rcu_wakeup:
1489 rcu_read_unlock();
1490 wake_up_q(&wake_q);
1491 out_free:
1492 if (sem_io != fast_sem_io)
1493 kvfree(sem_io);
1494 return err;
1495 }
1496
1497 static inline unsigned long
1498 copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
1499 {
1500 switch (version) {
1501 case IPC_64:
1502 if (copy_from_user(out, buf, sizeof(*out)))
1503 return -EFAULT;
1504 return 0;
1505 case IPC_OLD:
1506 {
1507 struct semid_ds tbuf_old;
1508
1509 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
1510 return -EFAULT;
1511
1512 out->sem_perm.uid = tbuf_old.sem_perm.uid;
1513 out->sem_perm.gid = tbuf_old.sem_perm.gid;
1514 out->sem_perm.mode = tbuf_old.sem_perm.mode;
1515
1516 return 0;
1517 }
1518 default:
1519 return -EINVAL;
1520 }
1521 }
1522
1523 /*
1524 * This function handles some semctl commands which require the rwsem
1525 * to be held in write mode.
1526 * NOTE: no locks must be held, the rwsem is taken inside this function.
1527 */
1528 static int semctl_down(struct ipc_namespace *ns, int semid,
1529 int cmd, struct semid64_ds *semid64)
1530 {
1531 struct sem_array *sma;
1532 int err;
1533 struct kern_ipc_perm *ipcp;
1534
1535 down_write(&sem_ids(ns).rwsem);
1536 rcu_read_lock();
1537
1538 ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
1539 &semid64->sem_perm, 0);
1540 if (IS_ERR(ipcp)) {
1541 err = PTR_ERR(ipcp);
1542 goto out_unlock1;
1543 }
1544
1545 sma = container_of(ipcp, struct sem_array, sem_perm);
1546
1547 err = security_sem_semctl(sma, cmd);
1548 if (err)
1549 goto out_unlock1;
1550
1551 switch (cmd) {
1552 case IPC_RMID:
1553 sem_lock(sma, NULL, -1);
1554 /* freeary unlocks the ipc object and rcu */
1555 freeary(ns, ipcp);
1556 goto out_up;
1557 case IPC_SET:
1558 sem_lock(sma, NULL, -1);
1559 err = ipc_update_perm(&semid64->sem_perm, ipcp);
1560 if (err)
1561 goto out_unlock0;
1562 sma->sem_ctime = ktime_get_real_seconds();
1563 break;
1564 default:
1565 err = -EINVAL;
1566 goto out_unlock1;
1567 }
1568
1569 out_unlock0:
1570 sem_unlock(sma, -1);
1571 out_unlock1:
1572 rcu_read_unlock();
1573 out_up:
1574 up_write(&sem_ids(ns).rwsem);
1575 return err;
1576 }
1577
1578 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
1579 {
1580 int version;
1581 struct ipc_namespace *ns;
1582 void __user *p = (void __user *)arg;
1583 struct semid64_ds semid64;
1584 int err;
1585
1586 if (semid < 0)
1587 return -EINVAL;
1588
1589 version = ipc_parse_version(&cmd);
1590 ns = current->nsproxy->ipc_ns;
1591
1592 switch (cmd) {
1593 case IPC_INFO:
1594 case SEM_INFO:
1595 return semctl_info(ns, semid, cmd, p);
1596 case IPC_STAT:
1597 case SEM_STAT:
1598 err = semctl_stat(ns, semid, cmd, &semid64);
1599 if (err < 0)
1600 return err;
1601 if (copy_semid_to_user(p, &semid64, version))
1602 err = -EFAULT;
1603 return err;
1604 case GETALL:
1605 case GETVAL:
1606 case GETPID:
1607 case GETNCNT:
1608 case GETZCNT:
1609 case SETALL:
1610 return semctl_main(ns, semid, semnum, cmd, p);
1611 case SETVAL: {
1612 int val;
1613 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
1614 /* big-endian 64bit */
1615 val = arg >> 32;
1616 #else
1617 /* 32bit or little-endian 64bit */
1618 val = arg;
1619 #endif
1620 return semctl_setval(ns, semid, semnum, val);
1621 }
1622 case IPC_SET:
1623 if (copy_semid_from_user(&semid64, p, version))
1624 return -EFAULT;
1625 case IPC_RMID:
1626 return semctl_down(ns, semid, cmd, &semid64);
1627 default:
1628 return -EINVAL;
1629 }
1630 }
1631
1632 #ifdef CONFIG_COMPAT
1633
/*
 * Layout of the old-style (non IPC_64) semid_ds as used by the compat
 * semctl path.  copy_compat_semid_to_user() zeroes the whole structure
 * and fills in sem_perm, sem_otime, sem_ctime and sem_nsems only;
 * copy_compat_semid_from_user() reads nothing but sem_perm.  The
 * compat_uptr_t members are never assigned by the code in this file.
 */
1634 struct compat_semid_ds {
1635 struct compat_ipc_perm sem_perm;
1636 compat_time_t sem_otime;
1637 compat_time_t sem_ctime;
1638 compat_uptr_t sem_base;
1639 compat_uptr_t sem_pending;
1640 compat_uptr_t sem_pending_last;
1641 compat_uptr_t undo;
1642 unsigned short sem_nsems;
1643 };
1644
1645 static int copy_compat_semid_from_user(struct semid64_ds *out, void __user *buf,
1646 int version)
1647 {
1648 memset(out, 0, sizeof(*out));
1649 if (version == IPC_64) {
1650 struct compat_semid64_ds __user *p = buf;
1651 return get_compat_ipc64_perm(&out->sem_perm, &p->sem_perm);
1652 } else {
1653 struct compat_semid_ds __user *p = buf;
1654 return get_compat_ipc_perm(&out->sem_perm, &p->sem_perm);
1655 }
1656 }
1657
1658 static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
1659 int version)
1660 {
1661 if (version == IPC_64) {
1662 struct compat_semid64_ds v;
1663 memset(&v, 0, sizeof(v));
1664 to_compat_ipc64_perm(&v.sem_perm, &in->sem_perm);
1665 v.sem_otime = in->sem_otime;
1666 v.sem_ctime = in->sem_ctime;
1667 v.sem_nsems = in->sem_nsems;
1668 return copy_to_user(buf, &v, sizeof(v));
1669 } else {
1670 struct compat_semid_ds v;
1671 memset(&v, 0, sizeof(v));
1672 to_compat_ipc_perm(&v.sem_perm, &in->sem_perm);
1673 v.sem_otime = in->sem_otime;
1674 v.sem_ctime = in->sem_ctime;
1675 v.sem_nsems = in->sem_nsems;
1676 return copy_to_user(buf, &v, sizeof(v));
1677 }
1678 }
1679
1680 COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
1681 {
1682 void __user *p = compat_ptr(arg);
1683 struct ipc_namespace *ns;
1684 struct semid64_ds semid64;
1685 int version = compat_ipc_parse_version(&cmd);
1686 int err;
1687
1688 ns = current->nsproxy->ipc_ns;
1689
1690 if (semid < 0)
1691 return -EINVAL;
1692
1693 switch (cmd & (~IPC_64)) {
1694 case IPC_INFO:
1695 case SEM_INFO:
1696 return semctl_info(ns, semid, cmd, p);
1697 case IPC_STAT:
1698 case SEM_STAT:
1699 err = semctl_stat(ns, semid, cmd, &semid64);
1700 if (err < 0)
1701 return err;
1702 if (copy_compat_semid_to_user(p, &semid64, version))
1703 err = -EFAULT;
1704 return err;
1705 case GETVAL:
1706 case GETPID:
1707 case GETNCNT:
1708 case GETZCNT:
1709 case GETALL:
1710 case SETALL:
1711 return semctl_main(ns, semid, semnum, cmd, p);
1712 case SETVAL:
1713 return semctl_setval(ns, semid, semnum, arg);
1714 case IPC_SET:
1715 if (copy_compat_semid_from_user(&semid64, p, version))
1716 return -EFAULT;
1717 /* fallthru */
1718 case IPC_RMID:
1719 return semctl_down(ns, semid, cmd, &semid64);
1720 default:
1721 return -EINVAL;
1722 }
1723 }
1724 #endif
1725
1726 /* If the task doesn't already have a undo_list, then allocate one
1727 * here. We guarantee there is only one thread using this undo list,
1728 * and current is THE ONE
1729 *
1730 * If this allocation and assignment succeeds, but later
1731 * portions of this code fail, there is no need to free the sem_undo_list.
1732 * Just let it stay associated with the task, and it'll be freed later
1733 * at exit time.
1734 *
1735 * This can block, so callers must hold no locks.
1736 */
1737 static inline int get_undo_list(struct sem_undo_list **undo_listp)
1738 {
1739 struct sem_undo_list *undo_list;
1740
1741 undo_list = current->sysvsem.undo_list;
1742 if (!undo_list) {
1743 undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
1744 if (undo_list == NULL)
1745 return -ENOMEM;
1746 spin_lock_init(&undo_list->lock);
1747 refcount_set(&undo_list->refcnt, 1);
1748 INIT_LIST_HEAD(&undo_list->list_proc);
1749
1750 current->sysvsem.undo_list = undo_list;
1751 }
1752 *undo_listp = undo_list;
1753 return 0;
1754 }
1755
1756 static struct sem_undo *__lookup_undo(struct sem_undo_list *ulp, int semid)
1757 {
1758 struct sem_undo *un;
1759
1760 list_for_each_entry_rcu(un, &ulp->list_proc, list_proc) {
1761 if (un->semid == semid)
1762 return un;
1763 }
1764 return NULL;
1765 }
1766
1767 static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
1768 {
1769 struct sem_undo *un;
1770
1771 assert_spin_locked(&ulp->lock);
1772
1773 un = __lookup_undo(ulp, semid);
1774 if (un) {
1775 list_del_rcu(&un->list_proc);
1776 list_add_rcu(&un->list_proc, &ulp->list_proc);
1777 }
1778 return un;
1779 }
1780
1781 /**
1782 * find_alloc_undo - lookup (and if not present create) undo array
1783 * @ns: namespace
1784 * @semid: semaphore array id
1785 *
1786 * The function looks up (and if not present creates) the undo structure.
1787 * The size of the undo structure depends on the size of the semaphore
1788 * array, thus the alloc path is not that straightforward.
1789 * Lifetime-rules: sem_undo is rcu-protected, on success, the function
1790 * performs a rcu_read_lock().
1791 */
/*
 * Exit states: every path that returns an ERR_PTR() has already left the
 * RCU read section; both success paths (the early "goto out" after a
 * successful lookup and the "success" label) return with rcu_read_lock()
 * still held and without the array lock.
 */
1792 static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1793 {
1794 struct sem_array *sma;
1795 struct sem_undo_list *ulp;
1796 struct sem_undo *un, *new;
1797 int nsems, error;
1798
1799 error = get_undo_list(&ulp);
1800 if (error)
1801 return ERR_PTR(error);
1802
/* Fast path: this task already has an undo structure for semid. */
1803 rcu_read_lock();
1804 spin_lock(&ulp->lock);
1805 un = lookup_undo(ulp, semid);
1806 spin_unlock(&ulp->lock);
1807 if (likely(un != NULL))
1808 goto out;
1809
1810 /* no undo structure around - allocate one. */
1811 /* step 1: figure out the size of the semaphore array */
1812 sma = sem_obtain_object_check(ns, semid);
1813 if (IS_ERR(sma)) {
1814 rcu_read_unlock();
1815 return ERR_CAST(sma);
1816 }
1817
/*
 * Take a reference on the array before leaving the RCU read section for
 * the GFP_KERNEL allocation below.
 */
1818 nsems = sma->sem_nsems;
1819 if (!ipc_rcu_getref(&sma->sem_perm)) {
1820 rcu_read_unlock();
1821 un = ERR_PTR(-EIDRM);
1822 goto out;
1823 }
1824 rcu_read_unlock();
1825
1826 /* step 2: allocate new undo structure */
/* One allocation: the struct followed by nsems shorts for semadj[]. */
1827 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1828 if (!new) {
1829 ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
1830 return ERR_PTR(-ENOMEM);
1831 }
1832
1833 /* step 3: Acquire the lock on semaphore array */
1834 rcu_read_lock();
1835 sem_lock_and_putref(sma);
1836 if (!ipc_valid_object(&sma->sem_perm)) {
1837 sem_unlock(sma, -1);
1838 rcu_read_unlock();
1839 kfree(new);
1840 un = ERR_PTR(-EIDRM);
1841 goto out;
1842 }
1843 spin_lock(&ulp->lock);
1844
1845 /*
1846 * step 4: check for races: did someone else allocate the undo struct?
1847 */
1848 un = lookup_undo(ulp, semid);
1849 if (un) {
1850 kfree(new);
1851 goto success;
1852 }
1853 /* step 5: initialize & link new undo structure */
/* semadj points at the memory directly behind the struct itself. */
1854 new->semadj = (short *) &new[1];
1855 new->ulp = ulp;
1856 new->semid = semid;
1857 assert_spin_locked(&ulp->lock);
1858 list_add_rcu(&new->list_proc, &ulp->list_proc);
1859 ipc_assert_locked_object(&sma->sem_perm);
1860 list_add(&new->list_id, &sma->list_id);
1861 un = new;
1862
1863 success:
1864 spin_unlock(&ulp->lock);
1865 sem_unlock(sma, -1);
1866 out:
1867 return un;
1868 }
1869
/*
 * do_semtimedop - common implementation of semop() and semtimedop().
 * @semid:   semaphore array id
 * @tsops:   user-space array of @nsops operations
 * @nsops:   number of operations, 1..ns->sc_semopm
 * @timeout: kernel-space timeout, or NULL to sleep without a time limit
 *
 * The operations are copied in and scanned once, then the array is
 * looked up, permission-checked and locked.  perform_atomic_semop()
 * either completes the whole set (return value 0), fails (< 0), or
 * reports that the task has to sleep; in the last case the request is
 * queued and the task sleeps until another task completes or fails the
 * operation, a signal is pending, or the timeout runs out.
 *
 * Returns 0 on success or a negative errno; the ones produced directly
 * in this function are -EINVAL, -E2BIG, -ENOMEM, -EFAULT, -EFBIG,
 * -EACCES, -EIDRM, -EAGAIN (timeout expired) and -EINTR.
 */
1870 static long do_semtimedop(int semid, struct sembuf __user *tsops,
1871 unsigned nsops, const struct timespec64 *timeout)
1872 {
1873 int error = -EINVAL;
1874 struct sem_array *sma;
1875 struct sembuf fast_sops[SEMOPM_FAST];
1876 struct sembuf *sops = fast_sops, *sop;
1877 struct sem_undo *un;
1878 int max, locknum;
1879 bool undos = false, alter = false, dupsop = false;
1880 struct sem_queue queue;
1881 unsigned long dup = 0, jiffies_left = 0;
1882 struct ipc_namespace *ns;
1883
1884 ns = current->nsproxy->ipc_ns;
1885
1886 if (nsops < 1 || semid < 0)
1887 return -EINVAL;
1888 if (nsops > ns->sc_semopm)
1889 return -E2BIG;
/*
 * Up to SEMOPM_FAST operations fit into the on-stack array; larger
 * requests get a heap buffer that is released at out_free.
 */
1890 if (nsops > SEMOPM_FAST) {
1891 sops = kvmalloc(sizeof(*sops)*nsops, GFP_KERNEL);
1892 if (sops == NULL)
1893 return -ENOMEM;
1894 }
1895
1896 if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
1897 error = -EFAULT;
1898 goto out_free;
1899 }
1900
1901 if (timeout) {
1902 if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
1903 timeout->tv_nsec >= 1000000000L) {
1904 error = -EINVAL;
1905 goto out_free;
1906 }
1907 jiffies_left = timespec64_to_jiffies(timeout);
1908 }
1909
/*
 * Single pass over the operations: record the highest sem_num (max),
 * whether any op requests SEM_UNDO (undos), whether any op has a
 * non-zero sem_op (alter), and whether an op hits a bit in "dup" that an
 * earlier altering op already set (dupsop).
 */
1910 max = 0;
1911 for (sop = sops; sop < sops + nsops; sop++) {
1912 unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);
1913
1914 if (sop->sem_num >= max)
1915 max = sop->sem_num;
1916 if (sop->sem_flg & SEM_UNDO)
1917 undos = true;
1918 if (dup & mask) {
1919 /*
1920 * There was a previous alter access that appears
1921 * to have accessed the same semaphore, thus use
1922 * the dupsop logic. "appears", because the detection
1923 * can only check % BITS_PER_LONG.
1924 */
1925 dupsop = true;
1926 }
1927 if (sop->sem_op != 0) {
1928 alter = true;
1929 dup |= mask;
1930 }
1931 }
1932
/* Either branch leaves this point inside an RCU read section. */
1933 if (undos) {
1934 /* On success, find_alloc_undo takes the rcu_read_lock */
1935 un = find_alloc_undo(ns, semid);
1936 if (IS_ERR(un)) {
1937 error = PTR_ERR(un);
1938 goto out_free;
1939 }
1940 } else {
1941 un = NULL;
1942 rcu_read_lock();
1943 }
1944
1945 sma = sem_obtain_object_check(ns, semid);
1946 if (IS_ERR(sma)) {
1947 rcu_read_unlock();
1948 error = PTR_ERR(sma);
1949 goto out_free;
1950 }
1951
/* An operation names a semaphore beyond the end of the array. */
1952 error = -EFBIG;
1953 if (max >= sma->sem_nsems) {
1954 rcu_read_unlock();
1955 goto out_free;
1956 }
1957
/* Altering operations need write permission, pure waits need read. */
1958 error = -EACCES;
1959 if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
1960 rcu_read_unlock();
1961 goto out_free;
1962 }
1963
1964 error = security_sem_semop(sma, sops, nsops, alter);
1965 if (error) {
1966 rcu_read_unlock();
1967 goto out_free;
1968 }
1969
/* -EIDRM is the result of both checks that follow the sem_lock() call. */
1970 error = -EIDRM;
1971 locknum = sem_lock(sma, sops, nsops);
1972 /*
1973 * We eventually might perform the following check in a lockless
1974 * fashion, considering ipc_valid_object() locking constraints.
1975 * If nsops == 1 and there is no contention for sem_perm.lock, then
1976 * only a per-semaphore lock is held and it's OK to proceed with the
1977 * check below. More details on the fine grained locking scheme
1978 * entangled here and why it's RMID race safe on comments at sem_lock()
1979 */
1980 if (!ipc_valid_object(&sma->sem_perm))
1981 goto out_unlock_free;
1982 /*
1983 * semid identifiers are not unique - find_alloc_undo may have
1984 * allocated an undo structure, it was invalidated by an RMID
1985 * and now a new array with received the same id. Check and fail.
1986 * This case can be detected checking un->semid. The existence of
1987 * "un" itself is guaranteed by rcu.
1988 */
1989 if (un && un->semid == -1)
1990 goto out_unlock_free;
1991
/* The queue entry lives on this stack frame for the whole call. */
1992 queue.sops = sops;
1993 queue.nsops = nsops;
1994 queue.undo = un;
1995 queue.pid = task_tgid_vnr(current);
1996 queue.alter = alter;
1997 queue.dupsop = dupsop;
1998
1999 error = perform_atomic_semop(sma, &queue);
2000 if (error == 0) { /* non-blocking successful path */
2001 DEFINE_WAKE_Q(wake_q);
2002
2003 /*
2004 * If the operation was successful, then do
2005 * the required updates.
2006 */
2007 if (alter)
2008 do_smart_update(sma, sops, nsops, 1, &wake_q);
2009 else
2010 set_semotime(sma, sops);
2011
2012 sem_unlock(sma, locknum);
2013 rcu_read_unlock();
2014 wake_up_q(&wake_q);
2015
2016 goto out_free;
2017 }
2018 if (error < 0) /* non-blocking error path */
2019 goto out_unlock_free;
2020
2021 /*
2022 * We need to sleep on this operation, so we put the current
2023 * task into the pending queue and go to sleep.
2024 */
/*
 * Queue selection: a single operation goes onto the per-semaphore list
 * (pending_const for a wait-for-zero, pending_alter otherwise), except
 * that an altering single op is put on the array-wide pending_alter list
 * while complex_count is non-zero.  Multi-operation requests always go
 * onto the array-wide lists and bump complex_count.
 */
2025 if (nsops == 1) {
2026 struct sem *curr;
2027 int idx = array_index_nospec(sops->sem_num, sma->sem_nsems);
2028 curr = &sma->sems[idx];
2029
2030 if (alter) {
2031 if (sma->complex_count) {
2032 list_add_tail(&queue.list,
2033 &sma->pending_alter);
2034 } else {
2035
2036 list_add_tail(&queue.list,
2037 &curr->pending_alter);
2038 }
2039 } else {
2040 list_add_tail(&queue.list, &curr->pending_const);
2041 }
2042 } else {
2043 if (!sma->complex_count)
2044 merge_queues(sma);
2045
2046 if (alter)
2047 list_add_tail(&queue.list, &sma->pending_alter);
2048 else
2049 list_add_tail(&queue.list, &sma->pending_const);
2050
2051 sma->complex_count++;
2052 }
2053
/*
 * Sleep loop.  queue.status is primed with -EINTR before each sleep; any
 * other value read back afterwards is returned as the final result.
 */
2054 do {
2055 queue.status = -EINTR;
2056 queue.sleeper = current;
2057
2058 __set_current_state(TASK_INTERRUPTIBLE);
2059 sem_unlock(sma, locknum);
2060 rcu_read_unlock();
2061
2062 if (timeout)
2063 jiffies_left = schedule_timeout(jiffies_left);
2064 else
2065 schedule();
2066
2067 /*
2068 * fastpath: the semop has completed, either successfully or
2069 * not, from the syscall pov, is quite irrelevant to us at this
2070 * point; we're done.
2071 *
2072 * We _do_ care, nonetheless, about being awoken by a signal or
2073 * spuriously. The queue.status is checked again in the
2074 * slowpath (aka after taking sem_lock), such that we can detect
2075 * scenarios where we were awakened externally, during the
2076 * window between wake_q_add() and wake_up_q().
2077 */
2078 error = READ_ONCE(queue.status);
2079 if (error != -EINTR) {
2080 /*
2081 * User space could assume that semop() is a memory
2082 * barrier: Without the mb(), the cpu could
2083 * speculatively read in userspace stale data that was
2084 * overwritten by the previous owner of the semaphore.
2085 */
2086 smp_mb();
2087 goto out_free;
2088 }
2089
2090 rcu_read_lock();
2091 locknum = sem_lock(sma, sops, nsops);
2092
2093 if (!ipc_valid_object(&sma->sem_perm))
2094 goto out_unlock_free;
2095
2096 error = READ_ONCE(queue.status);
2097
2098 /*
2099 * If queue.status != -EINTR we are woken up by another process.
2100 * Leave without unlink_queue(), but with sem_unlock().
2101 */
2102 if (error != -EINTR)
2103 goto out_unlock_free;
2104
2105 /*
2106 * If an interrupt occurred we have to clean up the queue.
2107 */
2108 if (timeout && jiffies_left == 0)
2109 error = -EAGAIN;
2110 } while (error == -EINTR && !signal_pending(current)); /* spurious */
2111
/*
 * The loop ended with the request still queued (timeout expired or a
 * signal is pending): remove it while the lock is held.
 */
2112 unlink_queue(sma, &queue);
2113
2114 out_unlock_free:
2115 sem_unlock(sma, locknum);
2116 rcu_read_unlock();
2117 out_free:
2118 if (sops != fast_sops)
2119 kvfree(sops);
2120 return error;
2121 }
2122
2123 SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
2124 unsigned, nsops, const struct timespec __user *, timeout)
2125 {
2126 if (timeout) {
2127 struct timespec64 ts;
2128 if (get_timespec64(&ts, timeout))
2129 return -EFAULT;
2130 return do_semtimedop(semid, tsops, nsops, &ts);
2131 }
2132 return do_semtimedop(semid, tsops, nsops, NULL);
2133 }
2134
2135 #ifdef CONFIG_COMPAT
2136 COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
2137 unsigned, nsops,
2138 const struct compat_timespec __user *, timeout)
2139 {
2140 if (timeout) {
2141 struct timespec64 ts;
2142 if (compat_get_timespec64(&ts, timeout))
2143 return -EFAULT;
2144 return do_semtimedop(semid, tsems, nsops, &ts);
2145 }
2146 return do_semtimedop(semid, tsems, nsops, NULL);
2147 }
2148 #endif
2149
2150 SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops,
2151 unsigned, nsops)
2152 {
2153 return do_semtimedop(semid, tsops, nsops, NULL);
2154 }
2155
2156 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
2157 * parent and child tasks.
2158 */
2159
2160 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
2161 {
2162 struct sem_undo_list *undo_list;
2163 int error;
2164
2165 if (clone_flags & CLONE_SYSVSEM) {
2166 error = get_undo_list(&undo_list);
2167 if (error)
2168 return error;
2169 refcount_inc(&undo_list->refcnt);
2170 tsk->sysvsem.undo_list = undo_list;
2171 } else
2172 tsk->sysvsem.undo_list = NULL;
2173
2174 return 0;
2175 }
2176
2177 /*
2178 * add semadj values to semaphores, free undo structures.
2179 * undo structures are not freed when semaphore arrays are destroyed
2180 * so some of them may be out of date.
2181 * IMPLEMENTATION NOTE: There is some confusion over whether the
2182 * set of adjustments that needs to be done should be done in an atomic
2183 * manner or not. That is, if we are attempting to decrement the semval
2184 * should we queue up and wait until we can do so legally?
2185 * The original implementation attempted to do this (queue and wait).
2186 * The current implementation does not do so. The POSIX standard
2187 * and SVID should be consulted to determine what behavior is mandated.
2188 */
/*
 * Control flow: the task's undo_list pointer is cleared first; only the
 * task that drops the last reference on the list goes on.  The loop then
 * handles the first list entry on each pass until the list is empty, and
 * finally the list itself is freed.
 */
2189 void exit_sem(struct task_struct *tsk)
2190 {
2191 struct sem_undo_list *ulp;
2192
2193 ulp = tsk->sysvsem.undo_list;
2194 if (!ulp)
2195 return;
2196 tsk->sysvsem.undo_list = NULL;
2197
/* Other tasks still share this undo list: nothing more to do here. */
2198 if (!refcount_dec_and_test(&ulp->refcnt))
2199 return;
2200
2201 for (;;) {
2202 struct sem_array *sma;
2203 struct sem_undo *un;
2204 int semid, i;
2205 DEFINE_WAKE_Q(wake_q);
2206
2207 cond_resched();
2208
/* Look at the first entry; reaching the list head means the list is empty. */
2209 rcu_read_lock();
2210 un = list_entry_rcu(ulp->list_proc.next,
2211 struct sem_undo, list_proc);
2212 if (&un->list_proc == &ulp->list_proc) {
2213 /*
2214 * We must wait for freeary() before freeing this ulp,
2215 * in case we raced with last sem_undo. There is a small
2216 * possibility where we exit while freeary() didn't
2217 * finish unlocking sem_undo_list.
2218 */
2219 spin_lock(&ulp->lock);
2220 spin_unlock(&ulp->lock);
2221 rcu_read_unlock();
2222 break;
2223 }
/* Read semid under ulp->lock, the lock freeary() holds while setting it to -1. */
2224 spin_lock(&ulp->lock);
2225 semid = un->semid;
2226 spin_unlock(&ulp->lock);
2227
2228 /* exit_sem raced with IPC_RMID, nothing to do */
2229 if (semid == -1) {
2230 rcu_read_unlock();
2231 continue;
2232 }
2233
2234 sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, semid);
2235 /* exit_sem raced with IPC_RMID, nothing to do */
2236 if (IS_ERR(sma)) {
2237 rcu_read_unlock();
2238 continue;
2239 }
2240
2241 sem_lock(sma, NULL, -1);
2242 /* exit_sem raced with IPC_RMID, nothing to do */
2243 if (!ipc_valid_object(&sma->sem_perm)) {
2244 sem_unlock(sma, -1);
2245 rcu_read_unlock();
2246 continue;
2247 }
/* Look the entry up again by semid now that the array lock is held. */
2248 un = __lookup_undo(ulp, semid);
2249 if (un == NULL) {
2250 /* exit_sem raced with IPC_RMID+semget() that created
2251 * exactly the same semid. Nothing to do.
2252 */
2253 sem_unlock(sma, -1);
2254 rcu_read_unlock();
2255 continue;
2256 }
2257
2258 /* remove un from the linked lists */
2259 ipc_assert_locked_object(&sma->sem_perm);
2260 list_del(&un->list_id);
2261
2262 /* we are the last process using this ulp, acquiring ulp->lock
2263 * isn't required. Besides that, we are also protected against
2264 * IPC_RMID as we hold sma->sem_perm lock now
2265 */
2266 list_del_rcu(&un->list_proc);
2267
2268 /* perform adjustments registered in un */
2269 for (i = 0; i < sma->sem_nsems; i++) {
2270 struct sem *semaphore = &sma->sems[i];
2271 if (un->semadj[i]) {
2272 semaphore->semval += un->semadj[i];
2273 /*
2274 * Range checks of the new semaphore value,
2275 * not defined by sus:
2276 * - Some unices ignore the undo entirely
2277 * (e.g. HP UX 11i 11.22, Tru64 V5.1)
2278 * - some cap the value (e.g. FreeBSD caps
2279 * at 0, but doesn't enforce SEMVMX)
2280 *
2281 * Linux caps the semaphore value, both at 0
2282 * and at SEMVMX.
2283 *
2284 * Manfred <manfred@colorfullife.com>
2285 */
2286 if (semaphore->semval < 0)
2287 semaphore->semval = 0;
2288 if (semaphore->semval > SEMVMX)
2289 semaphore->semval = SEMVMX;
2290 semaphore->sempid = task_tgid_vnr(current);
2291 }
2292 }
2293 /* maybe some queued-up processes were waiting for this */
2294 do_smart_update(sma, NULL, 0, 1, &wake_q);
2295 sem_unlock(sma, -1);
2296 rcu_read_unlock();
2297 wake_up_q(&wake_q);
2298
/* The entry is unlinked from both lists; free it via kfree_rcu(). */
2299 kfree_rcu(un, rcu);
2300 }
2301 kfree(ulp);
2302 }
2303
2304 #ifdef CONFIG_PROC_FS
2305 static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
2306 {
2307 struct user_namespace *user_ns = seq_user_ns(s);
2308 struct kern_ipc_perm *ipcp = it;
2309 struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
2310 time64_t sem_otime;
2311
2312 /*
2313 * The proc interface isn't aware of sem_lock(), it calls
2314 * ipc_lock_object() directly (in sysvipc_find_ipc).
2315 * In order to stay compatible with sem_lock(), we must
2316 * enter / leave complex_mode.
2317 */
2318 complexmode_enter(sma);
2319
2320 sem_otime = get_semotime(sma);
2321
2322 seq_printf(s,
2323 "%10d %10d %4o %10u %5u %5u %5u %5u %10llu %10llu\n",
2324 sma->sem_perm.key,
2325 sma->sem_perm.id,
2326 sma->sem_perm.mode,
2327 sma->sem_nsems,
2328 from_kuid_munged(user_ns, sma->sem_perm.uid),
2329 from_kgid_munged(user_ns, sma->sem_perm.gid),
2330 from_kuid_munged(user_ns, sma->sem_perm.cuid),
2331 from_kgid_munged(user_ns, sma->sem_perm.cgid),
2332 sem_otime,
2333 sma->sem_ctime);
2334
2335 complexmode_tryleave(sma);
2336
2337 return 0;
2338 }
2339 #endif