/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/

/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
25 #include "path-util.h"
27 #include "cgroup-util.h"
30 #define CGROUP_CPU_QUOTA_PERIOD_USEC ((usec_t) 100 * USEC_PER_MSEC)
32 void cgroup_context_init(CGroupContext
*c
) {
35 /* Initialize everything to the kernel defaults, assuming the
36 * structure is preinitialized to 0 */
38 c
->cpu_shares
= (unsigned long) -1;
39 c
->startup_cpu_shares
= (unsigned long) -1;
40 c
->memory_limit
= (uint64_t) -1;
41 c
->blockio_weight
= (unsigned long) -1;
42 c
->startup_blockio_weight
= (unsigned long) -1;
44 c
->cpu_quota_per_sec_usec
= USEC_INFINITY
;
47 void cgroup_context_free_device_allow(CGroupContext
*c
, CGroupDeviceAllow
*a
) {
51 LIST_REMOVE(device_allow
, c
->device_allow
, a
);
56 void cgroup_context_free_blockio_device_weight(CGroupContext
*c
, CGroupBlockIODeviceWeight
*w
) {
60 LIST_REMOVE(device_weights
, c
->blockio_device_weights
, w
);
65 void cgroup_context_free_blockio_device_bandwidth(CGroupContext
*c
, CGroupBlockIODeviceBandwidth
*b
) {
69 LIST_REMOVE(device_bandwidths
, c
->blockio_device_bandwidths
, b
);
74 void cgroup_context_done(CGroupContext
*c
) {
77 while (c
->blockio_device_weights
)
78 cgroup_context_free_blockio_device_weight(c
, c
->blockio_device_weights
);
80 while (c
->blockio_device_bandwidths
)
81 cgroup_context_free_blockio_device_bandwidth(c
, c
->blockio_device_bandwidths
);
83 while (c
->device_allow
)
84 cgroup_context_free_device_allow(c
, c
->device_allow
);
87 void cgroup_context_dump(CGroupContext
*c
, FILE* f
, const char *prefix
) {
88 CGroupBlockIODeviceBandwidth
*b
;
89 CGroupBlockIODeviceWeight
*w
;
91 char u
[FORMAT_TIMESPAN_MAX
];
96 prefix
= strempty(prefix
);
99 "%sCPUAccounting=%s\n"
100 "%sBlockIOAccounting=%s\n"
101 "%sMemoryAccounting=%s\n"
103 "%sStartupCPUShares=%lu\n"
104 "%sCPUQuotaPerSecSec=%s\n"
105 "%sBlockIOWeight=%lu\n"
106 "%sStartupBlockIOWeight=%lu\n"
107 "%sMemoryLimit=%" PRIu64
"\n"
108 "%sDevicePolicy=%s\n"
110 prefix
, yes_no(c
->cpu_accounting
),
111 prefix
, yes_no(c
->blockio_accounting
),
112 prefix
, yes_no(c
->memory_accounting
),
113 prefix
, c
->cpu_shares
,
114 prefix
, c
->startup_cpu_shares
,
115 prefix
, format_timespan(u
, sizeof(u
), c
->cpu_quota_per_sec_usec
, 1),
116 prefix
, c
->blockio_weight
,
117 prefix
, c
->startup_blockio_weight
,
118 prefix
, c
->memory_limit
,
119 prefix
, cgroup_device_policy_to_string(c
->device_policy
),
120 prefix
, yes_no(c
->delegate
));
122 LIST_FOREACH(device_allow
, a
, c
->device_allow
)
124 "%sDeviceAllow=%s %s%s%s\n",
127 a
->r
? "r" : "", a
->w
? "w" : "", a
->m
? "m" : "");
129 LIST_FOREACH(device_weights
, w
, c
->blockio_device_weights
)
131 "%sBlockIODeviceWeight=%s %lu",
136 LIST_FOREACH(device_bandwidths
, b
, c
->blockio_device_bandwidths
) {
137 char buf
[FORMAT_BYTES_MAX
];
142 b
->read
? "BlockIOReadBandwidth" : "BlockIOWriteBandwidth",
144 format_bytes(buf
, sizeof(buf
), b
->bandwidth
));
/* Resolve a path to the block device (major:minor) relevant for blkio
 * settings: a block device node resolves to itself, a regular path
 * resolves to the whole disk backing the file system it lives on.
 * Returns 0 on success, negative errno-style error otherwise. */
static int lookup_blkio_device(const char *p, dev_t *dev) {
        struct stat st;
        int r;

        assert(p);
        assert(dev);

        r = stat(p, &st);
        if (r < 0)
                return log_warning_errno(errno, "Couldn't stat device %s: %m", p);

        if (S_ISBLK(st.st_mode))
                *dev = st.st_rdev;
        else if (major(st.st_dev) != 0) {
                /* If this is not a device node then find the block
                 * device this file is stored on */
                *dev = st.st_dev;

                /* If this is a partition, try to get the originating
                 * block device. Errors are ignored: *dev stays as-is. */
                block_get_whole_disk(*dev, dev);
        } else {
                log_warning("%s is not a block device and file system block device cannot be determined or is not local.", p);
                return -ENODEV;
        }

        return 0;
}
177 static int whitelist_device(const char *path
, const char *node
, const char *acc
) {
178 char buf
[2+DECIMAL_STR_MAX(dev_t
)*2+2+4];
185 if (stat(node
, &st
) < 0) {
186 log_warning("Couldn't stat device %s", node
);
190 if (!S_ISCHR(st
.st_mode
) && !S_ISBLK(st
.st_mode
)) {
191 log_warning("%s is not a device.", node
);
197 S_ISCHR(st
.st_mode
) ? 'c' : 'b',
198 major(st
.st_rdev
), minor(st
.st_rdev
),
201 r
= cg_set_attribute("devices", path
, "devices.allow", buf
);
203 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to set devices.allow on %s: %s", path
, strerror(-r
));
208 static int whitelist_major(const char *path
, const char *name
, char type
, const char *acc
) {
209 _cleanup_fclose_
FILE *f
= NULL
;
216 assert(type
== 'b' || type
== 'c');
218 f
= fopen("/proc/devices", "re");
220 return log_warning_errno(errno
, "Cannot open /proc/devices to resolve %s (%c): %m", name
, type
);
222 FOREACH_LINE(line
, f
, goto fail
) {
223 char buf
[2+DECIMAL_STR_MAX(unsigned)+3+4], *p
, *w
;
228 if (type
== 'c' && streq(line
, "Character devices:")) {
233 if (type
== 'b' && streq(line
, "Block devices:")) {
248 w
= strpbrk(p
, WHITESPACE
);
253 r
= safe_atou(p
, &maj
);
260 w
+= strspn(w
, WHITESPACE
);
262 if (fnmatch(name
, w
, 0) != 0)
271 r
= cg_set_attribute("devices", path
, "devices.allow", buf
);
273 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to set devices.allow on %s: %s", path
, strerror(-r
));
279 log_warning_errno(errno
, "Failed to read /proc/devices: %m");
283 void cgroup_context_apply(CGroupContext
*c
, CGroupControllerMask mask
, const char *path
, ManagerState state
) {
293 /* Some cgroup attributes are not support on the root cgroup,
294 * hence silently ignore */
295 is_root
= isempty(path
) || path_equal(path
, "/");
297 if ((mask
& CGROUP_CPU
) && !is_root
) {
298 char buf
[MAX(DECIMAL_STR_MAX(unsigned long), DECIMAL_STR_MAX(usec_t
)) + 1];
300 sprintf(buf
, "%lu\n",
301 IN_SET(state
, MANAGER_STARTING
, MANAGER_INITIALIZING
) && c
->startup_cpu_shares
!= (unsigned long) -1 ? c
->startup_cpu_shares
:
302 c
->cpu_shares
!= (unsigned long) -1 ? c
->cpu_shares
: 1024);
303 r
= cg_set_attribute("cpu", path
, "cpu.shares", buf
);
305 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to set cpu.shares on %s: %s", path
, strerror(-r
));
307 sprintf(buf
, USEC_FMT
"\n", CGROUP_CPU_QUOTA_PERIOD_USEC
);
308 r
= cg_set_attribute("cpu", path
, "cpu.cfs_period_us", buf
);
310 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to set cpu.cfs_period_us on %s: %s", path
, strerror(-r
));
312 if (c
->cpu_quota_per_sec_usec
!= USEC_INFINITY
) {
313 sprintf(buf
, USEC_FMT
"\n", c
->cpu_quota_per_sec_usec
* CGROUP_CPU_QUOTA_PERIOD_USEC
/ USEC_PER_SEC
);
314 r
= cg_set_attribute("cpu", path
, "cpu.cfs_quota_us", buf
);
316 r
= cg_set_attribute("cpu", path
, "cpu.cfs_quota_us", "-1");
318 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to set cpu.cfs_quota_us on %s: %s", path
, strerror(-r
));
321 if (mask
& CGROUP_BLKIO
) {
322 char buf
[MAX3(DECIMAL_STR_MAX(unsigned long)+1,
323 DECIMAL_STR_MAX(dev_t
)*2+2+DECIMAL_STR_MAX(unsigned long)*1,
324 DECIMAL_STR_MAX(dev_t
)*2+2+DECIMAL_STR_MAX(uint64_t)+1)];
325 CGroupBlockIODeviceWeight
*w
;
326 CGroupBlockIODeviceBandwidth
*b
;
329 sprintf(buf
, "%lu\n", IN_SET(state
, MANAGER_STARTING
, MANAGER_INITIALIZING
) && c
->startup_blockio_weight
!= (unsigned long) -1 ? c
->startup_blockio_weight
:
330 c
->blockio_weight
!= (unsigned long) -1 ? c
->blockio_weight
: 1000);
331 r
= cg_set_attribute("blkio", path
, "blkio.weight", buf
);
333 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to set blkio.weight on %s: %s", path
, strerror(-r
));
335 /* FIXME: no way to reset this list */
336 LIST_FOREACH(device_weights
, w
, c
->blockio_device_weights
) {
339 r
= lookup_blkio_device(w
->path
, &dev
);
343 sprintf(buf
, "%u:%u %lu", major(dev
), minor(dev
), w
->weight
);
344 r
= cg_set_attribute("blkio", path
, "blkio.weight_device", buf
);
346 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to set blkio.weight_device on %s: %s", path
, strerror(-r
));
350 /* FIXME: no way to reset this list */
351 LIST_FOREACH(device_bandwidths
, b
, c
->blockio_device_bandwidths
) {
355 r
= lookup_blkio_device(b
->path
, &dev
);
359 a
= b
->read
? "blkio.throttle.read_bps_device" : "blkio.throttle.write_bps_device";
361 sprintf(buf
, "%u:%u %" PRIu64
"\n", major(dev
), minor(dev
), b
->bandwidth
);
362 r
= cg_set_attribute("blkio", path
, a
, buf
);
364 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to set %s on %s: %s", a
, path
, strerror(-r
));
368 if (mask
& CGROUP_MEMORY
) {
369 if (c
->memory_limit
!= (uint64_t) -1) {
370 char buf
[DECIMAL_STR_MAX(uint64_t) + 1];
372 sprintf(buf
, "%" PRIu64
"\n", c
->memory_limit
);
373 r
= cg_set_attribute("memory", path
, "memory.limit_in_bytes", buf
);
375 r
= cg_set_attribute("memory", path
, "memory.limit_in_bytes", "-1");
378 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to set memory.limit_in_bytes on %s: %s", path
, strerror(-r
));
381 if ((mask
& CGROUP_DEVICE
) && !is_root
) {
382 CGroupDeviceAllow
*a
;
384 if (c
->device_allow
|| c
->device_policy
!= CGROUP_AUTO
)
385 r
= cg_set_attribute("devices", path
, "devices.deny", "a");
387 r
= cg_set_attribute("devices", path
, "devices.allow", "a");
389 log_full(r
== -ENOENT
? LOG_DEBUG
: LOG_WARNING
, "Failed to reset devices.list on %s: %s", path
, strerror(-r
));
391 if (c
->device_policy
== CGROUP_CLOSED
||
392 (c
->device_policy
== CGROUP_AUTO
&& c
->device_allow
)) {
393 static const char auto_devices
[] =
394 "/dev/null\0" "rwm\0"
395 "/dev/zero\0" "rwm\0"
396 "/dev/full\0" "rwm\0"
397 "/dev/random\0" "rwm\0"
398 "/dev/urandom\0" "rwm\0"
400 "/dev/pts/ptmx\0" "rw\0"; /* /dev/pts/ptmx may not be duplicated, but accessed */
404 NULSTR_FOREACH_PAIR(x
, y
, auto_devices
)
405 whitelist_device(path
, x
, y
);
407 whitelist_major(path
, "pts", 'c', "rw");
408 whitelist_major(path
, "kdbus", 'c', "rw");
409 whitelist_major(path
, "kdbus/*", 'c', "rw");
412 LIST_FOREACH(device_allow
, a
, c
->device_allow
) {
428 if (startswith(a
->path
, "/dev/"))
429 whitelist_device(path
, a
->path
, acc
);
430 else if (startswith(a
->path
, "block-"))
431 whitelist_major(path
, a
->path
+ 6, 'b', acc
);
432 else if (startswith(a
->path
, "char-"))
433 whitelist_major(path
, a
->path
+ 5, 'c', acc
);
435 log_debug("Ignoring device %s while writing cgroup attribute.", a
->path
);
440 CGroupControllerMask
cgroup_context_get_mask(CGroupContext
*c
) {
441 CGroupControllerMask mask
= 0;
443 /* Figure out which controllers we need */
445 if (c
->cpu_accounting
||
446 c
->cpu_shares
!= (unsigned long) -1 ||
447 c
->startup_cpu_shares
!= (unsigned long) -1 ||
448 c
->cpu_quota_per_sec_usec
!= USEC_INFINITY
)
449 mask
|= CGROUP_CPUACCT
| CGROUP_CPU
;
451 if (c
->blockio_accounting
||
452 c
->blockio_weight
!= (unsigned long) -1 ||
453 c
->startup_blockio_weight
!= (unsigned long) -1 ||
454 c
->blockio_device_weights
||
455 c
->blockio_device_bandwidths
)
456 mask
|= CGROUP_BLKIO
;
458 if (c
->memory_accounting
||
459 c
->memory_limit
!= (uint64_t) -1)
460 mask
|= CGROUP_MEMORY
;
462 if (c
->device_allow
||
463 c
->device_policy
!= CGROUP_AUTO
)
464 mask
|= CGROUP_DEVICE
;
469 CGroupControllerMask
unit_get_cgroup_mask(Unit
*u
) {
472 c
= unit_get_cgroup_context(u
);
476 /* If delegation is turned on, then turn on all cgroups,
477 * unless the process we fork into it is known to drop
478 * privileges anyway, and shouldn't get access to the
479 * controllers anyway. */
484 e
= unit_get_exec_context(u
);
485 if (!e
|| exec_context_maintains_privileges(e
))
486 return _CGROUP_CONTROLLER_MASK_ALL
;
489 return cgroup_context_get_mask(c
);
492 CGroupControllerMask
unit_get_members_mask(Unit
*u
) {
495 if (u
->cgroup_members_mask_valid
)
496 return u
->cgroup_members_mask
;
498 u
->cgroup_members_mask
= 0;
500 if (u
->type
== UNIT_SLICE
) {
504 SET_FOREACH(member
, u
->dependencies
[UNIT_BEFORE
], i
) {
509 if (UNIT_DEREF(member
->slice
) != u
)
512 u
->cgroup_members_mask
|=
513 unit_get_cgroup_mask(member
) |
514 unit_get_members_mask(member
);
518 u
->cgroup_members_mask_valid
= true;
519 return u
->cgroup_members_mask
;
522 CGroupControllerMask
unit_get_siblings_mask(Unit
*u
) {
525 if (UNIT_ISSET(u
->slice
))
526 return unit_get_members_mask(UNIT_DEREF(u
->slice
));
528 return unit_get_cgroup_mask(u
) | unit_get_members_mask(u
);
531 CGroupControllerMask
unit_get_target_mask(Unit
*u
) {
532 CGroupControllerMask mask
;
534 mask
= unit_get_cgroup_mask(u
) | unit_get_members_mask(u
) | unit_get_siblings_mask(u
);
535 mask
&= u
->manager
->cgroup_supported
;
540 /* Recurse from a unit up through its containing slices, propagating
541 * mask bits upward. A unit is also member of itself. */
542 void unit_update_cgroup_members_masks(Unit
*u
) {
543 CGroupControllerMask m
;
548 /* Calculate subtree mask */
549 m
= unit_get_cgroup_mask(u
) | unit_get_members_mask(u
);
551 /* See if anything changed from the previous invocation. If
552 * not, we're done. */
553 if (u
->cgroup_subtree_mask_valid
&& m
== u
->cgroup_subtree_mask
)
557 u
->cgroup_subtree_mask_valid
&&
558 ((m
& ~u
->cgroup_subtree_mask
) != 0) &&
559 ((~m
& u
->cgroup_subtree_mask
) == 0);
561 u
->cgroup_subtree_mask
= m
;
562 u
->cgroup_subtree_mask_valid
= true;
564 if (UNIT_ISSET(u
->slice
)) {
565 Unit
*s
= UNIT_DEREF(u
->slice
);
568 /* There's more set now than before. We
569 * propagate the new mask to the parent's mask
570 * (not caring if it actually was valid or
573 s
->cgroup_members_mask
|= m
;
576 /* There's less set now than before (or we
577 * don't know), we need to recalculate
578 * everything, so let's invalidate the
579 * parent's members mask */
581 s
->cgroup_members_mask_valid
= false;
583 /* And now make sure that this change also hits our
585 unit_update_cgroup_members_masks(s
);
589 static const char *migrate_callback(CGroupControllerMask mask
, void *userdata
) {
596 if (u
->cgroup_path
&&
597 u
->cgroup_realized
&&
598 (u
->cgroup_realized_mask
& mask
) == mask
)
599 return u
->cgroup_path
;
601 u
= UNIT_DEREF(u
->slice
);
607 static int unit_create_cgroups(Unit
*u
, CGroupControllerMask mask
) {
613 c
= unit_get_cgroup_context(u
);
617 if (!u
->cgroup_path
) {
618 _cleanup_free_
char *path
= NULL
;
620 path
= unit_default_cgroup_path(u
);
624 r
= hashmap_put(u
->manager
->cgroup_unit
, path
, u
);
626 log_error(r
== -EEXIST
? "cgroup %s exists already: %s" : "hashmap_put failed for %s: %s", path
, strerror(-r
));
630 u
->cgroup_path
= path
;
635 /* First, create our own group */
636 r
= cg_create_everywhere(u
->manager
->cgroup_supported
, mask
, u
->cgroup_path
);
638 return log_error_errno(r
, "Failed to create cgroup %s: %m", u
->cgroup_path
);
640 /* Keep track that this is now realized */
641 u
->cgroup_realized
= true;
642 u
->cgroup_realized_mask
= mask
;
644 if (u
->type
!= UNIT_SLICE
&& !c
->delegate
) {
646 /* Then, possibly move things over, but not if
647 * subgroups may contain processes, which is the case
648 * for slice and delegation units. */
649 r
= cg_migrate_everywhere(u
->manager
->cgroup_supported
, u
->cgroup_path
, u
->cgroup_path
, migrate_callback
, u
);
651 log_warning_errno(r
, "Failed to migrate cgroup from to %s: %m", u
->cgroup_path
);
657 int unit_attach_pids_to_cgroup(Unit
*u
) {
661 r
= unit_realize_cgroup(u
);
665 r
= cg_attach_many_everywhere(u
->manager
->cgroup_supported
, u
->cgroup_path
, u
->pids
, migrate_callback
, u
);
672 static bool unit_has_mask_realized(Unit
*u
, CGroupControllerMask mask
) {
675 return u
->cgroup_realized
&& u
->cgroup_realized_mask
== mask
;
678 /* Check if necessary controllers and attributes for a unit are in place.
681 * If not, create paths, move processes over, and set attributes.
683 * Returns 0 on success and < 0 on failure. */
684 static int unit_realize_cgroup_now(Unit
*u
, ManagerState state
) {
685 CGroupControllerMask mask
;
690 if (u
->in_cgroup_queue
) {
691 LIST_REMOVE(cgroup_queue
, u
->manager
->cgroup_queue
, u
);
692 u
->in_cgroup_queue
= false;
695 mask
= unit_get_target_mask(u
);
697 if (unit_has_mask_realized(u
, mask
))
700 /* First, realize parents */
701 if (UNIT_ISSET(u
->slice
)) {
702 r
= unit_realize_cgroup_now(UNIT_DEREF(u
->slice
), state
);
707 /* And then do the real work */
708 r
= unit_create_cgroups(u
, mask
);
712 /* Finally, apply the necessary attributes. */
713 cgroup_context_apply(unit_get_cgroup_context(u
), mask
, u
->cgroup_path
, state
);
718 static void unit_add_to_cgroup_queue(Unit
*u
) {
720 if (u
->in_cgroup_queue
)
723 LIST_PREPEND(cgroup_queue
, u
->manager
->cgroup_queue
, u
);
724 u
->in_cgroup_queue
= true;
727 unsigned manager_dispatch_cgroup_queue(Manager
*m
) {
733 state
= manager_state(m
);
735 while ((i
= m
->cgroup_queue
)) {
736 assert(i
->in_cgroup_queue
);
738 r
= unit_realize_cgroup_now(i
, state
);
740 log_warning_errno(r
, "Failed to realize cgroups for queued unit %s: %m", i
->id
);
748 static void unit_queue_siblings(Unit
*u
) {
751 /* This adds the siblings of the specified unit and the
752 * siblings of all parent units to the cgroup queue. (But
753 * neither the specified unit itself nor the parents.) */
755 while ((slice
= UNIT_DEREF(u
->slice
))) {
759 SET_FOREACH(m
, slice
->dependencies
[UNIT_BEFORE
], i
) {
763 /* Skip units that have a dependency on the slice
764 * but aren't actually in it. */
765 if (UNIT_DEREF(m
->slice
) != slice
)
768 /* No point in doing cgroup application for units
769 * without active processes. */
770 if (UNIT_IS_INACTIVE_OR_FAILED(unit_active_state(m
)))
773 /* If the unit doesn't need any new controllers
774 * and has current ones realized, it doesn't need
776 if (unit_has_mask_realized(m
, unit_get_target_mask(m
)))
779 unit_add_to_cgroup_queue(m
);
786 int unit_realize_cgroup(Unit
*u
) {
791 c
= unit_get_cgroup_context(u
);
795 /* So, here's the deal: when realizing the cgroups for this
796 * unit, we need to first create all parents, but there's more
797 * actually: for the weight-based controllers we also need to
798 * make sure that all our siblings (i.e. units that are in the
799 * same slice as we are) have cgroups, too. Otherwise, things
800 * would become very uneven as each of their processes would
801 * get as much resources as all our group together. This call
802 * will synchronously create the parent cgroups, but will
803 * defer work on the siblings to the next event loop
806 /* Add all sibling slices to the cgroup queue. */
807 unit_queue_siblings(u
);
809 /* And realize this one now (and apply the values) */
810 return unit_realize_cgroup_now(u
, manager_state(u
->manager
));
813 void unit_destroy_cgroup_if_empty(Unit
*u
) {
821 r
= cg_trim_everywhere(u
->manager
->cgroup_supported
, u
->cgroup_path
, !unit_has_name(u
, SPECIAL_ROOT_SLICE
));
823 log_debug_errno(r
, "Failed to destroy cgroup %s: %m", u
->cgroup_path
);
827 hashmap_remove(u
->manager
->cgroup_unit
, u
->cgroup_path
);
829 free(u
->cgroup_path
);
830 u
->cgroup_path
= NULL
;
831 u
->cgroup_realized
= false;
832 u
->cgroup_realized_mask
= 0;
835 pid_t
unit_search_main_pid(Unit
*u
) {
836 _cleanup_fclose_
FILE *f
= NULL
;
837 pid_t pid
= 0, npid
, mypid
;
844 if (cg_enumerate_processes(SYSTEMD_CGROUP_CONTROLLER
, u
->cgroup_path
, &f
) < 0)
848 while (cg_read_pid(f
, &npid
) > 0) {
854 /* Ignore processes that aren't our kids */
855 if (get_parent_of_pid(npid
, &ppid
) >= 0 && ppid
!= mypid
)
859 /* Dang, there's more than one daemonized PID
860 in this group, so we don't know what process
861 is the main process. */
872 int manager_setup_cgroup(Manager
*m
) {
873 _cleanup_free_
char *path
= NULL
;
878 /* 1. Determine hierarchy */
879 free(m
->cgroup_root
);
880 m
->cgroup_root
= NULL
;
882 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, 0, &m
->cgroup_root
);
884 return log_error_errno(r
, "Cannot determine cgroup we are running in: %m");
886 /* LEGACY: Already in /system.slice? If so, let's cut this
887 * off. This is to support live upgrades from older systemd
888 * versions where PID 1 was moved there. */
889 if (m
->running_as
== SYSTEMD_SYSTEM
) {
892 e
= endswith(m
->cgroup_root
, "/" SPECIAL_SYSTEM_SLICE
);
894 e
= endswith(m
->cgroup_root
, "/system");
899 /* And make sure to store away the root value without trailing
900 * slash, even for the root dir, so that we can easily prepend
902 if (streq(m
->cgroup_root
, "/"))
903 m
->cgroup_root
[0] = 0;
906 r
= cg_get_path(SYSTEMD_CGROUP_CONTROLLER
, m
->cgroup_root
, NULL
, &path
);
908 return log_error_errno(r
, "Cannot find cgroup mount point: %m");
910 log_debug("Using cgroup controller " SYSTEMD_CGROUP_CONTROLLER
". File system hierarchy is at %s.", path
);
913 /* 3. Install agent */
914 if (m
->running_as
== SYSTEMD_SYSTEM
) {
915 r
= cg_install_release_agent(SYSTEMD_CGROUP_CONTROLLER
, SYSTEMD_CGROUP_AGENT_PATH
);
917 log_warning_errno(r
, "Failed to install release agent, ignoring: %m");
919 log_debug("Installed release agent.");
921 log_debug("Release agent already installed.");
924 /* 4. Make sure we are in the root cgroup */
925 r
= cg_create_and_attach(SYSTEMD_CGROUP_CONTROLLER
, m
->cgroup_root
, 0);
927 return log_error_errno(r
, "Failed to create root cgroup hierarchy: %m");
929 /* 5. And pin it, so that it cannot be unmounted */
930 safe_close(m
->pin_cgroupfs_fd
);
932 m
->pin_cgroupfs_fd
= open(path
, O_RDONLY
|O_CLOEXEC
|O_DIRECTORY
|O_NOCTTY
|O_NONBLOCK
);
933 if (m
->pin_cgroupfs_fd
< 0)
934 return log_error_errno(errno
, "Failed to open pin file: %m");
936 /* 6. Always enable hierarchial support if it exists... */
937 cg_set_attribute("memory", "/", "memory.use_hierarchy", "1");
940 /* 7. Figure out which controllers are supported */
941 m
->cgroup_supported
= cg_mask_supported();
946 void manager_shutdown_cgroup(Manager
*m
, bool delete) {
949 /* We can't really delete the group, since we are in it. But
951 if (delete && m
->cgroup_root
)
952 cg_trim(SYSTEMD_CGROUP_CONTROLLER
, m
->cgroup_root
, false);
954 m
->pin_cgroupfs_fd
= safe_close(m
->pin_cgroupfs_fd
);
956 free(m
->cgroup_root
);
957 m
->cgroup_root
= NULL
;
960 Unit
* manager_get_unit_by_cgroup(Manager
*m
, const char *cgroup
) {
967 u
= hashmap_get(m
->cgroup_unit
, cgroup
);
981 u
= hashmap_get(m
->cgroup_unit
, p
);
987 Unit
*manager_get_unit_by_pid(Manager
*m
, pid_t pid
) {
988 _cleanup_free_
char *cgroup
= NULL
;
996 r
= cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER
, pid
, &cgroup
);
1000 return manager_get_unit_by_cgroup(m
, cgroup
);
1003 int manager_notify_cgroup_empty(Manager
*m
, const char *cgroup
) {
1010 u
= manager_get_unit_by_cgroup(m
, cgroup
);
1012 r
= cg_is_empty_recursive(SYSTEMD_CGROUP_CONTROLLER
, u
->cgroup_path
, true);
1014 if (UNIT_VTABLE(u
)->notify_cgroup_empty
)
1015 UNIT_VTABLE(u
)->notify_cgroup_empty(u
);
1017 unit_add_to_gc_queue(u
);
1024 static const char* const cgroup_device_policy_table
[_CGROUP_DEVICE_POLICY_MAX
] = {
1025 [CGROUP_AUTO
] = "auto",
1026 [CGROUP_CLOSED
] = "closed",
1027 [CGROUP_STRICT
] = "strict",
1030 DEFINE_STRING_TABLE_LOOKUP(cgroup_device_policy
, CGroupDevicePolicy
);