src/basic/cgroup-util.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2010 Lennart Poettering
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 #include <dirent.h>
  21 #include <errno.h>
  22 #include <ftw.h>
  23 #include <limits.h>
  24 #include <signal.h>
  25 #include <stddef.h>
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include <sys/stat.h>
  29 #include <sys/statfs.h>
  30 #include <sys/types.h>
  31 #include <unistd.h>
  32
  33 #include "alloc-util.h"
  34 #include "cgroup-util.h"
  35 #include "def.h"
  36 #include "dirent-util.h"
  37 #include "extract-word.h"
  38 #include "fd-util.h"
  39 #include "fileio.h"
  40 #include "formats-util.h"
  41 #include "fs-util.h"
  42 #include "log.h"
  43 #include "login-util.h"
  44 #include "macro.h"
  45 #include "missing.h"
  46 #include "mkdir.h"
  47 #include "parse-util.h"
  48 #include "path-util.h"
  49 #include "proc-cmdline.h"
  50 #include "process-util.h"
  51 #include "set.h"
  52 #include "special.h"
  53 #include "stat-util.h"
  54 #include "stdio-util.h"
  55 #include "string-table.h"
  56 #include "string-util.h"
  57 #include "unit-name.h"
  58 #include "user-util.h"
  59
  60 int cg_enumerate_processes(const char *controller, const char *path, FILE **_f) {
  61         _cleanup_free_ char *fs = NULL;
  62         FILE *f;
  63         int r;
  64
  65         assert(_f);
  66
  67         r = cg_get_path(controller, path, "cgroup.procs", &fs);
  68         if (r < 0)
  69                 return r;
  70
  71         f = fopen(fs, "re");
  72         if (!f)
  73                 return -errno;
  74
  75         *_f = f;
  76         return 0;
  77 }
  78
  79 int cg_read_pid(FILE *f, pid_t *_pid) {
  80         unsigned long ul;
  81
  82         /* Note that the cgroup.procs might contain duplicates! See
  83          * cgroups.txt for details. */
  84
  85         assert(f);
  86         assert(_pid);
  87
  88         errno = 0;
  89         if (fscanf(f, "%lu", &ul) != 1) {
  90
  91                 if (feof(f))
  92                         return 0;
  93
  94                 return errno > 0 ? -errno : -EIO;
  95         }
  96
  97         if (ul <= 0)
  98                 return -EIO;
  99
 100         *_pid = (pid_t) ul;
 101         return 1;
 102 }
 103
 104 int cg_read_event(const char *controller, const char *path, const char *event,
 105                   char **val)
 106 {
 107         _cleanup_free_ char *events = NULL, *content = NULL;
 108         char *p, *line;
 109         int r;
 110
 111         r = cg_get_path(controller, path, "cgroup.events", &events);
 112         if (r < 0)
 113                 return r;
 114
 115         r = read_full_file(events, &content, NULL);
 116         if (r < 0)
 117                 return r;
 118
 119         p = content;
 120         while ((line = strsep(&p, "\n"))) {
 121                 char *key;
 122
 123                 key = strsep(&line, " ");
 124                 if (!key || !line)
 125                         return -EINVAL;
 126
 127                 if (strcmp(key, event))
 128                         continue;
 129
 130                 *val = strdup(line);
 131                 return 0;
 132         }
 133
 134         return -ENOENT;
 135 }
 136
 137 int cg_enumerate_subgroups(const char *controller, const char *path, DIR **_d) {
 138         _cleanup_free_ char *fs = NULL;
 139         int r;
 140         DIR *d;
 141
 142         assert(_d);
 143
 144         /* This is not recursive! */
 145
 146         r = cg_get_path(controller, path, NULL, &fs);
 147         if (r < 0)
 148                 return r;
 149
 150         d = opendir(fs);
 151         if (!d)
 152                 return -errno;
 153
 154         *_d = d;
 155         return 0;
 156 }
 157
 158 int cg_read_subgroup(DIR *d, char **fn) {
 159         struct dirent *de;
 160
 161         assert(d);
 162         assert(fn);
 163
 164         FOREACH_DIRENT_ALL(de, d, return -errno) {
 165                 char *b;
 166
 167                 if (de->d_type != DT_DIR)
 168                         continue;
 169
 170                 if (streq(de->d_name, ".") ||
 171                     streq(de->d_name, ".."))
 172                         continue;
 173
 174                 b = strdup(de->d_name);
 175                 if (!b)
 176                         return -ENOMEM;
 177
 178                 *fn = b;
 179                 return 1;
 180         }
 181
 182         return 0;
 183 }
 184
 185 int cg_rmdir(const char *controller, const char *path) {
 186         _cleanup_free_ char *p = NULL;
 187         int r;
 188
 189         r = cg_get_path(controller, path, NULL, &p);
 190         if (r < 0)
 191                 return r;
 192
 193         r = rmdir(p);
 194         if (r < 0 && errno != ENOENT)
 195                 return -errno;
 196
 197         return 0;
 198 }
 199
 200 int cg_kill(
 201                 const char *controller,
 202                 const char *path,
 203                 int sig,
 204                 CGroupFlags flags,
 205                 Set *s,
 206                 cg_kill_log_func_t log_kill,
 207                 void *userdata) {
 208
 209         _cleanup_set_free_ Set *allocated_set = NULL;
 210         bool done = false;
 211         int r, ret = 0;
 212         pid_t my_pid;
 213
 214         assert(sig >= 0);
 215
 216          /* Don't send SIGCONT twice. Also, SIGKILL always works even when process is suspended, hence don't send
 217           * SIGCONT on SIGKILL. */
 218         if (IN_SET(sig, SIGCONT, SIGKILL))
 219                 flags &= ~CGROUP_SIGCONT;
 220
 221         /* This goes through the tasks list and kills them all. This
 222          * is repeated until no further processes are added to the
 223          * tasks list, to properly handle forking processes */
 224
 225         if (!s) {
 226                 s = allocated_set = set_new(NULL);
 227                 if (!s)
 228                         return -ENOMEM;
 229         }
 230
 231         my_pid = getpid();
 232
 233         do {
 234                 _cleanup_fclose_ FILE *f = NULL;
 235                 pid_t pid = 0;
 236                 done = true;
 237
 238                 r = cg_enumerate_processes(controller, path, &f);
 239                 if (r < 0) {
 240                         if (ret >= 0 && r != -ENOENT)
 241                                 return r;
 242
 243                         return ret;
 244                 }
 245
 246                 while ((r = cg_read_pid(f, &pid)) > 0) {
 247
 248                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 249                                 continue;
 250
 251                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 252                                 continue;
 253
 254                         if (log_kill)
 255                                 log_kill(pid, sig, userdata);
 256
 257                         /* If we haven't killed this process yet, kill
 258                          * it */
 259                         if (kill(pid, sig) < 0) {
 260                                 if (ret >= 0 && errno != ESRCH)
 261                                         ret = -errno;
 262                         } else {
 263                                 if (flags & CGROUP_SIGCONT)
 264                                         (void) kill(pid, SIGCONT);
 265
 266                                 if (ret == 0)
 267                                         ret = 1;
 268                         }
 269
 270                         done = false;
 271
 272                         r = set_put(s, PID_TO_PTR(pid));
 273                         if (r < 0) {
 274                                 if (ret >= 0)
 275                                         return r;
 276
 277                                 return ret;
 278                         }
 279                 }
 280
 281                 if (r < 0) {
 282                         if (ret >= 0)
 283                                 return r;
 284
 285                         return ret;
 286                 }
 287
 288                 /* To avoid racing against processes which fork
 289                  * quicker than we can kill them we repeat this until
 290                  * no new pids need to be killed. */
 291
 292         } while (!done);
 293
 294         return ret;
 295 }
 296
 297 int cg_kill_recursive(
 298                 const char *controller,
 299                 const char *path,
 300                 int sig,
 301                 CGroupFlags flags,
 302                 Set *s,
 303                 cg_kill_log_func_t log_kill,
 304                 void *userdata) {
 305
 306         _cleanup_set_free_ Set *allocated_set = NULL;
 307         _cleanup_closedir_ DIR *d = NULL;
 308         int r, ret;
 309         char *fn;
 310
 311         assert(path);
 312         assert(sig >= 0);
 313
 314         if (!s) {
 315                 s = allocated_set = set_new(NULL);
 316                 if (!s)
 317                         return -ENOMEM;
 318         }
 319
 320         ret = cg_kill(controller, path, sig, flags, s, log_kill, userdata);
 321
 322         r = cg_enumerate_subgroups(controller, path, &d);
 323         if (r < 0) {
 324                 if (ret >= 0 && r != -ENOENT)
 325                         return r;
 326
 327                 return ret;
 328         }
 329
 330         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 331                 _cleanup_free_ char *p = NULL;
 332
 333                 p = strjoin(path, "/", fn, NULL);
 334                 free(fn);
 335                 if (!p)
 336                         return -ENOMEM;
 337
 338                 r = cg_kill_recursive(controller, p, sig, flags, s, log_kill, userdata);
 339                 if (r != 0 && ret >= 0)
 340                         ret = r;
 341         }
 342         if (ret >= 0 && r < 0)
 343                 ret = r;
 344
 345         if (flags & CGROUP_REMOVE) {
 346                 r = cg_rmdir(controller, path);
 347                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 348                         return r;
 349         }
 350
 351         return ret;
 352 }
 353
 354 int cg_migrate(
 355                 const char *cfrom,
 356                 const char *pfrom,
 357                 const char *cto,
 358                 const char *pto,
 359                 CGroupFlags flags) {
 360
 361         bool done = false;
 362         _cleanup_set_free_ Set *s = NULL;
 363         int r, ret = 0;
 364         pid_t my_pid;
 365
 366         assert(cfrom);
 367         assert(pfrom);
 368         assert(cto);
 369         assert(pto);
 370
 371         s = set_new(NULL);
 372         if (!s)
 373                 return -ENOMEM;
 374
 375         my_pid = getpid();
 376
 377         do {
 378                 _cleanup_fclose_ FILE *f = NULL;
 379                 pid_t pid = 0;
 380                 done = true;
 381
 382                 r = cg_enumerate_processes(cfrom, pfrom, &f);
 383                 if (r < 0) {
 384                         if (ret >= 0 && r != -ENOENT)
 385                                 return r;
 386
 387                         return ret;
 388                 }
 389
 390                 while ((r = cg_read_pid(f, &pid)) > 0) {
 391
 392                         /* This might do weird stuff if we aren't a
 393                          * single-threaded program. However, we
 394                          * luckily know we are not */
 395                         if ((flags & CGROUP_IGNORE_SELF) && pid == my_pid)
 396                                 continue;
 397
 398                         if (set_get(s, PID_TO_PTR(pid)) == PID_TO_PTR(pid))
 399                                 continue;
 400
 401                         /* Ignore kernel threads. Since they can only
 402                          * exist in the root cgroup, we only check for
 403                          * them there. */
 404                         if (cfrom &&
 405                             (isempty(pfrom) || path_equal(pfrom, "/")) &&
 406                             is_kernel_thread(pid) > 0)
 407                                 continue;
 408
 409                         r = cg_attach(cto, pto, pid);
 410                         if (r < 0) {
 411                                 if (ret >= 0 && r != -ESRCH)
 412                                         ret = r;
 413                         } else if (ret == 0)
 414                                 ret = 1;
 415
 416                         done = false;
 417
 418                         r = set_put(s, PID_TO_PTR(pid));
 419                         if (r < 0) {
 420                                 if (ret >= 0)
 421                                         return r;
 422
 423                                 return ret;
 424                         }
 425                 }
 426
 427                 if (r < 0) {
 428                         if (ret >= 0)
 429                                 return r;
 430
 431                         return ret;
 432                 }
 433         } while (!done);
 434
 435         return ret;
 436 }
 437
 438 int cg_migrate_recursive(
 439                 const char *cfrom,
 440                 const char *pfrom,
 441                 const char *cto,
 442                 const char *pto,
 443                 CGroupFlags flags) {
 444
 445         _cleanup_closedir_ DIR *d = NULL;
 446         int r, ret = 0;
 447         char *fn;
 448
 449         assert(cfrom);
 450         assert(pfrom);
 451         assert(cto);
 452         assert(pto);
 453
 454         ret = cg_migrate(cfrom, pfrom, cto, pto, flags);
 455
 456         r = cg_enumerate_subgroups(cfrom, pfrom, &d);
 457         if (r < 0) {
 458                 if (ret >= 0 && r != -ENOENT)
 459                         return r;
 460
 461                 return ret;
 462         }
 463
 464         while ((r = cg_read_subgroup(d, &fn)) > 0) {
 465                 _cleanup_free_ char *p = NULL;
 466
 467                 p = strjoin(pfrom, "/", fn, NULL);
 468                 free(fn);
 469                 if (!p)
 470                         return -ENOMEM;
 471
 472                 r = cg_migrate_recursive(cfrom, p, cto, pto, flags);
 473                 if (r != 0 && ret >= 0)
 474                         ret = r;
 475         }
 476
 477         if (r < 0 && ret >= 0)
 478                 ret = r;
 479
 480         if (flags & CGROUP_REMOVE) {
 481                 r = cg_rmdir(cfrom, pfrom);
 482                 if (r < 0 && ret >= 0 && r != -ENOENT && r != -EBUSY)
 483                         return r;
 484         }
 485
 486         return ret;
 487 }
 488
 489 int cg_migrate_recursive_fallback(
 490                 const char *cfrom,
 491                 const char *pfrom,
 492                 const char *cto,
 493                 const char *pto,
 494                 CGroupFlags flags) {
 495
 496         int r;
 497
 498         assert(cfrom);
 499         assert(pfrom);
 500         assert(cto);
 501         assert(pto);
 502
 503         r = cg_migrate_recursive(cfrom, pfrom, cto, pto, flags);
 504         if (r < 0) {
 505                 char prefix[strlen(pto) + 1];
 506
 507                 /* This didn't work? Then let's try all prefixes of the destination */
 508
 509                 PATH_FOREACH_PREFIX(prefix, pto) {
 510                         int q;
 511
 512                         q = cg_migrate_recursive(cfrom, pfrom, cto, prefix, flags);
 513                         if (q >= 0)
 514                                 return q;
 515                 }
 516         }
 517
 518         return r;
 519 }
 520
 521 static const char *controller_to_dirname(const char *controller) {
 522         const char *e;
 523
 524         assert(controller);
 525
 526         /* Converts a controller name to the directory name below
 527          * /sys/fs/cgroup/ we want to mount it to. Effectively, this
 528          * just cuts off the name= prefixed used for named
 529          * hierarchies, if it is specified. */
 530
 531         e = startswith(controller, "name=");
 532         if (e)
 533                 return e;
 534
 535         return controller;
 536 }
 537
 538 static int join_path_legacy(const char *controller, const char *path, const char *suffix, char **fs) {
 539         const char *dn;
 540         char *t = NULL;
 541
 542         assert(fs);
 543         assert(controller);
 544
 545         dn = controller_to_dirname(controller);
 546
 547         if (isempty(path) && isempty(suffix))
 548                 t = strappend("/sys/fs/cgroup/", dn);
 549         else if (isempty(path))
 550                 t = strjoin("/sys/fs/cgroup/", dn, "/", suffix, NULL);
 551         else if (isempty(suffix))
 552                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, NULL);
 553         else
 554                 t = strjoin("/sys/fs/cgroup/", dn, "/", path, "/", suffix, NULL);
 555         if (!t)
 556                 return -ENOMEM;
 557
 558         *fs = t;
 559         return 0;
 560 }
 561
 562 static int join_path_unified(const char *path, const char *suffix, char **fs) {
 563         char *t;
 564
 565         assert(fs);
 566
 567         if (isempty(path) && isempty(suffix))
 568                 t = strdup("/sys/fs/cgroup");
 569         else if (isempty(path))
 570                 t = strappend("/sys/fs/cgroup/", suffix);
 571         else if (isempty(suffix))
 572                 t = strappend("/sys/fs/cgroup/", path);
 573         else
 574                 t = strjoin("/sys/fs/cgroup/", path, "/", suffix, NULL);
 575         if (!t)
 576                 return -ENOMEM;
 577
 578         *fs = t;
 579         return 0;
 580 }
 581
 582 int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs) {
 583         int unified, r;
 584
 585         assert(fs);
 586
 587         if (!controller) {
 588                 char *t;
 589
 590                 /* If no controller is specified, we return the path
 591                  * *below* the controllers, without any prefix. */
 592
 593                 if (!path && !suffix)
 594                         return -EINVAL;
 595
 596                 if (!suffix)
 597                         t = strdup(path);
 598                 else if (!path)
 599                         t = strdup(suffix);
 600                 else
 601                         t = strjoin(path, "/", suffix, NULL);
 602                 if (!t)
 603                         return -ENOMEM;
 604
 605                 *fs = path_kill_slashes(t);
 606                 return 0;
 607         }
 608
 609         if (!cg_controller_is_valid(controller))
 610                 return -EINVAL;
 611
 612         unified = cg_unified();
 613         if (unified < 0)
 614                 return unified;
 615
 616         if (unified > 0)
 617                 r = join_path_unified(path, suffix, fs);
 618         else
 619                 r = join_path_legacy(controller, path, suffix, fs);
 620         if (r < 0)
 621                 return r;
 622
 623         path_kill_slashes(*fs);
 624         return 0;
 625 }
 626
 627 static int controller_is_accessible(const char *controller) {
 628         int unified;
 629
 630         assert(controller);
 631
 632         /* Checks whether a specific controller is accessible,
 633          * i.e. its hierarchy mounted. In the unified hierarchy all
 634          * controllers are considered accessible, except for the named
 635          * hierarchies */
 636
 637         if (!cg_controller_is_valid(controller))
 638                 return -EINVAL;
 639
 640         unified = cg_unified();
 641         if (unified < 0)
 642                 return unified;
 643         if (unified > 0) {
 644                 /* We don't support named hierarchies if we are using
 645                  * the unified hierarchy. */
 646
 647                 if (streq(controller, SYSTEMD_CGROUP_CONTROLLER))
 648                         return 0;
 649
 650                 if (startswith(controller, "name="))
 651                         return -EOPNOTSUPP;
 652
 653         } else {
 654                 const char *cc, *dn;
 655
 656                 dn = controller_to_dirname(controller);
 657                 cc = strjoina("/sys/fs/cgroup/", dn);
 658
 659                 if (laccess(cc, F_OK) < 0)
 660                         return -errno;
 661         }
 662
 663         return 0;
 664 }
 665
 666 int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs) {
 667         int r;
 668
 669         assert(controller);
 670         assert(fs);
 671
 672         /* Check if the specified controller is actually accessible */
 673         r = controller_is_accessible(controller);
 674         if (r < 0)
 675                 return r;
 676
 677         return cg_get_path(controller, path, suffix, fs);
 678 }
 679
 680 static int trim_cb(const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) {
 681         assert(path);
 682         assert(sb);
 683         assert(ftwbuf);
 684
 685         if (typeflag != FTW_DP)
 686                 return 0;
 687
 688         if (ftwbuf->level < 1)
 689                 return 0;
 690
 691         (void) rmdir(path);
 692         return 0;
 693 }
 694
 695 int cg_trim(const char *controller, const char *path, bool delete_root) {
 696         _cleanup_free_ char *fs = NULL;
 697         int r = 0;
 698
 699         assert(path);
 700
 701         r = cg_get_path(controller, path, NULL, &fs);
 702         if (r < 0)
 703                 return r;
 704
 705         errno = 0;
 706         if (nftw(fs, trim_cb, 64, FTW_DEPTH|FTW_MOUNT|FTW_PHYS) != 0) {
 707                 if (errno == ENOENT)
 708                         r = 0;
 709                 else if (errno > 0)
 710                         r = -errno;
 711                 else
 712                         r = -EIO;
 713         }
 714
 715         if (delete_root) {
 716                 if (rmdir(fs) < 0 && errno != ENOENT)
 717                         return -errno;
 718         }
 719
 720         return r;
 721 }
 722
 723 int cg_create(const char *controller, const char *path) {
 724         _cleanup_free_ char *fs = NULL;
 725         int r;
 726
 727         r = cg_get_path_and_check(controller, path, NULL, &fs);
 728         if (r < 0)
 729                 return r;
 730
 731         r = mkdir_parents(fs, 0755);
 732         if (r < 0)
 733                 return r;
 734
 735         if (mkdir(fs, 0755) < 0) {
 736
 737                 if (errno == EEXIST)
 738                         return 0;
 739
 740                 return -errno;
 741         }
 742
 743         return 1;
 744 }
 745
 746 int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
 747         int r, q;
 748
 749         assert(pid >= 0);
 750
 751         r = cg_create(controller, path);
 752         if (r < 0)
 753                 return r;
 754
 755         q = cg_attach(controller, path, pid);
 756         if (q < 0)
 757                 return q;
 758
 759         /* This does not remove the cgroup on failure */
 760         return r;
 761 }
 762
 763 int cg_attach(const char *controller, const char *path, pid_t pid) {
 764         _cleanup_free_ char *fs = NULL;
 765         char c[DECIMAL_STR_MAX(pid_t) + 2];
 766         int r;
 767
 768         assert(path);
 769         assert(pid >= 0);
 770
 771         r = cg_get_path_and_check(controller, path, "cgroup.procs", &fs);
 772         if (r < 0)
 773                 return r;
 774
 775         if (pid == 0)
 776                 pid = getpid();
 777
 778         xsprintf(c, PID_FMT "\n", pid);
 779
 780         return write_string_file(fs, c, 0);
 781 }
 782
 783 int cg_attach_fallback(const char *controller, const char *path, pid_t pid) {
 784         int r;
 785
 786         assert(controller);
 787         assert(path);
 788         assert(pid >= 0);
 789
 790         r = cg_attach(controller, path, pid);
 791         if (r < 0) {
 792                 char prefix[strlen(path) + 1];
 793
 794                 /* This didn't work? Then let's try all prefixes of
 795                  * the destination */
 796
 797                 PATH_FOREACH_PREFIX(prefix, path) {
 798                         int q;
 799
 800                         q = cg_attach(controller, prefix, pid);
 801                         if (q >= 0)
 802                                 return q;
 803                 }
 804         }
 805
 806         return r;
 807 }
 808
 809 int cg_set_group_access(
 810                 const char *controller,
 811                 const char *path,
 812                 mode_t mode,
 813                 uid_t uid,
 814                 gid_t gid) {
 815
 816         _cleanup_free_ char *fs = NULL;
 817         int r;
 818
 819         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 820                 return 0;
 821
 822         if (mode != MODE_INVALID)
 823                 mode &= 0777;
 824
 825         r = cg_get_path(controller, path, NULL, &fs);
 826         if (r < 0)
 827                 return r;
 828
 829         return chmod_and_chown(fs, mode, uid, gid);
 830 }
 831
 832 int cg_set_task_access(
 833                 const char *controller,
 834                 const char *path,
 835                 mode_t mode,
 836                 uid_t uid,
 837                 gid_t gid) {
 838
 839         _cleanup_free_ char *fs = NULL, *procs = NULL;
 840         int r, unified;
 841
 842         assert(path);
 843
 844         if (mode == MODE_INVALID && uid == UID_INVALID && gid == GID_INVALID)
 845                 return 0;
 846
 847         if (mode != MODE_INVALID)
 848                 mode &= 0666;
 849
 850         r = cg_get_path(controller, path, "cgroup.procs", &fs);
 851         if (r < 0)
 852                 return r;
 853
 854         r = chmod_and_chown(fs, mode, uid, gid);
 855         if (r < 0)
 856                 return r;
 857
 858         unified = cg_unified();
 859         if (unified < 0)
 860                 return unified;
 861         if (unified)
 862                 return 0;
 863
 864         /* Compatibility, Always keep values for "tasks" in sync with
 865          * "cgroup.procs" */
 866         if (cg_get_path(controller, path, "tasks", &procs) >= 0)
 867                 (void) chmod_and_chown(procs, mode, uid, gid);
 868
 869         return 0;
 870 }
 871
 872 int cg_pid_get_path(const char *controller, pid_t pid, char **path) {
 873         _cleanup_fclose_ FILE *f = NULL;
 874         char line[LINE_MAX];
 875         const char *fs;
 876         size_t cs = 0;
 877         int unified;
 878
 879         assert(path);
 880         assert(pid >= 0);
 881
 882         unified = cg_unified();
 883         if (unified < 0)
 884                 return unified;
 885         if (unified == 0) {
 886                 if (controller) {
 887                         if (!cg_controller_is_valid(controller))
 888                                 return -EINVAL;
 889                 } else
 890                         controller = SYSTEMD_CGROUP_CONTROLLER;
 891
 892                 cs = strlen(controller);
 893         }
 894
 895         fs = procfs_file_alloca(pid, "cgroup");
 896         f = fopen(fs, "re");
 897         if (!f)
 898                 return errno == ENOENT ? -ESRCH : -errno;
 899
 900         FOREACH_LINE(line, f, return -errno) {
 901                 char *e, *p;
 902
 903                 truncate_nl(line);
 904
 905                 if (unified) {
 906                         e = startswith(line, "0:");
 907                         if (!e)
 908                                 continue;
 909
 910                         e = strchr(e, ':');
 911                         if (!e)
 912                                 continue;
 913                 } else {
 914                         char *l;
 915                         size_t k;
 916                         const char *word, *state;
 917                         bool found = false;
 918
 919                         l = strchr(line, ':');
 920                         if (!l)
 921                                 continue;
 922
 923                         l++;
 924                         e = strchr(l, ':');
 925                         if (!e)
 926                                 continue;
 927
 928                         *e = 0;
 929                         FOREACH_WORD_SEPARATOR(word, k, l, ",", state) {
 930                                 if (k == cs && memcmp(word, controller, cs) == 0) {
 931                                         found = true;
 932                                         break;
 933                                 }
 934                         }
 935
 936                         if (!found)
 937                                 continue;
 938                 }
 939
 940                 p = strdup(e + 1);
 941                 if (!p)
 942                         return -ENOMEM;
 943
 944                 *path = p;
 945                 return 0;
 946         }
 947
 948         return -ENODATA;
 949 }
 950
 951 int cg_install_release_agent(const char *controller, const char *agent) {
 952         _cleanup_free_ char *fs = NULL, *contents = NULL;
 953         const char *sc;
 954         int r, unified;
 955
 956         assert(agent);
 957
 958         unified = cg_unified();
 959         if (unified < 0)
 960                 return unified;
 961         if (unified) /* doesn't apply to unified hierarchy */
 962                 return -EOPNOTSUPP;
 963
 964         r = cg_get_path(controller, NULL, "release_agent", &fs);
 965         if (r < 0)
 966                 return r;
 967
 968         r = read_one_line_file(fs, &contents);
 969         if (r < 0)
 970                 return r;
 971
 972         sc = strstrip(contents);
 973         if (isempty(sc)) {
 974                 r = write_string_file(fs, agent, 0);
 975                 if (r < 0)
 976                         return r;
 977         } else if (!path_equal(sc, agent))
 978                 return -EEXIST;
 979
 980         fs = mfree(fs);
 981         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
 982         if (r < 0)
 983                 return r;
 984
 985         contents = mfree(contents);
 986         r = read_one_line_file(fs, &contents);
 987         if (r < 0)
 988                 return r;
 989
 990         sc = strstrip(contents);
 991         if (streq(sc, "0")) {
 992                 r = write_string_file(fs, "1", 0);
 993                 if (r < 0)
 994                         return r;
 995
 996                 return 1;
 997         }
 998
 999         if (!streq(sc, "1"))
1000                 return -EIO;
1001
1002         return 0;
1003 }
1004
1005 int cg_uninstall_release_agent(const char *controller) {
1006         _cleanup_free_ char *fs = NULL;
1007         int r, unified;
1008
1009         unified = cg_unified();
1010         if (unified < 0)
1011                 return unified;
1012         if (unified) /* Doesn't apply to unified hierarchy */
1013                 return -EOPNOTSUPP;
1014
1015         r = cg_get_path(controller, NULL, "notify_on_release", &fs);
1016         if (r < 0)
1017                 return r;
1018
1019         r = write_string_file(fs, "0", 0);
1020         if (r < 0)
1021                 return r;
1022
1023         fs = mfree(fs);
1024
1025         r = cg_get_path(controller, NULL, "release_agent", &fs);
1026         if (r < 0)
1027                 return r;
1028
1029         r = write_string_file(fs, "", 0);
1030         if (r < 0)
1031                 return r;
1032
1033         return 0;
1034 }
1035
1036 int cg_is_empty(const char *controller, const char *path) {
1037         _cleanup_fclose_ FILE *f = NULL;
1038         pid_t pid;
1039         int r;
1040
1041         assert(path);
1042
1043         r = cg_enumerate_processes(controller, path, &f);
1044         if (r == -ENOENT)
1045                 return 1;
1046         if (r < 0)
1047                 return r;
1048
1049         r = cg_read_pid(f, &pid);
1050         if (r < 0)
1051                 return r;
1052
1053         return r == 0;
1054 }
1055
1056 int cg_is_empty_recursive(const char *controller, const char *path) {
1057         int unified, r;
1058
1059         assert(path);
1060
1061         /* The root cgroup is always populated */
1062         if (controller && (isempty(path) || path_equal(path, "/")))
1063                 return false;
1064
1065         unified = cg_unified();
1066         if (unified < 0)
1067                 return unified;
1068
1069         if (unified > 0) {
1070                 _cleanup_free_ char *t = NULL;
1071
1072                 /* On the unified hierarchy we can check empty state
1073                  * via the "populated" attribute of "cgroup.events". */
1074
1075                 r = cg_read_event(controller, path, "populated", &t);
1076                 if (r < 0)
1077                         return r;
1078
1079                 return streq(t, "0");
1080         } else {
1081                 _cleanup_closedir_ DIR *d = NULL;
1082                 char *fn;
1083
1084                 r = cg_is_empty(controller, path);
1085                 if (r <= 0)
1086                         return r;
1087
1088                 r = cg_enumerate_subgroups(controller, path, &d);
1089                 if (r == -ENOENT)
1090                         return 1;
1091                 if (r < 0)
1092                         return r;
1093
1094                 while ((r = cg_read_subgroup(d, &fn)) > 0) {
1095                         _cleanup_free_ char *p = NULL;
1096
1097                         p = strjoin(path, "/", fn, NULL);
1098                         free(fn);
1099                         if (!p)
1100                                 return -ENOMEM;
1101
1102                         r = cg_is_empty_recursive(controller, p);
1103                         if (r <= 0)
1104                                 return r;
1105                 }
1106                 if (r < 0)
1107                         return r;
1108
1109                 return true;
1110         }
1111 }
1112
1113 int cg_split_spec(const char *spec, char **controller, char **path) {
1114         char *t = NULL, *u = NULL;
1115         const char *e;
1116
1117         assert(spec);
1118
1119         if (*spec == '/') {
1120                 if (!path_is_safe(spec))
1121                         return -EINVAL;
1122
1123                 if (path) {
1124                         t = strdup(spec);
1125                         if (!t)
1126                                 return -ENOMEM;
1127
1128                         *path = path_kill_slashes(t);
1129                 }
1130
1131                 if (controller)
1132                         *controller = NULL;
1133
1134                 return 0;
1135         }
1136
1137         e = strchr(spec, ':');
1138         if (!e) {
1139                 if (!cg_controller_is_valid(spec))
1140                         return -EINVAL;
1141
1142                 if (controller) {
1143                         t = strdup(spec);
1144                         if (!t)
1145                                 return -ENOMEM;
1146
1147                         *controller = t;
1148                 }
1149
1150                 if (path)
1151                         *path = NULL;
1152
1153                 return 0;
1154         }
1155
1156         t = strndup(spec, e-spec);
1157         if (!t)
1158                 return -ENOMEM;
1159         if (!cg_controller_is_valid(t)) {
1160                 free(t);
1161                 return -EINVAL;
1162         }
1163
1164         if (isempty(e+1))
1165                 u = NULL;
1166         else {
1167                 u = strdup(e+1);
1168                 if (!u) {
1169                         free(t);
1170                         return -ENOMEM;
1171                 }
1172
1173                 if (!path_is_safe(u) ||
1174                     !path_is_absolute(u)) {
1175                         free(t);
1176                         free(u);
1177                         return -EINVAL;
1178                 }
1179
1180                 path_kill_slashes(u);
1181         }
1182
1183         if (controller)
1184                 *controller = t;
1185         else
1186                 free(t);
1187
1188         if (path)
1189                 *path = u;
1190         else
1191                 free(u);
1192
1193         return 0;
1194 }
1195
1196 int cg_mangle_path(const char *path, char **result) {
1197         _cleanup_free_ char *c = NULL, *p = NULL;
1198         char *t;
1199         int r;
1200
1201         assert(path);
1202         assert(result);
1203
1204         /* First, check if it already is a filesystem path */
1205         if (path_startswith(path, "/sys/fs/cgroup")) {
1206
1207                 t = strdup(path);
1208                 if (!t)
1209                         return -ENOMEM;
1210
1211                 *result = path_kill_slashes(t);
1212                 return 0;
1213         }
1214
1215         /* Otherwise, treat it as cg spec */
1216         r = cg_split_spec(path, &c, &p);
1217         if (r < 0)
1218                 return r;
1219
1220         return cg_get_path(c ?: SYSTEMD_CGROUP_CONTROLLER, p ?: "/", NULL, result);
1221 }
1222
1223 int cg_get_root_path(char **path) {
1224         char *p, *e;
1225         int r;
1226
1227         assert(path);
1228
1229         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 1, &p);
1230         if (r < 0)
1231                 return r;
1232
1233         e = endswith(p, "/" SPECIAL_INIT_SCOPE);
1234         if (!e)
1235                 e = endswith(p, "/" SPECIAL_SYSTEM_SLICE); /* legacy */
1236         if (!e)
1237                 e = endswith(p, "/system"); /* even more legacy */
1238         if (e)
1239                 *e = 0;
1240
1241         *path = p;
1242         return 0;
1243 }
1244
1245 int cg_shift_path(const char *cgroup, const char *root, const char **shifted) {
1246         _cleanup_free_ char *rt = NULL;
1247         char *p;
1248         int r;
1249
1250         assert(cgroup);
1251         assert(shifted);
1252
1253         if (!root) {
1254                 /* If the root was specified let's use that, otherwise
1255                  * let's determine it from PID 1 */
1256
1257                 r = cg_get_root_path(&rt);
1258                 if (r < 0)
1259                         return r;
1260
1261                 root = rt;
1262         }
1263
1264         p = path_startswith(cgroup, root);
1265         if (p && p > cgroup)
1266                 *shifted = p - 1;
1267         else
1268                 *shifted = cgroup;
1269
1270         return 0;
1271 }
1272
1273 int cg_pid_get_path_shifted(pid_t pid, const char *root, char **cgroup) {
1274         _cleanup_free_ char *raw = NULL;
1275         const char *c;
1276         int r;
1277
1278         assert(pid >= 0);
1279         assert(cgroup);
1280
1281         r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &raw);
1282         if (r < 0)
1283                 return r;
1284
1285         r = cg_shift_path(raw, root, &c);
1286         if (r < 0)
1287                 return r;
1288
1289         if (c == raw) {
1290                 *cgroup = raw;
1291                 raw = NULL;
1292         } else {
1293                 char *n;
1294
1295                 n = strdup(c);
1296                 if (!n)
1297                         return -ENOMEM;
1298
1299                 *cgroup = n;
1300         }
1301
1302         return 0;
1303 }
1304
1305 int cg_path_decode_unit(const char *cgroup, char **unit) {
1306         char *c, *s;
1307         size_t n;
1308
1309         assert(cgroup);
1310         assert(unit);
1311
1312         n = strcspn(cgroup, "/");
1313         if (n < 3)
1314                 return -ENXIO;
1315
1316         c = strndupa(cgroup, n);
1317         c = cg_unescape(c);
1318
1319         if (!unit_name_is_valid(c, UNIT_NAME_PLAIN|UNIT_NAME_INSTANCE))
1320                 return -ENXIO;
1321
1322         s = strdup(c);
1323         if (!s)
1324                 return -ENOMEM;
1325
1326         *unit = s;
1327         return 0;
1328 }
1329
1330 static bool valid_slice_name(const char *p, size_t n) {
1331
1332         if (!p)
1333                 return false;
1334
1335         if (n < strlen("x.slice"))
1336                 return false;
1337
1338         if (memcmp(p + n - 6, ".slice", 6) == 0) {
1339                 char buf[n+1], *c;
1340
1341                 memcpy(buf, p, n);
1342                 buf[n] = 0;
1343
1344                 c = cg_unescape(buf);
1345
1346                 return unit_name_is_valid(c, UNIT_NAME_PLAIN);
1347         }
1348
1349         return false;
1350 }
1351
/* Advances past all leading slice components of a cgroup path. The returned
 * pointer refers to the first component that is not a valid slice name (with
 * the slashes in front of it already skipped), or to the end of the string. */
static const char *skip_slices(const char *p) {
        const char *cur;
        size_t seg;

        assert(p);

        cur = p;
        for (;;) {
                cur += strspn(cur, "/");

                seg = strcspn(cur, "/");
                if (!valid_slice_name(cur, seg))
                        break;

                cur += seg;
        }

        return cur;
}
1369
1370 int cg_path_get_unit(const char *path, char **ret) {
1371         const char *e;
1372         char *unit;
1373         int r;
1374
1375         assert(path);
1376         assert(ret);
1377
1378         e = skip_slices(path);
1379
1380         r = cg_path_decode_unit(e, &unit);
1381         if (r < 0)
1382                 return r;
1383
1384         /* We skipped over the slices, don't accept any now */
1385         if (endswith(unit, ".slice")) {
1386                 free(unit);
1387                 return -ENXIO;
1388         }
1389
1390         *ret = unit;
1391         return 0;
1392 }
1393
1394 int cg_pid_get_unit(pid_t pid, char **unit) {
1395         _cleanup_free_ char *cgroup = NULL;
1396         int r;
1397
1398         assert(unit);
1399
1400         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1401         if (r < 0)
1402                 return r;
1403
1404         return cg_path_get_unit(cgroup, unit);
1405 }
1406
/**
 * Skip session-*.scope, but require it to be there.
 *
 * Leading slashes are ignored. Returns a pointer to whatever follows the
 * session scope component (separating slashes skipped), or NULL if p is
 * empty according to isempty(), if the first component is not of the form
 * "session-<id>.scope", or if <id> is rejected by session_id_valid().
 */
static const char *skip_session(const char *p) {
        size_t n;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("session-x.scope"))
                return NULL;

        if (memcmp(p, "session-", 8) == 0 && memcmp(p + n - 6, ".scope", 6) == 0) {
                /* Room for the session ID between prefix and suffix, plus NUL */
                char buf[n - 8 - 6 + 1];

                memcpy(buf, p + 8, n - 8 - 6);
                buf[n - 8 - 6] = 0;

                /* Note that session scopes never need unescaping,
                 * since they cannot conflict with the kernel's own
                 * names, hence we don't need to call cg_unescape()
                 * here. */

                /* This function returns a pointer, hence signal failure with
                 * NULL like all the other error paths do, not with a boolean. */
                if (!session_id_valid(buf))
                        return NULL;

                p += n;
                p += strspn(p, "/");
                return p;
        }

        return NULL;
}
1443
/**
 * Skip user@*.service, but require it to be there.
 *
 * Leading slashes are ignored. Returns a pointer to whatever follows the
 * user manager component (separating slashes skipped), or NULL if p is empty
 * according to isempty(), if the first component is not of the form
 * "user@<uid>.service", or if <uid> is not accepted by parse_uid().
 */
static const char *skip_user_manager(const char *p) {
        size_t n, uid_len;

        if (isempty(p))
                return NULL;

        p += strspn(p, "/");

        n = strcspn(p, "/");
        if (n < strlen("user@x.service"))
                return NULL;

        if (memcmp(p, "user@", 5) != 0 || memcmp(p + n - 8, ".service", 8) != 0)
                return NULL;

        /* What remains between the prefix and the suffix is the UID */
        uid_len = n - 5 - 8;

        char uid_str[uid_len + 1];

        memcpy(uid_str, p + 5, uid_len);
        uid_str[uid_len] = 0;

        /* User manager services never need unescaping, as their names cannot
         * clash with the kernel's own, so cg_unescape() is not called. */

        if (parse_uid(uid_str, NULL) < 0)
                return NULL;

        p += n;
        p += strspn(p, "/");

        return p;
}
1481
/* Skips the part of a cgroup path that leads into a user's own tree: first
 * any slices, then either a user manager service or, failing that, a session
 * scope. Returns NULL if neither of the two follows the slices. */
static const char *skip_user_prefix(const char *path) {
        const char *after_slices, *after_manager;

        assert(path);

        after_slices = skip_slices(path);

        /* Prefer the user manager; only if it is absent try the session */
        after_manager = skip_user_manager(after_slices);

        return after_manager ? after_manager : skip_session(after_slices);
}
1498
/* Determines the user unit a cgroup path belongs to.
 *
 * Returns -ENXIO if the path has no user manager/session prefix, otherwise
 * the result of cg_path_get_unit() on the remainder of the path. */
int cg_path_get_user_unit(const char *path, char **ret) {
        const char *rest;

        assert(path);
        assert(ret);

        rest = skip_user_prefix(path);

        /* Behind the user prefix the layout is the same as for system units,
         * so the regular parser can take over. */
        return rest ? cg_path_get_unit(rest, ret) : -ENXIO;
}
1514
1515 int cg_pid_get_user_unit(pid_t pid, char **unit) {
1516         _cleanup_free_ char *cgroup = NULL;
1517         int r;
1518
1519         assert(unit);
1520
1521         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1522         if (r < 0)
1523                 return r;
1524
1525         return cg_path_get_user_unit(cgroup, unit);
1526 }
1527
1528 int cg_path_get_machine_name(const char *path, char **machine) {
1529         _cleanup_free_ char *u = NULL;
1530         const char *sl;
1531         int r;
1532
1533         r = cg_path_get_unit(path, &u);
1534         if (r < 0)
1535                 return r;
1536
1537         sl = strjoina("/run/systemd/machines/unit:", u);
1538         return readlink_malloc(sl, machine);
1539 }
1540
1541 int cg_pid_get_machine_name(pid_t pid, char **machine) {
1542         _cleanup_free_ char *cgroup = NULL;
1543         int r;
1544
1545         assert(machine);
1546
1547         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1548         if (r < 0)
1549                 return r;
1550
1551         return cg_path_get_machine_name(cgroup, machine);
1552 }
1553
1554 int cg_path_get_session(const char *path, char **session) {
1555         _cleanup_free_ char *unit = NULL;
1556         char *start, *end;
1557         int r;
1558
1559         assert(path);
1560
1561         r = cg_path_get_unit(path, &unit);
1562         if (r < 0)
1563                 return r;
1564
1565         start = startswith(unit, "session-");
1566         if (!start)
1567                 return -ENXIO;
1568         end = endswith(start, ".scope");
1569         if (!end)
1570                 return -ENXIO;
1571
1572         *end = 0;
1573         if (!session_id_valid(start))
1574                 return -ENXIO;
1575
1576         if (session) {
1577                 char *rr;
1578
1579                 rr = strdup(start);
1580                 if (!rr)
1581                         return -ENOMEM;
1582
1583                 *session = rr;
1584         }
1585
1586         return 0;
1587 }
1588
1589 int cg_pid_get_session(pid_t pid, char **session) {
1590         _cleanup_free_ char *cgroup = NULL;
1591         int r;
1592
1593         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1594         if (r < 0)
1595                 return r;
1596
1597         return cg_path_get_session(cgroup, session);
1598 }
1599
1600 int cg_path_get_owner_uid(const char *path, uid_t *uid) {
1601         _cleanup_free_ char *slice = NULL;
1602         char *start, *end;
1603         int r;
1604
1605         assert(path);
1606
1607         r = cg_path_get_slice(path, &slice);
1608         if (r < 0)
1609                 return r;
1610
1611         start = startswith(slice, "user-");
1612         if (!start)
1613                 return -ENXIO;
1614         end = endswith(start, ".slice");
1615         if (!end)
1616                 return -ENXIO;
1617
1618         *end = 0;
1619         if (parse_uid(start, uid) < 0)
1620                 return -ENXIO;
1621
1622         return 0;
1623 }
1624
1625 int cg_pid_get_owner_uid(pid_t pid, uid_t *uid) {
1626         _cleanup_free_ char *cgroup = NULL;
1627         int r;
1628
1629         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1630         if (r < 0)
1631                 return r;
1632
1633         return cg_path_get_owner_uid(cgroup, uid);
1634 }
1635
/* Determines the slice a cgroup path is located in: the last component of
 * the leading run of valid slice names. If the path does not start with any
 * slice, "-.slice" is returned.
 *
 * Returns 0 and a newly allocated slice name in *slice on success, -ENOMEM
 * on allocation failure, or the negative error of cg_path_decode_unit(). */
int cg_path_get_slice(const char *p, char **slice) {
        const char *last = NULL;
        char *root;
        size_t n;

        assert(p);
        assert(slice);

        /* Walk over the leading slice components and remember where the most
         * recent one began; stop at the first component that is no slice. */
        for (;;) {
                p += strspn(p, "/");

                n = strcspn(p, "/");
                if (!valid_slice_name(p, n))
                        break;

                last = p;
                p += n;
        }

        if (last)
                return cg_path_decode_unit(last, slice);

        /* Not in any slice at all */
        root = strdup("-.slice");
        if (!root)
                return -ENOMEM;

        *slice = root;
        return 0;
}
1671
1672 int cg_pid_get_slice(pid_t pid, char **slice) {
1673         _cleanup_free_ char *cgroup = NULL;
1674         int r;
1675
1676         assert(slice);
1677
1678         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1679         if (r < 0)
1680                 return r;
1681
1682         return cg_path_get_slice(cgroup, slice);
1683 }
1684
/* Determines the user slice a cgroup path is located in.
 *
 * Returns -ENXIO if the path has no user manager/session prefix, otherwise
 * the result of cg_path_get_slice() on the remainder of the path. */
int cg_path_get_user_slice(const char *p, char **slice) {
        const char *rest;

        assert(p);
        assert(slice);

        rest = skip_user_prefix(p);

        /* Behind the user prefix the layout is the same as for system
         * slices, so the regular parser can take over. */
        return rest ? cg_path_get_slice(rest, slice) : -ENXIO;
}
1698
1699 int cg_pid_get_user_slice(pid_t pid, char **slice) {
1700         _cleanup_free_ char *cgroup = NULL;
1701         int r;
1702
1703         assert(slice);
1704
1705         r = cg_pid_get_path_shifted(pid, NULL, &cgroup);
1706         if (r < 0)
1707                 return r;
1708
1709         return cg_path_get_user_slice(cgroup, slice);
1710 }
1711
1712 char *cg_escape(const char *p) {
1713         bool need_prefix = false;
1714
1715         /* This implements very minimal escaping for names to be used
1716          * as file names in the cgroup tree: any name which might
1717          * conflict with a kernel name or is prefixed with '_' is
1718          * prefixed with a '_'. That way, when reading cgroup names it
1719          * is sufficient to remove a single prefixing underscore if
1720          * there is one. */
1721
1722         /* The return value of this function (unlike cg_unescape())
1723          * needs free()! */
1724
1725         if (p[0] == 0 ||
1726             p[0] == '_' ||
1727             p[0] == '.' ||
1728             streq(p, "notify_on_release") ||
1729             streq(p, "release_agent") ||
1730             streq(p, "tasks") ||
1731             startswith(p, "cgroup."))
1732                 need_prefix = true;
1733         else {
1734                 const char *dot;
1735
1736                 dot = strrchr(p, '.');
1737                 if (dot) {
1738                         CGroupController c;
1739                         size_t l = dot - p;
1740
1741                         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1742                                 const char *n;
1743
1744                                 n = cgroup_controller_to_string(c);
1745
1746                                 if (l != strlen(n))
1747                                         continue;
1748
1749                                 if (memcmp(p, n, l) != 0)
1750                                         continue;
1751
1752                                 need_prefix = true;
1753                                 break;
1754                         }
1755                 }
1756         }
1757
1758         if (need_prefix)
1759                 return strappend("_", p);
1760
1761         return strdup(p);
1762 }
1763
/* Reverses cg_escape(): if the name begins with an underscore, the returned
 * pointer skips it; otherwise the name is returned unchanged.
 *
 * Unlike cg_escape() the result points into the caller's string and must
 * not be free()d. */
char *cg_unescape(const char *p) {
        assert(p);

        return (char*) (p[0] == '_' ? p + 1 : p);
}
1775
1776 #define CONTROLLER_VALID                        \
1777         DIGITS LETTERS                          \
1778         "_"
1779
1780 bool cg_controller_is_valid(const char *p) {
1781         const char *t, *s;
1782
1783         if (!p)
1784                 return false;
1785
1786         s = startswith(p, "name=");
1787         if (s)
1788                 p = s;
1789
1790         if (*p == 0 || *p == '_')
1791                 return false;
1792
1793         for (t = p; *t; t++)
1794                 if (!strchr(CONTROLLER_VALID, *t))
1795                         return false;
1796
1797         if (t - p > FILENAME_MAX)
1798                 return false;
1799
1800         return true;
1801 }
1802
1803 int cg_slice_to_path(const char *unit, char **ret) {
1804         _cleanup_free_ char *p = NULL, *s = NULL, *e = NULL;
1805         const char *dash;
1806         int r;
1807
1808         assert(unit);
1809         assert(ret);
1810
1811         if (streq(unit, "-.slice")) {
1812                 char *x;
1813
1814                 x = strdup("");
1815                 if (!x)
1816                         return -ENOMEM;
1817                 *ret = x;
1818                 return 0;
1819         }
1820
1821         if (!unit_name_is_valid(unit, UNIT_NAME_PLAIN))
1822                 return -EINVAL;
1823
1824         if (!endswith(unit, ".slice"))
1825                 return -EINVAL;
1826
1827         r = unit_name_to_prefix(unit, &p);
1828         if (r < 0)
1829                 return r;
1830
1831         dash = strchr(p, '-');
1832
1833         /* Don't allow initial dashes */
1834         if (dash == p)
1835                 return -EINVAL;
1836
1837         while (dash) {
1838                 _cleanup_free_ char *escaped = NULL;
1839                 char n[dash - p + sizeof(".slice")];
1840
1841                 /* Don't allow trailing or double dashes */
1842                 if (dash[1] == 0 || dash[1] == '-')
1843                         return -EINVAL;
1844
1845                 strcpy(stpncpy(n, p, dash - p), ".slice");
1846                 if (!unit_name_is_valid(n, UNIT_NAME_PLAIN))
1847                         return -EINVAL;
1848
1849                 escaped = cg_escape(n);
1850                 if (!escaped)
1851                         return -ENOMEM;
1852
1853                 if (!strextend(&s, escaped, "/", NULL))
1854                         return -ENOMEM;
1855
1856                 dash = strchr(dash+1, '-');
1857         }
1858
1859         e = cg_escape(unit);
1860         if (!e)
1861                 return -ENOMEM;
1862
1863         if (!strextend(&s, e, NULL))
1864                 return -ENOMEM;
1865
1866         *ret = s;
1867         s = NULL;
1868
1869         return 0;
1870 }
1871
1872 int cg_set_attribute(const char *controller, const char *path, const char *attribute, const char *value) {
1873         _cleanup_free_ char *p = NULL;
1874         int r;
1875
1876         r = cg_get_path(controller, path, attribute, &p);
1877         if (r < 0)
1878                 return r;
1879
1880         return write_string_file(p, value, 0);
1881 }
1882
1883 int cg_get_attribute(const char *controller, const char *path, const char *attribute, char **ret) {
1884         _cleanup_free_ char *p = NULL;
1885         int r;
1886
1887         r = cg_get_path(controller, path, attribute, &p);
1888         if (r < 0)
1889                 return r;
1890
1891         return read_one_line_file(p, ret);
1892 }
1893
1894 int cg_create_everywhere(CGroupMask supported, CGroupMask mask, const char *path) {
1895         CGroupController c;
1896         int r, unified;
1897
1898         /* This one will create a cgroup in our private tree, but also
1899          * duplicate it in the trees specified in mask, and remove it
1900          * in all others */
1901
1902         /* First create the cgroup in our own hierarchy. */
1903         r = cg_create(SYSTEMD_CGROUP_CONTROLLER, path);
1904         if (r < 0)
1905                 return r;
1906
1907         /* If we are in the unified hierarchy, we are done now */
1908         unified = cg_unified();
1909         if (unified < 0)
1910                 return unified;
1911         if (unified > 0)
1912                 return 0;
1913
1914         /* Otherwise, do the same in the other hierarchies */
1915         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1916                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1917                 const char *n;
1918
1919                 n = cgroup_controller_to_string(c);
1920
1921                 if (mask & bit)
1922                         (void) cg_create(n, path);
1923                 else if (supported & bit)
1924                         (void) cg_trim(n, path, true);
1925         }
1926
1927         return 0;
1928 }
1929
1930 int cg_attach_everywhere(CGroupMask supported, const char *path, pid_t pid, cg_migrate_callback_t path_callback, void *userdata) {
1931         CGroupController c;
1932         int r, unified;
1933
1934         r = cg_attach(SYSTEMD_CGROUP_CONTROLLER, path, pid);
1935         if (r < 0)
1936                 return r;
1937
1938         unified = cg_unified();
1939         if (unified < 0)
1940                 return unified;
1941         if (unified > 0)
1942                 return 0;
1943
1944         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1945                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1946                 const char *p = NULL;
1947
1948                 if (!(supported & bit))
1949                         continue;
1950
1951                 if (path_callback)
1952                         p = path_callback(bit, userdata);
1953
1954                 if (!p)
1955                         p = path;
1956
1957                 (void) cg_attach_fallback(cgroup_controller_to_string(c), p, pid);
1958         }
1959
1960         return 0;
1961 }
1962
1963 int cg_attach_many_everywhere(CGroupMask supported, const char *path, Set* pids, cg_migrate_callback_t path_callback, void *userdata) {
1964         Iterator i;
1965         void *pidp;
1966         int r = 0;
1967
1968         SET_FOREACH(pidp, pids, i) {
1969                 pid_t pid = PTR_TO_PID(pidp);
1970                 int q;
1971
1972                 q = cg_attach_everywhere(supported, path, pid, path_callback, userdata);
1973                 if (q < 0 && r >= 0)
1974                         r = q;
1975         }
1976
1977         return r;
1978 }
1979
1980 int cg_migrate_everywhere(CGroupMask supported, const char *from, const char *to, cg_migrate_callback_t to_callback, void *userdata) {
1981         CGroupController c;
1982         int r = 0, unified;
1983
1984         if (!path_equal(from, to))  {
1985                 r = cg_migrate_recursive(SYSTEMD_CGROUP_CONTROLLER, from, SYSTEMD_CGROUP_CONTROLLER, to, CGROUP_REMOVE);
1986                 if (r < 0)
1987                         return r;
1988         }
1989
1990         unified = cg_unified();
1991         if (unified < 0)
1992                 return unified;
1993         if (unified > 0)
1994                 return r;
1995
1996         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
1997                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
1998                 const char *p = NULL;
1999
2000                 if (!(supported & bit))
2001                         continue;
2002
2003                 if (to_callback)
2004                         p = to_callback(bit, userdata);
2005
2006                 if (!p)
2007                         p = to;
2008
2009                 (void) cg_migrate_recursive_fallback(SYSTEMD_CGROUP_CONTROLLER, to, cgroup_controller_to_string(c), p, 0);
2010         }
2011
2012         return 0;
2013 }
2014
2015 int cg_trim_everywhere(CGroupMask supported, const char *path, bool delete_root) {
2016         CGroupController c;
2017         int r, unified;
2018
2019         r = cg_trim(SYSTEMD_CGROUP_CONTROLLER, path, delete_root);
2020         if (r < 0)
2021                 return r;
2022
2023         unified = cg_unified();
2024         if (unified < 0)
2025                 return unified;
2026         if (unified > 0)
2027                 return r;
2028
2029         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2030                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2031
2032                 if (!(supported & bit))
2033                         continue;
2034
2035                 (void) cg_trim(cgroup_controller_to_string(c), path, delete_root);
2036         }
2037
2038         return 0;
2039 }
2040
2041 int cg_mask_supported(CGroupMask *ret) {
2042         CGroupMask mask = 0;
2043         int r, unified;
2044
2045         /* Determines the mask of supported cgroup controllers. Only
2046          * includes controllers we can make sense of and that are
2047          * actually accessible. */
2048
2049         unified = cg_unified();
2050         if (unified < 0)
2051                 return unified;
2052         if (unified > 0) {
2053                 _cleanup_free_ char *root = NULL, *controllers = NULL, *path = NULL;
2054                 const char *c;
2055
2056                 /* In the unified hierarchy we can read the supported
2057                  * and accessible controllers from a the top-level
2058                  * cgroup attribute */
2059
2060                 r = cg_get_root_path(&root);
2061                 if (r < 0)
2062                         return r;
2063
2064                 r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, root, "cgroup.controllers", &path);
2065                 if (r < 0)
2066                         return r;
2067
2068                 r = read_one_line_file(path, &controllers);
2069                 if (r < 0)
2070                         return r;
2071
2072                 c = controllers;
2073                 for (;;) {
2074                         _cleanup_free_ char *n = NULL;
2075                         CGroupController v;
2076
2077                         r = extract_first_word(&c, &n, NULL, 0);
2078                         if (r < 0)
2079                                 return r;
2080                         if (r == 0)
2081                                 break;
2082
2083                         v = cgroup_controller_from_string(n);
2084                         if (v < 0)
2085                                 continue;
2086
2087                         mask |= CGROUP_CONTROLLER_TO_MASK(v);
2088                 }
2089
2090                 /* Currently, we only support the memory, io and pids
2091                  * controller in the unified hierarchy, mask
2092                  * everything else off. */
2093                 mask &= CGROUP_MASK_MEMORY | CGROUP_MASK_IO | CGROUP_MASK_PIDS;
2094
2095         } else {
2096                 CGroupController c;
2097
2098                 /* In the legacy hierarchy, we check whether which
2099                  * hierarchies are mounted. */
2100
2101                 for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2102                         const char *n;
2103
2104                         n = cgroup_controller_to_string(c);
2105                         if (controller_is_accessible(n) >= 0)
2106                                 mask |= CGROUP_CONTROLLER_TO_MASK(c);
2107                 }
2108         }
2109
2110         *ret = mask;
2111         return 0;
2112 }
2113
2114 int cg_kernel_controllers(Set *controllers) {
2115         _cleanup_fclose_ FILE *f = NULL;
2116         char buf[LINE_MAX];
2117         int r;
2118
2119         assert(controllers);
2120
2121         /* Determines the full list of kernel-known controllers. Might
2122          * include controllers we don't actually support, arbitrary
2123          * named hierarchies and controllers that aren't currently
2124          * accessible (because not mounted). */
2125
2126         f = fopen("/proc/cgroups", "re");
2127         if (!f) {
2128                 if (errno == ENOENT)
2129                         return 0;
2130                 return -errno;
2131         }
2132
2133         /* Ignore the header line */
2134         (void) fgets(buf, sizeof(buf), f);
2135
2136         for (;;) {
2137                 char *controller;
2138                 int enabled = 0;
2139
2140                 errno = 0;
2141                 if (fscanf(f, "%ms %*i %*i %i", &controller, &enabled) != 2) {
2142
2143                         if (feof(f))
2144                                 break;
2145
2146                         if (ferror(f) && errno > 0)
2147                                 return -errno;
2148
2149                         return -EBADMSG;
2150                 }
2151
2152                 if (!enabled) {
2153                         free(controller);
2154                         continue;
2155                 }
2156
2157                 if (!cg_controller_is_valid(controller)) {
2158                         free(controller);
2159                         return -EBADMSG;
2160                 }
2161
2162                 r = set_consume(controllers, controller);
2163                 if (r < 0)
2164                         return r;
2165         }
2166
2167         return 0;
2168 }
2169
2170 static thread_local int unified_cache = -1;
2171
2172 int cg_unified(void) {
2173         struct statfs fs;
2174
2175         /* Checks if we support the unified hierarchy. Returns an
2176          * error when the cgroup hierarchies aren't mounted yet or we
2177          * have any other trouble determining if the unified hierarchy
2178          * is supported. */
2179
2180         if (unified_cache >= 0)
2181                 return unified_cache;
2182
2183         if (statfs("/sys/fs/cgroup/", &fs) < 0)
2184                 return -errno;
2185
2186         if (F_TYPE_EQUAL(fs.f_type, CGROUP2_SUPER_MAGIC))
2187                 unified_cache = true;
2188         else if (F_TYPE_EQUAL(fs.f_type, TMPFS_MAGIC))
2189                 unified_cache = false;
2190         else
2191                 return -ENOMEDIUM;
2192
2193         return unified_cache;
2194 }
2195
2196 void cg_unified_flush(void) {
2197         unified_cache = -1;
2198 }
2199
2200 int cg_enable_everywhere(CGroupMask supported, CGroupMask mask, const char *p) {
2201         _cleanup_free_ char *fs = NULL;
2202         CGroupController c;
2203         int r, unified;
2204
2205         assert(p);
2206
2207         if (supported == 0)
2208                 return 0;
2209
2210         unified = cg_unified();
2211         if (unified < 0)
2212                 return unified;
2213         if (!unified) /* on the legacy hiearchy there's no joining of controllers defined */
2214                 return 0;
2215
2216         r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, p, "cgroup.subtree_control", &fs);
2217         if (r < 0)
2218                 return r;
2219
2220         for (c = 0; c < _CGROUP_CONTROLLER_MAX; c++) {
2221                 CGroupMask bit = CGROUP_CONTROLLER_TO_MASK(c);
2222                 const char *n;
2223
2224                 if (!(supported & bit))
2225                         continue;
2226
2227                 n = cgroup_controller_to_string(c);
2228                 {
2229                         char s[1 + strlen(n) + 1];
2230
2231                         s[0] = mask & bit ? '+' : '-';
2232                         strcpy(s + 1, n);
2233
2234                         r = write_string_file(fs, s, 0);
2235                         if (r < 0)
2236                                 log_debug_errno(r, "Failed to enable controller %s for %s (%s): %m", n, p, fs);
2237                 }
2238         }
2239
2240         return 0;
2241 }
2242
2243 bool cg_is_unified_wanted(void) {
2244         static thread_local int wanted = -1;
2245         int r, unified;
2246
2247         /* If the hierarchy is already mounted, then follow whatever
2248          * was chosen for it. */
2249         unified = cg_unified();
2250         if (unified >= 0)
2251                 return unified;
2252
2253         /* Otherwise, let's see what the kernel command line has to
2254          * say. Since checking that is expensive, let's cache the
2255          * result. */
2256         if (wanted >= 0)
2257                 return wanted;
2258
2259         r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy", NULL);
2260         if (r > 0)
2261                 return (wanted = true);
2262         else {
2263                 _cleanup_free_ char *value = NULL;
2264
2265                 r = get_proc_cmdline_key("systemd.unified_cgroup_hierarchy=", &value);
2266                 if (r < 0)
2267                         return false;
2268                 if (r == 0)
2269                         return (wanted = false);
2270
2271                 return (wanted = parse_boolean(value) > 0);
2272         }
2273 }
2274
/* Exact logical opposite of cg_is_unified_wanted(). */
bool cg_is_legacy_wanted(void) {
        bool unified_wanted = cg_is_unified_wanted();

        return !unified_wanted;
}
2278
2279 int cg_weight_parse(const char *s, uint64_t *ret) {
2280         uint64_t u;
2281         int r;
2282
2283         if (isempty(s)) {
2284                 *ret = CGROUP_WEIGHT_INVALID;
2285                 return 0;
2286         }
2287
2288         r = safe_atou64(s, &u);
2289         if (r < 0)
2290                 return r;
2291
2292         if (u < CGROUP_WEIGHT_MIN || u > CGROUP_WEIGHT_MAX)
2293                 return -ERANGE;
2294
2295         *ret = u;
2296         return 0;
2297 }
2298
2299 const uint64_t cgroup_io_limit_defaults[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2300         [CGROUP_IO_RBPS_MAX]    = CGROUP_LIMIT_MAX,
2301         [CGROUP_IO_WBPS_MAX]    = CGROUP_LIMIT_MAX,
2302         [CGROUP_IO_RIOPS_MAX]   = CGROUP_LIMIT_MAX,
2303         [CGROUP_IO_WIOPS_MAX]   = CGROUP_LIMIT_MAX,
2304 };
2305
2306 static const char* const cgroup_io_limit_type_table[_CGROUP_IO_LIMIT_TYPE_MAX] = {
2307         [CGROUP_IO_RBPS_MAX]    = "IOReadBandwidthMax",
2308         [CGROUP_IO_WBPS_MAX]    = "IOWriteBandwidthMax",
2309         [CGROUP_IO_RIOPS_MAX]   = "IOReadIOPSMax",
2310         [CGROUP_IO_WIOPS_MAX]   = "IOWriteIOPSMax",
2311 };
2312
2313 DEFINE_STRING_TABLE_LOOKUP(cgroup_io_limit_type, CGroupIOLimitType);
2314
2315 int cg_cpu_shares_parse(const char *s, uint64_t *ret) {
2316         uint64_t u;
2317         int r;
2318
2319         if (isempty(s)) {
2320                 *ret = CGROUP_CPU_SHARES_INVALID;
2321                 return 0;
2322         }
2323
2324         r = safe_atou64(s, &u);
2325         if (r < 0)
2326                 return r;
2327
2328         if (u < CGROUP_CPU_SHARES_MIN || u > CGROUP_CPU_SHARES_MAX)
2329                 return -ERANGE;
2330
2331         *ret = u;
2332         return 0;
2333 }
2334
2335 int cg_blkio_weight_parse(const char *s, uint64_t *ret) {
2336         uint64_t u;
2337         int r;
2338
2339         if (isempty(s)) {
2340                 *ret = CGROUP_BLKIO_WEIGHT_INVALID;
2341                 return 0;
2342         }
2343
2344         r = safe_atou64(s, &u);
2345         if (r < 0)
2346                 return r;
2347
2348         if (u < CGROUP_BLKIO_WEIGHT_MIN || u > CGROUP_BLKIO_WEIGHT_MAX)
2349                 return -ERANGE;
2350
2351         *ret = u;
2352         return 0;
2353 }
2354
2355 static const char *cgroup_controller_table[_CGROUP_CONTROLLER_MAX] = {
2356         [CGROUP_CONTROLLER_CPU] = "cpu",
2357         [CGROUP_CONTROLLER_CPUACCT] = "cpuacct",
2358         [CGROUP_CONTROLLER_IO] = "io",
2359         [CGROUP_CONTROLLER_BLKIO] = "blkio",
2360         [CGROUP_CONTROLLER_MEMORY] = "memory",
2361         [CGROUP_CONTROLLER_DEVICES] = "devices",
2362         [CGROUP_CONTROLLER_PIDS] = "pids",
2363 };
2364
2365 DEFINE_STRING_TABLE_LOOKUP(cgroup_controller, CGroupController);