]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/cgroup.c
Fix lxc-user-nic to work on bionic
[mirror_lxc.git] / src / lxc / cgroup.c
CommitLineData
576f946d 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
576f946d 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23#define _GNU_SOURCE
24#include <stdio.h>
25#undef _GNU_SOURCE
26#include <stdlib.h>
27#include <errno.h>
576f946d 28#include <unistd.h>
29#include <string.h>
341a9bd8 30#include <dirent.h>
576f946d 31#include <fcntl.h>
b98f7d6e 32#include <ctype.h>
576f946d 33#include <sys/types.h>
34#include <sys/stat.h>
35#include <sys/param.h>
36#include <sys/inotify.h>
37#include <netinet/in.h>
38#include <net/if.h>
39
e2bcd7db 40#include "error.h"
881450bb 41#include "config.h"
ae5c8b8e 42#include "commands.h"
b98f7d6e
SH
43#include "list.h"
44#include "conf.h"
36eb9bde 45
36eb9bde 46#include <lxc/log.h>
00b3c2e2
CLG
47#include <lxc/cgroup.h>
48#include <lxc/start.h>
36eb9bde 49
edaf8b1b
SG
50#if IS_BIONIC
51#include <../include/lxcmntent.h>
52#else
53#include <mntent.h>
54#endif
55
120ce443
SG
56#ifndef HAVE_GETLINE
57#ifdef HAVE_FGETLN
58#include <../include/getline.h>
59#endif
60#endif
61
36eb9bde 62lxc_log_define(lxc_cgroup, lxc);
576f946d 63
5193cc3d 64#define MTAB "/proc/mounts"
576f946d 65
fd37327f
ÇO
/* In the case of a bind mount, there could be two long pathnames in the
 * mntent plus options, so use a large enough buffer size.
 * The expansion is parenthesized so it stays a single operand when the
 * macro is used inside a larger expression.
 */
#define LARGE_MAXPATHLEN (4 * MAXPATHLEN)
70
1d39a065
DW
/* Check if a mount is a cgroup hierarchy for any subsystem.
 *
 * The subsystem names are taken from the first tab-separated column of
 * /proc/cgroups (the heading line is skipped) and each one is looked up in
 * the mount options with hasmntopt().
 *
 * Returns the hasmntopt() result for the first subsystem found, or NULL if
 * none matched or /proc/cgroups could not be read.
 */
static char *mount_has_subsystem(const struct mntent *mntent)
{
	FILE *f;
	char *c, *ret = NULL;
	char line[MAXPATHLEN];

	/* read the list of subsystems from the kernel */
	f = fopen("/proc/cgroups", "r");
	if (!f)
		return NULL;

	/* skip the first line, which contains column headings */
	if (!fgets(line, sizeof(line), f))
		goto out;

	while (fgets(line, sizeof(line), f)) {
		/* the subsystem name is everything before the first tab */
		c = strchr(line, '\t');
		if (!c)
			continue;
		*c = '\0';

		ret = hasmntopt(mntent, line);
		if (ret)
			break;
	}

out:
	fclose(f);
	return ret;
}
105
d08ba6ec 106/*
23622a2a 107 * Determine mountpoint for a cgroup subsystem.
b98f7d6e 108 * @dest: a passed-in buffer of at least size MAXPATHLEN into which the path
ae5c8b8e 109 * is copied.
b98f7d6e 110 * @subsystem: cgroup subsystem (i.e. freezer)
ae5c8b8e 111 *
b98f7d6e 112 * Returns true on success, false on error.
ae5c8b8e 113 */
b98f7d6e 114bool get_subsys_mount(char *dest, const char *subsystem)
576f946d 115{
93d564ed 116 struct mntent mntent_r;
bcbd102c 117 FILE *file = NULL;
b98f7d6e
SH
118 int ret;
119 bool retv = false;
fd37327f
ÇO
120 char buf[LARGE_MAXPATHLEN] = {0};
121
bcbd102c
SH
122 file = setmntent(MTAB, "r");
123 if (!file) {
124 SYSERROR("failed to open %s", MTAB);
5193cc3d 125 return -1;
bcbd102c 126 }
0d9f8e18 127
93d564ed 128 while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
b98f7d6e 129 if (strcmp(mntent_r.mnt_type, "cgroup"))
bcbd102c 130 continue;
ef6e34ee 131
1d39a065 132 if (subsystem) {
fd37327f 133 if (!hasmntopt(&mntent_r, subsystem))
1d39a065 134 continue;
ae5c8b8e 135 } else {
fd37327f 136 if (!mount_has_subsystem(&mntent_r))
1d39a065
DW
137 continue;
138 }
ad08bbb7 139
b98f7d6e 140 ret = snprintf(dest, MAXPATHLEN, "%s", mntent_r.mnt_dir);
ad08bbb7
DW
141 if (ret < 0 || ret >= MAXPATHLEN)
142 goto fail;
143
b98f7d6e 144 retv = true;
ad08bbb7 145 goto out;
bcbd102c 146 };
576f946d 147
d08ba6ec
SH
148fail:
149 DEBUG("Failed to find cgroup for %s\n",
150 subsystem ? subsystem : "(NULL)");
ad08bbb7
DW
151out:
152 endmntent(file);
b98f7d6e 153 return retv;
5193cc3d
DL
154}
155
/*
 * is_in_cgroup: check whether pid is found in the passed-in cgroup tasks
 * file.
 * @pid: the process id to look for
 * @path: full path to a cgroup tasks file
 *
 * Returns true if some line of the file starts with the decimal value of
 * @pid, false otherwise.
 * Note that in most cases the file will simply not exist, which is ok - it
 * just means that's not our cgroup.
 */
static bool is_in_cgroup(pid_t pid, char *path)
{
	int cmppid;
	char *line = NULL;
	size_t sz = 0;
	bool found = false;
	FILE *f = fopen(path, "r");

	if (!f)
		return false;

	while (getline(&line, &sz, f) != -1) {
		if (sscanf(line, "%d", &cmppid) == 1 && cmppid == pid) {
			found = true;
			break;
		}
	}

	/* free(NULL) is a no-op, so no guard is needed */
	free(line);
	fclose(f);
	return found;
}
184
/*
 * lxc_cgroup_path_get: Get the absolute pathname for a cgroup
 * file for a running container.
 *
 * @subsystem : subsystem of interest (e.g. "freezer"). If NULL, then
 *              the first cgroup entry in mtab will be used.
 * @name      : name of container to connect to
 * @lxcpath   : the lxcpath in which the container is running
 *
 * This is the exported function, which determines cgpath from the
 * lxc-start of the @name container running in @lxcpath.
 *
 * Returns path on success, NULL on error. The caller must free()
 * the returned path.
 */
char *lxc_cgroup_path_get(const char *subsystem, const char *name,
			  const char *lxcpath)
{
	char *cgpath, *cgp, path[MAXPATHLEN], *pathp, *p;
	size_t room;
	pid_t initpid = lxc_cmd_get_init_pid(name, lxcpath);
	int ret;

	if (initpid < 0)
		return NULL;

	cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem);
	if (!cgpath)
		return NULL;
	/* NOTE(review): cgpath is never released in this function; if
	 * lxc_cmd_get_cgroup_path() transfers ownership to the caller this
	 * leaks on every path below - confirm against its contract. */

	if (!get_subsys_mount(path, subsystem))
		return NULL;

	/* everything below is appended after the mountpoint */
	pathp = path + strlen(path);
	room = sizeof(path) - (size_t)(pathp - path);

	/*
	 * find a mntpt where i have the subsystem mounted, then find
	 * a subset cgpath under that which has pid in it.
	 *
	 * If cgpath is '/a/b/c/d', and the mountpoint is /x/y/z,
	 * then look for ourselves in:
	 *    /x/y/z/a/b/c/d/tasks
	 *    /x/y/z/b/c/d/tasks
	 *    /x/y/z/c/d/tasks
	 *    /x/y/z/d/tasks
	 *    /x/y/z/tasks
	 */
	cgp = cgpath;
	while (cgp[0]) {
		ret = snprintf(pathp, room, "%s/tasks", cgp);
		/* compare against the space actually left, not MAXPATHLEN */
		if (ret < 0 || (size_t)ret >= room)
			return NULL;
		if (is_in_cgroup(initpid, path))
			break;
		/* does not exist, try the next one */
		cgp = strchr(cgp + 1, '/');
		if (!cgp)
			break;
	}
	if (!cgp || !*cgp) {
		/* try just the path */
		ret = snprintf(pathp, room, "/tasks");
		if (ret < 0 || (size_t)ret >= room)
			return NULL;
		if (!is_in_cgroup(initpid, path))
			return NULL;
		return strdup("/");
	}
	/* path still has 'tasks' on the end, drop it */
	if ((p = strrchr(path, '/')) != NULL)
		*p = '\0';
	return strdup(path);
}
259
/*
 * do_cgroup_set: Write a value into a cgroup file
 *
 * @path  : absolute path to cgroup file
 * @value : value to write into file
 *
 * Returns 0 on success, < 0 on error.
 */
static int do_cgroup_set(const char *path, const char *value)
{
	int fd, ret;

	fd = open(path, O_WRONLY);
	if (fd < 0) {
		SYSERROR("open %s : %s", path, strerror(errno));
		return -1;
	}

	ret = write(fd, value, strlen(value));
	if (ret < 0) {
		/* report first: close() may overwrite the errno of write() */
		SYSERROR("write %s : %s", path, strerror(errno));
		close(fd);
		return ret;
	}

	ret = close(fd);
	if (ret < 0) {
		SYSERROR("close %s : %s", path, strerror(errno));
		return ret;
	}
	return 0;
}
289
b98f7d6e
SH
/*
 * cgroup_devices_has_deny: decide whether the blanket device deny rule @v
 * is already in effect for the container's devices cgroup.
 *
 * @h : handler passed to cgroup_get_subsys_path() to locate the cgroup
 * @v : the value configured for devices.deny
 *
 * Returns true when @v is "a" or "a *:* rwm" and devices.list has no
 * "a *:* rwm" line. Returns false for any other @v, when devices.list
 * contains "a *:* rwm", or when devices.list cannot be located or opened.
 */
static bool cgroup_devices_has_deny(struct lxc_handler *h, char *v)
{
	char *cgabspath, path[MAXPATHLEN];
	FILE *f;
	char *line = NULL;
	size_t bufsz = 0;
	bool ret = true;
	int rc;

	// XXX FIXME if users could use something other than 'lxc.devices.deny = a'.
	// not sure they ever do, but they *could*
	// right now, I'm assuming they do NOT
	if (strcmp(v, "a") && strcmp(v, "a *:* rwm"))
		return false;
	cgabspath = cgroup_get_subsys_path(h, "devices");
	if (!cgabspath)
		return false;

	/* keep the snprintf result in an int: a bool cannot show truncation */
	rc = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath);
	if (rc < 0 || rc >= MAXPATHLEN) {
		ERROR("pathname too long for devices.list");
		return false;
	}

	f = fopen(path, "r");
	if (!f)
		return false;

	while (getline(&line, &bufsz, f) != -1) {
		size_t n = strlen(line);

		if (n > 0 && line[n - 1] == '\n')
			line[n - 1] = '\0';
		if (strcmp(line, "a *:* rwm") == 0) {
			ret = false;
			break;
		}
	}

	fclose(f);
	free(line);
	return ret;
}
332
/*
 * cgroup_devices_has_allow: check whether the rule @v is already listed in
 * the container's devices.list.
 *
 * @h : handler passed to cgroup_get_subsys_path() to locate the cgroup
 * @v : the rule text to look for (compared against whole lines)
 *
 * Returns true only when a line of devices.list equals @v; false when no
 * line matches or devices.list cannot be located or opened.
 */
static bool cgroup_devices_has_allow(struct lxc_handler *h, char *v)
{
	char *cgabspath, path[MAXPATHLEN];
	bool ret = false;
	FILE *f;
	char *line = NULL;
	size_t bufsz = 0;
	int rc;

	cgabspath = cgroup_get_subsys_path(h, "devices");
	if (!cgabspath)
		return false;

	/* a separate int keeps the snprintf length from overwriting the
	 * boolean result */
	rc = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath);
	if (rc < 0 || rc >= MAXPATHLEN) {
		ERROR("pathname too long to for devices.list");
		return false;
	}

	f = fopen(path, "r");
	if (!f)
		return false;

	while (getline(&line, &bufsz, f) != -1) {
		size_t n = strlen(line);

		if (n > 0 && line[n - 1] == '\n')
			line[n - 1] = '\0';
		if (strcmp(line, v) == 0) {
			ret = true;
			break;
		}
	}

	free(line);
	fclose(f);
	return ret;
}
370
/*
 * lxc_cgroup_set_value: Write a value into a cgroup file
 *
 * @handler  : handler passed to cgroup_get_subsys_path() to find the
 *             container's cgroup directory
 * @filename : the cgroup file to write (e.g. "freezer.state"); the part
 *             before the first '.' is used as the subsystem name
 * @value    : value to write into file
 *
 * Returns 0 on success, < 0 on error.
 */
int lxc_cgroup_set_value(struct lxc_handler *handler, const char *filename,
			 const char *value)
{
	char *cgabspath, path[MAXPATHLEN], *p;
	int ret;

	/* path is first used as scratch space for the subsystem name */
	ret = snprintf(path, MAXPATHLEN, "%s", filename);
	if (ret < 0 || ret >= MAXPATHLEN)
		return -1;
	p = strchr(path, '.');
	if (p)
		*p = '\0';

	cgabspath = cgroup_get_subsys_path(handler, path);
	if (!cgabspath)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename);
	if (ret < 0 || ret >= MAXPATHLEN) {
		ERROR("pathname too long to set cgroup value %s to %s",
		      filename, value);
		return -1;
	}

	return do_cgroup_set(path, value);
}
404
/*
 * lxc_cgroup_set: Write a value into a cgroup file
 *
 * @name     : name of container to connect to
 * @filename : the cgroup file to write (e.g. "freezer.state"); the part
 *             before the first '.' is used as the subsystem name
 * @value    : value to write into file
 * @lxcpath  : the lxcpath in which the container is running
 *
 * Returns 0 on success, < 0 on error.
 */
int lxc_cgroup_set(const char *name, const char *filename, const char *value,
		   const char *lxcpath)
{
	int ret;
	char *cgabspath;
	char path[MAXPATHLEN];
	char subsystem[MAXPATHLEN];
	size_t sublen = strcspn(filename, ".");

	/* a fixed buffer replaces alloca(): a subsystem name this long could
	 * never fit into the final path anyway */
	if (sublen >= sizeof(subsystem)) {
		ERROR("pathname too long");
		return -1;
	}
	memcpy(subsystem, filename, sublen);
	subsystem[sublen] = '\0';

	cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath);
	if (!cgabspath)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename);
	if (ret < 0 || ret >= MAXPATHLEN) {
		ERROR("pathname too long");
		ret = -1;
		goto out;
	}

	ret = do_cgroup_set(path, value);

out:
	free(cgabspath);
	return ret;
}
444
/*
 * lxc_cgroup_get: Read value from a cgroup file
 *
 * @name     : name of container to connect to
 * @filename : the cgroup file to read (e.g. "freezer.state"); the part
 *             before the first '.' is used as the subsystem name
 * @value    : a pre-allocated buffer to copy the answer into
 * @len      : the length of pre-allocated @value
 * @lxcpath  : the lxcpath in which the container is running
 *
 * Returns the number of bytes read on success, < 0 on error
 *
 * If you pass in NULL value or 0 len, the return value will be the size of
 * the file, and @value will not contain the contents.
 *
 * Note that we can't get the file size quickly through stat or lseek.
 * Therefore if you pass in len > 0 but less than the file size, your only
 * indication will be that the return value will be equal to the passed-in
 * len. We will not return the actual full file size. In that case @value
 * is completely filled and therefore not NUL-terminated.
 */
int lxc_cgroup_get(const char *name, const char *filename, char *value,
		   size_t len, const char *lxcpath)
{
	int fd, ret;
	char *cgabspath;
	char path[MAXPATHLEN];
	char subsystem[MAXPATHLEN];
	size_t sublen = strcspn(filename, ".");

	/* a fixed buffer replaces alloca(): a subsystem name this long could
	 * never fit into the final path anyway */
	if (sublen >= sizeof(subsystem)) {
		ERROR("pathname too long");
		return -1;
	}
	memcpy(subsystem, filename, sublen);
	subsystem[sublen] = '\0';

	cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath);
	if (!cgabspath)
		return -1;

	ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename);
	if (ret < 0 || ret >= MAXPATHLEN) {
		ERROR("pathname too long");
		ret = -1;
		goto out;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0) {
		ERROR("open %s : %s", path, strerror(errno));
		ret = -1;
		goto out;
	}

	if (!len || !value) {
		/* size query: drain the file and add up the chunk sizes */
		char buf[100];
		int count = 0;

		while ((ret = read(fd, buf, sizeof(buf))) > 0)
			count += ret;
		if (ret >= 0)
			ret = count;
	} else {
		memset(value, 0, len);
		ret = read(fd, value, len);
	}

	if (ret < 0)
		ERROR("read %s : %s", path, strerror(errno));

	close(fd);
out:
	free(cgabspath);
	return ret;
}
515
b98f7d6e 516int lxc_cgroup_nrtasks(struct lxc_handler *handler)
c8f7c563 517{
ae5c8b8e 518 char path[MAXPATHLEN];
2acf7795 519 int pid, ret;
ae5c8b8e 520 FILE *file;
c8f7c563 521
b98f7d6e 522 if (!handler->cgroup)
2acf7795 523 return -1;
c8f7c563 524
b98f7d6e
SH
525 /* XXX Should we use a specific subsystem rather than the first one we
526 * found (handler->cgroup->curcgroup)? */
527 ret = snprintf(path, MAXPATHLEN, "%s/tasks", handler->cgroup->curcgroup);
2acf7795 528 if (ret < 0 || ret >= MAXPATHLEN) {
ae5c8b8e 529 ERROR("pathname too long");
b98f7d6e 530 return -1;
ae5c8b8e 531 }
c8f7c563 532
ae5c8b8e
SH
533 file = fopen(path, "r");
534 if (!file) {
535 SYSERROR("fopen '%s' failed", path);
b98f7d6e 536 return -1;
c8f7c563
CS
537 }
538
2acf7795 539 ret = 0;
ae5c8b8e 540 while (fscanf(file, "%d", &pid) != EOF)
2acf7795 541 ret++;
fd4f5a56 542
ae5c8b8e 543 fclose(file);
2acf7795 544 return ret;
fd4f5a56
DL
545}
546
b98f7d6e
SH
/*
 * in_subsys_list: test whether @s is one of the comma-separated entries of
 * @list.
 *
 * Empty entries (leading, trailing or doubled commas) are ignored, so an
 * empty @s never matches. The list is scanned in place; nothing is copied
 * or modified.
 *
 * Returns 1 if found, 0 otherwise (including when either argument is NULL).
 */
static int in_subsys_list(const char *s, const char *list)
{
	size_t slen;
	const char *tok;

	if (!list || !s)
		return 0;

	slen = strlen(s);
	if (slen == 0)
		return 0;

	tok = list;
	while (*tok) {
		size_t toklen = strcspn(tok, ",");

		if (toklen == slen && memcmp(tok, s, slen) == 0)
			return 1;

		/* step over this entry and its separator, if any */
		tok += toklen;
		if (*tok == ',')
			tok++;
	}

	return 0;
}
563
/*
 * set_clone_children: if the mount @m lists "cpuset" among its options,
 * write "1" into its cgroup.clone_children file.
 *
 * Best effort: nothing is reported if the path is too long or the file
 * cannot be opened.
 */
static void set_clone_children(struct mntent *m)
{
	char path[MAXPATHLEN];
	FILE *fout;
	int ret;

	if (!in_subsys_list("cpuset", m->mnt_opts))
		return;
	ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", m->mnt_dir);
	/* ret == MAXPATHLEN already means the path was truncated */
	if (ret < 0 || ret >= MAXPATHLEN)
		return;
	fout = fopen(path, "w");
	if (!fout)
		return;
	fprintf(fout, "1\n");
	fclose(fout);
}
581
b98f7d6e
SH
/*
 * have_visited: check whether any subsystem named in the mount options
 * @opts is already recorded in @visited.
 *
 * @opts           : comma-separated mount options
 * @visited        : comma-separated list of recorded subsystems (may be NULL)
 * @all_subsystems : comma-separated list of known subsystems; options not
 *                   in this list are ignored
 *
 * Returns true on the first option that is both a known subsystem and
 * present in @visited, false otherwise.
 */
static bool have_visited(char *opts, char *visited, char *all_subsystems)
{
	char *copy, *state = NULL, *token;

	/* strtok_r() writes into its input, so tokenize a private copy */
	copy = alloca(strlen(opts) + 1);
	strcpy(copy, opts);

	for (token = strtok_r(copy, ",", &state); token;
	     token = strtok_r(NULL, ",", &state)) {
		if (!in_subsys_list(token, all_subsystems))
			continue;
		if (visited && in_subsys_list(token, visited))
			return true;
	}

	return false;
}
597
598static bool is_in_desclist(struct cgroup_desc *d, char *opts, char *all_subsystems)
599{
600 while (d) {
601 if (have_visited(opts, d->subsystems, all_subsystems))
602 return true;
603 d = d->next;
604 }
605 return false;
606}
607
/*
 * record_visited: build a comma-separated list of every option in @opts that
 * is also a known subsystem.
 *
 * @opts           : comma-separated mount options
 * @all_subsystems : comma-separated list of known subsystems
 *
 * Returns a malloc'd string the caller must free, NULL if no option is a
 * known subsystem, or (char *)-ENOMEM on allocation failure.
 */
static char *record_visited(char *opts, char *all_subsystems)
{
	char *state = NULL, *token, *copy;
	int oldlen = 0, newlen, toklen;
	char *visited = NULL;

	/* strtok_r() writes into its input, so tokenize a private copy */
	copy = alloca(strlen(opts) + 1);
	strcpy(copy, opts);

	for (token = strtok_r(copy, ",", &state); token;
	     token = strtok_r(NULL, ",", &state)) {
		char *grown;

		if (!in_subsys_list(token, all_subsystems))
			continue;
		toklen = strlen(token);
		newlen = oldlen + toklen + 1; // ',' + token or token + '\0'
		grown = realloc(visited, newlen);
		if (!grown) {
			/* realloc left the old block alive; release it */
			free(visited);
			return (char *)-ENOMEM;
		}
		visited = grown;
		if (oldlen)
			strcat(visited, ",");
		else
			*visited = '\0';
		strcat(visited, token);
		/* remember the size so the next token is appended instead
		 * of replacing what was recorded so far */
		oldlen = newlen;
	}

	return visited;
}
633
/*
 * get_all_subsystems: collect the subsystem names listed in /proc/cgroups.
 *
 * The first line (column headings) is skipped; from every other line the
 * text before the first tab is taken. Each name is appended with a leading
 * comma, so the result looks like ",name1,name2".
 *
 * Returns a malloc'd string the caller must free, or NULL if the file cannot
 * be opened, no name was found, or memory ran out.
 */
static char *get_all_subsystems(void)
{
	FILE *f;
	char *line = NULL, *ret = NULL;
	size_t len = 0;
	int first = 1;

	/* read the list of subsystems from the kernel */
	f = fopen("/proc/cgroups", "r");
	if (!f)
		return NULL;

	while (getline(&line, &len, f) != -1) {
		char *c, *grown;
		size_t oldlen, newlen;
		int inc;

		/* skip the first line */
		if (first) {
			first = 0;
			continue;
		}

		c = strchr(line, '\t');
		if (!c)
			continue;
		*c = '\0';

		oldlen = ret ? strlen(ret) : 0;
		newlen = oldlen + strlen(line) + 2; /* ',' + name + '\0' */
		grown = realloc(ret, newlen);
		if (!grown) {
			free(ret);
			ret = NULL;
			goto out;
		}
		ret = grown;

		/* only newlen - oldlen bytes remain past the existing text */
		inc = snprintf(ret + oldlen, newlen - oldlen, ",%s", line);
		if (inc < 0 || (size_t)inc >= newlen - oldlen) {
			free(ret);
			ret = NULL;
			goto out;
		}
	}

out:
	free(line);
	fclose(f);
	return ret;
}
680
b98f7d6e
SH
681/*
682 * /etc/lxc/lxc.conf can contain lxc.cgroup.use = entries.
683 * If any of those are present, then lxc will ONLY consider
684 * cgroup filesystems mounted at one of the listed entries.
685 */
686static char *get_cgroup_uselist()
9a93d992 687{
b98f7d6e
SH
688 FILE *f;
689 char *line = NULL, *ret = NULL;
690 size_t sz = 0, retsz = 0, newsz;
9a93d992 691
b98f7d6e
SH
692 if ((f = fopen(LXC_GLOBAL_CONF, "r")) == NULL)
693 return NULL;
694 while (getline(&line, &sz, f) != -1) {
695 char *p = line;
696 while (*p && isblank(*p))
697 p++;
698 if (strncmp(p, "lxc.cgroup.use", 14) != 0)
699 continue;
700 p = index(p, '=');
701 if (!p)
702 continue;
703 p++;
704 while (*p && isblank(*p))
705 p++;
706 if (strlen(p) < 1)
707 continue;
708 newsz = retsz + strlen(p);
709 if (retsz == 0)
710 newsz += 1; // for trailing \0
711 // the last line in the file could lack \n
712 if (p[strlen(p)-1] != '\n')
713 newsz += 1;
714 ret = realloc(ret, newsz);
715 if (!ret) {
716 ERROR("Out of memory reading cgroup uselist");
717 fclose(f);
718 free(line);
719 return (char *)-ENOMEM;
720 }
721 if (retsz == 0)
722 strcpy(ret, p);
723 else
724 strcat(ret, p);
725 if (p[strlen(p)-1] != '\n')
726 ret[newsz-2] = '\0';
727 ret[newsz-1] = '\0';
728 retsz = newsz;
9a93d992
SH
729 }
730
b98f7d6e
SH
731 if (line)
732 free(line);
733 return ret;
9a93d992
SH
734}
735
/*
 * is_in_uselist: decide whether the mount @m is permitted by @uselist.
 *
 * @uselist : newline-separated entries, or NULL for "no restriction"
 * @m       : the mount entry whose mnt_dir is checked
 *
 * Returns true if @uselist is NULL or if m->mnt_dir begins with one of the
 * entries; false otherwise (including an empty list). A final entry without
 * a trailing newline is handled as well.
 */
static bool is_in_uselist(char *uselist, struct mntent *m)
{
	if (!uselist)
		return true;

	while (*uselist) {
		/* length of this entry, up to the newline or end of string */
		size_t entlen = strcspn(uselist, "\n");

		if (strncmp(m->mnt_dir, uselist, entlen) == 0)
			return true;

		uselist += entlen;
		if (*uselist == '\n')
			uselist++;
	}
	return false;
}
751
b98f7d6e 752static bool find_real_cgroup(struct cgroup_desc *d, char *path)
9a93d992 753{
b98f7d6e
SH
754 FILE *f;
755 char *line = NULL, *p, *p2;
756 int ret = 0;
757 size_t len;
9a93d992 758
b98f7d6e
SH
759 if ((f = fopen("/proc/self/cgroup", "r")) == NULL) {
760 SYSERROR("Error opening /proc/self/cgroups");
761 return false;
762 }
763
764 // If there is no subsystem, ignore the mount. Note we may want
765 // to change this, so that unprivileged users can use a unbound
766 // cgroup mount to arrange their container tasks.
767 if (!d->subsystems) {
768 fclose(f);
769 return false;
770 }
771 while (getline(&line, &len, f) != -1) {
772 if (!(p = index(line, ':')))
9a93d992 773 continue;
b98f7d6e 774 if (!(p2 = index(++p, ':')))
9a93d992 775 continue;
b98f7d6e
SH
776 *p2 = '\0';
777 // in case of multiple mounts it may be more correct to
778 // insist all subsystems be the same
779 if (in_subsys_list(p, d->subsystems))
780 goto found;
781 }
9a93d992 782
b98f7d6e
SH
783 if (line)
784 free(line);
785 fclose(f);
786 return false;;
787
788found:
789 fclose(f);
790 ret = snprintf(path, MAXPATHLEN, "%s", p2+1);
791 if (ret < 0 || ret >= MAXPATHLEN) {
792 free(line);
793 return false;
794 }
795 free(line);
796 return true;
9a93d992
SH
797}
798
b98f7d6e 799
ae5c8b8e 800/*
b98f7d6e
SH
801 * for a given cgroup mount entry, and a to-be-created container,
802 * 1. Figure out full path of the cgroup we are currently in,
803 * 2. Find a new free cgroup which is $path / $lxc_name with an
804 * optional '-$n' where n is an ever-increasing integer.
ae5c8b8e 805 */
b98f7d6e 806static char *find_free_cgroup(struct cgroup_desc *d, const char *lxc_name)
460a1cf0 807{
b98f7d6e
SH
808 char tail[20], cgpath[MAXPATHLEN], *cgp, path[MAXPATHLEN];
809 int i = 0, ret;
810 size_t l;
fd37327f 811
b98f7d6e
SH
812 if (!find_real_cgroup(d, cgpath)) {
813 ERROR("Failed to find current cgroup");
814 return NULL;
460a1cf0
DW
815 }
816
b98f7d6e
SH
817 /*
818 * If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z,
819 * then look for ourselves in:
820 * /x/y/z/a/b/c/d/tasks
821 * /x/y/z/b/c/d/tasks
822 * /x/y/z/c/d/tasks
823 * /x/y/z/d/tasks
824 * /x/y/z/tasks
825 */
826 cgp = cgpath;
827 while (cgp[0]) {
828 ret = snprintf(path, MAXPATHLEN, "%s%s/tasks", d->mntpt, cgp);
ae5c8b8e 829 if (ret < 0 || ret >= MAXPATHLEN)
b98f7d6e
SH
830 return NULL;
831 if (!is_in_cgroup(getpid(), path)) {
832 // does not exist, try the next one
833 cgp = index(cgp+1, '/');
834 if (!cgp)
835 break;
836 continue;
c8f7c563 837 }
b98f7d6e
SH
838 break;
839 }
840 if (!cgp || !*cgp) {
841 // try just the path
842 ret = snprintf(path, MAXPATHLEN, "%s/tasks", d->mntpt);
843 if (ret < 0 || ret >= MAXPATHLEN)
844 return NULL;
845 if (!is_in_cgroup(getpid(), path))
846 return NULL;
847 }
848 // found it
849 // path has '/tasks' at end, drop that
c32981c3 850 if (!(cgp = strrchr(path, '/'))) {
b98f7d6e
SH
851 ERROR("Got nonsensical path name %s\n", path);
852 return NULL;
853 }
854 *cgp = '\0';
c8f7c563 855
b98f7d6e
SH
856 if (strlen(path) + strlen(lxc_name) + 20 > MAXPATHLEN) {
857 ERROR("Error: cgroup path too long");
858 return NULL;
859 }
860 tail[0] = '\0';
861 while (1) {
862 struct stat sb;
863 int freebytes = MAXPATHLEN - (cgp - path);
864
865 if (i) {
866 ret = snprintf(tail, 20, "-%d", i);
867 if (ret < 0 || ret >= 20)
868 return NULL;
869 }
870 ret = snprintf(cgp, freebytes, "/%s%s", lxc_name, tail);
871 if (ret < 0 || ret >= freebytes)
872 return NULL;
873 if (stat(path, &sb) == -1)
874 break;
875 i++;
c8f7c563
CS
876 }
877
b98f7d6e
SH
878 l = strlen(cgpath);
879 ret = snprintf(cgpath + l, MAXPATHLEN - l, "/%s%s", lxc_name, tail);
880 if (ret < 0 || ret >= (MAXPATHLEN - l)) {
881 ERROR("Out of memory");
882 return NULL;
883 }
884 if ((d->realcgroup = strdup(cgpath)) == NULL) {
885 ERROR("Out of memory");
886 return NULL;
887 }
888 l = strlen(d->realcgroup);
889 if (l > 0 && d->realcgroup[l-1] == '\n')
890 d->realcgroup[l-1] = '\0';
891 return strdup(path);
c8f7c563
CS
892}
893
d08ba6ec 894/*
ae5c8b8e
SH
895 * For a new container, find a cgroup path which is unique in all cgroup mounts.
896 * I.e. if r1 is already running, then /lxc/r1-1 may be used.
897 *
898 * @lxcgroup: the cgroup 'group' the contaienr should run in. By default, this
899 * is just 'lxc'. Admins may wish to group some containers into other groups,
900 * i.e. 'build', to take advantage of cgroup hierarchy to simplify group
901 * administration. Also, unprivileged users who are placed into a cgroup by
902 * libcgroup_pam will be using that cgroup rather than the system-wide 'lxc'
903 * group.
904 * @name: the name of the container
905 *
906 * The chosen cgpath is returned as a strdup'd string. The caller will have to
907 * free that eventually, however the lxc monitor will keep that string so as to
908 * return it in response to a LXC_COMMAND_CGROUP query.
909 *
23622a2a
SH
910 * Note the path is relative to cgroup mounts. I.e. if the freezer subsystem
911 * is at /sys/fs/cgroup/freezer, and this fn returns '/lxc/r1', then the
912 * freezer cgroup's full path will be /sys/fs/cgroup/freezer/lxc/r1/.
ae5c8b8e 913 *
ae5c8b8e 914 * Races won't be determintal, you'll just end up with leftover unused cgroups
d08ba6ec 915 */
b98f7d6e 916struct cgroup_desc *lxc_cgroup_path_create(const char *name)
d08ba6ec 917{
b98f7d6e 918 struct cgroup_desc *retdesc = NULL, *newdesc = NULL;
ae5c8b8e 919 FILE *file = NULL;
93d564ed 920 struct mntent mntent_r;
fd37327f 921 char buf[LARGE_MAXPATHLEN] = {0};
b98f7d6e
SH
922 char *all_subsystems = get_all_subsystems();
923 char *cgroup_uselist = get_cgroup_uselist();
d08ba6ec 924
b98f7d6e
SH
925 if (cgroup_uselist == (char *)-ENOMEM) {
926 if (all_subsystems)
927 free(all_subsystems);
928 return NULL;
929 }
930 if (!all_subsystems) {
931 ERROR("failed to get a list of all cgroup subsystems");
932 if (cgroup_uselist)
933 free(cgroup_uselist);
9a93d992 934 return NULL;
9a93d992 935 }
ae5c8b8e
SH
936 file = setmntent(MTAB, "r");
937 if (!file) {
938 SYSERROR("failed to open %s", MTAB);
b98f7d6e
SH
939 free(all_subsystems);
940 if (cgroup_uselist)
941 free(cgroup_uselist);
942 return NULL;
d08ba6ec 943 }
1ac470c0 944
93d564ed 945 while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) {
fd4f5a56 946
fd37327f 947 if (strcmp(mntent_r.mnt_type, "cgroup"))
ae5c8b8e 948 continue;
b98f7d6e
SH
949
950 if (cgroup_uselist && !is_in_uselist(cgroup_uselist, &mntent_r))
ae5c8b8e 951 continue;
e4659536 952
9a93d992 953 /* make sure we haven't checked this subsystem already */
b98f7d6e 954 if (is_in_desclist(retdesc, mntent_r.mnt_opts, all_subsystems))
9a93d992 955 continue;
9a93d992 956
b98f7d6e
SH
957 if (!(newdesc = malloc(sizeof(struct cgroup_desc)))) {
958 ERROR("Out of memory reading cgroups");
ae5c8b8e 959 goto fail;
b98f7d6e
SH
960 }
961 newdesc->subsystems = record_visited(mntent_r.mnt_opts, all_subsystems);
962 if (newdesc->subsystems == (char *)-ENOMEM) {
963 ERROR("Out of memory recording cgroup subsystems");
964 free(newdesc);
965 newdesc = NULL;
966 goto fail;
967 }
968 if (!newdesc->subsystems) {
969 free(newdesc);
970 newdesc = NULL;
971 continue;
972 }
973 newdesc->mntpt = strdup(mntent_r.mnt_dir);
974 newdesc->realcgroup = NULL;
975 newdesc->curcgroup = find_free_cgroup(newdesc, name);
976 if (!newdesc->mntpt || !newdesc->curcgroup) {
977 ERROR("Out of memory reading cgroups");
978 goto fail;
979 }
5193cc3d 980
b98f7d6e 981 set_clone_children(&mntent_r);
fd4f5a56 982
b98f7d6e
SH
983 if (mkdir(newdesc->curcgroup, 0755)) {
984 ERROR("Error creating cgroup %s", newdesc->curcgroup);
ae5c8b8e 985 goto fail;
e7f40d8a 986 }
b98f7d6e
SH
987 newdesc->next = retdesc;
988 retdesc = newdesc;
d08ba6ec
SH
989 }
990
ae5c8b8e 991 endmntent(file);
b98f7d6e
SH
992 free(all_subsystems);
993 if (cgroup_uselist)
994 free(cgroup_uselist);
995 return retdesc;
ae5c8b8e
SH
996
997fail:
998 endmntent(file);
b98f7d6e
SH
999 free(all_subsystems);
1000 if (cgroup_uselist)
1001 free(cgroup_uselist);
1002 if (newdesc) {
1003 if (newdesc->mntpt)
1004 free(newdesc->mntpt);
1005 if (newdesc->subsystems)
1006 free(newdesc->subsystems);
1007 if (newdesc->curcgroup)
1008 free(newdesc->curcgroup);
1009 if (newdesc->realcgroup)
1010 free(newdesc->realcgroup);
1011 free(newdesc);
1012 }
1013 while (retdesc) {
1014 struct cgroup_desc *t = retdesc;
1015 retdesc = retdesc->next;
1016 if (t->mntpt)
1017 free(t->mntpt);
1018 if (t->subsystems)
1019 free(t->subsystems);
1020 if (t->curcgroup)
1021 free(t->curcgroup);
1022 if (t->realcgroup)
1023 free(t->realcgroup);
1024 free(t);
1025
1026 }
ae5c8b8e 1027 return NULL;
36b86299
DL
1028}
1029
/*
 * lxc_cgroup_enter_one: move @pid into the cgroup directory @dir by writing
 * its number to "<dir>/tasks".
 *
 * Returns true on success, false if the path is too long or the file cannot
 * be opened, written or closed.
 */
static bool lxc_cgroup_enter_one(const char *dir, int pid)
{
	char path[MAXPATHLEN];
	int ret;
	FILE *fout;

	ret = snprintf(path, MAXPATHLEN, "%s/tasks", dir);
	if (ret < 0 || ret >= MAXPATHLEN) {
		ERROR("Error entering cgroup");
		return false;
	}

	fout = fopen(path, "w");
	if (!fout) {
		SYSERROR("Error entering cgroup");
		return false;
	}

	if (fprintf(fout, "%d\n", (int)pid) < 0) {
		ERROR("Error writing pid to %s to enter cgroup", path);
		fclose(fout);
		return false;
	}

	/* the stream is buffered, so a rejected write can surface here */
	if (fclose(fout) < 0) {
		SYSERROR("Error writing pid to %s to enter cgroup", path);
		return false;
	}

	return true;
}
1058
1059int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid)
1060{
1061 while (cgroups) {
1062 if (!cgroups->subsystems)
1063 goto next;
1064
1065 if (!lxc_cgroup_enter_one(cgroups->curcgroup, pid))
1066 return -1;
1067next:
1068 cgroups = cgroups->next;
1069 }
1070 return 0;
bcbd102c
SH
1071}
1072
/*
 * cgroup_rmdir: remove the directory @dirname after recursively removing
 * every subdirectory below it. Non-directory entries are left alone.
 *
 * Returns the result of the final rmdir() on @dirname (0 on success, -1 with
 * errno set on failure), or -1 if @dirname cannot be opened. Failures on
 * subdirectories are not reported separately.
 */
static int cgroup_rmdir(char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	char pathname[MAXPATHLEN];

	dir = opendir(dirname);
	if (!dir) {
		WARN("failed to open directory: %m");
		return -1;
	}

	/* readdir() replaces the deprecated readdir_r(); every recursion
	 * level works on its own DIR stream */
	while ((direntp = readdir(dir)) != NULL) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			continue;
		}
		if (stat(pathname, &mystat))
			continue;
		if (S_ISDIR(mystat.st_mode))
			cgroup_rmdir(pathname);
	}

	/* close first so the errno seen by the caller is the one from
	 * rmdir() */
	if (closedir(dir))
		ERROR("failed to close directory");

	return rmdir(dirname);
}
bcbd102c 1115
bcbd102c
SH
1116/*
1117 * for each mounted cgroup, destroy the cgroup for the container
1118 */
b98f7d6e 1119void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups)
a6ddef61 1120{
b98f7d6e
SH
1121 while (cgroups) {
1122 struct cgroup_desc *next = cgroups->next;
1123 if (cgroup_rmdir(cgroups->curcgroup) < 0)
1124 SYSERROR("Error removing cgroup directory %s", cgroups->curcgroup);
1125 free(cgroups->mntpt);
1126 free(cgroups->subsystems);
1127 free(cgroups->curcgroup);
1128 free(cgroups->realcgroup);
1129 free(cgroups);
1130 cgroups = next;
bcbd102c 1131 }
a6ddef61
MN
1132}
1133
/*
 * Move @pid into the cgroups of container @name (under @lxcpath).
 *
 * The subsystem names are taken from the first tab-separated column of
 * /proc/cgroups (the header line is skipped).  For each subsystem the
 * container's cgroup path is looked up with lxc_cgroup_path_get(); subsystems
 * for which no path is returned are skipped.  Joining is best effort: a
 * failure to enter one cgroup is logged and the remaining subsystems are
 * still processed.
 *
 * Returns 0 once /proc/cgroups has been processed, -1 if it cannot be opened.
 */
int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath)
{
	FILE *f;
	char *line = NULL;
	size_t len = 0;
	int first = 1;
	char *dirpath;

	/* read the list of subsystems from the kernel */
	f = fopen("/proc/cgroups", "r");
	if (!f)
		return -1;

	while (getline(&line, &len, f) != -1) {
		char *c;

		/* skip the first line */
		if (first) {
			first = 0;
			continue;
		}

		/* keep only the subsystem name (first column) */
		c = strchr(line, '\t');
		if (!c)
			continue;
		*c = '\0';

		/* NOTE(review): ownership of the returned path is not visible
		 * here - confirm whether it has to be freed by the caller. */
		dirpath = lxc_cgroup_path_get(line, name, lxcpath);
		if (!dirpath)
			continue;

		INFO("joining pid %d to cgroup %s", (int)pid, dirpath);
		/* lxc_cgroup_enter_one() returns true on success */
		if (!lxc_cgroup_enter_one(dirpath, pid))
			ERROR("Failed joining %d to %s\n", (int)pid, dirpath);
	}

	free(line);
	fclose(f);
	return 0;
}
283678ed 1176
b98f7d6e 1177bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d)
283678ed
SH
1178{
1179 char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN];
1180 FILE *f;
1181 int ret, junk;
b98f7d6e 1182 size_t sz = 0, l1, l2;
283678ed
SH
1183 char *end = index(subsystem, '.');
1184 int len = end ? (end - subsystem) : strlen(subsystem);
b98f7d6e
SH
1185 const char *cgpath = NULL;
1186
1187 while (d) {
1188 if (in_subsys_list("devices", d->subsystems)) {
1189 cgpath = d->realcgroup;
1190 l1 = strlen(cgpath);
1191 break;
1192 }
1193 d = d->next;
1194 }
1195 if (!d)
1196 return false;
283678ed
SH
1197
1198 ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid);
1199 if (ret < 0 || ret >= MAXPATHLEN)
1200 return false;
1201 if ((f = fopen(filepath, "r")) == NULL)
1202 return false;
1203 while (getline(&line, &sz, f) != -1) {
1204 // nr:subsystem:path
1205 v2[0] = v2[1] = '\0';
1206 ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2);
1207 if (ret != 3) {
1208 fclose(f);
b98f7d6e 1209 free(line);
283678ed
SH
1210 return false;
1211 }
1212 len = end ? end - subsystem : strlen(subsystem);
1213 if (strncmp(v1, subsystem, len) != 0)
1214 continue;
1215 // v2 will start with '/', skip it by using v2+1
1216 // we must be in SUBcgroup, so make sure l2 > l1
1217 l2 = strlen(v2+1);
1218 if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) {
1219 fclose(f);
b98f7d6e 1220 free(line);
283678ed
SH
1221 return true;
1222 }
1223 }
1224 fclose(f);
b98f7d6e
SH
1225 if (line)
1226 free(line);
283678ed
SH
1227 return false;
1228}
b113383b 1229
b98f7d6e 1230char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys)
b113383b 1231{
b98f7d6e 1232 struct cgroup_desc *d;
b113383b 1233
b98f7d6e
SH
1234 for (d = handler->cgroup; d; d = d->next) {
1235 if (in_subsys_list(subsys, d->subsystems))
1236 return d->realcgroup;
1237 }
b113383b 1238
b98f7d6e
SH
1239 return NULL;
1240}
1241
1242static int _setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups,
1243 int devices)
1244{
1245 struct lxc_list *iterator;
1246 struct lxc_cgroup *cg;
1247 int ret = -1;
1248
1249 if (lxc_list_empty(cgroups))
1250 return 0;
1251
1252 lxc_list_for_each(iterator, cgroups) {
1253 cg = iterator->elem;
1254
1255 if (devices == !strncmp("devices", cg->subsystem, 7)) {
1256 if (strcmp(cg->subsystem, "devices.deny") == 0 &&
1257 cgroup_devices_has_deny(h, cg->value))
1258 continue;
1259 if (strcmp(cg->subsystem, "devices.allow") == 0 &&
1260 cgroup_devices_has_allow(h, cg->value))
1261 continue;
1262 if (lxc_cgroup_set_value(h, cg->subsystem, cg->value)) {
1263 ERROR("Error setting %s to %s for %s\n",
1264 cg->subsystem, cg->value, h->name);
1265 goto out;
1266 }
b113383b 1267 }
b98f7d6e
SH
1268
1269 DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value);
b113383b
SH
1270 }
1271
b98f7d6e
SH
1272 ret = 0;
1273 INFO("cgroup has been setup");
1274out:
b113383b
SH
1275 return ret;
1276}
b98f7d6e
SH
1277
/*
 * Apply the cgroup settings of @cgroups through _setup_cgroup() with its
 * devices flag set to 1.  The return value of _setup_cgroup() is passed
 * through unchanged.
 */
int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups)
{
	const int devices_flag = 1;

	return _setup_cgroup(h, cgroups, devices_flag);
}
1282
/*
 * Apply the cgroup settings of @cgroups through _setup_cgroup() with its
 * devices flag set to 0.  The return value of _setup_cgroup() is passed
 * through unchanged.
 */
int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups)
{
	const int devices_flag = 0;

	return _setup_cgroup(h, cgroups, devices_flag);
}