]> git.proxmox.com Git - mirror_iproute2.git/blob - ip/ipvrf.c
Drop capabilities if not running ip exec vrf with libcap
[mirror_iproute2.git] / ip / ipvrf.c
1 /*
2 * ipvrf.c "ip vrf"
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: David Ahern <dsa@cumulusnetworks.com>
10 *
11 */
12
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <sys/socket.h>
16 #include <sys/mount.h>
17 #include <linux/bpf.h>
18 #include <linux/if.h>
19 #include <fcntl.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <string.h>
24 #include <dirent.h>
25 #include <errno.h>
26 #include <limits.h>
27
28 #include "rt_names.h"
29 #include "utils.h"
30 #include "ip_common.h"
31 #include "bpf_util.h"
32
/* file name (with leading '/') appended to a cgroup directory path to
 * reach the file listing / accepting the pids of that cgroup
 */
#define CGRP_PROC_FILE "/cgroup.procs"

/* filter shared by the show path: ipvrf_show() sets .kind, and
 * ipvrf_filter_req() / ipvrf_print() read .kind, .ifindex and .master
 */
static struct link_filter vrf_filter;
36
/* Print the "ip vrf" synopsis on stderr, then terminate with exit(-1). */
static void usage(void)
{
	fprintf(stderr,
		"Usage: ip vrf show [NAME] ...\n"
		" ip vrf exec [NAME] cmd ...\n"
		" ip vrf identify [PID]\n"
		" ip vrf pids [NAME]\n");

	exit(-1);
}
46
47 /*
48 * parse process based cgroup file looking for PATH/vrf/NAME where
49 * NAME is the name of the vrf the process is associated with
50 */
51 static int vrf_identify(pid_t pid, char *name, size_t len)
52 {
53 char path[PATH_MAX];
54 char buf[4096];
55 char *vrf, *end;
56 FILE *fp;
57
58 snprintf(path, sizeof(path), "/proc/%d/cgroup", pid);
59 fp = fopen(path, "r");
60 if (!fp)
61 return -1;
62
63 memset(name, 0, len);
64
65 while (fgets(buf, sizeof(buf), fp)) {
66 /* want the controller-less cgroup */
67 if (strstr(buf, "::/") == NULL)
68 continue;
69
70 vrf = strstr(buf, "/vrf/");
71 if (vrf) {
72 vrf += 5; /* skip past "/vrf/" */
73 end = strchr(vrf, '\n');
74 if (end)
75 *end = '\0';
76
77 strlcpy(name, vrf, len);
78 break;
79 }
80 }
81
82 fclose(fp);
83
84 return 0;
85 }
86
/*
 * "ip vrf identify [PID]": print the VRF name associated with PID, or
 * with the calling process when no PID argument is given. Nothing is
 * printed when the process has no VRF association.
 *
 * Returns the result of vrf_identify(); on failure an error message
 * based on errno is written to stderr.
 */
static int ipvrf_identify(int argc, char **argv)
{
	unsigned int pid;
	char vrf_name[32];
	int rc;

	if (argc < 1) {
		pid = getpid();
	} else if (argc > 1) {
		invarg("Extra arguments specified\n", argv[1]);
	} else if (get_unsigned(&pid, argv[0], 10)) {
		invarg("Invalid pid\n", argv[0]);
	}

	rc = vrf_identify(pid, vrf_name, sizeof(vrf_name));
	if (rc) {
		fprintf(stderr, "Failed to lookup vrf association: %s\n",
			strerror(errno));
		return rc;
	}

	if (vrf_name[0] != '\0')
		printf("%s\n", vrf_name);

	return rc;
}
111
112 /* read PATH/vrf/NAME/cgroup.procs file */
113 static void read_cgroup_pids(const char *base_path, char *name)
114 {
115 char path[PATH_MAX];
116 char buf[4096];
117 FILE *fp;
118
119 if (snprintf(path, sizeof(path), "%s/vrf/%s%s",
120 base_path, name, CGRP_PROC_FILE) >= sizeof(path))
121 return;
122
123 fp = fopen(path, "r");
124 if (!fp)
125 return; /* no cgroup file, nothing to show */
126
127 /* dump contents (pids) of cgroup.procs */
128 while (fgets(buf, sizeof(buf), fp)) {
129 char *nl, comm[32];
130
131 nl = strchr(buf, '\n');
132 if (nl)
133 *nl = '\0';
134
135 if (get_command_name(buf, comm, sizeof(comm)))
136 strcpy(comm, "<terminated?>");
137
138 printf("%5s %s\n", buf, comm);
139 }
140
141 fclose(fp);
142 }
143
144 /* recurse path looking for PATH[/NETNS]/vrf/NAME */
145 static int recurse_dir(char *base_path, char *name, const char *netns)
146 {
147 char path[PATH_MAX];
148 struct dirent *de;
149 struct stat fstat;
150 int rc;
151 DIR *d;
152
153 d = opendir(base_path);
154 if (!d)
155 return -1;
156
157 while ((de = readdir(d)) != NULL) {
158 if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
159 continue;
160
161 if (!strcmp(de->d_name, "vrf")) {
162 const char *pdir = strrchr(base_path, '/');
163
164 /* found a 'vrf' directory. if it is for the given
165 * namespace then dump the cgroup pids
166 */
167 if (*netns == '\0' ||
168 (pdir && !strcmp(pdir+1, netns)))
169 read_cgroup_pids(base_path, name);
170
171 continue;
172 }
173
174 /* is this a subdir that needs to be walked */
175 if (snprintf(path, sizeof(path), "%s/%s",
176 base_path, de->d_name) >= sizeof(path))
177 continue;
178
179 if (lstat(path, &fstat) < 0)
180 continue;
181
182 if (S_ISDIR(fstat.st_mode)) {
183 rc = recurse_dir(path, name, netns);
184 if (rc != 0)
185 goto out;
186 }
187 }
188
189 rc = 0;
190 out:
191 closedir(d);
192
193 return rc;
194 }
195
/*
 * Fill netns with the name netns_identify_pid() reports for "self",
 * with "-ns" appended when that name is non-empty.
 *
 * len is the size of the netns buffer; three bytes are held back from
 * the lookup so the "-ns" suffix can be appended.
 * Returns 0 on success, -1 (after printing an error) if the lookup fails.
 */
static int ipvrf_get_netns(char *netns, int len)
{
	if (netns_identify_pid("self", netns, len - 3) != 0) {
		fprintf(stderr, "Failed to get name of network namespace: %s\n",
			strerror(errno));
		return -1;
	}

	if (netns[0] == '\0')
		return 0;

	strcat(netns, "-ns");

	return 0;
}
209
/*
 * "ip vrf pids NAME": list the processes associated with VRF NAME by
 * walking the cgroup2 mount returned by find_cgroup2_mount().
 *
 * Requires exactly one argument that name_is_vrf() accepts.
 * Returns -1 on bad arguments, missing mount or netns lookup failure,
 * otherwise the result of recurse_dir(). The mount string is released
 * with free() before returning.
 */
static int ipvrf_pids(int argc, char **argv)
{
	char netns[256];
	char *cgrp_mnt;
	int rc = -1;

	if (argc != 1) {
		fprintf(stderr, "Invalid arguments\n");
		return -1;
	}

	if (!name_is_vrf(argv[0])) {
		fprintf(stderr, "Invalid VRF name\n");
		return -1;
	}

	cgrp_mnt = find_cgroup2_mount();
	if (cgrp_mnt == NULL)
		return -1;

	if (ipvrf_get_netns(netns, sizeof(netns)) >= 0)
		rc = recurse_dir(cgrp_mnt, argv[0], netns);

	free(cgrp_mnt);

	return rc;
}
241
242 /* load BPF program to set sk_bound_dev_if for sockets */
243 static char bpf_log_buf[256*1024];
244
245 static int prog_load(int idx)
246 {
247 struct bpf_insn prog[] = {
248 BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
249 BPF_MOV64_IMM(BPF_REG_3, idx),
250 BPF_MOV64_IMM(BPF_REG_2,
251 offsetof(struct bpf_sock, bound_dev_if)),
252 BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
253 offsetof(struct bpf_sock, bound_dev_if)),
254 BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
255 BPF_EXIT_INSN(),
256 };
257
258 return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
259 "GPL", bpf_log_buf, sizeof(bpf_log_buf));
260 }
261
262 static int vrf_configure_cgroup(const char *path, int ifindex)
263 {
264 int rc = -1, cg_fd, prog_fd = -1;
265
266 cg_fd = open(path, O_DIRECTORY | O_RDONLY);
267 if (cg_fd < 0) {
268 fprintf(stderr,
269 "Failed to open cgroup path: '%s'\n",
270 strerror(errno));
271 goto out;
272 }
273
274 /*
275 * Load bpf program into kernel and attach to cgroup to affect
276 * socket creates
277 */
278 prog_fd = prog_load(ifindex);
279 if (prog_fd < 0) {
280 fprintf(stderr, "Failed to load BPF prog: '%s'\n",
281 strerror(errno));
282
283 if (errno != EPERM) {
284 fprintf(stderr,
285 "Kernel compiled with CGROUP_BPF enabled?\n");
286 }
287 goto out;
288 }
289
290 if (bpf_prog_attach_fd(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) {
291 fprintf(stderr, "Failed to attach prog to cgroup: '%s'\n",
292 strerror(errno));
293 goto out;
294 }
295
296 rc = 0;
297 out:
298 close(cg_fd);
299 close(prog_fd);
300
301 return rc;
302 }
303
304 /* get base path for controller-less cgroup for a process.
305 * path returned does not include /vrf/NAME if it exists
306 */
307 static int vrf_path(char *vpath, size_t len)
308 {
309 char path[PATH_MAX];
310 char buf[4096];
311 char *vrf;
312 FILE *fp;
313
314 snprintf(path, sizeof(path), "/proc/%d/cgroup", getpid());
315 fp = fopen(path, "r");
316 if (!fp)
317 return -1;
318
319 vpath[0] = '\0';
320
321 while (fgets(buf, sizeof(buf), fp)) {
322 char *start, *nl;
323
324 start = strstr(buf, "::/");
325 if (!start)
326 continue;
327
328 /* advance past '::' */
329 start += 2;
330
331 nl = strchr(start, '\n');
332 if (nl)
333 *nl = '\0';
334
335 vrf = strstr(start, "/vrf");
336 if (vrf)
337 *vrf = '\0';
338
339 strlcpy(vpath, start, len);
340
341 /* if vrf path is just / then return nothing */
342 if (!strcmp(vpath, "/"))
343 vpath[0] = '\0';
344
345 break;
346 }
347
348 fclose(fp);
349
350 return 0;
351 }
352
353 static int vrf_switch(const char *name)
354 {
355 char path[PATH_MAX], *mnt, pid[16];
356 char vpath[PATH_MAX], netns[256];
357 int ifindex = 0;
358 int rc = -1, len, fd = -1;
359
360 if (strcmp(name, "default")) {
361 ifindex = name_is_vrf(name);
362 if (!ifindex) {
363 fprintf(stderr, "Invalid VRF name\n");
364 return -1;
365 }
366 }
367
368 mnt = find_cgroup2_mount();
369 if (!mnt)
370 return -1;
371
372 /* -1 on length to add '/' to the end */
373 if (ipvrf_get_netns(netns, sizeof(netns) - 1) < 0)
374 goto out;
375
376 if (vrf_path(vpath, sizeof(vpath)) < 0) {
377 fprintf(stderr, "Failed to get base cgroup path: %s\n",
378 strerror(errno));
379 goto out;
380 }
381
382 /* if path already ends in netns then don't add it again */
383 if (*netns != '\0') {
384 char *pdir = strrchr(vpath, '/');
385
386 if (!pdir)
387 pdir = vpath;
388 else
389 pdir++;
390
391 if (strcmp(pdir, netns) == 0)
392 *pdir = '\0';
393
394 strcat(netns, "/");
395 }
396
397 /* path to cgroup; make sure buffer has room to cat "/cgroup.procs"
398 * to the end of the path
399 */
400 len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE),
401 "%s%s/%svrf/%s",
402 mnt, vpath, netns, ifindex ? name : "");
403 if (len > sizeof(path) - sizeof(CGRP_PROC_FILE)) {
404 fprintf(stderr, "Invalid path to cgroup2 mount\n");
405 goto out;
406 }
407
408 if (make_path(path, 0755)) {
409 fprintf(stderr, "Failed to setup vrf cgroup2 directory\n");
410 goto out;
411 }
412
413 if (ifindex && vrf_configure_cgroup(path, ifindex))
414 goto out;
415
416 /*
417 * write pid to cgroup.procs making process part of cgroup
418 */
419 strcat(path, CGRP_PROC_FILE);
420 fd = open(path, O_RDWR | O_APPEND);
421 if (fd < 0) {
422 fprintf(stderr, "Failed to open cgroups.procs file: %s.\n",
423 strerror(errno));
424 goto out;
425 }
426
427 snprintf(pid, sizeof(pid), "%d", getpid());
428 if (write(fd, pid, strlen(pid)) < 0) {
429 fprintf(stderr, "Failed to join cgroup\n");
430 goto out2;
431 }
432
433 rc = 0;
434 out2:
435 close(fd);
436 out:
437 free(mnt);
438
439 drop_cap();
440
441 return rc;
442 }
443
444 static int ipvrf_exec(int argc, char **argv)
445 {
446 if (argc < 1) {
447 fprintf(stderr, "No VRF name specified\n");
448 return -1;
449 }
450 if (argc < 2) {
451 fprintf(stderr, "No command specified\n");
452 return -1;
453 }
454
455 if (vrf_switch(argv[0]))
456 return -1;
457
458 return -cmd_exec(argv[1], argv + 1, !!batch_mode);
459 }
460
/* Put the current process back into the default VRF if, and only if,
 * vrf_identify() succeeds and reports a VRF association for it;
 * used by netns_exec
 */
void vrf_reset(void)
{
	char current[32];

	if (vrf_identify(getpid(), current, sizeof(current)) != 0)
		return;

	if (current[0] != '\0')
		vrf_switch("default");
}
474
475 static int ipvrf_filter_req(struct nlmsghdr *nlh, int reqlen)
476 {
477 struct rtattr *linkinfo;
478 int err;
479
480 if (vrf_filter.kind) {
481 linkinfo = addattr_nest(nlh, reqlen, IFLA_LINKINFO);
482
483 err = addattr_l(nlh, reqlen, IFLA_INFO_KIND, vrf_filter.kind,
484 strlen(vrf_filter.kind));
485 if (err)
486 return err;
487
488 addattr_nest_end(nlh, linkinfo);
489 }
490
491 return 0;
492 }
493
494 /* input arg is linkinfo */
495 static __u32 vrf_table_linkinfo(struct rtattr *li[])
496 {
497 struct rtattr *attr[IFLA_VRF_MAX + 1];
498
499 if (li[IFLA_INFO_DATA]) {
500 parse_rtattr_nested(attr, IFLA_VRF_MAX, li[IFLA_INFO_DATA]);
501
502 if (attr[IFLA_VRF_TABLE])
503 return rta_getattr_u32(attr[IFLA_VRF_TABLE]);
504 }
505
506 return 0;
507 }
508
509 static int ipvrf_print(struct nlmsghdr *n)
510 {
511 struct ifinfomsg *ifi = NLMSG_DATA(n);
512 struct rtattr *tb[IFLA_MAX+1];
513 struct rtattr *li[IFLA_INFO_MAX+1];
514 int len = n->nlmsg_len;
515 const char *name;
516 __u32 tb_id;
517
518 len -= NLMSG_LENGTH(sizeof(*ifi));
519 if (len < 0)
520 return 0;
521
522 if (vrf_filter.ifindex && vrf_filter.ifindex != ifi->ifi_index)
523 return 0;
524
525 parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
526
527 /* kernel does not support filter by master device */
528 if (tb[IFLA_MASTER]) {
529 int master = *(int *)RTA_DATA(tb[IFLA_MASTER]);
530
531 if (vrf_filter.master && master != vrf_filter.master)
532 return 0;
533 }
534
535 if (!tb[IFLA_IFNAME]) {
536 fprintf(stderr,
537 "BUG: device with ifindex %d has nil ifname\n",
538 ifi->ifi_index);
539 return 0;
540 }
541 name = rta_getattr_str(tb[IFLA_IFNAME]);
542
543 /* missing LINKINFO means not VRF. e.g., kernel does not
544 * support filtering on kind, so userspace needs to handle
545 */
546 if (!tb[IFLA_LINKINFO])
547 return 0;
548
549 parse_rtattr_nested(li, IFLA_INFO_MAX, tb[IFLA_LINKINFO]);
550
551 if (!li[IFLA_INFO_KIND])
552 return 0;
553
554 if (strcmp(RTA_DATA(li[IFLA_INFO_KIND]), "vrf"))
555 return 0;
556
557 tb_id = vrf_table_linkinfo(li);
558 if (!tb_id) {
559 fprintf(stderr,
560 "BUG: VRF %s is missing table id\n", name);
561 return 0;
562 }
563
564 printf("%-16s %5u", name, tb_id);
565
566 printf("\n");
567 return 1;
568 }
569
570 static int ipvrf_show(int argc, char **argv)
571 {
572 struct nlmsg_chain linfo = { NULL, NULL};
573 int rc = 0;
574
575 vrf_filter.kind = "vrf";
576
577 if (argc > 1)
578 usage();
579
580 if (argc == 1) {
581 __u32 tb_id;
582
583 tb_id = ipvrf_get_table(argv[0]);
584 if (!tb_id) {
585 fprintf(stderr, "Invalid VRF\n");
586 return 1;
587 }
588 printf("%s %u\n", argv[0], tb_id);
589 return 0;
590 }
591
592 if (ip_linkaddr_list(0, ipvrf_filter_req, &linfo, NULL) == 0) {
593 struct nlmsg_list *l;
594 unsigned nvrf = 0;
595 int n;
596
597 n = printf("%-16s %5s\n", "Name", "Table");
598 printf("%.*s\n", n-1, "-----------------------");
599 for (l = linfo.head; l; l = l->next)
600 nvrf += ipvrf_print(&l->h);
601
602 if (!nvrf)
603 printf("No VRF has been configured\n");
604 } else
605 rc = 1;
606
607 free_nlmsg_chain(&linfo);
608
609 return rc;
610 }
611
/*
 * Entry point for "ip vrf": dispatch on the first argument to the
 * identify, pids, exec or show (also "lst"/"list") handlers; with no
 * argument at all, behave like "show". "help" prints the usage text.
 *
 * Returns the handler's result. An unknown command prints an error and
 * terminates the process with exit(-1).
 */
int do_ipvrf(int argc, char **argv)
{
	const char *cmd;
	int sub_argc;
	char **sub_argv;

	if (argc == 0)
		return ipvrf_show(0, NULL);

	cmd = *argv;
	sub_argc = argc - 1;
	sub_argv = argv + 1;

	if (!matches(cmd, "identify"))
		return ipvrf_identify(sub_argc, sub_argv);

	if (!matches(cmd, "pids"))
		return ipvrf_pids(sub_argc, sub_argv);

	if (!matches(cmd, "exec"))
		return ipvrf_exec(sub_argc, sub_argv);

	if (!matches(cmd, "show") ||
	    !matches(cmd, "lst") ||
	    !matches(cmd, "list"))
		return ipvrf_show(sub_argc, sub_argv);

	if (!matches(cmd, "help"))
		usage();

	fprintf(stderr, "Command \"%s\" is unknown, try \"ip vrf help\".\n",
		cmd);

	exit(-1);
}