]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/cmd/lxc_user_nic.c
02915b882fc5efcdcedeeca57275a7b501bba561
[mirror_lxc.git] / src / lxc / cmd / lxc_user_nic.c
1 /*
2 *
3 * Copyright © 2013 Serge Hallyn <serge.hallyn@ubuntu.com>.
4 * Copyright © 2013 Canonical Ltd.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #ifndef _GNU_SOURCE
21 #define _GNU_SOURCE 1
22 #endif
23 #include <alloca.h>
24 #include <arpa/inet.h>
25 #include <ctype.h>
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <grp.h>
29 #include <linux/netlink.h>
30 #include <linux/rtnetlink.h>
31 #include <linux/sockios.h>
32 #include <net/if.h>
33 #include <net/if_arp.h>
34 #include <netinet/in.h>
35 #include <pwd.h>
36 #include <sched.h>
37 #include <stdbool.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <sys/file.h>
42 #include <sys/ioctl.h>
43 #include <sys/mman.h>
44 #include <sys/param.h>
45 #include <sys/socket.h>
46 #include <sys/stat.h>
47 #include <sys/types.h>
48 #include <unistd.h>
49
50 #include "config.h"
51 #include "log.h"
52 #include "network.h"
53 #include "parse.h"
54 #include "raw_syscalls.h"
55 #include "syscall_wrappers.h"
56 #include "utils.h"
57
58 #ifndef HAVE_STRLCPY
59 #include "include/strlcpy.h"
60 #endif
61
/*
 * Print a diagnostic to @stream, prefixed with the source file, line number
 * and enclosing function of the call site.
 *
 * @format must be a string literal because it is concatenated onto the prefix
 * format string. At least one variadic argument is required: __VA_ARGS__
 * follows a comma in the expansion, so an empty argument list would leave a
 * dangling comma.
 */
#define usernic_debug_stream(stream, format, ...)                              \
	do {                                                                   \
		fprintf(stream, "%s: %d: %s: " format, __FILE__, __LINE__,     \
			__func__, __VA_ARGS__);                                \
	} while (false)

/* Same as usernic_debug_stream(), but always writes to stderr. */
#define usernic_error(format, ...) usernic_debug_stream(stderr, format, __VA_ARGS__)
69
/*
 * Print the synopsis of both sub-commands to stderr and terminate the process
 * right away through _exit().
 *
 * @me   program name inserted into the synopsis
 * @fail true exits with EXIT_FAILURE, false with EXIT_SUCCESS
 *
 * This function does not return.
 */
static void usage(char *me, bool fail)
{
	const int status = fail ? EXIT_FAILURE : EXIT_SUCCESS;

	fprintf(stderr,
		"Usage: %s create {lxcpath} {name} {pid} {type} {bridge} {nicname}\n",
		me);
	fprintf(stderr,
		"Usage: %s delete {lxcpath} {name} {/proc/<pid>/ns/net} {type} {bridge} {nicname}\n",
		me);
	fputs("{nicname} is the name to use inside the container\n", stderr);

	_exit(status);
}
83
/*
 * Open @path read-write (creating it with mode 0600 if it does not exist) and
 * take an exclusive, blocking fcntl() write lock that covers the whole file.
 *
 * Returns the locked file descriptor, or -1 after logging if either the open
 * or the lock request fails. The caller owns the returned descriptor.
 */
static int open_and_lock(char *path)
{
	/* l_start == 0 together with l_len == 0 means "the entire file". */
	struct flock whole_file = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};
	int lockfd;

	lockfd = open(path, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
	if (lockfd < 0) {
		CMD_SYSERROR("Failed to open \"%s\"\n", path);
		return -1;
	}

	/* F_SETLKW blocks until the lock can be granted. */
	if (fcntl(lockfd, F_SETLKW, &whole_file) < 0) {
		CMD_SYSERROR("Failed to lock \"%s\"\n", path);
		close(lockfd);
		return -1;
	}

	return lockfd;
}
109
/*
 * Look up the login name that belongs to the real uid of the calling process.
 *
 * Returns a heap-allocated copy of the name which the caller must free(), or
 * NULL if the scratch buffer cannot be allocated, the passwd lookup yields no
 * record, or the name cannot be duplicated.
 */
static char *get_username(void)
{
	struct passwd pw_storage;
	struct passwd *pw_result = NULL;
	char *scratch, *copy;
	size_t scratch_len;
	int rc;

	/* sysconf() reports -1 when it has no suggestion for the size. */
	scratch_len = sysconf(_SC_GETPW_R_SIZE_MAX);
	if (scratch_len == (size_t)-1)
		scratch_len = 1024;

	scratch = malloc(scratch_len);
	if (!scratch)
		return NULL;

	rc = getpwuid_r(getuid(), &pw_storage, scratch, scratch_len, &pw_result);
	if (!pw_result) {
		/* rc == 0 with no result means "no such entry", not an error. */
		if (rc == 0)
			usernic_error("%s", "Could not find matched password record\n");

		CMD_SYSERROR("Failed to get username: %u\n", getuid());
		free(scratch);
		return NULL;
	}

	/* pw_name points into scratch, so copy it before releasing that. */
	copy = strdup(pw_storage.pw_name);
	free(scratch);

	return copy;
}
142
/*
 * Release a NULL-terminated array of heap-allocated strings: every element
 * first, then the array itself. A NULL @groupnames is accepted and ignored.
 */
static void free_groupnames(char **groupnames)
{
	char **cursor;

	if (!groupnames)
		return;

	for (cursor = groupnames; *cursor; cursor++)
		free(*cursor);

	free(groupnames);
}
155
/*
 * Collect the names of all supplementary groups of the calling process.
 *
 * Returns a NULL-terminated, heap-allocated array of heap-allocated strings
 * that must be released with free_groupnames(). Returns NULL when the process
 * has no supplementary groups, when any allocation fails, or when one of the
 * group ids cannot be resolved to a name.
 */
static char **get_groupnames(void)
{
	struct group gr_storage;
	struct group *gr_result = NULL;
	gid_t *gids;
	char **names;
	char *scratch;
	size_t scratch_len;
	int total, idx, rc;

	/* A size of 0 only asks how many groups there are. */
	total = getgroups(0, NULL);
	if (total < 0) {
		CMD_SYSERROR("Failed to get number of groups the user belongs to\n");
		return NULL;
	}

	if (total == 0)
		return NULL;

	gids = malloc(sizeof(gid_t) * total);
	if (!gids) {
		CMD_SYSERROR("Failed to allocate memory while getting groups the user belongs to\n");
		return NULL;
	}

	if (getgroups(total, gids) < 0) {
		free(gids);
		CMD_SYSERROR("Failed to get process groups\n");
		return NULL;
	}

	/* One extra, zeroed slot serves as the NULL terminator. */
	names = calloc(total + 1, sizeof(char *));
	if (!names) {
		free(gids);
		CMD_SYSERROR("Failed to allocate memory while getting group names\n");
		return NULL;
	}

	/* sysconf() reports -1 when it has no suggestion for the size. */
	scratch_len = sysconf(_SC_GETGR_R_SIZE_MAX);
	if (scratch_len == (size_t)-1)
		scratch_len = 1024;

	scratch = malloc(scratch_len);
	if (!scratch) {
		free(gids);
		free_groupnames(names);
		CMD_SYSERROR("Failed to allocate memory while getting group names\n");
		return NULL;
	}

	for (idx = 0; idx < total; idx++) {
		rc = getgrgid_r(gids[idx], &gr_storage, scratch, scratch_len,
				&gr_result);
		if (!gr_result) {
			/* rc == 0 with no result means "no such entry". */
			if (rc == 0)
				usernic_error("%s", "Could not find matched group record\n");

			CMD_SYSERROR("Failed to get group name: %u\n", gids[idx]);
			goto fail;
		}

		/* gr_name points into scratch, which is reused next round. */
		names[idx] = strdup(gr_storage.gr_name);
		if (!names[idx]) {
			usernic_error("Failed to copy group name \"%s\"", gr_storage.gr_name);
			goto fail;
		}
	}

	free(scratch);
	free(gids);

	return names;

fail:
	free(scratch);
	free(gids);
	free_groupnames(names);
	return NULL;
}
237
/*
 * Report whether @name occurs in the NULL-terminated string array
 * @groupnames.
 *
 * @name       group name to look for
 * @groupnames NULL-terminated array of group names; may itself be NULL
 *
 * Returns true on an exact match, false otherwise (including when either
 * argument is NULL).
 *
 * The loop must stop at the NULL *element* that terminates the array. Testing
 * only the array pointer never terminates and hands NULL to strcmp() as soon
 * as @name is not in the list.
 */
static bool name_is_in_groupnames(char *name, char **groupnames)
{
	if (!name || !groupnames)
		return false;

	for (; *groupnames; groupnames++) {
		if (!strcmp(name, *groupnames))
			return true;
	}

	return false;
}
248
/*
 * One matching line of the usernic configuration file, kept in a singly
 * linked list.
 */
struct alloted_s {
	/* Heap-allocated copy of the first column (user name or "@group"). */
	char *name;
	/* Count column of that line: how many nics this entry allows. */
	int allowed;
	/* Next matching entry, or NULL at the end of the list. */
	struct alloted_s *next;
};
254
255 static struct alloted_s *append_alloted(struct alloted_s **head, char *name,
256 int n)
257 {
258 struct alloted_s *cur, *al;
259
260 if (!head || !name) {
261 /* Sanity check. Parameters should not be null. */
262 usernic_error("%s\n", "Unexpected NULL argument");
263 return NULL;
264 }
265
266 al = malloc(sizeof(struct alloted_s));
267 if (!al) {
268 CMD_SYSERROR("Failed to allocate memory\n");
269 return NULL;
270 }
271
272 al->name = strdup(name);
273 if (!al->name) {
274 free(al);
275 return NULL;
276 }
277
278 al->allowed = n;
279 al->next = NULL;
280
281 if (!*head) {
282 *head = al;
283 return al;
284 }
285
286 cur = *head;
287 while (cur->next)
288 cur = cur->next;
289 cur->next = al;
290
291 return al;
292 }
293
294 static void free_alloted(struct alloted_s **head)
295 {
296 struct alloted_s *cur;
297
298 if (!head)
299 return;
300
301 cur = *head;
302 while (cur) {
303 cur = cur->next;
304 free((*head)->name);
305 free(*head);
306 *head = cur;
307 }
308 }
309
310 /* The configuration file consists of lines of the form:
311 *
312 * user type bridge count
313 * or
314 * @group type bridge count
315 *
316 * Return the count entry for the calling user if there is one. Else
317 * return -1.
318 */
319 static int get_alloted(char *me, char *intype, char *link,
320 struct alloted_s **alloted)
321 {
322 int n, ret;
323 char name[100], type[100], br[100];
324 char **groups;
325 FILE *fin;
326 int count = 0;
327 size_t len = 0;
328 char *line = NULL;
329
330 fin = fopen(LXC_USERNIC_CONF, "r");
331 if (!fin) {
332 CMD_SYSERROR("Failed to open \"%s\"\n", LXC_USERNIC_CONF);
333 return -1;
334 }
335
336 groups = get_groupnames();
337 while ((getline(&line, &len, fin)) != -1) {
338 ret = sscanf(line, "%99[^ \t] %99[^ \t] %99[^ \t] %d", name,
339 type, br, &n);
340 if (ret != 4)
341 continue;
342
343 if (strlen(name) == 0)
344 continue;
345
346 if (strcmp(name, me)) {
347 if (name[0] != '@')
348 continue;
349
350 if (!name_is_in_groupnames(name + 1, groups))
351 continue;
352 }
353
354 if (strcmp(type, intype))
355 continue;
356
357 if (strcmp(link, br))
358 continue;
359
360 /* Found the user or group with the appropriate settings,
361 * therefore finish the search. What to do if there are more
362 * than one applicable lines? not specified in the docs. Since
363 * getline is implemented with realloc, we don't need to free
364 * line until exiting func.
365 *
366 * If append_alloted returns NULL, e.g. due to a malloc error,
367 * we set count to 0 and break the loop, allowing cleanup and
368 * then exiting from main().
369 */
370 if (!append_alloted(alloted, name, n)) {
371 count = 0;
372 break;
373 }
374
375 count += n;
376 }
377
378 free_groupnames(groups);
379 fclose(fin);
380 free(line);
381
382 /* Now return the total number of nics that this user can create. */
383 return count;
384 }
385
/*
 * Scan forward from @s and return a pointer to the first '\n' or '\0' byte,
 * or @e if neither occurs before the end of the range [@s, @e).
 */
static char *get_eol(char *s, char *e)
{
	for (; s < e; s++) {
		if (*s == '\0' || *s == '\n')
			break;
	}

	return s;
}
393
/*
 * Scan forward from @s and return a pointer to the first byte that ends a
 * word: a blank (space or tab), '\n' or '\0'. Returns @e if no such byte
 * occurs within [@s, @e).
 *
 * The byte is converted to unsigned char before it is handed to isblank():
 * the <ctype.h> classifiers are only defined for values representable as
 * unsigned char (or EOF), and plain char may be negative for bytes >= 0x80.
 */
static char *get_eow(char *s, char *e)
{
	while ((s < e) && *s && !isblank((unsigned char)*s) && (*s != '\n'))
		s++;

	return s;
}
401
402 static char *find_line(char *buf_start, char *buf_end, char *name,
403 char *net_type, char *net_link, char *net_dev,
404 bool *owner, bool *found, bool *keep)
405 {
406 char *end_of_line, *end_of_word, *line;
407
408 while (buf_start < buf_end) {
409 size_t len;
410 char netdev_name[IFNAMSIZ];
411
412 *found = false;
413 *keep = true;
414 *owner = false;
415
416 end_of_line = get_eol(buf_start, buf_end);
417 if (end_of_line >= buf_end)
418 return NULL;
419
420 line = buf_start;
421 if (*buf_start == '#')
422 goto next;
423
424 while ((buf_start < buf_end) && isblank(*buf_start))
425 buf_start++;
426
427 /* Check whether the line contains the caller's name. */
428 end_of_word = get_eow(buf_start, buf_end);
429 /* corrupt db */
430 if (!end_of_word)
431 return NULL;
432
433 if (strncmp(buf_start, name, strlen(name)))
434 *found = false;
435
436 *owner = true;
437
438 buf_start = end_of_word + 1;
439 while ((buf_start < buf_end) && isblank(*buf_start))
440 buf_start++;
441
442 /* Check whether line is of the right network type. */
443 end_of_word = get_eow(buf_start, buf_end);
444 /* corrupt db */
445 if (!end_of_word)
446 return NULL;
447
448 if (strncmp(buf_start, net_type, strlen(net_type)))
449 *found = false;
450
451 buf_start = end_of_word + 1;
452 while ((buf_start < buf_end) && isblank(*buf_start))
453 buf_start++;
454
455 /* Check whether line is contains the right link. */
456 end_of_word = get_eow(buf_start, buf_end);
457 /* corrupt db */
458 if (!end_of_word)
459 return NULL;
460
461 if (strncmp(buf_start, net_link, strlen(net_link)))
462 *found = false;
463
464 buf_start = end_of_word + 1;
465 while ((buf_start < buf_end) && isblank(*buf_start))
466 buf_start++;
467
468 /* Check whether line contains the right network device. */
469 end_of_word = get_eow(buf_start, buf_end);
470 /* corrupt db */
471 if (!end_of_word)
472 return NULL;
473
474 len = end_of_word - buf_start;
475 /* corrupt db */
476 if (len >= IFNAMSIZ)
477 return NULL;
478
479 memcpy(netdev_name, buf_start, len);
480 netdev_name[len] = '\0';
481 *keep = lxc_nic_exists(netdev_name);
482
483 if (net_dev && !strcmp(netdev_name, net_dev))
484 *found = true;
485
486 return line;
487
488 next:
489 buf_start = end_of_line + 1;
490 }
491
492 return NULL;
493 }
494
495 static int instantiate_veth(char *veth1, char *veth2)
496 {
497 int ret;
498
499 ret = lxc_veth_create(veth1, veth2);
500 if (ret < 0) {
501 errno = -ret;
502 CMD_SYSERROR("Failed to create %s-%s\n", veth1, veth2);
503 return -1;
504 }
505
506 /* Changing the high byte of the mac address to 0xfe, the bridge
507 * interface will always keep the host's mac address and not take the
508 * mac address of a container.
509 */
510 ret = setup_private_host_hw_addr(veth1);
511 if (ret < 0) {
512 errno = -ret;
513 CMD_SYSERROR("Failed to change mac address of host interface %s\n", veth1);
514 }
515
516 return netdev_set_flag(veth1, IFF_UP);
517 }
518
/*
 * Return the MTU of the network interface called @name.
 *
 * Returns -1 if no interface with that name exists, otherwise the result of
 * netdev_get_mtu() for its index.
 *
 * if_nametoindex() returns an unsigned index and signals failure with 0, so
 * a "< 0" test can never fire and index 0 would be passed on.
 */
static int get_mtu(char *name)
{
	unsigned int idx;

	idx = if_nametoindex(name);
	if (idx == 0)
		return -1;

	return netdev_get_mtu(idx);
}
529
530 static int create_nic(char *nic, char *br, int pid, char **cnic)
531 {
532 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
533 int mtu, ret;
534
535 ret = snprintf(veth1buf, IFNAMSIZ, "%s", nic);
536 if (ret < 0 || ret >= IFNAMSIZ) {
537 usernic_error("%s", "Could not create nic name\n");
538 return -1;
539 }
540
541 ret = snprintf(veth2buf, IFNAMSIZ, "%sp", veth1buf);
542 if (ret < 0 || ret >= IFNAMSIZ) {
543 usernic_error("%s\n", "Could not create nic name");
544 return -1;
545 }
546
547 /* create the nics */
548 ret = instantiate_veth(veth1buf, veth2buf);
549 if (ret < 0) {
550 usernic_error("%s", "Error creating veth tunnel\n");
551 return -1;
552 }
553
554 if (strcmp(br, "none")) {
555 /* copy the bridge's mtu to both ends */
556 mtu = get_mtu(br);
557 if (mtu > 0) {
558 ret = lxc_netdev_set_mtu(veth1buf, mtu);
559 if (ret < 0) {
560 usernic_error("Failed to set mtu to %d on %s\n",
561 mtu, veth1buf);
562 goto out_del;
563 }
564
565 ret = lxc_netdev_set_mtu(veth2buf, mtu);
566 if (ret < 0) {
567 usernic_error("Failed to set mtu to %d on %s\n",
568 mtu, veth2buf);
569 goto out_del;
570 }
571 }
572
573 /* attach veth1 to bridge */
574 ret = lxc_bridge_attach(br, veth1buf);
575 if (ret < 0) {
576 usernic_error("Error attaching %s to %s\n", veth1buf, br);
577 goto out_del;
578 }
579 }
580
581 /* pass veth2 to target netns */
582 ret = lxc_netdev_move_by_name(veth2buf, pid, NULL);
583 if (ret < 0) {
584 usernic_error("Error moving %s to network namespace of %d\n",
585 veth2buf, pid);
586 goto out_del;
587 }
588
589 *cnic = strdup(veth2buf);
590 if (!*cnic) {
591 usernic_error("Failed to copy string \"%s\"\n", veth2buf);
592 return -1;
593 }
594
595 return 0;
596
597 out_del:
598 lxc_netdev_delete_by_name(veth1buf);
599 return -1;
600 }
601
/* Position and fate of one database line while the file is compacted. */
struct entry_line {
	/* First byte of the line inside the mapped database file. */
	char *start;
	/* Length of the line in bytes, without the terminating newline. */
	int len;
	/* Whether the line is written back when the file is compacted. */
	bool keep;
};
607
608 static bool cull_entries(int fd, char *name, char *net_type, char *net_link,
609 char *net_dev, bool *found_nicname)
610 {
611 int i, ret;
612 char *buf, *buf_end, *buf_start;
613 struct stat sb;
614 int n = 0;
615 bool found, keep;
616 struct entry_line *entry_lines = NULL;
617
618 ret = fstat(fd, &sb);
619 if (ret < 0) {
620 CMD_SYSERROR("Failed to fstat\n");
621 return false;
622 }
623
624 if (!sb.st_size)
625 return false;
626
627 buf = lxc_strmmap(NULL, sb.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
628 if (buf == MAP_FAILED) {
629 CMD_SYSERROR("Failed to establish shared memory mapping\n");
630 return false;
631 }
632
633 buf_start = buf;
634 buf_end = buf + sb.st_size;
635 while ((buf_start = find_line(buf_start, buf_end, name, net_type,
636 net_link, net_dev, &(bool){true}, &found,
637 &keep))) {
638 struct entry_line *newe;
639
640 newe = realloc(entry_lines, sizeof(*entry_lines) * (n + 1));
641 if (!newe) {
642 free(entry_lines);
643 lxc_strmunmap(buf, sb.st_size);
644 return false;
645 }
646
647 if (found)
648 *found_nicname = true;
649
650 entry_lines = newe;
651 entry_lines[n].start = buf_start;
652 entry_lines[n].len = get_eol(buf_start, buf_end) - entry_lines[n].start;
653 entry_lines[n].keep = keep;
654 n++;
655
656 buf_start += entry_lines[n - 1].len + 1;
657 if (buf_start >= buf_end)
658 break;
659 }
660
661 buf_start = buf;
662
663 for (i = 0; i < n; i++) {
664 if (!entry_lines[i].keep)
665 continue;
666
667 memcpy(buf_start, entry_lines[i].start, entry_lines[i].len);
668 buf_start += entry_lines[i].len;
669 *buf_start = '\n';
670 buf_start++;
671 }
672
673 free(entry_lines);
674
675 ret = ftruncate(fd, buf_start - buf);
676 lxc_strmunmap(buf, sb.st_size);
677 if (ret < 0)
678 CMD_SYSERROR("Failed to set new file size\n");
679
680 return true;
681 }
682
/*
 * Count the database lines in the @len bytes at @buf for which find_line()
 * reports the caller @name as owner.
 *
 * @net_type and @net_link are passed through to find_line() unchanged.
 */
static int count_entries(char *buf, off_t len, char *name, char *net_type, char *net_link)
{
	char *limit = &buf[len];
	char *cursor = buf;
	bool is_owner = false;
	int owned = 0;

	for (;;) {
		/* The found/keep results are of no interest here. */
		cursor = find_line(cursor, limit, name, net_type, net_link,
				   NULL, &is_owner, &(bool){true},
				   &(bool){true});
		if (!cursor)
			break;

		if (is_owner)
			owned++;

		/* Continue right behind the newline of the current line. */
		cursor = get_eol(cursor, limit) + 1;
		if (cursor >= limit)
			break;
	}

	return owned;
}
702
703 /* The dbfile has lines of the format: user type bridge nicname. */
704 static char *get_nic_if_avail(int fd, struct alloted_s *names, int pid,
705 char *intype, char *br, int allowed, char **cnic)
706 {
707 int ret;
708 size_t slen;
709 char *newline, *owner;
710 char nicname[IFNAMSIZ];
711 struct stat sb;
712 struct alloted_s *n;
713 char *buf = NULL;
714
715 for (n = names; n != NULL; n = n->next)
716 cull_entries(fd, n->name, intype, br, NULL, NULL);
717
718 if (allowed == 0)
719 return NULL;
720
721 owner = names->name;
722
723 ret = fstat(fd, &sb);
724 if (ret < 0) {
725 CMD_SYSERROR("Failed to fstat\n");
726 return NULL;
727 }
728
729 if (sb.st_size > 0) {
730 buf = lxc_strmmap(NULL, sb.st_size, PROT_READ | PROT_WRITE,
731 MAP_SHARED, fd, 0);
732 if (buf == MAP_FAILED) {
733 CMD_SYSERROR("Failed to establish shared memory mapping\n");
734 return NULL;
735 }
736
737 owner = NULL;
738
739 for (n = names; n != NULL; n = n->next) {
740 int count;
741
742 count = count_entries(buf, sb.st_size, n->name, intype, br);
743 if (count >= n->allowed)
744 continue;
745
746 owner = n->name;
747 break;
748 }
749
750 lxc_strmunmap(buf, sb.st_size);
751 }
752
753 if (!owner)
754 return NULL;
755
756 ret = snprintf(nicname, sizeof(nicname), "vethXXXXXX");
757 if (ret < 0 || (size_t)ret >= sizeof(nicname))
758 return NULL;
759
760 if (!lxc_mkifname(nicname))
761 return NULL;
762
763 ret = create_nic(nicname, br, pid, cnic);
764 if (ret < 0) {
765 usernic_error("%s", "Failed to create new nic\n");
766 return NULL;
767 }
768
769 /* strlen(owner)
770 * +
771 * " "
772 * +
773 * strlen(intype)
774 * +
775 * " "
776 * +
777 * strlen(br)
778 * +
779 * " "
780 * +
781 * strlen(nicname)
782 * +
783 * \n
784 * +
785 * \0
786 */
787 slen = strlen(owner) + strlen(intype) + strlen(br) + strlen(nicname) + 4;
788 newline = malloc(slen + 1);
789 if (!newline) {
790 free(newline);
791 CMD_SYSERROR("Failed allocate memory\n");
792 return NULL;
793 }
794
795 ret = snprintf(newline, slen + 1, "%s %s %s %s\n", owner, intype, br, nicname);
796 if (ret < 0 || (size_t)ret >= (slen + 1)) {
797 if (lxc_netdev_delete_by_name(nicname) != 0)
798 usernic_error("Error unlinking %s\n", nicname);
799
800 free(newline);
801 return NULL;
802 }
803
804 /* Note that the file needs to be truncated to the size **without** the
805 * \0 byte! Files are not \0-terminated!
806 */
807 ret = ftruncate(fd, sb.st_size + slen);
808 if (ret < 0)
809 CMD_SYSERROR("Failed to truncate file\n");
810
811 buf = lxc_strmmap(NULL, sb.st_size + slen, PROT_READ | PROT_WRITE,
812 MAP_SHARED, fd, 0);
813 if (buf == MAP_FAILED) {
814 CMD_SYSERROR("Failed to establish shared memory mapping\n");
815
816 if (lxc_netdev_delete_by_name(nicname) != 0)
817 usernic_error("Error unlinking %s\n", nicname);
818
819 free(newline);
820 return NULL;
821 }
822
823 /* Note that the memory needs to be moved in the buffer **without** the
824 * \0 byte! Files are not \0-terminated!
825 */
826 memmove(buf + sb.st_size, newline, slen);
827 free(newline);
828 lxc_strmunmap(buf, sb.st_size + slen);
829
830 return strdup(nicname);
831 }
832
/*
 * Create every directory component leading up to the file @fnam, in the
 * manner of "mkdir -p" applied to its parent directory. The final path
 * component (the file itself) is not created, and components that already
 * exist are accepted.
 *
 * Returns true on success and false, after logging, if a component cannot be
 * created. @fnam itself is not modified; a stack copy is used.
 */
static bool create_db_dir(char *fnam)
{
	size_t len = strlen(fnam);
	char *copy, *sep;

	copy = alloca(len + 1);
	(void)strlcpy(copy, fnam, len + 1);

	/* Start behind the first character so a leading '/' is not cut off. */
	for (sep = strchr(copy + 1, '/'); sep; sep = strchr(sep + 1, '/')) {
		int ret;

		/* Temporarily end the string at this separator. */
		*sep = '\0';

		ret = mkdir(copy, 0755);
		if (ret < 0 && errno != EEXIST) {
			CMD_SYSERROR("Failed to create %s\n", copy);
			*sep = '/';
			return false;
		}

		*sep = '/';
	}

	return true;
}
865
/*
 * Rename the network device @oldname inside the network namespace of process
 * @pid, using the caller's real uid as the effective uid while doing so.
 *
 * @pid                  process whose network namespace is entered
 * @oldname              current name of the device inside that namespace
 * @newname              requested new name, or NULL to request "eth%d"
 * @container_veth_ifidx set to the interface index of the device on success
 *
 * Returns a heap-allocated copy of the device's name after the rename (the
 * caller must free() it), or NULL on any failure. On all paths that reach
 * the full cleanup, the saved user ids and the original network namespace
 * are restored before returning; if restoring either fails NULL is returned.
 */
static char *lxc_secure_rename_in_ns(int pid, char *oldname, char *newname,
				     int *container_veth_ifidx)
{
	int ofd, ret;
	pid_t pid_self;
	uid_t ruid, suid, euid;
	char ifname[IFNAMSIZ];
	char *string_ret = NULL, *name = NULL;
	int fd = -1, ifindex = -1;

	pid_self = lxc_raw_getpid();

	/* Keep a handle on our own network namespace so we can switch back.
	 * (presumably lxc_preserve_ns() opens /proc/<pid>/ns/net - TODO confirm)
	 */
	ofd = lxc_preserve_ns(pid_self, "net");
	if (ofd < 0) {
		usernic_error("Failed opening network namespace path for %d", pid_self);
		return NULL;
	}

	fd = lxc_preserve_ns(pid, "net");
	if (fd < 0) {
		usernic_error("Failed opening network namespace path for %d", pid);
		goto do_partial_cleanup;
	}

	/* Remember all three ids so they can be restored afterwards. */
	ret = getresuid(&ruid, &euid, &suid);
	if (ret < 0) {
		CMD_SYSERROR("Failed to retrieve real, effective, and saved user IDs\n");
		goto do_partial_cleanup;
	}

	ret = setns(fd, CLONE_NEWNET);
	/* The target namespace fd is not needed any more either way; mark it
	 * closed so the cleanup code does not close it a second time.
	 */
	close(fd);
	fd = -1;
	if (ret < 0) {
		CMD_SYSERROR("Failed to setns() to the network namespace of "
			     "the container with PID %d\n", pid);
		goto do_partial_cleanup;
	}

	/* Act as the real user; keep 0 as saved uid to be able to switch back. */
	ret = setresuid(ruid, ruid, 0);
	if (ret < 0) {
		CMD_SYSERROR("Failed to drop privilege by setting effective "
			     "user id and real user id to %d, and saved user "
			     "ID to 0\n", ruid);
		/* It's ok to jump to do_full_cleanup here since setresuid()
		 * will succeed when trying to set real, effective, and saved to
		 * values they currently have.
		 */
		goto do_full_cleanup;
	}

	/* Check if old interface exists. */
	ifindex = if_nametoindex(oldname);
	if (!ifindex) {
		CMD_SYSERROR("Failed to get netdev index\n");
		goto do_full_cleanup;
	}

	/* When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
	 * netlink will replace the format specifier with an appropriate index.
	 * So we pass "eth%d".
	 */
	if (newname)
		name = newname;
	else
		name = "eth%d";

	ret = lxc_netdev_rename_by_name(oldname, name);
	/* name did not own any memory up to here; reset it so the cleanup
	 * code below never free()s @newname or the string literal.
	 */
	name = NULL;
	if (ret < 0) {
		usernic_error("Error %d renaming netdev %s to %s in container\n",
			      ret, oldname, newname ? newname : "eth%d");
		goto do_full_cleanup;
	}

	/* Retrieve new name for interface. */
	if (!if_indextoname(ifindex, ifname)) {
		CMD_SYSERROR("Failed to get new netdev name\n");
		goto do_full_cleanup;
	}

	/* A failed strdup() leaves string_ret NULL, which is what the caller
	 * sees as the error indication.
	 */
	name = strdup(ifname);
	string_ret = name;
	*container_veth_ifidx = ifindex;

do_full_cleanup:
	/* NOTE(review): the arguments are passed as ruid, euid, suid while the
	 * message labels them effective, real, saved - the first two labels
	 * look swapped; confirm before relying on this log line.
	 */
	ret = setresuid(ruid, euid, suid);
	if (ret < 0) {
		CMD_SYSERROR("Failed to restore privilege by setting "
			     "effective user id to %d, real user id to %d, "
			     "and saved user ID to %d\n", ruid, euid, suid);

		string_ret = NULL;
	}

	/* NOTE(review): the value printed as "PID" is the file descriptor
	 * ofd, not a process id.
	 */
	ret = setns(ofd, CLONE_NEWNET);
	if (ret < 0) {
		CMD_SYSERROR("Failed to setns() to original network namespace "
			     "of PID %d\n", ofd);

		string_ret = NULL;
	}

do_partial_cleanup:
	if (fd >= 0)
		close(fd);

	/* Release the duplicated name if a later cleanup step failed. */
	if (!string_ret && name)
		free(name);

	close(ofd);

	return string_ret;
}
981
/* If the caller (real uid, not effective uid) may read the /proc/[pid]/ns/net,
 * then it is either the caller's netns or one which it created.
 *
 * @pid process whose network namespace file is checked
 *
 * Returns true iff the file is readable for the real uid and the original
 * user ids could be restored afterwards.
 *
 * The path is built before privileges are dropped so that no early return
 * can leave the process with altered user ids.
 */
static bool may_access_netns(int pid)
{
	int ret;
	char s[200];
	uid_t ruid, suid, euid;
	bool may_access;

	ret = snprintf(s, sizeof(s), "/proc/%d/ns/net", pid);
	if (ret < 0 || (size_t)ret >= sizeof(s))
		return false;

	ret = getresuid(&ruid, &euid, &suid);
	if (ret < 0) {
		CMD_SYSERROR("Failed to retrieve real, effective, and saved user IDs\n");
		return false;
	}

	/* Act as the real user; park the effective uid in the saved uid so it
	 * can be restored below.
	 */
	ret = setresuid(ruid, ruid, euid);
	if (ret < 0) {
		CMD_SYSERROR("Failed to drop privilege by setting effective "
			     "user id and real user id to %d, and saved user "
			     "ID to %d\n", ruid, euid);
		return false;
	}

	may_access = (access(s, R_OK) == 0);
	if (!may_access)
		CMD_SYSERROR("Uid %d may not access %s\n", (int)ruid, s);

	ret = setresuid(ruid, euid, suid);
	if (ret < 0) {
		CMD_SYSERROR("Failed to restore user id to %d, real user id "
			     "to %d, and saved user ID to %d\n", ruid, euid, suid);
		may_access = false;
	}

	return may_access;
}
1026
/*
 * The positional command-line arguments, in the order main() reads them from
 * argv[1] through argv[7]. The strings are not copied.
 */
struct user_nic_args {
	/* argv[1]: sub-command, "create" or "delete". */
	char *cmd;
	/* argv[2]: {lxcpath} from the usage text. */
	char *lxc_path;
	/* argv[3]: {name} from the usage text. */
	char *lxc_name;
	/* argv[4]: a pid for "create", a network namespace path for "delete". */
	char *pid;
	/* argv[5]: network type. */
	char *type;
	/* argv[6]: bridge / link name. */
	char *link;
	/* argv[7]: optional nic name; stays NULL when not given. */
	char *veth_name;
};
1036
/* Request codes main() derives from the "create" / "delete" sub-command. */
#define LXC_USERNIC_CREATE 0
#define LXC_USERNIC_DELETE 1
1039
/*
 * Check whether the caller's real uid is privileged over the network
 * namespace referred to by @netns_fd.
 *
 * The process temporarily enters that namespace with the real uid as
 * effective uid, probes its privilege there, and then restores its user ids
 * and its original network namespace.
 *
 * Returns true only if the probe indicates privilege and both restore steps
 * succeed; false otherwise. @netns_fd is not closed.
 */
static bool is_privileged_over_netns(int netns_fd)
{
	int ofd, ret;
	pid_t pid_self;
	uid_t euid, ruid, suid;
	bool bret = false;

	pid_self = lxc_raw_getpid();

	/* Keep a handle on our own network namespace so we can switch back. */
	ofd = lxc_preserve_ns(pid_self, "net");
	if (ofd < 0) {
		usernic_error("Failed opening network namespace path for %d", pid_self);
		return false;
	}

	/* Remember all three ids so they can be restored afterwards. */
	ret = getresuid(&ruid, &euid, &suid);
	if (ret < 0) {
		CMD_SYSERROR("Failed to retrieve real, effective, and saved user IDs\n");
		goto do_partial_cleanup;
	}

	ret = setns(netns_fd, CLONE_NEWNET);
	if (ret < 0) {
		CMD_SYSERROR("Failed to setns() to network namespace\n");
		goto do_partial_cleanup;
	}

	/* Act as the real user; keep 0 as saved uid to be able to switch back. */
	ret = setresuid(ruid, ruid, 0);
	if (ret < 0) {
		CMD_SYSERROR("Failed to drop privilege by setting effective "
			     "user id and real user id to %d, and saved user "
			     "ID to 0\n", ruid);
		/* It's ok to jump to do_full_cleanup here since setresuid()
		 * will succeed when trying to set real, effective, and saved to
		 * values they currently have.
		 */
		goto do_full_cleanup;
	}

	/* Test whether we are privileged over the network namespace. To do this
	 * we try to delete the loopback interface which is not possible. If we
	 * are privileged over the network namespace we will get ENOTSUP. If we
	 * are not privileged over the network namespace we will get EPERM.
	 */
	ret = lxc_netdev_delete_by_name("lo");
	if (ret == -ENOTSUP)
		bret = true;

do_full_cleanup:
	/* NOTE(review): the arguments are passed as ruid, euid, suid while the
	 * message labels them effective, real, saved - the first two labels
	 * look swapped; confirm before relying on this log line.
	 */
	ret = setresuid(ruid, euid, suid);
	if (ret < 0) {
		CMD_SYSERROR("Failed to restore privilege by setting "
			     "effective user id to %d, real user id to %d, "
			     "and saved user ID to %d\n", ruid, euid, suid);
		bret = false;
	}

	/* NOTE(review): the value printed as "PID" is the file descriptor
	 * ofd, not a process id.
	 */
	ret = setns(ofd, CLONE_NEWNET);
	if (ret < 0) {
		CMD_SYSERROR("Failed to setns() to original network namespace "
			     "of PID %d\n", ofd);
		bret = false;
	}

do_partial_cleanup:
	close(ofd);
	return bret;
}
1108
/*
 * Entry point of the helper.
 *
 *   create {lxcpath} {name} {pid} {type} {bridge} {nicname}
 *   delete {lxcpath} {name} {/proc/<pid>/ns/net} {type} {bridge} {nicname}
 *
 * "create" makes a veth pair within the caller's quota, moves one end into
 * the network namespace of {pid}, renames it there and prints
 * "<container name>:<container ifindex>:<host name>:<host ifindex>" to
 * stdout. "delete" removes a port from an openvswitch bridge if the database
 * lists the device.
 *
 * The process always terminates through _exit() with EXIT_SUCCESS or
 * EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
	int fd, n, pid, request, ret;
	char *me, *newname;
	struct user_nic_args args;
	int container_veth_ifidx = -1, host_veth_ifidx = -1, netns_fd = -1;
	char *cnic = NULL, *nicname = NULL;
	struct alloted_s *alloted = NULL;

	/* Six mandatory arguments plus an optional nic name. usage() does not
	 * return; the _exit() after it is only a safety net.
	 */
	if (argc < 7 || argc > 8) {
		usage(argv[0], true);
		_exit(EXIT_FAILURE);
	}

	memset(&args, 0, sizeof(struct user_nic_args));

	args.cmd = argv[1];
	args.lxc_path = argv[2];
	args.lxc_name = argv[3];
	args.pid = argv[4];
	args.type = argv[5];
	args.link = argv[6];
	if (argc >= 8)
		args.veth_name = argv[7];

	if (!strcmp(args.cmd, "create")) {
		request = LXC_USERNIC_CREATE;
	} else if (!strcmp(args.cmd, "delete")) {
		request = LXC_USERNIC_DELETE;
	} else {
		usage(argv[0], true);
		_exit(EXIT_FAILURE);
	}

	/* Set a sane env, because we are setuid-root. */
	ret = clearenv();
	if (ret) {
		usernic_error("%s", "Failed to clear environment\n");
		_exit(EXIT_FAILURE);
	}

	ret = setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1);
	if (ret < 0) {
		usernic_error("%s", "Failed to set PATH, exiting\n");
		_exit(EXIT_FAILURE);
	}

	/* Name of the real user; matched against the configuration file. */
	me = get_username();
	if (!me) {
		usernic_error("%s", "Failed to get username\n");
		_exit(EXIT_FAILURE);
	}

	if (request == LXC_USERNIC_CREATE) {
		/* For "create" the fourth argument is a numeric pid. */
		ret = lxc_safe_int(args.pid, &pid);
		if (ret < 0) {
			usernic_error("Could not read pid: %s\n", args.pid);
			_exit(EXIT_FAILURE);
		}
	} else if (request == LXC_USERNIC_DELETE) {
		char opath[LXC_PROC_PID_FD_LEN];

		/* Open the path with O_PATH which will not trigger an actual
		 * open(). Don't report an errno to the caller to not leak
		 * information whether the path exists or not.
		 * When stracing setuid is stripped so this is not a concern
		 * either.
		 */
		netns_fd = open(args.pid, O_PATH | O_CLOEXEC);
		if (netns_fd < 0) {
			usernic_error("Failed to open \"%s\"\n", args.pid);
			_exit(EXIT_FAILURE);
		}

		/* Refuse anything that is not a namespace file. */
		if (!fhas_fs_type(netns_fd, NSFS_MAGIC)) {
			usernic_error("Path \"%s\" does not refer to a network namespace path\n", args.pid);
			close(netns_fd);
			_exit(EXIT_FAILURE);
		}

		ret = snprintf(opath, sizeof(opath), "/proc/self/fd/%d", netns_fd);
		if (ret < 0 || (size_t)ret >= sizeof(opath)) {
			close(netns_fd);
			_exit(EXIT_FAILURE);
		}

		/* Now get an fd that we can use in setns() calls. */
		ret = open(opath, O_RDONLY | O_CLOEXEC);
		if (ret < 0) {
			CMD_SYSERROR("Failed to open \"%s\"\n", args.pid);
			close(netns_fd);
			_exit(EXIT_FAILURE);
		}

		/* Swap the O_PATH descriptor for the usable one. */
		close(netns_fd);
		netns_fd = ret;
	}

	if (!create_db_dir(LXC_USERNIC_DB)) {
		usernic_error("%s", "Failed to create directory for db file\n");

		if (netns_fd >= 0)
			close(netns_fd);

		_exit(EXIT_FAILURE);
	}

	/* The database stays locked until fd is closed further down. */
	fd = open_and_lock(LXC_USERNIC_DB);
	if (fd < 0) {
		usernic_error("Failed to lock %s\n", LXC_USERNIC_DB);

		if (netns_fd >= 0)
			close(netns_fd);

		_exit(EXIT_FAILURE);
	}

	/* Verify that the caller is entitled to touch the target namespace. */
	if (request == LXC_USERNIC_CREATE) {
		if (!may_access_netns(pid)) {
			usernic_error("User %s may not modify netns for pid %d\n", me, pid);
			_exit(EXIT_FAILURE);
		}
	} else if (request == LXC_USERNIC_DELETE) {
		bool has_priv;

		has_priv = is_privileged_over_netns(netns_fd);
		close(netns_fd);
		if (!has_priv) {
			usernic_error("%s", "Process is not privileged over "
				      "network namespace\n");
			_exit(EXIT_FAILURE);
		}
	}

	/* Collect the configuration entries that apply to this user; n is the
	 * total number of nics they allow.
	 */
	n = get_alloted(me, args.type, args.link, &alloted);
	free(me);

	if (request == LXC_USERNIC_DELETE) {
		int ret;
		struct alloted_s *it;
		bool found_nicname = false;

		if (!is_ovs_bridge(args.link)) {
			usernic_error("%s", "Deletion of non ovs type network "
				      "devices not implemented\n");
			close(fd);
			free_alloted(&alloted);
			_exit(EXIT_FAILURE);
		}

		/* Check whether the network device we are supposed to delete
		 * exists in the db. If it doesn't we will not delete it as we
		 * need to assume the network device is not under our control.
		 * As a side effect we also clear any invalid entries from the
		 * database.
		 *
		 * NOTE(review): args.veth_name is NULL when only six arguments
		 * were given; in that case found_nicname can never become true
		 * and the NULL pointer is printed with %s below.
		 */
		for (it = alloted; it; it = it->next)
			cull_entries(fd, it->name, args.type, args.link,
				     args.veth_name, &found_nicname);
		close(fd);
		free_alloted(&alloted);

		if (!found_nicname) {
			usernic_error("Caller is not allowed to delete network "
				      "device \"%s\"\n", args.veth_name);
			_exit(EXIT_FAILURE);
		}

		ret = lxc_ovs_delete_port(args.link, args.veth_name);
		if (ret < 0) {
			usernic_error("Failed to remove port \"%s\" from "
				      "openvswitch bridge \"%s\"",
				      args.veth_name, args.link);
			_exit(EXIT_FAILURE);
		}

		_exit(EXIT_SUCCESS);
	}

	/* Only try to create a nic if the configuration grants any at all. */
	if (n > 0)
		nicname = get_nic_if_avail(fd, alloted, pid, args.type,
					   args.link, n, &cnic);

	close(fd);
	free_alloted(&alloted);

	/* NOTE(review): every failure of get_nic_if_avail(), not only an
	 * exhausted quota, ends up with this message.
	 */
	if (!nicname) {
		usernic_error("%s", "Quota reached\n");
		_exit(EXIT_FAILURE);
	}

	/* Now rename the link. */
	newname = lxc_secure_rename_in_ns(pid, cnic, args.veth_name,
					  &container_veth_ifidx);
	if (!newname) {
		usernic_error("%s", "Failed to rename the link\n");

		ret = lxc_netdev_delete_by_name(cnic);
		if (ret < 0)
			usernic_error("Failed to delete \"%s\"\n", cnic);

		free(nicname);
		_exit(EXIT_FAILURE);
	}

	host_veth_ifidx = if_nametoindex(nicname);
	if (!host_veth_ifidx) {
		free(newname);
		free(nicname);
		CMD_SYSERROR("Failed to get netdev index\n");
		_exit(EXIT_FAILURE);
	}

	/* Write names of veth pairs and their ifindices to stdout:
	 * (e.g. eth0:731:veth9MT2L4:730)
	 */
	fprintf(stdout, "%s:%d:%s:%d\n", newname, container_veth_ifidx, nicname,
		host_veth_ifidx);
	free(newname);
	free(nicname);

	/* _exit() does not flush stdio buffers, so do it explicitly. */
	fflush(stdout);
	_exit(EXIT_SUCCESS);
}