]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
start: don't let data_sock users close the fd
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
cb0dc11b 23
811ef482 24#define _GNU_SOURCE
cb0dc11b
CB
25#include <ctype.h>
26#include <errno.h>
27#include <fcntl.h>
0ad19a3f 28#include <stdio.h>
0ad19a3f 29#include <stdlib.h>
0ad19a3f 30#include <string.h>
dd1d77f9 31#include <time.h>
cb0dc11b 32#include <unistd.h>
0ad19a3f 33#include <arpa/inet.h>
0ad19a3f 34#include <linux/netlink.h>
35#include <linux/rtnetlink.h>
36#include <linux/sockios.h>
cb0dc11b
CB
37#include <net/ethernet.h>
38#include <net/if.h>
39#include <net/if_arp.h>
40#include <netinet/in.h>
41#include <sys/inotify.h>
42#include <sys/ioctl.h>
43#include <sys/param.h>
44#include <sys/socket.h>
45#include <sys/stat.h>
46#include <sys/types.h>
f549edcc 47
7ab1ba02 48#include "af_unix.h"
72d0e1cb 49#include "conf.h"
811ef482 50#include "config.h"
cb0dc11b
CB
51#include "log.h"
52#include "network.h"
53#include "nl.h"
0d204771 54#include "utils.h"
0ad19a3f 55
a0265685
SG
56#if HAVE_IFADDRS_H
57#include <ifaddrs.h>
58#else
59#include <../include/ifaddrs.h>
60#endif
61
0ad19a3f 62#ifndef IFLA_LINKMODE
cb0dc11b 63#define IFLA_LINKMODE 17
0ad19a3f 64#endif
65
66#ifndef IFLA_LINKINFO
cb0dc11b 67#define IFLA_LINKINFO 18
0ad19a3f 68#endif
69
70#ifndef IFLA_NET_NS_PID
cb0dc11b 71#define IFLA_NET_NS_PID 19
0ad19a3f 72#endif
73
74#ifndef IFLA_INFO_KIND
cb0dc11b 75#define IFLA_INFO_KIND 1
0ad19a3f 76#endif
77
26c39028 78#ifndef IFLA_VLAN_ID
cb0dc11b 79#define IFLA_VLAN_ID 1
26c39028
JHS
80#endif
81
0ad19a3f 82#ifndef IFLA_INFO_DATA
cb0dc11b 83#define IFLA_INFO_DATA 2
0ad19a3f 84#endif
85
86#ifndef VETH_INFO_PEER
cb0dc11b 87#define VETH_INFO_PEER 1
0ad19a3f 88#endif
89
e892973e 90#ifndef IFLA_MACVLAN_MODE
cb0dc11b 91#define IFLA_MACVLAN_MODE 1
e892973e
DL
92#endif
93
cb0dc11b 94lxc_log_define(lxc_network, lxc);
f8fee0e2 95
811ef482
CB
96typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
97
98static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
99{
100 int bridge_index, err;
101 char *veth1, *veth2;
102 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
103 unsigned int mtu = 0;
104
de4855a8 105 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
106 veth1 = netdev->priv.veth_attr.pair;
107 if (handler->conf->reboot)
108 lxc_netdev_delete_by_name(veth1);
109 } else {
110 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
111 if (err < 0 || (size_t)err >= sizeof(veth1buf))
112 return -1;
113
114 veth1 = lxc_mkifname(veth1buf);
115 if (!veth1)
116 return -1;
117
118 /* store away for deconf */
119 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
120 }
121
122 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
123 veth2 = lxc_mkifname(veth2buf);
124 if (!veth2)
125 goto out_delete;
126
127 err = lxc_veth_create(veth1, veth2);
128 if (err) {
129 ERROR("Failed to create veth pair \"%s\" and \"%s\": %s", veth1,
130 veth2, strerror(-err));
131 goto out_delete;
132 }
133
134 /* changing the high byte of the mac address to 0xfe, the bridge interface
135 * will always keep the host's mac address and not take the mac address
136 * of a container */
137 err = setup_private_host_hw_addr(veth1);
138 if (err) {
139 ERROR("Failed to change mac address of host interface \"%s\": %s",
140 veth1, strerror(-err));
141 goto out_delete;
142 }
143
8da62485
CB
144 /* Retrieve ifindex of the host's veth device. */
145 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
146 if (!netdev->priv.veth_attr.ifindex) {
147 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
148 goto out_delete;
149 }
150
151 /* Note that we're retrieving the container's ifindex in the host's
152 * network namespace because we need it to move the device from the
153 * host's network namespace to the container's network namespace later
154 * on.
155 */
811ef482
CB
156 netdev->ifindex = if_nametoindex(veth2);
157 if (!netdev->ifindex) {
158 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
159 goto out_delete;
160 }
161
162 if (netdev->mtu) {
163 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
164 WARN("Failed to parse mtu");
165 else
166 INFO("Retrieved mtu %d", mtu);
de4855a8 167 } else if (netdev->link[0] != '\0') {
811ef482
CB
168 bridge_index = if_nametoindex(netdev->link);
169 if (bridge_index) {
170 mtu = netdev_get_mtu(bridge_index);
171 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
172 } else {
173 mtu = netdev_get_mtu(netdev->ifindex);
174 INFO("Retrieved mtu %d from %s", mtu, veth2);
175 }
176 }
177
178 if (mtu) {
179 err = lxc_netdev_set_mtu(veth1, mtu);
180 if (!err)
181 err = lxc_netdev_set_mtu(veth2, mtu);
182 if (err) {
183 ERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
184 "and \"%s\": %s",
185 mtu, veth1, veth2, strerror(-err));
186 goto out_delete;
187 }
188 }
189
de4855a8 190 if (netdev->link[0] != '\0') {
811ef482
CB
191 err = lxc_bridge_attach(netdev->link, veth1);
192 if (err) {
193 ERROR("Failed to attach \"%s\" to bridge \"%s\": %s",
194 veth1, netdev->link, strerror(-err));
195 goto out_delete;
196 }
197 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
198 }
199
200 err = lxc_netdev_up(veth1);
201 if (err) {
202 ERROR("Failed to set \"%s\" up: %s", veth1, strerror(-err));
203 goto out_delete;
204 }
205
206 if (netdev->upscript) {
207 err = run_script(handler->name, "net", netdev->upscript, "up",
208 "veth", veth1, (char*) NULL);
209 if (err)
210 goto out_delete;
211 }
212
213 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
214 netdev->ifindex);
215
216 return 0;
217
218out_delete:
219 if (netdev->ifindex != 0)
220 lxc_netdev_delete_by_name(veth1);
811ef482
CB
221 return -1;
222}
223
224static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
225{
226 char peerbuf[IFNAMSIZ], *peer;
227 int err;
228
de4855a8 229 if (netdev->link[0] == '\0') {
811ef482
CB
230 ERROR("No link for macvlan network device specified");
231 return -1;
232 }
233
234 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
235 if (err < 0 || (size_t)err >= sizeof(peerbuf))
236 return -1;
237
238 peer = lxc_mkifname(peerbuf);
239 if (!peer)
240 return -1;
241
242 err = lxc_macvlan_create(netdev->link, peer,
243 netdev->priv.macvlan_attr.mode);
244 if (err) {
245 ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s",
246 peer, netdev->link, strerror(-err));
966e9f1f 247 goto on_error;
811ef482
CB
248 }
249
250 netdev->ifindex = if_nametoindex(peer);
251 if (!netdev->ifindex) {
252 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 253 goto on_error;
811ef482
CB
254 }
255
256 if (netdev->upscript) {
257 err = run_script(handler->name, "net", netdev->upscript, "up",
258 "macvlan", netdev->link, (char*) NULL);
259 if (err)
966e9f1f 260 goto on_error;
811ef482
CB
261 }
262
263 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
264 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
265
266 return 0;
966e9f1f
CB
267
268on_error:
811ef482 269 lxc_netdev_delete_by_name(peer);
811ef482
CB
270 return -1;
271}
272
273static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
274{
275 char peer[IFNAMSIZ];
276 int err;
277 static uint16_t vlan_cntr = 0;
278 unsigned int mtu = 0;
279
de4855a8 280 if (netdev->link[0] == '\0') {
811ef482
CB
281 ERROR("No link for vlan network device specified");
282 return -1;
283 }
284
285 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
286 if (err < 0 || (size_t)err >= sizeof(peer))
287 return -1;
288
289 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
290 if (err) {
291 ERROR("Failed to create vlan interface \"%s\" on \"%s\": %s",
292 peer, netdev->link, strerror(-err));
293 return -1;
294 }
295
296 netdev->ifindex = if_nametoindex(peer);
297 if (!netdev->ifindex) {
298 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
299 lxc_netdev_delete_by_name(peer);
300 return -1;
301 }
302
303 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\" (vlan1000)",
304 peer, netdev->ifindex);
305 if (netdev->mtu) {
306 if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
307 ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".",
308 netdev->ifindex,
de4855a8 309 netdev->name[0] != '\0' ? netdev->name : "(null)");
811ef482
CB
310 return -1;
311 }
312 err = lxc_netdev_set_mtu(peer, mtu);
313 if (err) {
314 ERROR("Failed to set mtu \"%s\" for \"%s\": %s",
315 netdev->mtu, peer, strerror(-err));
316 lxc_netdev_delete_by_name(peer);
317 return -1;
318 }
319 }
320
321 return 0;
322}
323
324static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
325{
de4855a8 326 if (netdev->link[0] == '\0') {
811ef482
CB
327 ERROR("No link for physical interface specified");
328 return -1;
329 }
330
790255cf
CB
331 /* Note that we're retrieving the container's ifindex in the host's
332 * network namespace because we need it to move the device from the
333 * host's network namespace to the container's network namespace later
334 * on.
335 * Note that netdev->link will contain the name of the physical network
336 * device in the host's namespace.
337 */
811ef482
CB
338 netdev->ifindex = if_nametoindex(netdev->link);
339 if (!netdev->ifindex) {
340 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
341 return -1;
342 }
343
790255cf
CB
344 /* Store the ifindex of the host's network device in the host's
345 * namespace.
346 */
347 netdev->priv.phys_attr.ifindex = netdev->ifindex;
348
811ef482
CB
349 if (netdev->upscript) {
350 int err;
351 err = run_script(handler->name, "net", netdev->upscript,
352 "up", "phys", netdev->link, (char*) NULL);
353 if (err)
354 return -1;
355 }
356
357 return 0;
358}
359
360static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
361{
362 netdev->ifindex = 0;
363 if (netdev->upscript) {
364 int err;
365 err = run_script(handler->name, "net", netdev->upscript,
366 "up", "empty", (char*) NULL);
367 if (err)
368 return -1;
369 }
370 return 0;
371}
372
373static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
374{
375 netdev->ifindex = 0;
376 return 0;
377}
378
379static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
380 [LXC_NET_VETH] = instantiate_veth,
381 [LXC_NET_MACVLAN] = instantiate_macvlan,
382 [LXC_NET_VLAN] = instantiate_vlan,
383 [LXC_NET_PHYS] = instantiate_phys,
384 [LXC_NET_EMPTY] = instantiate_empty,
385 [LXC_NET_NONE] = instantiate_none,
386};
387
388static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
389{
390 char *veth1;
391 int err;
392
de4855a8 393 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
394 veth1 = netdev->priv.veth_attr.pair;
395 else
396 veth1 = netdev->priv.veth_attr.veth1;
397
398 if (netdev->downscript) {
399 err = run_script(handler->name, "net", netdev->downscript,
400 "down", "veth", veth1, (char*) NULL);
401 if (err)
402 return -1;
403 }
404 return 0;
405}
406
407static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
408{
409 int err;
410
411 if (netdev->downscript) {
412 err = run_script(handler->name, "net", netdev->downscript,
413 "down", "macvlan", netdev->link,
414 (char*) NULL);
415 if (err)
416 return -1;
417 }
418 return 0;
419}
420
/* vlan devices need no shutdown work here; always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
425
426static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
427{
428 int err;
429
430 if (netdev->downscript) {
431 err = run_script(handler->name, "net", netdev->downscript,
432 "down", "phys", netdev->link, (char*) NULL);
433 if (err)
434 return -1;
435 }
436 return 0;
437}
438
439static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
440{
441 int err;
442
443 if (netdev->downscript) {
444 err = run_script(handler->name, "net", netdev->downscript,
445 "down", "empty", (char*) NULL);
446 if (err)
447 return -1;
448 }
449 return 0;
450}
451
/* "none" network type: nothing to shut down; always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
456
457static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
458 [LXC_NET_VETH] = shutdown_veth,
459 [LXC_NET_MACVLAN] = shutdown_macvlan,
460 [LXC_NET_VLAN] = shutdown_vlan,
461 [LXC_NET_PHYS] = shutdown_phys,
462 [LXC_NET_EMPTY] = shutdown_empty,
463 [LXC_NET_NONE] = shutdown_none,
464};
465
ebc73a67 466int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 467{
ebc73a67 468 int err;
0ad19a3f 469 struct nl_handler nlh;
06f976ca 470 struct ifinfomsg *ifi;
ebc73a67 471 struct nlmsg *nlmsg = NULL;
0ad19a3f 472
3cfc0f3a
MN
473 err = netlink_open(&nlh, NETLINK_ROUTE);
474 if (err)
475 return err;
0ad19a3f 476
3cfc0f3a 477 err = -ENOMEM;
0ad19a3f 478 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
479 if (!nlmsg)
480 goto out;
481
ebc73a67 482 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
483 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
484
485 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
486 if (!ifi)
487 goto out;
06f976ca
SZ
488 ifi->ifi_family = AF_UNSPEC;
489 ifi->ifi_index = ifindex;
0ad19a3f 490
491 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
492 goto out;
493
8d357196
DY
494 if (ifname != NULL) {
495 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
496 goto out;
497 }
498
3cfc0f3a 499 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 500out:
501 netlink_close(&nlh);
502 nlmsg_free(nlmsg);
503 return err;
504}
505
ebc73a67
CB
/* If we are asked to move a wireless interface, then we must actually move its
 * phyN device. Detect that condition and return the physname here. The physname
 * will be passed to lxc_netdev_move_wlan() which will free it when done.
 *
 * Returns a malloc()ed, NUL-terminated copy of the first line of
 * /sys/class/net/<ifname>/phy80211/name, or NULL if that file cannot be
 * stat()ed, opened, measured or read, or if memory runs out.
 */
#define PHYSNAME "/sys/class/net/%s/phy80211/name"
static char *is_wlan(const char *ifname)
{
	int ret;
	long physlen;
	size_t len, nread;
	char *path;
	FILE *f;
	struct stat sb;
	char *physname = NULL;

	/* The "%s" in PHYSNAME is replaced by ifname, so this is one byte more
	 * than the formatted path needs, plus one for alloca() below.
	 */
	len = strlen(ifname) + strlen(PHYSNAME) - 1;
	path = alloca(len + 1);
	ret = snprintf(path, len, PHYSNAME, ifname);
	if (ret < 0 || (size_t)ret >= len)
		return NULL;

	if (stat(path, &sb))
		return NULL;

	f = fopen(path, "r");
	if (!f)
		return NULL;

	/* sb.st_size is always 4096, so find the size by seeking instead. */
	if (fseek(f, 0, SEEK_END) < 0)
		goto on_error;

	physlen = ftell(f);
	if (physlen < 0)
		goto on_error;

	if (fseek(f, 0, SEEK_SET) < 0)
		goto on_error;

	/* Zeroed, with one spare byte so the result is always terminated. */
	physname = calloc(1, (size_t)physlen + 1);
	if (!physname)
		goto on_error;

	/* A short read is fine as long as the stream is not in error. */
	nread = fread(physname, 1, (size_t)physlen, f);
	if (nread < (size_t)physlen && ferror(f))
		goto on_error;

	fclose(f);

	/* Keep only the first line. */
	physname[strcspn(physname, "\n")] = '\0';

	return physname;

on_error:
	fclose(f);
	free(physname);
	return NULL;
}
566
ebc73a67
CB
/* Rename device @old to @new from within the network namespace of @pid.
 *
 * A child process is forked which switches into that namespace and performs
 * the rename; the parent only waits for it.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child from here on. It must terminate on every path: returning
	 * would let it continue running the caller's code as a second
	 * process.
	 */
	if (!switch_to_ns(pid, "net"))
		exit(EXIT_FAILURE);

	exit(lxc_netdev_rename_by_name(old, new));
}
584
ebc73a67
CB
/* Move the wireless phy @physname into the network namespace of @pid by
 * running "iw phy <physname> set netns <pid>" and, if @newname is not NULL,
 * rename @ifname to @newname inside that namespace afterwards.
 *
 * @physname is always freed before returning.
 *
 * Returns -1 if iw is not found on the path, fork() fails or the iw child
 * does not succeed; otherwise 0, or the result of the rename when one was
 * requested.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	pid_t child;
	char *iw_path;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (iw_path == NULL)
		goto out;
	/* Only the existence of iw matters; execlp() searches the path again. */
	free(iw_path);

	child = fork();
	if (child < 0)
		goto out;

	if (child == 0) {
		char pidstr[30];

		sprintf(pidstr, "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		/* Only reached when execlp() failed. */
		exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child) != 0)
		goto out;

	if (newname != NULL)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

out:
	free(physname);
	return ret;
}
624
/* Move the device called @ifname into the network namespace of @pid,
 * optionally renaming it to @newname.
 *
 * When is_wlan() reports a phy name for the device, the move is delegated to
 * lxc_netdev_move_wlan(); otherwise to lxc_netdev_move_by_index().
 *
 * Returns -EINVAL if @ifname is NULL or cannot be resolved to an ifindex,
 * otherwise the result of the delegated call.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	int ifindex;
	char *phy;

	if (ifname == NULL)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (ifindex == 0)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (phy == NULL)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	/* lxc_netdev_move_wlan() takes care of freeing phy. */
	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
643
b84f58b9 644int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 645{
b84f58b9 646 int err;
ebc73a67
CB
647 struct ifinfomsg *ifi;
648 struct nl_handler nlh;
649 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 650
3cfc0f3a
MN
651 err = netlink_open(&nlh, NETLINK_ROUTE);
652 if (err)
653 return err;
0ad19a3f 654
3cfc0f3a 655 err = -ENOMEM;
0ad19a3f 656 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
657 if (!nlmsg)
658 goto out;
659
06f976ca 660 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 661 if (!answer)
662 goto out;
663
ebc73a67 664 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
665 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
666
667 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
668 if (!ifi)
669 goto out;
06f976ca
SZ
670 ifi->ifi_family = AF_UNSPEC;
671 ifi->ifi_index = ifindex;
0ad19a3f 672
3cfc0f3a 673 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 674out:
675 netlink_close(&nlh);
676 nlmsg_free(answer);
677 nlmsg_free(nlmsg);
678 return err;
679}
680
b84f58b9
DL
/* Delete the device called @name.
 *
 * Returns -EINVAL if @name cannot be resolved to an ifindex, otherwise the
 * result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
691
692int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 693{
ebc73a67 694 int err, len;
06f976ca 695 struct ifinfomsg *ifi;
ebc73a67
CB
696 struct nl_handler nlh;
697 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 698
3cfc0f3a
MN
699 err = netlink_open(&nlh, NETLINK_ROUTE);
700 if (err)
701 return err;
b9a5bb58 702
b84f58b9 703 len = strlen(newname);
dae3fdf6 704 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
705 goto out;
706
3cfc0f3a 707 err = -ENOMEM;
b9a5bb58
DL
708 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
709 if (!nlmsg)
710 goto out;
711
06f976ca 712 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
713 if (!answer)
714 goto out;
715
ebc73a67 716 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
717 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
718
719 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
720 if (!ifi)
721 goto out;
06f976ca
SZ
722 ifi->ifi_family = AF_UNSPEC;
723 ifi->ifi_index = ifindex;
b84f58b9
DL
724
725 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
726 goto out;
b9a5bb58 727
3cfc0f3a 728 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
729out:
730 netlink_close(&nlh);
731 nlmsg_free(answer);
732 nlmsg_free(nlmsg);
733 return err;
734}
735
b84f58b9
DL
736int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
737{
738 int len, index;
739
740 len = strlen(oldname);
dae3fdf6 741 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
742 return -EINVAL;
743
744 index = if_nametoindex(oldname);
745 if (!index)
746 return -EINVAL;
747
748 return lxc_netdev_rename_by_index(index, newname);
749}
750
8befa924 751int netdev_set_flag(const char *name, int flag)
0ad19a3f 752{
ebc73a67 753 int err, index, len;
06f976ca 754 struct ifinfomsg *ifi;
ebc73a67
CB
755 struct nl_handler nlh;
756 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 757
3cfc0f3a
MN
758 err = netlink_open(&nlh, NETLINK_ROUTE);
759 if (err)
760 return err;
0ad19a3f 761
3cfc0f3a 762 err = -EINVAL;
0ad19a3f 763 len = strlen(name);
dae3fdf6 764 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 765 goto out;
766
3cfc0f3a 767 err = -ENOMEM;
0ad19a3f 768 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
769 if (!nlmsg)
770 goto out;
771
06f976ca 772 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 773 if (!answer)
774 goto out;
775
3cfc0f3a 776 err = -EINVAL;
0ad19a3f 777 index = if_nametoindex(name);
778 if (!index)
779 goto out;
780
ebc73a67 781 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
782 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
783
784 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
785 if (!ifi) {
786 err = -ENOMEM;
787 goto out;
788 }
06f976ca
SZ
789 ifi->ifi_family = AF_UNSPEC;
790 ifi->ifi_index = index;
791 ifi->ifi_change |= IFF_UP;
792 ifi->ifi_flags |= flag;
0ad19a3f 793
794 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 795out:
796 netlink_close(&nlh);
797 nlmsg_free(nlmsg);
798 nlmsg_free(answer);
799 return err;
800}
801
ebc73a67 802int netdev_get_flag(const char *name, int *flag)
efa1cf45 803{
ebc73a67 804 int err, index, len;
a4318300 805 struct ifinfomsg *ifi;
ebc73a67
CB
806 struct nl_handler nlh;
807 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
808
809 if (!name)
810 return -EINVAL;
811
812 err = netlink_open(&nlh, NETLINK_ROUTE);
813 if (err)
814 return err;
815
816 err = -EINVAL;
817 len = strlen(name);
818 if (len == 1 || len >= IFNAMSIZ)
819 goto out;
820
821 err = -ENOMEM;
822 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
823 if (!nlmsg)
824 goto out;
825
06f976ca 826 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
827 if (!answer)
828 goto out;
829
830 err = -EINVAL;
831 index = if_nametoindex(name);
832 if (!index)
833 goto out;
834
06f976ca
SZ
835 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
836 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
837
838 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
839 if (!ifi) {
840 err = -ENOMEM;
841 goto out;
842 }
06f976ca
SZ
843 ifi->ifi_family = AF_UNSPEC;
844 ifi->ifi_index = index;
efa1cf45
DY
845
846 err = netlink_transaction(&nlh, nlmsg, answer);
847 if (err)
848 goto out;
849
06f976ca 850 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
851
852 *flag = ifi->ifi_flags;
853out:
854 netlink_close(&nlh);
855 nlmsg_free(nlmsg);
856 nlmsg_free(answer);
857 return err;
858}
859
860/*
861 * \brief Check a interface is up or not.
862 *
863 * \param name: name for the interface.
864 *
865 * \return int.
866 * 0 means interface is down.
867 * 1 means interface is up.
868 * Others means error happened, and ret-value is the error number.
869 */
ebc73a67 870int lxc_netdev_isup(const char *name)
efa1cf45 871{
ebc73a67 872 int err, flag;
efa1cf45
DY
873
874 err = netdev_get_flag(name, &flag);
875 if (err)
ebc73a67
CB
876 return err;
877
efa1cf45
DY
878 if (flag & IFF_UP)
879 return 1;
ebc73a67 880
efa1cf45 881 return 0;
efa1cf45
DY
882}
883
0130df54
SH
884int netdev_get_mtu(int ifindex)
885{
ebc73a67 886 int answer_len, err, res;
0130df54 887 struct nl_handler nlh;
06f976ca 888 struct ifinfomsg *ifi;
0130df54 889 struct nlmsghdr *msg;
ebc73a67
CB
890 int readmore = 0, recv_len = 0;
891 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
892
893 err = netlink_open(&nlh, NETLINK_ROUTE);
894 if (err)
895 return err;
896
897 err = -ENOMEM;
898 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
899 if (!nlmsg)
900 goto out;
901
06f976ca 902 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
903 if (!answer)
904 goto out;
905
906 /* Save the answer buffer length, since it will be overwritten
907 * on the first receive (and we might need to receive more than
ebc73a67
CB
908 * once.
909 */
06f976ca
SZ
910 answer_len = answer->nlmsghdr->nlmsg_len;
911
ebc73a67 912 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 913 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 914
06f976ca 915 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
916 if (!ifi)
917 goto out;
06f976ca 918 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
919
920 /* Send the request for addresses, which returns all addresses
921 * on all interfaces. */
922 err = netlink_send(&nlh, nlmsg);
923 if (err < 0)
924 goto out;
925
926 do {
927 /* Restore the answer buffer length, it might have been
ebc73a67
CB
928 * overwritten by a previous receive.
929 */
06f976ca 930 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
931
932 /* Get the (next) batch of reply messages */
933 err = netlink_rcv(&nlh, answer);
934 if (err < 0)
935 goto out;
936
937 recv_len = err;
938 err = 0;
939
940 /* Satisfy the typing for the netlink macros */
06f976ca 941 msg = answer->nlmsghdr;
0130df54
SH
942
943 while (NLMSG_OK(msg, recv_len)) {
944
945 /* Stop reading if we see an error message */
946 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
947 struct nlmsgerr *errmsg =
948 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
949 err = errmsg->error;
950 goto out;
951 }
952
953 /* Stop reading if we see a NLMSG_DONE message */
954 if (msg->nlmsg_type == NLMSG_DONE) {
955 readmore = 0;
956 break;
957 }
958
06f976ca 959 ifi = NLMSG_DATA(msg);
0130df54
SH
960 if (ifi->ifi_index == ifindex) {
961 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
962 int attr_len =
963 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 964 res = 0;
ebc73a67
CB
965 while (RTA_OK(rta, attr_len)) {
966 /* Found a local address for the
967 * requested interface, return it.
968 */
0130df54 969 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
970 memcpy(&res, RTA_DATA(rta),
971 sizeof(int));
0130df54
SH
972 err = res;
973 goto out;
974 }
975 rta = RTA_NEXT(rta, attr_len);
976 }
0130df54
SH
977 }
978
ebc73a67
CB
979 /* Keep reading more data from the socket if the last
980 * message had the NLF_F_MULTI flag set.
981 */
0130df54
SH
982 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
983
ebc73a67 984 /* Look at the next message received in this buffer. */
0130df54
SH
985 msg = NLMSG_NEXT(msg, recv_len);
986 }
987 } while (readmore);
988
ebc73a67 989 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
990 err = -1;
991
992out:
993 netlink_close(&nlh);
994 nlmsg_free(answer);
995 nlmsg_free(nlmsg);
996 return err;
997}
998
d472214b 999int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1000{
ebc73a67 1001 int err, index, len;
06f976ca 1002 struct ifinfomsg *ifi;
ebc73a67
CB
1003 struct nl_handler nlh;
1004 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1005
3cfc0f3a
MN
1006 err = netlink_open(&nlh, NETLINK_ROUTE);
1007 if (err)
1008 return err;
75d09f83 1009
3cfc0f3a 1010 err = -EINVAL;
75d09f83 1011 len = strlen(name);
dae3fdf6 1012 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1013 goto out;
1014
3cfc0f3a 1015 err = -ENOMEM;
75d09f83
DL
1016 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1017 if (!nlmsg)
1018 goto out;
1019
06f976ca 1020 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1021 if (!answer)
1022 goto out;
1023
3cfc0f3a 1024 err = -EINVAL;
75d09f83
DL
1025 index = if_nametoindex(name);
1026 if (!index)
1027 goto out;
1028
ebc73a67 1029 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1030 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1031
1032 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1033 if (!ifi) {
1034 err = -ENOMEM;
1035 goto out;
1036 }
06f976ca
SZ
1037 ifi->ifi_family = AF_UNSPEC;
1038 ifi->ifi_index = index;
75d09f83
DL
1039
1040 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1041 goto out;
1042
1043 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1044out:
1045 netlink_close(&nlh);
1046 nlmsg_free(nlmsg);
1047 nlmsg_free(answer);
1048 return err;
1049}
1050
d472214b 1051int lxc_netdev_up(const char *name)
0ad19a3f 1052{
d472214b 1053 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1054}
1055
/* Take the device called @name down: netdev_set_flag() with no flag set.
 * Returns whatever netdev_set_flag() returns.
 */
int lxc_netdev_down(const char *name)
{
	int ret = netdev_set_flag(name, 0);

	return ret;
}
1060
497353b6 1061int lxc_veth_create(const char *name1, const char *name2)
0ad19a3f 1062{
ebc73a67 1063 int err, len;
06f976ca 1064 struct ifinfomsg *ifi;
ebc73a67 1065 struct nl_handler nlh;
0ad19a3f 1066 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1067 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1068
3cfc0f3a
MN
1069 err = netlink_open(&nlh, NETLINK_ROUTE);
1070 if (err)
1071 return err;
0ad19a3f 1072
3cfc0f3a 1073 err = -EINVAL;
0ad19a3f 1074 len = strlen(name1);
dae3fdf6 1075 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1076 goto out;
1077
1078 len = strlen(name2);
dae3fdf6 1079 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1080 goto out;
1081
3cfc0f3a 1082 err = -ENOMEM;
0ad19a3f 1083 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1084 if (!nlmsg)
1085 goto out;
1086
06f976ca 1087 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1088 if (!answer)
1089 goto out;
1090
06f976ca 1091 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1092 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1093 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1094
1095 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1096 if (!ifi)
1097 goto out;
06f976ca 1098 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1099
3cfc0f3a 1100 err = -EINVAL;
79e68309 1101 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1102 if (!nest1)
1103 goto out;
1104
1105 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1106 goto out;
1107
1108 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1109 if (!nest2)
1110 goto out;
1111
1112 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1113 if (!nest3)
1114 goto out;
1115
06f976ca 1116 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1117 if (!ifi) {
1118 err = -ENOMEM;
06f976ca 1119 goto out;
25a9939b 1120 }
0ad19a3f 1121
1122 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1123 goto out;
1124
1125 nla_end_nested(nlmsg, nest3);
0ad19a3f 1126 nla_end_nested(nlmsg, nest2);
0ad19a3f 1127 nla_end_nested(nlmsg, nest1);
1128
1129 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1130 goto out;
1131
3cfc0f3a 1132 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1133out:
1134 netlink_close(&nlh);
1135 nlmsg_free(answer);
1136 nlmsg_free(nlmsg);
1137 return err;
1138}
1139
ebc73a67 1140/* TODO: merge with lxc_macvlan_create */
7c11d57a 1141int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1142{
ebc73a67 1143 int err, len, lindex;
06f976ca 1144 struct ifinfomsg *ifi;
ebc73a67 1145 struct nl_handler nlh;
26c39028 1146 struct rtattr *nest, *nest2;
ebc73a67 1147 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1148
3cfc0f3a
MN
1149 err = netlink_open(&nlh, NETLINK_ROUTE);
1150 if (err)
1151 return err;
26c39028 1152
3cfc0f3a 1153 err = -EINVAL;
26c39028 1154 len = strlen(master);
dae3fdf6 1155 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1156 goto err3;
1157
1158 len = strlen(name);
dae3fdf6 1159 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1160 goto err3;
1161
3cfc0f3a 1162 err = -ENOMEM;
26c39028
JHS
1163 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1164 if (!nlmsg)
1165 goto err3;
1166
06f976ca 1167 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1168 if (!answer)
1169 goto err2;
1170
3cfc0f3a 1171 err = -EINVAL;
26c39028
JHS
1172 lindex = if_nametoindex(master);
1173 if (!lindex)
1174 goto err1;
1175
06f976ca 1176 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1177 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1178 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1179
1180 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1181 if (!ifi) {
1182 err = -ENOMEM;
1183 goto err1;
1184 }
06f976ca 1185 ifi->ifi_family = AF_UNSPEC;
26c39028 1186
79e68309 1187 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1188 if (!nest)
1189 goto err1;
1190
1191 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1192 goto err1;
1193
1194 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1195 if (!nest2)
1196 goto err1;
e892973e 1197
26c39028
JHS
1198 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1199 goto err1;
e892973e 1200
26c39028 1201 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1202 nla_end_nested(nlmsg, nest);
1203
1204 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1205 goto err1;
1206
1207 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1208 goto err1;
1209
3cfc0f3a 1210 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1211err1:
1212 nlmsg_free(answer);
1213err2:
1214 nlmsg_free(nlmsg);
1215err3:
1216 netlink_close(&nlh);
1217 return err;
1218}
1219
e892973e 1220int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1221{
ebc73a67 1222 int err, index, len;
06f976ca 1223 struct ifinfomsg *ifi;
ebc73a67 1224 struct nl_handler nlh;
e892973e 1225 struct rtattr *nest, *nest2;
ebc73a67 1226 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1227
3cfc0f3a
MN
1228 err = netlink_open(&nlh, NETLINK_ROUTE);
1229 if (err)
1230 return err;
0ad19a3f 1231
3cfc0f3a 1232 err = -EINVAL;
0ad19a3f 1233 len = strlen(master);
dae3fdf6 1234 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1235 goto out;
1236
1237 len = strlen(name);
dae3fdf6 1238 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1239 goto out;
1240
3cfc0f3a 1241 err = -ENOMEM;
0ad19a3f 1242 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1243 if (!nlmsg)
1244 goto out;
1245
06f976ca 1246 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1247 if (!answer)
1248 goto out;
1249
3cfc0f3a 1250 err = -EINVAL;
0ad19a3f 1251 index = if_nametoindex(master);
1252 if (!index)
1253 goto out;
1254
06f976ca 1255 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1256 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1257 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1258
1259 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1260 if (!ifi) {
1261 err = -ENOMEM;
1262 goto out;
1263 }
06f976ca 1264 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1265
79e68309 1266 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1267 if (!nest)
1268 goto out;
1269
1270 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1271 goto out;
1272
e892973e
DL
1273 if (mode) {
1274 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1275 if (!nest2)
1276 goto out;
1277
1278 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1279 goto out;
1280
1281 nla_end_nested(nlmsg, nest2);
1282 }
1283
0ad19a3f 1284 nla_end_nested(nlmsg, nest);
1285
1286 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1287 goto out;
1288
1289 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1290 goto out;
1291
3cfc0f3a 1292 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1293out:
1294 netlink_close(&nlh);
1295 nlmsg_free(answer);
1296 nlmsg_free(nlmsg);
1297 return err;
1298}
1299
/* Open @path write-only and write the string @value (without its terminating
 * NUL byte) to it.
 *
 * Returns 0 on success, or the negated errno of the failing open() or
 * write().
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	ssize_t written;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	written = write(fd, value, strlen(value));
	if (written < 0)
		ret = -errno;

	close(fd);
	return ret;
}
1315
/* Write "1" (non-zero @flag) or "0" to the per-interface "forwarding" entry
 * below /proc/sys/net/{ipv4,ipv6}/conf/@ifname.
 *
 * Returns -EINVAL for an address family other than AF_INET or AF_INET6,
 * -E2BIG if the path does not fit into MAXPATHLEN bytes, and otherwise the
 * result of proc_sys_net_write().
 */
static int ip_forward_set(const char *ifname, int family, int flag)
{
	char path[MAXPATHLEN];
	const char *proto;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		break;
	case AF_INET6:
		proto = "ipv6";
		break;
	default:
		return -EINVAL;
	}

	len = snprintf(path, MAXPATHLEN, "/proc/sys/net/%s/conf/%s/forwarding",
		       proto, ifname);
	if (len < 0 || (size_t)len >= MAXPATHLEN)
		return -E2BIG;

	if (flag)
		return proc_sys_net_write(path, "1");

	return proc_sys_net_write(path, "0");
}
1331
/* Enable forwarding on @ifname for @family; see ip_forward_set(). */
int lxc_ip_forward_on(const char *ifname, int family)
{
	const int enable = 1;

	return ip_forward_set(ifname, family, enable);
}
1336
/* Disable forwarding on @ifname for @family; see ip_forward_set(). */
int lxc_ip_forward_off(const char *ifname, int family)
{
	const int disable = 0;

	return ip_forward_set(ifname, family, disable);
}
1341
/* Write "1" (non-zero @flag) or "0" to the neighbour proxy entry of @ifname:
 * "proxy_arp" below /proc/sys/net/ipv4 for AF_INET, "proxy_ndp" below
 * /proc/sys/net/ipv6 for AF_INET6.
 *
 * Returns -EINVAL for any other family, -E2BIG if the path does not fit into
 * MAXPATHLEN bytes, and otherwise the result of proc_sys_net_write().
 */
static int neigh_proxy_set(const char *ifname, int family, int flag)
{
	char path[MAXPATHLEN];
	const char *proto, *knob;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		knob = "proxy_arp";
		break;
	case AF_INET6:
		proto = "ipv6";
		knob = "proxy_ndp";
		break;
	default:
		return -EINVAL;
	}

	len = snprintf(path, MAXPATHLEN, "/proc/sys/net/%s/conf/%s/%s", proto,
		       ifname, knob);
	if (len < 0 || (size_t)len >= MAXPATHLEN)
		return -E2BIG;

	if (flag)
		return proc_sys_net_write(path, "1");

	return proc_sys_net_write(path, "0");
}
1358
/* Enable neighbour proxying on @name for @family; see neigh_proxy_set(). */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1363
/* Disable neighbour proxying on @name for @family; see neigh_proxy_set(). */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
1368
/* Map one hexadecimal digit to its value. Returns -1 for every other
 * character. Plain range comparisons are used instead of isdigit() so that a
 * negative char value is never passed to a <ctype.h> function.
 */
static int lxc_mac_hex_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are stored in
 * sa_data. Every byte is written as one or two hex digits; a single ':'
 * between bytes is skipped. Parsing stops at the end of the string or after
 * ETH_ALEN bytes, whichever comes first.
 *
 * Returns 0 on success and -EINVAL as soon as an unexpected character is
 * met (bytes converted up to that point have already been stored).
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned val;
		int digit;
		char c;

		/* The first digit of a byte is mandatory. */
		digit = lxc_mac_hex_value(*macaddr++);
		if (digit < 0)
			return -EINVAL;
		val = (unsigned)digit;

		/* The second digit is optional: ':' or the end of the string
		 * terminates a single-digit byte.
		 */
		c = *macaddr;
		digit = lxc_mac_hex_value(c);
		if (digit >= 0)
			val = (val << 4) | (unsigned)digit;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		/* Consume the second digit or the separator just examined. */
		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1414
ebc73a67
CB
1415static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1416 void *acast, int prefix)
0ad19a3f 1417{
ebc73a67 1418 int addrlen, err;
06f976ca 1419 struct ifaddrmsg *ifa;
ebc73a67
CB
1420 struct nl_handler nlh;
1421 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1422
ebc73a67
CB
1423 addrlen = family == AF_INET ? sizeof(struct in_addr)
1424 : sizeof(struct in6_addr);
4bf1968d 1425
3cfc0f3a
MN
1426 err = netlink_open(&nlh, NETLINK_ROUTE);
1427 if (err)
1428 return err;
0ad19a3f 1429
3cfc0f3a 1430 err = -ENOMEM;
0ad19a3f 1431 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1432 if (!nlmsg)
1433 goto out;
1434
06f976ca 1435 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1436 if (!answer)
1437 goto out;
1438
06f976ca 1439 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1440 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1441 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1442
1443 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 1444 if (!ifa)
25a9939b 1445 goto out;
06f976ca
SZ
1446 ifa->ifa_prefixlen = prefix;
1447 ifa->ifa_index = ifindex;
1448 ifa->ifa_family = family;
1449 ifa->ifa_scope = 0;
acf47e1b 1450
3cfc0f3a 1451 err = -EINVAL;
4bf1968d 1452 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 1453 goto out;
1454
4bf1968d 1455 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 1456 goto out;
1457
d8948a52 1458 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
1459 goto out;
1460
ebc73a67 1461 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 1462 err = -EPROTONOSUPPORT;
79881dc6
DL
1463 if (family == AF_INET6 &&
1464 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1465 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 1466 goto out;
0ad19a3f 1467
3cfc0f3a 1468 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1469out:
1470 netlink_close(&nlh);
1471 nlmsg_free(answer);
1472 nlmsg_free(nlmsg);
1473 return err;
1474}
1475
/* IPv6 front end of ip_addr_add(); @mcast is handed over in the position of
 * the broadcast address.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	void *bcast = mcast;

	return ip_addr_add(AF_INET6, ifindex, addr, bcast, acast, prefix);
}
1482
ebc73a67
CB
/* IPv4 front end of ip_addr_add(); no anycast address is passed. */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	void *no_acast = NULL;

	return ip_addr_add(AF_INET, ifindex, addr, bcast, no_acast, prefix);
}
1488
ebc73a67
CB
/* Extract an address from the RTM_NEWADDR message @msg.
 *
 * Messages of a different address family than @family are ignored. Otherwise
 * the attributes are scanned for IFA_LOCAL and IFA_ADDRESS: the payload is
 * copied into *res, which is malloc()ed first if it is still NULL (so res
 * should point to an in_addr * or in6_addr *). An IFA_LOCAL attribute ends
 * the scan, so it overrides an IFA_ADDRESS seen before it.
 *
 * Returns 0 on success (including "nothing found", in which case *res is
 * left untouched) and -1 if an attribute has an unexpected payload size or
 * the allocation fails.
 */
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	struct rtattr *attr;
	int remaining, want;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		want = sizeof(struct in_addr);
	else
		want = sizeof(struct in6_addr);

	remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));

	/* Walk over all rtattr's carried by this message. */
	for (attr = IFA_RTA(ifa); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the right
		 * size; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(attr) != want)
			return -1;

		/* Reuse the buffer of an earlier IFA_ADDRESS match. */
		if (!*res) {
			*res = malloc(want);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), want);

		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
1537
1538static int ip_addr_get(int family, int ifindex, void **res)
1539{
ebc73a67 1540 int answer_len, err;
06f976ca 1541 struct ifaddrmsg *ifa;
ebc73a67 1542 struct nl_handler nlh;
19a26f82 1543 struct nlmsghdr *msg;
ebc73a67
CB
1544 int readmore = 0, recv_len = 0;
1545 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
1546
1547 err = netlink_open(&nlh, NETLINK_ROUTE);
1548 if (err)
1549 return err;
1550
1551 err = -ENOMEM;
1552 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1553 if (!nlmsg)
1554 goto out;
1555
06f976ca 1556 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
1557 if (!answer)
1558 goto out;
1559
ebc73a67
CB
1560 /* Save the answer buffer length, since it will be overwritten on the
1561 * first receive (and we might need to receive more than once).
1562 */
06f976ca
SZ
1563 answer_len = answer->nlmsghdr->nlmsg_len;
1564
ebc73a67 1565 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 1566 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 1567
06f976ca 1568 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
1569 if (!ifa)
1570 goto out;
06f976ca 1571 ifa->ifa_family = family;
19a26f82 1572
ebc73a67
CB
1573 /* Send the request for addresses, which returns all addresses on all
1574 * interfaces.
1575 */
19a26f82
MK
1576 err = netlink_send(&nlh, nlmsg);
1577 if (err < 0)
1578 goto out;
19a26f82
MK
1579
1580 do {
1581 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1582 * overwritten by a previous receive.
1583 */
06f976ca 1584 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 1585
ebc73a67 1586 /* Get the (next) batch of reply messages. */
19a26f82
MK
1587 err = netlink_rcv(&nlh, answer);
1588 if (err < 0)
1589 goto out;
1590
1591 recv_len = err;
1592 err = 0;
1593
ebc73a67 1594 /* Satisfy the typing for the netlink macros. */
06f976ca 1595 msg = answer->nlmsghdr;
19a26f82
MK
1596
1597 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 1598 /* Stop reading if we see an error message. */
19a26f82 1599 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1600 struct nlmsgerr *errmsg =
1601 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
1602 err = errmsg->error;
1603 goto out;
1604 }
1605
ebc73a67 1606 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
1607 if (msg->nlmsg_type == NLMSG_DONE) {
1608 readmore = 0;
1609 break;
1610 }
1611
1612 if (msg->nlmsg_type != RTM_NEWADDR) {
1613 err = -1;
1614 goto out;
1615 }
1616
06f976ca
SZ
1617 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
1618 if (ifa->ifa_index == ifindex) {
1619 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
1620 err = -1;
1621 goto out;
1622 }
1623
ebc73a67 1624 /* Found a result, stop searching. */
19a26f82
MK
1625 if (*res)
1626 goto out;
1627 }
1628
ebc73a67
CB
1629 /* Keep reading more data from the socket if the last
1630 * message had the NLF_F_MULTI flag set.
1631 */
19a26f82
MK
1632 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1633
ebc73a67 1634 /* Look at the next message received in this buffer. */
19a26f82
MK
1635 msg = NLMSG_NEXT(msg, recv_len);
1636 }
1637 } while (readmore);
1638
1639 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
1640 * error.
1641 */
19a26f82
MK
1642 err = -1;
1643
1644out:
1645 netlink_close(&nlh);
1646 nlmsg_free(answer);
1647 nlmsg_free(nlmsg);
1648 return err;
1649}
1650
/* IPv6 front end of ip_addr_get(); the address is stored in *res. */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
1655
/* IPv4 front end of ip_addr_get(); the address is stored in *res. */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
1660
f8fee0e2
MK
1661static int ip_gateway_add(int family, int ifindex, void *gw)
1662{
ebc73a67 1663 int addrlen, err;
f8fee0e2 1664 struct nl_handler nlh;
06f976ca 1665 struct rtmsg *rt;
ebc73a67 1666 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 1667
ebc73a67
CB
1668 addrlen = family == AF_INET ? sizeof(struct in_addr)
1669 : sizeof(struct in6_addr);
f8fee0e2
MK
1670
1671 err = netlink_open(&nlh, NETLINK_ROUTE);
1672 if (err)
1673 return err;
1674
1675 err = -ENOMEM;
1676 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1677 if (!nlmsg)
1678 goto out;
1679
06f976ca 1680 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
1681 if (!answer)
1682 goto out;
1683
06f976ca 1684 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1685 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1686 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
1687
1688 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
1689 if (!rt)
1690 goto out;
06f976ca
SZ
1691 rt->rtm_family = family;
1692 rt->rtm_table = RT_TABLE_MAIN;
1693 rt->rtm_scope = RT_SCOPE_UNIVERSE;
1694 rt->rtm_protocol = RTPROT_BOOT;
1695 rt->rtm_type = RTN_UNICAST;
f8fee0e2 1696 /* "default" destination */
06f976ca 1697 rt->rtm_dst_len = 0;
f8fee0e2
MK
1698
1699 err = -EINVAL;
1700 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
1701 goto out;
1702
1703 /* Adding the interface index enables the use of link-local
ebc73a67
CB
1704 * addresses for the gateway.
1705 */
f8fee0e2
MK
1706 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
1707 goto out;
1708
1709 err = netlink_transaction(&nlh, nlmsg, answer);
1710out:
1711 netlink_close(&nlh);
1712 nlmsg_free(answer);
1713 nlmsg_free(nlmsg);
1714 return err;
1715}
1716
/* IPv4 front end of ip_gateway_add(). */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET, ifindex, gateway);
}
1721
/* IPv6 front end of ip_gateway_add(). */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET6, ifindex, gateway);
}
1726
77dcf03a
GL
1727static int ip_route_dest_add(int family, int ifindex, void *dest)
1728{
ebc73a67 1729 int addrlen, err;
77dcf03a 1730 struct nl_handler nlh;
06f976ca 1731 struct rtmsg *rt;
ebc73a67 1732 struct nlmsg *answer = NULL, *nlmsg = NULL;
acf47e1b 1733
ebc73a67
CB
1734 addrlen = family == AF_INET ? sizeof(struct in_addr)
1735 : sizeof(struct in6_addr);
acf47e1b 1736
77dcf03a
GL
1737 err = netlink_open(&nlh, NETLINK_ROUTE);
1738 if (err)
1739 return err;
acf47e1b 1740
77dcf03a
GL
1741 err = -ENOMEM;
1742 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1743 if (!nlmsg)
1744 goto out;
acf47e1b 1745
06f976ca 1746 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
77dcf03a
GL
1747 if (!answer)
1748 goto out;
acf47e1b 1749
06f976ca 1750 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1751 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1752 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
1753
1754 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
1755 if (!rt)
1756 goto out;
06f976ca
SZ
1757 rt->rtm_family = family;
1758 rt->rtm_table = RT_TABLE_MAIN;
1759 rt->rtm_scope = RT_SCOPE_LINK;
1760 rt->rtm_protocol = RTPROT_BOOT;
1761 rt->rtm_type = RTN_UNICAST;
ebc73a67 1762 rt->rtm_dst_len = addrlen * 8;
acf47e1b 1763
77dcf03a
GL
1764 err = -EINVAL;
1765 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
1766 goto out;
1767 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
1768 goto out;
1769 err = netlink_transaction(&nlh, nlmsg, answer);
1770out:
1771 netlink_close(&nlh);
1772 nlmsg_free(answer);
1773 nlmsg_free(nlmsg);
1774 return err;
1775}
1776
/* IPv4 front end of ip_route_dest_add(). */
int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest)
{
	void *target = dest;

	return ip_route_dest_add(AF_INET, ifindex, target);
}
1781
/* IPv6 front end of ip_route_dest_add(). */
int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest)
{
	void *target = dest;

	return ip_route_dest_add(AF_INET6, ifindex, target);
}
1786
/* Decide whether @bridge is to be treated as an openvswitch bridge.
 *
 * The test is purely negative: true is returned when stat() on
 * /sys/class/net/@bridge/bridge fails with ENOENT. false is returned when
 * the path exists, when stat() fails for any other reason, or when the path
 * does not fit into the local buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	struct stat st;
	char path[22 + IFNAMSIZ + 1] = {0};
	int len;

	len = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge", bridge);
	if (len < 0 || (size_t)len >= sizeof(path))
		return false;

	if (stat(path, &st) >= 0)
		return false;

	return errno == ENOENT;
}
1804
581c75e7
CB
/* Argument bundle handed to the ovs-vsctl exec callbacks in this file. */
struct ovs_veth_args {
	const char *bridge; /* passed to ovs-vsctl as the bridge argument */
	const char *nic;    /* passed to ovs-vsctl as the port argument */
};
1809
cb0dc11b
CB
1810/* Called from a background thread - when nic goes away, remove it from the
1811 * bridge.
c43cbc04 1812 */
581c75e7 1813static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 1814{
581c75e7 1815 struct ovs_veth_args *args = data;
cb0dc11b 1816
581c75e7
CB
1817 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
1818 (char *)NULL);
1819 return -1;
c43cbc04
SH
1820}
1821
581c75e7 1822int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 1823{
c43cbc04 1824 int ret;
581c75e7
CB
1825 char cmd_output[MAXPATHLEN];
1826 struct ovs_veth_args args;
6ad22d06 1827
581c75e7
CB
1828 args.bridge = bridge;
1829 args.nic = nic;
1830 ret = run_command(cmd_output, sizeof(cmd_output),
1831 lxc_ovs_delete_port_exec, (void *)&args);
1832 if (ret < 0) {
1833 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
1834 "%s", bridge, nic, cmd_output);
6ad22d06 1835 return -1;
581c75e7 1836 }
0d204771 1837
581c75e7
CB
1838 return 0;
1839}
ebc73a67 1840
581c75e7
CB
1841static int lxc_ovs_attach_bridge_exec(void *data)
1842{
1843 struct ovs_veth_args *args = data;
ebc73a67 1844
581c75e7
CB
1845 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
1846 (char *)NULL);
1847 return -1;
1848}
ebc73a67 1849
581c75e7
CB
1850static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
1851{
1852 int ret;
1853 char cmd_output[MAXPATHLEN];
1854 struct ovs_veth_args args;
ebc73a67 1855
581c75e7
CB
1856 args.bridge = bridge;
1857 args.nic = nic;
1858 ret = run_command(cmd_output, sizeof(cmd_output),
1859 lxc_ovs_attach_bridge_exec, (void *)&args);
1860 if (ret < 0) {
1861 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
1862 bridge, nic, cmd_output);
1863 return -1;
c43cbc04 1864 }
0d204771 1865
581c75e7 1866 return 0;
0d204771 1867}
0d204771 1868
581c75e7 1869int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 1870{
ebc73a67 1871 int err, fd, index;
0ad19a3f 1872 struct ifreq ifr;
1873
dae3fdf6 1874 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 1875 return -EINVAL;
0ad19a3f 1876
1877 index = if_nametoindex(ifname);
1878 if (!index)
3cfc0f3a 1879 return -EINVAL;
0ad19a3f 1880
0d204771 1881 if (is_ovs_bridge(bridge))
581c75e7 1882 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 1883
0ad19a3f 1884 fd = socket(AF_INET, SOCK_STREAM, 0);
1885 if (fd < 0)
3cfc0f3a 1886 return -errno;
0ad19a3f 1887
ebc73a67
CB
1888 strncpy(ifr.ifr_name, bridge, IFNAMSIZ - 1);
1889 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 1890 ifr.ifr_ifindex = index;
7d163508 1891 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 1892 close(fd);
3cfc0f3a
MN
1893 if (err)
1894 err = -errno;
0ad19a3f 1895
1896 return err;
1897}
72d0e1cb 1898
ebc73a67 1899static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 1900 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
1901 [LXC_NET_VETH] = "veth",
1902 [LXC_NET_MACVLAN] = "macvlan",
72d0e1cb 1903 [LXC_NET_PHYS] = "phys",
b343592b
BP
1904 [LXC_NET_VLAN] = "vlan",
1905 [LXC_NET_NONE] = "none",
72d0e1cb
SG
1906};
1907
1908const char *lxc_net_type_to_str(int type)
1909{
1910 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
1911 return NULL;
ebc73a67 1912
72d0e1cb
SG
1913 return lxc_network_types[type];
1914}
8befa924 1915
/* Characters used to replace the 'X' placeholders of a name template. */
static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/* Turn @template into an interface name that is not in use on the host.
 *
 * Every 'X' in @template is replaced by a random character from padchar
 * until the resulting name matches none of the interfaces reported by
 * getifaddrs(). The result is copied back into @template.
 *
 * Returns @template on success. Returns NULL if @template does not fit into
 * IFNAMSIZ bytes, if the interface list cannot be retrieved, or if
 * @template contains no 'X' and the name is already taken (no other name
 * could ever be generated).
 */
char *lxc_mkifname(char *template)
{
	unsigned int seed;
	FILE *urandom;
	struct ifaddrs *ifa, *ifaddr;
	char name[IFNAMSIZ];
	bool exists, randomized;
	size_t i, len, npad;

	len = strlen(template);
	if (len >= IFNAMSIZ)
		return NULL;

	/* Get all the network interfaces. On failure ifaddr must not be
	 * used, neither for the lookup nor for freeifaddrs().
	 */
	if (getifaddrs(&ifaddr) < 0)
		return NULL;

	/* Initialize the random number generator. */
	urandom = fopen("/dev/urandom", "r");
	if (urandom != NULL) {
		if (fread(&seed, sizeof(seed), 1, urandom) != 1)
			seed = time(0);
		fclose(urandom);
	} else {
		seed = time(0);
	}

#ifndef HAVE_RAND_R
	srand(seed);
#endif

	/* NOTE(review): the modulus leaves out the last character of padchar
	 * ('Z'); kept as is so that the generated names do not change.
	 */
	npad = strlen(padchar) - 1;

	/* Generate random names until we find one that doesn't exist. */
	while (true) {
		strcpy(name, template);

		randomized = false;
		for (i = 0; i < len; i++) {
			if (name[i] != 'X')
				continue;
#ifdef HAVE_RAND_R
			name[i] = padchar[rand_r(&seed) % npad];
#else
			name[i] = padchar[rand() % npad];
#endif
			randomized = true;
		}

		exists = false;
		for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
			if (!strcmp(ifa->ifa_name, name)) {
				exists = true;
				break;
			}
		}

		if (!exists)
			break;

		/* Without a placeholder every round yields the same name, so
		 * retrying would loop forever.
		 */
		if (!randomized) {
			freeifaddrs(ifaddr);
			return NULL;
		}
	}

	freeifaddrs(ifaddr);
	return strcpy(template, name);
}
1977
8befa924
SH
1978int setup_private_host_hw_addr(char *veth1)
1979{
ebc73a67 1980 int err, sockfd;
8befa924 1981 struct ifreq ifr;
8befa924 1982
8befa924 1983 sockfd = socket(AF_INET, SOCK_DGRAM, 0);
8befa924
SH
1984 if (sockfd < 0)
1985 return -errno;
1986
ebc73a67
CB
1987 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
1988 if (err < 0 || (size_t)err >= IFNAMSIZ)
1989 return -E2BIG;
1990
8befa924
SH
1991 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
1992 if (err < 0) {
8befa924 1993 close(sockfd);
8befa924
SH
1994 return -errno;
1995 }
1996
1997 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
1998 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924 1999 close(sockfd);
8befa924
SH
2000 if (err < 0)
2001 return -errno;
2002
2003 return 0;
2004}
811ef482
CB
2005
2006int lxc_find_gateway_addresses(struct lxc_handler *handler)
2007{
2008 struct lxc_list *network = &handler->conf->network;
2009 struct lxc_list *iterator;
2010 struct lxc_netdev *netdev;
2011 int link_index;
2012
2013 lxc_list_for_each(iterator, network) {
2014 netdev = iterator->elem;
2015
2016 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2017 continue;
2018
2019 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2020 ERROR("Automatic gateway detection is only supported "
2021 "for veth and macvlan");
2022 return -1;
2023 }
2024
de4855a8 2025 if (netdev->link[0] == '\0') {
811ef482
CB
2026 ERROR("Automatic gateway detection needs a link interface");
2027 return -1;
2028 }
2029
2030 link_index = if_nametoindex(netdev->link);
2031 if (!link_index)
2032 return -EINVAL;
2033
2034 if (netdev->ipv4_gateway_auto) {
2035 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2036 ERROR("Failed to automatically find ipv4 gateway "
2037 "address from link interface \"%s\"", netdev->link);
2038 return -1;
2039 }
2040 }
2041
2042 if (netdev->ipv6_gateway_auto) {
2043 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2044 ERROR("Failed to automatically find ipv6 gateway "
2045 "address from link interface \"%s\"", netdev->link);
2046 return -1;
2047 }
2048 }
2049 }
2050
2051 return 0;
2052}
2053
2054#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
74c6e2b0
CB
2055static int lxc_create_network_unpriv_exec(const char *lxcpath, char *lxcname,
2056 struct lxc_netdev *netdev, pid_t pid)
811ef482
CB
2057{
2058 int ret;
2059 pid_t child;
2060 int bytes, pipefd[2];
2061 char *token, *saveptr = NULL;
2062 char netdev_link[IFNAMSIZ + 1];
2063 char buffer[MAXPATHLEN] = {0};
2064
2065 if (netdev->type != LXC_NET_VETH) {
2066 ERROR("Network type %d not support for unprivileged use", netdev->type);
2067 return -1;
2068 }
2069
2070 ret = pipe(pipefd);
2071 if (ret < 0) {
2072 SYSERROR("Failed to create pipe");
2073 return -1;
2074 }
2075
2076 child = fork();
2077 if (child < 0) {
2078 SYSERROR("Failed to create new process");
2079 close(pipefd[0]);
2080 close(pipefd[1]);
2081 return -1;
2082 }
2083
2084 if (child == 0) {
2085 int ret;
2086 char pidstr[LXC_NUMSTRLEN64];
2087
2088 close(pipefd[0]);
2089
2090 ret = dup2(pipefd[1], STDOUT_FILENO);
2091 if (ret >= 0)
2092 ret = dup2(pipefd[1], STDERR_FILENO);
2093 close(pipefd[1]);
2094 if (ret < 0) {
2095 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2096 exit(EXIT_FAILURE);
2097 }
2098
de4855a8 2099 if (netdev->link[0] != '\0')
811ef482
CB
2100 strncpy(netdev_link, netdev->link, IFNAMSIZ);
2101 else
2102 strncpy(netdev_link, "none", IFNAMSIZ);
2103
2104 ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
2105 if (ret < 0 || ret >= LXC_NUMSTRLEN64)
2106 exit(EXIT_FAILURE);
2107 pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
2108
2109 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2110 lxcname, pidstr, netdev_link,
de4855a8
CB
2111 netdev->name[0] != '\0' ? netdev->name : "(null)");
2112 if (netdev->name[0] != '\0')
811ef482
CB
2113 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2114 lxcpath, lxcname, pidstr, "veth", netdev_link,
2115 netdev->name, (char *)NULL);
2116 else
2117 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2118 lxcpath, lxcname, pidstr, "veth", netdev_link,
2119 (char *)NULL);
2120 SYSERROR("Failed to execute lxc-user-nic");
2121 exit(EXIT_FAILURE);
2122 }
2123
2124 /* close the write-end of the pipe */
2125 close(pipefd[1]);
2126
2127 bytes = read(pipefd[0], &buffer, MAXPATHLEN);
2128 if (bytes < 0) {
74c6e2b0 2129 SYSERROR("Failed to read from pipe file descriptor");
811ef482
CB
2130 close(pipefd[0]);
2131 return -1;
2132 }
2133 buffer[bytes - 1] = '\0';
2134
2135 ret = wait_for_pid(child);
2136 close(pipefd[0]);
2137 if (ret != 0) {
2138 ERROR("lxc-user-nic failed to configure requested network: %s",
2139 buffer[0] != '\0' ? buffer : "(null)");
2140 return -1;
2141 }
2142 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2143
2144 /* netdev->name */
2145 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2146 if (!token) {
2147 ERROR("Failed to parse lxc-user-nic output");
811ef482 2148 return -1;
74c6e2b0 2149 }
811ef482 2150
811ef482
CB
2151 memset(netdev->name, 0, IFNAMSIZ + 1);
2152 strncpy(netdev->name, token, IFNAMSIZ);
2153
74c6e2b0 2154 /* netdev->ifindex */
811ef482 2155 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2156 if (!token) {
2157 ERROR("Failed to parse lxc-user-nic output");
811ef482 2158 return -1;
74c6e2b0 2159 }
811ef482 2160
74c6e2b0
CB
2161 ret = lxc_safe_int(token, &netdev->ifindex);
2162 if (ret < 0) {
2163 ERROR("%s - Failed to convert string \"%s\" to integer",
2164 strerror(-ret), token);
811ef482
CB
2165 return -1;
2166 }
2167
74c6e2b0 2168 /* netdev->priv.veth_attr.veth1 */
811ef482 2169 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2170 if (!token) {
2171 ERROR("Failed to parse lxc-user-nic output");
811ef482 2172 return -1;
74c6e2b0 2173 }
811ef482 2174
74c6e2b0
CB
2175 if (strlen(token) >= IFNAMSIZ) {
2176 ERROR("Host side veth device name returned by lxc-user-nic is "
2177 "too long");
2178 return -E2BIG;
2179 }
2180 strcpy(netdev->priv.veth_attr.veth1, token);
2181
2182 /* netdev->priv.veth_attr.ifindex */
2183 token = strtok_r(NULL, ":", &saveptr);
2184 if (!token) {
2185 ERROR("Failed to parse lxc-user-nic output");
2186 return -1;
2187 }
2188
2189 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2190 if (ret < 0) {
74c6e2b0
CB
2191 ERROR("%s - Failed to convert string \"%s\" to integer",
2192 strerror(-ret), token);
811ef482
CB
2193 return -1;
2194 }
2195
2196 return 0;
2197}
2198
1bd8d726
CB
2199static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
2200 struct lxc_netdev *netdev,
2201 const char *netns_path)
811ef482
CB
2202{
2203 int bytes, ret;
2204 pid_t child;
2205 int pipefd[2];
2206 char buffer[MAXPATHLEN] = {0};
2207
2208 if (netdev->type != LXC_NET_VETH) {
2209 ERROR("Network type %d not support for unprivileged use", netdev->type);
2210 return -1;
2211 }
2212
2213 ret = pipe(pipefd);
2214 if (ret < 0) {
2215 SYSERROR("Failed to create pipe");
2216 return -1;
2217 }
2218
2219 child = fork();
2220 if (child < 0) {
2221 SYSERROR("Failed to create new process");
2222 close(pipefd[0]);
2223 close(pipefd[1]);
2224 return -1;
2225 }
2226
2227 if (child == 0) {
8843fde4 2228 char *hostveth;
811ef482 2229 int ret;
811ef482
CB
2230
2231 close(pipefd[0]);
2232
2233 ret = dup2(pipefd[1], STDOUT_FILENO);
2234 if (ret >= 0)
2235 ret = dup2(pipefd[1], STDERR_FILENO);
2236 close(pipefd[1]);
2237 if (ret < 0) {
2238 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2239 exit(EXIT_FAILURE);
2240 }
2241
8843fde4
CB
2242 if (netdev->priv.veth_attr.pair[0] != '\0')
2243 hostveth = netdev->priv.veth_attr.pair;
2244 else
2245 hostveth = netdev->priv.veth_attr.veth1;
2246 if (hostveth[0] == '\0') {
74c6e2b0
CB
2247 SYSERROR("Host side veth device name is missing");
2248 exit(EXIT_FAILURE);
2249 }
2250
de4855a8 2251 if (netdev->link[0] == '\0') {
811ef482 2252 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0
CB
2253 "missing", netdev->priv.veth_attr.veth1);
2254 exit(EXIT_FAILURE);
2255 }
811ef482 2256
811ef482 2257 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2258 lxcname, netns_path, netdev->link, hostveth);
811ef482 2259 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2260 lxcname, netns_path, "veth", netdev->link, hostveth,
2261 (char *)NULL);
811ef482
CB
2262 SYSERROR("Failed to exec lxc-user-nic.");
2263 exit(EXIT_FAILURE);
2264 }
2265
2266 close(pipefd[1]);
2267
2268 bytes = read(pipefd[0], &buffer, MAXPATHLEN);
2269 if (bytes < 0) {
2270 SYSERROR("Failed to read from pipe file descriptor.");
2271 close(pipefd[0]);
2272 return -1;
2273 }
2274 buffer[bytes - 1] = '\0';
2275
2276 if (wait_for_pid(child) != 0) {
2277 ERROR("lxc-user-nic failed to delete requested network: %s",
2278 buffer[0] != '\0' ? buffer : "(null)");
2279 close(pipefd[0]);
2280 return -1;
2281 }
2282
2283 close(pipefd[0]);
2284
2285 return 0;
2286}
2287
1bd8d726
CB
2288bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2289{
2290 int ret;
2291 struct lxc_list *iterator;
2292 struct lxc_list *network = &handler->conf->network;
2293 /* strlen("/proc/") = 6
2294 * +
2295 * LXC_NUMSTRLEN64
2296 * +
2297 * strlen("/fd/") = 4
2298 * +
2299 * LXC_NUMSTRLEN64
2300 * +
2301 * \0
2302 */
2303 char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1];
2304 bool deleted_all = true;
2305
d0fbc7ba 2306 if (handler->am_root)
1bd8d726
CB
2307 return true;
2308
2309 *netns_path = '\0';
2310
2311 if (handler->netnsfd < 0) {
2312 DEBUG("Cannot not guarantee safe deletion of network devices. "
2313 "Manual cleanup maybe needed");
2314 return false;
2315 }
2316
2317 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
2318 getpid(), handler->netnsfd);
2319 if (ret < 0 || ret >= sizeof(netns_path))
2320 return false;
2321
2322 lxc_list_for_each(iterator, network) {
2323 char *hostveth = NULL;
2324 struct lxc_netdev *netdev = iterator->elem;
2325
2326 /* We can only delete devices whose ifindex we have. If we don't
2327 * have the index it means that we didn't create it.
2328 */
2329 if (!netdev->ifindex)
2330 continue;
2331
2332 if (netdev->type == LXC_NET_PHYS) {
2333 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2334 netdev->link);
2335 if (ret < 0)
2336 WARN("Failed to rename interface with index %d "
2337 "to its initial name \"%s\"",
2338 netdev->ifindex, netdev->link);
2339 else
2340 TRACE("Renamed interface with index %d to its "
2341 "initial name \"%s\"",
2342 netdev->ifindex, netdev->link);
2343 continue;
2344 }
2345
2346 ret = netdev_deconf[netdev->type](handler, netdev);
2347 if (ret < 0)
2348 WARN("Failed to deconfigure network device");
2349
2350 if (netdev->type != LXC_NET_VETH)
2351 continue;
2352
2353 if (!is_ovs_bridge(netdev->link))
2354 continue;
2355
8843fde4
CB
2356 if (netdev->priv.veth_attr.pair[0] != '\0')
2357 hostveth = netdev->priv.veth_attr.pair;
2358 else
2359 hostveth = netdev->priv.veth_attr.veth1;
2360 if (hostveth[0] == '\0')
2361 continue;
2362
1bd8d726
CB
2363 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2364 handler->name, netdev,
2365 netns_path);
2366 if (ret < 0) {
2367 deleted_all = false;
2368 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2369 "bridge \"%s\"", hostveth, netdev->link);
1bd8d726
CB
2370 continue;
2371 }
2372 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2373 netdev->link);
2374 }
2375
2376 return deleted_all;
2377}
2378
811ef482
CB
2379int lxc_create_network_priv(struct lxc_handler *handler)
2380{
811ef482
CB
2381 struct lxc_list *iterator;
2382 struct lxc_list *network = &handler->conf->network;
2383
d0fbc7ba 2384 if (!handler->am_root)
811ef482
CB
2385 return 0;
2386
2387 lxc_list_for_each(iterator, network) {
2388 struct lxc_netdev *netdev = iterator->elem;
2389
2390 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
2391 ERROR("Invalid network configuration type %d", netdev->type);
2392 return -1;
2393 }
2394
2395 if (netdev_conf[netdev->type](handler, netdev)) {
2396 ERROR("Failed to create network device");
2397 return -1;
2398 }
2399
2400 }
2401
2402 return 0;
2403}
2404
74c6e2b0
CB
2405int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
2406 struct lxc_list *network, pid_t pid)
811ef482 2407{
535e8859 2408 int ret;
811ef482
CB
2409 char ifname[IFNAMSIZ];
2410 struct lxc_list *iterator;
2411
74c6e2b0
CB
2412 if (am_unpriv())
2413 return 0;
811ef482
CB
2414
2415 lxc_list_for_each(iterator, network) {
2416 struct lxc_netdev *netdev = iterator->elem;
2417
811ef482
CB
2418 if (!netdev->ifindex)
2419 continue;
2420
2421 /* retrieve the name of the interface */
2422 if (!if_indextoname(netdev->ifindex, ifname)) {
2423 ERROR("No interface corresponding to ifindex \"%d\"",
2424 netdev->ifindex);
2425 return -1;
2426 }
2427
535e8859
CB
2428 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
2429 if (ret) {
811ef482
CB
2430 ERROR("Failed to move network device \"%s\" to "
2431 "network namespace %d: %s", ifname, pid,
535e8859 2432 strerror(-ret));
811ef482
CB
2433 return -1;
2434 }
2435
2436 DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
790255cf 2437 "of %d",
535e8859 2438 ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
811ef482
CB
2439 pid);
2440 }
2441
2442 return 0;
2443}
2444
74c6e2b0
CB
2445int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
2446 struct lxc_list *network, pid_t pid)
2447{
2448 struct lxc_list *iterator;
2449
2450 if (!am_unpriv())
2451 return 0;
2452
2453 lxc_list_for_each(iterator, network) {
2454 struct lxc_netdev *netdev = iterator->elem;
2455
2456 if (netdev->type == LXC_NET_EMPTY)
2457 continue;
2458
2459 if (netdev->type == LXC_NET_NONE)
2460 continue;
2461
2462 if (netdev->type != LXC_NET_VETH) {
2463 ERROR("Networks of type %s are not supported by "
2464 "unprivileged containers",
2465 lxc_net_type_to_str(netdev->type));
2466 return -1;
2467 }
2468
2469 if (netdev->mtu)
2470 INFO("mtu ignored due to insufficient privilege");
2471
2472 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev, pid))
2473 return -1;
2474 }
2475
2476 return 0;
2477}
2478
1bd8d726 2479bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
2480{
2481 int ret;
2482 struct lxc_list *iterator;
2483 struct lxc_list *network = &handler->conf->network;
2484 bool deleted_all = true;
2485
d0fbc7ba 2486 if (!handler->am_root)
1bd8d726
CB
2487 return true;
2488
811ef482
CB
2489 lxc_list_for_each(iterator, network) {
2490 char *hostveth = NULL;
2491 struct lxc_netdev *netdev = iterator->elem;
2492
2493 /* We can only delete devices whose ifindex we have. If we don't
2494 * have the index it means that we didn't create it.
2495 */
2496 if (!netdev->ifindex)
2497 continue;
2498
2499 if (netdev->type == LXC_NET_PHYS) {
2500 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
2501 if (ret < 0)
2502 WARN("Failed to rename interface with index %d "
b809f232
CB
2503 "from \"%s\" to its initial name \"%s\"",
2504 netdev->ifindex, netdev->name, netdev->link);
811ef482 2505 else
29589196
CB
2506 TRACE("Renamed interface with index %d from "
2507 "\"%s\" to its initial name \"%s\"",
2508 netdev->ifindex, netdev->name,
2509 netdev->link);
811ef482
CB
2510 continue;
2511 }
2512
2513 ret = netdev_deconf[netdev->type](handler, netdev);
2514 if (ret < 0)
2515 WARN("Failed to deconfigure network device");
2516
2517 /* Recent kernels remove the virtual interfaces when the network
2518 * namespace is destroyed but in case we did not move the
2519 * interface to the network namespace, we have to destroy it.
2520 */
1bd8d726
CB
2521 ret = lxc_netdev_delete_by_index(netdev->ifindex);
2522 if (-ret == ENODEV) {
2523 INFO("Interface \"%s\" with index %d already "
2524 "deleted or existing in different network "
2525 "namespace",
de4855a8 2526 netdev->name[0] != '\0' ? netdev->name : "(null)",
1bd8d726
CB
2527 netdev->ifindex);
2528 } else if (ret < 0) {
2529 deleted_all = false;
2530 WARN("Failed to remove interface \"%s\" with "
2531 "index %d: %s",
de4855a8 2532 netdev->name[0] != '\0' ? netdev->name : "(null)",
1bd8d726
CB
2533 netdev->ifindex, strerror(-ret));
2534 continue;
811ef482 2535 }
1bd8d726 2536 INFO("Removed interface \"%s\" with index %d",
de4855a8 2537 netdev->name[0] != '\0' ? netdev->name : "(null)",
1bd8d726 2538 netdev->ifindex);
811ef482
CB
2539
2540 if (netdev->type != LXC_NET_VETH)
2541 continue;
2542
811ef482
CB
2543 /* Explicitly delete host veth device to prevent lingering
2544 * devices. We had issues in LXD around this.
2545 */
de4855a8 2546 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
2547 hostveth = netdev->priv.veth_attr.pair;
2548 else
2549 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 2550 if (hostveth[0] == '\0')
811ef482
CB
2551 continue;
2552
2553 ret = lxc_netdev_delete_by_name(hostveth);
2554 if (ret < 0) {
2555 deleted_all = false;
2556 WARN("Failed to remove interface \"%s\" from \"%s\": %s",
2557 hostveth, netdev->link, strerror(-ret));
2558 continue;
2559 }
2560 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
2561
2562 if (!is_ovs_bridge(netdev->link)) {
2563 netdev->priv.veth_attr.veth1[0] = '\0';
2564 continue;
2565 }
2566
2567 /* Delete the openvswitch port. */
2568 ret = lxc_ovs_delete_port(netdev->link, hostveth);
2569 if (ret < 0)
2570 WARN("Failed to remove port \"%s\" from openvswitch "
2571 "bridge \"%s\"", hostveth, netdev->link);
2572 else
2573 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
2574 hostveth, netdev->link);
2575
2576 netdev->priv.veth_attr.veth1[0] = '\0';
2577 }
2578
2579 return deleted_all;
2580}
2581
2582int lxc_requests_empty_network(struct lxc_handler *handler)
2583{
2584 struct lxc_list *network = &handler->conf->network;
2585 struct lxc_list *iterator;
2586 bool found_none = false, found_nic = false;
2587
2588 if (lxc_list_empty(network))
2589 return 0;
2590
2591 lxc_list_for_each(iterator, network) {
2592 struct lxc_netdev *netdev = iterator->elem;
2593
2594 if (netdev->type == LXC_NET_NONE)
2595 found_none = true;
2596 else
2597 found_nic = true;
2598 }
2599 if (found_none && !found_nic)
2600 return 1;
2601 return 0;
2602}
2603
2604/* try to move physical nics to the init netns */
b809f232 2605int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
2606{
2607 int ret;
b809f232 2608 int oldfd;
811ef482 2609 char ifname[IFNAMSIZ];
b809f232
CB
2610 struct lxc_list *iterator;
2611 int netnsfd = handler->netnsfd;
2612 struct lxc_conf *conf = handler->conf;
811ef482 2613
b809f232
CB
2614 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
2615 * the parent network namespace. We won't have this capability if we are
2616 * unprivileged.
2617 */
d0fbc7ba 2618 if (!handler->am_root)
b809f232 2619 return 0;
811ef482 2620
b809f232 2621 TRACE("Moving physical network devices back to parent network namespace");
811ef482
CB
2622
2623 oldfd = lxc_preserve_ns(getpid(), "net");
2624 if (oldfd < 0) {
2625 SYSERROR("Failed to preserve network namespace");
b809f232 2626 return -1;
811ef482
CB
2627 }
2628
b809f232 2629 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
2630 if (ret < 0) {
2631 SYSERROR("Failed to enter network namespace");
2632 close(oldfd);
b809f232 2633 return -1;
811ef482
CB
2634 }
2635
b809f232
CB
2636 lxc_list_for_each(iterator, &conf->network) {
2637 struct lxc_netdev *netdev = iterator->elem;
811ef482 2638
b809f232
CB
2639 if (netdev->type != LXC_NET_PHYS)
2640 continue;
2641
2642 /* Retrieve the name of the interface in the container's network
2643 * namespace.
2644 */
2645 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 2646 WARN("No interface corresponding to ifindex %d",
b809f232 2647 netdev->ifindex);
811ef482
CB
2648 continue;
2649 }
b809f232
CB
2650
2651 ret = lxc_netdev_move_by_name(ifname, 1, netdev->link);
2652 if (ret < 0)
811ef482
CB
2653 WARN("Error moving network device \"%s\" back to "
2654 "network namespace", ifname);
b809f232
CB
2655 else
2656 TRACE("Moved network device \"%s\" back to network "
2657 "namespace", ifname);
811ef482 2658 }
811ef482 2659
b809f232 2660 ret = setns(oldfd, CLONE_NEWNET);
811ef482 2661 close(oldfd);
b809f232
CB
2662 if (ret < 0) {
2663 SYSERROR("Failed to enter network namespace");
2664 return -1;
2665 }
2666
2667 return 0;
811ef482
CB
2668}
2669
2670static int setup_hw_addr(char *hwaddr, const char *ifname)
2671{
2672 struct sockaddr sockaddr;
2673 struct ifreq ifr;
2674 int ret, fd, saved_errno;
2675
2676 ret = lxc_convert_mac(hwaddr, &sockaddr);
2677 if (ret) {
2678 ERROR("Mac address \"%s\" conversion failed: %s", hwaddr,
2679 strerror(-ret));
2680 return -1;
2681 }
2682
2683 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
2684 ifr.ifr_name[IFNAMSIZ-1] = '\0';
2685 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2686
2687 fd = socket(AF_INET, SOCK_DGRAM, 0);
2688 if (fd < 0)
2689 return -1;
2690
2691 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2692 saved_errno = errno;
2693 close(fd);
2694 if (ret)
2695 ERROR("Failed to perform ioctl: %s", strerror(saved_errno));
2696
2697 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
2698 ifr.ifr_name);
2699
2700 return ret;
2701}
2702
2703static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
2704{
2705 struct lxc_list *iterator;
2706 int err;
2707
2708 lxc_list_for_each(iterator, ip) {
2709 struct lxc_inetdev *inetdev = iterator->elem;
2710
2711 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2712 &inetdev->bcast, inetdev->prefix);
2713 if (err) {
2714 ERROR("Failed to setup ipv4 address for network device "
2715 "with eifindex %d: %s", ifindex, strerror(-err));
2716 return -1;
2717 }
2718 }
2719
2720 return 0;
2721}
2722
2723static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
2724{
2725 struct lxc_list *iterator;
2726 int err;
2727
2728 lxc_list_for_each(iterator, ip) {
2729 struct lxc_inet6dev *inet6dev = iterator->elem;
2730
2731 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
2732 &inet6dev->mcast, &inet6dev->acast,
2733 inet6dev->prefix);
2734 if (err) {
2735 ERROR("Failed to setup ipv6 address for network device "
2736 "with eifindex %d: %s", ifindex, strerror(-err));
2737 return -1;
2738 }
2739 }
2740
2741 return 0;
2742}
2743
2744static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
2745{
2746 char ifname[IFNAMSIZ];
2747 int err;
2748 const char *net_type_name;
2749 char *current_ifname = ifname;
2750
2751 /* empty network namespace */
2752 if (!netdev->ifindex) {
2753 if (netdev->flags & IFF_UP) {
2754 err = lxc_netdev_up("lo");
2755 if (err) {
2756 ERROR("Failed to set the loopback network "
2757 "device up: %s",
2758 strerror(-err));
2759 return -1;
2760 }
2761 }
2762
2763 if (netdev->type == LXC_NET_EMPTY)
2764 return 0;
2765
2766 if (netdev->type == LXC_NET_NONE)
2767 return 0;
2768
2769 if (netdev->type != LXC_NET_VETH) {
2770 net_type_name = lxc_net_type_to_str(netdev->type);
2771 ERROR("%s networks are not supported for containers "
535e8859 2772 "not setup up by privileged users", net_type_name);
811ef482
CB
2773 return -1;
2774 }
2775
2776 netdev->ifindex = if_nametoindex(netdev->name);
2777 }
2778
2779 /* get the new ifindex in case of physical netdev */
2780 if (netdev->type == LXC_NET_PHYS) {
2781 netdev->ifindex = if_nametoindex(netdev->link);
2782 if (!netdev->ifindex) {
2783 ERROR("Failed to get ifindex for network device \"%s\"",
2784 netdev->link);
2785 return -1;
2786 }
2787 }
2788
2789 /* retrieve the name of the interface */
2790 if (!if_indextoname(netdev->ifindex, current_ifname)) {
2791 ERROR("Failed get name for network device with ifindex %d",
2792 netdev->ifindex);
2793 return -1;
2794 }
2795
2796 /* Default: let the system to choose one interface name.
2797 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
2798 * netlink will replace the format specifier with an appropriate index.
2799 */
de4855a8
CB
2800 if (netdev->name[0] == '\0') {
2801 if (netdev->type == LXC_NET_PHYS)
2802 strcpy(netdev->name, netdev->link);
2803 else
2804 strcpy(netdev->name, "eth%d");
2805 }
811ef482
CB
2806
2807 /* rename the interface name */
2808 if (strcmp(ifname, netdev->name) != 0) {
2809 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2810 if (err) {
2811 ERROR("Failed to rename network device \"%s\" to "
2812 "\"%s\": %s", ifname, netdev->name, strerror(-err));
2813 return -1;
2814 }
2815 }
2816
2817 /* Re-read the name of the interface because its name has changed
2818 * and would be automatically allocated by the system
2819 */
2820 if (!if_indextoname(netdev->ifindex, current_ifname)) {
2821 ERROR("Failed get name for network device with ifindex %d",
2822 netdev->ifindex);
2823 return -1;
2824 }
2825
790255cf
CB
2826 /* Now update the recorded name of the network device to reflect the
2827 * name of the network device in the child's network namespace. We will
2828 * later on send this information back to the parent.
2829 */
2830 strcpy(netdev->name, current_ifname);
2831
811ef482
CB
2832 /* set a mac address */
2833 if (netdev->hwaddr) {
2834 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
2835 ERROR("Failed to setup hw address for network device \"%s\"",
2836 current_ifname);
2837 return -1;
2838 }
2839 }
2840
2841 /* setup ipv4 addresses on the interface */
2842 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
2843 ERROR("Failed to setup ip addresses for network device \"%s\"",
2844 ifname);
2845 return -1;
2846 }
2847
2848 /* setup ipv6 addresses on the interface */
2849 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
2850 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
2851 ifname);
2852 return -1;
2853 }
2854
2855 /* set the network device up */
2856 if (netdev->flags & IFF_UP) {
2857 int err;
2858
2859 err = lxc_netdev_up(current_ifname);
2860 if (err) {
2861 ERROR("Failed to set network device \"%s\" up: %s",
2862 current_ifname, strerror(-err));
2863 return -1;
2864 }
2865
2866 /* the network is up, make the loopback up too */
2867 err = lxc_netdev_up("lo");
2868 if (err) {
2869 ERROR("Failed to set the loopback network device up: %s",
2870 strerror(-err));
2871 return -1;
2872 }
2873 }
2874
2875 /* We can only set up the default routes after bringing
2876 * up the interface, sine bringing up the interface adds
2877 * the link-local routes and we can't add a default
2878 * route if the gateway is not reachable. */
2879
2880 /* setup ipv4 gateway on the interface */
2881 if (netdev->ipv4_gateway) {
2882 if (!(netdev->flags & IFF_UP)) {
2883 ERROR("Cannot add ipv4 gateway for network device "
2884 "\"%s\" when not bringing up the interface", ifname);
2885 return -1;
2886 }
2887
2888 if (lxc_list_empty(&netdev->ipv4)) {
2889 ERROR("Cannot add ipv4 gateway for network device "
2890 "\"%s\" when not assigning an address", ifname);
2891 return -1;
2892 }
2893
2894 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2895 if (err) {
2896 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2897 if (err) {
2898 ERROR("Failed to add ipv4 dest for network "
2899 "device \"%s\": %s", ifname, strerror(-err));
2900 }
2901
2902 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2903 if (err) {
2904 ERROR("Failed to setup ipv4 gateway for "
2905 "network device \"%s\": %s",
2906 ifname, strerror(-err));
2907 if (netdev->ipv4_gateway_auto) {
2908 char buf[INET_ADDRSTRLEN];
2909 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2910 ERROR("Fried to set autodetected ipv4 gateway \"%s\"", buf);
2911 }
2912 return -1;
2913 }
2914 }
2915 }
2916
2917 /* setup ipv6 gateway on the interface */
2918 if (netdev->ipv6_gateway) {
2919 if (!(netdev->flags & IFF_UP)) {
2920 ERROR("Cannot add ipv6 gateway for network device "
2921 "\"%s\" when not bringing up the interface", ifname);
2922 return -1;
2923 }
2924
2925 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2926 ERROR("Cannot add ipv6 gateway for network device "
2927 "\"%s\" when not assigning an address", ifname);
2928 return -1;
2929 }
2930
2931 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2932 if (err) {
2933 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2934 if (err) {
2935 ERROR("Failed to add ipv6 dest for network "
2936 "device \"%s\": %s", ifname, strerror(-err));
2937 }
2938
2939 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2940 if (err) {
2941 ERROR("Failed to setup ipv6 gateway for "
2942 "network device \"%s\": %s", ifname,
2943 strerror(-err));
2944 if (netdev->ipv6_gateway_auto) {
2945 char buf[INET6_ADDRSTRLEN];
2946 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2947 ERROR("Tried to set autodetected ipv6 "
2948 "gateway for network device "
2949 "\"%s\"", buf);
2950 }
2951 return -1;
2952 }
2953 }
2954 }
2955
74c6e2b0 2956 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
2957
2958 return 0;
2959}
2960
2961int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
2962 struct lxc_list *network)
2963{
2964 struct lxc_list *iterator;
2965 struct lxc_netdev *netdev;
2966
811ef482
CB
2967 lxc_list_for_each(iterator, network) {
2968 netdev = iterator->elem;
2969
2970 /* REMOVE in LXC 3.0 */
2971 if (netdev->idx < 0) {
2972 ERROR("WARNING: using \"lxc.network.*\" keys to define "
2973 "networks is DEPRECATED, please switch to using "
2974 "\"lxc.net.[i].* keys\"");
2975 }
2976
2977 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
2978 ERROR("failed to setup netdev");
2979 return -1;
2980 }
2981 }
2982
2983 if (!lxc_list_empty(network))
2984 INFO("network has been setup");
2985
2986 return 0;
2987}
7ab1ba02
CB
2988
2989int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
2990{
2991 struct lxc_list *iterator;
2992 struct lxc_list *network = &handler->conf->network;
2993 int data_sock = handler->data_sock[0];
2994
d0fbc7ba 2995 if (handler->am_root)
7ab1ba02
CB
2996 return 0;
2997
2998 lxc_list_for_each(iterator, network) {
2999 int ret;
3000 struct lxc_netdev *netdev = iterator->elem;
3001
3002 if (netdev->type != LXC_NET_VETH)
3003 continue;
3004
a1ae535a 3005 ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3006 if (ret < 0)
7ab1ba02 3007 return -1;
7729f8e5 3008 TRACE("Sent network device name \"%s\" to child", netdev->name);
7ab1ba02
CB
3009 }
3010
3011 return 0;
3012}
3013
3014int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3015{
3016 struct lxc_list *iterator;
3017 struct lxc_list *network = &handler->conf->network;
3018 int data_sock = handler->data_sock[1];
3019
d0fbc7ba 3020 if (handler->am_root)
7ab1ba02
CB
3021 return 0;
3022
3023 lxc_list_for_each(iterator, network) {
3024 int ret;
3025 struct lxc_netdev *netdev = iterator->elem;
3026
3027 if (netdev->type != LXC_NET_VETH)
3028 continue;
3029
a1ae535a 3030 ret = recv(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3031 if (ret < 0)
7ab1ba02 3032 return -1;
7729f8e5 3033 TRACE("Received network device name \"%s\" from parent", netdev->name);
7ab1ba02
CB
3034 }
3035
3036 return 0;
3037}
a1ae535a
CB
3038
3039int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3040{
3041 struct lxc_list *iterator, *network;
3042 int data_sock = handler->data_sock[0];
3043
3044 if (!handler->am_root)
3045 return 0;
3046
3047 network = &handler->conf->network;
3048 lxc_list_for_each(iterator, network) {
3049 int ret;
3050 struct lxc_netdev *netdev = iterator->elem;
3051
3052 /* Send network device name in the child's namespace to parent. */
3053 ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
3054 if (ret < 0)
7729f8e5 3055 return -1;
a1ae535a
CB
3056
3057 /* Send network device ifindex in the child's namespace to
3058 * parent.
3059 */
3060 ret = send(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
3061 if (ret < 0)
7729f8e5 3062 return -1;
a1ae535a
CB
3063 }
3064
3065 TRACE("Sent network device names and ifindeces to parent");
3066 return 0;
a1ae535a
CB
3067}
3068
3069int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3070{
3071 struct lxc_list *iterator, *network;
3072 int data_sock = handler->data_sock[1];
3073
3074 if (!handler->am_root)
3075 return 0;
3076
3077 network = &handler->conf->network;
3078 lxc_list_for_each(iterator, network) {
3079 int ret;
3080 struct lxc_netdev *netdev = iterator->elem;
3081
3082 /* Receive network device name in the child's namespace to
3083 * parent.
3084 */
3085 ret = recv(data_sock, netdev->name, IFNAMSIZ, 0);
3086 if (ret < 0)
7729f8e5 3087 return -1;
a1ae535a
CB
3088
3089 /* Receive network device ifindex in the child's namespace to
3090 * parent.
3091 */
3092 ret = recv(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
3093 if (ret < 0)
7729f8e5 3094 return -1;
a1ae535a
CB
3095 }
3096
3097 return 0;
a1ae535a 3098}