]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: fix grammar
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
cb0dc11b 23
811ef482 24#define _GNU_SOURCE
cb0dc11b
CB
25#include <ctype.h>
26#include <errno.h>
27#include <fcntl.h>
0ad19a3f 28#include <stdio.h>
0ad19a3f 29#include <stdlib.h>
0ad19a3f 30#include <string.h>
dd1d77f9 31#include <time.h>
cb0dc11b 32#include <unistd.h>
0ad19a3f 33#include <arpa/inet.h>
0ad19a3f 34#include <linux/netlink.h>
35#include <linux/rtnetlink.h>
36#include <linux/sockios.h>
cb0dc11b
CB
37#include <net/ethernet.h>
38#include <net/if.h>
39#include <net/if_arp.h>
40#include <netinet/in.h>
41#include <sys/inotify.h>
42#include <sys/ioctl.h>
43#include <sys/param.h>
44#include <sys/socket.h>
45#include <sys/stat.h>
46#include <sys/types.h>
f549edcc 47
7ab1ba02 48#include "af_unix.h"
72d0e1cb 49#include "conf.h"
811ef482 50#include "config.h"
cb0dc11b
CB
51#include "log.h"
52#include "network.h"
53#include "nl.h"
0d204771 54#include "utils.h"
0ad19a3f 55
a0265685
SG
56#if HAVE_IFADDRS_H
57#include <ifaddrs.h>
58#else
59#include <../include/ifaddrs.h>
60#endif
61
0ad19a3f 62#ifndef IFLA_LINKMODE
cb0dc11b 63#define IFLA_LINKMODE 17
0ad19a3f 64#endif
65
66#ifndef IFLA_LINKINFO
cb0dc11b 67#define IFLA_LINKINFO 18
0ad19a3f 68#endif
69
70#ifndef IFLA_NET_NS_PID
cb0dc11b 71#define IFLA_NET_NS_PID 19
0ad19a3f 72#endif
73
74#ifndef IFLA_INFO_KIND
cb0dc11b 75#define IFLA_INFO_KIND 1
0ad19a3f 76#endif
77
26c39028 78#ifndef IFLA_VLAN_ID
cb0dc11b 79#define IFLA_VLAN_ID 1
26c39028
JHS
80#endif
81
0ad19a3f 82#ifndef IFLA_INFO_DATA
cb0dc11b 83#define IFLA_INFO_DATA 2
0ad19a3f 84#endif
85
86#ifndef VETH_INFO_PEER
cb0dc11b 87#define VETH_INFO_PEER 1
0ad19a3f 88#endif
89
e892973e 90#ifndef IFLA_MACVLAN_MODE
cb0dc11b 91#define IFLA_MACVLAN_MODE 1
e892973e
DL
92#endif
93
cb0dc11b 94lxc_log_define(lxc_network, lxc);
f8fee0e2 95
811ef482
CB
/* Callback signature used by the per-network-type tables in this file
 * (netdev_conf[] and netdev_deconf[]): takes the container handler and the
 * network device description and returns an int status.
 */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
97
98static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
99{
100 int bridge_index, err;
101 char *veth1, *veth2;
102 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
103 unsigned int mtu = 0;
104
de4855a8 105 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
106 veth1 = netdev->priv.veth_attr.pair;
107 if (handler->conf->reboot)
108 lxc_netdev_delete_by_name(veth1);
109 } else {
110 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
111 if (err < 0 || (size_t)err >= sizeof(veth1buf))
112 return -1;
113
114 veth1 = lxc_mkifname(veth1buf);
115 if (!veth1)
116 return -1;
117
118 /* store away for deconf */
119 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
120 }
121
122 snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
123 veth2 = lxc_mkifname(veth2buf);
124 if (!veth2)
125 goto out_delete;
126
127 err = lxc_veth_create(veth1, veth2);
128 if (err) {
129 ERROR("Failed to create veth pair \"%s\" and \"%s\": %s", veth1,
130 veth2, strerror(-err));
131 goto out_delete;
132 }
133
134 /* changing the high byte of the mac address to 0xfe, the bridge interface
135 * will always keep the host's mac address and not take the mac address
136 * of a container */
137 err = setup_private_host_hw_addr(veth1);
138 if (err) {
139 ERROR("Failed to change mac address of host interface \"%s\": %s",
140 veth1, strerror(-err));
141 goto out_delete;
142 }
143
8da62485
CB
144 /* Retrieve ifindex of the host's veth device. */
145 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
146 if (!netdev->priv.veth_attr.ifindex) {
147 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
148 goto out_delete;
149 }
150
151 /* Note that we're retrieving the container's ifindex in the host's
152 * network namespace because we need it to move the device from the
153 * host's network namespace to the container's network namespace later
154 * on.
155 */
811ef482
CB
156 netdev->ifindex = if_nametoindex(veth2);
157 if (!netdev->ifindex) {
158 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
159 goto out_delete;
160 }
161
162 if (netdev->mtu) {
163 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
164 WARN("Failed to parse mtu");
165 else
166 INFO("Retrieved mtu %d", mtu);
de4855a8 167 } else if (netdev->link[0] != '\0') {
811ef482
CB
168 bridge_index = if_nametoindex(netdev->link);
169 if (bridge_index) {
170 mtu = netdev_get_mtu(bridge_index);
171 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
172 } else {
173 mtu = netdev_get_mtu(netdev->ifindex);
174 INFO("Retrieved mtu %d from %s", mtu, veth2);
175 }
176 }
177
178 if (mtu) {
179 err = lxc_netdev_set_mtu(veth1, mtu);
180 if (!err)
181 err = lxc_netdev_set_mtu(veth2, mtu);
182 if (err) {
183 ERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
184 "and \"%s\": %s",
185 mtu, veth1, veth2, strerror(-err));
186 goto out_delete;
187 }
188 }
189
de4855a8 190 if (netdev->link[0] != '\0') {
811ef482
CB
191 err = lxc_bridge_attach(netdev->link, veth1);
192 if (err) {
193 ERROR("Failed to attach \"%s\" to bridge \"%s\": %s",
194 veth1, netdev->link, strerror(-err));
195 goto out_delete;
196 }
197 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
198 }
199
200 err = lxc_netdev_up(veth1);
201 if (err) {
202 ERROR("Failed to set \"%s\" up: %s", veth1, strerror(-err));
203 goto out_delete;
204 }
205
206 if (netdev->upscript) {
207 err = run_script(handler->name, "net", netdev->upscript, "up",
208 "veth", veth1, (char*) NULL);
209 if (err)
210 goto out_delete;
211 }
212
213 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
214 netdev->ifindex);
215
216 return 0;
217
218out_delete:
219 if (netdev->ifindex != 0)
220 lxc_netdev_delete_by_name(veth1);
de4855a8 221 if (netdev->priv.veth_attr.pair != veth1)
811ef482
CB
222 free(veth1);
223 free(veth2);
224 return -1;
225}
226
227static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
228{
229 char peerbuf[IFNAMSIZ], *peer;
230 int err;
231
de4855a8 232 if (netdev->link[0] == '\0') {
811ef482
CB
233 ERROR("No link for macvlan network device specified");
234 return -1;
235 }
236
237 err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
238 if (err < 0 || (size_t)err >= sizeof(peerbuf))
239 return -1;
240
241 peer = lxc_mkifname(peerbuf);
242 if (!peer)
243 return -1;
244
245 err = lxc_macvlan_create(netdev->link, peer,
246 netdev->priv.macvlan_attr.mode);
247 if (err) {
248 ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s",
249 peer, netdev->link, strerror(-err));
250 goto out;
251 }
252
253 netdev->ifindex = if_nametoindex(peer);
254 if (!netdev->ifindex) {
255 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
256 goto out;
257 }
258
259 if (netdev->upscript) {
260 err = run_script(handler->name, "net", netdev->upscript, "up",
261 "macvlan", netdev->link, (char*) NULL);
262 if (err)
263 goto out;
264 }
265
266 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
267 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
268
269 return 0;
270out:
271 lxc_netdev_delete_by_name(peer);
272 free(peer);
273 return -1;
274}
275
276static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
277{
278 char peer[IFNAMSIZ];
279 int err;
280 static uint16_t vlan_cntr = 0;
281 unsigned int mtu = 0;
282
de4855a8 283 if (netdev->link[0] == '\0') {
811ef482
CB
284 ERROR("No link for vlan network device specified");
285 return -1;
286 }
287
288 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
289 if (err < 0 || (size_t)err >= sizeof(peer))
290 return -1;
291
292 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
293 if (err) {
294 ERROR("Failed to create vlan interface \"%s\" on \"%s\": %s",
295 peer, netdev->link, strerror(-err));
296 return -1;
297 }
298
299 netdev->ifindex = if_nametoindex(peer);
300 if (!netdev->ifindex) {
301 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
302 lxc_netdev_delete_by_name(peer);
303 return -1;
304 }
305
306 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\" (vlan1000)",
307 peer, netdev->ifindex);
308 if (netdev->mtu) {
309 if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
310 ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".",
311 netdev->ifindex,
de4855a8 312 netdev->name[0] != '\0' ? netdev->name : "(null)");
811ef482
CB
313 return -1;
314 }
315 err = lxc_netdev_set_mtu(peer, mtu);
316 if (err) {
317 ERROR("Failed to set mtu \"%s\" for \"%s\": %s",
318 netdev->mtu, peer, strerror(-err));
319 lxc_netdev_delete_by_name(peer);
320 return -1;
321 }
322 }
323
324 return 0;
325}
326
327static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
328{
de4855a8 329 if (netdev->link[0] == '\0') {
811ef482
CB
330 ERROR("No link for physical interface specified");
331 return -1;
332 }
333
790255cf
CB
334 /* Note that we're retrieving the container's ifindex in the host's
335 * network namespace because we need it to move the device from the
336 * host's network namespace to the container's network namespace later
337 * on.
338 * Note that netdev->link will contain the name of the physical network
339 * device in the host's namespace.
340 */
811ef482
CB
341 netdev->ifindex = if_nametoindex(netdev->link);
342 if (!netdev->ifindex) {
343 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
344 return -1;
345 }
346
790255cf
CB
347 /* Store the ifindex of the host's network device in the host's
348 * namespace.
349 */
350 netdev->priv.phys_attr.ifindex = netdev->ifindex;
351
811ef482
CB
352 if (netdev->upscript) {
353 int err;
354 err = run_script(handler->name, "net", netdev->upscript,
355 "up", "phys", netdev->link, (char*) NULL);
356 if (err)
357 return -1;
358 }
359
360 return 0;
361}
362
363static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
364{
365 netdev->ifindex = 0;
366 if (netdev->upscript) {
367 int err;
368 err = run_script(handler->name, "net", netdev->upscript,
369 "up", "empty", (char*) NULL);
370 if (err)
371 return -1;
372 }
373 return 0;
374}
375
376static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
377{
378 netdev->ifindex = 0;
379 return 0;
380}
381
382static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
383 [LXC_NET_VETH] = instantiate_veth,
384 [LXC_NET_MACVLAN] = instantiate_macvlan,
385 [LXC_NET_VLAN] = instantiate_vlan,
386 [LXC_NET_PHYS] = instantiate_phys,
387 [LXC_NET_EMPTY] = instantiate_empty,
388 [LXC_NET_NONE] = instantiate_none,
389};
390
391static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
392{
393 char *veth1;
394 int err;
395
de4855a8 396 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
397 veth1 = netdev->priv.veth_attr.pair;
398 else
399 veth1 = netdev->priv.veth_attr.veth1;
400
401 if (netdev->downscript) {
402 err = run_script(handler->name, "net", netdev->downscript,
403 "down", "veth", veth1, (char*) NULL);
404 if (err)
405 return -1;
406 }
407 return 0;
408}
409
410static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
411{
412 int err;
413
414 if (netdev->downscript) {
415 err = run_script(handler->name, "net", netdev->downscript,
416 "down", "macvlan", netdev->link,
417 (char*) NULL);
418 if (err)
419 return -1;
420 }
421 return 0;
422}
423
/* vlan devices need no teardown work here; always returns 0. */
static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
428
429static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
430{
431 int err;
432
433 if (netdev->downscript) {
434 err = run_script(handler->name, "net", netdev->downscript,
435 "down", "phys", netdev->link, (char*) NULL);
436 if (err)
437 return -1;
438 }
439 return 0;
440}
441
442static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
443{
444 int err;
445
446 if (netdev->downscript) {
447 err = run_script(handler->name, "net", netdev->downscript,
448 "down", "empty", (char*) NULL);
449 if (err)
450 return -1;
451 }
452 return 0;
453}
454
/* "none" networks have nothing to tear down; always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
459
460static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
461 [LXC_NET_VETH] = shutdown_veth,
462 [LXC_NET_MACVLAN] = shutdown_macvlan,
463 [LXC_NET_VLAN] = shutdown_vlan,
464 [LXC_NET_PHYS] = shutdown_phys,
465 [LXC_NET_EMPTY] = shutdown_empty,
466 [LXC_NET_NONE] = shutdown_none,
467};
468
ebc73a67 469int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 470{
ebc73a67 471 int err;
0ad19a3f 472 struct nl_handler nlh;
06f976ca 473 struct ifinfomsg *ifi;
ebc73a67 474 struct nlmsg *nlmsg = NULL;
0ad19a3f 475
3cfc0f3a
MN
476 err = netlink_open(&nlh, NETLINK_ROUTE);
477 if (err)
478 return err;
0ad19a3f 479
3cfc0f3a 480 err = -ENOMEM;
0ad19a3f 481 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
482 if (!nlmsg)
483 goto out;
484
ebc73a67 485 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
486 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
487
488 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
489 if (!ifi)
490 goto out;
06f976ca
SZ
491 ifi->ifi_family = AF_UNSPEC;
492 ifi->ifi_index = ifindex;
0ad19a3f 493
494 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
495 goto out;
496
8d357196
DY
497 if (ifname != NULL) {
498 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
499 goto out;
500 }
501
3cfc0f3a 502 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 503out:
504 netlink_close(&nlh);
505 nlmsg_free(nlmsg);
506 return err;
507}
508
ebc73a67
CB
/* If we are asked to move a wireless interface, then we must actually move its
 * phyN device. Detect that condition and return the physname here. The physname
 * will be passed to lxc_netdev_move_wlan() which will free it when done.
 *
 * Returns a heap-allocated, NUL-terminated string holding the first line of
 * /sys/class/net/<ifname>/phy80211/name, or NULL when that file cannot be
 * found or read.
 */
#define PHYSNAME "/sys/class/net/%s/phy80211/name"
static char *is_wlan(const char *ifname)
{
	int ret;
	long physlen;
	size_t len, nread;
	char *path;
	FILE *f;
	struct stat sb;
	char *physname = NULL;

	/* "%s" (2 bytes) is replaced by ifname, so the formatted path needs
	 * strlen(PHYSNAME) - 2 + strlen(ifname) bytes plus the terminator.
	 */
	len = strlen(ifname) + strlen(PHYSNAME) - 1;
	path = alloca(len + 1);
	ret = snprintf(path, len, PHYSNAME, ifname);
	if (ret < 0 || (size_t)ret >= len)
		goto bad;

	ret = stat(path, &sb);
	if (ret)
		goto bad;

	f = fopen(path, "r");
	if (!f)
		goto bad;

	/* Feh - sb.st_size is always 4096. */
	fseek(f, 0, SEEK_END);
	physlen = ftell(f);
	fseek(f, 0, SEEK_SET);
	if (physlen < 0) {
		/* ftell() failed; a negative length must never reach the
		 * allocation and read below.
		 */
		fclose(f);
		goto bad;
	}

	/* Zero-filled so the result is always NUL-terminated. */
	physname = calloc((size_t)physlen + 1, 1);
	if (!physname) {
		fclose(f);
		goto bad;
	}

	/* A short read is expected (see the size note above); only a real
	 * stream error is treated as failure.
	 */
	nread = fread(physname, 1, (size_t)physlen, f);
	if (nread < (size_t)physlen && ferror(f)) {
		fclose(f);
		goto bad;
	}
	fclose(f);

	/* Cut the name at the first newline, if any. */
	physname[strcspn(physname, "\n")] = '\0';

	return physname;

bad:
	free(physname);
	return NULL;
}
569
ebc73a67
CB
/* Rename the network device @old to @new inside the network namespace of
 * process @pid.
 *
 * The rename is done in a forked child that first switches to the "net"
 * namespace of @pid. The parent returns whatever wait_for_pid() reports for
 * that child, or -1 if fork() fails.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child: it must terminate here on every path. Returning would let a
	 * second copy of the process continue in the caller's code.
	 */
	if (!switch_to_ns(pid, "net"))
		exit(EXIT_FAILURE);

	exit(lxc_netdev_rename_by_name(old, new) ? EXIT_FAILURE : EXIT_SUCCESS);
}
587
ebc73a67
CB
/* Move the wireless phy @physname into the network namespace of @pid by
 * running "iw phy <physname> set netns <pid>" in a child process, then, if
 * @newname is given, rename @ifname to @newname inside that namespace.
 *
 * TODO - do this using netlink. However, IIUC this involves a bit more
 * complicated work to talk to the 80211 module, so for now just call out to
 * iw.
 *
 * @physname is always freed before returning. Returns 0 on success, otherwise
 * -1 or the result of the rename step.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	pid_t child;
	char *iw_path;

	/* Bail out early when no "iw" binary can be found. */
	iw_path = on_path("iw", NULL);
	if (iw_path == NULL)
		goto out;
	free(iw_path);

	child = fork();
	if (child < 0)
		goto out;

	if (child == 0) {
		char pidstr[30];

		snprintf(pidstr, sizeof(pidstr), "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		/* Only reached when execlp() failed. */
		exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child) != 0)
		goto out;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

out:
	free(physname);
	return ret;
}
627
8d357196 628int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924
SH
629{
630 int index;
e5848d39 631 char *physname;
8befa924 632
8befa924
SH
633 if (!ifname)
634 return -EINVAL;
635
32571606 636 index = if_nametoindex(ifname);
49428bf3
DY
637 if (!index)
638 return -EINVAL;
32571606 639
ebc73a67
CB
640 physname = is_wlan(ifname);
641 if (physname)
e5848d39
SH
642 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
643
8d357196 644 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
645}
646
b84f58b9 647int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 648{
b84f58b9 649 int err;
ebc73a67
CB
650 struct ifinfomsg *ifi;
651 struct nl_handler nlh;
652 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 653
3cfc0f3a
MN
654 err = netlink_open(&nlh, NETLINK_ROUTE);
655 if (err)
656 return err;
0ad19a3f 657
3cfc0f3a 658 err = -ENOMEM;
0ad19a3f 659 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
660 if (!nlmsg)
661 goto out;
662
06f976ca 663 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 664 if (!answer)
665 goto out;
666
ebc73a67 667 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
668 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
669
670 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
671 if (!ifi)
672 goto out;
06f976ca
SZ
673 ifi->ifi_family = AF_UNSPEC;
674 ifi->ifi_index = ifindex;
0ad19a3f 675
3cfc0f3a 676 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 677out:
678 netlink_close(&nlh);
679 nlmsg_free(answer);
680 nlmsg_free(nlmsg);
681 return err;
682}
683
b84f58b9
DL
/* Delete the device called @name.
 *
 * Returns -EINVAL when @name cannot be resolved to an ifindex, otherwise the
 * result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
694
695int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 696{
ebc73a67 697 int err, len;
06f976ca 698 struct ifinfomsg *ifi;
ebc73a67
CB
699 struct nl_handler nlh;
700 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 701
3cfc0f3a
MN
702 err = netlink_open(&nlh, NETLINK_ROUTE);
703 if (err)
704 return err;
b9a5bb58 705
b84f58b9 706 len = strlen(newname);
dae3fdf6 707 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
708 goto out;
709
3cfc0f3a 710 err = -ENOMEM;
b9a5bb58
DL
711 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
712 if (!nlmsg)
713 goto out;
714
06f976ca 715 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
716 if (!answer)
717 goto out;
718
ebc73a67 719 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
720 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
721
722 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
723 if (!ifi)
724 goto out;
06f976ca
SZ
725 ifi->ifi_family = AF_UNSPEC;
726 ifi->ifi_index = ifindex;
b84f58b9
DL
727
728 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
729 goto out;
b9a5bb58 730
3cfc0f3a 731 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
732out:
733 netlink_close(&nlh);
734 nlmsg_free(answer);
735 nlmsg_free(nlmsg);
736 return err;
737}
738
b84f58b9
DL
739int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
740{
741 int len, index;
742
743 len = strlen(oldname);
dae3fdf6 744 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
745 return -EINVAL;
746
747 index = if_nametoindex(oldname);
748 if (!index)
749 return -EINVAL;
750
751 return lxc_netdev_rename_by_index(index, newname);
752}
753
8befa924 754int netdev_set_flag(const char *name, int flag)
0ad19a3f 755{
ebc73a67 756 int err, index, len;
06f976ca 757 struct ifinfomsg *ifi;
ebc73a67
CB
758 struct nl_handler nlh;
759 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 760
3cfc0f3a
MN
761 err = netlink_open(&nlh, NETLINK_ROUTE);
762 if (err)
763 return err;
0ad19a3f 764
3cfc0f3a 765 err = -EINVAL;
0ad19a3f 766 len = strlen(name);
dae3fdf6 767 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 768 goto out;
769
3cfc0f3a 770 err = -ENOMEM;
0ad19a3f 771 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
772 if (!nlmsg)
773 goto out;
774
06f976ca 775 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 776 if (!answer)
777 goto out;
778
3cfc0f3a 779 err = -EINVAL;
0ad19a3f 780 index = if_nametoindex(name);
781 if (!index)
782 goto out;
783
ebc73a67 784 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
785 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
786
787 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
788 if (!ifi) {
789 err = -ENOMEM;
790 goto out;
791 }
06f976ca
SZ
792 ifi->ifi_family = AF_UNSPEC;
793 ifi->ifi_index = index;
794 ifi->ifi_change |= IFF_UP;
795 ifi->ifi_flags |= flag;
0ad19a3f 796
797 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 798out:
799 netlink_close(&nlh);
800 nlmsg_free(nlmsg);
801 nlmsg_free(answer);
802 return err;
803}
804
ebc73a67 805int netdev_get_flag(const char *name, int *flag)
efa1cf45 806{
ebc73a67 807 int err, index, len;
a4318300 808 struct ifinfomsg *ifi;
ebc73a67
CB
809 struct nl_handler nlh;
810 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
811
812 if (!name)
813 return -EINVAL;
814
815 err = netlink_open(&nlh, NETLINK_ROUTE);
816 if (err)
817 return err;
818
819 err = -EINVAL;
820 len = strlen(name);
821 if (len == 1 || len >= IFNAMSIZ)
822 goto out;
823
824 err = -ENOMEM;
825 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
826 if (!nlmsg)
827 goto out;
828
06f976ca 829 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
830 if (!answer)
831 goto out;
832
833 err = -EINVAL;
834 index = if_nametoindex(name);
835 if (!index)
836 goto out;
837
06f976ca
SZ
838 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
839 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
840
841 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
842 if (!ifi) {
843 err = -ENOMEM;
844 goto out;
845 }
06f976ca
SZ
846 ifi->ifi_family = AF_UNSPEC;
847 ifi->ifi_index = index;
efa1cf45
DY
848
849 err = netlink_transaction(&nlh, nlmsg, answer);
850 if (err)
851 goto out;
852
06f976ca 853 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
854
855 *flag = ifi->ifi_flags;
856out:
857 netlink_close(&nlh);
858 nlmsg_free(nlmsg);
859 nlmsg_free(answer);
860 return err;
861}
862
863/*
864 * \brief Check a interface is up or not.
865 *
866 * \param name: name for the interface.
867 *
868 * \return int.
869 * 0 means interface is down.
870 * 1 means interface is up.
871 * Others means error happened, and ret-value is the error number.
872 */
ebc73a67 873int lxc_netdev_isup(const char *name)
efa1cf45 874{
ebc73a67 875 int err, flag;
efa1cf45
DY
876
877 err = netdev_get_flag(name, &flag);
878 if (err)
ebc73a67
CB
879 return err;
880
efa1cf45
DY
881 if (flag & IFF_UP)
882 return 1;
ebc73a67 883
efa1cf45 884 return 0;
efa1cf45
DY
885}
886
0130df54
SH
887int netdev_get_mtu(int ifindex)
888{
ebc73a67 889 int answer_len, err, res;
0130df54 890 struct nl_handler nlh;
06f976ca 891 struct ifinfomsg *ifi;
0130df54 892 struct nlmsghdr *msg;
ebc73a67
CB
893 int readmore = 0, recv_len = 0;
894 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
895
896 err = netlink_open(&nlh, NETLINK_ROUTE);
897 if (err)
898 return err;
899
900 err = -ENOMEM;
901 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
902 if (!nlmsg)
903 goto out;
904
06f976ca 905 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
906 if (!answer)
907 goto out;
908
909 /* Save the answer buffer length, since it will be overwritten
910 * on the first receive (and we might need to receive more than
ebc73a67
CB
911 * once.
912 */
06f976ca
SZ
913 answer_len = answer->nlmsghdr->nlmsg_len;
914
ebc73a67 915 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 916 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 917
06f976ca 918 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
919 if (!ifi)
920 goto out;
06f976ca 921 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
922
923 /* Send the request for addresses, which returns all addresses
924 * on all interfaces. */
925 err = netlink_send(&nlh, nlmsg);
926 if (err < 0)
927 goto out;
928
929 do {
930 /* Restore the answer buffer length, it might have been
ebc73a67
CB
931 * overwritten by a previous receive.
932 */
06f976ca 933 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
934
935 /* Get the (next) batch of reply messages */
936 err = netlink_rcv(&nlh, answer);
937 if (err < 0)
938 goto out;
939
940 recv_len = err;
941 err = 0;
942
943 /* Satisfy the typing for the netlink macros */
06f976ca 944 msg = answer->nlmsghdr;
0130df54
SH
945
946 while (NLMSG_OK(msg, recv_len)) {
947
948 /* Stop reading if we see an error message */
949 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
950 struct nlmsgerr *errmsg =
951 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
952 err = errmsg->error;
953 goto out;
954 }
955
956 /* Stop reading if we see a NLMSG_DONE message */
957 if (msg->nlmsg_type == NLMSG_DONE) {
958 readmore = 0;
959 break;
960 }
961
06f976ca 962 ifi = NLMSG_DATA(msg);
0130df54
SH
963 if (ifi->ifi_index == ifindex) {
964 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
965 int attr_len =
966 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 967 res = 0;
ebc73a67
CB
968 while (RTA_OK(rta, attr_len)) {
969 /* Found a local address for the
970 * requested interface, return it.
971 */
0130df54 972 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
973 memcpy(&res, RTA_DATA(rta),
974 sizeof(int));
0130df54
SH
975 err = res;
976 goto out;
977 }
978 rta = RTA_NEXT(rta, attr_len);
979 }
0130df54
SH
980 }
981
ebc73a67
CB
982 /* Keep reading more data from the socket if the last
983 * message had the NLF_F_MULTI flag set.
984 */
0130df54
SH
985 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
986
ebc73a67 987 /* Look at the next message received in this buffer. */
0130df54
SH
988 msg = NLMSG_NEXT(msg, recv_len);
989 }
990 } while (readmore);
991
ebc73a67 992 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
993 err = -1;
994
995out:
996 netlink_close(&nlh);
997 nlmsg_free(answer);
998 nlmsg_free(nlmsg);
999 return err;
1000}
1001
d472214b 1002int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1003{
ebc73a67 1004 int err, index, len;
06f976ca 1005 struct ifinfomsg *ifi;
ebc73a67
CB
1006 struct nl_handler nlh;
1007 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1008
3cfc0f3a
MN
1009 err = netlink_open(&nlh, NETLINK_ROUTE);
1010 if (err)
1011 return err;
75d09f83 1012
3cfc0f3a 1013 err = -EINVAL;
75d09f83 1014 len = strlen(name);
dae3fdf6 1015 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1016 goto out;
1017
3cfc0f3a 1018 err = -ENOMEM;
75d09f83
DL
1019 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1020 if (!nlmsg)
1021 goto out;
1022
06f976ca 1023 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1024 if (!answer)
1025 goto out;
1026
3cfc0f3a 1027 err = -EINVAL;
75d09f83
DL
1028 index = if_nametoindex(name);
1029 if (!index)
1030 goto out;
1031
ebc73a67 1032 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1033 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1034
1035 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1036 if (!ifi) {
1037 err = -ENOMEM;
1038 goto out;
1039 }
06f976ca
SZ
1040 ifi->ifi_family = AF_UNSPEC;
1041 ifi->ifi_index = index;
75d09f83
DL
1042
1043 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1044 goto out;
1045
1046 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1047out:
1048 netlink_close(&nlh);
1049 nlmsg_free(nlmsg);
1050 nlmsg_free(answer);
1051 return err;
1052}
1053
d472214b 1054int lxc_netdev_up(const char *name)
0ad19a3f 1055{
d472214b 1056 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1057}
1058
/* Take the device called @name down by calling netdev_set_flag() with no
 * flags set; returns its result.
 */
int lxc_netdev_down(const char *name)
{
	int ret = netdev_set_flag(name, 0);

	return ret;
}
1063
497353b6 1064int lxc_veth_create(const char *name1, const char *name2)
0ad19a3f 1065{
ebc73a67 1066 int err, len;
06f976ca 1067 struct ifinfomsg *ifi;
ebc73a67 1068 struct nl_handler nlh;
0ad19a3f 1069 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1070 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1071
3cfc0f3a
MN
1072 err = netlink_open(&nlh, NETLINK_ROUTE);
1073 if (err)
1074 return err;
0ad19a3f 1075
3cfc0f3a 1076 err = -EINVAL;
0ad19a3f 1077 len = strlen(name1);
dae3fdf6 1078 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1079 goto out;
1080
1081 len = strlen(name2);
dae3fdf6 1082 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1083 goto out;
1084
3cfc0f3a 1085 err = -ENOMEM;
0ad19a3f 1086 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1087 if (!nlmsg)
1088 goto out;
1089
06f976ca 1090 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1091 if (!answer)
1092 goto out;
1093
06f976ca 1094 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1095 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1096 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1097
1098 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1099 if (!ifi)
1100 goto out;
06f976ca 1101 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1102
3cfc0f3a 1103 err = -EINVAL;
79e68309 1104 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1105 if (!nest1)
1106 goto out;
1107
1108 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1109 goto out;
1110
1111 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1112 if (!nest2)
1113 goto out;
1114
1115 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1116 if (!nest3)
1117 goto out;
1118
06f976ca 1119 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1120 if (!ifi) {
1121 err = -ENOMEM;
06f976ca 1122 goto out;
25a9939b 1123 }
0ad19a3f 1124
1125 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1126 goto out;
1127
1128 nla_end_nested(nlmsg, nest3);
0ad19a3f 1129 nla_end_nested(nlmsg, nest2);
0ad19a3f 1130 nla_end_nested(nlmsg, nest1);
1131
1132 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1133 goto out;
1134
3cfc0f3a 1135 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1136out:
1137 netlink_close(&nlh);
1138 nlmsg_free(answer);
1139 nlmsg_free(nlmsg);
1140 return err;
1141}
1142
ebc73a67 1143/* TODO: merge with lxc_macvlan_create */
7c11d57a 1144int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1145{
ebc73a67 1146 int err, len, lindex;
06f976ca 1147 struct ifinfomsg *ifi;
ebc73a67 1148 struct nl_handler nlh;
26c39028 1149 struct rtattr *nest, *nest2;
ebc73a67 1150 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1151
3cfc0f3a
MN
1152 err = netlink_open(&nlh, NETLINK_ROUTE);
1153 if (err)
1154 return err;
26c39028 1155
3cfc0f3a 1156 err = -EINVAL;
26c39028 1157 len = strlen(master);
dae3fdf6 1158 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1159 goto err3;
1160
1161 len = strlen(name);
dae3fdf6 1162 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1163 goto err3;
1164
3cfc0f3a 1165 err = -ENOMEM;
26c39028
JHS
1166 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1167 if (!nlmsg)
1168 goto err3;
1169
06f976ca 1170 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1171 if (!answer)
1172 goto err2;
1173
3cfc0f3a 1174 err = -EINVAL;
26c39028
JHS
1175 lindex = if_nametoindex(master);
1176 if (!lindex)
1177 goto err1;
1178
06f976ca 1179 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1180 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1181 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1182
1183 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1184 if (!ifi) {
1185 err = -ENOMEM;
1186 goto err1;
1187 }
06f976ca 1188 ifi->ifi_family = AF_UNSPEC;
26c39028 1189
79e68309 1190 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1191 if (!nest)
1192 goto err1;
1193
1194 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1195 goto err1;
1196
1197 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1198 if (!nest2)
1199 goto err1;
e892973e 1200
26c39028
JHS
1201 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1202 goto err1;
e892973e 1203
26c39028 1204 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1205 nla_end_nested(nlmsg, nest);
1206
1207 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1208 goto err1;
1209
1210 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1211 goto err1;
1212
3cfc0f3a 1213 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1214err1:
1215 nlmsg_free(answer);
1216err2:
1217 nlmsg_free(nlmsg);
1218err3:
1219 netlink_close(&nlh);
1220 return err;
1221}
1222
e892973e 1223int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1224{
ebc73a67 1225 int err, index, len;
06f976ca 1226 struct ifinfomsg *ifi;
ebc73a67 1227 struct nl_handler nlh;
e892973e 1228 struct rtattr *nest, *nest2;
ebc73a67 1229 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1230
3cfc0f3a
MN
1231 err = netlink_open(&nlh, NETLINK_ROUTE);
1232 if (err)
1233 return err;
0ad19a3f 1234
3cfc0f3a 1235 err = -EINVAL;
0ad19a3f 1236 len = strlen(master);
dae3fdf6 1237 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1238 goto out;
1239
1240 len = strlen(name);
dae3fdf6 1241 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1242 goto out;
1243
3cfc0f3a 1244 err = -ENOMEM;
0ad19a3f 1245 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1246 if (!nlmsg)
1247 goto out;
1248
06f976ca 1249 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1250 if (!answer)
1251 goto out;
1252
3cfc0f3a 1253 err = -EINVAL;
0ad19a3f 1254 index = if_nametoindex(master);
1255 if (!index)
1256 goto out;
1257
06f976ca 1258 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1259 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1260 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1261
1262 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1263 if (!ifi) {
1264 err = -ENOMEM;
1265 goto out;
1266 }
06f976ca 1267 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1268
79e68309 1269 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1270 if (!nest)
1271 goto out;
1272
1273 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1274 goto out;
1275
e892973e
DL
1276 if (mode) {
1277 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1278 if (!nest2)
1279 goto out;
1280
1281 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1282 goto out;
1283
1284 nla_end_nested(nlmsg, nest2);
1285 }
1286
0ad19a3f 1287 nla_end_nested(nlmsg, nest);
1288
1289 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1290 goto out;
1291
1292 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1293 goto out;
1294
3cfc0f3a 1295 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1296out:
1297 netlink_close(&nlh);
1298 nlmsg_free(answer);
1299 nlmsg_free(nlmsg);
1300 return err;
1301}
1302
/* Write the string @value (without its terminating NUL) to the existing file
 * at @path.
 *
 * A write that is interrupted by a signal before any data was transferred is
 * retried instead of being reported as a failure, and a write that transfers
 * fewer bytes than requested is reported as -EIO instead of being treated as
 * success.
 *
 * Returns 0 on success and a negative errno value otherwise.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int fd;
	int err = 0;
	size_t len = strlen(value);
	ssize_t written;

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;

	do {
		written = write(fd, value, len);
	} while (written < 0 && errno == EINTR);

	/* Record the error before close() gets a chance to change errno. */
	if (written < 0)
		err = -errno;
	else if ((size_t)written != len)
		err = -EIO;

	close(fd);
	return err;
}
1318
/* Write "1" (when @flag is non-zero) or "0" to the "forwarding" file of
 * interface @ifname below /proc/sys/net/ipv4 or /proc/sys/net/ipv6, selected
 * by @family.
 *
 * Returns -EINVAL for any family other than AF_INET and AF_INET6, -E2BIG when
 * the path does not fit into the local buffer, and otherwise the result of
 * proc_sys_net_write().
 */
static int ip_forward_set(const char *ifname, int family, int flag)
{
	char path[MAXPATHLEN];
	const char *proto;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		break;
	case AF_INET6:
		proto = "ipv6";
		break;
	default:
		return -EINVAL;
	}

	len = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/forwarding",
		       proto, ifname);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -E2BIG;

	return proc_sys_net_write(path, flag ? "1" : "0");
}
1334
/* Turn forwarding on for @ifname in the given address @family; see
 * ip_forward_set() for the return values.
 */
int lxc_ip_forward_on(const char *ifname, int family)
{
	const int enable = 1;

	return ip_forward_set(ifname, family, enable);
}
1339
/* Turn forwarding off for @ifname in the given address @family; see
 * ip_forward_set() for the return values.
 */
int lxc_ip_forward_off(const char *ifname, int family)
{
	const int disable = 0;

	return ip_forward_set(ifname, family, disable);
}
1344
/* Write "1" (when @flag is non-zero) or "0" to the neighbour proxy file of
 * interface @ifname: "proxy_arp" below /proc/sys/net/ipv4 for AF_INET and
 * "proxy_ndp" below /proc/sys/net/ipv6 for AF_INET6.
 *
 * Returns -EINVAL for any other family, -E2BIG when the path does not fit into
 * the local buffer, and otherwise the result of proc_sys_net_write().
 */
static int neigh_proxy_set(const char *ifname, int family, int flag)
{
	char path[MAXPATHLEN];
	const char *proto, *knob;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		knob = "proxy_arp";
		break;
	case AF_INET6:
		proto = "ipv6";
		knob = "proxy_ndp";
		break;
	default:
		return -EINVAL;
	}

	len = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
		       proto, ifname, knob);
	if (len < 0 || (size_t)len >= sizeof(path))
		return -E2BIG;

	return proc_sys_net_write(path, flag ? "1" : "0");
}
1361
/* Turn neighbour proxying on for interface @name in the given address
 * @family; see neigh_proxy_set() for the return values.
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1366
/* Turn neighbour proxying off for interface @name in the given address
 * @family; see neigh_proxy_set() for the return values.
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
1371
/* Return the value of the ASCII hexadecimal digit @c, or -1 if @c is not one.
 *
 * Plain range comparisons are used instead of isdigit(): passing a negative
 * char value to a <ctype.h> function is undefined behaviour.
 */
static int mac_hex_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are stored in
 * sa_data. Each byte is written as one or two hexadecimal digits (either
 * case); bytes are separated by ':'. Parsing stops at the end of the string
 * or after ETH_ALEN bytes, whichever comes first; the number of bytes parsed
 * is not checked.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		unsigned val;
		int hi, lo;
		char c;

		/* The first character of a byte must be a hex digit. */
		hi = mac_hex_value(*macaddr++);
		if (hi < 0)
			return -EINVAL;

		/* The second one is either another hex digit or it ends a
		 * single-digit byte (':' or end of string).
		 */
		c = *macaddr;
		lo = mac_hex_value(c);
		if (lo >= 0)
			val = ((unsigned)hi << 4) | (unsigned)lo;
		else if (c == ':' || c == '\0')
			val = (unsigned)hi;
		else
			return -EINVAL;

		/* Never step past the terminating NUL. */
		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1417
ebc73a67
CB
1418static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1419 void *acast, int prefix)
0ad19a3f 1420{
ebc73a67 1421 int addrlen, err;
06f976ca 1422 struct ifaddrmsg *ifa;
ebc73a67
CB
1423 struct nl_handler nlh;
1424 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1425
ebc73a67
CB
1426 addrlen = family == AF_INET ? sizeof(struct in_addr)
1427 : sizeof(struct in6_addr);
4bf1968d 1428
3cfc0f3a
MN
1429 err = netlink_open(&nlh, NETLINK_ROUTE);
1430 if (err)
1431 return err;
0ad19a3f 1432
3cfc0f3a 1433 err = -ENOMEM;
0ad19a3f 1434 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1435 if (!nlmsg)
1436 goto out;
1437
06f976ca 1438 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1439 if (!answer)
1440 goto out;
1441
06f976ca 1442 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1443 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1444 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1445
1446 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 1447 if (!ifa)
25a9939b 1448 goto out;
06f976ca
SZ
1449 ifa->ifa_prefixlen = prefix;
1450 ifa->ifa_index = ifindex;
1451 ifa->ifa_family = family;
1452 ifa->ifa_scope = 0;
acf47e1b 1453
3cfc0f3a 1454 err = -EINVAL;
4bf1968d 1455 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 1456 goto out;
1457
4bf1968d 1458 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 1459 goto out;
1460
d8948a52 1461 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
1462 goto out;
1463
ebc73a67 1464 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 1465 err = -EPROTONOSUPPORT;
79881dc6
DL
1466 if (family == AF_INET6 &&
1467 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1468 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 1469 goto out;
0ad19a3f 1470
3cfc0f3a 1471 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1472out:
1473 netlink_close(&nlh);
1474 nlmsg_free(answer);
1475 nlmsg_free(nlmsg);
1476 return err;
1477}
1478
/* IPv6 front end of ip_addr_add(): @mcast is handed over in the position of
 * the broadcast address and @acast as the anycast address.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
1485
ebc73a67
CB
/* IPv4 front end of ip_addr_add(); no anycast address is passed (NULL). */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
1491
ebc73a67
CB
/* Extract an address from the RTM_NEWADDR message @msg.
 *
 * The attributes are scanned in order. Every IFA_ADDRESS or IFA_LOCAL payload
 * is copied into *res, so a later attribute overwrites an earlier one, and the
 * scan stops at the first IFA_LOCAL. If *res is NULL when the first matching
 * attribute is found, memory for the address is allocated with malloc() and
 * stored in *res (so res should be an in_addr** or in6_addr**).
 *
 * Returns 0 when the message was processed, including when its family differs
 * from @family or when it carries no matching attribute (*res is left
 * untouched in both cases), and -1 when an attribute has an unexpected length
 * or the allocation fails.
 */
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *hdr = NLMSG_DATA(msg);
	struct rtattr *attr;
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int want;

	if (hdr->ifa_family != family)
		return 0;

	if (family == AF_INET)
		want = sizeof(struct in_addr);
	else
		want = sizeof(struct in6_addr);

	for (attr = IFA_RTA(hdr); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * payload size; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(attr) != want)
			return -1;

		/* Reuse the buffer if an earlier attribute allocated it. */
		if (!*res) {
			*res = malloc(want);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), want);

		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
1540
1541static int ip_addr_get(int family, int ifindex, void **res)
1542{
ebc73a67 1543 int answer_len, err;
06f976ca 1544 struct ifaddrmsg *ifa;
ebc73a67 1545 struct nl_handler nlh;
19a26f82 1546 struct nlmsghdr *msg;
ebc73a67
CB
1547 int readmore = 0, recv_len = 0;
1548 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
1549
1550 err = netlink_open(&nlh, NETLINK_ROUTE);
1551 if (err)
1552 return err;
1553
1554 err = -ENOMEM;
1555 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1556 if (!nlmsg)
1557 goto out;
1558
06f976ca 1559 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
1560 if (!answer)
1561 goto out;
1562
ebc73a67
CB
1563 /* Save the answer buffer length, since it will be overwritten on the
1564 * first receive (and we might need to receive more than once).
1565 */
06f976ca
SZ
1566 answer_len = answer->nlmsghdr->nlmsg_len;
1567
ebc73a67 1568 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 1569 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 1570
06f976ca 1571 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
1572 if (!ifa)
1573 goto out;
06f976ca 1574 ifa->ifa_family = family;
19a26f82 1575
ebc73a67
CB
1576 /* Send the request for addresses, which returns all addresses on all
1577 * interfaces.
1578 */
19a26f82
MK
1579 err = netlink_send(&nlh, nlmsg);
1580 if (err < 0)
1581 goto out;
19a26f82
MK
1582
1583 do {
1584 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1585 * overwritten by a previous receive.
1586 */
06f976ca 1587 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 1588
ebc73a67 1589 /* Get the (next) batch of reply messages. */
19a26f82
MK
1590 err = netlink_rcv(&nlh, answer);
1591 if (err < 0)
1592 goto out;
1593
1594 recv_len = err;
1595 err = 0;
1596
ebc73a67 1597 /* Satisfy the typing for the netlink macros. */
06f976ca 1598 msg = answer->nlmsghdr;
19a26f82
MK
1599
1600 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 1601 /* Stop reading if we see an error message. */
19a26f82 1602 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1603 struct nlmsgerr *errmsg =
1604 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
1605 err = errmsg->error;
1606 goto out;
1607 }
1608
ebc73a67 1609 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
1610 if (msg->nlmsg_type == NLMSG_DONE) {
1611 readmore = 0;
1612 break;
1613 }
1614
1615 if (msg->nlmsg_type != RTM_NEWADDR) {
1616 err = -1;
1617 goto out;
1618 }
1619
06f976ca
SZ
1620 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
1621 if (ifa->ifa_index == ifindex) {
1622 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
1623 err = -1;
1624 goto out;
1625 }
1626
ebc73a67 1627 /* Found a result, stop searching. */
19a26f82
MK
1628 if (*res)
1629 goto out;
1630 }
1631
ebc73a67
CB
1632 /* Keep reading more data from the socket if the last
1633 * message had the NLF_F_MULTI flag set.
1634 */
19a26f82
MK
1635 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1636
ebc73a67 1637 /* Look at the next message received in this buffer. */
19a26f82
MK
1638 msg = NLMSG_NEXT(msg, recv_len);
1639 }
1640 } while (readmore);
1641
1642 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
1643 * error.
1644 */
19a26f82
MK
1645 err = -1;
1646
1647out:
1648 netlink_close(&nlh);
1649 nlmsg_free(answer);
1650 nlmsg_free(nlmsg);
1651 return err;
1652}
1653
/* IPv6 front end of ip_addr_get(); see that function for how *res is filled
 * and for the return values.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
1658
/* IPv4 front end of ip_addr_get(); see that function for how *res is filled
 * and for the return values.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
1663
f8fee0e2
MK
1664static int ip_gateway_add(int family, int ifindex, void *gw)
1665{
ebc73a67 1666 int addrlen, err;
f8fee0e2 1667 struct nl_handler nlh;
06f976ca 1668 struct rtmsg *rt;
ebc73a67 1669 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 1670
ebc73a67
CB
1671 addrlen = family == AF_INET ? sizeof(struct in_addr)
1672 : sizeof(struct in6_addr);
f8fee0e2
MK
1673
1674 err = netlink_open(&nlh, NETLINK_ROUTE);
1675 if (err)
1676 return err;
1677
1678 err = -ENOMEM;
1679 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1680 if (!nlmsg)
1681 goto out;
1682
06f976ca 1683 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
1684 if (!answer)
1685 goto out;
1686
06f976ca 1687 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1688 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1689 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
1690
1691 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
1692 if (!rt)
1693 goto out;
06f976ca
SZ
1694 rt->rtm_family = family;
1695 rt->rtm_table = RT_TABLE_MAIN;
1696 rt->rtm_scope = RT_SCOPE_UNIVERSE;
1697 rt->rtm_protocol = RTPROT_BOOT;
1698 rt->rtm_type = RTN_UNICAST;
f8fee0e2 1699 /* "default" destination */
06f976ca 1700 rt->rtm_dst_len = 0;
f8fee0e2
MK
1701
1702 err = -EINVAL;
1703 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
1704 goto out;
1705
1706 /* Adding the interface index enables the use of link-local
ebc73a67
CB
1707 * addresses for the gateway.
1708 */
f8fee0e2
MK
1709 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
1710 goto out;
1711
1712 err = netlink_transaction(&nlh, nlmsg, answer);
1713out:
1714 netlink_close(&nlh);
1715 nlmsg_free(answer);
1716 nlmsg_free(nlmsg);
1717 return err;
1718}
1719
/* IPv4 front end of ip_gateway_add(); see that function for the return
 * values.
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
1724
/* IPv6 front end of ip_gateway_add(); see that function for the return
 * values.
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
1729
77dcf03a
GL
1730static int ip_route_dest_add(int family, int ifindex, void *dest)
1731{
ebc73a67 1732 int addrlen, err;
77dcf03a 1733 struct nl_handler nlh;
06f976ca 1734 struct rtmsg *rt;
ebc73a67 1735 struct nlmsg *answer = NULL, *nlmsg = NULL;
acf47e1b 1736
ebc73a67
CB
1737 addrlen = family == AF_INET ? sizeof(struct in_addr)
1738 : sizeof(struct in6_addr);
acf47e1b 1739
77dcf03a
GL
1740 err = netlink_open(&nlh, NETLINK_ROUTE);
1741 if (err)
1742 return err;
acf47e1b 1743
77dcf03a
GL
1744 err = -ENOMEM;
1745 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1746 if (!nlmsg)
1747 goto out;
acf47e1b 1748
06f976ca 1749 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
77dcf03a
GL
1750 if (!answer)
1751 goto out;
acf47e1b 1752
06f976ca 1753 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1754 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1755 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
1756
1757 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
1758 if (!rt)
1759 goto out;
06f976ca
SZ
1760 rt->rtm_family = family;
1761 rt->rtm_table = RT_TABLE_MAIN;
1762 rt->rtm_scope = RT_SCOPE_LINK;
1763 rt->rtm_protocol = RTPROT_BOOT;
1764 rt->rtm_type = RTN_UNICAST;
ebc73a67 1765 rt->rtm_dst_len = addrlen * 8;
acf47e1b 1766
77dcf03a
GL
1767 err = -EINVAL;
1768 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
1769 goto out;
1770 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
1771 goto out;
1772 err = netlink_transaction(&nlh, nlmsg, answer);
1773out:
1774 netlink_close(&nlh);
1775 nlmsg_free(answer);
1776 nlmsg_free(nlmsg);
1777 return err;
1778}
1779
/* IPv4 front end of ip_route_dest_add(); see that function for the return
 * values.
 */
int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest)
{
	const int family = AF_INET;

	return ip_route_dest_add(family, ifindex, dest);
}
1784
/* IPv6 front end of ip_route_dest_add(); see that function for the return
 * values.
 */
int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest)
{
	const int family = AF_INET6;

	return ip_route_dest_add(family, ifindex, dest);
}
1789
/* Report whether @bridge is considered an openvswitch bridge.
 *
 * The decision is made purely by probing sysfs: true is returned exactly when
 * stat() on /sys/class/net/<bridge>/bridge fails with ENOENT. If that path
 * exists, if stat() fails for any other reason, or if the path does not fit
 * into the local buffer, false is returned.
 */
bool is_ovs_bridge(const char *bridge)
{
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char brdirname[22 + IFNAMSIZ + 1] = {0};
	struct stat sb;
	int len;

	len = snprintf(brdirname, sizeof(brdirname), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(brdirname))
		return false;

	if (stat(brdirname, &sb) >= 0)
		return false;

	return errno == ENOENT;
}
1807
581c75e7
CB
/* Argument bundle handed through run_command() to the callbacks that exec
 * ovs-vsctl.
 */
struct ovs_veth_args {
	const char *bridge; /* passed to ovs-vsctl right after the sub-command */
	const char *nic;    /* passed to ovs-vsctl after the bridge name */
};
1812
cb0dc11b
CB
1813/* Called from a background thread - when nic goes away, remove it from the
1814 * bridge.
c43cbc04 1815 */
581c75e7 1816static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 1817{
581c75e7 1818 struct ovs_veth_args *args = data;
cb0dc11b 1819
581c75e7
CB
1820 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
1821 (char *)NULL);
1822 return -1;
c43cbc04
SH
1823}
1824
581c75e7 1825int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 1826{
c43cbc04 1827 int ret;
581c75e7
CB
1828 char cmd_output[MAXPATHLEN];
1829 struct ovs_veth_args args;
6ad22d06 1830
581c75e7
CB
1831 args.bridge = bridge;
1832 args.nic = nic;
1833 ret = run_command(cmd_output, sizeof(cmd_output),
1834 lxc_ovs_delete_port_exec, (void *)&args);
1835 if (ret < 0) {
1836 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
1837 "%s", bridge, nic, cmd_output);
6ad22d06 1838 return -1;
581c75e7 1839 }
0d204771 1840
581c75e7
CB
1841 return 0;
1842}
ebc73a67 1843
581c75e7
CB
1844static int lxc_ovs_attach_bridge_exec(void *data)
1845{
1846 struct ovs_veth_args *args = data;
ebc73a67 1847
581c75e7
CB
1848 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
1849 (char *)NULL);
1850 return -1;
1851}
ebc73a67 1852
581c75e7
CB
1853static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
1854{
1855 int ret;
1856 char cmd_output[MAXPATHLEN];
1857 struct ovs_veth_args args;
ebc73a67 1858
581c75e7
CB
1859 args.bridge = bridge;
1860 args.nic = nic;
1861 ret = run_command(cmd_output, sizeof(cmd_output),
1862 lxc_ovs_attach_bridge_exec, (void *)&args);
1863 if (ret < 0) {
1864 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
1865 bridge, nic, cmd_output);
1866 return -1;
c43cbc04 1867 }
0d204771 1868
581c75e7 1869 return 0;
0d204771 1870}
0d204771 1871
581c75e7 1872int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 1873{
ebc73a67 1874 int err, fd, index;
0ad19a3f 1875 struct ifreq ifr;
1876
dae3fdf6 1877 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 1878 return -EINVAL;
0ad19a3f 1879
1880 index = if_nametoindex(ifname);
1881 if (!index)
3cfc0f3a 1882 return -EINVAL;
0ad19a3f 1883
0d204771 1884 if (is_ovs_bridge(bridge))
581c75e7 1885 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 1886
0ad19a3f 1887 fd = socket(AF_INET, SOCK_STREAM, 0);
1888 if (fd < 0)
3cfc0f3a 1889 return -errno;
0ad19a3f 1890
ebc73a67
CB
1891 strncpy(ifr.ifr_name, bridge, IFNAMSIZ - 1);
1892 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 1893 ifr.ifr_ifindex = index;
7d163508 1894 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 1895 close(fd);
3cfc0f3a
MN
1896 if (err)
1897 err = -errno;
0ad19a3f 1898
1899 return err;
1900}
72d0e1cb 1901
ebc73a67 1902static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 1903 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
1904 [LXC_NET_VETH] = "veth",
1905 [LXC_NET_MACVLAN] = "macvlan",
72d0e1cb 1906 [LXC_NET_PHYS] = "phys",
b343592b
BP
1907 [LXC_NET_VLAN] = "vlan",
1908 [LXC_NET_NONE] = "none",
72d0e1cb
SG
1909};
1910
1911const char *lxc_net_type_to_str(int type)
1912{
1913 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
1914 return NULL;
ebc73a67 1915
72d0e1cb
SG
1916 return lxc_network_types[type];
1917}
8befa924 1918
static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/* Build an interface name from @template by replacing every 'X' with a random
 * character from padchar, repeating until the result does not match the name
 * of any entry returned by getifaddrs().
 *
 * Returns a malloc()ed string that the caller must free(), or NULL when
 * @template is too long for an interface name, the interface list cannot be
 * obtained, memory cannot be allocated, or @template contains no 'X' and
 * already names an existing interface (no other candidate can be generated).
 */
char *lxc_mkifname(const char *template)
{
	int ifexists = 0;
	size_t i = 0;
	size_t len;
	char *name = NULL;
	unsigned int seed;
	FILE *urandom;
	struct ifaddrs *ifa, *ifaddr = NULL;
	/* As before, the last character of padchar is never selected. */
	const size_t npad = strlen(padchar) - 1;

	len = strlen(template);
	if (len >= IFNAMSIZ)
		return NULL;

	/* Get all the network interfaces. Without the list the uniqueness of
	 * a candidate cannot be checked.
	 */
	if (getifaddrs(&ifaddr) < 0)
		return NULL;

	name = malloc(len + 1);
	if (name == NULL) {
		freeifaddrs(ifaddr);
		return NULL;
	}

	/* Initialize the random number generator. */
	urandom = fopen("/dev/urandom", "r");
	if (urandom != NULL) {
		if (fread(&seed, sizeof(seed), 1, urandom) <= 0)
			seed = time(0);
		fclose(urandom);
	} else {
		seed = time(0);
	}

#ifndef HAVE_RAND_R
	srand(seed);
#endif

	/* Generate random names until we find one that doesn't exist. */
	for (;;) {
		int replaced = 0;

		ifexists = 0;
		memcpy(name, template, len + 1);

		for (i = 0; i < len; i++) {
			if (name[i] != 'X')
				continue;

			replaced = 1;
#ifdef HAVE_RAND_R
			name[i] = padchar[rand_r(&seed) % npad];
#else
			name[i] = padchar[rand() % npad];
#endif
		}

		for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
			if (strcmp(ifa->ifa_name, name) == 0) {
				ifexists = 1;
				break;
			}
		}

		if (ifexists == 0)
			break;

		/* A template without placeholders always yields the same
		 * name, so retrying would never terminate.
		 */
		if (!replaced) {
			free(name);
			name = NULL;
			break;
		}
	}

	freeifaddrs(ifaddr);
	return name;
}
1985
8befa924
SH
/* Rewrite the hardware address of the interface @veth1 so that its first byte
 * is 0xfe: the current address is read with SIOCGIFHWADDR, modified and
 * written back with SIOCSIFHWADDR.
 *
 * Returns 0 on success, -E2BIG when @veth1 does not fit into an interface
 * name, and a negative errno value when socket() or one of the ioctls fails.
 */
int setup_private_host_hw_addr(char *veth1)
{
	int err, sockfd;
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));

	/* Validate the name before opening the socket so that this error path
	 * has nothing to clean up.
	 */
	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ)
		return -E2BIG;

	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
	if (sockfd < 0)
		return -errno;

	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err < 0) {
		/* Read errno before close() can overwrite it. */
		err = -errno;
		goto out;
	}

	ifr.ifr_hwaddr.sa_data[0] = 0xfe;
	err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	if (err < 0)
		err = -errno;
	else
		err = 0;

out:
	close(sockfd);
	return err;
}
811ef482
CB
2013
2014int lxc_find_gateway_addresses(struct lxc_handler *handler)
2015{
2016 struct lxc_list *network = &handler->conf->network;
2017 struct lxc_list *iterator;
2018 struct lxc_netdev *netdev;
2019 int link_index;
2020
2021 lxc_list_for_each(iterator, network) {
2022 netdev = iterator->elem;
2023
2024 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2025 continue;
2026
2027 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2028 ERROR("Automatic gateway detection is only supported "
2029 "for veth and macvlan");
2030 return -1;
2031 }
2032
de4855a8 2033 if (netdev->link[0] == '\0') {
811ef482
CB
2034 ERROR("Automatic gateway detection needs a link interface");
2035 return -1;
2036 }
2037
2038 link_index = if_nametoindex(netdev->link);
2039 if (!link_index)
2040 return -EINVAL;
2041
2042 if (netdev->ipv4_gateway_auto) {
2043 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2044 ERROR("Failed to automatically find ipv4 gateway "
2045 "address from link interface \"%s\"", netdev->link);
2046 return -1;
2047 }
2048 }
2049
2050 if (netdev->ipv6_gateway_auto) {
2051 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2052 ERROR("Failed to automatically find ipv6 gateway "
2053 "address from link interface \"%s\"", netdev->link);
2054 return -1;
2055 }
2056 }
2057 }
2058
2059 return 0;
2060}
2061
2062#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
74c6e2b0
CB
2063static int lxc_create_network_unpriv_exec(const char *lxcpath, char *lxcname,
2064 struct lxc_netdev *netdev, pid_t pid)
811ef482
CB
2065{
2066 int ret;
2067 pid_t child;
2068 int bytes, pipefd[2];
2069 char *token, *saveptr = NULL;
2070 char netdev_link[IFNAMSIZ + 1];
2071 char buffer[MAXPATHLEN] = {0};
2072
2073 if (netdev->type != LXC_NET_VETH) {
2074 ERROR("Network type %d not support for unprivileged use", netdev->type);
2075 return -1;
2076 }
2077
2078 ret = pipe(pipefd);
2079 if (ret < 0) {
2080 SYSERROR("Failed to create pipe");
2081 return -1;
2082 }
2083
2084 child = fork();
2085 if (child < 0) {
2086 SYSERROR("Failed to create new process");
2087 close(pipefd[0]);
2088 close(pipefd[1]);
2089 return -1;
2090 }
2091
2092 if (child == 0) {
2093 int ret;
2094 char pidstr[LXC_NUMSTRLEN64];
2095
2096 close(pipefd[0]);
2097
2098 ret = dup2(pipefd[1], STDOUT_FILENO);
2099 if (ret >= 0)
2100 ret = dup2(pipefd[1], STDERR_FILENO);
2101 close(pipefd[1]);
2102 if (ret < 0) {
2103 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2104 exit(EXIT_FAILURE);
2105 }
2106
de4855a8 2107 if (netdev->link[0] != '\0')
811ef482
CB
2108 strncpy(netdev_link, netdev->link, IFNAMSIZ);
2109 else
2110 strncpy(netdev_link, "none", IFNAMSIZ);
2111
2112 ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
2113 if (ret < 0 || ret >= LXC_NUMSTRLEN64)
2114 exit(EXIT_FAILURE);
2115 pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
2116
2117 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2118 lxcname, pidstr, netdev_link,
de4855a8
CB
2119 netdev->name[0] != '\0' ? netdev->name : "(null)");
2120 if (netdev->name[0] != '\0')
811ef482
CB
2121 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2122 lxcpath, lxcname, pidstr, "veth", netdev_link,
2123 netdev->name, (char *)NULL);
2124 else
2125 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2126 lxcpath, lxcname, pidstr, "veth", netdev_link,
2127 (char *)NULL);
2128 SYSERROR("Failed to execute lxc-user-nic");
2129 exit(EXIT_FAILURE);
2130 }
2131
2132 /* close the write-end of the pipe */
2133 close(pipefd[1]);
2134
2135 bytes = read(pipefd[0], &buffer, MAXPATHLEN);
2136 if (bytes < 0) {
74c6e2b0 2137 SYSERROR("Failed to read from pipe file descriptor");
811ef482
CB
2138 close(pipefd[0]);
2139 return -1;
2140 }
2141 buffer[bytes - 1] = '\0';
2142
2143 ret = wait_for_pid(child);
2144 close(pipefd[0]);
2145 if (ret != 0) {
2146 ERROR("lxc-user-nic failed to configure requested network: %s",
2147 buffer[0] != '\0' ? buffer : "(null)");
2148 return -1;
2149 }
2150 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2151
2152 /* netdev->name */
2153 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2154 if (!token) {
2155 ERROR("Failed to parse lxc-user-nic output");
811ef482 2156 return -1;
74c6e2b0 2157 }
811ef482 2158
811ef482
CB
2159 memset(netdev->name, 0, IFNAMSIZ + 1);
2160 strncpy(netdev->name, token, IFNAMSIZ);
2161
74c6e2b0 2162 /* netdev->ifindex */
811ef482 2163 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2164 if (!token) {
2165 ERROR("Failed to parse lxc-user-nic output");
811ef482 2166 return -1;
74c6e2b0 2167 }
811ef482 2168
74c6e2b0
CB
2169 ret = lxc_safe_int(token, &netdev->ifindex);
2170 if (ret < 0) {
2171 ERROR("%s - Failed to convert string \"%s\" to integer",
2172 strerror(-ret), token);
811ef482
CB
2173 return -1;
2174 }
2175
74c6e2b0 2176 /* netdev->priv.veth_attr.veth1 */
811ef482 2177 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2178 if (!token) {
2179 ERROR("Failed to parse lxc-user-nic output");
811ef482 2180 return -1;
74c6e2b0 2181 }
811ef482 2182
74c6e2b0
CB
2183 if (strlen(token) >= IFNAMSIZ) {
2184 ERROR("Host side veth device name returned by lxc-user-nic is "
2185 "too long");
2186 return -E2BIG;
2187 }
2188 strcpy(netdev->priv.veth_attr.veth1, token);
2189
2190 /* netdev->priv.veth_attr.ifindex */
2191 token = strtok_r(NULL, ":", &saveptr);
2192 if (!token) {
2193 ERROR("Failed to parse lxc-user-nic output");
2194 return -1;
2195 }
2196
2197 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2198 if (ret < 0) {
74c6e2b0
CB
2199 ERROR("%s - Failed to convert string \"%s\" to integer",
2200 strerror(-ret), token);
811ef482
CB
2201 return -1;
2202 }
2203
2204 return 0;
2205}
2206
1bd8d726
CB
2207static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
2208 struct lxc_netdev *netdev,
2209 const char *netns_path)
811ef482
CB
2210{
2211 int bytes, ret;
2212 pid_t child;
2213 int pipefd[2];
2214 char buffer[MAXPATHLEN] = {0};
2215
2216 if (netdev->type != LXC_NET_VETH) {
2217 ERROR("Network type %d not support for unprivileged use", netdev->type);
2218 return -1;
2219 }
2220
2221 ret = pipe(pipefd);
2222 if (ret < 0) {
2223 SYSERROR("Failed to create pipe");
2224 return -1;
2225 }
2226
2227 child = fork();
2228 if (child < 0) {
2229 SYSERROR("Failed to create new process");
2230 close(pipefd[0]);
2231 close(pipefd[1]);
2232 return -1;
2233 }
2234
2235 if (child == 0) {
8843fde4 2236 char *hostveth;
811ef482 2237 int ret;
811ef482
CB
2238
2239 close(pipefd[0]);
2240
2241 ret = dup2(pipefd[1], STDOUT_FILENO);
2242 if (ret >= 0)
2243 ret = dup2(pipefd[1], STDERR_FILENO);
2244 close(pipefd[1]);
2245 if (ret < 0) {
2246 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2247 exit(EXIT_FAILURE);
2248 }
2249
8843fde4
CB
2250 if (netdev->priv.veth_attr.pair[0] != '\0')
2251 hostveth = netdev->priv.veth_attr.pair;
2252 else
2253 hostveth = netdev->priv.veth_attr.veth1;
2254 if (hostveth[0] == '\0') {
74c6e2b0
CB
2255 SYSERROR("Host side veth device name is missing");
2256 exit(EXIT_FAILURE);
2257 }
2258
de4855a8 2259 if (netdev->link[0] == '\0') {
811ef482 2260 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0
CB
2261 "missing", netdev->priv.veth_attr.veth1);
2262 exit(EXIT_FAILURE);
2263 }
811ef482 2264
811ef482 2265 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2266 lxcname, netns_path, netdev->link, hostveth);
811ef482 2267 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2268 lxcname, netns_path, "veth", netdev->link, hostveth,
2269 (char *)NULL);
811ef482
CB
2270 SYSERROR("Failed to exec lxc-user-nic.");
2271 exit(EXIT_FAILURE);
2272 }
2273
2274 close(pipefd[1]);
2275
2276 bytes = read(pipefd[0], &buffer, MAXPATHLEN);
2277 if (bytes < 0) {
2278 SYSERROR("Failed to read from pipe file descriptor.");
2279 close(pipefd[0]);
2280 return -1;
2281 }
2282 buffer[bytes - 1] = '\0';
2283
2284 if (wait_for_pid(child) != 0) {
2285 ERROR("lxc-user-nic failed to delete requested network: %s",
2286 buffer[0] != '\0' ? buffer : "(null)");
2287 close(pipefd[0]);
2288 return -1;
2289 }
2290
2291 close(pipefd[0]);
2292
2293 return 0;
2294}
2295
1bd8d726
CB
2296bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2297{
2298 int ret;
2299 struct lxc_list *iterator;
2300 struct lxc_list *network = &handler->conf->network;
2301 /* strlen("/proc/") = 6
2302 * +
2303 * LXC_NUMSTRLEN64
2304 * +
2305 * strlen("/fd/") = 4
2306 * +
2307 * LXC_NUMSTRLEN64
2308 * +
2309 * \0
2310 */
2311 char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1];
2312 bool deleted_all = true;
2313
d0fbc7ba 2314 if (handler->am_root)
1bd8d726
CB
2315 return true;
2316
2317 *netns_path = '\0';
2318
2319 if (handler->netnsfd < 0) {
2320 DEBUG("Cannot not guarantee safe deletion of network devices. "
2321 "Manual cleanup maybe needed");
2322 return false;
2323 }
2324
2325 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
2326 getpid(), handler->netnsfd);
2327 if (ret < 0 || ret >= sizeof(netns_path))
2328 return false;
2329
2330 lxc_list_for_each(iterator, network) {
2331 char *hostveth = NULL;
2332 struct lxc_netdev *netdev = iterator->elem;
2333
2334 /* We can only delete devices whose ifindex we have. If we don't
2335 * have the index it means that we didn't create it.
2336 */
2337 if (!netdev->ifindex)
2338 continue;
2339
2340 if (netdev->type == LXC_NET_PHYS) {
2341 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2342 netdev->link);
2343 if (ret < 0)
2344 WARN("Failed to rename interface with index %d "
2345 "to its initial name \"%s\"",
2346 netdev->ifindex, netdev->link);
2347 else
2348 TRACE("Renamed interface with index %d to its "
2349 "initial name \"%s\"",
2350 netdev->ifindex, netdev->link);
2351 continue;
2352 }
2353
2354 ret = netdev_deconf[netdev->type](handler, netdev);
2355 if (ret < 0)
2356 WARN("Failed to deconfigure network device");
2357
2358 if (netdev->type != LXC_NET_VETH)
2359 continue;
2360
2361 if (!is_ovs_bridge(netdev->link))
2362 continue;
2363
8843fde4
CB
2364 if (netdev->priv.veth_attr.pair[0] != '\0')
2365 hostveth = netdev->priv.veth_attr.pair;
2366 else
2367 hostveth = netdev->priv.veth_attr.veth1;
2368 if (hostveth[0] == '\0')
2369 continue;
2370
1bd8d726
CB
2371 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2372 handler->name, netdev,
2373 netns_path);
2374 if (ret < 0) {
2375 deleted_all = false;
2376 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2377 "bridge \"%s\"", hostveth, netdev->link);
1bd8d726
CB
2378 continue;
2379 }
2380 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2381 netdev->link);
2382 }
2383
2384 return deleted_all;
2385}
2386
811ef482
CB
2387int lxc_create_network_priv(struct lxc_handler *handler)
2388{
811ef482
CB
2389 struct lxc_list *iterator;
2390 struct lxc_list *network = &handler->conf->network;
2391
d0fbc7ba 2392 if (!handler->am_root)
811ef482
CB
2393 return 0;
2394
2395 lxc_list_for_each(iterator, network) {
2396 struct lxc_netdev *netdev = iterator->elem;
2397
2398 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
2399 ERROR("Invalid network configuration type %d", netdev->type);
2400 return -1;
2401 }
2402
2403 if (netdev_conf[netdev->type](handler, netdev)) {
2404 ERROR("Failed to create network device");
2405 return -1;
2406 }
2407
2408 }
2409
2410 return 0;
2411}
2412
74c6e2b0
CB
2413int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
2414 struct lxc_list *network, pid_t pid)
811ef482 2415{
535e8859 2416 int ret;
811ef482
CB
2417 char ifname[IFNAMSIZ];
2418 struct lxc_list *iterator;
2419
74c6e2b0
CB
2420 if (am_unpriv())
2421 return 0;
811ef482
CB
2422
2423 lxc_list_for_each(iterator, network) {
2424 struct lxc_netdev *netdev = iterator->elem;
2425
811ef482
CB
2426 if (!netdev->ifindex)
2427 continue;
2428
2429 /* retrieve the name of the interface */
2430 if (!if_indextoname(netdev->ifindex, ifname)) {
2431 ERROR("No interface corresponding to ifindex \"%d\"",
2432 netdev->ifindex);
2433 return -1;
2434 }
2435
535e8859
CB
2436 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
2437 if (ret) {
811ef482
CB
2438 ERROR("Failed to move network device \"%s\" to "
2439 "network namespace %d: %s", ifname, pid,
535e8859 2440 strerror(-ret));
811ef482
CB
2441 return -1;
2442 }
2443
2444 DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
790255cf 2445 "of %d",
535e8859 2446 ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
811ef482
CB
2447 pid);
2448 }
2449
2450 return 0;
2451}
2452
74c6e2b0
CB
2453int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
2454 struct lxc_list *network, pid_t pid)
2455{
2456 struct lxc_list *iterator;
2457
2458 if (!am_unpriv())
2459 return 0;
2460
2461 lxc_list_for_each(iterator, network) {
2462 struct lxc_netdev *netdev = iterator->elem;
2463
2464 if (netdev->type == LXC_NET_EMPTY)
2465 continue;
2466
2467 if (netdev->type == LXC_NET_NONE)
2468 continue;
2469
2470 if (netdev->type != LXC_NET_VETH) {
2471 ERROR("Networks of type %s are not supported by "
2472 "unprivileged containers",
2473 lxc_net_type_to_str(netdev->type));
2474 return -1;
2475 }
2476
2477 if (netdev->mtu)
2478 INFO("mtu ignored due to insufficient privilege");
2479
2480 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev, pid))
2481 return -1;
2482 }
2483
2484 return 0;
2485}
2486
1bd8d726 2487bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
2488{
2489 int ret;
2490 struct lxc_list *iterator;
2491 struct lxc_list *network = &handler->conf->network;
2492 bool deleted_all = true;
2493
d0fbc7ba 2494 if (!handler->am_root)
1bd8d726
CB
2495 return true;
2496
811ef482
CB
2497 lxc_list_for_each(iterator, network) {
2498 char *hostveth = NULL;
2499 struct lxc_netdev *netdev = iterator->elem;
2500
2501 /* We can only delete devices whose ifindex we have. If we don't
2502 * have the index it means that we didn't create it.
2503 */
2504 if (!netdev->ifindex)
2505 continue;
2506
2507 if (netdev->type == LXC_NET_PHYS) {
2508 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
2509 if (ret < 0)
2510 WARN("Failed to rename interface with index %d "
b809f232
CB
2511 "from \"%s\" to its initial name \"%s\"",
2512 netdev->ifindex, netdev->name, netdev->link);
811ef482 2513 else
29589196
CB
2514 TRACE("Renamed interface with index %d from "
2515 "\"%s\" to its initial name \"%s\"",
2516 netdev->ifindex, netdev->name,
2517 netdev->link);
811ef482
CB
2518 continue;
2519 }
2520
2521 ret = netdev_deconf[netdev->type](handler, netdev);
2522 if (ret < 0)
2523 WARN("Failed to deconfigure network device");
2524
2525 /* Recent kernels remove the virtual interfaces when the network
2526 * namespace is destroyed but in case we did not move the
2527 * interface to the network namespace, we have to destroy it.
2528 */
1bd8d726
CB
2529 ret = lxc_netdev_delete_by_index(netdev->ifindex);
2530 if (-ret == ENODEV) {
2531 INFO("Interface \"%s\" with index %d already "
2532 "deleted or existing in different network "
2533 "namespace",
de4855a8 2534 netdev->name[0] != '\0' ? netdev->name : "(null)",
1bd8d726
CB
2535 netdev->ifindex);
2536 } else if (ret < 0) {
2537 deleted_all = false;
2538 WARN("Failed to remove interface \"%s\" with "
2539 "index %d: %s",
de4855a8 2540 netdev->name[0] != '\0' ? netdev->name : "(null)",
1bd8d726
CB
2541 netdev->ifindex, strerror(-ret));
2542 continue;
811ef482 2543 }
1bd8d726 2544 INFO("Removed interface \"%s\" with index %d",
de4855a8 2545 netdev->name[0] != '\0' ? netdev->name : "(null)",
1bd8d726 2546 netdev->ifindex);
811ef482
CB
2547
2548 if (netdev->type != LXC_NET_VETH)
2549 continue;
2550
811ef482
CB
2551 /* Explicitly delete host veth device to prevent lingering
2552 * devices. We had issues in LXD around this.
2553 */
de4855a8 2554 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
2555 hostveth = netdev->priv.veth_attr.pair;
2556 else
2557 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 2558 if (hostveth[0] == '\0')
811ef482
CB
2559 continue;
2560
2561 ret = lxc_netdev_delete_by_name(hostveth);
2562 if (ret < 0) {
2563 deleted_all = false;
2564 WARN("Failed to remove interface \"%s\" from \"%s\": %s",
2565 hostveth, netdev->link, strerror(-ret));
2566 continue;
2567 }
2568 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
2569
2570 if (!is_ovs_bridge(netdev->link)) {
2571 netdev->priv.veth_attr.veth1[0] = '\0';
2572 continue;
2573 }
2574
2575 /* Delete the openvswitch port. */
2576 ret = lxc_ovs_delete_port(netdev->link, hostveth);
2577 if (ret < 0)
2578 WARN("Failed to remove port \"%s\" from openvswitch "
2579 "bridge \"%s\"", hostveth, netdev->link);
2580 else
2581 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
2582 hostveth, netdev->link);
2583
2584 netdev->priv.veth_attr.veth1[0] = '\0';
2585 }
2586
2587 return deleted_all;
2588}
2589
2590int lxc_requests_empty_network(struct lxc_handler *handler)
2591{
2592 struct lxc_list *network = &handler->conf->network;
2593 struct lxc_list *iterator;
2594 bool found_none = false, found_nic = false;
2595
2596 if (lxc_list_empty(network))
2597 return 0;
2598
2599 lxc_list_for_each(iterator, network) {
2600 struct lxc_netdev *netdev = iterator->elem;
2601
2602 if (netdev->type == LXC_NET_NONE)
2603 found_none = true;
2604 else
2605 found_nic = true;
2606 }
2607 if (found_none && !found_nic)
2608 return 1;
2609 return 0;
2610}
2611
2612/* try to move physical nics to the init netns */
b809f232 2613int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
2614{
2615 int ret;
b809f232 2616 int oldfd;
811ef482 2617 char ifname[IFNAMSIZ];
b809f232
CB
2618 struct lxc_list *iterator;
2619 int netnsfd = handler->netnsfd;
2620 struct lxc_conf *conf = handler->conf;
811ef482 2621
b809f232
CB
2622 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
2623 * the parent network namespace. We won't have this capability if we are
2624 * unprivileged.
2625 */
d0fbc7ba 2626 if (!handler->am_root)
b809f232 2627 return 0;
811ef482 2628
b809f232 2629 TRACE("Moving physical network devices back to parent network namespace");
811ef482
CB
2630
2631 oldfd = lxc_preserve_ns(getpid(), "net");
2632 if (oldfd < 0) {
2633 SYSERROR("Failed to preserve network namespace");
b809f232 2634 return -1;
811ef482
CB
2635 }
2636
b809f232 2637 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
2638 if (ret < 0) {
2639 SYSERROR("Failed to enter network namespace");
2640 close(oldfd);
b809f232 2641 return -1;
811ef482
CB
2642 }
2643
b809f232
CB
2644 lxc_list_for_each(iterator, &conf->network) {
2645 struct lxc_netdev *netdev = iterator->elem;
811ef482 2646
b809f232
CB
2647 if (netdev->type != LXC_NET_PHYS)
2648 continue;
2649
2650 /* Retrieve the name of the interface in the container's network
2651 * namespace.
2652 */
2653 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 2654 WARN("No interface corresponding to ifindex %d",
b809f232 2655 netdev->ifindex);
811ef482
CB
2656 continue;
2657 }
b809f232
CB
2658
2659 ret = lxc_netdev_move_by_name(ifname, 1, netdev->link);
2660 if (ret < 0)
811ef482
CB
2661 WARN("Error moving network device \"%s\" back to "
2662 "network namespace", ifname);
b809f232
CB
2663 else
2664 TRACE("Moved network device \"%s\" back to network "
2665 "namespace", ifname);
811ef482 2666 }
811ef482 2667
b809f232 2668 ret = setns(oldfd, CLONE_NEWNET);
811ef482 2669 close(oldfd);
b809f232
CB
2670 if (ret < 0) {
2671 SYSERROR("Failed to enter network namespace");
2672 return -1;
2673 }
2674
2675 return 0;
811ef482
CB
2676}
2677
2678static int setup_hw_addr(char *hwaddr, const char *ifname)
2679{
2680 struct sockaddr sockaddr;
2681 struct ifreq ifr;
2682 int ret, fd, saved_errno;
2683
2684 ret = lxc_convert_mac(hwaddr, &sockaddr);
2685 if (ret) {
2686 ERROR("Mac address \"%s\" conversion failed: %s", hwaddr,
2687 strerror(-ret));
2688 return -1;
2689 }
2690
2691 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
2692 ifr.ifr_name[IFNAMSIZ-1] = '\0';
2693 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
2694
2695 fd = socket(AF_INET, SOCK_DGRAM, 0);
2696 if (fd < 0)
2697 return -1;
2698
2699 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
2700 saved_errno = errno;
2701 close(fd);
2702 if (ret)
2703 ERROR("Failed to perform ioctl: %s", strerror(saved_errno));
2704
2705 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
2706 ifr.ifr_name);
2707
2708 return ret;
2709}
2710
2711static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
2712{
2713 struct lxc_list *iterator;
2714 int err;
2715
2716 lxc_list_for_each(iterator, ip) {
2717 struct lxc_inetdev *inetdev = iterator->elem;
2718
2719 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
2720 &inetdev->bcast, inetdev->prefix);
2721 if (err) {
2722 ERROR("Failed to setup ipv4 address for network device "
2723 "with eifindex %d: %s", ifindex, strerror(-err));
2724 return -1;
2725 }
2726 }
2727
2728 return 0;
2729}
2730
2731static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
2732{
2733 struct lxc_list *iterator;
2734 int err;
2735
2736 lxc_list_for_each(iterator, ip) {
2737 struct lxc_inet6dev *inet6dev = iterator->elem;
2738
2739 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
2740 &inet6dev->mcast, &inet6dev->acast,
2741 inet6dev->prefix);
2742 if (err) {
2743 ERROR("Failed to setup ipv6 address for network device "
2744 "with eifindex %d: %s", ifindex, strerror(-err));
2745 return -1;
2746 }
2747 }
2748
2749 return 0;
2750}
2751
2752static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
2753{
2754 char ifname[IFNAMSIZ];
2755 int err;
2756 const char *net_type_name;
2757 char *current_ifname = ifname;
2758
2759 /* empty network namespace */
2760 if (!netdev->ifindex) {
2761 if (netdev->flags & IFF_UP) {
2762 err = lxc_netdev_up("lo");
2763 if (err) {
2764 ERROR("Failed to set the loopback network "
2765 "device up: %s",
2766 strerror(-err));
2767 return -1;
2768 }
2769 }
2770
2771 if (netdev->type == LXC_NET_EMPTY)
2772 return 0;
2773
2774 if (netdev->type == LXC_NET_NONE)
2775 return 0;
2776
2777 if (netdev->type != LXC_NET_VETH) {
2778 net_type_name = lxc_net_type_to_str(netdev->type);
2779 ERROR("%s networks are not supported for containers "
535e8859 2780 "not setup up by privileged users", net_type_name);
811ef482
CB
2781 return -1;
2782 }
2783
2784 netdev->ifindex = if_nametoindex(netdev->name);
2785 }
2786
2787 /* get the new ifindex in case of physical netdev */
2788 if (netdev->type == LXC_NET_PHYS) {
2789 netdev->ifindex = if_nametoindex(netdev->link);
2790 if (!netdev->ifindex) {
2791 ERROR("Failed to get ifindex for network device \"%s\"",
2792 netdev->link);
2793 return -1;
2794 }
2795 }
2796
2797 /* retrieve the name of the interface */
2798 if (!if_indextoname(netdev->ifindex, current_ifname)) {
2799 ERROR("Failed get name for network device with ifindex %d",
2800 netdev->ifindex);
2801 return -1;
2802 }
2803
2804 /* Default: let the system to choose one interface name.
2805 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
2806 * netlink will replace the format specifier with an appropriate index.
2807 */
de4855a8
CB
2808 if (netdev->name[0] == '\0') {
2809 if (netdev->type == LXC_NET_PHYS)
2810 strcpy(netdev->name, netdev->link);
2811 else
2812 strcpy(netdev->name, "eth%d");
2813 }
811ef482
CB
2814
2815 /* rename the interface name */
2816 if (strcmp(ifname, netdev->name) != 0) {
2817 err = lxc_netdev_rename_by_name(ifname, netdev->name);
2818 if (err) {
2819 ERROR("Failed to rename network device \"%s\" to "
2820 "\"%s\": %s", ifname, netdev->name, strerror(-err));
2821 return -1;
2822 }
2823 }
2824
2825 /* Re-read the name of the interface because its name has changed
2826 * and would be automatically allocated by the system
2827 */
2828 if (!if_indextoname(netdev->ifindex, current_ifname)) {
2829 ERROR("Failed get name for network device with ifindex %d",
2830 netdev->ifindex);
2831 return -1;
2832 }
2833
790255cf
CB
2834 /* Now update the recorded name of the network device to reflect the
2835 * name of the network device in the child's network namespace. We will
2836 * later on send this information back to the parent.
2837 */
2838 strcpy(netdev->name, current_ifname);
2839
811ef482
CB
2840 /* set a mac address */
2841 if (netdev->hwaddr) {
2842 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
2843 ERROR("Failed to setup hw address for network device \"%s\"",
2844 current_ifname);
2845 return -1;
2846 }
2847 }
2848
2849 /* setup ipv4 addresses on the interface */
2850 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
2851 ERROR("Failed to setup ip addresses for network device \"%s\"",
2852 ifname);
2853 return -1;
2854 }
2855
2856 /* setup ipv6 addresses on the interface */
2857 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
2858 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
2859 ifname);
2860 return -1;
2861 }
2862
2863 /* set the network device up */
2864 if (netdev->flags & IFF_UP) {
2865 int err;
2866
2867 err = lxc_netdev_up(current_ifname);
2868 if (err) {
2869 ERROR("Failed to set network device \"%s\" up: %s",
2870 current_ifname, strerror(-err));
2871 return -1;
2872 }
2873
2874 /* the network is up, make the loopback up too */
2875 err = lxc_netdev_up("lo");
2876 if (err) {
2877 ERROR("Failed to set the loopback network device up: %s",
2878 strerror(-err));
2879 return -1;
2880 }
2881 }
2882
2883 /* We can only set up the default routes after bringing
2884 * up the interface, sine bringing up the interface adds
2885 * the link-local routes and we can't add a default
2886 * route if the gateway is not reachable. */
2887
2888 /* setup ipv4 gateway on the interface */
2889 if (netdev->ipv4_gateway) {
2890 if (!(netdev->flags & IFF_UP)) {
2891 ERROR("Cannot add ipv4 gateway for network device "
2892 "\"%s\" when not bringing up the interface", ifname);
2893 return -1;
2894 }
2895
2896 if (lxc_list_empty(&netdev->ipv4)) {
2897 ERROR("Cannot add ipv4 gateway for network device "
2898 "\"%s\" when not assigning an address", ifname);
2899 return -1;
2900 }
2901
2902 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2903 if (err) {
2904 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
2905 if (err) {
2906 ERROR("Failed to add ipv4 dest for network "
2907 "device \"%s\": %s", ifname, strerror(-err));
2908 }
2909
2910 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
2911 if (err) {
2912 ERROR("Failed to setup ipv4 gateway for "
2913 "network device \"%s\": %s",
2914 ifname, strerror(-err));
2915 if (netdev->ipv4_gateway_auto) {
2916 char buf[INET_ADDRSTRLEN];
2917 inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
2918 ERROR("Fried to set autodetected ipv4 gateway \"%s\"", buf);
2919 }
2920 return -1;
2921 }
2922 }
2923 }
2924
2925 /* setup ipv6 gateway on the interface */
2926 if (netdev->ipv6_gateway) {
2927 if (!(netdev->flags & IFF_UP)) {
2928 ERROR("Cannot add ipv6 gateway for network device "
2929 "\"%s\" when not bringing up the interface", ifname);
2930 return -1;
2931 }
2932
2933 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
2934 ERROR("Cannot add ipv6 gateway for network device "
2935 "\"%s\" when not assigning an address", ifname);
2936 return -1;
2937 }
2938
2939 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2940 if (err) {
2941 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
2942 if (err) {
2943 ERROR("Failed to add ipv6 dest for network "
2944 "device \"%s\": %s", ifname, strerror(-err));
2945 }
2946
2947 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
2948 if (err) {
2949 ERROR("Failed to setup ipv6 gateway for "
2950 "network device \"%s\": %s", ifname,
2951 strerror(-err));
2952 if (netdev->ipv6_gateway_auto) {
2953 char buf[INET6_ADDRSTRLEN];
2954 inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
2955 ERROR("Tried to set autodetected ipv6 "
2956 "gateway for network device "
2957 "\"%s\"", buf);
2958 }
2959 return -1;
2960 }
2961 }
2962 }
2963
74c6e2b0 2964 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
2965
2966 return 0;
2967}
2968
2969int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
2970 struct lxc_list *network)
2971{
2972 struct lxc_list *iterator;
2973 struct lxc_netdev *netdev;
2974
811ef482
CB
2975 lxc_list_for_each(iterator, network) {
2976 netdev = iterator->elem;
2977
2978 /* REMOVE in LXC 3.0 */
2979 if (netdev->idx < 0) {
2980 ERROR("WARNING: using \"lxc.network.*\" keys to define "
2981 "networks is DEPRECATED, please switch to using "
2982 "\"lxc.net.[i].* keys\"");
2983 }
2984
2985 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
2986 ERROR("failed to setup netdev");
2987 return -1;
2988 }
2989 }
2990
2991 if (!lxc_list_empty(network))
2992 INFO("network has been setup");
2993
2994 return 0;
2995}
7ab1ba02
CB
2996
2997int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
2998{
2999 struct lxc_list *iterator;
3000 struct lxc_list *network = &handler->conf->network;
3001 int data_sock = handler->data_sock[0];
3002
d0fbc7ba 3003 if (handler->am_root)
7ab1ba02
CB
3004 return 0;
3005
3006 lxc_list_for_each(iterator, network) {
3007 int ret;
3008 struct lxc_netdev *netdev = iterator->elem;
3009
3010 if (netdev->type != LXC_NET_VETH)
3011 continue;
3012
a1ae535a 3013 ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
7ab1ba02
CB
3014 if (ret < 0) {
3015 close(handler->data_sock[0]);
3016 close(handler->data_sock[1]);
3017 return -1;
3018 } else {
3019 TRACE("Sent network device name \"%s\" to child",
3020 netdev->name);
3021 }
3022 }
3023
3024 return 0;
3025}
3026
3027int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3028{
3029 struct lxc_list *iterator;
3030 struct lxc_list *network = &handler->conf->network;
3031 int data_sock = handler->data_sock[1];
3032
d0fbc7ba 3033 if (handler->am_root)
7ab1ba02
CB
3034 return 0;
3035
3036 lxc_list_for_each(iterator, network) {
3037 int ret;
3038 struct lxc_netdev *netdev = iterator->elem;
3039
3040 if (netdev->type != LXC_NET_VETH)
3041 continue;
3042
a1ae535a 3043 ret = recv(data_sock, netdev->name, IFNAMSIZ, 0);
7ab1ba02
CB
3044 if (ret < 0) {
3045 close(handler->data_sock[0]);
3046 close(handler->data_sock[1]);
3047 return -1;
3048 } else {
3049 TRACE("Received network device name \"%s\" from parent",
3050 netdev->name);
3051 }
3052 }
3053
3054 return 0;
3055}
a1ae535a
CB
3056
3057int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3058{
3059 struct lxc_list *iterator, *network;
3060 int data_sock = handler->data_sock[0];
3061
3062 if (!handler->am_root)
3063 return 0;
3064
3065 network = &handler->conf->network;
3066 lxc_list_for_each(iterator, network) {
3067 int ret;
3068 struct lxc_netdev *netdev = iterator->elem;
3069
3070 /* Send network device name in the child's namespace to parent. */
3071 ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
3072 if (ret < 0)
3073 goto on_error;
3074
3075 /* Send network device ifindex in the child's namespace to
3076 * parent.
3077 */
3078 ret = send(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
3079 if (ret < 0)
3080 goto on_error;
3081 }
3082
3083 TRACE("Sent network device names and ifindeces to parent");
3084 return 0;
3085
3086on_error:
3087 close(handler->data_sock[0]);
3088 close(handler->data_sock[1]);
3089 return -1;
3090}
3091
3092int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3093{
3094 struct lxc_list *iterator, *network;
3095 int data_sock = handler->data_sock[1];
3096
3097 if (!handler->am_root)
3098 return 0;
3099
3100 network = &handler->conf->network;
3101 lxc_list_for_each(iterator, network) {
3102 int ret;
3103 struct lxc_netdev *netdev = iterator->elem;
3104
3105 /* Receive network device name in the child's namespace to
3106 * parent.
3107 */
3108 ret = recv(data_sock, netdev->name, IFNAMSIZ, 0);
3109 if (ret < 0)
3110 goto on_error;
3111
3112 /* Receive network device ifindex in the child's namespace to
3113 * parent.
3114 */
3115 ret = recv(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
3116 if (ret < 0)
3117 goto on_error;
3118 }
3119
3120 return 0;
3121
3122on_error:
3123 close(handler->data_sock[0]);
3124 close(handler->data_sock[1]);
3125 return -1;
3126}