]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: Adds mode param (bridge, router) to veth network setting
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
0ad19a3f 1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
0ad19a3f 8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0ad19a3f 22 */
cb0dc11b 23
d38dd64a
CB
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
27#include <arpa/inet.h>
cb0dc11b
CB
28#include <ctype.h>
29#include <errno.h>
30#include <fcntl.h>
0ad19a3f 31#include <linux/netlink.h>
32#include <linux/rtnetlink.h>
33#include <linux/sockios.h>
cb0dc11b
CB
34#include <net/ethernet.h>
35#include <net/if.h>
36#include <net/if_arp.h>
37#include <netinet/in.h>
d38dd64a
CB
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
cb0dc11b
CB
41#include <sys/inotify.h>
42#include <sys/ioctl.h>
43#include <sys/param.h>
44#include <sys/socket.h>
45#include <sys/stat.h>
46#include <sys/types.h>
d38dd64a
CB
47#include <time.h>
48#include <unistd.h>
f549edcc 49
d38dd64a 50#include "../include/netns_ifaddrs.h"
7ab1ba02 51#include "af_unix.h"
72d0e1cb 52#include "conf.h"
811ef482 53#include "config.h"
e3233f26 54#include "file_utils.h"
cb0dc11b 55#include "log.h"
8335fd40 56#include "macro.h"
95ea3d1f 57#include "memory_utils.h"
cb0dc11b
CB
58#include "network.h"
59#include "nl.h"
d7b58715 60#include "raw_syscalls.h"
59524108 61#include "syscall_wrappers.h"
0d204771 62#include "utils.h"
0ad19a3f 63
9de31d5a
CB
64#ifndef HAVE_STRLCPY
65#include "include/strlcpy.h"
66#endif
67
/* Logging setup for this file (arguments: "network", "lxc"); see log.h. */
lxc_log_define(network, lxc);

/*
 * Signature shared by the per-network-type callbacks stored in the
 * netdev_conf[] and netdev_deconf[] tables.
 */
typedef int (*instantiate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);

/* Name of the loopback network device. */
static const char loop_device[] = "lo";
811ef482 72
b670016a 73static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 74{
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121}
122
/*
 * Add an IPv4 route for dest/netmask through the interface ifindex.
 * Returns whatever lxc_ip_route_dest() returns for an RTM_NEWROUTE request.
 */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_NEWROUTE;

	return lxc_ip_route_dest(request, AF_INET, ifindex, dest, netmask);
}
127
/*
 * Add an IPv6 route for dest/netmask through the interface ifindex.
 * Returns whatever lxc_ip_route_dest() returns for an RTM_NEWROUTE request.
 */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_NEWROUTE;

	return lxc_ip_route_dest(request, AF_INET6, ifindex, dest, netmask);
}
132
/*
 * Remove the IPv4 route for dest/netmask through the interface ifindex.
 * Returns whatever lxc_ip_route_dest() returns for an RTM_DELROUTE request.
 */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_DELROUTE;

	return lxc_ip_route_dest(request, AF_INET, ifindex, dest, netmask);
}
137
/*
 * Remove the IPv6 route for dest/netmask through the interface ifindex.
 * Returns whatever lxc_ip_route_dest() returns for an RTM_DELROUTE request.
 */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_DELROUTE;

	return lxc_ip_route_dest(request, AF_INET6, ifindex, dest, netmask);
}
142
d4a7da46 143static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144{
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160}
161
162static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163{
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179}
180
811ef482
CB
181static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182{
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
de4855a8 188 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
d34212ad
CB
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
811ef482
CB
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
6d1400b5 215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482
CB
217 goto out_delete;
218 }
219
24190194
CB
220 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
221
811ef482
CB
222 /* changing the high byte of the mac address to 0xfe, the bridge interface
223 * will always keep the host's mac address and not take the mac address
224 * of a container */
225 err = setup_private_host_hw_addr(veth1);
226 if (err) {
6d1400b5 227 errno = -err;
228 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
229 goto out_delete;
230 }
231
8da62485
CB
232 /* Retrieve ifindex of the host's veth device. */
233 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
234 if (!netdev->priv.veth_attr.ifindex) {
235 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
236 goto out_delete;
237 }
238
239 /* Note that we're retrieving the container's ifindex in the host's
240 * network namespace because we need it to move the device from the
241 * host's network namespace to the container's network namespace later
242 * on.
243 */
811ef482
CB
244 netdev->ifindex = if_nametoindex(veth2);
245 if (!netdev->ifindex) {
246 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
247 goto out_delete;
248 }
249
250 if (netdev->mtu) {
251 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
252 WARN("Failed to parse mtu");
253 else
254 INFO("Retrieved mtu %d", mtu);
de4855a8 255 } else if (netdev->link[0] != '\0') {
811ef482
CB
256 bridge_index = if_nametoindex(netdev->link);
257 if (bridge_index) {
258 mtu = netdev_get_mtu(bridge_index);
259 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
260 } else {
261 mtu = netdev_get_mtu(netdev->ifindex);
262 INFO("Retrieved mtu %d from %s", mtu, veth2);
263 }
264 }
265
266 if (mtu) {
267 err = lxc_netdev_set_mtu(veth1, mtu);
268 if (!err)
269 err = lxc_netdev_set_mtu(veth2, mtu);
6d1400b5 270
811ef482 271 if (err) {
6d1400b5 272 errno = -err;
273 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
274 "and \"%s\"", mtu, veth1, veth2);
811ef482
CB
275 goto out_delete;
276 }
277 }
278
de4855a8 279 if (netdev->link[0] != '\0') {
811ef482
CB
280 err = lxc_bridge_attach(netdev->link, veth1);
281 if (err) {
6d1400b5 282 errno = -err;
283 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
284 veth1, netdev->link);
811ef482
CB
285 goto out_delete;
286 }
287 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
288 }
289
290 err = lxc_netdev_up(veth1);
291 if (err) {
6d1400b5 292 errno = -err;
293 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
294 goto out_delete;
295 }
296
d4a7da46 297 /* setup ipv4 routes on the host interface */
298 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
299 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
300 goto out_delete;
301 }
302
303 /* setup ipv6 routes on the host interface */
304 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
305 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
306 goto out_delete;
307 }
308
811ef482 309 if (netdev->upscript) {
14a7b0f9
CB
310 char *argv[] = {
311 "veth",
312 netdev->link,
990b9ac3 313 veth1,
14a7b0f9
CB
314 NULL,
315 };
316
317 err = run_script_argv(handler->name,
318 handler->conf->hooks_version, "net",
319 netdev->upscript, "up", argv);
320 if (err < 0)
811ef482
CB
321 goto out_delete;
322 }
323
324 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
325 netdev->ifindex);
326
327 return 0;
328
329out_delete:
330 if (netdev->ifindex != 0)
331 lxc_netdev_delete_by_name(veth1);
811ef482
CB
332 return -1;
333}
334
335static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
336{
8021de25 337 char peer[IFNAMSIZ];
811ef482 338 int err;
3bef7b7b 339 unsigned int mtu = 0;
811ef482 340
de4855a8 341 if (netdev->link[0] == '\0') {
811ef482
CB
342 ERROR("No link for macvlan network device specified");
343 return -1;
344 }
345
8021de25
CB
346 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
347 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
348 return -1;
349
8021de25 350 if (!lxc_mkifname(peer))
811ef482
CB
351 return -1;
352
353 err = lxc_macvlan_create(netdev->link, peer,
354 netdev->priv.macvlan_attr.mode);
355 if (err) {
6d1400b5 356 errno = -err;
357 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
358 peer, netdev->link);
966e9f1f 359 goto on_error;
811ef482
CB
360 }
361
a9704f05
CB
362 strlcpy(netdev->created_name, peer, IFNAMSIZ);
363
811ef482
CB
364 netdev->ifindex = if_nametoindex(peer);
365 if (!netdev->ifindex) {
366 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 367 goto on_error;
811ef482
CB
368 }
369
3bef7b7b
TP
370 if (netdev->mtu) {
371 err = lxc_safe_uint(netdev->mtu, &mtu);
372 if (err < 0) {
373 errno = -err;
374 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
375 goto on_error;
376 }
377
378 err = lxc_netdev_set_mtu(peer, mtu);
379 if (err < 0) {
380 errno = -err;
381 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
382 goto on_error;
383 }
384 }
385
811ef482 386 if (netdev->upscript) {
14a7b0f9
CB
387 char *argv[] = {
388 "macvlan",
389 netdev->link,
390 NULL,
391 };
392
393 err = run_script_argv(handler->name,
394 handler->conf->hooks_version, "net",
395 netdev->upscript, "up", argv);
396 if (err < 0)
966e9f1f 397 goto on_error;
811ef482
CB
398 }
399
400 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
401 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
402
403 return 0;
966e9f1f
CB
404
405on_error:
811ef482 406 lxc_netdev_delete_by_name(peer);
811ef482
CB
407 return -1;
408}
409
c9f52382 410static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
411{
412 int err, index, len;
413 struct ifinfomsg *ifi;
414 struct nl_handler nlh;
415 struct rtattr *nest, *nest2;
416 struct nlmsg *answer = NULL, *nlmsg = NULL;
417
418 len = strlen(master);
419 if (len == 1 || len >= IFNAMSIZ)
420 return minus_one_set_errno(EINVAL);
421
422 len = strlen(name);
423 if (len == 1 || len >= IFNAMSIZ)
424 return minus_one_set_errno(EINVAL);
425
426 index = if_nametoindex(master);
427 if (!index)
428 return minus_one_set_errno(EINVAL);
429
430 err = netlink_open(&nlh, NETLINK_ROUTE);
431 if (err)
432 return minus_one_set_errno(-err);
433
434 err = -ENOMEM;
435 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
436 if (!nlmsg)
437 goto out;
438
439 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
440 if (!answer)
441 goto out;
442
443 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
444 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
445
446 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
447 if (!ifi) {
448 goto out;
449 }
450 ifi->ifi_family = AF_UNSPEC;
451
452 err = -EPROTO;
453 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
454 if (!nest)
455 goto out;
456
457 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
458 goto out;
459
460 if (mode) {
461 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
462 if (!nest2)
463 goto out;
464
465 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
466 goto out;
467
468 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
469 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
470 */
471 if (isolation > 0) {
472 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
473 goto out;
474 }
475
476 nla_end_nested(nlmsg, nest2);
477 }
478
479 nla_end_nested(nlmsg, nest);
480
481 if (nla_put_u32(nlmsg, IFLA_LINK, index))
482 goto out;
483
484 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
485 goto out;
486
487 err = netlink_transaction(&nlh, nlmsg, answer);
488out:
489 netlink_close(&nlh);
490 nlmsg_free(answer);
491 nlmsg_free(nlmsg);
492 if (err < 0)
493 return minus_one_set_errno(-err);
494 return 0;
495}
496
497static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
498{
dd119206 499 char peer[IFNAMSIZ];
c9f52382 500 int err;
006e135e 501 unsigned int mtu = 0;
c9f52382 502
503 if (netdev->link[0] == '\0') {
504 ERROR("No link for ipvlan network device specified");
505 return -1;
506 }
507
dd119206
CB
508 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
509 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 510 return -1;
511
dd119206 512 if (!lxc_mkifname(peer))
c9f52382 513 return -1;
514
dd119206
CB
515 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
516 netdev->priv.ipvlan_attr.isolation);
c9f52382 517 if (err) {
dd119206
CB
518 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
519 peer, netdev->link);
c9f52382 520 goto on_error;
521 }
522
e7fdd504
CB
523 strlcpy(netdev->created_name, peer, IFNAMSIZ);
524
c9f52382 525 netdev->ifindex = if_nametoindex(peer);
526 if (!netdev->ifindex) {
527 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
528 goto on_error;
529 }
530
006e135e 531 if (netdev->mtu) {
532 err = lxc_safe_uint(netdev->mtu, &mtu);
533 if (err < 0) {
534 errno = -err;
dd119206
CB
535 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
536 netdev->mtu, peer);
006e135e 537 goto on_error;
538 }
539
540 err = lxc_netdev_set_mtu(peer, mtu);
541 if (err < 0) {
542 errno = -err;
dd119206
CB
543 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
544 netdev->mtu, peer);
006e135e 545 goto on_error;
546 }
547 }
548
c9f52382 549 if (netdev->upscript) {
550 char *argv[] = {
551 "ipvlan",
552 netdev->link,
553 NULL,
554 };
555
dd119206
CB
556 err = run_script_argv(handler->name, handler->conf->hooks_version,
557 "net", netdev->upscript, "up", argv);
c9f52382 558 if (err < 0)
559 goto on_error;
560 }
561
dd119206
CB
562 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
563 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 564
565 return 0;
566
567on_error:
568 lxc_netdev_delete_by_name(peer);
569 return -1;
570}
571
811ef482
CB
572static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
573{
574 char peer[IFNAMSIZ];
575 int err;
576 static uint16_t vlan_cntr = 0;
577 unsigned int mtu = 0;
578
de4855a8 579 if (netdev->link[0] == '\0') {
811ef482
CB
580 ERROR("No link for vlan network device specified");
581 return -1;
582 }
583
d4d68410
CB
584 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
585 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
586 if (err < 0 || (size_t)err >= sizeof(peer))
587 return -1;
588
589 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
590 if (err) {
6d1400b5 591 errno = -err;
592 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
593 peer, netdev->link);
811ef482
CB
594 return -1;
595 }
596
83530dba
CB
597 strlcpy(netdev->created_name, peer, IFNAMSIZ);
598
811ef482
CB
599 netdev->ifindex = if_nametoindex(peer);
600 if (!netdev->ifindex) {
601 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 602 goto on_error;
603 }
604
605 if (netdev->mtu) {
606 err = lxc_safe_uint(netdev->mtu, &mtu);
607 if (err < 0) {
608 errno = -err;
d4d68410
CB
609 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
610 netdev->mtu, peer);
3e2a7b08 611 goto on_error;
612 }
613
614 err = lxc_netdev_set_mtu(peer, mtu);
615 if (err) {
616 errno = -err;
d4d68410
CB
617 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
618 netdev->mtu, peer);
3e2a7b08 619 goto on_error;
620 }
811ef482
CB
621 }
622
3a73d9f1 623 if (netdev->upscript) {
624 char *argv[] = {
625 "vlan",
626 netdev->link,
627 NULL,
628 };
629
d4d68410
CB
630 err = run_script_argv(handler->name, handler->conf->hooks_version,
631 "net", netdev->upscript, "up", argv);
19abca58 632 if (err < 0) {
3e2a7b08 633 goto on_error;
19abca58 634 }
3a73d9f1 635 }
636
d4d68410
CB
637 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
638 netdev->ifindex);
811ef482
CB
639
640 return 0;
3e2a7b08 641
642on_error:
643 lxc_netdev_delete_by_name(peer);
644 return -1;
811ef482
CB
645}
646
647static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
648{
0b154989 649 int err, mtu_orig = 0;
3bef7b7b 650 unsigned int mtu = 0;
14a7b0f9 651
de4855a8 652 if (netdev->link[0] == '\0') {
811ef482
CB
653 ERROR("No link for physical interface specified");
654 return -1;
655 }
656
75b074ee
CB
657 /*
658 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
659 * network namespace because we need it to move the device from the
660 * host's network namespace to the container's network namespace later
661 * on.
662 * Note that netdev->link will contain the name of the physical network
663 * device in the host's namespace.
664 */
811ef482
CB
665 netdev->ifindex = if_nametoindex(netdev->link);
666 if (!netdev->ifindex) {
667 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
668 return -1;
669 }
670
61302ef7
CB
671 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
672
75b074ee
CB
673 /*
674 * Store the ifindex of the host's network device in the host's
790255cf
CB
675 * namespace.
676 */
677 netdev->priv.phys_attr.ifindex = netdev->ifindex;
678
75b074ee
CB
679 /*
680 * Get original device MTU setting and store for restoration after
681 * container shutdown.
682 */
0b154989
TP
683 mtu_orig = netdev_get_mtu(netdev->ifindex);
684 if (mtu_orig < 0) {
685 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
686 return minus_one_set_errno(-mtu_orig);
687 }
688
689 netdev->priv.phys_attr.mtu = mtu_orig;
690
3bef7b7b
TP
691 if (netdev->mtu) {
692 err = lxc_safe_uint(netdev->mtu, &mtu);
693 if (err < 0) {
694 errno = -err;
75b074ee
CB
695 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
696 netdev->mtu, netdev->link);
3bef7b7b
TP
697 return -1;
698 }
14a7b0f9 699
3bef7b7b
TP
700 err = lxc_netdev_set_mtu(netdev->link, mtu);
701 if (err < 0) {
702 errno = -err;
75b074ee
CB
703 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
704 netdev->mtu, netdev->link);
3bef7b7b
TP
705 return -1;
706 }
707 }
708
709 if (netdev->upscript) {
710 char *argv[] = {
711 "phys",
712 netdev->link,
713 NULL,
714 };
715
75b074ee
CB
716 err = run_script_argv(handler->name, handler->conf->hooks_version,
717 "net", netdev->upscript, "up", argv);
3bef7b7b
TP
718 if (err < 0) {
719 return -1;
720 }
721 }
722
75b074ee
CB
723 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
724 netdev->ifindex);
811ef482
CB
725
726 return 0;
727}
728
729static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
730{
14a7b0f9
CB
731 int ret;
732 char *argv[] = {
733 "empty",
734 NULL,
735 };
736
811ef482 737 netdev->ifindex = 0;
14a7b0f9
CB
738 if (!netdev->upscript)
739 return 0;
740
741 ret = run_script_argv(handler->name, handler->conf->hooks_version,
742 "net", netdev->upscript, "up", argv);
743 if (ret < 0)
744 return -1;
745
811ef482
CB
746 return 0;
747}
748
749static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
750{
751 netdev->ifindex = 0;
752 return 0;
753}
754
755static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
756 [LXC_NET_VETH] = instantiate_veth,
757 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 758 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
759 [LXC_NET_VLAN] = instantiate_vlan,
760 [LXC_NET_PHYS] = instantiate_phys,
761 [LXC_NET_EMPTY] = instantiate_empty,
762 [LXC_NET_NONE] = instantiate_none,
763};
764
765static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
766{
14a7b0f9
CB
767 int ret;
768 char *argv[] = {
769 "veth",
770 netdev->link,
771 NULL,
772 NULL,
773 };
774
775 if (!netdev->downscript)
776 return 0;
811ef482 777
de4855a8 778 if (netdev->priv.veth_attr.pair[0] != '\0')
14a7b0f9 779 argv[2] = netdev->priv.veth_attr.pair;
811ef482 780 else
14a7b0f9
CB
781 argv[2] = netdev->priv.veth_attr.veth1;
782
783 ret = run_script_argv(handler->name,
784 handler->conf->hooks_version, "net",
785 netdev->downscript, "down", argv);
786 if (ret < 0)
787 return -1;
811ef482 788
811ef482
CB
789 return 0;
790}
791
792static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
793{
14a7b0f9
CB
794 int ret;
795 char *argv[] = {
796 "macvlan",
797 netdev->link,
798 NULL,
799 };
800
801 if (!netdev->downscript)
802 return 0;
803
804 ret = run_script_argv(handler->name, handler->conf->hooks_version,
805 "net", netdev->downscript, "down", argv);
806 if (ret < 0)
807 return -1;
811ef482 808
811ef482
CB
809 return 0;
810}
811
c9f52382 812static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
813{
814 int ret;
815 char *argv[] = {
816 "ipvlan",
817 netdev->link,
818 NULL,
819 };
820
821 if (!netdev->downscript)
822 return 0;
823
824 ret = run_script_argv(handler->name, handler->conf->hooks_version,
825 "net", netdev->downscript, "down", argv);
826 if (ret < 0)
827 return -1;
828
829 return 0;
830}
831
811ef482
CB
832static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
833{
3a73d9f1 834 int ret;
835 char *argv[] = {
836 "vlan",
837 netdev->link,
838 NULL,
839 };
840
841 if (!netdev->downscript)
842 return 0;
843
844 ret = run_script_argv(handler->name, handler->conf->hooks_version,
845 "net", netdev->downscript, "down", argv);
846 if (ret < 0)
847 return -1;
848
811ef482
CB
849 return 0;
850}
851
852static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
853{
14a7b0f9
CB
854 int ret;
855 char *argv[] = {
856 "phys",
857 netdev->link,
858 NULL,
859 };
860
861 if (!netdev->downscript)
862 return 0;
863
864 ret = run_script_argv(handler->name, handler->conf->hooks_version,
865 "net", netdev->downscript, "down", argv);
866 if (ret < 0)
867 return -1;
811ef482 868
811ef482
CB
869 return 0;
870}
871
872static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
873{
14a7b0f9
CB
874 int ret;
875 char *argv[] = {
876 "empty",
877 NULL,
878 };
879
880 if (!netdev->downscript)
881 return 0;
882
883 ret = run_script_argv(handler->name, handler->conf->hooks_version,
884 "net", netdev->downscript, "down", argv);
885 if (ret < 0)
886 return -1;
811ef482 887
811ef482
CB
888 return 0;
889}
890
/* Shutdown callback for LXC_NET_NONE: there is nothing to do. Always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	/* Both parameters are unused; they are required by instantiate_cb. */
	(void)handler;
	(void)netdev;

	return 0;
}
895
896static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
897 [LXC_NET_VETH] = shutdown_veth,
898 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 899 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
900 [LXC_NET_VLAN] = shutdown_vlan,
901 [LXC_NET_PHYS] = shutdown_phys,
902 [LXC_NET_EMPTY] = shutdown_empty,
903 [LXC_NET_NONE] = shutdown_none,
904};
905
0037ab49
TP
906static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
907{
908 int err;
909 struct nl_handler nlh;
910 struct ifinfomsg *ifi;
911 struct nlmsg *nlmsg = NULL;
912
913 err = netlink_open(&nlh, NETLINK_ROUTE);
914 if (err)
915 return err;
916
917 err = -ENOMEM;
918 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
919 if (!nlmsg)
920 goto out;
921
922 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
923 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
924
925 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
926 if (!ifi)
927 goto out;
928 ifi->ifi_family = AF_UNSPEC;
929 ifi->ifi_index = ifindex;
930
931 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
932 goto out;
933
934 if (ifname != NULL) {
935 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
936 goto out;
937 }
938
939 err = netlink_transaction(&nlh, nlmsg, nlmsg);
940out:
941 netlink_close(&nlh);
942 nlmsg_free(nlmsg);
943 return err;
944}
945
ebc73a67 946int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 947{
ebc73a67 948 int err;
0ad19a3f 949 struct nl_handler nlh;
06f976ca 950 struct ifinfomsg *ifi;
ebc73a67 951 struct nlmsg *nlmsg = NULL;
0ad19a3f 952
3cfc0f3a
MN
953 err = netlink_open(&nlh, NETLINK_ROUTE);
954 if (err)
955 return err;
0ad19a3f 956
3cfc0f3a 957 err = -ENOMEM;
0ad19a3f 958 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
959 if (!nlmsg)
960 goto out;
961
ebc73a67 962 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
963 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
964
965 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
966 if (!ifi)
967 goto out;
06f976ca
SZ
968 ifi->ifi_family = AF_UNSPEC;
969 ifi->ifi_index = ifindex;
0ad19a3f 970
971 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
972 goto out;
973
8d357196
DY
974 if (ifname != NULL) {
975 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
976 goto out;
977 }
978
3cfc0f3a 979 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 980out:
981 netlink_close(&nlh);
982 nlmsg_free(nlmsg);
983 return err;
984}
985
ebc73a67
CB
986/* If we are asked to move a wireless interface, then we must actually move its
987 * phyN device. Detect that condition and return the physname here. The physname
988 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
989 */
990#define PHYSNAME "/sys/class/net/%s/phy80211/name"
ebc73a67 991static char *is_wlan(const char *ifname)
e5848d39 992{
b0293710 993 __do_free char *path = NULL;
ebc73a67 994 int i, ret;
e5848d39 995 long physlen;
ebc73a67 996 size_t len;
e5848d39 997 FILE *f;
ebc73a67 998 char *physname = NULL;
e5848d39 999
ebc73a67 1000 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1001 path = must_realloc(NULL, len + 1);
e5848d39 1002 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1003 if (ret < 0 || (size_t)ret >= len)
e5848d39 1004 goto bad;
ebc73a67 1005
ebc73a67
CB
1006 f = fopen(path, "r");
1007 if (!f)
e5848d39 1008 goto bad;
ebc73a67 1009
1a0e70ac 1010 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1011 fseek(f, 0, SEEK_END);
1012 physlen = ftell(f);
1013 fseek(f, 0, SEEK_SET);
7d1cde93
SX
1014 if (physlen < 0) {
1015 fclose(f);
0382c0da 1016 goto bad;
7d1cde93 1017 }
ebc73a67
CB
1018
1019 physname = malloc(physlen + 1);
ee54ea9a 1020 if (!physname) {
acf47e1b 1021 fclose(f);
e5848d39 1022 goto bad;
ee54ea9a 1023 }
ebc73a67
CB
1024
1025 memset(physname, 0, physlen + 1);
e5848d39
SH
1026 ret = fread(physname, 1, physlen, f);
1027 fclose(f);
1028 if (ret < 0)
1029 goto bad;
1030
ebc73a67 1031 for (i = 0; i < physlen; i++) {
e5848d39
SH
1032 if (physname[i] == '\n')
1033 physname[i] = '\0';
ebc73a67 1034
e5848d39
SH
1035 if (physname[i] == '\0')
1036 break;
1037 }
1038
1039 return physname;
1040
1041bad:
f10fad2f 1042 free(physname);
e5848d39
SH
1043 return NULL;
1044}
1045
ebc73a67
CB
/*
 * Rename the network device "old" to "new" from inside the network namespace
 * of the process "pid".
 *
 * The work is done in a forked child, so the caller's own namespace is not
 * changed. The child switches to the target namespace, performs the rename
 * and always terminates through _exit(). The parent returns -1 when fork()
 * fails and otherwise the result of wait_for_pid() for the child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on: it must never return into the caller's code,
	 * otherwise two processes would continue on the parent's code path.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	/*
	 * Only the low 8 bits of the status reach the parent, so map the
	 * rename result onto a plain success/failure exit code.
	 */
	_exit(lxc_netdev_rename_by_name(old, new) ? EXIT_FAILURE : EXIT_SUCCESS);
}
1063
ebc73a67
CB
/*
 * Move the wireless phy device "physname" into the network namespace of the
 * process "pid" by running "iw phy <physname> set netns <pid>" in a child
 * process, then - if newname is given - rename ifname to newname inside that
 * namespace.
 *
 * Takes ownership of physname: it is freed on every path. Returns 0 on
 * success, -1 when iw is not found on the path, the fork fails or the child
 * fails; otherwise the result of the rename.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	pid_t child;
	char *iw_path;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto out_free;
	/* Only the presence of iw matters; execlp() does its own lookup. */
	free(iw_path);

	child = fork();
	if (child < 0)
		goto out_free;

	if (child == 0) {
		char pidstr[30];

		snprintf(pidstr, sizeof(pidstr), "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		/* Only reached when execlp() failed. */
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child) != 0)
		goto out_free;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

out_free:
	free(physname);
	return ret;
}
1103
/*
 * Move the network device "ifname" into the network namespace of the process
 * "pid", optionally renaming it to "newname".
 *
 * Devices for which is_wlan() returns a phy name are handed to
 * lxc_netdev_move_wlan(); all others are moved by index. Returns -EINVAL when
 * ifname is NULL or has no ifindex, otherwise the result of the chosen move
 * function.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *phy;
	int ifindex;

	if (ifname == NULL)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (ifindex == 0)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (!phy)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	/* lxc_netdev_move_wlan() frees phy. */
	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
1122
b84f58b9 1123int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1124{
b84f58b9 1125 int err;
ebc73a67
CB
1126 struct ifinfomsg *ifi;
1127 struct nl_handler nlh;
1128 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1129
3cfc0f3a
MN
1130 err = netlink_open(&nlh, NETLINK_ROUTE);
1131 if (err)
1132 return err;
0ad19a3f 1133
3cfc0f3a 1134 err = -ENOMEM;
0ad19a3f 1135 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1136 if (!nlmsg)
1137 goto out;
1138
06f976ca 1139 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1140 if (!answer)
1141 goto out;
1142
ebc73a67 1143 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1144 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1145
1146 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1147 if (!ifi)
1148 goto out;
06f976ca
SZ
1149 ifi->ifi_family = AF_UNSPEC;
1150 ifi->ifi_index = ifindex;
0ad19a3f 1151
3cfc0f3a 1152 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1153out:
1154 netlink_close(&nlh);
1155 nlmsg_free(answer);
1156 nlmsg_free(nlmsg);
1157 return err;
1158}
1159
b84f58b9
DL
/* Resolve @name to an interface index and delete that interface.
 *
 * Returns -EINVAL if @name does not resolve to an index, otherwise the
 * result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1170
1171int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1172{
ebc73a67 1173 int err, len;
06f976ca 1174 struct ifinfomsg *ifi;
ebc73a67
CB
1175 struct nl_handler nlh;
1176 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 1177
3cfc0f3a
MN
1178 err = netlink_open(&nlh, NETLINK_ROUTE);
1179 if (err)
1180 return err;
b9a5bb58 1181
b84f58b9 1182 len = strlen(newname);
90d79629
CB
1183 if (len == 1 || len >= IFNAMSIZ) {
1184 err = -EINVAL;
b84f58b9 1185 goto out;
90d79629 1186 }
b84f58b9 1187
3cfc0f3a 1188 err = -ENOMEM;
b9a5bb58
DL
1189 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1190 if (!nlmsg)
1191 goto out;
1192
06f976ca 1193 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
1194 if (!answer)
1195 goto out;
1196
ebc73a67 1197 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1198 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1199
1200 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1201 if (!ifi)
1202 goto out;
06f976ca
SZ
1203 ifi->ifi_family = AF_UNSPEC;
1204 ifi->ifi_index = ifindex;
b84f58b9
DL
1205
1206 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1207 goto out;
b9a5bb58 1208
3cfc0f3a 1209 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
1210out:
1211 netlink_close(&nlh);
1212 nlmsg_free(answer);
1213 nlmsg_free(nlmsg);
1214 return err;
1215}
1216
b84f58b9
DL
1217int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1218{
1219 int len, index;
1220
1221 len = strlen(oldname);
dae3fdf6 1222 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1223 return -EINVAL;
1224
1225 index = if_nametoindex(oldname);
1226 if (!index)
1227 return -EINVAL;
1228
1229 return lxc_netdev_rename_by_index(index, newname);
1230}
1231
8befa924 1232int netdev_set_flag(const char *name, int flag)
0ad19a3f 1233{
ebc73a67 1234 int err, index, len;
06f976ca 1235 struct ifinfomsg *ifi;
ebc73a67
CB
1236 struct nl_handler nlh;
1237 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1238
3cfc0f3a
MN
1239 err = netlink_open(&nlh, NETLINK_ROUTE);
1240 if (err)
1241 return err;
0ad19a3f 1242
3cfc0f3a 1243 err = -EINVAL;
0ad19a3f 1244 len = strlen(name);
dae3fdf6 1245 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1246 goto out;
1247
3cfc0f3a 1248 err = -ENOMEM;
0ad19a3f 1249 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1250 if (!nlmsg)
1251 goto out;
1252
06f976ca 1253 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1254 if (!answer)
1255 goto out;
1256
3cfc0f3a 1257 err = -EINVAL;
0ad19a3f 1258 index = if_nametoindex(name);
1259 if (!index)
1260 goto out;
1261
ebc73a67 1262 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1263 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1264
1265 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1266 if (!ifi) {
1267 err = -ENOMEM;
1268 goto out;
1269 }
06f976ca
SZ
1270 ifi->ifi_family = AF_UNSPEC;
1271 ifi->ifi_index = index;
1272 ifi->ifi_change |= IFF_UP;
1273 ifi->ifi_flags |= flag;
0ad19a3f 1274
1275 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1276out:
1277 netlink_close(&nlh);
1278 nlmsg_free(nlmsg);
1279 nlmsg_free(answer);
1280 return err;
1281}
1282
ebc73a67 1283int netdev_get_flag(const char *name, int *flag)
efa1cf45 1284{
ebc73a67 1285 int err, index, len;
a4318300 1286 struct ifinfomsg *ifi;
ebc73a67
CB
1287 struct nl_handler nlh;
1288 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
1289
1290 if (!name)
1291 return -EINVAL;
1292
1293 err = netlink_open(&nlh, NETLINK_ROUTE);
1294 if (err)
1295 return err;
1296
1297 err = -EINVAL;
1298 len = strlen(name);
1299 if (len == 1 || len >= IFNAMSIZ)
1300 goto out;
1301
1302 err = -ENOMEM;
1303 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1304 if (!nlmsg)
1305 goto out;
1306
06f976ca 1307 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
1308 if (!answer)
1309 goto out;
1310
1311 err = -EINVAL;
1312 index = if_nametoindex(name);
1313 if (!index)
1314 goto out;
1315
06f976ca
SZ
1316 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1317 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1318
1319 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1320 if (!ifi) {
1321 err = -ENOMEM;
1322 goto out;
1323 }
06f976ca
SZ
1324 ifi->ifi_family = AF_UNSPEC;
1325 ifi->ifi_index = index;
efa1cf45
DY
1326
1327 err = netlink_transaction(&nlh, nlmsg, answer);
1328 if (err)
1329 goto out;
1330
06f976ca 1331 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1332
1333 *flag = ifi->ifi_flags;
1334out:
1335 netlink_close(&nlh);
1336 nlmsg_free(nlmsg);
1337 nlmsg_free(answer);
1338 return err;
1339}
1340
1341/*
1342 * \brief Check a interface is up or not.
1343 *
1344 * \param name: name for the interface.
1345 *
1346 * \return int.
1347 * 0 means interface is down.
1348 * 1 means interface is up.
1349 * Others means error happened, and ret-value is the error number.
1350 */
ebc73a67 1351int lxc_netdev_isup(const char *name)
efa1cf45 1352{
ebc73a67 1353 int err, flag;
efa1cf45
DY
1354
1355 err = netdev_get_flag(name, &flag);
1356 if (err)
ebc73a67
CB
1357 return err;
1358
efa1cf45
DY
1359 if (flag & IFF_UP)
1360 return 1;
ebc73a67 1361
efa1cf45 1362 return 0;
efa1cf45
DY
1363}
1364
0130df54
SH
1365int netdev_get_mtu(int ifindex)
1366{
ebc73a67 1367 int answer_len, err, res;
0130df54 1368 struct nl_handler nlh;
06f976ca 1369 struct ifinfomsg *ifi;
0130df54 1370 struct nlmsghdr *msg;
ebc73a67
CB
1371 int readmore = 0, recv_len = 0;
1372 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1373
1374 err = netlink_open(&nlh, NETLINK_ROUTE);
1375 if (err)
1376 return err;
1377
1378 err = -ENOMEM;
1379 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1380 if (!nlmsg)
1381 goto out;
1382
06f976ca 1383 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1384 if (!answer)
1385 goto out;
1386
1387 /* Save the answer buffer length, since it will be overwritten
1388 * on the first receive (and we might need to receive more than
ebc73a67
CB
1389 * once.
1390 */
06f976ca
SZ
1391 answer_len = answer->nlmsghdr->nlmsg_len;
1392
ebc73a67 1393 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1394 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1395
06f976ca 1396 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1397 if (!ifi)
1398 goto out;
06f976ca 1399 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1400
1401 /* Send the request for addresses, which returns all addresses
1402 * on all interfaces. */
1403 err = netlink_send(&nlh, nlmsg);
1404 if (err < 0)
1405 goto out;
1406
6ce39620
CB
1407#pragma GCC diagnostic push
1408#pragma GCC diagnostic ignored "-Wcast-align"
1409
0130df54
SH
1410 do {
1411 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1412 * overwritten by a previous receive.
1413 */
06f976ca 1414 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1415
1416 /* Get the (next) batch of reply messages */
1417 err = netlink_rcv(&nlh, answer);
1418 if (err < 0)
1419 goto out;
1420
1421 recv_len = err;
0130df54
SH
1422
1423 /* Satisfy the typing for the netlink macros */
06f976ca 1424 msg = answer->nlmsghdr;
0130df54
SH
1425
1426 while (NLMSG_OK(msg, recv_len)) {
1427
1428 /* Stop reading if we see an error message */
1429 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1430 struct nlmsgerr *errmsg =
1431 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1432 err = errmsg->error;
1433 goto out;
1434 }
1435
1436 /* Stop reading if we see a NLMSG_DONE message */
1437 if (msg->nlmsg_type == NLMSG_DONE) {
1438 readmore = 0;
1439 break;
1440 }
1441
06f976ca 1442 ifi = NLMSG_DATA(msg);
0130df54
SH
1443 if (ifi->ifi_index == ifindex) {
1444 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1445 int attr_len =
1446 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1447 res = 0;
ebc73a67
CB
1448 while (RTA_OK(rta, attr_len)) {
1449 /* Found a local address for the
1450 * requested interface, return it.
1451 */
0130df54 1452 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1453 memcpy(&res, RTA_DATA(rta),
1454 sizeof(int));
0130df54
SH
1455 err = res;
1456 goto out;
1457 }
1458 rta = RTA_NEXT(rta, attr_len);
1459 }
0130df54
SH
1460 }
1461
ebc73a67
CB
1462 /* Keep reading more data from the socket if the last
1463 * message had the NLF_F_MULTI flag set.
1464 */
0130df54
SH
1465 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1466
ebc73a67 1467 /* Look at the next message received in this buffer. */
0130df54
SH
1468 msg = NLMSG_NEXT(msg, recv_len);
1469 }
1470 } while (readmore);
1471
6ce39620
CB
1472#pragma GCC diagnostic pop
1473
ebc73a67 1474 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1475 err = -1;
1476
1477out:
1478 netlink_close(&nlh);
1479 nlmsg_free(answer);
1480 nlmsg_free(nlmsg);
1481 return err;
1482}
1483
d472214b 1484int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1485{
ebc73a67 1486 int err, index, len;
06f976ca 1487 struct ifinfomsg *ifi;
ebc73a67
CB
1488 struct nl_handler nlh;
1489 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1490
3cfc0f3a
MN
1491 err = netlink_open(&nlh, NETLINK_ROUTE);
1492 if (err)
1493 return err;
75d09f83 1494
3cfc0f3a 1495 err = -EINVAL;
75d09f83 1496 len = strlen(name);
dae3fdf6 1497 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1498 goto out;
1499
3cfc0f3a 1500 err = -ENOMEM;
75d09f83
DL
1501 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1502 if (!nlmsg)
1503 goto out;
1504
06f976ca 1505 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1506 if (!answer)
1507 goto out;
1508
3cfc0f3a 1509 err = -EINVAL;
75d09f83
DL
1510 index = if_nametoindex(name);
1511 if (!index)
1512 goto out;
1513
ebc73a67 1514 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1515 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1516
1517 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1518 if (!ifi) {
1519 err = -ENOMEM;
1520 goto out;
1521 }
06f976ca
SZ
1522 ifi->ifi_family = AF_UNSPEC;
1523 ifi->ifi_index = index;
75d09f83
DL
1524
1525 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1526 goto out;
1527
1528 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1529out:
1530 netlink_close(&nlh);
1531 nlmsg_free(nlmsg);
1532 nlmsg_free(answer);
1533 return err;
1534}
1535
d472214b 1536int lxc_netdev_up(const char *name)
0ad19a3f 1537{
d472214b 1538 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1539}
1540
/* Call netdev_set_flag() for @name with no flag bits set; returns its
 * result.
 */
int lxc_netdev_down(const char *name)
{
	const int no_flags = 0;

	return netdev_set_flag(name, no_flags);
}
1545
497353b6 1546int lxc_veth_create(const char *name1, const char *name2)
0ad19a3f 1547{
ebc73a67 1548 int err, len;
06f976ca 1549 struct ifinfomsg *ifi;
ebc73a67 1550 struct nl_handler nlh;
0ad19a3f 1551 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1552 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1553
3cfc0f3a
MN
1554 err = netlink_open(&nlh, NETLINK_ROUTE);
1555 if (err)
1556 return err;
0ad19a3f 1557
3cfc0f3a 1558 err = -EINVAL;
0ad19a3f 1559 len = strlen(name1);
dae3fdf6 1560 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1561 goto out;
1562
1563 len = strlen(name2);
dae3fdf6 1564 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1565 goto out;
1566
3cfc0f3a 1567 err = -ENOMEM;
0ad19a3f 1568 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1569 if (!nlmsg)
1570 goto out;
1571
06f976ca 1572 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1573 if (!answer)
1574 goto out;
1575
06f976ca 1576 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1577 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1578 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1579
1580 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1581 if (!ifi)
1582 goto out;
06f976ca 1583 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1584
3cfc0f3a 1585 err = -EINVAL;
79e68309 1586 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1587 if (!nest1)
1588 goto out;
1589
1590 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1591 goto out;
1592
1593 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1594 if (!nest2)
1595 goto out;
1596
1597 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1598 if (!nest3)
1599 goto out;
1600
06f976ca 1601 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1602 if (!ifi) {
1603 err = -ENOMEM;
06f976ca 1604 goto out;
25a9939b 1605 }
0ad19a3f 1606
1607 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1608 goto out;
1609
1610 nla_end_nested(nlmsg, nest3);
0ad19a3f 1611 nla_end_nested(nlmsg, nest2);
0ad19a3f 1612 nla_end_nested(nlmsg, nest1);
1613
1614 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1615 goto out;
1616
3cfc0f3a 1617 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1618out:
1619 netlink_close(&nlh);
1620 nlmsg_free(answer);
1621 nlmsg_free(nlmsg);
1622 return err;
1623}
1624
ebc73a67 1625/* TODO: merge with lxc_macvlan_create */
7c11d57a 1626int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1627{
ebc73a67 1628 int err, len, lindex;
06f976ca 1629 struct ifinfomsg *ifi;
ebc73a67 1630 struct nl_handler nlh;
26c39028 1631 struct rtattr *nest, *nest2;
ebc73a67 1632 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1633
3cfc0f3a
MN
1634 err = netlink_open(&nlh, NETLINK_ROUTE);
1635 if (err)
1636 return err;
26c39028 1637
3cfc0f3a 1638 err = -EINVAL;
26c39028 1639 len = strlen(master);
dae3fdf6 1640 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1641 goto err3;
1642
1643 len = strlen(name);
dae3fdf6 1644 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1645 goto err3;
1646
3cfc0f3a 1647 err = -ENOMEM;
26c39028
JHS
1648 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1649 if (!nlmsg)
1650 goto err3;
1651
06f976ca 1652 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1653 if (!answer)
1654 goto err2;
1655
3cfc0f3a 1656 err = -EINVAL;
26c39028
JHS
1657 lindex = if_nametoindex(master);
1658 if (!lindex)
1659 goto err1;
1660
06f976ca 1661 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1662 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1663 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1664
1665 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1666 if (!ifi) {
1667 err = -ENOMEM;
1668 goto err1;
1669 }
06f976ca 1670 ifi->ifi_family = AF_UNSPEC;
26c39028 1671
79e68309 1672 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1673 if (!nest)
1674 goto err1;
1675
1676 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1677 goto err1;
1678
1679 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1680 if (!nest2)
1681 goto err1;
e892973e 1682
26c39028
JHS
1683 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1684 goto err1;
e892973e 1685
26c39028 1686 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1687 nla_end_nested(nlmsg, nest);
1688
1689 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1690 goto err1;
1691
1692 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1693 goto err1;
1694
3cfc0f3a 1695 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1696err1:
1697 nlmsg_free(answer);
1698err2:
1699 nlmsg_free(nlmsg);
1700err3:
1701 netlink_close(&nlh);
1702 return err;
1703}
1704
e892973e 1705int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1706{
ebc73a67 1707 int err, index, len;
06f976ca 1708 struct ifinfomsg *ifi;
ebc73a67 1709 struct nl_handler nlh;
e892973e 1710 struct rtattr *nest, *nest2;
ebc73a67 1711 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1712
3cfc0f3a
MN
1713 err = netlink_open(&nlh, NETLINK_ROUTE);
1714 if (err)
1715 return err;
0ad19a3f 1716
3cfc0f3a 1717 err = -EINVAL;
0ad19a3f 1718 len = strlen(master);
dae3fdf6 1719 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1720 goto out;
1721
1722 len = strlen(name);
dae3fdf6 1723 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1724 goto out;
1725
3cfc0f3a 1726 err = -ENOMEM;
0ad19a3f 1727 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1728 if (!nlmsg)
1729 goto out;
1730
06f976ca 1731 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1732 if (!answer)
1733 goto out;
1734
3cfc0f3a 1735 err = -EINVAL;
0ad19a3f 1736 index = if_nametoindex(master);
1737 if (!index)
1738 goto out;
1739
06f976ca 1740 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1741 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1742 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1743
1744 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1745 if (!ifi) {
1746 err = -ENOMEM;
1747 goto out;
1748 }
06f976ca 1749 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1750
79e68309 1751 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1752 if (!nest)
1753 goto out;
1754
1755 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1756 goto out;
1757
e892973e
DL
1758 if (mode) {
1759 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1760 if (!nest2)
1761 goto out;
1762
1763 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1764 goto out;
1765
1766 nla_end_nested(nlmsg, nest2);
1767 }
1768
0ad19a3f 1769 nla_end_nested(nlmsg, nest);
1770
1771 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1772 goto out;
1773
1774 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1775 goto out;
1776
3cfc0f3a 1777 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1778out:
1779 netlink_close(&nlh);
1780 nlmsg_free(answer);
1781 nlmsg_free(nlmsg);
1782 return err;
1783}
1784
/* Open @path write-only and write the string @value to it (without the
 * terminating NUL).
 *
 * Returns 0 on success, or -errno if open() or lxc_write_nointr() fails.
 * The descriptor is closed in either case.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);
	return ret;
}
1800
6509154d 1801static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1802{
1803 int ret;
1804 char path[PATH_MAX];
1805 char buf[1] = "";
1806
1807 if (family != AF_INET && family != AF_INET6)
1808 return minus_one_set_errno(EINVAL);
1809
1810 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1811 family == AF_INET ? "ipv4" : "ipv6", ifname,
1812 "forwarding");
1813 if (ret < 0 || (size_t)ret >= PATH_MAX)
1814 return minus_one_set_errno(E2BIG);
1815
1816 return lxc_read_file_expect(path, buf, 1, "1");
1817}
1818
0ad19a3f 1819static int neigh_proxy_set(const char *ifname, int family, int flag)
1820{
9ba8130c 1821 int ret;
419590da 1822 char path[PATH_MAX];
0ad19a3f 1823
1824 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 1825 return -EINVAL;
0ad19a3f 1826
419590da 1827 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
1828 family == AF_INET ? "ipv4" : "ipv6", ifname,
1829 family == AF_INET ? "proxy_arp" : "proxy_ndp");
419590da 1830 if (ret < 0 || (size_t)ret >= PATH_MAX)
9ba8130c 1831 return -E2BIG;
0ad19a3f 1832
ebc73a67 1833 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 1834}
1835
6509154d 1836static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1837{
1838 int ret;
1839 char path[PATH_MAX];
1840 char buf[1] = "";
1841
1842 if (family != AF_INET && family != AF_INET6)
1843 return minus_one_set_errno(EINVAL);
1844
1845 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1846 family == AF_INET ? "ipv4" : "ipv6", ifname,
1847 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1848 if (ret < 0 || (size_t)ret >= PATH_MAX)
1849 return minus_one_set_errno(E2BIG);
1850
1851 return lxc_read_file_expect(path, buf, 1, "1");
1852}
1853
/* Turn the neighbour proxy sysctl of @name on for @family; returns the
 * result of neigh_proxy_set().
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1858
/* Turn the neighbour proxy sysctl of @name off for @family; returns the
 * result of neigh_proxy_set().
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
1863
/* Return the numeric value of the hexadecimal digit @c, or -1 if @c is not
 * a hexadecimal digit. The cast to unsigned char keeps isdigit() defined
 * for bytes with the high bit set on platforms where plain char is signed.
 */
static int lxc_mac_hex_digit(char c)
{
	if (isdigit((unsigned char)c))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are written to
 * sa_data. Each octet consists of one or two hexadecimal digits (upper or
 * lower case); octets are separated by ':'. Parsing stops at the end of the
 * string or after ETH_ALEN octets, whichever comes first, so a shorter
 * address is accepted and only fills the leading octets.
 *
 * Returns 0 on success and -EINVAL as soon as a character is found that is
 * neither a hexadecimal digit nor an allowed separator. On failure,
 * sa_family and any octets parsed so far have already been written.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		unsigned val;
		int hi, lo;
		char c;

		/* First digit of the octet is mandatory. */
		hi = lxc_mac_hex_digit(*macaddr++);
		if (hi < 0)
			return -EINVAL;
		val = (unsigned)hi << 4;

		/* Second digit is optional: a ':' or the end of the string
		 * means the octet was written with a single digit.
		 */
		c = *macaddr;
		lo = lxc_mac_hex_digit(c);
		if (lo >= 0)
			val |= (unsigned)lo;
		else if (c == ':' || c == '\0')
			val >>= 4;
		else
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator that follows a two-digit octet. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1908
ebc73a67
CB
1909static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1910 void *acast, int prefix)
0ad19a3f 1911{
ebc73a67 1912 int addrlen, err;
06f976ca 1913 struct ifaddrmsg *ifa;
ebc73a67
CB
1914 struct nl_handler nlh;
1915 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1916
ebc73a67
CB
1917 addrlen = family == AF_INET ? sizeof(struct in_addr)
1918 : sizeof(struct in6_addr);
4bf1968d 1919
3cfc0f3a
MN
1920 err = netlink_open(&nlh, NETLINK_ROUTE);
1921 if (err)
1922 return err;
0ad19a3f 1923
3cfc0f3a 1924 err = -ENOMEM;
0ad19a3f 1925 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1926 if (!nlmsg)
1927 goto out;
1928
06f976ca 1929 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1930 if (!answer)
1931 goto out;
1932
06f976ca 1933 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1934 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
1935 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1936
1937 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 1938 if (!ifa)
25a9939b 1939 goto out;
06f976ca
SZ
1940 ifa->ifa_prefixlen = prefix;
1941 ifa->ifa_index = ifindex;
1942 ifa->ifa_family = family;
1943 ifa->ifa_scope = 0;
acf47e1b 1944
3cfc0f3a 1945 err = -EINVAL;
4bf1968d 1946 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 1947 goto out;
1948
4bf1968d 1949 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 1950 goto out;
1951
d8948a52 1952 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
1953 goto out;
1954
ebc73a67 1955 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 1956 err = -EPROTONOSUPPORT;
79881dc6
DL
1957 if (family == AF_INET6 &&
1958 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1959 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 1960 goto out;
0ad19a3f 1961
3cfc0f3a 1962 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1963out:
1964 netlink_close(&nlh);
1965 nlmsg_free(answer);
1966 nlmsg_free(nlmsg);
1967 return err;
1968}
1969
/* Add the IPv6 address @addr/@prefix to interface @ifindex.
 *
 * @mcast is handed to ip_addr_add() in its broadcast slot and @acast in its
 * anycast slot. Returns the result of ip_addr_add().
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	void *address = addr;
	void *multicast = mcast;
	void *anycast = acast;

	return ip_addr_add(AF_INET6, ifindex, address, multicast, anycast,
			   prefix);
}
1976
ebc73a67
CB
/* Add the IPv4 address @addr/@prefix with broadcast address @bcast to
 * interface @ifindex. No anycast address is passed. Returns the result of
 * ip_addr_add().
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	void *address = addr;
	void *broadcast = bcast;

	return ip_addr_add(AF_INET, ifindex, address, broadcast, NULL, prefix);
}
1982
ebc73a67
CB
/* Extract an address from the RTM_NEWADDR message @msg.
 *
 * The attributes are walked in order. Every IFA_ADDRESS or IFA_LOCAL
 * payload is copied into *res, and the walk ends at the first IFA_LOCAL,
 * so an IFA_LOCAL overwrites an IFA_ADDRESS seen before it. If *res is
 * NULL a buffer of the address size is allocated with malloc() and stored
 * there (so res should be an in_addr** or in6_addr**); an existing buffer
 * is reused.
 *
 * Returns 0 on success, including when the message belongs to another
 * address family or carries no address (in which case *res is left
 * untouched). Returns -1 if an address attribute has an unexpected payload
 * size or the allocation fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addrmsg = NLMSG_DATA(msg);
	struct rtattr *attr = IFA_RTA(addrmsg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (addrmsg->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	for (; RTA_OK(attr, remaining); attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* Sanity check: the family test above should already
		 * guarantee the payload size, but verify it anyway.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* Reuse the buffer if an earlier attribute already
		 * allocated it.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2036
19a26f82
MK
2037static int ip_addr_get(int family, int ifindex, void **res)
2038{
ebc73a67 2039 int answer_len, err;
06f976ca 2040 struct ifaddrmsg *ifa;
ebc73a67 2041 struct nl_handler nlh;
19a26f82 2042 struct nlmsghdr *msg;
ebc73a67
CB
2043 int readmore = 0, recv_len = 0;
2044 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2045
2046 err = netlink_open(&nlh, NETLINK_ROUTE);
2047 if (err)
2048 return err;
2049
2050 err = -ENOMEM;
2051 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2052 if (!nlmsg)
2053 goto out;
2054
06f976ca 2055 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2056 if (!answer)
2057 goto out;
2058
ebc73a67
CB
2059 /* Save the answer buffer length, since it will be overwritten on the
2060 * first receive (and we might need to receive more than once).
2061 */
06f976ca
SZ
2062 answer_len = answer->nlmsghdr->nlmsg_len;
2063
ebc73a67 2064 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2065 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2066
06f976ca 2067 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2068 if (!ifa)
2069 goto out;
06f976ca 2070 ifa->ifa_family = family;
19a26f82 2071
ebc73a67
CB
2072 /* Send the request for addresses, which returns all addresses on all
2073 * interfaces.
2074 */
19a26f82
MK
2075 err = netlink_send(&nlh, nlmsg);
2076 if (err < 0)
2077 goto out;
19a26f82 2078
6ce39620
CB
2079#pragma GCC diagnostic push
2080#pragma GCC diagnostic ignored "-Wcast-align"
2081
19a26f82
MK
2082 do {
2083 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2084 * overwritten by a previous receive.
2085 */
06f976ca 2086 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2087
ebc73a67 2088 /* Get the (next) batch of reply messages. */
19a26f82
MK
2089 err = netlink_rcv(&nlh, answer);
2090 if (err < 0)
2091 goto out;
2092
2093 recv_len = err;
2094 err = 0;
2095
ebc73a67 2096 /* Satisfy the typing for the netlink macros. */
06f976ca 2097 msg = answer->nlmsghdr;
19a26f82
MK
2098
2099 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2100 /* Stop reading if we see an error message. */
19a26f82 2101 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2102 struct nlmsgerr *errmsg =
2103 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2104 err = errmsg->error;
2105 goto out;
2106 }
2107
ebc73a67 2108 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2109 if (msg->nlmsg_type == NLMSG_DONE) {
2110 readmore = 0;
2111 break;
2112 }
2113
2114 if (msg->nlmsg_type != RTM_NEWADDR) {
2115 err = -1;
2116 goto out;
2117 }
2118
06f976ca
SZ
2119 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2120 if (ifa->ifa_index == ifindex) {
2121 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2122 err = -1;
2123 goto out;
2124 }
2125
ebc73a67 2126 /* Found a result, stop searching. */
19a26f82
MK
2127 if (*res)
2128 goto out;
2129 }
2130
ebc73a67
CB
2131 /* Keep reading more data from the socket if the last
2132 * message had the NLF_F_MULTI flag set.
2133 */
19a26f82
MK
2134 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2135
ebc73a67 2136 /* Look at the next message received in this buffer. */
19a26f82
MK
2137 msg = NLMSG_NEXT(msg, recv_len);
2138 }
2139 } while (readmore);
2140
6ce39620
CB
2141#pragma GCC diagnostic pop
2142
19a26f82 2143 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2144 * error.
2145 */
19a26f82
MK
2146 err = -1;
2147
2148out:
2149 netlink_close(&nlh);
2150 nlmsg_free(answer);
2151 nlmsg_free(nlmsg);
2152 return err;
2153}
2154
/* Fetch an IPv6 address of interface @ifindex into *@res; returns the
 * result of ip_addr_get().
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2159
/* Fetch an IPv4 address of interface @ifindex into *@res; returns the
 * result of ip_addr_get().
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2164
f8fee0e2
MK
2165static int ip_gateway_add(int family, int ifindex, void *gw)
2166{
ebc73a67 2167 int addrlen, err;
f8fee0e2 2168 struct nl_handler nlh;
06f976ca 2169 struct rtmsg *rt;
ebc73a67 2170 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2171
ebc73a67
CB
2172 addrlen = family == AF_INET ? sizeof(struct in_addr)
2173 : sizeof(struct in6_addr);
f8fee0e2
MK
2174
2175 err = netlink_open(&nlh, NETLINK_ROUTE);
2176 if (err)
2177 return err;
2178
2179 err = -ENOMEM;
2180 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2181 if (!nlmsg)
2182 goto out;
2183
06f976ca 2184 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2185 if (!answer)
2186 goto out;
2187
06f976ca 2188 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2189 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2190 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2191
2192 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2193 if (!rt)
2194 goto out;
06f976ca
SZ
2195 rt->rtm_family = family;
2196 rt->rtm_table = RT_TABLE_MAIN;
2197 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2198 rt->rtm_protocol = RTPROT_BOOT;
2199 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2200 /* "default" destination */
06f976ca 2201 rt->rtm_dst_len = 0;
f8fee0e2
MK
2202
2203 err = -EINVAL;
a2f9a670 2204
2205 /* If gateway address not supplied, then a device route will be created instead */
2206 if (gw != NULL) {
2207 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2208 goto out;
2209 }
f8fee0e2
MK
2210
2211 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2212 * addresses for the gateway.
2213 */
f8fee0e2
MK
2214 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2215 goto out;
2216
2217 err = netlink_transaction(&nlh, nlmsg, answer);
2218out:
2219 netlink_close(&nlh);
2220 nlmsg_free(answer);
2221 nlmsg_free(nlmsg);
2222 return err;
2223}
2224
/* Add an IPv4 default route via @gw on interface @ifindex; returns the
 * result of ip_gateway_add().
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET, ifindex, gateway);
}
2229
/* Add an IPv6 default route via @gw on interface @ifindex; returns the
 * result of ip_gateway_add().
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET6, ifindex, gateway);
}
/* Report whether @bridge should be treated as an openvswitch bridge.
 *
 * The check is whether /sys/class/net/@bridge/bridge is missing: true is
 * returned only when stat() on that path fails with ENOENT. A path that
 * exists, a stat() failure with any other errno, or a name too long for
 * the path buffer all yield false.
 */
bool is_ovs_bridge(const char *bridge)
{
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char sysfs_path[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int n;

	n = snprintf(sysfs_path, sizeof(sysfs_path),
		     "/sys/class/net/%s/bridge", bridge);
	if (n < 0 || (size_t)n >= sizeof(sysfs_path))
		return false;

	return stat(sysfs_path, &st) < 0 && errno == ENOENT;
}
2251
581c75e7
CB
/* Argument bundle for the ovs-vsctl exec callbacks. */
struct ovs_veth_args {
	/* Bridge name passed to ovs-vsctl. */
	const char *bridge;
	/* Port (network interface) name passed to ovs-vsctl. */
	const char *nic;
};
2256
cb0dc11b
CB
2257/* Called from a background thread - when nic goes away, remove it from the
2258 * bridge.
c43cbc04 2259 */
581c75e7 2260static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2261{
581c75e7 2262 struct ovs_veth_args *args = data;
cb0dc11b 2263
581c75e7
CB
2264 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2265 (char *)NULL);
2266 return -1;
c43cbc04
SH
2267}
2268
581c75e7 2269int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2270{
c43cbc04 2271 int ret;
419590da 2272 char cmd_output[PATH_MAX];
581c75e7 2273 struct ovs_veth_args args;
6ad22d06 2274
581c75e7
CB
2275 args.bridge = bridge;
2276 args.nic = nic;
2277 ret = run_command(cmd_output, sizeof(cmd_output),
2278 lxc_ovs_delete_port_exec, (void *)&args);
2279 if (ret < 0) {
2280 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2281 "%s", bridge, nic, cmd_output);
6ad22d06 2282 return -1;
581c75e7 2283 }
0d204771 2284
581c75e7
CB
2285 return 0;
2286}
ebc73a67 2287
581c75e7
CB
2288static int lxc_ovs_attach_bridge_exec(void *data)
2289{
2290 struct ovs_veth_args *args = data;
ebc73a67 2291
581c75e7
CB
2292 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2293 (char *)NULL);
2294 return -1;
2295}
ebc73a67 2296
581c75e7
CB
2297static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2298{
2299 int ret;
419590da 2300 char cmd_output[PATH_MAX];
581c75e7 2301 struct ovs_veth_args args;
ebc73a67 2302
581c75e7
CB
2303 args.bridge = bridge;
2304 args.nic = nic;
2305 ret = run_command(cmd_output, sizeof(cmd_output),
2306 lxc_ovs_attach_bridge_exec, (void *)&args);
2307 if (ret < 0) {
2308 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2309 bridge, nic, cmd_output);
2310 return -1;
c43cbc04 2311 }
0d204771 2312
581c75e7 2313 return 0;
0d204771 2314}
0d204771 2315
581c75e7 2316int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2317{
ebc73a67 2318 int err, fd, index;
9de31d5a 2319 size_t retlen;
0ad19a3f 2320 struct ifreq ifr;
2321
dae3fdf6 2322 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2323 return -EINVAL;
0ad19a3f 2324
2325 index = if_nametoindex(ifname);
2326 if (!index)
3cfc0f3a 2327 return -EINVAL;
0ad19a3f 2328
0d204771 2329 if (is_ovs_bridge(bridge))
581c75e7 2330 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2331
ad9429e5 2332 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2333 if (fd < 0)
3cfc0f3a 2334 return -errno;
0ad19a3f 2335
9de31d5a 2336 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2337 if (retlen >= IFNAMSIZ) {
2338 close(fd);
9de31d5a 2339 return -E2BIG;
42cc4083 2340 }
9de31d5a 2341
ebc73a67 2342 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2343 ifr.ifr_ifindex = index;
7d163508 2344 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2345 close(fd);
3cfc0f3a
MN
2346 if (err)
2347 err = -errno;
0ad19a3f 2348
2349 return err;
2350}
72d0e1cb 2351
ebc73a67 2352static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2353 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2354 [LXC_NET_VETH] = "veth",
2355 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2356 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2357 [LXC_NET_PHYS] = "phys",
b343592b
BP
2358 [LXC_NET_VLAN] = "vlan",
2359 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2360};
2361
2362const char *lxc_net_type_to_str(int type)
2363{
2364 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2365 return NULL;
ebc73a67 2366
72d0e1cb
SG
2367 return lxc_network_types[type];
2368}
8befa924 2369
ebc73a67 2370static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2371
966e9f1f 2372char *lxc_mkifname(char *template)
a0265685 2373{
2d7bf744 2374 int ret;
b1e44ed1 2375 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2376 char name[IFNAMSIZ];
2377 bool exists = false;
2378 size_t i = 0;
280cc35f 2379#ifdef HAVE_RAND_R
2380 unsigned int seed;
2381
2382 seed = randseed(false);
2383#else
2384
2385 (void)randseed(true);
2386#endif
a0265685 2387
535e8859
CB
2388 if (strlen(template) >= IFNAMSIZ)
2389 return NULL;
2390
ebc73a67 2391 /* Get all the network interfaces. */
b1e44ed1 2392 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2d7bf744 2393 if (ret < 0) {
6d1400b5 2394 SYSERROR("Failed to get network interfaces");
2d7bf744
CB
2395 return NULL;
2396 }
a0265685 2397
ebc73a67 2398 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2399 for (;;) {
966e9f1f 2400 name[0] = '\0';
94b1cade 2401 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2402
966e9f1f 2403 exists = false;
280cc35f 2404
a0265685
SG
2405 for (i = 0; i < strlen(name); i++) {
2406 if (name[i] == 'X') {
2407#ifdef HAVE_RAND_R
8523344a 2408 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2409#else
8523344a 2410 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2411#endif
2412 }
2413 }
2414
2415 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2416 if (!strcmp(ifa->ifa_name, name)) {
2417 exists = true;
a0265685
SG
2418 break;
2419 }
2420 }
2421
966e9f1f 2422 if (!exists)
a0265685 2423 break;
a0265685
SG
2424 }
2425
b1e44ed1 2426 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2427 (void)strlcpy(template, name, strlen(template) + 1);
2428
2429 return template;
a0265685
SG
2430}
2431
8befa924
SH
2432int setup_private_host_hw_addr(char *veth1)
2433{
ebc73a67 2434 int err, sockfd;
8befa924 2435 struct ifreq ifr;
8befa924 2436
ad9429e5 2437 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2438 if (sockfd < 0)
2439 return -errno;
2440
ebc73a67 2441 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
87c6e5db
DJ
2442 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2443 close(sockfd);
ebc73a67 2444 return -E2BIG;
87c6e5db 2445 }
ebc73a67 2446
8befa924
SH
2447 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2448 if (err < 0) {
8befa924 2449 close(sockfd);
8befa924
SH
2450 return -errno;
2451 }
2452
2453 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2454 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924 2455 close(sockfd);
8befa924
SH
2456 if (err < 0)
2457 return -errno;
2458
2459 return 0;
2460}
811ef482
CB
2461
2462int lxc_find_gateway_addresses(struct lxc_handler *handler)
2463{
2464 struct lxc_list *network = &handler->conf->network;
2465 struct lxc_list *iterator;
2466 struct lxc_netdev *netdev;
2467 int link_index;
2468
2469 lxc_list_for_each(iterator, network) {
2470 netdev = iterator->elem;
2471
2472 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2473 continue;
2474
2475 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
03ca4af8 2476 ERROR("Automatic gateway detection is only supported for veth and macvlan");
811ef482
CB
2477 return -1;
2478 }
2479
de4855a8 2480 if (netdev->link[0] == '\0') {
811ef482
CB
2481 ERROR("Automatic gateway detection needs a link interface");
2482 return -1;
2483 }
2484
2485 link_index = if_nametoindex(netdev->link);
2486 if (!link_index)
2487 return -EINVAL;
2488
2489 if (netdev->ipv4_gateway_auto) {
2490 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
03ca4af8
TP
2491 ERROR("Failed to automatically find ipv4 gateway address from link interface \"%s\"",
2492 netdev->link);
811ef482
CB
2493 return -1;
2494 }
2495 }
2496
2497 if (netdev->ipv6_gateway_auto) {
2498 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
03ca4af8
TP
2499 ERROR("Failed to automatically find ipv6 gateway address from link interface \"%s\"",
2500 netdev->link);
811ef482
CB
2501 return -1;
2502 }
2503 }
2504 }
2505
2506 return 0;
2507}
2508
2509#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2510static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2511 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2512{
2513 int ret;
2514 pid_t child;
2515 int bytes, pipefd[2];
2516 char *token, *saveptr = NULL;
095ead80 2517 char netdev_link[IFNAMSIZ];
419590da 2518 char buffer[PATH_MAX] = {0};
94b1cade 2519 size_t retlen;
811ef482
CB
2520
2521 if (netdev->type != LXC_NET_VETH) {
2522 ERROR("Network type %d not support for unprivileged use", netdev->type);
2523 return -1;
2524 }
2525
2526 ret = pipe(pipefd);
2527 if (ret < 0) {
2528 SYSERROR("Failed to create pipe");
2529 return -1;
2530 }
2531
2532 child = fork();
2533 if (child < 0) {
2534 SYSERROR("Failed to create new process");
2535 close(pipefd[0]);
2536 close(pipefd[1]);
2537 return -1;
2538 }
2539
2540 if (child == 0) {
8335fd40 2541 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2542
2543 close(pipefd[0]);
2544
2545 ret = dup2(pipefd[1], STDOUT_FILENO);
2546 if (ret >= 0)
2547 ret = dup2(pipefd[1], STDERR_FILENO);
2548 close(pipefd[1]);
2549 if (ret < 0) {
2550 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2551 _exit(EXIT_FAILURE);
811ef482
CB
2552 }
2553
de4855a8 2554 if (netdev->link[0] != '\0')
9de31d5a 2555 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2556 else
9de31d5a
CB
2557 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2558 if (retlen >= IFNAMSIZ) {
2559 SYSERROR("Invalid network device name");
2560 _exit(EXIT_FAILURE);
2561 }
811ef482 2562
8335fd40
CB
2563 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2564 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2565 _exit(EXIT_FAILURE);
8335fd40 2566 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2567
2568 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2569 lxcname, pidstr, netdev_link,
de4855a8
CB
2570 netdev->name[0] != '\0' ? netdev->name : "(null)");
2571 if (netdev->name[0] != '\0')
811ef482
CB
2572 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2573 lxcpath, lxcname, pidstr, "veth", netdev_link,
2574 netdev->name, (char *)NULL);
2575 else
2576 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2577 lxcpath, lxcname, pidstr, "veth", netdev_link,
2578 (char *)NULL);
2579 SYSERROR("Failed to execute lxc-user-nic");
78070056 2580 _exit(EXIT_FAILURE);
811ef482
CB
2581 }
2582
2583 /* close the write-end of the pipe */
2584 close(pipefd[1]);
2585
419590da 2586 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482 2587 if (bytes < 0) {
74c6e2b0 2588 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2589 close(pipefd[0]);
6b9f82a9
CB
2590 } else {
2591 buffer[bytes - 1] = '\0';
811ef482 2592 }
811ef482
CB
2593
2594 ret = wait_for_pid(child);
2595 close(pipefd[0]);
6b9f82a9 2596 if (ret != 0 || bytes < 0) {
811ef482
CB
2597 ERROR("lxc-user-nic failed to configure requested network: %s",
2598 buffer[0] != '\0' ? buffer : "(null)");
2599 return -1;
2600 }
2601 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2602
2603 /* netdev->name */
2604 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2605 if (!token) {
2606 ERROR("Failed to parse lxc-user-nic output");
811ef482 2607 return -1;
74c6e2b0 2608 }
811ef482 2609
e389f2af
CB
2610 /*
2611 * lxc-user-nic will take care of proper network device naming. So
2612 * netdev->name and netdev->created_name need to be identical to not
2613 * trigger another rename later on.
2614 */
2615 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2616 if (retlen < IFNAMSIZ)
2617 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2618 if (retlen >= IFNAMSIZ) {
2619 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2620 return -E2BIG;
2621 }
811ef482 2622
74c6e2b0 2623 /* netdev->ifindex */
811ef482 2624 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2625 if (!token) {
2626 ERROR("Failed to parse lxc-user-nic output");
811ef482 2627 return -1;
74c6e2b0 2628 }
811ef482 2629
74c6e2b0
CB
2630 ret = lxc_safe_int(token, &netdev->ifindex);
2631 if (ret < 0) {
6d1400b5 2632 errno = -ret;
2633 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2634 return -1;
2635 }
2636
74c6e2b0 2637 /* netdev->priv.veth_attr.veth1 */
811ef482 2638 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2639 if (!token) {
2640 ERROR("Failed to parse lxc-user-nic output");
811ef482 2641 return -1;
74c6e2b0 2642 }
811ef482 2643
94b1cade
DJ
2644 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2645 if (retlen >= IFNAMSIZ) {
74c6e2b0
CB
2646 ERROR("Host side veth device name returned by lxc-user-nic is "
2647 "too long");
2648 return -E2BIG;
2649 }
74c6e2b0
CB
2650
2651 /* netdev->priv.veth_attr.ifindex */
2652 token = strtok_r(NULL, ":", &saveptr);
2653 if (!token) {
2654 ERROR("Failed to parse lxc-user-nic output");
2655 return -1;
2656 }
2657
2658 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2659 if (ret < 0) {
6d1400b5 2660 errno = -ret;
2661 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2662 return -1;
2663 }
2664
4d781681 2665 if (netdev->upscript) {
2666 char *argv[] = {
2667 "veth",
2668 netdev->link,
2669 netdev->priv.veth_attr.veth1,
2670 NULL,
2671 };
2672
e389f2af
CB
2673 ret = run_script_argv(lxcname, hooks_version, "net",
2674 netdev->upscript, "up", argv);
4d781681 2675 if (ret < 0)
2676 return -1;
2677 }
2678
811ef482
CB
2679 return 0;
2680}
2681
f0ecc19d 2682static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2683 struct lxc_netdev *netdev,
2684 const char *netns_path)
811ef482
CB
2685{
2686 int bytes, ret;
2687 pid_t child;
2688 int pipefd[2];
419590da 2689 char buffer[PATH_MAX] = {0};
811ef482
CB
2690
2691 if (netdev->type != LXC_NET_VETH) {
2692 ERROR("Network type %d not support for unprivileged use", netdev->type);
2693 return -1;
2694 }
2695
2696 ret = pipe(pipefd);
2697 if (ret < 0) {
2698 SYSERROR("Failed to create pipe");
2699 return -1;
2700 }
2701
2702 child = fork();
2703 if (child < 0) {
2704 SYSERROR("Failed to create new process");
2705 close(pipefd[0]);
2706 close(pipefd[1]);
2707 return -1;
2708 }
2709
2710 if (child == 0) {
8843fde4 2711 char *hostveth;
811ef482
CB
2712
2713 close(pipefd[0]);
2714
2715 ret = dup2(pipefd[1], STDOUT_FILENO);
2716 if (ret >= 0)
2717 ret = dup2(pipefd[1], STDERR_FILENO);
2718 close(pipefd[1]);
2719 if (ret < 0) {
2720 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2721 _exit(EXIT_FAILURE);
811ef482
CB
2722 }
2723
8843fde4
CB
2724 if (netdev->priv.veth_attr.pair[0] != '\0')
2725 hostveth = netdev->priv.veth_attr.pair;
2726 else
2727 hostveth = netdev->priv.veth_attr.veth1;
2728 if (hostveth[0] == '\0') {
74c6e2b0 2729 SYSERROR("Host side veth device name is missing");
a30b9023 2730 _exit(EXIT_FAILURE);
74c6e2b0
CB
2731 }
2732
de4855a8 2733 if (netdev->link[0] == '\0') {
811ef482 2734 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0 2735 "missing", netdev->priv.veth_attr.veth1);
a30b9023 2736 _exit(EXIT_FAILURE);
74c6e2b0 2737 }
811ef482 2738
811ef482 2739 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2740 lxcname, netns_path, netdev->link, hostveth);
811ef482 2741 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2742 lxcname, netns_path, "veth", netdev->link, hostveth,
2743 (char *)NULL);
811ef482 2744 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2745 _exit(EXIT_FAILURE);
811ef482
CB
2746 }
2747
2748 close(pipefd[1]);
2749
419590da 2750 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482
CB
2751 if (bytes < 0) {
2752 SYSERROR("Failed to read from pipe file descriptor.");
2753 close(pipefd[0]);
6b9f82a9
CB
2754 } else {
2755 buffer[bytes - 1] = '\0';
811ef482 2756 }
811ef482 2757
6b9f82a9
CB
2758 ret = wait_for_pid(child);
2759 close(pipefd[0]);
2760 if (ret != 0 || bytes < 0) {
811ef482
CB
2761 ERROR("lxc-user-nic failed to delete requested network: %s",
2762 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2763 return -1;
2764 }
2765
811ef482
CB
2766 return 0;
2767}
2768
1bd8d726
CB
2769bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2770{
2771 int ret;
2772 struct lxc_list *iterator;
2773 struct lxc_list *network = &handler->conf->network;
2774 /* strlen("/proc/") = 6
2775 * +
8335fd40 2776 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2777 * +
2778 * strlen("/fd/") = 4
2779 * +
8335fd40 2780 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2781 * +
2782 * \0
2783 */
8335fd40 2784 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2785
2786 *netns_path = '\0';
2787
28d9e29e 2788 if (handler->nsfd[LXC_NS_NET] < 0) {
1bd8d726
CB
2789 DEBUG("Cannot not guarantee safe deletion of network devices. "
2790 "Manual cleanup maybe needed");
2791 return false;
2792 }
2793
2794 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2795 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2796 if (ret < 0 || ret >= sizeof(netns_path))
2797 return false;
2798
2799 lxc_list_for_each(iterator, network) {
2800 char *hostveth = NULL;
2801 struct lxc_netdev *netdev = iterator->elem;
2802
2803 /* We can only delete devices whose ifindex we have. If we don't
2804 * have the index it means that we didn't create it.
2805 */
2806 if (!netdev->ifindex)
2807 continue;
2808
2809 if (netdev->type == LXC_NET_PHYS) {
2810 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2811 netdev->link);
2812 if (ret < 0)
2813 WARN("Failed to rename interface with index %d "
2814 "to its initial name \"%s\"",
2815 netdev->ifindex, netdev->link);
2816 else
2817 TRACE("Renamed interface with index %d to its "
2818 "initial name \"%s\"",
2819 netdev->ifindex, netdev->link);
b3259dc6
TP
2820
2821 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 2822 goto clear_ifindices;
1bd8d726
CB
2823 }
2824
2825 ret = netdev_deconf[netdev->type](handler, netdev);
2826 if (ret < 0)
2827 WARN("Failed to deconfigure network device");
2828
2829 if (netdev->type != LXC_NET_VETH)
66a7c406 2830 goto clear_ifindices;
1bd8d726 2831
c869be20 2832 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
66a7c406 2833 goto clear_ifindices;
1bd8d726 2834
8843fde4
CB
2835 if (netdev->priv.veth_attr.pair[0] != '\0')
2836 hostveth = netdev->priv.veth_attr.pair;
2837 else
2838 hostveth = netdev->priv.veth_attr.veth1;
2839 if (hostveth[0] == '\0')
66a7c406 2840 goto clear_ifindices;
8843fde4 2841
1bd8d726
CB
2842 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2843 handler->name, netdev,
2844 netns_path);
2845 if (ret < 0) {
1bd8d726 2846 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 2847 "bridge \"%s\"", hostveth, netdev->link);
66a7c406 2848 goto clear_ifindices;
1bd8d726
CB
2849 }
2850 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2851 netdev->link);
66a7c406
CB
2852
2853clear_ifindices:
ad2ddfcd 2854 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
2855 * have cached stale data which would cause it to fail on reboot
2856 * we're we don't re-read the on-disk config file.
2857 */
2858 netdev->ifindex = 0;
2859 if (netdev->type == LXC_NET_PHYS) {
2860 netdev->priv.phys_attr.ifindex = 0;
2861 } else if (netdev->type == LXC_NET_VETH) {
2862 netdev->priv.veth_attr.veth1[0] = '\0';
2863 netdev->priv.veth_attr.ifindex = 0;
2864 }
1bd8d726
CB
2865 }
2866
bb84beda 2867 return true;
1bd8d726
CB
2868}
2869
/* Arguments handed through run_command() to the "ip neigh ... proxy" exec
 * callbacks.
 */
struct ip_proxy_args {
	const char *ip;  /* textual IP address of the proxy entry */
	const char *dev; /* name of the device the entry belongs to */
};
2874
2875static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2876{
2877 struct ip_proxy_args *args = data;
2878
2879 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2880 return -1;
2881}
2882
2883static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2884{
2885 struct ip_proxy_args *args = data;
2886
2887 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2888 return -1;
2889}
2890
2891static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2892{
2893 int ret;
2894 char cmd_output[PATH_MAX];
2895 struct ip_proxy_args args = {
2896 .ip = ip,
2897 .dev = dev,
2898 };
2899
2900 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2901 if (ret < 0) {
2902 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2903 return -1;
2904 }
2905
2906 return 0;
2907}
2908
2909static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2910{
2911 int ret;
2912 char cmd_output[PATH_MAX];
2913 struct ip_proxy_args args = {
2914 .ip = ip,
2915 .dev = dev,
2916 };
2917
2918 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2919 if (ret < 0) {
2920 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2921 return -1;
2922 }
2923
2924 return 0;
2925}
2926
2927static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2928 struct lxc_list *cur, *next;
2929 struct lxc_inetdev *inet4dev;
2930 struct lxc_inet6dev *inet6dev;
2931 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 2932 int err = 0;
2933 unsigned int lo_ifindex = 0;
6509154d 2934
2935 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2936 if (!lxc_list_empty(&netdev->ipv4)) {
2937 /* Check for net.ipv4.conf.[link].forwarding=1 */
2938 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2939 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2940 return minus_one_set_errno(EINVAL);
2941 }
2942 }
2943
2944 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2945 if (!lxc_list_empty(&netdev->ipv6)) {
2946 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2947 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2948 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2949 return minus_one_set_errno(EINVAL);
2950 }
2951
2952 /* Check for net.ipv6.conf.[link].forwarding=1 */
2953 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2954 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2955 return minus_one_set_errno(EINVAL);
2956 }
2957 }
2958
b670016a 2959 /* Perform IPVLAN specific checks. */
2960 if (netdev->type == LXC_NET_IPVLAN) {
2961 /* Check mode is l3s as other modes do not work with l2proxy. */
2962 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2963 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2964 return minus_one_set_errno(EINVAL);
2965 }
2966
2967 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 2968 lo_ifindex = if_nametoindex(loop_device);
b670016a 2969 if (lo_ifindex == 0) {
3ebffb98 2970 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 2971 return minus_one_set_errno(EINVAL);
2972 }
2973 }
2974
6509154d 2975 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2976 inet4dev = cur->elem;
2977 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2978 return minus_one_set_errno(-errno);
2979
2980 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2981 return minus_one_set_errno(EINVAL);
b670016a 2982
2983 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2984 if (netdev->type == LXC_NET_IPVLAN) {
2985 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2986 if (err < 0) {
3ebffb98 2987 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 2988 return minus_one_set_errno(-err);
2989 }
2990 }
6509154d 2991 }
2992
2993 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2994 inet6dev = cur->elem;
2995 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2996 return minus_one_set_errno(-errno);
2997
2998 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2999 return minus_one_set_errno(EINVAL);
b670016a 3000
3001 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3002 if (netdev->type == LXC_NET_IPVLAN) {
3003 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
3004 if (err < 0) {
3ebffb98 3005 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3006 return minus_one_set_errno(-err);
3007 }
3008 }
6509154d 3009 }
3010
3011 return 0;
3012}
3013
/* Remove the l2proxy state that belongs to one IPv4 address.
 *
 * When @lo_ifindex is non-zero the /32 route for @ip on that interface is
 * deleted. When @link is a non-empty name the neighbour proxy entry for @ip
 * on @link is removed. Both steps are attempted even when the first fails.
 *
 * Returns 0 when every attempted step succeeded, otherwise -1 with errno set
 * to EINVAL.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex) {
	char addr_str[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addr_str, sizeof(addr_str))) {
		SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* Static route to the loopback device, only present for a known index. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addr_str, lo_ifindex);
	}

	/* Neighbour proxy entry on the link device. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addr_str, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3042
/* Remove the l2proxy state that belongs to one IPv6 address.
 *
 * When @lo_ifindex is non-zero the /128 route for @ip on that interface is
 * deleted. When @link is a non-empty name the neighbour proxy entry for @ip
 * on @link is removed. Both steps are attempted even when the first fails.
 *
 * Returns 0 when every attempted step succeeded, otherwise -1 with errno set
 * to EINVAL.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex) {
	char addr_str[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addr_str, sizeof(addr_str))) {
		SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* Static route to the loopback device, only present for a known index. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addr_str, lo_ifindex);
	}

	/* Neighbour proxy entry on the link device. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addr_str, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3071
6509154d 3072static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3073 unsigned int lo_ifindex = 0;
3074 unsigned int errCount = 0;
6509154d 3075 struct lxc_list *cur, *next;
3076 struct lxc_inetdev *inet4dev;
3077 struct lxc_inet6dev *inet6dev;
6509154d 3078
b670016a 3079 /* Perform IPVLAN specific checks. */
3080 if (netdev->type == LXC_NET_IPVLAN) {
3081 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3082 lo_ifindex = if_nametoindex(loop_device);
b670016a 3083 if (lo_ifindex == 0) {
3084 errCount++;
3ebffb98 3085 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3086 }
b670016a 3087 }
6509154d 3088
b670016a 3089 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3090 inet4dev = cur->elem;
3091 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3092 errCount++;
6509154d 3093 }
3094
3095 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3096 inet6dev = cur->elem;
b670016a 3097 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3098 errCount++;
6509154d 3099 }
3100
b670016a 3101 if (errCount > 0)
6509154d 3102 return minus_one_set_errno(EINVAL);
3103
3104 return 0;
3105}
3106
e389f2af 3107static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3108{
811ef482
CB
3109 struct lxc_list *iterator;
3110 struct lxc_list *network = &handler->conf->network;
3111
811ef482
CB
3112 lxc_list_for_each(iterator, network) {
3113 struct lxc_netdev *netdev = iterator->elem;
3114
3115 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3116 ERROR("Invalid network configuration type %d", netdev->type);
3117 return -1;
3118 }
3119
6509154d 3120 /* Setup l2proxy entries if enabled and used with a link property */
3121 if (netdev->l2proxy && netdev->link[0] != '\0') {
3122 if (lxc_setup_l2proxy(netdev)) {
3123 ERROR("Failed to setup l2proxy");
3124 return -1;
3125 }
3126 }
3127
811ef482
CB
3128 if (netdev_conf[netdev->type](handler, netdev)) {
3129 ERROR("Failed to create network device");
3130 return -1;
3131 }
811ef482
CB
3132 }
3133
3134 return 0;
3135}
3136
e389f2af 3137int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3138{
e389f2af
CB
3139 pid_t pid = handler->pid;
3140 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3141 struct lxc_list *iterator;
3142
e0010464 3143 if (am_guest_unpriv())
74c6e2b0 3144 return 0;
811ef482
CB
3145
3146 lxc_list_for_each(iterator, network) {
e389f2af 3147 int ret;
811ef482
CB
3148 struct lxc_netdev *netdev = iterator->elem;
3149
811ef482
CB
3150 if (!netdev->ifindex)
3151 continue;
3152
24190194 3153 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
535e8859 3154 if (ret) {
6d1400b5 3155 errno = -ret;
24190194
CB
3156 SYSERROR("Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3157 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3158 return -1;
3159 }
3160
24190194
CB
3161 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3162 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3163 }
3164
3165 return 0;
3166}
3167
3c09b97c
CB
3168static int network_requires_advanced_setup(int type)
3169{
3170 if (type == LXC_NET_EMPTY)
3171 return false;
3172
3173 if (type == LXC_NET_NONE)
3174 return false;
3175
3176 return true;
3177}
3178
e389f2af 3179static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3180{
e389f2af
CB
3181 int hooks_version = handler->conf->hooks_version;
3182 const char *lxcname = handler->name;
3183 const char *lxcpath = handler->lxcpath;
3184 struct lxc_list *network = &handler->conf->network;
3185 pid_t pid = handler->pid;
74c6e2b0
CB
3186 struct lxc_list *iterator;
3187
74c6e2b0
CB
3188 lxc_list_for_each(iterator, network) {
3189 struct lxc_netdev *netdev = iterator->elem;
3190
3c09b97c 3191 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3192 continue;
3193
3194 if (netdev->type != LXC_NET_VETH) {
e389f2af 3195 ERROR("Networks of type %s are not supported by unprivileged containers",
74c6e2b0
CB
3196 lxc_net_type_to_str(netdev->type));
3197 return -1;
3198 }
3199
3200 if (netdev->mtu)
3201 INFO("mtu ignored due to insufficient privilege");
3202
e389f2af
CB
3203 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3204 pid, hooks_version))
74c6e2b0
CB
3205 return -1;
3206 }
3207
3208 return 0;
3209}
3210
1bd8d726 3211bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3212{
3213 int ret;
3214 struct lxc_list *iterator;
3215 struct lxc_list *network = &handler->conf->network;
1bd8d726 3216
811ef482
CB
3217 lxc_list_for_each(iterator, network) {
3218 char *hostveth = NULL;
3219 struct lxc_netdev *netdev = iterator->elem;
3220
3221 /* We can only delete devices whose ifindex we have. If we don't
3222 * have the index it means that we didn't create it.
3223 */
3224 if (!netdev->ifindex)
3225 continue;
3226
6509154d 3227 /* Delete l2proxy entries if enabled and used with a link property */
3228 if (netdev->l2proxy && netdev->link[0] != '\0') {
3229 if (lxc_delete_l2proxy(netdev))
3230 WARN("Failed to delete all l2proxy config");
3231 /* Don't return, let the network be cleaned up as normal. */
3232 }
3233
811ef482
CB
3234 if (netdev->type == LXC_NET_PHYS) {
3235 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3236 if (ret < 0)
3237 WARN("Failed to rename interface with index %d "
b809f232
CB
3238 "from \"%s\" to its initial name \"%s\"",
3239 netdev->ifindex, netdev->name, netdev->link);
0b154989 3240 else {
29589196
CB
3241 TRACE("Renamed interface with index %d from "
3242 "\"%s\" to its initial name \"%s\"",
3243 netdev->ifindex, netdev->name,
3244 netdev->link);
0b154989
TP
3245
3246 /* Restore original MTU */
3247 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3248 if (ret < 0) {
3249 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3250 netdev->link, netdev->priv.phys_attr.mtu);
3251 } else {
3252 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3253 netdev->link, netdev->priv.phys_attr.mtu);
3254 }
3255 }
b3259dc6
TP
3256
3257 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3258 goto clear_ifindices;
811ef482
CB
3259 }
3260
3261 ret = netdev_deconf[netdev->type](handler, netdev);
3262 if (ret < 0)
3263 WARN("Failed to deconfigure network device");
3264
3265 /* Recent kernels remove the virtual interfaces when the network
3266 * namespace is destroyed but in case we did not move the
3267 * interface to the network namespace, we have to destroy it.
3268 */
1bd8d726 3269 ret = lxc_netdev_delete_by_index(netdev->ifindex);
78ab281c
CB
3270 if (ret < 0) {
3271 if (errno != ENODEV) {
3272 WARN("Failed to remove interface \"%s\" with index %d",
3273 netdev->name[0] != '\0' ? netdev->name : "(null)",
3274 netdev->ifindex);
3275 goto clear_ifindices;
3276 }
3277 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
24548539
CB
3278 netdev->name[0] != '\0' ? netdev->name : "(null)",
3279 netdev->ifindex);
811ef482 3280 }
1bd8d726 3281 INFO("Removed interface \"%s\" with index %d",
52845118
CB
3282 netdev->name[0] != '\0' ? netdev->name : "(null)",
3283 netdev->ifindex);
811ef482
CB
3284
3285 if (netdev->type != LXC_NET_VETH)
66a7c406 3286 goto clear_ifindices;
811ef482 3287
811ef482
CB
3288 /* Explicitly delete host veth device to prevent lingering
3289 * devices. We had issues in LXD around this.
3290 */
de4855a8 3291 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
3292 hostveth = netdev->priv.veth_attr.pair;
3293 else
3294 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 3295 if (hostveth[0] == '\0')
66a7c406 3296 goto clear_ifindices;
811ef482
CB
3297
3298 ret = lxc_netdev_delete_by_name(hostveth);
3299 if (ret < 0) {
24548539
CB
3300 WARN("Failed to remove interface \"%s\" from \"%s\"",
3301 hostveth, netdev->link);
66a7c406 3302 goto clear_ifindices;
811ef482
CB
3303 }
3304 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3305
c869be20 3306 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
811ef482 3307 netdev->priv.veth_attr.veth1[0] = '\0';
66a7c406
CB
3308 netdev->ifindex = 0;
3309 netdev->priv.veth_attr.ifindex = 0;
3310 goto clear_ifindices;
811ef482
CB
3311 }
3312
3313 /* Delete the openvswitch port. */
3314 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3315 if (ret < 0)
3316 WARN("Failed to remove port \"%s\" from openvswitch "
3317 "bridge \"%s\"", hostveth, netdev->link);
3318 else
3319 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3320 hostveth, netdev->link);
3321
66a7c406 3322clear_ifindices:
ad2ddfcd 3323 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3324 * have cached stale data which would cause it to fail on reboot
3325 * we're we don't re-read the on-disk config file.
3326 */
3327 netdev->ifindex = 0;
3328 if (netdev->type == LXC_NET_PHYS) {
3329 netdev->priv.phys_attr.ifindex = 0;
3330 } else if (netdev->type == LXC_NET_VETH) {
3331 netdev->priv.veth_attr.veth1[0] = '\0';
3332 netdev->priv.veth_attr.ifindex = 0;
3333 }
811ef482
CB
3334 }
3335
bb84beda 3336 return true;
811ef482
CB
3337}
3338
3339int lxc_requests_empty_network(struct lxc_handler *handler)
3340{
3341 struct lxc_list *network = &handler->conf->network;
3342 struct lxc_list *iterator;
3343 bool found_none = false, found_nic = false;
3344
3345 if (lxc_list_empty(network))
3346 return 0;
3347
3348 lxc_list_for_each(iterator, network) {
3349 struct lxc_netdev *netdev = iterator->elem;
3350
3351 if (netdev->type == LXC_NET_NONE)
3352 found_none = true;
3353 else
3354 found_nic = true;
3355 }
3356 if (found_none && !found_nic)
3357 return 1;
3358 return 0;
3359}
3360
3361/* try to move physical nics to the init netns */
b809f232 3362int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
3363{
3364 int ret;
b809f232 3365 int oldfd;
811ef482 3366 char ifname[IFNAMSIZ];
b809f232 3367 struct lxc_list *iterator;
28d9e29e 3368 int netnsfd = handler->nsfd[LXC_NS_NET];
b809f232 3369 struct lxc_conf *conf = handler->conf;
811ef482 3370
b809f232
CB
3371 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3372 * the parent network namespace. We won't have this capability if we are
3373 * unprivileged.
3374 */
d0fbc7ba 3375 if (!handler->am_root)
b809f232 3376 return 0;
811ef482 3377
b809f232 3378 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3379
0037ab49 3380 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
811ef482
CB
3381 if (oldfd < 0) {
3382 SYSERROR("Failed to preserve network namespace");
b809f232 3383 return -1;
811ef482
CB
3384 }
3385
b809f232 3386 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
3387 if (ret < 0) {
3388 SYSERROR("Failed to enter network namespace");
3389 close(oldfd);
b809f232 3390 return -1;
811ef482
CB
3391 }
3392
b809f232
CB
3393 lxc_list_for_each(iterator, &conf->network) {
3394 struct lxc_netdev *netdev = iterator->elem;
811ef482 3395
b809f232
CB
3396 if (netdev->type != LXC_NET_PHYS)
3397 continue;
3398
3399 /* Retrieve the name of the interface in the container's network
3400 * namespace.
3401 */
3402 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 3403 WARN("No interface corresponding to ifindex %d",
b809f232 3404 netdev->ifindex);
811ef482
CB
3405 continue;
3406 }
b809f232 3407
0037ab49 3408 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3409 if (ret < 0)
811ef482
CB
3410 WARN("Error moving network device \"%s\" back to "
3411 "network namespace", ifname);
b809f232
CB
3412 else
3413 TRACE("Moved network device \"%s\" back to network "
3414 "namespace", ifname);
811ef482 3415 }
811ef482 3416
b809f232 3417 ret = setns(oldfd, CLONE_NEWNET);
811ef482 3418 close(oldfd);
b809f232
CB
3419 if (ret < 0) {
3420 SYSERROR("Failed to enter network namespace");
3421 return -1;
3422 }
3423
3424 return 0;
811ef482
CB
3425}
3426
3427static int setup_hw_addr(char *hwaddr, const char *ifname)
3428{
3429 struct sockaddr sockaddr;
3430 struct ifreq ifr;
6d1400b5 3431 int ret, fd;
811ef482
CB
3432
3433 ret = lxc_convert_mac(hwaddr, &sockaddr);
3434 if (ret) {
6d1400b5 3435 errno = -ret;
3436 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3437 return -1;
3438 }
3439
3440 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3441 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3442 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3443
ad9429e5 3444 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3445 if (fd < 0)
3446 return -1;
3447
3448 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3449 if (ret)
6d1400b5 3450 SYSERROR("Failed to perform ioctl");
3451
3452 close(fd);
811ef482
CB
3453
3454 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3455 ifr.ifr_name);
3456
3457 return ret;
3458}
3459
3460static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3461{
3462 struct lxc_list *iterator;
3463 int err;
3464
3465 lxc_list_for_each(iterator, ip) {
3466 struct lxc_inetdev *inetdev = iterator->elem;
3467
3468 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3469 &inetdev->bcast, inetdev->prefix);
3470 if (err) {
6d1400b5 3471 errno = -err;
3472 SYSERROR("Failed to setup ipv4 address for network device "
d4a7da46 3473 "with ifindex %d", ifindex);
811ef482
CB
3474 return -1;
3475 }
3476 }
3477
3478 return 0;
3479}
3480
3481static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3482{
3483 struct lxc_list *iterator;
3484 int err;
3485
3486 lxc_list_for_each(iterator, ip) {
3487 struct lxc_inet6dev *inet6dev = iterator->elem;
3488
3489 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3490 &inet6dev->mcast, &inet6dev->acast,
3491 inet6dev->prefix);
3492 if (err) {
6d1400b5 3493 errno = -err;
3494 SYSERROR("Failed to setup ipv6 address for network device "
d4a7da46 3495 "with ifindex %d", ifindex);
811ef482
CB
3496 return -1;
3497 }
3498 }
3499
3500 return 0;
3501}
3502
3503static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3504{
3505 char ifname[IFNAMSIZ];
3506 int err;
811ef482 3507 char *current_ifname = ifname;
009d6127 3508 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3509
3510 /* empty network namespace */
3511 if (!netdev->ifindex) {
3512 if (netdev->flags & IFF_UP) {
3513 err = lxc_netdev_up("lo");
3514 if (err) {
6d1400b5 3515 errno = -err;
3516 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3517 return -1;
3518 }
3519 }
3520
3521 if (netdev->type == LXC_NET_EMPTY)
3522 return 0;
3523
3524 if (netdev->type == LXC_NET_NONE)
3525 return 0;
3526
e389f2af
CB
3527 netdev->ifindex = if_nametoindex(netdev->created_name);
3528 if (!netdev->ifindex)
3529 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3c09b97c 3530 netdev->created_name ?: "(null)");
811ef482
CB
3531 }
3532
3533 /* get the new ifindex in case of physical netdev */
3534 if (netdev->type == LXC_NET_PHYS) {
3535 netdev->ifindex = if_nametoindex(netdev->link);
3536 if (!netdev->ifindex) {
3537 ERROR("Failed to get ifindex for network device \"%s\"",
3538 netdev->link);
3539 return -1;
3540 }
3541 }
3542
3543 /* retrieve the name of the interface */
3544 if (!if_indextoname(netdev->ifindex, current_ifname)) {
e389f2af
CB
3545 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3546 netdev->ifindex);
811ef482
CB
3547 return -1;
3548 }
3549
e389f2af 3550 /* Default: let the system choose an interface name.
811ef482
CB
3551 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3552 * netlink will replace the format specifier with an appropriate index.
3553 */
de4855a8
CB
3554 if (netdev->name[0] == '\0') {
3555 if (netdev->type == LXC_NET_PHYS)
94b1cade 3556 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
de4855a8 3557 else
94b1cade 3558 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
de4855a8 3559 }
811ef482
CB
3560
3561 /* rename the interface name */
e389f2af
CB
3562 if (strcmp(current_ifname, netdev->name) != 0) {
3563 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
811ef482 3564 if (err) {
6d1400b5 3565 errno = -err;
3566 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
e389f2af 3567 current_ifname, netdev->name);
811ef482
CB
3568 return -1;
3569 }
e389f2af
CB
3570
3571 TRACE("Renamed network device from \"%s\" to \"%s\"",
3572 current_ifname, netdev->name);
811ef482
CB
3573 }
3574
3575 /* Re-read the name of the interface because its name has changed
3576 * and would be automatically allocated by the system
3577 */
3578 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3579 ERROR("Failed get name for network device with ifindex %d",
3580 netdev->ifindex);
3581 return -1;
3582 }
3583
790255cf
CB
3584 /* Now update the recorded name of the network device to reflect the
3585 * name of the network device in the child's network namespace. We will
3586 * later on send this information back to the parent.
3587 */
94b1cade 3588 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
790255cf 3589
811ef482
CB
3590 /* set a mac address */
3591 if (netdev->hwaddr) {
3592 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3593 ERROR("Failed to setup hw address for network device \"%s\"",
3594 current_ifname);
3595 return -1;
3596 }
3597 }
3598
3599 /* setup ipv4 addresses on the interface */
3600 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3601 ERROR("Failed to setup ip addresses for network device \"%s\"",
e389f2af 3602 current_ifname);
811ef482
CB
3603 return -1;
3604 }
3605
3606 /* setup ipv6 addresses on the interface */
3607 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3608 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
e389f2af 3609 current_ifname);
811ef482
CB
3610 return -1;
3611 }
3612
3613 /* set the network device up */
3614 if (netdev->flags & IFF_UP) {
811ef482
CB
3615 err = lxc_netdev_up(current_ifname);
3616 if (err) {
6d1400b5 3617 errno = -err;
3618 SYSERROR("Failed to set network device \"%s\" up",
3619 current_ifname);
811ef482
CB
3620 return -1;
3621 }
3622
3623 /* the network is up, make the loopback up too */
3624 err = lxc_netdev_up("lo");
3625 if (err) {
6d1400b5 3626 errno = -err;
3627 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3628 return -1;
3629 }
3630 }
3631
811ef482 3632 /* setup ipv4 gateway on the interface */
a2f9a670 3633 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
811ef482
CB
3634 if (!(netdev->flags & IFF_UP)) {
3635 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3636 "\"%s\" when not bringing up the interface", current_ifname);
811ef482
CB
3637 return -1;
3638 }
3639
3640 if (lxc_list_empty(&netdev->ipv4)) {
3641 ERROR("Cannot add ipv4 gateway for network device "
e389f2af 3642 "\"%s\" when not assigning an address", current_ifname);
811ef482
CB
3643 return -1;
3644 }
3645
a2f9a670 3646 /* Setup device route if ipv4_gateway_dev is enabled */
3647 if (netdev->ipv4_gateway_dev) {
3648 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3649 if (err < 0) {
3650 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
e389f2af 3651 current_ifname);
a2f9a670 3652 return minus_one_set_errno(-err);
811ef482 3653 }
a2f9a670 3654 } else {
009d6127 3655 /* Check the gateway address is valid */
3656 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3657 return minus_one_set_errno(errno);
3658
3659 /* Try adding a default route to the gateway address */
811ef482 3660 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3661 if (err < 0) {
3662 /* If adding the default route fails, this could be because the
3663 * gateway address is in a different subnet to the container's address.
3664 * To work around this, we try adding a static device route to the
3665 * gateway address first, and then try again.
3666 */
a2f9a670 3667 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
009d6127 3668 if (err < 0) {
a2f9a670 3669 errno = -err;
009d6127 3670 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
e389f2af 3671 bufinet4, current_ifname);
009d6127 3672 return -1;
a2f9a670 3673 }
6d1400b5 3674
a2f9a670 3675 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3676 if (err < 0) {
a2f9a670 3677 errno = -err;
009d6127 3678 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
e389f2af 3679 bufinet4, current_ifname);
a2f9a670 3680 return -1;
811ef482 3681 }
811ef482
CB
3682 }
3683 }
3684 }
3685
3686 /* setup ipv6 gateway on the interface */
a2f9a670 3687 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
811ef482 3688 if (!(netdev->flags & IFF_UP)) {
e389f2af
CB
3689 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3690 current_ifname);
811ef482
CB
3691 return -1;
3692 }
3693
3694 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
e389f2af
CB
3695 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3696 current_ifname);
811ef482
CB
3697 return -1;
3698 }
3699
a2f9a670 3700 /* Setup device route if ipv6_gateway_dev is enabled */
3701 if (netdev->ipv6_gateway_dev) {
3702 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3703 if (err < 0) {
3704 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
e389f2af 3705 current_ifname);
a2f9a670 3706 return minus_one_set_errno(-err);
811ef482 3707 }
a2f9a670 3708 } else {
009d6127 3709 /* Check the gateway address is valid */
3710 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3711 return minus_one_set_errno(errno);
3712
3713 /* Try adding a default route to the gateway address */
811ef482 3714 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3715 if (err < 0) {
3716 /* If adding the default route fails, this could be because the
3717 * gateway address is in a different subnet to the container's address.
3718 * To work around this, we try adding a static device route to the
3719 * gateway address first, and then try again.
3720 */
a2f9a670 3721 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
009d6127 3722 if (err < 0) {
a2f9a670 3723 errno = -err;
009d6127 3724 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
e389f2af 3725 bufinet6, current_ifname);
009d6127 3726 return -1;
a2f9a670 3727 }
6d1400b5 3728
a2f9a670 3729 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3730 if (err < 0) {
a2f9a670 3731 errno = -err;
009d6127 3732 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
e389f2af 3733 bufinet6, current_ifname);
a2f9a670 3734 return -1;
811ef482 3735 }
811ef482
CB
3736 }
3737 }
3738 }
3739
74c6e2b0 3740 DEBUG("Network device \"%s\" has been setup", current_ifname);
811ef482
CB
3741
3742 return 0;
3743}
3744
3745int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3746 struct lxc_list *network)
3747{
3748 struct lxc_list *iterator;
811ef482 3749
811ef482 3750 lxc_list_for_each(iterator, network) {
e389f2af 3751 struct lxc_netdev *netdev = iterator->elem;
811ef482 3752
811ef482 3753 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
e389f2af 3754 ERROR("Failed to setup netdev");
811ef482
CB
3755 return -1;
3756 }
3757 }
3758
3759 if (!lxc_list_empty(network))
e389f2af 3760 INFO("Network has been setup");
811ef482
CB
3761
3762 return 0;
3763}
7ab1ba02 3764
3c09b97c 3765int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3766{
3767 struct lxc_list *iterator;
3768 struct lxc_list *network = &handler->conf->network;
3769 int data_sock = handler->data_sock[0];
3770
7ab1ba02
CB
3771 lxc_list_for_each(iterator, network) {
3772 int ret;
3773 struct lxc_netdev *netdev = iterator->elem;
3774
3c09b97c 3775 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3776 continue;
3777
7fbb15ec 3778 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3779 if (ret < 0)
7ab1ba02 3780 return -1;
e389f2af
CB
3781
3782 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3783 if (ret < 0)
3784 return -1;
3785
3786 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3787 }
3788
3789 return 0;
3790}
3791
3c09b97c 3792int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3793{
3794 struct lxc_list *iterator;
3795 struct lxc_list *network = &handler->conf->network;
3796 int data_sock = handler->data_sock[1];
3797
7ab1ba02
CB
3798 lxc_list_for_each(iterator, network) {
3799 int ret;
3800 struct lxc_netdev *netdev = iterator->elem;
3801
3c09b97c 3802 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3803 continue;
3804
e3233f26 3805 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3806 if (ret < 0)
7ab1ba02 3807 return -1;
e389f2af
CB
3808
3809 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3810 if (ret < 0)
3811 return -1;
3812 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3813 }
3814
3815 return 0;
3816}
a1ae535a
CB
3817
3818int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3819{
3820 struct lxc_list *iterator, *network;
3821 int data_sock = handler->data_sock[0];
3822
3823 if (!handler->am_root)
3824 return 0;
3825
3826 network = &handler->conf->network;
3827 lxc_list_for_each(iterator, network) {
3828 int ret;
3829 struct lxc_netdev *netdev = iterator->elem;
3830
3831 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3832 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3833 if (ret < 0)
7729f8e5 3834 return -1;
a1ae535a
CB
3835
3836 /* Send network device ifindex in the child's namespace to
3837 * parent.
3838 */
7fbb15ec 3839 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3840 if (ret < 0)
7729f8e5 3841 return -1;
a1ae535a
CB
3842 }
3843
e389f2af
CB
3844 if (!lxc_list_empty(network))
3845 TRACE("Sent network device names and ifindices to parent");
3846
a1ae535a 3847 return 0;
a1ae535a
CB
3848}
3849
3850int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3851{
3852 struct lxc_list *iterator, *network;
3853 int data_sock = handler->data_sock[1];
3854
3855 if (!handler->am_root)
3856 return 0;
3857
3858 network = &handler->conf->network;
3859 lxc_list_for_each(iterator, network) {
3860 int ret;
3861 struct lxc_netdev *netdev = iterator->elem;
3862
3863 /* Receive network device name in the child's namespace to
3864 * parent.
3865 */
e3233f26 3866 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3867 if (ret < 0)
7729f8e5 3868 return -1;
a1ae535a
CB
3869
3870 /* Receive network device ifindex in the child's namespace to
3871 * parent.
3872 */
e3233f26 3873 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3874 if (ret < 0)
7729f8e5 3875 return -1;
a1ae535a
CB
3876 }
3877
3878 return 0;
a1ae535a 3879}
bb84beda
CB
3880
3881void lxc_delete_network(struct lxc_handler *handler)
3882{
3883 bool bret;
3884
3885 if (handler->am_root)
3886 bret = lxc_delete_network_priv(handler);
3887 else
3888 bret = lxc_delete_network_unpriv(handler);
3889 if (!bret)
3890 DEBUG("Failed to delete network devices");
3891 else
3892 DEBUG("Deleted network devices");
3893}
1cd95214 3894
1cd95214
CB
3895int lxc_netns_set_nsid(int fd)
3896{
41a3300d 3897 int ret;
0ce60f0d
CB
3898 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3899 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3900 NLMSG_ALIGN(1024)];
1cd95214 3901 struct nl_handler nlh;
0ce60f0d
CB
3902 struct nlmsghdr *hdr;
3903 struct rtgenmsg *msg;
bfcedc7e 3904 int saved_errno;
9d036caa
CB
3905 const __s32 ns_id = -1;
3906 const __u32 netns_fd = fd;
1cd95214
CB
3907
3908 ret = netlink_open(&nlh, NETLINK_ROUTE);
3909 if (ret < 0)
41a3300d 3910 return -1;
1cd95214 3911
0ce60f0d 3912 memset(buf, 0, sizeof(buf));
6ce39620
CB
3913
3914#pragma GCC diagnostic push
3915#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3916 hdr = (struct nlmsghdr *)buf;
3917 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3918#pragma GCC diagnostic pop
1cd95214 3919
0ce60f0d
CB
3920 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3921 hdr->nlmsg_type = RTM_NEWNSID;
3922 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3923 hdr->nlmsg_pid = 0;
3924 hdr->nlmsg_seq = RTM_NEWNSID;
3925 msg->rtgen_family = AF_UNSPEC;
1cd95214 3926
9d036caa
CB
3927 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3928 if (ret < 0)
3929 goto on_error;
3930
3931 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3932 if (ret < 0)
3933 goto on_error;
1cd95214 3934
9fbbc427 3935 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
3936
3937on_error:
bfcedc7e 3938 saved_errno = errno;
1cd95214 3939 netlink_close(&nlh);
bfcedc7e 3940 errno = saved_errno;
1cd95214 3941
9d036caa 3942 return ret;
1cd95214 3943}
938980ba
CB
3944
/*
 * Index a run of routing attributes by type.
 *
 * @tb:  table with room for @max + 1 entries; it is zeroed first.
 * @max: highest attribute type that is recorded.
 * @rta: first attribute of the run.
 * @len: number of bytes available starting at @rta.
 *
 * For every attribute type <= @max the first occurrence is stored in
 * tb[type]; later duplicates and larger types are ignored. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, (max + 1) * sizeof(tb[0]));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short type = rta->rta_type;

		if (type > max || tb[type])
			continue;

		tb[type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
3964
/*
 * Read the payload of @rta as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() rather than read through a cast
 * pointer, so the const qualifier is respected and no alignment of the
 * payload is assumed.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
3969
3970#ifndef NETNS_RTA
3971#define NETNS_RTA(r) \
3972 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
3973#endif
3974
3975int lxc_netns_get_nsid(int fd)
3976{
3977 int ret;
3978 ssize_t len;
3979 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3980 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3981 NLMSG_ALIGN(1024)];
938980ba
CB
3982 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3983 struct nl_handler nlh;
3984 struct nlmsghdr *hdr;
3985 struct rtgenmsg *msg;
3986 int saved_errno;
3987 __u32 netns_fd = fd;
3988
3989 ret = netlink_open(&nlh, NETLINK_ROUTE);
3990 if (ret < 0)
3991 return -1;
3992
3993 memset(buf, 0, sizeof(buf));
6ce39620
CB
3994
3995#pragma GCC diagnostic push
3996#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3997 hdr = (struct nlmsghdr *)buf;
3998 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3999#pragma GCC diagnostic pop
938980ba
CB
4000
4001 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4002 hdr->nlmsg_type = RTM_GETNSID;
4003 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4004 hdr->nlmsg_pid = 0;
4005 hdr->nlmsg_seq = RTM_GETNSID;
4006 msg->rtgen_family = AF_UNSPEC;
4007
9d036caa
CB
4008 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4009 if (ret == 0)
4010 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 4011
938980ba
CB
4012 saved_errno = errno;
4013 netlink_close(&nlh);
4014 errno = saved_errno;
4015 if (ret < 0)
4016 return -1;
4017
9d036caa 4018 errno = EINVAL;
938980ba
CB
4019 msg = NLMSG_DATA(hdr);
4020 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4021 if (len < 0)
4022 return -1;
4023
6ce39620
CB
4024#pragma GCC diagnostic push
4025#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4026 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4027 if (tb[__LXC_NETNSA_NSID])
4028 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4029#pragma GCC diagnostic pop
938980ba
CB
4030
4031 return -1;
4032}
e389f2af
CB
4033
4034int lxc_create_network(struct lxc_handler *handler)
4035{
4036 int ret;
4037
e389f2af
CB
4038 if (handler->am_root) {
4039 ret = lxc_create_network_priv(handler);
4040 if (ret)
4041 return -1;
4042
4043 return lxc_network_move_created_netdev_priv(handler);
4044 }
4045
4046 return lxc_create_network_unpriv(handler);
4047}